Mirror of https://github.com/LibraHp/GetQzonehistory.git, synced 2025-02-23 05:49:07 +00:00
添加获取说说发布时间的逻辑
This commit is contained in:
parent: a8ae14e88f
commit: 1bf7727321
18
main.py
18
main.py
@@ -16,7 +16,7 @@ def signal_handler(signal, frame):
|
|||||||
|
|
||||||
|
|
||||||
def save_data():
|
def save_data():
|
||||||
df = pd.DataFrame(texts, columns=['内容'])
|
df = pd.DataFrame(texts, columns=['时间', '内容'])
|
||||||
df.to_excel(Config.result_path + Request.uin + '.xlsx', index=False)
|
df.to_excel(Config.result_path + Request.uin + '.xlsx', index=False)
|
||||||
print('导出成功,请查看 ' + Config.result_path + Request.uin + '.xlsx')
|
print('导出成功,请查看 ' + Config.result_path + Request.uin + '.xlsx')
|
||||||
|
|
||||||
@@ -36,16 +36,22 @@ if __name__ == '__main__':
|
|||||||
signal.signal(signal.SIGINT, signal_handler)
|
signal.signal(signal.SIGINT, signal_handler)
|
||||||
signal.signal(signal.SIGTERM, signal_handler)
|
signal.signal(signal.SIGTERM, signal_handler)
|
||||||
|
|
||||||
for i in trange(1000, desc='Progress', unit='iteration'):
|
for i in trange(1000, desc='Progress', unit='100条'):
|
||||||
message = Request.get_message(i * 100, 100).content.decode('utf-8')
|
message = Request.get_message(i * 100, 100).content.decode('utf-8')
|
||||||
html = Tools.process_old_html(message)
|
html = Tools.process_old_html(message)
|
||||||
if "li" not in html:
|
if "li" not in html:
|
||||||
break
|
break
|
||||||
soup = BeautifulSoup(html, 'html.parser')
|
soup = BeautifulSoup(html, 'html.parser')
|
||||||
for element in soup.find_all('p', class_='txt-box-title ellipsis-one'):
|
for element in soup.find_all('li', class_='f-single f-s-s'):
|
||||||
text = element.get_text().replace('\xa0', ' ')
|
time = None
|
||||||
if text not in texts:
|
text = None
|
||||||
texts.append(text)
|
time_element = element.find('div', class_='info-detail')
|
||||||
|
text_element = element.find('p', class_='txt-box-title ellipsis-one')
|
||||||
|
if time_element is not None and text_element is not None:
|
||||||
|
time = time_element.get_text().replace('\xa0', ' ')
|
||||||
|
text = text_element.get_text().replace('\xa0', ' ')
|
||||||
|
if text not in [sublist[1] for sublist in texts] and time is not None and text is not None:
|
||||||
|
texts.append([time, text])
|
||||||
|
|
||||||
if len(texts) > 0:
|
if len(texts) > 0:
|
||||||
save_data()
|
save_data()
|
||||||
|
Loading…
Reference in New Issue
Block a user