From 87d663e12197e13fbe7ef60df3d7c8b7b15b1eab Mon Sep 17 00:00:00 2001
From: LibraHp_0928 <1941163264@qq.com>
Date: Tue, 13 Feb 2024 21:15:37 +0800
Subject: [PATCH] =?UTF-8?q?=E6=B7=BB=E5=8A=A0=E9=94=99=E8=AF=AF=E5=A4=84?=
 =?UTF-8?q?=E7=90=86?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
Reviewer notes (text between the "---" line and the diff is not part of the
commit message):

- The subject decodes to "Add error handling".
- Revised relative to the flattened original of this patch:
  * signal_handler's first parameter no longer shadows the signal module.
  * The handler raises SystemExit instead of calling the site-provided exit().
  * The export runs once, after the try/except, instead of being duplicated in
    the try body and the except branch (a failing export was retried at once).
  * Comments on added lines are in English; removed lines are kept verbatim
    because they must match the pre-image.
- NOTE(review): line breaks were restored from a copy in which they had been
  collapsed. The old-side count of 25 implies two blank context lines that are
  not visible in that copy; they are assumed to sit directly above
  "if __name__ == '__main__':". Confirm against main.py at b2179a1.
- NOTE(review): the post-image blob id on the "index" line was not recomputed
  for the revised content.

 main.py | 55 ++++++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 40 insertions(+), 15 deletions(-)

diff --git a/main.py b/main.py
index b2179a1..b3b4c79 100644
--- a/main.py
+++ b/main.py
@@ -4,25 +4,50 @@ import util.RequestUtil as Request
 import util.ToolsUtil as Tools
 import util.ConfigUtil as Config
 import pandas as pd
+import signal
+
+
+def signal_handler(signum, frame):
+    """Handle SIGINT/SIGTERM: save the collected texts, then exit with status 0."""
+    if texts:
+        save_data()
+    # exit() is a helper injected by the site module; raise SystemExit directly.
+    raise SystemExit(0)
+
+
+def save_data():
+    """Export the module-level ``texts`` list to an .xlsx file and print its path."""
+    output_path = Config.result_path + Request.uin + '.xlsx'
+    df = pd.DataFrame(texts, columns=['内容'])
+    df.to_excel(output_path, index=False)
+    print('导出成功,请查看 ' + output_path)
 
 
 if __name__ == '__main__':
     user_info = Request.get_login_user_info()
     user_nickname = user_info[Request.uin][6]
     print(f"用户<{Request.uin}>,<{user_nickname}>登录成功")
     texts = []
-    for i in trange(1000, desc='Progress', unit='iteration'):
-        message = Request.get_message(i * 100, 100).content.decode('utf-8')
-        html = Tools.process_old_html(message)
-        if "li" not in html:
-            break
-        soup = BeautifulSoup(html, 'html.parser')
-        for element in soup.find_all('p', class_='txt-box-title ellipsis-one'):
-            text = element.get_text().replace('\xa0', ' ')
-            if text not in texts:
-                texts.append(text)
-    # 创建一个DataFrame对象
-    df = pd.DataFrame(texts, columns=['内容'])
-    # 导出为Excel
-    df.to_excel(Config.result_path + Request.uin + '.xlsx', index=False)
-    print('导出成功,请查看 ' + Config.result_path + Request.uin + '.xlsx')
+    try:
+        # Install the handlers first so an interrupted run still exports its data.
+        signal.signal(signal.SIGINT, signal_handler)
+        signal.signal(signal.SIGTERM, signal_handler)
+
+        for i in trange(1000, desc='Progress', unit='iteration'):
+            message = Request.get_message(i * 100, 100).content.decode('utf-8')
+            html = Tools.process_old_html(message)
+            # Stop paging once the processed page contains no "li".
+            if "li" not in html:
+                break
+            soup = BeautifulSoup(html, 'html.parser')
+            for element in soup.find_all('p', class_='txt-box-title ellipsis-one'):
+                text = element.get_text().replace('\xa0', ' ')
+                if text not in texts:
+                    texts.append(text)
+    except Exception as e:
+        # Report the error and fall through; partial results are exported below.
+        print(f"发生异常: {str(e)}")
+
+    # Single export point, outside the try so a failed export is not retried.
+    if texts:
+        save_data()