同步single分支修改

This commit is contained in:
LibraHp_0928 2024-09-11 09:46:59 +08:00
parent fca8508768
commit e39eb40d06

86
main.py
View File

@ -5,7 +5,11 @@ import util.ToolsUtil as Tools
import util.ConfigUtil as Config import util.ConfigUtil as Config
import pandas as pd import pandas as pd
import signal import signal
import os
import re
from tqdm import trange,tqdm
import requests
import time
# 信号处理函数 # 信号处理函数
def signal_handler(signal, frame): def signal_handler(signal, frame):
@ -15,11 +19,65 @@ def signal_handler(signal, frame):
exit(0) exit(0)
def save_data(): def save_data():
df = pd.DataFrame(texts, columns=['时间', '内容']) df = pd.DataFrame(texts, columns=['时间', '内容'])
df.to_excel(Config.result_path + Request.uin + '.xlsx', index=False) df.to_excel(Config.result_path + Request.uin + '.xlsx', index=False)
print('导出成功,请查看 ' + Config.result_path + Request.uin + '.xlsx') print('导出成功,请查看 ' + Config.result_path + Request.uin + '.xlsx')
def save_data():
user_save_path = Config.result_path + Request.uin + '/'
pic_save_path = user_save_path + 'pic/'
if not os.path.exists(user_save_path):
os.makedirs(user_save_path)
print(f"Created directory: {user_save_path}")
if not os.path.exists(pic_save_path):
os.makedirs(pic_save_path)
print(f"Created directory: {pic_save_path}")
pd.DataFrame(texts, columns=['时间', '内容', '图片链接']).to_excel(user_save_path + Request.uin + '_全部列表.xlsx', index=False)
pd.DataFrame(all_friends, columns=['昵称', 'QQ', '空间主页']).to_excel(user_save_path + Request.uin + '_好友列表.xlsx', index=False)
for item in tqdm(texts, desc="处理消息列表", unit="item"):
item_text = item[1]
item_pic_link = item[2]
if item_pic_link is not None and len(item_pic_link) > 0 and 'http' in item_pic_link:
# 保存图片
pic_name = re.sub(r'[\\/:*?"<>|]', '_', item_text) + '.jpg'
# 去除文件名中的空格
pic_name = pic_name.replace(' ', '')
# 限制文件名长度
if len(pic_name) > 40:
pic_name = pic_name[:40] + '.jpg'
# pic_name = pic_name.split('')[1] + '.jpg'
response = requests.get(item_pic_link)
if response.status_code == 200:
with open(pic_save_path + pic_name, 'wb') as f:
f.write(response.content)
if user_nickname in item_text:
if '留言' in item_text:
leave_message.append(item)
elif '转发' in item_text:
forward_message.append(item)
else:
user_message.append(item)
else:
other_message.append(item)
pd.DataFrame(user_message, columns=['时间', '内容', '图片链接']).to_excel(user_save_path + Request.uin + '_说说列表.xlsx', index=False)
pd.DataFrame(forward_message, columns=['时间', '内容', '图片链接']).to_excel(user_save_path + Request.uin + '_转发列表.xlsx', index=False)
pd.DataFrame(leave_message, columns=['时间', '内容', '图片链接']).to_excel(user_save_path + Request.uin + '_留言列表.xlsx', index=False)
pd.DataFrame(other_message, columns=['时间', '内容', '图片链接']).to_excel(user_save_path + Request.uin + '_其他列表.xlsx', index=False)
print('\033[36m' + '导出成功,请查看 ' + user_save_path + Request.uin + ' 文件夹内容' + '\033[0m')
print('\033[32m' + '共有 ' + str(len(texts)) + ' 条消息' + '\033[0m')
print('\033[36m' + '最早的一条说说发布在' + texts[texts.__len__() - 1][0] + '\033[0m')
print('\033[32m' + '好友列表共有 ' + str(len(all_friends)) + ' 个好友' + '\033[0m')
print('\033[36m' + '说说列表共有 ' + str(len(user_message)) + ' 条说说' + '\033[0m')
print('\033[32m' + '转发列表共有 ' + str(len(forward_message)) + ' 条转发' + '\033[0m')
print('\033[36m' + '留言列表共有 ' + str(len(leave_message)) + ' 条留言' + '\033[0m')
print('\033[32m' + '其他列表共有 ' + str(len(other_message)) + ' 条内容' + '\033[0m')
print('\033[36m' + '图片列表共有 ' + str(len(os.listdir(pic_save_path))) + ' 张图片' + '\033[0m')
current_directory = os.getcwd()
os.startfile(current_directory + user_save_path[1:])
os.system('pause')
if __name__ == '__main__': if __name__ == '__main__':
try: try:
@ -30,6 +88,11 @@ if __name__ == '__main__':
print(f"登录失败:请重新登录,错误信息:{str(e)}") print(f"登录失败:请重新登录,错误信息:{str(e)}")
exit(0) exit(0)
texts = [] texts = []
all_friends = []
other_message = []
user_message = []
leave_message = []
forward_message = []
count = Request.get_message_count() count = Request.get_message_count()
try: try:
# 注册信号处理函数 # 注册信号处理函数
@ -38,24 +101,37 @@ if __name__ == '__main__':
for i in trange(int(count / 100) + 1, desc='Progress', unit='100条'): for i in trange(int(count / 100) + 1, desc='Progress', unit='100条'):
message = Request.get_message(i * 100, 100).content.decode('utf-8') message = Request.get_message(i * 100, 100).content.decode('utf-8')
time.sleep(0.2)
html = Tools.process_old_html(message) html = Tools.process_old_html(message)
if "li" not in html: if "li" not in html:
continue continue
soup = BeautifulSoup(html, 'html.parser') soup = BeautifulSoup(html, 'html.parser')
for element in soup.find_all('li', class_='f-single f-s-s'): for element in soup.find_all('li', class_='f-single f-s-s'):
time = None put_time = None
text = None text = None
img = None
friend_element = element.find('a', class_='f-name q_namecard')
# 获取好友昵称和QQ
if friend_element is not None:
friend_name = friend_element.get_text()
friend_qq = friend_element.get('link')[9:]
friend_link = friend_element.get('href')
if friend_qq not in [sublist[1] for sublist in all_friends]:
all_friends.append([friend_name, friend_qq, friend_link])
time_element = element.find('div', class_='info-detail') time_element = element.find('div', class_='info-detail')
text_element = element.find('p', class_='txt-box-title ellipsis-one') text_element = element.find('p', class_='txt-box-title ellipsis-one')
img_element = element.find('a', class_='img-item')
if time_element is not None and text_element is not None: if time_element is not None and text_element is not None:
time = time_element.get_text().replace('\xa0', ' ') put_time = time_element.get_text().replace('\xa0', ' ')
text = text_element.get_text().replace('\xa0', ' ') text = text_element.get_text().replace('\xa0', ' ')
if img_element is not None:
img = img_element.find('img').get('src')
if text not in [sublist[1] for sublist in texts]: if text not in [sublist[1] for sublist in texts]:
texts.append([time, text]) texts.append([put_time, text, img])
if len(texts) > 0: if len(texts) > 0:
save_data() save_data()
except Exception as e: except Exception as e:
print(f"发生异常: {str(e)}") print(f"发生异常: {str(e)}")
if len(texts) > 0: if len(texts) > 0:
save_data() save_data()