diff --git a/fetch_all_message.py b/fetch_all_message.py index 15afacf..6efe267 100644 --- a/fetch_all_message.py +++ b/fetch_all_message.py @@ -1,4 +1,5 @@ import json +import math import os import re import sys @@ -11,17 +12,19 @@ from util import LoginUtil WORKDIR = "./resource/fetch-all/" MESSAGE_SAMPLE = 'msg-one.json' MESSAGE_ALL = 'msg-all.json' - - +cookies = None # 获取所有可见的未删除的说说+高清图片(包含2014年之前) def get_visible_msg_list(): + global cookies + if cookies is None: + cookies = LoginUtil.cookie() # 1. 获取说说总条数 try: msgSample = read_txt_file(MESSAGE_SAMPLE) except FileNotFoundError as e: - print("样本缓存未找到,开始请求获取样本") + # 样本缓存未找到,开始请求获取样本 qqResponse = get_msg_list(1) - print("创建缓存文件并写入") + # 创建缓存文件并写入 write_txt_file(MESSAGE_SAMPLE, qqResponse) msgSample = read_txt_file(MESSAGE_SAMPLE) @@ -34,14 +37,27 @@ def get_visible_msg_list(): sys.exit(1) # 2. 获取所有说说数据 - print('开始不分页获取所有未删除说说') try: msgAll = read_txt_file(MESSAGE_ALL) except FileNotFoundError as e: - print("缓存未找到,开始请求获取所有未删除说说") - qqResponse = get_msg_list(totalCount) - write_txt_file(MESSAGE_ALL, qqResponse) - msgAll = read_txt_file(MESSAGE_ALL) + # 缓存未找到,准备分页获取所有未删除说说 + # 一页30条 + defaultPageSize = 30 + # 总页数 + totalPageNum = math.ceil(totalCount / defaultPageSize) + # 用于存储所有页的数据 + allPageData = [] + print(f"一共{totalPageNum}页") + for currentPageNum in range(0, totalPageNum): + # 数据偏移量 + pos = currentPageNum * defaultPageSize + print( + f"一页{defaultPageSize}条, 获取第{currentPageNum + 1}页") + qqResponse = get_msg_list(defaultPageSize, pos) + currentPageData = json.loads(qqResponse)["msglist"] + allPageData.extend(currentPageData) + msgAll = json.dumps({"msglist": allPageData}, ensure_ascii=False, indent=2) + write_txt_file(MESSAGE_ALL, msgAll) try: json_dict = json.loads(msgAll) @@ -96,9 +112,8 @@ def get_visible_msg_list(): write_txt_file("所有可见说说.md", markdown_content) -def get_msg_list(num): +def get_msg_list(pageSize, offset=0): url = 
'https://user.qzone.qq.com/proxy/domain/taotao.qq.com/cgi-bin/emotion_cgi_msglist_v6' - cookies = LoginUtil.cookie() g_tk = LoginUtil.bkn(cookies.get('p_skey')) qqNumber = re.sub(r'o0*', '', cookies.get('uin')) skey = cookies.get('skey') @@ -124,8 +139,8 @@ def get_msg_list(num): 'uin': f'{qqNumber}', 'ftype': '0', 'sort': '0', - 'pos': '0', - 'num': f'{num}', + 'pos': f'{offset}', + 'num': f'{pageSize}', 'replynum': '100', 'g_tk': f'{g_tk}', 'callback': '_preloadCallback', @@ -159,7 +174,6 @@ def write_txt_file(file_name, data): def read_txt_file(file_name): base_path_file_name = os.path.join(WORKDIR, file_name) if os.path.exists(base_path_file_name): - print("读取缓存文件") with open(base_path_file_name, 'r', encoding='utf-8') as file: return file.read() else: diff --git a/util/LoginUtil.py b/util/LoginUtil.py index 590328f..1631e1d 100644 --- a/util/LoginUtil.py +++ b/util/LoginUtil.py @@ -1,8 +1,10 @@ +import platform import sys + +import qrcode import requests from PIL import Image -import qrcode -import platform + try: from pyzbar.pyzbar import decode except Exception as e: @@ -10,8 +12,13 @@ except Exception as e: if platform.system() == "Linux": print("对于基于 RPM 的系统(如 Fedora), 您可以运行以下命令:") print("sudo dnf install -y zbar") + elif platform.system() == "Darwin": + print("MacOS 安装 zbar 请参考:") + print("https://github.com/LibraHp/GetQzonehistory/issues/23#issuecomment-2349269027") + sys.exit(1) print("有关更多安装指南,请参考 zbar 的官方文档或您的发行版文档。") sys.exit(1) + import time import re import util.ConfigUtil as Config