Merge pull request #31 from 4Aiur/pr

分页获取未删除数据，修复数量过多无法一次全部获取的问题
2024-12-28 15:09:10 +00:00 · 2024-09-15 23:16:36 +08:00 · 2024-09-15 23:16:36 +08:00 · 8cbf5c9330
commit 8cbf5c9330
parent 37479661a3 3806e7ff07
2 changed files with 37 additions and 16 deletions
--- a/fetch_all_message.py
+++ b/fetch_all_message.py
@ -1,4 +1,5 @@
 import json
 import math
 import os
 import re
 import sys
@ -11,17 +12,19 @@ from util import LoginUtil
 WORKDIR = "./resource/fetch-all/"
 MESSAGE_SAMPLE = 'msg-one.json'
 MESSAGE_ALL = 'msg-all.json'
-
+cookies = None
 # 获取所有可见的未删除的说说+高清图片（包含2014年之前）
 def get_visible_msg_list():
    global cookies
    if cookies is None:
        cookies = LoginUtil.cookie()
    # 1. 获取说说总条数
    try:
        msgSample = read_txt_file(MESSAGE_SAMPLE)
    except FileNotFoundError as e:
-        print("样本缓存未找到，开始请求获取样本")
+        # 样本缓存未找到，开始请求获取样本
        qqResponse = get_msg_list(1)
-        print("创建缓存文件并写入")
+        # 创建缓存文件并写入
        write_txt_file(MESSAGE_SAMPLE, qqResponse)
        msgSample = read_txt_file(MESSAGE_SAMPLE)
@ -34,14 +37,27 @@ def get_visible_msg_list():
        sys.exit(1)
    # 2. 获取所有说说数据
    print('开始不分页获取所有未删除说说')
    try:
        msgAll = read_txt_file(MESSAGE_ALL)
    except FileNotFoundError as e:
-        print("缓存未找到，开始请求获取所有未删除说说")
+        # 缓存未找到，准备分页获取所有未删除说说
-        qqResponse = get_msg_list(totalCount)
+        # 一页30条
-        write_txt_file(MESSAGE_ALL, qqResponse)
+        defaultPageSize = 30
-        msgAll = read_txt_file(MESSAGE_ALL)
+        # 总页数
        totalPageNum = math.ceil(totalCount / defaultPageSize)
        # 用于存储所有页的数据
        allPageData = []
        print(f"一共{totalPageNum}页")
        for currentPageNum in range(0, totalPageNum):
            # 数据偏移量
            pos = currentPageNum * defaultPageSize
            print(
                f"一页{defaultPageSize}条, 获取第{currentPageNum + 1}页")
            qqResponse = get_msg_list(defaultPageSize, pos)
            currentPageData = json.loads(qqResponse)["msglist"]
            allPageData.extend(currentPageData)
        msgAll = json.dumps({"msglist": allPageData}, ensure_ascii=False, indent=2)
        write_txt_file(MESSAGE_ALL, msgAll)
    try:
        json_dict = json.loads(msgAll)
@ -96,9 +112,8 @@ def get_visible_msg_list():
    write_txt_file("所有可见说说.md", markdown_content)
-def get_msg_list(num):
+def get_msg_list(pageSize, offset=0):
    url = 'https://user.qzone.qq.com/proxy/domain/taotao.qq.com/cgi-bin/emotion_cgi_msglist_v6'
    cookies = LoginUtil.cookie()
    g_tk = LoginUtil.bkn(cookies.get('p_skey'))
    qqNumber = re.sub(r'o0*', '', cookies.get('uin'))
    skey = cookies.get('skey')
@ -124,8 +139,8 @@ def get_msg_list(num):
        'uin': f'{qqNumber}',
        'ftype': '0',
        'sort': '0',
-        'pos': '0',
+        'pos': f'{offset}',
-        'num': f'{num}',
+        'num': f'{pageSize}',
        'replynum': '100',
        'g_tk': f'{g_tk}',
        'callback': '_preloadCallback',
@ -159,7 +174,6 @@ def write_txt_file(file_name, data):
 def read_txt_file(file_name):
    base_path_file_name = os.path.join(WORKDIR, file_name)
    if os.path.exists(base_path_file_name):
        print("读取缓存文件")
        with open(base_path_file_name, 'r', encoding='utf-8') as file:
            return file.read()
    else:
--- a/util/LoginUtil.py
+++ b/util/LoginUtil.py
@ -1,8 +1,10 @@
 import platform
 import sys
 import qrcode
 import requests
 from PIL import Image
-import qrcode
+
 import platform
 try:
    from pyzbar.pyzbar import decode
 except Exception as e:
@ -10,8 +12,13 @@ except Exception as e:
    if platform.system() == "Linux":
        print("对于基于 RPM 的系统（如 Fedora), 您可以运行以下命令:")
        print("sudo dnf install -y zbar")
    elif platform.system() == "Darwin":
        print("MacOS 安装 zbar 请参考:")
        print("https://github.com/LibraHp/GetQzonehistory/issues/23#issuecomment-2349269027")
        sys.exit(1)
    print("有关更多安装指南，请参考 zbar 的官方文档或您的发行版文档。")
    sys.exit(1)
 import time
 import re
 import util.ConfigUtil as Config