Merge pull request #31 from 4Aiur/pr

分页获取未删除数据,修复数量过多无法一次全部获取的问题
This commit is contained in:
LibraHp_0928 2024-09-15 23:16:36 +08:00 committed by GitHub
commit 8cbf5c9330
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 37 additions and 16 deletions

View File

@ -1,4 +1,5 @@
import json import json
import math
import os import os
import re import re
import sys import sys
@ -11,17 +12,19 @@ from util import LoginUtil
WORKDIR = "./resource/fetch-all/" WORKDIR = "./resource/fetch-all/"
MESSAGE_SAMPLE = 'msg-one.json' MESSAGE_SAMPLE = 'msg-one.json'
MESSAGE_ALL = 'msg-all.json' MESSAGE_ALL = 'msg-all.json'
cookies = None
# 获取所有可见的未删除的说说+高清图片包含2014年之前 # 获取所有可见的未删除的说说+高清图片包含2014年之前
def get_visible_msg_list(): def get_visible_msg_list():
global cookies
if cookies is None:
cookies = LoginUtil.cookie()
# 1. 获取说说总条数 # 1. 获取说说总条数
try: try:
msgSample = read_txt_file(MESSAGE_SAMPLE) msgSample = read_txt_file(MESSAGE_SAMPLE)
except FileNotFoundError as e: except FileNotFoundError as e:
print("样本缓存未找到,开始请求获取样本") # 样本缓存未找到,开始请求获取样本
qqResponse = get_msg_list(1) qqResponse = get_msg_list(1)
print("创建缓存文件并写入") # 创建缓存文件并写入
write_txt_file(MESSAGE_SAMPLE, qqResponse) write_txt_file(MESSAGE_SAMPLE, qqResponse)
msgSample = read_txt_file(MESSAGE_SAMPLE) msgSample = read_txt_file(MESSAGE_SAMPLE)
@ -34,14 +37,27 @@ def get_visible_msg_list():
sys.exit(1) sys.exit(1)
# 2. 获取所有说说数据 # 2. 获取所有说说数据
print('开始不分页获取所有未删除说说')
try: try:
msgAll = read_txt_file(MESSAGE_ALL) msgAll = read_txt_file(MESSAGE_ALL)
except FileNotFoundError as e: except FileNotFoundError as e:
print("缓存未找到,开始请求获取所有未删除说说") # 缓存未找到,准备分页获取所有未删除说说
qqResponse = get_msg_list(totalCount) # 一页30条
write_txt_file(MESSAGE_ALL, qqResponse) defaultPageSize = 30
msgAll = read_txt_file(MESSAGE_ALL) # 总页数
totalPageNum = math.ceil(totalCount / defaultPageSize)
# 用于存储所有页的数据
allPageData = []
print(f"一共{totalPageNum}")
for currentPageNum in range(0, totalPageNum):
# 数据偏移量
pos = currentPageNum * defaultPageSize
print(
f"一页{defaultPageSize}条, 获取第{currentPageNum + 1}")
qqResponse = get_msg_list(defaultPageSize, pos)
currentPageData = json.loads(qqResponse)["msglist"]
allPageData.extend(currentPageData)
msgAll = json.dumps({"msglist": allPageData}, ensure_ascii=False, indent=2)
write_txt_file(MESSAGE_ALL, msgAll)
try: try:
json_dict = json.loads(msgAll) json_dict = json.loads(msgAll)
@ -96,9 +112,8 @@ def get_visible_msg_list():
write_txt_file("所有可见说说.md", markdown_content) write_txt_file("所有可见说说.md", markdown_content)
def get_msg_list(num): def get_msg_list(pageSize, offset=0):
url = 'https://user.qzone.qq.com/proxy/domain/taotao.qq.com/cgi-bin/emotion_cgi_msglist_v6' url = 'https://user.qzone.qq.com/proxy/domain/taotao.qq.com/cgi-bin/emotion_cgi_msglist_v6'
cookies = LoginUtil.cookie()
g_tk = LoginUtil.bkn(cookies.get('p_skey')) g_tk = LoginUtil.bkn(cookies.get('p_skey'))
qqNumber = re.sub(r'o0*', '', cookies.get('uin')) qqNumber = re.sub(r'o0*', '', cookies.get('uin'))
skey = cookies.get('skey') skey = cookies.get('skey')
@ -124,8 +139,8 @@ def get_msg_list(num):
'uin': f'{qqNumber}', 'uin': f'{qqNumber}',
'ftype': '0', 'ftype': '0',
'sort': '0', 'sort': '0',
'pos': '0', 'pos': f'{offset}',
'num': f'{num}', 'num': f'{pageSize}',
'replynum': '100', 'replynum': '100',
'g_tk': f'{g_tk}', 'g_tk': f'{g_tk}',
'callback': '_preloadCallback', 'callback': '_preloadCallback',
@ -159,7 +174,6 @@ def write_txt_file(file_name, data):
def read_txt_file(file_name): def read_txt_file(file_name):
base_path_file_name = os.path.join(WORKDIR, file_name) base_path_file_name = os.path.join(WORKDIR, file_name)
if os.path.exists(base_path_file_name): if os.path.exists(base_path_file_name):
print("读取缓存文件")
with open(base_path_file_name, 'r', encoding='utf-8') as file: with open(base_path_file_name, 'r', encoding='utf-8') as file:
return file.read() return file.read()
else: else:

View File

@ -1,8 +1,10 @@
import platform
import sys import sys
import qrcode
import requests import requests
from PIL import Image from PIL import Image
import qrcode
import platform
try: try:
from pyzbar.pyzbar import decode from pyzbar.pyzbar import decode
except Exception as e: except Exception as e:
@ -10,8 +12,13 @@ except Exception as e:
if platform.system() == "Linux": if platform.system() == "Linux":
print("对于基于 RPM 的系统(如 Fedora), 您可以运行以下命令:") print("对于基于 RPM 的系统(如 Fedora), 您可以运行以下命令:")
print("sudo dnf install -y zbar") print("sudo dnf install -y zbar")
elif platform.system() == "Darwin":
print("MacOS 安装 zbar 请参考:")
print("https://github.com/LibraHp/GetQzonehistory/issues/23#issuecomment-2349269027")
sys.exit(1)
print("有关更多安装指南,请参考 zbar 的官方文档或您的发行版文档。") print("有关更多安装指南,请参考 zbar 的官方文档或您的发行版文档。")
sys.exit(1) sys.exit(1)
import time import time
import re import re
import util.ConfigUtil as Config import util.ConfigUtil as Config