mirror of
https://github.com/LibraHp/GetQzonehistory.git
synced 2024-12-27 06:29:55 +00:00
160 lines
6.3 KiB
Python
160 lines
6.3 KiB
Python
import json
|
||
import math
|
||
import os
|
||
import re
|
||
import sys
|
||
import time
|
||
|
||
import requests
|
||
from tqdm import tqdm
|
||
|
||
from util import RequestUtil as Request
|
||
from util import LoginUtil
|
||
from util import ToolsUtil as Tool
|
||
|
||
QQ_NUMBER = Request.uin
|
||
WORKDIR = f"./resource/fetch-all/{QQ_NUMBER}" # 通过QQ号进行区分配置文件,防止误加载其他用户信息
|
||
USER_QZONE_INFO = 'user_qzone_info.json'
|
||
QZONE_MOMENTS_ALL = 'qzone_moments_all.json'
|
||
|
||
|
||
# 获取所有可见的未删除的说说+高清图片(包含2014年之前)
|
||
def get_visible_moments_list():
|
||
|
||
# 1. 获取说说总条数
|
||
user_qzone_info = Tool.read_txt_file(WORKDIR, USER_QZONE_INFO)
|
||
if not user_qzone_info:
|
||
# 样本缓存未找到,开始请求获取样本
|
||
qq_userinfo_response = get_user_qzone_info(1)
|
||
Tool.write_txt_file(WORKDIR, USER_QZONE_INFO, qq_userinfo_response)
|
||
user_qzone_info = Tool.read_txt_file(WORKDIR, USER_QZONE_INFO)
|
||
|
||
if not Tool.is_valid_json(user_qzone_info):
|
||
print("获取QQ空间信息失败")
|
||
return None
|
||
json_dict = json.loads(user_qzone_info)
|
||
total_moments_count = json_dict['total']
|
||
print(f'你的未删除说说总条数{total_moments_count}')
|
||
|
||
# 当前未删除说说总数为0, 直接返回
|
||
if total_moments_count == 0:
|
||
return None
|
||
|
||
# 2. 获取所有说说数据
|
||
print("开始获取所有未删除说说")
|
||
qzone_moments_all = Tool.read_txt_file(WORKDIR, QZONE_MOMENTS_ALL)
|
||
if not qzone_moments_all:
|
||
# 缓存未找到,开始请求获取所有未删除说说
|
||
# qq_userinfo_response = get_user_qzone_info(totalMomentsCount)
|
||
# Tool.write_txt_file(WORKDIR, QZONE_MOMENTS_ALL, qq_userinfo_response)
|
||
# qzone_moments_all = Tool.read_txt_file(WORKDIR, QZONE_MOMENTS_ALL)
|
||
default_page_size = 30 # 默认一页30条
|
||
total_page_num = math.ceil(total_moments_count / default_page_size) # 总页数
|
||
all_page_data = [] # 用于存储所有页的数据
|
||
for current_page_num in range(0, total_page_num):
|
||
# 数据偏移量
|
||
pos = current_page_num * default_page_size
|
||
qq_userinfo_response = get_user_qzone_info(default_page_size, pos)
|
||
current_page_data = json.loads(qq_userinfo_response)["msglist"]
|
||
if current_page_data:
|
||
all_page_data.extend(current_page_data)
|
||
time.sleep(0.02)
|
||
qq_userinfo = json.dumps({"msglist": all_page_data}, ensure_ascii=False, indent=2)
|
||
Tool.write_txt_file(WORKDIR, QZONE_MOMENTS_ALL, qq_userinfo)
|
||
qzone_moments_all = Tool.read_txt_file(WORKDIR, QZONE_MOMENTS_ALL)
|
||
|
||
if not Tool.is_valid_json(qzone_moments_all):
|
||
print("获取QQ空间说说失败")
|
||
return None
|
||
json_dict = json.loads(qzone_moments_all)
|
||
qzone_moments_list = json_dict['msglist']
|
||
print(f'已获取到数据的说说总条数{len(qzone_moments_list)}')
|
||
|
||
# 3. 添加说说列表
|
||
texts = []
|
||
for item in tqdm(qzone_moments_list, desc="获取未删除说说", unit="条"):
|
||
content = item['content'] if item['content'] else ""
|
||
nickname = item['name']
|
||
create_time = Tool.format_timestamp(item['created_time'])
|
||
pictures = ""
|
||
# 如果有图片
|
||
if 'pic' in item:
|
||
for index, picture in enumerate(item['pic']):
|
||
pictures += picture['url1'] + ","
|
||
if 'video' in item:
|
||
for index, picture in enumerate(item['video']):
|
||
pictures += picture['url1'] + ","
|
||
|
||
# 去除最后一个逗号
|
||
pictures = pictures[:-1] if pictures != "" else pictures
|
||
comments = []
|
||
if 'commentlist' in item:
|
||
for index, commentToMe in enumerate(item['commentlist']):
|
||
comment_content = commentToMe['content']
|
||
comment_create_time = commentToMe['createTime2']
|
||
comment_nickname = commentToMe['name']
|
||
comment_uin = commentToMe['uin']
|
||
# 时间,内容,昵称,QQ号
|
||
comments.append([comment_create_time, comment_content, comment_nickname, comment_uin])
|
||
|
||
# 格式:时间、内容、图片链接、转发内容、评论内容
|
||
texts.append([create_time, f"{nickname} :{content}", pictures, comments])
|
||
return texts
|
||
|
||
|
||
# 获取用户QQ空间相关信息
|
||
def get_user_qzone_info(page_size, offset=0):
|
||
url = 'https://user.qzone.qq.com/proxy/domain/taotao.qq.com/cgi-bin/emotion_cgi_msglist_v6'
|
||
cookies = Request.cookies
|
||
g_tk = LoginUtil.bkn(cookies.get('p_skey'))
|
||
qqNumber = re.sub(r'o0*', '', cookies.get('uin'))
|
||
skey = cookies.get('skey')
|
||
p_uin = cookies.get('p_uin')
|
||
pt4_token = cookies.get('pt4_token')
|
||
p_skey = cookies.get('p_skey')
|
||
headers = {
|
||
'accept': '*/*',
|
||
'accept-language': 'en-US,en;q=0.9',
|
||
'cookie': f'uin={p_uin};skey={skey};p_uin={p_uin};pt4_token={pt4_token};p_skey={p_skey}',
|
||
'priority': 'u=1, i',
|
||
'referer': f'https://user.qzone.qq.com/{qqNumber}/main',
|
||
'sec-ch-ua': '"Not;A=Brand";v="24", "Chromium";v="128"',
|
||
'sec-ch-ua-mobile': '?0',
|
||
'sec-ch-ua-platform': '"Linux"',
|
||
'sec-fetch-dest': 'empty',
|
||
'sec-fetch-mode': 'cors',
|
||
'sec-fetch-site': 'same-origin',
|
||
'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36'
|
||
}
|
||
|
||
params = {
|
||
'uin': f'{qqNumber}',
|
||
'ftype': '0',
|
||
'sort': '0',
|
||
'pos': f'{offset}',
|
||
'num': f'{page_size}',
|
||
'replynum': '100',
|
||
'g_tk': f'{g_tk}',
|
||
'callback': '_preloadCallback',
|
||
'code_version': '1',
|
||
'format': 'jsonp',
|
||
'need_private_comment': '1'
|
||
}
|
||
try:
|
||
response = requests.get(url, headers=headers, params=params)
|
||
except Exception as e:
|
||
print(e)
|
||
rawResponse = response.text
|
||
# 使用正则表达式去掉 _preloadCallback(),并提取其中的 JSON 数据
|
||
raw_txt = re.sub(r'^_preloadCallback\((.*)\);?$', r'\1', rawResponse, flags=re.S)
|
||
# 再转一次是为了去掉响应值本身自带的转义符http:\/\/
|
||
json_dict = json.loads(raw_txt)
|
||
if json_dict['code'] != 0:
|
||
print(f"错误 {json_dict['message']}")
|
||
sys.exit(1)
|
||
return json.dumps(json_dict, indent=2, ensure_ascii=False)
|
||
|
||
|
||
if __name__ == '__main__':
|
||
get_visible_moments_list()
|