2024-09-12 09:44:29 +00:00
|
|
|
|
import json
|
2024-09-15 02:20:53 +00:00
|
|
|
|
import math
|
2024-09-12 09:44:29 +00:00
|
|
|
|
import os
|
|
|
|
|
import re
|
|
|
|
|
import sys
|
|
|
|
|
import time
|
|
|
|
|
|
|
|
|
|
import requests
|
|
|
|
|
|
|
|
|
|
from util import LoginUtil
|
|
|
|
|
|
|
|
|
|
WORKDIR = "./resource/fetch-all/"
|
|
|
|
|
MESSAGE_SAMPLE = 'msg-one.json'
|
|
|
|
|
MESSAGE_ALL = 'msg-all.json'
|
2024-09-15 02:55:35 +00:00
|
|
|
|
cookies = None
|
2024-09-12 09:44:29 +00:00
|
|
|
|
# 获取所有可见的未删除的说说+高清图片(包含2014年之前)
|
|
|
|
|
def get_visible_msg_list():
|
2024-09-15 02:55:35 +00:00
|
|
|
|
global cookies
|
|
|
|
|
if cookies is None:
|
|
|
|
|
cookies = LoginUtil.cookie()
|
2024-09-12 09:44:29 +00:00
|
|
|
|
# 1. 获取说说总条数
|
|
|
|
|
try:
|
|
|
|
|
msgSample = read_txt_file(MESSAGE_SAMPLE)
|
|
|
|
|
except FileNotFoundError as e:
|
2024-09-15 02:20:53 +00:00
|
|
|
|
# 样本缓存未找到,开始请求获取样本
|
2024-09-12 09:44:29 +00:00
|
|
|
|
qqResponse = get_msg_list(1)
|
2024-09-15 02:20:53 +00:00
|
|
|
|
# 创建缓存文件并写入
|
2024-09-12 09:44:29 +00:00
|
|
|
|
write_txt_file(MESSAGE_SAMPLE, qqResponse)
|
|
|
|
|
msgSample = read_txt_file(MESSAGE_SAMPLE)
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
json_dict = json.loads(msgSample)
|
|
|
|
|
totalCount = json_dict['total']
|
|
|
|
|
print(f'你的未删除说说总条数{totalCount}')
|
|
|
|
|
except json.JSONDecodeError as e:
|
|
|
|
|
print(f"JSON解析错误: {e}")
|
|
|
|
|
sys.exit(1)
|
|
|
|
|
|
|
|
|
|
# 2. 获取所有说说数据
|
|
|
|
|
try:
|
|
|
|
|
msgAll = read_txt_file(MESSAGE_ALL)
|
|
|
|
|
except FileNotFoundError as e:
|
2024-09-15 02:20:53 +00:00
|
|
|
|
# 缓存未找到,准备分页获取所有未删除说说"
|
|
|
|
|
# 一页20条
|
|
|
|
|
defaultPageSize = 30
|
|
|
|
|
# 总页数
|
|
|
|
|
totalPageNum = math.ceil(totalCount / defaultPageSize)
|
|
|
|
|
# 用于存储所有页的数据
|
|
|
|
|
allPageData = []
|
|
|
|
|
print(f"一共{totalPageNum}页")
|
|
|
|
|
for currentPageNum in range(0, totalPageNum):
|
|
|
|
|
# 数据偏移量
|
|
|
|
|
pos = currentPageNum * defaultPageSize
|
|
|
|
|
print(
|
2024-09-15 02:55:35 +00:00
|
|
|
|
f"一页{defaultPageSize}条, 获取第{currentPageNum + 1}页")
|
2024-09-15 02:20:53 +00:00
|
|
|
|
qqResponse = get_msg_list(defaultPageSize, pos)
|
|
|
|
|
currentPageData = json.loads(qqResponse)["msglist"]
|
|
|
|
|
allPageData.extend(currentPageData)
|
|
|
|
|
msgAll = json.dumps({"msglist": allPageData}, ensure_ascii=False, indent=2)
|
|
|
|
|
write_txt_file(MESSAGE_ALL, msgAll)
|
2024-09-12 09:44:29 +00:00
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
json_dict = json.loads(msgAll)
|
|
|
|
|
msgList = json_dict['msglist']
|
|
|
|
|
print(f'已获取到数据的说说总条数{len(msgList)}')
|
|
|
|
|
except json.JSONDecodeError as e:
|
|
|
|
|
print(f"JSON解析错误: {e}")
|
|
|
|
|
sys.exit(1)
|
|
|
|
|
|
|
|
|
|
# 3. 解析原始JSON写成Markdown
|
|
|
|
|
markdown_content = ''
|
|
|
|
|
for item in msgList:
|
|
|
|
|
|
|
|
|
|
myWord = item['content'] if item['content'] else ""
|
|
|
|
|
myCurrentQQName = item['name']
|
|
|
|
|
myCreateTime = format_timestamp(item['created_time'])
|
|
|
|
|
myCurrentSourceName = '\n来自 ' + item['source_name'] if item['source_name'] else ""
|
|
|
|
|
|
|
|
|
|
# 如果有图片
|
|
|
|
|
markdown_pictures = ""
|
|
|
|
|
if 'pic' in item:
|
|
|
|
|
for index, myPic in enumerate(item['pic']):
|
|
|
|
|
myPicUrl = myPic['url1']
|
|
|
|
|
myPicFileName = f"{item['tid']}{index}.jpeg"
|
|
|
|
|
get_image(myPicUrl, myPicFileName)
|
|
|
|
|
markdown_pictures += f"![{myPicFileName}](./{myPicFileName})"
|
|
|
|
|
|
|
|
|
|
markdown_content += f"## {myCurrentQQName} {myCreateTime} \n{myWord} {markdown_pictures} \n{myCurrentSourceName}"
|
|
|
|
|
|
|
|
|
|
# 有转发的内容
|
|
|
|
|
if 'rt_tid' in item:
|
|
|
|
|
rt_tid = item['rt_tid']
|
|
|
|
|
rtContent = item['rt_con']['content']
|
|
|
|
|
rtQQName = item['rt_uinname']
|
|
|
|
|
rt_uin = item['rt_uin']
|
|
|
|
|
markdown_content += f"\n> {rtQQName} - {rt_uin} : {rtContent}"
|
|
|
|
|
|
|
|
|
|
# 有人评论
|
|
|
|
|
if 'commentlist' in item:
|
|
|
|
|
markdown_content += f"\n💬 **{len(item['commentlist'])}条评论回复**\n"
|
|
|
|
|
for index, commentToMe in enumerate(item['commentlist']):
|
|
|
|
|
commentContent = commentToMe['content']
|
|
|
|
|
commentCreateTime = commentToMe['createTime2']
|
|
|
|
|
commentQQName = commentToMe['name']
|
|
|
|
|
commentQQNumber = commentToMe['uin']
|
|
|
|
|
markdown_content += f"- {commentQQName}({commentQQNumber}) : {commentContent} - {commentCreateTime}\n"
|
|
|
|
|
|
|
|
|
|
# append write
|
|
|
|
|
markdown_content += "\n\n"
|
|
|
|
|
|
|
|
|
|
# write markdown to file
|
|
|
|
|
write_txt_file("所有可见说说.md", markdown_content)
|
|
|
|
|
|
|
|
|
|
|
2024-09-15 02:20:53 +00:00
|
|
|
|
def get_msg_list(pageSize, offset=0):
|
2024-09-12 09:44:29 +00:00
|
|
|
|
url = 'https://user.qzone.qq.com/proxy/domain/taotao.qq.com/cgi-bin/emotion_cgi_msglist_v6'
|
|
|
|
|
g_tk = LoginUtil.bkn(cookies.get('p_skey'))
|
|
|
|
|
qqNumber = re.sub(r'o0*', '', cookies.get('uin'))
|
|
|
|
|
skey = cookies.get('skey')
|
|
|
|
|
p_uin = cookies.get('p_uin')
|
|
|
|
|
pt4_token = cookies.get('pt4_token')
|
|
|
|
|
p_skey = cookies.get('p_skey')
|
|
|
|
|
headers = {
|
|
|
|
|
'accept': '*/*',
|
|
|
|
|
'accept-language': 'en-US,en;q=0.9',
|
|
|
|
|
'cookie': f'uin={p_uin};skey={skey};p_uin={p_uin};pt4_token={pt4_token};p_skey={p_skey}',
|
|
|
|
|
'priority': 'u=1, i',
|
|
|
|
|
'referer': f'https://user.qzone.qq.com/{qqNumber}/main',
|
|
|
|
|
'sec-ch-ua': '"Not;A=Brand";v="24", "Chromium";v="128"',
|
|
|
|
|
'sec-ch-ua-mobile': '?0',
|
|
|
|
|
'sec-ch-ua-platform': '"Linux"',
|
|
|
|
|
'sec-fetch-dest': 'empty',
|
|
|
|
|
'sec-fetch-mode': 'cors',
|
|
|
|
|
'sec-fetch-site': 'same-origin',
|
|
|
|
|
'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36'
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
params = {
|
|
|
|
|
'uin': f'{qqNumber}',
|
|
|
|
|
'ftype': '0',
|
|
|
|
|
'sort': '0',
|
2024-09-15 02:20:53 +00:00
|
|
|
|
'pos': f'{offset}',
|
|
|
|
|
'num': f'{pageSize}',
|
2024-09-12 09:44:29 +00:00
|
|
|
|
'replynum': '100',
|
|
|
|
|
'g_tk': f'{g_tk}',
|
|
|
|
|
'callback': '_preloadCallback',
|
|
|
|
|
'code_version': '1',
|
|
|
|
|
'format': 'jsonp',
|
|
|
|
|
'need_private_comment': '1'
|
|
|
|
|
}
|
|
|
|
|
try:
|
|
|
|
|
response = requests.get(url, headers=headers, params=params)
|
|
|
|
|
except Exception as e:
|
|
|
|
|
print(e)
|
|
|
|
|
rawResponse = response.text
|
|
|
|
|
# 使用正则表达式去掉 _preloadCallback(),并提取其中的 JSON 数据
|
|
|
|
|
raw_txt = re.sub(r'^_preloadCallback\((.*)\);?$', r'\1', rawResponse, flags=re.S)
|
|
|
|
|
# 再转一次是为了去掉响应值本身自带的转义符http:\/\/
|
|
|
|
|
json_dict = json.loads(raw_txt)
|
|
|
|
|
if json_dict['code'] != 0:
|
|
|
|
|
print(f"错误 {json_dict['message']}")
|
|
|
|
|
sys.exit(1)
|
|
|
|
|
return json.dumps(json_dict, indent=2, ensure_ascii=False)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def write_txt_file(file_name, data):
|
|
|
|
|
if not os.path.exists(WORKDIR):
|
|
|
|
|
os.makedirs(WORKDIR)
|
|
|
|
|
base_path_file_name = os.path.join(WORKDIR, file_name)
|
|
|
|
|
with open(base_path_file_name, 'w', encoding='utf-8') as file:
|
|
|
|
|
file.write(data)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def read_txt_file(file_name):
|
|
|
|
|
base_path_file_name = os.path.join(WORKDIR, file_name)
|
|
|
|
|
if os.path.exists(base_path_file_name):
|
|
|
|
|
with open(base_path_file_name, 'r', encoding='utf-8') as file:
|
|
|
|
|
return file.read()
|
|
|
|
|
else:
|
|
|
|
|
raise FileNotFoundError(f"文件 {base_path_file_name} 不存在")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def format_timestamp(timestamp):
|
|
|
|
|
time_struct = time.localtime(timestamp)
|
|
|
|
|
formatted_time = time.strftime("%Y年%m月%d日 %H:%M:%S", time_struct)
|
|
|
|
|
return formatted_time
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_image(url, img_name):
|
|
|
|
|
headers = {
|
|
|
|
|
'sec-ch-ua': '"Not;A=Brand";v="24", "Chromium";v="128"',
|
|
|
|
|
'Referer': 'https://user.qzone.qq.com/',
|
|
|
|
|
'sec-ch-ua-mobile': '?0',
|
|
|
|
|
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36',
|
|
|
|
|
'sec-ch-ua-platform': '"Linux"',
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
# 发起GET请求
|
|
|
|
|
response = requests.get(url, headers=headers)
|
|
|
|
|
|
|
|
|
|
# 检查请求是否成功
|
|
|
|
|
if response.status_code == 200:
|
|
|
|
|
# 保存图片到本地
|
|
|
|
|
file_path = os.path.join(WORKDIR, img_name)
|
|
|
|
|
with open(file_path, 'wb') as file:
|
|
|
|
|
file.write(response.content)
|
|
|
|
|
print('图片下载成功')
|
|
|
|
|
else:
|
|
|
|
|
print(f'请求失败,状态码:{response.status_code}')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
|
|
|
|
|
get_visible_msg_list()
|