获取所有可见的未删除的说说+高清图片（包含2014年之前）

2025-02-22 05:09:07 +00:00 · 2024-09-12 17:44:29 +08:00 · 2024-09-12 17:44:29 +08:00 · e68fd87d33
commit e68fd87d33
parent a11e0cc7e2
1 changed files with 199 additions and 0 deletions
--- a/fetch_all_message.py
+++ b/fetch_all_message.py
@ -0,0 +1,199 @@
+import json
+import os
+import re
+import sys
+import time
+
+import requests
+
+from util import LoginUtil
+
+WORKDIR = "./resource/fetch-all/"
+MESSAGE_SAMPLE = 'msg-one.json'
+MESSAGE_ALL = 'msg-all.json'
+
+
+# 获取所有可见的未删除的说说+高清图片（包含2014年之前）
+def get_visible_msg_list():
+    # 1. 获取说说总条数
+    try:
+        msgSample = read_txt_file(MESSAGE_SAMPLE)
+    except FileNotFoundError as e:
+        print("样本缓存未找到，开始请求获取样本")
+        qqResponse = get_msg_list(1)
+        print("创建缓存文件并写入")
+        write_txt_file(MESSAGE_SAMPLE, qqResponse)
+        msgSample = read_txt_file(MESSAGE_SAMPLE)
+
+    try:
+        json_dict = json.loads(msgSample)
+        totalCount = json_dict['total']
+        print(f'你的未删除说说总条数{totalCount}')
+    except json.JSONDecodeError as e:
+        print(f"JSON解析错误: {e}")
+        sys.exit(1)
+
+    # 2. 获取所有说说数据
+    print('开始不分页获取所有未删除说说')
+    try:
+        msgAll = read_txt_file(MESSAGE_ALL)
+    except FileNotFoundError as e:
+        print("缓存未找到，开始请求获取所有未删除说说")
+        qqResponse = get_msg_list(totalCount)
+        write_txt_file(MESSAGE_ALL, qqResponse)
+        msgAll = read_txt_file(MESSAGE_ALL)
+
+    try:
+        json_dict = json.loads(msgAll)
+        msgList = json_dict['msglist']
+        print(f'已获取到数据的说说总条数{len(msgList)}')
+    except json.JSONDecodeError as e:
+        print(f"JSON解析错误: {e}")
+        sys.exit(1)
+
+    # 3. 解析原始JSON写成Markdown
+    markdown_content = ''
+    for item in msgList:
+
+        myWord = item['content'] if item['content'] else ""
+        myCurrentQQName = item['name']
+        myCreateTime = format_timestamp(item['created_time'])
+        myCurrentSourceName = '\n来自 ' + item['source_name'] if item['source_name'] else ""
+
+        # 如果有图片
+        markdown_pictures = ""
+        if 'pic' in item:
+            for index, myPic in enumerate(item['pic']):
+                myPicUrl = myPic['url1']
+                myPicFileName = f"{item['tid']}{index}.jpeg"
+                get_image(myPicUrl, myPicFileName)
+                markdown_pictures += f"![{myPicFileName}](./{myPicFileName})"
+
+        markdown_content += f"## {myCurrentQQName} {myCreateTime}  \n{myWord} {markdown_pictures} \n{myCurrentSourceName}"
+
+        # 有转发的内容
+        if 'rt_tid' in item:
+            rt_tid = item['rt_tid']
+            rtContent = item['rt_con']['content']
+            rtQQName = item['rt_uinname']
+            rt_uin = item['rt_uin']
+            markdown_content += f"\n> {rtQQName} - {rt_uin} : {rtContent}"
+
+        # 有人评论
+        if 'commentlist' in item:
+            markdown_content += f"\n💬 **{len(item['commentlist'])}条评论回复**\n"
+            for index, commentToMe in enumerate(item['commentlist']):
+                commentContent = commentToMe['content']
+                commentCreateTime = commentToMe['createTime2']
+                commentQQName = commentToMe['name']
+                commentQQNumber = commentToMe['uin']
+                markdown_content += f"- {commentQQName}({commentQQNumber}) : {commentContent} - {commentCreateTime}\n"
+
+        # append write
+        markdown_content += "\n\n"
+
+    # write markdown to file
+    write_txt_file("所有可见说说.md", markdown_content)
+
+
+def get_msg_list(num):
+    url = 'https://user.qzone.qq.com/proxy/domain/taotao.qq.com/cgi-bin/emotion_cgi_msglist_v6'
+    cookies = LoginUtil.cookie()
+    g_tk = LoginUtil.bkn(cookies.get('p_skey'))
+    qqNumber = re.sub(r'o0*', '', cookies.get('uin'))
+    skey = cookies.get('skey')
+    p_uin = cookies.get('p_uin')
+    pt4_token = cookies.get('pt4_token')
+    p_skey = cookies.get('p_skey')
+    headers = {
+        'accept': '*/*',
+        'accept-language': 'en-US,en;q=0.9',
+        'cookie': f'uin={p_uin};skey={skey};p_uin={p_uin};pt4_token={pt4_token};p_skey={p_skey}',
+        'priority': 'u=1, i',
+        'referer': f'https://user.qzone.qq.com/{qqNumber}/main',
+        'sec-ch-ua': '"Not;A=Brand";v="24", "Chromium";v="128"',
+        'sec-ch-ua-mobile': '?0',
+        'sec-ch-ua-platform': '"Linux"',
+        'sec-fetch-dest': 'empty',
+        'sec-fetch-mode': 'cors',
+        'sec-fetch-site': 'same-origin',
+        'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36'
+    }
+
+    params = {
+        'uin': f'{qqNumber}',
+        'ftype': '0',
+        'sort': '0',
+        'pos': '0',
+        'num': f'{num}',
+        'replynum': '100',
+        'g_tk': f'{g_tk}',
+        'callback': '_preloadCallback',
+        'code_version': '1',
+        'format': 'jsonp',
+        'need_private_comment': '1'
+    }
+    try:
+        response = requests.get(url, headers=headers, params=params)
+    except Exception as e:
+        print(e)
+    rawResponse = response.text
+    # 使用正则表达式去掉 _preloadCallback()，并提取其中的 JSON 数据
+    raw_txt = re.sub(r'^_preloadCallback\((.*)\);?$', r'\1', rawResponse, flags=re.S)
+    # 再转一次是为了去掉响应值本身自带的转义符http:\/\/ 
+    json_dict = json.loads(raw_txt)
+    if json_dict['code'] != 0:
+        print(f"错误 {json_dict['message']}")
+        sys.exit(1)
+    return json.dumps(json_dict, indent=2, ensure_ascii=False)
+
+
+def write_txt_file(file_name, data):
+    if not os.path.exists(WORKDIR):
+        os.makedirs(WORKDIR)
+    base_path_file_name = os.path.join(WORKDIR, file_name)
+    with open(base_path_file_name, 'w', encoding='utf-8') as file:
+        file.write(data)
+
+
+def read_txt_file(file_name):
+    base_path_file_name = os.path.join(WORKDIR, file_name)
+    if os.path.exists(base_path_file_name):
+        print("读取缓存文件")
+        with open(base_path_file_name, 'r', encoding='utf-8') as file:
+            return file.read()
+    else:
+        raise FileNotFoundError(f"文件 {base_path_file_name} 不存在")
+
+
+def format_timestamp(timestamp):
+    time_struct = time.localtime(timestamp)
+    formatted_time = time.strftime("%Y年%m月%d日 %H:%M:%S", time_struct)
+    return formatted_time
+
+
+def get_image(url, img_name):
+    headers = {
+        'sec-ch-ua': '"Not;A=Brand";v="24", "Chromium";v="128"',
+        'Referer': 'https://user.qzone.qq.com/',
+        'sec-ch-ua-mobile': '?0',
+        'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36',
+        'sec-ch-ua-platform': '"Linux"',
+    }
+
+    # 发起GET请求
+    response = requests.get(url, headers=headers)
+
+    # 检查请求是否成功
+    if response.status_code == 200:
+        # 保存图片到本地
+        file_path = os.path.join(WORKDIR, img_name)
+        with open(file_path, 'wb') as file:
+            file.write(response.content)
+        print('图片下载成功')
+    else:
+        print(f'请求失败，状态码：{response.status_code}')
+
+
+if __name__ == '__main__':
+    get_visible_msg_list()