mirror of
https://github.com/LibraHp/GetQzonehistory.git
synced 2024-12-29 07:29:40 +00:00
343 lines
11 KiB
Python
343 lines
11 KiB
Python
from bs4 import BeautifulSoup
|
||
from tqdm import trange,tqdm
|
||
import pandas as pd
|
||
import signal
|
||
import configparser
|
||
import os
|
||
import re
|
||
import requests
|
||
import json
|
||
from PIL import Image
|
||
import qrcode
|
||
from pyzbar.pyzbar import decode
|
||
import time
|
||
|
||
config = configparser.ConfigParser()
|
||
config.read('./config.ini')
|
||
temp_path = config.get('File', 'temp')
|
||
user_path = config.get('File', 'user')
|
||
result_path = config.get('File', 'result')
|
||
|
||
|
||
def init_flooder():
|
||
# 初始化temp文件夹
|
||
if not os.path.exists(temp_path):
|
||
os.makedirs(temp_path)
|
||
print(f"Created directory: {temp_path}")
|
||
|
||
# 初始化user文件夹
|
||
if not os.path.exists(user_path):
|
||
os.makedirs(user_path)
|
||
print(f"Created directory: {user_path}")
|
||
|
||
# 初始化result文件夹
|
||
if not os.path.exists(result_path):
|
||
os.makedirs(result_path)
|
||
print(f"Created directory: {result_path}")
|
||
|
||
|
||
def read_files_in_folder():
|
||
# 获取文件夹下的所有文件
|
||
files = os.listdir(user_path)
|
||
# 如果文件夹为空
|
||
if not files:
|
||
return None
|
||
# 输出文件列表
|
||
print("已登录用户列表:")
|
||
for i, file in enumerate(files):
|
||
print(f"{i + 1}. {file}")
|
||
|
||
# 选择文件
|
||
while True:
|
||
try:
|
||
choice = int(input("请选择要登录的用户序号,重新登录输入0: "))
|
||
if 1 <= choice <= len(files):
|
||
break
|
||
elif choice == 0:
|
||
return None
|
||
else:
|
||
print("无效的选择,请重新输入。")
|
||
except ValueError:
|
||
print("无效的选择,请重新输入。")
|
||
|
||
# 读取选择的文件
|
||
selected_file = files[choice - 1]
|
||
file_path = os.path.join(user_path, selected_file)
|
||
with open(file_path, 'r') as file:
|
||
content = file.read()
|
||
|
||
return content
|
||
|
||
|
||
# 信号处理函数
|
||
def signal_handler(signal, frame):
|
||
# 在手动结束程序时保存已有的数据
|
||
if len(texts) > 0:
|
||
save_data()
|
||
exit(0)
|
||
|
||
|
||
def save_data():
|
||
df = pd.DataFrame(texts, columns=['时间', '内容'])
|
||
df.to_excel(result_path + uin + '.xlsx', index=False)
|
||
print('导出成功,请查看 ' + result_path + uin + '.xlsx')
|
||
|
||
|
||
def bkn(pSkey):
|
||
# 计算bkn
|
||
t, n, o = 5381, 0, len(pSkey)
|
||
|
||
while n < o:
|
||
t += (t << 5) + ord(pSkey[n])
|
||
n += 1
|
||
|
||
return t & 2147483647
|
||
|
||
|
||
def ptqrToken(qrsig):
|
||
# 计算ptqrtoken
|
||
n, i, e = len(qrsig), 0, 0
|
||
|
||
while n > i:
|
||
e += (e << 5) + ord(qrsig[i])
|
||
i += 1
|
||
|
||
return 2147483647 & e
|
||
|
||
|
||
def QR():
|
||
# 获取 qq空间 二维码
|
||
url = 'https://ssl.ptlogin2.qq.com/ptqrshow?appid=549000912&e=2&l=M&s=3&d=72&v=4&t=0.8692955245720428&daid=5&pt_3rd_aid=0'
|
||
|
||
try:
|
||
r = requests.get(url)
|
||
qrsig = requests.utils.dict_from_cookiejar(r.cookies).get('qrsig')
|
||
|
||
with open(temp_path + 'QR.png', 'wb') as f:
|
||
f.write(r.content)
|
||
|
||
im = Image.open(temp_path + 'QR.png')
|
||
im = im.resize((350, 350))
|
||
print(time.strftime('%H:%M:%S'), '登录二维码获取成功')
|
||
|
||
# 解码二维码
|
||
decoded_objects = decode(im)
|
||
for obj in decoded_objects:
|
||
qr = qrcode.QRCode()
|
||
qr.add_data(obj.data.decode('utf-8'))
|
||
# invert=True白底黑块,有些app不识别黑底白块.
|
||
qr.print_ascii(invert=True)
|
||
|
||
return qrsig
|
||
|
||
except Exception as e:
|
||
print(e)
|
||
|
||
|
||
def save_user(cookies):
|
||
with open(user_path + cookies.get('uin'), 'w') as f:
|
||
f.write(str(cookies))
|
||
|
||
|
||
def cookie():
|
||
init_flooder()
|
||
select_user = read_files_in_folder()
|
||
if select_user:
|
||
return eval(select_user)
|
||
# 获取 QQ空间 cookie
|
||
qrsig = QR()
|
||
ptqrtoken = ptqrToken(qrsig)
|
||
|
||
while True:
|
||
url = 'https://ssl.ptlogin2.qq.com/ptqrlogin?u1=https%3A%2F%2Fqzs.qq.com%2Fqzone%2Fv5%2Floginsucc.html%3Fpara' \
|
||
'%3Dizone&ptqrtoken=' + str(ptqrtoken) + '&ptredirect=0&h=1&t=1&g=1&from_ui=1&ptlang=2052&action=0-0-' \
|
||
+ str(time.time()) + '&js_ver=20032614&js_type=1&login_sig=&pt_uistyle=40&aid=549000912&daid=5&'
|
||
cookies = {'qrsig': qrsig}
|
||
try:
|
||
r = requests.get(url, cookies=cookies)
|
||
if '二维码未失效' in r.text:
|
||
# print(time.strftime('%H:%M:%S'), '二维码未失效')
|
||
pass
|
||
elif '二维码认证中' in r.text:
|
||
print(time.strftime('%H:%M:%S'), '二维码认证中')
|
||
elif '二维码已失效' in r.text:
|
||
print(time.strftime('%H:%M:%S'), '二维码已失效')
|
||
else:
|
||
print(time.strftime('%H:%M:%S'), '登录成功')
|
||
cookies = requests.utils.dict_from_cookiejar(r.cookies)
|
||
uin = requests.utils.dict_from_cookiejar(r.cookies).get('uin')
|
||
regex = re.compile(r'ptsigx=(.*?)&')
|
||
sigx = re.findall(regex, r.text)[0]
|
||
url = 'https://ptlogin2.qzone.qq.com/check_sig?pttype=1&uin=' + uin + '&service=ptqrlogin&nodirect=0' \
|
||
'&ptsigx=' + sigx + \
|
||
'&s_url=https%3A%2F%2Fqzs.qq.com%2Fqzone%2Fv5%2Floginsucc.html%3Fpara%3Dizone&f_url=&ptlang' \
|
||
'=2052&ptredirect=100&aid=549000912&daid=5&j_later=0&low_login_hour=0®master=0&pt_login_type' \
|
||
'=3&pt_aid=0&pt_aaid=16&pt_light=0&pt_3rd_aid=0'
|
||
try:
|
||
r = requests.get(url, cookies=cookies, allow_redirects=False)
|
||
target_cookies = requests.utils.dict_from_cookiejar(r.cookies)
|
||
p_skey = requests.utils.dict_from_cookiejar(r.cookies).get('p_skey')
|
||
save_user(target_cookies)
|
||
break
|
||
|
||
except Exception as e:
|
||
print(e)
|
||
|
||
except Exception as e:
|
||
print(e)
|
||
|
||
time.sleep(3)
|
||
|
||
return target_cookies
|
||
|
||
|
||
# 登陆后获取到的cookies
|
||
cookies = cookie()
|
||
# 获取g_tk
|
||
g_tk = bkn(cookies.get('p_skey'))
|
||
# 获取uin
|
||
uin = re.sub(r'o0*', '', cookies.get('uin'))
|
||
# 全局header
|
||
headers = {
|
||
'authority': 'user.qzone.qq.com',
|
||
'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,'
|
||
'application/signed-exchange;v=b3;q=0.7',
|
||
'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
|
||
'cache-control': 'no-cache',
|
||
'pragma': 'no-cache',
|
||
'sec-ch-ua': '"Not A(Brand";v="99", "Microsoft Edge";v="121", "Chromium";v="121"',
|
||
'sec-ch-ua-mobile': '?0',
|
||
'sec-ch-ua-platform': '"Windows"',
|
||
'sec-fetch-dest': 'document',
|
||
'sec-fetch-mode': 'navigate',
|
||
'sec-fetch-site': 'none',
|
||
'sec-fetch-user': '?1',
|
||
'upgrade-insecure-requests': '1',
|
||
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 '
|
||
'Safari/537.36 Edg/121.0.0.0',
|
||
}
|
||
|
||
|
||
# 获取历史消息列表
|
||
def get_message(start, count):
|
||
params = {
|
||
'uin': uin,
|
||
'begin_time': '0',
|
||
'end_time': '0',
|
||
'getappnotification': '1',
|
||
'getnotifi': '1',
|
||
'has_get_key': '0',
|
||
'offset': start,
|
||
'set': '0',
|
||
'count': count,
|
||
'useutf8': '1',
|
||
'outputhtmlfeed': '1',
|
||
'scope': '1',
|
||
'format': 'jsonp',
|
||
'g_tk': [
|
||
g_tk,
|
||
g_tk,
|
||
],
|
||
}
|
||
response = requests.get('https://user.qzone.qq.com/proxy/domain/ic2.qzone.qq.com/cgi-bin/feeds/feeds2_html_pav_all',
|
||
params=params, cookies=cookies, headers=headers)
|
||
return response
|
||
|
||
|
||
def get_login_user_info():
|
||
response = requests.get('https://r.qzone.qq.com/fcg-bin/cgi_get_portrait.fcg?g_tk=' + str(g_tk) + '&uins=' + uin,
|
||
headers=headers, cookies=cookies)
|
||
info = response.content.decode('GBK')
|
||
info = info.strip().lstrip('portraitCallBack(').rstrip(');')
|
||
info = json.loads(info)
|
||
return info
|
||
|
||
|
||
def get_message_count():
|
||
# 初始的总量范围
|
||
lower_bound = 0
|
||
upper_bound = 100000 # 假设最大总量为1000000
|
||
total = upper_bound // 2 # 初始的总量为上下界的中间值
|
||
with tqdm(desc="正在获取消息列表数量...", total=17, unit='页') as pbar:
|
||
while lower_bound <= upper_bound:
|
||
response = get_message(total, 100)
|
||
if "li" in response.text:
|
||
# 请求成功,总量应该在当前总量的右侧
|
||
lower_bound = total + 1
|
||
else:
|
||
# 请求失败,总量应该在当前总量的左侧
|
||
upper_bound = total - 1
|
||
total = (lower_bound + upper_bound) // 2 # 更新总量为新的中间值
|
||
pbar.update(1)
|
||
return total
|
||
|
||
|
||
# 提取两个字符串之间的内容
|
||
def extract_string_between(source_string, start_string, end_string):
|
||
start_index = source_string.find(start_string) + len(start_string)
|
||
end_index = source_string.find(end_string)
|
||
extracted_string = source_string[start_index:-37]
|
||
return extracted_string
|
||
|
||
|
||
# 去除多余的空格
|
||
def replace_multiple_spaces(string):
|
||
pattern = r'\s+'
|
||
replaced_string = re.sub(pattern, ' ', string)
|
||
return replaced_string
|
||
|
||
|
||
# 替换十六进制编码
|
||
def process_old_html(message):
|
||
def replace_hex(match):
|
||
hex_value = match.group(0)
|
||
byte_value = bytes(hex_value, 'utf-8').decode('unicode_escape')
|
||
return byte_value
|
||
|
||
new_text = re.sub(r'\\x[0-9a-fA-F]{2}', replace_hex, message)
|
||
start_string = "html:'"
|
||
end_string = "',opuin"
|
||
new_text = extract_string_between(new_text, start_string, end_string)
|
||
new_text = replace_multiple_spaces(new_text).replace('\\', '')
|
||
return new_text
|
||
|
||
|
||
if __name__ == '__main__':
|
||
try:
|
||
user_info = get_login_user_info()
|
||
user_nickname = user_info[uin][6]
|
||
print(f"用户<{uin}>,<{user_nickname}>登录成功")
|
||
except Exception as e:
|
||
print(f"登录失败:请重新登录,错误信息:{str(e)}")
|
||
exit(0)
|
||
texts = []
|
||
count = get_message_count()
|
||
try:
|
||
# 注册信号处理函数
|
||
signal.signal(signal.SIGINT, signal_handler)
|
||
signal.signal(signal.SIGTERM, signal_handler)
|
||
|
||
for i in trange(int(count / 100) + 1, desc='Progress', unit='100条'):
|
||
message = get_message(i * 100, 100).content.decode('utf-8')
|
||
html = process_old_html(message)
|
||
if "li" not in html:
|
||
break
|
||
soup = BeautifulSoup(html, 'html.parser')
|
||
for element in soup.find_all('li', class_='f-single f-s-s'):
|
||
time = None
|
||
text = None
|
||
time_element = element.find('div', class_='info-detail')
|
||
text_element = element.find('p', class_='txt-box-title ellipsis-one')
|
||
if time_element is not None and text_element is not None:
|
||
time = time_element.get_text().replace('\xa0', ' ')
|
||
text = text_element.get_text().replace('\xa0', ' ')
|
||
if text not in [sublist[1] for sublist in texts]:
|
||
texts.append([time, text])
|
||
|
||
if len(texts) > 0:
|
||
save_data()
|
||
except Exception as e:
|
||
print(f"发生异常: {str(e)}")
|
||
if len(texts) > 0:
|
||
save_data()
|