mirror of
https://github.com/LibraHp/GetQzonehistory.git
synced 2024-12-27 06:29:55 +00:00
简单实现查找历史说说并导出为excel
This commit is contained in:
parent
672b82746e
commit
9f5a6cc5d1
17
main.py
17
main.py
@ -2,12 +2,19 @@ from bs4 import BeautifulSoup
|
||||
from tqdm import trange
|
||||
import util.RequestUtil as Request
|
||||
import util.ToolsUtil as Tools
|
||||
import util.ConfigUtil as Config
|
||||
import pandas as pd
|
||||
|
||||
if __name__ == '__main__':
|
||||
user_info = Request.get_login_user_info()
|
||||
user_nickname = user_info[Request.uin][6]
|
||||
print(f"用户<{Request.uin}>,<{user_nickname}>登录成功")
|
||||
texts = []
|
||||
for i in trange(2, desc='Progress', unit='iteration'):
|
||||
message = Request.get_message(i*100, 100).content.decode('utf-8')
|
||||
for i in trange(1000, desc='Progress', unit='iteration'):
|
||||
message = Request.get_message(i * 100, 100).content.decode('utf-8')
|
||||
html = Tools.process_old_html(message)
|
||||
if "li" not in html:
|
||||
break
|
||||
soup = BeautifulSoup(html, 'html.parser')
|
||||
for element in soup.find_all('p', class_='txt-box-title ellipsis-one'):
|
||||
text = element.get_text().replace('\xa0', ' ')
|
||||
@ -16,6 +23,6 @@ if __name__ == '__main__':
|
||||
# 创建一个DataFrame对象
|
||||
df = pd.DataFrame(texts, columns=['内容'])
|
||||
|
||||
# 将DataFrame对象导出为CSV文件,使用UTF-8编码
|
||||
df.to_excel('output.xlsx', index=False)
|
||||
print(texts)
|
||||
# 导出为Excel
|
||||
df.to_excel(Config.result_path + Request.uin + '.xlsx', index=False)
|
||||
print('导出成功,请查看 ' + Config.result_path + Request.uin + '.xlsx')
|
||||
|
@ -1,3 +1,4 @@
|
||||
[File]
|
||||
temp = ./resource/temp/
|
||||
user = ./resource/user/
|
||||
user = ./resource/user/
|
||||
result = ./resource/result/
|
@ -6,6 +6,7 @@ config.read('./resource/config/config.ini')
|
||||
|
||||
temp_path = config.get('File', 'temp')
|
||||
user_path = config.get('File', 'user')
|
||||
result_path = config.get('File', 'result')
|
||||
|
||||
|
||||
def save_user(cookies):
|
||||
|
@ -2,9 +2,9 @@ import requests
|
||||
from PIL import Image
|
||||
import time
|
||||
import re
|
||||
import os
|
||||
import util.ConfigUtil as Config
|
||||
|
||||
|
||||
def bkn(pSkey):
|
||||
# 计算bkn
|
||||
t, n, o = 5381, 0, len(pSkey)
|
||||
|
@ -1,6 +1,6 @@
|
||||
import util.LoginUtil as Login
|
||||
import requests
|
||||
|
||||
import json
|
||||
# 登陆后获取到的cookies
|
||||
cookies = Login.cookie()
|
||||
# 获取g_tk
|
||||
@ -52,3 +52,12 @@ def get_message(start, count):
|
||||
response = requests.get('https://user.qzone.qq.com/proxy/domain/ic2.qzone.qq.com/cgi-bin/feeds/feeds2_html_pav_all',
|
||||
params=params, cookies=cookies, headers=headers)
|
||||
return response
|
||||
|
||||
|
||||
def get_login_user_info():
    """Fetch the portrait record for the current ``uin`` and return it parsed.

    Sends a GET request to the ``cgi_get_portrait.fcg`` endpoint using the
    module-level ``g_tk``, ``uin``, ``headers`` and ``cookies``, decodes the
    response body as GBK, removes the ``portraitCallBack( ... );`` wrapper
    when it is present, and parses the remaining text as JSON.

    Returns:
        The object produced by ``json.loads`` for the unwrapped payload.

    Raises:
        UnicodeDecodeError: If the response body is not valid GBK.
        json.JSONDecodeError: If the unwrapped payload is not valid JSON.
    """
    response = requests.get('https://r.qzone.qq.com/fcg-bin/cgi_get_portrait.fcg?g_tk=' + str(g_tk) + '&uins=' + uin,
                            headers=headers, cookies=cookies)
    payload = response.content.decode('GBK').strip()

    # str.lstrip()/str.rstrip() interpret their argument as a *set of
    # characters*, not as a literal prefix/suffix, so they can silently eat
    # payload characters. Remove the callback wrapper explicitly instead.
    wrapper_prefix = 'portraitCallBack('
    if payload.startswith(wrapper_prefix):
        payload = payload[len(wrapper_prefix):]
        # The wrapper normally closes with ");" - drop the optional ';'
        # first, then exactly one closing parenthesis.
        if payload.endswith(';'):
            payload = payload[:-1].rstrip()
        if payload.endswith(')'):
            payload = payload[:-1]

    return json.loads(payload)
|
||||
|
@ -25,7 +25,7 @@ def process_old_html(message):
|
||||
|
||||
new_text = re.sub(r'\\x[0-9a-fA-F]{2}', replace_hex, message)
|
||||
start_string = "html:'"
|
||||
end_string = "\'\,opuin"
|
||||
end_string = "',opuin"
|
||||
new_text = extract_string_between(new_text, start_string, end_string)
|
||||
new_text = replace_multiple_spaces(new_text).replace('\\', '')
|
||||
return new_text
|
||||
|
Loading…
Reference in New Issue
Block a user