mirror of
https://github.com/LibraHp/GetQzonehistory.git
synced 2024-12-27 22:49:09 +00:00
简单实现查找历史说说并导出为excel
This commit is contained in:
parent
672b82746e
commit
9f5a6cc5d1
17
main.py
17
main.py
@ -2,12 +2,19 @@ from bs4 import BeautifulSoup
|
|||||||
from tqdm import trange
|
from tqdm import trange
|
||||||
import util.RequestUtil as Request
|
import util.RequestUtil as Request
|
||||||
import util.ToolsUtil as Tools
|
import util.ToolsUtil as Tools
|
||||||
|
import util.ConfigUtil as Config
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
user_info = Request.get_login_user_info()
|
||||||
|
user_nickname = user_info[Request.uin][6]
|
||||||
|
print(f"用户<{Request.uin}>,<{user_nickname}>登录成功")
|
||||||
texts = []
|
texts = []
|
||||||
for i in trange(2, desc='Progress', unit='iteration'):
|
for i in trange(1000, desc='Progress', unit='iteration'):
|
||||||
message = Request.get_message(i*100, 100).content.decode('utf-8')
|
message = Request.get_message(i * 100, 100).content.decode('utf-8')
|
||||||
html = Tools.process_old_html(message)
|
html = Tools.process_old_html(message)
|
||||||
|
if "li" not in html:
|
||||||
|
break
|
||||||
soup = BeautifulSoup(html, 'html.parser')
|
soup = BeautifulSoup(html, 'html.parser')
|
||||||
for element in soup.find_all('p', class_='txt-box-title ellipsis-one'):
|
for element in soup.find_all('p', class_='txt-box-title ellipsis-one'):
|
||||||
text = element.get_text().replace('\xa0', ' ')
|
text = element.get_text().replace('\xa0', ' ')
|
||||||
@ -16,6 +23,6 @@ if __name__ == '__main__':
|
|||||||
# 创建一个DataFrame对象
|
# 创建一个DataFrame对象
|
||||||
df = pd.DataFrame(texts, columns=['内容'])
|
df = pd.DataFrame(texts, columns=['内容'])
|
||||||
|
|
||||||
# 将DataFrame对象导出为CSV文件,使用UTF-8编码
|
# 导出为Excel
|
||||||
df.to_excel('output.xlsx', index=False)
|
df.to_excel(Config.result_path + Request.uin + '.xlsx', index=False)
|
||||||
print(texts)
|
print('导出成功,请查看 ' + Config.result_path + Request.uin + '.xlsx')
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
[File]
|
[File]
|
||||||
temp = ./resource/temp/
|
temp = ./resource/temp/
|
||||||
user = ./resource/user/
|
user = ./resource/user/
|
||||||
|
result = ./resource/result/
|
@ -6,6 +6,7 @@ config.read('./resource/config/config.ini')
|
|||||||
|
|
||||||
temp_path = config.get('File', 'temp')
|
temp_path = config.get('File', 'temp')
|
||||||
user_path = config.get('File', 'user')
|
user_path = config.get('File', 'user')
|
||||||
|
result_path = config.get('File', 'result')
|
||||||
|
|
||||||
|
|
||||||
def save_user(cookies):
|
def save_user(cookies):
|
||||||
|
@ -2,9 +2,9 @@ import requests
|
|||||||
from PIL import Image
|
from PIL import Image
|
||||||
import time
|
import time
|
||||||
import re
|
import re
|
||||||
import os
|
|
||||||
import util.ConfigUtil as Config
|
import util.ConfigUtil as Config
|
||||||
|
|
||||||
|
|
||||||
def bkn(pSkey):
|
def bkn(pSkey):
|
||||||
# 计算bkn
|
# 计算bkn
|
||||||
t, n, o = 5381, 0, len(pSkey)
|
t, n, o = 5381, 0, len(pSkey)
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
import util.LoginUtil as Login
|
import util.LoginUtil as Login
|
||||||
import requests
|
import requests
|
||||||
|
import json
|
||||||
# 登陆后获取到的cookies
|
# 登陆后获取到的cookies
|
||||||
cookies = Login.cookie()
|
cookies = Login.cookie()
|
||||||
# 获取g_tk
|
# 获取g_tk
|
||||||
@ -52,3 +52,12 @@ def get_message(start, count):
|
|||||||
response = requests.get('https://user.qzone.qq.com/proxy/domain/ic2.qzone.qq.com/cgi-bin/feeds/feeds2_html_pav_all',
|
response = requests.get('https://user.qzone.qq.com/proxy/domain/ic2.qzone.qq.com/cgi-bin/feeds/feeds2_html_pav_all',
|
||||||
params=params, cookies=cookies, headers=headers)
|
params=params, cookies=cookies, headers=headers)
|
||||||
return response
|
return response
|
||||||
|
|
||||||
|
|
||||||
|
def get_login_user_info():
    """Fetch the portrait record of the logged-in account.

    Sends a GET request to the ``cgi_get_portrait.fcg`` endpoint using the
    module-level ``g_tk``, ``uin``, ``headers`` and ``cookies``, unwraps the
    ``portraitCallBack(...)`` JSONP envelope and parses the remaining JSON.

    Returns:
        The decoded JSON object. The caller in ``main.py`` indexes it by
        ``uin`` and reads element 6 as the nickname.

    Raises:
        UnicodeDecodeError: if the response body is not valid GBK.
        json.JSONDecodeError: if the unwrapped payload is not valid JSON.
    """
    response = requests.get(
        'https://r.qzone.qq.com/fcg-bin/cgi_get_portrait.fcg?g_tk=' + str(g_tk) + '&uins=' + uin,
        headers=headers, cookies=cookies)
    # The endpoint answers in GBK, not UTF-8.
    payload = response.content.decode('GBK').strip()

    # Remove the JSONP wrapper as a literal prefix/suffix. str.lstrip/rstrip
    # take a *set of characters*, so using them here would also eat any
    # leading/trailing payload characters that happen to be in that set.
    callback_prefix = 'portraitCallBack('
    if payload.startswith(callback_prefix):
        payload = payload[len(callback_prefix):]
    payload = payload.rstrip(';').rstrip()
    if payload.endswith(')'):
        payload = payload[:-1]

    return json.loads(payload)
|
||||||
|
@ -25,7 +25,7 @@ def process_old_html(message):
|
|||||||
|
|
||||||
new_text = re.sub(r'\\x[0-9a-fA-F]{2}', replace_hex, message)
|
new_text = re.sub(r'\\x[0-9a-fA-F]{2}', replace_hex, message)
|
||||||
start_string = "html:'"
|
start_string = "html:'"
|
||||||
end_string = "\'\,opuin"
|
end_string = "',opuin"
|
||||||
new_text = extract_string_between(new_text, start_string, end_string)
|
new_text = extract_string_between(new_text, start_string, end_string)
|
||||||
new_text = replace_multiple_spaces(new_text).replace('\\', '')
|
new_text = replace_multiple_spaces(new_text).replace('\\', '')
|
||||||
return new_text
|
return new_text
|
||||||
|
Loading…
Reference in New Issue
Block a user