From 9f5a6cc5d187562ae1e97b0bfb369e0a3edcbf1d Mon Sep 17 00:00:00 2001 From: LibraHp_0928 <1941163264@qq.com> Date: Tue, 13 Feb 2024 20:47:18 +0800 Subject: [PATCH] =?UTF-8?q?=E7=AE=80=E5=8D=95=E5=AE=9E=E7=8E=B0=E6=9F=A5?= =?UTF-8?q?=E6=89=BE=E5=8E=86=E5=8F=B2=E8=AF=B4=E8=AF=B4=E5=B9=B6=E5=AF=BC?= =?UTF-8?q?=E5=87=BA=E4=B8=BAexcel?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- main.py | 17 ++++++++++++----- resource/config/config.ini | 3 ++- util/ConfigUtil.py | 1 + util/LoginUtil.py | 2 +- util/RequestUtil.py | 11 ++++++++++- util/ToolsUtil.py | 2 +- 6 files changed, 27 insertions(+), 9 deletions(-) diff --git a/main.py b/main.py index 60084a8..b2179a1 100644 --- a/main.py +++ b/main.py @@ -2,12 +2,19 @@ from bs4 import BeautifulSoup from tqdm import trange import util.RequestUtil as Request import util.ToolsUtil as Tools +import util.ConfigUtil as Config import pandas as pd + if __name__ == '__main__': + user_info = Request.get_login_user_info() + user_nickname = user_info[Request.uin][6] + print(f"用户<{Request.uin}>,<{user_nickname}>登录成功") texts = [] - for i in trange(2, desc='Progress', unit='iteration'): - message = Request.get_message(i*100, 100).content.decode('utf-8') + for i in trange(1000, desc='Progress', unit='iteration'): + message = Request.get_message(i * 100, 100).content.decode('utf-8') html = Tools.process_old_html(message) + if "li" not in html: + break soup = BeautifulSoup(html, 'html.parser') for element in soup.find_all('p', class_='txt-box-title ellipsis-one'): text = element.get_text().replace('\xa0', ' ') @@ -16,6 +23,6 @@ if __name__ == '__main__': # 创建一个DataFrame对象 df = pd.DataFrame(texts, columns=['内容']) - # 将DataFrame对象导出为CSV文件,使用UTF-8编码 - df.to_excel('output.xlsx', index=False) - print(texts) + # 导出为Excel + df.to_excel(Config.result_path + Request.uin + '.xlsx', index=False) + print('导出成功,请查看 ' + Config.result_path + Request.uin + '.xlsx') diff --git a/resource/config/config.ini b/resource/config/config.ini index 11bc5c3..ed2f6b5 100644 --- a/resource/config/config.ini +++ b/resource/config/config.ini @@ -1,3 +1,4 @@ [File] temp = ./resource/temp/ -user = ./resource/user/ \ No newline at end of file +user = ./resource/user/ +result = ./resource/result/ \ No newline at end of file diff --git a/util/ConfigUtil.py b/util/ConfigUtil.py index 2ecdacf..2b5da14 100644 --- a/util/ConfigUtil.py +++ b/util/ConfigUtil.py @@ -6,6 +6,7 @@ config.read('./resource/config/config.ini') temp_path = config.get('File', 'temp') user_path = config.get('File', 'user') +result_path = config.get('File', 'result') def save_user(cookies): diff --git a/util/LoginUtil.py b/util/LoginUtil.py index f6e46df..83ab7b6 100644 --- a/util/LoginUtil.py +++ b/util/LoginUtil.py @@ -2,9 +2,9 @@ import requests from PIL import Image import time import re -import os import util.ConfigUtil as Config + def bkn(pSkey): # 计算bkn t, n, o = 5381, 0, len(pSkey) diff --git a/util/RequestUtil.py b/util/RequestUtil.py index d0534b0..a150b61 100644 --- a/util/RequestUtil.py +++ b/util/RequestUtil.py @@ -1,6 +1,6 @@ import util.LoginUtil as Login import requests - +import json # 登陆后获取到的cookies cookies = Login.cookie() # 获取g_tk @@ -52,3 +52,12 @@ def get_message(start, count): response = requests.get('https://user.qzone.qq.com/proxy/domain/ic2.qzone.qq.com/cgi-bin/feeds/feeds2_html_pav_all', params=params, cookies=cookies, headers=headers) return response + + +def get_login_user_info(): + response = requests.get('https://r.qzone.qq.com/fcg-bin/cgi_get_portrait.fcg?g_tk=' + str(g_tk) + '&uins=' + uin, + headers=headers, cookies=cookies) + info = response.content.decode('GBK') + info = info.strip().lstrip('portraitCallBack(').rstrip(');') + info = json.loads(info) + return info diff --git a/util/ToolsUtil.py b/util/ToolsUtil.py index 42e14b8..2c5035d 100644 --- a/util/ToolsUtil.py +++ b/util/ToolsUtil.py @@ -25,7 +25,7 @@ def process_old_html(message): new_text = re.sub(r'\\x[0-9a-fA-F]{2}', replace_hex, message) start_string = "html:'" - end_string = "\'\,opuin" + end_string = "',opuin" new_text = extract_string_between(new_text, start_string, end_string) new_text = replace_multiple_spaces(new_text).replace('\\', '') return new_text