From b70b6725f3588a48e2251065ff6613e8bc689add Mon Sep 17 00:00:00 2001 From: wangxiaowen Date: Wed, 13 Nov 2024 18:36:46 +0800 Subject: [PATCH] fix: utf-8 decode error; closes #156, closes #157, closes #158, closes #161 --- main.py | 5 ++++- requirements.txt | 3 ++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/main.py b/main.py index f08ce99..c977e11 100644 --- a/main.py +++ b/main.py @@ -14,6 +14,7 @@ from tqdm import trange, tqdm import requests import time import platform +import chardet texts = list() all_friends = list() @@ -234,7 +235,9 @@ if __name__ == '__main__': signal.signal(signal.SIGINT, signal_handler) signal.signal(signal.SIGTERM, signal_handler) for i in trange(int(count / 100) + 1, desc='Progress', unit='100条'): - message = Request.get_message(i * 100, 100).content.decode('utf-8') + content_bytes = Request.get_message(i * 100, 100).content + detected_encoding = chardet.detect(content_bytes)['encoding'] + message = content_bytes.decode(detected_encoding if detected_encoding else "utf-8") time.sleep(0.2) html = Tools.process_old_html(message) if "li" not in html: diff --git a/requirements.txt b/requirements.txt index 0e99343..d481310 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,4 +7,5 @@ pyarrow==14.0.1 openpyxl==3.1.2 pyzbar~=0.1.9 qrcode~=7.4.2 -fake-useragent \ No newline at end of file +fake-useragent~=1.5.1 +chardet~=5.2.0 \ No newline at end of file