From 39751eec7a27f1fc7661e0058a6c8a9cf9aebd72 Mon Sep 17 00:00:00 2001 From: zlaazlaa <2889827787@qq.com> Date: Tue, 12 Nov 2024 21:38:05 +0800 Subject: [PATCH] Export to html: save all images to local folder Signed-off-by: zlaazlaa <2889827787@qq.com> --- main.py | 35 +++++++++++++++++++++++++++++------ 1 file changed, 29 insertions(+), 6 deletions(-) diff --git a/main.py b/main.py index 1fc6a80..af3efda 100644 --- a/main.py +++ b/main.py @@ -15,6 +15,8 @@ import platform from pathlib import Path import traceback import dateparser +import hashlib +import copy # 程序版本 version = "1.0.2" @@ -80,6 +82,12 @@ def bkn(pSkey): return t & 2147483647 +def string_to_md5(input_string): + md5_hash = hashlib.md5() + md5_hash.update(input_string.encode('utf-8')) + return md5_hash.hexdigest() + + def ptqrToken(qrsig): # 计算ptqrtoken n, i, e = len(qrsig), 0, 0 @@ -201,15 +209,14 @@ def clean_content(): log(f"清理内容时发生错误: {e}", "ERROR") -def save_image(url, file_name): - global save_path +def save_image(url, path2save, file_name): valid_file_name = re.sub(r'[<>:"/\\|?*]', '_', file_name) try: response = requests.get(url) if response.status_code == 200: - with open(f'{save_path}/{valid_file_name}.jpg', 'wb') as f: + with open(f'{path2save}/{valid_file_name}.jpg', 'wb') as f: f.write(response.content) - log(f"图片保存成功:{save_path}/{valid_file_name}.jpg") + log(f"图片保存成功:{path2save}/{valid_file_name}.jpg") except Exception as e: log(e, "ERROR") @@ -367,6 +374,18 @@ class PaginatedContainer(ft.Column): log(e, "ERROR") def export_html(self, e): + global save_path + image_path = os.path.join(save_path, "images") + os.makedirs(image_path, exist_ok=True) + data = copy.deepcopy(self.data) + for item in data: + if item.images and 'http' in item.images: + image_name_md5 = string_to_md5(item.images) + image_file_path = os.path.join(image_path, f"{image_name_md5}.jpg") + if not os.path.exists(image_file_path): + save_image(item.images, image_path, image_name_md5) + relative_img_path = os.path.join("./images", image_name_md5) + ".jpg" + item.images = relative_img_path # HTML 头部和样式 html_start = ''' @@ -459,7 +478,7 @@ class PaginatedContainer(ft.Column): # HTML 中间部分,动态生成每个数据项的卡片 html_middle = '' - for item in self.data: + for item in data: # 处理每个数据项的内容,包括用户头像、用户名、发布时间、内容等 html_middle += f'''
{item.content}
- {f'' if item.images and 'http' in item.images else ''} + {f'' if item.images else ''}