GetQzonehistory/util/ToolsUtil.py

import re


# 提取两个字符串之间的内容
def extract_string_between(source_string, start_string, end_string):
    """Return the part of ``source_string`` enclosed by two marker strings.

    The result begins immediately after the first occurrence of
    ``start_string`` and stops just before the first occurrence of
    ``end_string`` that follows it.

    Args:
        source_string: Text to search.
        start_string: Marker that precedes the wanted text.
        end_string: Marker that follows the wanted text.

    Returns:
        The text between the two markers. An empty string is returned when
        ``start_string`` does not occur. When ``end_string`` is empty or is
        not found after the start marker, the text after the start marker
        minus its last 37 characters is returned (legacy behaviour).
    """
    marker_pos = source_string.find(start_string)
    if marker_pos == -1:
        # Without a start marker there is nothing meaningful to extract.
        return ''
    start_index = marker_pos + len(start_string)

    # Only look for the end marker *after* the start marker so an earlier
    # occurrence cannot produce an inverted (empty) slice.
    end_index = source_string.find(end_string, start_index) if end_string else -1
    if end_index == -1:
        # Legacy fallback: earlier versions ignored ``end_string`` and always
        # dropped a fixed 37-character tail; keep that result for inputs
        # whose end marker cannot be located so existing callers still work.
        return source_string[start_index:-37]
    return source_string[start_index:end_index]


# 去除多余的空格
def replace_multiple_spaces(string):
    """Collapse every run of whitespace in ``string`` into a single space.

    Any sequence of one or more whitespace characters (as matched by the
    regex class ``\\s``) is replaced by exactly one ``' '``. Leading and
    trailing whitespace is collapsed too, not stripped.

    Args:
        string: Text to normalise.

    Returns:
        The text with each whitespace run replaced by one space.
    """
    return re.sub(r'\s+', ' ', string)


# 替换十六进制编码
def process_old_html(message):
    """Decode ``\\xNN`` escapes in ``message`` and clean up its HTML payload.

    Steps, in order:
      1. Every literal ``\\xNN`` sequence (two hex digits) is replaced by the
         character with that code point.
      2. The text following the ``html:'`` marker is selected through
         ``extract_string_between``.
      3. Whitespace runs are collapsed via ``replace_multiple_spaces`` and
         every remaining backslash is removed.

    Args:
        message: Raw text containing ``\\xNN`` escapes and an ``html:'`` marker.

    Returns:
        The cleaned-up text.
    """
    def decode_hex_escape(match):
        # match.group(0) looks like "\x3C": skip the "\x" prefix and turn the
        # two hex digits into the corresponding character.
        return chr(int(match.group(0)[2:], 16))

    decoded_text = re.sub(r'\\x[0-9a-fA-F]{2}', decode_hex_escape, message)

    start_string = "html:'"
    # Same runtime value as the former "\'\,opuin" literal, but with the
    # backslash escaped explicitly: "\," is an invalid escape sequence and
    # triggers a SyntaxWarning on recent Python versions.
    # NOTE(review): this marker contains a literal backslash before the
    # comma - confirm that is what actually appears in the payload.
    end_string = "'\\,opuin"

    payload = extract_string_between(decoded_text, start_string, end_string)
    return replace_multiple_spaces(payload).replace('\\', '')
简单实现查找历史说说并导出为excel 2024-02-13 10:22:44 +00:00			`import re`


			`# 提取两个字符串之间的内容`
			`def extract_string_between(source_string, start_string, end_string):`
			`start_index = source_string.find(start_string) + len(start_string)`
			`end_index = source_string.find(end_string)`
			`extracted_string = source_string[start_index:-37]`
			`return extracted_string`


			`# 去除多余的空格`
			`def replace_multiple_spaces(string):`
			`pattern = r'\s+'`
			`replaced_string = re.sub(pattern, ' ', string)`
			`return replaced_string`


			`# 替换十六进制编码`
			`def process_old_html(message):`
			`def replace_hex(match):`
			`hex_value = match.group(0)`
			`byte_value = bytes(hex_value, 'utf-8').decode('unicode_escape')`
			`return byte_value`

			`new_text = re.sub(r'\\x[0-9a-fA-F]{2}', replace_hex, message)`
			`start_string = "html:'"`
			`end_string = "\'\,opuin"`
			`new_text = extract_string_between(new_text, start_string, end_string)`
			`new_text = replace_multiple_spaces(new_text).replace('\\', '')`
			`return new_text`