import re # 提取两个字符串之间的内容 def extract_string_between(source_string, start_string, end_string): start_index = source_string.find(start_string) + len(start_string) end_index = source_string.find(end_string) extracted_string = source_string[start_index:-37] return extracted_string # 去除多余的空格 def replace_multiple_spaces(string): pattern = r'\s+' replaced_string = re.sub(pattern, ' ', string) return replaced_string # 替换十六进制编码 def process_old_html(message): def replace_hex(match): hex_value = match.group(0) byte_value = bytes(hex_value, 'utf-8').decode('unicode_escape') return byte_value new_text = re.sub(r'\\x[0-9a-fA-F]{2}', replace_hex, message) start_string = "html:'" end_string = "\'\,opuin" new_text = extract_string_between(new_text, start_string, end_string) new_text = replace_multiple_spaces(new_text).replace('\\', '') return new_text