Files
online_sys/frontend_智能开发/complete_clean.py

103 lines
3.4 KiB
Python
Raw Permalink Normal View History

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import re
def complete_clean_markdown(content):
    """Remove Markdown strikethrough and bold markup from ``content``.

    Struck-through spans are deleted together with their text. Bold markers
    (``**text**`` and ``__text__``) are removed while the text is kept.
    Punctuation and whitespace left dangling by the deletions are then
    tidied up.

    Args:
        content: Text to clean. Falsy values (``None``, ``""``) are returned
            unchanged.

    Returns:
        The cleaned text with leading and trailing whitespace stripped.
    """
    if not content:
        return content

    # NOTE(review): in the reviewed copy of this file the second
    # strikethrough marker and the leading characters of the punctuation
    # patterns were missing (`while '' in content`, r'[^]+', r'\s*'), which
    # made the function raise re.error and would have deleted all whitespace.
    # They are reconstructed below as fullwidth characters, written as \u
    # escapes so they survive tooling that drops such characters.
    # TODO confirm against the intended input data.
    separators = r',;\u3001\uFF0C\uFF1B'  # , ; plus ideographic/fullwidth forms
    full_stop = r'\u3002'                 # ideographic full stop

    # 1. Strikethrough: drop the markers and the struck text, for both the
    #    ASCII `~~text~~` form and the (assumed) fullwidth-tilde form.
    strike_re = re.compile(r'~~[^~]+~~|\uFF5E\uFF5E[^\uFF5E]+\uFF5E\uFF5E')
    # Repeat until nothing more is removed: a deletion can bring two markers
    # together and form a new span. Looping on the substitution count rather
    # than on "marker still present" guarantees termination when an unmatched
    # marker remains in the text.
    while True:
        content, removed = strike_re.subn('', content)
        if removed == 0:
            break

    # 2. Bold: keep the text, drop the markers.
    content = re.sub(r'\*\*([^*]+)\*\*', r'\1', content)
    content = re.sub(r'__([^_]+)__', r'\1', content)

    # 3. Clean up artifacts of the deletions.
    # A separator directly followed (ignoring whitespace) by another
    # separator or a full stop is redundant: drop it and keep the later mark.
    content = re.sub(
        '[' + separators + r']\s*(?=[' + separators + full_stop + '])',
        '',
        content,
    )
    # Drop punctuation stranded at the start of a line.
    content = re.sub('^[' + separators + r']\s*', '', content, flags=re.MULTILINE)
    # Collapse runs of spaces and of blank lines.
    content = re.sub(r' +', ' ', content)
    content = re.sub(r'\n{3,}', '\n\n', content)
    # Remove numbered list items that are now empty.
    content = re.sub(r'^\d+\.\s*\n', '', content, flags=re.MULTILINE)

    return content.strip()
# Load the mock data file; it is cleaned in memory and written back later.
print("读取文件...")
with open('src/mocks/resumeInterviewMock.js', 'r', encoding='utf-8') as f:
    file_content = f.read()

# Count strikethrough and bold markup across the whole file before cleaning.
initial_strikethrough = len(re.findall(r'~~[^~]+~~', file_content))
# NOTE(review): the marker characters of this second pattern were missing in
# the reviewed copy (r'[^]+', which does not compile). It is reconstructed as
# the fullwidth-tilde form, written with \u escapes -- TODO confirm.
initial_strikethrough += len(
    re.findall(r'\uFF5E\uFF5E[^\uFF5E]+\uFF5E\uFF5E', file_content)
)
initial_bold = len(re.findall(r'\*\*[^*]+\*\*', file_content))
print(f"文件中发现 {initial_strikethrough} 处删除线")
print(f"文件中发现 {initial_bold} 处加粗符号")

# Number of `modified` fields processed; incremented by the re.sub callback.
cleaned_count = 0

# Matches a `modified:` field whose value is a backtick-delimited string.
# Groups: 1 = prefix up to the opening backtick, 2 = body, 3 = closing backtick.
pattern = r'(modified:\s*`)([^`]+)(`)'
def clean_modified_content(match):
    """Substitution callback that cleans the body of one `modified` field.

    Args:
        match: Regex match with three groups: the text up to and including
            the opening backtick, the field body, and the closing backtick.

    Returns:
        The field text with its body passed through
        ``complete_clean_markdown`` and both delimiters left untouched.

    Side effects:
        Increments the module-level ``cleaned_count`` once per call.
    """
    global cleaned_count
    opening, body, closing = match.group(1, 2, 3)
    # Clean first so the counter only advances when cleaning succeeded.
    tidy_body = complete_clean_markdown(body)
    cleaned_count += 1
    return f"{opening}{tidy_body}{closing}"
# Clean the body of every `modified` field in place.
print("\n开始清理modified字段内容...")
file_content = re.sub(pattern, clean_modified_content, file_content)
print(f"✅ 清理了 {cleaned_count} 个modified字段")

# Recount across the whole file to show what the cleaning achieved.
final_strikethrough = len(re.findall(r'~~[^~]+~~', file_content))
# NOTE(review): the marker characters of this second pattern were missing in
# the reviewed copy (r'[^]+', which does not compile). It is reconstructed as
# the fullwidth-tilde form, written with \u escapes -- TODO confirm.
final_strikethrough += len(
    re.findall(r'\uFF5E\uFF5E[^\uFF5E]+\uFF5E\uFF5E', file_content)
)
final_bold = len(re.findall(r'\*\*[^*]+\*\*', file_content))
print("\n清理后统计:")
print(f" 剩余删除线: {final_strikethrough} (清理了 {initial_strikethrough - final_strikethrough} 处)")
print(f" 剩余加粗符号: {final_bold} (清理了 {initial_bold - final_bold} 处)")

# Write the cleaned content back over the original file.
print("\n写入文件...")
with open('src/mocks/resumeInterviewMock.js', 'w', encoding='utf-8') as f:
    f.write(file_content)
print("✅ 清理完成!")

# Only `modified` fields are cleaned, so any markup still counted here lies
# outside those fields; report it so it can be checked by hand.
if final_strikethrough > 0:
    print(f"\n⚠️ 注意:文件中仍有 {final_strikethrough} 处删除线在modified字段之外")
if final_bold > 0:
    print(f"⚠️ 注意:文件中仍有 {final_bold} 处加粗符号在modified字段之外")