103 lines
3.4 KiB
Python
103 lines
3.4 KiB
Python
|
|
#!/usr/bin/env python3
|
|||
|
|
# -*- coding: utf-8 -*-
|
|||
|
|
|
|||
|
|
import re
|
|||
|
|
|
|||
|
|
def complete_clean_markdown(content):
    """Strip Markdown strikethrough and bold markup from a text fragment.

    Strikethrough spans (``~~text~~``) are deleted together with the text
    they enclose. Bold spans (``**text**`` and ``__text__``) keep their text
    and lose only the markers. Punctuation, spaces and blank lines left
    behind by those removals are then tidied up.

    Args:
        content: The text to clean. Falsy values (``None`` or ``""``) are
            returned unchanged.

    Returns:
        The cleaned text with leading and trailing whitespace stripped.
    """
    if not content:
        return content

    # 1. Strikethrough: delete the markers and the struck-out text.
    # A removal can push two leftover markers together into a new pair
    # (e.g. "~~~~a~~b~~"), so repeat until a pass removes nothing. Stopping
    # on "nothing removed" rather than on "no '~~' left" keeps an unmatched
    # "~~" from looping forever.
    # NOTE(review): the previous version ran a second, textually identical
    # pass labelled "Chinese strikethrough"; if fullwidth tildes were
    # intended there, add a dedicated pattern for them - confirm.
    while True:
        content, removed = re.subn(r'~~[^~]+~~', '', content)
        if not removed:
            break

    # 2. Bold: keep the text, drop the markers.
    content = re.sub(r'\*\*([^*]+)\*\*', r'\1', content)
    content = re.sub(r'__([^_]+)__', r'\1', content)

    # 3. Repair what the removals left behind.
    # Collapse doubled separators and separators directly before a full stop.
    content = re.sub(r',\s*,', ',', content)
    content = re.sub(r'、\s*、', '、', content)
    content = re.sub(r',\s*。', '。', content)
    content = re.sub(r'、\s*。', '。', content)
    content = re.sub(r';\s*;', ';', content)

    # Drop a separator that now starts a line.
    content = re.sub(r'^[,、;]\s*', '', content, flags=re.MULTILINE)

    # Collapse runs of spaces and runs of three or more newlines.
    content = re.sub(r' +', ' ', content)
    content = re.sub(r'\n{3,}', '\n\n', content)

    # Remove numbered list items whose text was deleted entirely.
    content = re.sub(r'^\d+\.\s*\n', '', content, flags=re.MULTILINE)

    return content.strip()
|
|||
|
|
|
|||
|
|
# Mock-data file that is read, cleaned and overwritten in place.
MOCK_FILE_PATH = 'src/mocks/resumeInterviewMock.js'

# Patterns used only for the before/after statistics.
STRIKETHROUGH_PATTERN = r'~~[^~]+~~'
BOLD_PATTERN = r'\*\*[^*]+\*\*'

# Read the file.
print("读取文件...")
with open(MOCK_FILE_PATH, 'r', encoding='utf-8') as f:
    file_content = f.read()

# Count strikethrough and bold spans before cleaning.
# Each span is counted once; the earlier code added the result of the same
# strikethrough pattern twice, which doubled every reported figure.
# NOTE(review): if the second count was meant for fullwidth tildes, add a
# dedicated pattern for them - confirm.
initial_strikethrough = len(re.findall(STRIKETHROUGH_PATTERN, file_content))
initial_bold = len(re.findall(BOLD_PATTERN, file_content))

print(f"文件中发现 {initial_strikethrough} 处删除线")
print(f"文件中发现 {initial_bold} 处加粗符号")

# Number of `modified` fields rewritten; updated by clean_modified_content.
cleaned_count = 0

# Matches a `modified:` key followed by a non-empty backtick-quoted template
# literal. Groups: 1 = key and opening backtick, 2 = body, 3 = closing backtick.
pattern = r'(modified:\s*`)([^`]+)(`)'


def clean_modified_content(match):
    """Return the matched `modified` field with its body cleaned.

    Used as the replacement callback for ``re.sub`` with ``pattern``. The key
    and the surrounding backticks are kept as matched; only the body is
    passed through ``complete_clean_markdown``.

    Args:
        match: A match object produced by ``pattern``.

    Returns:
        The replacement text for the whole match.

    Side effects:
        Increments the module-level ``cleaned_count`` once per call.
    """
    global cleaned_count
    prefix = match.group(1)
    content = match.group(2)
    suffix = match.group(3)

    cleaned = complete_clean_markdown(content)
    cleaned_count += 1

    return prefix + cleaned + suffix


# Rewrite every `modified` field.
print("\n开始清理modified字段内容...")
file_content = re.sub(pattern, clean_modified_content, file_content)

print(f"✅ 清理了 {cleaned_count} 个modified字段")

# Count again over the whole file to confirm the effect of the cleanup.
final_strikethrough = len(re.findall(STRIKETHROUGH_PATTERN, file_content))
final_bold = len(re.findall(BOLD_PATTERN, file_content))

print("\n清理后统计:")
print(f" 剩余删除线: {final_strikethrough} (清理了 {initial_strikethrough - final_strikethrough} 处)")
print(f" 剩余加粗符号: {final_bold} (清理了 {initial_bold - final_bold} 处)")

# Write the result back over the original file.
print("\n写入文件...")
with open(MOCK_FILE_PATH, 'w', encoding='utf-8') as f:
    f.write(file_content)

print("✅ 清理完成!")

# Only `modified` fields are rewritten, so anything still counted here lies
# outside them (or could not be paired by the cleanup patterns).
if final_strikethrough > 0:
    print(f"\n⚠️ 注意:文件中仍有 {final_strikethrough} 处删除线在modified字段之外")
if final_bold > 0:
    print(f"⚠️ 注意:文件中仍有 {final_bold} 处加粗符号在modified字段之外")
|