114 lines
4.0 KiB
Python
114 lines
4.0 KiB
Python
|
|
#!/usr/bin/env python3
|
|||
|
|
# -*- coding: utf-8 -*-
|
|||
|
|
|
|||
|
|
import re
|
|||
|
|
|
|||
|
|
def deep_clean_markdown(content):
    """Remove strikethrough spans and bold markers from markdown text.

    The clean-up runs in three stages:

    1. Strikethrough spans are deleted together with their content, both
       for ASCII tildes (``~~text~~``) and for full-width tildes (U+FF5E),
       which Chinese input methods commonly produce.
    2. Bold markers (``**text**`` and ``__text__``) are removed while the
       wrapped text is kept.
    3. Punctuation and whitespace left behind by the deletions are tidied:
       doubled separators, separators directly before a full stop,
       separators at the start of a line, runs of spaces, runs of blank
       lines and numbered list items that became empty.

    Args:
        content: Markdown text to clean. Falsy values (``None``, ``""``)
            are returned unchanged.

    Returns:
        The cleaned text with leading/trailing whitespace stripped, or
        ``content`` itself when it is falsy.
    """
    if not content:
        return content

    # 1. Delete every strikethrough span, including its content.
    # Standard markdown strikethrough written with ASCII tildes.
    content = re.sub(r'~~[^~]*~~', '', content)
    # Same construct written with full-width tildes (U+FF5E). The escape is
    # spelled out so the pattern cannot be confused with the ASCII one.
    content = re.sub(r'\uFF5E\uFF5E[^\uFF5E]*\uFF5E\uFF5E', '', content)

    # 2. Drop bold markers but keep the text they wrap.
    # **text** -> text
    content = re.sub(r'\*\*([^*]+)\*\*', r'\1', content)
    # __text__ -> text
    content = re.sub(r'__([^_]+)__', r'\1', content)

    # 3. Tidy punctuation and whitespace left over from the deletions.
    # NOTE(review): only the ASCII comma is handled here; full-width commas
    # are left untouched - confirm whether that is intended.
    # Collapse doubled separators.
    content = re.sub(r',\s*,', ',', content)
    content = re.sub(r'、\s*、', '、', content)
    # Drop a separator that now sits directly before a full stop.
    content = re.sub(r',\s*。', '。', content)
    content = re.sub(r'、\s*。', '。', content)

    # Drop a separator that ended up at the start of a line.
    content = re.sub(r'^[,、]\s*', '', content, flags=re.MULTILINE)

    # Collapse runs of spaces and runs of blank lines.
    content = re.sub(r' {2,}', ' ', content)
    content = re.sub(r'\n{3,}', '\n\n', content)

    # Remove numbered list items whose text was deleted entirely.
    content = re.sub(r'^\d+\.\s*$', '', content, flags=re.MULTILINE)
    content = re.sub(r'^\d+\.\s*\n', '', content, flags=re.MULTILINE)

    return content.strip()
|
|||
|
|
|
|||
|
|
# In-place clean-up of the "modified" resume text for selected positions in
# the mock data file: read the file, clean each matching entry, write the
# file back, then report any strikethrough spans that are still present.

# Mock data file that is read and then rewritten in place.
MOCK_FILE = 'src/mocks/resumeInterviewMock.js'

# A strikethrough span written with ASCII tildes or with full-width tildes
# (U+FF5E). A single alternation means every span is counted exactly once.
STRIKETHROUGH_RE = re.compile(
    r'~~[^~]*~~|\uFF5E\uFF5E[^\uFF5E]*\uFF5E\uFF5E'
)
# Bold spans: **text** and __text__.
BOLD_STAR_RE = re.compile(r'\*\*[^*]+\*\*')
BOLD_UNDERSCORE_RE = re.compile(r'__[^_]+__')

# Read the whole file.
with open(MOCK_FILE, 'r', encoding='utf-8') as f:
    content = f.read()

# Position titles whose "modified" text should be cleaned.
positions_to_clean = [
    "会展策划师",
    "会展讲解员",
    "活动执行",
    "活动策划师",
    "漫展策划师",
    "会展执行助理",
    "旅游规划师",
    "旅游计调专员",
    "景区运营专员",
    "文旅运营总监助理"
]

print("开始深度清理修改版简历内容...")
total_cleaned = 0

for position in positions_to_clean:
    # Find the `modified` template literal that follows this title inside
    # the same object (the lazy gap may not cross a closing brace). The
    # title is escaped so it is always matched literally.
    escaped_title = re.escape(position)
    pattern = re.compile(
        rf'title:\s*["\']({escaped_title})["\'][^}}]*?modified:\s*`([^`]+)`',
        re.DOTALL,
    )

    # Rebuild the text from slices around each match. Splicing by span
    # replaces exactly what was matched, whatever whitespace follows
    # "modified:", instead of relying on a re-created literal.
    pieces = []
    cursor = 0
    for match in pattern.finditer(content):
        original_modified = match.group(2)

        # Count strikethrough spans (each span once).
        strikethrough_count = len(STRIKETHROUGH_RE.findall(original_modified))

        # Count bold spans of both flavours.
        bold_count = len(BOLD_STAR_RE.findall(original_modified))
        bold_count += len(BOLD_UNDERSCORE_RE.findall(original_modified))

        if strikethrough_count > 0 or bold_count > 0:
            cleaned_modified = deep_clean_markdown(original_modified)

            # Swap only the text between the backticks.
            start, end = match.span(2)
            pieces.append(content[cursor:start])
            pieces.append(cleaned_modified)
            cursor = end

            print(f"\n✓ {position}")
            print(f" - 删除了 {strikethrough_count} 处删除线")
            print(f" - 清理了 {bold_count} 处加粗符号")
            total_cleaned += 1

    pieces.append(content[cursor:])
    content = ''.join(pieces)

# Write the result back to the same file.
with open(MOCK_FILE, 'w', encoding='utf-8') as f:
    f.write(content)

print(f"\n✅ 深度清理完成!共处理了 {total_cleaned} 个岗位的修改版内容")

# Verify whether any strikethrough span is still left anywhere in the file.
remaining_matches = list(STRIKETHROUGH_RE.finditer(content))
remaining_strikethrough = len(remaining_matches)

if remaining_strikethrough > 0:
    print(f"\n⚠️ 警告:文件中仍有 {remaining_strikethrough} 处删除线符号")
    # Show each leftover with up to 50 characters of context on either side.
    for match in remaining_matches:
        start = max(0, match.start() - 50)
        end = min(len(content), match.end() + 50)
        context = content[start:end]
        print(f" 位置: ...{context}...")
else:
    print("\n✅ 已确认:所有删除线符号都已清理完毕")
|