#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Strip strikethrough and bold markdown from selected resume mock entries.

The script rewrites ``MOCK_FILE`` in place: for every position listed in
``positions_to_clean`` it finds the ``modified: `...` `` template literal
that follows the matching ``title`` and removes struck-through text and
bold markers from it.
"""

import re

# JS mock file that is read and rewritten in place.
MOCK_FILE = 'src/mocks/resumeInterviewMock.js'

# Positions whose `modified` resume text should be cleaned.
positions_to_clean = [
    "会展策划师",
    "会展讲解员",
    "活动执行",
    "活动策划师",
    "漫展策划师",
    "会展执行助理",
    "旅游规划师",
    "旅游计调专员",
    "景区运营专员",
    "文旅运营总监助理",
]

# Strikethrough spans: ASCII ``~~text~~`` or fullwidth tildes (U+FF5E).
# NOTE(review): the original applied the ASCII pattern twice under a
# "Chinese strikethrough" comment; the fullwidth variant is presumably what
# was intended - confirm. Escapes are used so the pattern survives Unicode
# normalization of this source file.
_STRIKETHROUGH_RE = re.compile(
    r'~~[^~]*~~|\uff5e\uff5e[^\uff5e]*\uff5e\uff5e'
)
_BOLD_STAR_RE = re.compile(r'\*\*([^*]+)\*\*')
_BOLD_UNDERSCORE_RE = re.compile(r'__([^_]+)__')


def deep_clean_markdown(content):
    """Remove strikethrough spans and bold markers from markdown text.

    Struck-through spans are deleted together with their text; bold markers
    (``**text**`` / ``__text__``) are dropped but the text is kept.
    Punctuation and whitespace left dangling by the deletions are tidied up.

    Args:
        content: Markdown text. Falsy values (``None``, ``""``) are returned
            unchanged.

    Returns:
        The cleaned text with leading/trailing whitespace stripped.
    """
    if not content:
        return content

    # 1. Delete strikethrough spans including their content.
    content = _STRIKETHROUGH_RE.sub('', content)

    # 2. Unwrap bold markers, keeping the inner text.
    content = _BOLD_STAR_RE.sub(r'\1', content)
    content = _BOLD_UNDERSCORE_RE.sub(r'\1', content)

    # 3. Collapse punctuation orphaned by the deletions. Both the ASCII comma
    #    and the fullwidth comma (U+FF0C) are handled; the first comma of a
    #    doubled pair is the one that is kept.
    content = re.sub(r'([,\uff0c])\s*[,\uff0c]', r'\1', content)
    content = re.sub(r'、\s*、', '、', content)
    content = re.sub(r'[,\uff0c]\s*。', '。', content)
    content = re.sub(r'、\s*。', '。', content)

    # A line must not start with a comma or an enumeration comma.
    content = re.sub(r'^[,\uff0c、]\s*', '', content, flags=re.MULTILINE)

    # Collapse runs of spaces and of blank lines.
    content = re.sub(r' {2,}', ' ', content)
    content = re.sub(r'\n{3,}', '\n\n', content)

    # Drop numbered list items that are now empty ("3." with nothing after).
    content = re.sub(r'^\d+\.\s*$', '', content, flags=re.MULTILINE)
    content = re.sub(r'^\d+\.\s*\n', '', content, flags=re.MULTILINE)

    return content.strip()


def clean_position_entries(content, position):
    """Clean the ``modified`` template literal of every entry for *position*.

    Args:
        content: Full text of the mock file.
        position: Exact ``title`` value of the entries to clean.

    Returns:
        A ``(new_content, stats)`` tuple. ``stats`` holds one
        ``(strikethrough_count, bold_count)`` pair per entry that was
        actually rewritten; entries without markup are left untouched and
        not reported.
    """
    # `position` is escaped so it is always matched literally.
    pattern = re.compile(
        rf'title:\s*["\']({re.escape(position)})["\'][^}}]*?modified:\s*`([^`]+)`'
    )
    stats = []

    def _rewrite(match):
        original_modified = match.group(2)
        strikethrough_count = len(_STRIKETHROUGH_RE.findall(original_modified))
        bold_count = (
            len(_BOLD_STAR_RE.findall(original_modified))
            + len(_BOLD_UNDERSCORE_RE.findall(original_modified))
        )
        if strikethrough_count == 0 and bold_count == 0:
            return match.group(0)

        stats.append((strikethrough_count, bold_count))
        # Splice by span so only the literal's body changes; whatever
        # whitespace follows "modified:" is preserved, unlike a textual
        # replace of "modified: `...`" which requires exactly one space.
        source = match.string
        return (
            source[match.start():match.start(2)]
            + deep_clean_markdown(original_modified)
            + source[match.end(2):match.end()]
        )

    return pattern.sub(_rewrite, content), stats


def main():
    """Clean the mock file in place and report what was changed."""
    with open(MOCK_FILE, 'r', encoding='utf-8') as f:
        content = f.read()

    print("开始深度清理修改版简历内容...")

    total_cleaned = 0
    for position in positions_to_clean:
        content, stats = clean_position_entries(content, position)
        for strikethrough_count, bold_count in stats:
            print(f"\n✓ {position}")
            print(f" - 删除了 {strikethrough_count} 处删除线")
            print(f" - 清理了 {bold_count} 处加粗符号")
            total_cleaned += 1

    with open(MOCK_FILE, 'w', encoding='utf-8') as f:
        f.write(content)

    print(f"\n✅ 深度清理完成!共处理了 {total_cleaned} 个岗位的修改版内容")

    # Verify that no strikethrough markup is left anywhere in the file.
    remaining = list(_STRIKETHROUGH_RE.finditer(content))
    if remaining:
        print(f"\n⚠️ 警告:文件中仍有 {len(remaining)} 处删除线符号")
        # Show up to 50 characters of context on each side of every leftover.
        for match in remaining:
            start = max(0, match.start() - 50)
            end = min(len(content), match.end() + 50)
            context = content[start:end]
            print(f" 位置: ...{context}...")
    else:
        print("\n✅ 已确认:所有删除线符号都已清理完毕")


if __name__ == "__main__":
    main()