Files
online_sys/frontend_智能开发/deep_clean_modified.py
KQL a7242f0c69 Initial commit: 教务系统在线平台
- 包含4个产业方向的前端项目:智能开发、智能制造、大健康、财经商贸
- 已清理node_modules、.yoyo等大文件,项目大小从2.6GB优化至631MB
- 配置完善的.gitignore文件

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2025-12-12 18:16:55 +08:00

114 lines
4.0 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import re
def deep_clean_markdown(content):
    """Remove strikethrough spans and bold markers from Markdown text.

    Strikethrough spans (``~~text~~``) are deleted together with the text
    they enclose; bold markers (``**text**`` / ``__text__``) are removed but
    the enclosed text is kept.  Punctuation and whitespace left dangling by
    the deletions are then tidied up.

    NOTE(review): the copy of this file under review had lost the fullwidth
    characters in several patterns, leaving an invalid regex (``[^]*``) and a
    pattern (``\\s*``) that deleted every whitespace character.  The patterns
    below were reconstructed from the surrounding comments, assuming the
    missing characters were the fullwidth tilde (U+FF5E) and the fullwidth
    comma (U+FF0C) -- confirm against the original file.  They are written as
    ``\\uXXXX`` escapes so they cannot be silently dropped again.

    Args:
        content: Markdown text to clean.  Falsy values (``None``, ``""``)
            are returned unchanged.

    Returns:
        The cleaned text with leading/trailing whitespace stripped, or
        ``content`` itself when it is falsy.
    """
    if not content:
        return content

    # 1. Delete strikethrough spans together with their content.
    # Standard form: ~~text~~
    content = re.sub(r'~~[^~]*~~', '', content)
    # Same construct written with fullwidth tildes (U+FF5E).
    content = re.sub(r'\uFF5E\uFF5E[^\uFF5E]*\uFF5E\uFF5E', '', content)

    # 2. Drop bold markers but keep the enclosed text.
    content = re.sub(r'\*\*([^*]+)\*\*', r'\1', content)
    content = re.sub(r'__([^_]+)__', r'\1', content)

    # 3. Tidy punctuation left behind by the deletions.  Whitespace matching
    # is restricted to a single line ([^\S\n]) so line structure survives.
    # A run of list separators (ASCII comma, fullwidth comma, ideographic
    # comma) collapses to the first separator of the run.
    content = re.sub(
        r'([,\uFF0C、])(?:[^\S\n]*[,\uFF0C、])+', r'\1', content
    )
    # A separator directly before a full stop is redundant.
    content = re.sub(r'[,\uFF0C、][^\S\n]*。', '。', content)
    # A line must not start with a separator.
    content = re.sub(
        r'^[,\uFF0C、][^\S\n]*', '', content, flags=re.MULTILINE
    )

    # Collapse runs of spaces created by removed spans.
    content = re.sub(r' {2,}', ' ', content)

    # Remove numbered list items whose text was deleted entirely, including
    # their line break so no blank line is left inside the list.
    content = re.sub(
        r'^\d+\.[^\S\n]*(?:\n|\Z)', '', content, flags=re.MULTILINE
    )

    # Finally, limit consecutive blank lines to one.
    content = re.sub(r'\n{3,}', '\n\n', content)

    return content.strip()
# Script body: rewrite the "modified" resume text of selected positions in
# the mock data file, removing strikethrough spans and bold markers.

# Mock data file that is read and then rewritten in place.
MOCK_FILE = 'src/mocks/resumeInterviewMock.js'

# Strikethrough spans written with ASCII tildes or with fullwidth tildes
# (U+FF5E).
# NOTE(review): the fullwidth alternative was reconstructed -- the reviewed
# copy contained the invalid pattern "[^]*", apparently after its fullwidth
# characters were stripped.  Confirm against the original file.
STRIKETHROUGH_RE = re.compile(r'~~[^~]*~~|\uFF5E\uFF5E[^\uFF5E]*\uFF5E\uFF5E')
# Bold spans: **text** or __text__.
BOLD_RE = re.compile(r'\*\*[^*]+\*\*|__[^_]+__')

# Read the whole file into memory.
with open(MOCK_FILE, 'r', encoding='utf-8') as f:
    content = f.read()

# Positions whose "modified" content should be cleaned.
positions_to_clean = [
    "会展策划师",
    "会展讲解员",
    "活动执行",
    "活动策划师",
    "漫展策划师",
    "会展执行助理",
    "旅游规划师",
    "旅游计调专员",
    "景区运营专员",
    "文旅运营总监助理",
]

print("开始深度清理修改版简历内容...")
total_cleaned = 0

for position in positions_to_clean:
    # Match the template literal assigned to `modified:` that follows this
    # position's `title:` without crossing a closing brace.  The title is
    # escaped so regex metacharacters in a name cannot alter the pattern.
    pattern = re.compile(
        rf'title:\s*["\']({re.escape(position)})["\'][^}}]*?modified:\s*`([^`]+)`',
        re.DOTALL,
    )

    # Rebuild the file text by splicing each cleaned value over the exact
    # span that matched.  This avoids re-creating the matched text by hand
    # (which fails when the spacing after "modified:" is not a single space)
    # and never touches identical text elsewhere in the file.
    pieces = []
    cursor = 0
    for match in pattern.finditer(content):
        original_modified = match.group(2)

        # Count what is about to be removed, for the progress report.
        strikethrough_count = len(STRIKETHROUGH_RE.findall(original_modified))
        bold_count = len(BOLD_RE.findall(original_modified))
        if strikethrough_count == 0 and bold_count == 0:
            continue

        cleaned_modified = deep_clean_markdown(original_modified)
        pieces.append(content[cursor:match.start(2)])
        pieces.append(cleaned_modified)
        cursor = match.end(2)

        print(f"\n{position}")
        print(f" - 删除了 {strikethrough_count} 处删除线")
        print(f" - 清理了 {bold_count} 处加粗符号")
        total_cleaned += 1

    pieces.append(content[cursor:])
    content = "".join(pieces)

# Write the result back to the same file.
with open(MOCK_FILE, 'w', encoding='utf-8') as f:
    f.write(content)
print(f"\n✅ 深度清理完成!共处理了 {total_cleaned} 个岗位的修改版内容")

# Verify that no strikethrough span is left anywhere in the file.
leftovers = list(STRIKETHROUGH_RE.finditer(content))
remaining_strikethrough = len(leftovers)
if remaining_strikethrough > 0:
    print(f"\n⚠️ 警告:文件中仍有 {remaining_strikethrough} 处删除线符号")
    # Show each leftover with up to 50 characters of context on either side.
    for match in leftovers:
        start = max(0, match.start() - 50)
        end = min(len(content), match.end() + 50)
        context = content[start:end]
        print(f" 位置: ...{context}...")
else:
    print("\n✅ 已确认:所有删除线符号都已清理完毕")