Files
ALL-teach_sys/frontend_交通物流/deep_clean_modified.py
KQL cd2e307402 初始化12个产业教务系统项目
主要内容:
- 包含12个产业的完整教务系统前端代码
- 智能启动脚本 (start-industry.sh)
- 可视化产业导航页面 (index.html)
- 项目文档 (README.md)

优化内容:
- 删除所有node_modules和.yoyo文件夹,从7.5GB减少到2.7GB
- 添加.gitignore文件避免上传不必要的文件
- 自动依赖管理和智能启动系统

产业列表:
1. 文旅产业 (5150)
2. 智能制造 (5151)
3. 智能开发 (5152)
4. 财经商贸 (5153)
5. 视觉设计 (5154)
6. 交通物流 (5155)
7. 大健康 (5156)
8. 土木水利 (5157)
9. 食品产业 (5158)
10. 化工产业 (5159)
11. 能源产业 (5160)
12. 环保产业 (5161)

🤖 Generated with Claude Code
Co-Authored-By: Claude <noreply@anthropic.com>
2025-09-24 14:14:14 +08:00

114 lines
4.0 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import re
def deep_clean_markdown(content):
    """Remove strikethrough spans and bold markers from Markdown text.

    Struck-through spans are deleted together with their text; bold
    markers are removed but the text they wrap is kept. Punctuation and
    whitespace left dangling by the deletions are then tidied up.

    Args:
        content: Markdown text. Falsy values (``None``, ``""``) are
            returned unchanged.

    Returns:
        The cleaned text with leading/trailing whitespace stripped, or
        ``content`` itself when it is falsy.
    """
    if not content:
        return content

    # 1. Delete struck-through spans together with their content.
    # Standard Markdown strikethrough: ~~text~~
    content = re.sub(r'~~[^~]*~~', '', content)
    # "Chinese" strikethrough variant.
    # NOTE(review): the delimiter of this second form was lost from the
    # file (the pattern had degraded to the invalid regex '[^]*', which
    # raises re.error). Fullwidth tilde U+FF5E is assumed here -- confirm.
    # It is written as an escape so the character cannot be dropped again.
    content = re.sub(r'\uFF5E\uFF5E[^\uFF5E]*\uFF5E\uFF5E', '', content)

    # 2. Remove bold markers but keep the wrapped text.
    # **text** -> text
    content = re.sub(r'\*\*([^*]+)\*\*', r'\1', content)
    # __text__ -> text
    content = re.sub(r'__([^_]+)__', r'\1', content)

    # 3. Tidy punctuation orphaned by the deletions above.
    # NOTE(review): these patterns had degraded to r'\s*' -> '' (which
    # deletes ALL whitespace) and to patterns that deleted every '、' and
    # '。'. They are reconstructed as "collapse duplicated separators";
    # confirm against the intended behaviour.
    # Runs of commas / enumeration commas collapse to a single one.
    content = re.sub(r',(?:\s*,)+', ',', content)
    content = re.sub(r'、(?:\s*、)+', '、', content)
    # A separator directly before a full stop is redundant.
    content = re.sub(r'(?:[,、]\s*)+。', '。', content)
    # A line must not start with a separator.
    content = re.sub(r'^[,、]\s*', '', content, flags=re.MULTILINE)

    # Collapse runs of spaces and of blank lines.
    content = re.sub(r' {2,}', ' ', content)
    content = re.sub(r'\n{3,}', '\n\n', content)

    # Drop numbered list items whose text was removed entirely.
    content = re.sub(r'^\d+\.\s*$', '', content, flags=re.MULTILINE)
    content = re.sub(r'^\d+\.\s*\n', '', content, flags=re.MULTILINE)

    return content.strip()
# Mock data file that is rewritten in place.
MOCK_PATH = 'src/mocks/resumeInterviewMock.js'

# Regexes for the markup being counted and removed.
# NOTE(review): the delimiter of the second strikethrough form was lost
# from the file (the pattern had degraded to the invalid regex '[^]*',
# which raises re.error). Fullwidth tilde U+FF5E is assumed -- confirm.
STRIKETHROUGH_PATTERNS = (
    r'~~[^~]*~~',
    r'\uFF5E\uFF5E[^\uFF5E]*\uFF5E\uFF5E',
)
BOLD_PATTERNS = (
    r'\*\*[^*]+\*\*',
    r'__[^_]+__',
)


def _count_matches(patterns, text):
    """Return the total number of matches of all ``patterns`` in ``text``."""
    return sum(len(re.findall(pattern, text)) for pattern in patterns)


# Read the whole mock file.
with open(MOCK_PATH, 'r', encoding='utf-8') as f:
    content = f.read()

# Position titles whose `modified` resume text should be cleaned.
positions_to_clean = [
    "会展策划师",
    "会展讲解员",
    "活动执行",
    "活动策划师",
    "漫展策划师",
    "会展执行助理",
    "旅游规划师",
    "旅游计调专员",
    "景区运营专员",
    "文旅运营总监助理",
]

print("开始深度清理修改版简历内容...")
total_cleaned = 0

for position in positions_to_clean:
    # Find the `modified: `...`` template literal that follows this title
    # before the next closing brace. The title is escaped so that it is
    # always matched literally.
    pattern = (
        rf'title:\s*["\']({re.escape(position)})["\']'
        rf'[^}}]*?modified:\s*`([^`]+)`'
    )
    for match in re.finditer(pattern, content, re.DOTALL):
        original_modified = match.group(2)

        strikethrough_count = _count_matches(
            STRIKETHROUGH_PATTERNS, original_modified
        )
        bold_count = _count_matches(BOLD_PATTERNS, original_modified)
        if strikethrough_count == 0 and bold_count == 0:
            continue

        cleaned_modified = deep_clean_markdown(original_modified)

        # Swap the old template literal for the cleaned one.
        old_text = f"modified: `{original_modified}`"
        new_text = f"modified: `{cleaned_modified}`"
        content = content.replace(old_text, new_text)

        print(f"\n{position}")
        print(f" - 删除了 {strikethrough_count} 处删除线")
        print(f" - 清理了 {bold_count} 处加粗符号")
        total_cleaned += 1

# Write the cleaned content back to the same file.
with open(MOCK_PATH, 'w', encoding='utf-8') as f:
    f.write(content)

print(f"\n✅ 深度清理完成!共处理了 {total_cleaned} 个岗位的修改版内容")

# Verify that no strikethrough spans remain anywhere in the file.
remaining_strikethrough = _count_matches(STRIKETHROUGH_PATTERNS, content)
if remaining_strikethrough > 0:
    print(f"\n⚠️ 警告:文件中仍有 {remaining_strikethrough} 处删除线符号")
    # Show each leftover span with up to 50 characters of context per side.
    for match in re.finditer('|'.join(STRIKETHROUGH_PATTERNS), content):
        start = max(0, match.start() - 50)
        end = min(len(content), match.end() + 50)
        context = content[start:end]
        print(f" 位置: ...{context}...")
else:
    print("\n✅ 已确认:所有删除线符号都已清理完毕")