Files
online_sys/frontend_智能制造/complete_clean.py
KQL a7242f0c69 Initial commit: 教务系统在线平台
- 包含4个产业方向的前端项目:智能开发、智能制造、大健康、财经商贸
- 已清理node_modules、.yoyo等大文件,项目大小从2.6GB优化至631MB
- 配置完善的.gitignore文件

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2025-12-12 18:16:55 +08:00

103 lines
3.4 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import re
def complete_clean_markdown(content):
    """Remove Markdown strikethrough and bold markup from ``content``.

    Struck-through spans are deleted together with their text; bold spans
    keep their text and lose only the ``**`` / ``__`` markers.  Punctuation
    and whitespace left dangling by the deletions are tidied afterwards.

    Args:
        content: Text to clean.  Falsy values (``None``, ``""``) are
            returned unchanged.

    Returns:
        The cleaned text with leading and trailing whitespace stripped.
    """
    if not content:
        return content

    # NOTE(review): in the reviewed copy of this file several literals in
    # this function had lost characters (the hosting page warns about
    # ambiguous Unicode), leaving an empty marker string, an invalid
    # character class ``[^]+`` and a bare ``\s*`` pattern that would delete
    # all whitespace.  The full-width forms used below are reconstructed
    # from the accompanying comments -- confirm against the original file.
    # They are spelled as \u escapes so they stay visible in any viewer.

    # 1. Strikethrough: delete the markers and everything between them.
    strike_patterns = (
        r'~~[^~]+~~',                     # standard ~~text~~
        r'\uff5e{2}[^\uff5e]+\uff5e{2}',  # markers typed as full-width tildes
    )
    for strike in strike_patterns:
        # Repeat until a pass removes nothing.  Stopping on "no replacement
        # made" instead of "marker no longer present" means an unpaired
        # marker cannot cause an endless loop.
        removed = 1
        while removed:
            content, removed = re.subn(strike, '', content)

    # 2. Bold: keep the text, drop the markers.
    content = re.sub(r'\*\*([^*]+)\*\*', r'\1', content)
    content = re.sub(r'__([^_]+)__', r'\1', content)

    # 3. Repair punctuation left behind by the deletions.
    # ASCII comma/semicolon plus full-width comma, ideographic comma and
    # full-width semicolon.
    separators = r',;\uff0c\u3001\uff1b'
    # The same separator repeated (optionally across whitespace) -> one copy.
    content = re.sub(rf'([{separators}])(?:\s*\1)+', r'\1', content)
    # A separator stranded directly before an ideographic full stop.
    content = re.sub(rf'[{separators}]\s*(\u3002)', r'\1', content)
    # A separator left at the start of a line.
    content = re.sub(rf'^[{separators}]\s*', '', content, flags=re.MULTILINE)

    # Collapse runs of spaces and limit consecutive newlines to two.
    content = re.sub(r' +', ' ', content)
    content = re.sub(r'\n{3,}', '\n\n', content)

    # Drop numbered list items that ended up with no text.
    content = re.sub(r'^\d+\.\s*\n', '', content, flags=re.MULTILINE)

    return content.strip()
# Load the mock-data source whose `modified` template strings will be cleaned.
print("读取文件...")
with open('src/mocks/resumeInterviewMock.js', 'r', encoding='utf-8') as f:
    file_content = f.read()

# Count strikethrough and bold spans before cleaning, across the whole file.
initial_strikethrough = len(re.findall(r'~~[^~]+~~', file_content))
# NOTE(review): the second pattern had lost its marker characters in the
# reviewed copy, leaving the invalid character class `[^]+` (re.error at
# runtime).  Full-width tildes are a reconstruction -- confirm against the
# original.  The \u escapes keep the characters visible in any viewer.
initial_strikethrough += len(
    re.findall(r'\uff5e{2}[^\uff5e]+\uff5e{2}', file_content)
)
initial_bold = len(re.findall(r'\*\*[^*]+\*\*', file_content))
print(f"文件中发现 {initial_strikethrough} 处删除线")
print(f"文件中发现 {initial_bold} 处加粗符号")

# Number of `modified` fields processed so far; updated by the substitution
# callback.
cleaned_count = 0

# Matches one `modified` field in three groups: the `modified:` key up to and
# including the opening backtick, the body (no backticks), the closing backtick.
pattern = r'(modified:\s*`)([^`]+)(`)'
def clean_modified_content(match):
    """Substitution callback that cleans the body of one matched field.

    ``match`` must expose three groups: opening text, field body and closing
    text.  The body is passed through ``complete_clean_markdown`` and the
    module-level ``cleaned_count`` is incremented once per call.

    Returns:
        The opening text, cleaned body and closing text joined back together.
    """
    global cleaned_count
    opening, body, closing = match.group(1, 2, 3)
    tidied = complete_clean_markdown(body)
    cleaned_count += 1
    return ''.join((opening, tidied, closing))
# Rewrite every `modified` field through the cleaning callback.
print("\n开始清理modified字段内容...")
file_content = re.sub(pattern, clean_modified_content, file_content)
print(f"✅ 清理了 {cleaned_count} 个modified字段")

# Count again over the whole file to confirm the effect of the cleaning.
final_strikethrough = len(re.findall(r'~~[^~]+~~', file_content))
# NOTE(review): the second pattern had lost its marker characters in the
# reviewed copy, leaving the invalid character class `[^]+` (re.error at
# runtime).  Full-width tildes are a reconstruction -- confirm against the
# original.  The \u escapes keep the characters visible in any viewer.
final_strikethrough += len(
    re.findall(r'\uff5e{2}[^\uff5e]+\uff5e{2}', file_content)
)
final_bold = len(re.findall(r'\*\*[^*]+\*\*', file_content))
print("\n清理后统计:")
print(f" 剩余删除线: {final_strikethrough} (清理了 {initial_strikethrough - final_strikethrough} 处)")
print(f" 剩余加粗符号: {final_bold} (清理了 {initial_bold - final_bold} 处)")

# Write the result back over the same file.
print("\n写入文件...")
with open('src/mocks/resumeInterviewMock.js', 'w', encoding='utf-8') as f:
    f.write(file_content)
print("✅ 清理完成!")

# Anything still counted was not removed by the `modified`-field pass; warn
# about it.
if final_strikethrough > 0:
    print(f"\n⚠️ 注意:文件中仍有 {final_strikethrough} 处删除线在modified字段之外")
if final_bold > 0:
    print(f"⚠️ 注意:文件中仍有 {final_bold} 处加粗符号在modified字段之外")