Files
jiaowu-test/complete_clean.py
KQL 1b964b3886 chore: 更新数据文件和组件优化
主要更新内容:
- 优化UI组件(视频播放器、HR访问模态框、岗位信息展示等)
- 更新数据文件(简历、岗位、项目案例等)
- 添加新的图片资源(面试状态图标等)
- 新增AgentPage等页面组件
- 清理旧的备份文件,提升代码库整洁度
- 优化岗位等级和面试状态的数据结构

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-15 15:55:25 +08:00

103 lines
3.4 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import re
def complete_clean_markdown(content):
"""彻底清理所有markdown格式"""
if not content:
return content
# 1. 先处理删除线 - 删除所有删除线内容
# 处理标准删除线 ~~text~~
while '~~' in content:
content = re.sub(r'~~[^~]+~~', '', content)
# 处理中文删除线 text
while '' in content:
content = re.sub(r'[^]+', '', content)
# 2. 处理加粗 - 保留内容,删除符号
# 处理 **text** 格式
content = re.sub(r'\*\*([^*]+)\*\*', r'\1', content)
# 处理 __text__ 格式
content = re.sub(r'__([^_]+)__', r'\1', content)
# 3. 清理因删除产生的问题
# 清理多余的标点符号
content = re.sub(r'\s*', '', content)
content = re.sub(r'\s*、', '', content)
content = re.sub(r'\s*。', '', content)
content = re.sub(r'\s*。', '', content)
content = re.sub(r'\s*', '', content)
# 清理行首的标点
content = re.sub(r'^[,、;]\s*', '', content, flags=re.MULTILINE)
# 清理多余空格和换行
content = re.sub(r' +', ' ', content)
content = re.sub(r'\n{3,}', '\n\n', content)
# 清理空的列表项
content = re.sub(r'^\d+\.\s*\n', '', content, flags=re.MULTILINE)
return content.strip()
# Load the mock data file whose `modified` fields are going to be cleaned.
print("读取文件...")
with open('src/mocks/resumeInterviewMock.js', 'r', encoding='utf-8') as f:
    file_content = f.read()

# Count the markup present before cleaning so the final report can show how
# much was removed.
initial_strikethrough = len(re.findall(r'~~[^~]+~~', file_content))
# NOTE(review): the marker character of this second strikethrough variant was
# lost from the literal, leaving an invalid regex (re.error: unterminated
# character set). U+FF5E (full-width tilde) is assumed and written as an
# escape - confirm against the repository copy.
initial_strikethrough += len(
    re.findall(r'\uff5e\uff5e[^\uff5e]+\uff5e\uff5e', file_content)
)
initial_bold = len(re.findall(r'\*\*[^*]+\*\*', file_content))
print(f"文件中发现 {initial_strikethrough} 处删除线")
print(f"文件中发现 {initial_bold} 处加粗符号")

# Number of `modified` fields rewritten; incremented by clean_modified_content
# through a `global` statement.
cleaned_count = 0

# Matches a `modified:` field whose value is a backtick-delimited string.
# Group 1 is the key plus opening backtick, group 2 the value, group 3 the
# closing backtick. A value containing a backtick is matched only up to it.
pattern = r'(modified:\s*`)([^`]+)(`)'
def clean_modified_content(match):
    """Substitution callback that cleans the value of one `modified` field.

    Args:
        match: A regex match whose groups 1, 2 and 3 hold the text before
            the value, the value itself, and the text after it.

    Returns:
        The matched text rebuilt with the value passed through
        ``complete_clean_markdown``. Also bumps the module-level
        ``cleaned_count`` by one.
    """
    global cleaned_count
    opening, body, closing = match.group(1, 2, 3)
    scrubbed = complete_clean_markdown(body)
    # Count only after the cleaning call has returned.
    cleaned_count += 1
    return "".join((opening, scrubbed, closing))
# Rewrite every `modified` field through the cleaning callback.
print("\n开始清理modified字段内容...")
file_content = re.sub(pattern, clean_modified_content, file_content)
print(f"✅ 清理了 {cleaned_count} 个modified字段")

# Recount the markup over the whole file to report what the pass removed.
final_strikethrough = len(re.findall(r'~~[^~]+~~', file_content))
# NOTE(review): the marker character of this second strikethrough variant was
# lost from the literal, leaving an invalid regex (re.error: unterminated
# character set). U+FF5E (full-width tilde) is assumed and written as an
# escape - confirm against the repository copy.
final_strikethrough += len(
    re.findall(r'\uff5e\uff5e[^\uff5e]+\uff5e\uff5e', file_content)
)
final_bold = len(re.findall(r'\*\*[^*]+\*\*', file_content))
print("\n清理后统计:")
print(f" 剩余删除线: {final_strikethrough} (清理了 {initial_strikethrough - final_strikethrough} 处)")
print(f" 剩余加粗符号: {final_bold} (清理了 {initial_bold - final_bold} 处)")

# Write the result back over the input file.
print("\n写入文件...")
with open('src/mocks/resumeInterviewMock.js', 'w', encoding='utf-8') as f:
    f.write(file_content)
print("✅ 清理完成!")

# Only `modified` fields are rewritten, so the counts above can stay non-zero;
# warn about whatever markup is still present in the file.
if final_strikethrough > 0:
    print(f"\n⚠️ 注意:文件中仍有 {final_strikethrough} 处删除线在modified字段之外")
if final_bold > 0:
    print(f"⚠️ 注意:文件中仍有 {final_bold} 处加粗符号在modified字段之外")