Files
online_sys/frontend_财经商贸/clean_modified_content.py

92 lines
2.8 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import json
import re
def clean_markdown_content(content):
    """Strip strikethrough spans and bold markers from Markdown text.

    Struck-through spans are removed together with the text they enclose;
    bold markers are removed but the emphasised text is kept. Afterwards,
    runs of three or more newlines collapse to two, runs of spaces collapse
    to one, and leading/trailing whitespace is stripped.

    Args:
        content: Markdown text. Falsy values (``None``, ``""``) are returned
            unchanged.

    Returns:
        The cleaned text, or ``content`` itself when it is falsy.
    """
    if not content:
        return content

    # Remove ASCII strikethrough spans and their text: ~~text~~
    content = re.sub(r'~~[^~]+~~', '', content)

    # Remove the fullwidth-tilde strikethrough variant and its text.
    # NOTE(review): the pattern previously here was `[^]+`, an unterminated
    # character set that makes re.sub raise re.error on every call; its
    # delimiter characters appear to have been lost. Fullwidth tilde (U+FF5E)
    # is assumed from the "Chinese strikethrough" wording -- confirm against
    # the data. Written as an escape so the pattern survives re-encoding.
    content = re.sub(r'\uFF5E{2}[^\uFF5E]+\uFF5E{2}', '', content)

    # Remove bold markers but keep the text: **text** -> text
    content = re.sub(r'\*\*([^*]+)\*\*', r'\1', content)
    # Remove bold markers but keep the text: __text__ -> text
    content = re.sub(r'__([^_]+)__', r'\1', content)

    # Tidy the whitespace left behind by the removals.
    content = re.sub(r'\n{3,}', '\n\n', content)  # at most one blank line
    content = re.sub(r' +', ' ', content)  # single spaces only
    return content.strip()
# Load the mock data file whose `modified` resume templates are cleaned in place.
with open('src/mocks/resumeInterviewMock.js', 'r', encoding='utf-8') as f:
    content = f.read()

# NOTE(review): `ast` is not used by the visible script; the import is kept
# only so the file's set of imports is unchanged.
import ast

# Hand-maintained list of position titles whose entries carry a `modified`
# field. The file is not parsed as JSON/JS; entries are located by regex below.
positions_with_modified = [
    "会展策划师",
    "会展讲解员",
    "活动执行",
    "活动策划师",
    "漫展策划师",
    "会展执行助理",
    "旅游规划师",
    "旅游计调专员",
    "景区运营专员",
    "文旅运营总监助理",
]

print("开始清理修改版简历内容...")
print(f"需要清理的岗位: {positions_with_modified}")

# Process the positions one at a time.
for position_name in positions_with_modified:
    print(f"\n处理岗位: {position_name}")

    # Find `title: "<name>" ... content: { ... modified: `<template>``.
    # The title is escaped so regex metacharacters in a name stay literal.
    # NOTE(review): the lazy `.*?` with DOTALL can run past the intended entry
    # if that entry has no `modified` field -- verify against the data file.
    pattern = (
        rf'title:\s*["\']({re.escape(position_name)})["\']'
        r'.*?content:\s*\{.*?modified:\s*`([^`]+)`'
    )
    matches = list(re.finditer(pattern, content, re.DOTALL))
    if not matches:
        print(" ⚠ 未找到该岗位的修改版内容")
        continue

    # Rebuild the text by splicing each cleaned template over the exact span
    # the regex captured. Searching for the literal "modified: `...`" instead
    # would silently miss entries spaced differently after the colon.
    pieces = []
    cursor = 0
    for match in matches:
        original_modified = match.group(2)
        cleaned_modified = clean_markdown_content(original_modified)
        start, end = match.span(2)
        pieces.append(content[cursor:start])
        pieces.append(cleaned_modified)
        cursor = end
        print(" ✓ 清理了修改版内容")
        print(f" 原长度: {len(original_modified)}")
        print(f" 新长度: {len(cleaned_modified)}")
    pieces.append(content[cursor:])
    content = "".join(pieces)

# Write the cleaned text back over the same file.
with open('src/mocks/resumeInterviewMock.js', 'w', encoding='utf-8') as f:
    f.write(content)
print("\n✅ 清理完成!")