133 lines
4.5 KiB
Python
133 lines
4.5 KiB
Python
|
|
#!/usr/bin/env python3
|
|||
|
|
# -*- coding: utf-8 -*-
|
|||
|
|
"""
|
|||
|
|
面试题数据换行符清理脚本 - 安全版本
|
|||
|
|
清理resumeInterviewMock.js中的换行符,包括\\n转换和JSON字符转义
|
|||
|
|
"""
|
|||
|
|
|
|||
|
|
import json
|
|||
|
|
import re
|
|||
|
|
import shutil
|
|||
|
|
from datetime import datetime
|
|||
|
|
import codecs
|
|||
|
|
|
|||
|
|
def safe_clean_newlines_in_text(text):
    """Normalize the newlines in *text* and return a plain Python string.

    Three normalizations are applied, in order:

    1. Every literal backslash-``n`` pair (two characters) becomes a real
       newline character.
    2. Runs of three or more consecutive newlines collapse to exactly two.
    3. Leading and trailing whitespace is stripped.

    No JSON escaping is performed here. Quotes, tabs, carriage returns and
    newlines are escaped by the serializer (``json.dumps``) when the data is
    written out; escaping them by hand as well would put literal backslashes
    into the stored value and add one more layer on every run.

    Args:
        text: The value to clean. Anything that is not a ``str`` is returned
            unchanged.

    Returns:
        The cleaned string, or *text* itself when it is not a ``str``.
    """
    if not isinstance(text, str):
        return text

    # Turn the two-character sequence backslash + "n" into a real newline so
    # both spellings are handled uniformly by the steps below.
    text = text.replace('\\n', '\n')

    # Keep at most one blank line between paragraphs.
    text = re.sub(r'\n{3,}', '\n\n', text)

    return text.strip()
|
|||
|
|
|
|||
|
|
def _locate_json_payload(content, start_marker, end_marker):
    """Return ``(json_start, end_idx, json_text)`` for the data literal in *content*.

    ``json_start`` is the offset just past *start_marker*, ``end_idx`` is the
    offset of *end_marker*, and ``json_text`` is the text between them with
    surrounding whitespace and one trailing ``;`` removed.

    Raises:
        ValueError: If either marker cannot be found.
    """
    start_idx = content.find(start_marker)
    if start_idx == -1:
        raise ValueError("找不到数据开始标记")

    json_start = start_idx + len(start_marker)
    # Search only after the start marker so an earlier occurrence of the end
    # marker cannot produce an inverted (empty) slice.
    end_idx = content.find(end_marker, json_start)
    if end_idx == -1:
        raise ValueError("找不到数据结束标记")

    json_text = content[json_start:end_idx].strip()
    if json_text.endswith(';'):
        json_text = json_text[:-1]
    return json_start, end_idx, json_text


def process_interview_data(file_path="src/mocks/resumeInterviewMock.js"):
    """Clean the ``answer`` fields of the interview mock data file in place.

    The file is expected to contain a JSON literal between
    ``export const resumeInterviewMockData = `` and
    ``export default resumeInterviewMockData;``. The literal is parsed, every
    ``answer`` under ``industries[*].questions[*].subQuestions[*]`` is passed
    through ``safe_clean_newlines_in_text``, and the file is rewritten with
    the re-serialized data. A timestamped backup is made first and copied
    back if anything fails, including the final check that the payload
    actually written to disk still parses as JSON.

    Args:
        file_path: Path of the mock data file to process.

    Returns:
        None. Progress and errors are reported on standard output.

    Raises:
        OSError: If the backup copy cannot be created; this happens before
            any modification, so there is nothing to roll back.
    """
    # Back up first so every later failure can be rolled back.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    backup_path = f"{file_path}.backup_safe_newlines_{timestamp}"
    shutil.copy2(file_path, backup_path)
    print(f"📦 已创建备份: {backup_path}")

    start_marker = "export const resumeInterviewMockData = "
    end_marker = "export default resumeInterviewMockData;"

    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()

        json_start, end_idx, json_content = _locate_json_payload(
            content, start_marker, end_marker
        )
        data = json.loads(json_content)

        # Counters for the summary printed after the rewrite.
        total_questions = 0
        processed_fields = 0

        for industry in data['industries']:
            for question_group in industry['questions']:
                total_questions += 1
                for sub_question in question_group.get('subQuestions', []):
                    if 'answer' not in sub_question:
                        continue
                    original_answer = sub_question['answer']
                    cleaned_answer = safe_clean_newlines_in_text(original_answer)
                    # Only count fields whose value actually changed.
                    if cleaned_answer != original_answer:
                        sub_question['answer'] = cleaned_answer
                        processed_fields += 1

        # Splice the re-serialized data back between the untouched prefix
        # (up to and including the start marker) and the end marker.
        new_json_content = json.dumps(data, ensure_ascii=False, indent=2)
        new_content = content[:json_start] + new_json_content + ";\n\n" + content[end_idx:]

        with open(file_path, 'w', encoding='utf-8') as f:
            f.write(new_content)

        print("📊 处理统计:")
        print(f" - 总面试题数: {total_questions}")
        print(f" - 处理的字段数: {processed_fields}")
        print(f"✅ 已更新 {file_path}")

        print("\n🔍 验证清理结果...")
        with open(file_path, 'r', encoding='utf-8') as f:
            final_content = f.read()

        # Count occurrences of two backslashes followed by "n" in the written
        # file, i.e. string values that still hold a literal backslash-n.
        unescaped_newlines = final_content.count('\\\\n')
        print(f" - 剩余的未转义\\n数量: {unescaped_newlines}")

        # Validate what is actually on disk: re-extract the payload from the
        # written file and parse it, rather than round-tripping the in-memory
        # object (which can never fail).
        try:
            _, _, written_json = _locate_json_payload(
                final_content, start_marker, end_marker
            )
            json.loads(written_json)
            print("✅ JSON格式验证通过")
        except ValueError as e:  # includes json.JSONDecodeError
            print(f"❌ JSON验证失败: {e}")
            shutil.copy2(backup_path, file_path)
            print("🔄 已恢复原始文件")

    except Exception as e:
        # Script-level boundary: report the problem and roll the file back
        # instead of leaving it half-processed.
        print(f"❌ 处理失败: {e}")
        shutil.copy2(backup_path, file_path)
        print("🔄 已恢复原始文件")
|
|||
|
|
|
|||
|
|
def _run_cli():
    """Run the clean-up as a script, printing a start and a completion banner."""
    print("🚀 开始安全处理面试题中的换行符...")
    process_interview_data()
    print("🎉 处理完成!")


if __name__ == "__main__":
    _run_cli()
|