#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Newline clean-up script for the interview-question mock data (safe version).

Normalizes line breaks inside the ``answer`` fields stored in
``resumeInterviewMock.js``: literal backslash-n sequences are turned into
real newlines and runs of blank lines are collapsed.  All JSON escaping is
delegated to ``json.dumps`` when the file is written back.
"""

import json
import re
import shutil
from datetime import datetime
import codecs

# Text that surrounds the JSON payload inside the JS module.
_START_MARKER = "export const resumeInterviewMockData = "
_END_MARKER = "export default resumeInterviewMockData;"


def safe_clean_newlines_in_text(text):
    """Return *text* with its line breaks normalized.

    Steps applied to string input:

    1. every literal two-character sequence backslash + ``n`` becomes a
       real newline;
    2. three or more consecutive newlines are collapsed to exactly two;
    3. leading and trailing whitespace is stripped.

    The result is plain text, NOT a JSON-escaped fragment.  The caller
    serializes with ``json.dumps``, which already escapes quotes, tabs,
    carriage returns and newlines; escaping them here as well would write
    doubled backslashes to the file and add more on every run.

    Non-string values are returned unchanged.
    """
    if not isinstance(text, str):
        return text

    # 1. Literal "\n" (backslash + n) -> real newline.
    text = text.replace('\\n', '\n')

    # 2. Keep at most two consecutive newlines.
    text = re.sub(r'\n{3,}', '\n\n', text)

    # 3. Trim surrounding whitespace.
    return text.strip()


def _extract_json_span(content):
    """Locate the JSON payload between the start and end markers.

    Returns ``(json_start, end_idx, json_text)`` where ``json_start`` is the
    offset just after the start marker, ``end_idx`` is the offset of the end
    marker, and ``json_text`` is the payload with surrounding whitespace and
    one trailing ``;`` removed.

    Raises:
        ValueError: if either marker cannot be found.
    """
    start_idx = content.find(_START_MARKER)
    if start_idx == -1:
        raise ValueError("找不到数据开始标记")
    json_start = start_idx + len(_START_MARKER)

    # Search only after the start marker so an earlier occurrence of the
    # end-marker text cannot produce an empty or negative span.
    end_idx = content.find(_END_MARKER, json_start)
    if end_idx == -1:
        raise ValueError("找不到数据结束标记")

    json_text = content[json_start:end_idx].strip()
    if json_text.endswith(';'):
        json_text = json_text[:-1]
    return json_start, end_idx, json_text


def _clean_answers(data):
    """Clean every sub-question ``answer`` in *data* in place.

    Returns ``(total_questions, processed_fields)``: the number of question
    groups visited and the number of answers whose value actually changed.
    """
    total_questions = 0
    processed_fields = 0
    for industry in data['industries']:
        for question_group in industry['questions']:
            total_questions += 1
            for sub_question in question_group.get('subQuestions', []):
                if 'answer' not in sub_question:
                    continue
                original_answer = sub_question['answer']
                cleaned_answer = safe_clean_newlines_in_text(original_answer)
                if cleaned_answer != original_answer:
                    sub_question['answer'] = cleaned_answer
                    processed_fields += 1
    return total_questions, processed_fields


def process_interview_data(file_path="src/mocks/resumeInterviewMock.js"):
    """Clean the newlines of all answers stored in the mock-data JS file.

    A timestamped backup is created next to *file_path* first.  The JSON
    payload is then extracted, cleaned, re-serialized and written back, and
    the written file is re-read and parsed to verify it.  On any failure the
    backup is copied back over *file_path*.

    Args:
        file_path: path of the JS module to process.  Defaults to the
            project's mock file, so existing zero-argument calls still work.

    Returns:
        ``True`` when the file was updated and verified, ``False`` otherwise.
    """
    # Create the backup before touching anything.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    backup_path = f"{file_path}.backup_safe_newlines_{timestamp}"
    try:
        shutil.copy2(file_path, backup_path)
    except OSError as e:
        # Nothing was modified and there is no backup to restore.
        print(f"❌ 处理失败: {e}")
        return False
    print(f"📦 已创建备份: {backup_path}")

    try:
        # Read the file.
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()

        # Extract and parse the JSON part.
        json_start, end_idx, json_content = _extract_json_span(content)
        data = json.loads(json_content)

        # Clean the data and collect statistics.
        total_questions, processed_fields = _clean_answers(data)

        # Rebuild the file content; json.dumps performs all escaping.
        new_json_content = json.dumps(data, ensure_ascii=False, indent=2)
        new_content = (
            content[:json_start] + new_json_content + ";\n\n" + content[end_idx:]
        )

        # Write the file.
        with open(file_path, 'w', encoding='utf-8') as f:
            f.write(new_content)

        print("📊 处理统计:")
        print(f" - 总面试题数: {total_questions}")
        print(f" - 处理的字段数: {processed_fields}")
        print(f"✅ 已更新 {file_path}")

        # Verify the result against what is actually on disk.
        print("\n🔍 验证清理结果...")
        with open(file_path, 'r', encoding='utf-8') as f:
            final_content = f.read()

        # Count double-escaped newlines (two backslashes + n) left in the file.
        unescaped_newlines = final_content.count('\\\\n')
        print(f" - 剩余的未转义\\n数量: {unescaped_newlines}")

        # Validate the JSON that was written, not the in-memory object.
        try:
            _, _, written_json = _extract_json_span(final_content)
            json.loads(written_json)
        except ValueError as e:  # includes json.JSONDecodeError
            print(f"❌ JSON验证失败: {e}")
            # Restore the backup.
            shutil.copy2(backup_path, file_path)
            print("🔄 已恢复原始文件")
            return False
        print("✅ JSON格式验证通过")
        return True

    except Exception as e:
        # Top-level boundary: report the failure and roll back.
        print(f"❌ 处理失败: {e}")
        # Restore the backup.
        shutil.copy2(backup_path, file_path)
        print("🔄 已恢复原始文件")
        return False


if __name__ == "__main__":
    print("🚀 开始安全处理面试题中的换行符...")
    if not process_interview_data():
        # Do not report completion after a failure; signal it to the shell.
        raise SystemExit(1)
    print("🎉 处理完成!")