#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Newline clean-up script for the interview-question mock data.

Works directly on the text of a JS source file. Inside every
``"answer": "..."`` string literal, double-escaped newlines (backslash,
backslash, ``n``) are turned into single JS newline escapes (backslash,
``n``), runs of three or more newlines are collapsed to two, and
leading/trailing whitespace of the answer text is stripped.
"""

import re
import shutil
import subprocess
import sys
from datetime import datetime
from typing import Tuple

DEFAULT_FILE_PATH = "src/mocks/resumeInterviewMock.js"

# Double-escaped newline exactly as it appears in the JS source text:
# backslash, backslash, "n" (three characters).
_DOUBLE_ESCAPED_NEWLINE = "\\\\n"
# Single JS newline escape as it should appear in the source: backslash, "n".
_ESCAPED_NEWLINE = "\\n"

# Matches `"answer": "<body>"` and captures <body>.  Both character classes
# exclude the backslash so that every backslash is consumed together with the
# character it escapes; otherwise the match would end at the first `\"`
# inside the string and the rest of the answer would be left untouched.
_ANSWER_FIELD_RE = re.compile(
    r'"answer":\s*"([^"\\]*(?:\\.[^"\\]*)*)"',
    re.DOTALL,
)


def _normalize_answer(raw: str) -> str:
    """Return the body of one answer literal with its newlines cleaned up."""
    # Work on real newline characters so collapsing and stripping are simple.
    text = raw.replace(_DOUBLE_ESCAPED_NEWLINE, "\n")
    text = re.sub(r"\n{3,}", "\n\n", text)
    text = text.strip()
    # Re-escape so the literal stays on one source line and is valid JS.
    return text.replace("\n", _ESCAPED_NEWLINE)


def _clean_answer_fields(content: str) -> Tuple[str, int]:
    """Clean every answer literal in *content*.

    Returns the rewritten text and the number of answer fields whose body
    actually changed.
    """
    changed_fields = 0

    def rewrite(match):
        nonlocal changed_fields
        before = match.group(1)
        after = _normalize_answer(before)
        if after != before:
            changed_fields += 1
        return f'"answer": "{after}"'

    rewritten = _ANSWER_FIELD_RE.sub(rewrite, content)
    return rewritten, changed_fields


def _validate_or_restore(file_path: str, backup_path: str) -> bool:
    """Syntax-check *file_path* with ``node -c``; restore the backup on failure.

    Returns True when the check passes or cannot be run at all (validation is
    best-effort), and False when node rejects the file and the backup has
    been copied back.
    """
    try:
        result = subprocess.run(
            ["node", "-c", file_path], capture_output=True, text=True
        )
    except Exception as e:
        # Best-effort: a missing or unusable node binary must not undo the
        # clean-up, so only warn.
        print(f"⚠️ 无法验证JavaScript语法: {e}")
        return True

    if result.returncode == 0:
        print("✅ JavaScript语法验证通过")
        return True

    print(f"❌ JavaScript语法验证失败: {result.stderr}")
    shutil.copy2(backup_path, file_path)
    print("🔄 已恢复原始文件")
    return False


def clean_escaped_newlines_in_js(file_path: str = DEFAULT_FILE_PATH) -> bool:
    """Clean double-escaped newlines in the answer fields of a JS file.

    A timestamped backup (``<file_path>.backup_final_newlines_<timestamp>``)
    is created first.  The file is then rewritten in place and, when ``node``
    can be run, syntax-checked; the backup is copied back if processing
    raises or the syntax check fails.

    Args:
        file_path: JS file to process.  Defaults to the mock-data file this
            script was written for.

    Returns:
        True when the file was updated (and not rejected by the syntax
        check), False when processing failed and the backup was restored.

    Raises:
        OSError: if the backup cannot be created (for example, the file does
            not exist), or if restoring the backup itself fails.
    """
    # Create the backup before touching anything; a failure here propagates
    # because there is nothing to restore yet.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    backup_path = f"{file_path}.backup_final_newlines_{timestamp}"
    shutil.copy2(file_path, backup_path)
    print(f"📦 已创建备份: {backup_path}")

    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()

        original_count = content.count(_DOUBLE_ESCAPED_NEWLINE)
        print(f"📊 发现 {original_count} 个转义换行符(\\\\n)")

        processed_content, processed_fields = _clean_answer_fields(content)
        final_count = processed_content.count(_DOUBLE_ESCAPED_NEWLINE)

        with open(file_path, 'w', encoding='utf-8') as f:
            f.write(processed_content)

        print("📊 处理统计:")
        print(f" - 原始\\\\n数量: {original_count}")
        print(f" - 处理后\\\\n数量: {final_count}")
        # Number of answer fields whose text changed, not the difference
        # between the two sequence counts above.
        print(f" - 已处理字段数: {processed_fields}")
        print(f"✅ 已更新 {file_path}")

        return _validate_or_restore(file_path, backup_path)

    except Exception as e:
        print(f"❌ 处理失败: {e}")
        # Put the original back so a half-finished run never leaves a
        # partially written file behind.
        shutil.copy2(backup_path, file_path)
        print("🔄 已恢复原始文件")
        return False


if __name__ == "__main__":
    print("🚀 开始清理面试题中的换行符...")
    if clean_escaped_newlines_in_js():
        print("🎉 处理完成!")
    else:
        # Signal the failure to the shell instead of claiming completion.
        sys.exit(1)