#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Normalize newline escapes in the interview-question mock data.

The script locates the ``const industries = [...]`` array literal inside
``src/mocks/resumeInterviewMock.js``, parses it as JSON, cleans the
``question`` / ``answer`` text of every sub-question and writes the array
back in place. A timestamped backup copy of the file is created first.
"""

import json
import os
import re
import shutil
from datetime import datetime

# Default location of the mock file, relative to the working directory.
MOCK_FILE = 'src/mocks/resumeInterviewMock.js'

# Marks the start of the array literal. The lookahead keeps the "[" out of the
# match, so ``match.end()`` is the index of the array's opening bracket.
_INDUSTRIES_PREFIX = re.compile(r'const industries = (?=\[)')

# Sub-question fields whose text gets cleaned.
_TEXT_FIELDS = ('question', 'answer')


def clean_newlines_in_text(text):
    """Return *text* with newline escapes expanded and blank runs collapsed.

    Literal backslash-n pairs become real newlines, three or more consecutive
    newlines (optionally separated by whitespace) collapse to exactly two, and
    surrounding whitespace is stripped. Non-string values are returned
    unchanged.
    """
    if not isinstance(text, str):
        return text

    # Turn the two-character sequence backslash + "n" into a real newline.
    text = text.replace('\\n', '\n')
    # Collapse runs of three or more newlines into a single blank line.
    text = re.sub(r'\n\s*\n\s*\n+', '\n\n', text)
    # strip() also removes any leading/trailing newlines.
    return text.strip()


def _locate_industries(content):
    """Parse the ``industries`` array found in *content*.

    Returns ``(industries, start, end)`` where ``content[start:end]`` is the
    exact text of the array literal, or ``None`` when no
    ``const industries = [`` declaration is present.

    The array is parsed with a real JSON decoder instead of a non-greedy
    regex, so a ``];`` occurring inside a string value cannot cut it short.

    Raises:
        json.JSONDecodeError: if the array literal is not valid JSON.
    """
    match = _INDUSTRIES_PREFIX.search(content)
    if match is None:
        return None
    start = match.end()
    industries, length = json.JSONDecoder().raw_decode(content[start:])
    return industries, start, start + length


def _first_answer(industries):
    """Return the first sub-question's answer, or ``None`` if there is none."""
    try:
        return industries[0]['questions'][0]['subQuestions'][0]['answer']
    except (IndexError, KeyError, TypeError):
        return None


def process_interview_data(path=MOCK_FILE):
    """Clean the newline escapes of every interview question stored in *path*.

    A backup named ``<path>.backup_newlines_<YYYYmmdd_HHMMSS>`` is written
    before the file is read. Progress and statistics are printed to stdout.

    Args:
        path: Mock file to rewrite in place.

    Returns:
        The cleaned ``industries`` list, or ``None`` when the file contains no
        ``const industries = [...]`` declaration (the file is then left
        untouched apart from the backup).

    Raises:
        OSError: if the file cannot be copied, read or written.
        json.JSONDecodeError: if the array literal is not valid JSON.
    """
    print("🚀 开始处理面试题中的换行符...")

    # Back up the file before touching it.
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    backup_name = f"{path}.backup_newlines_{timestamp}"
    shutil.copy(path, backup_name)
    print(f"📦 已创建备份: {backup_name}")

    with open(path, 'r', encoding='utf-8') as f:
        content = f.read()

    located = _locate_industries(content)
    if located is None:
        print("❌ 未找到industries数据")
        return None
    industries, start, end = located

    total_questions = 0
    processed_questions = 0  # number of fields whose text actually changed
    for industry in industries:
        for question_group in industry.get('questions', []):
            for sub_question in question_group.get('subQuestions', []):
                total_questions += 1
                for field in _TEXT_FIELDS:
                    if field not in sub_question:
                        continue
                    cleaned = clean_newlines_in_text(sub_question[field])
                    if cleaned != sub_question[field]:
                        sub_question[field] = cleaned
                        processed_questions += 1

    print("📊 处理统计:")
    print(f" - 总面试题数: {total_questions}")
    print(f" - 处理的字段数: {processed_questions}")

    new_industries_str = json.dumps(industries, ensure_ascii=False, indent=2)

    # Splice by index rather than re.sub(): a replacement *string* passed to
    # re.sub() has its backslash escapes expanded, which would turn the JSON
    # escapes produced above into raw newlines / single backslashes and
    # corrupt the file.
    new_content = content[:start] + new_industries_str + content[end:]

    with open(path, 'w', encoding='utf-8') as f:
        f.write(new_content)

    print(f"✅ 已更新 {os.path.basename(path)}")
    return industries


def verify_cleaned_data(path=MOCK_FILE):
    """Print a short report about the newline state of the data in *path*.

    Reports how many escaped newlines remain in the file text and, when a
    first answer exists, shows a sample of it with its real newline count.

    Args:
        path: Mock file to inspect.

    Raises:
        OSError: if the file cannot be read.
        json.JSONDecodeError: if the array literal is not valid JSON.
    """
    print("\n🔍 验证清理结果...")

    with open(path, 'r', encoding='utf-8') as f:
        content = f.read()

    # A backslash-n pair inside a string value is serialized as two
    # backslashes followed by "n", so that is the sequence to look for in the
    # raw file text.
    escaped_newlines = content.count('\\\\n')
    print(f" - 剩余的 \\\\n 数量: {escaped_newlines}")

    located = _locate_industries(content)
    if located is not None:
        first_answer = _first_answer(located[0])
        if isinstance(first_answer, str):
            print("\n📝 清理后的答案示例:")
            print(f" 长度: {len(first_answer)} 字符")
            print(f" 前100字符: {first_answer[:100]}...")

            # Count real newline characters in the parsed text.
            real_newlines = first_answer.count('\n')
            print(f" 实际换行符数量: {real_newlines}")

    if escaped_newlines == 0:
        print("\n✅ 换行符清理完成!")
    else:
        print(f"\n⚠️ 仍有 {escaped_newlines} 个转义换行符需要处理")


def main():
    """Run the cleanup followed by the verification report.

    Returns:
        ``0`` on success, ``1`` when the data could not be found or any step
        failed; the value is used as the process exit status.
    """
    try:
        industries = process_interview_data()
        if industries is None:
            # Nothing was rewritten, so there is nothing to verify or celebrate.
            return 1

        verify_cleaned_data()

        print("\n🎉 面试题换行符处理完成!")
        print("\n📝 主要改进:")
        print(" - 清理了双重转义的换行符 (\\\\n → \\n)")
        print(" - 合并了多余的连续换行符")
        print(" - 删除了开头和结尾的多余换行符")
        print(" - 保持了文本的可读性和格式")
    except Exception as e:  # top-level boundary: report instead of a traceback
        print(f"❌ 处理失败: {e}")
        return 1
    return 0


if __name__ == "__main__":
    raise SystemExit(main())