#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import json
import re
from datetime import datetime
import shutil

def clean_newlines_in_text(text):
    """Normalize line breaks inside a piece of text.

    Non-string values are handed back untouched. For strings, every literal
    backslash-n sequence becomes a real newline, any run containing three or
    more newlines (optionally separated by other whitespace) collapses to a
    single blank line, and surrounding whitespace is removed.
    """
    if isinstance(text, str):
        # Turn the two-character sequence backslash + "n" into a real newline.
        unescaped = text.replace('\\n', '\n')

        # Squeeze 3+ newlines (possibly interleaved with whitespace) into 2.
        collapsed = re.sub(r'\n\s*\n\s*\n+', '\n\n', unescaped)

        # strip() discards leading/trailing newlines together with any other
        # surrounding whitespace, so no separate newline trimming is needed.
        return collapsed.strip()

    return text

def process_interview_data():
    """Normalize newlines in every interview question/answer of the mock file.

    Copies ``src/mocks/resumeInterviewMock.js`` to a timestamped backup,
    parses the JSON array assigned to ``const industries``, cleans the
    ``question`` and ``answer`` fields of each sub-question with
    ``clean_newlines_in_text`` and writes the re-serialized array back into
    the same file.

    Returns:
        The parsed (and cleaned) industries list, or ``None`` when the
        ``const industries = [...];`` declaration is not found.

    Raises:
        OSError: if the mock file cannot be copied, read or written.
        json.JSONDecodeError: if the matched array literal is not valid JSON.
    """
    print("🚀 开始处理面试题中的换行符...")

    mock_path = 'src/mocks/resumeInterviewMock.js'

    # Create a backup before touching the file.
    backup_name = f"{mock_path}.backup_newlines_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
    shutil.copy(mock_path, backup_name)
    print(f"📦 已创建备份: {backup_name}")

    # Read the whole file.
    with open(mock_path, 'r', encoding='utf-8') as f:
        content = f.read()

    # Extract the industries array literal.
    # NOTE: the non-greedy pattern ends at the first "];", so a "];" inside a
    # string value would truncate the match and make json.loads fail.
    industries_match = re.search(r'const industries = (\[.*?\]);', content, re.DOTALL)
    if not industries_match:
        print("❌ 未找到industries数据")
        return None

    industries = json.loads(industries_match.group(1))

    # Clean the question/answer text of every sub-question.
    total_questions = 0
    processed_questions = 0  # counts changed fields, not questions

    for industry in industries:
        for question_group in industry.get('questions', []):
            for sub_question in question_group.get('subQuestions', []):
                total_questions += 1

                for field in ('question', 'answer'):
                    if field not in sub_question:
                        continue
                    original_value = sub_question[field]
                    cleaned_value = clean_newlines_in_text(original_value)
                    if original_value != cleaned_value:
                        sub_question[field] = cleaned_value
                        processed_questions += 1

    print("📊 处理统计:")
    print(f"   - 总面试题数: {total_questions}")
    print(f"   - 处理的字段数: {processed_questions}")

    # Re-serialize the cleaned data.
    new_industries_str = json.dumps(industries, ensure_ascii=False, indent=2)

    # Splice the new JSON over the matched array by position. Passing it to
    # re.sub as a replacement string would interpret its backslash escapes
    # (JSON "\n" would become a raw newline, "\\" a single backslash, "\u" an
    # error) and corrupt the file.
    array_start, array_end = industries_match.span(1)
    new_content = content[:array_start] + new_industries_str + content[array_end:]

    # Write the result back.
    with open(mock_path, 'w', encoding='utf-8') as f:
        f.write(new_content)

    print("✅ 已更新 resumeInterviewMock.js")
    return industries

def verify_cleaned_data():
    """Re-read the mock file and print a short report on the cleanup result.

    Counts the double-escaped newline sequences still present in the raw
    file, prints a sample of the first sub-question's answer when one is
    available, and finishes with a success or warning line. Returns nothing.

    Raises:
        OSError: if the mock file cannot be read.
        json.JSONDecodeError: if the matched array literal is not valid JSON.
    """
    print("\n🔍 验证清理结果...")

    with open('src/mocks/resumeInterviewMock.js', 'r', encoding='utf-8') as f:
        content = f.read()

    # In the raw file a literal backslash-n inside a JSON string is stored as
    # two backslashes followed by "n"; that is the sequence counted here.
    escaped_newlines = content.count('\\\\n')
    print(f"   - 剩余的 \\\\n 数量: {escaped_newlines}")

    # Pull out one sample to look at.
    industries_match = re.search(r'const industries = (\[.*?\]);', content, re.DOTALL)
    if industries_match:
        industries = json.loads(industries_match.group(1))

        # Walk down to the first answer without assuming any key exists.
        question_groups = industries[0].get('questions', []) if industries else []
        sub_questions = question_groups[0].get('subQuestions', []) if question_groups else []
        first_answer = sub_questions[0].get('answer') if sub_questions else None

        if isinstance(first_answer, str):
            print("\n📝 清理后的答案示例:")
            print(f"   长度: {len(first_answer)} 字符")
            print(f"   前100字符: {first_answer[:100]}...")

            # Count real newline characters in the parsed value.
            real_newlines = first_answer.count('\n')
            print(f"   实际换行符数量: {real_newlines}")

    if escaped_newlines == 0:
        print("\n✅ 换行符清理完成！")
    else:
        print(f"\n⚠️  仍有 {escaped_newlines} 个转义换行符需要处理")

def main():
    """Run the newline cleanup, verify the result and print a summary.

    Verification and the summary are skipped when ``process_interview_data``
    returns ``None``. Any exception raised along the way is reported on
    stdout instead of propagating.
    """
    try:
        if process_interview_data() is None:
            # Nothing was processed, so there is nothing to verify or
            # announce as completed.
            return

        verify_cleaned_data()

        print("\n🎉 面试题换行符处理完成！")
        print("\n📝 主要改进:")
        print("   - 清理了双重转义的换行符 (\\\\n → \\n)")
        print("   - 合并了多余的连续换行符")
        print("   - 删除了开头和结尾的多余换行符")
        print("   - 保持了文本的可读性和格式")

    except Exception as e:
        # Top-level boundary of the script: report the failure to the user.
        print(f"❌ 处理失败: {e}")

# Run the cleanup only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()