#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
面试题数据换行符清理脚本 - 安全版本
清理resumeInterviewMock.js中的换行符，包括\\n转换和JSON字符转义
"""

import json
import re
import shutil
from datetime import datetime
import codecs

def safe_clean_newlines_in_text(text):
    r"""Normalize line breaks in *text* without pre-escaping it for JSON.

    Literal backslash-n pairs (the two characters ``\n``) are turned into
    real newline characters, runs of three or more newlines are collapsed to
    two, and leading/trailing whitespace is stripped.

    The result is a plain Python string. Quotes, tabs, carriage returns and
    newlines are deliberately left unescaped: ``json.dumps`` escapes them
    when the data is serialized, so escaping them here as well would
    double-escape the value (a real newline would be written out as a literal
    backslash-n, a quote would gain a stray backslash) and every further run
    over the same data would add more backslashes.

    Args:
        text: Value to clean. Non-string values are returned unchanged.

    Returns:
        The cleaned string, or *text* itself when it is not a ``str``.
    """
    if not isinstance(text, str):
        return text

    # Literal "\n" text (backslash + n) -> real newline character.
    text = text.replace('\\n', '\n')

    # Keep at most one blank line between paragraphs.
    text = re.sub(r'\n{3,}', '\n\n', text)

    return text.strip()

def process_interview_data(file_path="src/mocks/resumeInterviewMock.js"):
    """Clean the ``answer`` text of every sub-question in the mock data file.

    The file must contain ``export const resumeInterviewMockData = <JSON>``
    followed later by ``export default resumeInterviewMockData;``. The JSON
    literal between the two markers is parsed, each
    ``industries[*].questions[*].subQuestions[*].answer`` is passed through
    ``safe_clean_newlines_in_text``, and the file is rewritten in place with
    the re-serialized data (UTF-8, 2-space indent).

    A timestamped backup copy is created next to the file before anything is
    read. If processing or the post-write verification fails, the error is
    printed and the backup is copied back over the file; the exception is not
    propagated.

    Args:
        file_path: Path of the JS mock file to rewrite. Defaults to the
            project's ``src/mocks/resumeInterviewMock.js``.

    Raises:
        OSError: If the backup copy cannot be created (for example the file
            does not exist). Nothing has been modified in that case.
    """
    start_marker = "export const resumeInterviewMockData = "
    end_marker = "export default resumeInterviewMockData;"

    # Back up first so every later failure can be rolled back.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    backup_path = f"{file_path}.backup_safe_newlines_{timestamp}"
    shutil.copy2(file_path, backup_path)
    print(f"📦 已创建备份: {backup_path}")

    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()

        # Locate the JSON literal between the two export statements.
        start_idx = content.find(start_marker)
        if start_idx == -1:
            raise ValueError("找不到数据开始标记")
        json_start = start_idx + len(start_marker)

        # Search only after the data start: an earlier occurrence of the end
        # marker would otherwise produce an inverted (empty) slice.
        end_idx = content.find(end_marker, json_start)
        if end_idx == -1:
            raise ValueError("找不到数据结束标记")

        json_content = content[json_start:end_idx].strip()
        if json_content.endswith(';'):
            json_content = json_content[:-1]

        data = json.loads(json_content)

        total_questions = 0
        processed_fields = 0

        for industry in data['industries']:
            for question_group in industry['questions']:
                total_questions += 1
                for sub_question in question_group.get('subQuestions', []):
                    if 'answer' not in sub_question:
                        continue
                    original_answer = sub_question['answer']
                    cleaned_answer = safe_clean_newlines_in_text(original_answer)
                    if cleaned_answer != original_answer:
                        sub_question['answer'] = cleaned_answer
                        processed_fields += 1

        # Splice the re-serialized JSON back between the untouched prefix
        # (up to and including the start marker) and the end marker.
        new_json_content = json.dumps(data, ensure_ascii=False, indent=2)
        new_content = content[:json_start] + new_json_content + ";\n\n" + content[end_idx:]

        with open(file_path, 'w', encoding='utf-8') as f:
            f.write(new_content)

        print("📊 处理统计:")
        print(f"   - 总面试题数: {total_questions}")
        print(f"   - 处理的字段数: {processed_fields}")
        print(f"✅ 已更新 {file_path}")

        print("\n🔍 验证清理结果...")
        with open(file_path, 'r', encoding='utf-8') as f:
            final_content = f.read()

        # Two backslashes followed by "n" in the file text is a literal
        # backslash-n inside a JSON string value.
        unescaped_newlines = final_content.count('\\\\n')
        print(f"   - 剩余的未转义\\n数量: {unescaped_newlines}")

        # Validate what is actually on disk by re-parsing the JSON region of
        # the file that was just written, rather than round-tripping the
        # in-memory object (which can never fail at this point).
        try:
            written_end = final_content.find(end_marker, json_start)
            written_json = final_content[json_start:written_end].strip()
            if written_json.endswith(';'):
                written_json = written_json[:-1]
            json.loads(written_json)
            print("✅ JSON格式验证通过")
        except ValueError as e:  # json.JSONDecodeError is a ValueError
            print(f"❌ JSON验证失败: {e}")
            shutil.copy2(backup_path, file_path)
            print("🔄 已恢复原始文件")

    except Exception as e:
        # Top-level boundary of the script: report the failure and roll the
        # file back to the backup taken at the start.
        print(f"❌ 处理失败: {e}")
        shutil.copy2(backup_path, file_path)
        print("🔄 已恢复原始文件")

if __name__ == "__main__":
    # Script entry point: print a start banner, rewrite the mock data file in
    # place, then print a completion banner.
    print("🚀 开始安全处理面试题中的换行符...")
    process_interview_data()
    # NOTE: process_interview_data() catches its own processing errors (it
    # prints them and restores the backup), so this banner is also printed
    # after a handled failure.
    print("🎉 处理完成！")