#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import json
import re
from collections import defaultdict
import datetime
import shutil

def parse_interview_questions(content):
    """
    Parse raw interview-question text into a list of question dicts.

    The text is scanned line by line (blank lines are ignored):

    * Markdown heading lines (one or more ``#`` followed by whitespace) are
      removed before scanning.
    * A line of the form ``<number>. <text>`` starts a new question. Any
      trailing ``?`` / ``？`` characters are stripped from the question text.
    * A line starting with ``示例答案`` or ``答案`` marks the start of the
      answer and discards whatever was collected for the current question
      so far. If the marker is directly followed by ``:`` / ``：`` and more
      text on the same line, that text becomes the first answer fragment.
    * Every other line seen after a question is appended to its answer.
      Fragments are joined with a single space.

    A question that ends up with no answer text is dropped. Its sequence
    number is still consumed, so the returned ids may have gaps.

    Args:
        content: Raw question text. ``None`` or an empty string is treated
            as "no content".

    Returns:
        A list of at most five dicts with the keys ``id``, ``question``,
        ``answer``, ``difficulty`` and ``tags``. When nothing could be
        parsed, two built-in default questions are returned instead.
    """
    if not content:
        # A missing field (JSON null) should fall through to the defaults
        # instead of raising inside re.sub().
        content = ''

    # Drop markdown headings. The separator class excludes the newline so a
    # bare "#" / "## " line cannot swallow the line that follows it.
    content = re.sub(r'^#+[^\S\n]+.*$', '', content, flags=re.MULTILINE)

    question_re = re.compile(r'^(\d+)\.\s+(.+)$')
    answer_markers = ('示例答案', '答案')
    # Marker, a colon, then answer text on the same line.
    inline_answer_re = re.compile(r'^(?:示例答案|答案)\s*[:：]\s*(.+)$')

    questions = []
    current_question = None
    current_answer = []
    question_id = 1

    for raw_line in content.split('\n'):
        line = raw_line.strip()
        if not line:
            continue

        question_match = question_re.match(line)
        if question_match:
            # A new numbered line closes the previous question; it is kept
            # only if some answer text was collected for it.
            if current_question is not None and current_answer:
                current_question['answer'] = ' '.join(current_answer).strip()
                questions.append(current_question)
            current_answer = []

            current_question = {
                'id': f'q{question_id}',
                'question': question_match.group(2).rstrip('？?'),
                'answer': '',
                'difficulty': '中等',
                'tags': ['能源行业', '专业知识']
            }
            question_id += 1
            continue

        if line.startswith(answer_markers):
            # The marker restarts the answer buffer, discarding any text
            # that appeared between the question and the marker.
            current_answer = []
            inline_match = inline_answer_re.match(line)
            if inline_match and current_question is not None:
                current_answer.append(inline_match.group(1).strip())
            continue

        if current_question is not None:
            current_answer.append(line)

    # Flush the last question.
    if current_question is not None and current_answer:
        current_question['answer'] = ' '.join(current_answer).strip()
        questions.append(current_question)

    # Nothing usable was parsed: fall back to generic questions.
    if not questions:
        questions = [
            {
                'id': 'q1',
                'question': '请介绍一下你的专业背景',
                'answer': '我具有能源相关专业背景，熟悉行业标准和规范，有实际项目经验。',
                'difficulty': '简单',
                'tags': ['基础问题']
            },
            {
                'id': 'q2',
                'question': '你为什么选择这个岗位',
                'answer': '这个岗位与我的专业背景高度匹配，能够发挥我的专业技能。',
                'difficulty': '简单',
                'tags': ['基础问题']
            }
        ]

    return questions[:5]  # only the first five questions are used

def extract_and_update_interview_questions():
    """
    Extract interview questions from the energy-job resume JSON and write
    them into the resume/interview mock file.

    Steps:

    1. Load ``网页未导入数据/能源产业/能源岗位简历.json`` and, for each job
       group (``简历岗位群``), parse the ``面试题内容`` of the first record
       of that group that has this key.
    2. Copy ``src/mocks/resumeInterviewMock.js`` to a timestamped backup.
    3. For every line containing ``"questions":``, determine the group name
       from the first ``"name": "..."`` found in the window of up to 20
       lines ending at that line, then replace the value of the first
       following ``"subQuestions":`` with the parsed questions when the
       group is known. All other lines are copied through unchanged.
    4. Write the result back to the mock file and print a summary.

    Only the array literal after ``"subQuestions":`` is replaced: text
    before the key and after the closing ``]`` (for example a trailing
    comma) is preserved, and an array spanning several lines is replaced as
    a whole. If the value is not a terminated array literal, only the
    single line is replaced and a trailing comma on it is kept.

    Raises:
        OSError: if the source JSON or the mock file cannot be read, or the
            backup/mock file cannot be written.
        json.JSONDecodeError: if the source JSON is malformed.
    """
    # Load the energy job/resume records.
    with open("网页未导入数据/能源产业/能源岗位简历.json", 'r', encoding='utf-8') as f:
        energy_jobs = json.load(f)

    # One question list per job group, taken from the first record of the
    # group that carries interview-question text.
    interview_questions = {}
    for job in energy_jobs:
        group_name = job.get("简历岗位群", "")
        if group_name and group_name not in interview_questions:
            if "面试题内容" in job:
                interview_questions[group_name] = parse_interview_questions(job["面试题内容"])

    mock_file = "src/mocks/resumeInterviewMock.js"

    # Back up the mock file before touching it.
    backup_path = f"{mock_file}.backup_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}"
    shutil.copy(mock_file, backup_path)
    print(f"✅ 已备份文件到：{backup_path}")

    with open(mock_file, 'r', encoding='utf-8') as f:
        lines = f.readlines()

    sub_key = '"subQuestions":'
    new_lines = []
    i = 0
    while i < len(lines):
        line = lines[i]
        new_lines.append(line)
        i += 1
        if '"questions":' not in line:
            continue

        # Group name: first "name" entry in the 20-line window ending at the
        # "questions" line.
        # NOTE(review): this takes the earliest match in the window, not the
        # nearest one - confirm against the mock file layout.
        group_name = None
        for j in range(max(0, i - 20), i):
            name_match = re.search(r'"name":\s*"([^"]+)"', lines[j])
            if name_match:
                group_name = name_match.group(1)
                break

        # Copy lines through until the first "subQuestions" entry.
        while i < len(lines):
            current = lines[i]
            if sub_key not in current:
                new_lines.append(current)
                i += 1
                continue

            if group_name and group_name in interview_questions:
                questions_json = json.dumps(interview_questions[group_name], ensure_ascii=False, indent=8)
                # Shift the generated JSON to the indentation of the key line.
                indent = len(current) - len(current.lstrip())
                pad = ' ' * indent
                shifted = '\n'.join(pad + part if part.strip() else part
                                    for part in questions_json.split('\n'))

                key_pos = current.index(sub_key)
                prefix = current[:key_pos]
                array_end = _find_array_end(lines, i, key_pos + len(sub_key))
                if array_end is None:
                    # Value is not a terminated array literal: replace just
                    # this line, keeping a trailing comma if there was one.
                    suffix = ',\n' if current.rstrip().endswith(',') else '\n'
                    i += 1
                else:
                    # Skip the whole old array and keep whatever followed
                    # its closing bracket (typically "," and the newline).
                    end_line, end_col = array_end
                    suffix = lines[end_line][end_col:]
                    if not suffix.endswith('\n'):
                        suffix += '\n'
                    i = end_line + 1

                # lstrip(): the opening "[" sits right after the key, so it
                # must not carry the line indentation a second time.
                new_lines.append(f'{prefix}"subQuestions": {shifted.lstrip()}{suffix}')
            else:
                new_lines.append(current)
                i += 1
            break

    # Write the patched content back.
    with open(mock_file, 'w', encoding='utf-8') as f:
        f.writelines(new_lines)

    print("✅ 成功更新面试题数据")
    print("\n📊 更新的岗位群面试题：")
    for group_name, questions in interview_questions.items():
        print(f"   - {group_name}: {len(questions)}个问题")


def _find_array_end(lines, start, col):
    """
    Locate the end of the array literal that begins at ``lines[start][col:]``.

    Brackets inside quoted strings are ignored. Returns a tuple
    ``(line_index, column)`` pointing just past the matching ``]``, or
    ``None`` when the first non-blank character is not ``[`` or the array
    is never closed.
    """
    depth = 0
    quote = None
    escaped = False
    for idx in range(start, len(lines)):
        text = lines[idx]
        if quote in ('"', "'"):
            # Plain quoted strings do not span lines; resynchronise so a
            # stray quote cannot hide the rest of the file.
            quote = None
        escaped = False
        for pos in range(col if idx == start else 0, len(text)):
            ch = text[pos]
            if depth == 0:
                # Still looking for the opening bracket.
                if ch.isspace():
                    continue
                if ch != '[':
                    return None
                depth = 1
                continue
            if quote is not None:
                if escaped:
                    escaped = False
                elif ch == '\\':
                    escaped = True
                elif ch == quote:
                    quote = None
                continue
            if ch in '"\'`':
                quote = ch
            elif ch == '[':
                depth += 1
            elif ch == ']':
                depth -= 1
                if depth == 0:
                    return idx, pos + 1
    return None

# Script entry point: run the extraction/update only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    extract_and_update_interview_questions()