#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import json
import re

# Load the smart-manufacturing resume records from the JSON export
with open('网页未导入数据/智能制造产业/智能制造岗位简历.json', encoding='utf-8') as resume_file:
    smart_mfg_data = json.load(resume_file)

# Tally interview questions per job group.
# interview_stats maps each job-group name to:
#   'count'     - the largest number of numbered questions found in any
#                 single record of that group
#   'positions' - the position name of every record counted for the group
interview_stats = {}

# A question is "<digits>." followed by whitespace and the rest of that line.
# In this raw string `\\` is a literal backslash and `\n` is the newline
# escape, so the class stops at a real line break (or at a backslash, which
# also covers text that carries a literal backslash-n sequence).  Excluding
# the newline is essential: without it a single match runs on across line
# breaks and swallows the following numbered questions, so the count comes
# out too low.
_QUESTION_RE = re.compile(r'\d+\.\s+[^\\\n]+')

for item in smart_mfg_data:
    job_group = item.get('简历岗位群', '')
    interview_content = item.get('面试题内容', '')

    # Records without a group name or without question text are not counted
    if not (job_group and interview_content):
        continue

    questions = _QUESTION_RE.findall(interview_content)

    group_stats = interview_stats.setdefault(
        job_group, {'count': 0, 'positions': []}
    )
    # Keep the maximum seen so far rather than a running sum
    group_stats['count'] = max(group_stats['count'], len(questions))
    group_stats['positions'].append(item.get('岗位名称', ''))

# Report the per-group tallies, ordered by job-group name
print("=== 智能制造岗位简历.json 面试题统计 ===\n")
for group_name in sorted(interview_stats):
    group_stats = interview_stats[group_name]
    print(f"{group_name}:")
    print(f"  - 面试题数量: {group_stats['count']} 道")
    print(f"  - 包含岗位: {', '.join(group_stats['positions'])}")
    print()  # blank line between groups

# Read the current mock file as plain text so it can be scanned with regexes
with open('src/mocks/resumeInterviewMock.js', encoding='utf-8') as mock_file:
    mock_content = mock_file.read()

# Count the questions the mock file currently holds for each "name" entry
print("\n=== 当前 mock 文件面试题统计 ===\n")

# Captures (1) the "name" value and (2) the raw text of the "questions"
# array that follows it.  No "}" may occur between the name and "questions",
# and the array text ends at the first "]" that is followed (after optional
# whitespace) by "}".
mock_entry_re = re.compile(
    r'"name":\s*"([^"]+)"[^}]*?"questions":\s*\[(.*?)\]\s*\}',
    re.DOTALL,
)
# One hit per "question": "<text>", immediately followed by an "answer": key
mock_question_re = re.compile(r'"question":\s*"[^"]+",\s*"answer":')

industries = mock_entry_re.findall(mock_content)

for entry_name, questions_src in industries:
    total = len(mock_question_re.findall(questions_src))

    print(f"{entry_name}: {total} 道题")

# Compare the resume-derived tallies with what the mock file contains and
# list every job group whose mock entry is missing or has fewer questions.
print("\n=== 对比分析 ===\n")
print("需要更新的岗位群：")
for group_name, group_stats in interview_stats.items():
    expected = group_stats['count']

    # Find this group's "questions" array in the mock text.
    # NOTE(review): the gap between the name and "questions" is `.*?` under
    # DOTALL, so it may cross "}" boundaries, whereas the listing scan
    # earlier in this script uses `[^}]*?` - confirm this is intended.
    lookup = (
        r'"name":\s*"'
        + re.escape(group_name)
        + r'".*?"questions":\s*\[(.*?)\]\s*\}'
    )
    found = re.search(lookup, mock_content, re.DOTALL)

    if found is None:
        print(f"- {group_name}: 未找到对应数据")
        continue

    current = len(re.findall(r'"question":\s*"[^"]+",\s*"answer":', found.group(1)))
    # Groups that already have enough questions are not listed
    if current < expected:
        print(f"- {group_name}: 当前 {current} 道，应有 {expected} 道（缺少 {expected - current} 道）")