#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import json
import re

def get_position_levels_from_resume():
    """Load the position-name -> level-label mapping from the resume JSON file.

    Reads ``网页未导入数据/化工产业/化工岗位简历.json`` (relative to the current
    working directory) and iterates over its top-level entries. For each
    entry the ``岗位名称`` (position name) and ``岗位等级标签`` (level label)
    fields are stripped of surrounding whitespace; entries where either
    field is missing, ``null``, not a string, or blank are skipped instead
    of aborting the whole load.

    Returns:
        dict: Maps each position name to its level label. When the same
        position name occurs more than once, the last occurrence wins.

    Raises:
        FileNotFoundError: If the resume JSON file does not exist.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    with open('网页未导入数据/化工产业/化工岗位简历.json', 'r', encoding='utf-8') as f:
        data = json.load(f)

    position_levels = {}
    for item in data:
        # Tolerate malformed records: a non-object entry has no fields to read.
        if not isinstance(item, dict):
            continue

        position = item.get('岗位名称')
        level = item.get('岗位等级标签')
        # JSON null or non-string values cannot be stripped; treat as missing.
        if not isinstance(position, str) or not isinstance(level, str):
            continue

        position = position.strip()
        level = level.strip()
        if position and level:
            position_levels[position] = level

    return position_levels

def extract_positions_from_mock():
    """Extract per-project position entries from the project-library mock file.

    Scans ``src/mocks/projectLibraryMock.js`` (relative to the current working
    directory) line by line with regular expressions; the file is not parsed
    as JavaScript. The scan works as follows:

    * Any line matching ``"id": <digits>`` starts a new project and flushes
      the positions collected for the previous one.
    * A line containing ``"positions":`` and ``[`` opens a positions array.
    * Inside a positions array, a ``"level": "..."`` line is paired with a
      ``"position": "..."`` on the line immediately following it.
    * The first line containing ``]`` closes the positions array.

    Returns:
        dict: Maps each integer project id to a list of
        ``{'level': str, 'position': str}`` dicts, in file order. Projects
        for which no positions were collected are omitted.

    Raises:
        FileNotFoundError: If the mock file does not exist.
    """
    # Compile once instead of re-resolving the patterns on every line.
    id_pattern = re.compile(r'"id":\s*(\d+)')
    level_pattern = re.compile(r'"level":\s*"([^"]*)"')
    position_pattern = re.compile(r'"position":\s*"([^"]*)"')

    with open('src/mocks/projectLibraryMock.js', 'r', encoding='utf-8') as f:
        lines = f.readlines()

    projects_positions = {}
    current_project = None
    in_positions = False
    current_positions = []

    for i, line in enumerate(lines):
        # NOTE: every numeric "id" line is treated as the start of a project;
        # no surrounding-context filtering is applied.
        if '"id":' in line:
            match = id_pattern.search(line)
            if match:
                # Flush the previous project. Compare against None explicitly
                # so that a project whose id is 0 is not silently dropped.
                if current_project is not None and current_positions:
                    projects_positions[current_project] = current_positions

                current_project = int(match.group(1))
                current_positions = []
                in_positions = False

        # Start of a positions array.
        if '"positions":' in line and '[' in line:
            in_positions = True

        if in_positions:
            level_match = level_pattern.search(line)
            # The position name is expected on the line right after the level.
            if level_match and i + 1 < len(lines):
                pos_match = position_pattern.search(lines[i + 1])
                if pos_match:
                    current_positions.append({
                        'level': level_match.group(1),
                        'position': pos_match.group(1),
                    })

            # Any closing bracket ends the array (also handles "positions": []).
            if ']' in line:
                in_positions = False

    # Flush the final project, again allowing id 0.
    if current_project is not None and current_positions:
        projects_positions[current_project] = current_positions

    return projects_positions

def compare_and_report():
    """Compare mock position levels against the resume data and print a report.

    Loads the reference mapping via ``get_position_levels_from_resume()`` and
    the per-project positions via ``extract_positions_from_mock()``, prints
    one line per position (match, mismatch, or not found in the reference
    data), and finishes with summary counts and percentages.

    Returns:
        tuple: ``(mismatches, not_found)`` where ``mismatches`` is a list of
        dicts with keys ``project``, ``position``, ``current`` and
        ``correct``, and ``not_found`` is a list of dicts with keys
        ``project``, ``position`` and ``current``.
    """
    resume_levels = get_position_levels_from_resume()
    mock_positions = extract_positions_from_mock()

    divider = "=" * 60
    print(divider)
    print("岗位等级对比报告")
    print(divider)
    print(f"\n从简历数据中读取到 {len(resume_levels)} 个岗位等级")
    print(f"从Mock文件中读取到 {len(mock_positions)} 个项目的岗位数据\n")

    # Running tallies for the summary section.
    checked = 0
    matched = 0
    mismatches = []
    not_found = []

    for project_id in sorted(mock_positions):
        entries = mock_positions[project_id]
        if not entries:
            continue

        print(f"\n【项目 {project_id}】({len(entries)}个岗位)")
        for entry in entries:
            current_level = entry['level']
            position_name = entry['position']
            checked += 1

            # Position unknown to the reference data.
            if position_name not in resume_levels:
                print(f"  ⚠️  {position_name}: 在简历数据中未找到 (当前: {current_level})")
                not_found.append({
                    'project': project_id,
                    'position': position_name,
                    'current': current_level,
                })
                continue

            expected_level = resume_levels[position_name]

            # Level agrees with the reference data.
            if current_level == expected_level:
                print(f"  ✅ {position_name}: {current_level}")
                matched += 1
                continue

            # Level disagrees with the reference data.
            print(f"  ❌ {position_name}: 当前[{current_level}] → 应为[{expected_level}]")
            mismatches.append({
                'project': project_id,
                'position': position_name,
                'current': current_level,
                'correct': expected_level,
            })

    # Avoid division by zero when no positions were checked at all.
    denominator = max(1, checked)

    print("\n" + divider)
    print("汇总统计")
    print(divider)
    print(f"总岗位数: {checked}")
    print(f"等级正确: {matched} 个 ({matched * 100 / denominator:.1f}%)")
    print(f"等级错误: {len(mismatches)} 个 ({len(mismatches) * 100 / denominator:.1f}%)")
    print(f"未找到岗位: {len(not_found)} 个 ({len(not_found) * 100 / denominator:.1f}%)")

    return mismatches, not_found

if __name__ == "__main__":
    # Run the comparison only when executed as a script (not on import).
    # The returned lists are bound at module level; nothing below uses them.
    mismatches, not_found = compare_and_report()