#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import json
import re

def get_position_levels_from_resume(resume_path='网页未导入数据/化工产业/化工岗位简历.json'):
    """Load the position-name -> level-tag mapping from the resume JSON file.

    The file content is iterated item by item; every JSON object contributes
    one entry keyed by its '岗位名称' value and holding its '岗位等级标签'
    value, both stripped of surrounding whitespace.

    Records are skipped (instead of aborting the whole load) when the item is
    not an object, or when either field is missing, empty after stripping, or
    not a string (for example JSON ``null``). If a position name occurs more
    than once, the last occurrence wins.

    Args:
        resume_path: Path of the UTF-8 encoded resume JSON file. Defaults to
            the location this script has always read from.

    Returns:
        A dict mapping position name to level tag.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    with open(resume_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    position_levels = {}
    for item in data:
        if not isinstance(item, dict):
            continue

        position = item.get('岗位名称')
        level = item.get('岗位等级标签')
        # dict.get's default only covers a missing key; an explicit null (or
        # any other non-string) would otherwise blow up on .strip().
        if not isinstance(position, str) or not isinstance(level, str):
            continue

        position = position.strip()
        level = level.strip()
        if position and level:
            position_levels[position] = level

    return position_levels

def extract_positions_from_mock(mock_path='src/mocks/projectLibraryMock.js'):
    """Extract each project's position entries from the JS mock file.

    The file is scanned as plain text with regular expressions rather than
    parsed as JavaScript, so the result depends on the textual layout:

    * A project is recognised as an ``"id": <digits>,`` token followed
      (lazily, across lines) by the next ``"positions": [`` array; the array
      text ends at the first ``]`` after it.
    * Inside that array text, an entry is a ``"level": "..."`` token followed
      on the same line by a ``"position": "..."`` token (this inner pattern
      does not match across newlines).

    Projects whose array yields no entries are omitted. If the same id is
    matched more than once, the later match replaces the earlier one.

    Args:
        mock_path: Path of the UTF-8 encoded mock file. Defaults to the
            location this script has always read from.

    Returns:
        A dict mapping project id (int) to a list of
        ``{'level': str, 'position': str}`` dicts in file order.

    Raises:
        OSError: If the file cannot be opened.
    """
    with open(mock_path, 'r', encoding='utf-8') as f:
        content = f.read()

    # Compile once up front instead of re-resolving the patterns per project.
    project_re = re.compile(
        r'"id":\s*(\d+),.*?"positions":\s*\[(.*?)\]', re.DOTALL
    )
    position_re = re.compile(
        r'"level":\s*"([^"]*)".*?"position":\s*"([^"]*)"'
    )

    projects_positions = {}
    for match in project_re.finditer(content):
        project_id = int(match.group(1))
        positions = [
            {'level': level, 'position': position}
            for level, position in position_re.findall(match.group(2))
        ]
        # Projects without any recognisable entry are left out entirely.
        if positions:
            projects_positions[project_id] = positions

    return projects_positions

def compare_and_report():
    """Compare the mock file's position levels with the resume data.

    Loads the reference mapping via get_position_levels_from_resume() and the
    per-project entries via extract_positions_from_mock(), prints a
    per-project line for every entry (match, mismatch, or unknown position),
    then prints summary counts and the lists of problem entries.

    Returns:
        A ``(mismatches, not_found)`` tuple of lists. Each mismatch is a dict
        with keys 'project', 'position', 'current', 'correct'; each not-found
        entry is a dict with keys 'project', 'position', 'current'.
    """
    expected_by_name = get_position_levels_from_resume()
    entries_by_project = extract_positions_from_mock()

    banner = "=" * 60
    print(banner)
    print("岗位等级对比报告")
    print(banner)

    checked = 0
    mismatches = []
    not_found = []

    for project_id, entries in entries_by_project.items():
        print(f"\n项目 {project_id}:")
        for entry in entries:
            actual = entry['level']
            name = entry['position']
            checked += 1

            # Position unknown to the resume data: nothing to compare against.
            if name not in expected_by_name:
                print(f"  ⚠️  {name}: 在简历数据中未找到")
                not_found.append({
                    'project': project_id,
                    'position': name,
                    'current': actual
                })
                continue

            expected = expected_by_name[name]
            if actual == expected:
                print(f"  ✅ {name}: {actual}")
                continue

            print(f"  ❌ {name}: 当前等级[{actual}] → 应为[{expected}]")
            mismatches.append({
                'project': project_id,
                'position': name,
                'current': actual,
                'correct': expected
            })

    # Summary section.
    mismatch_count = len(mismatches)
    missing_count = len(not_found)
    print("\n" + banner)
    print("汇总统计")
    print(banner)
    print(f"总岗位数: {checked}")
    print(f"等级不匹配: {mismatch_count} 个")
    print(f"未找到岗位: {missing_count} 个")
    print(f"正确匹配: {checked - mismatch_count - missing_count} 个")

    # Detailed listing of every entry that needs attention.
    if mismatches:
        print("\n需要修正的岗位等级：")
        for row in mismatches:
            print(f"  项目{row['project']}: {row['position']} [{row['current']}→{row['correct']}]")

    if not_found:
        print("\n未在简历数据中找到的岗位：")
        for row in not_found:
            print(f"  项目{row['project']}: {row['position']} (当前: {row['current']})")

    return mismatches, not_found

if __name__ == "__main__":
    # Run the comparison, then persist every entry that needs follow-up so
    # it can be consumed after the console report has scrolled away.
    level_mismatches, missing_positions = compare_and_report()

    with open('position_updates_needed.json', 'w', encoding='utf-8') as out_file:
        json.dump(
            {'mismatches': level_mismatches, 'not_found': missing_positions},
            out_file,
            ensure_ascii=False,  # keep the Chinese text readable in the file
            indent=2,
        )

    print("\n需要更新的数据已保存到 position_updates_needed.json")