#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import json
import re

# Location of the smart-manufacturing resume data, relative to the working
# directory the script is started from.
DATA_PATH = '网页未导入数据/智能制造产业/智能制造岗位简历.json'

# A question line starts with a number, a dot and whitespace, e.g. "1. xxx".
# Applied with .match() to already-stripped lines, so it is anchored at the
# first non-blank character of the line.
QUESTION_LINE = re.compile(r'\d+\.\s+')


def parse_interview_content(content):
    """Extract section titles and question lines from interview text.

    Each line is stripped first. A line starting with ``"# "`` is a section
    heading (e.g. ``# 一、xxx``); a line starting with ``<digits>. `` is a
    question (e.g. ``1. xxx``). Everything else is ignored.

    Args:
        content: The raw interview text. Anything that is not a non-empty
            string yields two empty lists.

    Returns:
        A ``(sections, questions)`` tuple: unique section titles in
        first-seen order, and the stripped question lines in document order.
    """
    sections = []
    questions = []
    if not isinstance(content, str):
        return sections, questions

    for raw_line in content.split('\n'):
        line = raw_line.strip()
        if not line:
            continue
        if line.startswith('# '):
            title = line[2:].strip()
            if title and title not in sections:
                sections.append(title)
        elif QUESTION_LINE.match(line):
            questions.append(line)
    return sections, questions


def build_interview_map(records):
    """Aggregate interview-question statistics per job group.

    Records that are not dicts, or that lack a non-empty string job group
    (``简历岗位群``) or non-empty string interview content (``面试题内容``),
    are skipped.

    Args:
        records: Iterable of record dicts loaded from the JSON file.

    Returns:
        A dict keyed by job group. Each value holds:
        ``type`` (the ``面试题`` value of the first record seen for the group),
        ``positions`` (every ``岗位名称`` value seen, duplicates included),
        ``sections`` (unique section titles in first-seen order) and
        ``total_questions`` (the largest question count found in any single
        record of the group).
    """
    interview_map = {}
    for item in records:
        if not isinstance(item, dict):
            continue
        job_group = item.get('简历岗位群', '')
        content = item.get('面试题内容', '')
        if not (isinstance(job_group, str) and job_group):
            continue
        if not (isinstance(content, str) and content):
            continue

        if job_group not in interview_map:
            interview_map[job_group] = {
                'type': item.get('面试题', ''),
                'positions': [],
                'sections': [],
                'total_questions': 0,
            }
        entry = interview_map[job_group]
        entry['positions'].append(item.get('岗位名称', ''))

        sections, questions = parse_interview_content(content)
        for title in sections:
            if title not in entry['sections']:
                entry['sections'].append(title)
        entry['total_questions'] = max(entry['total_questions'], len(questions))
    return interview_map


def summary_lines(interview_map):
    """Return the report lines for every job group, sorted by group name.

    Positions are de-duplicated in first-seen order (so the output is the
    same on every run) and empty or null position names are left out.

    Args:
        interview_map: The mapping produced by ``build_interview_map``.

    Returns:
        A list of strings, each meant to be passed to ``print`` as-is. The
        last line of each group ends with a newline so that groups are
        separated by a blank line when printed.
    """
    lines = []
    for group in sorted(interview_map):
        data = interview_map[group]
        positions = [str(p) for p in dict.fromkeys(data['positions']) if p]
        lines.append(f"{group} ({data['type']}):")
        lines.append(f"  包含岗位: {', '.join(positions)}")
        lines.append(f"  面试题章节: {len(data['sections'])} 个")
        lines.extend(f"    - {section}" for section in data['sections'])
        lines.append(f"  总题目数: {data['total_questions']} 道\n")
    return lines


def example_question_lines(records, job_group='PLC', position='PLC编程工程师',
                           limit=10):
    """Return the report lines listing the questions of one example record.

    Only the first record whose job group and position both match is used.
    Questions are found with the same rule as the per-group statistics, so
    the count reported here agrees with the summary.

    Args:
        records: Iterable of record dicts loaded from the JSON file.
        job_group: Required ``简历岗位群`` value.
        position: Required ``岗位名称`` value.
        limit: Maximum number of questions to list.

    Returns:
        A list of strings to print, or an empty list when no record matches.
        Questions longer than 80 characters are cut and suffixed with "...".
    """
    for item in records:
        if not isinstance(item, dict):
            continue
        if item.get('简历岗位群') != job_group or item.get('岗位名称') != position:
            continue

        _, questions = parse_interview_content(item.get('面试题内容', ''))
        lines = [f"找到 {len(questions)} 个问题："]
        for q in questions[:limit]:
            lines.append(f"  {q[:80]}..." if len(q) > 80 else f"  {q}")
        if len(questions) > limit:
            lines.append(f"  ... 还有 {len(questions) - limit} 个问题")
        return lines
    return []


if __name__ == "__main__":
    print("=== 分析智能制造岗位简历.json 中的面试题内容 ===\n")

    # Load the smart-manufacturing resume data.
    with open(DATA_PATH, 'r', encoding='utf-8') as f:
        smart_mfg_data = json.load(f)

    # Analyse the interview questions of each job group and print the result.
    interview_map = build_interview_map(smart_mfg_data)
    for line in summary_lines(interview_map):
        print(line)

    # Show one concrete example in detail.
    print("\n=== 示例：PLC类岗位面试题详细内容 ===\n")
    for line in example_question_lines(smart_mfg_data):
        print(line)