#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Compare interview-question counts between a resume JSON file and a JS mock.

The script reads the smart-manufacturing resume JSON, counts the numbered
interview questions recorded for each job group, prints those counts, then
scans the front-end mock file and reports every job group whose mock entry
holds fewer questions than the JSON source (or has no entry at all).
"""

import json
import re
from typing import Optional

RESUME_JSON_PATH = '网页未导入数据/智能制造产业/智能制造岗位简历.json'
MOCK_JS_PATH = 'src/mocks/resumeInterviewMock.js'

# One numbered question per line: digits, a dot, whitespace, then the rest of
# the line.  The character class must be [^\n] (not a newline); the previous
# [^\\n] excluded backslash and the letter "n" instead, so a match could run
# across line breaks and swallow the following questions.
QUESTION_LINE_RE = re.compile(r'\d+\.\s+[^\n]+')

# A single question/answer pair inside a mock "questions" array.
MOCK_QUESTION_RE = re.compile(r'"question":\s*"[^"]+",\s*"answer":')

# A mock entry: its "name" value and the raw text of its "questions" array.
MOCK_GROUP_RE = re.compile(
    r'"name":\s*"([^"]+)"[^}]*?"questions":\s*\[(.*?)\]\s*\}',
    re.DOTALL,
)


def count_numbered_questions(interview_content: str) -> int:
    """Return how many numbered lines ("1. ...") appear in *interview_content*."""
    return len(QUESTION_LINE_RE.findall(interview_content))


def collect_interview_stats(records) -> dict:
    """Aggregate question counts and position names per job group.

    Args:
        records: iterable of dicts read from the resume JSON.  Only the keys
            '简历岗位群', '面试题内容' and '岗位名称' are consulted.

    Returns:
        A dict mapping job-group name to
        ``{'count': <largest question count seen>, 'positions': [names...]}``.
        Records with an empty job group or empty interview content are skipped.
    """
    stats = {}
    for item in records:
        job_group = item.get('简历岗位群', '')
        interview_content = item.get('面试题内容', '')
        if not (job_group and interview_content):
            continue
        entry = stats.setdefault(job_group, {'count': 0, 'positions': []})
        # Keep the maximum across positions of the same group, not the sum.
        entry['count'] = max(
            entry['count'], count_numbered_questions(interview_content)
        )
        entry['positions'].append(item.get('岗位名称', ''))
    return stats


def count_mock_questions(questions_block: str) -> int:
    """Return the number of question/answer pairs in a mock "questions" array."""
    return len(MOCK_QUESTION_RE.findall(questions_block))


def find_mock_question_count(mock_content: str, group: str) -> Optional[int]:
    """Return the mock question count for *group*, or None if it is absent.

    The group name is escaped before being placed in the pattern, so names
    containing regex metacharacters are matched literally.
    """
    pattern = rf'"name":\s*"{re.escape(group)}".*?"questions":\s*\[(.*?)\]\s*\}}'
    match = re.search(pattern, mock_content, re.DOTALL)
    if match is None:
        return None
    return count_mock_questions(match.group(1))


def main() -> None:
    """Print the JSON statistics, the mock statistics, and the comparison."""
    # Load the smart-manufacturing resume data.
    with open(RESUME_JSON_PATH, 'r', encoding='utf-8') as f:
        smart_mfg_data = json.load(f)

    interview_stats = collect_interview_stats(smart_mfg_data)

    # Report the per-group counts found in the JSON source.
    print("=== 智能制造岗位简历.json 面试题统计 ===\n")
    for group, data in sorted(interview_stats.items()):
        print(f"{group}:")
        print(f" - 面试题数量: {data['count']} 道")
        print(f" - 包含岗位: {', '.join(data['positions'])}")
        print()

    # Load the current mock file to inspect what it already contains.
    with open(MOCK_JS_PATH, 'r', encoding='utf-8') as f:
        mock_content = f.read()

    # Report the per-entry counts found in the mock file.
    print("\n=== 当前 mock 文件面试题统计 ===\n")
    for industry_name, questions_str in MOCK_GROUP_RE.findall(mock_content):
        question_count = count_mock_questions(questions_str)
        print(f"{industry_name}: {question_count} 道题")

    # List the groups whose mock entry is missing or short of questions.
    print("\n=== 对比分析 ===\n")
    print("需要更新的岗位群:")
    for group in interview_stats:
        expected_count = interview_stats[group]['count']
        current_count = find_mock_question_count(mock_content, group)
        if current_count is None:
            print(f"- {group}: 未找到对应数据")
        elif current_count < expected_count:
            print(f"- {group}: 当前 {current_count} 道,应有 {expected_count} 道(缺少 {expected_count - current_count} 道)")


if __name__ == "__main__":
    main()