#!/usr/bin/env python3
"""Collect interview questions from the chemical-industry position data.

Reads the position/resume JSON file, parses each position's
``interviewQuestions`` text into question/answer pairs, merges them per job
group (dropping questions whose text already exists in that group), prints a
summary and writes the merged result to ``all_interview_questions.json``.
"""
import json
import re

INPUT_PATH = '网页未导入数据/化工产业/化工岗位简历.json'
OUTPUT_PATH = 'all_interview_questions.json'

# Label prefixes removed from the start of a line. The colon class accepts
# both the ASCII colon and the full-width colon (U+FF1A). The patterns are
# applied in the listed order, each on the result of the previous one.
QUESTION_PREFIXES = (
    re.compile(r'^问题\d+[:\uff1a]\s*'),
    re.compile(r'^问[:\uff1a]\s*'),
    re.compile(r'^\d+[、.]\s*'),
)
ANSWER_PREFIXES = (
    re.compile(r'^答案[:\uff1a]\s*'),
    re.compile(r'^答[:\uff1a]\s*'),
    re.compile(r'^解答[:\uff1a]\s*'),
)


def strip_prefixes(text, patterns):
    """Return *text* with each leading label in *patterns* removed, stripped."""
    for pattern in patterns:
        text = pattern.sub('', text)
    return text.strip()


def parse_interview_questions(content, position_name=''):
    """Parse raw interview text into ``{"question", "answer"}`` dicts.

    Non-empty lines are consumed two at a time: the first line of each pair
    is the question, the second the answer. A trailing unpaired line is kept
    as a question with a generic placeholder answer mentioning
    *position_name*. Pairs whose question or answer is empty after cleaning
    are dropped.

    Returns an empty list when *content* is not a string (for example a JSON
    ``null``), so one malformed record cannot abort the whole run.
    """
    if not isinstance(content, str):
        return []

    lines = [text for text in (raw.strip() for raw in content.split('\n')) if text]

    parsed = []
    for start in range(0, len(lines), 2):
        question = strip_prefixes(lines[start], QUESTION_PREFIXES)
        if start + 1 < len(lines):
            answer = strip_prefixes(lines[start + 1], ANSWER_PREFIXES)
        else:
            # Odd line count: the last line has no answer line of its own.
            answer = f"这是{position_name}岗位的重要考察点,需要根据个人经验和专业知识进行回答。"
        if question and answer:
            parsed.append({"question": question, "answer": answer})
    return parsed


def collect_group_questions(positions):
    """Merge the interview questions of all *positions* by job group.

    Returns a tuple ``(job_group_questions, positions_per_group, total)``:

    * ``job_group_questions`` maps job group -> list of question dicts, in
      first-seen order, with at most one entry per distinct question text;
    * ``positions_per_group`` maps job group -> list of position names;
    * ``total`` is the number of questions kept across all groups.
    """
    job_group_questions = {}
    positions_per_group = {}
    seen_per_group = {}  # job group -> set of question texts already kept
    total = 0

    for position in positions:
        position_name = position.get('positionName', '')
        job_group = position.get('jobGroup', '')

        # Register the group even when the position has no questions, so it
        # still shows up in the report and in the output file.
        if job_group not in job_group_questions:
            job_group_questions[job_group] = []
            positions_per_group[job_group] = []
            seen_per_group[job_group] = set()
        positions_per_group[job_group].append(position_name)

        if 'interviewQuestions' not in position:
            continue

        seen = seen_per_group[job_group]
        parsed = parse_interview_questions(position['interviewQuestions'], position_name)
        for item in parsed:
            if item['question'] in seen:
                continue  # first occurrence in the group wins
            seen.add(item['question'])
            job_group_questions[job_group].append(item)
            total += 1

    return job_group_questions, positions_per_group, total


def print_report(position_count, job_group_questions, positions_per_group, total_questions):
    """Print the summary statistics and a few sample questions."""
    print("\n===== 化工岗位面试题统计 =====")
    print(f"总岗位数: {position_count}")
    print(f"总岗位群数: {len(job_group_questions)}")
    print(f"总面试题数: {total_questions}")
    print("\n各岗位群面试题数量:")

    for job_group, questions in sorted(job_group_questions.items()):
        names = positions_per_group[job_group]
        if len(names) > 3:
            # Show only the first three names followed by the total count.
            positions_str = ', '.join(names[:3]) + f' 等{len(names)}个岗位'
        else:
            positions_str = ', '.join(names)
        print(f" {job_group}: {len(questions)} 题 (包含: {positions_str})")

    # Show the first three questions of the first two job groups as samples.
    print("\n===== 面试题示例 =====")
    for job_group in list(job_group_questions)[:2]:
        print(f"\n【{job_group}】的前3道面试题:")
        for i, q in enumerate(job_group_questions[job_group][:3], 1):
            print(f" {i}. 问:{q['question'][:50]}...")
            print(f" 答:{q['answer'][:50]}...")


def main(input_path=INPUT_PATH, output_path=OUTPUT_PATH):
    """Load the position data, collect the questions, report and save them."""
    with open(input_path, 'r', encoding='utf-8') as f:
        chemical_data = json.load(f)

    job_group_questions, positions_per_group, total_questions = collect_group_questions(chemical_data)

    print_report(len(chemical_data), job_group_questions, positions_per_group, total_questions)

    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(job_group_questions, f, ensure_ascii=False, indent=2)

    print("\n✅ 已保存所有面试题到 all_interview_questions.json")
    print("\n下一步:生成更新脚本...")


if __name__ == "__main__":
    main()