Files
ALL-teach_sys/frontend_化工/extract_all_interview_questions.py

108 lines
4.1 KiB
Python
Raw Permalink Normal View History

#!/usr/bin/env python3
import json
import re
# Extract every interview question from the chemical-industry position data,
# group the questions by job group (dropping duplicates inside a group), print
# a short summary, and save the grouped questions as JSON.

# Input/output locations, relative to the directory the script is run from.
INPUT_PATH = '网页未导入数据/化工产业/化工岗位简历.json'
OUTPUT_PATH = 'all_interview_questions.json'

# Leading labels removed from question lines, applied in this order.
# The colon class accepts both the ASCII ':' and the full-width colon
# (U+FF1A, written as an escape so the pattern survives re-encoding).
QUESTION_PREFIXES = (
    re.compile(r'^问题\d+[:\uff1a]\s*'),
    re.compile(r'^问[:\uff1a]\s*'),
    re.compile(r'^\d+[、.]\s*'),
)

# Leading labels removed from answer lines, applied in this order.
ANSWER_PREFIXES = (
    re.compile(r'^答案[:\uff1a]\s*'),
    re.compile(r'^答[:\uff1a]\s*'),
    re.compile(r'^解答[:\uff1a]\s*'),
)


def strip_prefixes(text, patterns):
    """Remove each leading label in *patterns* from *text*, in order.

    Returns the remaining text with surrounding whitespace trimmed.
    """
    for pattern in patterns:
        text = pattern.sub('', text)
    return text.strip()


def parse_interview_questions(interview_content, position_name=''):
    """Split one position's interview text into question/answer dicts.

    Non-empty lines are consumed two at a time: the first line of each pair is
    the question, the second its answer. A trailing unpaired line is kept as a
    question with a generic answer that mentions *position_name*.

    Args:
        interview_content: Newline-separated interview text. Anything that is
            not a string (for example ``None``) yields an empty list.
        position_name: Position name inserted into the generic answer.

    Returns:
        A list of ``{"question": ..., "answer": ...}`` dicts. Pairs whose
        question or answer is empty after label removal are skipped.
    """
    if not isinstance(interview_content, str):
        return []

    lines = [line.strip() for line in interview_content.split('\n') if line.strip()]
    questions = []
    for i in range(0, len(lines), 2):
        question = strip_prefixes(lines[i], QUESTION_PREFIXES)
        if not question:
            continue
        if i + 1 < len(lines):
            answer = strip_prefixes(lines[i + 1], ANSWER_PREFIXES)
            if not answer:
                continue
        else:
            # Unpaired last line: keep the question and supply a generic answer.
            answer = f"这是{position_name}岗位的重要考察点,需要根据个人经验和专业知识进行回答。"
        questions.append({"question": question, "answer": answer})
    return questions


def collect_questions_by_group(positions):
    """Group the parsed questions of every position by its job group.

    Args:
        positions: Iterable of position dicts. The keys read are
            ``positionName``, ``jobGroup`` and ``interviewQuestions``; missing
            names and groups default to the empty string.

    Returns:
        A ``(job_group_questions, positions_per_group)`` tuple. The first maps
        each job group to its question dicts, keeping only the first
        occurrence of each question text within a group. The second maps each
        job group to the names of its positions, in input order.
    """
    job_group_questions = {}
    positions_per_group = {}
    seen_per_group = {}  # question texts already stored, for O(1) duplicate checks

    for position_data in positions:
        position_name = position_data.get('positionName', '')
        job_group = position_data.get('jobGroup', '')

        group_questions = job_group_questions.setdefault(job_group, [])
        positions_per_group.setdefault(job_group, []).append(position_name)
        seen = seen_per_group.setdefault(job_group, set())

        parsed = parse_interview_questions(
            position_data.get('interviewQuestions'), position_name
        )
        for entry in parsed:
            if entry['question'] in seen:
                continue
            seen.add(entry['question'])
            group_questions.append(entry)

    return job_group_questions, positions_per_group


def format_positions(positions, limit=3):
    """Join position names for display, truncating after *limit* names.

    When the list is truncated, the total number of positions is appended.
    """
    if len(positions) > limit:
        return ', '.join(positions[:limit]) + f' 等{len(positions)}个岗位'
    return ', '.join(positions)


def main():
    """Load the position data, print the statistics, and write the JSON file."""
    with open(INPUT_PATH, 'r', encoding='utf-8') as f:
        chemical_data = json.load(f)

    job_group_questions, positions_per_group = collect_questions_by_group(chemical_data)
    total_questions = sum(len(entries) for entries in job_group_questions.values())

    # Summary statistics.
    print("\n===== 化工岗位面试题统计 =====")
    print(f"总岗位数: {len(chemical_data)}")
    print(f"总岗位群数: {len(job_group_questions)}")
    print(f"总面试题数: {total_questions}")
    print("\n各岗位群面试题数量:")
    for job_group in sorted(job_group_questions):
        positions_str = format_positions(positions_per_group[job_group])
        count = len(job_group_questions[job_group])
        print(f" {job_group}: {count} 题 (包含: {positions_str})")

    # Sample questions from the first two job groups (insertion order).
    print("\n===== 面试题示例 =====")
    for job_group in list(job_group_questions)[:2]:
        print(f"\n【{job_group}】的前3道面试题")
        for i, entry in enumerate(job_group_questions[job_group][:3], 1):
            print(f" {i}. 问:{entry['question'][:50]}...")
            print(f" 答:{entry['answer'][:50]}...")

    # Persist the grouped questions.
    with open(OUTPUT_PATH, 'w', encoding='utf-8') as f:
        json.dump(job_group_questions, f, ensure_ascii=False, indent=2)
    print("\n✅ 已保存所有面试题到 all_interview_questions.json")
    print("\n下一步:生成更新脚本...")


if __name__ == "__main__":
    main()