108 lines
4.1 KiB
Python
108 lines
4.1 KiB
Python
|
|
#!/usr/bin/env python3
|
|||
|
|
import json
|
|||
|
|
import re
|
|||
|
|
|
|||
|
|
# Default locations used when the file is executed as a script: the resume
# dataset that is read and the question bank that is written.
INPUT_PATH = '网页未导入数据/化工产业/化工岗位简历.json'
OUTPUT_PATH = 'all_interview_questions.json'

# Longest question/answer excerpt shown in the sample section of the report.
PREVIEW_LENGTH = 50

# Leading labels removed from question / answer lines, applied in this order.
# Compiled once instead of on every loop iteration.  The character class
# accepts both the ASCII colon and the full-width colon (U+FF1A).
_QUESTION_PREFIXES = (
    re.compile(r'^问题\d+[:\uFF1A]\s*'),
    re.compile(r'^问[:\uFF1A]\s*'),
    re.compile(r'^\d+[、.]\s*'),
)
_ANSWER_PREFIXES = (
    re.compile(r'^答案[:\uFF1A]\s*'),
    re.compile(r'^答[:\uFF1A]\s*'),
    re.compile(r'^解答[:\uFF1A]\s*'),
)


def _strip_prefixes(text, patterns):
    """Return *text* with each leading label in *patterns* removed, in order."""
    for pattern in patterns:
        text = pattern.sub('', text)
    return text.strip()


def _preview(text, limit=PREVIEW_LENGTH):
    """Return *text* cut to *limit* characters, with '...' only if it was cut."""
    return text if len(text) <= limit else text[:limit] + '...'


def parse_interview_questions(interview_content, position_name=''):
    """Turn a raw interview-question text into a list of Q/A dicts.

    Blank lines are dropped and the remaining lines are paired up: the first
    line of each pair is the question, the second the answer.  Leading labels
    matched by the prefix patterns above are stripped.  A trailing unpaired
    line is kept as a question with a generic answer that mentions
    *position_name*.  A pair whose question or answer is empty after cleaning
    is skipped.

    Args:
        interview_content: The raw text; ``None`` or ``''`` yields ``[]``.
        position_name: Position name inserted into the generic answer.

    Returns:
        A list of ``{"question": str, "answer": str}`` dicts in input order.
    """
    if not interview_content:
        return []

    lines = [line.strip() for line in interview_content.split('\n') if line.strip()]

    parsed = []
    for start in range(0, len(lines), 2):
        question = _strip_prefixes(lines[start], _QUESTION_PREFIXES)
        if start + 1 < len(lines):
            answer = _strip_prefixes(lines[start + 1], _ANSWER_PREFIXES)
        else:
            # Odd line out: no answer line follows, so use the generic answer.
            answer = f"这是{position_name}岗位的重要考察点,需要根据个人经验和专业知识进行回答。"
        if question and answer:
            parsed.append({"question": question, "answer": answer})
    return parsed


def collect_questions_by_group(chemical_data):
    """Group de-duplicated interview questions by job group.

    Each record is read through the keys ``positionName``, ``jobGroup`` and
    ``interviewQuestions``; missing name/group default to ``''``.  Within one
    job group a question whose text is identical to an earlier one is dropped
    (the first occurrence wins).

    Args:
        chemical_data: Iterable of position records (dicts).

    Returns:
        A ``(job_group_questions, positions_per_group)`` tuple: the first maps
        each job group to its list of Q/A dicts, the second maps each job
        group to the position names seen for it, in input order.
    """
    job_group_questions = {}
    positions_per_group = {}
    seen_per_group = {}  # job group -> set of question texts already stored

    for position_data in chemical_data:
        position_name = position_data.get('positionName', '')
        job_group = position_data.get('jobGroup', '')

        group_questions = job_group_questions.setdefault(job_group, [])
        positions_per_group.setdefault(job_group, []).append(position_name)
        seen = seen_per_group.setdefault(job_group, set())

        raw_questions = position_data.get('interviewQuestions')
        for qa in parse_interview_questions(raw_questions, position_name):
            if qa['question'] in seen:
                continue
            seen.add(qa['question'])
            group_questions.append(qa)

    return job_group_questions, positions_per_group


def _print_report(position_count, job_group_questions, positions_per_group):
    """Print the summary statistics and a short sample of the questions."""
    total_questions = sum(len(items) for items in job_group_questions.values())

    print("\n===== 化工岗位面试题统计 =====")
    print(f"总岗位数: {position_count}")
    print(f"总岗位群数: {len(job_group_questions)}")
    print(f"总面试题数: {total_questions}")
    print("\n各岗位群面试题数量:")

    for job_group, questions in sorted(job_group_questions.items()):
        positions = positions_per_group[job_group]
        if len(positions) > 3:
            # Keep the line short: name three positions, then give the count.
            positions_str = ', '.join(positions[:3]) + f' 等{len(positions)}个岗位'
        else:
            positions_str = ', '.join(positions)
        print(f" {job_group}: {len(questions)} 题 (包含: {positions_str})")

    # Sample: first three questions of the first two groups (insertion order).
    print("\n===== 面试题示例 =====")
    for job_group in list(job_group_questions)[:2]:
        print(f"\n【{job_group}】的前3道面试题:")
        for i, q in enumerate(job_group_questions[job_group][:3], 1):
            print(f" {i}. 问:{_preview(q['question'])}")
            print(f" 答:{_preview(q['answer'])}")


def main(input_path=INPUT_PATH, output_path=OUTPUT_PATH):
    """Read the resume dataset, report on it and save the question bank.

    Args:
        input_path: UTF-8 JSON file holding the list of position records.
        output_path: Destination JSON file for the per-group question lists.

    Returns:
        The mapping of job group to Q/A dicts that was written to disk.
    """
    with open(input_path, 'r', encoding='utf-8') as f:
        chemical_data = json.load(f)

    job_group_questions, positions_per_group = collect_questions_by_group(chemical_data)
    _print_report(len(chemical_data), job_group_questions, positions_per_group)

    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(job_group_questions, f, ensure_ascii=False, indent=2)
    print(f"\n✅ 已保存所有面试题到 {output_path}")

    print("\n下一步:生成更新脚本...")
    return job_group_questions


if __name__ == '__main__':
    main()
|