Files
ALL-teach_sys/frontend_化工/extract_all_interview_questions.py
KQL cd2e307402 初始化12个产业教务系统项目
主要内容:
- 包含12个产业的完整教务系统前端代码
- 智能启动脚本 (start-industry.sh)
- 可视化产业导航页面 (index.html)
- 项目文档 (README.md)

优化内容:
- 删除所有node_modules和.yoyo文件夹,从7.5GB减少到2.7GB
- 添加.gitignore文件避免上传不必要的文件
- 自动依赖管理和智能启动系统

产业列表:
1. 文旅产业 (5150)
2. 智能制造 (5151)
3. 智能开发 (5152)
4. 财经商贸 (5153)
5. 视觉设计 (5154)
6. 交通物流 (5155)
7. 大健康 (5156)
8. 土木水利 (5157)
9. 食品产业 (5158)
10. 化工产业 (5159)
11. 能源产业 (5160)
12. 环保产业 (5161)

🤖 Generated with Claude Code
Co-Authored-By: Claude <noreply@anthropic.com>
2025-09-24 14:14:14 +08:00

108 lines
4.1 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
import json
import re
# Extract the interview questions of every chemical-industry position, group
# them by job group, print a summary and save the result as JSON.

# Input / output locations, resolved relative to the current working directory.
SOURCE_PATH = '网页未导入数据/化工产业/化工岗位简历.json'
OUTPUT_PATH = 'all_interview_questions.json'

# Label prefixes removed from question / answer lines, applied in order.
# Both the ASCII colon and the full-width colon are accepted, because Chinese
# text commonly uses the full-width form.
_QUESTION_PREFIXES = (
    re.compile(r'^问题\d+[::]\s*'),
    re.compile(r'^问[::]\s*'),
    re.compile(r'^\d+[、.]\s*'),
)
_ANSWER_PREFIXES = (
    re.compile(r'^答案[::]\s*'),
    re.compile(r'^答[::]\s*'),
    re.compile(r'^解答[::]\s*'),
)


def _strip_prefixes(text, patterns):
    """Apply each prefix pattern in turn and return the trimmed remainder."""
    for pattern in patterns:
        text = pattern.sub('', text)
    return text.strip()


def parse_interview_questions(interview_content, position_name=''):
    """Split raw interview text into a list of question/answer dicts.

    Non-empty lines are paired positionally: the 1st line is a question, the
    2nd its answer, the 3rd the next question, and so on. A trailing question
    without an answer receives a generic placeholder answer that mentions
    ``position_name``.

    Args:
        interview_content: Raw text of the ``interviewQuestions`` field.
            Anything that is not a string (e.g. ``None``) yields ``[]``.
        position_name: Position name used in the placeholder answer.

    Returns:
        A list of ``{"question": str, "answer": str}`` dicts; pairs whose
        question or answer is empty after cleaning are dropped.
    """
    if not isinstance(interview_content, str):
        return []

    lines = [line.strip() for line in interview_content.split('\n') if line.strip()]
    pairs = []
    for i in range(0, len(lines), 2):
        question = _strip_prefixes(lines[i], _QUESTION_PREFIXES)
        if not question:
            continue
        if i + 1 < len(lines):
            answer = _strip_prefixes(lines[i + 1], _ANSWER_PREFIXES)
            if not answer:
                continue
        else:
            # Odd number of lines: the last question has no answer line.
            answer = f"这是{position_name}岗位的重要考察点,需要根据个人经验和专业知识进行回答。"
        pairs.append({"question": question, "answer": answer})
    return pairs


def collect_group_questions(positions):
    """Group de-duplicated interview questions by job group.

    Args:
        positions: Iterable of position dicts; the keys read are
            ``positionName``, ``jobGroup`` and ``interviewQuestions``.

    Returns:
        A tuple ``(job_group_questions, positions_per_group, total_questions)``
        where the first maps job group -> list of question dicts, the second
        maps job group -> list of position names, and the third is the number
        of questions kept across all groups.
    """
    job_group_questions = {}
    positions_per_group = {}
    seen_per_group = {}  # job group -> set of question texts already stored
    total_questions = 0

    for position_data in positions:
        position_name = position_data.get('positionName', '')
        job_group = position_data.get('jobGroup', '')

        group_questions = job_group_questions.setdefault(job_group, [])
        positions_per_group.setdefault(job_group, []).append(position_name)
        seen = seen_per_group.setdefault(job_group, set())

        parsed = parse_interview_questions(
            position_data.get('interviewQuestions'), position_name
        )
        for qa in parsed:
            # Keep only the first occurrence of an identical question text
            # within the same job group.
            if qa['question'] in seen:
                continue
            seen.add(qa['question'])
            group_questions.append(qa)
            total_questions += 1

    return job_group_questions, positions_per_group, total_questions


def format_positions(positions, limit=3):
    """Return a short, comma-separated summary of position names.

    At most ``limit`` names are listed; when there are more, the total count
    is appended after the listed names.
    """
    if len(positions) > limit:
        return ', '.join(positions[:limit]) + f' 等{len(positions)}个岗位'
    return ', '.join(positions)


def main():
    """Read the source JSON, print statistics and write the grouped questions."""
    with open(SOURCE_PATH, 'r', encoding='utf-8') as f:
        chemical_data = json.load(f)

    job_group_questions, positions_per_group, total_questions = (
        collect_group_questions(chemical_data)
    )

    # Summary statistics.
    print("\n===== 化工岗位面试题统计 =====")
    print(f"总岗位数: {len(chemical_data)}")
    print(f"总岗位群数: {len(job_group_questions)}")
    print(f"总面试题数: {total_questions}")
    print("\n各岗位群面试题数量:")
    for job_group in sorted(job_group_questions):
        questions = job_group_questions[job_group]
        positions_str = format_positions(positions_per_group[job_group])
        print(f" {job_group}: {len(questions)} 题 (包含: {positions_str})")

    # Sample output: the first 3 questions of the first 2 job groups.
    print("\n===== 面试题示例 =====")
    for job_group in list(job_group_questions)[:2]:
        print(f"\n【{job_group}】的前3道面试题")
        for i, q in enumerate(job_group_questions[job_group][:3], 1):
            print(f" {i}. 问:{q['question'][:50]}...")
            print(f" 答:{q['answer'][:50]}...")

    # Persist the grouped questions.
    with open(OUTPUT_PATH, 'w', encoding='utf-8') as f:
        json.dump(job_group_questions, f, ensure_ascii=False, indent=2)
    print(f"\n✅ 已保存所有面试题到 {OUTPUT_PATH}")
    print("\n下一步:生成更新脚本...")


if __name__ == '__main__':
    main()