Files
ALL-teach_sys/frontend_能源/extract_interview_questions.py
KQL cd2e307402 初始化12个产业教务系统项目
主要内容:
- 包含12个产业的完整教务系统前端代码
- 智能启动脚本 (start-industry.sh)
- 可视化产业导航页面 (index.html)
- 项目文档 (README.md)

优化内容:
- 删除所有node_modules和.yoyo文件夹,从7.5GB减少到2.7GB
- 添加.gitignore文件避免上传不必要的文件
- 自动依赖管理和智能启动系统

产业列表:
1. 文旅产业 (5150)
2. 智能制造 (5151)
3. 智能开发 (5152)
4. 财经商贸 (5153)
5. 视觉设计 (5154)
6. 交通物流 (5155)
7. 大健康 (5156)
8. 土木水利 (5157)
9. 食品产业 (5158)
10. 化工产业 (5159)
11. 能源产业 (5160)
12. 环保产业 (5161)

🤖 Generated with Claude Code
Co-Authored-By: Claude <noreply@anthropic.com>
2025-09-24 14:14:14 +08:00

121 lines
4.1 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import json
import re
from collections import defaultdict
import datetime
import shutil
def parse_interview_questions(content):
    """Parse interview-question text into question/answer records.

    The text is split into sections on blank lines. Sections that start
    with ``#`` (headings) are skipped. A section that starts with ``N.`` or
    contains a question mark (ASCII ``?`` or full-width ``？``) opens a new
    question; while a question is pending, the next section that starts
    with ``示例答案`` or ``答案`` supplies its answer and completes the record.
    Questions that never receive an answer are dropped.

    Args:
        content: Raw interview-question text.

    Returns:
        A list of dicts with the keys ``id`` (``'q1'``, ``'q2'``, ...),
        ``question``, ``answer``, ``difficulty`` (always ``'中等'``) and
        ``tags`` (always an empty list). Ids are assigned when a question
        is detected, so an unanswered question leaves a gap in the
        numbering.
    """
    answer_prefixes = ('示例答案', '答案')
    question_marks = ('?', '？')

    questions = []
    question_id = 1
    current_question = None

    for section in content.split('\n\n'):
        section = section.strip()
        # Skip empty sections and heading lines.
        if not section or section.startswith('#'):
            continue

        # Check for an answer first: an answer may itself contain a question
        # mark and must not be mistaken for the next question.
        if current_question and section.startswith(answer_prefixes):
            # Strip the label and an optional ASCII or full-width colon.
            answer_text = re.sub(r'^(示例答案|答案)[:：]?\s*', '', section).strip()
            current_question['answer'] = answer_text
            questions.append(current_question)
            current_question = None
            continue

        is_numbered = re.match(r'^\d+\.', section) is not None
        has_question_mark = any(mark in section for mark in question_marks)
        if is_numbered or has_question_mark:
            # Drop the leading "N." numbering from the question text.
            question_text = re.sub(r'^\d+\.\s*', '', section).strip()
            current_question = {
                'id': f'q{question_id}',
                'question': question_text,
                'answer': '',
                'difficulty': '中等',
                'tags': []
            }
            question_id += 1

    return questions
def _collect_group_questions(jobs, limit=5):
    """Return ``{job-group name: first `limit` parsed questions}`` for ``jobs``."""
    collected = {}
    for job in jobs:
        group_name = job.get("简历岗位群", "")
        # Jobs without a group name are skipped; for each group only the
        # first job that yields at least one parsed question is used.
        if not group_name or group_name in collected or "面试题内容" not in job:
            continue
        questions = parse_interview_questions(job["面试题内容"])
        if questions:
            collected[group_name] = questions[:limit]
    return collected


def _patch_industries(content, interview_questions):
    """Return ``content`` with the ``const industries`` array updated, or None if absent."""
    industries_match = re.search(r'const industries = (\[[\s\S]*?\]);', content)
    if not industries_match:
        return None

    # The array literal is parsed as strict JSON; json.loads raises
    # json.JSONDecodeError if the literal is not valid JSON.
    industries = json.loads(industries_match.group(1))
    for industry in industries:
        group_name = industry.get("name", "")
        if group_name in interview_questions:
            # Every question entry of the industry receives the same list.
            for question_obj in industry.get("questions", []):
                question_obj["subQuestions"] = interview_questions[group_name]

    new_industries_str = json.dumps(industries, ensure_ascii=False, indent=2)
    # Splice the re-serialized array back over the captured literal only.
    return (
        content[:industries_match.start(1)]
        + new_industries_str
        + content[industries_match.end(1):]
    )


def extract_and_update_interview_questions():
    """Extract interview questions from the energy job data and update the mock file.

    Reads ``网页未导入数据/能源产业/能源岗位简历.json``, collects up to five parsed
    questions per job group, copies ``src/mocks/resumeInterviewMock.js`` to a
    timestamped backup, and rewrites the ``const industries = [...]`` array
    in that file so that each question entry of a matching industry carries
    the collected questions under ``subQuestions``. Progress is reported via
    ``print``. If the array is not found, the mock file is left unchanged
    (the backup has already been created at that point).

    Raises:
        OSError: If the input or mock file cannot be read, copied or written.
        json.JSONDecodeError: If the job data or the ``industries`` array
            literal is not valid JSON.
    """
    # Load the energy job records.
    with open("网页未导入数据/能源产业/能源岗位简历.json", 'r', encoding='utf-8') as f:
        energy_jobs = json.load(f)
    interview_questions = _collect_group_questions(energy_jobs)

    mock_file = "src/mocks/resumeInterviewMock.js"

    # Back up the mock file before touching it.
    backup_path = f"{mock_file}.backup_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}"
    shutil.copy(mock_file, backup_path)
    print(f"✅ 已备份文件到:{backup_path}")

    with open(mock_file, 'r', encoding='utf-8') as f:
        content = f.read()

    new_content = _patch_industries(content, interview_questions)
    if new_content is None:
        print("❌ 未找到industries数组")
        return

    with open(mock_file, 'w', encoding='utf-8') as f:
        f.write(new_content)

    print("✅ 成功更新面试题数据")
    print("\n📊 更新的岗位群面试题:")
    for group_name, questions in interview_questions.items():
        print(f" - {group_name}: {len(questions)}个问题")
# Run the extraction only when executed as a script, not when imported.
if __name__ == "__main__":
    extract_and_update_interview_questions()