Files
ALL-teach_sys/frontend_能源/extract_all_interview_questions.py
KQL cd2e307402 初始化12个产业教务系统项目
主要内容:
- 包含12个产业的完整教务系统前端代码
- 智能启动脚本 (start-industry.sh)
- 可视化产业导航页面 (index.html)
- 项目文档 (README.md)

优化内容:
- 删除所有node_modules和.yoyo文件夹,从7.5GB减少到2.7GB
- 添加.gitignore文件避免上传不必要的文件
- 自动依赖管理和智能启动系统

产业列表:
1. 文旅产业 (5150)
2. 智能制造 (5151)
3. 智能开发 (5152)
4. 财经商贸 (5153)
5. 视觉设计 (5154)
6. 交通物流 (5155)
7. 大健康 (5156)
8. 土木水利 (5157)
9. 食品产业 (5158)
10. 化工产业 (5159)
11. 能源产业 (5160)
12. 环保产业 (5161)

🤖 Generated with Claude Code
Co-Authored-By: Claude <noreply@anthropic.com>
2025-09-24 14:14:14 +08:00

167 lines
6.1 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import json
import re
from collections import defaultdict
import datetime
import shutil
# Matches a numbered question line such as "12. Some question text".
_QUESTION_LINE_RE = re.compile(r'^(\d+)\.\s+(.+)$')
# Strips everything up to and including the answer marker plus an optional
# ASCII or full-width (U+FF1A) colon and any following whitespace.
_ANSWER_PREFIX_RE = re.compile(r'^.*?(示例答案|答案)[:\uff1a]?\s*')
# Substrings that mark the start of an answer. The full-width colon is written
# as an escape so it cannot be confused with the ASCII one.
_ANSWER_MARKERS = ('示例答案', '答案:', '答案\uff1a')
# ASCII and full-width (U+FF1F) question marks removed from question ends.
_QUESTION_MARKS = '?\uff1f'


def _flush_question(questions, question, answer_lines):
    """Append ``question`` to ``questions`` if it collected a non-empty answer."""
    if question is None:
        return
    answer_text = '\n'.join(answer_lines).strip()
    if answer_text:
        question['answer'] = answer_text
        questions.append(question)


def parse_interview_questions(content):
    """Parse interview-question text into a list of question/answer dicts.

    The text is scanned line by line (each line is stripped first):

    * A line of the form ``<digits>. <text>`` opens a new question. Trailing
      ASCII or full-width question marks are removed from the text.
    * A line containing an answer marker (``示例答案`` or ``答案`` followed by
      an ASCII or full-width colon) starts the answer of the open question;
      any text after the marker on the same line is the first answer line.
    * While inside an answer, every non-empty line is appended to the answer.
    * A line starting with ``#`` (a heading) closes the open question, so
      text that follows a heading is not attached to the previous answer.

    Questions that never receive a non-empty answer are dropped, but they
    still consume an id, so ids may have gaps.

    Args:
        content: Raw interview-question text. ``None`` or an empty string
            yields an empty list.

    Returns:
        A list of dicts with the keys ``id`` (``"q1"``, ``"q2"``, ...),
        ``question`` and ``answer``, in order of appearance.
    """
    if not content:
        return []

    questions = []
    current_question = None
    current_answer = []
    question_id = 1
    in_answer = False

    for raw_line in content.split('\n'):
        line = raw_line.strip()

        question_match = _QUESTION_LINE_RE.match(line)
        if question_match:
            # A new question closes the previous one. The answer buffer is
            # always reset so stray lines can never leak into this question.
            _flush_question(questions, current_question, current_answer)
            current_answer = []
            current_question = {
                'id': f'q{question_id}',
                'question': question_match.group(2).rstrip(_QUESTION_MARKS),
                'answer': '',
            }
            question_id += 1
            in_answer = False
        elif any(marker in line for marker in _ANSWER_MARKERS):
            # Checked before the heading test on purpose: the marker itself
            # may be written as a heading (e.g. "### 示例答案").
            if current_question is not None:
                in_answer = True
                answer_in_line = _ANSWER_PREFIX_RE.sub('', line).strip()
                if answer_in_line:
                    current_answer.append(answer_in_line)
        elif line.startswith('#'):
            # A heading starts a new section: store what was collected and
            # ignore everything until the next question line.
            _flush_question(questions, current_question, current_answer)
            current_question = None
            current_answer = []
            in_answer = False
        elif in_answer and current_question is not None and line:
            current_answer.append(line)

    # Store the question that was still open when the input ended.
    _flush_question(questions, current_question, current_answer)
    return questions
# Prefix added to every non-blank line of the serialized array (8 spaces).
_SUB_QUESTIONS_INDENT = ' ' * 8


def _find_array_end(text, open_index):
    """Return the index just past the ``]`` closing the ``[`` at ``open_index``.

    Brackets inside quoted strings (double, single or backtick quoted, with
    backslash escapes) are ignored. Returns ``-1`` when no closing bracket
    is found.
    """
    depth = 0
    quote = None
    escaped = False
    for pos in range(open_index, len(text)):
        ch = text[pos]
        if quote is not None:
            if escaped:
                escaped = False
            elif ch == '\\':
                escaped = True
            elif ch == quote:
                quote = None
        elif ch in '"\'`':
            quote = ch
        elif ch == '[':
            depth += 1
        elif ch == ']':
            depth -= 1
            if depth == 0:
                return pos + 1
    return -1


def _render_sub_questions(questions):
    """Serialize ``questions`` as a JSON array with every non-blank line indented."""
    dumped = json.dumps(questions, ensure_ascii=False, indent=8)
    return '\n'.join(
        _SUB_QUESTIONS_INDENT + line if line.strip() else line
        for line in dumped.split('\n')
    )


def _replace_sub_questions(content, group_name, questions):
    """Return ``content`` with the group's ``subQuestions`` array replaced.

    The array replaced is the first ``"subQuestions"`` that follows the first
    ``"questions":`` after ``"name": "<group_name>"``. Returns ``None`` when
    that structure (or the end of the array) cannot be located.
    """
    pattern = (
        # The group name is data, not a pattern, so it must be escaped.
        r'"name": "' + re.escape(group_name) + r'"'
        r'[\s\S]*?"questions":[\s\S]*?'
        r'(?P<key>"subQuestions":\s*)\['
    )
    head = re.search(pattern, content)
    if head is None:
        return None
    array_end = _find_array_end(content, head.end() - 1)
    if array_end == -1:
        return None
    replacement = f'"subQuestions": {_render_sub_questions(questions)}'
    # Splice by position so only this one occurrence is touched.
    return content[:head.start('key')] + replacement + content[array_end:]


def extract_and_update_all_interview_questions():
    """Extract interview questions per job group and write them into the mock file.

    Steps:

    1. Load the job list from ``网页未导入数据/能源产业/能源岗位简历.json``.
    2. For each job group (``简历岗位群``), parse the ``面试题内容`` of the first
       job in that group that yields at least one question.
    3. Copy ``src/mocks/resumeInterviewMock.js`` to a timestamped backup.
    4. If the mock file contains a ``const industries = [...];`` array, replace
       the ``subQuestions`` array of every extracted group and write the file
       back; otherwise report an error and leave the file untouched.

    Progress and a summary are printed to stdout. Groups that cannot be
    located in the mock file are reported and skipped.

    Returns:
        None.

    Raises:
        OSError: If the input JSON or the mock file cannot be read, copied
            or written.
        json.JSONDecodeError: If the input file is not valid JSON.
    """
    # Load the job/resume data.
    with open("网页未导入数据/能源产业/能源岗位简历.json", 'r', encoding='utf-8') as f:
        energy_jobs = json.load(f)

    # Collect questions per job group; the first job with questions wins.
    interview_questions = {}
    for job in energy_jobs:
        group_name = job.get("简历岗位群", "")
        if not group_name or group_name in interview_questions:
            continue
        raw_questions = job.get("面试题内容")
        if not raw_questions:
            # Missing, null or empty content: nothing to parse.
            continue
        questions = parse_interview_questions(raw_questions)
        if questions:
            interview_questions[group_name] = questions
            print(f"{group_name}: 提取了 {len(questions)} 个问题")

    mock_file = "src/mocks/resumeInterviewMock.js"

    # Back up the mock file before touching it.
    backup_path = f"{mock_file}.backup_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}"
    shutil.copy(mock_file, backup_path)
    print(f"\n✅ 已备份文件到:{backup_path}")

    with open(mock_file, 'r', encoding='utf-8') as f:
        content = f.read()

    # Only proceed when the file has the expected top-level array.
    if not re.search(r'const industries = (\[[\s\S]*?\]);', content):
        print("❌ 未找到industries数组")
        return

    # The file is not strict JSON, so each group's array is patched textually.
    new_content = content
    for group_name, questions in interview_questions.items():
        updated = _replace_sub_questions(new_content, group_name, questions)
        if updated is None:
            print(f"⚠️ 未在mock文件中找到岗位群: {group_name}")
        else:
            new_content = updated

    with open(mock_file, 'w', encoding='utf-8') as f:
        f.write(new_content)

    print("\n✅ 成功更新所有面试题数据")
    print("\n📊 更新统计:")
    total_questions = 0
    for group_name, questions in interview_questions.items():
        total_questions += len(questions)
        print(f" - {group_name}: {len(questions)}个问题")
    print(f"\n📈 总计:{len(interview_questions)}个岗位群,{total_questions}个面试题")
if __name__ == "__main__":
    # Script entry point: run the extraction only when executed directly,
    # not when this module is imported.
    extract_and_update_all_interview_questions()