Files
ALL-teach_sys/frontend_能源/extract_interview_questions.py

121 lines
4.1 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import json
import re
from collections import defaultdict
import datetime
import shutil
def parse_interview_questions(content):
    """
    Parse raw interview-question text into question/answer records.

    The text is split into paragraphs on blank lines. Paragraphs starting
    with "#" are skipped as headings. A paragraph that starts with "示例答案"
    or "答案" is an answer: it completes the pending question, or is ignored
    when no question is pending. Any other paragraph that starts with
    "<digits>." or contains a question mark (ASCII "?" or full-width "？")
    opens a new question, replacing a pending one that never got an answer.

    Args:
        content: Raw interview text. ``None`` or an empty string yields an
            empty list.

    Returns:
        A list of dicts with the keys ``id``, ``question``, ``answer``,
        ``difficulty`` (always "中等") and ``tags`` (always an empty list).
        Ids ("q1", "q2", ...) are assigned per detected question, so a
        question dropped for lacking an answer leaves a gap in the numbering.
    """
    if not content:
        return []

    answer_prefixes = ('示例答案', '答案')
    questions = []
    question_id = 1
    current_question = None

    # Split on blank lines; allowing whitespace between the two newlines also
    # covers CRLF line endings and "blank" lines that contain only spaces.
    for section in re.split(r'\n\s*\n', content):
        section = section.strip()
        if not section or section.startswith('#'):
            continue

        # Answers are checked before questions so that an answer containing a
        # question mark is not mistaken for a new question.
        if section.startswith(answer_prefixes):
            if current_question is not None:
                # Drop the label and an optional ASCII or full-width colon.
                answer_text = re.sub(r'^(?:示例答案|答案)\s*[:：]?\s*', '', section).strip()
                current_question['answer'] = answer_text
                questions.append(current_question)
                current_question = None
            continue

        if re.match(r'^\d+\.', section) or '?' in section or '？' in section:
            # Strip the leading "N." numbering, if present.
            question_text = re.sub(r'^\d+\.\s*', '', section).strip()
            current_question = {
                'id': f'q{question_id}',
                'question': question_text,
                'answer': '',
                'difficulty': '中等',
                'tags': []
            }
            question_id += 1

    return questions
def extract_and_update_interview_questions():
    """
    Extract interview questions per job group and write them into the mock file.

    Steps:
      1. Load the job records from "网页未导入数据/能源产业/能源岗位简历.json".
      2. For each job group (the "简历岗位群" field), parse the "面试题内容"
         of the first job that yields any questions and keep at most the
         first five of them.
      3. Copy "src/mocks/resumeInterviewMock.js" to a timestamped backup.
      4. Locate the ``const industries = [...];`` literal in the mock file,
         parse it as JSON, and for every industry whose "name" matches a job
         group set "subQuestions" on each entry of its "questions" list.
      5. Write the re-serialized array back in place of the original literal.

    Progress and failures are reported with ``print``; nothing is returned.
    If the industries literal is missing or is not strict JSON, the mock file
    is left unmodified (the backup copy has already been created by then).
    """
    # Load the source job records.
    with open("网页未导入数据/能源产业/能源岗位简历.json", 'r', encoding='utf-8') as f:
        energy_jobs = json.load(f)

    # Collect questions per job group; the first job in a group that yields
    # any parsed questions wins.
    interview_questions = {}
    for job in energy_jobs:
        group_name = job.get("简历岗位群", "")
        if not group_name or group_name in interview_questions:
            continue
        if "面试题内容" in job:
            questions = parse_interview_questions(job["面试题内容"])
            if questions:
                interview_questions[group_name] = questions[:5]  # first 5 per group

    mock_file = "src/mocks/resumeInterviewMock.js"

    # Back up the mock file before touching it.
    backup_path = f"{mock_file}.backup_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}"
    shutil.copy(mock_file, backup_path)
    print(f"✅ 已备份文件到:{backup_path}")

    with open(mock_file, 'r', encoding='utf-8') as f:
        content = f.read()

    # Find the industries array literal; the non-greedy match stops at the
    # first "];" after the opening bracket.
    industries_match = re.search(r'const industries = (\[[\s\S]*?\]);', content)
    if not industries_match:
        print("❌ 未找到industries数组")
        return

    # The literal lives in a JS file and is only usable here if it happens to
    # be strict JSON; report the problem instead of crashing with a traceback.
    try:
        industries = json.loads(industries_match.group(1))
    except json.JSONDecodeError as exc:
        print(f"❌ industries数组不是合法的JSON,无法解析: {exc}")
        return

    # Replace subQuestions for every question entry of each matching industry.
    for industry in industries:
        group_name = industry.get("name", "")
        if group_name in interview_questions:
            for question_obj in industry.get("questions", []):
                question_obj["subQuestions"] = interview_questions[group_name]

    # Splice the re-serialized array back over the original literal only,
    # leaving the rest of the file byte-for-byte intact.
    new_industries_str = json.dumps(industries, ensure_ascii=False, indent=2)
    new_content = (
        content[:industries_match.start(1)]
        + new_industries_str
        + content[industries_match.end(1):]
    )

    with open(mock_file, 'w', encoding='utf-8') as f:
        f.write(new_content)

    print("✅ 成功更新面试题数据")
    print("\n📊 更新的岗位群面试题:")
    for group_name, questions in interview_questions.items():
        print(f" - {group_name}: {len(questions)}个问题")
# Script entry point: run the update only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    extract_and_update_interview_questions()