Files
ALL-teach_sys/frontend_能源/extract_interview_questions_v2.py

173 lines
6.0 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import json
import re
from collections import defaultdict
import datetime
import shutil
def parse_interview_questions(content, max_questions=5):
    """Parse numbered interview questions and their answers from raw text.

    A question is a line of the form ``<digits>. <text>``. Every following
    line that is neither a question nor an answer marker is collected as part
    of that question's answer; the collected lines are joined with single
    spaces. A line starting with ``示例答案`` or ``答案`` is an answer marker:
    it discards whatever was collected so far for the current question, and
    if the marker is followed by a colon and text on the same line
    (``答案：...``) that text becomes the first piece of the answer.

    Args:
        content: Raw text (markdown headings are ignored). ``None`` is
            treated as empty text.
        max_questions: Maximum number of questions returned; ``None`` returns
            all of them. Defaults to 5.

    Returns:
        A list of dicts with the keys ``id``, ``question``, ``answer``,
        ``difficulty`` and ``tags``. Questions without any answer text are
        dropped (their id is still consumed, so ids may have gaps). When no
        question with an answer is found, two generic fallback questions are
        returned instead.
    """
    if content is None:
        content = ''

    # Drop markdown headings. The whitespace after the '#' run must not be a
    # newline, otherwise a bare '#' line would swallow the following line.
    content = re.sub(r'^#+(?:[^\S\n].*)?$', '', content, flags=re.MULTILINE)

    question_pattern = re.compile(r'^(\d+)\.\s+(.+)$')
    inline_answer_pattern = re.compile(r'^(?:示例答案|答案)\s*[:：]\s*(.+)$')
    answer_markers = ('示例答案', '答案')

    questions = []

    def store(question, answer_parts):
        # Only questions that actually collected answer text are kept.
        if question and answer_parts:
            question['answer'] = ' '.join(answer_parts).strip()
            questions.append(question)

    current_question = None
    current_answer = []
    question_id = 1

    for raw_line in content.split('\n'):
        line = raw_line.strip()
        if not line:
            continue

        question_match = question_pattern.match(line)
        if question_match:
            # A new question closes the previous one.
            store(current_question, current_answer)
            current_answer = []
            current_question = {
                'id': f'q{question_id}',
                # Strip trailing question marks, ASCII and full-width alike.
                'question': question_match.group(2).rstrip('?？'),
                'answer': '',
                'difficulty': '中等',
                'tags': ['能源行业', '专业知识'],
            }
            question_id += 1
        elif line.startswith(answer_markers):
            # The marker restarts the answer buffer; keep any answer text
            # that shares the line with the marker instead of losing it.
            current_answer = []
            inline_answer = inline_answer_pattern.match(line)
            if inline_answer and current_question:
                current_answer.append(inline_answer.group(1))
        elif current_question:
            current_answer.append(line)

    # Close the last question.
    store(current_question, current_answer)

    if not questions:
        # Nothing usable was parsed: fall back to generic questions.
        questions = [
            {
                'id': 'q1',
                'question': '请介绍一下你的专业背景',
                'answer': '我具有能源相关专业背景,熟悉行业标准和规范,有实际项目经验。',
                'difficulty': '简单',
                'tags': ['基础问题'],
            },
            {
                'id': 'q2',
                'question': '你为什么选择这个岗位',
                'answer': '这个岗位与我的专业背景高度匹配,能够发挥我的专业技能。',
                'difficulty': '简单',
                'tags': ['基础问题'],
            },
        ]

    return questions[:max_questions]
def extract_and_update_interview_questions():
    """Extract interview questions per job group and patch the mock file.

    Reads the energy job resume JSON, parses the interview question text of
    the first entry seen for each job group that carries such text, backs up
    ``src/mocks/resumeInterviewMock.js`` next to itself with a timestamp
    suffix, and then rewrites the mock file: for every ``"questions":`` line,
    the first following ``"subQuestions":`` value is replaced with the parsed
    questions of the job group whose ``"name"`` precedes it.

    The whole existing ``subQuestions`` array is replaced, including arrays
    spanning several lines, and any text before the key or after the closing
    bracket (such as a trailing comma) is preserved. This keeps the mock file
    valid when the script is run more than once.
    """
    # Load the source data.
    with open("网页未导入数据/能源产业/能源岗位简历.json", 'r', encoding='utf-8') as f:
        energy_jobs = json.load(f)

    # The first job of each group that has question text defines the group.
    interview_questions = {}
    for job in energy_jobs:
        group_name = job.get("简历岗位群", "")
        if not group_name or group_name in interview_questions:
            continue
        if "面试题内容" in job:
            interview_questions[group_name] = parse_interview_questions(job["面试题内容"])

    mock_file = "src/mocks/resumeInterviewMock.js"

    # Back up the mock file before touching it.
    backup_path = f"{mock_file}.backup_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}"
    shutil.copy(mock_file, backup_path)
    print(f"✅ 已备份文件到:{backup_path}")

    with open(mock_file, 'r', encoding='utf-8') as f:
        lines = f.readlines()

    new_lines = []
    total = len(lines)
    i = 0
    while i < total:
        line = lines[i]
        new_lines.append(line)
        i += 1
        if '"questions":' not in line:
            continue

        # The lookup window ends with the "questions" line itself.
        group_name = _find_group_name(lines, i)

        # Copy lines through until the first subQuestions entry is handled.
        while i < total:
            current = lines[i]
            if '"subQuestions":' not in current:
                new_lines.append(current)
                i += 1
                continue

            if group_name and group_name in interview_questions:
                span = _locate_sub_questions_value(lines, i)
                if span is None:
                    # Unbalanced brackets: leave the entry untouched rather
                    # than writing a half-replaced array.
                    print(f"⚠️ 未找到 {group_name} 的subQuestions数组结尾,已跳过")
                else:
                    end_index, tail = span
                    head = current.split('"subQuestions":', 1)[0]
                    pad = ' ' * (len(current) - len(current.lstrip()))
                    rendered = _render_sub_questions(interview_questions[group_name], pad)
                    new_lines.append(f'{head}"subQuestions": {rendered}{tail}\n')
                    i = end_index + 1
                    break

            new_lines.append(current)
            i += 1
            break

    # Write the patched content back.
    with open(mock_file, 'w', encoding='utf-8') as f:
        f.writelines(new_lines)

    print("✅ 成功更新面试题数据")
    print("\n📊 更新的岗位群面试题:")
    for group_name, questions in interview_questions.items():
        print(f" - {group_name}: {len(questions)}个问题")


def _find_group_name(lines, end):
    """Return the first ``"name"`` string value in the 20 lines before ``end``.

    Returns ``None`` when no line in that window carries a quoted name.
    """
    # NOTE(review): this takes the earliest match in the window, not the one
    # closest to the "questions" line; confirm against the mock file layout
    # that this is always the owning job group.
    for candidate in lines[max(0, end - 20):end]:
        if '"name":' not in candidate:
            continue
        found = re.search(r'"name":\s*"([^"]+)"', candidate)
        if found:
            return found.group(1)
    return None


def _locate_sub_questions_value(lines, start):
    """Find where the ``subQuestions`` value that begins on ``lines[start]`` ends.

    Returns ``(end_index, tail)`` where ``end_index`` is the index of the last
    line belonging to the value and ``tail`` is the text following the value
    on that line with trailing whitespace removed. Returns ``None`` when the
    value is an array whose closing bracket cannot be found.
    """
    after_key = lines[start].split('"subQuestions":', 1)[1]
    if not after_key.lstrip().startswith('['):
        # Not an array (for example null): only this line is replaced.
        return start, (',' if after_key.rstrip().endswith(',') else '')

    depth = 0
    for index in range(start, len(lines)):
        text = after_key if index == start else lines[index]
        quote = None  # quote tracking restarts on every line
        escaped = False
        for position, char in enumerate(text):
            if quote is not None:
                # Brackets inside string literals do not count.
                if escaped:
                    escaped = False
                elif char == '\\':
                    escaped = True
                elif char == quote:
                    quote = None
            elif char in '"\'':
                quote = char
            elif char == '[':
                depth += 1
            elif char == ']':
                depth -= 1
                if depth == 0:
                    return index, text[position + 1:].rstrip()
    return None


def _render_sub_questions(questions, pad):
    """Serialize ``questions`` as JSON, prefixing continuation lines with ``pad``."""
    rows = json.dumps(questions, ensure_ascii=False, indent=8).split('\n')
    # The first row follows the key on the same line, so it gets no padding.
    return '\n'.join([rows[0]] + [pad + row if row.strip() else row for row in rows[1:]])
# Run the extraction only when this file is executed as a script.
if "__main__" == __name__:
    extract_and_update_interview_questions()