Files
ALL-teach_sys/frontend_能源/extract_all_interview_questions.py

167 lines
6.1 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import json
import re
from collections import defaultdict
import datetime
import shutil
# A question line is "<digits>." followed by whitespace and the question text.
_QUESTION_LINE_RE = re.compile(r'^(\d+)\.\s+(.+)$')
# Substrings that mark the start of an answer (ASCII and full-width colon).
_ANSWER_MARKERS = ('示例答案', '答案:', '答案:')
# Strips everything up to and including the marker and an optional colon.
_ANSWER_PREFIX_RE = re.compile(r'^.*?(示例答案|答案)[::]?\s*')


def parse_interview_questions(content):
    """Parse raw interview-question text into question/answer records.

    The text is scanned line by line (each line is stripped first):

    * A line of the form ``N. text`` starts a new question. Trailing
      question marks (ASCII ``?`` or full-width ``?``) are removed from
      the question text. Note that this also applies to numbered lines
      that appear inside an answer.
    * A line containing ``示例答案``, ``答案:`` or ``答案:`` starts the
      answer; any text after the marker on the same line is kept.
    * While inside an answer, non-empty lines are appended to it.
    * A line starting with ``#`` (a heading) is never part of an answer.
      If the current question already has answer text, the heading closes
      that question, and following lines are ignored until the next
      question line.

    Questions that end up without any answer text are dropped, but they
    still consume an id, so ids may have gaps.

    Args:
        content: The raw text to parse. Empty or ``None`` yields ``[]``.

    Returns:
        A list of dicts with keys ``id`` (``"q1"``, ``"q2"``, ...),
        ``question`` and ``answer`` (answer lines joined with ``\\n``).
    """
    questions = []
    if not content:
        return questions

    def store(question, answer_lines):
        # Only questions that actually collected an answer are kept.
        if not question or not answer_lines:
            return
        answer_text = '\n'.join(answer_lines).strip()
        if answer_text:
            question['answer'] = answer_text
            questions.append(question)

    current_question = None
    current_answer = []
    question_id = 1
    in_answer = False

    for raw_line in content.split('\n'):
        line = raw_line.strip()

        question_match = _QUESTION_LINE_RE.match(line)
        if question_match:
            # A new question closes the previous one.
            store(current_question, current_answer)
            # Always start from an empty buffer so that answer text seen
            # before the first question cannot leak into this one.
            current_answer = []
            current_question = {
                'id': f'q{question_id}',
                'question': question_match.group(2).rstrip('??'),
                'answer': '',
            }
            question_id += 1
            in_answer = False
        elif any(marker in line for marker in _ANSWER_MARKERS):
            in_answer = True
            # The answer may begin on the marker line itself.
            inline_answer = _ANSWER_PREFIX_RE.sub('', line).strip()
            if inline_answer:
                current_answer.append(inline_answer)
        elif line.startswith('#'):
            # Headings must be handled before answer collection; otherwise
            # the collection branch would swallow them and a new section
            # would never terminate the current answer.
            if current_question and current_answer:
                store(current_question, current_answer)
                current_question = None
                current_answer = []
                in_answer = False
        elif in_answer and current_question and line:
            current_answer.append(line)

    # The last question has no following line to close it.
    store(current_question, current_answer)
    return questions
def _replace_sub_questions(content, group_name, questions):
    """Return *content* with one group's ``subQuestions`` array replaced.

    The group is located by its ``"name": "<group_name>"`` entry followed
    (lazily) by ``"questions":`` and a ``"subQuestions": [...]`` array; the
    first ``subQuestions`` array inside that span is replaced with
    *questions* rendered as JSON.

    Returns ``None`` when the group cannot be located.

    NOTE: the array is matched with ``[^\\]]*``, i.e. up to the first ``]``.
    An existing array whose text contains ``]`` will only be replaced up to
    that character.
    """
    sub_pattern = r'"subQuestions":\s*\[[^\]]*\]'
    # re.escape keeps regex metacharacters in the group name literal.
    group_pattern = (
        rf'"name": "{re.escape(group_name)}"'
        rf'[\s\S]*?"questions":[\s\S]*?{sub_pattern}'
    )
    group_match = re.search(group_pattern, content)
    if group_match is None:
        return None
    sub_match = re.search(sub_pattern, group_match.group(0))
    if sub_match is None:
        return None

    rendered = json.dumps(questions, ensure_ascii=False, indent=8)
    # NOTE(review): a single-space prefix is added on top of indent=8;
    # confirm this matches the formatting expected in the mock file.
    rendered = '\n'.join(
        ' ' + line if line.strip() else line
        for line in rendered.split('\n')
    )
    replacement = f'"subQuestions": {rendered}'

    # Splice by position so that only this occurrence is rewritten.
    start = group_match.start() + sub_match.start()
    end = group_match.start() + sub_match.end()
    return content[:start] + replacement + content[end:]


def extract_and_update_all_interview_questions():
    """Extract interview questions per job group and write them to the mock.

    Steps:

    1. Load ``网页未导入数据/能源产业/能源岗位简历.json`` and, for each job
       group (``简历岗位群``), parse the ``面试题内容`` of the first job that
       yields at least one question.
    2. Back up ``src/mocks/resumeInterviewMock.js`` to a timestamped copy.
    3. If the mock contains a ``const industries = [...]`` array, replace
       each group's ``subQuestions`` array with the parsed questions and
       write the file back; otherwise report the array as missing and
       leave the file unchanged.

    Paths are relative to the current working directory. Progress and a
    summary are printed to stdout. Groups that cannot be located in the
    mock file are reported with a warning.
    """
    with open("网页未导入数据/能源产业/能源岗位简历.json", 'r', encoding='utf-8') as f:
        energy_jobs = json.load(f)

    # Group name -> parsed questions; the first job with questions wins.
    interview_questions = {}
    for job in energy_jobs:
        group_name = job.get("简历岗位群", "")
        if not group_name or group_name in interview_questions:
            continue
        if "面试题内容" not in job:
            continue
        questions = parse_interview_questions(job["面试题内容"])
        if questions:
            interview_questions[group_name] = questions
            print(f"{group_name}: 提取了 {len(questions)} 个问题")

    mock_file = "src/mocks/resumeInterviewMock.js"

    # Keep a timestamped backup before touching the mock file.
    backup_path = f"{mock_file}.backup_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}"
    shutil.copy(mock_file, backup_path)
    print(f"\n✅ 已备份文件到:{backup_path}")

    with open(mock_file, 'r', encoding='utf-8') as f:
        content = f.read()

    # The mock is JavaScript, not JSON, so it is patched textually.
    if not re.search(r'const industries = (\[[\s\S]*?\]);', content):
        print("❌ 未找到industries数组")
        return

    new_content = content
    for group_name, questions in interview_questions.items():
        updated = _replace_sub_questions(new_content, group_name, questions)
        if updated is None:
            # Do not silently skip groups that are absent from the mock.
            print(f"⚠️ 未在mock文件中找到岗位群:{group_name}")
            continue
        new_content = updated

    with open(mock_file, 'w', encoding='utf-8') as f:
        f.write(new_content)

    print("\n✅ 成功更新所有面试题数据")
    print("\n📊 更新统计:")
    total_questions = 0
    for group_name, questions in interview_questions.items():
        total_questions += len(questions)
        print(f" - {group_name}: {len(questions)}个问题")
    print(f"\n📈 总计:{len(interview_questions)}个岗位群,{total_questions}个面试题")
# Run the extraction/update pipeline only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    extract_and_update_all_interview_questions()