167 lines
6.1 KiB
Python
167 lines
6.1 KiB
Python
|
|
#!/usr/bin/env python3
|
|||
|
|
# -*- coding: utf-8 -*-
|
|||
|
|
|
|||
|
|
import json
|
|||
|
|
import re
|
|||
|
|
from collections import defaultdict
|
|||
|
|
import datetime
|
|||
|
|
import shutil
|
|||
|
|
|
|||
|
|
def _store_question(question, answer_lines, questions):
    """Append *question* to *questions* when it has a non-empty answer.

    The collected answer lines are joined with newlines and stripped; a
    question whose answer is missing or blank is silently dropped.
    """
    if not question or not answer_lines:
        return
    answer_text = '\n'.join(answer_lines).strip()
    if answer_text:
        question['answer'] = answer_text
        questions.append(question)


def parse_interview_questions(content):
    """Parse interview-question text into a list of question/answer dicts.

    The text is processed line by line (each line is stripped first):

    * A line of the form ``<digits>. <text>`` starts a new question. One
      trailing run of question marks (ASCII ``?`` or full-width ``?``) is
      removed from the question text.
    * A line containing ``示例答案``, ``答案:`` or ``答案:`` starts the answer;
      any text following the marker on the same line is the first answer line.
    * While inside an answer, every non-empty line that does not start with
      ``#`` is appended to the answer. Lines starting with ``#`` are skipped.

    Args:
        content: The raw interview-question text. ``None`` or an empty
            string yields an empty list.

    Returns:
        A list of dicts with the keys ``'id'`` (``'q1'``, ``'q2'``, ...),
        ``'question'`` and ``'answer'``. Questions without a non-empty
        answer are omitted, but they still consume an id, so ids follow the
        order of question lines in the text rather than the output list.
    """
    questions = []
    if not content:
        return questions

    question_re = re.compile(r'^(\d+)\.\s+(.+)$')
    # Strips everything up to and including the marker, an optional ASCII or
    # full-width colon, and any following whitespace.
    marker_prefix_re = re.compile(r'^.*?(示例答案|答案)[::]?\s*')
    answer_markers = ('示例答案', '答案:', '答案:')

    current_question = None
    current_answer = []
    question_id = 1
    in_answer = False

    for raw_line in content.split('\n'):
        line = raw_line.strip()

        question_match = question_re.match(line)
        if question_match:
            # A new question closes the previous one.
            _store_question(current_question, current_answer, questions)
            current_answer = []
            current_question = {
                'id': f'q{question_id}',
                'question': question_match.group(2).rstrip('??'),
                'answer': '',
            }
            question_id += 1
            in_answer = False
        elif any(marker in line for marker in answer_markers):
            in_answer = True
            # The answer may start on the marker line itself.
            inline_answer = marker_prefix_re.sub('', line, count=1).strip()
            if inline_answer:
                current_answer.append(inline_answer)
        elif in_answer and current_question and line and not line.startswith('#'):
            # Heading lines ('#...') inside an answer are skipped, not kept.
            current_answer.append(line)

    # The last question has no following question line to close it.
    _store_question(current_question, current_answer, questions)
    return questions
|
|||
|
|
|
|||
|
|
def _find_closing_bracket(text, open_index):
    """Return the index of the ``]`` closing the ``[`` at *open_index*.

    Brackets inside double-quoted string literals (with backslash escapes)
    are ignored, so a ``]`` inside an answer string does not end the array.
    Returns ``-1`` when no balanced closing bracket is found.
    """
    depth = 0
    in_string = False
    index = open_index
    length = len(text)
    while index < length:
        char = text[index]
        if in_string:
            if char == '\\':
                index += 2  # skip the escaped character as well
                continue
            if char == '"':
                in_string = False
        elif char == '"':
            in_string = True
        elif char == '[':
            depth += 1
        elif char == ']':
            depth -= 1
            if depth == 0:
                return index
        index += 1
    return -1


def extract_and_update_all_interview_questions(
    source_path="网页未导入数据/能源产业/能源岗位简历.json",
    mock_file="src/mocks/resumeInterviewMock.js",
):
    """Extract interview questions per job group and write them into the mock file.

    Steps:

    1. Load the job list from *source_path* (JSON). For each distinct
       ``简历岗位群`` value, the first job whose ``面试题内容`` parses to at least
       one question supplies that group's questions.
    2. Copy *mock_file* to a timestamped ``.backup_YYYYmmdd_HHMMSS`` sibling.
    3. If the mock file contains a ``const industries = [...];`` declaration,
       replace, for every extracted group, the first ``"subQuestions"`` array
       that follows ``"name": "<group>"`` and ``"questions":`` with the
       extracted questions, then write the file back and print a summary.
       Otherwise print an error and leave the mock file untouched.

    Args:
        source_path: Path of the JSON file holding the job records.
        mock_file: Path of the JavaScript mock file to update in place.

    Returns:
        None. Progress and results are reported via ``print``.

    Raises:
        OSError: If either file cannot be opened or the backup copy fails.
        json.JSONDecodeError: If *source_path* is not valid JSON.
    """
    with open(source_path, 'r', encoding='utf-8') as f:
        energy_jobs = json.load(f)

    # Collect questions per job group; the first job that yields questions wins.
    interview_questions = {}
    for job in energy_jobs:
        group_name = job.get("简历岗位群", "")
        if not group_name or group_name in interview_questions:
            continue
        raw_questions = job.get("面试题内容")
        if not raw_questions:
            # Missing, null or empty content cannot yield questions.
            continue
        questions = parse_interview_questions(raw_questions)
        if questions:
            interview_questions[group_name] = questions
            print(f"✅ {group_name}: 提取了 {len(questions)} 个问题")

    # Back up the mock file before touching it.
    backup_path = f"{mock_file}.backup_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}"
    shutil.copy(mock_file, backup_path)
    print(f"\n✅ 已备份文件到:{backup_path}")

    with open(mock_file, 'r', encoding='utf-8') as f:
        content = f.read()

    # The industries declaration is only used as a sanity check on the file.
    if not re.search(r'const industries = (\[[\s\S]*?\]);', content):
        print("❌ 未找到industries数组")
        return

    # Lazily skip from the group's name to the first "subQuestions" array that
    # follows a "questions" key; group 1 spans the key up to the opening '['.
    locator_tail = r'"[\s\S]*?"questions":[\s\S]*?("subQuestions":\s*\[)'

    new_content = content
    for group_name, questions in interview_questions.items():
        # Escape the name so regex metacharacters in it are matched literally.
        locator = '"name": "' + re.escape(group_name) + locator_tail
        located = re.search(locator, new_content)
        if not located:
            print(f"⚠️ {group_name}: 未在mock文件中找到对应的subQuestions,已跳过")
            continue

        array_open = located.end(1) - 1
        array_close = _find_closing_bracket(new_content, array_open)
        if array_close == -1:
            print(f"⚠️ {group_name}: subQuestions数组未正确闭合,已跳过")
            continue

        new_sub_questions = json.dumps(questions, ensure_ascii=False, indent=8)
        # Prefix every non-blank line to nest the array inside its parent.
        # NOTE(review): the original comment said "8 spaces" but the prefix
        # literal is a single space — confirm the intended width.
        new_sub_questions = '\n'.join(
            ' ' + line if line.strip() else line
            for line in new_sub_questions.split('\n')
        )
        replacement = f'"subQuestions": {new_sub_questions}'

        # Splice by index so only this exact occurrence is replaced.
        new_content = (
            new_content[:located.start(1)]
            + replacement
            + new_content[array_close + 1:]
        )

    with open(mock_file, 'w', encoding='utf-8') as f:
        f.write(new_content)

    print("\n✅ 成功更新所有面试题数据")
    print("\n📊 更新统计:")
    total_questions = 0
    for group_name, questions in interview_questions.items():
        total_questions += len(questions)
        print(f" - {group_name}: {len(questions)}个问题")
    print(f"\n📈 总计:{len(interview_questions)}个岗位群,{total_questions}个面试题")
|
|||
|
|
|
|||
|
|
# Script entry point: run the extraction and mock update only when this file
# is executed directly, not when it is imported as a module.
if __name__ == '__main__':
    extract_and_update_all_interview_questions()
|