#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Extract interview questions from the energy job data and write them
into the resume/interview mock file."""

import json
import re
from collections import defaultdict
import datetime
import shutil

# A question line is "<number>. <text>" (ASCII period followed by whitespace).
_QUESTION_LINE_RE = re.compile(r'^(\d+)\.\s+(.+)$')
# Markdown heading lines; only horizontal whitespace may follow the hashes so
# the pattern can never run past the end of the line into the next one.
_HEADING_RE = re.compile(r'^#+(?:[ \t].*)?$', flags=re.MULTILINE)
# Prefixes that mark the start of an answer section.
_ANSWER_MARKERS = ('示例答案', '答案')
# Answer marker + half/full-width colon + answer text on the same line.
_INLINE_ANSWER_RE = re.compile(r'^(?:示例答案|答案)\s*[:\uff1a]\s*(.+)$')
# `"name": "<value>"` field inside the mock file.
_NAME_FIELD_RE = re.compile(r'"name":\s*"([^"]+)"')
_SUB_QUESTIONS_KEY = '"subQuestions":'
# How many lines before a `"questions":` line are searched for a `"name":`.
_NAME_LOOKBACK = 20


def _store_question(questions, question, answer_lines):
    """Append *question* to *questions* if it exists and has answer text."""
    if question and answer_lines:
        question['answer'] = ' '.join(answer_lines).strip()
        questions.append(question)


def parse_interview_questions(content):
    """Parse interview-question text into a list of question dicts.

    A line of the form ``<number>. <text>`` starts a question (trailing
    half- or full-width question marks are removed from the text).  A line
    starting with ``示例答案`` or ``答案`` starts the answer: anything
    collected for the question so far is discarded, and text following a
    colon on the marker line itself is kept as the first answer line.
    Every other non-empty line after a question is answer text; answer
    lines are joined with single spaces.  Markdown heading lines are
    ignored.

    Questions that end up with no answer text are dropped.  Ids are
    assigned when a question line is seen, so a dropped question leaves a
    gap in the ``qN`` sequence.

    Args:
        content: Raw question/answer text.  Non-string values (e.g. a JSON
            ``null``) are treated as empty text.

    Returns:
        At most five dicts with the keys ``id``, ``question``, ``answer``,
        ``difficulty`` and ``tags``.  If nothing could be parsed, two
        built-in default questions are returned instead.
    """
    if not isinstance(content, str):
        content = ''

    # Drop markdown headings.
    content = _HEADING_RE.sub('', content)

    questions = []
    current_question = None
    current_answer = []
    question_id = 1

    for raw_line in content.split('\n'):
        line = raw_line.strip()
        if not line:
            continue

        question_match = _QUESTION_LINE_RE.match(line)
        if question_match:
            # A new question closes the previous one.
            _store_question(questions, current_question, current_answer)
            current_answer = []
            current_question = {
                'id': f'q{question_id}',
                # Strip trailing '?' and full-width U+FF1F question marks.
                'question': question_match.group(2).rstrip('?\uff1f'),
                'answer': '',
                'difficulty': '中等',
                'tags': ['能源行业', '专业知识']
            }
            question_id += 1
            continue

        if line.startswith(_ANSWER_MARKERS):
            # The marker restarts the answer buffer; keep any answer text
            # that shares the line with the marker instead of losing it.
            inline = _INLINE_ANSWER_RE.match(line)
            current_answer = [inline.group(1)] if inline else []
            continue

        if current_question:
            current_answer.append(line)

    # Close the last question.
    _store_question(questions, current_question, current_answer)

    # Fall back to default questions when nothing was found.
    if not questions:
        questions = [
            {
                'id': 'q1',
                'question': '请介绍一下你的专业背景',
                'answer': '我具有能源相关专业背景,熟悉行业标准和规范,有实际项目经验。',
                'difficulty': '简单',
                'tags': ['基础问题']
            },
            {
                'id': 'q2',
                'question': '你为什么选择这个岗位',
                'answer': '这个岗位与我的专业背景高度匹配,能够发挥我的专业技能。',
                'difficulty': '简单',
                'tags': ['基础问题']
            }
        ]

    return questions[:5]  # Only the first five questions are used.


def _collect_group_questions(energy_jobs):
    """Map each job-group name to the questions of its first job record."""
    interview_questions = {}
    for job in energy_jobs:
        group_name = job.get("简历岗位群", "")
        if not group_name or group_name in interview_questions:
            continue
        if "面试题内容" in job:
            interview_questions[group_name] = parse_interview_questions(
                job["面试题内容"])
    return interview_questions


def _find_group_name(lines, end):
    """Return the first ``"name"`` value in the window before index *end*.

    NOTE(review): the window is scanned front to back, so when it holds
    several ``"name":`` lines the earliest one wins, not the one nearest
    to the ``"questions":`` line.  Kept as-is because the mock layout is
    not visible from this script - confirm against the real file.
    """
    for idx in range(max(0, end - _NAME_LOOKBACK), end):
        if '"name":' in lines[idx]:
            name_match = _NAME_FIELD_RE.search(lines[idx])
            if name_match:
                return name_match.group(1)
    return None


def _find_array_end(lines, line_idx, col):
    """Locate the ``]`` closing the ``[`` at ``lines[line_idx][col]``.

    Brackets inside quoted strings are ignored.  JavaScript comments are
    not understood; a quote opened by ``"`` or ``'`` is dropped at the end
    of its line so a stray apostrophe cannot hide the rest of the file.

    Returns:
        ``(line index, column just past the bracket)`` or ``None`` when
        the array is never closed.
    """
    depth = 0
    quote = None
    for idx in range(line_idx, len(lines)):
        text = lines[idx]
        escaped = False
        for pos in range(col if idx == line_idx else 0, len(text)):
            ch = text[pos]
            if quote is not None:
                if escaped:
                    escaped = False
                elif ch == '\\':
                    escaped = True
                elif ch == quote:
                    quote = None
            elif ch in '"\'`':
                quote = ch
            elif ch == '[':
                depth += 1
            elif ch == ']':
                depth -= 1
                if depth == 0:
                    return idx, pos + 1
        if quote in ('"', "'"):
            quote = None
    return None


def _render_sub_questions(questions, indent):
    """Serialize *questions* as JSON, indenting continuation lines."""
    first, *rest = json.dumps(
        questions, ensure_ascii=False, indent=8).split('\n')
    pad = ' ' * indent
    # The first line follows the key on the same line, so it gets no pad.
    return '\n'.join([first] + [pad + part for part in rest])


def _replace_sub_questions(lines, idx, questions):
    """Build the replacement text for the ``subQuestions`` entry at *idx*.

    The whole existing array is replaced, however many lines it spans, and
    whatever precedes the key or follows the closing bracket (for example
    a trailing comma) is preserved.  If no array can be identified, the
    single line is replaced as a whole.

    Returns:
        ``(replacement text, index of the first line after the entry)``.
    """
    line = lines[idx]
    indent = len(line) - len(line.lstrip())
    rendered = _render_sub_questions(questions, indent)

    key_pos = line.index(_SUB_QUESTIONS_KEY)
    value_start = key_pos + len(_SUB_QUESTIONS_KEY)
    bracket_pos = line.find('[', value_start)
    array_end = None
    # Only accept the bracket if it is the first thing after the key.
    if bracket_pos != -1 and not line[value_start:bracket_pos].strip():
        array_end = _find_array_end(lines, idx, bracket_pos)

    if array_end is None:
        return ' ' * indent + f'"subQuestions": {rendered}\n', idx + 1

    end_idx, end_col = array_end
    suffix = lines[end_idx][end_col:]
    return f'{line[:key_pos]}"subQuestions": {rendered}{suffix}', end_idx + 1


def _update_mock_lines(lines, interview_questions):
    """Return the mock file lines with ``subQuestions`` arrays replaced.

    For every ``"questions":`` line the group name is looked up in the
    preceding lines, and the first ``"subQuestions":`` entry that follows
    is replaced when questions exist for that group.  All other lines are
    copied unchanged.
    """
    new_lines = []
    total = len(lines)
    i = 0
    while i < total:
        line = lines[i]
        new_lines.append(line)
        i += 1
        if '"questions":' not in line:
            continue

        group_name = _find_group_name(lines, i)

        # Copy lines until the first subQuestions entry of this array.
        while i < total:
            if _SUB_QUESTIONS_KEY not in lines[i]:
                new_lines.append(lines[i])
                i += 1
                continue
            if group_name and group_name in interview_questions:
                replacement, i = _replace_sub_questions(
                    lines, i, interview_questions[group_name])
                new_lines.append(replacement)
            else:
                new_lines.append(lines[i])
                i += 1
            break
    return new_lines


def extract_and_update_interview_questions():
    """Read the energy job JSON and update the mock file in place.

    Parses the interview questions of each job group, writes a timestamped
    backup copy of the mock file next to it, rewrites the mock file with
    the new ``subQuestions`` arrays, and prints a summary.
    """
    # Load the energy job records.
    with open("网页未导入数据/能源产业/能源岗位简历.json", 'r', encoding='utf-8') as f:
        energy_jobs = json.load(f)

    interview_questions = _collect_group_questions(energy_jobs)

    mock_file = "src/mocks/resumeInterviewMock.js"

    # Back up the mock file before touching it.
    backup_path = f"{mock_file}.backup_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}"
    shutil.copy(mock_file, backup_path)
    print(f"✅ 已备份文件到:{backup_path}")

    with open(mock_file, 'r', encoding='utf-8') as f:
        lines = f.readlines()

    new_lines = _update_mock_lines(lines, interview_questions)

    with open(mock_file, 'w', encoding='utf-8') as f:
        f.writelines(new_lines)

    print("✅ 成功更新面试题数据")
    print("\n📊 更新的岗位群面试题:")
    for group_name, questions in interview_questions.items():
        print(f" - {group_name}: {len(questions)}个问题")


if __name__ == "__main__":
    extract_and_update_interview_questions()