#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Extract interview questions from the energy-industry resume data and
write them into the front-end mock file.

The script reads a JSON list of job records, parses the free-text interview
question field of the first record seen for each job group, and replaces the
first ``"subQuestions": [...]`` array found after that group's ``"name"``
entry in the JavaScript mock file.
"""

import datetime
import json
import re
import shutil
from collections import defaultdict

# Default locations used when the script is run without arguments.
DEFAULT_SOURCE_PATH = "网页未导入数据/能源产业/能源岗位简历.json"
DEFAULT_MOCK_FILE = "src/mocks/resumeInterviewMock.js"

# A question line is "<digits>. <text>".
_QUESTION_RE = re.compile(r'^(\d+)\.\s+(.+)$')
# Substrings that mark the start of an answer (half- and full-width colon).
_ANSWER_MARKERS = ('示例答案', '答案:', '答案:')
# Everything up to and including the answer marker and its optional colon.
_ANSWER_PREFIX_RE = re.compile(r'^.*?(示例答案|答案)[::]?\s*')
# Prefix added to every non-empty line of the generated JSON (8 spaces, as
# stated by the original comment next to this step).
_SUB_QUESTIONS_INDENT = ' ' * 8


def _store_question(questions, question, answer_lines):
    """Append *question* to *questions* if it has a non-empty answer."""
    if not question or not answer_lines:
        return
    answer_text = '\n'.join(answer_lines).strip()
    if answer_text:
        question['answer'] = answer_text
        questions.append(question)


def parse_interview_questions(content):
    """Parse interview-question text into a list of question dicts.

    The text is processed line by line (each line is stripped first):

    * ``"<number>. <text>"`` starts a new question; trailing ``?`` / ``?``
      characters are removed from the question text.
    * A line containing ``示例答案``, ``答案:`` or ``答案:`` starts the answer;
      any text after the marker on the same line is the first answer line.
    * While inside an answer, every following non-empty line is appended to
      the answer.
    * A line starting with ``#`` (a heading) closes the current question if
      it already has answer text; otherwise the heading is ignored.

    Questions that never receive a non-empty answer are dropped, but they
    still consume an id, so ids may have gaps. A numbered line inside an
    answer is treated as the start of a new question.

    Args:
        content: The raw interview-question text.

    Returns:
        A list of ``{'id': 'q<N>', 'question': str, 'answer': str}`` dicts in
        the order the questions appear.
    """
    questions = []
    current_question = None
    current_answer = []
    question_id = 1
    in_answer = False

    for raw_line in content.split('\n'):
        line = raw_line.strip()

        question_match = _QUESTION_RE.match(line)
        if question_match:
            # A new question closes the previous one.
            _store_question(questions, current_question, current_answer)
            current_answer = []
            current_question = {
                'id': f'q{question_id}',
                'question': question_match.group(2).rstrip('??'),
                'answer': '',
            }
            question_id += 1
            in_answer = False
        elif any(marker in line for marker in _ANSWER_MARKERS):
            # Checked before the heading test so that a heading such as
            # "### 示例答案" still opens an answer.
            in_answer = True
            answer_in_line = _ANSWER_PREFIX_RE.sub('', line).strip()
            if answer_in_line:
                current_answer.append(answer_in_line)
        elif line.startswith('#'):
            # A heading marks the next section. This test must come before
            # the collection branch below, otherwise it can never run and
            # text following the heading leaks into the previous answer.
            if current_question and current_answer:
                _store_question(questions, current_question, current_answer)
                current_question = None
                current_answer = []
                in_answer = False
        elif in_answer and current_question and line:
            current_answer.append(line)

    # Store the last question of the text.
    _store_question(questions, current_question, current_answer)
    return questions


def _replace_sub_questions(content, group_name, questions):
    """Replace the first ``subQuestions`` array following *group_name*.

    Returns a ``(new_content, replaced)`` tuple; ``replaced`` is ``False``
    (and the content unchanged) when no matching entry exists.
    """
    # The group name is data, not a pattern: escape it so characters such as
    # "(" or "+" are matched literally.
    pattern = (
        f'"name": "{re.escape(group_name)}"'
        r'[\s\S]*?"questions":[\s\S]*?("subQuestions":\s*\[[^\]]*\])'
    )
    match = re.search(pattern, content)
    if not match:
        return content, False

    dumped = json.dumps(questions, ensure_ascii=False, indent=8)
    indented = '\n'.join(
        _SUB_QUESTIONS_INDENT + line if line.strip() else line
        for line in dumped.split('\n')
    )
    replacement = f'"subQuestions": {indented}'

    # Splice by position so that only this one occurrence is rewritten, even
    # if the same text (e.g. an empty array) appears elsewhere in the file.
    start, end = match.span(1)
    return content[:start] + replacement + content[end:], True


def extract_and_update_all_interview_questions(
    source_path=DEFAULT_SOURCE_PATH,
    mock_file=DEFAULT_MOCK_FILE,
):
    """Extract interview questions per job group and update the mock file.

    For each job record in *source_path*, the first record of every job group
    (key ``简历岗位群``) that yields at least one parsed question (from key
    ``面试题内容``) provides that group's questions. The mock file is backed
    up to ``<mock_file>.backup_<timestamp>`` and, if it contains a
    ``const industries = [...];`` declaration, rewritten with the new
    ``subQuestions`` arrays. Progress and statistics are printed.

    Args:
        source_path: Path of the JSON file holding the job records.
        mock_file: Path of the JavaScript mock file to update.

    Raises:
        OSError: If a file cannot be read, copied or written.
        json.JSONDecodeError: If *source_path* is not valid JSON.
    """
    with open(source_path, 'r', encoding='utf-8') as f:
        energy_jobs = json.load(f)

    # Group by job group; the first record that yields questions wins.
    interview_questions = {}
    for job in energy_jobs:
        group_name = job.get("简历岗位群", "")
        if not group_name or group_name in interview_questions:
            continue
        raw_questions = job.get("面试题内容")
        if not raw_questions:
            # Missing, null or empty question text: nothing to parse.
            continue
        questions = parse_interview_questions(raw_questions)
        if questions:
            interview_questions[group_name] = questions
            print(f"✅ {group_name}: 提取了 {len(questions)} 个问题")

    # Back up the mock file before touching it.
    timestamp = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
    backup_path = f"{mock_file}.backup_{timestamp}"
    shutil.copy(mock_file, backup_path)
    print(f"\n✅ 已备份文件到:{backup_path}")

    with open(mock_file, 'r', encoding='utf-8') as f:
        content = f.read()

    # Only proceed when the file has the expected industries declaration.
    if not re.search(r'const industries = (\[[\s\S]*?\]);', content):
        print("❌ 未找到industries数组")
        return

    # The mock file may not be strict JSON, so each group's subQuestions
    # array is replaced textually instead of parsing the whole structure.
    new_content = content
    for group_name, questions in interview_questions.items():
        new_content, replaced = _replace_sub_questions(
            new_content, group_name, questions
        )
        if not replaced:
            print(f"⚠️ 未在mock文件中找到岗位群:{group_name}")

    with open(mock_file, 'w', encoding='utf-8') as f:
        f.write(new_content)

    print("\n✅ 成功更新所有面试题数据")
    print("\n📊 更新统计:")
    total_questions = 0
    for group_name, questions in interview_questions.items():
        total_questions += len(questions)
        print(f" - {group_name}: {len(questions)}个问题")
    print(f"\n📈 总计:{len(interview_questions)}个岗位群,{total_questions}个面试题")


if __name__ == "__main__":
    extract_and_update_all_interview_questions()