#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Copy interview questions from the energy-job resume JSON into the JS mock file."""

import json
import re
from collections import defaultdict
import datetime
import shutil

# Leading "12." style numbering (plus trailing whitespace) on a question.
_NUMBER_PREFIX_RE = re.compile(r'^\d+\.\s*')
# Leading answer label, optionally followed by an ASCII or full-width colon.
_ANSWER_PREFIX_RE = re.compile(r'^(?:示例答案|答案)[::]?\s*')
_ANSWER_PREFIXES = ('示例答案', '答案')
# Chinese text normally uses the full-width question mark, so accept both.
_QUESTION_MARKS = ('?', '?')


def parse_interview_questions(content):
    """
    Parse raw interview-question text into a list of question dicts.

    The text is split on blank lines into sections. Sections starting with
    ``#`` are treated as headings and skipped. A section that starts with
    ``示例答案`` or ``答案`` is the answer to the pending question. Any other
    section that starts with ``<digits>.`` or contains a question mark
    (ASCII or full-width) starts a new pending question.

    A question is only emitted once its answer section is seen. A pending
    question that is replaced by another question before any answer arrives
    is dropped, but still consumes an id, so ids may have gaps. An answer
    section with no pending question is ignored.

    Args:
        content: The raw text holding questions and answers.

    Returns:
        A list of dicts with the keys ``id`` (``"q<N>"``), ``question``,
        ``answer``, ``difficulty`` (always ``'中等'``) and ``tags`` (always
        an empty list), in the order the answers were encountered.
    """
    questions = []
    question_id = 1
    current_question = None

    for raw_section in content.split('\n\n'):
        section = raw_section.strip()
        # Skip empty sections and heading sections.
        if not section or section.startswith('#'):
            continue

        # Answers are checked BEFORE questions: an answer may itself contain
        # a question mark and must not be mistaken for a new question.
        if section.startswith(_ANSWER_PREFIXES):
            if current_question is not None:
                current_question['answer'] = _ANSWER_PREFIX_RE.sub('', section).strip()
                questions.append(current_question)
                current_question = None
            continue

        is_numbered = _NUMBER_PREFIX_RE.match(section) is not None
        has_question_mark = any(mark in section for mark in _QUESTION_MARKS)
        if is_numbered or has_question_mark:
            current_question = {
                'id': f'q{question_id}',
                'question': _NUMBER_PREFIX_RE.sub('', section).strip(),
                'answer': '',
                'difficulty': '中等',
                'tags': []
            }
            question_id += 1

    return questions


def extract_and_update_interview_questions():
    """
    Extract interview questions per job group and write them into the mock file.

    Reads ``网页未导入数据/能源产业/能源岗位简历.json``. For each distinct,
    non-empty ``简历岗位群`` value, the first job whose ``面试题内容`` yields
    at least one parsed question supplies that group's questions (at most 5).

    Then backs up ``src/mocks/resumeInterviewMock.js`` to a timestamped copy,
    locates the ``const industries = [...];`` literal, parses it as JSON,
    replaces ``subQuestions`` on every entry of ``questions`` for industries
    whose ``name`` matches a job group, and writes the file back.

    Progress and errors are reported with ``print``; nothing is returned.
    If the array cannot be found or is not valid JSON, the mock file is left
    unmodified (the backup copy has already been created at that point).
    """
    # Load the energy-job resume data.
    with open("网页未导入数据/能源产业/能源岗位简历.json", 'r', encoding='utf-8') as f:
        energy_jobs = json.load(f)

    # Collect questions per job group; the first job with usable content wins.
    interview_questions = {}
    for job in energy_jobs:
        group_name = job.get("简历岗位群", "")
        # An empty group name would later match industries lacking a "name".
        if not group_name or group_name in interview_questions:
            continue
        if "面试题内容" in job:
            questions = parse_interview_questions(job["面试题内容"])
            if questions:
                interview_questions[group_name] = questions[:5]  # keep the first 5 per group

    mock_file = "src/mocks/resumeInterviewMock.js"

    # Back up the mock file before touching it.
    backup_path = f"{mock_file}.backup_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}"
    shutil.copy(mock_file, backup_path)
    print(f"✅ 已备份文件到:{backup_path}")

    with open(mock_file, 'r', encoding='utf-8') as f:
        content = f.read()

    # Locate the industries array literal. The non-greedy match stops at the
    # first "];" in the file after "const industries = ".
    industries_match = re.search(r'const industries = (\[[\s\S]*?\]);', content)
    if not industries_match:
        print("❌ 未找到industries数组")
        return

    # The literal lives in a JS file and is only usable if it is strict JSON.
    try:
        industries = json.loads(industries_match.group(1))
    except json.JSONDecodeError as err:
        print(f"❌ industries数组不是合法的JSON,无法解析:{err}")
        return

    # Replace subQuestions for every industry that matches a job group.
    for industry in industries:
        group_name = industry.get("name", "")
        if group_name in interview_questions:
            for question_obj in industry.get("questions", []):
                question_obj["subQuestions"] = interview_questions[group_name]

    # Splice the re-serialised array back into the original file text.
    new_industries_str = json.dumps(industries, ensure_ascii=False, indent=2)
    new_content = (
        content[:industries_match.start(1)]
        + new_industries_str
        + content[industries_match.end(1):]
    )

    with open(mock_file, 'w', encoding='utf-8') as f:
        f.write(new_content)

    print("✅ 成功更新面试题数据")
    print("\n📊 更新的岗位群面试题:")
    for group_name, questions in interview_questions.items():
        print(f"  - {group_name}: {len(questions)}个问题")


if __name__ == "__main__":
    extract_and_update_interview_questions()