#!/usr/bin/env python3
"""Extract chemical-industry interview questions and sync them into the JS mock.

The script reads the position records stored at ``DATA_PATH``, parses each
record's free-text ``面试题内容`` field into question/answer pairs grouped by
``简历岗位群``, and rewrites the matching ``subQuestions`` arrays inside
``MOCK_PATH``.  A timestamped backup of the mock file is written before it is
modified, and the result is syntax-checked with ``node -c``; when that check
fails the original content is written back.
"""
import json
import re
import subprocess
from datetime import datetime

DATA_PATH = '网页未导入数据/化工产业/化工岗位简历.json'
MOCK_PATH = 'src/mocks/resumeInterviewMock.js'

# A question starts with a number followed by "." or "、".
QUESTION_START_RE = re.compile(r'^\d+[\.、]')

# Substrings that flag a line as an "answer starts here" marker.  The
# full-width-colon variant is accepted in addition to the ASCII one.
ANSWER_MARKERS = ('示例答案', '答案:', '答案:', '正确答案', '正确选项')

# Captures answer text written on the marker line itself, e.g. "正确答案:B".
# Markdown emphasis asterisks around the colon are tolerated.
INLINE_ANSWER_RE = re.compile(
    r'(?:示例答案|正确答案|正确选项|答案)[\s*]*[::][\s*]*(.*)$'
)

# Multiple-choice option lines are kept as part of the question text.
OPTION_PREFIXES = ('A.', 'B.', 'C.', 'D.')


def _flush_pair(pairs, question, answer_lines):
    """Append a question/answer dict to *pairs* when both parts are present."""
    if not question or not answer_lines:
        return
    answer_text = '\n'.join(answer_lines).strip()
    if answer_text:
        pairs.append({'question': question, 'answer': answer_text})


def parse_interview_questions(interview_content):
    """Parse free-text interview content into question/answer pairs.

    Args:
        interview_content: Newline-separated text.  Lines beginning with a
            number followed by ``.`` or ``、`` start a new question; option
            lines (``A.`` .. ``D.``) are appended to the question; lines
            starting with ``#`` are ignored; every other line after a
            question is collected as answer text.

    Returns:
        A list of ``{'question': str, 'answer': str}`` dicts in input order.
        Questions that never receive any answer text are dropped.
    """
    questions = []
    current_q = None
    current_a = []

    for raw_line in interview_content.split('\n'):
        line = raw_line.strip()
        if not line:
            continue

        if QUESTION_START_RE.match(line):
            # A new question closes the previous question/answer pair.
            _flush_pair(questions, current_q, current_a)
            current_q = line
            current_a = []
        elif any(marker in line for marker in ANSWER_MARKERS):
            # The marker itself is never answer text, but anything written
            # after its colon on the same line is (e.g. "正确答案:B").
            inline = INLINE_ANSWER_RE.search(line)
            inline_text = inline.group(1).strip(' *') if inline else ''
            if current_q and inline_text:
                current_a.append(inline_text)
        elif current_q:
            if line.startswith(OPTION_PREFIXES):
                current_q += '\n' + line
            elif not line.startswith('#'):
                current_a.append(line)

    # Close the final pair.
    _flush_pair(questions, current_q, current_a)
    return questions


def generic_questions(job_group):
    """Return the three placeholder questions used for *job_group*."""
    return [
        {
            "question": f"请介绍一下你对{job_group}岗位的理解",
            "answer": f"需要掌握{job_group}相关的专业知识和技能,确保工作安全高效。"
        },
        {
            "question": f"你为什么选择{job_group}这个职业方向?",
            "answer": f"对{job_group}领域充满兴趣,希望在这个领域深入发展。"
        },
        {
            "question": f"你认为{job_group}工作中最重要的是什么?",
            "answer": "安全意识、专业技能和团队协作是最重要的。"
        }
    ]


def collect_job_group_questions(chemical_data):
    """Group the parsed questions of every position by job group.

    Args:
        chemical_data: Iterable of dicts read from the data file.  Only the
            ``简历岗位群`` and ``面试题内容`` keys are used; records where either
            is missing or empty are skipped.

    Returns:
        ``(job_group_questions, total_questions)`` where the first item maps
        each job group to its de-duplicated question list and the second is
        the number of questions collected, placeholders included.
    """
    job_group_questions = {}
    seen_by_group = {}
    groups_mentioning_answers = set()
    total_questions = 0

    for position in chemical_data:
        job_group = position.get('简历岗位群', '')
        interview_content = position.get('面试题内容', '')
        if not job_group or not interview_content:
            continue

        group_questions = job_group_questions.setdefault(job_group, [])
        seen = seen_by_group.setdefault(job_group, set())
        if '答案' in interview_content:
            groups_mentioning_answers.add(job_group)

        # Skip questions whose text was already collected for this group.
        for pair in parse_interview_questions(interview_content):
            if pair['question'] in seen:
                continue
            seen.add(pair['question'])
            group_questions.append(pair)
            total_questions += 1

    # Groups whose content mentions answers but yielded no parsable pair get
    # the generic placeholder questions instead of staying empty.
    unparsed_groups = [
        group for group, pairs in job_group_questions.items()
        if not pairs and group in groups_mentioning_answers
    ]
    for job_group in unparsed_groups:
        placeholders = generic_questions(job_group)
        job_group_questions[job_group] = placeholders
        total_questions += len(placeholders)

    return job_group_questions, total_questions


def replace_sub_questions(content, job_group, questions):
    """Return *content* with the ``subQuestions`` array of *job_group* replaced.

    The target entry is located by its ``"question"`` value, which has the
    form ``"<job_group>岗位群面试题"``.  *content* is returned unchanged when no
    such entry exists.

    NOTE: the existing array is matched up to its first ``]``; an array whose
    strings already contain ``]`` would only be partially replaced.
    """
    # json.dumps takes care of escaping newlines, quotes and backslashes.
    items = [
        json.dumps(
            {"id": f"q{index}", "question": pair['question'], "answer": pair['answer']},
            ensure_ascii=False,
            indent=20,
        )
        for index, pair in enumerate(questions, start=1)
    ]
    sub_questions_content = ',\n'.join(items)
    new_array = f'[\n{sub_questions_content}\n ]'

    pattern = rf'("question"\s*:\s*"{re.escape(job_group)}岗位群面试题"[^}}]*?"subQuestions"\s*:\s*)\[[^\]]*\]'
    # A function replacement inserts the text verbatim.  A replacement
    # *string* would have its backslash escapes processed by re.sub, turning
    # the JSON "\n" escapes into raw newlines and producing invalid JS.
    return re.sub(
        pattern,
        lambda match: match.group(1) + new_array,
        content,
        flags=re.DOTALL,
    )


def main(data_path=DATA_PATH, mock_path=MOCK_PATH):
    """Run the extraction and update the mock file in place.

    Args:
        data_path: JSON file holding the position records.
        mock_path: JavaScript mock file whose ``subQuestions`` are rewritten.
    """
    print("正在提取化工岗位面试题...")

    # Load the position records.
    with open(data_path, 'r', encoding='utf-8') as f:
        chemical_data = json.load(f)

    job_group_questions, total_questions = collect_job_group_questions(chemical_data)

    # Report extraction statistics.
    print("\n===== 化工岗位面试题提取完成 =====")
    print(f"总岗位群数: {len(job_group_questions)}")
    print(f"总面试题数: {total_questions}")
    print("\n各岗位群面试题数量:")
    for job_group, questions in sorted(job_group_questions.items()):
        print(f" {job_group}: {len(questions)} 题")

    # Read the current mock file.
    print("\n正在更新 resumeInterviewMock.js...")
    with open(mock_path, 'r', encoding='utf-8') as f:
        original_content = f.read()

    # Keep a timestamped backup next to the mock file.
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    backup_file = f'{mock_path}.backup_all_{timestamp}'
    with open(backup_file, 'w', encoding='utf-8') as f:
        f.write(original_content)
    print(f"已创建备份: {backup_file}")

    # Replace the subQuestions array of every job group that has questions.
    content = original_content
    updated_count = 0
    updated_questions = 0
    for job_group, questions in job_group_questions.items():
        if not questions:
            continue
        new_content = replace_sub_questions(content, job_group, questions)
        if new_content != content:
            content = new_content
            updated_count += 1
            updated_questions += len(questions)
            print(f"✓ 已更新 {job_group} 的 {len(questions)} 道面试题")

    # Persist the updated mock file.
    with open(mock_path, 'w', encoding='utf-8') as f:
        f.write(content)

    # Syntax-check the result with node and roll back when the check fails.
    print("\n正在验证语法...")
    try:
        result = subprocess.run(
            ['node', '-c', mock_path],
            capture_output=True,
            text=True,
            encoding='utf-8',
        )
        if result.returncode == 0:
            print("✓ 语法检查通过")
            print(f"✓ 成功更新了 {updated_count} 个岗位群的面试题")
            # Report what was actually written, not what was extracted.
            print(f"\n✅ 所有面试题更新成功完成!共更新 {updated_questions} 道题目")
        else:
            print(f"\n✗ 语法检查失败: {result.stderr}")
            # Restore the content that was backed up above.
            with open(mock_path, 'w', encoding='utf-8') as f:
                f.write(original_content)
            print("已从备份恢复")
    except (OSError, subprocess.SubprocessError, UnicodeError) as e:
        # Best effort: e.g. node is not installed.  The updated file is kept
        # but has not been validated.
        print(f"错误: {e}")


if __name__ == '__main__':
    main()