#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Import interview questions into the health-industry front-end mock file.

The script reads the position/resume JSON export, extracts the numbered
interview questions of every job cluster (optionally grouped by
``# 一、...`` style category headings) and rewrites the ``"questions"`` field
of the matching entries in the mock file.
"""

import json
import re

# Default input (resume data export) and output (front-end mock) files.
DATA_PATH = '/Users/apple/Documents/cursor/教务系统/frontend_大健康/网页未导入数据/大健康产业/大健康岗位简历.json'
MOCK_PATH = '/Users/apple/Documents/cursor/教务系统/frontend_大健康/src/mocks/resumeInterviewMock.js'

# Job-cluster name used in the data file -> entry id used in the mock file.
INDUSTRY_MAPPING = {
    '健康管理': 'health_1',
    '健康检查': 'health_2',
    '康复治疗': 'health_3',
    '慢性病管理': 'health_4',
    '轻医美': 'health_5',
    '心理健康': 'health_6',
    '社群运营': 'health_7',
    '药品供应链管理': 'health_8',
    '药品生产': 'health_9',
    '药品质量检测': 'health_10',
    '药物研发': 'health_11',
}

# Category title used when the content has no "# 一、..." headings.
DEFAULT_CATEGORY = "综合面试题"

# Chinese text normally uses the full-width colon (U+FF1A); accept it next to
# the ASCII colon in every label pattern. Written as an escape so the
# character cannot be silently normalised away by an editor or tool.
_COLON = r'[:\uff1a]'

# "判断题:" question-type prefix that is stripped before parsing.
_TYPE_PREFIX_RE = re.compile(rf'判断题{_COLON}?\s*')
# Category heading such as "# 一、专业知识"; the group captures the title.
_CATEGORY_RE = re.compile(r'# ([一二三四五六七八九十]+、[^\n]+)')
# Question line such as "3. 什么是健康管理?".
_QUESTION_RE = re.compile(r'^(\d+)\.\s*(.+)$')
# Any line that starts with "<digits>." (including a bare "3.").
_NUMBER_PREFIX_RE = re.compile(r'^\d+\.')
# Answer marker: "示例答案" (colon optional) or "答案" followed by a colon.
_ANSWER_MARKER_RE = re.compile(rf'示例答案{_COLON}?\s*|答案{_COLON}\s*')
# Left-over answer label at the very start of a collected answer. The colon
# is required so that an answer beginning with the word "答案" is kept intact.
_LEADING_LABEL_RE = re.compile(rf'^(?:示例)?答案{_COLON}\s*')
_WHITESPACE_RE = re.compile(r'\s+')
# Fallback layout: question line, a line holding only the answer label, then
# the answer text up to the next numbered line or heading.
_FALLBACK_RE = re.compile(
    rf'(\d+)\.\s*([^\n]+)\s*\n\s*(?:示例)?答案{_COLON}?\s*\n\s*'
    r'([^\n]+(?:\n(?!\d+\.|#)[^\n]*)*)'
)
# Start of an existing ``"questions": [`` field directly after "positions".
_QUESTIONS_FIELD_RE = re.compile(r',\s*"questions":\s*\[')
_OBJECT_CLOSE_RE = re.compile(r'\s*\}')


def extract_all_questions_from_content(content):
    """Extract every interview question from *content*, grouped by category.

    Args:
        content: Raw interview text of one position.

    Returns:
        A list of ``{"category": str, "questions": list}`` dicts. Categories
        without any extracted question are omitted. When the text has no
        ``# 一、...`` headings, all questions are placed in one default
        category.
    """
    # Drop the "判断题:" question-type prefix.
    content = _TYPE_PREFIX_RE.sub('', content)

    # With one capture group, split() yields
    # [preamble, title1, body1, title2, body2, ...].
    parts = _CATEGORY_RE.split(content)
    if len(parts) > 1:
        # Text before the first heading (parts[0]) is not parsed.
        sections = zip(parts[1::2], parts[2::2])
    else:
        sections = [(DEFAULT_CATEGORY, content)]

    all_questions = []
    for title, body in sections:
        questions = extract_questions_from_text(body)
        if questions:
            all_questions.append({
                "category": title.strip(),
                "questions": questions,
            })
    return all_questions


def _append_question(questions, question, answer_parts):
    """Append *question* to *questions* if it has a non-empty answer."""
    if not question or not answer_parts:
        return
    answer = ' '.join(answer_parts).strip()
    answer = _LEADING_LABEL_RE.sub('', answer)
    answer = _WHITESPACE_RE.sub(' ', answer)
    if answer:
        questions.append({
            "id": f"q{len(questions) + 1}",
            "question": question,
            "answer": answer,
        })


def extract_questions_from_text(text):
    """Extract numbered questions and their answers from *text*.

    A question is a line of the form ``N. question``. Its answer starts at
    the first following line containing an answer marker (``示例答案`` or
    ``答案`` plus colon); text after the marker on the same line and the
    following non-blank lines belong to the answer. A blank line, the next
    question, or a ``#`` heading ends the answer.

    If that yields nothing, a regex fallback handles the layout where the
    answer label stands alone on its own line without a colon.

    Args:
        text: Text of one category (or of the whole content).

    Returns:
        A list of ``{"id": "qN", "question": str, "answer": str}`` dicts,
        numbered from ``q1``. Questions without an answer are skipped.
    """
    questions = []
    current_question = None
    answer_parts = []
    in_answer_section = False

    for raw_line in text.split('\n'):
        line = raw_line.strip()

        question_match = _QUESTION_RE.match(line)
        if question_match:
            # A new question closes the previous one.
            _append_question(questions, current_question, answer_parts)
            current_question = question_match.group(2).strip()
            answer_parts = []
            in_answer_section = False
            continue

        if not line:
            # A blank line ends the current answer.
            in_answer_section = False
            continue

        marker = _ANSWER_MARKER_RE.search(line)
        if marker:
            in_answer_section = True
            # Keep everything after the FIRST marker so an answer that itself
            # mentions "答案" is not truncated.
            same_line_answer = line[marker.end():].strip()
            if same_line_answer:
                answer_parts.append(same_line_answer)
        elif (in_answer_section
              and not _NUMBER_PREFIX_RE.match(line)
              and not line.startswith('#')):
            answer_parts.append(line)

    _append_question(questions, current_question, answer_parts)

    if not questions:
        # Fallback for "question / bare answer label / answer" layouts.
        for _number, question_text, answer_text in _FALLBACK_RE.findall(text):
            question_text = question_text.strip()
            answer_text = _WHITESPACE_RE.sub(' ', answer_text.strip())
            if question_text and answer_text:
                questions.append({
                    "id": f"q{len(questions) + 1}",
                    "question": question_text,
                    "answer": answer_text,
                })

    return questions


def merge_categories(existing_categories, new_categories):
    """Merge *new_categories* into *existing_categories* in place.

    Categories are matched by their ``"category"`` title. Questions of an
    already known category are appended unless a question with the same text
    is already present; unknown categories are appended as they are.

    Returns:
        *existing_categories*, for convenience.
    """
    by_title = {}
    for category in existing_categories:
        by_title.setdefault(category['category'], category)

    for category in new_categories:
        target = by_title.get(category['category'])
        if target is None:
            existing_categories.append(category)
            by_title[category['category']] = category
            continue
        seen = {q['question'] for q in target['questions']}
        for question in category['questions']:
            if question['question'] not in seen:
                target['questions'].append(question)
                # Track additions too, so duplicates inside the incoming
                # category are not appended twice.
                seen.add(question['question'])
    return existing_categories


def collect_industry_questions(health_data):
    """Group the extracted question categories of all items by job cluster.

    Args:
        health_data: Iterable of dicts; the keys ``简历岗位群`` (job cluster)
            and ``面试题内容`` (interview text) are read, items missing either
            value are skipped.

    Returns:
        Dict mapping job-cluster name to its merged category list.
    """
    by_industry = {}
    for item in health_data:
        industry = item.get('简历岗位群', '')
        interview_content = item.get('面试题内容', '')
        if not (industry and interview_content):
            continue
        merge_categories(
            by_industry.setdefault(industry, []),
            extract_all_questions_from_content(interview_content),
        )
    return by_industry


def build_questions_array(categories):
    """Convert merged categories into the structure written to the mock file.

    Every non-empty category becomes one group
    ``{"id": "group_qN", "question": <category>, "subQuestions": [...]}``;
    question ids are renumbered continuously (``q1``, ``q2``, ...) across all
    groups.

    Returns:
        ``(groups, total_questions)``.
    """
    groups = []
    total_questions = 0
    for category in categories:
        if not category['questions']:
            continue
        sub_questions = [
            {
                "id": f"q{total_questions + offset}",
                "question": question['question'],
                "answer": question['answer'],
            }
            for offset, question in enumerate(category['questions'], 1)
        ]
        total_questions += len(sub_questions)
        groups.append({
            "id": f"group_q{len(groups) + 1}",
            "question": category['category'],
            "subQuestions": sub_questions,
        })
    return groups, total_questions


def _find_array_end(text, open_index):
    """Return the index of the ``]`` closing the ``[`` at *open_index*.

    Nested arrays are followed and brackets inside quoted strings are
    ignored. Returns ``-1`` when the array is not closed.
    """
    depth = 0
    quote = None
    escaped = False
    for index in range(open_index, len(text)):
        char = text[index]
        if quote is not None:
            if escaped:
                escaped = False
            elif char == '\\':
                escaped = True
            elif char == quote:
                quote = None
        elif char in '"\'':
            quote = char
        elif char == '[':
            depth += 1
        elif char == ']':
            depth -= 1
            if depth == 0:
                return index
    return -1


def _remove_old_questions(content, head_re):
    """Remove the ``"questions"`` array that directly follows each head match.

    The field is only removed when it is the last field of its object, i.e.
    the closing ``]`` is followed by ``}``.
    """
    kept = []
    position = 0
    for head in head_re.finditer(content):
        if head.start() < position:
            continue  # lies inside a span that has just been removed
        field = _QUESTIONS_FIELD_RE.match(content, head.end())
        if field is None:
            continue
        array_end = _find_array_end(content, field.end() - 1)
        if array_end == -1:
            continue
        if _OBJECT_CLOSE_RE.match(content, array_end + 1) is None:
            continue
        kept.append(content[position:head.end()])
        position = array_end + 1
    kept.append(content[position:])
    return ''.join(kept)


def replace_questions_field(content, industry_id, questions_json):
    """Set the ``"questions"`` field of the mock entry with id *industry_id*.

    The entry is located by ``"id": "<industry_id>"`` followed (without an
    intervening ``{``) by a ``"positions": [...]`` array. An existing
    ``"questions"`` array after it is removed first, so running the import
    repeatedly yields the same file. The new field is then inserted between
    the positions array and the closing ``}`` of the entry.

    Args:
        content: Full text of the mock file.
        industry_id: Entry id, e.g. ``"health_1"``.
        questions_json: JSON text of the new questions array.

    Returns:
        ``(new_content, count)`` where *count* is the number of entries that
        received the new field.
    """
    head = (rf'"id":\s*"{re.escape(industry_id)}"[^{{]*?'
            r'"positions":\s*\[[^\]]*?\]')
    content = _remove_old_questions(content, re.compile(head))

    def insert_field(match):
        # A function replacement inserts the JSON verbatim; a template string
        # would make re.sub re-interpret the backslash escapes inside it.
        return f'{match.group(1)},\n "questions": {questions_json}{match.group(2)}'

    return re.subn(rf'({head})(\s*\}})', insert_field, content)


def main(data_path=DATA_PATH, mock_path=MOCK_PATH):
    """Rewrite the interview questions of every mapped job cluster.

    Args:
        data_path: JSON file with the position/resume data.
        mock_path: Mock file that is updated in place.
    """
    with open(data_path, 'r', encoding='utf-8') as f:
        health_data = json.load(f)

    with open(mock_path, 'r', encoding='utf-8') as f:
        content = f.read()

    industry_all_questions = collect_industry_questions(health_data)

    updates = 0
    for orig_name, industry_id in INDUSTRY_MAPPING.items():
        categories = industry_all_questions.get(orig_name)
        if not categories:
            continue

        questions_array, total_questions = build_questions_array(categories)
        if not questions_array:
            continue

        print(f"✓ {orig_name} ({industry_id}): {len(questions_array)} 个分类,共 {total_questions} 个面试题")

        questions_json = json.dumps(questions_array, ensure_ascii=False, indent=2)
        content, count = replace_questions_field(content, industry_id, questions_json)
        if count > 0:
            updates += 1

    with open(mock_path, 'w', encoding='utf-8') as f:
        f.write(content)

    print(f"\n✅ 完成!更新了 {updates} 个岗位群的完整面试题数据")


if __name__ == "__main__":
    main()