#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Parse interview questions from job-group data and sync them into a mock file.

The script reads a JSON data file, extracts categorised question/answer pairs
from each record's interview text, and rewrites the ``"questions"`` field of
the matching entries in a front-end mock file.
"""

import json
import re

# Default locations used when main() is called without arguments.
DATA_PATH = '/Users/apple/Documents/cursor/教务系统/frontend_大健康/网页未导入数据/大健康产业/大健康岗位简历.json'
MOCK_PATH = '/Users/apple/Documents/cursor/教务系统/frontend_大健康/src/mocks/resumeInterviewMock.js'

# Job-group name as found in the data file -> entry id used in the mock file.
INDUSTRY_MAPPING = {
    '健康管理': 'health_1',
    '健康检查': 'health_2',
    '康复治疗': 'health_3',
    '慢性病管理': 'health_4',
    '轻医美': 'health_5',
    '心理健康': 'health_6',
    '社群运营': 'health_7',
    '药品供应链管理': 'health_8',
    '药品生产': 'health_9',
    '药品质量检测': 'health_10',
    '药物研发': 'health_11',
}

# Labels may be followed by either an ASCII colon or a full-width colon
# (U+FF1A); the escape keeps the pattern stable under text normalisation.
_COLON = r'[:\uFF1A]'

# "# 一、Title" category header, either on the first line or after a newline.
_CATEGORY_HEADER_RE = re.compile(r'(?:\A|\n)# ([一二三四五六七八九十]+、[^#\n]+)')

# "判断题:" prefix that is stripped before parsing.
_JUDGEMENT_PREFIX_RE = re.compile(r'判断题' + _COLON + r'\s*')

# Leading "答案:" / "示例答案:" label of an answer.
_ANSWER_LABEL_RE = re.compile(r'^(示例)?答案' + _COLON + r'?\s*')

# Strategy 1: "N. question" directly followed by a labelled answer. The answer
# runs until the next line that starts with "N.".
_QA_INLINE_RE = re.compile(
    r'^(\d+)\.\s*([^\n]+?)[\n\s]+'
    r'((?:示例)?答案' + _COLON + r'\s*[^\n]+(?:\n(?!\d+\.).*)*)',
    re.MULTILINE,
)

# Strategy 2: "N. question", a blank line, an optional answer label on its own
# line, then the answer paragraph.
_QA_PARAGRAPH_RE = re.compile(
    r'^(\d+)\.\s*([^\n]+)\n\s*\n\s*'
    r'((?:示例)?答案' + _COLON + r')?\s*\n\s*'
    r'([^\n]+(?:\n(?!\d+\.|示例答案).*)*)',
    re.MULTILINE,
)

# Helpers for the line-by-line fallback.
_QUESTION_LINE_RE = re.compile(r'^(\d+)\.\s*(.+)$')
_NUMBERED_LINE_RE = re.compile(r'^\d+\.')

# Pieces used to locate the "questions" field inside a mock-file entry.
_OLD_QUESTIONS_RE = re.compile(r',\s*"questions":\s*\[')
_OBJECT_END_RE = re.compile(r'\s*\}')


def parse_all_interview_questions(content):
    """Parse interview text into categories of question/answer pairs.

    Args:
        content: Raw interview text. Category headers look like
            ``# 一、Title``; questions are numbered ``1.``, ``2.``, ...

    Returns:
        A list of ``{"category": str, "questions": list}`` dicts. Categories
        without any extracted question are omitted. Question ids (``q1``,
        ``q2``, ...) are numbered continuously across categories. When no
        category header is present, all questions are returned under the
        single category ``综合面试题``.
    """
    content = _JUDGEMENT_PREFIX_RE.sub('', content)

    # With one capture group, split() yields
    # [preamble, title, body, title, body, ...].
    sections = _CATEGORY_HEADER_RE.split(content)

    if len(sections) == 1:
        # No category headers: treat the whole text as one section.
        questions = extract_questions_from_section(content, 1)
        if not questions:
            return []
        return [{"category": "综合面试题", "questions": questions}]

    categories = []
    next_id = 1
    # Text before the first header (sections[0]) is not parsed.
    for title, body in zip(sections[1::2], sections[2::2]):
        questions = extract_questions_from_section(body, next_id)
        next_id += len(questions)
        if questions:
            categories.append({
                "category": title.strip(),
                "questions": questions,
            })
    return categories


def _make_question(question_id, question_text, answer_text):
    """Build one question record in the shape the mock file expects."""
    return {
        "id": f"q{question_id}",
        "question": question_text,
        "answer": answer_text,
    }


def _extract_questions_line_by_line(content, start_id):
    """Fallback parser: walk the lines and pair each question with what follows."""
    questions = []
    current_question = None
    current_answer = []
    in_answer = False

    def save_current():
        # A question is only emitted once it has collected some answer text.
        if not (current_question and current_answer):
            return
        answer_text = ' '.join(current_answer).strip()
        if answer_text:
            # Ids advance by one per emitted question.
            questions.append(
                _make_question(start_id + len(questions), current_question, answer_text)
            )

    for raw_line in content.split('\n'):
        line = raw_line.strip()

        question_match = _QUESTION_LINE_RE.match(line)
        if question_match:
            # A new numbered line closes the previous question.
            save_current()
            current_question = question_match.group(2).strip()
            current_answer = []
            in_answer = False
        elif '答案' in line:
            # Answer label; the answer text may start on the same line.
            in_answer = True
            answer_part = _ANSWER_LABEL_RE.sub('', line).strip()
            if answer_part:
                current_answer.append(answer_part)
        elif in_answer and line:
            if _NUMBERED_LINE_RE.match(line):
                # A bare "N." line ends the labelled answer.
                in_answer = False
            else:
                current_answer.append(line)
        elif (current_question and not in_answer and line
              and not _NUMBERED_LINE_RE.match(line)):
            # No explicit answer label: still collect the text as the answer.
            current_answer.append(line)

    save_current()
    return questions


def extract_questions_from_section(content, start_id):
    """Extract numbered question/answer pairs from one section of text.

    Three strategies are tried in order and the first that yields any match
    is used: a question directly followed by a labelled answer, a question
    followed by a blank line and an answer paragraph, and finally a
    line-by-line scan.

    Args:
        content: Text of a single section.
        start_id: Number used for the first extracted question's id.

    Returns:
        A list of ``{"id": "q<N>", "question": str, "answer": str}`` dicts in
        document order. Pairs with an empty question or answer are skipped
        and do not consume an id.
    """
    matches = _QA_INLINE_RE.findall(content)
    if not matches:
        # Drop the optional label group so both strategies share one shape.
        matches = [
            (number, question, answer)
            for number, question, _label, answer in _QA_PARAGRAPH_RE.findall(content)
        ]
    if not matches:
        return _extract_questions_line_by_line(content, start_id)

    questions = []
    question_id = start_id
    for _number, question_text, answer_text in matches:
        question_text = question_text.strip()
        # Remove the answer label, then collapse runs of whitespace
        # (including newlines) into single spaces.
        answer_text = _ANSWER_LABEL_RE.sub('', answer_text.strip()).strip()
        answer_text = re.sub(r'\s+', ' ', answer_text)

        if question_text and answer_text:
            questions.append(_make_question(question_id, question_text, answer_text))
            question_id += 1
    return questions


def _find_array_end(text, open_index):
    """Return the index of the ``]`` closing the ``[`` at *open_index*, or -1.

    Brackets inside quoted strings (double, single or backtick) are ignored
    so that answer text containing ``[`` or ``]`` does not unbalance the scan.
    """
    depth = 0
    quote = None
    index = open_index
    length = len(text)
    while index < length:
        char = text[index]
        if quote is not None:
            if char == '\\':
                # Skip the escaped character.
                index += 2
                continue
            if char == quote:
                quote = None
        elif char in '"\'`':
            quote = char
        elif char == '[':
            depth += 1
        elif char == ']':
            depth -= 1
            if depth == 0:
                return index
        index += 1
    return -1


def _replace_questions_field(content, industry_id, questions_json):
    """Set the ``"questions"`` field of the mock entry with id *industry_id*.

    An entry is updated only when its ``"positions"`` array is followed by
    the closing ``}``, optionally with an existing ``"questions"`` array in
    between (which is removed). The JSON text is inserted verbatim rather
    than through a regex replacement template, so backslash escapes inside
    it are preserved.

    Returns:
        ``(new_content, count)`` where *count* is the number of entries
        updated.
    """
    anchor = re.compile(
        r'"id":\s*"' + re.escape(industry_id)
        + r'"[^{]*?"positions":\s*\[[^\]]*?\]'
    )
    new_field = ',\n "questions": ' + questions_json

    pieces = []
    cursor = 0
    replaced = 0
    for match in anchor.finditer(content):
        if match.start() < cursor:
            # Inside a region that has already been replaced.
            continue

        tail_start = match.end()
        old_field = _OLD_QUESTIONS_RE.match(content, tail_start)
        if old_field:
            # Existing field: skip over its whole (possibly nested) array.
            array_end = _find_array_end(content, old_field.end() - 1)
            if array_end == -1:
                continue
            tail_start = array_end + 1

        if not _OBJECT_END_RE.match(content, tail_start):
            # Other fields follow; leave this entry untouched.
            continue

        pieces.append(content[cursor:match.end()])
        pieces.append(new_field)
        cursor = tail_start
        replaced += 1

    pieces.append(content[cursor:])
    return ''.join(pieces), replaced


def main(data_path=DATA_PATH, mock_path=MOCK_PATH):
    """Extract interview questions per job group and write them into the mock file.

    Args:
        data_path: JSON file holding a list of records with the keys
            ``简历岗位群`` (job group) and ``面试题内容`` (interview text).
        mock_path: Mock file whose entries are updated in place.
    """
    with open(data_path, 'r', encoding='utf-8') as f:
        health_data = json.load(f)

    with open(mock_path, 'r', encoding='utf-8') as f:
        content = f.read()

    # Job group -> question groups in the shape the front end expects.
    industry_questions_map = {}
    for item in health_data:
        industry = item.get('简历岗位群', '')
        interview_content = item.get('面试题内容', '')
        # The first record that yields questions wins for each job group.
        if not industry or not interview_content or industry in industry_questions_map:
            continue

        all_categories = parse_all_interview_questions(interview_content)
        non_empty = (category for category in all_categories if category['questions'])
        questions_array = [
            {
                "id": f"group_q{cat_id}",
                "question": category['category'],
                "subQuestions": category['questions'],
            }
            for cat_id, category in enumerate(non_empty, start=1)
        ]

        if questions_array:
            industry_questions_map[industry] = questions_array
            total_questions = sum(len(q['subQuestions']) for q in questions_array)
            print(f"✓ {industry}: 提取了 {len(questions_array)} 个分类,共 {total_questions} 个面试题")

    updates = 0
    for orig_name, industry_id in INDUSTRY_MAPPING.items():
        questions = industry_questions_map.get(orig_name)
        if questions is None:
            continue

        questions_json = json.dumps(questions, ensure_ascii=False, indent=2)
        content, count = _replace_questions_field(content, industry_id, questions_json)
        if count:
            updates += 1

    with open(mock_path, 'w', encoding='utf-8') as f:
        f.write(content)

    print(f"\n✅ 完成!更新了 {updates} 个岗位群的完整面试题数据")


if __name__ == "__main__":
    main()