#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Attach parsed interview questions to the industry groups of a JS mock file.

The script reads a JSON export of resume/interview data, parses the interview
question text of each industry group into structured questions, de-duplicates
the ``health_N`` industry blocks found in the mock file's ``industries`` array
and splices a ``"questions"`` field into every block that lacks one.
"""

import json
import re

# Input JSON holding one record per resume, including the raw interview text.
HEALTH_RESUME_PATH = '/Users/apple/Documents/cursor/教务系统/frontend_大健康/网页未导入数据/大健康产业/大健康岗位简历.json'
# Front-end mock file that is read, rewritten and saved in place.
MOCK_FILE_PATH = '/Users/apple/Documents/cursor/教务系统/frontend_大健康/src/mocks/resumeInterviewMock.js'

# A section heading such as "# 一、岗位理解". The "(?:^|\n)" prefix also accepts
# a heading on the very first line of the text, which has no preceding newline.
_SECTION_HEADING_RE = re.compile(r'(?:^|\n)# ([一二三四五六七八九十]+、[^#\n]+)')
# Start of a numbered question ("1. ...") at the beginning of a line.
_QUESTION_START_RE = re.compile(r'\n\d+\.\s+')
# A line that itself starts a new numbered item; it terminates an answer.
_NUMBERED_LINE_RE = re.compile(r'^\d+\.')
# The whole "const industries = [ ... ];" statement (up to the first "];").
_INDUSTRIES_ARRAY_RE = re.compile(r'const industries = \[(.*?)\];', re.DOTALL)
# One industry object literal carrying a health_N id and a positions array.
_INDUSTRY_BLOCK_RE = re.compile(
    r'\{[^}]*?"id":\s*"health_\d+"[^}]*?"positions":\s*\[[^\]]*?\][^}]*?\}',
    re.DOTALL,
)
_INDUSTRY_ID_RE = re.compile(r'"id":\s*"(health_\d+)"')
_INDUSTRY_NAME_RE = re.compile(r'"name":\s*"([^"]+)"')

# Ordered keyword -> category rules; the first rule with a matching keyword wins.
_CATEGORY_RULES = (
    (('岗位理解',), "岗位理解类问题"),
    (('实践经验', '案例'), "实践经验类问题"),
    (('客户服务', '客户'), "客户服务类问题"),
    (('市场', '趋势'), "市场与未来趋势类问题"),
    (('技术', '专业'), "专业技术类问题"),
    (('团队', '协作'), "团队协作类问题"),
)

# Industry group name (as used in the JSON data) -> id used in the mock file.
INDUSTRY_NAME_TO_ID = {
    '健康管理': 'health_1',
    '健康检查': 'health_2',
    '康复治疗': 'health_3',
    '慢性病管理': 'health_4',
    '轻医美': 'health_5',
    '心理健康': 'health_6',
    '社群运营': 'health_7',
    '药品供应链管理': 'health_8',
    '药品生产': 'health_9',
    '药品质量检测': 'health_10',
    '药物研发': 'health_11',
}


def _extract_answer(lines):
    """Return the answer text that follows the first answer-marker line.

    The marker is the first line containing '答案'. Following lines are
    stripped and joined with single spaces; blank lines and lines starting
    with '示例答案' are skipped, and a line starting with "<digits>." ends the
    answer. Returns "" when there is no marker or nothing follows it.
    """
    for index, line in enumerate(lines):
        if '答案' not in line:
            continue
        collected = []
        for raw in lines[index + 1:]:
            candidate = raw.strip()
            if not candidate or candidate.startswith('示例答案'):
                continue
            if _NUMBERED_LINE_RE.match(candidate):
                break
            collected.append(candidate)
        return ' '.join(collected)
    return ""


def parse_interview_questions(content: str) -> list:
    """Parse raw interview-question text into a list of question dicts.

    The text is split into sections on headings of the form "# 一、标题" and
    each section is split into numbered questions ("1. ..."). The first line
    of a question block is the question; the answer is taken from the lines
    after the first line containing '答案'.

    Args:
        content: Raw interview text.

    Returns:
        A list of ``{"id": "qN", "question": ..., "answer": ...}`` dicts,
        numbered consecutively from ``q1`` across all sections. Blocks with
        an empty question or an empty answer are skipped. Text before the
        first section heading is ignored.
    """
    questions = []
    # With one capture group, split() yields
    # [preamble, title1, body1, title2, body2, ...]; bodies sit at 2, 4, ...
    sections = _SECTION_HEADING_RE.split(content)
    for section_body in sections[2::2]:
        # The chunk before the first numbered item is not a question.
        for block in _QUESTION_START_RE.split(section_body)[1:]:
            lines = block.strip().split('\n')
            question_text = lines[0].strip()
            answer_text = _extract_answer(lines)
            if question_text and answer_text:
                questions.append({
                    "id": f"q{len(questions) + 1}",
                    "question": question_text,
                    "answer": answer_text,
                })
    return questions


def _group_by_category(questions):
    """Group parsed questions into category entries for the mock file.

    The category is "sticky": a question without any known keyword stays in
    the category of the previous question (initially "综合面试题").
    """
    grouped = {}
    current_category = "综合面试题"
    for question in questions:
        text = question['question']
        for keywords, category in _CATEGORY_RULES:
            if any(keyword in text for keyword in keywords):
                current_category = category
                break
        grouped.setdefault(current_category, []).append(question)
    return [
        {"id": f"group_q{number}", "question": category, "subQuestions": members}
        for number, (category, members) in enumerate(grouped.items(), 1)
    ]


def _build_industry_questions(health_data):
    """Map each industry group name to its grouped questions.

    Only the first record per industry that yields at least one parsed
    question is used.
    """
    industry_questions = {}
    for item in health_data:
        industry = item.get('简历岗位群', '')
        interview_content = item.get('面试题内容', '')
        if not industry or not interview_content or industry in industry_questions:
            continue
        questions = parse_interview_questions(interview_content)
        if questions:
            industry_questions[industry] = _group_by_category(questions)
    return industry_questions


def _dedupe_blocks(blocks):
    """Keep the first block for every distinct health_N id, in original order."""
    seen_ids = set()
    unique_blocks = []
    for block in blocks:
        id_match = _INDUSTRY_ID_RE.search(block)
        if id_match and id_match.group(1) not in seen_ids:
            seen_ids.add(id_match.group(1))
            unique_blocks.append(block)
    return unique_blocks


def _default_questions(industry_name):
    """Return generic fallback questions for an industry without parsed data."""
    return [{
        "id": "group_q1",
        "question": f"{industry_name}专业认知",
        "subQuestions": [
            {
                "id": "q1",
                "question": f"你如何理解{industry_name}的核心价值?",
                "answer": f"{industry_name}的核心价值在于通过专业技能和知识,为企业和客户创造价值,推动行业发展。"
            },
            {
                "id": "q2",
                "question": f"{industry_name}中最重要的能力是什么?",
                "answer": "专业技能、沟通能力、团队协作和持续学习能力都是非常重要的。"
            },
            {
                "id": "q3",
                "question": f"你为什么选择{industry_name}这个方向?",
                "answer": "我对这个领域充满热情,相信能够在这里发挥我的专业优势,为行业发展做出贡献。"
            }
        ]
    }]


def _with_questions(block, questions):
    """Return *block* with a "questions" field spliced in, or None if unchanged.

    The field is inserted right after the last ']' of the block. Blocks that
    already mention "questions" are left alone.
    """
    if '"questions"' in block:
        return None
    pos_end = block.rfind(']')
    if pos_end <= 0:
        return None
    questions_json = json.dumps(questions, ensure_ascii=False, indent=4)
    # Re-indent the nested JSON so it lines up inside the surrounding object.
    questions_json = questions_json.replace('\n', '\n ')
    return block[:pos_end + 1] + ',\n "questions": ' + questions_json + block[pos_end + 1:]


def main():
    """Rewrite the mock file's industries array with interview questions.

    Reads HEALTH_RESUME_PATH and MOCK_FILE_PATH, de-duplicates the industry
    blocks, adds a "questions" field to each block lacking one (parsed
    questions when available, generic defaults otherwise) and writes the
    result back to MOCK_FILE_PATH. Nothing is written when the industries
    array or its blocks cannot be found.
    """
    with open(HEALTH_RESUME_PATH, 'r', encoding='utf-8') as f:
        health_data = json.load(f)

    with open(MOCK_FILE_PATH, 'r', encoding='utf-8') as f:
        content = f.read()

    match = _INDUSTRIES_ARRAY_RE.search(content)
    if not match:
        print("未找到industries数组")
        return

    industry_blocks = _INDUSTRY_BLOCK_RE.findall(match.group(1))
    unique_blocks = _dedupe_blocks(industry_blocks)
    print(f"找到 {len(industry_blocks)} 个岗位群块,去重后剩余 {len(unique_blocks)} 个")

    if not unique_blocks:
        # Writing back now would replace the array with an empty one.
        print("未找到任何岗位群块,文件未修改")
        return

    industry_questions = _build_industry_questions(health_data)

    updated_blocks = []
    for block in unique_blocks:
        name_match = _INDUSTRY_NAME_RE.search(block)
        id_match = _INDUSTRY_ID_RE.search(block)
        if not (name_match and id_match):
            updated_blocks.append(block)
            continue

        industry_name = name_match.group(1)
        industry_id = id_match.group(1)

        # Look up parsed questions through the name -> id mapping.
        questions = next(
            (
                industry_questions[orig_name]
                for orig_name, mapped_id in INDUSTRY_NAME_TO_ID.items()
                if mapped_id == industry_id and orig_name in industry_questions
            ),
            None,
        )
        if not questions:
            questions = _default_questions(industry_name)

        new_block = _with_questions(block, questions)
        if new_block is None:
            updated_blocks.append(block)
        else:
            updated_blocks.append(new_block)
            print(f" ✓ 为 {industry_name} ({industry_id}) 添加了面试题")

    new_industries = 'const industries = [\n ' + ',\n '.join(updated_blocks) + '\n];'

    # Replace exactly the span the regex matched, so an earlier occurrence of
    # the text "const industries" elsewhere in the file cannot shift the cut.
    new_content = content[:match.start()] + new_industries + content[match.end():]

    with open(MOCK_FILE_PATH, 'w', encoding='utf-8') as f:
        f.write(new_content)

    print(f"\n完成!处理了 {len(updated_blocks)} 个岗位群")


if __name__ == "__main__":
    main()