#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Convert health-industry resume / Q&A JSON exports into mock data.

The script reads two JSON files, builds the ``industries`` list and the
``resumeTemplates`` mapping from them, and splices both into the
``const industries = [...]`` / ``const resumeTemplates = {...}``
declarations of a JavaScript mock file.
"""

import json
import re
from collections import defaultdict

# Default input/output locations, relative to the working directory.
RESUME_JSON_PATH = '网页未导入数据/大健康产业/大健康岗位简历.json'
QA_JSON_PATH = '网页未导入数据/大健康产业/大健康问答内容.json'
MOCK_JS_PATH = 'src/mocks/resumeInterviewMock.js'


def load_health_data(resume_path=RESUME_JSON_PATH, qa_path=QA_JSON_PATH):
    """Load the resume and Q&A JSON files.

    Args:
        resume_path: Path of the resume JSON file.
        qa_path: Path of the Q&A JSON file.

    Returns:
        A ``(resume_data, qa_data)`` tuple holding the decoded JSON values.

    Raises:
        OSError: If either file cannot be opened.
        json.JSONDecodeError: If either file is not valid JSON.
    """
    with open(resume_path, 'r', encoding='utf-8') as f:
        resume_data = json.load(f)

    with open(qa_path, 'r', encoding='utf-8') as f:
        qa_data = json.load(f)

    return resume_data, qa_data


def map_level(level_tag):
    """Map a raw position-level tag to the level name used in the mock data.

    Unknown tags fall back to '普通岗'.
    """
    level_map = {
        '基础岗': '普通岗',
        '技术骨干岗': '技术骨干岗',
        '普通岗': '普通岗',
    }
    return level_map.get(level_tag, '普通岗')


def parse_resume_content(content):
    """Extract key fields from the text of one resume.

    Args:
        content: Resume text (the markers searched for below are markdown
            style headings and numbered list items).

    Returns:
        A dict with the keys ``project_name`` (str), ``responsibilities``
        (list of at most 3 str), ``core_skills`` (list of at most 5 str)
        and ``summary`` (str). Fields that cannot be found fall back to
        fixed defaults / empty lists.
    """
    # Project name: the text following the "项目名称:" label up to end of line.
    project_match = re.search(r'项目名称:(.+?)\n', content)
    project_name = project_match.group(1) if project_match else "健康管理项目"

    # Responsibilities: the first three numbered items found anywhere in the
    # text, with surrounding whitespace and trailing punctuation removed.
    # NOTE(review): callers slice this list with [:4] / [:10], but it never
    # holds more than 3 entries -- confirm which limit is intended.
    resp_matches = re.findall(r'\d+\.\s*([^;\n]+[;。]?)', content)
    responsibilities = [m.strip().rstrip(';。') for m in resp_matches[:3]]

    # Core skills: numbered items between the "(一)核心能力" heading and the
    # following "(二)" heading (or end of text when there is none).
    core_skills = []
    if '### (一)核心能力' in content:
        core_section = content.split('### (一)核心能力')[1].split('### (二)')[0]
        skill_matches = re.findall(r'\d+\.\s*([^。\n]+。?)', core_section)
        core_skills = [s.strip().rstrip('。') for s in skill_matches[:5]]

    # Personal summary: text after the first "# 三、" marker, newlines
    # collapsed to spaces, cut to 200 characters.
    # NOTE(review): the extracted text still starts with the heading title,
    # and "..." is appended even when nothing was cut off -- kept as-is.
    summary = "我是一名大专毕业生,具备扎实的专业基础和实践经验。"
    if '# 三、个人评价' in content or '# 三、个人总结' in content:
        summary_section = content.split('# 三、')[1]
        summary_text = re.sub(r'\n+', ' ', summary_section).strip()
        if len(summary_text) > 50:
            summary = summary_text[:200] + "..."

    return {
        'project_name': project_name,
        'responsibilities': responsibilities,
        'core_skills': core_skills,
        'summary': summary,
    }


def create_industries_structure(resume_data, qa_data):
    """Build the ``industries`` list from resume and Q&A records.

    Resume records are grouped by their '简历岗位群' value (first-seen
    order); each group becomes one industry with one position per record
    and a single question block built from the first three Q&A records.

    Args:
        resume_data: Iterable of dicts with the keys '简历岗位群',
            '岗位名称', '岗位等级标签', '简历头像url' and '简历内容'.
        qa_data: Sequence of dicts; '问题_流程1' and '回答_流程2' are read
            when present.

    Returns:
        A list of industry dicts (JSON-serialisable).
    """
    # Group the records by position group.
    groups = defaultdict(list)
    for item in resume_data:
        groups[item['简历岗位群']].append(item)

    industries = []
    for industry_id, (group_name, positions_data) in enumerate(groups.items(), start=1):
        industry = {
            "id": f"health_{industry_id}",
            "name": group_name,
            "positions": [],
            "questions": [],
        }

        for position_id, pos in enumerate(positions_data, start=1):
            # Parse the resume text to obtain more detailed information.
            parsed_info = parse_resume_content(pos['简历内容'])
            responsibilities = parsed_info['responsibilities']
            is_basic = pos['岗位等级标签'] == '基础岗'
            first_duty = responsibilities[0] if responsibilities else '日常工作'

            # NOTE(review): parse_resume_content returns at most 3
            # responsibilities, so the ">= 4" branch below is currently never
            # taken and the generic requirements are always used.
            if len(responsibilities) >= 4:
                requirements = responsibilities[:4]
            else:
                requirements = [
                    "具备相关专业知识和技能",
                    "有良好的沟通能力和团队合作精神",
                    "能够独立完成岗位职责",
                    "有相关实习或工作经验优先",
                ]

            industry["positions"].append({
                "id": f"health_{industry_id}_{position_id}",
                "title": pos['岗位名称'],
                "level": map_level(pos['岗位等级标签']),
                "avatar": pos['简历头像url'],
                "department": group_name,
                "type": "全职",
                "experience": "1-3年" if is_basic else "2-5年",
                "education": "大专",
                "salary": "5-10K" if is_basic else "8-15K",
                "location": "北京",
                "updateTime": "2024-01-20",
                "description": f"负责{pos['岗位名称']}相关工作,包括{first_duty}",
                "requirements": requirements,
            })

        # One general question block per group; the same first three Q&A
        # records are used as sample sub-questions for every group.
        main_question = {
            "id": f"group_q{industry_id}",
            "question": f"# 一、{group_name}专业认知",
            "subQuestions": [],
        }
        for question_id, qa in enumerate(qa_data[:3], start=1):
            main_question["subQuestions"].append({
                "id": f"q{industry_id}_{question_id}",
                "question": qa.get('问题_流程1', f"请介绍一下您对{group_name}的理解?"),
                "answer": qa.get('回答_流程2', f"在{group_name}领域,我通过系统学习和实践,掌握了相关专业知识和技能。"),
            })

        industry["questions"].append(main_question)
        industries.append(industry)

    return industries


def create_resume_templates(resume_data):
    """Build the ``resumeTemplates`` mapping from resume records.

    Args:
        resume_data: Iterable of dicts with the keys '简历岗位群',
            '岗位名称', '岗位等级标签', '简历头像url' and '简历内容'.

    Returns:
        A plain dict mapping each position-group name to the list of
        template dicts built from that group's records.
    """
    templates = defaultdict(list)

    for item in resume_data:
        group_name = item['简历岗位群']
        parsed_info = parse_resume_content(item['简历内容'])
        responsibilities = parsed_info['responsibilities']
        core_skills = parsed_info['core_skills']

        if responsibilities:
            description = " ;\n".join(responsibilities[:10])
        else:
            description = f"参与{item['岗位名称']}相关工作"

        template = {
            "position": item['岗位名称'],
            "level": map_level(item['岗位等级标签']),
            "avatar": item['简历头像url'],
            "content": {
                "original": item['简历内容'],
                "modified": item['简历内容'],  # same text as the original
            },
            "studentInfo": {
                "project_experience": {
                    "project_name": parsed_info['project_name'],
                    "position": item['岗位名称'] + "助理",
                    "time_period": "XXXXXX",
                    "company": "XXXXXX",
                    "description": description,
                },
                "core_skills": core_skills[:9] if core_skills else [
                    f"熟悉{group_name}相关理论知识",
                    "具备实践操作能力",
                    "良好的沟通协调能力",
                ],
                "compound_skills": [
                    "医疗服务与健康管理常识:了解我国医疗体系的基本结构与服务流程",
                    "患者安全意识:了解生命体征监测、基础护理技能",
                    "医疗人工智能基础认知:了解AI在医疗健康领域的应用",
                    "质量控制认知:了解ISO体系和质量管理基本要求",
                    "药品营销能力:掌握客户需求识别和沟通技巧",
                ],
                "personal_summary": parsed_info['summary'],
            },
        }

        templates[group_name].append(template)

    return dict(templates)


def _replace_declaration(content, pattern, replacement, label):
    """Replace the first match of *pattern* in *content* with *replacement*.

    The replacement is supplied through a callable so that ``re`` inserts it
    verbatim. A plain string would be treated as a template, turning the
    JSON escape ``\\n`` into a real newline (and ``\\\\`` into ``\\``),
    which corrupts the generated JavaScript string literals.

    Returns:
        ``(new_content, replaced)`` where *replaced* tells whether the
        pattern was found. A warning is printed when it was not.
    """
    new_content, count = re.subn(pattern, lambda _match: replacement, content, count=1)
    if count == 0:
        print(f"⚠️ 未找到 {label} 定义,已跳过替换")
    return new_content, count > 0


def update_mock_file(industries, resume_templates, mock_path=MOCK_JS_PATH):
    """Splice the generated data into the JavaScript mock file.

    The first ``const industries = [...];`` and the first
    ``const resumeTemplates = {...};`` declarations (each ending with the
    closing bracket at the start of a line) are replaced with the JSON
    serialisation of the given data; the file is rewritten in place.

    Args:
        industries: JSON-serialisable list for the ``industries`` constant.
        resume_templates: JSON-serialisable dict for ``resumeTemplates``.
        mock_path: Path of the JavaScript file to update.

    Raises:
        OSError: If the file cannot be read or written.
    """
    with open(mock_path, 'r', encoding='utf-8') as f:
        content = f.read()

    # JSON text is used directly as the JavaScript literal.
    industries_str = json.dumps(industries, ensure_ascii=False, indent=2)
    templates_str = json.dumps(resume_templates, ensure_ascii=False, indent=2)

    content, industries_done = _replace_declaration(
        content,
        r'const industries = \[[\s\S]*?\n\];',
        f'const industries = {industries_str};',
        'industries',
    )
    content, templates_done = _replace_declaration(
        content,
        r'const resumeTemplates = \{[\s\S]*?\n\};',
        f'const resumeTemplates = {templates_str};',
        'resumeTemplates',
    )

    with open(mock_path, 'w', encoding='utf-8') as f:
        f.write(content)

    # Only claim success when both declarations were actually replaced.
    if industries_done and templates_done:
        print("✅ 成功更新 resumeInterviewMock.js 文件")


def main():
    """Run the conversion: load, transform, report and update the mock file."""
    print("开始转换大健康数据...")

    resume_data, qa_data = load_health_data()
    print(f"加载了 {len(resume_data)} 个岗位简历数据")
    print(f"加载了 {len(qa_data)} 个问答数据")

    industries = create_industries_structure(resume_data, qa_data)
    resume_templates = create_resume_templates(resume_data)

    # Summary statistics.
    print("\n转换完成:")
    print(f"- {len(industries)} 个岗位群")
    print(f"- {sum(len(ind['positions']) for ind in industries)} 个岗位")
    print(f"- {sum(len(templates) for templates in resume_templates.values())} 个简历模板")

    update_mock_file(industries, resume_templates)

    # Per-group details.
    print("\n岗位群详情:")
    for industry in industries:
        print(f" {industry['name']}: {len(industry['positions'])} 个岗位")


if __name__ == "__main__":
    main()