Files
ALL-teach_sys/frontend_大健康/replace_mock_data.py

235 lines
9.2 KiB
Python
Raw Permalink Normal View History

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import json
import re
from collections import defaultdict
def load_health_data():
    """Load the health-industry resume and Q&A JSON files.

    Both files are read relative to the current working directory and
    decoded as UTF-8.

    Returns:
        A ``(resume_data, qa_data)`` tuple holding the parsed JSON of the
        resume file and the Q&A file, in that order.
    """
    source_paths = (
        '网页未导入数据/大健康产业/大健康岗位简历.json',
        '网页未导入数据/大健康产业/大健康问答内容.json',
    )

    parsed = []
    for json_path in source_paths:
        with open(json_path, 'r', encoding='utf-8') as handle:
            parsed.append(json.load(handle))

    resume_data, qa_data = parsed
    return resume_data, qa_data
def map_level(level_tag):
    """Normalize a raw job-level tag to the level label used in the mock data.

    '技术骨干岗' is returned unchanged; '基础岗', '普通岗' and every tag that
    is not in the table collapse to '普通岗'.
    """
    normalized_levels = {
        '基础岗': '普通岗',
        '技术骨干岗': '技术骨干岗',
        '普通岗': '普通岗',
    }
    try:
        return normalized_levels[level_tag]
    except KeyError:
        # Unrecognized tags are treated as the ordinary level.
        return '普通岗'
def parse_resume_content(content):
    """Extract key fields from the text of a resume.

    Args:
        content: The full resume text as a single string.

    Returns:
        A dict with four keys:

        * ``'project_name'`` - text following the first '项目名称:' label up
          to the end of that line, with surrounding whitespace removed;
          '健康管理项目' when the label is absent or the value is blank.
        * ``'responsibilities'`` - the first three numbered items ("1. ...")
          found anywhere in the text, without trailing ';' / '。'.
        * ``'core_skills'`` - up to five numbered items from the section
          between '### (一)核心能力' and '### (二)', without trailing '。'.
        * ``'summary'`` - the flattened text after the first '# 三、' marker,
          cut to 200 characters (with "..." appended only when something
          was actually cut); a generic default sentence when the section is
          missing or is 50 characters or shorter.
    """
    default_project_name = "健康管理项目"

    # Project name: strip so CRLF line endings or padding after the label
    # do not leak a '\r' or spaces into the value.
    project_match = re.search(r'项目名称:(.+?)\n', content)
    project_name = project_match.group(1).strip() if project_match else ""
    if not project_name:
        project_name = default_project_name

    # Responsibilities: first three numbered items anywhere in the text.
    resp_pattern = r'\d+\.\s*([^\n]+[;。]?)'
    resp_matches = re.findall(resp_pattern, content)
    responsibilities = [m.strip().rstrip(';。') for m in resp_matches[:3]]

    # Core skills: numbered items inside the "(一)核心能力" section only.
    core_skills = []
    if '### (一)核心能力' in content:
        core_section = content.split('### (一)核心能力')[1].split('### (二)')[0]
        skill_matches = re.findall(r'\d+\.\s*([^。\n]+。?)', core_section)
        # The regex may capture a closing '。'; drop it so skills are
        # formatted like the responsibilities above.
        core_skills = [s.strip().rstrip('。') for s in skill_matches[:5]]

    # Personal summary.
    summary = "我是一名大专毕业生,具备扎实的专业基础和实践经验。"
    if '# 三、个人评价' in content or '# 三、个人总结' in content:
        # NOTE(review): the split is on the '# 三、' prefix, so the extracted
        # text still starts with the heading words themselves.
        summary_section = content.split('# 三、')[1]
        summary_text = re.sub(r'\n+', ' ', summary_section).strip()
        if len(summary_text) > 50:
            if len(summary_text) > 200:
                summary = summary_text[:200] + "..."
            else:
                # Nothing was cut, so no ellipsis is added.
                summary = summary_text

    return {
        'project_name': project_name,
        'responsibilities': responsibilities,
        'core_skills': core_skills,
        'summary': summary
    }
def create_industries_structure(resume_data, qa_data):
    """Build the ``industries`` list for the mock file.

    Resume records are grouped by their '简历岗位群' value. Each group becomes
    one industry entry holding a position per resume plus a single question
    block whose sub-questions come from the first three Q&A records.

    Args:
        resume_data: Iterable of resume dicts; each is read for the keys
            '简历岗位群', '简历内容', '岗位名称', '岗位等级标签' and '简历头像url'.
        qa_data: Sliceable sequence of Q&A dicts; '问题_流程1' and
            '回答_流程2' are used when present.

    Returns:
        A list of industry dicts, in first-seen order of the job groups.
    """
    fallback_requirements = (
        "具备相关专业知识和技能",
        "有良好的沟通能力和团队合作精神",
        "能够独立完成岗位职责",
        "有相关实习或工作经验优先",
    )

    # Bucket the resumes by job group.
    records_by_group = defaultdict(list)
    for record in resume_data:
        records_by_group[record['简历岗位群']].append(record)

    industries = []
    for industry_no, (group_name, group_records) in enumerate(records_by_group.items(), start=1):
        positions = []
        for position_no, record in enumerate(group_records, start=1):
            # Parse the resume text for the details used below.
            details = parse_resume_content(record['简历内容'])
            duties = details['responsibilities']

            title = record['岗位名称']
            level = map_level(record['岗位等级标签'])
            avatar = record['简历头像url']
            is_entry_level = record['岗位等级标签'] == '基础岗'
            first_duty = duties[0] if duties else '日常工作'

            # NOTE(review): parse_resume_content returns at most three
            # responsibilities, so this condition looks unreachable and the
            # generic list is what ends up being used - confirm intent.
            if len(duties) >= 4:
                requirements = duties[:4]
            else:
                requirements = list(fallback_requirements)

            positions.append({
                "id": f"health_{industry_no}_{position_no}",
                "title": title,
                "level": level,
                "avatar": avatar,
                "department": group_name,
                "type": "全职",
                "experience": "1-3年" if is_entry_level else "2-5年",
                "education": "大专",
                "salary": "5-10K" if is_entry_level else "8-15K",
                "location": "北京",
                "updateTime": "2024-01-20",
                "description": f"负责{title}相关工作,包括{first_duty}",
                "requirements": requirements,
            })

        # One generic question block per group; the first three Q&A records
        # serve as sample sub-questions.
        sub_questions = [
            {
                "id": f"q{industry_no}_{question_no}",
                "question": qa.get('问题_流程1', f"请介绍一下您对{group_name}的理解?"),
                "answer": qa.get('回答_流程2', f"{group_name}领域,我通过系统学习和实践,掌握了相关专业知识和技能。"),
            }
            for question_no, qa in enumerate(qa_data[:3], start=1)
        ]
        group_question = {
            "id": f"group_q{industry_no}",
            "question": f"# 一、{group_name}专业认知",
            "subQuestions": sub_questions,
        }

        industries.append({
            "id": f"health_{industry_no}",
            "name": group_name,
            "positions": positions,
            "questions": [group_question],
        })

    return industries
def create_resume_templates(resume_data):
    """Build the ``resumeTemplates`` mapping for the mock file.

    Args:
        resume_data: Iterable of resume dicts; each is read for the keys
            '简历岗位群', '简历内容', '岗位名称', '岗位等级标签' and '简历头像url'.

    Returns:
        A plain dict mapping each job-group name to the list of template
        dicts built from the resumes of that group, in input order.
    """
    by_group = defaultdict(list)

    for record in resume_data:
        group_name = record['简历岗位群']
        details = parse_resume_content(record['简历内容'])
        duties = details['responsibilities']
        skills = details['core_skills']

        position_title = record['岗位名称']
        level = map_level(record['岗位等级标签'])
        avatar = record['简历头像url']
        resume_text = record['简历内容']

        if duties:
            project_description = " \n".join(duties[:10])
        else:
            project_description = f"参与{position_title}相关工作"

        if skills:
            core_skills = skills[:9]
        else:
            core_skills = [
                f"熟悉{group_name}相关理论知识",
                "具备实践操作能力",
                "良好的沟通协调能力",
            ]

        student_info = {
            "project_experience": {
                "project_name": details['project_name'],
                "position": position_title + "助理",
                "time_period": "XXXXXX",
                "company": "XXXXXX",
                "description": project_description,
            },
            "core_skills": core_skills,
            "compound_skills": [
                "医疗服务与健康管理常识:了解我国医疗体系的基本结构与服务流程",
                "患者安全意识:了解生命体征监测、基础护理技能",
                "医疗人工智能基础认知了解AI在医疗健康领域的应用",
                "质量控制认知了解ISO体系和质量管理基本要求",
                "药品营销能力:掌握客户需求识别和沟通技巧",
            ],
            "personal_summary": details['summary'],
        }

        by_group[group_name].append({
            "position": position_title,
            "level": level,
            "avatar": avatar,
            "content": {
                "original": resume_text,
                # The "modified" version reuses the original text as-is.
                "modified": resume_text,
            },
            "studentInfo": student_info,
        })

    return dict(by_group)
def update_mock_file(industries, resume_templates):
    """Rewrite two constants inside ``src/mocks/resumeInterviewMock.js``.

    The first ``const industries = [ ... ];`` and the first
    ``const resumeTemplates = { ... };`` definitions in the file are replaced
    with the JSON serialization of the given data; everything else in the
    file is written back unchanged. The path is relative to the current
    working directory.

    Args:
        industries: JSON-serializable value written as the ``industries``
            constant.
        resume_templates: JSON-serializable value written as the
            ``resumeTemplates`` constant.
    """
    mock_path = 'src/mocks/resumeInterviewMock.js'

    with open(mock_path, 'r', encoding='utf-8') as f:
        content = f.read()

    # JSON text is used directly as the JavaScript literal.
    industries_str = json.dumps(industries, ensure_ascii=False, indent=2)
    templates_str = json.dumps(resume_templates, ensure_ascii=False, indent=2)

    replacements = [
        (
            'industries',
            r'const industries = \[[\s\S]*?\n\];',
            f'const industries = {industries_str};',
        ),
        (
            'resumeTemplates',
            r'const resumeTemplates = \{[\s\S]*?\n\};',
            f'const resumeTemplates = {templates_str};',
        ),
    ]

    missing = []
    for const_name, pattern, new_source in replacements:
        # The replacement is passed as a callable so it is inserted verbatim.
        # A plain string replacement would have its backslash escapes
        # processed by ``re`` (the two-character ``\n`` inside a JSON string
        # would turn into a real newline, ``\\`` into ``\``), corrupting the
        # generated JavaScript string literals.
        content, hits = re.subn(
            pattern,
            lambda _match, text=new_source: text,
            content,
            count=1,
        )
        if hits == 0:
            missing.append(const_name)

    with open(mock_path, 'w', encoding='utf-8') as f:
        f.write(content)

    if missing:
        # Do not report success when a definition was not found and was
        # therefore left untouched.
        print(f"⚠️ 未找到以下常量定义,未能替换: {', '.join(missing)}")
    else:
        print("✅ 成功更新 resumeInterviewMock.js 文件")
def main():
    """Run the conversion: load the data, build both structures, patch the mock file.

    Progress and summary counts are printed to standard output along the way.
    """
    print("开始转换大健康数据...")

    # Load the source data.
    resumes, qa_items = load_health_data()
    print(f"加载了 {len(resumes)} 个岗位简历数据")
    print(f"加载了 {len(qa_items)} 个问答数据")

    # Build the two structures the mock file needs.
    industries = create_industries_structure(resumes, qa_items)
    templates_by_group = create_resume_templates(resumes)

    # Summary counts.
    print("\n转换完成:")
    print(f"- {len(industries)} 个岗位群")
    position_total = sum(len(entry['positions']) for entry in industries)
    print(f"- {position_total} 个岗位")
    template_total = sum(len(group) for group in templates_by_group.values())
    print(f"- {template_total} 个简历模板")

    # Write the result into the mock file.
    update_mock_file(industries, templates_by_group)

    # Per-group breakdown.
    print("\n岗位群详情:")
    for entry in industries:
        group_label = entry['name']
        group_size = len(entry['positions'])
        print(f" {group_label}: {group_size} 个岗位")


# Run the conversion only when executed as a script, not on import.
if __name__ == "__main__":
    main()