#!/usr/bin/env python3 # -*- coding: utf-8 -*- import json import re # 读取提取的数据 with open('extracted_resume_data.json', 'r', encoding='utf-8') as f: extracted_data = json.load(f) def parse_resume_content(content): """解析简历内容,提取项目经历、核心能力、复合能力和个人总结""" # 提取项目信息 project_name = re.search(r'项目名称:(.+?)(?:\n|$)', content) position = re.search(r'实习岗位:(.+?)(?:\n|$)', content) time_period = re.search(r'实习时间:(.+?)(?:\n|$)', content) company = re.search(r'实习单位:(.+?)(?:\n|$)', content) # 提取岗位职责 duties_match = re.search(r'(?:岗位职责|项目职责)[:\s]*\n((?:\d+\..+?(?=\n#|\n\n|\Z))+)', content, re.DOTALL) if duties_match: duties_text = duties_match.group(1).strip() # 提取所有职责项并合并 duties_items = re.findall(r'\d+\.\s*(.+?)(?=\d+\.|$)', duties_text, re.DOTALL) description = '\n'.join([d.strip().replace('\n', '') for d in duties_items if d]) else: description = "" # 提取核心能力 core_skills_match = re.search(r'核心能力[:\s]*\n((?:\d+\..+?(?=\n#|\n\n|\Z))+)', content, re.DOTALL) core_skills = [] if core_skills_match: skills_text = core_skills_match.group(1).strip() # 分割并清理每个技能项 core_skills = re.findall(r'\d+\.\s*(.+?)(?=\d+\.|$)', skills_text, re.DOTALL) core_skills = [s.strip().replace('\n', '').replace(' ', ' ') for s in core_skills if s] # 提取复合能力 compound_skills_match = re.search(r'复合能力[:\s]*\n((?:\d+\..+?(?=\n#|\Z))+)', content, re.DOTALL) compound_skills = [] if compound_skills_match: skills_text = compound_skills_match.group(1) # 提取每个复合能力(格式:数字. 能力名称:描述) compound_skills = re.findall(r'\d+\.\s*(.+?)(?=\d+\.|$)', skills_text, re.DOTALL) compound_skills = [s.strip().replace('\n', '') for s in compound_skills if s] # 提取个人总结/评价 personal_summary_match = re.search(r'(?:个人总结|个人评价)[:\s]*\n(.+?)(?:\Z)', content, re.DOTALL) personal_summary = personal_summary_match.group(1).strip() if personal_summary_match else "" return { 'project_experience': { 'project_name': project_name.group(1) if project_name else "", 'position': position.group(1) if position else "", 'time_period': time_period.group(1) if time_period else "", 'company': company.group(1) if company else "", 'description': description }, 'core_skills': core_skills, 'compound_skills': compound_skills, 'personal_summary': personal_summary } # 生成JavaScript代码更新 updates = [] for position_name, data in extracted_data.items(): content = data.get('简历内容', '') if content: student_info = parse_resume_content(content) # 格式化为JavaScript对象 js_obj = { 'position': position_name, 'studentInfo': student_info } updates.append(js_obj) # 保存为JSON供后续处理 with open('resume_updates.json', 'w', encoding='utf-8') as f: json.dump(updates, f, ensure_ascii=False, indent=2) print(f"成功处理 {len(updates)} 个岗位的数据") print("已保存到 resume_updates.json") # 显示第一个更新作为示例 if updates: print("\n示例数据(第一个岗位):") print(json.dumps(updates[0], ensure_ascii=False, indent=2)[:1000])