Files
online_sys/frontend_智能制造/update_resume_data.py

91 lines
3.6 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import json
import re
# Load the previously extracted resume data from disk.
with open('extracted_resume_data.json', 'r', encoding='utf-8') as source_file:
    extracted_data = json.loads(source_file.read())
# Matches the "N." marker that opens a numbered item.  A marker is either at
# the start of a line, or inline but not part of a decimal/version number, so
# text such as "Python 3.8" or "1.5倍" inside an item does not split it.
_ITEM_MARKER = re.compile(r'^[ \t]*\d+\.|(?<![\d.])\d+\.(?!\d)', re.MULTILINE)


def _extract_field(label, content):
    """Return the stripped text after ``label:`` on its line, or "" if absent."""
    # Accept both the ASCII colon and the full-width colon (U+FF1A).
    match = re.search(label + r'[:\uff1a](.+?)(?:\n|$)', content)
    return match.group(1).strip() if match else ""


def _split_numbered_items(text):
    """Split a "1. ... 2. ..." block into a list of cleaned, non-empty items."""
    # Element 0 is whatever precedes the first marker; it is not an item.
    pieces = _ITEM_MARKER.split(text)[1:]
    cleaned = (piece.replace('\n', '').strip() for piece in pieces)
    return [item for item in cleaned if item]


def _extract_numbered_section(headings, content, stop_on_blank_line=True):
    """Return the numbered items that follow one of ``headings`` in ``content``.

    ``headings`` is a regex alternation of heading texts.  The section starts
    on the line after the heading, must begin with "N.", and ends at the next
    line starting with "#", at the end of the text, or (when
    ``stop_on_blank_line`` is true) at the first blank line.
    """
    terminators = r'\n#|\n\n|\Z' if stop_on_blank_line else r'\n#|\Z'
    pattern = r'(?:' + headings + r')[\s]*\n(\d+\..+?)(?=' + terminators + r')'
    match = re.search(pattern, content, re.DOTALL)
    return _split_numbered_items(match.group(1)) if match else []


def parse_resume_content(content):
    """Parse resume text into project experience, skills and a summary.

    Args:
        content: The resume text.  Header fields are expected as
            ``label:value`` lines, and sections as a heading line followed by
            a numbered list ("1. ...", "2. ...").

    Returns:
        A dict with the keys:
            ``project_experience``: dict with ``project_name``, ``position``,
                ``time_period``, ``company`` (each "" when missing) and
                ``description`` (the duty items joined by newlines).
            ``core_skills``: list of items under the core-skills heading.
            ``compound_skills``: list of items under the compound-skills
                heading.
            ``personal_summary``: text after the summary heading, or "".
    """
    # Normalise line endings so the "\n"-based patterns also work on CRLF text.
    content = content.replace('\r\n', '\n').replace('\r', '\n')

    duties = _extract_numbered_section(r'岗位职责|项目职责', content)
    core_skills = _extract_numbered_section(r'核心能力', content)
    # Compound-skill items may be separated by blank lines, so only a "#"
    # heading or the end of the text closes that section.
    compound_skills = _extract_numbered_section(
        r'复合能力', content, stop_on_blank_line=False
    )

    # The personal summary runs from its heading to the end of the text.
    summary_match = re.search(
        r'(?:个人总结|个人评价)[\s]*\n(.+?)(?:\Z)', content, re.DOTALL
    )
    personal_summary = summary_match.group(1).strip() if summary_match else ""

    return {
        'project_experience': {
            'project_name': _extract_field(r'项目名称', content),
            'position': _extract_field(r'实习岗位', content),
            'time_period': _extract_field(r'实习时间', content),
            'company': _extract_field(r'实习单位', content),
            'description': '\n'.join(duties),
        },
        'core_skills': core_skills,
        'compound_skills': compound_skills,
        'personal_summary': personal_summary,
    }
# Build one update record per position whose entry carries resume text.
updates = []
for position_name, data in extracted_data.items():
    content = data.get('简历内容', '')
    if not content:
        # Entries without resume text produce no update.
        continue
    updates.append({
        'position': position_name,
        'studentInfo': parse_resume_content(content),
    })

# Persist the records as JSON for later processing.
with open('resume_updates.json', 'w', encoding='utf-8') as out_file:
    json.dump(updates, out_file, ensure_ascii=False, indent=2)

print(f"成功处理 {len(updates)} 个岗位的数据")
print("已保存到 resume_updates.json")

# Show the first record (truncated to 1000 characters) as a sample.
if updates:
    print("\n示例数据(第一个岗位):")
    preview = json.dumps(updates[0], ensure_ascii=False, indent=2)
    print(preview[:1000])