91 lines
3.6 KiB
Python
91 lines
3.6 KiB
Python
|
|
#!/usr/bin/env python3
|
|||
|
|
# -*- coding: utf-8 -*-
|
|||
|
|
|
|||
|
|
import json
|
|||
|
|
import re
|
|||
|
|
|
|||
|
|
# Load the previously extracted resume data (a JSON document) from disk.
with open('extracted_resume_data.json', mode='r', encoding='utf-8') as source_file:
    extracted_data = json.loads(source_file.read())
|
|||
|
|
|
|||
|
|
# A list-item marker ("1.", "12.") is recognised in two situations:
#   * at the start of a line (optionally indented), or
#   * inline, provided it is not part of a decimal/version number, i.e. it is
#     neither followed by a digit ("3.5") nor preceded by a digit or by
#     "digit + dot" (the trailing "5." in "3.5.").
# Whitespace following the marker is consumed as part of it.
_ITEM_MARKER = re.compile(
    r'(?:^[ \t]*\d+\.|(?<!\d)(?<!\d\.)\d+\.(?!\d))\s*',
    re.MULTILINE,
)

# Runs of ASCII, no-break, or ideographic (full-width) spaces.
_SPACE_RUN = re.compile(r'[ \u00a0\u3000]+')


def _extract_field(label, content):
    """Return the single-line value after ``label`` and a colon, or "" if absent."""
    match = re.search(label + r'[::](.+?)(?:\n|$)', content)
    return match.group(1).strip() if match else ""


def _extract_numbered_items(headings, content, *, stop_on_blank_line=True,
                            collapse_spaces=False):
    """Return the numbered list items that follow one of ``headings``.

    Args:
        headings: Regex alternation of section titles, e.g. ``'岗位职责|项目职责'``.
        content: Resume text with ``\\n`` line endings.
        stop_on_blank_line: When true the section ends at the first blank line;
            otherwise only a line starting with ``#`` (or end of text) ends it,
            which lets items be separated by blank lines.
        collapse_spaces: When true, runs of ASCII / no-break / ideographic
            spaces inside an item are collapsed to a single ASCII space.

    Returns:
        A list of item strings with markers, surrounding whitespace and
        embedded newlines removed; empty if the section is not found.
    """
    terminator = r'\n#|\n\n|\Z' if stop_on_blank_line else r'\n#|\Z'
    section_pattern = (
        r'(?:' + headings + r')[::\s]*\n(\d+\..+?)(?=' + terminator + r')'
    )
    section = re.search(section_pattern, content, re.DOTALL)
    if not section:
        return []

    items = []
    # The piece before the first marker is empty and is dropped by the
    # emptiness check below.
    for piece in _ITEM_MARKER.split(section.group(1)):
        item = piece.strip().replace('\n', '')
        if collapse_spaces:
            item = _SPACE_RUN.sub(' ', item)
        if item:
            items.append(item)
    return items


def parse_resume_content(content):
    """Parse resume text into project experience, skills and personal summary.

    The text is expected to contain labelled single-line fields
    (``项目名称``, ``实习岗位``, ``实习时间``, ``实习单位``) and sections made of
    numbered lists (``岗位职责``/``项目职责``, ``核心能力``, ``复合能力``), followed by
    a free-text ``个人总结``/``个人评价`` section that runs to the end of the text.
    Both ASCII and full-width colons are accepted after labels.

    Args:
        content: The resume text. ``None`` or an empty string yields an
            all-empty result.

    Returns:
        A dict with the keys ``project_experience`` (a dict with
        ``project_name``, ``position``, ``time_period``, ``company`` and
        ``description``), ``core_skills`` (list of str), ``compound_skills``
        (list of str) and ``personal_summary`` (str). Missing parts are
        returned as empty strings / empty lists.
    """
    # Normalise Windows line endings so the "\n"-based patterns below apply.
    content = (content or "").replace('\r\n', '\n')

    # Duty items are merged into one newline-separated description.
    duties = _extract_numbered_items(r'岗位职责|项目职责', content)

    # NOTE(review): the original applied a no-op ``replace(' ', ' ')`` to core
    # skills only; it is assumed the intent was whitespace normalisation.
    core_skills = _extract_numbered_items(
        r'核心能力', content, collapse_spaces=True
    )

    # Compound skills ("N. name:description") may be separated by blank
    # lines, so only a following "#" line or the end of text ends the section.
    compound_skills = _extract_numbered_items(
        r'复合能力', content, stop_on_blank_line=False
    )

    # The summary is everything after its heading up to the end of the text.
    summary_match = re.search(
        r'(?:个人总结|个人评价)[::\s]*\n(.+?)\Z', content, re.DOTALL
    )
    personal_summary = summary_match.group(1).strip() if summary_match else ""

    return {
        'project_experience': {
            'project_name': _extract_field(r'项目名称', content),
            'position': _extract_field(r'实习岗位', content),
            'time_period': _extract_field(r'实习时间', content),
            'company': _extract_field(r'实习单位', content),
            'description': '\n'.join(duties),
        },
        'core_skills': core_skills,
        'compound_skills': compound_skills,
        'personal_summary': personal_summary,
    }
|
|||
|
|
|
|||
|
|
# Build one update record per position, for later use on the JavaScript side.
updates = []

for position_name, data in extracted_data.items():
    content = data.get('简历内容', '')
    if not content:
        # Positions without any resume text produce no record.
        continue

    # Shape each record like the JavaScript object that will consume it.
    updates.append({
        'position': position_name,
        'studentInfo': parse_resume_content(content),
    })

# Persist the records as JSON for downstream processing.
with open('resume_updates.json', mode='w', encoding='utf-8') as out_file:
    json.dump(updates, out_file, ensure_ascii=False, indent=2)

print(f"成功处理 {len(updates)} 个岗位的数据")
print("已保存到 resume_updates.json")

# Show the first record (truncated to 1000 characters) as a sample.
if updates:
    print("\n示例数据(第一个岗位):")
    sample_text = json.dumps(updates[0], ensure_ascii=False, indent=2)
    print(sample_text[:1000])
|