Files
online_sys/frontend_大健康/update_resume_data.py
KQL a7242f0c69 Initial commit: 教务系统在线平台
- 包含4个产业方向的前端项目:智能开发、智能制造、大健康、财经商贸
- 已清理node_modules、.yoyo等大文件,项目大小从2.6GB优化至631MB
- 配置完善的.gitignore文件

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2025-12-12 18:16:55 +08:00

91 lines
3.6 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import json
import re
# Read the extracted data from the JSON file into `extracted_data`.
with open('extracted_resume_data.json', 'r', encoding='utf-8') as source_file:
    extracted_data = json.load(source_file)
# A list-item marker is either
#   * a 1-2 digit number plus a dot at the start of a line, or
#   * anywhere else, a number plus a dot that is not part of a decimal,
#     version or date such as "3.8" or "2024.03" (no digit/dot right before
#     the number and no digit right after the dot).
# Splitting on a bare r'\d+\.' would cut "Python 3.8" into two bogus items.
_ITEM_MARKER = re.compile(
    r'^[ \t]*\d{1,2}\.|(?<![\d.])\d+\.(?!\d)',
    re.MULTILINE,
)

# Look-alike spaces (no-break space, ideographic space) -> plain ASCII space.
# NOTE(review): the original code called .replace(' ', ' '), which is a no-op
# as written; it presumably targeted one of these characters -- confirm.
_SPACE_TABLE = str.maketrans({'\u00a0': ' ', '\u3000': ' '})


def _extract_field(label, content):
    """Return the single-line value after ``label`` and a colon, or ""."""
    # Accept both the ASCII colon and the full-width colon (U+FF1A).
    match = re.search(re.escape(label) + r'[:\uff1a](.+?)(?:\n|$)', content)
    return match.group(1).strip() if match else ""


def _split_numbered_items(text):
    """Split a numbered list ("1. a 2. b") into cleaned, non-empty items."""
    # Everything before the first marker is not an item, hence the [1:].
    pieces = _ITEM_MARKER.split(text)[1:]
    # Line breaks inside an item are removed so each item is a single line.
    cleaned = (piece.strip().replace('\n', '') for piece in pieces)
    return [item for item in cleaned if item]


def _find_numbered_section(header_pattern, terminator_pattern, content):
    """Return the items of the numbered list that follows a section header.

    ``header_pattern`` matches the section title; ``terminator_pattern`` is
    the lookahead alternation that ends the list. Returns [] if not found.
    """
    section = re.search(
        header_pattern + r'[\s]*\n((?:\d+\..+?(?=' + terminator_pattern + r'))+)',
        content,
        re.DOTALL,
    )
    return _split_numbered_items(section.group(1)) if section else []


def parse_resume_content(content: str) -> dict:
    """Parse resume text into project experience, skills and summary.

    Args:
        content: Resume text. Expected to contain "label: value" lines for
            项目名称 / 实习岗位 / 实习时间 / 实习单位 and numbered lists under
            the 岗位职责 (or 项目职责), 核心能力 and 复合能力 headings, plus a
            个人总结 (or 个人评价) section running to the end of the text.

    Returns:
        A dict with the keys ``project_experience`` (a dict with
        ``project_name``, ``position``, ``time_period``, ``company`` and
        ``description``), ``core_skills`` (list of str), ``compound_skills``
        (list of str) and ``personal_summary`` (str). Anything that cannot
        be found is returned as an empty string or empty list.
    """
    # Duties and core skills end at a blank line, a "#" heading or the end.
    duties = _find_numbered_section(
        r'(?:岗位职责|项目职责)', r'\n#|\n\n|\Z', content)
    core_skills = [
        skill.translate(_SPACE_TABLE)
        for skill in _find_numbered_section(r'核心能力', r'\n#|\n\n|\Z', content)
    ]
    # Compound skills may span blank lines; only a "#" heading or the end of
    # the text terminates the list.
    compound_skills = _find_numbered_section(r'复合能力', r'\n#|\Z', content)

    # The personal summary is everything after its heading up to the end.
    summary_match = re.search(
        r'(?:个人总结|个人评价)[\s]*\n(.+?)(?:\Z)', content, re.DOTALL)
    personal_summary = summary_match.group(1).strip() if summary_match else ""

    return {
        'project_experience': {
            'project_name': _extract_field('项目名称', content),
            'position': _extract_field('实习岗位', content),
            'time_period': _extract_field('实习时间', content),
            'company': _extract_field('实习单位', content),
            'description': '\n'.join(duties),
        },
        'core_skills': core_skills,
        'compound_skills': compound_skills,
        'personal_summary': personal_summary,
    }
# Build one update record per position that has resume content.
updates = []
for position_name, record in extracted_data.items():
    resume_text = record.get('简历内容', '')
    if not resume_text:
        # Positions without resume content produce no update.
        continue
    # Shape of the record: the position name plus the parsed student info.
    updates.append({
        'position': position_name,
        'studentInfo': parse_resume_content(resume_text),
    })

# Save the records as JSON for later processing.
with open('resume_updates.json', 'w', encoding='utf-8') as output_file:
    json.dump(updates, output_file, ensure_ascii=False, indent=2)

print(f"成功处理 {len(updates)} 个岗位的数据")
print("已保存到 resume_updates.json")

# Show the first record (truncated to 1000 characters) as a sample.
if updates:
    print("\n示例数据(第一个岗位):")
    sample_json = json.dumps(updates[0], ensure_ascii=False, indent=2)
    print(sample_json[:1000])