Files
jiaowu-test/update_resume_data.py
KQL 561d5c286d feat: 实现日历课程点击跳转到直播间功能
- 添加日历课程详情弹窗的点击跳转功能
- 公共课直播间和课程直播间支持URL参数自动选中课程
- 优化岗位详情页面样式,复用简洁卡片样式
- 为岗位详情标题添加图标
- 调整不同类型课程的跳转逻辑

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-09-11 14:14:45 +08:00

91 lines
3.6 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import json
import re
# Load the previously extracted resume data (position name -> entry).
with open('extracted_resume_data.json', encoding='utf-8') as f:
    extracted_data = json.loads(f.read())
# Numbered-list marker such as "1." or "12.". The look-arounds stop decimals,
# version numbers and dates inside an item ("3.5倍", "Python3.8", "2024.09")
# from being mistaken for the start of the next item: a marker must not be
# preceded by a word character or a dot, nor followed by a digit.
_ITEM_MARKER = re.compile(r'(?<![\w.])\d+\.(?!\d)\s*')

# Runs of ASCII, no-break (U+00A0) and ideographic (U+3000) spaces.
_SPACE_RUN = re.compile(r'[ \u00a0\u3000]+')


def _extract_field(content, label):
    """Return the stripped value of a single-line "label:value" field, or ''."""
    # Accept both the full-width and the ASCII colon after the label.
    match = re.search(re.escape(label) + r'[::](.+?)(?:\n|$)', content)
    return match.group(1).strip() if match else ""


def _numbered_section(content, heading_pattern, terminators):
    """Return the raw numbered-list text that follows a heading, or ''."""
    pattern = heading_pattern + r'\s*\n(\d+\..+?)(?=' + terminators + ')'
    match = re.search(pattern, content, re.DOTALL)
    return match.group(1) if match else ""


def _split_numbered_items(block, normalize_spaces=False):
    """Split "1. ... 2. ..." text into a list of cleaned, non-empty items."""
    items = []
    # Element 0 is whatever precedes the first marker; it is not an item.
    for raw in _ITEM_MARKER.split(block)[1:]:
        # Wrapped lines of one item are joined without a separator.
        item = raw.strip().replace('\n', '')
        if normalize_spaces:
            item = _SPACE_RUN.sub(' ', item)
        if item:
            items.append(item)
    return items


def parse_resume_content(content):
    """Parse resume text into project experience, skills and a summary.

    Args:
        content: The resume text. Header fields are expected as
            "label:value" lines; the duties, core-skill and compound-skill
            sections are expected as a heading line followed by a
            numbered list ("1. ...", "2. ...").

    Returns:
        A dict with the keys:
            'project_experience': dict with 'project_name', 'position',
                'time_period', 'company' (stripped strings, '' if absent)
                and 'description' (duty items joined with newlines).
            'core_skills': list of core-skill strings.
            'compound_skills': list of compound-skill strings.
            'personal_summary': text after the summary heading up to the
                end of the content, stripped ('' if absent).
    """
    # Duties and core skills end at the next "#" heading, a blank line, or
    # the end of the text.
    duties_text = _numbered_section(
        content, r'(?:岗位职责|项目职责)', r'\n#|\n\n|\Z')
    description = '\n'.join(_split_numbered_items(duties_text))

    core_text = _numbered_section(content, r'核心能力', r'\n#|\n\n|\Z')
    core_skills = _split_numbered_items(core_text, normalize_spaces=True)

    # Compound skills may be separated by blank lines, so only a "#" heading
    # or the end of the text terminates this section.
    compound_text = _numbered_section(content, r'复合能力', r'\n#|\Z')
    compound_skills = _split_numbered_items(compound_text)

    # The personal summary runs from its heading to the end of the content.
    summary_match = re.search(
        r'(?:个人总结|个人评价)[\s]*\n(.+?)(?:\Z)', content, re.DOTALL)
    personal_summary = summary_match.group(1).strip() if summary_match else ""

    return {
        'project_experience': {
            'project_name': _extract_field(content, '项目名称'),
            'position': _extract_field(content, '实习岗位'),
            'time_period': _extract_field(content, '实习时间'),
            'company': _extract_field(content, '实习单位'),
            'description': description,
        },
        'core_skills': core_skills,
        'compound_skills': compound_skills,
        'personal_summary': personal_summary,
    }
# Build one update record per position whose entry carries resume text.
updates = []
for position_name, data in extracted_data.items():
    content = data.get('简历内容', '')
    if not content:
        continue
    updates.append({
        'position': position_name,
        'studentInfo': parse_resume_content(content),
    })

# Persist the records as JSON for later processing.
with open('resume_updates.json', 'w', encoding='utf-8') as f:
    f.write(json.dumps(updates, ensure_ascii=False, indent=2))

print(f"成功处理 {len(updates)} 个岗位的数据")
print("已保存到 resume_updates.json")

# Show the first record, truncated to 1000 characters, as a sample.
if updates:
    print("\n示例数据(第一个岗位):")
    sample = json.dumps(updates[0], ensure_ascii=False, indent=2)
    print(sample[:1000])