Files
ALL-teach_sys/frontend/update_resume_data.py
KQL cd2e307402 初始化12个产业教务系统项目
主要内容:
- 包含12个产业的完整教务系统前端代码
- 智能启动脚本 (start-industry.sh)
- 可视化产业导航页面 (index.html)
- 项目文档 (README.md)

优化内容:
- 删除所有node_modules和.yoyo文件夹,从7.5GB减少到2.7GB
- 添加.gitignore文件避免上传不必要的文件
- 自动依赖管理和智能启动系统

产业列表:
1. 文旅产业 (5150)
2. 智能制造 (5151)
3. 智能开发 (5152)
4. 财经商贸 (5153)
5. 视觉设计 (5154)
6. 交通物流 (5155)
7. 大健康 (5156)
8. 土木水利 (5157)
9. 食品产业 (5158)
10. 化工产业 (5159)
11. 能源产业 (5160)
12. 环保产业 (5161)

🤖 Generated with Claude Code
Co-Authored-By: Claude <noreply@anthropic.com>
2025-09-24 14:14:14 +08:00

91 lines
3.6 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import json
import re
# Load the previously extracted résumé data; the script's main loop iterates
# over this mapping via .items() and reads the '简历内容' entry of each value.
with open('extracted_resume_data.json', mode='r', encoding='utf-8') as source_file:
    extracted_data = json.loads(source_file.read())
# A list-item marker is "<digits>." either at the start of a line, or inline
# when it is clearly not part of a decimal/version number (not preceded by a
# digit or dot, and not followed by a digit).  Splitting on every "<digits>."
# would break "3.5倍" or "Python3.8" into bogus items.
_ITEM_MARKER_RE = re.compile(
    r'^[ \t]*\d+\.|(?<![\d.])\d+\.(?!\d)',
    re.MULTILINE,
)

# ASCII colon or full-width colon (U+FF1A), written as an escape so the
# pattern survives tools that normalise full-width punctuation.
_COLON = r'[:\uff1a]'


def _labelled_value(content, label):
    """Return the stripped remainder of the line that follows ``label`` and a colon."""
    found = re.search(label + _COLON + r'(.+?)(?:\n|$)', content)
    return found.group(1).strip() if found else ""


def _numbered_items(content, heading, stops):
    """Return the cleaned items of the numbered list directly below ``heading``.

    ``heading`` is a regex alternation of accepted heading texts and ``stops``
    a regex alternation of look-ahead terminators; the end of the text always
    terminates the list as well.
    """
    section = re.search(
        r'(?:' + heading + r')' + _COLON + r'?\s*\n(\d+\..+?)(?=' + stops + r'|\Z)',
        content,
        re.DOTALL,
    )
    if not section:
        return []
    # The captured text starts with a marker, so the first split piece is the
    # (empty) text before it and is dropped.
    pieces = _ITEM_MARKER_RE.split(section.group(1))[1:]
    # Items may wrap over several lines; newlines are removed, not replaced.
    cleaned = (piece.strip().replace('\n', '') for piece in pieces)
    return [item for item in cleaned if item]


def parse_resume_content(content):
    """Parse résumé text into project experience, skills and a personal summary.

    The text is expected to contain single-line fields introduced by the
    labels 项目名称 / 实习岗位 / 实习时间 / 实习单位 followed by a colon (ASCII or
    full-width), and sections headed 岗位职责 or 项目职责, 核心能力, 复合能力 and
    个人总结 or 个人评价.  The first three section kinds hold numbered lists
    ("1. ...", "2. ..."); the summary is free text running to the end.

    Args:
        content: The résumé text.  ``None`` or an empty string yields an
            all-empty result.

    Returns:
        A dict with the keys:
            'project_experience': dict with 'project_name', 'position',
                'time_period', 'company' (stripped strings, "" when absent)
                and 'description' (the duty items joined by newlines).
            'core_skills': list of item strings.
            'compound_skills': list of item strings.
            'personal_summary': stripped summary text, "" when absent.
    """
    # Normalise Windows line endings so the blank-line section terminators
    # and end-of-line field captures behave the same as for "\n" input.
    content = (content or "").replace('\r\n', '\n')

    # Duties and core skills end at the next "#" heading or a blank line.
    duties = _numbered_items(content, '岗位职责|项目职责', r'\n#|\n\n')

    core_skills = [
        # Non-breaking and ideographic spaces become ordinary spaces.
        skill.replace('\u00a0', ' ').replace('\u3000', ' ')
        for skill in _numbered_items(content, '核心能力', r'\n#|\n\n')
    ]

    # Compound skills may be separated by blank lines, so a blank line does not
    # end the section; a "#" heading or the summary heading does.  Without the
    # latter stop, an un-hashed summary heading and its text would be absorbed
    # into the last compound skill.
    compound_skills = _numbered_items(
        content,
        '复合能力',
        r'\n#|\n[ \t]*(?:个人总结|个人评价)',
    )

    summary_match = re.search(
        r'(?:个人总结|个人评价)' + _COLON + r'?\s*\n(.+?)\Z',
        content,
        re.DOTALL,
    )
    personal_summary = summary_match.group(1).strip() if summary_match else ""

    return {
        'project_experience': {
            'project_name': _labelled_value(content, '项目名称'),
            'position': _labelled_value(content, '实习岗位'),
            'time_period': _labelled_value(content, '实习时间'),
            'company': _labelled_value(content, '实习单位'),
            'description': '\n'.join(duties),
        },
        'core_skills': core_skills,
        'compound_skills': compound_skills,
        'personal_summary': personal_summary,
    }
# Build one update record per position, in the shape used for the JavaScript
# side: {'position': <name>, 'studentInfo': <parsed résumé>}.
updates = []
for job_title, record in extracted_data.items():
    resume_text = record.get('简历内容', '')
    if not resume_text:
        # Entries without résumé text produce no update.
        continue
    updates.append({
        'position': job_title,
        'studentInfo': parse_resume_content(resume_text),
    })

# Save as JSON for later processing.
with open('resume_updates.json', 'w', encoding='utf-8') as out_file:
    out_file.write(json.dumps(updates, ensure_ascii=False, indent=2))

print(f"成功处理 {len(updates)} 个岗位的数据")
print("已保存到 resume_updates.json")

# Show the first update (truncated to 1000 characters) as a sample.
if updates:
    print("\n示例数据(第一个岗位):")
    preview = json.dumps(updates[0], ensure_ascii=False, indent=2)
    print(preview[:1000])