# Source listing: ALL-teach_sys/frontend_环保/convert_env_resume_data.py
# (Python, 360 lines, 12 KiB; captured from a repository file viewer.)

import json
import re
import os
# Load the environmental-protection position resume records.
with open('网页未导入数据/环保产业/环保岗位简历.json', 'r', encoding='utf-8') as resume_file:
    env_data = json.loads(resume_file.read())

# Load the interview-question records (the file covers all industries).
with open('网页未导入数据/岗位群面试题(全产业).json', 'r', encoding='utf-8') as interview_file:
    interview_data = json.loads(interview_file.read())
# Load the revised ("modified") resumes, one Markdown file per position.
def load_modified_resumes(directory):
    """Read every ``*.md`` file in *directory* into a name -> text mapping.

    The key is the file name with its trailing ``.md`` removed.  The file
    ``环评工程师(环境影响评价工程师).md`` is additionally stored under the short
    key ``环评工程师`` so that either spelling of the position name finds it.

    Args:
        directory: Path of the folder that holds the revised resume files.

    Returns:
        A dict mapping position name to the file's text.  It is empty when
        *directory* is not an existing directory.
    """
    suffix = '.md'
    long_name = '环评工程师(环境影响评价工程师)'
    resumes = {}
    if not os.path.isdir(directory):
        return resumes
    # Sorted so the result (and which file wins a shared key) does not
    # depend on the order the operating system lists the directory in.
    for filename in sorted(os.listdir(directory)):
        path = os.path.join(directory, filename)
        if not filename.endswith(suffix) or not os.path.isfile(path):
            continue
        # Strip only the trailing extension; str.replace would also remove
        # a '.md' that happens to appear in the middle of the name.
        position = filename[:-len(suffix)]
        with open(path, 'r', encoding='utf-8') as resume_file:
            content = resume_file.read()
        if position == long_name:
            # Store both spellings so a lookup by the short name matches too.
            resumes['环评工程师'] = content
        resumes[position] = content
    return resumes


modified_resume_dir = '网页未导入数据/环保产业/环保修改版简历'
modified_resumes = load_modified_resumes(modified_resume_dir)
print(f"已加载{len(modified_resumes)}个修改版简历:{list(modified_resumes.keys())}")
# Bucket the resume records by position group.  Only the ten groups listed
# here are kept; a record whose "简历岗位群" value is anything else is skipped.
position_groups = {
    group_name: []
    for group_name in (
        "环境规划",
        "节能与碳管理",
        "设备运维与支持",
        "生态修复",
        "污染治理",
        "资源回收与再生",
        "报告输出与管理",
        "环境监测",
        "环境体系咨询",
        "实验室检测",
    )
}

for record in env_data:
    bucket = position_groups.get(record.get("简历岗位群", ""))
    if bucket is not None:
        bucket.append(record)
# Collect the interview-question text of each position group, restricted to
# records whose "所属就业管家" field equals "环保".  The first record seen
# for a group wins; later records for the same group are ignored.
env_interview_questions = {}
for entry in interview_data:
    if entry.get("所属就业管家") != "环保":
        continue
    env_interview_questions.setdefault(
        entry.get("简历岗位群", ""),
        entry.get("面试题内容", ""),
    )
# Build the JavaScript source text, starting with the `industries` array.

# Substrings that mark a line as the start of an answer.
ANSWER_MARKERS = ('答案', '参考', '回答', '回复')


def escape_js_string(text):
    """Escape *text* for use inside a JS double-quoted or template literal.

    Backslashes, double quotes, backticks and ``${`` are backslash-escaped,
    and newlines become single spaces.  ``None`` yields an empty string;
    any other non-string value is converted with ``str`` first.
    """
    if text is None:
        return ""
    return (
        str(text)
        .replace('\\', '\\\\')  # must run first so later escapes are not doubled
        .replace('"', '\\"')
        .replace('`', '\\`')
        .replace('${', '\\${')
        .replace('\n', ' ')
    )


def parse_interview_questions(content):
    """Split a block of interview text into ``(question, answer)`` pairs.

    A line that starts with a digit and contains a ``.`` opens a new
    question (its leading ``N.`` and an optional ``问题:`` prefix are
    removed).  A line containing one of ``ANSWER_MARKERS`` starts the
    answer; text after the first ASCII or full-width colon on that line is
    taken as the answer's beginning.  Following non-empty lines are appended
    to the answer, separated by spaces, until a blank line ends it.

    Args:
        content: The raw interview text; falsy values give an empty list.

    Returns:
        A list of ``(question, answer)`` tuples.  Questions that never
        received an answer are dropped.
    """
    pairs = []
    if not content:
        return pairs
    current_q = ""
    current_a = ""
    in_answer = False
    for raw_line in content.split('\n'):
        line = raw_line.strip()
        if line and line[0].isdigit() and '.' in line:
            # A new question begins: flush the previous pair if complete.
            if current_q and current_a:
                pairs.append((current_q, current_a))
            current_q = re.sub(r'^\d+\.\s*', '', line)
            current_q = re.sub(r'^问题[::]\s*', '', current_q)
            current_a = ""
            in_answer = False
        elif any(marker in line for marker in ANSWER_MARKERS):
            in_answer = True
            # The answer may start on the marker line itself, after a colon.
            parts = re.split(r'[::]', line, maxsplit=1)
            if len(parts) > 1 and parts[1].strip():
                current_a = parts[1].strip()
        elif in_answer and line:
            current_a = f"{current_a} {line}" if current_a else line
        elif in_answer and current_a:
            # Blank line after some answer text: treat the answer as finished.
            in_answer = False
    if current_q and current_a:
        pairs.append((current_q, current_a))
    return pairs


def join_js_entries(entries):
    """Join rendered entries with ``,`` + newline and end with a newline.

    Returns an empty string when *entries* is empty.
    """
    if not entries:
        return ""
    return ",\n".join(entries) + "\n"


def render_position(industry_id, industry_name, number, pos):
    """Render one position record as a JS object literal.

    Args:
        industry_id: Identifier of the owning position group.
        industry_name: Display name of the owning position group.
        number: 1-based index of the position inside its group.
        pos: The resume record (a dict) describing the position.
    """
    position_name = escape_js_string(pos.get("岗位名称", ""))
    level = escape_js_string(pos.get("岗位等级标签", "普通岗"))
    avatar = escape_js_string(pos.get("简历头像url", ""))
    return "\n".join([
        ' {',
        f'id: "{industry_id}_{number}",',
        f'title: "{position_name}",',
        f'level: "{level}",',
        f'avatar: "{avatar}",',
        f'department: "{industry_name}",',
        'type: "全职",',
        'experience: "1-3年",',
        'education: "大专",',
        'salary: "6-10K",',
        'location: "苏州",',
        'updateTime: "2024-01-20",',
        f'description: "{position_name}职位描述",',
        'requirements: [',
        f'"熟悉{industry_name}相关工作",',
        '"具有良好的沟通能力",',
        '"有相关工作经验优先"',
        ']',
        '}',
    ])


def render_sub_question(number, question, answer):
    """Render one question/answer pair as a JS object literal."""
    return (
        ' {\n'
        f'id: "q{number}",\n'
        f'question: "{escape_js_string(question)}",\n'
        f'answer: `{escape_js_string(answer)}`\n'
        '}'
    )


def render_industry(industry_id, industry_name, positions, interview_content):
    """Render one position group (its positions and questions) as JS.

    The question wrapper object is always emitted, with an empty
    ``subQuestions`` array when there is no interview text, so the generated
    brackets stay balanced for every group.
    """
    position_entries = [
        render_position(industry_id, industry_name, number, pos)
        for number, pos in enumerate(positions, start=1)
    ]
    question_entries = [
        render_sub_question(number, question, answer)
        for number, (question, answer) in enumerate(
            parse_interview_questions(interview_content), start=1
        )
    ]
    return "".join([
        f' {{\nid: "{industry_id}",\nname: "{industry_name}",\npositions: [\n',
        join_js_entries(position_entries),
        ' ],\nquestions: [\n{\n',
        f' id: "{industry_id}_q1",\n',
        f'question: "{industry_name}岗位群面试题",\n',
        'subQuestions: [\n',
        join_js_entries(question_entries),
        ' ]\n}\n]\n',
        ' }',
    ])


js_output = """// 岗位群列表
const industries = [
"""

# (JS identifier, position-group name) for every group, in output order.
industry_configs = [
    ("environment_planning", "环境规划"),
    ("energy_carbon", "节能与碳管理"),
    ("equipment_maintenance", "设备运维与支持"),
    ("ecological_restoration", "生态修复"),
    ("pollution_control", "污染治理"),
    ("resource_recycling", "资源回收与再生"),
    ("report_management", "报告输出与管理"),
    ("environment_monitoring", "环境监测"),
    ("environment_consulting", "环境体系咨询"),
    ("laboratory_testing", "实验室检测"),
]

js_output += join_js_entries([
    render_industry(
        industry_id,
        industry_name,
        position_groups.get(industry_name, []),
        env_interview_questions.get(industry_name, ""),
    )
    for industry_id, industry_name in industry_configs
])
# Close the `industries` array and build the `resumeTemplates` object.

# Section patterns for the resume Markdown.  Both ASCII and full-width
# parentheses / colons are accepted in the headings.
PROJECT_DUTIES_RE = re.compile(
    r'###\s*[((]五[))]\s*岗位职责[::](.*?)(?=\n#|$)', re.DOTALL
)
CORE_SKILLS_RE = re.compile(
    r'###\s*[((]一[))]\s*垂直能力(.*?)(?=###|\n#|$)', re.DOTALL
)
COMPOUND_SKILLS_RE = re.compile(
    r'###\s*[((]二[))]\s*复合能力(.*?)(?=\n#|$)', re.DOTALL
)
PERSONAL_SUMMARY_RE = re.compile(r'#\s*三、\s*个人评价(.*?)$', re.DOTALL)
NUMBERED_ITEM_RE = re.compile(r'\d+\.\s*(.*?)(?=\n\d+\.|$)', re.DOTALL)


def clean_text(text):
    """Escape *text* for a JS double-quoted string or template literal.

    Backslashes, double quotes, backticks and ``${`` are backslash-escaped;
    carriage returns and newlines become the two-character sequences
    ``\\r`` and ``\\n``.  ``None`` yields an empty string and other
    non-string values are converted with ``str`` first.
    """
    if text is None:
        return ""
    return (
        str(text)
        .replace('\\', '\\\\')  # must run first so later escapes are not doubled
        .replace('"', '\\"')
        .replace('`', '\\`')
        .replace('${', '\\${')
        .replace('\r', '\\r')
        .replace('\n', '\\n')
    )


def extract_numbered_items(section_text, limit=5):
    """Return up to *limit* stripped ``N. ...`` items found in *section_text*."""
    return [item.strip() for item in NUMBERED_ITEM_RE.findall(section_text)][:limit]


def parse_resume_sections(resume_content):
    """Pull the structured sections out of a resume's Markdown text.

    Returns:
        A tuple ``(project_desc, core_skills, compound_skills,
        personal_summary)``.  A section that is not found is returned as an
        empty string or empty list.
    """
    project_desc = ""
    core_skills = []
    compound_skills = []
    personal_summary = ""
    if resume_content:
        found = PROJECT_DUTIES_RE.search(resume_content)
        if found:
            project_desc = found.group(1).strip()
        found = CORE_SKILLS_RE.search(resume_content)
        if found:
            core_skills = extract_numbered_items(found.group(1))
        found = COMPOUND_SKILLS_RE.search(resume_content)
        if found:
            compound_skills = extract_numbered_items(found.group(1))
        found = PERSONAL_SUMMARY_RE.search(resume_content)
        if found:
            personal_summary = found.group(1).strip()
    return project_desc, core_skills, compound_skills, personal_summary


def find_modified_resume(position_name, resumes, fallback):
    """Look up the revised resume for *position_name*.

    Tries the exact name first.  If that misses and the name contains an
    opening parenthesis (ASCII or full-width), the part before it is tried;
    the short name ``环评工程师`` falls back to its long spelling.  When
    nothing is found, *fallback* is returned.
    """
    content = resumes.get(position_name)
    if not content:
        short_name = re.split(r'[((]', position_name, maxsplit=1)[0]
        if short_name != position_name:
            content = resumes.get(short_name)
        elif position_name == '环评工程师':
            content = resumes.get('环评工程师(环境影响评价工程师)')
    return content or fallback


def render_skill_lines(skills):
    """Render skills as quoted JS array items, one per line."""
    if not skills:
        return ""
    return ",\n".join(f' "{clean_text(skill)}"' for skill in skills) + "\n"


def render_resume_template(pos, resumes):
    """Render one resume record as a JS object literal.

    Args:
        pos: The resume record (a dict) for one position.
        resumes: Mapping of position name to revised resume text.
    """
    # `or ""` guards against explicit nulls in the JSON input.
    position_name = pos.get("岗位名称", "") or ""
    level = pos.get("岗位等级标签", "普通岗")
    resume_content = pos.get("简历内容", "") or ""
    project_name = pos.get("对应项目案例名称", "")
    avatar = pos.get("简历头像url", "")

    project_desc, core_skills, compound_skills, personal_summary = (
        parse_resume_sections(resume_content)
    )
    # Use the original resume when no revised version exists.
    modified_content = find_modified_resume(position_name, resumes, resume_content)

    return "".join([
        ' {\n',
        f'position: "{clean_text(position_name)}",\n',
        f'level: "{clean_text(level)}",\n',
        f'avatar: "{clean_text(avatar)}",\n',
        'content: {\n',
        f'original: `{clean_text(resume_content)}`,\n',
        f'modified: `{clean_text(modified_content)}`\n',
        '},\n',
        'studentInfo: {\n',
        'project_experience: {\n',
        f'project_name: "{clean_text(project_name)}",\n',
        f'position: "{clean_text(position_name)}",\n',
        'time_period: "XXXXXX",\n',
        'company: "XXXXXX",\n',
        f'description: `{clean_text(project_desc)}`\n',
        '},\n',
        'core_skills: [\n',
        render_skill_lines(core_skills),
        ' ],\n',
        'compound_skills: [\n',
        render_skill_lines(compound_skills),
        ' ],\n',
        f'personal_summary: "{clean_text(personal_summary)}"\n',
        '}\n',
        '}',
    ])


js_output += """];
// 简历模板数据
const resumeTemplates = {
"""

template_groups = []
for industry_id, industry_name in industry_configs:
    rendered = [
        render_resume_template(pos, modified_resumes)
        for pos in position_groups.get(industry_name, [])
    ]
    group_body = (",\n".join(rendered) + "\n") if rendered else ""
    template_groups.append(f' "{industry_name}": [\n' + group_body + " ]")
if template_groups:
    js_output += ",\n".join(template_groups) + "\n"
# Static tail of the generated module: it closes `resumeTemplates`, defines
# the fixed `myResume` sample and exports the combined mock data.
static_js_tail = """};
// 我的简历数据
const myResume = {
personalInfo: {
name: "邓沐",
phone: "138****8888",
email: "dengmu@example.com",
age: 22,
education: "苏州农业职业技术学院 生态环境修复技术 2021.9-2024.6",
experience: "1年",
location: "苏州"
},
workExperience: [
{
company: "某环保科技公司",
position: "环境监测实习生",
duration: "2023.06-2024.01",
description: "负责环境监测数据采集、分析和报告编制工作"
}
],
skills: ["环境监测", "数据分析", "报告编制", "AutoCAD", "GIS"],
projects: [
{
name: "某工业园区环境影响评价项目",
role: "环评助理",
duration: "2023.09-2024.01",
description: "协助完成环境影响评价报告的编制和现场踏勘工作"
}
]
};
// 获取页面mock数据的函数
export function getMockPageData() {
return resumeInterviewMockData;
}
// 导出合并的数据
export const resumeInterviewMockData = {
industries,
resumeTemplates,
myResume
};
"""
js_output = js_output + static_js_tail

# Write the assembled JavaScript source to the mock-data file.
output_path = 'src/mocks/resumeInterviewMock_env.js'
with open(output_path, 'w', encoding='utf-8') as out_file:
    out_file.write(js_output)

print("转换完成!已生成 src/mocks/resumeInterviewMock_env.js")