#!/usr/bin/env python3
# -*- coding: utf-8 -*-
|
|||
|
|
|
|||
|
|
import json
|
|||
|
|
import re
|
|||
|
|
|
|||
|
|
def extract_sections(content):
    """Split a project write-up into overview, process and key-point text.

    The standard layout uses three numbered headings
    (``# 一、项目概述``, ``# 二、项目整体流程介绍``, ``# 三、项目案例关键技术点``).
    The heading lines themselves are never included in the returned text.

    Fallbacks, applied in order when the standard headings are missing:

    1. If none of the three sections was found, split on ``#``/``##``
       headings and classify each section by keywords it contains.
    2. If there is still no overview, use the first 500 characters
       (with ``"..."`` appended when the content is longer).
    3. If neither process nor key points were found, use whatever follows
       the overview text in ``content`` as the process.

    Args:
        content: Raw text of the project case. ``None`` or ``""`` is allowed.

    Returns:
        A ``(overview, process, keyPoints)`` tuple of strings; any element
        may be empty.
    """
    overview = ""
    process = ""
    keyPoints = ""

    if not content:
        return overview, process, keyPoints

    # Offset in ``content`` just past the text used as the overview.
    # Needed so the "remaining content" fallback starts at the right place
    # instead of assuming the overview sits at offset 0.
    overview_end = None

    # Overview: everything between heading one and heading two (or the end).
    overview_match = re.search(
        r'#\s*一、\s*项目概述(.*?)(?=#\s*二、|$)', content, re.DOTALL
    )
    if overview_match:
        overview = overview_match.group(1).strip()
        if overview:
            overview_end = overview_match.end(1)

    # Process: everything between heading two and heading three (or the end).
    process_match = re.search(
        r'#\s*二、\s*项目整体流程介绍(.*?)(?=#\s*三、|$)', content, re.DOTALL
    )
    if process_match:
        process = process_match.group(1).strip()

    # Key points: everything after heading three.
    keypoints_match = re.search(
        r'#\s*三、\s*项目案例关键技术点(.*?)$', content, re.DOTALL
    )
    if keypoints_match:
        keyPoints = keypoints_match.group(1).strip()

    # Fallback 1: no standard heading matched, so classify generic
    # level-1/level-2 markdown sections by keyword.
    if not overview and not process and not keyPoints:
        for section in re.split(r'^#{1,2}\s+', content, flags=re.MULTILINE):
            if not overview and (
                '概述' in section or '背景' in section or '简介' in section
            ):
                # Drop the heading line and keep the rest as the overview.
                lines = section.split('\n')
                overview = '\n'.join(lines[1:]) if len(lines) > 1 else section
                overview = overview.strip()
            elif not process and (
                '流程' in section or '步骤' in section
                or '实施' in section or '方法' in section
            ):
                process = section.strip()
            elif not keyPoints and (
                '关键' in section or '要点' in section
                or '技术' in section or '成果' in section
            ):
                keyPoints = section.strip()

    # Fallback 2: use the first 500 characters as the overview.
    if not overview:
        overview = content[:500].strip()
        overview_end = min(len(content), 500)
        if len(content) > 500:
            overview += "..."

    # Fallback 3: no explicit process or key points, so whatever follows the
    # overview becomes the process.
    if not process and not keyPoints:
        if overview_end is None:
            # Overview came from the keyword fallback; locate it in content.
            found_at = content.find(overview)
            overview_end = found_at + len(overview) if found_at != -1 else 0
        remaining = content[overview_end:].strip()
        if remaining:
            process = remaining

    return overview, process, keyPoints
|
|||
|
|
|
|||
|
|
# Default locations used when the script is run without arguments.
_DEFAULT_CASES_PATH = '网页未导入数据/土木水利产业/土木水利项目案例.json'
_DEFAULT_MOCK_PATH = 'src/mocks/projectLibraryMock.js'

# JavaScript emitted before the project-list JSON array.
_JS_LIST_HEAD = """// 项目库Mock数据
export const getMockProjectsList = (params = {}) => {
  const { search = "", page = 1, pageSize = 10 } = params;

  // 完整项目列表数据
  const projects = """

# JavaScript emitted between the list array and the detail array.
_JS_LIST_TAIL_AND_DETAIL_HEAD = """  // 根据搜索条件过滤
  let filteredProjects = projects;
  if (search) {
    filteredProjects = projects.filter(project =>
      project.name.toLowerCase().includes(search.toLowerCase()) ||
      project.description.toLowerCase().includes(search.toLowerCase())
    );
  }

  // 分页处理
  const startIndex = (page - 1) * pageSize;
  const endIndex = startIndex + pageSize;
  const paginatedProjects = filteredProjects.slice(startIndex, endIndex);

  return {
    success: true,
    data: paginatedProjects,
    total: filteredProjects.length,
    page: page,
    pageSize: pageSize
  };
};

// 获取项目详情
export const getMockProjectDetail = (id) => {
  // 直接根据ID返回对应项目的详情
  const projects = """

# JavaScript emitted after the project-detail JSON array.
_JS_DETAIL_TAIL = """;

  const project = projects.find(p => p.id === parseInt(id));

  if (project) {
    return {
      success: true,
      data: project
    };
  } else {
    return {
      success: false,
      message: "项目不存在"
    };
  }
};
"""


def _position_level(position_name):
    """Return the level label for a position, chosen by keywords in its name."""
    if '助理' in position_name or '实习' in position_name:
        return "实习生岗"
    if '经理' in position_name or '主管' in position_name:
        return "储备干部岗"
    return "技术骨干岗"


def _build_project_records(idx, item):
    """Convert one source case dict into its (list entry, detail entry) pair."""
    project_name = item.get('案例名称', '')
    direction = item.get('所属垂直方向', '综合项目')
    content = item.get('项目案例内容', '')
    units = item.get('对应单元名称(垂直能力课)', '')
    positions = item.get('对应个人简历名称', '')

    # Comma-separated position names; blanks left by stray commas are skipped
    # so they do not become empty position entries.
    position_list = []
    if positions:
        position_list = [p.strip() for p in positions.split(',') if p.strip()]
    position_detail_list = [
        {"level": _position_level(pos), "position": pos}
        for pos in position_list
    ]

    # The first listed unit wins; fall back to the direction when none given.
    unit_name = units.split(',')[0] if units else direction

    list_item = {
        "id": idx,
        "name": project_name,
        "description": direction,
        "positions": position_list,
        "unit": unit_name,
        "direction": direction,
        "category": direction.split('与')[0] if '与' in direction else direction,
    }

    # Section text without the heading lines.
    overview, process, keyPoints = extract_sections(content)

    # Placeholder text for any section that could not be extracted.
    if not overview:
        overview = f"{project_name}是{direction}领域的重要实践项目,通过本项目的实施,学生能够掌握相关的专业技能和实践经验。"
    if not process:
        process = f"本项目按照标准的{direction}流程进行实施,包括需求分析、方案设计、实施执行、测试验收等关键环节。"
    if not keyPoints:
        # Real newlines here: json.dumps escapes them, so the generated JS
        # string contains line breaks rather than a literal backslash-n.
        keyPoints = (
            f"1. 掌握{direction}的核心技术\n"
            "2. 熟悉项目实施的完整流程\n"
            "3. 培养解决实际问题的能力\n"
            "4. 提升团队协作和沟通能力"
        )

    detail_item = {
        "id": idx,
        "name": project_name,
        "positions": position_detail_list,
        "unit": unit_name,
        "overview": overview,
        "process": process,
        "keyPoints": keyPoints,
    }
    return list_item, detail_item


def _render_mock_module(projects_list, projects_detail):
    """Render the JavaScript mock module text embedding both JSON arrays."""
    return "".join([
        _JS_LIST_HEAD,
        json.dumps(projects_list, ensure_ascii=False, indent=2),
        ";\n\n",
        _JS_LIST_TAIL_AND_DETAIL_HEAD,
        json.dumps(projects_detail, ensure_ascii=False, indent=2),
        _JS_DETAIL_TAIL,
    ])


def remove_duplicate_titles(input_path=_DEFAULT_CASES_PATH,
                            output_path=_DEFAULT_MOCK_PATH):
    """Generate the project-library mock JS module from the case JSON file.

    Reads a JSON array of project cases, converts each case into a list entry
    and a detail entry (ids start at 1, in file order), and writes a
    JavaScript module exporting ``getMockProjectsList`` and
    ``getMockProjectDetail``. Progress for the first three projects is
    printed to stdout.

    Args:
        input_path: Path of the source JSON file. Defaults to the path the
            script originally hard-coded.
        output_path: Path of the JavaScript file to write. Defaults to the
            path the script originally hard-coded.

    Raises:
        OSError: If the input cannot be read or the output cannot be written.
        json.JSONDecodeError: If the input file is not valid JSON.
    """
    with open(input_path, 'r', encoding='utf-8') as f:
        civil_data = json.load(f)

    print(f"开始处理项目详情数据,移除重复标题,共{len(civil_data)}个项目")

    projects_list = []
    projects_detail = []

    for idx, item in enumerate(civil_data, 1):
        list_item, detail_item = _build_project_records(idx, item)
        projects_list.append(list_item)
        projects_detail.append(detail_item)

        # Show a short sample so the output can be checked by eye.
        if idx <= 3:
            process = detail_item["process"]
            print(f"\n项目{idx}: {detail_item['name']}")
            print(f" - overview长度: {len(detail_item['overview'])}")
            print(f" - process长度: {len(process)}")
            print(f" - keyPoints长度: {len(detail_item['keyPoints'])}")
            # The first 100 characters show whether a heading slipped through.
            print(f" - process开头: {process[:100]}...")

    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(_render_mock_module(projects_list, projects_detail))

    print("\n✅ 项目详情数据已处理完成")
    print(" - 已移除重复的标题")
    print(" - process和keyPoints字段现在直接包含内容,没有多余的标题")
|
|||
|
|
|
|||
|
|
# Run the conversion only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    remove_duplicate_titles()
|