#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Convert a JSON file of project cases into a JavaScript mock-data module.

Each case's free-text content is split into an overview, a process section and
a key-points section, and the results are written out as two mock API helpers
(`getMockProjectsList` and `getMockProjectDetail`).
"""
import json
import re

_DEFAULT_INPUT_PATH = '网页未导入数据/土木水利产业/土木水利项目案例.json'
_DEFAULT_OUTPUT_PATH = 'src/mocks/projectLibraryMock.js'

# Standard layout: three numbered top-level headings
#   "# 一、项目概述", "# 二、项目整体流程介绍", "# 三、项目案例关键技术点".
# Only the text *after* each heading is captured, so no heading is duplicated.
_OVERVIEW_RE = re.compile(r'#\s*一、\s*项目概述(.*?)(?=#\s*二、|$)', re.DOTALL)
_PROCESS_RE = re.compile(r'#\s*二、\s*项目整体流程介绍(.*?)(?=#\s*三、|$)', re.DOTALL)
_KEY_POINTS_RE = re.compile(r'#\s*三、\s*项目案例关键技术点(.*?)$', re.DOTALL)

# Fallback layout: split on any level-1 or level-2 markdown heading.
_HEADING_SPLIT_RE = re.compile(r'^#{1,2}\s+', re.MULTILINE)
_OVERVIEW_KEYWORDS = ('概述', '背景', '简介')
_PROCESS_KEYWORDS = ('流程', '步骤', '实施', '方法')
_KEY_POINT_KEYWORDS = ('关键', '要点', '技术', '成果')

# Number of leading characters used as the overview when nothing else matches.
_OVERVIEW_PREVIEW_CHARS = 500

# Comma-separated fields may use either the ASCII or the full-width comma.
_LIST_SEPARATOR_RE = re.compile(r'[,,]')

_JS_LIST_HEADER = """// 项目库Mock数据
export const getMockProjectsList = (params = {}) => {
  const { search = "", page = 1, pageSize = 10 } = params;

  // 完整项目列表数据
  const projects = """

_JS_LIST_FOOTER = """;

  // 根据搜索条件过滤
  let filteredProjects = projects;
  if (search) {
    filteredProjects = projects.filter(project =>
      project.name.toLowerCase().includes(search.toLowerCase()) ||
      project.description.toLowerCase().includes(search.toLowerCase())
    );
  }

  // 分页处理
  const startIndex = (page - 1) * pageSize;
  const endIndex = startIndex + pageSize;
  const paginatedProjects = filteredProjects.slice(startIndex, endIndex);

  return {
    success: true,
    data: paginatedProjects,
    total: filteredProjects.length,
    page: page,
    pageSize: pageSize
  };
};

// 获取项目详情
export const getMockProjectDetail = (id) => {
  // 直接根据ID返回对应项目的详情
  const projects = """

_JS_DETAIL_FOOTER = """;

  const project = projects.find(p => p.id === parseInt(id));

  if (project) {
    return { success: true, data: project };
  } else {
    return { success: false, message: "项目不存在" };
  }
};
"""


def _contains_any(text, keywords):
    """Return True if *text* contains at least one of *keywords*."""
    return any(keyword in text for keyword in keywords)


def extract_sections(content):
    """Split case content into overview, process and key-points text.

    Three strategies are tried in order:

    1. The standard numbered headings; only the text following each heading
       is returned, so the headings themselves are not repeated.
    2. If none of those matched, the content is split on level-1/level-2
       markdown headings and each chunk is assigned by keyword. The overview
       drops its heading line; process and key points keep the whole chunk.
    3. If there is still no overview, the first 500 characters are used
       (with "..." appended when the content is longer).

    Finally, when neither process nor key points were found, whatever follows
    the overview in *content* becomes the process text.

    Args:
        content: Raw case text; ``None`` or an empty string is accepted.

    Returns:
        A ``(overview, process, key_points)`` tuple of strings; any element
        may be empty.
    """
    overview = process = key_points = ""
    if not content:
        return overview, process, key_points

    found = _OVERVIEW_RE.search(content)
    if found:
        overview = found.group(1).strip()

    found = _PROCESS_RE.search(content)
    if found:
        process = found.group(1).strip()

    found = _KEY_POINTS_RE.search(content)
    if found:
        key_points = found.group(1).strip()

    if not (overview or process or key_points):
        for section in _HEADING_SPLIT_RE.split(content):
            if not overview and _contains_any(section, _OVERVIEW_KEYWORDS):
                # The first line of the chunk is the heading text; skip it
                # when there is anything after it.
                _heading, newline, body = section.partition('\n')
                overview = (body if newline else section).strip()
            elif not process and _contains_any(section, _PROCESS_KEYWORDS):
                process = section.strip()
            elif not key_points and _contains_any(section, _KEY_POINT_KEYWORDS):
                key_points = section.strip()

    if not overview:
        overview = content[:_OVERVIEW_PREVIEW_CHARS].strip()
        if len(content) > _OVERVIEW_PREVIEW_CHARS:
            overview += "..."

    if not process and not key_points:
        # Slice from where the overview actually ends. Slicing by
        # len(overview) alone is wrong whenever the overview does not start
        # at offset 0 (e.g. it follows a heading or leading whitespace).
        start = content.find(overview)
        if start >= 0:
            remaining = content[start + len(overview):].strip()
        else:
            # A truncated preview ("...") is not a substring of the content;
            # in that case the full content is used as the process text.
            remaining = content
        if remaining:
            process = remaining

    return overview, process, key_points


def _text_field(item, key, default=''):
    """Return ``item[key]``, falling back to *default* when missing or null."""
    value = item.get(key, default)
    return default if value is None else value


def _split_names(raw):
    """Split a comma-separated string into stripped, non-empty names."""
    stripped = (part.strip() for part in _LIST_SEPARATOR_RE.split(raw))
    return [name for name in stripped if name]


def _position_level(position):
    """Classify a position name into one of the three level labels."""
    if '助理' in position or '实习' in position:
        return "实习生岗"
    if '经理' in position or '主管' in position:
        return "储备干部岗"
    return "技术骨干岗"


def _build_project_records(idx, item):
    """Build the ``(list_item, detail_item)`` dict pair for one source case."""
    project_name = _text_field(item, '案例名称')
    direction = _text_field(item, '所属垂直方向', '综合项目')
    content = _text_field(item, '项目案例内容')
    units = _text_field(item, '对应单元名称(垂直能力课)')
    positions = _text_field(item, '对应个人简历名称')

    position_list = _split_names(positions) if positions else []
    position_detail_list = [
        {"level": _position_level(name), "position": name}
        for name in position_list
    ]

    # The first listed unit wins; with no units the direction stands in.
    unit_name = _LIST_SEPARATOR_RE.split(units)[0] if units else direction

    list_item = {
        "id": idx,
        "name": project_name,
        "description": direction,
        "positions": position_list,
        "unit": unit_name,
        "direction": direction,
        # Text before the first '与'; the whole direction when there is none.
        "category": direction.split('与')[0],
    }

    overview, process, key_points = extract_sections(content)

    # Generic placeholder text for any section that could not be extracted.
    if not overview:
        overview = f"{project_name}是{direction}领域的重要实践项目,通过本项目的实施,学生能够掌握相关的专业技能和实践经验。"
    if not process:
        process = f"本项目按照标准的{direction}流程进行实施,包括需求分析、方案设计、实施执行、测试验收等关键环节。"
    if not key_points:
        # Real newlines (not a literal backslash-n) so the list renders as
        # separate lines, like the text extracted from real content.
        key_points = f"1. 掌握{direction}的核心技术\n2. 熟悉项目实施的完整流程\n3. 培养解决实际问题的能力\n4. 提升团队协作和沟通能力"

    detail_item = {
        "id": idx,
        "name": project_name,
        "positions": position_detail_list,
        "unit": unit_name,
        "overview": overview,
        "process": process,
        "keyPoints": key_points,
    }
    return list_item, detail_item


def _render_mock_js(projects_list, projects_detail):
    """Return the JavaScript mock module source embedding both data sets."""
    return "".join([
        _JS_LIST_HEADER,
        json.dumps(projects_list, ensure_ascii=False, indent=2),
        _JS_LIST_FOOTER,
        json.dumps(projects_detail, ensure_ascii=False, indent=2),
        _JS_DETAIL_FOOTER,
    ])


def remove_duplicate_titles(input_path=_DEFAULT_INPUT_PATH,
                            output_path=_DEFAULT_OUTPUT_PATH):
    """Generate the project-library mock JS file from the case JSON file.

    Reads a JSON array of case objects from *input_path*, builds list and
    detail records for each (ids start at 1), prints a short progress report
    for the first three projects, and writes the JavaScript module to
    *output_path*.

    Args:
        input_path: Path of the UTF-8 JSON file to read.
        output_path: Path of the JavaScript file to write (overwritten).

    Raises:
        OSError: If either file cannot be opened.
        json.JSONDecodeError: If the input file is not valid JSON.
    """
    with open(input_path, 'r', encoding='utf-8') as f:
        civil_data = json.load(f)

    print(f"开始处理项目详情数据,移除重复标题,共{len(civil_data)}个项目")

    projects_list = []
    projects_detail = []
    for idx, item in enumerate(civil_data, 1):
        list_item, detail_item = _build_project_records(idx, item)
        projects_list.append(list_item)
        projects_detail.append(detail_item)

        # Show a sample so the output can be eyeballed for repeated headings.
        if idx <= 3:
            print(f"\n项目{idx}: {detail_item['name']}")
            print(f"  - overview长度: {len(detail_item['overview'])}")
            print(f"  - process长度: {len(detail_item['process'])}")
            print(f"  - keyPoints长度: {len(detail_item['keyPoints'])}")
            print(f"  - process开头: {detail_item['process'][:100]}...")

    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(_render_mock_js(projects_list, projects_detail))

    print("\n✅ 项目详情数据已处理完成")
    print("  - 已移除重复的标题")
    print("  - process和keyPoints字段现在直接包含内容,没有多余的标题")


if __name__ == "__main__":
    remove_duplicate_titles()