ALL-teach_sys/frontend_土木水利/remove_duplicate_titles.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import json
import re

def extract_sections(content):
    """Split a project-case document into overview, process and key points.

    Extraction is attempted in three stages:

    1. Look for the standard first-level headings ``# 一、项目概述``,
       ``# 二、项目整体流程介绍`` and ``# 三、项目案例关键技术点`` and take the
       text under each one, without the heading itself.
    2. If none of the three produced any text, split the document on
       level-1/level-2 markdown headings and classify each section by
       keywords found in it.
    3. If there is still no overview, use the first 500 characters of the
       document (with ``"..."`` appended when the document is longer). If
       neither a process nor key points were found, the text that follows
       the overview becomes the process.

    Args:
        content: Markdown text of one project case. May be empty or ``None``.

    Returns:
        A ``(overview, process, keyPoints)`` tuple of strings. Any element
        may be ``""`` when nothing suitable was found.
    """
    overview = ""
    process = ""
    keyPoints = ""

    if not content:
        return overview, process, keyPoints

    # Offset in ``content`` just past the text that was used as the overview.
    # It is needed in stage 3 to work out what is "remaining"; the overview is
    # rarely at offset 0, so slicing by ``len(overview)`` would be wrong.
    overview_end = None

    # Stage 1: standard headings. DOTALL lets ``.`` span line breaks so each
    # group captures the whole section body up to the next heading (or the end).
    overview_match = re.search(r'#\s*一、\s*项目概述(.*?)(?=#\s*二、|$)', content, re.DOTALL)
    if overview_match:
        overview = overview_match.group(1).strip()
        if overview:
            overview_end = overview_match.end(1)

    process_match = re.search(r'#\s*二、\s*项目整体流程介绍(.*?)(?=#\s*三、|$)', content, re.DOTALL)
    if process_match:
        process = process_match.group(1).strip()

    keypoints_match = re.search(r'#\s*三、\s*项目案例关键技术点(.*?)$', content, re.DOTALL)
    if keypoints_match:
        keyPoints = keypoints_match.group(1).strip()

    # Stage 2: non-standard documents, classified by keyword.
    if not overview and not process and not keyPoints:
        overview_words = ('概述', '背景', '简介')
        process_words = ('流程', '步骤', '实施', '方法')
        keypoint_words = ('关键', '要点', '技术', '成果')

        for section in re.split(r'^#{1,2}\s+', content, flags=re.MULTILINE):
            if not overview and any(word in section for word in overview_words):
                # The first line is the heading text; the rest is the body.
                lines = section.split('\n')
                overview = '\n'.join(lines[1:]) if len(lines) > 1 else section
                overview = overview.strip()
            elif not process and any(word in section for word in process_words):
                process = section.strip()
            elif not keyPoints and any(word in section for word in keypoint_words):
                keyPoints = section.strip()

        if overview:
            # The overview here is a contiguous slice of ``content``, so it
            # can be located again to find where it ends.
            found_at = content.find(overview)
            if found_at >= 0:
                overview_end = found_at + len(overview)

    # Stage 3: defaults.
    if not overview:
        overview = content[:500].strip()
        overview_end = min(len(content), 500)
        if len(content) > 500:
            overview += "..."

    if not process and not keyPoints:
        # Use whatever follows the overview as the process description.
        if overview_end is not None:
            remaining = content[overview_end:].strip()
        else:
            remaining = content
        if remaining:
            process = remaining

    return overview, process, keyPoints

def _classify_position(pos):
    """Return the job-level label for a position name, chosen by keyword."""
    if '助理' in pos or '实习' in pos:
        return "实习生岗"
    if '经理' in pos or '主管' in pos:
        return "储备干部岗"
    return "技术骨干岗"


def _build_project_records(idx, item):
    """Convert one raw case dict into its ``(list_item, detail_item)`` pair."""
    # ``or`` rather than a ``.get`` default: a JSON ``null`` would otherwise
    # reach the string operations below and raise.
    project_name = item.get('案例名称') or ''
    direction = item.get('所属垂直方向') or '综合项目'
    content = item.get('项目案例内容') or ''
    units = item.get('对应单元名称（垂直能力课）') or ''
    positions = item.get('对应个人简历名称') or ''

    # Comma-separated position names; blanks left by stray commas are dropped.
    position_list = [p.strip() for p in positions.split(',') if p.strip()]
    position_detail_list = [
        {"level": _classify_position(pos), "position": pos}
        for pos in position_list
    ]

    # The first listed unit names the project; fall back to the direction.
    unit_name = units.split(',')[0] if units else direction

    list_item = {
        "id": idx,
        "name": project_name,
        "description": direction,
        "positions": position_list,
        "unit": unit_name,
        "direction": direction,
        "category": direction.split('与')[0] if '与' in direction else direction
    }

    # Section bodies come back without their headings.
    overview, process, keyPoints = extract_sections(content)

    # Placeholder text for any section that could not be extracted.
    if not overview:
        overview = f"{project_name}是{direction}领域的重要实践项目，通过本项目的实施，学生能够掌握相关的专业技能和实践经验。"

    if not process:
        process = f"本项目按照标准的{direction}流程进行实施，包括需求分析、方案设计、实施执行、测试验收等关键环节。"

    if not keyPoints:
        # Real newlines, so json.dumps emits "\n" line breaks rather than a
        # literal backslash followed by "n".
        keyPoints = f"1. 掌握{direction}的核心技术\n2. 熟悉项目实施的完整流程\n3. 培养解决实际问题的能力\n4. 提升团队协作和沟通能力"

    detail_item = {
        "id": idx,
        "name": project_name,
        "positions": position_detail_list,
        "unit": unit_name,
        "overview": overview,
        "process": process,
        "keyPoints": keyPoints
    }
    return list_item, detail_item


def _render_mock_js(projects_list, projects_detail):
    """Render the JavaScript mock module text that embeds both data sets."""
    list_header = """// 项目库Mock数据
export const getMockProjectsList = (params = {}) => {
  const { search = "", page = 1, pageSize = 10 } = params;

  // 完整项目列表数据
  const projects = """

    list_footer_and_detail_header = """;

  // 根据搜索条件过滤
  let filteredProjects = projects;
  if (search) {
    filteredProjects = projects.filter(project =>
      project.name.toLowerCase().includes(search.toLowerCase()) ||
      project.description.toLowerCase().includes(search.toLowerCase())
    );
  }

  // 分页处理
  const startIndex = (page - 1) * pageSize;
  const endIndex = startIndex + pageSize;
  const paginatedProjects = filteredProjects.slice(startIndex, endIndex);

  return {
    success: true,
    data: paginatedProjects,
    total: filteredProjects.length,
    page: page,
    pageSize: pageSize
  };
};

// 获取项目详情
export const getMockProjectDetail = (id) => {
  // 直接根据ID返回对应项目的详情
  const projects = """

    detail_footer = """;

  const project = projects.find(p => p.id === parseInt(id));

  if (project) {
    return {
      success: true,
      data: project
    };
  } else {
    return {
      success: false,
      message: "项目不存在"
    };
  }
};
"""

    return "".join([
        list_header,
        json.dumps(projects_list, ensure_ascii=False, indent=2),
        list_footer_and_detail_header,
        json.dumps(projects_detail, ensure_ascii=False, indent=2),
        detail_footer,
    ])


def remove_duplicate_titles(
    input_path='网页未导入数据/土木水利产业/土木水利项目案例.json',
    output_path='src/mocks/projectLibraryMock.js',
):
    """Convert the project-case JSON into the project-library mock JS module.

    Reads a JSON array of case dicts from ``input_path``, builds a list
    record and a detail record for each case (ids start at 1, in file
    order), and writes a JavaScript module exporting ``getMockProjectsList``
    and ``getMockProjectDetail`` to ``output_path``. Section text is taken
    from ``extract_sections``, so the stored ``process`` / ``keyPoints``
    values do not repeat their headings. Progress for the first three
    projects is printed to stdout.

    Args:
        input_path: Path of the source JSON file. Defaults to the original
            hard-coded location, relative to the working directory.
        output_path: Path of the JS file to write. Its directory must
            already exist. Defaults to the original hard-coded location.

    Raises:
        OSError: If either file cannot be opened.
        json.JSONDecodeError: If the input file is not valid JSON.
    """
    with open(input_path, 'r', encoding='utf-8') as f:
        civil_data = json.load(f)

    print(f"开始处理项目详情数据，移除重复标题，共{len(civil_data)}个项目")

    projects_list = []
    projects_detail = []

    for idx, item in enumerate(civil_data, 1):
        list_item, detail_item = _build_project_records(idx, item)
        projects_list.append(list_item)
        projects_detail.append(detail_item)

        # Show a short sample so the result can be checked by eye.
        if idx <= 3:
            print(f"\n项目{idx}: {detail_item['name']}")
            print(f"  - overview长度: {len(detail_item['overview'])}")
            print(f"  - process长度: {len(detail_item['process'])}")
            print(f"  - keyPoints长度: {len(detail_item['keyPoints'])}")
            # The start of the process text shows whether a heading remains.
            print(f"  - process开头: {detail_item['process'][:100]}...")

    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(_render_mock_js(projects_list, projects_detail))

    print("\n✅ 项目详情数据已处理完成")
    print("   - 已移除重复的标题")
    print("   - process和keyPoints字段现在直接包含内容，没有多余的标题")

# Run the conversion only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    remove_duplicate_titles()
-												初始化12个产业教务系统项目

主要内容：
- 包含12个产业的完整教务系统前端代码
- 智能启动脚本 (start-industry.sh)
- 可视化产业导航页面 (index.html)
- 项目文档 (README.md)

优化内容：
- 删除所有node_modules和.yoyo文件夹，从7.5GB减少到2.7GB
- 添加.gitignore文件避免上传不必要的文件
- 自动依赖管理和智能启动系统

产业列表：
1. 文旅产业 (5150)
2. 智能制造 (5151)
3. 智能开发 (5152)
4. 财经商贸 (5153)
5. 视觉设计 (5154)
6. 交通物流 (5155)
7. 大健康 (5156)
8. 土木水利 (5157)
9. 食品产业 (5158)
10. 化工产业 (5159)
11. 能源产业 (5160)
12. 环保产业 (5161)

🤖 Generated with Claude Code
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-09-24 14:14:14 +08:00
+								#!/usr/bin/env python3
 								# -*- coding: utf-8 -*-
 								import json
 								import re
 								def extract_sections(content):
 								    """从内容中提取概述、流程和关键点三个部分，不添加额外的标题"""
 								    overview = ""
 								    process = ""
 								    keyPoints = ""
 								    if not content:
 								        return overview, process, keyPoints
 								    # 使用正则表达式匹配一级标题
 								    # 匹配模式: # 一、项目概述  # 二、项目整体流程介绍  # 三、项目案例关键技术点
 								    # 提取项目概述
 								    overview_match = re.search(r'#\s*一、\s*项目概述(.*?)(?=#\s*二、|$)', content, re.DOTALL)
 								    if overview_match:
 								        overview = overview_match.group(1).strip()
 								    # 提取项目流程（不添加重复的标题）
 								    process_match = re.search(r'#\s*二、\s*项目整体流程介绍(.*?)(?=#\s*三、|$)', content, re.DOTALL)
 								    if process_match:
 								        process = process_match.group(1).strip()
 								        # 不再添加额外的标题
 								    # 提取关键技术点（不添加重复的标题）
 								    keypoints_match = re.search(r'#\s*三、\s*项目案例关键技术点(.*?)$', content, re.DOTALL)
 								    if keypoints_match:
 								        keyPoints = keypoints_match.group(1).strip()
 								        # 不再添加额外的标题
 								    # 如果没有找到标准格式，尝试其他格式
 								    if not overview and not process and not keyPoints:
 								        # 尝试查找其他可能的章节标题
 								        sections = re.split(r'^#{1,2}\s+', content, flags=re.MULTILINE)
 								        for section in sections:
 								            section_lower = section.lower()
 								            if not overview and ('概述' in section or '背景' in section or '简介' in section):
 								                # 提取第一段作为概述
 								                lines = section.split('\n')
 								                overview = '\n'.join(lines[1:]) if len(lines) > 1 else section
 								                overview = overview.strip()
 								            elif not process and ('流程' in section or '步骤' in section or '实施' in section or '方法' in section):
 								                process = section.strip()
 								            elif not keyPoints and ('关键' in section or '要点' in section or '技术' in section or '成果' in section):
 								                keyPoints = section.strip()
 								    # 如果还是没有找到，使用默认处理
 								    if not overview:
 								        # 取前500字作为概述
 								        overview = content[:500].strip()
 								        if len(content) > 500:
 								            overview += "..."
 								    if not process and not keyPoints:
 								        # 如果没有明确的流程和关键点，将剩余内容作为流程
 								        remaining = content[len(overview):].strip() if overview in content else content
 								        if remaining:
 								            process = remaining
 								    return overview, process, keyPoints
 								def remove_duplicate_titles():
 								    # 读取土木水利项目案例数据
 								    with open('网页未导入数据/土木水利产业/土木水利项目案例.json', 'r', encoding='utf-8') as f:
 								        civil_data = json.load(f)
 								    print(f"开始处理项目详情数据，移除重复标题，共{len(civil_data)}个项目")
 								    # 转换为班级项目库格式
 								    projects_list = []
 								    projects_detail = []
 								    for idx, item in enumerate(civil_data, 1):
 								        # 提取字段
 								        project_name = item.get('案例名称', '')
 								        direction = item.get('所属垂直方向', '综合项目')
 								        content = item.get('项目案例内容', '')
 								        units = item.get('对应单元名称（垂直能力课）', '')
 								        positions = item.get('对应个人简历名称', '')
 								        # 处理岗位列表
 								        position_list = []
 								        position_detail_list = []
 								        if positions:
 								            pos_names = [p.strip() for p in positions.split(',')]
 								            position_list = pos_names
 								            for pos in pos_names:
 								                if '助理' in pos or '实习' in pos:
 								                    level = "实习生岗"
 								                elif '经理' in pos or '主管' in pos:
 								                    level = "储备干部岗"
 								                else:
 								                    level = "技术骨干岗"
 								                position_detail_list.append({
 								                    "level": level,
 								                    "position": pos
 								                })
 								        # 处理单元名称
 								        unit_name = units.split(',')[0] if units else direction
 								        # 创建列表数据
 								        list_item = {
 								            "id": idx,
 								            "name": project_name,
 								            "description": direction,
 								            "positions": position_list,
 								            "unit": unit_name,
 								            "direction": direction,
 								            "category": direction.split('与')[0] if '与' in direction else direction
 								        }
 								        projects_list.append(list_item)
 								        # 正确解析内容的三个部分，不添加重复标题
 								        overview, process, keyPoints = extract_sections(content)
 								        # 如果某部分为空，提供默认内容
 								        if not overview:
 								            overview = f"{project_name}是{direction}领域的重要实践项目，通过本项目的实施，学生能够掌握相关的专业技能和实践经验。"
 								        if not process:
 								            process = f"本项目按照标准的{direction}流程进行实施，包括需求分析、方案设计、实施执行、测试验收等关键环节。"
 								        if not keyPoints:
 								            keyPoints = f"1. 掌握{direction}的核心技术\\n2. 熟悉项目实施的完整流程\\n3. 培养解决实际问题的能力\\n4. 提升团队协作和沟通能力"
 								        # 创建详情数据
 								        detail_item = {
 								            "id": idx,
 								            "name": project_name,
 								            "positions": position_detail_list,
 								            "unit": unit_name,
 								            "overview": overview,
 								            "process": process,
 								            "keyPoints": keyPoints
 								        }
 								        projects_detail.append(detail_item)
 								        # 显示处理进度
 								        if idx <= 3:
 								            print(f"\n项目{idx}: {project_name}")
 								            print(f"  - overview长度: {len(overview)}")
 								            print(f"  - process长度: {len(process)}")
 								            print(f"  - keyPoints长度: {len(keyPoints)}")
 								            # 显示process的前100字符以验证没有重复标题
 								            print(f"  - process开头: {process[:100]}...")
 								    # 生成JavaScript代码
 								    output = """// 项目库Mock数据
 								export const getMockProjectsList = (params = {}) => {
 								  const { search = "", page = 1, pageSize = 10 } = params;
 								  // 完整项目列表数据
 								  const projects = """
 								    output += json.dumps(projects_list, ensure_ascii=False, indent=2)
 								    output += ";\n\n"
 								    output += """  // 根据搜索条件过滤
 								  let filteredProjects = projects;
 								  if (search) {
 								    filteredProjects = projects.filter(project =>
 								      project.name.toLowerCase().includes(search.toLowerCase()) ||
 								      project.description.toLowerCase().includes(search.toLowerCase())
 								    );
 								  }
 								  // 分页处理
 								  const startIndex = (page - 1) * pageSize;
 								  const endIndex = startIndex + pageSize;
 								  const paginatedProjects = filteredProjects.slice(startIndex, endIndex);
 								  return {
 								    success: true,
 								    data: paginatedProjects,
 								    total: filteredProjects.length,
 								    page: page,
 								    pageSize: pageSize
 								  };
 								};
 								// 获取项目详情
 								export const getMockProjectDetail = (id) => {
 								  // 直接根据ID返回对应项目的详情
 								  const projects = """
 								    output += json.dumps(projects_detail, ensure_ascii=False, indent=2)
 								    output += """;\n
 								  const project = projects.find(p => p.id === parseInt(id));
 								  if (project) {
 								    return {
 								      success: true,
 								      data: project
 								    };
 								  } else {
 								    return {
 								      success: false,
 								      message: "项目不存在"
 								    };
 								  }
 								};
 								"""
 								    # 保存到文件
 								    with open('src/mocks/projectLibraryMock.js', 'w', encoding='utf-8') as f:
 								        f.write(output)
 								    print(f"\n✅ 项目详情数据已处理完成")
 								    print(f"   - 已移除重复的标题")
 								    print(f"   - process和keyPoints字段现在直接包含内容，没有多余的标题")
 								if __name__ == "__main__":
 								    remove_duplicate_titles()