ALL-teach_sys/frontend_土木水利/fix_expert_support_quotes.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import json
import re

def fix_quotes_in_content():
    """修复专家支持中心数据中的引号问题"""

    # 读取原始的土木水利问答内容
    with open('网页未导入数据/土木水利产业/土木水利问答内容.json', 'r', encoding='utf-8') as f:
        qa_data = json.load(f)

    # 转换为专家支持中心格式
    conversations = []

    # 类型映射
    type_mapping = {
        "智能客服": "专业知识",
        "导师问答": "专业知识",
        "就业指导": "就业指导",
        "常规问题": "常规问题"
    }

    # 导师头像映射
    mentor_avatars = {
        "杨清永": "https://ddcz-1315997005.cos.ap-nanjing.myqcloud.com/static/img/teach_sys_teacher-avatar/recuW7dxJ5R3bN.jpg",
        "罗俊杰": "https://ddcz-1315997005.cos.ap-nanjing.myqcloud.com/static/img/teach_sys_teacher-avatar/recuW7dxJ5BFnj.png",
        "多多畅职机器人": "https://ddcz-1315997005.cos.ap-nanjing.myqcloud.com/static/img/teach_sys_icon/recuWmDuekBTlr.png"
    }

    for idx, item in enumerate(qa_data, 1):
        # 提取时间部分（从第一个时间中提取日期）
        time_str = item.get("流程1_时间", "")
        date_parts = time_str.split("/") if time_str else ["2024", "1"]
        date = f"{date_parts[0]}年{int(date_parts[1]) if len(date_parts) > 1 else 1}月"

        # 确定类型
        qa_type = item.get("问答类型", "常规问题")
        conv_type = type_mapping.get(qa_type, "常规问题")

        # 确定导师
        mentor = item.get("查询导师名称", "")
        if not mentor:
            mentor = "多多畅职机器人"
        else:
            mentor = mentor + "老师"

        conversation = {
            "id": idx,
            "title": item.get("问题标题（AI）", "问题"),
            "status": "finish",
            "date": date,
            "type": conv_type,
            "messages": []
        }

        # 添加第一轮问答
        if item.get("问题_流程1"):
            conversation["messages"].append({
                "type": "user",
                "content": item["问题_流程1"],
                "time": item.get("流程1_时间", "")
            })

        if item.get("回答_流程2"):
            # 处理回答内容，将其中的问答格式转换
            answer_content = item["回答_流程2"]
            # 如果内容中包含额外的问答，只取第一个答案
            if "\n问：" in answer_content:
                answer_content = answer_content.split("\n问：")[0]

            conversation["messages"].append({
                "type": "assistant",
                "content": answer_content,
                "mentor": mentor,
                "time": item.get("流程2_时间", ""),
                "mentorAvatar": mentor_avatars.get(mentor.replace("老师", ""), mentor_avatars["多多畅职机器人"])
            })

        # 添加第二轮问答（如果有）
        if item.get("问题_流程3"):
            conversation["messages"].append({
                "type": "user",
                "content": item["问题_流程3"],
                "time": item.get("流程3_时间", "")
            })

            if item.get("回答_流程4"):
                conversation["messages"].append({
                    "type": "assistant",
                    "content": item["回答_流程4"],
                    "mentor": mentor,
                    "time": item.get("流程4_时间", ""),
                    "mentorAvatar": mentor_avatars.get(mentor.replace("老师", ""), mentor_avatars["多多畅职机器人"])
                })

        conversations.append(conversation)

    # 生成JavaScript文件内容，使用JSON序列化来自动处理引号转义
    js_content = """// 从土木水利问答内容.json转换的专家支持中心数据
const civilEngineeringExpertSupportData = {
  "conversations": %s
};

export default civilEngineeringExpertSupportData;""" % json.dumps(conversations, ensure_ascii=False, indent=2)

    # 写入文件
    with open('src/data/civilEngineeringExpertSupportData.js', 'w', encoding='utf-8') as f:
        f.write(js_content)

    print("✅ 专家支持中心数据已修复")
    print(f"转换了 {len(conversations)} 个对话")

if __name__ == "__main__":
    fix_quotes_in_content()