#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Convert the civil/hydraulic engineering Q&A JSON export into a JS data module.

The output is an ES module exporting ``civilEngineeringExpertSupportData``.
Strings are serialized with ``json.dumps`` so that embedded quotes are escaped
correctly.
"""

import json
import re
from pathlib import Path

# Default locations; they can be overridden through the entry point's parameters.
_DEFAULT_INPUT_PATH = '网页未导入数据/土木水利产业/土木水利问答内容.json'
_DEFAULT_OUTPUT_PATH = 'src/data/civilEngineeringExpertSupportData.js'

# Name used when a record does not specify a mentor.
_ROBOT_MENTOR = "多多畅职机器人"
# Honorific appended to human mentor names for display.
_MENTOR_SUFFIX = "老师"
# Category used when the source type is missing or unknown.
_DEFAULT_TYPE = "常规问题"
# Date shown when a record carries no usable timestamp.
_DEFAULT_DATE = "2024年1月"

# Source Q&A type -> conversation type in the output data.
_TYPE_MAPPING = {
    "智能客服": "专业知识",
    "导师问答": "专业知识",
    "就业指导": "就业指导",
    "常规问题": "常规问题",
}

# Mentor name (without honorific) -> avatar URL.
_MENTOR_AVATARS = {
    "杨清永": "https://ddcz-1315997005.cos.ap-nanjing.myqcloud.com/static/img/teach_sys_teacher-avatar/recuW7dxJ5R3bN.png",
    "罗俊杰": "https://ddcz-1315997005.cos.ap-nanjing.myqcloud.com/static/img/teach_sys_teacher-avatar/recuW7dxJ5BFnj.png",
    "多多畅职机器人": "https://ddcz-1315997005.cos.ap-nanjing.myqcloud.com/static/img/teach_sys_icon/recuWmDuekBTlr.png",
}


def _format_date(time_str):
    """Return ``"<year>年<month>月"`` derived from a ``/``-separated timestamp.

    A missing or non-string timestamp yields the default date. A missing or
    non-numeric month falls back to 1 instead of raising, so that one malformed
    record cannot abort the whole conversion.
    """
    if not isinstance(time_str, str) or not time_str:
        return _DEFAULT_DATE
    year, _, remainder = time_str.partition("/")
    month_text = remainder.partition("/")[0]
    try:
        month = int(month_text)
    except ValueError:
        month = 1
    return f"{year}年{month}月"


def _resolve_mentor(item):
    """Return the mentor display name for a record (robot name when absent)."""
    name = item.get("查询导师名称", "")
    if not name:
        return _ROBOT_MENTOR
    return name + _MENTOR_SUFFIX


def _user_message(content, time):
    """Build one user-side message entry."""
    return {"type": "user", "content": content, "time": time}


def _assistant_message(content, mentor, time):
    """Build one assistant-side message entry, including the mentor avatar."""
    # Avatars are keyed by the bare name, so strip the honorific before lookup;
    # unknown mentors get the robot avatar.
    avatar_key = mentor.replace(_MENTOR_SUFFIX, "")
    return {
        "type": "assistant",
        "content": content,
        "mentor": mentor,
        "time": time,
        "mentorAvatar": _MENTOR_AVATARS.get(avatar_key, _MENTOR_AVATARS[_ROBOT_MENTOR]),
    }


def _build_conversation(idx, item):
    """Convert one source Q&A record into a conversation dict with id ``idx``."""
    mentor = _resolve_mentor(item)
    messages = []

    # First round of question and answer.
    if item.get("问题_流程1"):
        messages.append(_user_message(item["问题_流程1"], item.get("流程1_时间", "")))
    if item.get("回答_流程2"):
        answer = item["回答_流程2"]
        # The answer may embed further Q&A pairs; keep only the first answer.
        if "\n问:" in answer:
            answer = answer.split("\n问:")[0]
        messages.append(_assistant_message(answer, mentor, item.get("流程2_时间", "")))

    # Optional second round.
    if item.get("问题_流程3"):
        messages.append(_user_message(item["问题_流程3"], item.get("流程3_时间", "")))
    if item.get("回答_流程4"):
        messages.append(
            _assistant_message(item["回答_流程4"], mentor, item.get("流程4_时间", ""))
        )

    return {
        "id": idx,
        "title": item.get("问题标题(AI)", "问题"),
        "status": "finish",
        "date": _format_date(item.get("流程1_时间", "")),
        "type": _TYPE_MAPPING.get(item.get("问答类型", _DEFAULT_TYPE), _DEFAULT_TYPE),
        "messages": messages,
    }


def _render_js(conversations):
    """Render the conversations as the text of the JS data module."""
    # json.dumps handles quote escaping inside every string value.
    payload = json.dumps(conversations, ensure_ascii=False, indent=2)
    return (
        "// 从土木水利问答内容.json转换的专家支持中心数据\n"
        "const civilEngineeringExpertSupportData = {\n"
        '  "conversations": ' + payload + "\n"
        "};\n"
        "\n"
        "export default civilEngineeringExpertSupportData;"
    )


def fix_quotes_in_content(input_path=_DEFAULT_INPUT_PATH, output_path=_DEFAULT_OUTPUT_PATH):
    """Regenerate the expert support center JS data with correctly escaped quotes.

    Args:
        input_path: Path of the source Q&A JSON file (a list of records).
        output_path: Path of the JS module to write. Missing parent
            directories are created.

    Raises:
        OSError: If the input cannot be read or the output cannot be written.
        json.JSONDecodeError: If the input file is not valid JSON.
    """
    with open(input_path, 'r', encoding='utf-8') as f:
        qa_data = json.load(f)

    # Conversation ids are 1-based and follow the order of the source records.
    conversations = [
        _build_conversation(idx, item) for idx, item in enumerate(qa_data, 1)
    ]

    target = Path(output_path)
    target.parent.mkdir(parents=True, exist_ok=True)
    with open(target, 'w', encoding='utf-8') as f:
        f.write(_render_js(conversations))

    print("✅ 专家支持中心数据已修复")
    print(f"转换了 {len(conversations)} 个对话")


if __name__ == "__main__":
    fix_quotes_in_content()