116 lines
4.4 KiB
Python
116 lines
4.4 KiB
Python
|
|
#!/usr/bin/env python3
|
|||
|
|
# -*- coding: utf-8 -*-
|
|||
|
|
|
|||
|
|
import json
|
|||
|
|
import re
|
|||
|
|
|
|||
|
|
# Maps the source record's "问答类型" value to the category stored in the
# generated conversation; unknown values fall back to "常规问题".
_TYPE_MAPPING = {
    "智能客服": "专业知识",
    "导师问答": "专业知识",
    "就业指导": "就业指导",
    "常规问题": "常规问题",
}

# Name used for answers when the source record names no mentor.
_BOT_NAME = "多多畅职机器人"

# Avatar URL per mentor name (without the "老师" suffix); the bot avatar
# doubles as the fallback for mentors that are not listed here.
_MENTOR_AVATARS = {
    "杨清永": "https://ddcz-1315997005.cos.ap-nanjing.myqcloud.com/static/img/teach_sys_teacher-avatar/recuW7dxJ5R3bN.png",
    "罗俊杰": "https://ddcz-1315997005.cos.ap-nanjing.myqcloud.com/static/img/teach_sys_teacher-avatar/recuW7dxJ5BFnj.png",
    _BOT_NAME: "https://ddcz-1315997005.cos.ap-nanjing.myqcloud.com/static/img/teach_sys_icon/recuWmDuekBTlr.png",
}

# (question key, question time key, answer key, answer time key) per round.
_ROUNDS = (
    ("问题_流程1", "流程1_时间", "回答_流程2", "流程2_时间"),
    ("问题_流程3", "流程3_时间", "回答_流程4", "流程4_时间"),
)

# The serialized conversations are substituted for %s. json.dumps handles all
# quote escaping, which is what keeps the generated JavaScript well-formed.
_JS_TEMPLATE = """// 从土木水利问答内容.json转换的专家支持中心数据
const civilEngineeringExpertSupportData = {
"conversations": %s
};

export default civilEngineeringExpertSupportData;"""


def _format_date(time_str):
    """Return the "<year>年<month>月" label for a "year/month/..." string.

    An empty or missing value yields "2024年1月". A missing or non-numeric
    month segment falls back to month 1 instead of raising ValueError, so a
    single malformed record cannot abort the whole conversion.
    """
    if not time_str:
        return "2024年1月"
    year, _, remainder = time_str.partition("/")
    month_text = remainder.partition("/")[0]
    try:
        month = int(month_text)
    except ValueError:
        month = 1
    return f"{year}年{month}月"


def _build_conversation(idx, item):
    """Convert one source Q&A record into a conversation dict with id *idx*."""
    raw_mentor = item.get("查询导师名称", "")
    mentor = raw_mentor + "老师" if raw_mentor else _BOT_NAME
    # Look the avatar up once; it is identical for every answer in the record.
    avatar = _MENTOR_AVATARS.get(
        mentor.replace("老师", ""), _MENTOR_AVATARS[_BOT_NAME]
    )

    messages = []
    for round_no, (q_key, q_time_key, a_key, a_time_key) in enumerate(_ROUNDS):
        question = item.get(q_key)
        if question:
            messages.append({
                "type": "user",
                "content": question,
                "time": item.get(q_time_key, ""),
            })

        answer = item.get(a_key)
        if answer:
            if round_no == 0:
                # The first answer may embed follow-up Q&A text; keep only
                # the part before the first embedded question marker.
                answer = answer.partition("\n问:")[0]
            messages.append({
                "type": "assistant",
                "content": answer,
                "mentor": mentor,
                "time": item.get(a_time_key, ""),
                "mentorAvatar": avatar,
            })

    return {
        "id": idx,
        "title": item.get("问题标题(AI)", "问题"),
        "status": "finish",
        "date": _format_date(item.get("流程1_时间", "")),
        "type": _TYPE_MAPPING.get(item.get("问答类型", "常规问题"), "常规问题"),
        "messages": messages,
    }


def fix_quotes_in_content(
    input_path='网页未导入数据/土木水利产业/土木水利问答内容.json',
    output_path='src/data/civilEngineeringExpertSupportData.js',
):
    """Regenerate the expert-support-center JS data file from the Q&A JSON.

    Reads the list of Q&A records from *input_path*, converts each one into
    a conversation (ids start at 1) and writes an ES module to *output_path*
    whose default export wraps the conversations. The payload is serialized
    with ``json.dumps`` so quotes inside the content are escaped correctly.

    Args:
        input_path: UTF-8 JSON file containing a list of Q&A record objects.
        output_path: Destination of the generated JavaScript module.

    Raises:
        OSError: If either file cannot be opened.
        json.JSONDecodeError: If the input file is not valid JSON.
    """
    with open(input_path, 'r', encoding='utf-8') as f:
        qa_data = json.load(f)

    conversations = [
        _build_conversation(idx, item) for idx, item in enumerate(qa_data, 1)
    ]

    js_content = _JS_TEMPLATE % json.dumps(
        conversations, ensure_ascii=False, indent=2
    )

    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(js_content)

    print("✅ 专家支持中心数据已修复")
    print(f"转换了 {len(conversations)} 个对话")
|
|||
|
|
|
|||
|
|
if __name__ == "__main__":
    # Run the conversion only when executed as a script, not when imported.
    fix_quotes_in_content()
|