Files
ALL-teach_sys/frontend_土木水利/fix_expert_support_quotes.py

116 lines
4.4 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import json
import os
import re
# Default locations, resolved relative to the current working directory.
_QA_SOURCE_PATH = '网页未导入数据/土木水利产业/土木水利问答内容.json'
_JS_OUTPUT_PATH = 'src/data/civilEngineeringExpertSupportData.js'

# Value written for "type" when the source record has no (or an unknown) type.
_DEFAULT_TYPE = "常规问题"

# Source "问答类型" value -> "type" value written to the output.
_TYPE_MAPPING = {
    "智能客服": "专业知识",
    "导师问答": "专业知识",
    "就业指导": "就业指导",
    "常规问题": "常规问题",
}

# Name used for the answering party when the record names no mentor.
_DEFAULT_MENTOR = "多多畅职机器人"

# Honorific appended to a named mentor for display.
_MENTOR_SUFFIX = "老师"

# Bare mentor name (without the honorific) -> avatar URL.
_MENTOR_AVATARS = {
    "杨清永": "https://ddcz-1315997005.cos.ap-nanjing.myqcloud.com/static/img/teach_sys_teacher-avatar/recuW7dxJ5R3bN.png",
    "罗俊杰": "https://ddcz-1315997005.cos.ap-nanjing.myqcloud.com/static/img/teach_sys_teacher-avatar/recuW7dxJ5BFnj.png",
    "多多畅职机器人": "https://ddcz-1315997005.cos.ap-nanjing.myqcloud.com/static/img/teach_sys_icon/recuWmDuekBTlr.png",
}

# Marker that introduces an extra embedded question inside the first answer.
_EXTRA_QUESTION_MARKER = "\n问:"

# ES-module wrapper; "%s" receives the serialised conversation list.
_JS_TEMPLATE = """// 从土木水利问答内容.json转换的专家支持中心数据
const civilEngineeringExpertSupportData = {
"conversations": %s
};
export default civilEngineeringExpertSupportData;"""


def _conversation_date(time_str):
    """Return the year followed by the un-padded month of a "Y/M/..." string.

    An empty timestamp yields "20241"; a timestamp without a second
    "/"-separated field uses month 1.
    """
    date_parts = time_str.split("/") if time_str else ["2024", "1"]
    month = int(date_parts[1]) if len(date_parts) > 1 else 1
    return f"{date_parts[0]}{month}"


def _build_conversation(idx, item):
    """Convert one source Q&A record into a conversation dict.

    Args:
        idx: 1-based identifier stored as the conversation "id".
        item: One record (a dict) from the source JSON list.

    Returns:
        A dict with "id", "title", "status", "date", "type" and "messages".
    """
    mentor_name = item.get("查询导师名称", "")
    if mentor_name:
        mentor = mentor_name + _MENTOR_SUFFIX
    else:
        mentor = _DEFAULT_MENTOR
    # Both answers of a record share one mentor, so resolve the avatar once.
    # The honorific is stripped again because the avatar table is keyed by
    # the bare name; unknown mentors fall back to the robot avatar.
    avatar = _MENTOR_AVATARS.get(
        mentor.replace(_MENTOR_SUFFIX, ""),
        _MENTOR_AVATARS[_DEFAULT_MENTOR],
    )

    messages = []

    # First round of question and answer.
    first_question = item.get("问题_流程1")
    if first_question:
        messages.append({
            "type": "user",
            "content": first_question,
            "time": item.get("流程1_时间", ""),
        })

    first_answer = item.get("回答_流程2")
    if first_answer:
        # Keep only the text before any additional embedded question.
        if _EXTRA_QUESTION_MARKER in first_answer:
            first_answer = first_answer.split(_EXTRA_QUESTION_MARKER)[0]
        messages.append({
            "type": "assistant",
            "content": first_answer,
            "mentor": mentor,
            "time": item.get("流程2_时间", ""),
            "mentorAvatar": avatar,
        })

    # Optional second round.
    second_question = item.get("问题_流程3")
    if second_question:
        messages.append({
            "type": "user",
            "content": second_question,
            "time": item.get("流程3_时间", ""),
        })

    second_answer = item.get("回答_流程4")
    if second_answer:
        messages.append({
            "type": "assistant",
            "content": second_answer,
            "mentor": mentor,
            "time": item.get("流程4_时间", ""),
            "mentorAvatar": avatar,
        })

    return {
        "id": idx,
        "title": item.get("问题标题AI", "问题"),
        "status": "finish",
        "date": _conversation_date(item.get("流程1_时间", "")),
        "type": _TYPE_MAPPING.get(item.get("问答类型", _DEFAULT_TYPE), _DEFAULT_TYPE),
        "messages": messages,
    }


def _to_js_literal(value):
    """Serialise *value* as JSON text that is safe to embed in JS source.

    JSON serialisation takes care of quote escaping. With
    ``ensure_ascii=False`` the characters U+2028 and U+2029 are written raw;
    they are legal in JSON but act as line terminators inside JavaScript
    string literals before ES2019, so they are escaped explicitly. In JSON
    output they can only occur inside strings, so a plain replace is safe.
    """
    text = json.dumps(value, ensure_ascii=False, indent=2)
    return text.replace("\u2028", "\\u2028").replace("\u2029", "\\u2029")


def fix_quotes_in_content(
    input_path: str = _QA_SOURCE_PATH,
    output_path: str = _JS_OUTPUT_PATH,
) -> None:
    """Regenerate the expert-support-centre JS data with correct escaping.

    Reads the list of Q&A records from *input_path*, converts each record
    into a conversation, and writes the result to *output_path* as an ES
    module that default-exports ``civilEngineeringExpertSupportData``.
    A short summary is printed when done.

    Args:
        input_path: UTF-8 JSON file holding a list of Q&A records.
        output_path: JavaScript file to (over)write. Its parent directory
            is created when it does not exist yet.

    Raises:
        OSError: If the input cannot be read or the output cannot be written.
        json.JSONDecodeError: If the input file is not valid JSON.
        ValueError: If a record's month field is not an integer.
    """
    with open(input_path, 'r', encoding='utf-8') as f:
        qa_data = json.load(f)

    conversations = [
        _build_conversation(idx, item)
        for idx, item in enumerate(qa_data, 1)
    ]

    js_content = _JS_TEMPLATE % _to_js_literal(conversations)

    # The input has been read successfully at this point, so creating the
    # target directory cannot leave stray folders behind on a bad input path.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(js_content)

    print("✅ 专家支持中心数据已修复")
    print(f"转换了 {len(conversations)} 个对话")
# Script entry point: run the conversion only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    fix_quotes_in_content()