Files
ALL-teach_sys/frontend_土木水利/convert_interview_questions.py

118 lines
4.2 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import json
import re
from typing import List, Dict, Any
# A section header is a level-1 Markdown heading numbered with Chinese
# numerals, e.g. "# 一、..." or "# 十一、...".  MULTILINE lets the header match
# on the very first line of the text as well as after a newline.
_SECTION_HEADER_RE = re.compile(r'^# [一二三四五六七八九十]+、', re.MULTILINE)
# A question line starts with an Arabic number followed by a dot ("3. ...").
_QUESTION_NUMBER_RE = re.compile(r'^\d+\.\s*')
# Markers that introduce an answer; the longer marker is tested first.
_ANSWER_PREFIXES = ('示例答案:', '答案:')
# Lines that are never appended to an answer: question-type labels and
# multiple-choice options.
_NON_ANSWER_PREFIXES = ('选择题:', '填空题:', 'A.', 'B.', 'C.', 'D.')


def _strip_answer_prefix(line: str) -> str:
    """Return *line* without its leading answer marker (and surrounding spaces)."""
    for prefix in _ANSWER_PREFIXES:
        if line.startswith(prefix):
            # Remove the marker only at the start so that answer text which
            # itself contains "答案:" is left intact.
            return line[len(prefix):].strip()
    return line.strip()


def parse_interview_content(content: str) -> List[Dict[str, Any]]:
    """Parse interview-question text into a list of question/answer records.

    The text is split into sections at headings of the form "# <Chinese
    numeral>、".  Anything before the first heading is ignored, and the first
    line of every section (the rest of the heading) is treated as its title and
    skipped.  Inside a section:

    * a line starting with "<digits>." begins a new question;
    * a line starting with "示例答案:" or "答案:" begins its answer;
    * following lines are appended to the answer (joined by a single space),
      except question-type labels ("选择题:", "填空题:") and option lines
      ("A." .. "D.").

    Questions that never receive a non-empty answer are dropped.

    Args:
        content: Raw interview text.  ``None`` or an empty string yields ``[]``.

    Returns:
        A list of dicts with keys ``"id"`` ("q_1", "q_2", ... numbered across
        all sections in order of appearance), ``"question"`` and ``"answer"``.
    """
    if not content:
        return []

    questions: List[Dict[str, Any]] = []

    def save(question: str, answer: str) -> None:
        # Only complete pairs are kept; ids follow the number of saved records.
        if question and answer:
            questions.append({
                "id": f"q_{len(questions) + 1}",
                "question": question.strip(),
                "answer": answer.strip()
            })

    # Element 0 is whatever precedes the first heading (possibly empty).
    for section in _SECTION_HEADER_RE.split(content)[1:]:
        lines = section.strip().split('\n')

        current_question = ""
        current_answer = ""
        collecting_answer = False

        # lines[0] is the section title and carries no question data.
        for raw_line in lines[1:]:
            line = raw_line.strip()
            if not line:
                continue

            number = _QUESTION_NUMBER_RE.match(line)
            if number:
                # A new numbered line closes the previous question.
                save(current_question, current_answer)
                current_question = line[number.end():]
                current_answer = ""
                collecting_answer = False
            elif line.startswith(_ANSWER_PREFIXES):
                current_answer = _strip_answer_prefix(line)
                collecting_answer = True
            elif collecting_answer and not line.startswith(_NON_ANSWER_PREFIXES):
                current_answer += " " + line

        # Flush the last question of this section.
        save(current_question, current_answer)

    return questions
def main():
    """Load the résumé JSON, convert each position group's interview text and print it.

    For every distinct value of the '简历岗位群' field the first record seen
    supplies the interview title and raw text.  The text is parsed with
    ``parse_interview_content``, cut down to at most four questions, each
    question is given a 'difficulty' and a 'tags' value by its position, and
    the collected result is printed as indented JSON.
    """
    source_path = '网页未导入数据/土木水利产业/土木水利岗位简历.json'
    with open(source_path, 'r', encoding='utf-8') as fh:
        positions = json.load(fh)

    # Keep only the first interview record encountered for each position group.
    interview_groups = {}
    for position in positions:
        interview_groups.setdefault(
            position['简历岗位群'],
            {
                'title': position['面试题'],
                'content': position['面试题内容']
            },
        )

    # Lookup tables indexed by the question's position within its group.
    difficulty_levels = ['基础', '中等', '中高', '高级']
    bim_tags = ['BIM基础', 'BIM建模', 'BIM协作']
    other_tags = ['房地产', '经纪业务', '客户服务']

    converted_questions = []
    for group_name, group_data in interview_groups.items():
        # Only the first four questions are kept to fit the UI.
        questions = parse_interview_content(group_data['content'])[:4]

        for index, question in enumerate(questions):
            tag_choices = bim_tags if 'BIM' in group_name else other_tags
            question['difficulty'] = difficulty_levels[min(index, 3)]
            question['tags'] = tag_choices[min(index, 2)]

        converted_questions.append({
            'group_name': group_name,
            'title': group_data['title'],
            'questions': questions
        })

    # Emit the converted data.
    print("转换后的面试题数据:")
    print(json.dumps(converted_questions, ensure_ascii=False, indent=2))
# Run the conversion only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()