118 lines
4.2 KiB
Python
118 lines
4.2 KiB
Python
|
|
#!/usr/bin/env python3
|
||
|
|
# -*- coding: utf-8 -*-
|
||
|
|
|
||
|
|
import json
|
||
|
|
import re
|
||
|
|
from typing import List, Dict, Any
|
||
|
|
|
||
|
|
def parse_interview_content(content: str) -> List[Dict[str, Any]]:
    """Parse raw interview-question text into question/answer records.

    The text is split into chapters on header lines of the form
    ``# <Chinese numerals>、``; anything before the first header is ignored,
    as is the first line of each chapter (its title).  Inside a chapter:

    * a line starting with ``<digits>.`` opens a new question;
    * a line starting with ``示例答案:`` or ``答案:`` starts the answer of the
      current question (only that leading marker is removed);
    * later lines are appended to the answer, joined by single spaces,
      except lines starting with ``选择题:``, ``填空题:`` or an option marker
      ``A.``/``B.``/``C.``/``D.``, which are skipped.

    A question is emitted only if both its text and its answer are non-empty.

    Args:
        content: The full interview text.

    Returns:
        A list of dicts with the keys ``id`` (``q_1``, ``q_2``, ... numbered
        consecutively across all chapters), ``question`` and ``answer``.
    """
    # MULTILINE '^' lets a header on the very first line of the text count
    # as a chapter start; '+' accepts multi-character numerals such as 十一.
    section_header = re.compile(r'^# [一二三四五六七八九十]+、', re.MULTILINE)
    question_prefix = re.compile(r'\d+\.\s*')
    answer_prefixes = ('示例答案:', '答案:')
    skipped_prefixes = ('选择题:', '填空题:', 'A.', 'B.', 'C.', 'D.')

    questions: List[Dict[str, Any]] = []

    def flush(question: str, answer: str) -> None:
        # Store the pending question, but only when it actually has an answer.
        if question and answer:
            questions.append({
                "id": f"q_{len(questions) + 1}",
                "question": question.strip(),
                "answer": answer.strip()
            })

    # Element 0 is whatever precedes the first chapter header.
    for section in section_header.split(content)[1:]:
        question = ""
        answer_parts: List[str] = []
        collecting = False

        # The first line of a chapter is its title; only the rest is scanned.
        for raw_line in section.strip().split('\n')[1:]:
            line = raw_line.strip()
            if not line:
                continue

            numbered = question_prefix.match(line)
            if numbered:
                # A new numbered line closes the previous question.
                flush(question, " ".join(answer_parts))
                question = line[numbered.end():]
                answer_parts = []
                collecting = False
                continue

            marker = next((p for p in answer_prefixes if line.startswith(p)), None)
            if marker is not None:
                # Strip only the leading marker so that answer text which
                # itself contains "答案:" is left intact.
                answer_parts = [line[len(marker):].strip()]
                collecting = True
            elif collecting and not line.startswith(skipped_prefixes):
                answer_parts.append(line)

        # The last question of a chapter has no following numbered line.
        flush(question, " ".join(answer_parts))

    return questions
|
||
|
|
|
||
|
|
def main():
    """Load the position/resume JSON, build per-group interview questions, and print them.

    For every position group the first interview title and content seen are
    kept.  The content is parsed into questions, truncated to four entries,
    each entry is annotated with a difficulty and a tag chosen by its index,
    and the result is printed as indented JSON.
    """
    source_path = '网页未导入数据/土木水利产业/土木水利岗位简历.json'
    with open(source_path, 'r', encoding='utf-8') as source_file:
        positions = json.load(source_file)

    # Group the interview material by position group; the first record of a
    # group wins and later records of the same group are ignored.
    interview_groups = {}
    for position in positions:
        group_key = position['简历岗位群']
        candidate = {
            'title': position['面试题'],
            'content': position['面试题内容'],
        }
        interview_groups.setdefault(group_key, candidate)

    difficulty_levels = ['基础', '中等', '中高', '高级']
    bim_tags = ['BIM基础', 'BIM建模', 'BIM协作']
    other_tags = ['房地产', '经纪业务', '客户服务']

    converted_questions = []
    for group_name, group_data in interview_groups.items():
        # Only the first four questions are kept so they fit the UI.
        selected = parse_interview_content(group_data['content'])[:4]

        # Attach a difficulty and a tag to each question based on its index.
        for index, entry in enumerate(selected):
            tag_pool = bim_tags if 'BIM' in group_name else other_tags
            entry['difficulty'] = difficulty_levels[min(index, 3)]
            entry['tags'] = tag_pool[min(index, 2)]

        converted_questions.append({
            'group_name': group_name,
            'title': group_data['title'],
            'questions': selected,
        })

    # Emit the converted data.
    print("转换后的面试题数据:")
    print(json.dumps(converted_questions, ensure_ascii=False, indent=2))
|
||
|
|
|
||
|
|
# Script entry point: run the conversion only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
|