#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Convert interview-question text from a resume JSON file into Q&A records.

The script reads a JSON list of positions, keeps the interview text of the
first position seen for each position group, parses that text into
question/answer pairs and prints the result as JSON.
"""

import json
import re
from typing import List, Dict, Any

# Default input file read by ``main``.
DEFAULT_INPUT_PATH = '网页未导入数据/土木水利产业/土木水利岗位简历.json'

# Only this many questions per group are kept, to fit the UI.
MAX_QUESTIONS_PER_GROUP = 4

# A section header is a line starting with "# <Chinese numeral(s)>、".
# Anchoring with ^ (instead of a preceding "\n") also matches a header on the
# very first line; "+" also matches multi-character numerals such as "十一".
_SECTION_HEADER_RE = re.compile(r'^# [一二三四五六七八九十]+、', re.MULTILINE)

# A question line starts with "<digits>." followed by optional whitespace.
_QUESTION_PREFIX_RE = re.compile(r'^\d+\.\s*')

# Markers that introduce an answer line.
_ANSWER_MARKERS = ('示例答案:', '答案:')

# Lines with these prefixes are never appended to an answer being collected.
_SKIPPED_IN_ANSWER = ('选择题:', '填空题:', 'A.', 'B.', 'C.', 'D.')

_DIFFICULTY_LEVELS = ['基础', '中等', '中高', '高级']
_BIM_TAGS = ['BIM基础', 'BIM建模', 'BIM协作']
# NOTE(review): these fallback tags are real-estate themed although the
# default input is the civil/water-engineering file -- confirm they are
# intended for this data set.
_DEFAULT_TAGS = ['房地产', '经纪业务', '客户服务']


def _strip_answer_marker(line: str) -> str:
    """Return *line* without its leading answer marker, whitespace-stripped.

    Only the marker at the start of the line is removed; the same text
    appearing later in the answer is left untouched.
    """
    for marker in _ANSWER_MARKERS:
        if line.startswith(marker):
            return line[len(marker):].strip()
    return line


def parse_interview_content(content: str) -> List[Dict[str, Any]]:
    """Parse interview-question text into a flat list of Q&A records.

    The text is split into sections at header lines of the form
    ``# 一、...``; anything before the first header is ignored, and the
    remainder of each header line (the section title) is skipped.  Inside a
    section, a line starting with ``<digits>.`` opens a new question, a line
    starting with ``示例答案:`` or ``答案:`` opens its answer, and following
    lines are appended to that answer (joined with single spaces) unless they
    start with ``选择题:``, ``填空题:`` or an option prefix ``A.``-``D.``.

    Args:
        content: Raw interview text.  Empty or ``None`` yields ``[]``.

    Returns:
        A list of dicts with keys ``id`` (``"q_1"``, ``"q_2"``, ... numbered
        across all sections), ``question`` and ``answer``.  A question is
        only emitted when both its question text and answer are non-empty.
    """
    questions: List[Dict[str, Any]] = []
    if not content:
        return questions

    def flush(question: str, answer_parts: List[str]) -> None:
        # Emit the pending question only if it has both a question and answer.
        answer = " ".join(answer_parts)
        if question and answer:
            questions.append({
                "id": f"q_{len(questions) + 1}",
                "question": question.strip(),
                "answer": answer.strip(),
            })

    # Element 0 is whatever precedes the first header, so it is skipped.
    for section in _SECTION_HEADER_RE.split(content)[1:]:
        lines = section.strip().split('\n')

        current_question = ""
        answer_parts: List[str] = []
        collecting_answer = False

        # lines[0] is the section title (rest of the header line).
        for raw_line in lines[1:]:
            line = raw_line.strip()
            if not line:
                continue

            question_match = _QUESTION_PREFIX_RE.match(line)
            if question_match:
                # A new numbered line closes the previous question.
                flush(current_question, answer_parts)
                current_question = line[question_match.end():]
                answer_parts = []
                collecting_answer = False
            elif line.startswith(_ANSWER_MARKERS):
                answer_parts = [_strip_answer_marker(line)]
                collecting_answer = True
            elif collecting_answer and not line.startswith(_SKIPPED_IN_ANSWER):
                answer_parts.append(line)

        # The last question of a section has no following line to close it.
        flush(current_question, answer_parts)

    return questions


def _annotate_questions(group_name: str, questions: List[Dict[str, Any]]) -> None:
    """Add ``difficulty`` and ``tags`` to each question in place, by position."""
    tag_pool = _BIM_TAGS if 'BIM' in group_name else _DEFAULT_TAGS
    for index, question in enumerate(questions):
        # Indices past the end of a pool reuse its last entry.
        question['difficulty'] = _DIFFICULTY_LEVELS[
            min(index, len(_DIFFICULTY_LEVELS) - 1)
        ]
        # NOTE(review): 'tags' receives a single string, not a list -- kept
        # as in the original output format; confirm with the consumer.
        question['tags'] = tag_pool[min(index, len(tag_pool) - 1)]


def main(input_path: str = DEFAULT_INPUT_PATH) -> None:
    """Load the resume JSON, convert each group's interview text, print JSON.

    Args:
        input_path: Path of the JSON file to read; it must contain a list of
            objects with the keys ``简历岗位群``, ``面试题`` and ``面试题内容``.
    """
    with open(input_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Group by position group; the first position seen for a group wins.
    interview_groups: Dict[str, Dict[str, Any]] = {}
    for position in data:
        interview_groups.setdefault(position['简历岗位群'], {
            'title': position['面试题'],
            'content': position['面试题内容'],
        })

    converted_questions = []
    for group_name, group_data in interview_groups.items():
        questions = parse_interview_content(group_data['content'])
        questions = questions[:MAX_QUESTIONS_PER_GROUP]
        _annotate_questions(group_name, questions)
        converted_questions.append({
            'group_name': group_name,
            'title': group_data['title'],
            'questions': questions,
        })

    print("转换后的面试题数据:")
    print(json.dumps(converted_questions, ensure_ascii=False, indent=2))


if __name__ == "__main__":
    main()