ALL-teach_sys/frontend_土木水利/convert_interview_questions.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import json
import re
from typing import List, Dict, Any

def parse_interview_content(content: str) -> List[Dict[str, Any]]:
    """Parse interview-question text into a flat list of question/answer dicts.

    The text is expected to be organised into sections whose heading lines
    look like ``# 一、<title>``.  Inside a section, a line starting with
    ``<digits>.`` opens a new question, and a line starting with
    ``示例答案：`` or ``答案：`` opens its answer.  Following lines are appended
    to the answer (joined with a single space) until the next question,
    except lines starting with ``选择题：``, ``填空题：`` or an option label
    ``A.``-``D.``, which are ignored.

    Args:
        content: Raw interview-question text.

    Returns:
        One dict per question that has both a non-empty question and a
        non-empty answer, with keys ``id`` (``q_1``, ``q_2``, ... numbered
        across all sections), ``question`` and ``answer``.  Text before the
        first section heading is ignored; an input without any heading
        yields an empty list.
    """
    answer_markers = ('示例答案：', '答案：')
    # Lines with these prefixes are never folded into an answer.
    ignored_prefixes = ('选择题：', '填空题：', 'A.', 'B.', 'C.', 'D.')
    question_number = re.compile(r'\d+\.\s*')

    questions: List[Dict[str, Any]] = []

    def save(question: str, answer: str) -> None:
        """Store the pair if both parts are non-empty; ids follow insertion order."""
        if question and answer:
            questions.append({
                "id": f"q_{len(questions) + 1}",
                "question": question.strip(),
                "answer": answer.strip()
            })

    # Anchor on line starts (MULTILINE) so a heading on the very first line of
    # the text is recognised too, and accept multi-character numerals (十一、).
    sections = re.split(r'(?m)^# [一二三四五六七八九十]+、', content)

    # sections[0] is whatever precedes the first heading; it holds no questions.
    for section in sections[1:]:
        # Do not strip the section before splitting: lines[0] must stay the
        # remainder of the heading line even when the title is empty, otherwise
        # the first question would be mistaken for the title and dropped.
        lines = section.split('\n')

        current_question = ""
        current_answer = ""
        collecting_answer = False

        for raw_line in lines[1:]:
            line = raw_line.strip()
            if not line:
                continue

            numbered = question_number.match(line)
            if numbered:
                # A new question closes the previous one.
                save(current_question, current_answer)
                current_question = line[numbered.end():]
                current_answer = ""
                collecting_answer = False
            elif line.startswith(answer_markers):
                # Remove only the leading marker; the same characters may
                # legitimately occur later in the answer text.
                marker = next(m for m in answer_markers if line.startswith(m))
                current_answer = line[len(marker):].strip()
                collecting_answer = True
            elif collecting_answer and not line.startswith(ignored_prefixes):
                current_answer += " " + line

        # Flush the last question of the section.
        save(current_question, current_answer)

    return questions

def main():
    """Load the position data, convert each group's interview questions and print them.

    Reads the JSON file at a fixed relative path, keeps the interview title
    and content of the first record seen for every position group, parses
    that content with ``parse_interview_content``, keeps at most four
    questions per group, attaches a difficulty and a tag to each of them and
    prints the result as indented JSON.
    """
    source_path = '网页未导入数据/土木水利产业/土木水利岗位简历.json'
    with open(source_path, 'r', encoding='utf-8') as handle:
        positions = json.load(handle)

    # First record of each group wins; later records of the same group are ignored.
    groups = {}
    for record in positions:
        name = record['简历岗位群']
        title = record['面试题']
        body = record['面试题内容']
        groups.setdefault(name, {'title': title, 'content': body})

    difficulty_levels = ['基础', '中等', '中高', '高级']
    bim_tags = ['BIM基础', 'BIM建模', 'BIM协作']
    default_tags = ['房地产', '经纪业务', '客户服务']

    output = []
    for name, info in groups.items():
        # Keep only the first four questions so they fit the UI.
        selected = parse_interview_content(info['content'])[:4]

        for position_index, item in enumerate(selected):
            tag_pool = bim_tags if 'BIM' in name else default_tags
            item['difficulty'] = difficulty_levels[min(position_index, 3)]
            # The fourth question reuses the last tag because only three exist.
            item['tags'] = tag_pool[min(position_index, 2)]

        output.append({
            'group_name': name,
            'title': info['title'],
            'questions': selected
        })

    print("转换后的面试题数据：")
    print(json.dumps(output, ensure_ascii=False, indent=2))

# Run the conversion only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()
初始化12个产业教务系统项目

主要内容：
- 包含12个产业的完整教务系统前端代码
- 智能启动脚本 (start-industry.sh)
- 可视化产业导航页面 (index.html)
- 项目文档 (README.md)

优化内容：
- 删除所有node_modules和.yoyo文件夹，从7.5GB减少到2.7GB
- 添加.gitignore文件避免上传不必要的文件
- 自动依赖管理和智能启动系统

产业列表：
1. 文旅产业 (5150)
2. 智能制造 (5151)
3. 智能开发 (5152)
4. 财经商贸 (5153)
5. 视觉设计 (5154)
6. 交通物流 (5155)
7. 大健康 (5156)
8. 土木水利 (5157)
9. 食品产业 (5158)
10. 化工产业 (5159)
11. 能源产业 (5160)
12. 环保产业 (5161)

🤖 Generated with Claude Code
Co-Authored-By: Claude <noreply@anthropic.com>
2025-09-24 14:14:14 +08:00
			`#!/usr/bin/env python3`
			`# -- coding: utf-8 --`

			`import json`
			`import re`
			`from typing import List, Dict, Any`

			`def parse_interview_content(content: str) -> List[Dict[str, Any]]:`
			`"""解析面试题内容，转换为问答格式"""`
			`questions = []`

			`# 按章节分割`
			`sections = re.split(r'\n# [一二三四五六七八九十]、', content)`

			`question_id = 1`
			`for section in sections[1:]: # 跳过第一个空白section`
			`lines = section.strip().split('\n')`
			`if not lines:`
			`continue`

			`section_title = lines[0].strip()`

			`# 提取问题和答案`
			`current_question = ""`
			`current_answer = ""`
			`collecting_answer = False`

			`for line in lines[1:]:`
			`line = line.strip()`
			`if not line:`
			`continue`

			`# 识别问题（以数字开头）`
			`if re.match(r'^\d+\.', line):`
			`# 保存上一个问题`
			`if current_question and current_answer:`
			`questions.append({`
			`"id": f"q_{question_id}",`
			`"question": current_question.strip(),`
			`"answer": current_answer.strip()`
			`})`
			`question_id += 1`

			`# 开始新问题`
			`current_question = re.sub(r'^\d+\.\s*', '', line)`
			`current_answer = ""`
			`collecting_answer = False`

			`# 识别答案（示例答案：或答案：）`
			`elif line.startswith('示例答案：') or line.startswith('答案：'):`
			`current_answer = line.replace('示例答案：', '').replace('答案：', '').strip()`
			`collecting_answer = True`

			`# 继续收集答案`
			`elif collecting_answer and not re.match(r'^\d+\.', line) and not line.startswith('选择题：') and not line.startswith('填空题：'):`
			`if not line.startswith('A.') and not line.startswith('B.') and not line.startswith('C.') and not line.startswith('D.'):`
			`current_answer += " " + line`

			`# 保存最后一个问题`
			`if current_question and current_answer:`
			`questions.append({`
			`"id": f"q_{question_id}",`
			`"question": current_question.strip(),`
			`"answer": current_answer.strip()`
			`})`
			`question_id += 1`

			`return questions`

			`def main():`
			`# 读取土木水利岗位简历数据`
			`with open('网页未导入数据/土木水利产业/土木水利岗位简历.json', 'r', encoding='utf-8') as f:`
			`data = json.load(f)`

			`# 按岗位群分组面试题`
			`interview_groups = {}`

			`for position in data:`
			`group_name = position['简历岗位群']`
			`interview_title = position['面试题']`
			`interview_content = position['面试题内容']`

			`if group_name not in interview_groups:`
			`interview_groups[group_name] = {`
			`'title': interview_title,`
			`'content': interview_content`
			`}`

			`# 转换面试题格式`
			`converted_questions = []`

			`for group_name, group_data in interview_groups.items():`
			`questions = parse_interview_content(group_data['content'])`

			`# 只取前4个问题以适应界面显示`
			`questions = questions[:4]`

			`# 为每个问题添加难度和标签`
			`for i, q in enumerate(questions):`
			`if 'BIM' in group_name:`
			`q['difficulty'] = ['基础', '中等', '中高', '高级'][min(i, 3)]`
			`q['tags'] = ['BIM基础', 'BIM建模', 'BIM协作'][min(i, 2)]`
			`else:`
			`q['difficulty'] = ['基础', '中等', '中高', '高级'][min(i, 3)]`
			`q['tags'] = ['房地产', '经纪业务', '客户服务'][min(i, 2)]`

			`converted_questions.append({`
			`'group_name': group_name,`
			`'title': group_data['title'],`
			`'questions': questions`
			`})`

			`# 输出转换结果`
			`print("转换后的面试题数据：")`
			`print(json.dumps(converted_questions, ensure_ascii=False, indent=2))`

			`if __name__ == "__main__":`
			`main()`