ALL-teach_sys/frontend_能源/extract_interview_questions_v2.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import json
import re
from collections import defaultdict
import datetime
import shutil

def parse_interview_questions(content):
    """Parse raw interview text into a list of question/answer dicts.

    The text is processed line by line:

    * Markdown heading lines (``# ...``) are removed first.
    * A line of the form ``<digits>. <text>`` starts a new question; trailing
      question marks (ASCII or full-width) are stripped from its text.
    * A line starting with ``示例答案`` or ``答案`` marks the start of the
      answer and discards anything collected since the question line. Text
      that follows the marker on the same line after a colon
      (``示例答案：...``) is kept as the first part of the answer.
    * Every other non-empty line after a question is appended to the answer;
      answer lines are joined with single spaces.

    A question is only emitted when it has a non-empty answer. Ids are
    assigned from the running count of question lines, so an id may be
    skipped when a question without an answer is dropped.

    Args:
        content: The raw interview text. ``None`` or an empty string is
            treated as "no questions found".

    Returns:
        At most five question dicts with the keys ``id``, ``question``,
        ``answer``, ``difficulty`` and ``tags``. When no question could be
        parsed, two generic default questions are returned instead.
    """
    answer_markers = ('示例答案', '答案')
    question_pattern = re.compile(r'^(\d+)\.\s+(.+)$')

    # A missing value must not crash the regex below.
    if not content:
        content = ''

    # Drop markdown headings.
    content = re.sub(r'^#+\s+.*$', '', content, flags=re.MULTILINE)

    questions = []
    current_question = None
    current_answer = []
    question_id = 1

    for raw_line in content.split('\n'):
        line = raw_line.strip()
        if not line:
            continue

        # Question line: digits, a period, whitespace, then the text.
        question_match = question_pattern.match(line)
        if question_match:
            # Store the previous question, but only if it got an answer.
            if current_question and current_answer:
                current_question['answer'] = ' '.join(current_answer).strip()
                questions.append(current_question)
            current_answer = []

            current_question = {
                'id': f'q{question_id}',
                # Strip a trailing question mark, if any.
                'question': question_match.group(2).rstrip('？?'),
                'answer': '',
                'difficulty': '中等',
                'tags': ['能源行业', '专业知识']
            }
            question_id += 1
            continue

        if line.startswith(answer_markers):
            # The marker resets whatever was buffered since the question.
            current_answer = []
            if current_question:
                marker = next(m for m in answer_markers if line.startswith(m))
                remainder = line[len(marker):].lstrip()
                # Keep answer text written on the marker line itself; it is
                # only recognised when a colon separates it from the marker.
                if remainder[:1] in ('：', ':'):
                    inline_text = remainder[1:].strip()
                    if inline_text:
                        current_answer.append(inline_text)
            continue

        # Any other line belongs to the answer of the current question.
        if current_question:
            current_answer.append(line)

    # Store the final question.
    if current_question and current_answer:
        current_question['answer'] = ' '.join(current_answer).strip()
        questions.append(current_question)

    # Fall back to generic questions when nothing could be parsed.
    if not questions:
        questions = [
            {
                'id': 'q1',
                'question': '请介绍一下你的专业背景',
                'answer': '我具有能源相关专业背景，熟悉行业标准和规范，有实际项目经验。',
                'difficulty': '简单',
                'tags': ['基础问题']
            },
            {
                'id': 'q2',
                'question': '你为什么选择这个岗位',
                'answer': '这个岗位与我的专业背景高度匹配，能够发挥我的专业技能。',
                'difficulty': '简单',
                'tags': ['基础问题']
            }
        ]

    return questions[:5]  # only the first five questions are used

def _find_array_end(lines, row, col):
    """Return (row, col) of the ']' matching the '[' at lines[row][col].

    Brackets inside single- or double-quoted string literals are ignored.
    Returns None when no matching bracket exists.
    """
    depth = 0
    quote = None
    for r in range(row, len(lines)):
        text = lines[r]
        c = col if r == row else 0
        while c < len(text):
            ch = text[c]
            if quote:
                if ch == '\\':
                    c += 1  # skip the escaped character
                elif ch == quote:
                    quote = None
            elif ch in ('"', "'"):
                quote = ch
            elif ch == '[':
                depth += 1
            elif ch == ']':
                depth -= 1
                if depth == 0:
                    return r, c
            c += 1
        # Quoted strings are not expected to span lines; resetting here keeps
        # a stray apostrophe from hiding every bracket that follows it.
        quote = None
    return None


def extract_and_update_interview_questions(
    source_path="网页未导入数据/能源产业/能源岗位简历.json",
    mock_file="src/mocks/resumeInterviewMock.js",
):
    """Extract interview questions from the job JSON and patch the mock file.

    For every distinct ``简历岗位群`` value in the job list, the first job that
    carries a ``面试题内容`` field is parsed with ``parse_interview_questions``.
    The mock file is then scanned line by line: after each line containing
    ``"questions":`` the first following ``"subQuestions":`` array is replaced
    with the parsed questions of the group whose ``"name":`` was found in the
    20 lines before the ``"questions":`` line. Entries whose group has no
    parsed questions are left untouched.

    A timestamped copy of the mock file (``<mock_file>.backup_<timestamp>``)
    is written before the file is modified.

    Args:
        source_path: Path of the JSON file holding the list of job dicts.
        mock_file: Path of the JS mock file to update in place.
    """
    # Load the job data.
    with open(source_path, 'r', encoding='utf-8') as f:
        energy_jobs = json.load(f)

    # One question list per job group; the first job with content wins.
    interview_questions = {}
    for job in energy_jobs:
        group_name = job.get("简历岗位群", "")
        if group_name and group_name not in interview_questions:
            if "面试题内容" in job:
                interview_questions[group_name] = parse_interview_questions(job["面试题内容"])

    # Back up the mock file before touching it.
    backup_path = f"{mock_file}.backup_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}"
    shutil.copy(mock_file, backup_path)
    print(f"✅ 已备份文件到：{backup_path}")

    with open(mock_file, 'r', encoding='utf-8') as f:
        lines = f.readlines()

    sub_key = '"subQuestions":'
    new_lines = []
    i = 0
    while i < len(lines):
        line = lines[i]
        new_lines.append(line)
        i += 1

        # Only a "questions" array triggers the replacement logic below.
        if '"questions":' not in line:
            continue

        # Determine the group this questions array belongs to.
        # NOTE(review): the window is scanned forward, so the *earliest*
        # "name" within the preceding 20 lines is used, not the nearest one —
        # confirm this matches the layout of the mock file.
        group_name = None
        for j in range(max(0, i - 20), i):
            if '"name":' in lines[j]:
                name_match = re.search(r'"name":\s*"([^"]+)"', lines[j])
                if name_match:
                    group_name = name_match.group(1)
                    break

        # Copy lines through until the first subQuestions entry.
        while i < len(lines):
            current = lines[i]
            if sub_key not in current:
                new_lines.append(current)
                i += 1
                continue

            array_end = None
            if group_name and group_name in interview_questions:
                key_pos = current.index(sub_key)
                open_col = current.find('[', key_pos + len(sub_key))
                if open_col != -1:
                    array_end = _find_array_end(lines, i, open_col)
                if array_end is None:
                    print(f"⚠️ 未能定位 {group_name} 的subQuestions数组，已跳过")

            if array_end is None:
                # Nothing to replace (or the old array could not be located).
                new_lines.append(current)
                i += 1
            else:
                end_row, end_col = array_end
                questions_json = json.dumps(interview_questions[group_name], ensure_ascii=False, indent=8)
                # Shift the JSON block to the indentation of the key line; the
                # first line follows the key directly, so its indent is removed.
                indent = len(current) - len(current.lstrip())
                indented_questions = '\n'.join(
                    ' ' * indent + text if text.strip() else text
                    for text in questions_json.split('\n')
                ).lstrip(' ')
                # Keep whatever preceded the key and followed the old array
                # (typically a trailing comma) so the file stays valid.
                prefix = current[:key_pos]
                suffix = lines[end_row][end_col + 1:].rstrip('\r\n')
                new_lines.append(f'{prefix}"subQuestions": {indented_questions}{suffix}\n')
                # Skip every line of the old array, not just its first line.
                i = end_row + 1
            break

    # Write the patched content back.
    with open(mock_file, 'w', encoding='utf-8') as f:
        f.writelines(new_lines)

    print("✅ 成功更新面试题数据")
    print("\n📊 更新的岗位群面试题：")
    for group_name, questions in interview_questions.items():
        print(f"   - {group_name}: {len(questions)}个问题")

# Script entry point: run the extraction/update only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    extract_and_update_interview_questions()
-												初始化12个产业教务系统项目

主要内容：
- 包含12个产业的完整教务系统前端代码
- 智能启动脚本 (start-industry.sh)
- 可视化产业导航页面 (index.html)
- 项目文档 (README.md)

优化内容：
- 删除所有node_modules和.yoyo文件夹，从7.5GB减少到2.7GB
- 添加.gitignore文件避免上传不必要的文件
- 自动依赖管理和智能启动系统

产业列表：
1. 文旅产业 (5150)
2. 智能制造 (5151)
3. 智能开发 (5152)
4. 财经商贸 (5153)
5. 视觉设计 (5154)
6. 交通物流 (5155)
7. 大健康 (5156)
8. 土木水利 (5157)
9. 食品产业 (5158)
10. 化工产业 (5159)
11. 能源产业 (5160)
12. 环保产业 (5161)

🤖 Generated with Claude Code
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-09-24 14:14:14 +08:00
+								#!/usr/bin/env python3
 								# -*- coding: utf-8 -*-
 								import json
 								import re
 								from collections import defaultdict
 								import datetime
 								import shutil
 								def parse_interview_questions(content):
 								    """
 								    解析面试题内容，提取问题和答案
 								    """
 								    questions = []
 								    # 移除markdown标题
 								    content = re.sub(r'^#+\s+.*$', '', content, flags=re.MULTILINE)
 								    # 分割成行
 								    lines = content.split('\n')
 								    current_question = None
 								    current_answer = []
 								    question_id = 1
 								    for line in lines:
 								        line = line.strip()
 								        if not line:
 								            continue
 								        # 检查是否是问题行（数字+句号开头）
 								        question_match = re.match(r'^(\d+)\.\s+(.+)$', line)
 								        if question_match:
 								            # 保存上一个问题
 								            if current_question and current_answer:
 								                current_question['answer'] = ' '.join(current_answer).strip()
 								                questions.append(current_question)
 								                current_answer = []
 								            # 创建新问题
 								            question_text = question_match.group(2)
 								            # 移除问题末尾的问号（如果有）
 								            question_text = question_text.rstrip('？?')
 								            current_question = {
 								                'id': f'q{question_id}',
 								                'question': question_text,
 								                'answer': '',
 								                'difficulty': '中等',
 								                'tags': ['能源行业', '专业知识']
 								            }
 								            question_id += 1
 								        # 检查是否是答案行
 								        elif line.startswith('示例答案') or line.startswith('答案'):
 								            # 答案开始标记，清空答案缓冲区
 								            current_answer = []
 								        elif current_question and not question_match:
 								            # 收集答案内容
 								            if line and not line.startswith('示例答案') and not line.startswith('答案'):
 								                current_answer.append(line)
 								    # 保存最后一个问题
 								    if current_question and current_answer:
 								        current_question['answer'] = ' '.join(current_answer).strip()
 								        questions.append(current_question)
 								    # 如果没有找到问题，生成默认问题
 								    if not questions:
 								        questions = [
 								            {
 								                'id': 'q1',
 								                'question': '请介绍一下你的专业背景',
 								                'answer': '我具有能源相关专业背景，熟悉行业标准和规范，有实际项目经验。',
 								                'difficulty': '简单',
 								                'tags': ['基础问题']
 								            },
 								            {
 								                'id': 'q2',
 								                'question': '你为什么选择这个岗位',
 								                'answer': '这个岗位与我的专业背景高度匹配，能够发挥我的专业技能。',
 								                'difficulty': '简单',
 								                'tags': ['基础问题']
 								            }
 								        ]
 								    return questions[:5]  # 返回前5个问题
 								def extract_and_update_interview_questions():
 								    """
 								    从能源岗位简历.json提取面试题并更新mock文件
 								    """
 								    # 读取能源岗位简历数据
 								    with open("网页未导入数据/能源产业/能源岗位简历.json", 'r', encoding='utf-8') as f:
 								        energy_jobs = json.load(f)
 								    # 按岗位群分组并提取面试题
 								    interview_questions = {}
 								    for job in energy_jobs:
 								        group_name = job.get("简历岗位群", "")
 								        if group_name and group_name not in interview_questions:
 								            # 提取面试题内容
 								            if "面试题内容" in job:
 								                questions = parse_interview_questions(job["面试题内容"])
 								                interview_questions[group_name] = questions
 								    # 读取mock文件
 								    mock_file = "src/mocks/resumeInterviewMock.js"
 								    # 备份文件
 								    backup_path = f"{mock_file}.backup_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}"
 								    shutil.copy(mock_file, backup_path)
 								    print(f"✅ 已备份文件到：{backup_path}")
 								    # 读取文件内容
 								    with open(mock_file, 'r', encoding='utf-8') as f:
 								        lines = f.readlines()
 								    # 更新每个岗位群的subQuestions
 								    new_lines = []
 								    i = 0
 								    while i < len(lines):
 								        line = lines[i]
 								        # 检查是否是questions数组开始
 								        if '"questions":' in line:
 								            new_lines.append(line)
 								            i += 1
 								            # 找到当前岗位群的名称
 								            group_name = None
 								            for j in range(max(0, i-20), i):
 								                if '"name":' in lines[j]:
 								                    name_match = re.search(r'"name":\s*"([^"]+)"', lines[j])
 								                    if name_match:
 								                        group_name = name_match.group(1)
 								                        break
 								            # 处理questions数组
 								            while i < len(lines):
 								                if '"subQuestions":' in lines[i]:
 								                    # 找到subQuestions行
 								                    if group_name and group_name in interview_questions:
 								                        # 生成新的subQuestions
 								                        questions_json = json.dumps(interview_questions[group_name], ensure_ascii=False, indent=8)
 								                        # 调整缩进
 								                        indent = len(lines[i]) - len(lines[i].lstrip())
 								                        indented_questions = '\n'.join([' ' * indent + line if line.strip() else line
 								                                                      for line in questions_json.split('\n')])
 								                        new_lines.append(' ' * indent + f'"subQuestions": {indented_questions}\n')
 								                    else:
 								                        new_lines.append(lines[i])
 								                    i += 1
 								                    break
 								                else:
 								                    new_lines.append(lines[i])
 								                    i += 1
 								        else:
 								            new_lines.append(line)
 								            i += 1
 								    # 写回文件
 								    with open(mock_file, 'w', encoding='utf-8') as f:
 								        f.writelines(new_lines)
 								    print("✅ 成功更新面试题数据")
 								    print("\n📊 更新的岗位群面试题：")
 								    for group_name, questions in interview_questions.items():
 								        print(f"   - {group_name}: {len(questions)}个问题")
 								if __name__ == "__main__":
 								    extract_and_update_interview_questions()