ALL-teach_sys/frontend_能源/extract_all_interview_questions.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import json
import re
from collections import defaultdict
import datetime
import shutil

def parse_interview_questions(content):
    """
    Parse raw interview-question text into a list of question/answer records.

    Every line is stripped and then classified, in this order:

    * ``<digits>. <text>`` starts a new question; trailing ``?`` / ``？``
      characters are removed from the question text.
    * A line containing ``示例答案``, ``答案：`` or ``答案:`` starts the answer of
      the current question; text after the marker on the same line becomes
      the first answer line. Markers seen while no question is open are
      ignored so their text cannot leak into a later question.
    * A line starting with ``#`` is a heading. It is never added to an
      answer, and once the current question has answer text the heading
      closes that question, so text following the heading is not appended.
    * Any other non-empty line after an answer marker is appended to the
      answer.

    Questions that never receive answer text are dropped. They still consume
    an id, so the returned ids may have gaps.

    Args:
        content: The full interview-question text. Empty or ``None`` yields
            an empty list.

    Returns:
        A list of dicts with the keys ``id`` (``"q1"``, ``"q2"``, ...),
        ``question`` and ``answer`` (answer lines joined with newlines).
    """
    questions = []
    if not content:
        return questions

    def _save(question, answer_lines):
        """Store ``question`` in the result list if it has answer text."""
        if question and answer_lines:
            answer_text = '\n'.join(answer_lines).strip()
            if answer_text:
                question['answer'] = answer_text
                questions.append(question)

    current_question = None
    current_answer = []
    question_id = 1
    in_answer = False

    for raw_line in content.split('\n'):
        line = raw_line.strip()

        # Question line: digits, a period, whitespace, then the question text.
        question_match = re.match(r'^(\d+)\.\s+(.+)$', line)
        if question_match:
            _save(current_question, current_answer)
            current_question = {
                'id': f'q{question_id}',
                'question': question_match.group(2).rstrip('？?'),
                'answer': ''
            }
            # Always start from an empty answer, even when nothing was saved,
            # so stale lines never end up in the new question's answer.
            current_answer = []
            question_id += 1
            in_answer = False

        # Answer marker, optionally followed by answer text on the same line.
        elif '示例答案' in line or '答案：' in line or '答案:' in line:
            if current_question is None:
                # No question to attach this answer to.
                continue
            in_answer = True
            answer_in_line = re.sub(r'^.*?(示例答案|答案)[：:]?\s*', '', line).strip()
            if answer_in_line:
                current_answer.append(answer_in_line)

        # Heading: checked before answer collection so it can end the answer.
        elif line.startswith('#'):
            if current_question and current_answer:
                _save(current_question, current_answer)
                current_question = None
                current_answer = []
                in_answer = False

        # Continuation of the current answer.
        elif in_answer and current_question and line:
            current_answer.append(line)

    # The last question has no following question or heading to close it.
    _save(current_question, current_answer)

    return questions

def extract_and_update_all_interview_questions(
        data_file="网页未导入数据/能源产业/能源岗位简历.json",
        mock_file="src/mocks/resumeInterviewMock.js"):
    """
    Extract interview questions per job group and write them into the mock file.

    Steps:

    1. Load the job list from ``data_file`` (JSON). For each distinct
       ``简历岗位群`` value, the first job whose ``面试题内容`` text yields at
       least one parsed question supplies that group's questions.
    2. Copy ``mock_file`` to a timestamped ``.backup_YYYYmmdd_HHMMSS`` sibling.
    3. For each group, locate ``"name": "<group>"`` followed by
       ``"questions":`` and the next ``"subQuestions": [...]`` array in the
       mock file, and replace that array with the extracted questions.
    4. Write the mock file back only if at least one group was updated, then
       print per-group statistics.

    Args:
        data_file: Path of the JSON file with the job records.
        mock_file: Path of the JavaScript mock file to update in place.

    Returns:
        None. Progress and results are reported via ``print``.
    """
    with open(data_file, 'r', encoding='utf-8') as f:
        energy_jobs = json.load(f)

    # Collect questions per job group; the first job with parsable questions wins.
    interview_questions = {}
    for job in energy_jobs:
        group_name = job.get("简历岗位群", "")
        if not group_name or group_name in interview_questions:
            continue
        raw_questions = job.get("面试题内容")
        if not isinstance(raw_questions, str):
            # Missing or non-text content cannot be parsed; try the next job.
            continue
        questions = parse_interview_questions(raw_questions)
        if questions:
            interview_questions[group_name] = questions
            print(f"✅ {group_name}: 提取了 {len(questions)} 个问题")

    # Back up the mock file before touching it.
    backup_path = f"{mock_file}.backup_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}"
    shutil.copy(mock_file, backup_path)
    print(f"\n✅ 已备份文件到：{backup_path}")

    with open(mock_file, 'r', encoding='utf-8') as f:
        content = f.read()

    # The mock file is only edited when it declares the industries array.
    if not re.search(r'const industries = (\[[\s\S]*?\]);', content):
        print("❌ 未找到industries数组")
        return

    new_content = content
    updated_groups = []
    missing_groups = []

    for group_name, questions in interview_questions.items():
        # The group name is data, not a pattern: escape it so characters such
        # as parentheses or "+" are matched literally.
        # NOTE: "[^\]]*" stops at the first "]", so an existing subQuestions
        # array whose text contains "]" is only matched up to that character.
        pattern = (
            r'"name": "' + re.escape(group_name) + r'"'
            r'[\s\S]*?"questions":[\s\S]*?("subQuestions":\s*\[[^\]]*\])'
        )
        match = re.search(pattern, new_content)
        if not match:
            missing_groups.append(group_name)
            continue

        # Serialize the questions and shift every non-blank line by 8 spaces.
        new_sub_questions = json.dumps(questions, ensure_ascii=False, indent=8)
        new_sub_questions = '\n'.join(
            '        ' + line if line.strip() else line
            for line in new_sub_questions.split('\n')
        )
        replacement = f'"subQuestions": {new_sub_questions}'

        # Splice by position so only this group's array is replaced, even if
        # identical text occurs elsewhere in the file.
        start, end = match.span(1)
        new_content = new_content[:start] + replacement + new_content[end:]
        updated_groups.append(group_name)

    for group_name in missing_groups:
        print(f"⚠️ 未在mock文件中找到岗位群：{group_name}")

    if not updated_groups:
        print("\n❌ 没有任何岗位群被更新，文件未修改")
        return

    with open(mock_file, 'w', encoding='utf-8') as f:
        f.write(new_content)

    print("\n✅ 成功更新所有面试题数据")
    print("\n📊 更新统计：")
    total_questions = 0
    for group_name in updated_groups:
        count = len(interview_questions[group_name])
        total_questions += count
        print(f"   - {group_name}: {count}个问题")
    print(f"\n📈 总计：{len(updated_groups)}个岗位群，{total_questions}个面试题")

# Script entry point: run the extraction and mock-file update only when this
# file is executed directly, not when it is imported.
if __name__ == "__main__":
    extract_and_update_all_interview_questions()
-												初始化12个产业教务系统项目

主要内容：
- 包含12个产业的完整教务系统前端代码
- 智能启动脚本 (start-industry.sh)
- 可视化产业导航页面 (index.html)
- 项目文档 (README.md)

优化内容：
- 删除所有node_modules和.yoyo文件夹，从7.5GB减少到2.7GB
- 添加.gitignore文件避免上传不必要的文件
- 自动依赖管理和智能启动系统

产业列表：
1. 文旅产业 (5150)
2. 智能制造 (5151)
3. 智能开发 (5152)
4. 财经商贸 (5153)
5. 视觉设计 (5154)
6. 交通物流 (5155)
7. 大健康 (5156)
8. 土木水利 (5157)
9. 食品产业 (5158)
10. 化工产业 (5159)
11. 能源产业 (5160)
12. 环保产业 (5161)

🤖 Generated with Claude Code
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-09-24 14:14:14 +08:00
+								#!/usr/bin/env python3
 								# -*- coding: utf-8 -*-
 								import json
 								import re
 								from collections import defaultdict
 								import datetime
 								import shutil
 								def parse_interview_questions(content):
 								    """
 								    解析面试题内容，提取所有问题和答案
 								    """
 								    questions = []
 								    # 按行分割内容
 								    lines = content.split('\n')
 								    current_question = None
 								    current_answer = []
 								    question_id = 1
 								    in_answer = False
 								    for i, line in enumerate(lines):
 								        line = line.strip()
 								        # 检查是否是问题行（数字+句号开头）
 								        question_match = re.match(r'^(\d+)\.\s+(.+)$', line)
 								        if question_match:
 								            # 保存上一个问题
 								            if current_question and current_answer:
 								                answer_text = '\n'.join(current_answer).strip()
 								                if answer_text:
 								                    current_question['answer'] = answer_text
 								                    questions.append(current_question)
 								                current_answer = []
 								            # 创建新问题
 								            question_text = question_match.group(2)
 								            # 移除问题末尾的问号（如果有）
 								            question_text = question_text.rstrip('？?')
 								            current_question = {
 								                'id': f'q{question_id}',
 								                'question': question_text,
 								                'answer': ''
 								            }
 								            question_id += 1
 								            in_answer = False
 								        # 检查是否是答案开始标记
 								        elif '示例答案' in line or '答案：' in line or '答案:' in line:
 								            in_answer = True
 								            # 如果答案在同一行
 								            answer_in_line = re.sub(r'^.*?(示例答案|答案)[：:]?\s*', '', line).strip()
 								            if answer_in_line:
 								                current_answer.append(answer_in_line)
 								        # 收集答案内容
 								        elif in_answer and current_question and line:
 								            # 跳过标题行
 								            if not line.startswith('#'):
 								                current_answer.append(line)
 								        # 检查是否到达下一个部分（标题）
 								        elif line.startswith('#') and current_question and current_answer:
 								            # 保存当前问题
 								            answer_text = '\n'.join(current_answer).strip()
 								            if answer_text:
 								                current_question['answer'] = answer_text
 								                questions.append(current_question)
 								            current_question = None
 								            current_answer = []
 								            in_answer = False
 								    # 保存最后一个问题
 								    if current_question and current_answer:
 								        answer_text = '\n'.join(current_answer).strip()
 								        if answer_text:
 								            current_question['answer'] = answer_text
 								            questions.append(current_question)
 								    return questions
 								def extract_and_update_all_interview_questions():
 								    """
 								    从能源岗位简历.json提取所有面试题并更新mock文件
 								    """
 								    # 读取能源岗位简历数据
 								    with open("网页未导入数据/能源产业/能源岗位简历.json", 'r', encoding='utf-8') as f:
 								        energy_jobs = json.load(f)
 								    # 按岗位群分组并提取所有面试题
 								    interview_questions = {}
 								    for job in energy_jobs:
 								        group_name = job.get("简历岗位群", "")
 								        if group_name and group_name not in interview_questions:
 								            # 提取面试题内容
 								            if "面试题内容" in job:
 								                questions = parse_interview_questions(job["面试题内容"])
 								                if questions:
 								                    interview_questions[group_name] = questions
 								                    print(f"✅ {group_name}: 提取了 {len(questions)} 个问题")
 								    # 读取mock文件
 								    mock_file = "src/mocks/resumeInterviewMock.js"
 								    # 备份文件
 								    backup_path = f"{mock_file}.backup_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}"
 								    shutil.copy(mock_file, backup_path)
 								    print(f"\n✅ 已备份文件到：{backup_path}")
 								    # 读取文件内容
 								    with open(mock_file, 'r', encoding='utf-8') as f:
 								        content = f.read()
 								    # 查找并替换每个岗位群的questions
 								    import re
 								    # 分析content，找到industries数组
 								    industries_match = re.search(r'const industries = (\[[\s\S]*?\]);', content)
 								    if industries_match:
 								        industries_str = industries_match.group(1)
 								        # 手动解析并更新每个岗位群的subQuestions
 								        # 因为JSON格式可能有问题，我们逐个替换
 								        new_content = content
 								        for group_name, questions in interview_questions.items():
 								            # 查找该岗位群的questions部分
 								            pattern = f'"name": "{group_name}"[\\s\\S]*?"questions":[\\s\\S]*?"subQuestions":\\s*\\[[^\\]]*\\]'
 								            match = re.search(pattern, new_content)
 								            if match:
 								                # 找到subQuestions的位置
 								                sub_pattern = r'"subQuestions":\s*\[[^\]]*\]'
 								                sub_match = re.search(sub_pattern, match.group(0))
 								                if sub_match:
 								                    # 生成新的subQuestions
 								                    new_sub_questions = json.dumps(questions, ensure_ascii=False, indent=8)
 								                    # 调整缩进（8个空格）
 								                    new_sub_questions = '\n'.join(['        ' + line if line.strip() else line
 								                                                  for line in new_sub_questions.split('\n')])
 								                    # 替换
 								                    replacement = f'"subQuestions": {new_sub_questions}'
 								                    new_match_str = match.group(0).replace(sub_match.group(0), replacement)
 								                    new_content = new_content.replace(match.group(0), new_match_str)
 								        # 写回文件
 								        with open(mock_file, 'w', encoding='utf-8') as f:
 								            f.write(new_content)
 								        print("\n✅ 成功更新所有面试题数据")
 								        print("\n📊 更新统计：")
 								        total_questions = 0
 								        for group_name, questions in interview_questions.items():
 								            total_questions += len(questions)
 								            print(f"   - {group_name}: {len(questions)}个问题")
 								        print(f"\n📈 总计：{len(interview_questions)}个岗位群，{total_questions}个面试题")
 								    else:
 								        print("❌ 未找到industries数组")
 								if __name__ == "__main__":
 								    extract_and_update_all_interview_questions()