ALL-teach_sys/frontend_大健康/extract_complete_questions.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import json
import re

def extract_all_questions_from_content(content):
    """Extract interview questions from *content*, grouped by category.

    Category headings are lines of the form ``# 一、...`` (a ``# `` marker,
    one or more Chinese numerals, ``、`` and a title).  When at least one
    heading is present, the text following each heading is parsed with
    ``extract_questions_from_text``; text that precedes the first heading is
    not parsed.  When no heading is present, the whole content is parsed and
    filed under the single category ``综合面试题``.

    Returns a list of ``{"category": str, "questions": list}`` dicts.
    Categories that yield no questions are omitted.
    """
    # Remove the "判断题：" label (with or without colon) wherever it occurs.
    cleaned = re.sub(r'判断题[：:]?\s*', '', content)

    heading_pattern = r'# ([一二三四五六七八九十]+、[^\n]+)'
    # With one capture group, re.split yields
    # [preamble, title1, body1, title2, body2, ...].
    pieces = re.split(heading_pattern, cleaned)

    if len(pieces) == 1:
        # No category headings: treat everything as one generic category.
        flat = extract_questions_from_text(cleaned)
        return [{"category": "综合面试题", "questions": flat}] if flat else []

    grouped = []
    for title, body in zip(pieces[1::2], pieces[2::2]):
        found = extract_questions_from_text(body)
        if found:
            grouped.append({
                "category": title.strip(),
                "questions": found
            })
    return grouped

# A question line: "<number>. <question text>".
_QUESTION_LINE_RE = re.compile(r'^(\d+)\.\s*(.+)$')
# Any line that starts with "<number>." (used to keep such lines out of answers).
_NUMBERED_PREFIX_RE = re.compile(r'^(\d+)\.')
# Leading "答案" / "示例答案" label (optional colon) at the start of an answer.
_ANSWER_LABEL_RE = re.compile(r'^(示例)?答案[：:]?\s*')
# Everything up to and including the last answer label on a line.
_INLINE_ANSWER_RE = re.compile(r'^.*(示例)?答案[：:]?\s*')
_WHITESPACE_RE = re.compile(r'\s+')
# Fallback layout: question line, a label line, then the answer lines.
_FALLBACK_QA_RE = re.compile(
    r'(\d+)\.\s*([^\n]+)\s*\n\s*(?:示例)?答案[：:]?\s*\n\s*([^\n]+(?:\n(?!\d+\.|#)[^\n]*)*)'
)


def _append_question(questions, question, answer):
    """Append a question/answer record, numbering it ``q1``, ``q2``, ..."""
    questions.append({
        "id": f"q{len(questions) + 1}",
        "question": question,
        "answer": answer
    })


def _flush_pending(questions, question, answer_parts):
    """Store the pending question if it has a non-empty cleaned answer."""
    if not (question and answer_parts):
        return
    answer = ' '.join(answer_parts).strip()
    answer = _ANSWER_LABEL_RE.sub('', answer)
    answer = _WHITESPACE_RE.sub(' ', answer)
    if answer:
        _append_question(questions, question, answer)


def extract_questions_from_text(text):
    """Extract numbered questions and their answers from *text*.

    The text is scanned line by line (each line is stripped first):

    * a line matching ``<number>. <text>`` starts a new question;
    * a line containing ``示例答案``, ``答案：`` or ``答案:`` opens the answer
      section; anything after the label on that line is part of the answer;
    * following non-empty lines are appended to the answer, except lines
      starting with ``#`` or with ``<number>.``, which are skipped;
    * the first blank line closes the answer section.

    Questions without any answer text are dropped.  Answer lines are joined
    with single spaces and runs of whitespace are collapsed.

    If the line scan finds nothing, a regex fallback is tried for the layout
    "question line / bare ``答案`` label line / answer lines".

    Returns a list of ``{"id": "qN", "question": str, "answer": str}`` dicts,
    numbered consecutively from ``q1`` in order of appearance.
    """
    questions = []

    current_question = None
    current_answer = []
    in_answer_section = False

    for raw_line in text.split('\n'):
        line = raw_line.strip()

        question_match = _QUESTION_LINE_RE.match(line)
        if question_match:
            # A new question begins: store the previous one first.
            _flush_pending(questions, current_question, current_answer)
            current_question = question_match.group(2).strip()
            current_answer = []
            in_answer_section = False
        elif line and ('示例答案' in line or '答案：' in line or '答案:' in line):
            in_answer_section = True
            # The answer may start on the same line as its label.
            answer_on_same_line = _INLINE_ANSWER_RE.sub('', line).strip()
            if answer_on_same_line:
                current_answer.append(answer_on_same_line)
        elif in_answer_section:
            if not line:
                # A blank line ends the answer section.
                in_answer_section = False
            elif not _NUMBERED_PREFIX_RE.match(line) and not line.startswith('#'):
                current_answer.append(line)

    # Store the last question, which no following question line flushed.
    _flush_pending(questions, current_question, current_answer)

    if not questions:
        # Fallback for text whose answer label has no colon and sits alone on
        # its line, which the line scan above does not recognise.
        for _number, raw_question, raw_answer in _FALLBACK_QA_RE.findall(text):
            question_text = raw_question.strip()
            answer_text = _WHITESPACE_RE.sub(' ', raw_answer.strip())
            if question_text and answer_text:
                _append_question(questions, question_text, answer_text)

    return questions

_DEFAULT_DATA_PATH = '/Users/apple/Documents/cursor/教务系统/frontend_大健康/网页未导入数据/大健康产业/大健康岗位简历.json'
_DEFAULT_MOCK_PATH = '/Users/apple/Documents/cursor/教务系统/frontend_大健康/src/mocks/resumeInterviewMock.js'

# Job-group name as it appears in the source data -> industry id in the mock.
_INDUSTRY_IDS = {
    '健康管理': 'health_1',
    '健康检查': 'health_2',
    '康复治疗': 'health_3',
    '慢性病管理': 'health_4',
    '轻医美': 'health_5',
    '心理健康': 'health_6',
    '社群运营': 'health_7',
    '药品供应链管理': 'health_8',
    '药品生产': 'health_9',
    '药品质量检测': 'health_10',
    '药物研发': 'health_11'
}


def _collect_questions_by_industry(health_data):
    """Map each job group to its merged list of question categories."""
    by_industry = {}
    for item in health_data:
        industry = item.get('简历岗位群', '')
        interview_content = item.get('面试题内容', '')
        if not (industry and interview_content):
            continue

        merged = by_industry.setdefault(industry, [])
        for cat in extract_all_questions_from_content(interview_content):
            existing_cat = next(
                (c for c in merged if c['category'] == cat['category']), None
            )
            if existing_cat is None:
                merged.append(cat)
                continue
            # Same category seen before: add only questions not already there.
            known = {q['question'] for q in existing_cat['questions']}
            existing_cat['questions'].extend(
                q for q in cat['questions'] if q['question'] not in known
            )
    return by_industry


def _build_question_groups(categories):
    """Convert categories into the mock's grouped format.

    Returns ``(groups, total)`` where question ids run consecutively
    (``q1``, ``q2``, ...) across all groups and ``total`` is their count.
    """
    groups = []
    total = 0
    for cat in categories:
        if not cat['questions']:
            continue
        sub_questions = [
            {
                "id": f"q{total + offset}",
                "question": q['question'],
                "answer": q['answer']
            }
            for offset, q in enumerate(cat['questions'], 1)
        ]
        total += len(sub_questions)
        groups.append({
            "id": f"group_q{len(groups) + 1}",
            "question": cat['category'],
            "subQuestions": sub_questions
        })
    return groups, total


def _replace_questions_field(content, industry_id, questions_json):
    """Swap the ``"questions"`` field of one industry entry in the mock text.

    Returns ``(new_content, count)`` where ``count`` is the number of entries
    that received the new field.
    """
    # Remove an existing flat "questions" array that directly follows
    # "positions" and closes the entry.
    # NOTE: `[^\]]*?` cannot cross a `]`, so an old "questions" value that
    # contains nested arrays (such as the "subQuestions" arrays written by
    # this script) is not removed, and the insertion below then finds nothing.
    pattern1 = rf'("id":\s*"{industry_id}"[^{{]*?"positions":\s*\[[^\]]*?\]),\s*"questions":\s*\[[^\]]*?\](\s*\}})'
    content = re.sub(pattern1, r'\1\2', content, flags=re.DOTALL)

    pattern2 = rf'("id":\s*"{industry_id}"[^{{]*?"positions":\s*\[[^\]]*?\])(\s*\}})'

    def insert_questions(match):
        # A function replacement inserts the JSON verbatim.  A string template
        # would have `re` reinterpret the backslash escapes inside the JSON
        # (e.g. `\\` -> `\`, `\n` -> newline, `\u` -> "bad escape" error).
        return f'{match.group(1)},\n    "questions": {questions_json}{match.group(2)}'

    return re.subn(pattern2, insert_questions, content, flags=re.DOTALL)


def main(data_path=_DEFAULT_DATA_PATH, mock_path=_DEFAULT_MOCK_PATH):
    """Rebuild the interview-question data in the resume/interview mock file.

    Reads the job records from *data_path* (JSON list of dicts with the keys
    ``简历岗位群`` and ``面试题内容``), extracts and merges the interview
    questions per job group, and writes them as the ``"questions"`` field of
    the matching industry entry in *mock_path*.  The mock file is rewritten
    in place, and a summary line is printed per updated job group.

    Args:
        data_path: Path of the JSON source data.
        mock_path: Path of the JavaScript mock file to update.
    """
    with open(data_path, 'r', encoding='utf-8') as f:
        health_data = json.load(f)

    with open(mock_path, 'r', encoding='utf-8') as f:
        content = f.read()

    industry_all_questions = _collect_questions_by_industry(health_data)

    updates = 0
    for orig_name, industry_id in _INDUSTRY_IDS.items():
        if orig_name not in industry_all_questions:
            continue

        questions_array, total_questions = _build_question_groups(
            industry_all_questions[orig_name]
        )
        if not questions_array:
            continue

        print(f"✓ {orig_name} ({industry_id}): {len(questions_array)} 个分类，共 {total_questions} 个面试题")

        questions_json = json.dumps(questions_array, ensure_ascii=False, indent=2)
        content, count = _replace_questions_field(content, industry_id, questions_json)
        if count > 0:
            updates += 1

    with open(mock_path, 'w', encoding='utf-8') as f:
        f.write(content)

    print(f"\n✅ 完成！更新了 {updates} 个岗位群的完整面试题数据")

# Run the extraction and mock update only when executed as a script.
if __name__ == "__main__":
    main()
-												初始化12个产业教务系统项目

主要内容：
- 包含12个产业的完整教务系统前端代码
- 智能启动脚本 (start-industry.sh)
- 可视化产业导航页面 (index.html)
- 项目文档 (README.md)

优化内容：
- 删除所有node_modules和.yoyo文件夹，从7.5GB减少到2.7GB
- 添加.gitignore文件避免上传不必要的文件
- 自动依赖管理和智能启动系统

产业列表：
1. 文旅产业 (5150)
2. 智能制造 (5151)
3. 智能开发 (5152)
4. 财经商贸 (5153)
5. 视觉设计 (5154)
6. 交通物流 (5155)
7. 大健康 (5156)
8. 土木水利 (5157)
9. 食品产业 (5158)
10. 化工产业 (5159)
11. 能源产业 (5160)
12. 环保产业 (5161)

🤖 Generated with Claude Code
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-09-24 14:14:14 +08:00
+								#!/usr/bin/env python3
 								# -*- coding: utf-8 -*-
 								import json
 								import re
 								def extract_all_questions_from_content(content):
 								    """从内容中提取所有面试题"""
 								    all_questions = []
 								    # 删除"判断题："等前缀
 								    content = re.sub(r'判断题[：:]?\s*', '', content)
 								    # 先按大类分割（# 一、二、三等）
 								    category_pattern = r'# ([一二三四五六七八九十]+、[^\n]+)'
 								    categories = re.split(category_pattern, content)
 								    if len(categories) > 1:
 								        # 有分类的情况
 								        for i in range(1, len(categories), 2):
 								            if i+1 >= len(categories):
 								                break
 								            category_title = categories[i].strip()
 								            category_content = categories[i+1]
 								            # 从该分类中提取所有问题
 								            questions = extract_questions_from_text(category_content)
 								            if questions:
 								                all_questions.append({
 								                    "category": category_title,
 								                    "questions": questions
 								                })
 								    else:
 								        # 没有分类，直接提取所有问题
 								        questions = extract_questions_from_text(content)
 								        if questions:
 								            all_questions.append({
 								                "category": "综合面试题",
 								                "questions": questions
 								            })
 								    return all_questions
 								def extract_questions_from_text(text):
 								    """从文本中提取问题和答案"""
 								    questions = []
 								    question_id = 1
 								    # 分割文本为行
 								    lines = text.split('\n')
 								    current_question = None
 								    current_answer = []
 								    in_answer_section = False
 								    for i, line in enumerate(lines):
 								        line = line.strip()
 								        # 检查是否是问题行（数字开头）
 								        question_match = re.match(r'^(\d+)\.\s*(.+)$', line)
 								        if question_match:
 								            # 先保存上一个问题
 								            if current_question and current_answer:
 								                answer_text = ' '.join(current_answer).strip()
 								                # 清理答案文本
 								                answer_text = re.sub(r'^(示例)?答案[：:]?\s*', '', answer_text)
 								                answer_text = re.sub(r'\s+', ' ', answer_text)
 								                if answer_text:
 								                    questions.append({
 								                        "id": f"q{question_id}",
 								                        "question": current_question,
 								                        "answer": answer_text
 								                    })
 								                    question_id += 1
 								            # 开始新问题
 								            current_question = question_match.group(2).strip()
 								            current_answer = []
 								            in_answer_section = False
 								        # 检查是否进入答案部分
 								        elif line and ('示例答案' in line or '答案：' in line or '答案:' in line):
 								            in_answer_section = True
 								            # 答案可能在同一行
 								            answer_on_same_line = re.sub(r'^.*(示例)?答案[：:]?\s*', '', line).strip()
 								            if answer_on_same_line:
 								                current_answer.append(answer_on_same_line)
 								        # 收集答案内容
 								        elif in_answer_section and line:
 								            # 检查是否是下一个问题或分类
 								            if not re.match(r'^(\d+)\.', line) and not line.startswith('#'):
 								                current_answer.append(line)
 								        # 空行可能表示答案结束
 								        elif not line and in_answer_section:
 								            in_answer_section = False
 								    # 保存最后一个问题
 								    if current_question and current_answer:
 								        answer_text = ' '.join(current_answer).strip()
 								        answer_text = re.sub(r'^(示例)?答案[：:]?\s*', '', answer_text)
 								        answer_text = re.sub(r'\s+', ' ', answer_text)
 								        if answer_text:
 								            questions.append({
 								                "id": f"q{question_id}",
 								                "question": current_question,
 								                "answer": answer_text
 								            })
 								    # 如果没有找到答案，尝试另一种模式
 								    if not questions:
 								        # 使用正则表达式匹配问题和答案
 								        pattern = r'(\d+)\.\s*([^\n]+)\s*\n\s*(?:示例)?答案[：:]?\s*\n\s*([^\n]+(?:\n(?!\d+\.|#)[^\n]*)*)'
 								        matches = re.findall(pattern, text, re.MULTILINE)
 								        question_id = 1
 								        for match in matches:
 								            question_text = match[1].strip()
 								            answer_text = match[2].strip()
 								            answer_text = re.sub(r'\s+', ' ', answer_text)
 								            if question_text and answer_text:
 								                questions.append({
 								                    "id": f"q{question_id}",
 								                    "question": question_text,
 								                    "answer": answer_text
 								                })
 								                question_id += 1
 								    return questions
 								def main():
 								    # 读取大健康岗位简历数据
 								    with open('/Users/apple/Documents/cursor/教务系统/frontend_大健康/网页未导入数据/大健康产业/大健康岗位简历.json', 'r', encoding='utf-8') as f:
 								        health_data = json.load(f)
 								    # 读取Mock文件
 								    with open('/Users/apple/Documents/cursor/教务系统/frontend_大健康/src/mocks/resumeInterviewMock.js', 'r', encoding='utf-8') as f:
 								        content = f.read()
 								    # 收集每个岗位群的所有面试题
 								    industry_all_questions = {}
 								    for item in health_data:
 								        industry = item.get('简历岗位群', '')
 								        interview_content = item.get('面试题内容', '')
 								        if industry and interview_content:
 								            if industry not in industry_all_questions:
 								                industry_all_questions[industry] = []
 								            # 提取该岗位的所有问题
 								            categories = extract_all_questions_from_content(interview_content)
 								            # 合并到该岗位群的问题列表中
 								            for cat in categories:
 								                # 检查是否已有该分类
 								                existing_cat = None
 								                for existing in industry_all_questions[industry]:
 								                    if existing['category'] == cat['category']:
 								                        existing_cat = existing
 								                        break
 								                if existing_cat:
 								                    # 合并问题，避免重复
 								                    existing_questions = {q['question'] for q in existing_cat['questions']}
 								                    for q in cat['questions']:
 								                        if q['question'] not in existing_questions:
 								                            existing_cat['questions'].append(q)
 								                else:
 								                    # 添加新分类
 								                    industry_all_questions[industry].append(cat)
 								    # 转换为前端期望的格式并更新Mock文件
 								    industry_mapping = {
 								        '健康管理': 'health_1',
 								        '健康检查': 'health_2',
 								        '康复治疗': 'health_3',
 								        '慢性病管理': 'health_4',
 								        '轻医美': 'health_5',
 								        '心理健康': 'health_6',
 								        '社群运营': 'health_7',
 								        '药品供应链管理': 'health_8',
 								        '药品生产': 'health_9',
 								        '药品质量检测': 'health_10',
 								        '药物研发': 'health_11'
 								    }
 								    updates = 0
 								    for orig_name, industry_id in industry_mapping.items():
 								        if orig_name in industry_all_questions:
 								            categories = industry_all_questions[orig_name]
 								            # 转换为questions数组
 								            questions_array = []
 								            cat_id = 1
 								            total_questions = 0
 								            for cat in categories:
 								                if cat['questions']:
 								                    # 重新编号问题ID
 								                    renumbered_questions = []
 								                    for i, q in enumerate(cat['questions'], 1):
 								                        renumbered_questions.append({
 								                            "id": f"q{total_questions + i}",
 								                            "question": q['question'],
 								                            "answer": q['answer']
 								                        })
 								                    total_questions += len(renumbered_questions)
 								                    questions_array.append({
 								                        "id": f"group_q{cat_id}",
 								                        "question": cat['category'],
 								                        "subQuestions": renumbered_questions
 								                    })
 								                    cat_id += 1
 								            if questions_array:
 								                print(f"✓ {orig_name} ({industry_id}): {len(questions_array)} 个分类，共 {total_questions} 个面试题")
 								                # 生成JSON字符串
 								                questions_json = json.dumps(questions_array, ensure_ascii=False, indent=2)
 								                # 删除旧的questions字段
 								                pattern1 = rf'("id":\s*"{industry_id}"[^{{]*?"positions":\s*\[[^\]]*?\]),\s*"questions":\s*\[[^\]]*?\](\s*\}})'
 								                replacement1 = rf'\1\2'
 								                content = re.sub(pattern1, replacement1, content, flags=re.DOTALL)
 								                # 添加新的questions字段
 								                pattern2 = rf'("id":\s*"{industry_id}"[^{{]*?"positions":\s*\[[^\]]*?\])(\s*\}})'
 								                replacement2 = rf'\1,\n    "questions": {questions_json}\2'
 								                new_content, count = re.subn(pattern2, replacement2, content, flags=re.DOTALL)
 								                if count > 0:
 								                    content = new_content
 								                    updates += 1
 								    # 写回文件
 								    with open('/Users/apple/Documents/cursor/教务系统/frontend_大健康/src/mocks/resumeInterviewMock.js', 'w', encoding='utf-8') as f:
 								        f.write(content)
 								    print(f"\n✅ 完成！更新了 {updates} 个岗位群的完整面试题数据")
 								if __name__ == "__main__":
 								    main()