ALL-teach_sys/frontend_化工/extract_all_questions_complete.py

#!/usr/bin/env python3
import json
import re

# Extracts every interview question from the chemical-industry resume data,
# groups the questions by job group, saves them as JSON, and generates a
# follow-up script that injects them into the front-end mock file.

# Paths used by this run (all relative to the current working directory).
SOURCE_JSON_PATH = '网页未导入数据/化工产业/化工岗位简历.json'
QUESTIONS_JSON_PATH = 'all_questions_complete.json'
UPDATE_SCRIPT_PATH = 'update_all_questions_final.py'

# A line starting with "<digits>." opens a new question in either section type.
_QUESTION_START = re.compile(r'^\d+\.')
# Multiple-choice options are lines starting with "A." through "D.".
_OPTION_START = re.compile(r'^[A-D]\.')

# Source of the generated updater script, written verbatim to
# UPDATE_SCRIPT_PATH.  Backslashes are doubled here because this is a normal
# (non-raw) string literal; the generated file contains single backslashes.
UPDATE_SCRIPT_SOURCE = '''#!/usr/bin/env python3
import json
import re
import subprocess
from datetime import datetime

# 读取完整的面试题数据
with open('all_questions_complete.json', 'r', encoding='utf-8') as f:
    job_group_questions = json.load(f)

# 读取现有mock文件
with open('src/mocks/resumeInterviewMock.js', 'r', encoding='utf-8') as f:
    content = f.read()

# 备份文件
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
backup_file = f'src/mocks/resumeInterviewMock.js.backup_final_{timestamp}'
with open(backup_file, 'w', encoding='utf-8') as f:
    f.write(content)
print(f"已创建备份: {backup_file}")

# 更新每个岗位群的面试题
updated_count = 0
for job_group, questions in job_group_questions.items():
    if not questions:
        continue

    # 构建subQuestions数组内容
    sub_questions_items = []
    for i, q in enumerate(questions):
        # 对问题和答案进行JSON转义
        escaped_q = json.dumps(q['question'], ensure_ascii=False)
        escaped_a = json.dumps(q['answer'], ensure_ascii=False)

        question_str = '{'
        question_str += f'"id": "q{i+1}", '
        question_str += f'"question": {escaped_q}, '
        question_str += f'"answer": {escaped_a}'
        question_str += '}'
        sub_questions_items.append(question_str)

    # 创建完整的subQuestions内容
    sub_questions_content = ',\\n'.join(sub_questions_items)

    # 查找并替换对应岗位群的subQuestions
    pattern = rf'("question"\\s*:\\s*"{re.escape(job_group)}"[^}}]*?"subQuestions"\\s*:\\s*)\\[[^\\]]*\\]'
    replacement = '[\\n' + sub_questions_content + '\\n        ]'

    # Use a function replacement: re.sub expands backslash escapes in string
    # templates, which would turn the JSON escapes (e.g. a backslash-n inside
    # an answer) into raw characters and break the JS string literals.
    new_content = re.sub(pattern, lambda m: m.group(1) + replacement, content, flags=re.DOTALL)

    if new_content != content:
        content = new_content
        updated_count += 1
        print(f"✓ 已更新 {job_group} 的 {len(questions)} 道面试题")

# 保存更新后的文件
with open('src/mocks/resumeInterviewMock.js', 'w', encoding='utf-8') as f:
    f.write(content)

# 验证语法
try:
    result = subprocess.run(['node', '-c', 'src/mocks/resumeInterviewMock.js'],
                          capture_output=True, text=True, encoding='utf-8')
    if result.returncode == 0:
        print(f"\\n✓ 语法检查通过")
        print(f"✓ 成功更新了 {updated_count} 个岗位群的面试题")
        print(f"\\n✅ 所有面试题更新成功完成！")
    else:
        print(f"\\n✗ 语法检查失败: {result.stderr}")
        # 恢复备份
        with open(backup_file, 'r', encoding='utf-8') as f:
            content = f.read()
        with open('src/mocks/resumeInterviewMock.js', 'w', encoding='utf-8') as f:
            f.write(content)
        print(f"已从备份恢复")
except Exception as e:
    print(f"错误: {e}")
'''


def _parse_qa_section(lines):
    """Parse the body lines of a question-and-answer section.

    Args:
        lines: The section's lines after its title line.

    Returns:
        A list of ``{'question': str, 'answer': str}`` dicts.  A question is
        the stripped line that starts with ``<digits>.``; its answer is every
        following non-empty line joined with ``\\n``.  Lines containing
        ``示例答案`` or ``答案：`` are skipped entirely, as are image links
        (lines starting with ``![]``).  Questions that collected no answer
        lines are dropped.
    """
    pairs = []  # (question line, list of answer lines), in document order
    for raw_line in lines:
        line = raw_line.strip()
        if _QUESTION_START.match(line):
            pairs.append((line, []))
        elif '示例答案' in line or '答案：' in line:
            # Marker line announcing the answer; it carries no content we keep.
            continue
        elif pairs and line and not line.startswith('![]'):
            # Text before the first question is ignored (``pairs`` is empty).
            pairs[-1][1].append(line)
    return [
        {'question': question, 'answer': '\n'.join(answer_lines)}
        for question, answer_lines in pairs
        if answer_lines
    ]


def _parse_choice_section(lines):
    """Parse the body lines of a multiple-choice section.

    Args:
        lines: The section's lines after its title line.

    Returns:
        A list of ``{'question': str, 'answer': str}`` dicts.  ``question`` is
        the question line followed by its ``A.``-``D.`` option lines, joined
        with ``\\n``; ``answer`` is the last line containing ``正确答案``,
        ``正确选项`` or ``答案：``.  Questions lacking options or an answer
        line are dropped.
    """
    entries = []
    current = None  # entry being filled; None until the first question line
    for raw_line in lines:
        line = raw_line.strip()
        if _QUESTION_START.match(line):
            current = {'question': line, 'options': [], 'answer': None}
            entries.append(current)
        elif _OPTION_START.match(line):
            if current is not None:
                current['options'].append(line)
        elif '正确答案' in line or '正确选项' in line or '答案：' in line:
            if current is not None:
                current['answer'] = line
    return [
        {
            'question': entry['question'] + '\n' + '\n'.join(entry['options']),
            'answer': entry['answer'],
        }
        for entry in entries
        if entry['options'] and entry['answer']
    ]


def parse_interview_content(interview_content):
    """Extract all questions from one position's interview-question text.

    The text is split on ``#``; the first line of each non-blank piece is its
    title.  Titles containing ``问答题`` (which also covers ``模拟问答题``) are
    parsed as question-and-answer sections, titles containing ``选择题`` as
    multiple-choice sections, and any other piece is ignored.

    Args:
        interview_content: The raw interview-question text.

    Returns:
        A list of ``{'question': str, 'answer': str}`` dicts in document order.
    """
    questions = []
    for section in interview_content.split('#'):
        section = section.strip()
        if not section:
            continue
        title, *body = section.split('\n')
        if '问答题' in title:
            questions.extend(_parse_qa_section(body))
        elif '选择题' in title:
            questions.extend(_parse_choice_section(body))
    return questions


def collect_questions(positions):
    """Group the interview questions of all positions by job group.

    Args:
        positions: Iterable of dicts read from the resume JSON; the keys
            ``简历岗位群`` (job group) and ``面试题内容`` (interview text) are
            used.  Entries where either value is missing or empty are skipped.

    Returns:
        A dict mapping job group name to its list of question dicts, in first
        seen order.  A job group with interview text but no parsable question
        still gets an (empty) entry.
    """
    grouped = {}
    for position in positions:
        job_group = position.get('简历岗位群', '')
        interview_content = position.get('面试题内容', '')
        if not job_group or not interview_content:
            continue
        grouped.setdefault(job_group, []).extend(
            parse_interview_content(interview_content)
        )
    return grouped


def main():
    """Run the extraction: read input, print statistics, write both outputs."""
    with open(SOURCE_JSON_PATH, 'r', encoding='utf-8') as f:
        chemical_data = json.load(f)

    job_group_all_questions = collect_questions(chemical_data)
    total_questions = sum(len(qs) for qs in job_group_all_questions.values())

    # Summary statistics.
    print("\n===== 化工岗位完整面试题统计 =====")
    print(f"总岗位群数: {len(job_group_all_questions)}")
    print(f"总面试题数: {total_questions}")
    print("\n各岗位群面试题数量：")

    for job_group, questions in sorted(job_group_all_questions.items()):
        print(f"  {job_group}: {len(questions)} 题")

    # Preview: the first two questions of the first two job groups.
    print("\n===== 面试题示例 =====")
    for job_group in list(job_group_all_questions)[:2]:
        questions = job_group_all_questions[job_group]
        if questions:
            print(f"\n【{job_group}】共{len(questions)}题，前2题示例：")
            for i, q in enumerate(questions[:2], 1):
                print(f"  题{i}:")
                print(f"    问题: {q['question'][:60]}...")
                print(f"    答案: {q['answer'][:60]}...")

    # Persist the grouped questions for the updater script to read.
    with open(QUESTIONS_JSON_PATH, 'w', encoding='utf-8') as f:
        json.dump(job_group_all_questions, f, ensure_ascii=False, indent=2)

    print(f"\n✅ 已保存所有面试题到 all_questions_complete.json")

    # Generate the script that patches the mock file.
    print("\n正在生成最终更新脚本...")

    with open(UPDATE_SCRIPT_PATH, 'w', encoding='utf-8') as f:
        f.write(UPDATE_SCRIPT_SOURCE)

    print("✅ 已生成 update_all_questions_final.py")
    print("\n请运行以下命令来更新所有面试题：")
    print("python3 update_all_questions_final.py")


if __name__ == '__main__':
    main()
-												初始化12个产业教务系统项目

主要内容：
- 包含12个产业的完整教务系统前端代码
- 智能启动脚本 (start-industry.sh)
- 可视化产业导航页面 (index.html)
- 项目文档 (README.md)

优化内容：
- 删除所有node_modules和.yoyo文件夹，从7.5GB减少到2.7GB
- 添加.gitignore文件避免上传不必要的文件
- 自动依赖管理和智能启动系统

产业列表：
1. 文旅产业 (5150)
2. 智能制造 (5151)
3. 智能开发 (5152)
4. 财经商贸 (5153)
5. 视觉设计 (5154)
6. 交通物流 (5155)
7. 大健康 (5156)
8. 土木水利 (5157)
9. 食品产业 (5158)
10. 化工产业 (5159)
11. 能源产业 (5160)
12. 环保产业 (5161)

🤖 Generated with Claude Code
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-09-24 14:14:14 +08:00
+								#!/usr/bin/env python3
 								import json
 								import re
 								# 读取化工岗位简历数据
 								with open('网页未导入数据/化工产业/化工岗位简历.json', 'r', encoding='utf-8') as f:
 								    chemical_data = json.load(f)
 								# 收集所有岗位群的面试题
 								job_group_all_questions = {}
 								total_questions = 0
 								# 遍历所有岗位
 								for position in chemical_data:
 								    job_group = position.get('简历岗位群', '')
 								    interview_content = position.get('面试题内容', '')
 								    if not job_group or not interview_content:
 								        continue
 								    # 初始化岗位群
 								    if job_group not in job_group_all_questions:
 								        job_group_all_questions[job_group] = []
 								    # 解析面试题内容 - 分别处理模拟问答题和选择题
 								    sections = interview_content.split('#')
 								    for section in sections:
 								        if not section.strip():
 								            continue
 								        section_lines = section.strip().split('\n')
 								        section_title = section_lines[0] if section_lines else ''
 								        if '模拟问答题' in section_title or '问答题' in section_title:
 								            # 处理问答题
 								            current_q = None
 								            current_a = []
 								            for line in section_lines[1:]:
 								                line = line.strip()
 								                # 检查是否是新问题（以数字开头）
 								                if re.match(r'^\d+\.', line):
 								                    # 保存之前的问答对
 								                    if current_q and current_a:
 								                        answer_text = '\n'.join(current_a)
 								                        job_group_all_questions[job_group].append({
 								                            'question': current_q,
 								                            'answer': answer_text
 								                        })
 								                        total_questions += 1
 								                    current_q = line
 								                    current_a = []
 								                elif '示例答案' in line or '答案：' in line:
 								                    # 开始收集答案
 								                    continue
 								                elif current_q and (line.startswith('-') or line.startswith('•') or line):
 								                    # 收集答案内容
 								                    if line and not line.startswith('![]'):  # 排除图片链接
 								                        current_a.append(line)
 								            # 保存最后一个问答对
 								            if current_q and current_a:
 								                answer_text = '\n'.join(current_a)
 								                job_group_all_questions[job_group].append({
 								                    'question': current_q,
 								                    'answer': answer_text
 								                })
 								                total_questions += 1
 								        elif '选择题' in section_title:
 								            # 处理选择题
 								            current_q = None
 								            options = []
 								            current_answer = None
 								            for line in section_lines[1:]:
 								                line = line.strip()
 								                # 检查是否是新问题（以数字开头）
 								                if re.match(r'^\d+\.', line):
 								                    # 保存之前的选择题
 								                    if current_q and options and current_answer:
 								                        full_question = current_q + '\n' + '\n'.join(options)
 								                        job_group_all_questions[job_group].append({
 								                            'question': full_question,
 								                            'answer': current_answer
 								                        })
 								                        total_questions += 1
 								                    current_q = line
 								                    options = []
 								                    current_answer = None
 								                elif re.match(r'^[A-D]\.', line):
 								                    # 收集选项
 								                    options.append(line)
 								                elif '正确答案' in line or '正确选项' in line or '答案：' in line:
 								                    # 提取答案
 								                    current_answer = line
 								            # 保存最后一个选择题
 								            if current_q and options and current_answer:
 								                full_question = current_q + '\n' + '\n'.join(options)
 								                job_group_all_questions[job_group].append({
 								                    'question': full_question,
 								                    'answer': current_answer
 								                })
 								                total_questions += 1
 								# 输出统计信息
 								print(f"\n===== 化工岗位完整面试题统计 =====")
 								print(f"总岗位群数: {len(job_group_all_questions)}")
 								print(f"总面试题数: {total_questions}")
 								print(f"\n各岗位群面试题数量：")
 								for job_group, questions in sorted(job_group_all_questions.items()):
 								    print(f"  {job_group}: {len(questions)} 题")
 								# 显示部分题目示例
 								print(f"\n===== 面试题示例 =====")
 								for job_group in list(job_group_all_questions.keys())[:2]:
 								    questions = job_group_all_questions[job_group]
 								    if questions:
 								        print(f"\n【{job_group}】共{len(questions)}题，前2题示例：")
 								        for i, q in enumerate(questions[:2], 1):
 								            print(f"  题{i}:")
 								            print(f"    问题: {q['question'][:60]}...")
 								            print(f"    答案: {q['answer'][:60]}...")
 								# 保存面试题数据
 								with open('all_questions_complete.json', 'w', encoding='utf-8') as f:
 								    json.dump(job_group_all_questions, f, ensure_ascii=False, indent=2)
 								print(f"\n✅ 已保存所有面试题到 all_questions_complete.json")
 								# 生成最终的更新脚本
 								print(f"\n正在生成最终更新脚本...")
 								with open('update_all_questions_final.py', 'w', encoding='utf-8') as f:
 								    f.write('''#!/usr/bin/env python3
 								import json
 								import re
 								import subprocess
 								from datetime import datetime
 								# 读取完整的面试题数据
 								with open('all_questions_complete.json', 'r', encoding='utf-8') as f:
 								    job_group_questions = json.load(f)
 								# 读取现有mock文件
 								with open('src/mocks/resumeInterviewMock.js', 'r', encoding='utf-8') as f:
 								    content = f.read()
 								# 备份文件
 								timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
 								backup_file = f'src/mocks/resumeInterviewMock.js.backup_final_{timestamp}'
 								with open(backup_file, 'w', encoding='utf-8') as f:
 								    f.write(content)
 								print(f"已创建备份: {backup_file}")
 								# 更新每个岗位群的面试题
 								updated_count = 0
 								for job_group, questions in job_group_questions.items():
 								    if not questions:
 								        continue
 								    # 构建subQuestions数组内容
 								    sub_questions_items = []
 								    for i, q in enumerate(questions):
 								        # 对问题和答案进行JSON转义
 								        escaped_q = json.dumps(q['question'], ensure_ascii=False)
 								        escaped_a = json.dumps(q['answer'], ensure_ascii=False)
 								        question_str = '{'
 								        question_str += f'"id": "q{i+1}", '
 								        question_str += f'"question": {escaped_q}, '
 								        question_str += f'"answer": {escaped_a}'
 								        question_str += '}'
 								        sub_questions_items.append(question_str)
 								    # 创建完整的subQuestions内容
 								    sub_questions_content = ',\\n'.join(sub_questions_items)
 								    # 查找并替换对应岗位群的subQuestions
 								    pattern = rf'("question"\\s*:\\s*"{re.escape(job_group)}"[^}}]*?"subQuestions"\\s*:\\s*)\\[[^\\]]*\\]'
 								    replacement = rf'\\1[\\n{sub_questions_content}\\n        ]'
 								    new_content = re.sub(pattern, replacement, content, flags=re.DOTALL)
 								    if new_content != content:
 								        content = new_content
 								        updated_count += 1
 								        print(f"✓ 已更新 {job_group} 的 {len(questions)} 道面试题")
 								# 保存更新后的文件
 								with open('src/mocks/resumeInterviewMock.js', 'w', encoding='utf-8') as f:
 								    f.write(content)
 								# 验证语法
 								try:
 								    result = subprocess.run(['node', '-c', 'src/mocks/resumeInterviewMock.js'],
 								                          capture_output=True, text=True, encoding='utf-8')
 								    if result.returncode == 0:
 								        print(f"\\n✓ 语法检查通过")
 								        print(f"✓ 成功更新了 {updated_count} 个岗位群的面试题")
 								        print(f"\\n✅ 所有面试题更新成功完成！")
 								    else:
 								        print(f"\\n✗ 语法检查失败: {result.stderr}")
 								        # 恢复备份
 								        with open(backup_file, 'r', encoding='utf-8') as f:
 								            content = f.read()
 								        with open('src/mocks/resumeInterviewMock.js', 'w', encoding='utf-8') as f:
 								            f.write(content)
 								        print(f"已从备份恢复")
 								except Exception as e:
 								    print(f"错误: {e}")
 								''')
 								print(f"✅ 已生成 update_all_questions_final.py")
 								print(f"\n请运行以下命令来更新所有面试题：")
 								print(f"python3 update_all_questions_final.py")