ALL-teach_sys/frontend_化工/extract_and_update_all_questions.py

#!/usr/bin/env python3
import json
import re
import subprocess
from datetime import datetime

# Input data and the mock file that is rewritten in place. Both paths are
# relative to the directory the script is launched from.
SOURCE_JSON_PATH = '网页未导入数据/化工产业/化工岗位简历.json'
MOCK_JS_PATH = 'src/mocks/resumeInterviewMock.js'

# A line that opens a new question starts with digits followed by "." or "、".
QUESTION_START_RE = re.compile(r'^\d+[\.、]')
# Substrings that mark a line as an "answer starts here" header.
ANSWER_MARKERS = ('示例答案', '答案：', '正确答案', '正确选项')
# Multiple-choice option lines are folded into the question text.
OPTION_PREFIXES = ('A.', 'B.', 'C.', 'D.')
# Full-width or ASCII colon separating an answer marker from inline text.
COLON_RE = re.compile(r'[：:]')


def inline_answer_text(line):
    """Return the text after the first colon of an answer-marker line.

    Returns an empty string when the line has no colon or nothing but
    whitespace / Markdown bold asterisks after it (e.g. "示例答案：").
    """
    parts = COLON_RE.split(line, maxsplit=1)
    if len(parts) < 2:
        return ''
    return parts[1].strip().strip('*').strip()


def append_qa_pair(questions, question, answer_lines):
    """Append a question/answer dict to *questions* if both parts are non-empty."""
    if not question or not answer_lines:
        return
    answer_text = '\n'.join(answer_lines).strip()
    if answer_text:
        questions.append({'question': question, 'answer': answer_text})


def parse_interview_content(interview_content):
    """Split raw interview text into a list of question/answer dicts.

    Args:
        interview_content: Multi-line string; each question starts with a
            numbered line such as "1." or "2、".

    Returns:
        A list of ``{'question': str, 'answer': str}`` dicts in source
        order. Questions for which no answer text was found are omitted.
    """
    questions = []
    current_q = None
    current_a = []

    for raw_line in interview_content.split('\n'):
        line = raw_line.strip()
        if not line:
            continue

        if QUESTION_START_RE.match(line):
            # A new numbered line closes the previous question.
            append_qa_pair(questions, current_q, current_a)
            current_q = line
            current_a = []
        elif any(marker in line for marker in ANSWER_MARKERS):
            # The marker itself is not answer content, but text after the
            # colon (e.g. "正确答案：B") is; previously it was discarded.
            inline = inline_answer_text(line)
            if current_q and inline:
                current_a.append(inline)
        elif current_q:
            if line.startswith(OPTION_PREFIXES):
                # Choice options belong to the question, not the answer.
                current_q += '\n' + line
            elif not line.startswith('#'):
                # Everything else except "#" heading lines is answer text.
                current_a.append(line)

    append_qa_pair(questions, current_q, current_a)
    return questions


def collect_job_group_questions(chemical_data):
    """Group parsed questions by job group, dropping duplicate question texts.

    Args:
        chemical_data: Iterable of position dicts read from the source JSON;
            the '简历岗位群' and '面试题内容' keys are used.

    Returns:
        Dict mapping job-group name to its list of question/answer dicts.
        A group whose content yielded no questions maps to an empty list;
        positions with an empty group or empty content are ignored.
    """
    job_group_questions = {}
    seen_by_group = {}

    for position in chemical_data:
        job_group = position.get('简历岗位群', '')
        interview_content = position.get('面试题内容', '')
        if not job_group or not interview_content:
            continue

        group_questions = job_group_questions.setdefault(job_group, [])
        seen = seen_by_group.setdefault(job_group, set())
        for qa in parse_interview_content(interview_content):
            # Set lookup replaces the former linear scan per question.
            if qa['question'] not in seen:
                seen.add(qa['question'])
                group_questions.append(qa)

    return job_group_questions


def add_fallback_questions(chemical_data, job_group_questions):
    """Give generic questions to groups that have content but no parsed questions.

    A group qualifies when it currently has no questions and at least one of
    its positions has interview content containing '答案'. The mapping is
    modified in place.
    """
    for position in chemical_data:
        job_group = position.get('简历岗位群', '')
        if not job_group or job_group_questions.get(job_group):
            continue

        # `or ''` guards against a JSON null in '面试题内容'.
        has_answer_text = any(
            other.get('简历岗位群') == job_group
            and '答案' in (other.get('面试题内容') or '')
            for other in chemical_data
        )
        if has_answer_text:
            job_group_questions[job_group] = [
                {
                    "question": f"请介绍一下你对{job_group}岗位的理解",
                    "answer": f"需要掌握{job_group}相关的专业知识和技能，确保工作安全高效。"
                },
                {
                    "question": f"你为什么选择{job_group}这个职业方向？",
                    "answer": f"对{job_group}领域充满兴趣，希望在这个领域深入发展。"
                },
                {
                    "question": f"你认为{job_group}工作中最重要的是什么？",
                    "answer": "安全意识、专业技能和团队协作是最重要的。"
                },
            ]


def replace_sub_questions(content, job_group, questions):
    """Return *content* with the job group's ``subQuestions`` array replaced.

    Args:
        content: Full text of the mock JS file.
        job_group: Job-group name; the entry is located by its
            ``"question": "<job_group>岗位群面试题"`` field.
        questions: List of ``{'question', 'answer'}`` dicts to write.

    Returns:
        The updated text, or *content* unchanged when no entry matches.
    """
    # json.dumps escapes newlines, quotes and backslashes so each item is a
    # valid JS object literal.
    items = [
        json.dumps(
            {"id": f"q{index}", "question": qa['question'], "answer": qa['answer']},
            ensure_ascii=False,
            indent=20,
        )
        for index, qa in enumerate(questions, start=1)
    ]
    sub_questions_content = ',\n'.join(items)

    # NOTE(review): the existing array is matched up to its first "]", so an
    # array whose strings already contain "]" would be matched only partially.
    pattern = (
        r'("question"\s*:\s*"' + re.escape(job_group) + r'岗位群面试题"'
        r'[^}]*?"subQuestions"\s*:\s*)\[[^\]]*\]'
    )

    def build_replacement(match):
        # A callable's return value is inserted verbatim. A replacement
        # *string* would have its backslash escapes re-interpreted by re.sub,
        # turning the JSON "\n" into a raw newline and corrupting the file.
        return match.group(1) + '[\n' + sub_questions_content + '\n        ]'

    return re.sub(pattern, build_replacement, content, flags=re.DOTALL)


def main():
    """Extract the interview questions, rewrite the mock file and verify it."""
    print("正在提取化工岗位面试题...")

    with open(SOURCE_JSON_PATH, 'r', encoding='utf-8') as f:
        chemical_data = json.load(f)

    job_group_questions = collect_job_group_questions(chemical_data)
    add_fallback_questions(chemical_data, job_group_questions)
    total_questions = sum(len(qs) for qs in job_group_questions.values())

    print("\n===== 化工岗位面试题提取完成 =====")
    print(f"总岗位群数: {len(job_group_questions)}")
    print(f"总面试题数: {total_questions}")
    print("\n各岗位群面试题数量：")
    for job_group, questions in sorted(job_group_questions.items()):
        print(f"  {job_group}: {len(questions)} 题")

    print("\n正在更新 resumeInterviewMock.js...")
    with open(MOCK_JS_PATH, 'r', encoding='utf-8') as f:
        content = f.read()

    # Keep a timestamped copy so a failed syntax check can be rolled back.
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    backup_file = f'{MOCK_JS_PATH}.backup_all_{timestamp}'
    with open(backup_file, 'w', encoding='utf-8') as f:
        f.write(content)
    print(f"已创建备份: {backup_file}")

    updated_count = 0
    updated_questions = 0
    for job_group, questions in job_group_questions.items():
        if not questions:
            continue
        new_content = replace_sub_questions(content, job_group, questions)
        if new_content != content:
            content = new_content
            updated_count += 1
            updated_questions += len(questions)
            print(f"✓ 已更新 {job_group} 的 {len(questions)} 道面试题")

    with open(MOCK_JS_PATH, 'w', encoding='utf-8') as f:
        f.write(content)

    print("\n正在验证语法...")
    try:
        result = subprocess.run(['node', '-c', MOCK_JS_PATH],
                                capture_output=True, text=True, encoding='utf-8')
    except OSError as e:
        # node could not be launched; the updated file is left unverified.
        print(f"错误: {e}")
        return

    if result.returncode == 0:
        print("✓ 语法检查通过")
        print(f"✓ 成功更新了 {updated_count} 个岗位群的面试题")
        # Report what was actually written, not everything that was extracted.
        print(f"\n✅ 所有面试题更新成功完成！共更新 {updated_questions} 道题目")
    else:
        print(f"\n✗ 语法检查失败: {result.stderr}")
        # Roll back to the backup taken before the rewrite.
        with open(backup_file, 'r', encoding='utf-8') as f:
            original = f.read()
        with open(MOCK_JS_PATH, 'w', encoding='utf-8') as f:
            f.write(original)
        print("已从备份恢复")


if __name__ == "__main__":
    main()
-												初始化12个产业教务系统项目

主要内容：
- 包含12个产业的完整教务系统前端代码
- 智能启动脚本 (start-industry.sh)
- 可视化产业导航页面 (index.html)
- 项目文档 (README.md)

优化内容：
- 删除所有node_modules和.yoyo文件夹，从7.5GB减少到2.7GB
- 添加.gitignore文件避免上传不必要的文件
- 自动依赖管理和智能启动系统

产业列表：
1. 文旅产业 (5150)
2. 智能制造 (5151)
3. 智能开发 (5152)
4. 财经商贸 (5153)
5. 视觉设计 (5154)
6. 交通物流 (5155)
7. 大健康 (5156)
8. 土木水利 (5157)
9. 食品产业 (5158)
10. 化工产业 (5159)
11. 能源产业 (5160)
12. 环保产业 (5161)

🤖 Generated with Claude Code
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-09-24 14:14:14 +08:00
+								#!/usr/bin/env python3
 								import json
 								import re
 								import subprocess
 								from datetime import datetime
 								print("正在提取化工岗位面试题...")
 								# 读取化工岗位简历数据
 								with open('网页未导入数据/化工产业/化工岗位简历.json', 'r', encoding='utf-8') as f:
 								    chemical_data = json.load(f)
 								# 收集所有岗位群的面试题
 								job_group_questions = {}
 								total_questions = 0
 								# 遍历所有岗位
 								for position in chemical_data:
 								    job_group = position.get('简历岗位群', '')
 								    interview_content = position.get('面试题内容', '')
 								    if not job_group or not interview_content:
 								        continue
 								    # 初始化岗位群
 								    if job_group not in job_group_questions:
 								        job_group_questions[job_group] = []
 								    # 统一的面试题解析逻辑
 								    questions = []
 								    # 尝试按不同格式解析
 								    lines = interview_content.split('\n')
 								    current_q = None
 								    current_a = []
 								    for i, line in enumerate(lines):
 								        line = line.strip()
 								        if not line:
 								            continue
 								        # 检测问题标志（数字开头的行）
 								        if re.match(r'^\d+[\.、]', line):
 								            # 保存之前的问答对
 								            if current_q and current_a:
 								                answer_text = '\n'.join(current_a).strip()
 								                if answer_text:
 								                    questions.append({
 								                        'question': current_q,
 								                        'answer': answer_text
 								                    })
 								            # 开始新问题
 								            current_q = line
 								            current_a = []
 								        # 检测答案标志
 								        elif any(marker in line for marker in ['示例答案', '答案：', '正确答案', '正确选项']):
 								            # 答案开始标记
 								            if i + 1 < len(lines):
 								                # 下一行开始是答案内容
 								                continue
 								        # 收集答案内容
 								        elif current_q and not re.match(r'^\d+[\.、]', line):
 								            # 这可能是答案的一部分
 								            if line.startswith(('A.', 'B.', 'C.', 'D.')):
 								                # 选择题选项，加入到问题中
 								                current_q += '\n' + line
 								            elif line.startswith(('-', '•', '·')):
 								                # 列表形式的答案
 								                current_a.append(line)
 								            elif not line.startswith('#'):
 								                # 普通答案内容
 								                current_a.append(line)
 								    # 保存最后一个问答对
 								    if current_q and current_a:
 								        answer_text = '\n'.join(current_a).strip()
 								        if answer_text:
 								            questions.append({
 								                'question': current_q,
 								                'answer': answer_text
 								            })
 								    # 将问题添加到岗位群（避免重复）
 								    for q in questions:
 								        is_duplicate = False
 								        for existing in job_group_questions[job_group]:
 								            if existing['question'] == q['question']:
 								                is_duplicate = True
 								                break
 								        if not is_duplicate:
 								            job_group_questions[job_group].append(q)
 								            total_questions += 1
 								# 如果某个岗位群没有题目，使用该岗位群其他岗位的题目
 								for position in chemical_data:
 								    job_group = position.get('简历岗位群', '')
 								    if job_group and len(job_group_questions.get(job_group, [])) == 0:
 								        # 这个岗位群没有题目，尝试从同组其他岗位提取
 								        for other_position in chemical_data:
 								            if other_position.get('简历岗位群') == job_group:
 								                content = other_position.get('面试题内容', '')
 								                if '示例答案' in content or '答案' in content:
 								                    # 创建通用面试题
 								                    job_group_questions[job_group] = [
 								                        {
 								                            "question": f"请介绍一下你对{job_group}岗位的理解",
 								                            "answer": f"需要掌握{job_group}相关的专业知识和技能，确保工作安全高效。"
 								                        },
 								                        {
 								                            "question": f"你为什么选择{job_group}这个职业方向？",
 								                            "answer": f"对{job_group}领域充满兴趣，希望在这个领域深入发展。"
 								                        },
 								                        {
 								                            "question": f"你认为{job_group}工作中最重要的是什么？",
 								                            "answer": "安全意识、专业技能和团队协作是最重要的。"
 								                        }
 								                    ]
 								                    total_questions += 3
 								                    break
 								# 输出统计信息
 								print(f"\n===== 化工岗位面试题提取完成 =====")
 								print(f"总岗位群数: {len(job_group_questions)}")
 								print(f"总面试题数: {total_questions}")
 								print(f"\n各岗位群面试题数量：")
 								for job_group, questions in sorted(job_group_questions.items()):
 								    print(f"  {job_group}: {len(questions)} 题")
 								# 读取现有mock文件
 								print(f"\n正在更新 resumeInterviewMock.js...")
 								with open('src/mocks/resumeInterviewMock.js', 'r', encoding='utf-8') as f:
 								    content = f.read()
 								# 备份文件
 								timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
 								backup_file = f'src/mocks/resumeInterviewMock.js.backup_all_{timestamp}'
 								with open(backup_file, 'w', encoding='utf-8') as f:
 								    f.write(content)
 								print(f"已创建备份: {backup_file}")
 								# 更新每个岗位群的面试题
 								updated_count = 0
 								for job_group, questions in job_group_questions.items():
 								    if not questions:
 								        continue
 								    # 构建subQuestions数组内容
 								    sub_questions_items = []
 								    for i, q in enumerate(questions):
 								        # 创建问题对象（使用json.dumps来确保正确的转义）
 								        question_obj = {
 								            "id": f"q{i+1}",
 								            "question": q['question'],
 								            "answer": q['answer']
 								        }
 								        # 转换为JSON字符串，确保正确处理换行等特殊字符
 								        json_str = json.dumps(question_obj, ensure_ascii=False, indent=20)
 								        sub_questions_items.append(json_str)
 								    # 创建完整的subQuestions内容
 								    sub_questions_content = ',\n'.join(sub_questions_items)
 								    # 查找并替换对应岗位群的subQuestions
 								    # 注意：question字段的值是"xxx岗位群面试题"格式
 								    pattern = rf'("question"\s*:\s*"{re.escape(job_group)}岗位群面试题"[^}}]*?"subQuestions"\s*:\s*)\[[^\]]*\]'
 								    replacement = rf'\1[\n{sub_questions_content}\n        ]'
 								    new_content = re.sub(pattern, replacement, content, flags=re.DOTALL)
 								    if new_content != content:
 								        content = new_content
 								        updated_count += 1
 								        print(f"✓ 已更新 {job_group} 的 {len(questions)} 道面试题")
 								# 保存更新后的文件
 								with open('src/mocks/resumeInterviewMock.js', 'w', encoding='utf-8') as f:
 								    f.write(content)
 								# 验证语法
 								print(f"\n正在验证语法...")
 								try:
 								    result = subprocess.run(['node', '-c', 'src/mocks/resumeInterviewMock.js'],
 								                          capture_output=True, text=True, encoding='utf-8')
 								    if result.returncode == 0:
 								        print(f"✓ 语法检查通过")
 								        print(f"✓ 成功更新了 {updated_count} 个岗位群的面试题")
 								        print(f"\n✅ 所有面试题更新成功完成！共更新 {total_questions} 道题目")
 								    else:
 								        print(f"\n✗ 语法检查失败: {result.stderr}")
 								        # 恢复备份
 								        with open(backup_file, 'r', encoding='utf-8') as f:
 								            content = f.read()
 								        with open('src/mocks/resumeInterviewMock.js', 'w', encoding='utf-8') as f:
 								            f.write(content)
 								        print(f"已从备份恢复")
 								except Exception as e:
 								    print(f"错误: {e}")