ALL-teach_sys/frontend_化工/extract_and_update_all_questions.py

#!/usr/bin/env python3
import json
import re
import subprocess
from datetime import datetime

print("正在提取化工岗位面试题...")

# Load the résumé records of the chemical-industry positions from JSON.
with open('网页未导入数据/化工产业/化工岗位简历.json', 'r', encoding='utf-8') as resume_file:
    chemical_data = json.loads(resume_file.read())

# Interview questions collected per job group: {group name: [{'question', 'answer'}, ...]}
job_group_questions = {}
total_questions = 0

# A question starts on a line beginning with a number followed by "." or "、".
_QUESTION_START_RE = re.compile(r'^\d+[\.、]')
# Substrings that identify a line as an "answer starts here" header.
_ANSWER_MARKERS = ('示例答案', '答案：', '正确答案', '正确选项')
# Captures answer text written on the same line as a marker, after a colon
# (e.g. "正确答案：B" -> "B"). Markdown emphasis / closing brackets between
# the marker and the colon are tolerated.
_INLINE_ANSWER_RE = re.compile(
    r'(?:示例答案|正确答案|正确选项|答案)[\s*】\]）)]*[:：](.*)$'
)


def _append_pair(parsed, question, answer_lines):
    """Append a question/answer dict to *parsed* if both parts are non-empty."""
    if not question or not answer_lines:
        return
    answer_text = '\n'.join(answer_lines).strip()
    if answer_text:
        parsed.append({'question': question, 'answer': answer_text})


def _parse_interview_questions(interview_content):
    """Split raw interview text into a list of question/answer dicts.

    Parsing rules, applied to every non-blank, stripped line:

    * a line starting with ``<digits>.`` or ``<digits>、`` opens a new question
      (and closes the previous one);
    * a line containing an answer marker is a header; text following the
      marker and a colon on that same line is kept as answer content, the
      header itself is discarded;
    * lines starting with ``A.``-``D.`` are multiple-choice options and are
      appended to the question text;
    * lines starting with ``#`` are ignored;
    * every other line is answer content.

    Lines appearing before the first question are ignored. Questions that
    end up with no answer text are dropped.

    Args:
        interview_content: The raw multi-line interview text of one position.

    Returns:
        A list of ``{'question': str, 'answer': str}`` dicts in source order.
    """
    parsed = []
    current_q = None
    current_a = []

    for raw_line in interview_content.split('\n'):
        line = raw_line.strip()
        if not line:
            continue

        if _QUESTION_START_RE.match(line):
            # Close the previous pair before starting a new question.
            _append_pair(parsed, current_q, current_a)
            current_q = line
            current_a = []
        elif any(marker in line for marker in _ANSWER_MARKERS):
            # Previously the whole marker line was discarded, which lost
            # answers written inline (e.g. "正确答案：B"). Keep what follows
            # the colon; a bare header still contributes nothing.
            inline = _INLINE_ANSWER_RE.search(line)
            if current_q and inline:
                remainder = inline.group(1).strip(' \t*')
                if remainder:
                    current_a.append(remainder)
        elif current_q:
            if line.startswith(('A.', 'B.', 'C.', 'D.')):
                # Multiple-choice option: part of the question text.
                current_q += '\n' + line
            elif not line.startswith('#'):
                current_a.append(line)

    # Close the final pair.
    _append_pair(parsed, current_q, current_a)
    return parsed


# Walk every position and merge its questions into its job group.
for position in chemical_data:
    job_group = position.get('简历岗位群', '')
    interview_content = position.get('面试题内容', '')

    if not job_group or not interview_content:
        continue

    group_questions = job_group_questions.setdefault(job_group, [])
    # Question texts already stored for this group, for O(1) duplicate checks.
    seen_questions = {existing['question'] for existing in group_questions}

    for q in _parse_interview_questions(interview_content):
        if q['question'] in seen_questions:
            continue
        seen_questions.add(q['question'])
        group_questions.append(q)
        total_questions += 1

# Fallback: a job group that ended up with no parsed questions receives three
# generic placeholder questions, provided at least one position of that group
# has interview text mentioning "答案" (which also covers "示例答案").
for position in chemical_data:
    job_group = position.get('简历岗位群', '')
    if not job_group or job_group_questions.get(job_group):
        continue

    # `or ''` guards against a JSON null in the field, which would otherwise
    # make the substring test raise TypeError.
    group_has_answer_text = any(
        '答案' in (other_position.get('面试题内容') or '')
        for other_position in chemical_data
        if other_position.get('简历岗位群') == job_group
    )
    if not group_has_answer_text:
        continue

    job_group_questions[job_group] = [
        {
            "question": f"请介绍一下你对{job_group}岗位的理解",
            "answer": f"需要掌握{job_group}相关的专业知识和技能，确保工作安全高效。"
        },
        {
            "question": f"你为什么选择{job_group}这个职业方向？",
            "answer": f"对{job_group}领域充满兴趣，希望在这个领域深入发展。"
        },
        {
            "question": f"你认为{job_group}工作中最重要的是什么？",
            "answer": "安全意识、专业技能和团队协作是最重要的。"
        }
    ]
    total_questions += 3

# Report extraction statistics: totals first, then one line per job group
# in sorted group-name order.
summary_lines = [
    "\n===== 化工岗位面试题提取完成 =====",
    f"总岗位群数: {len(job_group_questions)}",
    f"总面试题数: {total_questions}",
    "\n各岗位群面试题数量：",
]
summary_lines.extend(
    f"  {group_name}: {len(job_group_questions[group_name])} 题"
    for group_name in sorted(job_group_questions)
)
print('\n'.join(summary_lines))

# Read the current mock file into memory.
print(f"\n正在更新 resumeInterviewMock.js...")
with open('src/mocks/resumeInterviewMock.js', 'r', encoding='utf-8') as mock_in:
    content = mock_in.read()

# Write a timestamped backup copy before anything is modified.
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
backup_file = 'src/mocks/resumeInterviewMock.js.backup_all_' + timestamp
with open(backup_file, 'w', encoding='utf-8') as backup_out:
    backup_out.write(content)
print(f"已创建备份: {backup_file}")

# Replace the subQuestions array of every job group inside the mock source.
updated_count = 0
for job_group, questions in job_group_questions.items():
    if not questions:
        continue

    # Serialize each question with json.dumps so quotes, newlines and other
    # special characters are escaped into valid JS string literals.
    sub_questions_items = [
        json.dumps(
            {
                "id": f"q{index}",
                "question": q['question'],
                "answer": q['answer']
            },
            ensure_ascii=False,
            indent=20,
        )
        for index, q in enumerate(questions, start=1)
    ]
    sub_questions_block = '[\n' + ',\n'.join(sub_questions_items) + '\n        ]'

    # Locate the entry whose "question" value is "<group>岗位群面试题" and
    # capture everything up to its subQuestions array. The array body is
    # matched up to the first "]" after the opening bracket.
    pattern = rf'("question"\s*:\s*"{re.escape(job_group)}岗位群面试题"[^}}]*?"subQuestions"\s*:\s*)\[[^\]]*\]'

    # A callable replacement is required here: a replacement *string* is
    # treated by re.sub as a template, so the JSON escapes would be
    # re-interpreted ("\n" -> raw newline, "\\" -> "\", "\u...." -> error),
    # producing invalid JavaScript.
    new_content = re.sub(
        pattern,
        lambda match, block=sub_questions_block: match.group(1) + block,
        content,
        flags=re.DOTALL,
    )

    if new_content != content:
        content = new_content
        updated_count += 1
        print(f"✓ 已更新 {job_group} 的 {len(questions)} 道面试题")

# Persist the rewritten mock source.
with open('src/mocks/resumeInterviewMock.js', 'w', encoding='utf-8') as mock_out:
    mock_out.write(content)

# Syntax-check the written file with `node -c`; on a non-zero exit code the
# backup is copied back over it. Any exception is reported and swallowed.
print(f"\n正在验证语法...")
try:
    check_command = ['node', '-c', 'src/mocks/resumeInterviewMock.js']
    result = subprocess.run(
        check_command,
        capture_output=True,
        text=True,
        encoding='utf-8',
    )
    if result.returncode != 0:
        print(f"\n✗ 语法检查失败: {result.stderr}")
        # Roll back: restore the file from the backup taken earlier.
        with open(backup_file, 'r', encoding='utf-8') as backup_in:
            content = backup_in.read()
        with open('src/mocks/resumeInterviewMock.js', 'w', encoding='utf-8') as mock_out:
            mock_out.write(content)
        print(f"已从备份恢复")
    else:
        print(f"✓ 语法检查通过")
        print(f"✓ 成功更新了 {updated_count} 个岗位群的面试题")
        print(f"\n✅ 所有面试题更新成功完成！共更新 {total_questions} 道题目")
except Exception as e:
    print(f"错误: {e}")