ALL-teach_sys/frontend_化工/extract_all_complete_questions.py

#!/usr/bin/env python3
import json
import re
from datetime import datetime

# Line-classification rules used while parsing one "面试题内容" text blob.
_QUESTION_RE = re.compile(r'^[0-9]+[\.、]')   # "1." / "12、" starts a new question
_OPTION_RE = re.compile(r'^[A-D][\.、]')      # "A." / "B、" is a multiple-choice option
_COLON_RE = re.compile(r'[：:]')              # full-width or ASCII colon
_ANSWER_MARKERS = ('示例答案', '答案：', '正确答案', '正确选项', '答案是')
_SKIP_TOKENS = ('![', 'image')                # answers containing these substrings are dropped


def _flush_qa(question, answer_lines, sink):
    """Append the pending question/answer pair to *sink* if it is usable.

    A pair is kept only when both a question and at least one answer line
    exist, the joined answer is non-empty, and the answer contains none of
    the substrings in ``_SKIP_TOKENS``.
    """
    if not question or not answer_lines:
        return
    answer_text = '\n'.join(answer_lines).strip()
    if answer_text and not any(token in answer_text for token in _SKIP_TOKENS):
        sink.append({'question': question, 'answer': answer_text})


def _parse_interview_content(interview_content):
    """Split one interview-question text blob into question/answer dicts.

    Returns a list of ``{'question': str, 'answer': str}`` in source order.
    Multiple-choice option lines are folded into the question text; lines
    following an answer marker are collected as the answer.
    """
    parsed = []
    current_question = None
    current_answer = []
    in_answer = False
    prev_line = ''  # previous physical line (stripped), including blank/heading lines

    for raw_line in interview_content.split('\n'):
        line = raw_line.strip()
        before, prev_line = prev_line, line

        # Skip blank lines and markdown headings.
        if not line or line.startswith('#'):
            continue

        if _QUESTION_RE.match(line):
            # A numbered line starts a new question: store the previous pair first.
            _flush_qa(current_question, current_answer, parsed)
            current_question = line
            current_answer = []
            in_answer = False

        elif any(marker in line for marker in _ANSWER_MARKERS):
            in_answer = True
            # Keep the text after the FIRST colon of either width. Splitting on
            # both colon kinds in sequence would also cut at colons that belong
            # to the answer itself (e.g. a ratio such as "1:2").
            pieces = _COLON_RE.split(line, maxsplit=1)
            if len(pieces) == 2:
                inline_answer = pieces[1].strip()
                if inline_answer:
                    current_answer.append(inline_answer)

        elif current_question and _OPTION_RE.match(line):
            # Multiple-choice option: part of the question text.
            current_question += '\n' + line

        elif in_answer:
            current_answer.append(line)

        elif current_question and _QUESTION_RE.match(before):
            # No explicit answer marker: a plain line directly below the
            # question line is treated as the start of the answer.
            in_answer = True
            current_answer.append(line)

    # Store the final pair, which no following question line would flush.
    _flush_qa(current_question, current_answer, parsed)
    return parsed


print("正在提取化工岗位简历中的所有面试题...")

# Load the chemical-industry position/resume records.
with open('网页未导入数据/化工产业/化工岗位简历.json', 'r', encoding='utf-8') as f:
    chemical_data = json.load(f)

# job group name -> list of {'question', 'answer'} dicts
job_group_all_questions = {}
total_questions = 0
# job group name -> set of 50-character question prefixes already stored
_seen_prefixes = {}

for position in chemical_data:
    job_group = position.get('简历岗位群', '')
    interview_content = position.get('面试题内容', '')

    if not job_group or not interview_content:
        continue

    group_questions = job_group_all_questions.setdefault(job_group, [])
    group_seen = _seen_prefixes.setdefault(job_group, set())

    for q in _parse_interview_content(interview_content):
        q['question'] = q['question'].strip()
        q['answer'] = q['answer'].strip()

        # Two questions count as duplicates when their first 50 characters match.
        prefix = q['question'][:50]
        if prefix in group_seen:
            continue

        # Very short questions or answers are discarded.
        if len(q['question']) > 5 and len(q['answer']) > 5:
            group_questions.append(q)
            group_seen.add(prefix)
            total_questions += 1

# Print extraction totals and the per-group question counts.
print(f"\n===== 提取完成 =====")
print(f"总岗位群数: {len(job_group_all_questions)}")
print(f"总面试题数: {total_questions}")
print(f"\n各岗位群面试题数量：")

for group_name in sorted(job_group_all_questions):
    group_size = len(job_group_all_questions[group_name])
    print(f"  {group_name}: {group_size} 道题")

# Preview the first two questions of a few selected job groups.
print(f"\n===== 面试题示例 =====")
for group_name in ('化工安全', '化工检验检测', '化工生产'):
    sample_pool = job_group_all_questions.get(group_name)
    if sample_pool is None:
        continue
    print(f"\n【{group_name}】共 {len(sample_pool)} 道题，前2题：")
    for idx, item in enumerate(sample_pool[:2], 1):
        print(f"  {idx}. {item['question'][:60]}...")
        print(f"     答: {item['answer'][:60]}...")

# Write every extracted question to a JSON file in the working directory.
serialized = json.dumps(job_group_all_questions, ensure_ascii=False, indent=2)
with open('all_interview_questions_complete.json', 'w', encoding='utf-8') as out_file:
    out_file.write(serialized)

print(f"\n✅ 已保存到 all_interview_questions_complete.json")

# Read the existing mock file and replace each job group's subQuestions array.
print("\n正在更新 resumeInterviewMock.js...")

with open('src/mocks/resumeInterviewMock.js', 'r', encoding='utf-8') as f:
    content = f.read()

# Keep a timestamped copy of the untouched file so it can be restored later.
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
backup_file = f'src/mocks/resumeInterviewMock.js.backup_complete_{timestamp}'
with open(backup_file, 'w', encoding='utf-8') as f:
    f.write(content)
print(f"已创建备份: {backup_file}")

updated_count = 0           # job groups whose array was actually replaced
updated_question_count = 0  # questions written for those groups
for job_group, questions in job_group_all_questions.items():
    if not questions:
        continue

    # Shape each entry the way the mock file stores it: sequential id + text.
    formatted_questions = [
        {"id": f"q{i}", "question": q['question'], "answer": q['answer']}
        for i, q in enumerate(questions, 1)
    ]
    sub_questions_str = json.dumps(formatted_questions, ensure_ascii=False, indent=8)

    # Match `"question": "<group>岗位群面试题", "subQuestions": [ ... ]`.
    # NOTE(review): `[^\]]*` stops at the first `]`, so an existing array whose
    # text contains `]` will not be matched in full — confirm against the mock
    # file before re-running on already-updated content.
    pattern = rf'("question"\s*:\s*"{re.escape(job_group)}岗位群面试题"\s*,\s*"subQuestions"\s*:\s*)\[[^\]]*\]'

    # Use a callable replacement: a replacement *string* is treated as a
    # template in which backslash escapes are processed, which would turn the
    # JSON escape `\n` into a raw newline inside a JS string literal, collapse
    # `\\`, and raise on escapes such as `\u`.
    new_content = re.sub(
        pattern,
        lambda match, payload=sub_questions_str: match.group(1) + payload,
        content,
        flags=re.DOTALL,
    )

    if new_content != content:
        content = new_content
        updated_count += 1
        updated_question_count += len(questions)
        print(f"✓ 已更新 {job_group}: {len(questions)} 道题")

# Only rewrite the mock file when at least one group changed.
if updated_count > 0:
    with open('src/mocks/resumeInterviewMock.js', 'w', encoding='utf-8') as f:
        f.write(content)
    # Report the questions actually written, not every question extracted:
    # groups without a matching entry in the mock file are not written.
    print(f"\n✅ 成功更新了 {updated_count} 个岗位群，共 {updated_question_count} 道面试题")

# Syntax-check the mock file with `node -c`; on failure, put the backup back.
import subprocess
try:
    node_check = subprocess.run(
        ['node', '-c', 'src/mocks/resumeInterviewMock.js'],
        capture_output=True,
        text=True,
        encoding='utf-8',
    )
    if node_check.returncode != 0:
        print(f"✗ 语法检查失败: {node_check.stderr}")
        # Restore the file from the backup written before the update.
        with open(backup_file, 'r', encoding='utf-8') as backup_src:
            pristine_text = backup_src.read()
        with open('src/mocks/resumeInterviewMock.js', 'w', encoding='utf-8') as mock_dst:
            mock_dst.write(pristine_text)
        print("已从备份恢复")
    else:
        print("✓ 语法检查通过")
except Exception as exc:
    # Best-effort step: any failure here is reported and the script continues.
    print(f"错误: {exc}")
-												初始化12个产业教务系统项目

主要内容：
- 包含12个产业的完整教务系统前端代码
- 智能启动脚本 (start-industry.sh)
- 可视化产业导航页面 (index.html)
- 项目文档 (README.md)

优化内容：
- 删除所有node_modules和.yoyo文件夹，从7.5GB减少到2.7GB
- 添加.gitignore文件避免上传不必要的文件
- 自动依赖管理和智能启动系统

产业列表：
1. 文旅产业 (5150)
2. 智能制造 (5151)
3. 智能开发 (5152)
4. 财经商贸 (5153)
5. 视觉设计 (5154)
6. 交通物流 (5155)
7. 大健康 (5156)
8. 土木水利 (5157)
9. 食品产业 (5158)
10. 化工产业 (5159)
11. 能源产业 (5160)
12. 环保产业 (5161)

🤖 Generated with Claude Code
Co-Authored-By: Claude <noreply@anthropic.com>

											
										
										
											2025-09-24 14:14:14 +08:00
+								#!/usr/bin/env python3
 								import json
 								import re
 								from datetime import datetime
 								print("正在提取化工岗位简历中的所有面试题...")
 								# 读取化工岗位简历数据
 								with open('网页未导入数据/化工产业/化工岗位简历.json', 'r', encoding='utf-8') as f:
 								    chemical_data = json.load(f)
 								# 收集每个岗位群的所有面试题
 								job_group_all_questions = {}
 								total_questions = 0
 								# 遍历所有岗位，提取完整的面试题内容
 								for position in chemical_data:
 								    job_group = position.get('简历岗位群', '')
 								    interview_content = position.get('面试题内容', '')
 								    if not job_group or not interview_content:
 								        continue
 								    # 初始化岗位群
 								    if job_group not in job_group_all_questions:
 								        job_group_all_questions[job_group] = []
 								    # 解析面试题内容
 								    questions = []
 								    # 分割内容为行
 								    lines = interview_content.split('\n')
 								    current_question = None
 								    current_answer = []
 								    in_answer = False
 								    for i, line in enumerate(lines):
 								        line = line.strip()
 								        # 跳过空行和标题行
 								        if not line or line.startswith('#'):
 								            continue
 								        # 检测新的问题（以数字开头）
 								        if re.match(r'^[0-9]+[\.、]', line):
 								            # 保存之前的问答
 								            if current_question and current_answer:
 								                answer_text = '\n'.join(current_answer).strip()
 								                if answer_text and not any(skip in answer_text for skip in ['![', 'image']):
 								                    questions.append({
 								                        'question': current_question,
 								                        'answer': answer_text
 								                    })
 								            # 开始新问题
 								            current_question = line
 								            current_answer = []
 								            in_answer = False
 								        # 检测答案标记
 								        elif any(marker in line for marker in ['示例答案', '答案：', '正确答案', '正确选项', '答案是']):
 								            in_answer = True
 								            # 如果答案在同一行
 								            if '：' in line or ':' in line:
 								                answer_part = line.split('：', 1)[-1].split(':', 1)[-1].strip()
 								                if answer_part:
 								                    current_answer.append(answer_part)
 								        # 收集选项（选择题）
 								        elif current_question and re.match(r'^[A-D][\.、]', line):
 								            current_question += '\n' + line
 								        # 收集答案内容
 								        elif in_answer and line:
 								            if not line.startswith('#'):
 								                current_answer.append(line)
 								        # 如果还没有明确的答案标记，但这可能是答案内容
 								        elif current_question and not re.match(r'^[0-9]+[\.、]', line):
 								            # 检查是否可能是答案（在问题后面的非问题行）
 								            if i > 0 and not in_answer:
 								                # 如果上一行是问题，这行可能是答案
 								                prev_line = lines[i-1].strip() if i > 0 else ''
 								                if re.match(r'^[0-9]+[\.、]', prev_line) or prev_line == current_question:
 								                    in_answer = True
 								                    current_answer.append(line)
 								    # 保存最后一个问答
 								    if current_question and current_answer:
 								        answer_text = '\n'.join(current_answer).strip()
 								        if answer_text and not any(skip in answer_text for skip in ['![', 'image']):
 								            questions.append({
 								                'question': current_question,
 								                'answer': answer_text
 								            })
 								    # 将问题添加到岗位群（避免重复）
 								    for q in questions:
 								        # 清理问题和答案文本
 								        q['question'] = q['question'].strip()
 								        q['answer'] = q['answer'].strip()
 								        # 检查是否重复
 								        is_duplicate = False
 								        for existing in job_group_all_questions[job_group]:
 								            # 比较问题的前50个字符来判断是否重复
 								            if existing['question'][:50] == q['question'][:50]:
 								                is_duplicate = True
 								                break
 								        if not is_duplicate and len(q['question']) > 5 and len(q['answer']) > 5:
 								            job_group_all_questions[job_group].append(q)
 								            total_questions += 1
 								# 输出统计信息
 								print(f"\n===== 提取完成 =====")
 								print(f"总岗位群数: {len(job_group_all_questions)}")
 								print(f"总面试题数: {total_questions}")
 								print(f"\n各岗位群面试题数量：")
 								for job_group, questions in sorted(job_group_all_questions.items()):
 								    print(f"  {job_group}: {len(questions)} 道题")
 								# 显示一些示例
 								print(f"\n===== 面试题示例 =====")
 								for job_group in ['化工安全', '化工检验检测', '化工生产']:
 								    if job_group in job_group_all_questions:
 								        questions = job_group_all_questions[job_group]
 								        print(f"\n【{job_group}】共 {len(questions)} 道题，前2题：")
 								        for i, q in enumerate(questions[:2], 1):
 								            print(f"  {i}. {q['question'][:60]}...")
 								            print(f"     答: {q['answer'][:60]}...")
 								# 保存提取的所有面试题
 								with open('all_interview_questions_complete.json', 'w', encoding='utf-8') as f:
 								    json.dump(job_group_all_questions, f, ensure_ascii=False, indent=2)
 								print(f"\n✅ 已保存到 all_interview_questions_complete.json")
 								# 读取现有mock文件并更新
 								print(f"\n正在更新 resumeInterviewMock.js...")
 								with open('src/mocks/resumeInterviewMock.js', 'r', encoding='utf-8') as f:
 								    content = f.read()
 								# 备份
 								timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
 								backup_file = f'src/mocks/resumeInterviewMock.js.backup_complete_{timestamp}'
 								with open(backup_file, 'w', encoding='utf-8') as f:
 								    f.write(content)
 								print(f"已创建备份: {backup_file}")
 								# 更新每个岗位群
 								updated_count = 0
 								for job_group, questions in job_group_all_questions.items():
 								    if not questions:
 								        continue
 								    # 为每个问题创建正确的格式
 								    formatted_questions = []
 								    for i, q in enumerate(questions, 1):
 								        formatted_questions.append({
 								            "id": f"q{i}",
 								            "question": q['question'],
 								            "answer": q['answer']
 								        })
 								    # 转换为JSON字符串
 								    sub_questions_str = json.dumps(formatted_questions, ensure_ascii=False, indent=8)
 								    # 替换对应岗位群的subQuestions
 								    pattern = rf'("question"\s*:\s*"{re.escape(job_group)}岗位群面试题"\s*,\s*"subQuestions"\s*:\s*)\[[^\]]*\]'
 								    replacement = rf'\1{sub_questions_str}'
 								    new_content = re.sub(pattern, replacement, content, flags=re.DOTALL)
 								    if new_content != content:
 								        content = new_content
 								        updated_count += 1
 								        print(f"✓ 已更新 {job_group}: {len(questions)} 道题")
 								# 保存更新后的文件
 								if updated_count > 0:
 								    with open('src/mocks/resumeInterviewMock.js', 'w', encoding='utf-8') as f:
 								        f.write(content)
 								    print(f"\n✅ 成功更新了 {updated_count} 个岗位群，共 {total_questions} 道面试题")
 								# 验证语法
 								import subprocess
 								try:
 								    result = subprocess.run(['node', '-c', 'src/mocks/resumeInterviewMock.js'],
 								                          capture_output=True, text=True, encoding='utf-8')
 								    if result.returncode == 0:
 								        print("✓ 语法检查通过")
 								    else:
 								        print(f"✗ 语法检查失败: {result.stderr}")
 								        # 恢复备份
 								        with open(backup_file, 'r', encoding='utf-8') as f:
 								            backup_content = f.read()
 								        with open('src/mocks/resumeInterviewMock.js', 'w', encoding='utf-8') as f:
 								            f.write(backup_content)
 								        print("已从备份恢复")
 								except Exception as e:
 								    print(f"错误: {e}")