ALL-teach_sys/frontend_化工/extract_complete_questions.py

#!/usr/bin/env python3
import json

# Extract interview question/answer pairs for every chemical-industry job
# group from the resume dataset, save them as JSON, and generate a follow-up
# script that injects them into the front-end mock file.

# Input dataset: a JSON array of position records.
SOURCE_PATH = '网页未导入数据/化工产业/化工岗位简历.json'
# Output: mapping of job group -> list of {'question', 'answer'} dicts.
OUTPUT_PATH = 'complete_interview_questions.json'
# Output: the generated updater script (content is UPDATE_SCRIPT below).
UPDATE_SCRIPT_PATH = 'update_complete_interview_questions.py'

# A line containing any of these substrings marks the start of an answer.
ANSWER_MARKERS = ('示例答案', '正确答案', '答案：', '正确选项')
# Non-bullet answer lines starting with these prefixes (markdown headings and
# multiple-choice options) are left out of the collected answer text.
IGNORED_ANSWER_PREFIXES = ('#', 'A.', 'B.', 'C.', 'D.')

# Source of the generated updater. It is written to disk verbatim, so every
# backslash meant for the generated file is doubled here.
UPDATE_SCRIPT = '''#!/usr/bin/env python3
import json
import re
import subprocess
from datetime import datetime

# 读取提取的面试题数据
with open('complete_interview_questions.json', 'r', encoding='utf-8') as f:
    job_group_questions = json.load(f)

# 读取现有mock文件
with open('src/mocks/resumeInterviewMock.js', 'r', encoding='utf-8') as f:
    content = f.read()

# 备份文件
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
backup_file = f'src/mocks/resumeInterviewMock.js.backup_complete_{timestamp}'
with open(backup_file, 'w', encoding='utf-8') as f:
    f.write(content)
print(f"已创建备份: {backup_file}")

# 更新每个岗位群的面试题
updated_count = 0
for job_group, questions in job_group_questions.items():
    if not questions:
        continue

    # 构建subQuestions数组内容
    sub_questions_items = []
    for i, q in enumerate(questions):
        question_obj = {
            "id": f"q{i+1}",
            "question": q['question'],
            "answer": q['answer']
        }
        sub_questions_items.append(json.dumps(question_obj, ensure_ascii=False, indent=20))

    # 创建完整的subQuestions内容
    sub_questions_content = ',\\n'.join(sub_questions_items)

    # 查找并替换对应岗位群的subQuestions
    pattern = rf'("question"\\s*:\\s*"{re.escape(job_group)}"[^}}]*?"subQuestions"\\s*:\\s*)\\[[^\\]]*\\]'
    # Build the replacement as a plain string and give re.sub a callable:
    # a template string would have its backslash escapes re-interpreted,
    # which corrupts the JSON escapes inside the questions and answers.
    replacement = '[\\n' + sub_questions_content + '\\n        ]'

    new_content = re.sub(pattern, lambda match: match.group(1) + replacement, content, flags=re.DOTALL)

    if new_content != content:
        content = new_content
        updated_count += 1
        print(f"✓ 已更新 {job_group} 的 {len(questions)} 道面试题")

# 保存更新后的文件
with open('src/mocks/resumeInterviewMock.js', 'w', encoding='utf-8') as f:
    f.write(content)

# 验证语法
try:
    result = subprocess.run(['node', '-c', 'src/mocks/resumeInterviewMock.js'],
                          capture_output=True, text=True, encoding='utf-8')
    if result.returncode == 0:
        print(f"\\n✓ 语法检查通过")
        print(f"✓ 成功更新了 {updated_count} 个岗位群的面试题")
        print(f"\\n✅ 所有面试题更新成功完成！")
    else:
        print(f"\\n✗ 语法检查失败: {result.stderr}")
        # 恢复备份
        with open(backup_file, 'r', encoding='utf-8') as f:
            content = f.read()
        with open('src/mocks/resumeInterviewMock.js', 'w', encoding='utf-8') as f:
            f.write(content)
        print(f"已从备份恢复")
except Exception as e:
    print(f"错误: {e}")
'''


def parse_interview_content(interview_content):
    """Parse one position's interview text into question/answer pairs.

    The text is scanned line by line (each line stripped, blank lines
    skipped) and classified in this order:

    1. A line containing one of ANSWER_MARKERS opens the answer of the
       current question. The marker line itself contributes no text.
    2. A line starting with a digit or with '问题' starts a new question.
       The previous question is kept only if it collected a non-empty answer.
    3. Any other line, once an answer has been opened, is answer text:
       bullet lines ('-' or '•') are joined with newlines, other lines with
       spaces, and lines starting with IGNORED_ANSWER_PREFIXES are dropped.

    Rule 2 is checked before rule 3 on purpose: with the opposite order a
    new question can never start once an answer has been opened, so only
    one pair per text would ever be produced.

    NOTE(review): a numbered line inside an answer is therefore read as a
    new question, and text on the same line as an answer marker is
    discarded. Confirm both against the dataset.

    Args:
        interview_content: Raw multi-line interview text.

    Returns:
        A list of {'question': str, 'answer': str} dicts in order of
        appearance; questions without answer text are omitted.
    """
    pairs = []
    question = None
    answer = None  # None until an answer marker is seen for `question`.

    for raw_line in interview_content.split('\n'):
        line = raw_line.strip()
        if not line:
            continue

        if any(marker in line for marker in ANSWER_MARKERS):
            # Open the answer only once per question; repeated markers must
            # not wipe text that has already been collected.
            if question and not answer:
                answer = ""
        elif line[0].isdigit() or line.startswith('问题'):
            if question and answer:
                pairs.append({'question': question, 'answer': answer.strip()})
            question = line
            answer = None
        elif answer is not None:
            if line.startswith(('-', '•')):
                answer += line + '\n'
            elif not line.startswith(IGNORED_ANSWER_PREFIXES):
                answer += line + ' '

    # The last pair has no following question line to trigger its save.
    if question and answer:
        pairs.append({'question': question, 'answer': answer.strip()})

    return pairs


def collect_job_group_questions(positions):
    """Group parsed question/answer pairs by job group.

    Records missing either '简历岗位群' or '面试题内容' are skipped. Within a
    job group, a question whose text was already stored is ignored, so only
    its first answer is kept. A job group whose records yield no pairs still
    appears in the result with an empty list.

    Args:
        positions: Iterable of dict records read from the dataset.

    Returns:
        A tuple (job_group_questions, total_questions): the mapping of job
        group to its list of pairs, and the number of pairs stored overall.
    """
    job_group_questions = {}
    seen_questions = {}  # job group -> set of question texts already stored
    total_questions = 0

    for position in positions:
        job_group = position.get('简历岗位群', '')
        interview_content = position.get('面试题内容', '')
        if not job_group or not interview_content:
            continue

        questions = job_group_questions.setdefault(job_group, [])
        seen = seen_questions.setdefault(job_group, set())

        for pair in parse_interview_content(interview_content):
            if pair['question'] in seen:
                continue
            seen.add(pair['question'])
            questions.append(pair)
            total_questions += 1

    return job_group_questions, total_questions


def main():
    """Run the extraction: read the dataset, report, and write both outputs."""
    with open(SOURCE_PATH, 'r', encoding='utf-8') as f:
        chemical_data = json.load(f)

    job_group_questions, total_questions = collect_job_group_questions(chemical_data)

    # Print summary statistics.
    print(f"\n===== 化工岗位完整面试题统计 =====")
    print(f"总岗位群数: {len(job_group_questions)}")
    print(f"总面试题数: {total_questions}")
    print(f"\n各岗位群面试题数量：")

    for job_group, questions in sorted(job_group_questions.items()):
        print(f"  {job_group}: {len(questions)} 题")

    # Save the extracted questions.
    with open(OUTPUT_PATH, 'w', encoding='utf-8') as f:
        json.dump(job_group_questions, f, ensure_ascii=False, indent=2)

    print(f"\n✅ 已保存所有面试题到 complete_interview_questions.json")

    # Generate the updater script.
    print(f"\n正在生成更新脚本...")

    with open(UPDATE_SCRIPT_PATH, 'w', encoding='utf-8') as f:
        f.write(UPDATE_SCRIPT)

    print(f"✅ 已生成 update_complete_interview_questions.py")
    print(f"\n请运行以下命令来更新所有面试题：")
    print(f"python3 update_complete_interview_questions.py")


if __name__ == '__main__':
    main()
初始化12个产业教务系统项目

主要内容：
- 包含12个产业的完整教务系统前端代码
- 智能启动脚本 (start-industry.sh)
- 可视化产业导航页面 (index.html)
- 项目文档 (README.md)

优化内容：
- 删除所有node_modules和.yoyo文件夹，从7.5GB减少到2.7GB
- 添加.gitignore文件避免上传不必要的文件
- 自动依赖管理和智能启动系统

产业列表：
1. 文旅产业 (5150)
2. 智能制造 (5151)
3. 智能开发 (5152)
4. 财经商贸 (5153)
5. 视觉设计 (5154)
6. 交通物流 (5155)
7. 大健康 (5156)
8. 土木水利 (5157)
9. 食品产业 (5158)
10. 化工产业 (5159)
11. 能源产业 (5160)
12. 环保产业 (5161)

🤖 Generated with Claude Code

Co-Authored-By: Claude <noreply@anthropic.com>
2025-09-24 14:14:14 +08:00
			`#!/usr/bin/env python3`
			`import json`

			`# 读取化工岗位简历数据`
			`with open('网页未导入数据/化工产业/化工岗位简历.json', 'r', encoding='utf-8') as f:`
			`chemical_data = json.load(f)`

			`# 收集所有岗位群的面试题`
			`job_group_questions = {}`
			`total_questions = 0`

			`# 遍历所有岗位`
			`for position in chemical_data:`
			`job_group = position.get('简历岗位群', '')`
			`interview_content = position.get('面试题内容', '')`

			`if not job_group or not interview_content:`
			`continue`

			`# 初始化岗位群`
			`if job_group not in job_group_questions:`
			`job_group_questions[job_group] = []`

			`# 解析面试题内容`
			`lines = interview_content.split('\n')`
			`current_question = None`
			`current_answer = None`

			`for line in lines:`
			`line = line.strip()`

			`# 识别问题行`
			`if any(marker in line for marker in ['示例答案', '正确答案', '答案：', '正确选项']):`
			`# 这是答案标记行`
			`if current_question and not current_answer:`
			`current_answer = ""`
			`elif line and current_answer is not None:`
			`# 收集答案内容`
			`if line.startswith('-') or line.startswith('•'):`
			`current_answer += line + '\n'`
			`elif not line.startswith('#') and not line.startswith('A.') and not line.startswith('B.') and not line.startswith('C.') and not line.startswith('D.'):`
			`current_answer += line + ' '`
			`elif line and (line[0].isdigit() or line.startswith('问题')):`
			`# 保存之前的问答对`
			`if current_question and current_answer:`
			`# 检查是否已存在`
			`is_duplicate = False`
			`for existing in job_group_questions[job_group]:`
			`if existing['question'] == current_question:`
			`is_duplicate = True`
			`break`

			`if not is_duplicate:`
			`job_group_questions[job_group].append({`
			`'question': current_question.strip(),`
			`'answer': current_answer.strip()`
			`})`
			`total_questions += 1`

			`# 开始新问题`
			`current_question = line`
			`current_answer = None`

			`# 保存最后一个问答对`
			`if current_question and current_answer:`
			`is_duplicate = False`
			`for existing in job_group_questions[job_group]:`
			`if existing['question'] == current_question:`
			`is_duplicate = True`
			`break`

			`if not is_duplicate:`
			`job_group_questions[job_group].append({`
			`'question': current_question.strip(),`
			`'answer': current_answer.strip()`
			`})`
			`total_questions += 1`

			`# 输出统计信息`
			`print(f"\n===== 化工岗位完整面试题统计 =====")`
			`print(f"总岗位群数: {len(job_group_questions)}")`
			`print(f"总面试题数: {total_questions}")`
			`print(f"\n各岗位群面试题数量：")`

			`for job_group, questions in sorted(job_group_questions.items()):`
			`print(f" {job_group}: {len(questions)} 题")`

			`# 保存提取的面试题`
			`with open('complete_interview_questions.json', 'w', encoding='utf-8') as f:`
			`json.dump(job_group_questions, f, ensure_ascii=False, indent=2)`

			`print(f"\n✅ 已保存所有面试题到 complete_interview_questions.json")`

			`# 生成更新脚本`
			`print(f"\n正在生成更新脚本...")`

			`with open('update_complete_interview_questions.py', 'w', encoding='utf-8') as f:`
			`f.write('''#!/usr/bin/env python3`
			`import json`
			`import re`
			`import subprocess`
			`from datetime import datetime`

			`# 读取提取的面试题数据`
			`with open('complete_interview_questions.json', 'r', encoding='utf-8') as f:`
			`job_group_questions = json.load(f)`

			`# 读取现有mock文件`
			`with open('src/mocks/resumeInterviewMock.js', 'r', encoding='utf-8') as f:`
			`content = f.read()`

			`# 备份文件`
			`timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')`
			`backup_file = f'src/mocks/resumeInterviewMock.js.backup_complete_{timestamp}'`
			`with open(backup_file, 'w', encoding='utf-8') as f:`
			`f.write(content)`
			`print(f"已创建备份: {backup_file}")`

			`# 更新每个岗位群的面试题`
			`updated_count = 0`
			`for job_group, questions in job_group_questions.items():`
			`if not questions:`
			`continue`

			`# 构建subQuestions数组内容`
			`sub_questions_items = []`
			`for i, q in enumerate(questions):`
			`question_obj = {`
			`"id": f"q{i+1}",`
			`"question": q['question'],`
			`"answer": q['answer']`
			`}`
			`sub_questions_items.append(json.dumps(question_obj, ensure_ascii=False, indent=20))`

			`# 创建完整的subQuestions内容`
			`sub_questions_content = ',\\n'.join(sub_questions_items)`

			`# 查找并替换对应岗位群的subQuestions`
			`pattern = rf'("question"\\s*:\\s*"{re.escape(job_group)}"[^}}]*?"subQuestions"\\s*:\\s*)\\[[^\\]]*\\]'`
			`replacement = rf'\\1[\\n{sub_questions_content}\\n        ]'`

			`new_content = re.sub(pattern, replacement, content, flags=re.DOTALL)`

			`if new_content != content:`
			`content = new_content`
			`updated_count += 1`
			`print(f"✓ 已更新 {job_group} 的 {len(questions)} 道面试题")`

			`# 保存更新后的文件`
			`with open('src/mocks/resumeInterviewMock.js', 'w', encoding='utf-8') as f:`
			`f.write(content)`

			`# 验证语法`
			`try:`
			`result = subprocess.run(['node', '-c', 'src/mocks/resumeInterviewMock.js'],`
			`capture_output=True, text=True, encoding='utf-8')`
			`if result.returncode == 0:`
			`print(f"\\n✓ 语法检查通过")`
			`print(f"✓ 成功更新了 {updated_count} 个岗位群的面试题")`
			`print(f"\\n✅ 所有面试题更新成功完成！")`
			`else:`
			`print(f"\\n✗ 语法检查失败: {result.stderr}")`
			`# 恢复备份`
			`with open(backup_file, 'r', encoding='utf-8') as f:`
			`content = f.read()`
			`with open('src/mocks/resumeInterviewMock.js', 'w', encoding='utf-8') as f:`
			`f.write(content)`
			`print(f"已从备份恢复")`
			`except Exception as e:`
			`print(f"错误: {e}")`
			`''')`

			`print(f"✅ 已生成 update_complete_interview_questions.py")`
			`print(f"\n请运行以下命令来更新所有面试题：")`
			`print(f"python3 update_complete_interview_questions.py")`