# online_sys/frontend_大健康/update_interview_questions_from_json.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import json
import re

def _extract_answer(lines):
    """Return the answer text found in one question block.

    The first line containing "答案" is treated as the answer marker. Every
    following non-empty line is collected, except lines that start with
    "示例答案" (skipped) and a line that starts with "<digits>." (which stops
    collection). The collected lines are joined with single spaces.

    Args:
        lines: The lines of one question block; the first is the question.

    Returns:
        The joined answer text, or "" when no marker line is present or
        nothing follows it.
    """
    for k, line in enumerate(lines):
        if '答案' not in line:
            continue

        answer_lines = []
        for answer_line in lines[k + 1:]:
            answer_line = answer_line.strip()
            if not answer_line or answer_line.startswith('示例答案'):
                continue
            # A line that looks like the next numbered question ends the answer.
            if re.match(r'^\d+\.', answer_line):
                break
            answer_lines.append(answer_line)
        # Only the first marker line is considered.
        return ' '.join(answer_lines)

    return ""


def parse_interview_questions(content):
    """Parse interview-question text into a list of question/answer dicts.

    The text is split into sections on heading lines of the form
    "# <Chinese numeral>、<title>". Each section body is split into
    numbered blocks ("1. ", "2. ", ...). The first line of a block is the
    question; the answer is extracted by ``_extract_answer``.

    A heading on the very first line of ``content`` is recognised as well as
    headings preceded by a newline, so the first section is not lost when the
    text starts directly with it.

    Args:
        content: Raw interview-question text. Empty or ``None`` yields ``[]``.

    Returns:
        A list of ``{"id": "q<N>", "question": str, "answer": str}`` dicts,
        numbered from 1 in document order. Blocks missing either a question
        or an answer are skipped and do not consume an id.
    """
    if not content:
        return []

    # With one capture group, re.split returns
    # [preamble, title1, body1, title2, body2, ...].
    sections = re.split(r'(?:^|\n)# ([一二三四五六七八九十]+、[^#\n]+)', content)

    questions = []
    question_id = 1
    # Only the section bodies are needed; the titles are not used.
    for section_content in sections[2::2]:
        question_blocks = re.split(r'\n\d+\.\s+', section_content)

        # The first chunk is whatever precedes "1." and holds no question.
        for block in question_blocks[1:]:
            lines = block.strip().split('\n')
            question_text = lines[0].strip()
            answer_text = _extract_answer(lines)

            if question_text and answer_text:
                questions.append({
                    "id": f"q{question_id}",
                    "question": question_text,
                    "answer": answer_text
                })
                question_id += 1

    return questions

def _group_questions_by_category(questions):
    """Group parsed questions under category headings.

    The category is "sticky": it starts as "综合面试题" and switches whenever
    a question's text contains one of the category keywords; questions
    without a keyword stay in the most recently selected category.

    Args:
        questions: Question dicts as produced by ``parse_interview_questions``.

    Returns:
        A list of ``{"id": "group_q<N>", "question": <category>,
        "subQuestions": [...]}`` dicts in order of first appearance.
    """
    category_questions = {}
    current_category = "综合面试题"

    for q in questions:
        text = q['question']
        if '岗位理解' in text:
            current_category = "岗位理解类问题"
        elif '实践经验' in text or '案例' in text:
            current_category = "实践经验类问题"
        elif '客户' in text:
            current_category = "客户服务类问题"
        elif '市场' in text or '趋势' in text:
            current_category = "市场与未来趋势类问题"

        category_questions.setdefault(current_category, []).append(q)

    return [
        {
            "id": f"group_q{cat_id}",
            "question": category,
            "subQuestions": cat_questions
        }
        for cat_id, (category, cat_questions) in enumerate(category_questions.items(), 1)
    ]


def _replace_industry_questions(mock_content, industry_id, questions):
    """Insert or replace the questions array of one industry in the mock text.

    Replacement text is supplied through callables rather than template
    strings, so backslashes in the serialized JSON are written verbatim
    instead of being interpreted as regex replacement escapes.

    Args:
        mock_content: Full text of the mock file.
        industry_id: Value of the industry's ``"id"`` field, e.g. "health_1".
        questions: Category list to serialize as the new questions array.

    Returns:
        A ``(content, message)`` tuple: the possibly modified text and a
        status line for the console.
    """
    questions_json = json.dumps(questions, ensure_ascii=False, indent=6)
    id_pattern = rf'"id":\s*"{re.escape(industry_id)}"'

    # First attempt: the industry object already has a "questions" array.
    # NOTE(review): `[^\]]*?` cannot span a nested array, so on a file that
    # already contains output of this script (which nests "subQuestions")
    # this may match only part of the existing array - confirm against the
    # real mock file before re-running.
    pattern = rf'(\s*{id_pattern}[^}}]*?"questions":\s*)\[[^\]]*?\](\s*\}})'
    new_content, count = re.subn(
        pattern,
        lambda m: m.group(1) + questions_json + m.group(2),
        mock_content,
        flags=re.DOTALL,
    )
    if count > 0:
        return new_content, "  ✓ 成功更新"

    if not re.search(id_pattern, mock_content):
        return mock_content, f"  ⚠ 未找到匹配的岗位群ID: {industry_id}"

    # Probe the text after the id to see whether a "questions" field exists.
    industry_block = re.search(
        rf'{id_pattern}[^{{]*?\{{[^}}]*?\]\s*(?:,\s*"questions"[^}}]*?\])?',
        mock_content,
        re.DOTALL,
    )
    if industry_block and '"questions"' in industry_block.group(0):
        # The field is there but the replace pattern above could not match it.
        return mock_content, f"  ⚠ 已存在questions字段但无法匹配替换: {industry_id}"

    if industry_block:
        # Append "questions" after the last array that precedes the closing brace.
        pattern3 = rf'({id_pattern}[^}}]*?\])\s*(\}})'
        new_content, count = re.subn(
            pattern3,
            lambda m: f'{m.group(1)},\n    "questions": {questions_json}\n  {m.group(2)}',
            mock_content,
        )
        if count > 0:
            return new_content, "  ✓ 成功添加questions字段"

    return mock_content, f"  ⚠ 未找到合适的位置添加questions: {industry_id}"


def main(data_path=None, mock_path=None):
    """Copy interview questions from the resume JSON into the JS mock file.

    Loads the resume data, parses the interview questions of the first entry
    per industry that yields any, groups them by category, and rewrites the
    matching industry's ``"questions"`` array in the mock file. Progress and
    per-industry results are printed. The mock file is written back even
    when no industry could be updated.

    Args:
        data_path: Path of the resume JSON file (a list of objects with the
            keys "简历岗位群" and "面试题内容"). Defaults to the original
            hard-coded location.
        mock_path: Path of the mock JS file to patch in place. Defaults to
            the original hard-coded location.
    """
    if data_path is None:
        data_path = '/Users/apple/Documents/cursor/教务系统/frontend_大健康/网页未导入数据/大健康产业/大健康岗位简历.json'
    if mock_path is None:
        mock_path = '/Users/apple/Documents/cursor/教务系统/frontend_大健康/src/mocks/resumeInterviewMock.js'

    with open(data_path, 'r', encoding='utf-8') as f:
        health_data = json.load(f)

    with open(mock_path, 'r', encoding='utf-8') as f:
        mock_content = f.read()

    # Industry name in the JSON data -> industry id used in the mock file.
    industry_mapping = {
        '健康管理': 'health_1',
        '健康检查': 'health_2',
        '康复治疗': 'health_3',
        '慢性病管理': 'health_4',
        '轻医美': 'health_5',
        '心理健康': 'health_6'
    }

    # Keep the first entry per industry whose content parses to any question.
    industry_questions = {}
    for item in health_data:
        industry = item.get('简历岗位群', '')
        interview_content = item.get('面试题内容', '')

        if industry and interview_content and industry not in industry_questions:
            questions = parse_interview_questions(interview_content)
            if questions:
                industry_questions[industry] = questions

    # Industries without a mapping entry are ignored.
    updates = [
        {
            'industry_id': industry_mapping[industry],
            'industry_name': industry,
            'questions': _group_questions_by_category(questions)
        }
        for industry, questions in industry_questions.items()
        if industry in industry_mapping
    ]

    print(f"准备更新 {len(updates)} 个岗位群的面试题...")

    for update in updates:
        print(f"更新岗位群: {update['industry_name']} (ID: {update['industry_id']})")
        print(f"  - 包含 {len(update['questions'])} 个问题分类")
        total_questions = sum(len(q['subQuestions']) for q in update['questions'])
        print(f"  - 共计 {total_questions} 个面试题")

        mock_content, message = _replace_industry_questions(
            mock_content, update['industry_id'], update['questions']
        )
        print(message)

    with open(mock_path, 'w', encoding='utf-8') as f:
        f.write(mock_content)

    print("\n面试题数据更新完成！")

# Run the update only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()