# online_sys/frontend_大健康/reorganize_questions_data_only.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
重新组织面试题数据 - 只修改数据，不修改代码结构
将各个position的questions合并到对应industry的questions字段
"""

import json
import re
from datetime import datetime

def load_health_resume_data(path='网页未导入数据/大健康产业/大健康岗位简历.json'):
    """Load the health-industry resume dataset from a JSON file.

    Args:
        path: Location of the JSON file. Defaults to the path this script
            has always read, resolved relative to the current working
            directory.

    Returns:
        The decoded JSON value, or ``None`` when the file cannot be opened,
        read, or decoded. In the failure case an error message is printed.
    """
    try:
        with open(path, 'r', encoding='utf-8') as f:
            return json.load(f)
    # OSError covers missing/unreadable files; ValueError covers both
    # json.JSONDecodeError and UnicodeDecodeError (they subclass it).
    except (OSError, ValueError) as e:
        print(f"Error loading health resume data: {e}")
        return None

def _split_question_and_answer(block):
    """Split one numbered block into a ``(question, answer)`` pair.

    Lines before the first line that starts with ``示例答案：`` form the
    question; everything after the marker, including text that shares the
    marker's own line, forms the answer. Non-empty lines are stripped and
    joined with single spaces.
    """
    marker = '示例答案：'
    question_lines = []
    answer_lines = []
    target = question_lines
    for raw_line in block.split('\n'):
        line = raw_line.strip()
        if line.startswith(marker):
            target = answer_lines
            # Keep answer text written on the same line as the marker.
            line = line[len(marker):].strip()
        if line:
            target.append(line)
    return " ".join(question_lines), " ".join(answer_lines)


def parse_interview_content_to_flat_array(content):
    """Parse interview-question markdown into a flat list of Q&A dicts.

    The text is expected to consist of top-level sections introduced by
    headings such as ``# 一、...``. Inside each section, every line that
    starts with ``<number>.`` opens a new question block; within a block the
    ``示例答案：`` marker separates the question from its sample answer.

    Args:
        content: The markdown text. ``None`` or an empty string is accepted.

    Returns:
        A list of ``{"id": "qN", "question": str, "answer": str}`` dicts,
        numbered consecutively across all sections starting at ``q1``.
        Returns an empty list when the text is empty or has no section
        heading. Blocks without any question text are skipped.

    Note:
        Text that precedes the first numbered line of a section is ignored,
        and a numbered line inside an answer is treated as the start of a
        new question, because the two cannot be told apart from the text.
    """
    if not content:
        return []

    # With one capture group, re.split yields
    # [preamble, title1, body1, title2, body2, ...].
    sections = re.split(
        r'^# ([一二三四五六七八九十]+、[^#\n]+)',
        content,
        flags=re.MULTILINE,
    )
    if len(sections) < 2:
        return []

    questions = []
    for section_body in sections[2::2]:
        # Split only where a line begins with "<number>."; the lookahead
        # keeps decimals such as "3.5" from being read as a question number.
        # No capture group here, so the result holds just the block texts.
        blocks = re.split(
            r'^[ \t]*\d+\.(?!\d)\s*',
            section_body,
            flags=re.MULTILINE,
        )
        # blocks[0] is whatever precedes the first numbered line.
        for block in blocks[1:]:
            question_text, answer_text = _split_question_and_answer(block)
            if question_text:
                questions.append({
                    "id": f"q{len(questions) + 1}",
                    "question": question_text,
                    "answer": answer_text
                })

    return questions

def _strip_questions_fields(js_text):
    """Return *js_text* with every ``"questions": [...]`` field removed.

    The array is matched up to the first ``]``, so an array whose content
    itself contains ``]`` is not handled.
    """
    def _drop_field(match):
        leading_comma, trailing_separator = match.group(1), match.group(2)
        # The field sat between two sibling fields: keep one separator so
        # the neighbours are not glued together.
        if leading_comma and trailing_separator:
            return trailing_separator
        return ''

    return re.sub(
        r'(,?)\s*"questions": \[[^\]]*?\](\s*,\s*)?',
        _drop_field,
        js_text,
    )


def _insert_industry_questions(js_text, industry_name, questions):
    """Append a ``"questions"`` field after an industry's ``"positions"`` array.

    Returns a ``(new_text, match_count)`` tuple. The ``positions`` array is
    matched up to the first ``]``, so nested arrays inside it are not
    handled.
    """
    questions_json = json.dumps(questions, ensure_ascii=False, indent=6)
    pattern = rf'("name": "{re.escape(industry_name)}"[^}}]*?"positions": \[[^\]]*?\]\s*)'
    # A callable replacement inserts the JSON verbatim. Passing it as a
    # template string would make re.sub reinterpret its backslash escapes.
    return re.subn(
        pattern,
        lambda match: match.group(1) + ',\n      "questions": ' + questions_json,
        js_text,
    )


def reorganize_questions_by_industry():
    """Move interview questions from position level to industry level.

    Steps:
      1. Load the resume dataset and parse the first usable
         ``面试题内容`` text found for each ``面试题`` category.
      2. Read ``src/mocks/resumeInterviewMock.js`` and write a timestamped
         backup copy next to it.
      3. Remove every existing ``"questions": [...]`` field from the text.
      4. For each mapped industry, insert the parsed questions after that
         industry's ``"positions"`` array.
      5. Write the result back to the mock file.

    Returns:
        ``True`` on success. ``False`` when the dataset cannot be loaded or
        any exception is raised; in the latter case the traceback is
        printed.
    """
    mock_path = 'src/mocks/resumeInterviewMock.js'
    try:
        health_data = load_health_resume_data()
        if not health_data:
            print("Failed to load health resume data")
            return False

        # Group by category; the first item of a category that yields
        # at least one parsed question wins.
        category_questions = {}
        for item in health_data:
            category = item.get('面试题', '')
            interview_content = item.get('面试题内容', '')

            if category and interview_content and category not in category_questions:
                questions = parse_interview_content_to_flat_array(interview_content)
                if questions:
                    category_questions[category] = questions

        print(f"解析了 {len(category_questions)} 个面试题类别")

        with open(mock_path, 'r', encoding='utf-8') as f:
            content = f.read()

        # Keep an untouched copy before rewriting the file in place.
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        backup_filename = f'{mock_path}.backup_{timestamp}'
        with open(backup_filename, 'w', encoding='utf-8') as f:
            f.write(content)
        print(f"已创建备份文件: {backup_filename}")

        updated_content = _strip_questions_fields(content)

        # Industry name in the mock file -> category name in the dataset.
        industry_category_mapping = {
            "健康管理": "健康管理类岗位面试题",
            "健康检查": "健康检查类岗位面试题",
            "康复治疗": "康复治疗类岗位面试题",
            "医疗美容": "医疗美容类岗位面试题",
            "运营管理": "运营类岗位面试题",
            "心理健康": "心理健康类岗位面试题",
            "供应链管理": "供应链类岗位面试题",
            "药品制造": "药品制造类岗位面试题",
            "检测分析": "检测分析类岗位面试题",
            "临床研究": "临床研究类岗位面试题"
        }

        for industry_name, category in industry_category_mapping.items():
            if category not in category_questions:
                continue
            questions_data = category_questions[category]
            updated_content, inserted = _insert_industry_questions(
                updated_content, industry_name, questions_data
            )
            # Only report success when the industry was actually found.
            if inserted:
                print(f"✅ 为 {industry_name} 行业添加了面试题 ({len(questions_data)} 个问题)")
            else:
                print(f"⚠️ 未在文件中找到 {industry_name} 行业，未添加面试题")

        with open(mock_path, 'w', encoding='utf-8') as f:
            f.write(updated_content)

        print("面试题数据重组完成！")
        return True

    except Exception as e:
        # Top-level boundary of the script: report the failure and signal
        # it through the return value instead of propagating.
        print(f"重组失败: {e}")
        import traceback
        traceback.print_exc()
        return False

# Script entry point: run the reorganization only when this file is executed
# directly. The boolean result (True on success, False on failure) is not
# used here.
if __name__ == "__main__":
    reorganize_questions_by_industry()