# online_sys/frontend_大健康/clean_duplicate_questions.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
清理重复的面试题数据
只保留industry级别的questions，移除position级别的questions
"""

import re
from datetime import datetime

def _insert_industry_questions(content, industry_name, questions):
    """Append an industry-level ``"questions"`` array after an industry's positions.

    Args:
        content: Full text of the mock file.
        industry_name: Value of the industry's ``"name"`` field to look for.
        questions: List of question dicts to serialize into the file.

    Returns:
        ``content`` with a ``"questions"`` entry inserted after every
        ``"positions": [...]`` array the pattern finds for that industry, or
        ``content`` unchanged when the pattern does not match.
    """
    # Function-scope import so this block does not rely on a file-level one.
    import json

    # json.dumps escapes double quotes embedded in the text. Converting the
    # Python repr with str(...).replace("'", '"') produced broken literals for
    # any answer that itself contains a double quote.
    questions_js = json.dumps(questions, ensure_ascii=False)

    # NOTE(review): the positions array is taken to end at the first "]" after
    # '"positions": [', so an array nested inside a position would cut the match
    # short -- confirm against the real data file.
    pattern = (
        r'("name": "' + re.escape(industry_name)
        + r'"[^}]*?"positions": \[[^\]]*?\]\s*)'
    )

    # A callable replacement keeps the backslashes in questions_js from being
    # reinterpreted as escapes by re.sub's replacement-template parser.
    return re.sub(
        pattern,
        lambda match: match.group(1) + ',\n    "questions": ' + questions_js,
        content,
        flags=re.DOTALL,
    )


def clean_duplicate_questions(mock_path='src/mocks/resumeInterviewMock.js'):
    """Remove position-level ``questions`` arrays from the interview mock file.

    The file is read, a timestamped backup copy is written next to it, the
    cleanup regexes are applied, default industry-level questions are added
    for the 健康管理 and 健康检查 industries when none are detected, and the
    result is written back to ``mock_path``.

    Args:
        mock_path: Path of the mock data file to rewrite in place. The default
            is relative to the current working directory.

    Returns:
        True when the file was rewritten, False when any exception occurred
        (the exception and its traceback are printed).
    """
    try:
        # Read the current file.
        with open(mock_path, 'r', encoding='utf-8') as f:
            content = f.read()

        # Keep a timestamped copy of the untouched content before rewriting.
        backup_filename = f'{mock_path}.backup_{datetime.now().strftime("%Y%m%d_%H%M%S")}'
        with open(backup_filename, 'w', encoding='utf-8') as f:
            f.write(content)
        print(f"已创建备份文件: {backup_filename}")

        # Drop "questions" arrays that are the last key of their object: an
        # optional leading comma, '"questions": [' followed by '{', everything
        # up to the first ']', an optional trailing comma, and only when the
        # next non-space character is '}'.
        # NOTE(review): the pattern does not itself check for an enclosing
        # positions array, so an industry-level array that is the last key of
        # its object matches as well, and an answer containing ']' ends the
        # match early -- confirm against the real data file.
        updated_content = re.sub(
            r',?\s*"questions": \[\s*\{[^\]]*?\](?:\s*,\s*)?(?=\s*\})',
            '',
            content,
            flags=re.DOTALL
        )

        # Remove a comma left directly before a '} ]' that is followed by an
        # industry-level '"questions":' key.
        updated_content = re.sub(
            r',(\s*\}\s*\]\s*,?\s*"questions":)',
            r'\1',
            updated_content,
            flags=re.DOTALL
        )

        # Default industry-level questions, added only when none are detected.
        health_management_questions = [
            {
                "id": "q1",
                "question": "你认为健康顾问和传统医生的区别是什么？",
                "answer": "医生主要负责疾病诊疗，而健康顾问更多聚焦于疾病预防和健康促进。健康顾问通过营养、运动、心理等多维度干预，帮助客户建立良好的生活习惯，与医生形成互补关系，共同提升客户健康水平。"
            },
            {
                "id": "q2",
                "question": "健康管理中最具挑战性的部分是什么？",
                "answer": "最大的挑战是客户的依从性。很多客户虽然知道健康的重要性，但难以坚持。我会通过阶段性目标、数据化追踪和正向反馈来增强客户信心，提高健康干预的执行力。"
            },
            {
                "id": "q3",
                "question": "如果客户长期忽视健康管理建议，你会怎么办？",
                "answer": "我会先分析原因，可能是方案过于复杂、目标过高或缺乏动力。我会调整为更小、更容易实现的目标，比如\"每天走5000步\"而不是\"立即跑10公里\"。同时，通过定期沟通和数据反馈，让客户看到小进步，逐步增强信心。"
            }
        ]

        health_check_questions = [
            {
                "id": "q1",
                "question": "常见的健康检查项目有哪些？",
                "answer": "包括基础检查（身高、体重、血压、心率）、实验室检测（血常规、肝肾功能、血脂血糖）、影像学检查（X光、彩超、CT/MRI）、心电图检查、癌症筛查（肿瘤标志物、低剂量螺旋CT）、以及女性乳腺/宫颈筛查、男性前列腺检查等。"
            },
            {
                "id": "q2",
                "question": "为什么说健康检查应因人而异？",
                "answer": "因为健康风险与性别、年龄、职业、遗传背景和生活方式密切相关。例如，年轻人更需关注代谢异常和传染病筛查，中老年人则更需重视心血管疾病和肿瘤筛查。因人制宜的体检方案能提高检查的针对性和有效性，避免资源浪费。"
            }
        ]

        # 健康管理: insert only when no '"questions":' key appears within the
        # 2000 characters that follow the industry name.
        # NOTE(review): the 2000-character window is a heuristic -- confirm it
        # covers one industry entry and no more.
        mgmt_start = updated_content.find('"name": "健康管理"')
        if mgmt_start != -1 and '"questions":' not in updated_content[mgmt_start:mgmt_start + 2000]:
            updated_content = _insert_industry_questions(
                updated_content, "健康管理", health_management_questions
            )

        # 健康检查: insert only when no '"questions":' key appears between the
        # industry name and the next '"name":' key (or the end of the file).
        # NOTE(review): if position objects carry their own "name" key, this
        # section stops at the first position -- confirm against the data file.
        health_check_start = updated_content.find('"name": "健康检查"')
        if health_check_start != -1:
            next_industry_start = updated_content.find('"name":', health_check_start + 20)
            if next_industry_start == -1:
                next_industry_start = len(updated_content)

            health_check_section = updated_content[health_check_start:next_industry_start]
            if '"questions":' not in health_check_section:
                updated_content = _insert_industry_questions(
                    updated_content, "健康检查", health_check_questions
                )

        # Write the cleaned content back over the original file.
        with open(mock_path, 'w', encoding='utf-8') as f:
            f.write(updated_content)

        print("重复面试题数据清理完成！")
        return True

    except Exception as e:
        # Script-level boundary: report the failure and signal it via the
        # return value instead of propagating.
        print(f"清理失败: {e}")
        import traceback
        traceback.print_exc()
        return False

if __name__ == "__main__":
    # Surface the result as the process exit status (0 on success, 1 on
    # failure) so shells and CI jobs can detect a failed cleanup; previously
    # the boolean result was discarded and the script always exited 0.
    raise SystemExit(0 if clean_duplicate_questions() else 1)