online_sys/frontend_大健康/update_questions_safe.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
安全地更新面试题数据，只替换questions字段内容
避免破坏JavaScript文件的整体结构
"""

import json
import re
import sys
from datetime import datetime

def load_health_resume_data():
    """Load the health-industry resume dataset from its JSON file.

    Returns:
        The decoded JSON document, or ``None`` if the file could not be
        opened or parsed (the error is printed to stdout).
    """
    source_path = '网页未导入数据/大健康产业/大健康岗位简历.json'
    try:
        with open(source_path, 'r', encoding='utf-8') as handle:
            parsed = json.load(handle)
    except Exception as exc:
        # Best-effort loader: report the problem and let the caller decide.
        print(f"Error loading health resume data: {exc}")
        return None
    else:
        return parsed

def _split_question_and_answer(block):
    """Split one numbered question block into ``(question, answer)`` strings.

    Non-empty lines before the ``示例答案：`` marker form the question, and
    everything from the marker onwards forms the answer. Lines in each group
    are stripped and joined with single spaces.
    """
    marker = '示例答案：'
    question_lines = []
    answer_lines = []
    in_answer = False

    for raw_line in block.strip().split('\n'):
        line = raw_line.strip()
        if line.startswith(marker):
            in_answer = True
            # Keep answer text that shares a line with the marker instead of
            # discarding the whole line.
            line = line[len(marker):].strip()
        if not line:
            continue
        if in_answer:
            answer_lines.append(line)
        else:
            question_lines.append(line)

    return " ".join(question_lines), " ".join(answer_lines)


def parse_interview_content_to_flat_array(content):
    """Parse interview text into a flat list of question/answer dicts.

    The text is split on top-level headings of the form ``# 一、Title`` and
    each section is then split on numbered items (``1. ...``). Within an
    item, the ``示例答案：`` marker separates the question from its answer.

    Args:
        content: Raw interview text; may be empty or ``None``.

    Returns:
        A list of ``{"id": "qN", "question": str, "answer": str}`` dicts,
        numbered consecutively from ``q1`` across all sections. Items with
        no question text are skipped. Returns ``[]`` when ``content`` is
        empty or contains no recognised heading.
    """
    if not content:
        return []

    # The heading pattern is anchored on a preceding newline, so give a
    # heading at offset 0 one to match against.
    if content.startswith('# '):
        content = '\n' + content
    sections = re.split(r'\n# ([一二三四五六七八九十]+、[^#\n]+)', content)

    # With one capture group re.split yields
    # [preamble, title, body, title, body, ...]; a single element means no
    # heading was found.
    if len(sections) < 2:
        return []

    questions = []
    for section_body in sections[2::2]:
        # `(?:^|\n)` lets the first item match even when it sits at the very
        # start of the stripped body (i.e. directly under the heading);
        # requiring a literal newline there silently dropped that question.
        question_parts = re.split(
            r'(?:^|\n)\s*(\d+\.)\s+', section_body.strip()
        )

        # Same layout as above: [lead-in, "1.", block, "2.", block, ...].
        for question_block in question_parts[2::2]:
            question_text, answer_text = _split_question_and_answer(
                question_block
            )
            if question_text:
                questions.append({
                    "id": f"q{len(questions) + 1}",
                    "question": question_text,
                    "answer": answer_text,
                })

    return questions

def _escape_js_string(text):
    """Escape ``text`` for use inside a double-quoted JavaScript string."""
    return (
        text.replace('\\', '\\\\')
        .replace('"', '\\"')
        .replace('\n', '\\n')
        .replace('\r', '')
    )


def _render_questions_js(questions):
    """Render parsed question dicts as an indented JavaScript array literal."""
    entry_template = (
        '      {{\n'
        '        "id": "{}",\n'
        '        "question": "{}",\n'
        '        "answer": "{}"\n'
        '      }}'
    )
    entries = [
        entry_template.format(
            q['id'],
            _escape_js_string(q['question']),
            _escape_js_string(q['answer']),
        )
        for q in questions
    ]
    return '[\n' + ',\n'.join(entries) + '\n    ]'


def _find_enclosing_object(text, anchor):
    """Return ``(start, end)`` of the ``{...}`` opened just before ``anchor``.

    ``start`` is the last ``{`` before ``anchor`` and ``end`` is one past its
    balancing ``}``. Returns ``None`` if there is no preceding ``{`` or the
    braces never balance.

    NOTE: braces are counted textually, so a ``{`` or ``}`` inside a string
    literal will skew the result.
    """
    start = text.rfind('{', 0, anchor)
    if start == -1:
        return None

    depth = 0
    for idx in range(start, len(text)):
        char = text[idx]
        if char == '{':
            depth += 1
        elif char == '}':
            depth -= 1
            if depth == 0:
                return start, idx + 1
    return None


def safe_update_questions():
    """Replace the ``questions`` array of each matching position in the mock.

    Loads the resume dataset, parses every position's interview text, and for
    each position whose ``"title"`` appears in
    ``src/mocks/resumeInterviewMock.js`` removes the existing ``"questions"``
    array and inserts a freshly generated one right after the
    ``"requirements"`` array. Positions that cannot be located, or whose
    object has no ``"requirements"`` array, are left untouched.

    Returns:
        ``True`` when the file was processed and written back, ``False`` if
        the dataset could not be loaded or any exception occurred (the
        exception and its traceback are printed).
    """
    mock_path = 'src/mocks/resumeInterviewMock.js'

    try:
        health_data = load_health_resume_data()
        if not health_data:
            print("Failed to load health resume data")
            return False

        # Map each position name to its parsed question list.
        position_to_questions = {}
        for item in health_data:
            position_name = item.get('岗位名称', '')
            interview_content = item.get('面试题内容', '')
            if position_name and interview_content:
                position_to_questions[position_name] = (
                    parse_interview_content_to_flat_array(interview_content)
                )

        print(f"解析了 {len(position_to_questions)} 个岗位的面试题")

        with open(mock_path, 'r', encoding='utf-8') as f:
            updated_content = f.read()

        update_count = 0
        for position_name, questions in position_to_questions.items():
            if not questions:
                continue

            # Locate the first object carrying this title.
            title_pattern = rf'"title":\s*"{re.escape(position_name)}"'
            match = re.search(title_pattern, updated_content)
            if not match:
                continue

            span = _find_enclosing_object(updated_content, match.start())
            if span is None:
                continue
            obj_start, obj_end = span
            position_obj = updated_content[obj_start:obj_end]

            # The nearest '{' may open a nested object that closes before the
            # title; make sure the span really contains it. Reuse the search
            # pattern so whitespace after the colon is treated consistently.
            if not re.search(title_pattern, position_obj):
                continue

            questions_field = (
                ',\n    "questions": ' + _render_questions_js(questions)
            )

            # Drop the existing questions array first.
            # NOTE: `[^\]]*?` stops at the first ']', so an existing array
            # whose text contains ']' is only partially matched.
            without_questions = re.sub(
                r',?\s*"questions":\s*\[[^\]]*?\]',
                '',
                position_obj,
                flags=re.DOTALL,
            )

            # Insert the new array after "requirements". A function is used
            # as the replacement because a replacement *string* is itself
            # scanned for backslash escapes, which would undo the JS escaping
            # applied to the question text.
            new_obj, inserted = re.subn(
                r'("requirements":\s*\[[^\]]*?\])',
                lambda m, field=questions_field: m.group(1) + field,
                without_questions,
                flags=re.DOTALL,
            )
            if not inserted:
                # Without an insertion point, committing the removal above
                # would just delete the position's questions.
                print(f"⚠️ 跳过 {position_name}：未找到 requirements 字段，保留原有面试题")
                continue

            updated_content = (
                updated_content[:obj_start] + new_obj + updated_content[obj_end:]
            )
            update_count += 1
            print(f"✅ 更新 {position_name} 的面试题 ({len(questions)} 个问题)")

        with open(mock_path, 'w', encoding='utf-8') as f:
            f.write(updated_content)

        print(f"\n🎉 成功安全更新 {update_count} 个岗位的面试题数据！")
        return True

    except Exception as e:
        # Top-level boundary for the script: report everything and signal
        # failure to the caller instead of crashing.
        print(f"Error updating interview questions: {e}")
        import traceback
        traceback.print_exc()
        return False

def main():
    """Run the update, print a start and outcome message, and return the result.

    Returns:
        The boolean returned by ``safe_update_questions()``.
    """
    print("开始安全更新面试题数据...")

    succeeded = safe_update_questions()
    outcome = "面试题数据安全更新完成！" if succeeded else "面试题数据更新失败！"
    print(outcome)

    return succeeded

if __name__ == "__main__":
    # main() returns False when the update fails; surface that as a non-zero
    # exit status so shells and CI can detect the failure.
    sys.exit(0 if main() else 1)