# online_sys/frontend_大健康/update_personalized_interview_questions.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
更新简历与面试题页面的面试题数据
使用大健康岗位简历.json中的个性化面试题内容
"""

import json
import re
import sys
from datetime import datetime

def load_health_resume_data():
    """Read and parse the health-industry resume JSON file.

    Returns:
        The decoded JSON document, or None when the file cannot be opened
        or decoded (the error is printed to stdout).
    """
    data_path = '网页未导入数据/大健康产业/大健康岗位简历.json'
    try:
        with open(data_path, mode='r', encoding='utf-8') as source:
            raw_text = source.read()
        return json.loads(raw_text)
    except Exception as err:
        print(f"Error loading health resume data: {err}")
        return None

# Line prefix that separates a question from its sample answer.
_ANSWER_MARKER = '示例答案：'


def _js_string_literal(text):
    """Return *text* as a double-quoted string literal using JSON escaping."""
    # json.dumps escapes backslashes, quotes and control characters. Escaping
    # only '"' by hand left backslashes raw, which changes or breaks the
    # emitted literal.
    return json.dumps(text, ensure_ascii=False)


def _split_question_and_answer(question_block):
    """Split one numbered block into ``(question, answer)`` one-line strings.

    Lines before the first answer marker form the question; everything from
    the marker on forms the answer. Blank lines are dropped and the remaining
    lines are joined with single spaces.
    """
    question_lines = []
    answer_lines = []
    in_answer = False

    for raw_line in question_block.split('\n'):
        line = raw_line.strip()
        if line.startswith(_ANSWER_MARKER):
            in_answer = True
            # Keep any answer text written on the same line as the marker
            # instead of discarding the whole line.
            line = line[len(_ANSWER_MARKER):].strip()
        if not line:
            continue
        if in_answer:
            answer_lines.append(line)
        else:
            question_lines.append(line)

    return ' '.join(question_lines), ' '.join(answer_lines)


def parse_interview_content_to_js(content):
    """Convert interview-question text into a JavaScript array literal.

    The text is split into sections at headings of the form
    ``# 一、<title>`` and each section into numbered questions
    (``1. ``, ``2. `` ...). Inside a question, lines before the
    ``示例答案：`` marker are the question and the rest is the answer.

    Args:
        content: Raw interview text; may be empty or None.

    Returns:
        A string holding an array literal of question groups, each with an
        ``id``, a ``question`` (the heading prefixed with ``# ``) and a
        ``subQuestions`` list. Returns ``'[]'`` when there is no content, no
        heading, or no question could be extracted.
    """
    if not content:
        return '[]'

    # The heading pattern needs a preceding newline, so supply one when the
    # text starts directly with a heading.
    if content.startswith('# '):
        content = '\n' + content
    sections = re.split(r'\n# ([一二三四五六七八九十]+、[^#\n]+)', content)

    if len(sections) < 2:
        return '[]'

    groups_js = []
    group_id = 1

    # re.split with one capture group yields
    # [preamble, title1, body1, title2, body2, ...].
    for raw_title, raw_body in zip(sections[1::2], sections[2::2]):
        section_title = raw_title.strip()

        # The numbering pattern also needs a preceding newline. The body is
        # stripped, so re-add one; otherwise a question that directly follows
        # the heading would be treated as preamble and lost.
        question_parts = re.split(r'\n\s*(\d+\.)\s+', '\n' + raw_body.strip())

        sub_questions_js = []
        # Layout is [preamble, "1.", block1, "2.", block2, ...].
        for question_block in question_parts[2::2]:
            question_text, answer_text = _split_question_and_answer(
                question_block.strip()
            )
            if not question_text:
                continue

            sub_id = len(sub_questions_js) + 1
            sub_questions_js.append(
                '          {\n'
                f'            "id": "q{group_id}_{sub_id}",\n'
                f'            "question": {_js_string_literal(question_text)},\n'
                f'            "answer": {_js_string_literal(answer_text)}\n'
                '          }'
            )

        # Sections without any usable question are skipped and do not
        # consume a group id.
        if not sub_questions_js:
            continue

        joined_sub_questions = ',\n'.join(sub_questions_js)
        groups_js.append(
            '      {\n'
            f'        "id": "group_q{group_id}",\n'
            f'        "question": {_js_string_literal("# " + section_title)},\n'
            '        "subQuestions": [\n'
            f'{joined_sub_questions}\n'
            '        ]\n'
            '      }'
        )
        group_id += 1

    if not groups_js:
        return '[]'

    return '[\n' + ',\n'.join(groups_js) + '\n    ]'

def _find_array_end(text, open_index):
    """Return the index just past the ``]`` closing the ``[`` at *open_index*.

    Nested brackets are balanced and brackets inside double-quoted strings
    are ignored. Returns -1 when no matching ``]`` exists.

    NOTE(review): only double-quoted strings are tracked; this assumes the
    mock file does not put brackets inside single-quoted or template
    strings — confirm against the real file.
    """
    depth = 0
    in_string = False
    index = open_index
    while index < len(text):
        char = text[index]
        if in_string:
            if char == '\\':
                index += 1  # skip the escaped character
            elif char == '"':
                in_string = False
        elif char == '"':
            in_string = True
        elif char == '[':
            depth += 1
        elif char == ']':
            depth -= 1
            if depth == 0:
                return index + 1
        index += 1
    return -1


def _rewrite_position_array(text, position_name, field_name, rewrite):
    """Rewrite the *field_name* array that follows a position's title.

    For every ``"title": "<position_name>",`` that is followed by
    ``"<field_name>": [`` with no ``}`` in between, the whole array literal
    (up to its matching ``]``) is replaced by ``rewrite(array_text)``.

    Returns:
        ``(new_text, match_count)``.
    """
    prefix_re = re.compile(
        rf'"title": "{re.escape(position_name)}",[^}}]+?"{field_name}": \['
    )
    pieces = []
    cursor = 0
    match_count = 0
    while True:
        match = prefix_re.search(text, cursor)
        if match is None:
            break
        array_start = match.end() - 1
        array_end = _find_array_end(text, array_start)
        if array_end == -1:
            # Unbalanced brackets: leave the remainder untouched.
            break
        pieces.append(text[cursor:array_start])
        pieces.append(rewrite(text[array_start:array_end]))
        cursor = array_end
        match_count += 1
    pieces.append(text[cursor:])
    return ''.join(pieces), match_count


def update_interview_questions(health_data):
    """Write per-position interview questions into the resume mock file.

    Reads ``src/mocks/resumeInterviewMock.js``, converts each item's
    ``面试题内容`` with ``parse_interview_content_to_js`` and, for the entry
    whose ``"title"`` equals the item's ``岗位名称``, either replaces its
    existing ``"questions"`` array or, if it has none, appends a
    ``"questions"`` field after its ``"requirements"`` array. The file is
    then written back.

    Args:
        health_data: Iterable of dicts with the keys ``岗位名称``, ``面试题``
            and ``面试题内容``.

    Returns:
        True on success; False if any exception occurred (the error and a
        traceback are printed).
    """
    mock_path = 'src/mocks/resumeInterviewMock.js'
    try:
        with open(mock_path, 'r', encoding='utf-8') as f:
            content = f.read()

        # Generated questions keyed by position name, plus the first result
        # seen for each category (only used for the summary below).
        position_questions_map = {}
        category_questions_map = {}

        for item in health_data:
            position_name = item.get('岗位名称', '')
            interview_category = item.get('面试题', '')
            interview_content = item.get('面试题内容', '')

            if not (position_name and interview_content):
                continue

            questions_js = parse_interview_content_to_js(interview_content)
            position_questions_map[position_name] = questions_js

            if interview_category and interview_category not in category_questions_map:
                category_questions_map[interview_category] = questions_js

        updated_content = content
        update_count = 0

        for position_name, questions_js in position_questions_map.items():
            # Replace an existing "questions" array. The array end is found
            # by bracket matching because the generated arrays nest
            # "subQuestions" arrays; stopping at the first "]" would corrupt
            # the file on a second run. Splicing text directly also avoids
            # re.sub's backslash processing of replacement strings.
            new_content, matches = _rewrite_position_array(
                updated_content, position_name, 'questions',
                lambda _old, js=questions_js: js,
            )
            if matches:
                # Decide by match, not by text difference: an identical
                # array must not fall through and get a duplicate field.
                if new_content != updated_content:
                    updated_content = new_content
                    update_count += 1
                    print(f"Updated interview questions for position: {position_name}")
                continue

            # No "questions" field yet: append one after "requirements".
            new_content, matches = _rewrite_position_array(
                updated_content, position_name, 'requirements',
                lambda old, js=questions_js: old + ',\n    "questions": ' + js,
            )
            if matches:
                updated_content = new_content
                update_count += 1
                print(f"Added interview questions for position: {position_name}")

        with open(mock_path, 'w', encoding='utf-8') as f:
            f.write(updated_content)

        print(f"Interview questions updated successfully! Updated {update_count} positions.")
        print("Summary of question categories:")
        for category in category_questions_map:
            print(f"  - {category}")

        return True

    except Exception as e:
        # Top-level boundary of the script: report and signal failure.
        print(f"Error updating interview questions: {e}")
        import traceback
        traceback.print_exc()
        return False

def main():
    """Run the update: load the resume data, then rewrite the mock file.

    Returns:
        False when the data could not be loaded (or is empty); otherwise
        the result of ``update_interview_questions``.
    """
    print("Starting to update personalized interview questions...")

    # Step 1: load the source data; nothing to do without it.
    resume_records = load_health_resume_data()
    if not resume_records:
        print("Failed to load health resume data")
        return False

    # Step 2: apply the data to the mock file and report the outcome.
    succeeded = update_interview_questions(resume_records)
    outcome_message = (
        "Personalized interview questions update completed!"
        if succeeded
        else "Interview questions update failed!"
    )
    print(outcome_message)
    return succeeded

if __name__ == "__main__":
    # Turn main()'s boolean result into the process exit status so callers
    # (shell scripts, CI) can detect a failed update.
    sys.exit(0 if main() else 1)