online_sys/frontend_智能开发/scripts/update_positions_complete.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import json
import re
import sys

def escape_for_js(text):
    """Escape *text* for embedding inside a JavaScript template literal.

    Backslashes, backticks and ``${`` sequences each get a backslash put in
    front of them. Any falsy input (``None``, ``""``) yields an empty string.
    """
    if not text:
        return ""

    # Backslashes go first so that the backslashes introduced by the later
    # substitutions are not themselves doubled.
    substitutions = (
        ('\\', '\\\\'),
        ('`', '\\`'),
        ('${', '\\${'),
    )
    escaped = text
    for needle, replacement in substitutions:
        escaped = escaped.replace(needle, replacement)
    return escaped

def parse_resume_content_complete(resume_content):
    """Split a markdown resume into its three numbered top-level sections.

    The text is searched for the headings ``一、项目经历``,
    ``二、专业技能`` / ``二、掌握技能`` and ``三、个人总结`` / ``三、个人评价``,
    each introduced by one or more ``#`` characters. The text between a
    heading and the next numbered heading (or the end of the input) becomes
    that section's content, with surrounding whitespace stripped.

    Args:
        resume_content: The full resume text (a ``str``).

    Returns:
        A dict with the keys ``projectExperience``, ``skills`` and
        ``personalSummary``. A section whose heading is not found maps to
        ``""``, and a warning is printed for every empty section.
    """
    flags = re.DOTALL | re.IGNORECASE

    # ``#+`` (rather than a single ``#``) lets ``##``/``###`` headings match
    # as a whole. With a single ``#`` the lookahead only fired on the LAST
    # hash of the next heading, leaving the earlier hashes glued to the end
    # of the previous section.
    section_specs = (
        (
            'projectExperience',
            r'#+\s*一、项目经历(.*?)(?=#+\s*二、|$)',
            "    ⚠️  警告：未找到项目经历部分",
        ),
        (
            'skills',
            r'#+\s*二、(?:专业技能|掌握技能)(.*?)(?=#+\s*三、|$)',
            "    ⚠️  警告：未找到专业技能部分",
        ),
        (
            'personalSummary',
            r'#+\s*三、(?:个人总结|个人评价)(.*?)$',
            "    ⚠️  警告：未找到个人总结部分",
        ),
    )

    sections = {}
    for key, pattern, _ in section_specs:
        found = re.search(pattern, resume_content, flags)
        sections[key] = found.group(1).strip() if found else ""

    # Warn after all sections were extracted so the messages stay grouped.
    for key, _, warning in section_specs:
        if not sections[key]:
            print(warning)

    return sections

def validate_content(position_name, resume_content, parsed_data):
    """Warn when the parsed sections are much shorter than the source text.

    The combined length of the three parsed sections is compared with the
    length of the original resume. If it falls below 80% of the original,
    a possible-content-loss warning with both lengths is printed.

    The check is advisory only: the function returns ``True`` in every case.
    """
    source_size = len(resume_content)
    section_keys = ('projectExperience', 'skills', 'personalSummary')
    kept_size = sum(len(parsed_data[key]) for key in section_keys)

    # Some shrinkage is expected because the headings are dropped.
    looks_truncated = kept_size < source_size * 0.8
    if looks_truncated:
        print(f"    ⚠️  {position_name}: 可能有内容丢失")
        print(f"       原始长度: {source_size}, 解析后长度: {kept_size}")

    return True

def process_part_complete(part_number):
    """Parse one part of the resume JSON export and save the result.

    Loads ``个人简历内容_part{part_number}.json`` from a fixed directory,
    parses every entry that carries both the position-name key and the
    resume-content key, and writes the resulting mapping to
    ``resume_mapping_part{part_number}.json``.

    Returns:
        A dict mapping position name to its parsed sections. If the input
        file does not exist, an error is printed and ``{}`` is returned
        without writing anything.
    """
    source_path = f'/Users/apple/Documents/cursor/教务系统/frontend/网页未导入数据/个人简历内容_part{part_number}.json'
    print(f"\n========== 处理第 {part_number} 部分 ==========")

    try:
        with open(source_path, 'r', encoding='utf-8') as src:
            records = json.load(src)
    except FileNotFoundError:
        print(f"错误：找不到文件 {source_path}")
        return {}

    position_map = {}
    accepted_total = 0
    rejected_names = []

    for record in records:
        # Entries lacking either key are ignored silently.
        if '❌岗位名称查询' not in record or '简历内容' not in record:
            continue

        title = record['❌岗位名称查询']
        raw_resume = record['简历内容']
        print(f"\n处理岗位: {title}")

        sections = parse_resume_content_complete(raw_resume)

        if not validate_content(title, raw_resume, sections):
            rejected_names.append(title)
            print(f"    ✗ 解析失败")
            continue

        position_map[title] = sections
        accepted_total += 1
        print(f"    ✓ 成功解析")

    # Persist the mapping for later inspection or reuse.
    target_path = f'/Users/apple/Documents/cursor/教务系统/frontend/scripts/resume_mapping_part{part_number}.json'
    with open(target_path, 'w', encoding='utf-8') as dst:
        json.dump(position_map, dst, ensure_ascii=False, indent=2)

    print(f"\n========== 第 {part_number} 部分处理完成 ==========")
    print(f"成功处理: {accepted_total} 个岗位")
    if rejected_names:
        print(f"失败岗位: {', '.join(rejected_names)}")
    print(f"映射数据已保存到: {target_path}")

    return position_map

def find_and_update_position(content, position_name, resume_data):
    """Add the parsed resume fields to one position's ``resume`` object.

    ``content`` is the text of mockData.js. The position is located by a
    ``name:``, ``jobTitle:`` or ``title:`` property whose quoted value
    equals ``position_name``; the three patterns are tried in that order.
    For each hit, the first ``resume: {`` within the following 3000
    characters is taken as the object to extend, and its closing brace is
    found by counting ``{`` / ``}``.

    NOTE(review): the brace counter does not skip string or template
    literals, so braces inside existing field values can shift the detected
    end of the object.

    Args:
        content: Full text of the JavaScript data file.
        position_name: Exact position name to look for.
        resume_data: Dict with ``projectExperience``, ``skills`` and
            ``personalSummary`` strings.

    Returns:
        ``(content, updated)``. ``content`` is the possibly modified text;
        ``updated`` is ``True`` only when the fields were inserted. It stays
        ``False`` when the position is not found and also when every hit was
        skipped (no ``resume`` object, unbalanced braces, or the fields are
        already present); the not-found message is printed in all of these
        cases.
    """
    updated = False
    escaped_name = re.escape(position_name)

    # Property names under which the position title may be stored.
    patterns = [
        rf'name:\s*["\']({escaped_name})["\']',
        rf'jobTitle:\s*["\']({escaped_name})["\']',
        rf'title:\s*["\']({escaped_name})["\']',
    ]

    for pattern in patterns:
        for match in re.finditer(pattern, content):
            print(f"    找到岗位: {position_name} (使用模式: {pattern.split(':')[0]})")

            start_pos = match.start()

            # Only look a bounded distance ahead for the resume object.
            resume_match = re.search(
                r'resume:\s*\{', content[start_pos:start_pos + 3000]
            )
            if not resume_match:
                continue

            resume_start = start_pos + resume_match.end()

            # Walk forward until the brace opened by ``resume: {`` closes.
            brace_count = 1
            i = resume_start
            while i < len(content) and brace_count > 0:
                if content[i] == '{':
                    brace_count += 1
                elif content[i] == '}':
                    brace_count -= 1
                i += 1

            if brace_count != 0:
                # The object never closes; inserting at the end of the file
                # would corrupt it, so leave this match alone.
                print("    ⚠️  警告：resume对象的花括号不匹配，跳过")
                continue

            resume_end = i - 1  # index of the closing brace
            resume_body = content[resume_start:resume_end]

            if 'projectExperience' in resume_body:
                print("    - 该岗位已有相关字段，跳过")
                continue

            # A separating comma is only valid when the object already has a
            # last property that is not followed by a trailing comma.
            existing = resume_body.rstrip()
            separator = ',' if existing and not existing.endswith(',') else ''

            indent = ' ' * 14
            field_lines = ',\n'.join(
                f"{indent}{key}: `{escape_for_js(resume_data[key])}`"
                for key in ('projectExperience', 'skills', 'personalSummary')
            )
            new_fields = f"{separator}\n{field_lines}"

            content = content[:resume_end] + new_fields + content[resume_end:]
            updated = True
            print("    ✓ 更新成功")
            # Offsets of the remaining matches are stale after the insert.
            break

        if updated:
            break

    if not updated:
        print(f"    ✗ 未找到岗位: {position_name}")

    return content, updated

def update_mockdata_complete(position_map, part_number):
    """Apply every entry of ``position_map`` to mockData.js.

    The file is read once from a fixed path, each position is patched in
    memory via ``find_and_update_position``, and the resulting text is
    written back to the same path.

    Returns:
        The number of positions that were updated. If mockData.js does not
        exist, an error is printed and ``0`` is returned.
    """
    print(f"\n========== 开始更新 mockData.js (第 {part_number} 部分) ==========")

    mock_path = '/Users/apple/Documents/cursor/教务系统/frontend/src/data/mockData.js'

    try:
        with open(mock_path, 'r', encoding='utf-8') as src:
            js_text = src.read()
    except FileNotFoundError:
        print("错误：找不到 mockData.js 文件")
        return 0

    # Label / key pairs for the per-section length report.
    length_report = (
        ('项目经历', 'projectExperience'),
        ('专业技能', 'skills'),
        ('个人总结', 'personalSummary'),
    )

    missing = []
    patched_total = 0

    for title, sections in position_map.items():
        print(f"\n更新岗位: {title}")

        # Lengths are printed so the operator can sanity-check the content.
        for label, key in length_report:
            print(f"  {label}长度: {len(sections[key])} 字符")

        js_text, was_patched = find_and_update_position(js_text, title, sections)

        if not was_patched:
            missing.append(title)
        else:
            patched_total += 1

    with open(mock_path, 'w', encoding='utf-8') as dst:
        dst.write(js_text)

    print(f"\n========== 第 {part_number} 部分更新完成 ==========")
    print(f"成功更新: {patched_total} 个岗位")
    if missing:
        print(f"未找到的岗位: {', '.join(missing)}")

    return patched_total

# Script entry point
def main(argv=None):
    """Run the import for the part number given on the command line.

    Args:
        argv: Argument list including the program name; defaults to
            ``sys.argv``.

    Returns:
        The process exit code: ``1`` when the part number is not 1, 2 or 3
        (including a non-numeric argument), ``0`` otherwise. With no
        argument, usage help is printed and ``0`` is returned.
    """
    args = sys.argv if argv is None else argv

    if len(args) <= 1:
        print("使用方法: python3 update_positions_complete.py <部分编号>")
        print("部分编号: 1, 2, 或 3")
        print("\n示例:")
        print("  python3 update_positions_complete.py 1  # 处理第1部分")
        print("  python3 update_positions_complete.py 2  # 处理第2部分")
        print("  python3 update_positions_complete.py 3  # 处理第3部分")
        return 0

    # A non-numeric argument is reported like any other invalid part number
    # instead of ending in an uncaught ValueError traceback.
    try:
        part_num = int(args[1])
    except ValueError:
        part_num = None

    if part_num not in (1, 2, 3):
        print("错误：部分编号必须是 1, 2 或 3")
        return 1

    print(f"开始处理第 {part_num} 部分的简历数据...")
    position_map = process_part_complete(part_num)

    if position_map:
        # Only touch mockData.js when there is something to insert.
        update_mockdata_complete(position_map, part_num)
        print(f"\n第 {part_num} 部分处理完成！")
    else:
        print(f"\n第 {part_num} 部分没有找到有效数据")
    return 0


if __name__ == "__main__":
    sys.exit(main())