ALL-teach_sys/frontend_大健康/remove_duplicate_questions.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import re

# Location of the mock data file that is cleaned in place, and of the copy
# that is written before any modification is made.
MOCK_PATH = '/Users/apple/Documents/cursor/教务系统/frontend_大健康/src/mocks/resumeInterviewMock.js'
BACKUP_PATH = MOCK_PATH + '.backup_before_cleanup'

# Industry (job-group) ids whose sections are scanned for duplicated
# "questions" arrays; only the first array in each section is kept.
industries = ['health_1', 'health_2', 'health_3', 'health_4', 'health_5',
              'health_6', 'health_7', 'health_8', 'health_9', 'health_10', 'health_11']

# Compiled once because they are applied for every industry section.
_QUESTIONS_OPEN_RE = re.compile(r'"questions":\s*\[')
_INDUSTRY_ID_RE = re.compile(r'"id":\s*"health_\d+"')
_TOP_LEVEL_CLOSE_RE = re.compile(r'\n\];')


def _find_array_end(text: str, open_pos: int):
    """Return the index just past the ``]`` that closes the ``[`` at *open_pos*.

    Brackets inside double-quoted string literals (with backslash escapes)
    are ignored, so text such as ``"pick [A] or [B]"`` does not disturb the
    nesting depth. Single-quoted strings, template literals and comments are
    not recognised. Returns ``None`` when no matching ``]`` is found.
    """
    depth = 0
    in_string = False
    escaped = False
    for i in range(open_pos, len(text)):
        ch = text[i]
        if in_string:
            if escaped:
                escaped = False
            elif ch == '\\':
                escaped = True
            elif ch == '"':
                in_string = False
        elif ch == '"':
            in_string = True
        elif ch == '[':
            depth += 1
        elif ch == ']':
            depth -= 1
            if depth == 0:
                return i + 1
    return None


def _section_bounds(content: str, industry_id: str):
    """Return ``(start, end)`` offsets of one industry's section, or ``None``.

    The section starts at the industry's ``"id": "<industry_id>"`` entry and
    runs up to the next ``"id": "health_<n>"`` entry. For the last industry
    it runs up to the first ``\\n];`` after the start, or to the end of
    *content* when that is missing too.
    """
    id_match = re.search(rf'"id":\s*"{re.escape(industry_id)}"', content)
    if id_match is None:
        return None

    start = id_match.start()
    following = _INDUSTRY_ID_RE.search(content, id_match.end())
    if following is not None:
        return start, following.start()

    closing = _TOP_LEVEL_CLOSE_RE.search(content, start)
    if closing is not None:
        return start, closing.start()
    return start, len(content)


def _strip_duplicate_arrays(section: str):
    """Remove every "questions" array after the first one in *section*.

    Returns ``(new_section, removed_count)``. A later array is removed,
    together with the separator comma in front of it, only when

    * it starts after the end of the arrays already handled (so a
      "questions" key nested inside a kept array is left alone), and
    * nothing but whitespace lies between that comma and the key (so a
      "questions" key that opens a nested object is not cut out together
      with unrelated properties, which would unbalance the braces).
    """
    openings = list(_QUESTIONS_OPEN_RE.finditer(section))
    if len(openings) < 2:
        return section, 0

    # match.end() - 1 is the '[' itself; no look-behind heuristic is needed.
    first_end = _find_array_end(section, openings[0].end() - 1)
    if first_end is None:
        return section, 0

    doomed = []
    cursor = first_end
    for opening in openings[1:]:
        if opening.start() < cursor:
            continue  # nested inside an array that was already accounted for
        array_end = _find_array_end(section, opening.end() - 1)
        if array_end is None:
            break  # unbalanced brackets: stop rather than guess
        # The search range starts AT cursor: a comma placed directly after
        # the previous ']' is exactly the separator that has to go.
        comma = section.rfind(',', cursor, opening.start())
        if comma != -1 and not section[comma + 1:opening.start()].strip():
            doomed.append((comma, array_end))
        cursor = array_end

    # Cut from the back so the earlier offsets stay valid.
    for comma, array_end in reversed(doomed):
        section = section[:comma] + section[array_end:]
    return section, len(doomed)


def _tidy_separators(content: str) -> str:
    """Drop trailing commas before ``]`` / ``}`` and collapse blank-line runs.

    The patterns are applied to the whole text, string literals included.
    """
    content = re.sub(r',\s*\]', ']', content)
    content = re.sub(r',\s*\}', '}', content)
    content = re.sub(r'\n\s*\n\s*\n', '\n\n', content)
    return content


def remove_duplicate_questions(content: str, industry_ids=None) -> str:
    """Return *content* with duplicated "questions" arrays removed.

    For every id in *industry_ids* (defaults to the module-level
    ``industries`` list) the matching section is located and every
    "questions" array after the first one is deleted. Afterwards trailing
    commas and runs of blank lines are tidied across the whole text.
    Progress is reported on stdout.
    """
    if industry_ids is None:
        industry_ids = industries

    for industry_id in industry_ids:
        # Offsets are recomputed on every pass because earlier deletions
        # shift everything that follows them.
        bounds = _section_bounds(content, industry_id)
        if bounds is None:
            continue
        start, end = bounds
        section = content[start:end]

        found = len(_QUESTIONS_OPEN_RE.findall(section))
        if found <= 1:
            continue
        print(f"  {industry_id}: 发现 {found} 个questions数组")

        cleaned, removed = _strip_duplicate_arrays(section)
        if removed:
            content = content[:start] + cleaned + content[end:]
            print(f"    ✓ 已删除 {industry_id} 的重复questions")

    return _tidy_separators(content)


def main() -> None:
    """Back up the mock file, clean it and write the result back in place."""
    print("清理重复的questions数组...")

    with open(MOCK_PATH, 'r', encoding='utf-8') as f:
        content = f.read()

    # Keep a copy of the untouched file before anything is rewritten.
    with open(BACKUP_PATH, 'w', encoding='utf-8') as f:
        f.write(content)

    content = remove_duplicate_questions(content)

    with open(MOCK_PATH, 'w', encoding='utf-8') as f:
        f.write(content)

    print("✓ 清理完成！")


if __name__ == "__main__":
    main()