Files
online_sys/frontend_大健康/remove_duplicate_questions.py
KQL a7242f0c69 Initial commit: 教务系统在线平台
- 包含4个产业方向的前端项目:智能开发、智能制造、大健康、财经商贸
- 已清理node_modules、.yoyo等大文件,项目大小从2.6GB优化至631MB
- 配置完善的.gitignore文件

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2025-12-12 18:16:55 +08:00

117 lines
4.8 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import re
# Mock-data file that is cleaned in place, and the snapshot of its original
# text that is written before anything is modified.
MOCK_FILE = '/Users/apple/Documents/cursor/教务系统/frontend_大健康/src/mocks/resumeInterviewMock.js'
BACKUP_FILE = MOCK_FILE + '.backup_before_cleanup'

# Ids of the industry entries to inspect: health_1 ... health_11.
INDUSTRY_IDS = tuple(f'health_{n}' for n in range(1, 12))

_QUESTIONS_KEY = re.compile(r'"questions":\s*\[')
_ANY_INDUSTRY_ID = re.compile(r'"id":\s*"health_\d+"')
_ARRAY_CLOSE = re.compile(r'\n\];')


def _find_array_end(text: str, open_pos: int):
    """Return the index just past the ``]`` matching the ``[`` at *open_pos*.

    Brackets inside quoted strings (``"``, ``'`` or backtick, with backslash
    escapes) are ignored so that text such as ``"a]b"`` does not unbalance
    the count.  Returns ``None`` when no matching bracket exists.
    """
    depth = 0
    quote = None
    i = open_pos
    size = len(text)
    while i < size:
        ch = text[i]
        if quote is not None:
            if ch == '\\':
                i += 2  # skip the escaped character as well
                continue
            if ch == quote:
                quote = None
            elif ch == '\n' and quote != '`':
                # Plain JS strings cannot span lines; treat a dangling quote
                # (e.g. an apostrophe inside a comment) as ending here.
                quote = None
        elif ch in '"\'`':
            quote = ch
        elif ch == '[':
            depth += 1
        elif ch == ']':
            depth -= 1
            if depth == 0:
                return i + 1
        i += 1
    return None


def _industry_span(content: str, industry_id: str):
    """Return ``(start, end)`` of the text belonging to *industry_id*.

    The span starts at the entry's ``"id"`` key and runs up to the next
    ``"id": "health_<n>"`` key, or, for the last entry, up to the closing
    ``\\n];`` (or the end of the text when that is missing).  Returns
    ``None`` when the id does not occur in *content*.
    """
    head = re.search(rf'"id":\s*"{re.escape(industry_id)}"', content)
    if head is None:
        return None
    start = head.start()
    following = _ANY_INDUSTRY_ID.search(content, head.end())
    if following is not None:
        return start, following.start()
    closing = _ARRAY_CLOSE.search(content, start)
    return start, (closing.start() if closing is not None else len(content))


def _drop_extra_question_arrays(block: str):
    """Remove every ``"questions": [...]`` after the first one in *block*.

    Returns ``(new_block, removed_count)``.  An array is only removed when it
    lies after the end of the first array and is preceded by a separating
    comma with nothing but whitespace in between; the comma is removed with
    it.  Anything else (a nested ``questions`` key, a key that is the first
    member of an object, an unbalanced array) is left untouched so the
    surrounding structure is never cut in half.
    """
    matches = list(_QUESTIONS_KEY.finditer(block))
    if len(matches) < 2:
        return block, 0
    keep_end = _find_array_end(block, matches[0].end() - 1)
    if keep_end is None:
        return block, 0

    spans = []
    cursor = keep_end  # everything before this index is already decided
    for dup in matches[1:]:
        if dup.start() < cursor:
            continue  # nested inside an array that was already handled
        dup_end = _find_array_end(block, dup.end() - 1)
        if dup_end is None:
            continue
        # The search range includes `cursor` itself: when the duplicate
        # directly follows the previous array, the comma sits exactly there.
        comma = block.rfind(',', cursor, dup.start())
        if comma >= 0 and not block[comma + 1:dup.start()].strip():
            spans.append((comma, dup_end))
        cursor = dup_end

    pieces = []
    prev = 0
    for cut_from, cut_to in spans:
        pieces.append(block[prev:cut_from])
        prev = cut_to
    pieces.append(block[prev:])
    return ''.join(pieces), len(spans)


def remove_duplicate_questions(content: str, industry_ids=INDUSTRY_IDS) -> str:
    """Keep only the first ``questions`` array of each listed industry.

    Prints one line per industry that has more than one array and one line
    per industry from which duplicates were actually removed.  Returns the
    updated text; industries that are absent from *content* are skipped.
    """
    for industry_id in industry_ids:
        span = _industry_span(content, industry_id)
        if span is None:
            continue
        start, end = span
        block = content[start:end]
        found = len(_QUESTIONS_KEY.findall(block))
        if found < 2:
            continue
        print(f" {industry_id}: 发现 {found} 个questions数组")
        cleaned, removed = _drop_extra_question_arrays(block)
        if removed:
            content = content[:start] + cleaned + content[end:]
            print(f" ✓ 已删除 {industry_id} 的重复questions")
    return content


def tidy_content(content: str) -> str:
    """Drop trailing commas before ``]``/``}`` and collapse runs of blank lines.

    NOTE: these substitutions are purely textual and are applied to the whole
    text, including the inside of string literals.
    """
    content = re.sub(r',\s*\]', ']', content)
    content = re.sub(r',\s*\}', '}', content)
    return re.sub(r'\n\s*\n\s*\n', '\n\n', content)


def main() -> None:
    """Back up the mock file, remove duplicate arrays, tidy, and write back."""
    print("清理重复的questions数组...")
    with open(MOCK_FILE, 'r', encoding='utf-8') as f:
        content = f.read()
    # Snapshot the untouched text before any modification.
    with open(BACKUP_FILE, 'w', encoding='utf-8') as f:
        f.write(content)
    content = tidy_content(remove_duplicate_questions(content))
    with open(MOCK_FILE, 'w', encoding='utf-8') as f:
        f.write(content)
    print("✓ 清理完成!")


if __name__ == "__main__":
    main()