Files
online_sys/frontend_大健康/remove_duplicate_questions.py

117 lines
4.8 KiB
Python
Raw Permalink Normal View History

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import re
# Removes duplicated "questions" arrays from each industry entry of the
# resume-interview mock file, keeping only the first array per industry.
# The file is edited as plain text (it is JavaScript, not strict JSON), so
# the logic below works with regexes and bracket counting instead of a parser.

# Mock data file that is rewritten in place.
MOCK_FILE = '/Users/apple/Documents/cursor/教务系统/frontend_大健康/src/mocks/resumeInterviewMock.js'
# Suffix appended to the mock file path for the pre-cleanup copy.
BACKUP_SUFFIX = '.backup_before_cleanup'

# Industry ids whose entries are inspected, in processing order.
INDUSTRY_IDS = (
    'health_1', 'health_2', 'health_3', 'health_4', 'health_5',
    'health_6', 'health_7', 'health_8', 'health_9', 'health_10', 'health_11',
)

# Start of a `"questions": [` array; the match ends just past the '['.
QUESTIONS_KEY_RE = re.compile(r'"questions":\s*\[')
# Start of any industry entry, used to find where the current one stops.
NEXT_INDUSTRY_RE = re.compile(r'"id":\s*"health_\d+"')
# Closing line of the top-level array, used as the end of the last entry.
ARRAY_END_RE = re.compile(r'\n\];')


def _find_array_end(text, open_pos):
    """Return the index just past the ']' matching the '[' at *open_pos*.

    Returns ``None`` when the brackets never balance.

    NOTE: brackets inside string literals are counted as well, so text that
    contains an unbalanced '[' or ']' will confuse the scan.
    """
    depth = 0
    for i in range(open_pos, len(text)):
        ch = text[i]
        if ch == '[':
            depth += 1
        elif ch == ']':
            depth -= 1
            if depth == 0:
                return i + 1
    return None


def _find_industry_span(content, industry_id):
    """Return ``(start, end)`` offsets of one industry entry, or ``None``.

    The span starts at the entry's ``"id"`` key and stops at the next
    industry's ``"id"`` key. For the last entry it stops at the closing
    ``];`` line, or at the end of the text when that line is absent.
    """
    match = re.search(rf'"id":\s*"{re.escape(industry_id)}"', content)
    if match is None:
        return None
    start = match.start()
    following = NEXT_INDUSTRY_RE.search(content, match.end())
    if following is not None:
        return start, following.start()
    closing = ARRAY_END_RE.search(content, start)
    return start, (closing.start() if closing is not None else len(content))


def _strip_extra_questions(section):
    """Drop every "questions" array after the first one in *section*.

    Returns ``(new_section, removed_count)``. Each removal deletes from the
    comma preceding the duplicate key through the duplicate's closing ']'.
    A duplicate is left alone when its brackets do not balance or when no
    separating comma lies at or after the end of the first array (for
    example when the second match is nested inside the first array).
    """
    removed = 0
    while True:
        matches = list(QUESTIONS_KEY_RE.finditer(section))
        if len(matches) < 2:
            break
        # match.end() - 1 is the '[' itself, so no guessing is needed.
        first_end = _find_array_end(section, matches[0].end() - 1)
        dup_end = _find_array_end(section, matches[1].end() - 1)
        if first_end is None or dup_end is None:
            break
        comma_pos = section.rfind(',', 0, matches[1].start())
        # `>=` matters: when the duplicate directly follows the first array,
        # the separating comma sits exactly at first_end.
        if comma_pos < first_end:
            break
        section = section[:comma_pos] + section[dup_end:]
        removed += 1
    return section, removed


def remove_duplicate_questions(content, industry_ids=INDUSTRY_IDS):
    """Return *content* with only the first "questions" array per industry.

    Prints a progress line for every industry in which duplicates are found
    and another one when they have been removed.
    """
    for industry_id in industry_ids:
        span = _find_industry_span(content, industry_id)
        if span is None:
            continue
        start, end = span
        section = content[start:end]
        found = len(QUESTIONS_KEY_RE.findall(section))
        if found <= 1:
            continue
        print(f" {industry_id}: 发现 {found} 个questions数组")
        cleaned, removed = _strip_extra_questions(section)
        if removed:
            content = content[:start] + cleaned + content[end:]
            print(f" ✓ 已删除 {industry_id} 的重复questions")
    return content


def tidy_separators(content):
    """Remove trailing commas before ']' / '}' and collapse blank-line runs.

    NOTE: the substitutions run over the whole text, string literals included.
    """
    content = re.sub(r',\s*\]', ']', content)
    content = re.sub(r',\s*\}', '}', content)
    return re.sub(r'\n\s*\n\s*\n', '\n\n', content)


def main(path=MOCK_FILE):
    """Back up the mock file, remove duplicate arrays, and write it back."""
    print("清理重复的questions数组...")
    with open(path, 'r', encoding='utf-8') as f:
        content = f.read()
    # Keep a copy of the untouched text next to the original file.
    with open(path + BACKUP_SUFFIX, 'w', encoding='utf-8') as f:
        f.write(content)
    content = tidy_separators(remove_duplicate_questions(content))
    with open(path, 'w', encoding='utf-8') as f:
        f.write(content)
    print("✓ 清理完成!")


if __name__ == "__main__":
    main()