Files
online_sys/frontend_智能制造/clean_answer_options.py

141 lines
5.0 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
清理面试题答案中的选项只保留正确答案
"""
import re
from datetime import datetime
print("=== 清理面试题答案中的选项 ===\n")

# 1. Load the current mock file into memory; the rest of the script edits
#    this string and writes it back at the end.
print("1. 读取当前mock文件...")
with open('src/mocks/resumeInterviewMock.js', 'r', encoding='utf-8') as f:
    content = f.read()

# 2. Take a timestamped backup before anything is modified.
#    The file is copied byte-for-byte rather than re-encoded from `content`:
#    text-mode reading translates line endings, so writing the decoded text
#    back out would not necessarily reproduce the original bytes.
import shutil

backup_time = datetime.now().strftime('%Y%m%d_%H%M%S')
backup_file = f'src/mocks/resumeInterviewMock.js.backup_clean_options_{backup_time}'
print(f"2. 创建备份: {backup_file}")
shutil.copyfile('src/mocks/resumeInterviewMock.js', backup_file)

# 3. Announce the option clean-up pass that follows.
print("\n3. 开始清理答案中的选项...")
# Position where an option label may start: not glued to a preceding ASCII
# letter (so the "C." in "PLC." is not read as option C), unless that letter
# is the tail of a literal \n / \r / \t escape inside the JS string.
_LABEL_START = r'(?:(?<![A-Za-z])|(?<=\\[nrt]))'

# An option label such as "A." or "B、".
_OPTION_LABEL = _LABEL_START + r'[A-D][.、]'

# One option: its letter and its text, which runs up to the next label, the
# next answer statement, or the end of the string.
_OPTION_RE = re.compile(
    rf'{_LABEL_START}([A-D])[.、]\s*(.+?)(?=\s*{_OPTION_LABEL}|正确答案|答案|$)'
)

# "正确答案是C", "答案:C", ... (ASCII or full-width colon).
_ANSWER_STATEMENT_RE = re.compile(r'(?:正确答案|答案)[是为:\uff1a]*\s*([A-D])')

# Whitespace / literal escapes at either edge, plus trailing "。" and
# semicolons (ASCII or full-width).
_EDGE_NOISE_RE = re.compile(r'^(?:\s|\\[nrt])+|(?:\s|\\[nrt]|[。;\uff1b])+$')


def _parse_options(text):
    """Return {letter: tidied option text} for *text*; the first occurrence of a letter wins."""
    options = {}
    for letter, body in _OPTION_RE.findall(text):
        options.setdefault(letter, _EDGE_NOISE_RE.sub('', body))
    return options


def clean_answer(match):
    """Reduce a multiple-choice answer to the text of its correct option.

    Intended as the replacement callback for ``re.sub`` over
    ``"answer": "..."`` entries.

    Args:
        match: A regex match whose group 0 is the whole ``"answer": "..."``
            entry and whose group 1 is the raw (still escaped) string body.

    Returns:
        ``'"answer": "<correct option text>"'`` when the body contains option
        labels, names the correct letter ("正确答案是C", "答案:C", ...) and
        that option can be extracted; otherwise group 0 unchanged.
    """
    full_match = match.group(0)
    answer_content = match.group(1)

    # Nothing to do unless the answer contains option labels (A. / A、 ...).
    if not re.search(_OPTION_LABEL, answer_content):
        return full_match

    statement = _ANSWER_STATEMENT_RE.search(answer_content)
    if statement is None:
        return full_match
    letter = statement.group(1)

    # Parse the text on either side of the statement first, so the letter in
    # "正确答案是A." is not mistaken for the label of option A.
    options = _parse_options(answer_content[:statement.start()])
    for key, value in _parse_options(answer_content[statement.end():]).items():
        options.setdefault(key, value)
    chosen = options.get(letter)

    if not chosen:
        # The statement may itself introduce the only option ("答案:A. 内容").
        chosen = _parse_options(answer_content).get(letter)

    if not chosen:
        return full_match

    # An odd number of trailing backslashes would escape the closing quote and
    # break the generated JS string; leave such answers untouched.
    trailing_backslashes = len(chosen) - len(chosen.rstrip('\\'))
    if trailing_backslashes % 2:
        return full_match

    return f'"answer": "{chosen}"'
# 4. Run the replacement.
# Keep the pre-substitution text so the final report can tell whether
# anything changed.
original_content = content

# Matches `"answer": "..."` and captures the raw string body.
# The body is "non-quote, non-backslash characters, or a backslash escape".
# Backslashes must be excluded from the plain-character class; otherwise that
# class swallows the backslash of `\"` and the escaped quote is taken for the
# end of the string.
pattern = r'"answer":\s*"([^"\\]*(?:\\.[^"\\]*)*)"'

# Count the answers found (findall returns the captured bodies).
matches = re.findall(pattern, content)
total_answers = len(matches)
print(f"找到 {total_answers} 个答案")

# Count how many of those bodies contain an option label (A. / A、 ...).
processed = sum(
    1 for answer_text in matches if re.search(r'[A-D][\.、]', answer_text)
)
print(f"其中 {processed} 个答案包含选项格式")

# Rewrite every answer through clean_answer.
content = re.sub(pattern, clean_answer, content)

# 5. Extra clean-up patterns.
print("\n4. 执行额外的清理模式...")
# Clean up common leftover option formats.
def additional_clean(text):
    """Strip leftover option prefixes from ``"answer": "..."`` entries.

    Three rewrites are applied in order:

    1. a leading "选项:" (ASCII or full-width colon) is removed;
    2. a leading bare option letter followed by whitespace ("A 内容") is
       removed, unless a lowercase ASCII letter follows, which indicates an
       English sentence such as "A good approach" rather than a label;
    3. a leading "A." / "A、" label is removed.

    Rules 2 and 3 leave the entry untouched when the remaining text still
    contains another option label, because stripping only the first label of
    an unresolved "A. x B. y C. z" list would corrupt it.

    Args:
        text: File content (or any string) containing ``"answer": "..."``
            entries.

    Returns:
        The text with the prefixes removed; rewritten entries are emitted as
        ``"answer": "<text>"`` with a single space after the colon.
    """
    # A later option label, not glued to a preceding ASCII letter unless that
    # letter is the tail of a literal \n / \r / \t escape.
    later_label = re.compile(r'(?:(?<![A-Za-z])|(?<=\\[nrt]))[A-D][.、]')

    def strip_label(match):
        remainder = match.group(1)
        if later_label.search(remainder):
            return match.group(0)
        return f'"answer": "{remainder}"'

    # Rule 1: drop a leading "选项:".
    text = re.sub(r'"answer":\s*"选项[:\uff1a]\s*', '"answer": "', text)
    # Rule 2: drop a leading bare letter. The lookahead also rejects
    # whitespace so `\s+` cannot backtrack to sidestep the lowercase check.
    text = re.sub(r'"answer":\s*"[A-D]\s+(?![a-z\s])([^"]+)"', strip_label, text)
    # Rule 3: drop a leading "A." / "A、".
    text = re.sub(r'"answer":\s*"[A-D][.、]\s*([^"]+)"', strip_label, text)
    return text
content = additional_clean(content)

# 6. Write the cleaned content back over the mock file.
print("\n5. 保存更新后的文件...")
with open('src/mocks/resumeInterviewMock.js', 'w', encoding='utf-8') as f:
    f.write(content)

# 7. Syntax-check the rewritten file with Node (`node -c` parses without
#    executing).
print("\n6. 验证语法...")
import shutil
import subprocess

try:
    result = subprocess.run(['node', '-c', 'src/mocks/resumeInterviewMock.js'],
                            capture_output=True, text=True)
except FileNotFoundError:
    # Node is not installed or not on PATH: the file has already been written,
    # so report that the check was skipped instead of dying with a traceback.
    result = None

if result is None:
    print(f"⚠️ 未找到 node,已跳过语法检查;如有问题请从备份恢复: {backup_file}")
elif result.returncode == 0:
    print("✅ 语法检查通过!")
    # Report what changed, if anything.
    if content != original_content:
        # Re-count the answers that still contain an option label.
        remaining_options = len(re.findall(r'"answer":\s*"[^"]*[A-D][\.、]', content))
        print("\n清理结果:")
        print(f"- 原始包含选项的答案: {processed}")
        print(f"- 剩余包含选项的答案: {remaining_options}")
        print(f"- 成功清理: {processed - remaining_options}")
    else:
        print("\n未发现需要清理的选项格式")
else:
    print("❌ 语法错误:")
    print(result.stderr)
    print("\n正在恢复备份...")
    # Restore the backup byte-for-byte.
    shutil.copyfile(backup_file, 'src/mocks/resumeInterviewMock.js')
    print("已恢复到备份版本")
    # The clean-up was rolled back: exit with a failure status rather than
    # falling through to the success banner below.
    raise SystemExit(1)

print("\n✅ 清理完成!")