141 lines
5.0 KiB
Python
141 lines
5.0 KiB
Python
|
|
#!/usr/bin/env python3
|
|||
|
|
# -*- coding: utf-8 -*-
|
|||
|
|
|
|||
|
|
import json
|
|||
|
|
import re
|
|||
|
|
from datetime import datetime
|
|||
|
|
import shutil
|
|||
|
|
|
|||
|
|
def clean_newlines_in_text(text):
    """Normalize newline usage in a piece of text.

    Literal backslash-n sequences are turned into real newline characters,
    runs of three or more line breaks (optionally separated by whitespace)
    are collapsed into a single blank line, and surrounding whitespace is
    trimmed.

    Args:
        text: The value to clean. Non-string values are returned unchanged.

    Returns:
        The cleaned string, or ``text`` itself when it is not a ``str``.
    """
    if not isinstance(text, str):
        return text

    # Turn the two-character sequence (backslash followed by "n") into a
    # real newline character.
    text = text.replace('\\n', '\n')

    # Collapse three or more line breaks, possibly separated by whitespace,
    # into exactly one blank line.
    text = re.sub(r'\n\s*\n\s*\n+', '\n\n', text)

    # strip() removes leading and trailing newlines together with any other
    # surrounding whitespace, so no dedicated regex passes are needed.
    return text.strip()
|
|||
|
|
|
|||
|
|
def process_interview_data():
    """Clean newline escapes in the interview questions of the mock file.

    Creates a timestamped backup of ``src/mocks/resumeInterviewMock.js``,
    parses the JSON array assigned to ``const industries``, runs
    ``clean_newlines_in_text`` over the ``question`` and ``answer`` of every
    sub-question, and writes the re-serialized array back into the file.

    Returns:
        The cleaned list of industries, or ``None`` when the file contains
        no ``const industries = [...];`` declaration.

    Raises:
        OSError: If the mock file cannot be copied, read or written.
        json.JSONDecodeError: If the array literal is not valid JSON.
    """
    mock_path = 'src/mocks/resumeInterviewMock.js'
    print("🚀 开始处理面试题中的换行符...")

    # Keep a timestamped copy so the rewrite below can be undone.
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    backup_name = f"{mock_path}.backup_newlines_{timestamp}"
    shutil.copy(mock_path, backup_name)
    print(f"📦 已创建备份: {backup_name}")

    with open(mock_path, 'r', encoding='utf-8') as f:
        content = f.read()

    # Locate the array literal assigned to `industries`.
    industries_match = re.search(
        r'const industries = (\[.*?\]);', content, re.DOTALL
    )
    if not industries_match:
        print("❌ 未找到industries数据")
        return None

    industries = json.loads(industries_match.group(1))

    total_questions = 0
    processed_questions = 0  # number of fields whose text actually changed

    for industry in industries:
        for question_group in industry.get('questions', []):
            for sub_question in question_group.get('subQuestions', []):
                total_questions += 1
                for field in ('question', 'answer'):
                    if field not in sub_question:
                        continue
                    original = sub_question[field]
                    cleaned = clean_newlines_in_text(original)
                    if original != cleaned:
                        sub_question[field] = cleaned
                        processed_questions += 1

    print("📊 处理统计:")
    print(f" - 总面试题数: {total_questions}")
    print(f" - 处理的字段数: {processed_questions}")

    new_industries_str = json.dumps(industries, ensure_ascii=False, indent=2)

    # Splice the new array over the matched one by position. Passing the JSON
    # text to re.sub() as a replacement template would re-interpret its
    # backslash escapes (turning an escaped newline into a raw one inside a
    # string literal) and leave the file unparseable.
    start, end = industries_match.span(1)
    new_content = content[:start] + new_industries_str + content[end:]

    with open(mock_path, 'w', encoding='utf-8') as f:
        f.write(new_content)

    print("✅ 已更新 resumeInterviewMock.js")
    return industries
|
|||
|
|
|
|||
|
|
def verify_cleaned_data():
    """Print a short report on the newline state of the mock file.

    Reads ``src/mocks/resumeInterviewMock.js``, counts the occurrences of
    two backslashes followed by ``n`` in the raw file text, and prints a
    sample (length, first 100 characters, newline count) of the first
    sub-question's answer when one is present.

    Raises:
        OSError: If the mock file cannot be read.
        json.JSONDecodeError: If the ``industries`` array is not valid JSON.
    """
    print("\n🔍 验证清理结果...")

    with open('src/mocks/resumeInterviewMock.js', 'r', encoding='utf-8') as f:
        content = f.read()

    # Two backslashes followed by "n" in the raw file: presumably how a
    # newline escape that was not cleaned ends up being serialized.
    escaped_newlines = content.count('\\\\n')
    print(f" - 剩余的 \\\\n 数量: {escaped_newlines}")

    # Show the first answer as a sample of the cleaned data.
    industries_match = re.search(
        r'const industries = (\[.*?\]);', content, re.DOTALL
    )
    if industries_match:
        industries = json.loads(industries_match.group(1))

        # Walk down with .get() so entries that lack a level are skipped
        # instead of raising KeyError.
        questions = industries[0].get('questions') if industries else None
        sub_questions = questions[0].get('subQuestions') if questions else None
        first_answer = sub_questions[0].get('answer') if sub_questions else None

        if isinstance(first_answer, str):
            print("\n📝 清理后的答案示例:")
            print(f" 长度: {len(first_answer)} 字符")
            print(f" 前100字符: {first_answer[:100]}...")

            # Count real newline characters, not the literal two-character
            # backslash-n sequence.
            real_newlines = first_answer.count('\n')
            print(f" 实际换行符数量: {real_newlines}")

    if escaped_newlines == 0:
        print("\n✅ 换行符清理完成!")
    else:
        print(f"\n⚠️ 仍有 {escaped_newlines} 个转义换行符需要处理")
|
|||
|
|
|
|||
|
|
def main():
    """Run the cleanup, verify the result and print a summary.

    Calls ``process_interview_data`` and, when it returned data, runs
    ``verify_cleaned_data`` and prints the list of improvements. Any
    exception raised along the way is reported on stdout rather than
    propagated.
    """
    try:
        industries = process_interview_data()
        if industries is None:
            # No industries array was found, so nothing was rewritten;
            # do not verify or announce success.
            return

        verify_cleaned_data()

        print("\n🎉 面试题换行符处理完成!")
        print("\n📝 主要改进:")
        print(" - 清理了双重转义的换行符 (\\\\n → \\n)")
        print(" - 合并了多余的连续换行符")
        print(" - 删除了开头和结尾的多余换行符")
        print(" - 保持了文本的可读性和格式")

    except Exception as e:
        # Top-level boundary of the script: report the failure instead of
        # showing a traceback.
        print(f"❌ 处理失败: {e}")


# Run the cleanup only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|