Files
Agent-n8n/scripts/fix_all_markdown_paths.py

120 lines
4.4 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
"""
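Fix image paths in the Markdown files of every order class (订单班).
- Remove URL-encoded sub-directories from image paths
- Ensure every image path points to the renamed file under ./image/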
"""
import os
import re
from pathlib import Path
from urllib.parse import unquote
def fix_all_markdown_paths(base_path=None, order_classes=None):
    """Repair image links in the Markdown files of every order class.

    For each ``<base_path>/<order class>/notion文稿`` directory that has an
    ``image`` folder containing ``*.jpg`` files, every ``*.md`` file directly
    inside it is rewritten so that:

    1. ``![alt](./image/<subdir>/<file>)`` loses the intermediate
       sub-directory.
    2. Links to ``<stem>.jpeg`` / ``.png`` / ``.gif`` (and the upper-case
       variants, including ``.JPG``) point at the existing ``<stem>.jpg``.
    3. Bare ``![alt](<name>.jpg)`` links gain the ``./image/`` prefix.

    Before a changed file is written, its original text is saved next to it
    as ``<name>.md.bak2``; an existing backup is never overwritten.

    Args:
        base_path: Root directory holding one folder per order class
            (``str`` or ``Path``). Defaults to the original hard-coded
            project location.
        order_classes: Iterable of order-class folder names to process.
            Defaults to the original built-in list.

    Returns:
        None. Progress and per-file results are printed to stdout.
    """
    if base_path is None:
        base_path = (
            "/Users/xiaoqi/Documents/Dev/Project/"
            "2025-09-08_n8nDEMO演示/data/订单班文档资料"
        )
    base_path = Path(base_path)

    if order_classes is None:
        # The food (食品) and culture/tourism (文旅) classes were already
        # processed, so they are left out of the default list.
        order_classes = [
            "财经商贸", "智能开发", "智能制造",
            "视觉设计", "交通物流", "土木", "大健康", "能源",
            "化工", "环保",
        ]

    print("=" * 60)
    print("修复所有订单班Markdown文件中的图片路径")
    print("=" * 60)

    for order_class in order_classes:
        notion_dir = base_path / order_class / "notion文稿"
        image_dir = notion_dir / "image"
        # A missing image directory implies nothing to fix for this class.
        if not image_dir.is_dir():
            continue

        # Sorted so that the processing order (and the log) is deterministic.
        image_names = sorted(img.name for img in image_dir.glob("*.jpg"))
        md_files = sorted(notion_dir.glob("*.md"))
        if not image_names or not md_files:
            continue

        print(f"\n处理 {order_class}...")
        for md_file in md_files:
            try:
                original_content = md_file.read_text(encoding='utf-8')
                content = _rewrite_image_links(original_content, image_names)

                if content == original_content:
                    print(f" - 无需修复: {md_file.name}")
                    continue

                # Keep the earliest backup: only create it if none exists yet.
                backup_file = md_file.with_name(md_file.name + '.bak2')
                if not backup_file.exists():
                    backup_file.write_text(original_content, encoding='utf-8')

                md_file.write_text(content, encoding='utf-8')
                print(f" ✓ 修复: {md_file.name}")
            except (OSError, UnicodeError) as e:
                # Best effort: report the unreadable/unwritable file and
                # carry on with the remaining ones.
                print(f" ✗ 失败 {md_file.name}: {e}")

    print("\n" + "=" * 60)
    print("✅ 所有Markdown路径修复完成")
    print("=" * 60)


# Matches ![alt](./image/<one sub-directory>/<file>.<image extension>).
_SUBDIR_IMAGE_LINK = re.compile(
    r'!\[([^\]]*)\]\(\./image/[^/)]+/([^)]+\.(?:jpg|jpeg|png|gif))\)',
    re.IGNORECASE,
)

# Extensions a link may still carry although the file on disk is now ".jpg".
_STALE_EXTENSIONS = ('.jpeg', '.png', '.gif', '.JPG', '.JPEG', '.PNG', '.GIF')


def _rewrite_image_links(content, image_names):
    """Return *content* with image links normalised to ``./image/<name>``."""
    # 1. Drop the intermediate sub-directory below ./image/.
    content = _SUBDIR_IMAGE_LINK.sub(r'![\1](./image/\2)', content)

    # 2. Point links that use a stale extension at the existing .jpg file.
    for name in image_names:
        stem = Path(name).stem  # strips only the final suffix
        target = f'](./image/{name})'
        for ext in _STALE_EXTENSIONS:
            content = content.replace(f'](./image/{stem}{ext})', target)
            content = content.replace(f']({stem}{ext})', target)

    # 3. Prefix bare "![alt](name.jpg)" links with ./image/.
    for name in image_names:
        # The file name is literal text, so escape it for the pattern and
        # use a callable replacement so it is never parsed as a template.
        bare_link = re.compile(r'!\[([^\]]*)\]\(' + re.escape(name) + r'\)')
        content = bare_link.sub(
            lambda m, name=name: f'![{m.group(1)}](./image/{name})',
            content,
        )

    return content
if __name__ == "__main__":
    # Run the repair only when this file is executed as a script,
    # not when it is imported as a module.
    fix_all_markdown_paths()