#!/usr/bin/env python3
"""Standalone script that repairs URL-encoded image paths.

It decodes percent-encoded image file names on disk and rewrites the matching
image references inside Markdown files. It can be run directly or imported and
called from a driver script.
"""

import os
import re
import sys
from pathlib import Path
from typing import List, Optional, Tuple
from urllib.parse import unquote

# Default project location used when no ``base_path`` is supplied.
DEFAULT_BASE_PATH = Path("/Users/xiaoqi/Documents/Dev/Project/2025-09-08_n8nDEMO演示")

# Order-class directories processed when the caller does not name any.
ALL_ORDER_CLASSES = [
    "食品", "环保", "财经商贸", "视觉设计", "能源", "交通物流",
    "智能制造", "文旅", "化工", "大健康", "智能开发", "土木",
]

# Image reference syntaxes, applied in this order:
#   ![alt](path) / ![](path)  -- standard Markdown (two groups: alt, path)
#   ![[path]]                 -- Obsidian embed     (one group: path)
_IMAGE_PATTERNS = (
    re.compile(r'!\[(.*?)\]\((.*?)\)'),
    re.compile(r'!\[\[(.*?)\]\]'),
)

# References starting with one of these (case-insensitively) are not local
# files and must never be decoded or relocated.
_EXTERNAL_PREFIXES = ('http://', 'https://', 'data:')


def _normalized_image_path(img_path: str) -> Optional[str]:
    """Return the normalized form of *img_path*, or None to leave it alone.

    Percent-encoded paths are decoded and forced under ``image/``; a leading
    ``./`` in front of ``image/`` is dropped. External references and paths
    that need neither treatment yield None.
    """
    if img_path.lower().startswith(_EXTERNAL_PREFIXES):
        return None

    if '%' in img_path:
        decoded_path = unquote(img_path)
        if decoded_path.startswith('./image/'):
            return decoded_path[2:]  # drop the leading "./"
        if not decoded_path.startswith('image/'):
            # Anything outside image/ is re-rooted there by file name only.
            return f"image/{Path(decoded_path).name}"
        return decoded_path

    if img_path.startswith('./image/'):
        return img_path[2:]  # drop the unnecessary "./"

    return None


def fix_markdown_image_paths(md_file: Path) -> int:
    """Rewrite the image references of a single Markdown file in place.

    Args:
        md_file: Markdown file to process (read and written as UTF-8).

    Returns:
        The number of references whose text actually changed. Returns 0 when
        the file cannot be read or written; the error is printed, not raised.
    """
    try:
        original_content = md_file.read_text(encoding='utf-8')
    except (OSError, UnicodeError) as e:
        print(f" ❌ 处理失败: {e}")
        return 0

    fixed_count = 0

    def _rewrite(match: re.Match) -> str:
        """Return the replacement text for one matched image reference."""
        nonlocal fixed_count

        groups = match.groups()
        if len(groups) == 2:  # standard Markdown: (alt, path)
            alt_text, img_path = groups
        else:  # Obsidian embed: (path,)
            alt_text, img_path = "", groups[0]

        old_reference = match.group(0)
        new_path = _normalized_image_path(img_path)
        if new_path is None:
            return old_reference

        # An empty alt text is filled with the file stem.
        new_reference = f"![{alt_text or Path(new_path).stem}]({new_path})"
        if new_reference == old_reference:
            # e.g. "image/100%.png": contains '%' but decodes to itself.
            # Counting it would report a phantom fix on every run.
            return old_reference

        fixed_count += 1
        print(f" 修复: {old_reference} → {new_reference}")
        return new_reference

    # Substituting match-by-match touches exactly the matched span, unlike a
    # global str.replace() keyed on the matched text.
    content = original_content
    for pattern in _IMAGE_PATTERNS:
        content = pattern.sub(_rewrite, content)

    # Only write the file back when something changed.
    if content != original_content:
        try:
            md_file.write_text(content, encoding='utf-8')
        except (OSError, UnicodeError) as e:
            print(f" ❌ 处理失败: {e}")
            return 0
        print(f" ✅ 已修复 {fixed_count} 个图片引用")

    return fixed_count


def rename_encoded_images(image_dir: Path) -> int:
    """Rename image files whose names contain percent-encoding.

    Args:
        image_dir: Directory whose direct children are inspected.

    Returns:
        The number of files renamed. Files are skipped (never overwritten)
        when the decoded name already exists, and skipped when the decoded
        name would leave *image_dir*.
    """
    renamed_count = 0

    if not image_dir.exists():
        return 0

    separators = tuple(sep for sep in (os.sep, os.altsep) if sep)

    # Snapshot the listing: the directory is modified while we walk it.
    for img_file in list(image_dir.iterdir()):
        if not (img_file.is_file() and '%' in img_file.name):
            continue

        decoded_name = unquote(img_file.name)
        if decoded_name == img_file.name:
            # A literal '%' that is not an escape sequence; nothing to do.
            continue

        if any(sep in decoded_name for sep in separators):
            # e.g. "..%2Fx.png" would decode to "../x.png" and move the file
            # outside the image directory.
            print(f" ⚠️ 跳过 {img_file.name}: 解码后的文件名包含路径分隔符")
            continue

        new_path = img_file.parent / decoded_name
        if new_path.exists():
            # Never clobber an existing file; tell the user instead.
            print(f" ⚠️ 跳过 {img_file.name}: 目标文件已存在")
            continue

        try:
            img_file.rename(new_path)
        except (OSError, ValueError) as e:
            # ValueError covers names that decode to an embedded NUL byte.
            print(f" ❌ 重命名失败 {img_file.name}: {e}")
        else:
            print(f" 重命名: {img_file.name} → {decoded_name}")
            renamed_count += 1

    return renamed_count


def fix_order_class_images(order_dir: Path) -> Tuple[int, int]:
    """Repair image names and references for one order-class directory.

    The directory is expected to contain a ``notion文稿`` sub-directory holding
    the Markdown files and, optionally, an ``image`` directory.

    Args:
        order_dir: The order-class directory to process.

    Returns:
        ``(fixed_markdown_references, renamed_image_files)``; ``(0, 0)`` when
        the directory or its ``notion文稿`` sub-directory is missing.
    """
    print(f"\n处理 {order_dir.name} ...")

    if not order_dir.exists():
        print(" ⚠️ 目录不存在")
        return 0, 0

    notion_dir = order_dir / "notion文稿"
    if not notion_dir.exists():
        print(" ⚠️ notion文稿目录不存在")
        return 0, 0

    total_fixed_refs = 0
    total_renamed_files = 0

    # 1. Decode the image file names on disk.
    image_dir = notion_dir / "image"
    if image_dir.exists():
        print(" 修复图片文件名...")
        renamed = rename_encoded_images(image_dir)
        total_renamed_files += renamed
        if renamed > 0:
            print(f" ✅ 重命名了 {renamed} 个图片文件")

    # 2. Rewrite the references in the Markdown files (sorted so that the
    #    output order is deterministic).
    print(" 修复Markdown文件中的图片引用...")
    for md_file in sorted(notion_dir.glob("*.md")):
        print(f" 处理 {md_file.name}...")
        total_fixed_refs += fix_markdown_image_paths(md_file)

    if total_fixed_refs == 0 and total_renamed_files == 0:
        print(" ✅ 没有需要修复的问题")
    else:
        print(f" 📊 总计: 修复了 {total_fixed_refs} 个引用, 重命名了 {total_renamed_files} 个文件")

    return total_fixed_refs, total_renamed_files


def fix_all_order_classes(
    order_classes: Optional[List[str]] = None,
    base_path: Optional[Path] = None,
) -> None:
    """Repair the given order classes, or all known ones when none are given.

    Args:
        order_classes: Directory names to process. ``None`` or an empty list
            means every entry of ``ALL_ORDER_CLASSES``.
        base_path: Project root containing ``data/订单班文档资料``. Defaults to
            ``DEFAULT_BASE_PATH``.
    """
    root = Path(base_path) if base_path is not None else DEFAULT_BASE_PATH
    data_path = root / "data/订单班文档资料"

    dirs_to_process = order_classes if order_classes else ALL_ORDER_CLASSES

    print("=" * 60)
    print("开始修复图片路径URL编码问题")
    print("=" * 60)

    total_refs = 0
    total_files = 0

    for dir_name in dirs_to_process:
        # Missing directories are reported by fix_order_class_images rather
        # than being dropped silently, so a mistyped name is visible.
        refs, files = fix_order_class_images(data_path / dir_name)
        total_refs += refs
        total_files += files

    print("\n" + "=" * 60)
    print("✅ 修复完成!")
    print(f" 总计修复 {total_refs} 个Markdown引用")
    print(f" 总计重命名 {total_files} 个图片文件")
    print("=" * 60)


def main():
    """Entry point for standalone use.

    An optional first command-line argument is a comma-separated list of
    order-class names; without it every order class is processed.
    """
    if len(sys.argv) > 1:
        # Tolerate "a, b" and stray commas.
        order_classes = [
            name.strip() for name in sys.argv[1].split(',') if name.strip()
        ]
        fix_all_order_classes(order_classes)
    else:
        fix_all_order_classes()


if __name__ == "__main__":
    main()