#!/usr/bin/env python3
"""Validate that the image links in the order-class Markdown notes resolve.

For every order-class directory under ``data/订单班文档资料`` the script scans
``notion文稿/*.md``, extracts local image references (Markdown
``![alt](path)`` and HTML ``<img src="...">``) and checks that the referenced
file name exists in ``notion文稿/image``. A summary is printed to stdout and a
plain-text report is written two directories above this file.
"""

import re
from pathlib import Path
from urllib.parse import unquote
from collections import defaultdict

# File suffixes (lower-case) that count as images inside the image directory.
IMAGE_SUFFIXES = frozenset({'.jpg', '.jpeg', '.png', '.gif', '.webp'})

# References starting with one of these are remote/inline, not local files.
EXTERNAL_PREFIXES = ('http://', 'https://', '//', 'data:')

# ![alt](target) -- the target may contain one level of balanced parentheses,
# e.g. "image/foo(1).png".
MARKDOWN_IMAGE_RE = re.compile(r'!\[([^\]]*)\]\(((?:[^()]|\([^()]*\))+)\)')

# <img ... src="target" ...> -- requires whitespace before ``src`` so that
# attributes such as ``data-src`` are not mistaken for it.
HTML_IMAGE_RE = re.compile(
    r'<img\s+(?:[^>]*?\s)?src\s*=\s*["\']([^"\']+)["\'][^>]*>',
    re.IGNORECASE,
)

# Optional trailing title in a Markdown target: path "title" / path 'title'.
MARKDOWN_TITLE_RE = re.compile(r'''\s+(?:"[^"]*"|'[^']*')$''')


class ImageValidator:
    """Check local image references in each order class's Markdown notes.

    Attributes:
        data_path: Directory whose sub-directories are the order classes.
        broken_links: Maps order-class name to a list of broken-link records
            (dicts with ``file``, ``ref`` and ``expected`` keys).
        valid_links: Running count of references that resolved.
        total_links: Running count of all references examined.
    """

    def __init__(self):
        self.data_path = Path(__file__).parent.parent / "data" / "订单班文档资料"
        self.broken_links = defaultdict(list)
        self.valid_links = 0
        self.total_links = 0

    @staticmethod
    def _is_external(ref: str) -> bool:
        """Return True when *ref* is a remote or inline (data:) resource."""
        return ref.lower().startswith(EXTERNAL_PREFIXES)

    @staticmethod
    def _clean_markdown_target(raw: str) -> str:
        """Strip whitespace, an optional title and ``<...>`` from a target."""
        target = MARKDOWN_TITLE_RE.sub('', raw.strip())
        if target.startswith('<') and target.endswith('>'):
            target = target[1:-1]
        return target.strip()

    def extract_image_refs(self, content: str) -> list:
        """Extract local image references from Markdown text.

        Both Markdown image syntax and HTML ``<img>`` tags are recognised.
        External references (http/https, protocol-relative, ``data:``) are
        skipped because they cannot be checked against the local image
        directory.

        Args:
            content: Full text of a Markdown document.

        Returns:
            The reference strings as written in the document (still
            URL-encoded): all Markdown references in document order,
            followed by all HTML references in document order.
        """
        refs = []
        for match in MARKDOWN_IMAGE_RE.finditer(content):
            target = self._clean_markdown_target(match.group(2))
            if target and not self._is_external(target):
                refs.append(target)
        for match in HTML_IMAGE_RE.finditer(content):
            src = match.group(1).strip()
            if src and not self._is_external(src):
                refs.append(src)
        return refs

    def normalize_path(self, path: str) -> str:
        """Normalise an image reference to a path relative to the notes dir.

        The reference is URL-decoded and a leading ``./`` is dropped. A bare
        file name (no ``/``) is assumed to live in ``image/``; anything else
        is returned unchanged.

        Args:
            path: Reference string as it appears in the document.

        Returns:
            The normalised path string.
        """
        path = unquote(path)
        if path.startswith('./'):
            path = path[2:]
        if path.startswith('image/') or '/' in path:
            return path
        return f"image/{path}"

    def validate_order_class(self, order_dir: Path) -> dict:
        """Validate the image links of a single order class.

        Updates ``total_links``, ``valid_links`` and ``broken_links`` on the
        instance as a side effect. Markdown files that cannot be read or
        decoded as UTF-8 are reported on stdout and skipped.

        Args:
            order_dir: Directory of one order class; its notes are expected
                in the ``notion文稿`` sub-directory.

        Returns:
            A dict with keys ``name``, ``total``, ``valid``, ``broken`` and
            ``broken_files`` (a list of dicts with ``file``, ``ref`` and
            ``expected`` keys).
        """
        notion_dir = order_dir / "notion文稿"
        image_dir = notion_dir / "image"

        stats = {
            "name": order_dir.name,
            "total": 0,
            "valid": 0,
            "broken": 0,
            "broken_files": []
        }

        if not notion_dir.is_dir():
            return stats

        # Names of the image files that actually exist (top level only).
        actual_images = set()
        if image_dir.is_dir():
            actual_images = {
                img_file.name
                for img_file in image_dir.iterdir()
                if img_file.is_file()
                and img_file.suffix.lower() in IMAGE_SUFFIXES
            }

        # Sorted so that the console output and the report are deterministic.
        for md_file in sorted(notion_dir.glob("*.md")):
            # The image index file is skipped; it is not a regular note.
            if md_file.name == "图片索引.md":
                continue

            # Only the read can legitimately fail here; keep the try narrow
            # so programming errors below are not reported as read failures.
            try:
                content = md_file.read_text(encoding='utf-8')
            except (OSError, UnicodeDecodeError) as e:
                print(f" ⚠️ 无法读取 {md_file.name}: {e}")
                continue

            for ref in self.extract_image_refs(content):
                stats["total"] += 1
                self.total_links += 1

                normalized = self.normalize_path(ref)
                # Only the final path component is compared, so nested
                # sub-directories below image/ are not distinguished.
                if normalized.startswith('image/'):
                    img_name = normalized.split('/')[-1]
                else:
                    img_name = normalized

                if img_name in actual_images:
                    stats["valid"] += 1
                    self.valid_links += 1
                    continue

                record = {
                    "file": md_file.name,
                    "ref": ref,
                    "expected": img_name
                }
                stats["broken"] += 1
                stats["broken_files"].append(record)
                # Separate copy so the two collections stay independent.
                self.broken_links[order_dir.name].append(dict(record))

        return stats

    @staticmethod
    def _print_class_result(stats: dict) -> None:
        """Print the one-class result line plus up to three broken links."""
        if stats["total"] == 0:
            print(f"ℹ️ {stats['name']}: 无图片引用")
            return
        if stats["broken"] == 0:
            print(f"✅ {stats['name']}: 所有 {stats['total']} 个链接有效")
            return

        print(f"❌ {stats['name']}: {stats['broken']}/{stats['total']} 个链接损坏")
        broken_files = stats["broken_files"]
        for broken in broken_files[:3]:
            print(f" - {broken['file']}: {broken['ref']}")
        if len(broken_files) > 3:
            print(f" ... 还有 {len(broken_files) - 3} 个")

    def _print_summary(self, order_count: int) -> None:
        """Print overall totals and the order classes that need fixing."""
        broken_total = self.total_links - self.valid_links

        print("\n" + "="*60)
        print("📊 总结")
        print("="*60)
        print(f" 检查订单班: {order_count} 个")
        print(f" 总链接数: {self.total_links}")
        print(f" 有效链接: {self.valid_links}")
        print(f" 损坏链接: {broken_total}")

        # Also covers total_links == 0, so the division below is safe.
        if broken_total == 0 or self.total_links == 0:
            print("\n🎉 所有图片链接都有效!")
            return

        broken_percentage = (broken_total / self.total_links) * 100
        print(f"\n⚠️ {broken_percentage:.1f}% 的链接需要修复")

        print("\n需要修复的订单班:")
        for order_name, broken_refs in self.broken_links.items():
            print(f" • {order_name}: {len(broken_refs)} 个损坏链接")

    @staticmethod
    def _write_report(all_stats: list, report_file: Path) -> None:
        """Write the per-class statistics to *report_file* as UTF-8 text."""
        with open(report_file, 'w', encoding='utf-8') as f:
            f.write("图片链接验证报告\n")
            f.write("="*60 + "\n\n")

            for stats in all_stats:
                f.write(f"{stats['name']}:\n")
                f.write(f" 总链接: {stats['total']}\n")
                f.write(f" 有效: {stats['valid']}\n")
                f.write(f" 损坏: {stats['broken']}\n")

                if stats['broken_files']:
                    f.write(" 损坏详情:\n")
                    for broken in stats['broken_files']:
                        f.write(
                            f" - {broken['file']}: {broken['ref']}"
                            f" -> {broken['expected']}\n"
                        )
                f.write("\n")

    def run(self):
        """Validate every order class, print results and save a report.

        If the data directory does not exist, a message is printed and the
        method returns without writing a report.
        """
        print("\n" + "="*60)
        print("🔍 图片链接验证工具")
        print("="*60)

        if not self.data_path.is_dir():
            print(f"❌ 数据目录不存在: {self.data_path}")
            return

        # Every sub-directory of the data path is one order class.
        order_classes = sorted(
            d for d in self.data_path.iterdir() if d.is_dir()
        )

        all_stats = []
        for order_dir in order_classes:
            stats = self.validate_order_class(order_dir)
            all_stats.append(stats)
            self._print_class_result(stats)

        self._print_summary(len(order_classes))

        report_file = Path(__file__).parent.parent / "image_validation_report.txt"
        self._write_report(all_stats, report_file)
        print(f"\n📄 详细报告已保存到: {report_file}")


if __name__ == "__main__":
    validator = ImageValidator()
    validator.run()