Agent-n8n/scripts/safe_analyze_images.py

#!/usr/bin/env python3
"""
安全的图片分析脚本 - 只分析不修改
先运行这个脚本查看会做哪些修改，确认无误后再执行实际修改
"""

import json
import os
import re
from pathlib import Path
from typing import Dict, List, Optional, Tuple

def analyze_order_images(base_path: Optional[Path] = None) -> List[Dict]:
    """Analyze the images of every known order class and print a report.

    For each order class whose ``<base_path>/<class>/notion文稿`` directory
    exists, the per-class report is built with ``analyze_single_order`` and
    printed with ``print_order_report``. Afterwards overall totals are printed
    and all per-class reports are written as JSON to
    ``<base_path>/../../scripts/image_analysis_report.json``.

    Args:
        base_path: Directory that contains one sub-directory per order class.
            When omitted, the project's original hard-coded location is used,
            so existing callers keep working unchanged.

    Returns:
        The list of per-class report dictionaries (classes whose directories
        are missing are skipped and do not appear in the list).
    """
    if base_path is None:
        base_path = Path("/Users/xiaoqi/Documents/Dev/Project/2025-09-08_n8nDEMO演示/data/订单班文档资料")
    else:
        # Accept plain strings as well as Path objects.
        base_path = Path(base_path)

    # Names of the order classes to inspect.
    order_classes = [
        "文旅", "财经商贸", "食品", "智能开发", "智能制造",
        "视觉设计", "交通物流", "土木", "大健康", "能源",
        "化工", "环保"
    ]

    print("=" * 80)
    print("图片资源分析报告")
    print("=" * 80)
    print(f"基础路径: {base_path}")
    print("")

    total_images = 0
    total_to_rename = 0
    total_to_convert = 0
    reports = []

    for order_class in order_classes:
        order_dir = base_path / order_class

        if not order_dir.exists():
            print(f"⚠️  {order_class}: 目录不存在")
            continue

        notion_dir = order_dir / "notion文稿"
        if not notion_dir.exists():
            print(f"⚠️  {order_class}: notion文稿目录不存在")
            continue

        # Analyze this order class and fold its numbers into the totals.
        report = analyze_single_order(order_class, notion_dir)
        reports.append(report)

        total_images += report['total_images']
        total_to_rename += report['to_rename']
        total_to_convert += report['to_convert']

        print_order_report(report)

    # Overall summary.
    print("\n" + "=" * 80)
    print("📊 总体统计")
    print("=" * 80)
    print(f"订单班总数: {len(order_classes)}")
    print(f"有效订单班: {len(reports)}")
    print(f"图片总数: {total_images}")
    print(f"需要重命名: {total_to_rename}")
    print(f"需要转格式: {total_to_convert}")

    # Persist the detailed report. The target directory is created first so
    # the analysis is not lost to a FileNotFoundError after all work is done.
    report_file = base_path.parent.parent / "scripts" / "image_analysis_report.json"
    report_file.parent.mkdir(parents=True, exist_ok=True)
    with open(report_file, 'w', encoding='utf-8') as f:
        json.dump(reports, f, ensure_ascii=False, indent=2, default=str)
    print(f"\n📝 详细报告已保存到: {report_file}")

    return reports

def analyze_single_order(order_class: str, notion_dir: Path) -> Dict:
    """Analyze the images and Markdown image references of one order class.

    Images are looked up directly inside ``notion_dir`` (stray images) and
    inside ``notion_dir / "image"``. For every image the function works out
    the standardized name (via ``generate_new_name``), whether the file would
    have to be moved into the ``image`` directory, renamed, or converted to
    ``.jpg``. Nothing on disk is changed.

    Args:
        order_class: Name of the order class; forwarded to
            ``generate_new_name`` and echoed in the report.
        notion_dir: The order class's ``notion文稿`` directory.

    Returns:
        A dictionary with the counts (``total_images``, ``root_images``,
        ``sub_images``, ``to_rename``, ``to_convert``, ``to_move``,
        ``md_files``, ``md_references``) plus ``rename_list``,
        ``convert_list`` and ``move_list`` samples truncated to five entries.
    """
    image_dir = notion_dir / "image"

    root_images = _collect_images(notion_dir)
    sub_images = _collect_images(image_dir)
    # The two directories are distinct, so the lists cannot overlap.
    all_images = sorted(root_images + sub_images)

    rename_list = []
    convert_list = []
    move_list = []

    # Running per-category sequence numbers for the generated names.
    counters = {
        "设计图": 0,
        "展示图": 0,
        "效果图": 0,
        "流程图": 0,
        "场景图": 0,
        "图片": 0
    }

    for img_path in all_images:
        old_name = img_path.name
        # Pass the dict itself, not a copy: generate_new_name increments the
        # category counter in place, and that increment must survive to the
        # next image or every file in a category would be named "..._01.jpg".
        new_name = generate_new_name(old_name, order_class, counters)

        # Does the image live outside the image directory?
        if img_path.parent != image_dir:
            move_list.append({
                'from': str(img_path.relative_to(notion_dir)),
                'to': f"image/{new_name}"
            })

        # Does the name differ from the standardized one?
        if old_name != new_name:
            rename_list.append({
                'old': old_name,
                'new': new_name
            })

        # Is the file anything other than .jpg?
        if not old_name.lower().endswith('.jpg'):
            convert_list.append({
                'file': old_name,
                'from_format': Path(old_name).suffix,
                'to_format': '.jpg'
            })

    # Scan the Markdown files for image references.
    md_files = list(notion_dir.glob("*.md"))
    md_references = []

    # Compiled once, outside the per-file loop.
    reference_patterns = [
        re.compile(r'!\[([^\]]*)\]\(([^)]+)\)'),  # Markdown image syntax
        re.compile(r'<img[^>]+src=["\']([^"\']+)["\']'),  # HTML <img> tag
    ]

    for md_file in md_files:
        try:
            content = md_file.read_text(encoding='utf-8')
        except (OSError, UnicodeDecodeError) as exc:
            # Best effort: an unreadable file is reported and skipped rather
            # than aborting the whole analysis.
            print(f"⚠️  {order_class}: 无法读取 {md_file.name} ({exc})")
            continue

        for pattern in reference_patterns:
            for match in pattern.findall(content):
                # findall yields tuples for multi-group patterns and plain
                # strings for single-group ones; the path is the last group.
                img_ref = match[-1] if isinstance(match, tuple) else match
                md_references.append({
                    'md_file': md_file.name,
                    'reference': img_ref
                })

    return {
        'order_class': order_class,
        'total_images': len(all_images),
        'root_images': len(root_images),
        'sub_images': len(sub_images),
        'to_rename': len(rename_list),
        'to_convert': len(convert_list),
        'to_move': len(move_list),
        'md_files': len(md_files),
        'md_references': len(md_references),
        'rename_list': rename_list[:5],  # first five only
        'convert_list': convert_list[:5],  # first five only
        'move_list': move_list[:5],  # first five only
    }


def _collect_images(directory: Path) -> List[Path]:
    """Return the image files located directly inside ``directory``.

    The extension is compared case-insensitively, so ``.JPG`` and ``.Jpg``
    are found as well, and each file is listed exactly once. A missing
    directory yields an empty list.
    """
    image_suffixes = ('.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp')
    if not directory.is_dir():
        return []
    return [
        entry for entry in directory.iterdir()
        if entry.is_file() and entry.name.lower().endswith(image_suffixes)
    ]

def generate_new_name(filename: str, order_class: str, counters: Dict[str, int]) -> str:
    """Build the standardized ``<category>_<NN>.jpg`` name for an image.

    The lower-cased file stem is matched against keyword groups in priority
    order; the first group with a hit decides the category. Stems without a
    keyword hit fall back to "展示图" when they start with a digit and to
    "图片" otherwise.

    Args:
        filename: Original file name (with extension).
        order_class: Name of the order class the image belongs to.
        counters: Per-category sequence numbers. The chosen category's entry
            is incremented in place and its new value becomes the suffix.

    Returns:
        The new file name, always with a ``.jpg`` extension.
    """
    stem = Path(filename).stem.lower()

    # Checked top to bottom; the first matching row wins. 'whisk' is listed
    # for every order class, which also covers the 文旅 Whisk images, so
    # order_class does not influence the chosen category.
    keyword_table = (
        ("设计图", ('设计', 'design', 'whisk', 'cad', '3d', '三维', '渲染')),
        ("展示图", ('展示', 'display', 'show', '展台', '展位')),
        ("效果图", ('效果', 'effect', 'render')),
        ("流程图", ('流程', 'flow', 'process', '步骤')),
        ("场景图", ('场景', 'scene', '展会', '博览', '会场', '签到', '试驾')),
    )

    category = next(
        (label for label, words in keyword_table
         if any(word in stem for word in words)),
        None,
    )
    if category is None:
        # Slicing keeps this safe for an empty stem.
        category = "展示图" if stem[:1].isdigit() else "图片"

    counters[category] += 1
    sequence = counters[category]
    return f"{category}_{sequence:02d}.jpg"

def print_order_report(report: Dict):
    """Print the analysis summary of a single order class to stdout.

    Args:
        report: A report dictionary with the keys ``order_class``,
            ``total_images``, ``root_images``, ``sub_images``, ``to_move``,
            ``to_rename``, ``to_convert``, ``md_files``, ``md_references``,
            ``rename_list`` and ``move_list``.
    """
    rule = '=' * 50
    print(f"\n{rule}")
    print(f"📁 {report['order_class']}订单班")
    print(rule)

    # (line template, report key) pairs, printed in this order.
    count_rows = (
        ("  图片总数: {}", 'total_images'),
        ("  ├── 根目录: {} 张", 'root_images'),
        ("  └── image目录: {} 张", 'sub_images'),
        ("  需要移动: {} 张", 'to_move'),
        ("  需要重命名: {} 张", 'to_rename'),
        ("  需要转格式: {} 张", 'to_convert'),
        ("  MD文件: {} 个", 'md_files'),
        ("  图片引用: {} 处", 'md_references'),
    )
    for template, key in count_rows:
        print(template.format(report[key]))

    # Sample of planned renames, shown only when there are any.
    renames = report['rename_list']
    if renames:
        print(f"\n  📝 重命名示例 (前{len(renames)}个):")
        for entry in renames:
            print(f"    {entry['old']} → {entry['new']}")

    # Sample of files that would be moved into the image directory.
    moves = report['move_list']
    if moves:
        print("\n  📦 需要移动到image目录:")
        for entry in moves:
            print(f"    {entry['from']} → {entry['to']}")

if __name__ == "__main__":
    # Opening banner.
    for banner_line in (
        "🔍 开始分析图片资源...",
        "⚠️  本脚本只分析，不会修改任何文件",
        "",
    ):
        print(banner_line)

    # Runs the analysis, prints the reports and writes the JSON report file.
    analyze_order_images()

    # Closing banner with the suggested next steps.
    closing_lines = [
        "\n" + "="*80,
        "✅ 分析完成！",
        "",
        "下一步：",
        "1. 查看生成的 image_analysis_report.json 详细报告",
        "2. 确认修改计划无误",
        "3. 运行实际修改脚本（带备份功能）",
    ]
    print("\n".join(closing_lines))