Files
Agent-n8n/scripts/safe_analyze_images.py

247 lines
8.4 KiB
Python
Raw Permalink Normal View History

#!/usr/bin/env python3
"""
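Safe image analysis script - it only analyzes and never modifies anything.
Run this script first to see which changes would be made; once they look correct, run the actual modification.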
"""
import os
import re
from pathlib import Path
from typing import Dict, List, Tuple
import json
# Default location of the order-class documents (the original author's machine).
DEFAULT_BASE_PATH = Path("/Users/xiaoqi/Documents/Dev/Project/2025-09-08_n8nDEMO演示/data/订单班文档资料")


def analyze_order_images(base_path: Path = DEFAULT_BASE_PATH) -> List[Dict]:
    """Analyze the images of every order class, print a report and save it as JSON.

    The analyzed documents are never modified. The only file written is
    ``image_analysis_report.json`` inside the ``scripts`` directory located
    two levels above ``base_path``.

    Args:
        base_path: Directory containing one sub-directory per order class.
            Defaults to the path that used to be hard-coded here.

    Returns:
        One report dict (as produced by ``analyze_single_order``) for each
        order class whose ``notion文稿`` directory exists.
    """
    base_path = Path(base_path)
    # Order classes expected under base_path.
    order_classes = [
        "文旅", "财经商贸", "食品", "智能开发", "智能制造",
        "视觉设计", "交通物流", "土木", "大健康", "能源",
        "化工", "环保"
    ]
    print("=" * 80)
    print("图片资源分析报告")
    print("=" * 80)
    print(f"基础路径: {base_path}")
    print("")

    reports = []
    for order_class in order_classes:
        order_dir = base_path / order_class
        if not order_dir.exists():
            print(f"⚠️ {order_class}: 目录不存在")
            continue
        notion_dir = order_dir / "notion文稿"
        if not notion_dir.exists():
            print(f"⚠️ {order_class}: notion文稿目录不存在")
            continue
        # Analyze this order class and print its individual report.
        report = analyze_single_order(order_class, notion_dir)
        reports.append(report)
        print_order_report(report)

    total_images = sum(report['total_images'] for report in reports)
    total_to_rename = sum(report['to_rename'] for report in reports)
    total_to_convert = sum(report['to_convert'] for report in reports)

    # Overall summary.
    print("\n" + "=" * 80)
    print("📊 总体统计")
    print("=" * 80)
    print(f"订单班总数: {len(order_classes)}")
    print(f"有效订单班: {len(reports)}")
    print(f"图片总数: {total_images}")
    print(f"需要重命名: {total_to_rename}")
    print(f"需要转格式: {total_to_convert}")

    # Save the detailed report. A missing "scripts" directory or an
    # unwritable location must not crash the run after the analysis has
    # already been done, so the directory is created and I/O errors are
    # reported as a warning instead.
    report_file = base_path.parent.parent / "scripts" / "image_analysis_report.json"
    try:
        report_file.parent.mkdir(parents=True, exist_ok=True)
        with open(report_file, 'w', encoding='utf-8') as f:
            json.dump(reports, f, ensure_ascii=False, indent=2, default=str)
    except OSError as exc:
        print(f"\n⚠️ 无法保存详细报告到 {report_file}: {exc}")
    else:
        print(f"\n📝 详细报告已保存到: {report_file}")
    return reports
# Lower-cased file suffixes that are treated as images.
_IMAGE_SUFFIXES = frozenset({'.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp'})

# Patterns for image references in the notes; the last capture group of
# each pattern is the referenced path.
_IMAGE_REF_PATTERNS = (
    re.compile(r'!\[([^\]]*)\]\(([^)]+)\)'),  # Markdown: ![alt](path)
    re.compile(r'<img[^>]+src=["\']([^"\']+)["\']'),  # HTML: <img src="path">
)


def _list_images(directory: Path) -> List[Path]:
    """Return the image files directly inside *directory* (non-recursive), sorted."""
    if not directory.is_dir():
        return []
    # Comparing the lower-cased suffix matches any letter case and, unlike
    # globbing "*.jpg" plus "*.JPG", never yields the same file twice on a
    # case-insensitive filesystem.
    return sorted(
        entry for entry in directory.iterdir()
        if entry.is_file() and entry.suffix.lower() in _IMAGE_SUFFIXES
    )


def _collect_md_references(md_files: List[Path]) -> List[Dict]:
    """Return one ``{'md_file', 'reference'}`` dict per image reference found."""
    references = []
    for md_file in md_files:
        try:
            content = md_file.read_text(encoding='utf-8')
        except (OSError, UnicodeDecodeError) as exc:
            # Best effort: an unreadable note is skipped, but visibly.
            print(f"⚠️ 无法读取 {md_file.name}: {exc}")
            continue
        for pattern in _IMAGE_REF_PATTERNS:
            for match in pattern.findall(content):
                # findall yields tuples for multi-group patterns, str otherwise.
                img_ref = match[-1] if isinstance(match, tuple) else match
                references.append({
                    'md_file': md_file.name,
                    'reference': img_ref
                })
    return references


def analyze_single_order(order_class: str, notion_dir: Path) -> Dict:
    """Analyze the images of a single order class without touching any file.

    Args:
        order_class: Name of the order class; forwarded to
            ``generate_new_name``.
        notion_dir: The class's ``notion文稿`` directory. Images are looked up
            directly in it and in its ``image`` sub-directory.

    Returns:
        A dict with the counts (``total_images``, ``root_images``,
        ``sub_images``, ``to_rename``, ``to_convert``, ``to_move``,
        ``md_files``, ``md_references``) and the first five entries of
        ``rename_list``, ``convert_list`` and ``move_list``.
    """
    image_dir = notion_dir / "image"

    # Images lying loose in the notes directory, and those already in image/.
    root_images = _list_images(notion_dir)
    sub_images = _list_images(image_dir)
    all_images = sorted(root_images + sub_images)

    # Planned modifications.
    rename_list = []
    convert_list = []
    move_list = []
    # Per-category running numbers used to build the new file names.
    counters = {
        "设计图": 0,
        "展示图": 0,
        "效果图": 0,
        "流程图": 0,
        "场景图": 0,
        "图片": 0
    }
    for img_path in all_images:
        old_name = img_path.name
        # Pass the shared dict, not a copy: generate_new_name advances the
        # counter, and with a copy every image of a category got number 01.
        new_name = generate_new_name(old_name, order_class, counters)
        # Does it have to be moved into the image directory?
        if img_path.parent != image_dir:
            move_list.append({
                'from': str(img_path.relative_to(notion_dir)),
                'to': f"image/{new_name}"
            })
        # Does it have to be renamed?
        if old_name != new_name:
            rename_list.append({
                'old': old_name,
                'new': new_name
            })
        # Does it have to be converted? Everything that is not *.jpg does.
        if not old_name.lower().endswith('.jpg'):
            convert_list.append({
                'file': old_name,
                'from_format': Path(old_name).suffix,
                'to_format': '.jpg'
            })

    # Markdown notes and the image references they contain.
    md_files = list(notion_dir.glob("*.md"))
    md_references = _collect_md_references(md_files)

    return {
        'order_class': order_class,
        'total_images': len(all_images),
        'root_images': len(root_images),
        'sub_images': len(sub_images),
        'to_rename': len(rename_list),
        'to_convert': len(convert_list),
        'to_move': len(move_list),
        'md_files': len(md_files),
        'md_references': len(md_references),
        'rename_list': rename_list[:5],  # first 5 only
        'convert_list': convert_list[:5],  # first 5 only
        'move_list': move_list[:5],  # first 5 only
    }
def generate_new_name(filename: str, order_class: str, counters: Dict[str, int]) -> str:
    """Build the standardized ``<category>_<NN>.jpg`` name for an image.

    The category is picked from keywords found in the lower-cased file stem.
    The matching entry of *counters* is incremented in place and its new
    value becomes the two-digit running number of the returned name.

    Args:
        filename: Current file name of the image.
        order_class: Order class the image belongs to.
        counters: Running number per category; mutated by this call.

    Returns:
        The new file name, always with a ``.jpg`` extension.
    """
    stem = Path(filename).stem.lower()

    # Categories in priority order: the first one with a keyword hit wins.
    keyword_table = (
        ("设计图", ('设计', 'design', 'whisk', 'cad', '3d', '三维', '渲染')),
        ("展示图", ('展示', 'display', 'show', '展台', '展位')),
        ("效果图", ('效果', 'effect', 'render')),
        ("流程图", ('流程', 'flow', 'process', '步骤')),
        ("场景图", ('场景', 'scene', '展会', '博览', '会场', '签到', '试驾')),
    )

    if order_class == "文旅" and 'whisk' in stem:
        # Whisk images of the 文旅 order class are always design images.
        category = "设计图"
    else:
        for label, keywords in keyword_table:
            if any(keyword in stem for keyword in keywords):
                category = label
                break
        else:
            # No keyword matched: names starting with a digit count as
            # display images, everything else is a generic image.
            category = "展示图" if stem[:1].isdigit() else "图片"

    counters[category] += 1
    return f"{category}_{counters[category]:02d}.jpg"
def print_order_report(report: Dict):
    """Print the analysis report of a single order class to stdout.

    Args:
        report: A report dict providing the keys ``order_class``,
            ``total_images``, ``root_images``, ``sub_images``, ``to_move``,
            ``to_rename``, ``to_convert``, ``md_files``, ``md_references``,
            ``rename_list`` (items with ``old``/``new``) and ``move_list``
            (items with ``from``/``to``).
    """
    separator = '=' * 50
    print(f"\n{separator}")
    print(f"📁 {report['order_class']}订单班")
    print(f"{separator}")
    print(f" 图片总数: {report['total_images']}")
    print(f" ├── 根目录: {report['root_images']}")
    print(f" └── image目录: {report['sub_images']}")
    print(f" 需要移动: {report['to_move']}")
    print(f" 需要重命名: {report['to_rename']}")
    print(f" 需要转格式: {report['to_convert']}")
    print(f" MD文件: {report['md_files']}")
    print(f" 图片引用: {report['md_references']}")
    if report['rename_list']:
        print(f"\n 📝 重命名示例 (前{len(report['rename_list'])}个):")
        for item in report['rename_list']:
            # Separate old and new name; printed back to back they were
            # impossible to tell apart.
            print(f" {item['old']} → {item['new']}")
    if report['move_list']:
        print(f"\n 📦 需要移动到image目录:")
        for item in report['move_list']:
            print(f" {item['from']} → {item['to']}")
if __name__ == "__main__":
    # Opening banner: announce the run and that nothing will be modified.
    opening_lines = (
        "🔍 开始分析图片资源...",
        "⚠️ 本脚本只分析,不会修改任何文件",
        "",
    )
    for banner_line in opening_lines:
        print(banner_line)

    analyze_order_images()

    # Closing banner with the suggested next steps.
    closing_lines = (
        "\n" + "=" * 80,
        "✅ 分析完成!",
        "",
        "下一步:",
        "1. 查看生成的 image_analysis_report.json 详细报告",
        "2. 确认修改计划无误",
        "3. 运行实际修改脚本(带备份功能)",
    )
    for banner_line in closing_lines:
        print(banner_line)