Files
Agent-n8n/scripts/organize_doc_images.py

308 lines
10 KiB
Python
Raw Permalink Normal View History

#!/usr/bin/env python3
"""
整理 doc/订单班文档资料 目录下的图片
统一命名格式并更新Markdown引用
"""
import json
import os
import re
import shutil
import sys
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Optional
def organize_order_class_images(
    base_path: str,
    order_classes: Optional[List[str]] = None,
) -> int:
    """Organize the images of every order class found under ``base_path``.

    For each order class, the ``notion文稿`` sub-directory is looked up, an
    ``image`` directory is created inside it when missing, and both are handed
    to ``process_order_images``. Order classes whose directory or
    ``notion文稿`` sub-directory does not exist are reported and skipped.

    Args:
        base_path: Directory containing one sub-directory per order class.
        order_classes: Names of the order-class directories to process. When
            omitted, the built-in list of twelve classes is used.

    Returns:
        The sum of the counts returned by ``process_order_images``.
    """
    if order_classes is None:
        order_classes = [
            "文旅", "财经商贸", "食品", "智能开发", "智能制造",
            "视觉设计", "交通物流", "土木", "大健康", "能源",
            "化工", "环保",
        ]

    base_dir = Path(base_path)
    total_processed = 0

    for order_class in order_classes:
        order_dir = base_dir / order_class
        if not order_dir.exists():
            print(f"⚠ 目录不存在: {order_dir}")
            continue

        # Images live next to the exported Notion documents.
        notion_dir = order_dir / "notion文稿"
        if not notion_dir.exists():
            print(f"⚠ notion文稿目录不存在: {notion_dir}")
            continue

        image_dir = notion_dir / "image"
        if not image_dir.exists():
            image_dir.mkdir(parents=True, exist_ok=True)
            print(f"✓ 创建图片目录: {image_dir}")

        total_processed += process_order_images(order_class, notion_dir, image_dir)

    print(f"\n✅ 总计处理 {total_processed} 张图片")
    return total_processed
def process_order_images(order_class: str, notion_dir: Path, image_dir: Path) -> int:
    """Move, rename and index the images of a single order class.

    Steps:
      1. Collect ``*.jpg`` / ``*.jpeg`` / ``*.png`` files from ``notion_dir``
         (non-recursive) and from ``image_dir``.
      2. Move the files found directly in ``notion_dir`` into ``image_dir``.
         A file is left where it is when ``image_dir`` already holds a file
         with the same name, so nothing is overwritten.
      3. Rename every collected image to the name produced by
         ``generate_new_name``; a rename is skipped when the target name is
         already taken.
      4. When at least one file was renamed, update the Markdown references
         and save the old-name -> new-name mapping.
      5. Regenerate the image index.

    Args:
        order_class: Name of the order class (used for naming and the index).
        notion_dir: Directory holding the Markdown documents.
        image_dir: Directory the images are gathered in.

    Returns:
        Number of images in ``image_dir`` that were considered for renaming,
        or 0 when no image was found.
    """
    print(f"\n{'='*50}")
    print(f"处理订单班: {order_class}")
    print(f"{'='*50}")

    image_globs = ("*.jpg", "*.jpeg", "*.png")

    # Snapshot both listings before anything is moved.
    root_images = [img for pattern in image_globs for img in notion_dir.glob(pattern)]
    all_images = []
    if image_dir.exists():
        all_images.extend(
            img for pattern in image_globs for img in image_dir.glob(pattern)
        )

    if root_images:
        # shutil.move needs the destination directory to exist.
        image_dir.mkdir(parents=True, exist_ok=True)

    for img in root_images:
        target = image_dir / img.name
        if img == target:
            # Already in place (and already listed above).
            continue
        if target.exists():
            # Moving would replace the existing file and list the same path
            # twice, which later breaks the rename loop. Keep both files.
            print(f" ⚠ 已存在同名文件，未移动: {img.name}")
            continue
        shutil.move(str(img), str(target))
        print(f" ➜ 移动到image目录: {img.name}")
        all_images.append(target)

    if not all_images:
        print(" 没有找到图片文件")
        return 0

    image_mapping = {}
    renamed_count = 0
    # Per-category running numbers consumed by generate_new_name.
    counters = {
        "设计图": 0,
        "展示图": 0,
        "效果图": 0,
        "流程图": 0,
        "场景图": 0,
        "其他": 0,
    }

    # Sorted so that numbering is deterministic between runs.
    for img_path in sorted(all_images):
        old_name = img_path.name
        new_name = generate_new_name(old_name, order_class, counters)
        if old_name == new_name:
            continue

        new_path = img_path.parent / new_name
        if new_path.exists():
            # Never overwrite another image; report instead of skipping silently.
            print(f" ⚠ 目标名称已存在，跳过重命名: {old_name} -> {new_name}")
            continue

        shutil.move(str(img_path), str(new_path))
        image_mapping[old_name] = new_name
        renamed_count += 1
        print(f" ✓ 重命名: {old_name} -> {new_name}")

    if image_mapping:
        update_markdown_references(notion_dir, image_mapping)
        save_mapping(notion_dir, image_mapping)

    create_image_index(order_class, image_dir)
    print(f" 完成: 重命名 {renamed_count} 张图片")
    return len(all_images)
def generate_new_name(filename: str, order_class: str, counters: Dict[str, int]) -> str:
    """Derive the normalized file name for an image.

    The extension is lower-cased and ``.jpeg`` becomes ``.jpg``. The stem is
    then classified by the first rule that applies:

      1. It contains a category keyword (case-insensitive) -> ``<类别>_NN``.
      2. It starts with a digit -> ``展示图_NN``.
      3. It contains a digit or one of ``._-()[]{}`` -> ``图片_NN`` (counted
         under ``"其他"``).
      4. Otherwise the stem is kept and only the extension is normalized.

    Args:
        filename: Current file name (with extension).
        order_class: Name of the order class. It does not change the result:
            ``Whisk_*`` files are already covered by the ``whisk`` keyword of
            the ``设计图`` category for every order class.
        counters: Running number per category; the entry of the chosen
            category is incremented in place.

    Returns:
        The new file name. An empty stem falls through to rule 4 instead of
        raising ``IndexError``.
    """
    path = Path(filename)
    name = path.stem
    ext = path.suffix.lower()
    if ext == '.jpeg':
        ext = '.jpg'

    def numbered(counter_key: str, prefix: str) -> str:
        # Advance the category counter and format it as a two-digit suffix.
        counters[counter_key] += 1
        return f"{prefix}_{counters[counter_key]:02d}{ext}"

    # Order matters: the first category with a matching keyword wins.
    keyword_rules = (
        ("设计图", ('设计', 'design', 'whisk')),
        ("展示图", ('展示', 'display', 'show')),
        ("效果图", ('效果', 'effect', 'render')),
        ("流程图", ('流程', 'flow', 'process')),
        ("场景图", ('场景', 'scene', '展会', '博览')),
    )
    lowered = name.lower()
    for category, keywords in keyword_rules:
        if any(keyword in lowered for keyword in keywords):
            return numbered(category, category)

    # Slicing (not indexing) keeps an empty stem from raising IndexError.
    if name[:1].isdigit():
        return numbered("展示图", "展示图")

    if any(char.isdigit() or char in '._-()[]{}' for char in name):
        return numbered("其他", "图片")

    # No digits or separators in the stem: keep it as it is.
    return f"{name}{ext}"
def update_markdown_references(notion_dir: Path, image_mapping: Dict[str, str]) -> None:
    """Rewrite image references in the Markdown files of ``notion_dir``.

    For every ``old_name -> new_name`` pair the following are replaced, in
    this order, in each ``*.md`` file directly inside ``notion_dir``:

      * ``![alt](old_name)``
      * ``![alt](./image/old_name)``
      * ``![alt](image/old_name)``
      * any remaining occurrence of ``old_name`` delimited by word boundaries

    The alt text and the path prefix of a link are preserved. A file is only
    written back when its content changed.

    Args:
        notion_dir: Directory whose Markdown files are updated.
        image_mapping: Old file name -> new file name.
    """

    def link_replacer(prefix: str, new_name: str):
        # A callable replacement inserts the new name literally; a template
        # string would have its backslashes interpreted by re.sub.
        def replace(match):
            return f"![{match.group(1)}]({prefix}{new_name})"
        return replace

    def literal_replacer(new_name: str):
        def replace(match):
            return new_name
        return replace

    # The patterns depend only on the mapping, so build them once and reuse
    # them for every Markdown file.
    link_prefixes = ("", "./image/", "image/")
    substitutions = []
    for old_name, new_name in image_mapping.items():
        escaped = re.escape(old_name)
        for prefix in link_prefixes:
            pattern = re.compile(
                rf"!\[([^\]]*)\]\({re.escape(prefix)}{escaped}\)"
            )
            substitutions.append((pattern, link_replacer(prefix, new_name)))
        # Plain mentions of the file name outside the link syntax.
        substitutions.append(
            (re.compile(rf"\b{escaped}\b"), literal_replacer(new_name))
        )

    for md_file in notion_dir.glob("*.md"):
        original_content = md_file.read_text(encoding='utf-8')
        content = original_content
        for pattern, replacer in substitutions:
            content = pattern.sub(replacer, content)

        if content != original_content:
            md_file.write_text(content, encoding='utf-8')
            print(f" ✓ 更新Markdown: {md_file.name}")
def save_mapping(notion_dir: Path, image_mapping: Dict[str, str]):
    """Write the old-name -> new-name mapping to ``图片映射.json``.

    The file is created in ``notion_dir`` (replacing any existing one) as
    UTF-8 JSON with a two-space indent and non-ASCII characters kept as-is.
    """
    target = notion_dir / "图片映射.json"
    serialized = json.dumps(image_mapping, ensure_ascii=False, indent=2)
    target.write_text(serialized, encoding='utf-8')
    print(f" ✓ 保存映射文件: {target.name}")
def create_image_index(order_class: str, image_dir: Path):
    """Generate ``图片索引.md`` next to ``image_dir``.

    The index lists every ``*.jpg`` / ``*.jpeg`` / ``*.png`` file found
    directly in ``image_dir``, grouped by the category its file name starts
    with (anything else goes to ``其他``), followed by a short usage section.
    Nothing is written when ``image_dir`` does not exist or holds no image.

    Args:
        order_class: Name of the order class, used in the index title.
        image_dir: Directory containing the images; the index is written to
            its parent directory.
    """
    if not image_dir.exists():
        return

    # *.jpeg is included so images that kept their original name are indexed too.
    images = sorted(
        img
        for pattern in ("*.jpg", "*.jpeg", "*.png")
        for img in image_dir.glob(pattern)
    )
    if not images:
        return

    categories = {
        "设计图": [],
        "展示图": [],
        "效果图": [],
        "流程图": [],
        "场景图": [],
        "其他": [],
    }
    for img in images:
        name = img.name
        matched = next(
            (category for category in categories if name.startswith(category)),
            "其他",
        )
        categories[matched].append(name)

    # Time the index was generated (not the modification time of this script).
    updated_at = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    parts = [
        f"# {order_class}订单班 - 图片资源索引\n\n",
        f"**更新时间**: {updated_at}\n",
        f"**图片总数**: {len(images)}\n\n",
    ]

    for category, files in categories.items():
        if not files:
            continue
        parts.append(f"## {category} ({len(files)}张)\n\n")
        for file in sorted(files):
            # "<类别>_<编号>_<描述>.<ext>": the third field, when present, is
            # shown as a description after the file name.
            fields = file.split('_', 2)
            desc = fields[2].split('.')[0] if len(fields) > 2 else ""
            parts.append(f"- {file} - {desc}\n" if desc else f"- {file}\n")
        parts.append("\n")

    parts.append(
        "## 使用说明\n"
        "### Markdown引用\n"
        "```markdown\n"
        "![图片描述](./image/设计图_01.jpg)\n"
        "```\n"
        "### HTML引用\n"
        "```html\n"
        '<img src="./image/设计图_01.jpg" alt="设计图">\n'
        "```\n"
        "### 相对路径引用\n"
        "- 从notion文稿目录: `./image/图片名.jpg`\n"
        "- 从上级目录: `./notion文稿/image/图片名.jpg`\n"
    )

    index_file = image_dir.parent / "图片索引.md"
    index_file.write_text("".join(parts), encoding='utf-8')
    print(f" ✓ 创建图片索引: {index_file.name}")
def main(base_path: Optional[str] = None):
    """Run the image clean-up for the order-class documentation tree.

    Args:
        base_path: Root directory that contains the order-class folders.
            When omitted, the original hard-coded project path is used, so
            calling ``main()`` without arguments behaves as before.
    """
    if base_path is None:
        base_path = "/Users/xiaoqi/Documents/Dev/Project/2025-09-08_n8nDEMO演示/doc/订单班文档资料"

    print("=" * 60)
    print("开始整理 doc/订单班文档资料 目录下的图片")
    print("=" * 60)
    organize_order_class_images(base_path)
    print("\n" + "=" * 60)
    print("✅ 图片整理完成!")
    print("=" * 60)


if __name__ == "__main__":
    # Optional first command-line argument overrides the default base path.
    main(sys.argv[1] if len(sys.argv) > 1 else None)