From 776afba5d8527e31e91707ee4aca65e1f2f6c553 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jim=20Liu=20=E5=AE=9D=E7=8E=89?= Date: Wed, 21 Jan 2026 10:16:12 -0600 Subject: [PATCH] chore: release v1.11.0 --- .claude-plugin/marketplace.json | 5 +- CHANGELOG.md | 9 + CHANGELOG.zh.md | 9 + README.md | 89 ++- README.zh.md | 89 ++- skills/baoyu-image-gen/SKILL.md | 219 +++++++ skills/baoyu-image-gen/scripts/main.ts | 576 ++++++++++++++++++ skills/baoyu-slide-deck/SKILL.md | 45 +- .../references/outline-template.md | 3 + 9 files changed, 1039 insertions(+), 5 deletions(-) create mode 100644 skills/baoyu-image-gen/SKILL.md create mode 100644 skills/baoyu-image-gen/scripts/main.ts diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index b885030..c3faa4a 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -6,7 +6,7 @@ }, "metadata": { "description": "Skills shared by Baoyu for improving daily work efficiency", - "version": "1.10.0" + "version": "1.11.0" }, "plugins": [ { @@ -31,7 +31,8 @@ "source": "./", "strict": false, "skills": [ - "./skills/baoyu-danger-gemini-web" + "./skills/baoyu-danger-gemini-web", + "./skills/baoyu-image-gen" ] }, { diff --git a/CHANGELOG.md b/CHANGELOG.md index 272945b..927b8b4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,15 @@ English | [中文](./CHANGELOG.zh.md) +## 1.11.0 - 2026-01-21 + +### Features +- `baoyu-image-gen`: new AI SDK-based image generation skill using official OpenAI and Google APIs. Supports text-to-image, reference images (Google multimodal), aspect ratios, and quality presets (`normal`, `2k`). Auto-detects provider based on available API keys. 
+- `baoyu-slide-deck`: adds Layout Gallery with 24 layout types—10 slide-specific layouts (`title-hero`, `quote-callout`, `key-stat`, `split-screen`, `icon-grid`, `two-columns`, `three-columns`, `image-caption`, `agenda`, `bullet-list`) and 14 infographic-derived layouts (`linear-progression`, `binary-comparison`, `comparison-matrix`, `hierarchical-layers`, `hub-spoke`, `bento-grid`, `funnel`, `dashboard`, `venn-diagram`, `circular-flow`, `winding-roadmap`, `tree-branching`, `iceberg`, `bridge`). + +### Documentation +- `README.md`, `README.zh.md`: adds baoyu-image-gen documentation with usage examples, options table, and environment variables; adds Environment Configuration section for API key setup. + ## 1.10.0 - 2026-01-21 ### Features diff --git a/CHANGELOG.zh.md b/CHANGELOG.zh.md index 668adc6..088642a 100644 --- a/CHANGELOG.zh.md +++ b/CHANGELOG.zh.md @@ -2,6 +2,15 @@ [English](./CHANGELOG.md) | 中文 +## 1.11.0 - 2026-01-21 + +### 新功能 +- `baoyu-image-gen`:新增基于 AI SDK 的图像生成技能,使用官方 OpenAI 和 Google API。支持文生图、参考图(Google 多模态)、宽高比和质量预设(`normal`、`2k`)。根据可用的 API 密钥自动选择服务商。 +- `baoyu-slide-deck`:新增布局库(Layout Gallery),包含 24 种布局类型——10 种幻灯片专用布局(`title-hero` 标题主图、`quote-callout` 引用突出、`key-stat` 关键数据、`split-screen` 分屏、`icon-grid` 图标网格、`two-columns` 双栏、`three-columns` 三栏、`image-caption` 图片说明、`agenda` 议程、`bullet-list` 要点列表)和 14 种信息图衍生布局(`linear-progression` 线性流程、`binary-comparison` 二元对比、`comparison-matrix` 对比矩阵、`hierarchical-layers` 层级、`hub-spoke` 中心辐射、`bento-grid` 便当盒、`funnel` 漏斗、`dashboard` 仪表盘、`venn-diagram` 韦恩图、`circular-flow` 循环流程、`winding-roadmap` 蜿蜒路线图、`tree-branching` 树状分支、`iceberg` 冰山、`bridge` 桥接)。 + +### 文档 +- `README.md`、`README.zh.md`:新增 baoyu-image-gen 文档,包含用法示例、选项表和环境变量说明;新增环境配置章节,介绍 API 密钥设置方法。 + ## 1.10.0 - 2026-01-21 ### 新功能 diff --git a/README.md b/README.md index b22b5e8..e1c3824 100644 --- a/README.md +++ b/README.md @@ -54,7 +54,7 @@ Simply tell Claude Code: | Plugin | Description | Skills | |--------|-------------|--------| | **content-skills** | Content 
generation and publishing | [xhs-images](#baoyu-xhs-images), [infographic](#baoyu-infographic), [cover-image](#baoyu-cover-image), [slide-deck](#baoyu-slide-deck), [comic](#baoyu-comic), [article-illustrator](#baoyu-article-illustrator), [post-to-x](#baoyu-post-to-x), [post-to-wechat](#baoyu-post-to-wechat) | -| **ai-generation-skills** | AI-powered generation backends | [danger-gemini-web](#baoyu-danger-gemini-web) | +| **ai-generation-skills** | AI-powered generation backends | [image-gen](#baoyu-image-gen), [danger-gemini-web](#baoyu-danger-gemini-web) | | **utility-skills** | Utility tools for content processing | [danger-x-to-markdown](#baoyu-danger-x-to-markdown), [compress-image](#baoyu-compress-image) | ## Update Skills @@ -515,6 +515,55 @@ Prerequisites: Google Chrome installed. First run requires QR code login (sessio AI-powered generation backends. +#### baoyu-image-gen + +AI SDK-based image generation using official OpenAI and Google APIs. Supports text-to-image, reference images, aspect ratios, and quality presets. 
+ +```bash +# Basic generation (auto-detect provider) +/baoyu-image-gen --prompt "A cute cat" --image cat.png + +# With aspect ratio +/baoyu-image-gen --prompt "A landscape" --image landscape.png --ar 16:9 + +# High quality (2k) +/baoyu-image-gen --prompt "A banner" --image banner.png --quality 2k + +# Specific provider +/baoyu-image-gen --prompt "A cat" --image cat.png --provider openai + +# With reference images (Google multimodal only) +/baoyu-image-gen --prompt "Make it blue" --image out.png --ref source.png +``` + +**Options**: +| Option | Description | +|--------|-------------| +| `--prompt`, `-p` | Prompt text | +| `--promptfiles` | Read prompt from files (concatenated) | +| `--image` | Output image path (required) | +| `--provider` | `google` or `openai` (default: google) | +| `--model`, `-m` | Model ID | +| `--ar` | Aspect ratio (e.g., `16:9`, `1:1`, `4:3`) | +| `--size` | Size (e.g., `1024x1024`) | +| `--quality` | `normal` or `2k` (default: normal) | +| `--ref` | Reference images (Google multimodal only) | + +**Environment Variables** (see [Environment Configuration](#environment-configuration) for setup): +| Variable | Description | Default | +|----------|-------------|---------| +| `OPENAI_API_KEY` | OpenAI API key | - | +| `GOOGLE_API_KEY` | Google API key | - | +| `OPENAI_IMAGE_MODEL` | OpenAI model | `gpt-image-1.5` | +| `GOOGLE_IMAGE_MODEL` | Google model | `gemini-3-pro-image-preview` | +| `OPENAI_BASE_URL` | Custom OpenAI endpoint | - | +| `GOOGLE_BASE_URL` | Custom Google endpoint | - | + +**Provider Auto-Selection**: +1. If `--provider` specified → use it +2. If only one API key available → use that provider +3. If both available → default to Google + #### baoyu-danger-gemini-web Interacts with Gemini Web to generate text and images. @@ -568,6 +617,44 @@ Compress images to reduce file size while maintaining quality. 
/baoyu-compress-image path/to/images/ --quality 80 ``` +## Environment Configuration + +Some skills require API keys or custom configuration. Environment variables can be set in `.env` files: + +**Load Priority** (higher priority overrides lower): +1. CLI environment variables (e.g., `OPENAI_API_KEY=xxx /baoyu-image-gen ...`) +2. `process.env` (system environment) +3. `<cwd>/.baoyu-skills/.env` (project-level) +4. `~/.baoyu-skills/.env` (user-level) + +**Setup**: + +```bash +# Create user-level config directory +mkdir -p ~/.baoyu-skills + +# Create .env file +cat > ~/.baoyu-skills/.env << 'EOF' +# OpenAI +OPENAI_API_KEY=sk-xxx +OPENAI_IMAGE_MODEL=gpt-image-1.5 +# OPENAI_BASE_URL=https://api.openai.com/v1 + +# Google +GOOGLE_API_KEY=xxx +GOOGLE_IMAGE_MODEL=gemini-3-pro-image-preview +# GOOGLE_BASE_URL=https://generativelanguage.googleapis.com/v1beta +EOF +``` + +**Project-level config** (for team sharing): + +```bash +mkdir -p .baoyu-skills +# Add .baoyu-skills/.env to .gitignore to avoid committing secrets +echo ".baoyu-skills/.env" >> .gitignore +``` + ## Customization All skills support customization via `EXTEND.md` files. Create an extension file to override default styles, add custom configurations, or define your own presets. 
diff --git a/README.zh.md b/README.zh.md index 80e1ab2..2146f94 100644 --- a/README.zh.md +++ b/README.zh.md @@ -54,7 +54,7 @@ npx skills add jimliu/baoyu-skills | 插件 | 说明 | 包含技能 | |------|------|----------| | **content-skills** | 内容生成和发布 | [xhs-images](#baoyu-xhs-images), [infographic](#baoyu-infographic), [cover-image](#baoyu-cover-image), [slide-deck](#baoyu-slide-deck), [comic](#baoyu-comic), [article-illustrator](#baoyu-article-illustrator), [post-to-x](#baoyu-post-to-x), [post-to-wechat](#baoyu-post-to-wechat) | -| **ai-generation-skills** | AI 生成后端 | [danger-gemini-web](#baoyu-danger-gemini-web) | +| **ai-generation-skills** | AI 生成后端 | [image-gen](#baoyu-image-gen), [danger-gemini-web](#baoyu-danger-gemini-web) | | **utility-skills** | 内容处理工具 | [danger-x-to-markdown](#baoyu-danger-x-to-markdown), [compress-image](#baoyu-compress-image) | ## 更新技能 @@ -515,6 +515,55 @@ npx skills add jimliu/baoyu-skills AI 驱动的生成后端。 +#### baoyu-image-gen + +基于 AI SDK 的图像生成,使用官方 OpenAI 和 Google API。支持文生图、参考图、宽高比和质量预设。 + +```bash +# 基础生成(自动检测服务商) +/baoyu-image-gen --prompt "一只可爱的猫" --image cat.png + +# 指定宽高比 +/baoyu-image-gen --prompt "风景图" --image landscape.png --ar 16:9 + +# 高质量(2k 分辨率) +/baoyu-image-gen --prompt "横幅图" --image banner.png --quality 2k + +# 指定服务商 +/baoyu-image-gen --prompt "一只猫" --image cat.png --provider openai + +# 带参考图(仅 Google 多模态支持) +/baoyu-image-gen --prompt "把它变成蓝色" --image out.png --ref source.png +``` + +**选项**: +| 选项 | 说明 | +|------|------| +| `--prompt`, `-p` | 提示词文本 | +| `--promptfiles` | 从文件读取提示词(多文件拼接) | +| `--image` | 输出图片路径(必需) | +| `--provider` | `google` 或 `openai`(默认:google) | +| `--model`, `-m` | 模型 ID | +| `--ar` | 宽高比(如 `16:9`、`1:1`、`4:3`) | +| `--size` | 尺寸(如 `1024x1024`) | +| `--quality` | `normal` 或 `2k`(默认:normal) | +| `--ref` | 参考图片(仅 Google 多模态支持) | + +**环境变量**(配置方法见[环境配置](#环境配置)): +| 变量 | 说明 | 默认值 | +|------|------|--------| +| `OPENAI_API_KEY` | OpenAI API 密钥 | - | +| `GOOGLE_API_KEY` | Google API 密钥 | - | +| `OPENAI_IMAGE_MODEL` | 
OpenAI 模型 | `gpt-image-1.5` | +| `GOOGLE_IMAGE_MODEL` | Google 模型 | `gemini-3-pro-image-preview` | +| `OPENAI_BASE_URL` | 自定义 OpenAI 端点 | - | +| `GOOGLE_BASE_URL` | 自定义 Google 端点 | - | + +**服务商自动选择**: +1. 如果指定了 `--provider` → 使用指定的 +2. 如果只有一个 API 密钥 → 使用对应服务商 +3. 如果两个都有 → 默认使用 Google + #### baoyu-danger-gemini-web 与 Gemini Web 交互,生成文本和图片。 @@ -568,6 +617,44 @@ AI 驱动的生成后端。 /baoyu-compress-image path/to/images/ --quality 80 ``` +## 环境配置 + +部分技能需要 API 密钥或自定义配置。环境变量可以在 `.env` 文件中设置: + +**加载优先级**(高优先级覆盖低优先级): +1. 命令行环境变量(如 `OPENAI_API_KEY=xxx /baoyu-image-gen ...`) +2. `process.env`(系统环境变量) +3. `<cwd>/.baoyu-skills/.env`(项目级) +4. `~/.baoyu-skills/.env`(用户级) + +**配置方法**: + +```bash +# 创建用户级配置目录 +mkdir -p ~/.baoyu-skills + +# 创建 .env 文件 +cat > ~/.baoyu-skills/.env << 'EOF' +# OpenAI +OPENAI_API_KEY=sk-xxx +OPENAI_IMAGE_MODEL=gpt-image-1.5 +# OPENAI_BASE_URL=https://api.openai.com/v1 + +# Google +GOOGLE_API_KEY=xxx +GOOGLE_IMAGE_MODEL=gemini-3-pro-image-preview +# GOOGLE_BASE_URL=https://generativelanguage.googleapis.com/v1beta +EOF +``` + +**项目级配置**(团队共享): + +```bash +mkdir -p .baoyu-skills +# 将 .baoyu-skills/.env 添加到 .gitignore 避免提交密钥 +echo ".baoyu-skills/.env" >> .gitignore +``` + ## 自定义扩展 所有技能支持通过 `EXTEND.md` 文件自定义。创建扩展文件可覆盖默认样式、添加自定义配置或定义个人预设。 diff --git a/skills/baoyu-image-gen/SKILL.md b/skills/baoyu-image-gen/SKILL.md new file mode 100644 index 0000000..843039b --- /dev/null +++ b/skills/baoyu-image-gen/SKILL.md @@ -0,0 +1,219 @@ +--- +name: baoyu-image-gen +description: AI SDK-based image generation using official OpenAI and Google APIs. Supports text-to-image, reference images, aspect ratios, and quality presets. +--- + +# Image Generation (AI SDK) + +Official API-based image generation via AI SDK. Supports OpenAI (DALL-E, GPT Image) and Google (Imagen, Gemini multimodal). + +## Script Directory + +**Important**: All scripts are located in the `scripts/` subdirectory of this skill. + +**Agent Execution Instructions**: +1. 
Determine this SKILL.md file's directory path as `SKILL_DIR` +2. Script path = `${SKILL_DIR}/scripts/<script-name>.ts` +3. Replace all `${SKILL_DIR}` in this document with the actual path + +**Script Reference**: +| Script | Purpose | +|--------|---------| +| `scripts/main.ts` | CLI entry point for image generation | + +## Quick Start + +```bash +# Basic generation (auto-detect provider) +npx -y bun ${SKILL_DIR}/scripts/main.ts --prompt "A cat" --image cat.png + +# With aspect ratio +npx -y bun ${SKILL_DIR}/scripts/main.ts --prompt "A landscape" --image landscape.png --ar 16:9 + +# High quality (2k) +npx -y bun ${SKILL_DIR}/scripts/main.ts --prompt "A cat" --image cat.png --quality 2k + +# Specific provider +npx -y bun ${SKILL_DIR}/scripts/main.ts --prompt "A cat" --image cat.png --provider openai + +# From prompt files +npx -y bun ${SKILL_DIR}/scripts/main.ts --promptfiles system.md content.md --image out.png + +# With reference images (Google multimodal only) +npx -y bun ${SKILL_DIR}/scripts/main.ts --prompt "Make blue" --image out.png --ref source.png +``` + +## Commands + +### Basic Image Generation + +```bash +# Generate with prompt +npx -y bun ${SKILL_DIR}/scripts/main.ts --prompt "A sunset over mountains" --image sunset.png + +# Shorthand +npx -y bun ${SKILL_DIR}/scripts/main.ts -p "A cute robot" --image robot.png +``` + +### Aspect Ratios + +```bash +# Common ratios: 1:1, 16:9, 9:16, 4:3, 3:4, 2.35:1 +npx -y bun ${SKILL_DIR}/scripts/main.ts --prompt "A portrait" --image portrait.png --ar 3:4 + +# Or specify exact size +npx -y bun ${SKILL_DIR}/scripts/main.ts --prompt "Banner" --image banner.png --size 1792x1024 +``` + +### Reference Images (Google Multimodal) + +```bash +# Image editing with reference +npx -y bun ${SKILL_DIR}/scripts/main.ts --prompt "Make it blue" --image blue.png --ref original.png + +# Multiple references +npx -y bun ${SKILL_DIR}/scripts/main.ts --prompt "Combine these styles" --image out.png --ref a.png b.png +``` + +### Quality Presets + +```bash 
+# Normal quality (default) +npx -y bun ${SKILL_DIR}/scripts/main.ts --prompt "A cat" --image cat.png --quality normal + +# High quality (2k resolution) +npx -y bun ${SKILL_DIR}/scripts/main.ts --prompt "A cat" --image cat.png --quality 2k +``` + +### Output Formats + +```bash +# Plain output (prints saved path) +npx -y bun ${SKILL_DIR}/scripts/main.ts --prompt "A cat" --image cat.png + +# JSON output +npx -y bun ${SKILL_DIR}/scripts/main.ts --prompt "A cat" --image cat.png --json +``` + +## Options + +| Option | Description | +|--------|-------------| +| `--prompt <text>`, `-p` | Prompt text | +| `--promptfiles <files...>` | Read prompt from files (concatenated) | +| `--image <path>` | Output image path (required) | +| `--provider google\|openai` | Force provider (auto-detect by default) | +| `--model <id>`, `-m` | Model ID | +| `--ar <ratio>` | Aspect ratio (e.g., `16:9`, `1:1`, `4:3`) | +| `--size <WxH>` | Size (e.g., `1024x1024`) | +| `--quality normal\|2k` | Quality preset (default: normal) | +| `--ref <files...>` | Reference images (Google multimodal only) | +| `--n <count>` | Number of images | +| `--json` | JSON output | +| `--help`, `-h` | Show help | + +## Environment Variables + +| Variable | Description | Default | +|----------|-------------|---------| +| `OPENAI_API_KEY` | OpenAI API key | - | +| `GOOGLE_API_KEY` | Google API key | - | +| `OPENAI_IMAGE_MODEL` | OpenAI model | `gpt-image-1.5` | +| `GOOGLE_IMAGE_MODEL` | Google model | `gemini-3-pro-image-preview` | +| `OPENAI_BASE_URL` | Custom OpenAI endpoint | - | +| `GOOGLE_BASE_URL` | Custom Google endpoint | - | + +**Load Priority**: CLI args > `process.env` > `<cwd>/.baoyu-skills/.env` > `~/.baoyu-skills/.env` + +## Provider & Model Strategy + +### Auto-Selection + +1. If `--provider` specified → use it +2. If only one API key available → use that provider +3. 
If both available → default to Google (multimodal LLMs more versatile) + +### API Selection by Model Type + +| Model Category | API Function | Example Models | +|----------------|--------------|----------------| +| Google Multimodal | `generateText` | `gemini-2.0-flash-exp-image-generation` | +| Google Imagen | `experimental_generateImage` | `imagen-3.0-generate-002` | +| OpenAI | `experimental_generateImage` | `gpt-image-1`, `dall-e-3` | + +### Available Models + +**Google**: +- `gemini-3-pro-image-preview` - Default, multimodal generation +- `gemini-2.0-flash-exp-image-generation` - Gemini 2.0 Flash +- `imagen-3.0-generate-002` - Imagen 3 + +**OpenAI**: +- `gpt-image-1.5` - Default, GPT Image 1.5 +- `gpt-image-1` - GPT Image 1 +- `dall-e-3` - DALL-E 3 + +## Quality Presets + +| Preset | OpenAI | Google | Use Case | +|--------|--------|--------|----------| +| `normal` | 1024x1024 | Default | Covers, illustrations | +| `2k` | 2048x2048 | "2048px" in prompt | Infographics, slides | + +## Aspect Ratio Handling + +- **Multimodal LLMs**: Embedded in prompt (e.g., `"... 
aspect ratio 16:9"`) +- **Image-only models**: Uses `aspectRatio` or `size` parameter +- **Common ratios**: 1:1, 16:9, 9:16, 4:3, 3:4, 2.35:1 + +## Examples + +### Generate Cover Image + +```bash +npx -y bun ${SKILL_DIR}/scripts/main.ts \ + --prompt "A minimalist tech illustration with blue gradients" \ + --image cover.png --ar 2.35:1 --quality 2k +``` + +### Generate Social Media Post + +```bash +npx -y bun ${SKILL_DIR}/scripts/main.ts \ + --prompt "Instagram post about coffee" \ + --image post.png --ar 1:1 +``` + +### Edit Image with Reference + +```bash +npx -y bun ${SKILL_DIR}/scripts/main.ts \ + --prompt "Change the background to sunset" \ + --image edited.png --ref original.png --provider google +``` + +### Batch Generation from Prompt File + +```bash +# Create prompt file with detailed instructions +npx -y bun ${SKILL_DIR}/scripts/main.ts \ + --promptfiles style-guide.md scene-description.md \ + --image scene.png +``` + +## Error Handling + +- **Missing API key**: Clear error with setup instructions +- **Generation failure**: Auto-retry once, then error +- **Invalid aspect ratio**: Warning, proceed with default +- **Reference images with image-only model**: Warning, ignore refs + +## Extension Support + +Custom configurations via EXTEND.md. + +**Check paths** (priority order): +1. `.baoyu-skills/baoyu-image-gen/EXTEND.md` (project) +2. `~/.baoyu-skills/baoyu-image-gen/EXTEND.md` (user) + +If found, load before workflow. Extension content overrides defaults. 
diff --git a/skills/baoyu-image-gen/scripts/main.ts b/skills/baoyu-image-gen/scripts/main.ts new file mode 100644 index 0000000..3d10a66 --- /dev/null +++ b/skills/baoyu-image-gen/scripts/main.ts @@ -0,0 +1,576 @@ +import fs from "node:fs"; +import path from "node:path"; +import process from "node:process"; +import { homedir } from "node:os"; +import { mkdir, readFile, writeFile } from "node:fs/promises"; + +type Provider = "google" | "openai"; +type Quality = "normal" | "2k"; + +type CliArgs = { + prompt: string | null; + promptFiles: string[]; + imagePath: string | null; + provider: Provider | null; + model: string | null; + aspectRatio: string | null; + size: string | null; + quality: Quality; + referenceImages: string[]; + n: number; + json: boolean; + help: boolean; +}; + +const GOOGLE_MULTIMODAL_MODELS = [ + "gemini-3-pro-image-preview", + "gemini-2.0-flash-exp-image-generation", + "gemini-2.5-flash-preview-native-audio-dialog", +]; + +const GOOGLE_IMAGEN_MODELS = ["imagen-3.0-generate-002", "imagen-3.0-generate-001"]; + +const OPENAI_IMAGE_MODELS = ["gpt-image-1.5", "gpt-image-1", "dall-e-3", "dall-e-2"]; + +function printUsage(): void { + console.log(`Usage: + npx -y bun scripts/main.ts --prompt "A cat" --image cat.png + npx -y bun scripts/main.ts --prompt "A landscape" --image landscape.png --ar 16:9 + npx -y bun scripts/main.ts --promptfiles system.md content.md --image out.png + +Options: + -p, --prompt Prompt text + --promptfiles Read prompt from files (concatenated) + --image Output image path (required) + --provider google|openai Force provider (auto-detect by default) + -m, --model Model ID + --ar Aspect ratio (e.g., 16:9, 1:1, 4:3) + --size Size (e.g., 1024x1024) + --quality normal|2k Quality preset (default: normal) + --ref Reference images (Google multimodal only) + --n Number of images (default: 1) + --json JSON output + -h, --help Show help + +Environment variables: + OPENAI_API_KEY OpenAI API key + GOOGLE_API_KEY Google API key + 
OPENAI_IMAGE_MODEL Default OpenAI model (gpt-image-1.5) + GOOGLE_IMAGE_MODEL Default Google model (gemini-3-pro-image-preview) + OPENAI_BASE_URL Custom OpenAI endpoint + GOOGLE_BASE_URL Custom Google endpoint + +Env file load order: CLI args > process.env > /.baoyu-skills/.env > ~/.baoyu-skills/.env`); +} + +function parseArgs(argv: string[]): CliArgs { + const out: CliArgs = { + prompt: null, + promptFiles: [], + imagePath: null, + provider: null, + model: null, + aspectRatio: null, + size: null, + quality: "normal", + referenceImages: [], + n: 1, + json: false, + help: false, + }; + + const positional: string[] = []; + + const takeMany = (i: number): { items: string[]; next: number } => { + const items: string[] = []; + let j = i + 1; + while (j < argv.length) { + const v = argv[j]!; + if (v.startsWith("-")) break; + items.push(v); + j++; + } + return { items, next: j - 1 }; + }; + + for (let i = 0; i < argv.length; i++) { + const a = argv[i]!; + + if (a === "--help" || a === "-h") { + out.help = true; + continue; + } + + if (a === "--json") { + out.json = true; + continue; + } + + if (a === "--prompt" || a === "-p") { + const v = argv[++i]; + if (!v) throw new Error(`Missing value for ${a}`); + out.prompt = v; + continue; + } + + if (a === "--promptfiles") { + const { items, next } = takeMany(i); + if (items.length === 0) throw new Error("Missing files for --promptfiles"); + out.promptFiles.push(...items); + i = next; + continue; + } + + if (a === "--image") { + const v = argv[++i]; + if (!v) throw new Error("Missing value for --image"); + out.imagePath = v; + continue; + } + + if (a === "--provider") { + const v = argv[++i]; + if (v !== "google" && v !== "openai") throw new Error(`Invalid provider: ${v}`); + out.provider = v; + continue; + } + + if (a === "--model" || a === "-m") { + const v = argv[++i]; + if (!v) throw new Error(`Missing value for ${a}`); + out.model = v; + continue; + } + + if (a === "--ar") { + const v = argv[++i]; + if (!v) throw new 
Error("Missing value for --ar"); + out.aspectRatio = v; + continue; + } + + if (a === "--size") { + const v = argv[++i]; + if (!v) throw new Error("Missing value for --size"); + out.size = v; + continue; + } + + if (a === "--quality") { + const v = argv[++i]; + if (v !== "normal" && v !== "2k") throw new Error(`Invalid quality: ${v}`); + out.quality = v; + continue; + } + + if (a === "--ref" || a === "--reference") { + const { items, next } = takeMany(i); + if (items.length === 0) throw new Error(`Missing files for ${a}`); + out.referenceImages.push(...items); + i = next; + continue; + } + + if (a === "--n") { + const v = argv[++i]; + if (!v) throw new Error("Missing value for --n"); + out.n = parseInt(v, 10); + if (isNaN(out.n) || out.n < 1) throw new Error(`Invalid count: ${v}`); + continue; + } + + if (a.startsWith("-")) { + throw new Error(`Unknown option: ${a}`); + } + + positional.push(a); + } + + if (!out.prompt && out.promptFiles.length === 0 && positional.length > 0) { + out.prompt = positional.join(" "); + } + + return out; +} + +async function loadEnvFile(p: string): Promise> { + try { + const content = await readFile(p, "utf8"); + const env: Record = {}; + for (const line of content.split("\n")) { + const trimmed = line.trim(); + if (!trimmed || trimmed.startsWith("#")) continue; + const idx = trimmed.indexOf("="); + if (idx === -1) continue; + const key = trimmed.slice(0, idx).trim(); + let val = trimmed.slice(idx + 1).trim(); + if ((val.startsWith('"') && val.endsWith('"')) || (val.startsWith("'") && val.endsWith("'"))) { + val = val.slice(1, -1); + } + env[key] = val; + } + return env; + } catch { + return {}; + } +} + +async function loadEnv(): Promise { + const home = homedir(); + const cwd = process.cwd(); + + const homeEnv = await loadEnvFile(path.join(home, ".baoyu-skills", ".env")); + const cwdEnv = await loadEnvFile(path.join(cwd, ".baoyu-skills", ".env")); + + for (const [k, v] of Object.entries(homeEnv)) { + if (!process.env[k]) 
process.env[k] = v; + } + for (const [k, v] of Object.entries(cwdEnv)) { + if (!process.env[k]) process.env[k] = v; + } +} + +async function readPromptFromFiles(files: string[]): Promise { + const parts: string[] = []; + for (const f of files) { + parts.push(await readFile(f, "utf8")); + } + return parts.join("\n\n"); +} + +async function readPromptFromStdin(): Promise { + if (process.stdin.isTTY) return null; + try { + const t = await Bun.stdin.text(); + const v = t.trim(); + return v.length > 0 ? v : null; + } catch { + return null; + } +} + +function normalizeOutputImagePath(p: string): string { + const full = path.resolve(p); + const ext = path.extname(full); + if (ext) return full; + return `${full}.png`; +} + +function detectProvider(args: CliArgs): Provider { + if (args.provider) return args.provider; + + const hasGoogle = !!process.env.GOOGLE_API_KEY; + const hasOpenai = !!process.env.OPENAI_API_KEY; + + if (hasGoogle && !hasOpenai) return "google"; + if (hasOpenai && !hasGoogle) return "openai"; + if (hasGoogle && hasOpenai) return "google"; + + throw new Error( + "No API key found. Set GOOGLE_API_KEY or OPENAI_API_KEY.\n" + + "Create ~/.baoyu-skills/.env or /.baoyu-skills/.env with your keys." 
+ ); +} + +function getDefaultModel(provider: Provider): string { + if (provider === "google") { + return process.env.GOOGLE_IMAGE_MODEL || "gemini-3-pro-image-preview"; + } + return process.env.OPENAI_IMAGE_MODEL || "gpt-image-1.5"; +} + +function isGoogleMultimodal(model: string): boolean { + return GOOGLE_MULTIMODAL_MODELS.some((m) => model.includes(m)); +} + +function isGoogleImagen(model: string): boolean { + return GOOGLE_IMAGEN_MODELS.some((m) => model.includes(m)); +} + +function buildPromptWithAspect(prompt: string, ar: string | null, quality: Quality): string { + let result = prompt; + if (ar) { + result += ` Aspect ratio: ${ar}.`; + } + if (quality === "2k") { + result += " High resolution 2048px."; + } + return result; +} + +function parseAspectRatio(ar: string): { width: number; height: number } | null { + const match = ar.match(/^(\d+(?:\.\d+)?):(\d+(?:\.\d+)?)$/); + if (!match) return null; + const w = parseFloat(match[1]!); + const h = parseFloat(match[2]!); + if (w <= 0 || h <= 0) return null; + return { width: w, height: h }; +} + +function getOpenAISize(ar: string | null, quality: Quality): string { + const base = quality === "2k" ? 2048 : 1024; + + if (!ar) return `${base}x${base}`; + + const parsed = parseAspectRatio(ar); + if (!parsed) return `${base}x${base}`; + + const ratio = parsed.width / parsed.height; + + if (Math.abs(ratio - 1) < 0.1) return `${base}x${base}`; + if (ratio > 1.5) return quality === "2k" ? "2048x1024" : "1792x1024"; + if (ratio < 0.67) return quality === "2k" ? 
"1024x2048" : "1024x1792"; + return `${base}x${base}`; +} + +async function readImageAsBase64(p: string): Promise<{ data: string; mimeType: string }> { + const buf = await readFile(p); + const ext = path.extname(p).toLowerCase(); + let mimeType = "image/png"; + if (ext === ".jpg" || ext === ".jpeg") mimeType = "image/jpeg"; + else if (ext === ".gif") mimeType = "image/gif"; + else if (ext === ".webp") mimeType = "image/webp"; + return { data: buf.toString("base64"), mimeType }; +} + +async function generateWithGoogleMultimodal( + prompt: string, + model: string, + args: CliArgs +): Promise { + const { generateText } = await import("ai"); + const { createGoogleGenerativeAI } = await import("@ai-sdk/google"); + + const google = createGoogleGenerativeAI({ + apiKey: process.env.GOOGLE_API_KEY, + baseURL: process.env.GOOGLE_BASE_URL, + }); + + const fullPrompt = buildPromptWithAspect(prompt, args.aspectRatio, args.quality); + + const messages: any[] = []; + const content: any[] = []; + + for (const refPath of args.referenceImages) { + const { data, mimeType } = await readImageAsBase64(refPath); + content.push({ type: "image", image: data, mimeType }); + } + content.push({ type: "text", text: fullPrompt }); + + messages.push({ role: "user", content }); + + const result = await generateText({ + model: google(model, { useSearchGrounding: false }), + messages, + providerOptions: { + google: { + responseModalities: ["TEXT", "IMAGE"], + }, + }, + }); + + const files = (result as any).files; + if (!files || files.length === 0) { + const expRes = (result as any).response?.body?.candidates?.[0]?.content?.parts; + if (expRes) { + for (const part of expRes) { + if (part.inlineData?.data) { + return Uint8Array.from(Buffer.from(part.inlineData.data, "base64")); + } + } + } + throw new Error("No image in response"); + } + + const img = files[0]; + if (img.uint8Array) return img.uint8Array; + if (img.base64) return Uint8Array.from(Buffer.from(img.base64, "base64")); + + throw new 
Error("Cannot extract image data"); +} + +async function generateWithGoogleImagen( + prompt: string, + model: string, + args: CliArgs +): Promise { + const { experimental_generateImage: generateImage } = await import("ai"); + const { createGoogleGenerativeAI } = await import("@ai-sdk/google"); + + const google = createGoogleGenerativeAI({ + apiKey: process.env.GOOGLE_API_KEY, + baseURL: process.env.GOOGLE_BASE_URL, + }); + + const fullPrompt = buildPromptWithAspect(prompt, args.aspectRatio, args.quality); + + const result = await generateImage({ + model: google.image(model), + prompt: fullPrompt, + n: args.n, + aspectRatio: args.aspectRatio || undefined, + }); + + const img = result.images[0]; + if (!img) throw new Error("No image in response"); + + if (img.uint8Array) return img.uint8Array; + if (img.base64) return Uint8Array.from(Buffer.from(img.base64, "base64")); + + throw new Error("Cannot extract image data"); +} + +async function generateWithOpenAI( + prompt: string, + model: string, + args: CliArgs +): Promise { + const baseURL = process.env.OPENAI_BASE_URL || "https://api.openai.com/v1"; + const apiKey = process.env.OPENAI_API_KEY; + + if (!apiKey) throw new Error("OPENAI_API_KEY is required"); + + const size = args.size || getOpenAISize(args.aspectRatio, args.quality); + + const body: Record = { + model, + prompt, + size, + }; + + if (model.includes("dall-e-3")) { + body.quality = args.quality === "2k" ? 
"hd" : "standard"; + } + + const res = await fetch(`${baseURL}/images/generations`, { + method: "POST", + headers: { + "Content-Type": "application/json", + Authorization: `Bearer ${apiKey}`, + }, + body: JSON.stringify(body), + }); + + if (!res.ok) { + const err = await res.text(); + throw new Error(`OpenAI API error: ${err}`); + } + + const result = (await res.json()) as { data: Array<{ url?: string; b64_json?: string }> }; + const img = result.data[0]; + + if (img?.b64_json) { + return Uint8Array.from(Buffer.from(img.b64_json, "base64")); + } + + if (img?.url) { + const imgRes = await fetch(img.url); + if (!imgRes.ok) throw new Error("Failed to download image"); + const buf = await imgRes.arrayBuffer(); + return new Uint8Array(buf); + } + + throw new Error("No image in response"); +} + +async function generate( + provider: Provider, + model: string, + prompt: string, + args: CliArgs +): Promise { + if (provider === "google") { + if (isGoogleMultimodal(model)) { + return generateWithGoogleMultimodal(prompt, model, args); + } + if (isGoogleImagen(model)) { + if (args.referenceImages.length > 0) { + console.error("Warning: Reference images not supported with Imagen models, ignoring."); + } + return generateWithGoogleImagen(prompt, model, args); + } + return generateWithGoogleMultimodal(prompt, model, args); + } + + if (args.referenceImages.length > 0) { + console.error("Warning: Reference images not supported with OpenAI, ignoring."); + } + return generateWithOpenAI(prompt, model, args); +} + +async function main(): Promise { + const args = parseArgs(process.argv.slice(2)); + + if (args.help) { + printUsage(); + return; + } + + await loadEnv(); + + let prompt: string | null = args.prompt; + if (!prompt && args.promptFiles.length > 0) prompt = await readPromptFromFiles(args.promptFiles); + if (!prompt) prompt = await readPromptFromStdin(); + + if (!prompt) { + console.error("Error: Prompt is required"); + printUsage(); + process.exitCode = 1; + return; + } + + if 
(!args.imagePath) { + console.error("Error: --image is required"); + printUsage(); + process.exitCode = 1; + return; + } + + const provider = detectProvider(args); + const model = args.model || getDefaultModel(provider); + const outputPath = normalizeOutputImagePath(args.imagePath); + + let imageData: Uint8Array; + let retried = false; + + while (true) { + try { + imageData = await generate(provider, model, prompt, args); + break; + } catch (e) { + if (!retried) { + retried = true; + console.error("Generation failed, retrying..."); + continue; + } + throw e; + } + } + + const dir = path.dirname(outputPath); + await mkdir(dir, { recursive: true }); + await writeFile(outputPath, imageData); + + if (args.json) { + console.log( + JSON.stringify( + { + savedImage: outputPath, + provider, + model, + prompt: prompt.slice(0, 200), + }, + null, + 2 + ) + ); + } else { + console.log(outputPath); + } +} + +main().catch((e) => { + const msg = e instanceof Error ? e.message : String(e); + console.error(msg); + process.exit(1); +}); diff --git a/skills/baoyu-slide-deck/SKILL.md b/skills/baoyu-slide-deck/SKILL.md index 9e6296d..eee9e0e 100644 --- a/skills/baoyu-slide-deck/SKILL.md +++ b/skills/baoyu-slide-deck/SKILL.md @@ -87,6 +87,46 @@ Transform content into professional slide deck images with flexible style option | lifestyle, wellness, travel, artistic, natural | `watercolor` | | Default | `blueprint` | +## Layout Gallery + +Optional layout hints for individual slides. Specify in outline's `// LAYOUT` section. 
+ +### Slide-Specific Layouts + +| Layout | Description | Best For | +|--------|-------------|----------| +| `title-hero` | Large centered title + subtitle | Cover slides, section breaks | +| `quote-callout` | Featured quote with attribution | Testimonials, key insights | +| `key-stat` | Single large number as focal point | Impact statistics, metrics | +| `split-screen` | Half image, half text | Feature highlights, comparisons | +| `icon-grid` | Grid of icons with labels | Features, capabilities, benefits | +| `two-columns` | Content in balanced columns | Paired information, dual points | +| `three-columns` | Content in three columns | Triple comparisons, categories | +| `image-caption` | Full-bleed image + text overlay | Visual storytelling, emotional | +| `agenda` | Numbered list with highlights | Session overview, roadmap | +| `bullet-list` | Structured bullet points | Simple content, lists | + +### Infographic-Derived Layouts + +| Layout | Description | Best For | +|--------|-------------|----------| +| `linear-progression` | Sequential flow left-to-right | Timelines, step-by-step | +| `binary-comparison` | Side-by-side A vs B | Before/after, pros-cons | +| `comparison-matrix` | Multi-factor grid | Feature comparisons | +| `hierarchical-layers` | Pyramid or stacked levels | Priority, importance | +| `hub-spoke` | Central node with radiating items | Concept maps, ecosystems | +| `bento-grid` | Varied-size tiles | Overview, summary | +| `funnel` | Narrowing stages | Conversion, filtering | +| `dashboard` | Metrics with charts/numbers | KPIs, data display | +| `venn-diagram` | Overlapping circles | Relationships, intersections | +| `circular-flow` | Continuous cycle | Recurring processes | +| `winding-roadmap` | Curved path with milestones | Journey, timeline | +| `tree-branching` | Parent-child hierarchy | Org charts, taxonomies | +| `iceberg` | Visible vs hidden layers | Surface vs depth | +| `bridge` | Gap with connection | Problem-solution | + +**Usage**: Add 
`Layout: <name>` in slide's `// LAYOUT` section to guide visual composition. + ## Design Philosophy This deck is designed for **reading and sharing**, not live presentation: @@ -169,7 +209,10 @@ If `--outline-only`, stop here. 1. Read `references/base-prompt.md` 2. Combine with style instructions from outline 3. Add slide-specific content -4. Save to `prompts/` directory +4. If `Layout:` specified in outline, include layout guidance in prompt: + - Reference layout characteristics for image composition + - Example: `Layout: hub-spoke` → "Central concept in middle with related items radiating outward" +5. Save to `prompts/` directory ### Step 5: Generate Images diff --git a/skills/baoyu-slide-deck/references/outline-template.md b/skills/baoyu-slide-deck/references/outline-template.md index b8acd79..a798a0e 100644 --- a/skills/baoyu-slide-deck/references/outline-template.md +++ b/skills/baoyu-slide-deck/references/outline-template.md @@ -67,6 +67,7 @@ Sub-headline: [supporting tagline] [Detailed visual description - specific elements, composition, mood] // LAYOUT +Layout: [optional: layout name from gallery, e.g., title-hero] [Composition, hierarchy, spatial arrangement] ``` @@ -93,6 +94,7 @@ Body: [Detailed visual description] // LAYOUT +Layout: [optional: layout name from gallery] [Composition, hierarchy, spatial arrangement] ``` @@ -115,6 +117,7 @@ Body: [optional summary points or next steps] [Visual that reinforces the core message] // LAYOUT +Layout: [optional: layout name from gallery] [Clean, impactful composition] ```