diff --git a/CLAUDE.md b/CLAUDE.md index 44616ca..50b2e6b 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -31,7 +31,7 @@ Execute: `${BUN_X} skills//scripts/main.ts [options]` - **Bun**: TypeScript runtime (`bun` preferred, fallback `npx -y bun`) - **Chrome**: Required for CDP-based skills (gemini-web, post-to-x/wechat/weibo, url-to-markdown). All CDP skills share a single profile, override via `BAOYU_CHROME_PROFILE_DIR` env var. Platform paths: [docs/chrome-profile.md](docs/chrome-profile.md) -- **Image generation APIs**: `baoyu-image-gen` requires API key (OpenAI, Google, DashScope, or Replicate) configured in EXTEND.md +- **Image generation APIs**: `baoyu-image-gen` requires API key (OpenAI, Google, OpenRouter, DashScope, or Replicate) configured in EXTEND.md - **Gemini Web auth**: Browser cookies (first run opens Chrome for login, `--login` to refresh) ## Security diff --git a/README.md b/README.md index 55ddd46..1aa6577 100644 --- a/README.md +++ b/README.md @@ -665,7 +665,7 @@ AI-powered generation backends. #### baoyu-image-gen -AI SDK-based image generation using official OpenAI, Google and DashScope (Aliyun Tongyi Wanxiang) APIs. Supports text-to-image, reference images, aspect ratios, and quality presets. +AI SDK-based image generation using OpenAI, Google, OpenRouter, DashScope (Aliyun Tongyi Wanxiang), and Replicate APIs. Supports text-to-image, reference images, aspect ratios, and quality presets. 
```bash # Basic generation (auto-detect provider) @@ -680,10 +680,16 @@ AI SDK-based image generation using official OpenAI, Google and DashScope (Aliyu # Specific provider /baoyu-image-gen --prompt "A cat" --image cat.png --provider openai +# OpenRouter +/baoyu-image-gen --prompt "A cat" --image cat.png --provider openrouter + # DashScope (Aliyun Tongyi Wanxiang) /baoyu-image-gen --prompt "一只可爱的猫" --image cat.png --provider dashscope -# With reference images (Google multimodal only) +# Replicate +/baoyu-image-gen --prompt "A cat" --image cat.png --provider replicate + +# With reference images (Google, OpenAI, OpenRouter, or Replicate) /baoyu-image-gen --prompt "Make it blue" --image out.png --ref source.png ``` @@ -693,25 +699,31 @@ AI SDK-based image generation using official OpenAI, Google and DashScope (Aliyu | `--prompt`, `-p` | Prompt text | | `--promptfiles` | Read prompt from files (concatenated) | | `--image` | Output image path (required) | -| `--provider` | `google`, `openai` or `dashscope` (default: google) | +| `--provider` | `google`, `openai`, `openrouter`, `dashscope` or `replicate` (default: auto-detect; prefers google) | | `--model`, `-m` | Model ID | | `--ar` | Aspect ratio (e.g., `16:9`, `1:1`, `4:3`) | | `--size` | Size (e.g., `1024x1024`) | -| `--quality` | `normal` or `2k` (default: normal) | -| `--ref` | Reference images (Google multimodal only) | +| `--quality` | `normal` or `2k` (default: `2k`) | +| `--ref` | Reference images (Google, OpenAI, OpenRouter or Replicate) | **Environment Variables** (see [Environment Configuration](#environment-configuration) for setup): | Variable | Description | Default | |----------|-------------|---------| | `OPENAI_API_KEY` | OpenAI API key | - | +| `OPENROUTER_API_KEY` | OpenRouter API key | - | | `GOOGLE_API_KEY` | Google API key | - | | `DASHSCOPE_API_KEY` | DashScope API key (Aliyun) | - | +| `REPLICATE_API_TOKEN` | Replicate API token | - | | `OPENAI_IMAGE_MODEL` | OpenAI model | `gpt-image-1.5` | +| 
`OPENROUTER_IMAGE_MODEL` | OpenRouter model | `google/gemini-3.1-flash-image-preview` | | `GOOGLE_IMAGE_MODEL` | Google model | `gemini-3-pro-image-preview` | | `DASHSCOPE_IMAGE_MODEL` | DashScope model | `z-image-turbo` | +| `REPLICATE_IMAGE_MODEL` | Replicate model | `google/nano-banana-pro` | | `OPENAI_BASE_URL` | Custom OpenAI endpoint | - | +| `OPENROUTER_BASE_URL` | Custom OpenRouter endpoint | `https://openrouter.ai/api/v1` | | `GOOGLE_BASE_URL` | Custom Google endpoint | - | | `DASHSCOPE_BASE_URL` | Custom DashScope endpoint | - | +| `REPLICATE_BASE_URL` | Custom Replicate endpoint | - | **Provider Auto-Selection**: 1. If `--provider` specified → use it @@ -958,6 +970,11 @@ OPENAI_API_KEY=sk-xxx OPENAI_IMAGE_MODEL=gpt-image-1.5 # OPENAI_BASE_URL=https://api.openai.com/v1 +# OpenRouter +OPENROUTER_API_KEY=sk-or-xxx +OPENROUTER_IMAGE_MODEL=google/gemini-3.1-flash-image-preview +# OPENROUTER_BASE_URL=https://openrouter.ai/api/v1 + # Google GOOGLE_API_KEY=xxx GOOGLE_IMAGE_MODEL=gemini-3-pro-image-preview @@ -967,6 +984,11 @@ GOOGLE_IMAGE_MODEL=gemini-3-pro-image-preview DASHSCOPE_API_KEY=sk-xxx DASHSCOPE_IMAGE_MODEL=z-image-turbo # DASHSCOPE_BASE_URL=https://dashscope.aliyuncs.com/api/v1 + +# Replicate +REPLICATE_API_TOKEN=r8_xxx +REPLICATE_IMAGE_MODEL=google/nano-banana-pro +# REPLICATE_BASE_URL=https://api.replicate.com EOF ``` diff --git a/README.zh.md b/README.zh.md index 2cf0e24..7b09a17 100644 --- a/README.zh.md +++ b/README.zh.md @@ -665,7 +665,7 @@ AI 驱动的生成后端。 #### baoyu-image-gen -基于 AI SDK 的图像生成,使用官方 OpenAI、Google 和 DashScope(阿里通义万相)API。支持文生图、参考图、宽高比和质量预设。 +基于 AI SDK 的图像生成,支持 OpenAI、Google、OpenRouter、DashScope(阿里通义万相)和 Replicate API。支持文生图、参考图、宽高比和质量预设。 ```bash # 基础生成(自动检测服务商) @@ -680,10 +680,16 @@ AI 驱动的生成后端。 # 指定服务商 /baoyu-image-gen --prompt "一只猫" --image cat.png --provider openai +# OpenRouter +/baoyu-image-gen --prompt "一只猫" --image cat.png --provider openrouter + # DashScope(阿里通义万相) /baoyu-image-gen --prompt "一只可爱的猫" --image cat.png --provider 
dashscope -# 带参考图(仅 Google 多模态支持) +# Replicate +/baoyu-image-gen --prompt "一只猫" --image cat.png --provider replicate + +# 带参考图(Google、OpenAI、OpenRouter 或 Replicate) /baoyu-image-gen --prompt "把它变成蓝色" --image out.png --ref source.png ``` @@ -693,25 +699,31 @@ AI 驱动的生成后端。 | `--prompt`, `-p` | 提示词文本 | | `--promptfiles` | 从文件读取提示词(多文件拼接) | | `--image` | 输出图片路径(必需) | -| `--provider` | `google`、`openai` 或 `dashscope`(默认:google) | +| `--provider` | `google`、`openai`、`openrouter`、`dashscope` 或 `replicate`(默认:自动检测,优先 google) | | `--model`, `-m` | 模型 ID | | `--ar` | 宽高比(如 `16:9`、`1:1`、`4:3`) | | `--size` | 尺寸(如 `1024x1024`) | -| `--quality` | `normal` 或 `2k`(默认:normal) | -| `--ref` | 参考图片(仅 Google 多模态支持) | +| `--quality` | `normal` 或 `2k`(默认:`2k`) | +| `--ref` | 参考图片(Google、OpenAI、OpenRouter 或 Replicate) | **环境变量**(配置方法见[环境配置](#环境配置)): | 变量 | 说明 | 默认值 | |------|------|--------| | `OPENAI_API_KEY` | OpenAI API 密钥 | - | +| `OPENROUTER_API_KEY` | OpenRouter API 密钥 | - | | `GOOGLE_API_KEY` | Google API 密钥 | - | | `DASHSCOPE_API_KEY` | DashScope API 密钥(阿里云) | - | +| `REPLICATE_API_TOKEN` | Replicate API Token | - | | `OPENAI_IMAGE_MODEL` | OpenAI 模型 | `gpt-image-1.5` | +| `OPENROUTER_IMAGE_MODEL` | OpenRouter 模型 | `google/gemini-3.1-flash-image-preview` | | `GOOGLE_IMAGE_MODEL` | Google 模型 | `gemini-3-pro-image-preview` | | `DASHSCOPE_IMAGE_MODEL` | DashScope 模型 | `z-image-turbo` | +| `REPLICATE_IMAGE_MODEL` | Replicate 模型 | `google/nano-banana-pro` | | `OPENAI_BASE_URL` | 自定义 OpenAI 端点 | - | +| `OPENROUTER_BASE_URL` | 自定义 OpenRouter 端点 | `https://openrouter.ai/api/v1` | | `GOOGLE_BASE_URL` | 自定义 Google 端点 | - | | `DASHSCOPE_BASE_URL` | 自定义 DashScope 端点 | - | +| `REPLICATE_BASE_URL` | 自定义 Replicate 端点 | - | **服务商自动选择**: 1. 
如果指定了 `--provider` → 使用指定的 @@ -958,6 +970,11 @@ OPENAI_API_KEY=sk-xxx OPENAI_IMAGE_MODEL=gpt-image-1.5 # OPENAI_BASE_URL=https://api.openai.com/v1 +# OpenRouter +OPENROUTER_API_KEY=sk-or-xxx +OPENROUTER_IMAGE_MODEL=google/gemini-3.1-flash-image-preview +# OPENROUTER_BASE_URL=https://openrouter.ai/api/v1 + # Google GOOGLE_API_KEY=xxx GOOGLE_IMAGE_MODEL=gemini-3-pro-image-preview @@ -967,6 +984,11 @@ GOOGLE_IMAGE_MODEL=gemini-3-pro-image-preview DASHSCOPE_API_KEY=sk-xxx DASHSCOPE_IMAGE_MODEL=z-image-turbo # DASHSCOPE_BASE_URL=https://dashscope.aliyuncs.com/api/v1 + +# Replicate +REPLICATE_API_TOKEN=r8_xxx +REPLICATE_IMAGE_MODEL=google/nano-banana-pro +# REPLICATE_BASE_URL=https://api.replicate.com EOF ``` diff --git a/skills/baoyu-image-gen/SKILL.md b/skills/baoyu-image-gen/SKILL.md index 37d698f..0844c7c 100644 --- a/skills/baoyu-image-gen/SKILL.md +++ b/skills/baoyu-image-gen/SKILL.md @@ -1,7 +1,7 @@ --- name: baoyu-image-gen -description: AI image generation with OpenAI, Google, DashScope and Replicate APIs. Supports text-to-image, reference images, aspect ratios, and batch generation from saved prompt files. Sequential by default; use batch parallel generation when the user already has multiple prompts or wants stable multi-image throughput. Use when user asks to generate, create, or draw images. -version: 1.56.1 +description: AI image generation with OpenAI, Google, OpenRouter, DashScope and Replicate APIs. Supports text-to-image, reference images, aspect ratios, and batch generation from saved prompt files. Sequential by default; use batch parallel generation when the user already has multiple prompts or wants stable multi-image throughput. Use when user asks to generate, create, or draw images. +version: 1.56.2 metadata: openclaw: homepage: https://github.com/JimLiu/baoyu-skills#baoyu-image-gen @@ -13,7 +13,7 @@ metadata: # Image Generation (AI SDK) -Official API-based image generation. Supports OpenAI, Google, DashScope (阿里通义万象) and Replicate providers. 
+Official API-based image generation. Supports OpenAI, Google, OpenRouter, DashScope (阿里通义万相) and Replicate providers. ## Script Directory @@ -74,12 +74,18 @@ ${BUN_X} {baseDir}/scripts/main.ts --prompt "A cat" --image out.png --quality 2k # From prompt files ${BUN_X} {baseDir}/scripts/main.ts --promptfiles system.md content.md --image out.png -# With reference images (Google multimodal or OpenAI edits) +# With reference images (Google, OpenAI, OpenRouter, or Replicate) ${BUN_X} {baseDir}/scripts/main.ts --prompt "Make blue" --image out.png --ref source.png # With reference images (explicit provider/model) ${BUN_X} {baseDir}/scripts/main.ts --prompt "Make blue" --image out.png --provider google --model gemini-3-pro-image-preview --ref source.png +# OpenRouter (recommended default model) +${BUN_X} {baseDir}/scripts/main.ts --prompt "A cat" --image out.png --provider openrouter + +# OpenRouter with reference images +${BUN_X} {baseDir}/scripts/main.ts --prompt "Make blue" --image out.png --provider openrouter --model google/gemini-3.1-flash-image-preview --ref source.png + # Specific provider ${BUN_X} {baseDir}/scripts/main.ts --prompt "A cat" --image out.png --provider openai @@ -135,13 +141,13 @@ Paths in `promptFiles`, `image`, and `ref` are resolved relative to the batch fi | `--image ` | Output image path (required in single-image mode) | | `--batchfile ` | JSON batch file for multi-image generation | | `--jobs ` | Worker count for batch mode (default: auto, max from config, built-in default 10) | -| `--provider google\|openai\|dashscope\|replicate` | Force provider (default: auto-detect) | -| `--model `, `-m` | Model ID (Google: `gemini-3-pro-image-preview`, `gemini-3.1-flash-image-preview`; OpenAI: `gpt-image-1.5`, `gpt-image-1`) | +| `--provider google\|openai\|openrouter\|dashscope\|replicate` | Force provider (default: auto-detect) | +| `--model `, `-m` | Model ID (Google: `gemini-3-pro-image-preview`; OpenAI: `gpt-image-1.5`; OpenRouter: 
`google/gemini-3.1-flash-image-preview`) | | `--ar ` | Aspect ratio (e.g., `16:9`, `1:1`, `4:3`) | | `--size ` | Size (e.g., `1024x1024`) | | `--quality normal\|2k` | Quality preset (default: `2k`) | -| `--imageSize 1K\|2K\|4K` | Image size for Google (default: from quality) | -| `--ref ` | Reference images. Supported by Google multimodal, OpenAI GPT Image edits, and Replicate | +| `--imageSize 1K\|2K\|4K` | Image size for Google/OpenRouter (default: from quality) | +| `--ref ` | Reference images. Supported by Google multimodal, OpenAI GPT Image edits, OpenRouter multimodal models, and Replicate | | `--n ` | Number of images | | `--json` | JSON output | @@ -150,14 +156,19 @@ Paths in `promptFiles`, `image`, and `ref` are resolved relative to the batch fi | Variable | Description | |----------|-------------| | `OPENAI_API_KEY` | OpenAI API key | +| `OPENROUTER_API_KEY` | OpenRouter API key | | `GOOGLE_API_KEY` | Google API key | | `DASHSCOPE_API_KEY` | DashScope API key (阿里云) | | `REPLICATE_API_TOKEN` | Replicate API token | | `OPENAI_IMAGE_MODEL` | OpenAI model override | +| `OPENROUTER_IMAGE_MODEL` | OpenRouter model override (default: `google/gemini-3.1-flash-image-preview`) | | `GOOGLE_IMAGE_MODEL` | Google model override | | `DASHSCOPE_IMAGE_MODEL` | DashScope model override (default: z-image-turbo) | | `REPLICATE_IMAGE_MODEL` | Replicate model override (default: google/nano-banana-pro) | | `OPENAI_BASE_URL` | Custom OpenAI endpoint | +| `OPENROUTER_BASE_URL` | Custom OpenRouter endpoint (default: `https://openrouter.ai/api/v1`) | +| `OPENROUTER_HTTP_REFERER` | Optional app/site URL for OpenRouter attribution | +| `OPENROUTER_TITLE` | Optional app name for OpenRouter attribution | | `GOOGLE_BASE_URL` | Custom Google endpoint | | `DASHSCOPE_BASE_URL` | Custom DashScope endpoint | | `REPLICATE_BASE_URL` | Custom Replicate endpoint | @@ -182,6 +193,21 @@ Model priority (highest → lowest), applies to all providers: - Show: `Using [provider] / [model]` - Show switch 
hint: `Switch model: --model | EXTEND.md default_model.[provider] | env _IMAGE_MODEL` +### OpenRouter Models + +Use full OpenRouter model IDs, e.g.: + +- `google/gemini-3.1-flash-image-preview` (recommended, supports image output and reference-image workflows) +- `google/gemini-2.5-flash-image-preview` +- `black-forest-labs/flux.2-pro` +- Other OpenRouter image-capable model IDs + +Notes: + +- OpenRouter image generation uses `/chat/completions`, not the OpenAI `/images` endpoints +- If `--ref` is used, choose a multimodal model that supports image input and image output +- `--imageSize` maps to OpenRouter `imageGenerationOptions.size`; `--size ` is converted to the nearest OpenRouter size and inferred aspect ratio when possible + ### Replicate Models Supported model formats: @@ -201,19 +227,19 @@ ${BUN_X} {baseDir}/scripts/main.ts --prompt "A cat" --image out.png --provider r ## Provider Selection -1. `--ref` provided + no `--provider` → auto-select Google first, then OpenAI, then Replicate -2. `--provider` specified → use it (if `--ref`, must be `google`, `openai`, or `replicate`) +1. `--ref` provided + no `--provider` → auto-select Google first, then OpenAI, then OpenRouter, then Replicate +2. `--provider` specified → use it (if `--ref`, must be `google`, `openai`, `openrouter`, or `replicate`) 3. Only one API key available → use that provider 4. 
Multiple available → default to Google ## Quality Presets -| Preset | Google imageSize | OpenAI Size | Replicate resolution | Use Case | -|--------|------------------|-------------|----------------------|----------| -| `normal` | 1K | 1024px | 1K | Quick previews | -| `2k` (default) | 2K | 2048px | 2K | Covers, illustrations, infographics | +| Preset | Google imageSize | OpenAI Size | OpenRouter size | Replicate resolution | Use Case | +|--------|------------------|-------------|-----------------|----------------------|----------| +| `normal` | 1K | 1024px | 1K | 1K | Quick previews | +| `2k` (default) | 2K | 2048px | 2K | 2K | Covers, illustrations, infographics | -**Google imageSize**: Can be overridden with `--imageSize 1K|2K|4K` +**Google/OpenRouter imageSize**: Can be overridden with `--imageSize 1K|2K|4K` ## Aspect Ratios @@ -221,6 +247,7 @@ Supported: `1:1`, `16:9`, `9:16`, `4:3`, `3:4`, `2.35:1` - Google multimodal: uses `imageConfig.aspectRatio` - OpenAI: maps to closest supported size +- OpenRouter: sends `imageGenerationOptions.aspect_ratio`; if only `--size ` is given, aspect ratio is inferred automatically - Replicate: passes `aspect_ratio` to model; when `--ref` is provided without `--ar`, defaults to `match_input_image` ## Generation Mode diff --git a/skills/baoyu-image-gen/references/config/first-time-setup.md b/skills/baoyu-image-gen/references/config/first-time-setup.md index 18415c4..ef11cba 100644 --- a/skills/baoyu-image-gen/references/config/first-time-setup.md +++ b/skills/baoyu-image-gen/references/config/first-time-setup.md @@ -47,6 +47,8 @@ options: description: "Gemini multimodal - high quality, reference images, flexible sizes" - label: "OpenAI" description: "GPT Image - consistent quality, reliable output" + - label: "OpenRouter" + description: "Router for Gemini/FLUX/OpenAI-compatible image models" - label: "DashScope" description: "Alibaba Cloud - z-image-turbo, good for Chinese content" - label: "Replicate" @@ -69,6 +71,22 @@ 
options: description: "Fast generation, balanced quality and speed" ``` +### Question 2b: Default OpenRouter Model + +Only show if user selected OpenRouter. + +```yaml +header: "OpenRouter Model" +question: "Default OpenRouter image generation model?" +options: + - label: "google/gemini-3.1-flash-image-preview (Recommended)" + description: "Best general-purpose OpenRouter image model with reference-image workflows" + - label: "google/gemini-2.5-flash-image-preview" + description: "Fast Gemini preview model on OpenRouter" + - label: "black-forest-labs/flux.2-pro" + description: "Strong text-to-image quality through OpenRouter" +``` + ### Question 3: Default Quality ```yaml @@ -112,6 +130,7 @@ default_image_size: null default_model: google: [selected google model or null] openai: null + openrouter: [selected openrouter model or null] dashscope: null replicate: null --- @@ -147,6 +166,20 @@ options: description: "Previous generation GPT Image model" ``` +### OpenRouter Model Selection + +```yaml +header: "OpenRouter Model" +question: "Choose a default OpenRouter image generation model?" 
+options: + - label: "google/gemini-3.1-flash-image-preview (Recommended)" + description: "Recommended for image output and reference-image edits" + - label: "google/gemini-2.5-flash-image-preview" + description: "Fast preview-oriented image generation" + - label: "black-forest-labs/flux.2-pro" + description: "High-quality text-to-image through OpenRouter" +``` + ### DashScope Model Selection ```yaml @@ -183,6 +216,7 @@ After user selects a model: default_model: google: [value or null] openai: [value or null] + openrouter: [value or null] dashscope: [value or null] replicate: [value or null] ``` diff --git a/skills/baoyu-image-gen/references/config/preferences-schema.md b/skills/baoyu-image-gen/references/config/preferences-schema.md index 8c79021..ec252a1 100644 --- a/skills/baoyu-image-gen/references/config/preferences-schema.md +++ b/skills/baoyu-image-gen/references/config/preferences-schema.md @@ -11,17 +11,18 @@ description: EXTEND.md YAML schema for baoyu-image-gen user preferences --- version: 1 -default_provider: null # google|openai|dashscope|replicate|null (null = auto-detect) +default_provider: null # google|openai|openrouter|dashscope|replicate|null (null = auto-detect) default_quality: null # normal|2k|null (null = use default: 2k) default_aspect_ratio: null # "16:9"|"1:1"|"4:3"|"3:4"|"2.35:1"|null -default_image_size: null # 1K|2K|4K|null (Google only, overrides quality) +default_image_size: null # 1K|2K|4K|null (Google/OpenRouter, overrides quality) default_model: google: null # e.g., "gemini-3-pro-image-preview", "gemini-3.1-flash-image-preview" openai: null # e.g., "gpt-image-1.5", "gpt-image-1" + openrouter: null # e.g., "google/gemini-3.1-flash-image-preview" dashscope: null # e.g., "z-image-turbo" replicate: null # e.g., "google/nano-banana-pro" @@ -37,6 +38,9 @@ batch: openai: concurrency: 3 start_interval_ms: 1100 + openrouter: + concurrency: 3 + start_interval_ms: 1100 dashscope: concurrency: 3 start_interval_ms: 1100 @@ -51,9 +55,10 @@ 
batch: | `default_provider` | string\|null | null | Default provider (null = auto-detect) | | `default_quality` | string\|null | null | Default quality (null = 2k) | | `default_aspect_ratio` | string\|null | null | Default aspect ratio | -| `default_image_size` | string\|null | null | Google image size (overrides quality) | +| `default_image_size` | string\|null | null | Google/OpenRouter image size (overrides quality) | | `default_model.google` | string\|null | null | Google default model | | `default_model.openai` | string\|null | null | OpenAI default model | +| `default_model.openrouter` | string\|null | null | OpenRouter default model | | `default_model.dashscope` | string\|null | null | DashScope default model | | `default_model.replicate` | string\|null | null | Replicate default model | | `batch.max_workers` | int\|null | 10 | Batch worker cap | @@ -82,6 +87,7 @@ default_image_size: 2K default_model: google: "gemini-3-pro-image-preview" openai: "gpt-image-1.5" + openrouter: "google/gemini-3.1-flash-image-preview" dashscope: "z-image-turbo" replicate: "google/nano-banana-pro" batch: @@ -90,5 +96,8 @@ batch: replicate: concurrency: 5 start_interval_ms: 700 + openrouter: + concurrency: 3 + start_interval_ms: 1100 --- ``` diff --git a/skills/baoyu-image-gen/scripts/main.ts b/skills/baoyu-image-gen/scripts/main.ts index f6b4f5e..504489b 100644 --- a/skills/baoyu-image-gen/scripts/main.ts +++ b/skills/baoyu-image-gen/scripts/main.ts @@ -53,6 +53,7 @@ const DEFAULT_PROVIDER_RATE_LIMITS: Record = { replicate: { concurrency: 5, startIntervalMs: 700 }, google: { concurrency: 3, startIntervalMs: 1100 }, openai: { concurrency: 3, startIntervalMs: 1100 }, + openrouter: { concurrency: 3, startIntervalMs: 1100 }, dashscope: { concurrency: 3, startIntervalMs: 1100 }, }; @@ -68,13 +69,13 @@ Options: --image Output image path (required in single-image mode) --batchfile JSON batch file for multi-image generation --jobs Worker count for batch mode (default: auto, max from 
config, built-in default 10) - --provider google|openai|dashscope|replicate Force provider (auto-detect by default) + --provider google|openai|openrouter|dashscope|replicate Force provider (auto-detect by default) -m, --model Model ID --ar Aspect ratio (e.g., 16:9, 1:1, 4:3) --size Size (e.g., 1024x1024) --quality normal|2k Quality preset (default: 2k) - --imageSize 1K|2K|4K Image size for Google (default: from quality) - --ref Reference images (Google multimodal, OpenAI GPT Image edits, or Replicate) + --imageSize 1K|2K|4K Image size for Google/OpenRouter (default: from quality) + --ref Reference images (Google multimodal, OpenAI GPT Image edits, OpenRouter multimodal, or Replicate) --n Number of images for the current task (default: 1) --json JSON output -h, --help Show help @@ -101,16 +102,21 @@ Behavior: Environment variables: OPENAI_API_KEY OpenAI API key + OPENROUTER_API_KEY OpenRouter API key GOOGLE_API_KEY Google API key GEMINI_API_KEY Gemini API key (alias for GOOGLE_API_KEY) DASHSCOPE_API_KEY DashScope API key REPLICATE_API_TOKEN Replicate API token OPENAI_IMAGE_MODEL Default OpenAI model (gpt-image-1.5) + OPENROUTER_IMAGE_MODEL Default OpenRouter model (google/gemini-3.1-flash-image-preview) GOOGLE_IMAGE_MODEL Default Google model (gemini-3-pro-image-preview) DASHSCOPE_IMAGE_MODEL Default DashScope model (z-image-turbo) REPLICATE_IMAGE_MODEL Default Replicate model (google/nano-banana-pro) OPENAI_BASE_URL Custom OpenAI endpoint OPENAI_IMAGE_USE_CHAT Use /chat/completions instead of /images/generations (true|false) + OPENROUTER_BASE_URL Custom OpenRouter endpoint + OPENROUTER_HTTP_REFERER Optional app URL for OpenRouter attribution + OPENROUTER_TITLE Optional app name for OpenRouter attribution GOOGLE_BASE_URL Custom Google endpoint DASHSCOPE_BASE_URL Custom DashScope endpoint REPLICATE_BASE_URL Custom Replicate endpoint @@ -206,7 +212,13 @@ function parseArgs(argv: string[]): CliArgs { if (a === "--provider") { const v = argv[++i]; - if (v !== "google" 
&& v !== "openai" && v !== "dashscope" && v !== "replicate") { + if ( + v !== "google" && + v !== "openai" && + v !== "openrouter" && + v !== "dashscope" && + v !== "replicate" + ) { throw new Error(`Invalid provider: ${v}`); } out.provider = v; @@ -352,7 +364,13 @@ function parseSimpleYaml(yaml: string): Partial { } else if (key === "default_image_size") { config.default_image_size = value === "null" ? null : value as "1K" | "2K" | "4K"; } else if (key === "default_model") { - config.default_model = { google: null, openai: null, dashscope: null, replicate: null }; + config.default_model = { + google: null, + openai: null, + openrouter: null, + dashscope: null, + replicate: null, + }; currentKey = "default_model"; currentProvider = null; } else if (key === "batch") { @@ -370,7 +388,13 @@ function parseSimpleYaml(yaml: string): Partial { } else if ( currentKey === "provider_limits" && indent >= 4 && - (key === "google" || key === "openai" || key === "dashscope" || key === "replicate") + ( + key === "google" || + key === "openai" || + key === "openrouter" || + key === "dashscope" || + key === "replicate" + ) ) { config.batch ??= {}; config.batch.provider_limits ??= {}; @@ -378,7 +402,13 @@ function parseSimpleYaml(yaml: string): Partial { currentProvider = key; } else if ( currentKey === "default_model" && - (key === "google" || key === "openai" || key === "dashscope" || key === "replicate") + ( + key === "google" || + key === "openai" || + key === "openrouter" || + key === "dashscope" || + key === "replicate" + ) ) { const cleaned = value.replace(/['"]/g, ""); config.default_model![key] = cleaned === "null" ? 
null : cleaned; @@ -466,10 +496,11 @@ function getConfiguredProviderRateLimits( replicate: { ...DEFAULT_PROVIDER_RATE_LIMITS.replicate }, google: { ...DEFAULT_PROVIDER_RATE_LIMITS.google }, openai: { ...DEFAULT_PROVIDER_RATE_LIMITS.openai }, + openrouter: { ...DEFAULT_PROVIDER_RATE_LIMITS.openrouter }, dashscope: { ...DEFAULT_PROVIDER_RATE_LIMITS.dashscope }, }; - for (const provider of ["replicate", "google", "openai", "dashscope"] as Provider[]) { + for (const provider of ["replicate", "google", "openai", "openrouter", "dashscope"] as Provider[]) { const envPrefix = `BAOYU_IMAGE_GEN_${provider.toUpperCase()}`; const extendLimit = extendConfig.batch?.provider_limits?.[provider]; configured[provider] = { @@ -522,10 +553,11 @@ function detectProvider(args: CliArgs): Provider { args.provider && args.provider !== "google" && args.provider !== "openai" && + args.provider !== "openrouter" && args.provider !== "replicate" ) { throw new Error( - "Reference images require a ref-capable provider. Use --provider google (Gemini multimodal), --provider openai (GPT Image edits), or --provider replicate." + "Reference images require a ref-capable provider. Use --provider google (Gemini multimodal), --provider openai (GPT Image edits), --provider openrouter (OpenRouter multimodal), or --provider replicate." ); } @@ -533,21 +565,24 @@ function detectProvider(args: CliArgs): Provider { const hasGoogle = !!(process.env.GOOGLE_API_KEY || process.env.GEMINI_API_KEY); const hasOpenai = !!process.env.OPENAI_API_KEY; + const hasOpenrouter = !!process.env.OPENROUTER_API_KEY; const hasDashscope = !!process.env.DASHSCOPE_API_KEY; const hasReplicate = !!process.env.REPLICATE_API_TOKEN; if (args.referenceImages.length > 0) { if (hasGoogle) return "google"; if (hasOpenai) return "openai"; + if (hasOpenrouter) return "openrouter"; if (hasReplicate) return "replicate"; throw new Error( - "Reference images require Google, OpenAI or Replicate. 
Set GOOGLE_API_KEY/GEMINI_API_KEY, OPENAI_API_KEY, or REPLICATE_API_TOKEN, or remove --ref." + "Reference images require Google, OpenAI, OpenRouter or Replicate. Set GOOGLE_API_KEY/GEMINI_API_KEY, OPENAI_API_KEY, OPENROUTER_API_KEY, or REPLICATE_API_TOKEN, or remove --ref." ); } const available = [ hasGoogle && "google", hasOpenai && "openai", + hasOpenrouter && "openrouter", hasDashscope && "dashscope", hasReplicate && "replicate", ].filter(Boolean) as Provider[]; @@ -556,7 +591,7 @@ function detectProvider(args: CliArgs): Provider { if (available.length > 1) return available[0]!; throw new Error( - "No API key found. Set GOOGLE_API_KEY, GEMINI_API_KEY, OPENAI_API_KEY, DASHSCOPE_API_KEY, or REPLICATE_API_TOKEN.\n" + + "No API key found. Set GOOGLE_API_KEY, GEMINI_API_KEY, OPENAI_API_KEY, OPENROUTER_API_KEY, DASHSCOPE_API_KEY, or REPLICATE_API_TOKEN.\n" + "Create ~/.baoyu-skills/.env or /.baoyu-skills/.env with your keys." ); } @@ -596,6 +631,7 @@ async function loadProviderModule(provider: Provider): Promise { if (provider === "google") return (await import("./providers/google")) as ProviderModule; if (provider === "dashscope") return (await import("./providers/dashscope")) as ProviderModule; if (provider === "replicate") return (await import("./providers/replicate")) as ProviderModule; + if (provider === "openrouter") return (await import("./providers/openrouter")) as ProviderModule; return (await import("./providers/openai")) as ProviderModule; } @@ -617,6 +653,9 @@ function getModelForProvider( if (extendConfig.default_model) { if (provider === "google" && extendConfig.default_model.google) return extendConfig.default_model.google; if (provider === "openai" && extendConfig.default_model.openai) return extendConfig.default_model.openai; + if (provider === "openrouter" && extendConfig.default_model.openrouter) { + return extendConfig.default_model.openrouter; + } if (provider === "dashscope" && extendConfig.default_model.dashscope) return 
import path from "node:path";
import { readFile } from "node:fs/promises";
import type { CliArgs } from "../types";

/** Model used when `OPENROUTER_IMAGE_MODEL` is not set. */
const DEFAULT_MODEL = "google/gemini-3.1-flash-image-preview";

/** Resolution buckets sent to OpenRouter as the image `size` option. */
type OpenRouterImageSize = "1K" | "2K" | "4K";

const IMAGE_SIZES: readonly OpenRouterImageSize[] = ["1K", "2K", "4K"];

/** An image reference as it may appear in a response: a bare string or `{ url }`. */
type OpenRouterImageRef = string | { url?: string | null } | null;

type OpenRouterImageEntry = {
  image_url?: OpenRouterImageRef;
  imageUrl?: OpenRouterImageRef;
};

type OpenRouterMessagePart = {
  type?: string;
  text?: string;
  image_url?: OpenRouterImageRef;
  imageUrl?: OpenRouterImageRef;
};

type OpenRouterResponse = {
  error?: { code?: number | string; message?: string } | null;
  choices?: Array<{
    message?: {
      images?: OpenRouterImageEntry[];
      content?: string | OpenRouterMessagePart[] | null;
    };
  }>;
};

/**
 * A complete `data:image/...;base64,` URL. Optional media-type parameters
 * (e.g. `;charset=utf-8`) and whitespace inside the payload are tolerated.
 */
const DATA_URL_PATTERN = /^data:image\/[^;,]+(?:;[^;,]+)*;base64,([A-Za-z0-9+/=_\s-]+)$/i;

/** A base64 image data URL embedded anywhere inside a longer text. */
const EMBEDDED_DATA_URL_PATTERN = /data:image\/[^;,\s]+(?:;[^;,\s]+)*;base64,[A-Za-z0-9+/=_-]+/i;

/** A bare base64 (or base64url) payload with whitespace already removed. */
const RAW_BASE64_PATTERN = /^[A-Za-z0-9+/_-]+={0,2}$/;

/**
 * Returns the model to use when the caller does not pick one.
 *
 * @returns `OPENROUTER_IMAGE_MODEL` when set and non-empty, otherwise the built-in default.
 */
export function getDefaultModel(): string {
  return process.env.OPENROUTER_IMAGE_MODEL || DEFAULT_MODEL;
}

/** Reads the API key from `OPENROUTER_API_KEY`; `null` when unset or empty. */
function getApiKey(): string | null {
  return process.env.OPENROUTER_API_KEY || null;
}

/** Returns the API base URL (`OPENROUTER_BASE_URL` or the public endpoint) without trailing slashes. */
function getBaseUrl(): string {
  const base = process.env.OPENROUTER_BASE_URL?.trim() || "https://openrouter.ai/api/v1";
  return base.replace(/\/+$/g, "");
}

/**
 * Builds the request headers.
 *
 * @param apiKey Bearer token for the `Authorization` header.
 * @returns JSON content type and authorization, plus the optional attribution headers
 *          taken from `OPENROUTER_HTTP_REFERER` and `OPENROUTER_TITLE` when those are set.
 */
function getHeaders(apiKey: string): Record<string, string> {
  const headers: Record<string, string> = {
    "Content-Type": "application/json",
    Authorization: `Bearer ${apiKey}`,
  };

  const referer = process.env.OPENROUTER_HTTP_REFERER?.trim();
  if (referer) {
    headers["HTTP-Referer"] = referer;
  }

  const title = process.env.OPENROUTER_TITLE?.trim();
  if (title) {
    // The title is sent under both header names.
    headers["X-OpenRouter-Title"] = title;
    headers["X-Title"] = title;
  }

  return headers;
}

/**
 * Parses a `WIDTHxHEIGHT` string.
 *
 * Surrounding whitespace is ignored and `x`, `X`, `*` and `×` are all accepted as the
 * separator, so `1024x1024`, `1024 X 768` and `1024*1024` parse alike.
 *
 * @returns The positive dimensions, or `null` when the value is not a valid pixel size.
 */
function parsePixelSize(value: string): { width: number; height: number } | null {
  const match = /^(\d+)\s*[xX*×]\s*(\d+)$/.exec(value.trim());
  if (!match) return null;

  const [, rawWidth = "", rawHeight = ""] = match;
  const width = Number.parseInt(rawWidth, 10);
  const height = Number.parseInt(rawHeight, 10);

  // Absurdly long digit runs parse to Infinity; zero is not a usable dimension.
  if (!Number.isFinite(width) || !Number.isFinite(height) || width <= 0 || height <= 0) {
    return null;
  }

  return { width, height };
}

/** Greatest common divisor (Euclid). Never returns 0, so it is always safe to divide by. */
function gcd(a: number, b: number): number {
  let x = Math.abs(a);
  let y = Math.abs(b);
  while (y !== 0) {
    const next = x % y;
    x = y;
    y = next;
  }
  return x || 1;
}

/**
 * Derives a reduced `W:H` aspect ratio from a pixel size, e.g. `1920x1080` -> `16:9`.
 *
 * @returns The ratio string, or `null` when `size` is missing or unparseable.
 */
function inferAspectRatio(size: string | null): string | null {
  if (!size) return null;
  const parsed = parsePixelSize(size);
  if (!parsed) return null;

  const divisor = gcd(parsed.width, parsed.height);
  return `${parsed.width / divisor}:${parsed.height / divisor}`;
}

/**
 * Maps a pixel size onto a resolution bucket by its longest edge:
 * up to 1024 -> `1K`, up to 2048 -> `2K`, anything larger -> `4K`.
 *
 * @returns The bucket, or `null` when `size` is missing or unparseable.
 */
function inferImageSize(size: string | null): OpenRouterImageSize | null {
  if (!size) return null;
  const parsed = parsePixelSize(size);
  if (!parsed) return null;

  const longestEdge = Math.max(parsed.width, parsed.height);
  if (longestEdge <= 1024) return "1K";
  if (longestEdge <= 2048) return "2K";
  return "4K";
}

/** Normalizes a user-supplied bucket such as `" 2k "` to `"2K"`; `null` when not a known bucket. */
function normalizeImageSize(value: unknown): OpenRouterImageSize | null {
  if (typeof value !== "string") return null;
  const upper = value.trim().toUpperCase();
  return IMAGE_SIZES.find((candidate) => candidate === upper) ?? null;
}

/**
 * Picks the resolution bucket for a request.
 *
 * Precedence: a recognised `args.imageSize` (case-insensitive), then a bucket inferred
 * from `args.size`, then the quality preset (`normal` -> `1K`, anything else -> `2K`).
 * An unrecognised `imageSize` is ignored rather than forwarded to the API.
 */
function getImageSize(args: CliArgs): OpenRouterImageSize {
  const explicit = normalizeImageSize(args.imageSize);
  if (explicit) return explicit;

  const inferredFromSize = inferImageSize(args.size);
  if (inferredFromSize) return inferredFromSize;

  return args.quality === "normal" ? "1K" : "2K";
}

/** Returns the explicit aspect ratio, else one inferred from `args.size`, else `null`. */
function getAspectRatio(args: CliArgs): string | null {
  return args.aspectRatio || inferAspectRatio(args.size);
}

/** Guesses an image MIME type from the file extension; unknown extensions are treated as PNG. */
function getMimeType(filename: string): string {
  const ext = path.extname(filename).toLowerCase();
  if (ext === ".jpg" || ext === ".jpeg") return "image/jpeg";
  if (ext === ".webp") return "image/webp";
  if (ext === ".gif") return "image/gif";
  return "image/png";
}

/**
 * Reads a local image and encodes it as a base64 data URL.
 *
 * @throws Whatever `readFile` rejects with (missing file, permissions, ...).
 */
async function readImageAsDataUrl(filePath: string): Promise<string> {
  const bytes = await readFile(filePath);
  return `data:${getMimeType(filePath)};base64,${bytes.toString("base64")}`;
}

/**
 * Builds the user message content: the text prompt followed by one `image_url` part
 * per reference image, in the order given.
 */
function buildContent(prompt: string, referenceImages: string[]): Array<Record<string, unknown>> {
  const content: Array<Record<string, unknown>> = [{ type: "text", text: prompt }];

  for (const imageUrl of referenceImages) {
    content.push({
      type: "image_url",
      image_url: { url: imageUrl },
    });
  }

  return content;
}

/**
 * Pulls the image reference out of a response entry, accepting both the `image_url`
 * and `imageUrl` spellings and both the bare-string and `{ url }` shapes.
 */
function extractImageUrl(entry: OpenRouterImageEntry | OpenRouterMessagePart): string | null {
  const value = entry.image_url ?? entry.imageUrl;
  if (!value) return null;
  return typeof value === "string" ? value : (value.url ?? null);
}

/**
 * Decodes a `data:image/...;base64,` URL into bytes.
 *
 * Leading/trailing whitespace, extra media-type parameters and line breaks inside the
 * payload are tolerated.
 *
 * @returns The decoded bytes, or `null` when `value` is not a base64 image data URL.
 */
function decodeDataUrl(value: string): Uint8Array | null {
  const match = DATA_URL_PATTERN.exec(value.trim());
  if (!match) return null;

  const payload = (match[1] ?? "").replace(/\s+/g, "");
  if (payload.length === 0) return null;

  return Uint8Array.from(Buffer.from(payload, "base64"));
}

/**
 * Looks for an inline image in free text: either the text is itself a data URL, or it
 * contains one (for example inside a markdown image link).
 */
function findInlineImage(text: string): Uint8Array | null {
  const exact = decodeDataUrl(text);
  if (exact) return exact;

  const embedded = EMBEDDED_DATA_URL_PATTERN.exec(text);
  return embedded ? decodeDataUrl(embedded[0]) : null;
}

/**
 * Resolves an image reference to bytes.
 *
 * Accepts a base64 image data URL, an http(s) URL (fetched), or a bare base64 string.
 *
 * @throws Error when the download fails or the value is none of the supported forms.
 *         Node's base64 decoder silently skips invalid characters, so decoding an
 *         unrecognised value would yield a corrupt image instead of an error.
 */
async function downloadImage(value: string): Promise<Uint8Array> {
  const inline = decodeDataUrl(value);
  if (inline) return inline;

  const trimmed = value.trim();
  if (/^https?:\/\//i.test(trimmed)) {
    const response = await fetch(trimmed);
    if (!response.ok) {
      throw new Error(`Failed to download OpenRouter image: ${response.status}`);
    }
    const buffer = await response.arrayBuffer();
    return new Uint8Array(buffer);
  }

  const compact = trimmed.replace(/\s+/g, "");
  if (!RAW_BASE64_PATTERN.test(compact)) {
    throw new Error(
      "Unsupported image payload in OpenRouter response (expected a data URL, an http(s) URL, or base64 data)"
    );
  }

  return Uint8Array.from(Buffer.from(compact, "base64"));
}

/**
 * Finds the first image in a chat-completion response.
 *
 * Search order: `message.images`, then image parts of an array `message.content`, then
 * data URLs found in text content.
 *
 * @throws Error carrying the API's own error message when the body has one, otherwise
 *         "No image in OpenRouter response".
 */
async function extractImageFromResponse(result: OpenRouterResponse): Promise<Uint8Array> {
  const message = result.choices?.[0]?.message;

  for (const image of message?.images ?? []) {
    const imageUrl = extractImageUrl(image);
    if (imageUrl) return downloadImage(imageUrl);
  }

  const content = message?.content;
  if (Array.isArray(content)) {
    for (const item of content) {
      const imageUrl = extractImageUrl(item);
      if (imageUrl) return downloadImage(imageUrl);

      if (item.type === "text" && item.text) {
        const inline = findInlineImage(item.text);
        if (inline) return inline;
      }
    }
  } else if (typeof content === "string") {
    const inline = findInlineImage(content);
    if (inline) return inline;
  }

  // Surface an error object from the body rather than the generic "no image" message.
  const apiError = result.error?.message;
  if (apiError) {
    throw new Error(`OpenRouter API error: ${apiError}`);
  }

  throw new Error("No image in OpenRouter response");
}

/**
 * Generates an image through OpenRouter's chat-completions endpoint.
 *
 * @param prompt Text prompt sent as the first content part.
 * @param model  OpenRouter model id.
 * @param args   CLI options; reads `referenceImages`, `imageSize`, `size`, `quality`
 *               and `aspectRatio`.
 * @returns The generated image bytes.
 * @throws Error when `OPENROUTER_API_KEY` is missing, a reference image cannot be read,
 *         the API answers with a non-2xx status, or the response contains no image.
 */
export async function generateImage(
  prompt: string,
  model: string,
  args: CliArgs
): Promise<Uint8Array> {
  const apiKey = getApiKey();
  if (!apiKey) {
    throw new Error("OPENROUTER_API_KEY is required. Get one at https://openrouter.ai/settings/keys");
  }

  // Read all reference images concurrently; Promise.all keeps the input order.
  const referenceImages = await Promise.all(
    (args.referenceImages ?? []).map((refPath: string) => readImageAsDataUrl(refPath))
  );

  const imageGenerationOptions: Record<string, unknown> = {
    size: getImageSize(args),
  };

  const aspectRatio = getAspectRatio(args);
  if (aspectRatio) {
    imageGenerationOptions.aspect_ratio = aspectRatio;
  }

  // NOTE(review): OpenRouter's REST reference appears to name these fields `image_config`
  // (with `image_size` / `aspect_ratio`) and `provider`; the camelCase keys below look
  // like SDK spellings — confirm the endpoint actually honours them.
  const body = {
    model,
    messages: [
      {
        role: "user",
        content: buildContent(prompt, referenceImages),
      },
    ],
    modalities: ["image", "text"],
    max_tokens: 256,
    imageGenerationOptions,
    providerPreferences: {
      require_parameters: true,
    },
  };

  console.log(`Generating image with OpenRouter (${model})...`, imageGenerationOptions);

  const response = await fetch(`${getBaseUrl()}/chat/completions`, {
    method: "POST",
    headers: getHeaders(apiKey),
    body: JSON.stringify(body),
  });

  if (!response.ok) {
    const errorText = await response.text();
    throw new Error(`OpenRouter API error (${response.status}): ${errorText}`);
  }

  const result = (await response.json()) as OpenRouterResponse;
  return extractImageFromResponse(result);
}