feat(baoyu-image-gen): add OpenRouter provider support

This commit is contained in:
Jim Liu 宝玉 2026-03-13 09:18:34 -05:00
parent c1e1526c84
commit 12b43e166d
9 changed files with 450 additions and 41 deletions

View File

@ -31,7 +31,7 @@ Execute: `${BUN_X} skills/<skill>/scripts/main.ts [options]`
- **Bun**: TypeScript runtime (`bun` preferred, fallback `npx -y bun`)
- **Chrome**: Required for CDP-based skills (gemini-web, post-to-x/wechat/weibo, url-to-markdown). All CDP skills share a single profile, override via `BAOYU_CHROME_PROFILE_DIR` env var. Platform paths: [docs/chrome-profile.md](docs/chrome-profile.md)
- **Image generation APIs**: `baoyu-image-gen` requires API key (OpenAI, Google, DashScope, or Replicate) configured in EXTEND.md
- **Image generation APIs**: `baoyu-image-gen` requires API key (OpenAI, Google, OpenRouter, DashScope, or Replicate) configured in EXTEND.md
- **Gemini Web auth**: Browser cookies (first run opens Chrome for login, `--login` to refresh)
## Security

View File

@ -665,7 +665,7 @@ AI-powered generation backends.
#### baoyu-image-gen
AI SDK-based image generation using official OpenAI, Google and DashScope (Aliyun Tongyi Wanxiang) APIs. Supports text-to-image, reference images, aspect ratios, and quality presets.
AI SDK-based image generation using OpenAI, Google, OpenRouter, DashScope (Aliyun Tongyi Wanxiang), and Replicate APIs. Supports text-to-image, reference images, aspect ratios, and quality presets.
```bash
# Basic generation (auto-detect provider)
@ -680,10 +680,16 @@ AI SDK-based image generation using official OpenAI, Google and DashScope (Aliyu
# Specific provider
/baoyu-image-gen --prompt "A cat" --image cat.png --provider openai
# OpenRouter
/baoyu-image-gen --prompt "A cat" --image cat.png --provider openrouter
# DashScope (Aliyun Tongyi Wanxiang)
/baoyu-image-gen --prompt "一只可爱的猫" --image cat.png --provider dashscope
# With reference images (Google multimodal only)
# Replicate
/baoyu-image-gen --prompt "A cat" --image cat.png --provider replicate
# With reference images (Google, OpenAI, OpenRouter, or Replicate)
/baoyu-image-gen --prompt "Make it blue" --image out.png --ref source.png
```
@ -693,25 +699,31 @@ AI SDK-based image generation using official OpenAI, Google and DashScope (Aliyu
| `--prompt`, `-p` | Prompt text |
| `--promptfiles` | Read prompt from files (concatenated) |
| `--image` | Output image path (required) |
| `--provider` | `google`, `openai` or `dashscope` (default: google) |
| `--provider` | `google`, `openai`, `openrouter`, `dashscope` or `replicate` (default: auto-detect; prefers google) |
| `--model`, `-m` | Model ID |
| `--ar` | Aspect ratio (e.g., `16:9`, `1:1`, `4:3`) |
| `--size` | Size (e.g., `1024x1024`) |
| `--quality` | `normal` or `2k` (default: normal) |
| `--ref` | Reference images (Google multimodal only) |
| `--quality` | `normal` or `2k` (default: `2k`) |
| `--ref` | Reference images (Google, OpenAI, OpenRouter or Replicate) |
**Environment Variables** (see [Environment Configuration](#environment-configuration) for setup):
| Variable | Description | Default |
|----------|-------------|---------|
| `OPENAI_API_KEY` | OpenAI API key | - |
| `OPENROUTER_API_KEY` | OpenRouter API key | - |
| `GOOGLE_API_KEY` | Google API key | - |
| `DASHSCOPE_API_KEY` | DashScope API key (Aliyun) | - |
| `REPLICATE_API_TOKEN` | Replicate API token | - |
| `OPENAI_IMAGE_MODEL` | OpenAI model | `gpt-image-1.5` |
| `OPENROUTER_IMAGE_MODEL` | OpenRouter model | `google/gemini-3.1-flash-image-preview` |
| `GOOGLE_IMAGE_MODEL` | Google model | `gemini-3-pro-image-preview` |
| `DASHSCOPE_IMAGE_MODEL` | DashScope model | `z-image-turbo` |
| `REPLICATE_IMAGE_MODEL` | Replicate model | `google/nano-banana-pro` |
| `OPENAI_BASE_URL` | Custom OpenAI endpoint | - |
| `OPENROUTER_BASE_URL` | Custom OpenRouter endpoint | `https://openrouter.ai/api/v1` |
| `GOOGLE_BASE_URL` | Custom Google endpoint | - |
| `DASHSCOPE_BASE_URL` | Custom DashScope endpoint | - |
| `REPLICATE_BASE_URL` | Custom Replicate endpoint | - |
**Provider Auto-Selection**:
1. If `--provider` specified → use it
@ -958,6 +970,11 @@ OPENAI_API_KEY=sk-xxx
OPENAI_IMAGE_MODEL=gpt-image-1.5
# OPENAI_BASE_URL=https://api.openai.com/v1
# OpenRouter
OPENROUTER_API_KEY=sk-or-xxx
OPENROUTER_IMAGE_MODEL=google/gemini-3.1-flash-image-preview
# OPENROUTER_BASE_URL=https://openrouter.ai/api/v1
# Google
GOOGLE_API_KEY=xxx
GOOGLE_IMAGE_MODEL=gemini-3-pro-image-preview
@ -967,6 +984,11 @@ GOOGLE_IMAGE_MODEL=gemini-3-pro-image-preview
DASHSCOPE_API_KEY=sk-xxx
DASHSCOPE_IMAGE_MODEL=z-image-turbo
# DASHSCOPE_BASE_URL=https://dashscope.aliyuncs.com/api/v1
# Replicate
REPLICATE_API_TOKEN=r8_xxx
REPLICATE_IMAGE_MODEL=google/nano-banana-pro
# REPLICATE_BASE_URL=https://api.replicate.com
EOF
```

View File

@ -665,7 +665,7 @@ AI 驱动的生成后端。
#### baoyu-image-gen
基于 AI SDK 的图像生成,使用官方 OpenAI、Google 和 DashScope阿里通义万相API。支持文生图、参考图、宽高比和质量预设。
基于 AI SDK 的图像生成，支持 OpenAI、Google、OpenRouter、DashScope（阿里通义万相）和 Replicate API。支持文生图、参考图、宽高比和质量预设。
```bash
# 基础生成(自动检测服务商)
@ -680,10 +680,16 @@ AI 驱动的生成后端。
# 指定服务商
/baoyu-image-gen --prompt "一只猫" --image cat.png --provider openai
# OpenRouter
/baoyu-image-gen --prompt "一只猫" --image cat.png --provider openrouter
# DashScope阿里通义万相
/baoyu-image-gen --prompt "一只可爱的猫" --image cat.png --provider dashscope
# 带参考图(仅 Google 多模态支持)
# Replicate
/baoyu-image-gen --prompt "一只猫" --image cat.png --provider replicate
# 带参考图（Google、OpenAI、OpenRouter 或 Replicate）
/baoyu-image-gen --prompt "把它变成蓝色" --image out.png --ref source.png
```
@ -693,25 +699,31 @@ AI 驱动的生成后端。
| `--prompt`, `-p` | 提示词文本 |
| `--promptfiles` | 从文件读取提示词(多文件拼接) |
| `--image` | 输出图片路径(必需) |
| `--provider` | `google`、`openai``dashscope`(默认:google |
| `--provider` | `google`、`openai`、`openrouter`、`dashscope` 或 `replicate`（默认：自动检测，优先 google） |
| `--model`, `-m` | 模型 ID |
| `--ar` | 宽高比(如 `16:9`、`1:1`、`4:3` |
| `--size` | 尺寸(如 `1024x1024` |
| `--quality` | `normal``2k`(默认:normal |
| `--ref` | 参考图片(仅 Google 多模态支持 |
| `--quality` | `normal` 或 `2k`（默认：`2k`） |
| `--ref` | 参考图片（Google、OpenAI、OpenRouter 或 Replicate） |
**环境变量**(配置方法见[环境配置](#环境配置)
| 变量 | 说明 | 默认值 |
|------|------|--------|
| `OPENAI_API_KEY` | OpenAI API 密钥 | - |
| `OPENROUTER_API_KEY` | OpenRouter API 密钥 | - |
| `GOOGLE_API_KEY` | Google API 密钥 | - |
| `DASHSCOPE_API_KEY` | DashScope API 密钥(阿里云) | - |
| `REPLICATE_API_TOKEN` | Replicate API Token | - |
| `OPENAI_IMAGE_MODEL` | OpenAI 模型 | `gpt-image-1.5` |
| `OPENROUTER_IMAGE_MODEL` | OpenRouter 模型 | `google/gemini-3.1-flash-image-preview` |
| `GOOGLE_IMAGE_MODEL` | Google 模型 | `gemini-3-pro-image-preview` |
| `DASHSCOPE_IMAGE_MODEL` | DashScope 模型 | `z-image-turbo` |
| `REPLICATE_IMAGE_MODEL` | Replicate 模型 | `google/nano-banana-pro` |
| `OPENAI_BASE_URL` | 自定义 OpenAI 端点 | - |
| `OPENROUTER_BASE_URL` | 自定义 OpenRouter 端点 | `https://openrouter.ai/api/v1` |
| `GOOGLE_BASE_URL` | 自定义 Google 端点 | - |
| `DASHSCOPE_BASE_URL` | 自定义 DashScope 端点 | - |
| `REPLICATE_BASE_URL` | 自定义 Replicate 端点 | - |
**服务商自动选择**
1. 如果指定了 `--provider` → 使用指定的
@ -958,6 +970,11 @@ OPENAI_API_KEY=sk-xxx
OPENAI_IMAGE_MODEL=gpt-image-1.5
# OPENAI_BASE_URL=https://api.openai.com/v1
# OpenRouter
OPENROUTER_API_KEY=sk-or-xxx
OPENROUTER_IMAGE_MODEL=google/gemini-3.1-flash-image-preview
# OPENROUTER_BASE_URL=https://openrouter.ai/api/v1
# Google
GOOGLE_API_KEY=xxx
GOOGLE_IMAGE_MODEL=gemini-3-pro-image-preview
@ -967,6 +984,11 @@ GOOGLE_IMAGE_MODEL=gemini-3-pro-image-preview
DASHSCOPE_API_KEY=sk-xxx
DASHSCOPE_IMAGE_MODEL=z-image-turbo
# DASHSCOPE_BASE_URL=https://dashscope.aliyuncs.com/api/v1
# Replicate
REPLICATE_API_TOKEN=r8_xxx
REPLICATE_IMAGE_MODEL=google/nano-banana-pro
# REPLICATE_BASE_URL=https://api.replicate.com
EOF
```

View File

@ -1,7 +1,7 @@
---
name: baoyu-image-gen
description: AI image generation with OpenAI, Google, DashScope and Replicate APIs. Supports text-to-image, reference images, aspect ratios, and batch generation from saved prompt files. Sequential by default; use batch parallel generation when the user already has multiple prompts or wants stable multi-image throughput. Use when user asks to generate, create, or draw images.
version: 1.56.1
description: AI image generation with OpenAI, Google, OpenRouter, DashScope and Replicate APIs. Supports text-to-image, reference images, aspect ratios, and batch generation from saved prompt files. Sequential by default; use batch parallel generation when the user already has multiple prompts or wants stable multi-image throughput. Use when user asks to generate, create, or draw images.
version: 1.56.2
metadata:
openclaw:
homepage: https://github.com/JimLiu/baoyu-skills#baoyu-image-gen
@ -13,7 +13,7 @@ metadata:
# Image Generation (AI SDK)
Official API-based image generation. Supports OpenAI, Google, DashScope (阿里通义万象) and Replicate providers.
Official API-based image generation. Supports OpenAI, Google, OpenRouter, DashScope (阿里通义万相) and Replicate providers.
## Script Directory
@ -74,12 +74,18 @@ ${BUN_X} {baseDir}/scripts/main.ts --prompt "A cat" --image out.png --quality 2k
# From prompt files
${BUN_X} {baseDir}/scripts/main.ts --promptfiles system.md content.md --image out.png
# With reference images (Google multimodal or OpenAI edits)
# With reference images (Google, OpenAI, OpenRouter, or Replicate)
${BUN_X} {baseDir}/scripts/main.ts --prompt "Make blue" --image out.png --ref source.png
# With reference images (explicit provider/model)
${BUN_X} {baseDir}/scripts/main.ts --prompt "Make blue" --image out.png --provider google --model gemini-3-pro-image-preview --ref source.png
# OpenRouter (recommended default model)
${BUN_X} {baseDir}/scripts/main.ts --prompt "A cat" --image out.png --provider openrouter
# OpenRouter with reference images
${BUN_X} {baseDir}/scripts/main.ts --prompt "Make blue" --image out.png --provider openrouter --model google/gemini-3.1-flash-image-preview --ref source.png
# Specific provider
${BUN_X} {baseDir}/scripts/main.ts --prompt "A cat" --image out.png --provider openai
@ -135,13 +141,13 @@ Paths in `promptFiles`, `image`, and `ref` are resolved relative to the batch fi
| `--image <path>` | Output image path (required in single-image mode) |
| `--batchfile <path>` | JSON batch file for multi-image generation |
| `--jobs <count>` | Worker count for batch mode (default: auto, max from config, built-in default 10) |
| `--provider google\|openai\|dashscope\|replicate` | Force provider (default: auto-detect) |
| `--model <id>`, `-m` | Model ID (Google: `gemini-3-pro-image-preview`, `gemini-3.1-flash-image-preview`; OpenAI: `gpt-image-1.5`, `gpt-image-1`) |
| `--provider google\|openai\|openrouter\|dashscope\|replicate` | Force provider (default: auto-detect) |
| `--model <id>`, `-m` | Model ID (Google: `gemini-3-pro-image-preview`; OpenAI: `gpt-image-1.5`; OpenRouter: `google/gemini-3.1-flash-image-preview`) |
| `--ar <ratio>` | Aspect ratio (e.g., `16:9`, `1:1`, `4:3`) |
| `--size <WxH>` | Size (e.g., `1024x1024`) |
| `--quality normal\|2k` | Quality preset (default: `2k`) |
| `--imageSize 1K\|2K\|4K` | Image size for Google (default: from quality) |
| `--ref <files...>` | Reference images. Supported by Google multimodal, OpenAI GPT Image edits, and Replicate |
| `--imageSize 1K\|2K\|4K` | Image size for Google/OpenRouter (default: from quality) |
| `--ref <files...>` | Reference images. Supported by Google multimodal, OpenAI GPT Image edits, OpenRouter multimodal models, and Replicate |
| `--n <count>` | Number of images |
| `--json` | JSON output |
@ -150,14 +156,19 @@ Paths in `promptFiles`, `image`, and `ref` are resolved relative to the batch fi
| Variable | Description |
|----------|-------------|
| `OPENAI_API_KEY` | OpenAI API key |
| `OPENROUTER_API_KEY` | OpenRouter API key |
| `GOOGLE_API_KEY` | Google API key |
| `DASHSCOPE_API_KEY` | DashScope API key (阿里云) |
| `REPLICATE_API_TOKEN` | Replicate API token |
| `OPENAI_IMAGE_MODEL` | OpenAI model override |
| `OPENROUTER_IMAGE_MODEL` | OpenRouter model override (default: `google/gemini-3.1-flash-image-preview`) |
| `GOOGLE_IMAGE_MODEL` | Google model override |
| `DASHSCOPE_IMAGE_MODEL` | DashScope model override (default: z-image-turbo) |
| `REPLICATE_IMAGE_MODEL` | Replicate model override (default: google/nano-banana-pro) |
| `OPENAI_BASE_URL` | Custom OpenAI endpoint |
| `OPENROUTER_BASE_URL` | Custom OpenRouter endpoint (default: `https://openrouter.ai/api/v1`) |
| `OPENROUTER_HTTP_REFERER` | Optional app/site URL for OpenRouter attribution |
| `OPENROUTER_TITLE` | Optional app name for OpenRouter attribution |
| `GOOGLE_BASE_URL` | Custom Google endpoint |
| `DASHSCOPE_BASE_URL` | Custom DashScope endpoint |
| `REPLICATE_BASE_URL` | Custom Replicate endpoint |
@ -182,6 +193,21 @@ Model priority (highest → lowest), applies to all providers:
- Show: `Using [provider] / [model]`
- Show switch hint: `Switch model: --model <id> | EXTEND.md default_model.[provider] | env <PROVIDER>_IMAGE_MODEL`
### OpenRouter Models
Use full OpenRouter model IDs, e.g.:
- `google/gemini-3.1-flash-image-preview` (recommended, supports image output and reference-image workflows)
- `google/gemini-2.5-flash-image-preview`
- `black-forest-labs/flux.2-pro`
- Other OpenRouter image-capable model IDs
Notes:
- OpenRouter image generation uses `/chat/completions`, not the OpenAI `/images` endpoints
- If `--ref` is used, choose a multimodal model that supports image input and image output
- `--imageSize` maps to OpenRouter `imageGenerationOptions.size`; `--size <WxH>` is converted to the nearest OpenRouter size and inferred aspect ratio when possible
### Replicate Models
Supported model formats:
@ -201,19 +227,19 @@ ${BUN_X} {baseDir}/scripts/main.ts --prompt "A cat" --image out.png --provider r
## Provider Selection
1. `--ref` provided + no `--provider` → auto-select Google first, then OpenAI, then Replicate
2. `--provider` specified → use it (if `--ref`, must be `google`, `openai`, or `replicate`)
1. `--ref` provided + no `--provider` → auto-select Google first, then OpenAI, then OpenRouter, then Replicate
2. `--provider` specified → use it (if `--ref`, must be `google`, `openai`, `openrouter`, or `replicate`)
3. Only one API key available → use that provider
4. Multiple available → default to Google
## Quality Presets
| Preset | Google imageSize | OpenAI Size | Replicate resolution | Use Case |
|--------|------------------|-------------|----------------------|----------|
| `normal` | 1K | 1024px | 1K | Quick previews |
| `2k` (default) | 2K | 2048px | 2K | Covers, illustrations, infographics |
| Preset | Google imageSize | OpenAI Size | OpenRouter size | Replicate resolution | Use Case |
|--------|------------------|-------------|-----------------|----------------------|----------|
| `normal` | 1K | 1024px | 1K | 1K | Quick previews |
| `2k` (default) | 2K | 2048px | 2K | 2K | Covers, illustrations, infographics |
**Google imageSize**: Can be overridden with `--imageSize 1K|2K|4K`
**Google/OpenRouter imageSize**: Can be overridden with `--imageSize 1K|2K|4K`
## Aspect Ratios
@ -221,6 +247,7 @@ Supported: `1:1`, `16:9`, `9:16`, `4:3`, `3:4`, `2.35:1`
- Google multimodal: uses `imageConfig.aspectRatio`
- OpenAI: maps to closest supported size
- OpenRouter: sends `imageGenerationOptions.aspect_ratio`; if only `--size <WxH>` is given, aspect ratio is inferred automatically
- Replicate: passes `aspect_ratio` to model; when `--ref` is provided without `--ar`, defaults to `match_input_image`
## Generation Mode

View File

@ -47,6 +47,8 @@ options:
description: "Gemini multimodal - high quality, reference images, flexible sizes"
- label: "OpenAI"
description: "GPT Image - consistent quality, reliable output"
- label: "OpenRouter"
description: "Router for Gemini/FLUX/OpenAI-compatible image models"
- label: "DashScope"
description: "Alibaba Cloud - z-image-turbo, good for Chinese content"
- label: "Replicate"
@ -69,6 +71,22 @@ options:
description: "Fast generation, balanced quality and speed"
```
### Question 2b: Default OpenRouter Model
Only show if user selected OpenRouter.
```yaml
header: "OpenRouter Model"
question: "Default OpenRouter image generation model?"
options:
- label: "google/gemini-3.1-flash-image-preview (Recommended)"
description: "Best general-purpose OpenRouter image model with reference-image workflows"
- label: "google/gemini-2.5-flash-image-preview"
description: "Fast Gemini preview model on OpenRouter"
- label: "black-forest-labs/flux.2-pro"
description: "Strong text-to-image quality through OpenRouter"
```
### Question 3: Default Quality
```yaml
@ -112,6 +130,7 @@ default_image_size: null
default_model:
google: [selected google model or null]
openai: null
openrouter: [selected openrouter model or null]
dashscope: null
replicate: null
---
@ -147,6 +166,20 @@ options:
description: "Previous generation GPT Image model"
```
### OpenRouter Model Selection
```yaml
header: "OpenRouter Model"
question: "Choose a default OpenRouter image generation model?"
options:
- label: "google/gemini-3.1-flash-image-preview (Recommended)"
description: "Recommended for image output and reference-image edits"
- label: "google/gemini-2.5-flash-image-preview"
description: "Fast preview-oriented image generation"
- label: "black-forest-labs/flux.2-pro"
description: "High-quality text-to-image through OpenRouter"
```
### DashScope Model Selection
```yaml
@ -183,6 +216,7 @@ After user selects a model:
default_model:
google: [value or null]
openai: [value or null]
openrouter: [value or null]
dashscope: [value or null]
replicate: [value or null]
```

View File

@ -11,17 +11,18 @@ description: EXTEND.md YAML schema for baoyu-image-gen user preferences
---
version: 1
default_provider: null # google|openai|dashscope|replicate|null (null = auto-detect)
default_provider: null # google|openai|openrouter|dashscope|replicate|null (null = auto-detect)
default_quality: null # normal|2k|null (null = use default: 2k)
default_aspect_ratio: null # "16:9"|"1:1"|"4:3"|"3:4"|"2.35:1"|null
default_image_size: null # 1K|2K|4K|null (Google only, overrides quality)
default_image_size: null # 1K|2K|4K|null (Google/OpenRouter, overrides quality)
default_model:
google: null # e.g., "gemini-3-pro-image-preview", "gemini-3.1-flash-image-preview"
openai: null # e.g., "gpt-image-1.5", "gpt-image-1"
openrouter: null # e.g., "google/gemini-3.1-flash-image-preview"
dashscope: null # e.g., "z-image-turbo"
replicate: null # e.g., "google/nano-banana-pro"
@ -37,6 +38,9 @@ batch:
openai:
concurrency: 3
start_interval_ms: 1100
openrouter:
concurrency: 3
start_interval_ms: 1100
dashscope:
concurrency: 3
start_interval_ms: 1100
@ -51,9 +55,10 @@ batch:
| `default_provider` | string\|null | null | Default provider (null = auto-detect) |
| `default_quality` | string\|null | null | Default quality (null = 2k) |
| `default_aspect_ratio` | string\|null | null | Default aspect ratio |
| `default_image_size` | string\|null | null | Google image size (overrides quality) |
| `default_image_size` | string\|null | null | Google/OpenRouter image size (overrides quality) |
| `default_model.google` | string\|null | null | Google default model |
| `default_model.openai` | string\|null | null | OpenAI default model |
| `default_model.openrouter` | string\|null | null | OpenRouter default model |
| `default_model.dashscope` | string\|null | null | DashScope default model |
| `default_model.replicate` | string\|null | null | Replicate default model |
| `batch.max_workers` | int\|null | 10 | Batch worker cap |
@ -82,6 +87,7 @@ default_image_size: 2K
default_model:
google: "gemini-3-pro-image-preview"
openai: "gpt-image-1.5"
openrouter: "google/gemini-3.1-flash-image-preview"
dashscope: "z-image-turbo"
replicate: "google/nano-banana-pro"
batch:
@ -90,5 +96,8 @@ batch:
replicate:
concurrency: 5
start_interval_ms: 700
openrouter:
concurrency: 3
start_interval_ms: 1100
---
```

View File

@ -53,6 +53,7 @@ const DEFAULT_PROVIDER_RATE_LIMITS: Record<Provider, ProviderRateLimit> = {
replicate: { concurrency: 5, startIntervalMs: 700 },
google: { concurrency: 3, startIntervalMs: 1100 },
openai: { concurrency: 3, startIntervalMs: 1100 },
openrouter: { concurrency: 3, startIntervalMs: 1100 },
dashscope: { concurrency: 3, startIntervalMs: 1100 },
};
@ -68,13 +69,13 @@ Options:
--image <path> Output image path (required in single-image mode)
--batchfile <path> JSON batch file for multi-image generation
--jobs <count> Worker count for batch mode (default: auto, max from config, built-in default 10)
--provider google|openai|dashscope|replicate Force provider (auto-detect by default)
--provider google|openai|openrouter|dashscope|replicate Force provider (auto-detect by default)
-m, --model <id> Model ID
--ar <ratio> Aspect ratio (e.g., 16:9, 1:1, 4:3)
--size <WxH> Size (e.g., 1024x1024)
--quality normal|2k Quality preset (default: 2k)
--imageSize 1K|2K|4K Image size for Google (default: from quality)
--ref <files...> Reference images (Google multimodal, OpenAI GPT Image edits, or Replicate)
--imageSize 1K|2K|4K Image size for Google/OpenRouter (default: from quality)
--ref <files...> Reference images (Google multimodal, OpenAI GPT Image edits, OpenRouter multimodal, or Replicate)
--n <count> Number of images for the current task (default: 1)
--json JSON output
-h, --help Show help
@ -101,16 +102,21 @@ Behavior:
Environment variables:
OPENAI_API_KEY OpenAI API key
OPENROUTER_API_KEY OpenRouter API key
GOOGLE_API_KEY Google API key
GEMINI_API_KEY Gemini API key (alias for GOOGLE_API_KEY)
DASHSCOPE_API_KEY DashScope API key
REPLICATE_API_TOKEN Replicate API token
OPENAI_IMAGE_MODEL Default OpenAI model (gpt-image-1.5)
OPENROUTER_IMAGE_MODEL Default OpenRouter model (google/gemini-3.1-flash-image-preview)
GOOGLE_IMAGE_MODEL Default Google model (gemini-3-pro-image-preview)
DASHSCOPE_IMAGE_MODEL Default DashScope model (z-image-turbo)
REPLICATE_IMAGE_MODEL Default Replicate model (google/nano-banana-pro)
OPENAI_BASE_URL Custom OpenAI endpoint
OPENAI_IMAGE_USE_CHAT Use /chat/completions instead of /images/generations (true|false)
OPENROUTER_BASE_URL Custom OpenRouter endpoint
OPENROUTER_HTTP_REFERER Optional app URL for OpenRouter attribution
OPENROUTER_TITLE Optional app name for OpenRouter attribution
GOOGLE_BASE_URL Custom Google endpoint
DASHSCOPE_BASE_URL Custom DashScope endpoint
REPLICATE_BASE_URL Custom Replicate endpoint
@ -206,7 +212,13 @@ function parseArgs(argv: string[]): CliArgs {
if (a === "--provider") {
const v = argv[++i];
if (v !== "google" && v !== "openai" && v !== "dashscope" && v !== "replicate") {
if (
v !== "google" &&
v !== "openai" &&
v !== "openrouter" &&
v !== "dashscope" &&
v !== "replicate"
) {
throw new Error(`Invalid provider: ${v}`);
}
out.provider = v;
@ -352,7 +364,13 @@ function parseSimpleYaml(yaml: string): Partial<ExtendConfig> {
} else if (key === "default_image_size") {
config.default_image_size = value === "null" ? null : value as "1K" | "2K" | "4K";
} else if (key === "default_model") {
config.default_model = { google: null, openai: null, dashscope: null, replicate: null };
config.default_model = {
google: null,
openai: null,
openrouter: null,
dashscope: null,
replicate: null,
};
currentKey = "default_model";
currentProvider = null;
} else if (key === "batch") {
@ -370,7 +388,13 @@ function parseSimpleYaml(yaml: string): Partial<ExtendConfig> {
} else if (
currentKey === "provider_limits" &&
indent >= 4 &&
(key === "google" || key === "openai" || key === "dashscope" || key === "replicate")
(
key === "google" ||
key === "openai" ||
key === "openrouter" ||
key === "dashscope" ||
key === "replicate"
)
) {
config.batch ??= {};
config.batch.provider_limits ??= {};
@ -378,7 +402,13 @@ function parseSimpleYaml(yaml: string): Partial<ExtendConfig> {
currentProvider = key;
} else if (
currentKey === "default_model" &&
(key === "google" || key === "openai" || key === "dashscope" || key === "replicate")
(
key === "google" ||
key === "openai" ||
key === "openrouter" ||
key === "dashscope" ||
key === "replicate"
)
) {
const cleaned = value.replace(/['"]/g, "");
config.default_model![key] = cleaned === "null" ? null : cleaned;
@ -466,10 +496,11 @@ function getConfiguredProviderRateLimits(
replicate: { ...DEFAULT_PROVIDER_RATE_LIMITS.replicate },
google: { ...DEFAULT_PROVIDER_RATE_LIMITS.google },
openai: { ...DEFAULT_PROVIDER_RATE_LIMITS.openai },
openrouter: { ...DEFAULT_PROVIDER_RATE_LIMITS.openrouter },
dashscope: { ...DEFAULT_PROVIDER_RATE_LIMITS.dashscope },
};
for (const provider of ["replicate", "google", "openai", "dashscope"] as Provider[]) {
for (const provider of ["replicate", "google", "openai", "openrouter", "dashscope"] as Provider[]) {
const envPrefix = `BAOYU_IMAGE_GEN_${provider.toUpperCase()}`;
const extendLimit = extendConfig.batch?.provider_limits?.[provider];
configured[provider] = {
@ -522,10 +553,11 @@ function detectProvider(args: CliArgs): Provider {
args.provider &&
args.provider !== "google" &&
args.provider !== "openai" &&
args.provider !== "openrouter" &&
args.provider !== "replicate"
) {
throw new Error(
"Reference images require a ref-capable provider. Use --provider google (Gemini multimodal), --provider openai (GPT Image edits), or --provider replicate."
"Reference images require a ref-capable provider. Use --provider google (Gemini multimodal), --provider openai (GPT Image edits), --provider openrouter (OpenRouter multimodal), or --provider replicate."
);
}
@ -533,21 +565,24 @@ function detectProvider(args: CliArgs): Provider {
const hasGoogle = !!(process.env.GOOGLE_API_KEY || process.env.GEMINI_API_KEY);
const hasOpenai = !!process.env.OPENAI_API_KEY;
const hasOpenrouter = !!process.env.OPENROUTER_API_KEY;
const hasDashscope = !!process.env.DASHSCOPE_API_KEY;
const hasReplicate = !!process.env.REPLICATE_API_TOKEN;
if (args.referenceImages.length > 0) {
if (hasGoogle) return "google";
if (hasOpenai) return "openai";
if (hasOpenrouter) return "openrouter";
if (hasReplicate) return "replicate";
throw new Error(
"Reference images require Google, OpenAI or Replicate. Set GOOGLE_API_KEY/GEMINI_API_KEY, OPENAI_API_KEY, or REPLICATE_API_TOKEN, or remove --ref."
"Reference images require Google, OpenAI, OpenRouter or Replicate. Set GOOGLE_API_KEY/GEMINI_API_KEY, OPENAI_API_KEY, OPENROUTER_API_KEY, or REPLICATE_API_TOKEN, or remove --ref."
);
}
const available = [
hasGoogle && "google",
hasOpenai && "openai",
hasOpenrouter && "openrouter",
hasDashscope && "dashscope",
hasReplicate && "replicate",
].filter(Boolean) as Provider[];
@ -556,7 +591,7 @@ function detectProvider(args: CliArgs): Provider {
if (available.length > 1) return available[0]!;
throw new Error(
"No API key found. Set GOOGLE_API_KEY, GEMINI_API_KEY, OPENAI_API_KEY, DASHSCOPE_API_KEY, or REPLICATE_API_TOKEN.\n" +
"No API key found. Set GOOGLE_API_KEY, GEMINI_API_KEY, OPENAI_API_KEY, OPENROUTER_API_KEY, DASHSCOPE_API_KEY, or REPLICATE_API_TOKEN.\n" +
"Create ~/.baoyu-skills/.env or <cwd>/.baoyu-skills/.env with your keys."
);
}
@ -596,6 +631,7 @@ async function loadProviderModule(provider: Provider): Promise<ProviderModule> {
if (provider === "google") return (await import("./providers/google")) as ProviderModule;
if (provider === "dashscope") return (await import("./providers/dashscope")) as ProviderModule;
if (provider === "replicate") return (await import("./providers/replicate")) as ProviderModule;
if (provider === "openrouter") return (await import("./providers/openrouter")) as ProviderModule;
return (await import("./providers/openai")) as ProviderModule;
}
@ -617,6 +653,9 @@ function getModelForProvider(
if (extendConfig.default_model) {
if (provider === "google" && extendConfig.default_model.google) return extendConfig.default_model.google;
if (provider === "openai" && extendConfig.default_model.openai) return extendConfig.default_model.openai;
if (provider === "openrouter" && extendConfig.default_model.openrouter) {
return extendConfig.default_model.openrouter;
}
if (provider === "dashscope" && extendConfig.default_model.dashscope) return extendConfig.default_model.dashscope;
if (provider === "replicate" && extendConfig.default_model.replicate) return extendConfig.default_model.replicate;
}

View File

@ -0,0 +1,255 @@
import path from "node:path";
import { readFile } from "node:fs/promises";
import type { CliArgs } from "../types";
// Fallback OpenRouter model ID, used by getDefaultModel() when the
// OPENROUTER_IMAGE_MODEL environment variable is unset or empty.
const DEFAULT_MODEL = "google/gemini-3.1-flash-image-preview";
/**
 * One element of `message.images` in an OpenRouter chat-completion response.
 *
 * The image location is accepted under either key spelling (`image_url` or
 * `imageUrl`), and either as a bare string or wrapped in an object whose
 * `url` field holds the string.
 */
type OpenRouterImageEntry = {
  image_url?: string | { url?: string | null } | null;
  imageUrl?: string | { url?: string | null } | null;
};
/**
 * One element of `message.content` when the response content is an array of
 * parts rather than a single string.
 *
 * A part may carry text (`type` / `text`) and/or an image reference using the
 * same `image_url` / `imageUrl` shapes as `OpenRouterImageEntry`.
 */
type OpenRouterMessagePart = {
  type?: string;
  text?: string;
  image_url?: string | { url?: string | null } | null;
  imageUrl?: string | { url?: string | null } | null;
};
/**
 * Subset of the `/chat/completions` response body that this module reads.
 *
 * Every field is optional because the body is only type-asserted after
 * `response.json()`, never validated; consumers must tolerate missing pieces.
 */
type OpenRouterResponse = {
  choices?: Array<{
    message?: {
      // Generated images attached to the message.
      images?: OpenRouterImageEntry[];
      // Either plain text or a list of typed parts.
      content?: string | OpenRouterMessagePart[];
    };
  }>;
};
/**
 * Resolves the OpenRouter model to use when the caller did not pick one.
 *
 * @returns The value of `OPENROUTER_IMAGE_MODEL` when it is set to a
 *   non-empty string, otherwise the built-in `DEFAULT_MODEL`.
 */
export function getDefaultModel(): string {
  const override = process.env.OPENROUTER_IMAGE_MODEL;
  return override ? override : DEFAULT_MODEL;
}
/**
 * Reads the OpenRouter API key from the environment.
 *
 * @returns The value of `OPENROUTER_API_KEY`, or `null` when the variable is
 *   unset or empty.
 */
function getApiKey(): string | null {
  const key = process.env.OPENROUTER_API_KEY;
  return key ? key : null;
}
/**
 * Resolves the API base URL without any trailing slashes, so that callers can
 * append `/chat/completions` directly.
 *
 * @returns `OPENROUTER_BASE_URL` when set to a non-empty string, otherwise the
 *   public OpenRouter endpoint; in both cases with trailing `/` removed.
 */
function getBaseUrl(): string {
  const configured = process.env.OPENROUTER_BASE_URL;
  let url = configured ? configured : "https://openrouter.ai/api/v1";
  while (url.endsWith("/")) {
    url = url.slice(0, -1);
  }
  return url;
}
/**
 * Builds the HTTP headers for an OpenRouter request.
 *
 * Always includes the JSON content type and bearer authorization. The
 * attribution headers are added only when the corresponding environment
 * variable holds a non-blank value: `OPENROUTER_HTTP_REFERER` becomes
 * `HTTP-Referer`, and `OPENROUTER_TITLE` is sent as both `X-OpenRouter-Title`
 * and `X-Title`.
 *
 * @param apiKey - Key placed in the `Authorization: Bearer …` header.
 * @returns A fresh header map.
 */
function getHeaders(apiKey: string): Record<string, string> {
  const referer = (process.env.OPENROUTER_HTTP_REFERER ?? "").trim();
  const title = (process.env.OPENROUTER_TITLE ?? "").trim();

  return {
    "Content-Type": "application/json",
    Authorization: `Bearer ${apiKey}`,
    ...(referer ? { "HTTP-Referer": referer } : {}),
    ...(title ? { "X-OpenRouter-Title": title, "X-Title": title } : {}),
  };
}
/**
 * Parses a `<width>x<height>` pixel-size string such as `1024x768`.
 *
 * The separator may be `x` or `X` with optional whitespace on either side of
 * it; no other surrounding characters are accepted.
 *
 * @param value - Candidate size string.
 * @returns The parsed dimensions, or `null` when the string does not match or
 *   either dimension is not a positive finite number.
 */
function parsePixelSize(value: string): { width: number; height: number } | null {
  const parts = /^(\d+)\s*[xX]\s*(\d+)$/.exec(value);
  if (parts === null) return null;

  const width = Number.parseInt(parts[1]!, 10);
  const height = Number.parseInt(parts[2]!, 10);
  const usable = [width, height].every((edge) => Number.isFinite(edge) && edge > 0);

  return usable ? { width, height } : null;
}
/**
 * Greatest common divisor of two numbers via the Euclidean algorithm.
 *
 * Signs are ignored. When the result would be zero (both inputs zero) the
 * function returns 1 instead, so the value is always safe to divide by.
 *
 * @param a - First operand.
 * @param b - Second operand.
 * @returns The GCD of `|a|` and `|b|`, or 1 when that would be 0.
 */
function gcd(a: number, b: number): number {
  let [dividend, divisor] = [Math.abs(a), Math.abs(b)];
  while (divisor !== 0) {
    [dividend, divisor] = [divisor, dividend % divisor];
  }
  return dividend ? dividend : 1;
}
/**
 * Derives a reduced `W:H` aspect-ratio string from a pixel-size string.
 *
 * @param size - A `<width>x<height>` string, or `null`.
 * @returns The ratio in lowest terms (e.g. `1920x1080` gives `16:9`), or
 *   `null` when `size` is empty or cannot be parsed by `parsePixelSize`.
 */
function inferAspectRatio(size: string | null): string | null {
  const dims = size ? parsePixelSize(size) : null;
  if (dims === null) return null;

  const { width, height } = dims;
  const unit = gcd(width, height);
  return [width / unit, height / unit].join(":");
}
/**
 * Maps a pixel-size string onto the `1K` / `2K` / `4K` size tiers, based on
 * the longer of the two edges.
 *
 * @param size - A `<width>x<height>` string, or `null`.
 * @returns `1K` for a longest edge up to 1024, `2K` up to 2048, `4K` above
 *   that; `null` when `size` is empty or cannot be parsed.
 */
function inferImageSize(size: string | null): "1K" | "2K" | "4K" | null {
  const dims = size ? parsePixelSize(size) : null;
  if (!dims) return null;

  const longestEdge = dims.width > dims.height ? dims.width : dims.height;
  const tiers: ReadonlyArray<readonly [number, "1K" | "2K"]> = [
    [1024, "1K"],
    [2048, "2K"],
  ];
  for (const [limit, label] of tiers) {
    if (longestEdge <= limit) return label;
  }
  return "4K";
}
/**
 * Chooses the image-size tier for a request.
 *
 * Precedence: an explicit `args.imageSize`, then a tier inferred from
 * `args.size`, then the quality preset (`normal` gives `1K`, anything else
 * gives `2K`).
 *
 * @param args - Parsed CLI arguments.
 * @returns The size tier to request.
 */
function getImageSize(args: CliArgs): "1K" | "2K" | "4K" {
  const explicit = args.imageSize;
  if (explicit) return explicit as "1K" | "2K" | "4K";

  const presetSize = args.quality === "normal" ? "1K" : "2K";
  return inferImageSize(args.size) ?? presetSize;
}
/**
 * Chooses the aspect ratio for a request.
 *
 * @param args - Parsed CLI arguments.
 * @returns `args.aspectRatio` when set; otherwise the ratio inferred from
 *   `args.size`, which is `null` when no usable size was given.
 */
function getAspectRatio(args: CliArgs): string | null {
  const requested = args.aspectRatio;
  return requested ? requested : inferAspectRatio(args.size);
}
/**
 * Guesses an image MIME type from a file name's extension (case-insensitive).
 *
 * @param filename - File name or path.
 * @returns `image/jpeg`, `image/webp` or `image/gif` for the matching
 *   extensions; `image/png` for everything else, including no extension.
 */
function getMimeType(filename: string): string {
  switch (path.extname(filename).toLowerCase()) {
    case ".jpg":
    case ".jpeg":
      return "image/jpeg";
    case ".webp":
      return "image/webp";
    case ".gif":
      return "image/gif";
    default:
      return "image/png";
  }
}
/**
 * Reads an image file and encodes it as a base64 `data:` URL.
 *
 * The MIME type comes from the file extension via `getMimeType`; the file
 * contents are not inspected.
 *
 * @param filePath - Path of the image to read.
 * @returns A `data:<mime>;base64,<payload>` string.
 */
async function readImageAsDataUrl(filePath: string): Promise<string> {
  const contents = await readFile(filePath);
  const mimeType = getMimeType(filePath);
  const payload = contents.toString("base64");
  return ["data:", mimeType, ";base64,", payload].join("");
}
/**
 * Builds the multi-part `content` array for the user message.
 *
 * @param prompt - Text prompt; always emitted as the first part.
 * @param referenceImages - Image URLs (this module passes data URLs); each
 *   becomes an `image_url` part, in the given order.
 * @returns The text part followed by one part per reference image.
 */
function buildContent(prompt: string, referenceImages: string[]): Array<Record<string, unknown>> {
  const textPart: Record<string, unknown> = { type: "text", text: prompt };
  const imageParts = referenceImages.map(
    (url): Record<string, unknown> => ({ type: "image_url", image_url: { url } }),
  );
  return [textPart, ...imageParts];
}
/**
 * Pulls the image URL out of a response entry, whichever shape it uses.
 *
 * `image_url` is preferred; `imageUrl` is consulted only when `image_url` is
 * `null` or `undefined`. The chosen value may be a string or an object with a
 * `url` field.
 *
 * @param entry - An image entry or a content part.
 * @returns The URL string, or `null` when the entry carries none (including
 *   an empty string value or an object with a missing/null `url`).
 */
function extractImageUrl(entry: OpenRouterImageEntry | OpenRouterMessagePart): string | null {
  const candidate = entry.image_url ?? entry.imageUrl;
  if (typeof candidate === "string") {
    return candidate === "" ? null : candidate;
  }
  if (candidate === null || candidate === undefined) {
    return null;
  }
  return candidate.url ?? null;
}
/**
 * Decodes a base64 `data:image/...` URL into raw bytes.
 *
 * Accepts surrounding whitespace, case-insensitive scheme and encoding tokens,
 * and optional media-type parameters before `;base64` (for example
 * `data:image/png;charset=utf-8;base64,...`). Rejecting those forms would
 * send a valid data URL on to a caller's raw-base64 fallback, where the
 * `data:` prefix gets decoded as payload and corrupts the image.
 *
 * @param value - Candidate data URL.
 * @returns The decoded bytes, or `null` when `value` is not a base64 image
 *   data URL.
 */
function decodeDataUrl(value: string): Uint8Array | null {
  const match = value
    .trim()
    .match(/^data:image\/[^;]+(?:;[^;,]+)*;base64,([A-Za-z0-9+/=]+)$/i);
  if (!match) return null;
  // Copy out of the Buffer so the result is a plain, independently owned Uint8Array.
  return Uint8Array.from(Buffer.from(match[1]!, "base64"));
}
/**
 * Turns an image reference from the API response into raw bytes.
 *
 * Handled in order: a base64 image data URL is decoded in place; an
 * `http://` or `https://` URL is fetched; anything else is treated as a bare
 * base64 payload.
 *
 * @param value - Data URL, remote URL, or bare base64 string.
 * @returns The image bytes.
 * @throws Error when a remote download responds with a non-OK status.
 */
async function downloadImage(value: string): Promise<Uint8Array> {
  const decoded = decodeDataUrl(value);
  if (decoded !== null) return decoded;

  const isRemote = ["http://", "https://"].some((scheme) => value.startsWith(scheme));
  if (!isRemote) {
    // Last resort: assume the whole string is the base64 payload.
    return Uint8Array.from(Buffer.from(value, "base64"));
  }

  const response = await fetch(value);
  if (!response.ok) {
    throw new Error(`Failed to download OpenRouter image: ${response.status}`);
  }
  return new Uint8Array(await response.arrayBuffer());
}
/**
 * Locates the first image in a chat-completion response and returns its bytes.
 *
 * Only the first choice is examined. Search order:
 * 1. `message.images` — the first entry that yields a URL;
 * 2. `message.content` as an array — per part, an image URL first, then a
 *    data URL carried in a text part;
 * 3. `message.content` as a string that is itself a data URL.
 *
 * @param result - Parsed response body.
 * @returns The image bytes.
 * @throws Error with "No image in OpenRouter response" when nothing matches.
 */
async function extractImageFromResponse(result: OpenRouterResponse): Promise<Uint8Array> {
  const message = result.choices?.[0]?.message;

  const attachedUrl = (message?.images ?? [])
    .map((entry) => extractImageUrl(entry))
    .find((url) => Boolean(url));
  if (attachedUrl) return downloadImage(attachedUrl);

  const content = message?.content;
  if (typeof content === "string") {
    const inline = decodeDataUrl(content);
    if (inline) return inline;
  } else if (Array.isArray(content)) {
    for (const part of content) {
      const partUrl = extractImageUrl(part);
      if (partUrl) return downloadImage(partUrl);

      const text = part.type === "text" ? part.text : undefined;
      const inline = text ? decodeDataUrl(text) : null;
      if (inline) return inline;
    }
  }

  throw new Error("No image in OpenRouter response");
}
/**
 * Generates an image through OpenRouter's `/chat/completions` endpoint.
 *
 * Reference images listed in `args.referenceImages` are read from disk and
 * sent inline as data URLs alongside the text prompt. Size and aspect ratio
 * are sent in the `image_config` object (`image_size`, `aspect_ratio`), which
 * is the field name the OpenRouter REST API documents for image generation.
 *
 * @param prompt - Text prompt for the image.
 * @param model - Full OpenRouter model ID, e.g. `google/gemini-3.1-flash-image-preview`.
 * @param args - Parsed CLI arguments (reference images, size, aspect ratio, quality).
 * @returns The bytes of the first image found in the response.
 * @throws Error when `OPENROUTER_API_KEY` is missing, the HTTP status is not
 *   OK, the response body carries an `error` object, or no image is present.
 */
export async function generateImage(
  prompt: string,
  model: string,
  args: CliArgs
): Promise<Uint8Array> {
  const apiKey = getApiKey();
  if (!apiKey) {
    throw new Error("OPENROUTER_API_KEY is required. Get one at https://openrouter.ai/settings/keys");
  }

  // The reads are independent, so run them concurrently; Promise.all keeps input order.
  const referenceImages = await Promise.all(
    args.referenceImages.map((refPath) => readImageAsDataUrl(refPath)),
  );

  const imageConfig: Record<string, string> = {
    image_size: getImageSize(args),
  };
  const aspectRatio = getAspectRatio(args);
  if (aspectRatio) {
    imageConfig.aspect_ratio = aspectRatio;
  }

  const body = {
    model,
    messages: [
      {
        role: "user",
        content: buildContent(prompt, referenceImages),
      },
    ],
    modalities: ["image", "text"],
    max_tokens: 256,
    image_config: imageConfig,
    // NOTE(review): the REST API documents provider routing under `provider`,
    // so this camelCase key is presumably ignored by the server. Left as-is
    // because renaming it would start enforcing `require_parameters` and could
    // change which upstream providers are eligible — confirm before switching.
    providerPreferences: {
      require_parameters: true,
    },
  };

  console.log(`Generating image with OpenRouter (${model})...`, imageConfig);

  const response = await fetch(`${getBaseUrl()}/chat/completions`, {
    method: "POST",
    headers: getHeaders(apiKey),
    body: JSON.stringify(body),
  });
  if (!response.ok) {
    const errorText = await response.text();
    throw new Error(`OpenRouter API error (${response.status}): ${errorText}`);
  }

  const result = (await response.json()) as OpenRouterResponse & { error?: unknown };
  // A body may carry an error object even with an OK status; report it
  // directly instead of the generic "No image" failure.
  if (result.error) {
    const detail = typeof result.error === "string" ? result.error : JSON.stringify(result.error);
    throw new Error(`OpenRouter API error: ${detail}`);
  }

  return extractImageFromResponse(result);
}

View File

@ -1,4 +1,4 @@
export type Provider = "google" | "openai" | "dashscope" | "replicate";
export type Provider = "google" | "openai" | "openrouter" | "dashscope" | "replicate";
export type Quality = "normal" | "2k";
export type CliArgs = {
@ -50,6 +50,7 @@ export type ExtendConfig = {
default_model: {
google: string | null;
openai: string | null;
openrouter: string | null;
dashscope: string | null;
replicate: string | null;
};