feat(baoyu-image-gen): add OpenRouter provider support
This commit is contained in:
parent
c1e1526c84
commit
12b43e166d
|
|
@ -31,7 +31,7 @@ Execute: `${BUN_X} skills/<skill>/scripts/main.ts [options]`
|
|||
|
||||
- **Bun**: TypeScript runtime (`bun` preferred, fallback `npx -y bun`)
|
||||
- **Chrome**: Required for CDP-based skills (gemini-web, post-to-x/wechat/weibo, url-to-markdown). All CDP skills share a single profile, override via `BAOYU_CHROME_PROFILE_DIR` env var. Platform paths: [docs/chrome-profile.md](docs/chrome-profile.md)
|
||||
- **Image generation APIs**: `baoyu-image-gen` requires API key (OpenAI, Google, DashScope, or Replicate) configured in EXTEND.md
|
||||
- **Image generation APIs**: `baoyu-image-gen` requires API key (OpenAI, Google, OpenRouter, DashScope, or Replicate) configured in EXTEND.md
|
||||
- **Gemini Web auth**: Browser cookies (first run opens Chrome for login, `--login` to refresh)
|
||||
|
||||
## Security
|
||||
|
|
|
|||
32
README.md
32
README.md
|
|
@ -665,7 +665,7 @@ AI-powered generation backends.
|
|||
|
||||
#### baoyu-image-gen
|
||||
|
||||
AI SDK-based image generation using official OpenAI, Google and DashScope (Aliyun Tongyi Wanxiang) APIs. Supports text-to-image, reference images, aspect ratios, and quality presets.
|
||||
AI SDK-based image generation using OpenAI, Google, OpenRouter, DashScope (Aliyun Tongyi Wanxiang), and Replicate APIs. Supports text-to-image, reference images, aspect ratios, and quality presets.
|
||||
|
||||
```bash
|
||||
# Basic generation (auto-detect provider)
|
||||
|
|
@ -680,10 +680,16 @@ AI SDK-based image generation using official OpenAI, Google and DashScope (Aliyu
|
|||
# Specific provider
|
||||
/baoyu-image-gen --prompt "A cat" --image cat.png --provider openai
|
||||
|
||||
# OpenRouter
|
||||
/baoyu-image-gen --prompt "A cat" --image cat.png --provider openrouter
|
||||
|
||||
# DashScope (Aliyun Tongyi Wanxiang)
|
||||
/baoyu-image-gen --prompt "一只可爱的猫" --image cat.png --provider dashscope
|
||||
|
||||
# With reference images (Google multimodal only)
|
||||
# Replicate
|
||||
/baoyu-image-gen --prompt "A cat" --image cat.png --provider replicate
|
||||
|
||||
# With reference images (Google, OpenAI, OpenRouter, or Replicate)
|
||||
/baoyu-image-gen --prompt "Make it blue" --image out.png --ref source.png
|
||||
```
|
||||
|
||||
|
|
@ -693,25 +699,31 @@ AI SDK-based image generation using official OpenAI, Google and DashScope (Aliyu
|
|||
| `--prompt`, `-p` | Prompt text |
|
||||
| `--promptfiles` | Read prompt from files (concatenated) |
|
||||
| `--image` | Output image path (required) |
|
||||
| `--provider` | `google`, `openai` or `dashscope` (default: google) |
|
||||
| `--provider` | `google`, `openai`, `openrouter`, `dashscope` or `replicate` (default: auto-detect; prefers google) |
|
||||
| `--model`, `-m` | Model ID |
|
||||
| `--ar` | Aspect ratio (e.g., `16:9`, `1:1`, `4:3`) |
|
||||
| `--size` | Size (e.g., `1024x1024`) |
|
||||
| `--quality` | `normal` or `2k` (default: normal) |
|
||||
| `--ref` | Reference images (Google multimodal only) |
|
||||
| `--quality` | `normal` or `2k` (default: `2k`) |
|
||||
| `--ref` | Reference images (Google, OpenAI, OpenRouter or Replicate) |
|
||||
|
||||
**Environment Variables** (see [Environment Configuration](#environment-configuration) for setup):
|
||||
| Variable | Description | Default |
|
||||
|----------|-------------|---------|
|
||||
| `OPENAI_API_KEY` | OpenAI API key | - |
|
||||
| `OPENROUTER_API_KEY` | OpenRouter API key | - |
|
||||
| `GOOGLE_API_KEY` | Google API key | - |
|
||||
| `DASHSCOPE_API_KEY` | DashScope API key (Aliyun) | - |
|
||||
| `REPLICATE_API_TOKEN` | Replicate API token | - |
|
||||
| `OPENAI_IMAGE_MODEL` | OpenAI model | `gpt-image-1.5` |
|
||||
| `OPENROUTER_IMAGE_MODEL` | OpenRouter model | `google/gemini-3.1-flash-image-preview` |
|
||||
| `GOOGLE_IMAGE_MODEL` | Google model | `gemini-3-pro-image-preview` |
|
||||
| `DASHSCOPE_IMAGE_MODEL` | DashScope model | `z-image-turbo` |
|
||||
| `REPLICATE_IMAGE_MODEL` | Replicate model | `google/nano-banana-pro` |
|
||||
| `OPENAI_BASE_URL` | Custom OpenAI endpoint | - |
|
||||
| `OPENROUTER_BASE_URL` | Custom OpenRouter endpoint | `https://openrouter.ai/api/v1` |
|
||||
| `GOOGLE_BASE_URL` | Custom Google endpoint | - |
|
||||
| `DASHSCOPE_BASE_URL` | Custom DashScope endpoint | - |
|
||||
| `REPLICATE_BASE_URL` | Custom Replicate endpoint | - |
|
||||
|
||||
**Provider Auto-Selection**:
|
||||
1. If `--provider` specified → use it
|
||||
|
|
@ -958,6 +970,11 @@ OPENAI_API_KEY=sk-xxx
|
|||
OPENAI_IMAGE_MODEL=gpt-image-1.5
|
||||
# OPENAI_BASE_URL=https://api.openai.com/v1
|
||||
|
||||
# OpenRouter
|
||||
OPENROUTER_API_KEY=sk-or-xxx
|
||||
OPENROUTER_IMAGE_MODEL=google/gemini-3.1-flash-image-preview
|
||||
# OPENROUTER_BASE_URL=https://openrouter.ai/api/v1
|
||||
|
||||
# Google
|
||||
GOOGLE_API_KEY=xxx
|
||||
GOOGLE_IMAGE_MODEL=gemini-3-pro-image-preview
|
||||
|
|
@ -967,6 +984,11 @@ GOOGLE_IMAGE_MODEL=gemini-3-pro-image-preview
|
|||
DASHSCOPE_API_KEY=sk-xxx
|
||||
DASHSCOPE_IMAGE_MODEL=z-image-turbo
|
||||
# DASHSCOPE_BASE_URL=https://dashscope.aliyuncs.com/api/v1
|
||||
|
||||
# Replicate
|
||||
REPLICATE_API_TOKEN=r8_xxx
|
||||
REPLICATE_IMAGE_MODEL=google/nano-banana-pro
|
||||
# REPLICATE_BASE_URL=https://api.replicate.com
|
||||
EOF
|
||||
```
|
||||
|
||||
|
|
|
|||
32
README.zh.md
32
README.zh.md
|
|
@ -665,7 +665,7 @@ AI 驱动的生成后端。
|
|||
|
||||
#### baoyu-image-gen
|
||||
|
||||
基于 AI SDK 的图像生成,使用官方 OpenAI、Google 和 DashScope(阿里通义万相)API。支持文生图、参考图、宽高比和质量预设。
|
||||
基于 AI SDK 的图像生成,支持 OpenAI、Google、OpenRouter、DashScope(阿里通义万相)和 Replicate API。支持文生图、参考图、宽高比和质量预设。
|
||||
|
||||
```bash
|
||||
# 基础生成(自动检测服务商)
|
||||
|
|
@ -680,10 +680,16 @@ AI 驱动的生成后端。
|
|||
# 指定服务商
|
||||
/baoyu-image-gen --prompt "一只猫" --image cat.png --provider openai
|
||||
|
||||
# OpenRouter
|
||||
/baoyu-image-gen --prompt "一只猫" --image cat.png --provider openrouter
|
||||
|
||||
# DashScope(阿里通义万相)
|
||||
/baoyu-image-gen --prompt "一只可爱的猫" --image cat.png --provider dashscope
|
||||
|
||||
# 带参考图(仅 Google 多模态支持)
|
||||
# Replicate
|
||||
/baoyu-image-gen --prompt "一只猫" --image cat.png --provider replicate
|
||||
|
||||
# 带参考图(Google、OpenAI、OpenRouter 或 Replicate)
|
||||
/baoyu-image-gen --prompt "把它变成蓝色" --image out.png --ref source.png
|
||||
```
|
||||
|
||||
|
|
@ -693,25 +699,31 @@ AI 驱动的生成后端。
|
|||
| `--prompt`, `-p` | 提示词文本 |
|
||||
| `--promptfiles` | 从文件读取提示词(多文件拼接) |
|
||||
| `--image` | 输出图片路径(必需) |
|
||||
| `--provider` | `google`、`openai` 或 `dashscope`(默认:google) |
|
||||
| `--provider` | `google`、`openai`、`openrouter`、`dashscope` 或 `replicate`(默认:自动检测,优先 google) |
|
||||
| `--model`, `-m` | 模型 ID |
|
||||
| `--ar` | 宽高比(如 `16:9`、`1:1`、`4:3`) |
|
||||
| `--size` | 尺寸(如 `1024x1024`) |
|
||||
| `--quality` | `normal` 或 `2k`(默认:normal) |
|
||||
| `--ref` | 参考图片(仅 Google 多模态支持) |
|
||||
| `--quality` | `normal` 或 `2k`(默认:`2k`) |
|
||||
| `--ref` | 参考图片(Google、OpenAI、OpenRouter 或 Replicate) |
|
||||
|
||||
**环境变量**(配置方法见[环境配置](#环境配置)):
|
||||
| 变量 | 说明 | 默认值 |
|
||||
|------|------|--------|
|
||||
| `OPENAI_API_KEY` | OpenAI API 密钥 | - |
|
||||
| `OPENROUTER_API_KEY` | OpenRouter API 密钥 | - |
|
||||
| `GOOGLE_API_KEY` | Google API 密钥 | - |
|
||||
| `DASHSCOPE_API_KEY` | DashScope API 密钥(阿里云) | - |
|
||||
| `REPLICATE_API_TOKEN` | Replicate API Token | - |
|
||||
| `OPENAI_IMAGE_MODEL` | OpenAI 模型 | `gpt-image-1.5` |
|
||||
| `OPENROUTER_IMAGE_MODEL` | OpenRouter 模型 | `google/gemini-3.1-flash-image-preview` |
|
||||
| `GOOGLE_IMAGE_MODEL` | Google 模型 | `gemini-3-pro-image-preview` |
|
||||
| `DASHSCOPE_IMAGE_MODEL` | DashScope 模型 | `z-image-turbo` |
|
||||
| `REPLICATE_IMAGE_MODEL` | Replicate 模型 | `google/nano-banana-pro` |
|
||||
| `OPENAI_BASE_URL` | 自定义 OpenAI 端点 | - |
|
||||
| `OPENROUTER_BASE_URL` | 自定义 OpenRouter 端点 | `https://openrouter.ai/api/v1` |
|
||||
| `GOOGLE_BASE_URL` | 自定义 Google 端点 | - |
|
||||
| `DASHSCOPE_BASE_URL` | 自定义 DashScope 端点 | - |
|
||||
| `REPLICATE_BASE_URL` | 自定义 Replicate 端点 | - |
|
||||
|
||||
**服务商自动选择**:
|
||||
1. 如果指定了 `--provider` → 使用指定的
|
||||
|
|
@ -958,6 +970,11 @@ OPENAI_API_KEY=sk-xxx
|
|||
OPENAI_IMAGE_MODEL=gpt-image-1.5
|
||||
# OPENAI_BASE_URL=https://api.openai.com/v1
|
||||
|
||||
# OpenRouter
|
||||
OPENROUTER_API_KEY=sk-or-xxx
|
||||
OPENROUTER_IMAGE_MODEL=google/gemini-3.1-flash-image-preview
|
||||
# OPENROUTER_BASE_URL=https://openrouter.ai/api/v1
|
||||
|
||||
# Google
|
||||
GOOGLE_API_KEY=xxx
|
||||
GOOGLE_IMAGE_MODEL=gemini-3-pro-image-preview
|
||||
|
|
@ -967,6 +984,11 @@ GOOGLE_IMAGE_MODEL=gemini-3-pro-image-preview
|
|||
DASHSCOPE_API_KEY=sk-xxx
|
||||
DASHSCOPE_IMAGE_MODEL=z-image-turbo
|
||||
# DASHSCOPE_BASE_URL=https://dashscope.aliyuncs.com/api/v1
|
||||
|
||||
# Replicate
|
||||
REPLICATE_API_TOKEN=r8_xxx
|
||||
REPLICATE_IMAGE_MODEL=google/nano-banana-pro
|
||||
# REPLICATE_BASE_URL=https://api.replicate.com
|
||||
EOF
|
||||
```
|
||||
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
---
|
||||
name: baoyu-image-gen
|
||||
description: AI image generation with OpenAI, Google, DashScope and Replicate APIs. Supports text-to-image, reference images, aspect ratios, and batch generation from saved prompt files. Sequential by default; use batch parallel generation when the user already has multiple prompts or wants stable multi-image throughput. Use when user asks to generate, create, or draw images.
|
||||
version: 1.56.1
|
||||
description: AI image generation with OpenAI, Google, OpenRouter, DashScope and Replicate APIs. Supports text-to-image, reference images, aspect ratios, and batch generation from saved prompt files. Sequential by default; use batch parallel generation when the user already has multiple prompts or wants stable multi-image throughput. Use when user asks to generate, create, or draw images.
|
||||
version: 1.56.2
|
||||
metadata:
|
||||
openclaw:
|
||||
homepage: https://github.com/JimLiu/baoyu-skills#baoyu-image-gen
|
||||
|
|
@ -13,7 +13,7 @@ metadata:
|
|||
|
||||
# Image Generation (AI SDK)
|
||||
|
||||
Official API-based image generation. Supports OpenAI, Google, DashScope (阿里通义万象) and Replicate providers.
|
||||
Official API-based image generation. Supports OpenAI, Google, OpenRouter, DashScope (阿里通义万相) and Replicate providers.
|
||||
|
||||
## Script Directory
|
||||
|
||||
|
|
@ -74,12 +74,18 @@ ${BUN_X} {baseDir}/scripts/main.ts --prompt "A cat" --image out.png --quality 2k
|
|||
# From prompt files
|
||||
${BUN_X} {baseDir}/scripts/main.ts --promptfiles system.md content.md --image out.png
|
||||
|
||||
# With reference images (Google multimodal or OpenAI edits)
|
||||
# With reference images (Google, OpenAI, OpenRouter, or Replicate)
|
||||
${BUN_X} {baseDir}/scripts/main.ts --prompt "Make blue" --image out.png --ref source.png
|
||||
|
||||
# With reference images (explicit provider/model)
|
||||
${BUN_X} {baseDir}/scripts/main.ts --prompt "Make blue" --image out.png --provider google --model gemini-3-pro-image-preview --ref source.png
|
||||
|
||||
# OpenRouter (recommended default model)
|
||||
${BUN_X} {baseDir}/scripts/main.ts --prompt "A cat" --image out.png --provider openrouter
|
||||
|
||||
# OpenRouter with reference images
|
||||
${BUN_X} {baseDir}/scripts/main.ts --prompt "Make blue" --image out.png --provider openrouter --model google/gemini-3.1-flash-image-preview --ref source.png
|
||||
|
||||
# Specific provider
|
||||
${BUN_X} {baseDir}/scripts/main.ts --prompt "A cat" --image out.png --provider openai
|
||||
|
||||
|
|
@ -135,13 +141,13 @@ Paths in `promptFiles`, `image`, and `ref` are resolved relative to the batch fi
|
|||
| `--image <path>` | Output image path (required in single-image mode) |
|
||||
| `--batchfile <path>` | JSON batch file for multi-image generation |
|
||||
| `--jobs <count>` | Worker count for batch mode (default: auto, max from config, built-in default 10) |
|
||||
| `--provider google\|openai\|dashscope\|replicate` | Force provider (default: auto-detect) |
|
||||
| `--model <id>`, `-m` | Model ID (Google: `gemini-3-pro-image-preview`, `gemini-3.1-flash-image-preview`; OpenAI: `gpt-image-1.5`, `gpt-image-1`) |
|
||||
| `--provider google\|openai\|openrouter\|dashscope\|replicate` | Force provider (default: auto-detect) |
|
||||
| `--model <id>`, `-m` | Model ID (Google: `gemini-3-pro-image-preview`; OpenAI: `gpt-image-1.5`; OpenRouter: `google/gemini-3.1-flash-image-preview`) |
|
||||
| `--ar <ratio>` | Aspect ratio (e.g., `16:9`, `1:1`, `4:3`) |
|
||||
| `--size <WxH>` | Size (e.g., `1024x1024`) |
|
||||
| `--quality normal\|2k` | Quality preset (default: `2k`) |
|
||||
| `--imageSize 1K\|2K\|4K` | Image size for Google (default: from quality) |
|
||||
| `--ref <files...>` | Reference images. Supported by Google multimodal, OpenAI GPT Image edits, and Replicate |
|
||||
| `--imageSize 1K\|2K\|4K` | Image size for Google/OpenRouter (default: from quality) |
|
||||
| `--ref <files...>` | Reference images. Supported by Google multimodal, OpenAI GPT Image edits, OpenRouter multimodal models, and Replicate |
|
||||
| `--n <count>` | Number of images |
|
||||
| `--json` | JSON output |
|
||||
|
||||
|
|
@ -150,14 +156,19 @@ Paths in `promptFiles`, `image`, and `ref` are resolved relative to the batch fi
|
|||
| Variable | Description |
|
||||
|----------|-------------|
|
||||
| `OPENAI_API_KEY` | OpenAI API key |
|
||||
| `OPENROUTER_API_KEY` | OpenRouter API key |
|
||||
| `GOOGLE_API_KEY` | Google API key |
|
||||
| `DASHSCOPE_API_KEY` | DashScope API key (阿里云) |
|
||||
| `REPLICATE_API_TOKEN` | Replicate API token |
|
||||
| `OPENAI_IMAGE_MODEL` | OpenAI model override |
|
||||
| `OPENROUTER_IMAGE_MODEL` | OpenRouter model override (default: `google/gemini-3.1-flash-image-preview`) |
|
||||
| `GOOGLE_IMAGE_MODEL` | Google model override |
|
||||
| `DASHSCOPE_IMAGE_MODEL` | DashScope model override (default: z-image-turbo) |
|
||||
| `REPLICATE_IMAGE_MODEL` | Replicate model override (default: google/nano-banana-pro) |
|
||||
| `OPENAI_BASE_URL` | Custom OpenAI endpoint |
|
||||
| `OPENROUTER_BASE_URL` | Custom OpenRouter endpoint (default: `https://openrouter.ai/api/v1`) |
|
||||
| `OPENROUTER_HTTP_REFERER` | Optional app/site URL for OpenRouter attribution |
|
||||
| `OPENROUTER_TITLE` | Optional app name for OpenRouter attribution |
|
||||
| `GOOGLE_BASE_URL` | Custom Google endpoint |
|
||||
| `DASHSCOPE_BASE_URL` | Custom DashScope endpoint |
|
||||
| `REPLICATE_BASE_URL` | Custom Replicate endpoint |
|
||||
|
|
@ -182,6 +193,21 @@ Model priority (highest → lowest), applies to all providers:
|
|||
- Show: `Using [provider] / [model]`
|
||||
- Show switch hint: `Switch model: --model <id> | EXTEND.md default_model.[provider] | env <PROVIDER>_IMAGE_MODEL`
|
||||
|
||||
### OpenRouter Models
|
||||
|
||||
Use full OpenRouter model IDs, e.g.:
|
||||
|
||||
- `google/gemini-3.1-flash-image-preview` (recommended, supports image output and reference-image workflows)
|
||||
- `google/gemini-2.5-flash-image-preview`
|
||||
- `black-forest-labs/flux.2-pro`
|
||||
- Other OpenRouter image-capable model IDs
|
||||
|
||||
Notes:
|
||||
|
||||
- OpenRouter image generation uses `/chat/completions`, not the OpenAI `/images` endpoints
|
||||
- If `--ref` is used, choose a multimodal model that supports image input and image output
|
||||
- `--imageSize` maps to OpenRouter `imageGenerationOptions.size`; `--size <WxH>` is converted to the nearest OpenRouter size and inferred aspect ratio when possible
|
||||
|
||||
### Replicate Models
|
||||
|
||||
Supported model formats:
|
||||
|
|
@ -201,19 +227,19 @@ ${BUN_X} {baseDir}/scripts/main.ts --prompt "A cat" --image out.png --provider r
|
|||
|
||||
## Provider Selection
|
||||
|
||||
1. `--ref` provided + no `--provider` → auto-select Google first, then OpenAI, then Replicate
|
||||
2. `--provider` specified → use it (if `--ref`, must be `google`, `openai`, or `replicate`)
|
||||
1. `--ref` provided + no `--provider` → auto-select Google first, then OpenAI, then OpenRouter, then Replicate
|
||||
2. `--provider` specified → use it (if `--ref`, must be `google`, `openai`, `openrouter`, or `replicate`)
|
||||
3. Only one API key available → use that provider
|
||||
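4. Multiple available → default to Google; if Google is not configured, use the first configured provider in this order: OpenAI, OpenRouter, DashScope, Replicate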
|
||||
|
||||
## Quality Presets
|
||||
|
||||
| Preset | Google imageSize | OpenAI Size | Replicate resolution | Use Case |
|
||||
|--------|------------------|-------------|----------------------|----------|
|
||||
| `normal` | 1K | 1024px | 1K | Quick previews |
|
||||
| `2k` (default) | 2K | 2048px | 2K | Covers, illustrations, infographics |
|
||||
| Preset | Google imageSize | OpenAI Size | OpenRouter size | Replicate resolution | Use Case |
|
||||
|--------|------------------|-------------|-----------------|----------------------|----------|
|
||||
| `normal` | 1K | 1024px | 1K | 1K | Quick previews |
|
||||
| `2k` (default) | 2K | 2048px | 2K | 2K | Covers, illustrations, infographics |
|
||||
|
||||
**Google imageSize**: Can be overridden with `--imageSize 1K|2K|4K`
|
||||
**Google/OpenRouter imageSize**: Can be overridden with `--imageSize 1K|2K|4K`
|
||||
|
||||
## Aspect Ratios
|
||||
|
||||
|
|
@ -221,6 +247,7 @@ Supported: `1:1`, `16:9`, `9:16`, `4:3`, `3:4`, `2.35:1`
|
|||
|
||||
- Google multimodal: uses `imageConfig.aspectRatio`
|
||||
- OpenAI: maps to closest supported size
|
||||
- OpenRouter: sends `imageGenerationOptions.aspect_ratio`; if only `--size <WxH>` is given, aspect ratio is inferred automatically
|
||||
- Replicate: passes `aspect_ratio` to model; when `--ref` is provided without `--ar`, defaults to `match_input_image`
|
||||
|
||||
## Generation Mode
|
||||
|
|
|
|||
|
|
@ -47,6 +47,8 @@ options:
|
|||
description: "Gemini multimodal - high quality, reference images, flexible sizes"
|
||||
- label: "OpenAI"
|
||||
description: "GPT Image - consistent quality, reliable output"
|
||||
- label: "OpenRouter"
|
||||
description: "Router for Gemini/FLUX/OpenAI-compatible image models"
|
||||
- label: "DashScope"
|
||||
description: "Alibaba Cloud - z-image-turbo, good for Chinese content"
|
||||
- label: "Replicate"
|
||||
|
|
@ -69,6 +71,22 @@ options:
|
|||
description: "Fast generation, balanced quality and speed"
|
||||
```
|
||||
|
||||
### Question 2b: Default OpenRouter Model
|
||||
|
||||
Only show if user selected OpenRouter.
|
||||
|
||||
```yaml
|
||||
header: "OpenRouter Model"
|
||||
question: "Default OpenRouter image generation model?"
|
||||
options:
|
||||
- label: "google/gemini-3.1-flash-image-preview (Recommended)"
|
||||
description: "Best general-purpose OpenRouter image model with reference-image workflows"
|
||||
- label: "google/gemini-2.5-flash-image-preview"
|
||||
description: "Fast Gemini preview model on OpenRouter"
|
||||
- label: "black-forest-labs/flux.2-pro"
|
||||
description: "Strong text-to-image quality through OpenRouter"
|
||||
```
|
||||
|
||||
### Question 3: Default Quality
|
||||
|
||||
```yaml
|
||||
|
|
@ -112,6 +130,7 @@ default_image_size: null
|
|||
default_model:
|
||||
google: [selected google model or null]
|
||||
openai: null
|
||||
openrouter: [selected openrouter model or null]
|
||||
dashscope: null
|
||||
replicate: null
|
||||
---
|
||||
|
|
@ -147,6 +166,20 @@ options:
|
|||
description: "Previous generation GPT Image model"
|
||||
```
|
||||
|
||||
### OpenRouter Model Selection
|
||||
|
||||
```yaml
|
||||
header: "OpenRouter Model"
|
||||
question: "Choose a default OpenRouter image generation model?"
|
||||
options:
|
||||
- label: "google/gemini-3.1-flash-image-preview (Recommended)"
|
||||
description: "Recommended for image output and reference-image edits"
|
||||
- label: "google/gemini-2.5-flash-image-preview"
|
||||
description: "Fast preview-oriented image generation"
|
||||
- label: "black-forest-labs/flux.2-pro"
|
||||
description: "High-quality text-to-image through OpenRouter"
|
||||
```
|
||||
|
||||
### DashScope Model Selection
|
||||
|
||||
```yaml
|
||||
|
|
@ -183,6 +216,7 @@ After user selects a model:
|
|||
default_model:
|
||||
google: [value or null]
|
||||
openai: [value or null]
|
||||
openrouter: [value or null]
|
||||
dashscope: [value or null]
|
||||
replicate: [value or null]
|
||||
```
|
||||
|
|
|
|||
|
|
@ -11,17 +11,18 @@ description: EXTEND.md YAML schema for baoyu-image-gen user preferences
|
|||
---
|
||||
version: 1
|
||||
|
||||
default_provider: null # google|openai|dashscope|replicate|null (null = auto-detect)
|
||||
default_provider: null # google|openai|openrouter|dashscope|replicate|null (null = auto-detect)
|
||||
|
||||
default_quality: null # normal|2k|null (null = use default: 2k)
|
||||
|
||||
default_aspect_ratio: null # "16:9"|"1:1"|"4:3"|"3:4"|"2.35:1"|null
|
||||
|
||||
default_image_size: null # 1K|2K|4K|null (Google only, overrides quality)
|
||||
default_image_size: null # 1K|2K|4K|null (Google/OpenRouter, overrides quality)
|
||||
|
||||
default_model:
|
||||
google: null # e.g., "gemini-3-pro-image-preview", "gemini-3.1-flash-image-preview"
|
||||
openai: null # e.g., "gpt-image-1.5", "gpt-image-1"
|
||||
openrouter: null # e.g., "google/gemini-3.1-flash-image-preview"
|
||||
dashscope: null # e.g., "z-image-turbo"
|
||||
replicate: null # e.g., "google/nano-banana-pro"
|
||||
|
||||
|
|
@ -37,6 +38,9 @@ batch:
|
|||
openai:
|
||||
concurrency: 3
|
||||
start_interval_ms: 1100
|
||||
openrouter:
|
||||
concurrency: 3
|
||||
start_interval_ms: 1100
|
||||
dashscope:
|
||||
concurrency: 3
|
||||
start_interval_ms: 1100
|
||||
|
|
@ -51,9 +55,10 @@ batch:
|
|||
| `default_provider` | string\|null | null | Default provider (null = auto-detect) |
|
||||
| `default_quality` | string\|null | null | Default quality (null = 2k) |
|
||||
| `default_aspect_ratio` | string\|null | null | Default aspect ratio |
|
||||
| `default_image_size` | string\|null | null | Google image size (overrides quality) |
|
||||
| `default_image_size` | string\|null | null | Google/OpenRouter image size (overrides quality) |
|
||||
| `default_model.google` | string\|null | null | Google default model |
|
||||
| `default_model.openai` | string\|null | null | OpenAI default model |
|
||||
| `default_model.openrouter` | string\|null | null | OpenRouter default model |
|
||||
| `default_model.dashscope` | string\|null | null | DashScope default model |
|
||||
| `default_model.replicate` | string\|null | null | Replicate default model |
|
||||
| `batch.max_workers` | int\|null | 10 | Batch worker cap |
|
||||
|
|
@ -82,6 +87,7 @@ default_image_size: 2K
|
|||
default_model:
|
||||
google: "gemini-3-pro-image-preview"
|
||||
openai: "gpt-image-1.5"
|
||||
openrouter: "google/gemini-3.1-flash-image-preview"
|
||||
dashscope: "z-image-turbo"
|
||||
replicate: "google/nano-banana-pro"
|
||||
batch:
|
||||
|
|
@ -90,5 +96,8 @@ batch:
|
|||
replicate:
|
||||
concurrency: 5
|
||||
start_interval_ms: 700
|
||||
openrouter:
|
||||
concurrency: 3
|
||||
start_interval_ms: 1100
|
||||
---
|
||||
```
|
||||
|
|
|
|||
|
|
@ -53,6 +53,7 @@ const DEFAULT_PROVIDER_RATE_LIMITS: Record<Provider, ProviderRateLimit> = {
|
|||
replicate: { concurrency: 5, startIntervalMs: 700 },
|
||||
google: { concurrency: 3, startIntervalMs: 1100 },
|
||||
openai: { concurrency: 3, startIntervalMs: 1100 },
|
||||
openrouter: { concurrency: 3, startIntervalMs: 1100 },
|
||||
dashscope: { concurrency: 3, startIntervalMs: 1100 },
|
||||
};
|
||||
|
||||
|
|
@ -68,13 +69,13 @@ Options:
|
|||
--image <path> Output image path (required in single-image mode)
|
||||
--batchfile <path> JSON batch file for multi-image generation
|
||||
--jobs <count> Worker count for batch mode (default: auto, max from config, built-in default 10)
|
||||
--provider google|openai|dashscope|replicate Force provider (auto-detect by default)
|
||||
--provider google|openai|openrouter|dashscope|replicate Force provider (auto-detect by default)
|
||||
-m, --model <id> Model ID
|
||||
--ar <ratio> Aspect ratio (e.g., 16:9, 1:1, 4:3)
|
||||
--size <WxH> Size (e.g., 1024x1024)
|
||||
--quality normal|2k Quality preset (default: 2k)
|
||||
--imageSize 1K|2K|4K Image size for Google (default: from quality)
|
||||
--ref <files...> Reference images (Google multimodal, OpenAI GPT Image edits, or Replicate)
|
||||
--imageSize 1K|2K|4K Image size for Google/OpenRouter (default: from quality)
|
||||
--ref <files...> Reference images (Google multimodal, OpenAI GPT Image edits, OpenRouter multimodal, or Replicate)
|
||||
--n <count> Number of images for the current task (default: 1)
|
||||
--json JSON output
|
||||
-h, --help Show help
|
||||
|
|
@ -101,16 +102,21 @@ Behavior:
|
|||
|
||||
Environment variables:
|
||||
OPENAI_API_KEY OpenAI API key
|
||||
OPENROUTER_API_KEY OpenRouter API key
|
||||
GOOGLE_API_KEY Google API key
|
||||
GEMINI_API_KEY Gemini API key (alias for GOOGLE_API_KEY)
|
||||
DASHSCOPE_API_KEY DashScope API key
|
||||
REPLICATE_API_TOKEN Replicate API token
|
||||
OPENAI_IMAGE_MODEL Default OpenAI model (gpt-image-1.5)
|
||||
OPENROUTER_IMAGE_MODEL Default OpenRouter model (google/gemini-3.1-flash-image-preview)
|
||||
GOOGLE_IMAGE_MODEL Default Google model (gemini-3-pro-image-preview)
|
||||
DASHSCOPE_IMAGE_MODEL Default DashScope model (z-image-turbo)
|
||||
REPLICATE_IMAGE_MODEL Default Replicate model (google/nano-banana-pro)
|
||||
OPENAI_BASE_URL Custom OpenAI endpoint
|
||||
OPENAI_IMAGE_USE_CHAT Use /chat/completions instead of /images/generations (true|false)
|
||||
OPENROUTER_BASE_URL Custom OpenRouter endpoint
|
||||
OPENROUTER_HTTP_REFERER Optional app URL for OpenRouter attribution
|
||||
OPENROUTER_TITLE Optional app name for OpenRouter attribution
|
||||
GOOGLE_BASE_URL Custom Google endpoint
|
||||
DASHSCOPE_BASE_URL Custom DashScope endpoint
|
||||
REPLICATE_BASE_URL Custom Replicate endpoint
|
||||
|
|
@ -206,7 +212,13 @@ function parseArgs(argv: string[]): CliArgs {
|
|||
|
||||
if (a === "--provider") {
|
||||
const v = argv[++i];
|
||||
if (v !== "google" && v !== "openai" && v !== "dashscope" && v !== "replicate") {
|
||||
if (
|
||||
v !== "google" &&
|
||||
v !== "openai" &&
|
||||
v !== "openrouter" &&
|
||||
v !== "dashscope" &&
|
||||
v !== "replicate"
|
||||
) {
|
||||
throw new Error(`Invalid provider: ${v}`);
|
||||
}
|
||||
out.provider = v;
|
||||
|
|
@ -352,7 +364,13 @@ function parseSimpleYaml(yaml: string): Partial<ExtendConfig> {
|
|||
} else if (key === "default_image_size") {
|
||||
config.default_image_size = value === "null" ? null : value as "1K" | "2K" | "4K";
|
||||
} else if (key === "default_model") {
|
||||
config.default_model = { google: null, openai: null, dashscope: null, replicate: null };
|
||||
config.default_model = {
|
||||
google: null,
|
||||
openai: null,
|
||||
openrouter: null,
|
||||
dashscope: null,
|
||||
replicate: null,
|
||||
};
|
||||
currentKey = "default_model";
|
||||
currentProvider = null;
|
||||
} else if (key === "batch") {
|
||||
|
|
@ -370,7 +388,13 @@ function parseSimpleYaml(yaml: string): Partial<ExtendConfig> {
|
|||
} else if (
|
||||
currentKey === "provider_limits" &&
|
||||
indent >= 4 &&
|
||||
(key === "google" || key === "openai" || key === "dashscope" || key === "replicate")
|
||||
(
|
||||
key === "google" ||
|
||||
key === "openai" ||
|
||||
key === "openrouter" ||
|
||||
key === "dashscope" ||
|
||||
key === "replicate"
|
||||
)
|
||||
) {
|
||||
config.batch ??= {};
|
||||
config.batch.provider_limits ??= {};
|
||||
|
|
@ -378,7 +402,13 @@ function parseSimpleYaml(yaml: string): Partial<ExtendConfig> {
|
|||
currentProvider = key;
|
||||
} else if (
|
||||
currentKey === "default_model" &&
|
||||
(key === "google" || key === "openai" || key === "dashscope" || key === "replicate")
|
||||
(
|
||||
key === "google" ||
|
||||
key === "openai" ||
|
||||
key === "openrouter" ||
|
||||
key === "dashscope" ||
|
||||
key === "replicate"
|
||||
)
|
||||
) {
|
||||
const cleaned = value.replace(/['"]/g, "");
|
||||
config.default_model![key] = cleaned === "null" ? null : cleaned;
|
||||
|
|
@ -466,10 +496,11 @@ function getConfiguredProviderRateLimits(
|
|||
replicate: { ...DEFAULT_PROVIDER_RATE_LIMITS.replicate },
|
||||
google: { ...DEFAULT_PROVIDER_RATE_LIMITS.google },
|
||||
openai: { ...DEFAULT_PROVIDER_RATE_LIMITS.openai },
|
||||
openrouter: { ...DEFAULT_PROVIDER_RATE_LIMITS.openrouter },
|
||||
dashscope: { ...DEFAULT_PROVIDER_RATE_LIMITS.dashscope },
|
||||
};
|
||||
|
||||
for (const provider of ["replicate", "google", "openai", "dashscope"] as Provider[]) {
|
||||
for (const provider of ["replicate", "google", "openai", "openrouter", "dashscope"] as Provider[]) {
|
||||
const envPrefix = `BAOYU_IMAGE_GEN_${provider.toUpperCase()}`;
|
||||
const extendLimit = extendConfig.batch?.provider_limits?.[provider];
|
||||
configured[provider] = {
|
||||
|
|
@ -522,10 +553,11 @@ function detectProvider(args: CliArgs): Provider {
|
|||
args.provider &&
|
||||
args.provider !== "google" &&
|
||||
args.provider !== "openai" &&
|
||||
args.provider !== "openrouter" &&
|
||||
args.provider !== "replicate"
|
||||
) {
|
||||
throw new Error(
|
||||
"Reference images require a ref-capable provider. Use --provider google (Gemini multimodal), --provider openai (GPT Image edits), or --provider replicate."
|
||||
"Reference images require a ref-capable provider. Use --provider google (Gemini multimodal), --provider openai (GPT Image edits), --provider openrouter (OpenRouter multimodal), or --provider replicate."
|
||||
);
|
||||
}
|
||||
|
||||
|
|
@ -533,21 +565,24 @@ function detectProvider(args: CliArgs): Provider {
|
|||
|
||||
const hasGoogle = !!(process.env.GOOGLE_API_KEY || process.env.GEMINI_API_KEY);
|
||||
const hasOpenai = !!process.env.OPENAI_API_KEY;
|
||||
const hasOpenrouter = !!process.env.OPENROUTER_API_KEY;
|
||||
const hasDashscope = !!process.env.DASHSCOPE_API_KEY;
|
||||
const hasReplicate = !!process.env.REPLICATE_API_TOKEN;
|
||||
|
||||
if (args.referenceImages.length > 0) {
|
||||
if (hasGoogle) return "google";
|
||||
if (hasOpenai) return "openai";
|
||||
if (hasOpenrouter) return "openrouter";
|
||||
if (hasReplicate) return "replicate";
|
||||
throw new Error(
|
||||
"Reference images require Google, OpenAI or Replicate. Set GOOGLE_API_KEY/GEMINI_API_KEY, OPENAI_API_KEY, or REPLICATE_API_TOKEN, or remove --ref."
|
||||
"Reference images require Google, OpenAI, OpenRouter or Replicate. Set GOOGLE_API_KEY/GEMINI_API_KEY, OPENAI_API_KEY, OPENROUTER_API_KEY, or REPLICATE_API_TOKEN, or remove --ref."
|
||||
);
|
||||
}
|
||||
|
||||
const available = [
|
||||
hasGoogle && "google",
|
||||
hasOpenai && "openai",
|
||||
hasOpenrouter && "openrouter",
|
||||
hasDashscope && "dashscope",
|
||||
hasReplicate && "replicate",
|
||||
].filter(Boolean) as Provider[];
|
||||
|
|
@ -556,7 +591,7 @@ function detectProvider(args: CliArgs): Provider {
|
|||
if (available.length > 1) return available[0]!;
|
||||
|
||||
throw new Error(
|
||||
"No API key found. Set GOOGLE_API_KEY, GEMINI_API_KEY, OPENAI_API_KEY, DASHSCOPE_API_KEY, or REPLICATE_API_TOKEN.\n" +
|
||||
"No API key found. Set GOOGLE_API_KEY, GEMINI_API_KEY, OPENAI_API_KEY, OPENROUTER_API_KEY, DASHSCOPE_API_KEY, or REPLICATE_API_TOKEN.\n" +
|
||||
"Create ~/.baoyu-skills/.env or <cwd>/.baoyu-skills/.env with your keys."
|
||||
);
|
||||
}
|
||||
|
|
@ -596,6 +631,7 @@ async function loadProviderModule(provider: Provider): Promise<ProviderModule> {
|
|||
if (provider === "google") return (await import("./providers/google")) as ProviderModule;
|
||||
if (provider === "dashscope") return (await import("./providers/dashscope")) as ProviderModule;
|
||||
if (provider === "replicate") return (await import("./providers/replicate")) as ProviderModule;
|
||||
if (provider === "openrouter") return (await import("./providers/openrouter")) as ProviderModule;
|
||||
return (await import("./providers/openai")) as ProviderModule;
|
||||
}
|
||||
|
||||
|
|
@ -617,6 +653,9 @@ function getModelForProvider(
|
|||
if (extendConfig.default_model) {
|
||||
if (provider === "google" && extendConfig.default_model.google) return extendConfig.default_model.google;
|
||||
if (provider === "openai" && extendConfig.default_model.openai) return extendConfig.default_model.openai;
|
||||
if (provider === "openrouter" && extendConfig.default_model.openrouter) {
|
||||
return extendConfig.default_model.openrouter;
|
||||
}
|
||||
if (provider === "dashscope" && extendConfig.default_model.dashscope) return extendConfig.default_model.dashscope;
|
||||
if (provider === "replicate" && extendConfig.default_model.replicate) return extendConfig.default_model.replicate;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,255 @@
|
|||
import path from "node:path";
|
||||
import { readFile } from "node:fs/promises";
|
||||
import type { CliArgs } from "../types";
|
||||
|
||||
/** Model id returned by `getDefaultModel` when the OPENROUTER_IMAGE_MODEL environment variable is unset or empty. */
const DEFAULT_MODEL = "google/gemini-3.1-flash-image-preview";
|
||||
|
||||
/**
 * An image reference as it may appear in a response: a bare string, an object
 * carrying the string under `url`, or null.
 */
type OpenRouterImageRef = string | { url?: string | null } | null;

/** One element of `message.images`; the reference may sit under either key spelling. */
type OpenRouterImageEntry = {
  image_url?: OpenRouterImageRef;
  imageUrl?: OpenRouterImageRef;
};

/** One element of an array-valued `message.content`. */
type OpenRouterMessagePart = {
  type?: string;
  text?: string;
  image_url?: OpenRouterImageRef;
  imageUrl?: OpenRouterImageRef;
};

/** The assistant message inside a choice; every field is optional. */
type OpenRouterMessage = {
  images?: OpenRouterImageEntry[];
  content?: string | OpenRouterMessagePart[];
};

/** The subset of the chat-completions response body that this module reads. */
type OpenRouterResponse = {
  choices?: Array<{ message?: OpenRouterMessage }>;
};
|
||||
|
||||
/**
 * Returns the model id to use for OpenRouter image generation.
 *
 * @returns The OPENROUTER_IMAGE_MODEL environment variable when it is set to a
 *   non-empty value, otherwise the module's DEFAULT_MODEL.
 */
export function getDefaultModel(): string {
  const override = process.env.OPENROUTER_IMAGE_MODEL;
  return override ? override : DEFAULT_MODEL;
}
|
||||
|
||||
/**
 * Reads the OpenRouter API key from the environment.
 *
 * @returns The value of OPENROUTER_API_KEY, or null when it is unset or empty.
 */
function getApiKey(): string | null {
  const key = process.env.OPENROUTER_API_KEY;
  if (!key) return null;
  return key;
}
|
||||
|
||||
/**
 * Resolves the API base URL without any trailing slashes.
 *
 * @returns OPENROUTER_BASE_URL when set to a non-empty value, otherwise
 *   "https://openrouter.ai/api/v1"; in both cases every trailing "/" is removed.
 */
function getBaseUrl(): string {
  let base = process.env.OPENROUTER_BASE_URL;
  if (!base) base = "https://openrouter.ai/api/v1";

  // Walk back over the trailing run of slashes instead of using a regex.
  let end = base.length;
  while (end > 0 && base[end - 1] === "/") end -= 1;
  return base.slice(0, end);
}
|
||||
|
||||
/**
 * Builds the HTTP headers for an OpenRouter request.
 *
 * Always includes a JSON content type and a bearer Authorization header. When
 * OPENROUTER_HTTP_REFERER is non-blank after trimming, it is sent as
 * "HTTP-Referer". When OPENROUTER_TITLE is non-blank after trimming, it is sent
 * under both "X-OpenRouter-Title" and "X-Title".
 *
 * @param apiKey - Key placed in the Authorization header.
 * @returns The header map.
 */
function getHeaders(apiKey: string): Record<string, string> {
  const referer = process.env.OPENROUTER_HTTP_REFERER?.trim();
  const title = process.env.OPENROUTER_TITLE?.trim();

  return {
    "Content-Type": "application/json",
    Authorization: `Bearer ${apiKey}`,
    ...(referer ? { "HTTP-Referer": referer } : {}),
    ...(title ? { "X-OpenRouter-Title": title, "X-Title": title } : {}),
  };
}
|
||||
|
||||
/**
 * Parses a "WIDTHxHEIGHT" pixel-size string (separator "x" or "X", optional
 * whitespace around it).
 *
 * @param value - Text to parse; it must match the pattern in full.
 * @returns The dimensions, or null when the text does not match or either
 *   dimension is not a finite positive number.
 */
function parsePixelSize(value: string): { width: number; height: number } | null {
  const parts = /^(\d+)\s*[xX]\s*(\d+)$/.exec(value);
  if (parts === null) return null;

  const [, rawWidth, rawHeight] = parts;
  const width = Number.parseInt(rawWidth!, 10);
  const height = Number.parseInt(rawHeight!, 10);

  const usable = (edge: number): boolean => Number.isFinite(edge) && edge > 0;
  return usable(width) && usable(height) ? { width, height } : null;
}
|
||||
|
||||
/**
 * Greatest common divisor of |a| and |b| via the Euclidean algorithm.
 *
 * @returns The divisor, or 1 when the result would otherwise be falsy
 *   (for example when both inputs are 0).
 */
function gcd(a: number, b: number): number {
  let [dividend, divisor] = [Math.abs(a), Math.abs(b)];
  while (divisor !== 0) {
    [dividend, divisor] = [divisor, dividend % divisor];
  }
  return dividend || 1;
}
|
||||
|
||||
/**
 * Derives a reduced "W:H" aspect ratio from a pixel-size string.
 *
 * @param size - A size such as "1920x1080", or null.
 * @returns The ratio with both sides divided by their gcd (e.g. "16:9"), or
 *   null when `size` is empty or cannot be parsed by `parsePixelSize`.
 */
function inferAspectRatio(size: string | null): string | null {
  const dims = size ? parsePixelSize(size) : null;
  if (dims === null) return null;

  const unit = gcd(dims.width, dims.height);
  return [dims.width, dims.height].map((edge) => edge / unit).join(":");
}
|
||||
|
||||
/**
 * Maps a pixel-size string to a resolution tier based on its longest edge.
 *
 * @param size - A size such as "2048x1024", or null.
 * @returns "1K" for a longest edge up to 1024, "2K" up to 2048, "4K" above
 *   that; null when `size` is empty or cannot be parsed by `parsePixelSize`.
 */
function inferImageSize(size: string | null): "1K" | "2K" | "4K" | null {
  const dims = size ? parsePixelSize(size) : null;
  if (dims === null) return null;

  const longest = dims.width >= dims.height ? dims.width : dims.height;
  return longest <= 1024 ? "1K" : longest <= 2048 ? "2K" : "4K";
}
|
||||
|
||||
/**
 * Chooses the resolution tier to request.
 *
 * Precedence: an explicit `args.imageSize`, then a tier inferred from
 * `args.size`, then a quality-based default ("1K" for "normal" quality,
 * "2K" otherwise).
 */
function getImageSize(args: CliArgs): "1K" | "2K" | "4K" {
  const explicit = args.imageSize;
  // The value is passed through as-is; it is not validated against the tier names here.
  if (explicit) return explicit as "1K" | "2K" | "4K";

  return inferImageSize(args.size) ?? (args.quality === "normal" ? "1K" : "2K");
}
|
||||
|
||||
/**
 * Chooses the aspect ratio to request: `args.aspectRatio` when set, otherwise
 * the ratio inferred from `args.size` (null when neither yields one).
 */
function getAspectRatio(args: CliArgs): string | null {
  if (args.aspectRatio) return args.aspectRatio;
  return inferAspectRatio(args.size);
}
|
||||
|
||||
/**
 * Guesses an image MIME type from a file name's extension (case-insensitive).
 *
 * @returns "image/jpeg", "image/webp" or "image/gif" for the matching
 *   extensions; "image/png" for everything else, including no extension.
 */
function getMimeType(filename: string): string {
  switch (path.extname(filename).toLowerCase()) {
    case ".jpg":
    case ".jpeg":
      return "image/jpeg";
    case ".webp":
      return "image/webp";
    case ".gif":
      return "image/gif";
    default:
      return "image/png";
  }
}
|
||||
|
||||
/**
 * Reads a file and encodes it as a base64 `data:` URL.
 *
 * The MIME type comes from `getMimeType(filePath)`, i.e. from the file
 * extension rather than the file contents.
 *
 * @param filePath - Path of the image to read.
 * @returns A string of the form `data:<mime>;base64,<payload>`.
 */
async function readImageAsDataUrl(filePath: string): Promise<string> {
  const mime = getMimeType(filePath);
  const payload = (await readFile(filePath)).toString("base64");
  return "data:" + mime + ";base64," + payload;
}
|
||||
|
||||
/**
 * Builds the multi-part `content` array for the user message.
 *
 * @param prompt - Text placed in the first part.
 * @param referenceImages - Image URLs; each becomes an `image_url` part, in order.
 * @returns The text part followed by one part per reference image.
 */
function buildContent(prompt: string, referenceImages: string[]): Array<Record<string, unknown>> {
  const textPart: Record<string, unknown> = { type: "text", text: prompt };
  const imageParts = referenceImages.map(
    (url): Record<string, unknown> => ({ type: "image_url", image_url: { url } })
  );
  return [textPart, ...imageParts];
}
|
||||
|
||||
/**
 * Pulls the image URL string out of an image entry or message part.
 *
 * Reads `image_url`, falling back to `imageUrl` when the former is null or
 * undefined. The reference may be a bare string or an object with a `url`.
 *
 * @returns The URL string, or null when there is no reference, the string
 *   form is empty, or the object form has no `url`.
 */
function extractImageUrl(entry: OpenRouterImageEntry | OpenRouterMessagePart): string | null {
  const ref = entry.image_url ?? entry.imageUrl;

  if (typeof ref === "string") return ref.length > 0 ? ref : null;
  if (ref === null || ref === undefined) return null;
  return ref.url ?? null;
}
|
||||
|
||||
/**
 * Decodes a base64 `data:image/...` URL into raw bytes.
 *
 * Besides the strict `data:image/<subtype>;base64,<payload>` form, this also
 * accepts:
 * - leading/trailing whitespace around the whole value,
 * - whitespace (e.g. line breaks) inside the payload,
 * - extra media-type parameters before `;base64` (e.g. `;charset=utf-8`).
 * Without this tolerance such values would be reported as "not a data URL".
 *
 * @param value - Candidate data URL.
 * @returns The decoded bytes as a plain Uint8Array, or null when `value` is
 *   not a base64 image data URL.
 */
function decodeDataUrl(value: string): Uint8Array | null {
  const match = value
    .trim()
    .match(/^data:image\/[^;]+(?:;[^;,]+)*;base64,([A-Za-z0-9+/=\s]+)$/);
  if (!match) return null;

  // Strip embedded whitespace so the decoder only sees base64 characters.
  const payload = match[1]!.replace(/\s+/g, "");
  return Uint8Array.from(Buffer.from(payload, "base64"));
}
|
||||
|
||||
/**
 * Turns an image reference from an OpenRouter response into raw bytes.
 *
 * Handles, in order:
 * 1. base64 `data:image/...` URLs (decoded locally via `decodeDataUrl`),
 * 2. `http://` / `https://` URLs (fetched),
 * 3. bare base64 payloads.
 *
 * Anything else is rejected. Node's base64 decoder skips characters it does
 * not recognise, so decoding an arbitrary string would otherwise yield
 * garbage bytes rather than an error.
 *
 * @param value - Data URL, http(s) URL, or bare base64 string.
 * @returns The image bytes.
 * @throws Error when the download responds with a non-OK status, or when
 *   `value` is none of the supported forms.
 */
async function downloadImage(value: string): Promise<Uint8Array> {
  const inline = decodeDataUrl(value);
  if (inline) return inline;

  if (value.startsWith("http://") || value.startsWith("https://")) {
    const response = await fetch(value);
    if (!response.ok) {
      throw new Error(`Failed to download OpenRouter image: ${response.status}`);
    }
    return new Uint8Array(await response.arrayBuffer());
  }

  // Bare base64: tolerate whitespace and the URL-safe alphabet, reject everything else.
  const compact = value.replace(/\s+/g, "");
  if (!/^[A-Za-z0-9+/_-]+={0,2}$/.test(compact)) {
    throw new Error(
      "Unsupported OpenRouter image payload: expected a data URL, an http(s) URL, or base64 data"
    );
  }
  return Uint8Array.from(Buffer.from(compact, "base64"));
}
|
||||
|
||||
/**
 * Finds the first image in an OpenRouter chat-completions response and returns
 * its bytes. Only the first choice's message is inspected.
 *
 * Search order:
 * 1. `message.images`, first entry that yields a URL;
 * 2. when `message.content` is an array: each part in order, first by image
 *    URL, then (for non-empty text parts) by treating the text as a data URL;
 * 3. when `message.content` is a string: the string treated as a data URL.
 *
 * @throws Error("No image in OpenRouter response") when nothing matches.
 */
async function extractImageFromResponse(result: OpenRouterResponse): Promise<Uint8Array> {
  const message = result.choices?.[0]?.message;
  const content = message?.content;

  const entries = message?.images ?? [];
  for (const entry of entries) {
    const url = extractImageUrl(entry);
    if (url) return downloadImage(url);
  }

  if (typeof content === "string" && !Array.isArray(content)) {
    const bytes = decodeDataUrl(content);
    if (bytes) return bytes;
  } else if (Array.isArray(content)) {
    for (const part of content) {
      const url = extractImageUrl(part);
      if (url) return downloadImage(url);

      const bytes = part.type === "text" && part.text ? decodeDataUrl(part.text) : null;
      if (bytes) return bytes;
    }
  }

  throw new Error("No image in OpenRouter response");
}
|
||||
|
||||
/**
 * Generates an image through OpenRouter's chat-completions endpoint.
 *
 * Reference images from `args.referenceImages` are read from disk, inlined as
 * data URLs and attached to the user message after the prompt text.
 *
 * The image options are sent under OpenRouter's documented request fields:
 * `image_config` (`image_size`, `aspect_ratio`) and
 * `provider.require_parameters`. The previous camelCase keys
 * (`imageGenerationOptions`, `providerPreferences`) and the `size` key are not
 * part of the documented schema.
 * NOTE(review): `provider.require_parameters: true` restricts routing to
 * providers that support every request parameter — confirm the chosen model
 * still routes with it enabled.
 *
 * @param prompt - Text prompt.
 * @param model - OpenRouter model id.
 * @param args - CLI arguments; reads referenceImages, imageSize, size, quality
 *   and aspectRatio (via the helper functions).
 * @returns The generated image bytes.
 * @throws Error when OPENROUTER_API_KEY is missing, the API responds with a
 *   non-OK status, or the response contains no image.
 */
export async function generateImage(
  prompt: string,
  model: string,
  args: CliArgs
): Promise<Uint8Array> {
  const apiKey = getApiKey();
  if (!apiKey) {
    throw new Error("OPENROUTER_API_KEY is required. Get one at https://openrouter.ai/settings/keys");
  }

  // The reads are independent; Promise.all keeps the results in input order.
  const referenceImages: string[] = await Promise.all(
    args.referenceImages.map((refPath: string) => readImageAsDataUrl(refPath))
  );

  const imageConfig: Record<string, string> = {
    image_size: getImageSize(args),
  };
  const aspectRatio = getAspectRatio(args);
  if (aspectRatio) {
    imageConfig.aspect_ratio = aspectRatio;
  }

  const body = {
    model,
    messages: [
      {
        role: "user",
        content: buildContent(prompt, referenceImages),
      },
    ],
    modalities: ["image", "text"],
    max_tokens: 256,
    image_config: imageConfig,
    provider: {
      require_parameters: true,
    },
  };

  console.log(`Generating image with OpenRouter (${model})...`, imageConfig);

  const response = await fetch(`${getBaseUrl()}/chat/completions`, {
    method: "POST",
    headers: getHeaders(apiKey),
    body: JSON.stringify(body),
  });

  if (!response.ok) {
    const errorText = await response.text();
    throw new Error(`OpenRouter API error (${response.status}): ${errorText}`);
  }

  const result = (await response.json()) as OpenRouterResponse;
  return extractImageFromResponse(result);
}
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
export type Provider = "google" | "openai" | "dashscope" | "replicate";
|
||||
export type Provider = "google" | "openai" | "openrouter" | "dashscope" | "replicate";
|
||||
export type Quality = "normal" | "2k";
|
||||
|
||||
export type CliArgs = {
|
||||
|
|
@ -50,6 +50,7 @@ export type ExtendConfig = {
|
|||
default_model: {
|
||||
google: string | null;
|
||||
openai: string | null;
|
||||
openrouter: string | null;
|
||||
dashscope: string | null;
|
||||
replicate: string | null;
|
||||
};
|
||||
|
|
|
|||
Loading…
Reference in New Issue