feat(baoyu-image-gen): add MiniMax provider with subject reference and custom sizes
This commit is contained in:
parent
86a3d6521b
commit
ad8781c1c5
74
README.md
74
README.md
|
|
@ -663,7 +663,7 @@ AI-powered generation backends.
|
||||||
|
|
||||||
#### baoyu-image-gen
|
#### baoyu-image-gen
|
||||||
|
|
||||||
AI SDK-based image generation using OpenAI, Azure OpenAI, Google, OpenRouter, DashScope (Aliyun Tongyi Wanxiang), Jimeng (即梦), Seedream (豆包), and Replicate APIs. Supports text-to-image, reference images, aspect ratios, and quality presets.
|
AI SDK-based image generation using OpenAI, Azure OpenAI, Google, OpenRouter, DashScope (Aliyun Tongyi Wanxiang), MiniMax, Jimeng (即梦), Seedream (豆包), and Replicate APIs. Supports text-to-image, reference images, aspect ratios, custom sizes, batch generation, and quality presets.
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Basic generation (auto-detect provider)
|
# Basic generation (auto-detect provider)
|
||||||
|
|
@ -684,9 +684,21 @@ AI SDK-based image generation using OpenAI, Azure OpenAI, Google, OpenRouter, Da
|
||||||
# OpenRouter
|
# OpenRouter
|
||||||
/baoyu-image-gen --prompt "A cat" --image cat.png --provider openrouter
|
/baoyu-image-gen --prompt "A cat" --image cat.png --provider openrouter
|
||||||
|
|
||||||
|
# OpenRouter with reference images
|
||||||
|
/baoyu-image-gen --prompt "Make it blue" --image out.png --provider openrouter --model google/gemini-3.1-flash-image-preview --ref source.png
|
||||||
|
|
||||||
# DashScope (Aliyun Tongyi Wanxiang)
|
# DashScope (Aliyun Tongyi Wanxiang)
|
||||||
/baoyu-image-gen --prompt "一只可爱的猫" --image cat.png --provider dashscope
|
/baoyu-image-gen --prompt "一只可爱的猫" --image cat.png --provider dashscope
|
||||||
|
|
||||||
|
# DashScope with custom size
|
||||||
|
/baoyu-image-gen --prompt "为咖啡品牌设计一张 21:9 横幅海报,包含清晰中文标题" --image banner.png --provider dashscope --model qwen-image-2.0-pro --size 2048x872
|
||||||
|
|
||||||
|
# MiniMax
|
||||||
|
/baoyu-image-gen --prompt "A fashion editorial portrait by a bright studio window" --image out.jpg --provider minimax
|
||||||
|
|
||||||
|
# MiniMax with subject reference
|
||||||
|
/baoyu-image-gen --prompt "A girl stands by the library window, cinematic lighting" --image out.jpg --provider minimax --model image-01 --ref portrait.png --ar 16:9
|
||||||
|
|
||||||
# Replicate
|
# Replicate
|
||||||
/baoyu-image-gen --prompt "A cat" --image cat.png --provider replicate
|
/baoyu-image-gen --prompt "A cat" --image cat.png --provider replicate
|
||||||
|
|
||||||
|
|
@ -696,8 +708,11 @@ AI SDK-based image generation using OpenAI, Azure OpenAI, Google, OpenRouter, Da
|
||||||
# Seedream (豆包)
|
# Seedream (豆包)
|
||||||
/baoyu-image-gen --prompt "一只可爱的猫" --image cat.png --provider seedream
|
/baoyu-image-gen --prompt "一只可爱的猫" --image cat.png --provider seedream
|
||||||
|
|
||||||
# With reference images (Google, OpenAI, Azure OpenAI, OpenRouter, Replicate, or Seedream 5.0/4.5/4.0)
|
# With reference images (Google, OpenAI, Azure OpenAI, OpenRouter, Replicate, MiniMax, or Seedream 5.0/4.5/4.0)
|
||||||
/baoyu-image-gen --prompt "Make it blue" --image out.png --ref source.png
|
/baoyu-image-gen --prompt "Make it blue" --image out.png --ref source.png
|
||||||
|
|
||||||
|
# Batch mode
|
||||||
|
/baoyu-image-gen --batchfile batch.json --jobs 4 --json
|
||||||
```
|
```
|
||||||
|
|
||||||
**Options**:
|
**Options**:
|
||||||
|
|
@ -706,44 +721,73 @@ AI SDK-based image generation using OpenAI, Azure OpenAI, Google, OpenRouter, Da
|
||||||
| `--prompt`, `-p` | Prompt text |
|
| `--prompt`, `-p` | Prompt text |
|
||||||
| `--promptfiles` | Read prompt from files (concatenated) |
|
| `--promptfiles` | Read prompt from files (concatenated) |
|
||||||
| `--image` | Output image path (required) |
|
| `--image` | Output image path (required) |
|
||||||
| `--provider` | `google`, `openai`, `openrouter`, `dashscope`, `jimeng`, `seedream` or `replicate` (default: auto-detect; prefers google) |
|
| `--batchfile` | JSON batch file for multi-image generation |
|
||||||
| `--model`, `-m` | Model ID |
|
| `--jobs` | Worker count for batch mode |
|
||||||
|
| `--provider` | `google`, `openai`, `azure`, `openrouter`, `dashscope`, `minimax`, `jimeng`, `seedream`, or `replicate` (default: auto-detect; see Provider Auto-Selection below) |
|
||||||
|
| `--model`, `-m` | Model ID or deployment name. Azure uses deployment name; OpenRouter uses full model IDs; MiniMax uses `image-01` / `image-01-live` |
|
||||||
| `--ar` | Aspect ratio (e.g., `16:9`, `1:1`, `4:3`) |
|
| `--ar` | Aspect ratio (e.g., `16:9`, `1:1`, `4:3`) |
|
||||||
| `--size` | Size (e.g., `1024x1024`) |
|
| `--size` | Size (e.g., `1024x1024`) |
|
||||||
| `--quality` | `normal` or `2k` (default: `2k`) |
|
| `--quality` | `normal` or `2k` (default: `2k`) |
|
||||||
| `--ref` | Reference images (Google, OpenAI, OpenRouter, Replicate, or Seedream 5.0/4.5/4.0) |
|
| `--imageSize` | `1K`, `2K`, or `4K` for Google/OpenRouter |
|
||||||
|
| `--ref` | Reference images (Google, OpenAI, Azure OpenAI, OpenRouter, Replicate, MiniMax, or Seedream 5.0/4.5/4.0) |
|
||||||
|
| `--n` | Number of images per request |
|
||||||
|
| `--json` | JSON output |
|
||||||
|
|
||||||
**Environment Variables** (see [Environment Configuration](#environment-configuration) for setup):
|
**Environment Variables** (see [Environment Configuration](#environment-configuration) for setup):
|
||||||
| Variable | Description | Default |
|
| Variable | Description | Default |
|
||||||
|----------|-------------|---------|
|
|----------|-------------|---------|
|
||||||
| `OPENAI_API_KEY` | OpenAI API key | - |
|
| `OPENAI_API_KEY` | OpenAI API key | - |
|
||||||
|
| `AZURE_OPENAI_API_KEY` | Azure OpenAI API key | - |
|
||||||
| `OPENROUTER_API_KEY` | OpenRouter API key | - |
|
| `OPENROUTER_API_KEY` | OpenRouter API key | - |
|
||||||
| `GOOGLE_API_KEY` | Google API key | - |
|
| `GOOGLE_API_KEY` | Google API key | - |
|
||||||
|
| `GEMINI_API_KEY` | Alias for `GOOGLE_API_KEY` | - |
|
||||||
| `DASHSCOPE_API_KEY` | DashScope API key (Aliyun) | - |
|
| `DASHSCOPE_API_KEY` | DashScope API key (Aliyun) | - |
|
||||||
|
| `MINIMAX_API_KEY` | MiniMax API key | - |
|
||||||
| `REPLICATE_API_TOKEN` | Replicate API token | - |
|
| `REPLICATE_API_TOKEN` | Replicate API token | - |
|
||||||
| `JIMENG_ACCESS_KEY_ID` | Jimeng Volcengine access key | - |
|
| `JIMENG_ACCESS_KEY_ID` | Jimeng Volcengine access key | - |
|
||||||
| `JIMENG_SECRET_ACCESS_KEY` | Jimeng Volcengine secret key | - |
|
| `JIMENG_SECRET_ACCESS_KEY` | Jimeng Volcengine secret key | - |
|
||||||
| `ARK_API_KEY` | Seedream Volcengine ARK API key | - |
|
| `ARK_API_KEY` | Seedream Volcengine ARK API key | - |
|
||||||
| `OPENAI_IMAGE_MODEL` | OpenAI model | `gpt-image-1.5` |
|
| `OPENAI_IMAGE_MODEL` | OpenAI model | `gpt-image-1.5` |
|
||||||
|
| `AZURE_OPENAI_DEPLOYMENT` | Azure default deployment name | - |
|
||||||
|
| `AZURE_OPENAI_IMAGE_MODEL` | Backward-compatible Azure deployment/model alias | `gpt-image-1.5` |
|
||||||
| `OPENROUTER_IMAGE_MODEL` | OpenRouter model | `google/gemini-3.1-flash-image-preview` |
|
| `OPENROUTER_IMAGE_MODEL` | OpenRouter model | `google/gemini-3.1-flash-image-preview` |
|
||||||
| `GOOGLE_IMAGE_MODEL` | Google model | `gemini-3-pro-image-preview` |
|
| `GOOGLE_IMAGE_MODEL` | Google model | `gemini-3-pro-image-preview` |
|
||||||
| `DASHSCOPE_IMAGE_MODEL` | DashScope model | `qwen-image-2.0-pro` |
|
| `DASHSCOPE_IMAGE_MODEL` | DashScope model | `qwen-image-2.0-pro` |
|
||||||
|
| `MINIMAX_IMAGE_MODEL` | MiniMax model | `image-01` |
|
||||||
| `REPLICATE_IMAGE_MODEL` | Replicate model | `google/nano-banana-pro` |
|
| `REPLICATE_IMAGE_MODEL` | Replicate model | `google/nano-banana-pro` |
|
||||||
| `JIMENG_IMAGE_MODEL` | Jimeng model | `jimeng_t2i_v40` |
|
| `JIMENG_IMAGE_MODEL` | Jimeng model | `jimeng_t2i_v40` |
|
||||||
| `SEEDREAM_IMAGE_MODEL` | Seedream model | `doubao-seedream-5-0-260128` |
|
| `SEEDREAM_IMAGE_MODEL` | Seedream model | `doubao-seedream-5-0-260128` |
|
||||||
| `OPENAI_BASE_URL` | Custom OpenAI endpoint | - |
|
| `OPENAI_BASE_URL` | Custom OpenAI endpoint | - |
|
||||||
|
| `OPENAI_IMAGE_USE_CHAT` | Use `/chat/completions` for OpenAI image generation | `false` |
|
||||||
|
| `AZURE_OPENAI_BASE_URL` | Azure resource or deployment endpoint | - |
|
||||||
|
| `AZURE_API_VERSION` | Azure image API version | `2025-04-01-preview` |
|
||||||
| `OPENROUTER_BASE_URL` | Custom OpenRouter endpoint | `https://openrouter.ai/api/v1` |
|
| `OPENROUTER_BASE_URL` | Custom OpenRouter endpoint | `https://openrouter.ai/api/v1` |
|
||||||
|
| `OPENROUTER_HTTP_REFERER` | Optional app/site URL for OpenRouter attribution | - |
|
||||||
|
| `OPENROUTER_TITLE` | Optional app name for OpenRouter attribution | - |
|
||||||
| `GOOGLE_BASE_URL` | Custom Google endpoint | - |
|
| `GOOGLE_BASE_URL` | Custom Google endpoint | - |
|
||||||
| `DASHSCOPE_BASE_URL` | Custom DashScope endpoint | - |
|
| `DASHSCOPE_BASE_URL` | Custom DashScope endpoint | - |
|
||||||
|
| `MINIMAX_BASE_URL` | Custom MiniMax endpoint | `https://api.minimax.io` |
|
||||||
| `REPLICATE_BASE_URL` | Custom Replicate endpoint | - |
|
| `REPLICATE_BASE_URL` | Custom Replicate endpoint | - |
|
||||||
| `JIMENG_BASE_URL` | Custom Jimeng endpoint | `https://visual.volcengineapi.com` |
|
| `JIMENG_BASE_URL` | Custom Jimeng endpoint | `https://visual.volcengineapi.com` |
|
||||||
| `JIMENG_REGION` | Jimeng region | `cn-north-1` |
|
| `JIMENG_REGION` | Jimeng region | `cn-north-1` |
|
||||||
| `SEEDREAM_BASE_URL` | Custom Seedream endpoint | `https://ark.cn-beijing.volces.com/api/v3` |
|
| `SEEDREAM_BASE_URL` | Custom Seedream endpoint | `https://ark.cn-beijing.volces.com/api/v3` |
|
||||||
|
| `BAOYU_IMAGE_GEN_MAX_WORKERS` | Override batch worker cap | `10` |
|
||||||
|
| `BAOYU_IMAGE_GEN_<PROVIDER>_CONCURRENCY` | Override provider concurrency | provider-specific |
|
||||||
|
| `BAOYU_IMAGE_GEN_<PROVIDER>_START_INTERVAL_MS` | Override provider request start gap | provider-specific |
|
||||||
|
|
||||||
|
**Provider Notes**:
|
||||||
|
- Azure OpenAI: `--model` means Azure deployment name, not the underlying model family.
|
||||||
|
- DashScope: `qwen-image-2.0-pro` is the recommended default for custom `--size`, `21:9`, and strong Chinese/English text rendering.
|
||||||
|
- MiniMax: `image-01` supports the custom `width` / `height` described in the official MiniMax documentation; `image-01-live` is lower latency and works best with `--ar`.
|
||||||
|
- MiniMax reference images are sent as `subject_reference`; the current API is specialized toward character / portrait consistency.
|
||||||
|
- Jimeng does not support reference images.
|
||||||
|
- Seedream reference images are supported by Seedream 5.0 / 4.5 / 4.0, not Seedream 3.0.
|
||||||
|
|
||||||
**Provider Auto-Selection**:
|
**Provider Auto-Selection**:
|
||||||
1. If `--provider` specified → use it
|
1. If `--provider` is specified → use it
|
||||||
2. If only one API key available → use that provider
|
2. If `--ref` is provided and no provider is specified → try Google, then OpenAI, Azure, OpenRouter, Replicate, Seedream, and finally MiniMax
|
||||||
3. If multiple available → default to Google
|
3. If only one API key is available → use that provider
|
||||||
|
4. If multiple providers are available → default to Google
|
||||||
|
|
||||||
#### baoyu-danger-gemini-web
|
#### baoyu-danger-gemini-web
|
||||||
|
|
||||||
|
|
@ -1018,11 +1062,20 @@ cat > ~/.baoyu-skills/.env << 'EOF'
|
||||||
OPENAI_API_KEY=sk-xxx
|
OPENAI_API_KEY=sk-xxx
|
||||||
OPENAI_IMAGE_MODEL=gpt-image-1.5
|
OPENAI_IMAGE_MODEL=gpt-image-1.5
|
||||||
# OPENAI_BASE_URL=https://api.openai.com/v1
|
# OPENAI_BASE_URL=https://api.openai.com/v1
|
||||||
|
# OPENAI_IMAGE_USE_CHAT=false
|
||||||
|
|
||||||
|
# Azure OpenAI
|
||||||
|
AZURE_OPENAI_API_KEY=xxx
|
||||||
|
AZURE_OPENAI_BASE_URL=https://your-resource.openai.azure.com
|
||||||
|
AZURE_OPENAI_DEPLOYMENT=gpt-image-1.5
|
||||||
|
# AZURE_API_VERSION=2025-04-01-preview
|
||||||
|
|
||||||
# OpenRouter
|
# OpenRouter
|
||||||
OPENROUTER_API_KEY=sk-or-xxx
|
OPENROUTER_API_KEY=sk-or-xxx
|
||||||
OPENROUTER_IMAGE_MODEL=google/gemini-3.1-flash-image-preview
|
OPENROUTER_IMAGE_MODEL=google/gemini-3.1-flash-image-preview
|
||||||
# OPENROUTER_BASE_URL=https://openrouter.ai/api/v1
|
# OPENROUTER_BASE_URL=https://openrouter.ai/api/v1
|
||||||
|
# OPENROUTER_HTTP_REFERER=https://your-app.example.com
|
||||||
|
# OPENROUTER_TITLE=Your App Name
|
||||||
|
|
||||||
# Google
|
# Google
|
||||||
GOOGLE_API_KEY=xxx
|
GOOGLE_API_KEY=xxx
|
||||||
|
|
@ -1034,6 +1087,11 @@ DASHSCOPE_API_KEY=sk-xxx
|
||||||
DASHSCOPE_IMAGE_MODEL=qwen-image-2.0-pro
|
DASHSCOPE_IMAGE_MODEL=qwen-image-2.0-pro
|
||||||
# DASHSCOPE_BASE_URL=https://dashscope.aliyuncs.com/api/v1
|
# DASHSCOPE_BASE_URL=https://dashscope.aliyuncs.com/api/v1
|
||||||
|
|
||||||
|
# MiniMax
|
||||||
|
MINIMAX_API_KEY=xxx
|
||||||
|
MINIMAX_IMAGE_MODEL=image-01
|
||||||
|
# MINIMAX_BASE_URL=https://api.minimax.io
|
||||||
|
|
||||||
# Replicate
|
# Replicate
|
||||||
REPLICATE_API_TOKEN=r8_xxx
|
REPLICATE_API_TOKEN=r8_xxx
|
||||||
REPLICATE_IMAGE_MODEL=google/nano-banana-pro
|
REPLICATE_IMAGE_MODEL=google/nano-banana-pro
|
||||||
|
|
|
||||||
72
README.zh.md
72
README.zh.md
|
|
@ -663,7 +663,7 @@ AI 驱动的生成后端。
|
||||||
|
|
||||||
#### baoyu-image-gen
|
#### baoyu-image-gen
|
||||||
|
|
||||||
基于 AI SDK 的图像生成,支持 OpenAI、Azure OpenAI、Google、OpenRouter、DashScope(阿里通义万相)、即梦(Jimeng)、豆包(Seedream)和 Replicate API。支持文生图、参考图、宽高比和质量预设。
|
基于 AI SDK 的图像生成,支持 OpenAI、Azure OpenAI、Google、OpenRouter、DashScope(阿里通义万相)、MiniMax、即梦(Jimeng)、豆包(Seedream)和 Replicate API。支持文生图、参考图、宽高比、自定义尺寸、批量生成和质量预设。
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# 基础生成(自动检测服务商)
|
# 基础生成(自动检测服务商)
|
||||||
|
|
@ -684,9 +684,21 @@ AI 驱动的生成后端。
|
||||||
# OpenRouter
|
# OpenRouter
|
||||||
/baoyu-image-gen --prompt "一只猫" --image cat.png --provider openrouter
|
/baoyu-image-gen --prompt "一只猫" --image cat.png --provider openrouter
|
||||||
|
|
||||||
|
# OpenRouter + 参考图
|
||||||
|
/baoyu-image-gen --prompt "把它变成蓝色" --image out.png --provider openrouter --model google/gemini-3.1-flash-image-preview --ref source.png
|
||||||
|
|
||||||
# DashScope(阿里通义万相)
|
# DashScope(阿里通义万相)
|
||||||
/baoyu-image-gen --prompt "一只可爱的猫" --image cat.png --provider dashscope
|
/baoyu-image-gen --prompt "一只可爱的猫" --image cat.png --provider dashscope
|
||||||
|
|
||||||
|
# DashScope 自定义尺寸
|
||||||
|
/baoyu-image-gen --prompt "为咖啡品牌设计一张 21:9 横幅海报,包含清晰中文标题" --image banner.png --provider dashscope --model qwen-image-2.0-pro --size 2048x872
|
||||||
|
|
||||||
|
# MiniMax
|
||||||
|
/baoyu-image-gen --prompt "A fashion editorial portrait by a bright studio window" --image out.jpg --provider minimax
|
||||||
|
|
||||||
|
# MiniMax + 角色参考图
|
||||||
|
/baoyu-image-gen --prompt "A girl stands by the library window, cinematic lighting" --image out.jpg --provider minimax --model image-01 --ref portrait.png --ar 16:9
|
||||||
|
|
||||||
# Replicate
|
# Replicate
|
||||||
/baoyu-image-gen --prompt "一只猫" --image cat.png --provider replicate
|
/baoyu-image-gen --prompt "一只猫" --image cat.png --provider replicate
|
||||||
|
|
||||||
|
|
@ -696,8 +708,11 @@ AI 驱动的生成后端。
|
||||||
# 豆包(Seedream)
|
# 豆包(Seedream)
|
||||||
/baoyu-image-gen --prompt "一只可爱的猫" --image cat.png --provider seedream
|
/baoyu-image-gen --prompt "一只可爱的猫" --image cat.png --provider seedream
|
||||||
|
|
||||||
# 带参考图(Google、OpenAI、Azure OpenAI、OpenRouter、Replicate 或 Seedream 5.0/4.5/4.0)
|
# 带参考图(Google、OpenAI、Azure OpenAI、OpenRouter、Replicate、MiniMax 或 Seedream 5.0/4.5/4.0)
|
||||||
/baoyu-image-gen --prompt "把它变成蓝色" --image out.png --ref source.png
|
/baoyu-image-gen --prompt "把它变成蓝色" --image out.png --ref source.png
|
||||||
|
|
||||||
|
# 批量模式
|
||||||
|
/baoyu-image-gen --batchfile batch.json --jobs 4 --json
|
||||||
```
|
```
|
||||||
|
|
||||||
**选项**:
|
**选项**:
|
||||||
|
|
@ -706,44 +721,73 @@ AI 驱动的生成后端。
|
||||||
| `--prompt`, `-p` | 提示词文本 |
|
| `--prompt`, `-p` | 提示词文本 |
|
||||||
| `--promptfiles` | 从文件读取提示词(多文件拼接) |
|
| `--promptfiles` | 从文件读取提示词(多文件拼接) |
|
||||||
| `--image` | 输出图片路径(必需) |
|
| `--image` | 输出图片路径(必需) |
|
||||||
| `--provider` | `google`、`openai`、`openrouter`、`dashscope`、`jimeng`、`seedream` 或 `replicate`(默认:自动检测,优先 google) |
|
| `--batchfile` | 多图批量生成的 JSON 文件 |
|
||||||
| `--model`, `-m` | 模型 ID |
|
| `--jobs` | 批量模式的并发 worker 数 |
|
||||||
|
| `--provider` | `google`、`openai`、`azure`、`openrouter`、`dashscope`、`minimax`、`jimeng`、`seedream` 或 `replicate`(默认:自动检测,规则见下方“服务商自动选择”) |
|
||||||
|
| `--model`, `-m` | 模型 ID 或部署名。Azure 使用部署名;OpenRouter 使用完整模型 ID;MiniMax 使用 `image-01` / `image-01-live` |
|
||||||
| `--ar` | 宽高比(如 `16:9`、`1:1`、`4:3`) |
|
| `--ar` | 宽高比(如 `16:9`、`1:1`、`4:3`) |
|
||||||
| `--size` | 尺寸(如 `1024x1024`) |
|
| `--size` | 尺寸(如 `1024x1024`) |
|
||||||
| `--quality` | `normal` 或 `2k`(默认:`2k`) |
|
| `--quality` | `normal` 或 `2k`(默认:`2k`) |
|
||||||
| `--ref` | 参考图片(Google、OpenAI、OpenRouter、Replicate 或 Seedream 5.0/4.5/4.0) |
|
| `--imageSize` | Google/OpenRouter 使用的 `1K`、`2K`、`4K` |
|
||||||
|
| `--ref` | 参考图片(Google、OpenAI、Azure OpenAI、OpenRouter、Replicate、MiniMax 或 Seedream 5.0/4.5/4.0) |
|
||||||
|
| `--n` | 单次请求生成图片数量 |
|
||||||
|
| `--json` | 输出 JSON 结果 |
|
||||||
|
|
||||||
**环境变量**(配置方法见[环境配置](#环境配置)):
|
**环境变量**(配置方法见[环境配置](#环境配置)):
|
||||||
| 变量 | 说明 | 默认值 |
|
| 变量 | 说明 | 默认值 |
|
||||||
|------|------|--------|
|
|------|------|--------|
|
||||||
| `OPENAI_API_KEY` | OpenAI API 密钥 | - |
|
| `OPENAI_API_KEY` | OpenAI API 密钥 | - |
|
||||||
|
| `AZURE_OPENAI_API_KEY` | Azure OpenAI API 密钥 | - |
|
||||||
| `OPENROUTER_API_KEY` | OpenRouter API 密钥 | - |
|
| `OPENROUTER_API_KEY` | OpenRouter API 密钥 | - |
|
||||||
| `GOOGLE_API_KEY` | Google API 密钥 | - |
|
| `GOOGLE_API_KEY` | Google API 密钥 | - |
|
||||||
|
| `GEMINI_API_KEY` | `GOOGLE_API_KEY` 的别名 | - |
|
||||||
| `DASHSCOPE_API_KEY` | DashScope API 密钥(阿里云) | - |
|
| `DASHSCOPE_API_KEY` | DashScope API 密钥(阿里云) | - |
|
||||||
|
| `MINIMAX_API_KEY` | MiniMax API 密钥 | - |
|
||||||
| `REPLICATE_API_TOKEN` | Replicate API Token | - |
|
| `REPLICATE_API_TOKEN` | Replicate API Token | - |
|
||||||
| `JIMENG_ACCESS_KEY_ID` | 即梦火山引擎 Access Key | - |
|
| `JIMENG_ACCESS_KEY_ID` | 即梦火山引擎 Access Key | - |
|
||||||
| `JIMENG_SECRET_ACCESS_KEY` | 即梦火山引擎 Secret Key | - |
|
| `JIMENG_SECRET_ACCESS_KEY` | 即梦火山引擎 Secret Key | - |
|
||||||
| `ARK_API_KEY` | 豆包火山引擎 ARK API 密钥 | - |
|
| `ARK_API_KEY` | 豆包火山引擎 ARK API 密钥 | - |
|
||||||
| `OPENAI_IMAGE_MODEL` | OpenAI 模型 | `gpt-image-1.5` |
|
| `OPENAI_IMAGE_MODEL` | OpenAI 模型 | `gpt-image-1.5` |
|
||||||
|
| `AZURE_OPENAI_DEPLOYMENT` | Azure 默认部署名 | - |
|
||||||
|
| `AZURE_OPENAI_IMAGE_MODEL` | 兼容旧配置的 Azure 部署/模型别名 | `gpt-image-1.5` |
|
||||||
| `OPENROUTER_IMAGE_MODEL` | OpenRouter 模型 | `google/gemini-3.1-flash-image-preview` |
|
| `OPENROUTER_IMAGE_MODEL` | OpenRouter 模型 | `google/gemini-3.1-flash-image-preview` |
|
||||||
| `GOOGLE_IMAGE_MODEL` | Google 模型 | `gemini-3-pro-image-preview` |
|
| `GOOGLE_IMAGE_MODEL` | Google 模型 | `gemini-3-pro-image-preview` |
|
||||||
| `DASHSCOPE_IMAGE_MODEL` | DashScope 模型 | `qwen-image-2.0-pro` |
|
| `DASHSCOPE_IMAGE_MODEL` | DashScope 模型 | `qwen-image-2.0-pro` |
|
||||||
|
| `MINIMAX_IMAGE_MODEL` | MiniMax 模型 | `image-01` |
|
||||||
| `REPLICATE_IMAGE_MODEL` | Replicate 模型 | `google/nano-banana-pro` |
|
| `REPLICATE_IMAGE_MODEL` | Replicate 模型 | `google/nano-banana-pro` |
|
||||||
| `JIMENG_IMAGE_MODEL` | 即梦模型 | `jimeng_t2i_v40` |
|
| `JIMENG_IMAGE_MODEL` | 即梦模型 | `jimeng_t2i_v40` |
|
||||||
| `SEEDREAM_IMAGE_MODEL` | 豆包模型 | `doubao-seedream-5-0-260128` |
|
| `SEEDREAM_IMAGE_MODEL` | 豆包模型 | `doubao-seedream-5-0-260128` |
|
||||||
| `OPENAI_BASE_URL` | 自定义 OpenAI 端点 | - |
|
| `OPENAI_BASE_URL` | 自定义 OpenAI 端点 | - |
|
||||||
|
| `OPENAI_IMAGE_USE_CHAT` | OpenAI 改走 `/chat/completions` | `false` |
|
||||||
|
| `AZURE_OPENAI_BASE_URL` | Azure 资源或部署端点 | - |
|
||||||
|
| `AZURE_API_VERSION` | Azure 图像 API 版本 | `2025-04-01-preview` |
|
||||||
| `OPENROUTER_BASE_URL` | 自定义 OpenRouter 端点 | `https://openrouter.ai/api/v1` |
|
| `OPENROUTER_BASE_URL` | 自定义 OpenRouter 端点 | `https://openrouter.ai/api/v1` |
|
||||||
|
| `OPENROUTER_HTTP_REFERER` | OpenRouter 归因用站点 URL | - |
|
||||||
|
| `OPENROUTER_TITLE` | OpenRouter 归因用应用名 | - |
|
||||||
| `GOOGLE_BASE_URL` | 自定义 Google 端点 | - |
|
| `GOOGLE_BASE_URL` | 自定义 Google 端点 | - |
|
||||||
| `DASHSCOPE_BASE_URL` | 自定义 DashScope 端点 | - |
|
| `DASHSCOPE_BASE_URL` | 自定义 DashScope 端点 | - |
|
||||||
|
| `MINIMAX_BASE_URL` | 自定义 MiniMax 端点 | `https://api.minimax.io` |
|
||||||
| `REPLICATE_BASE_URL` | 自定义 Replicate 端点 | - |
|
| `REPLICATE_BASE_URL` | 自定义 Replicate 端点 | - |
|
||||||
| `JIMENG_BASE_URL` | 自定义即梦端点 | `https://visual.volcengineapi.com` |
|
| `JIMENG_BASE_URL` | 自定义即梦端点 | `https://visual.volcengineapi.com` |
|
||||||
| `JIMENG_REGION` | 即梦区域 | `cn-north-1` |
|
| `JIMENG_REGION` | 即梦区域 | `cn-north-1` |
|
||||||
| `SEEDREAM_BASE_URL` | 自定义豆包端点 | `https://ark.cn-beijing.volces.com/api/v3` |
|
| `SEEDREAM_BASE_URL` | 自定义豆包端点 | `https://ark.cn-beijing.volces.com/api/v3` |
|
||||||
|
| `BAOYU_IMAGE_GEN_MAX_WORKERS` | 批量模式最大 worker 数 | `10` |
|
||||||
|
| `BAOYU_IMAGE_GEN_<PROVIDER>_CONCURRENCY` | 覆盖 provider 并发数 | provider 默认值 |
|
||||||
|
| `BAOYU_IMAGE_GEN_<PROVIDER>_START_INTERVAL_MS` | 覆盖 provider 请求启动间隔 | provider 默认值 |
|
||||||
|
|
||||||
|
**Provider 说明**:
|
||||||
|
- Azure OpenAI:`--model` 表示 Azure deployment name,不是底层模型家族名。
|
||||||
|
- DashScope:`qwen-image-2.0-pro` 是自定义 `--size`、`21:9` 和中英文排版的推荐默认模型。
|
||||||
|
- MiniMax:`image-01` 支持官方文档里的自定义 `width` / `height`;`image-01-live` 更偏低延迟,适合配合 `--ar` 使用。
|
||||||
|
- MiniMax 参考图会走 `subject_reference`,当前能力更偏角色 / 人像一致性。
|
||||||
|
- 即梦不支持参考图。
|
||||||
|
- 豆包参考图能力仅适用于 Seedream 5.0 / 4.5 / 4.0,不适用于 Seedream 3.0。
|
||||||
|
|
||||||
**服务商自动选择**:
|
**服务商自动选择**:
|
||||||
1. 如果指定了 `--provider` → 使用指定的
|
1. 如果指定了 `--provider` → 使用指定的
|
||||||
2. 如果只有一个 API 密钥 → 使用对应服务商
|
2. 如果传了 `--ref` 且未指定 provider → 依次尝试 Google、OpenAI、Azure、OpenRouter、Replicate、Seedream,最后是 MiniMax
|
||||||
3. 如果多个可用 → 默认使用 Google
|
3. 如果只有一个 API 密钥 → 使用对应服务商
|
||||||
|
4. 如果多个可用 → 默认使用 Google
|
||||||
|
|
||||||
#### baoyu-danger-gemini-web
|
#### baoyu-danger-gemini-web
|
||||||
|
|
||||||
|
|
@ -1018,11 +1062,20 @@ cat > ~/.baoyu-skills/.env << 'EOF'
|
||||||
OPENAI_API_KEY=sk-xxx
|
OPENAI_API_KEY=sk-xxx
|
||||||
OPENAI_IMAGE_MODEL=gpt-image-1.5
|
OPENAI_IMAGE_MODEL=gpt-image-1.5
|
||||||
# OPENAI_BASE_URL=https://api.openai.com/v1
|
# OPENAI_BASE_URL=https://api.openai.com/v1
|
||||||
|
# OPENAI_IMAGE_USE_CHAT=false
|
||||||
|
|
||||||
|
# Azure OpenAI
|
||||||
|
AZURE_OPENAI_API_KEY=xxx
|
||||||
|
AZURE_OPENAI_BASE_URL=https://your-resource.openai.azure.com
|
||||||
|
AZURE_OPENAI_DEPLOYMENT=gpt-image-1.5
|
||||||
|
# AZURE_API_VERSION=2025-04-01-preview
|
||||||
|
|
||||||
# OpenRouter
|
# OpenRouter
|
||||||
OPENROUTER_API_KEY=sk-or-xxx
|
OPENROUTER_API_KEY=sk-or-xxx
|
||||||
OPENROUTER_IMAGE_MODEL=google/gemini-3.1-flash-image-preview
|
OPENROUTER_IMAGE_MODEL=google/gemini-3.1-flash-image-preview
|
||||||
# OPENROUTER_BASE_URL=https://openrouter.ai/api/v1
|
# OPENROUTER_BASE_URL=https://openrouter.ai/api/v1
|
||||||
|
# OPENROUTER_HTTP_REFERER=https://your-app.example.com
|
||||||
|
# OPENROUTER_TITLE=你的应用名
|
||||||
|
|
||||||
# Google
|
# Google
|
||||||
GOOGLE_API_KEY=xxx
|
GOOGLE_API_KEY=xxx
|
||||||
|
|
@ -1034,6 +1087,11 @@ DASHSCOPE_API_KEY=sk-xxx
|
||||||
DASHSCOPE_IMAGE_MODEL=qwen-image-2.0-pro
|
DASHSCOPE_IMAGE_MODEL=qwen-image-2.0-pro
|
||||||
# DASHSCOPE_BASE_URL=https://dashscope.aliyuncs.com/api/v1
|
# DASHSCOPE_BASE_URL=https://dashscope.aliyuncs.com/api/v1
|
||||||
|
|
||||||
|
# MiniMax
|
||||||
|
MINIMAX_API_KEY=xxx
|
||||||
|
MINIMAX_IMAGE_MODEL=image-01
|
||||||
|
# MINIMAX_BASE_URL=https://api.minimax.io
|
||||||
|
|
||||||
# Replicate
|
# Replicate
|
||||||
REPLICATE_API_TOKEN=r8_xxx
|
REPLICATE_API_TOKEN=r8_xxx
|
||||||
REPLICATE_IMAGE_MODEL=google/nano-banana-pro
|
REPLICATE_IMAGE_MODEL=google/nano-banana-pro
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,7 @@
|
||||||
---
|
---
|
||||||
name: baoyu-image-gen
|
name: baoyu-image-gen
|
||||||
description: AI image generation with OpenAI, Azure OpenAI, Google, OpenRouter, DashScope, Jimeng, Seedream and Replicate APIs. Supports text-to-image, reference images, aspect ratios, and batch generation from saved prompt files. Sequential by default; use batch parallel generation when the user already has multiple prompts or wants stable multi-image throughput. Use when user asks to generate, create, or draw images.
|
description: AI image generation with OpenAI, Azure OpenAI, Google, OpenRouter, DashScope, MiniMax, Jimeng, Seedream and Replicate APIs. Supports text-to-image, reference images, aspect ratios, and batch generation from saved prompt files. Sequential by default; use batch parallel generation when the user already has multiple prompts or wants stable multi-image throughput. Use when user asks to generate, create, or draw images.
|
||||||
version: 1.56.3
|
version: 1.56.4
|
||||||
metadata:
|
metadata:
|
||||||
openclaw:
|
openclaw:
|
||||||
homepage: https://github.com/JimLiu/baoyu-skills#baoyu-image-gen
|
homepage: https://github.com/JimLiu/baoyu-skills#baoyu-image-gen
|
||||||
|
|
@ -13,7 +13,7 @@ metadata:
|
||||||
|
|
||||||
# Image Generation (AI SDK)
|
# Image Generation (AI SDK)
|
||||||
|
|
||||||
Official API-based image generation. Supports OpenAI, Azure OpenAI, Google, OpenRouter, DashScope (阿里通义万象), Jimeng (即梦), Seedream (豆包) and Replicate providers.
|
Official API-based image generation. Supports OpenAI, Azure OpenAI, Google, OpenRouter, DashScope (阿里通义万相), MiniMax, Jimeng (即梦), Seedream (豆包) and Replicate providers.
|
||||||
|
|
||||||
## Script Directory
|
## Script Directory
|
||||||
|
|
||||||
|
|
@ -74,7 +74,7 @@ ${BUN_X} {baseDir}/scripts/main.ts --prompt "A cat" --image out.png --quality 2k
|
||||||
# From prompt files
|
# From prompt files
|
||||||
${BUN_X} {baseDir}/scripts/main.ts --promptfiles system.md content.md --image out.png
|
${BUN_X} {baseDir}/scripts/main.ts --promptfiles system.md content.md --image out.png
|
||||||
|
|
||||||
# With reference images (Google, OpenAI, Azure OpenAI, OpenRouter, Replicate, or Seedream 4.0/4.5/5.0)
|
# With reference images (Google, OpenAI, Azure OpenAI, OpenRouter, Replicate, MiniMax, or Seedream 4.0/4.5/5.0)
|
||||||
${BUN_X} {baseDir}/scripts/main.ts --prompt "Make blue" --image out.png --ref source.png
|
${BUN_X} {baseDir}/scripts/main.ts --prompt "Make blue" --image out.png --ref source.png
|
||||||
|
|
||||||
# With reference images (explicit provider/model)
|
# With reference images (explicit provider/model)
|
||||||
|
|
@ -101,6 +101,15 @@ ${BUN_X} {baseDir}/scripts/main.ts --prompt "为咖啡品牌设计一张 21:9
|
||||||
# DashScope legacy Qwen fixed-size model
|
# DashScope legacy Qwen fixed-size model
|
||||||
${BUN_X} {baseDir}/scripts/main.ts --prompt "一张电影感海报" --image out.png --provider dashscope --model qwen-image-max --size 1664x928
|
${BUN_X} {baseDir}/scripts/main.ts --prompt "一张电影感海报" --image out.png --provider dashscope --model qwen-image-max --size 1664x928
|
||||||
|
|
||||||
|
# MiniMax
|
||||||
|
${BUN_X} {baseDir}/scripts/main.ts --prompt "A fashion editorial portrait by a bright studio window" --image out.jpg --provider minimax
|
||||||
|
|
||||||
|
# MiniMax with subject reference (best for character/portrait consistency)
|
||||||
|
${BUN_X} {baseDir}/scripts/main.ts --prompt "A girl stands by the library window, cinematic lighting" --image out.jpg --provider minimax --model image-01 --ref portrait.png --ar 16:9
|
||||||
|
|
||||||
|
# MiniMax with custom size (documented for image-01)
|
||||||
|
${BUN_X} {baseDir}/scripts/main.ts --prompt "A cinematic poster" --image out.jpg --provider minimax --model image-01 --size 1536x1024
|
||||||
|
|
||||||
# Replicate (google/nano-banana-pro)
|
# Replicate (google/nano-banana-pro)
|
||||||
${BUN_X} {baseDir}/scripts/main.ts --prompt "A cat" --image out.png --provider replicate
|
${BUN_X} {baseDir}/scripts/main.ts --prompt "A cat" --image out.png --provider replicate
|
||||||
|
|
||||||
|
|
@ -150,13 +159,13 @@ Paths in `promptFiles`, `image`, and `ref` are resolved relative to the batch fi
|
||||||
| `--image <path>` | Output image path (required in single-image mode) |
|
| `--image <path>` | Output image path (required in single-image mode) |
|
||||||
| `--batchfile <path>` | JSON batch file for multi-image generation |
|
| `--batchfile <path>` | JSON batch file for multi-image generation |
|
||||||
| `--jobs <count>` | Worker count for batch mode (default: auto, max from config, built-in default 10) |
|
| `--jobs <count>` | Worker count for batch mode (default: auto, max from config, built-in default 10) |
|
||||||
| `--provider google\|openai\|azure\|openrouter\|dashscope\|jimeng\|seedream\|replicate` | Force provider (default: auto-detect) |
|
| `--provider google\|openai\|azure\|openrouter\|dashscope\|minimax\|jimeng\|seedream\|replicate` | Force provider (default: auto-detect) |
|
||||||
| `--model <id>`, `-m` | Model ID (Google: `gemini-3-pro-image-preview`; OpenAI: `gpt-image-1.5`; Azure: deployment name such as `gpt-image-1.5` or `image-prod`; OpenRouter: `google/gemini-3.1-flash-image-preview`; DashScope: `qwen-image-2.0-pro`) |
|
| `--model <id>`, `-m` | Model ID (Google: `gemini-3-pro-image-preview`; OpenAI: `gpt-image-1.5`; Azure: deployment name such as `gpt-image-1.5` or `image-prod`; OpenRouter: `google/gemini-3.1-flash-image-preview`; DashScope: `qwen-image-2.0-pro`; MiniMax: `image-01`) |
|
||||||
| `--ar <ratio>` | Aspect ratio (e.g., `16:9`, `1:1`, `4:3`) |
|
| `--ar <ratio>` | Aspect ratio (e.g., `16:9`, `1:1`, `4:3`) |
|
||||||
| `--size <WxH>` | Size (e.g., `1024x1024`) |
|
| `--size <WxH>` | Size (e.g., `1024x1024`) |
|
||||||
| `--quality normal\|2k` | Quality preset (default: `2k`) |
|
| `--quality normal\|2k` | Quality preset (default: `2k`) |
|
||||||
| `--imageSize 1K\|2K\|4K` | Image size for Google/OpenRouter (default: from quality) |
|
| `--imageSize 1K\|2K\|4K` | Image size for Google/OpenRouter (default: from quality) |
|
||||||
| `--ref <files...>` | Reference images. Supported by Google multimodal, OpenAI GPT Image edits, Azure OpenAI edits (PNG/JPG only), OpenRouter multimodal models, Replicate, and Seedream 5.0/4.5/4.0. Not supported by Jimeng, Seedream 3.0, or removed SeedEdit 3.0 |
|
| `--ref <files...>` | Reference images. Supported by Google multimodal, OpenAI GPT Image edits, Azure OpenAI edits (PNG/JPG only), OpenRouter multimodal models, Replicate, MiniMax subject-reference, and Seedream 5.0/4.5/4.0. Not supported by Jimeng, Seedream 3.0, or removed SeedEdit 3.0 |
|
||||||
| `--n <count>` | Number of images |
|
| `--n <count>` | Number of images |
|
||||||
| `--json` | JSON output |
|
| `--json` | JSON output |
|
||||||
|
|
||||||
|
|
@ -169,6 +178,7 @@ Paths in `promptFiles`, `image`, and `ref` are resolved relative to the batch fi
|
||||||
| `OPENROUTER_API_KEY` | OpenRouter API key |
|
| `OPENROUTER_API_KEY` | OpenRouter API key |
|
||||||
| `GOOGLE_API_KEY` | Google API key |
|
| `GOOGLE_API_KEY` | Google API key |
|
||||||
| `DASHSCOPE_API_KEY` | DashScope API key (阿里云) |
|
| `DASHSCOPE_API_KEY` | DashScope API key (阿里云) |
|
||||||
|
| `MINIMAX_API_KEY` | MiniMax API key |
|
||||||
| `REPLICATE_API_TOKEN` | Replicate API token |
|
| `REPLICATE_API_TOKEN` | Replicate API token |
|
||||||
| `JIMENG_ACCESS_KEY_ID` | Jimeng (即梦) Volcengine access key |
|
| `JIMENG_ACCESS_KEY_ID` | Jimeng (即梦) Volcengine access key |
|
||||||
| `JIMENG_SECRET_ACCESS_KEY` | Jimeng (即梦) Volcengine secret key |
|
| `JIMENG_SECRET_ACCESS_KEY` | Jimeng (即梦) Volcengine secret key |
|
||||||
|
|
@ -179,6 +189,7 @@ Paths in `promptFiles`, `image`, and `ref` are resolved relative to the batch fi
|
||||||
| `OPENROUTER_IMAGE_MODEL` | OpenRouter model override (default: `google/gemini-3.1-flash-image-preview`) |
|
| `OPENROUTER_IMAGE_MODEL` | OpenRouter model override (default: `google/gemini-3.1-flash-image-preview`) |
|
||||||
| `GOOGLE_IMAGE_MODEL` | Google model override |
|
| `GOOGLE_IMAGE_MODEL` | Google model override |
|
||||||
| `DASHSCOPE_IMAGE_MODEL` | DashScope model override (default: `qwen-image-2.0-pro`) |
|
| `DASHSCOPE_IMAGE_MODEL` | DashScope model override (default: `qwen-image-2.0-pro`) |
|
||||||
|
| `MINIMAX_IMAGE_MODEL` | MiniMax model override (default: `image-01`) |
|
||||||
| `REPLICATE_IMAGE_MODEL` | Replicate model override (default: google/nano-banana-pro) |
|
| `REPLICATE_IMAGE_MODEL` | Replicate model override (default: google/nano-banana-pro) |
|
||||||
| `JIMENG_IMAGE_MODEL` | Jimeng model override (default: jimeng_t2i_v40) |
|
| `JIMENG_IMAGE_MODEL` | Jimeng model override (default: jimeng_t2i_v40) |
|
||||||
| `SEEDREAM_IMAGE_MODEL` | Seedream model override (default: doubao-seedream-5-0-260128) |
|
| `SEEDREAM_IMAGE_MODEL` | Seedream model override (default: doubao-seedream-5-0-260128) |
|
||||||
|
|
@ -190,6 +201,7 @@ Paths in `promptFiles`, `image`, and `ref` are resolved relative to the batch fi
|
||||||
| `OPENROUTER_TITLE` | Optional app name for OpenRouter attribution |
|
| `OPENROUTER_TITLE` | Optional app name for OpenRouter attribution |
|
||||||
| `GOOGLE_BASE_URL` | Custom Google endpoint |
|
| `GOOGLE_BASE_URL` | Custom Google endpoint |
|
||||||
| `DASHSCOPE_BASE_URL` | Custom DashScope endpoint |
|
| `DASHSCOPE_BASE_URL` | Custom DashScope endpoint |
|
||||||
|
| `MINIMAX_BASE_URL` | Custom MiniMax endpoint (default: `https://api.minimax.io`) |
|
||||||
| `REPLICATE_BASE_URL` | Custom Replicate endpoint |
|
| `REPLICATE_BASE_URL` | Custom Replicate endpoint |
|
||||||
| `JIMENG_BASE_URL` | Custom Jimeng endpoint (default: `https://visual.volcengineapi.com`) |
|
| `JIMENG_BASE_URL` | Custom Jimeng endpoint (default: `https://visual.volcengineapi.com`) |
|
||||||
| `JIMENG_REGION` | Jimeng region (default: `cn-north-1`) |
|
| `JIMENG_REGION` | Jimeng region (default: `cn-north-1`) |
|
||||||
|
|
@ -263,6 +275,34 @@ Official references:
|
||||||
- [Text-to-image guide](https://help.aliyun.com/zh/model-studio/text-to-image)
|
- [Text-to-image guide](https://help.aliyun.com/zh/model-studio/text-to-image)
|
||||||
- [Qwen-Image Edit API](https://help.aliyun.com/zh/model-studio/qwen-image-edit-api)
|
- [Qwen-Image Edit API](https://help.aliyun.com/zh/model-studio/qwen-image-edit-api)
|
||||||
|
|
||||||
|
### MiniMax Models
|
||||||
|
|
||||||
|
Use `--model image-01` or set `default_model.minimax` / `MINIMAX_IMAGE_MODEL` when the user wants MiniMax image generation.
|
||||||
|
|
||||||
|
Official MiniMax image model options currently documented in the API reference:
|
||||||
|
|
||||||
|
- `image-01` (recommended default)
|
||||||
|
- Supports text-to-image and subject-reference image generation
|
||||||
|
- Supports official `aspect_ratio` values: `1:1`, `16:9`, `4:3`, `3:2`, `2:3`, `3:4`, `9:16`, `21:9`
|
||||||
|
- Supports documented custom `width` / `height` output sizes when using `--size <WxH>`
|
||||||
|
- `width` and `height` must both be between `512` and `2048`, and both must be divisible by `8`
|
||||||
|
- `image-01-live`
|
||||||
|
- Lower-latency variant
|
||||||
|
- Use `--ar` for sizing; MiniMax documents custom `width` / `height` as only effective for `image-01`
|
||||||
|
|
||||||
|
MiniMax subject reference notes:
|
||||||
|
|
||||||
|
- `--ref` files are sent as MiniMax `subject_reference`
|
||||||
|
- MiniMax docs currently describe `subject_reference[].type` as `character`
|
||||||
|
- Official docs say `image_file` supports public URLs or Base64 Data URLs; `baoyu-image-gen` sends local refs as Data URLs
|
||||||
|
- Official docs recommend front-facing portrait references in JPG/JPEG/PNG under 10MB
|
||||||
|
|
||||||
|
Official references:
|
||||||
|
|
||||||
|
- [MiniMax Image Generation Guide](https://platform.minimax.io/docs/guides/image-generation)
|
||||||
|
- [MiniMax Text-to-Image API](https://platform.minimax.io/docs/api-reference/image-generation-t2i)
|
||||||
|
- [MiniMax Image-to-Image API](https://platform.minimax.io/docs/api-reference/image-generation-i2i)
|
||||||
|
|
||||||
### OpenRouter Models
|
### OpenRouter Models
|
||||||
|
|
||||||
Use full OpenRouter model IDs, e.g.:
|
Use full OpenRouter model IDs, e.g.:
|
||||||
|
|
@ -297,8 +337,8 @@ ${BUN_X} {baseDir}/scripts/main.ts --prompt "A cat" --image out.png --provider r
|
||||||
|
|
||||||
## Provider Selection
|
## Provider Selection
|
||||||
|
|
||||||
1. `--ref` provided + no `--provider` → auto-select Google first, then OpenAI, then OpenRouter, then Replicate (Jimeng and Seedream do not support reference images)
|
1. `--ref` provided + no `--provider` → auto-select Google first, then OpenAI, then Azure, then OpenRouter, then Replicate, then Seedream, then MiniMax (MiniMax subject reference is more specialized toward character/portrait consistency)
|
||||||
2. `--provider` specified → use it (if `--ref`, must be `google`, `openai`, `openrouter`, or `replicate`)
|
2. `--provider` specified → use it (if `--ref`, must be `google`, `openai`, `azure`, `openrouter`, `replicate`, `seedream`, or `minimax`)
|
||||||
3. Only one API key available → use that provider
|
3. Only one API key available → use that provider
|
||||||
4. Multiple available → default to Google
|
4. Multiple available → default to Google
|
||||||
|
|
||||||
|
|
@ -319,6 +359,7 @@ Supported: `1:1`, `16:9`, `9:16`, `4:3`, `3:4`, `2.35:1`
|
||||||
- OpenAI: maps to closest supported size
|
- OpenAI: maps to closest supported size
|
||||||
- OpenRouter: sends `imageGenerationOptions.aspect_ratio`; if only `--size <WxH>` is given, aspect ratio is inferred automatically
|
- OpenRouter: sends `imageGenerationOptions.aspect_ratio`; if only `--size <WxH>` is given, aspect ratio is inferred automatically
|
||||||
- Replicate: passes `aspect_ratio` to model; when `--ref` is provided without `--ar`, defaults to `match_input_image`
|
- Replicate: passes `aspect_ratio` to model; when `--ref` is provided without `--ar`, defaults to `match_input_image`
|
||||||
|
- MiniMax: sends official `aspect_ratio` values directly; if `--size <WxH>` is given without `--ar`, `width` / `height` are sent for `image-01`
|
||||||
|
|
||||||
## Generation Mode
|
## Generation Mode
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -53,6 +53,8 @@ options:
|
||||||
description: "Router for Gemini/FLUX/OpenAI-compatible image models"
|
description: "Router for Gemini/FLUX/OpenAI-compatible image models"
|
||||||
- label: "DashScope"
|
- label: "DashScope"
|
||||||
description: "Alibaba Cloud - Qwen-Image, strong Chinese/English text rendering"
|
description: "Alibaba Cloud - Qwen-Image, strong Chinese/English text rendering"
|
||||||
|
- label: "MiniMax"
|
||||||
|
description: "MiniMax image generation with subject-reference character workflows"
|
||||||
- label: "Replicate"
|
- label: "Replicate"
|
||||||
description: "Community models - nano-banana-pro, flexible model selection"
|
description: "Community models - nano-banana-pro, flexible model selection"
|
||||||
```
|
```
|
||||||
|
|
@ -103,6 +105,20 @@ options:
|
||||||
description: "Previous GPT Image deployment name"
|
description: "Previous GPT Image deployment name"
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Question 2d: Default MiniMax Model
|
||||||
|
|
||||||
|
Only show if user selected MiniMax.
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
header: "MiniMax Model"
|
||||||
|
question: "Default MiniMax image generation model?"
|
||||||
|
options:
|
||||||
|
- label: "image-01 (Recommended)"
|
||||||
|
description: "Best default, supports aspect ratios and custom width/height"
|
||||||
|
- label: "image-01-live"
|
||||||
|
description: "Faster variant; use aspect ratio instead of custom size"
|
||||||
|
```
|
||||||
|
|
||||||
### Question 3: Default Quality
|
### Question 3: Default Quality
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
|
|
@ -149,6 +165,7 @@ default_model:
|
||||||
azure: [selected azure deployment or null]
|
azure: [selected azure deployment or null]
|
||||||
openrouter: [selected openrouter model or null]
|
openrouter: [selected openrouter model or null]
|
||||||
dashscope: null
|
dashscope: null
|
||||||
|
minimax: [selected minimax model or null]
|
||||||
replicate: null
|
replicate: null
|
||||||
---
|
---
|
||||||
```
|
```
|
||||||
|
|
@ -252,6 +269,24 @@ options:
|
||||||
description: "Google's base image model on Replicate"
|
description: "Google's base image model on Replicate"
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### MiniMax Model Selection
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
header: "MiniMax Model"
|
||||||
|
question: "Which MiniMax image generation model should be the default?"
|
||||||
|
options:
|
||||||
|
- label: "image-01 (Recommended)"
|
||||||
|
description: "Best general-purpose MiniMax image model with custom width/height support"
|
||||||
|
- label: "image-01-live"
|
||||||
|
description: "Lower-latency MiniMax image model using aspect ratios"
|
||||||
|
```
|
||||||
|
|
||||||
|
Notes for MiniMax setup:
|
||||||
|
|
||||||
|
- `image-01` is the safest default. It supports official `aspect_ratio` values and documented custom `width` / `height` output sizes.
|
||||||
|
- `image-01-live` is useful when the user prefers faster generation and can work with aspect-ratio-based sizing.
|
||||||
|
- MiniMax subject reference currently uses `subject_reference[].type = character`; docs recommend front-facing portrait references in JPG/JPEG/PNG under 10MB.
|
||||||
|
|
||||||
### Update EXTEND.md
|
### Update EXTEND.md
|
||||||
|
|
||||||
After user selects a model:
|
After user selects a model:
|
||||||
|
|
@ -267,6 +302,7 @@ default_model:
|
||||||
azure: [value or null]
|
azure: [value or null]
|
||||||
openrouter: [value or null]
|
openrouter: [value or null]
|
||||||
dashscope: [value or null]
|
dashscope: [value or null]
|
||||||
|
minimax: [value or null]
|
||||||
replicate: [value or null]
|
replicate: [value or null]
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -11,7 +11,7 @@ description: EXTEND.md YAML schema for baoyu-image-gen user preferences
|
||||||
---
|
---
|
||||||
version: 1
|
version: 1
|
||||||
|
|
||||||
default_provider: null # google|openai|azure|openrouter|dashscope|replicate|null (null = auto-detect)
|
default_provider: null # google|openai|azure|openrouter|dashscope|minimax|replicate|null (null = auto-detect)
|
||||||
|
|
||||||
default_quality: null # normal|2k|null (null = use default: 2k)
|
default_quality: null # normal|2k|null (null = use default: 2k)
|
||||||
|
|
||||||
|
|
@ -25,6 +25,7 @@ default_model:
|
||||||
azure: null # Azure deployment name, e.g., "gpt-image-1.5" or "image-prod"
|
azure: null # Azure deployment name, e.g., "gpt-image-1.5" or "image-prod"
|
||||||
openrouter: null # e.g., "google/gemini-3.1-flash-image-preview"
|
openrouter: null # e.g., "google/gemini-3.1-flash-image-preview"
|
||||||
dashscope: null # e.g., "qwen-image-2.0-pro"
|
dashscope: null # e.g., "qwen-image-2.0-pro"
|
||||||
|
minimax: null # e.g., "image-01"
|
||||||
replicate: null # e.g., "google/nano-banana-pro"
|
replicate: null # e.g., "google/nano-banana-pro"
|
||||||
|
|
||||||
batch:
|
batch:
|
||||||
|
|
@ -48,6 +49,9 @@ batch:
|
||||||
dashscope:
|
dashscope:
|
||||||
concurrency: 3
|
concurrency: 3
|
||||||
start_interval_ms: 1100
|
start_interval_ms: 1100
|
||||||
|
minimax:
|
||||||
|
concurrency: 3
|
||||||
|
start_interval_ms: 1100
|
||||||
---
|
---
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
@ -65,6 +69,7 @@ batch:
|
||||||
| `default_model.azure` | string\|null | null | Azure default deployment name |
|
| `default_model.azure` | string\|null | null | Azure default deployment name |
|
||||||
| `default_model.openrouter` | string\|null | null | OpenRouter default model |
|
| `default_model.openrouter` | string\|null | null | OpenRouter default model |
|
||||||
| `default_model.dashscope` | string\|null | null | DashScope default model |
|
| `default_model.dashscope` | string\|null | null | DashScope default model |
|
||||||
|
| `default_model.minimax` | string\|null | null | MiniMax default model |
|
||||||
| `default_model.replicate` | string\|null | null | Replicate default model |
|
| `default_model.replicate` | string\|null | null | Replicate default model |
|
||||||
| `batch.max_workers` | int\|null | 10 | Batch worker cap |
|
| `batch.max_workers` | int\|null | 10 | Batch worker cap |
|
||||||
| `batch.provider_limits.<provider>.concurrency` | int\|null | provider default | Max simultaneous requests per provider |
|
| `batch.provider_limits.<provider>.concurrency` | int\|null | provider default | Max simultaneous requests per provider |
|
||||||
|
|
@ -95,6 +100,7 @@ default_model:
|
||||||
azure: "gpt-image-1.5"
|
azure: "gpt-image-1.5"
|
||||||
openrouter: "google/gemini-3.1-flash-image-preview"
|
openrouter: "google/gemini-3.1-flash-image-preview"
|
||||||
dashscope: "qwen-image-2.0-pro"
|
dashscope: "qwen-image-2.0-pro"
|
||||||
|
minimax: "image-01"
|
||||||
replicate: "google/nano-banana-pro"
|
replicate: "google/nano-banana-pro"
|
||||||
batch:
|
batch:
|
||||||
max_workers: 10
|
max_workers: 10
|
||||||
|
|
@ -108,5 +114,8 @@ batch:
|
||||||
openrouter:
|
openrouter:
|
||||||
concurrency: 3
|
concurrency: 3
|
||||||
start_interval_ms: 1100
|
start_interval_ms: 1100
|
||||||
|
minimax:
|
||||||
|
concurrency: 3
|
||||||
|
start_interval_ms: 1100
|
||||||
---
|
---
|
||||||
```
|
```
|
||||||
|
|
|
||||||
|
|
@ -124,6 +124,7 @@ default_model:
|
||||||
google: gemini-3-pro-image-preview
|
google: gemini-3-pro-image-preview
|
||||||
openai: gpt-image-1.5
|
openai: gpt-image-1.5
|
||||||
azure: image-prod
|
azure: image-prod
|
||||||
|
minimax: image-01
|
||||||
batch:
|
batch:
|
||||||
max_workers: 8
|
max_workers: 8
|
||||||
provider_limits:
|
provider_limits:
|
||||||
|
|
@ -132,6 +133,9 @@ batch:
|
||||||
start_interval_ms: 900
|
start_interval_ms: 900
|
||||||
openai:
|
openai:
|
||||||
concurrency: 4
|
concurrency: 4
|
||||||
|
minimax:
|
||||||
|
concurrency: 2
|
||||||
|
start_interval_ms: 1400
|
||||||
azure:
|
azure:
|
||||||
concurrency: 1
|
concurrency: 1
|
||||||
start_interval_ms: 1500
|
start_interval_ms: 1500
|
||||||
|
|
@ -147,6 +151,7 @@ batch:
|
||||||
assert.equal(config.default_model?.google, "gemini-3-pro-image-preview");
|
assert.equal(config.default_model?.google, "gemini-3-pro-image-preview");
|
||||||
assert.equal(config.default_model?.openai, "gpt-image-1.5");
|
assert.equal(config.default_model?.openai, "gpt-image-1.5");
|
||||||
assert.equal(config.default_model?.azure, "image-prod");
|
assert.equal(config.default_model?.azure, "image-prod");
|
||||||
|
assert.equal(config.default_model?.minimax, "image-01");
|
||||||
assert.equal(config.batch?.max_workers, 8);
|
assert.equal(config.batch?.max_workers, 8);
|
||||||
assert.deepEqual(config.batch?.provider_limits?.google, {
|
assert.deepEqual(config.batch?.provider_limits?.google, {
|
||||||
concurrency: 2,
|
concurrency: 2,
|
||||||
|
|
@ -155,6 +160,10 @@ batch:
|
||||||
assert.deepEqual(config.batch?.provider_limits?.openai, {
|
assert.deepEqual(config.batch?.provider_limits?.openai, {
|
||||||
concurrency: 4,
|
concurrency: 4,
|
||||||
});
|
});
|
||||||
|
assert.deepEqual(config.batch?.provider_limits?.minimax, {
|
||||||
|
concurrency: 2,
|
||||||
|
start_interval_ms: 1400,
|
||||||
|
});
|
||||||
assert.deepEqual(config.batch?.provider_limits?.azure, {
|
assert.deepEqual(config.batch?.provider_limits?.azure, {
|
||||||
concurrency: 1,
|
concurrency: 1,
|
||||||
start_interval_ms: 1500,
|
start_interval_ms: 1500,
|
||||||
|
|
@ -200,6 +209,7 @@ test("detectProvider rejects non-ref-capable providers and prefers Google first
|
||||||
OPENAI_API_KEY: "openai-key",
|
OPENAI_API_KEY: "openai-key",
|
||||||
OPENROUTER_API_KEY: null,
|
OPENROUTER_API_KEY: null,
|
||||||
DASHSCOPE_API_KEY: null,
|
DASHSCOPE_API_KEY: null,
|
||||||
|
MINIMAX_API_KEY: null,
|
||||||
REPLICATE_API_TOKEN: null,
|
REPLICATE_API_TOKEN: null,
|
||||||
JIMENG_ACCESS_KEY_ID: null,
|
JIMENG_ACCESS_KEY_ID: null,
|
||||||
JIMENG_SECRET_ACCESS_KEY: null,
|
JIMENG_SECRET_ACCESS_KEY: null,
|
||||||
|
|
@ -216,6 +226,7 @@ test("detectProvider selects an available ref-capable provider for reference-ima
|
||||||
AZURE_OPENAI_BASE_URL: null,
|
AZURE_OPENAI_BASE_URL: null,
|
||||||
OPENROUTER_API_KEY: null,
|
OPENROUTER_API_KEY: null,
|
||||||
DASHSCOPE_API_KEY: null,
|
DASHSCOPE_API_KEY: null,
|
||||||
|
MINIMAX_API_KEY: null,
|
||||||
REPLICATE_API_TOKEN: null,
|
REPLICATE_API_TOKEN: null,
|
||||||
JIMENG_ACCESS_KEY_ID: null,
|
JIMENG_ACCESS_KEY_ID: null,
|
||||||
JIMENG_SECRET_ACCESS_KEY: null,
|
JIMENG_SECRET_ACCESS_KEY: null,
|
||||||
|
|
@ -235,6 +246,7 @@ test("detectProvider selects Azure when only Azure credentials are configured",
|
||||||
AZURE_OPENAI_BASE_URL: "https://example.openai.azure.com",
|
AZURE_OPENAI_BASE_URL: "https://example.openai.azure.com",
|
||||||
OPENROUTER_API_KEY: null,
|
OPENROUTER_API_KEY: null,
|
||||||
DASHSCOPE_API_KEY: null,
|
DASHSCOPE_API_KEY: null,
|
||||||
|
MINIMAX_API_KEY: null,
|
||||||
REPLICATE_API_TOKEN: null,
|
REPLICATE_API_TOKEN: null,
|
||||||
JIMENG_ACCESS_KEY_ID: null,
|
JIMENG_ACCESS_KEY_ID: null,
|
||||||
JIMENG_SECRET_ACCESS_KEY: null,
|
JIMENG_SECRET_ACCESS_KEY: null,
|
||||||
|
|
@ -254,6 +266,7 @@ test("detectProvider infers Seedream from model id and allows Seedream reference
|
||||||
OPENAI_API_KEY: null,
|
OPENAI_API_KEY: null,
|
||||||
OPENROUTER_API_KEY: null,
|
OPENROUTER_API_KEY: null,
|
||||||
DASHSCOPE_API_KEY: null,
|
DASHSCOPE_API_KEY: null,
|
||||||
|
MINIMAX_API_KEY: null,
|
||||||
REPLICATE_API_TOKEN: null,
|
REPLICATE_API_TOKEN: null,
|
||||||
JIMENG_ACCESS_KEY_ID: null,
|
JIMENG_ACCESS_KEY_ID: null,
|
||||||
JIMENG_SECRET_ACCESS_KEY: null,
|
JIMENG_SECRET_ACCESS_KEY: null,
|
||||||
|
|
@ -281,6 +294,26 @@ test("detectProvider infers Seedream from model id and allows Seedream reference
|
||||||
);
|
);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
test("detectProvider selects MiniMax when only MiniMax credentials are configured or the model id matches", (t) => {
|
||||||
|
useEnv(t, {
|
||||||
|
GOOGLE_API_KEY: null,
|
||||||
|
OPENAI_API_KEY: null,
|
||||||
|
AZURE_OPENAI_API_KEY: null,
|
||||||
|
AZURE_OPENAI_BASE_URL: null,
|
||||||
|
OPENROUTER_API_KEY: null,
|
||||||
|
DASHSCOPE_API_KEY: null,
|
||||||
|
MINIMAX_API_KEY: "minimax-key",
|
||||||
|
REPLICATE_API_TOKEN: null,
|
||||||
|
JIMENG_ACCESS_KEY_ID: null,
|
||||||
|
JIMENG_SECRET_ACCESS_KEY: null,
|
||||||
|
ARK_API_KEY: null,
|
||||||
|
});
|
||||||
|
|
||||||
|
assert.equal(detectProvider(makeArgs()), "minimax");
|
||||||
|
assert.equal(detectProvider(makeArgs({ referenceImages: ["ref.png"] })), "minimax");
|
||||||
|
assert.equal(detectProvider(makeArgs({ model: "image-01-live" })), "minimax");
|
||||||
|
});
|
||||||
|
|
||||||
test("batch worker and provider-rate-limit configuration prefer env over EXTEND config", (t) => {
|
test("batch worker and provider-rate-limit configuration prefer env over EXTEND config", (t) => {
|
||||||
useEnv(t, {
|
useEnv(t, {
|
||||||
BAOYU_IMAGE_GEN_MAX_WORKERS: "12",
|
BAOYU_IMAGE_GEN_MAX_WORKERS: "12",
|
||||||
|
|
@ -296,6 +329,10 @@ test("batch worker and provider-rate-limit configuration prefer env over EXTEND
|
||||||
concurrency: 2,
|
concurrency: 2,
|
||||||
start_interval_ms: 900,
|
start_interval_ms: 900,
|
||||||
},
|
},
|
||||||
|
minimax: {
|
||||||
|
concurrency: 1,
|
||||||
|
start_interval_ms: 1500,
|
||||||
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
@ -305,6 +342,10 @@ test("batch worker and provider-rate-limit configuration prefer env over EXTEND
|
||||||
concurrency: 5,
|
concurrency: 5,
|
||||||
startIntervalMs: 450,
|
startIntervalMs: 450,
|
||||||
});
|
});
|
||||||
|
assert.deepEqual(getConfiguredProviderRateLimits(extendConfig).minimax, {
|
||||||
|
concurrency: 1,
|
||||||
|
startIntervalMs: 1500,
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
test("loadBatchTasks and createTaskArgs resolve batch-relative paths", async (t) => {
|
test("loadBatchTasks and createTaskArgs resolve batch-relative paths", async (t) => {
|
||||||
|
|
|
||||||
|
|
@ -58,6 +58,7 @@ const DEFAULT_PROVIDER_RATE_LIMITS: Record<Provider, ProviderRateLimit> = {
|
||||||
openai: { concurrency: 3, startIntervalMs: 1100 },
|
openai: { concurrency: 3, startIntervalMs: 1100 },
|
||||||
openrouter: { concurrency: 3, startIntervalMs: 1100 },
|
openrouter: { concurrency: 3, startIntervalMs: 1100 },
|
||||||
dashscope: { concurrency: 3, startIntervalMs: 1100 },
|
dashscope: { concurrency: 3, startIntervalMs: 1100 },
|
||||||
|
minimax: { concurrency: 3, startIntervalMs: 1100 },
|
||||||
jimeng: { concurrency: 3, startIntervalMs: 1100 },
|
jimeng: { concurrency: 3, startIntervalMs: 1100 },
|
||||||
seedream: { concurrency: 3, startIntervalMs: 1100 },
|
seedream: { concurrency: 3, startIntervalMs: 1100 },
|
||||||
azure: { concurrency: 3, startIntervalMs: 1100 },
|
azure: { concurrency: 3, startIntervalMs: 1100 },
|
||||||
|
|
@ -75,13 +76,13 @@ Options:
|
||||||
--image <path> Output image path (required in single-image mode)
|
--image <path> Output image path (required in single-image mode)
|
||||||
--batchfile <path> JSON batch file for multi-image generation
|
--batchfile <path> JSON batch file for multi-image generation
|
||||||
--jobs <count> Worker count for batch mode (default: auto, max from config, built-in default 10)
|
--jobs <count> Worker count for batch mode (default: auto, max from config, built-in default 10)
|
||||||
--provider google|openai|openrouter|dashscope|replicate|jimeng|seedream|azure Force provider (auto-detect by default)
|
--provider google|openai|openrouter|dashscope|minimax|replicate|jimeng|seedream|azure Force provider (auto-detect by default)
|
||||||
-m, --model <id> Model ID
|
-m, --model <id> Model ID
|
||||||
--ar <ratio> Aspect ratio (e.g., 16:9, 1:1, 4:3)
|
--ar <ratio> Aspect ratio (e.g., 16:9, 1:1, 4:3)
|
||||||
--size <WxH> Size (e.g., 1024x1024)
|
--size <WxH> Size (e.g., 1024x1024)
|
||||||
--quality normal|2k Quality preset (default: 2k)
|
--quality normal|2k Quality preset (default: 2k)
|
||||||
--imageSize 1K|2K|4K Image size for Google/OpenRouter (default: from quality)
|
--imageSize 1K|2K|4K Image size for Google/OpenRouter (default: from quality)
|
||||||
--ref <files...> Reference images (Google, OpenAI, Azure, OpenRouter, Replicate, or Seedream 4.0/4.5/5.0)
|
--ref <files...> Reference images (Google, OpenAI, Azure, OpenRouter, Replicate, MiniMax, or Seedream 4.0/4.5/5.0)
|
||||||
--n <count> Number of images for the current task (default: 1)
|
--n <count> Number of images for the current task (default: 1)
|
||||||
--json JSON output
|
--json JSON output
|
||||||
-h, --help Show help
|
-h, --help Show help
|
||||||
|
|
@ -112,6 +113,7 @@ Environment variables:
|
||||||
GOOGLE_API_KEY Google API key
|
GOOGLE_API_KEY Google API key
|
||||||
GEMINI_API_KEY Gemini API key (alias for GOOGLE_API_KEY)
|
GEMINI_API_KEY Gemini API key (alias for GOOGLE_API_KEY)
|
||||||
DASHSCOPE_API_KEY DashScope API key
|
DASHSCOPE_API_KEY DashScope API key
|
||||||
|
MINIMAX_API_KEY MiniMax API key
|
||||||
REPLICATE_API_TOKEN Replicate API token
|
REPLICATE_API_TOKEN Replicate API token
|
||||||
JIMENG_ACCESS_KEY_ID Jimeng Access Key ID
|
JIMENG_ACCESS_KEY_ID Jimeng Access Key ID
|
||||||
JIMENG_SECRET_ACCESS_KEY Jimeng Secret Access Key
|
JIMENG_SECRET_ACCESS_KEY Jimeng Secret Access Key
|
||||||
|
|
@ -120,6 +122,7 @@ Environment variables:
|
||||||
OPENROUTER_IMAGE_MODEL Default OpenRouter model (google/gemini-3.1-flash-image-preview)
|
OPENROUTER_IMAGE_MODEL Default OpenRouter model (google/gemini-3.1-flash-image-preview)
|
||||||
GOOGLE_IMAGE_MODEL Default Google model (gemini-3-pro-image-preview)
|
GOOGLE_IMAGE_MODEL Default Google model (gemini-3-pro-image-preview)
|
||||||
DASHSCOPE_IMAGE_MODEL Default DashScope model (qwen-image-2.0-pro)
|
DASHSCOPE_IMAGE_MODEL Default DashScope model (qwen-image-2.0-pro)
|
||||||
|
MINIMAX_IMAGE_MODEL Default MiniMax model (image-01)
|
||||||
REPLICATE_IMAGE_MODEL Default Replicate model (google/nano-banana-pro)
|
REPLICATE_IMAGE_MODEL Default Replicate model (google/nano-banana-pro)
|
||||||
JIMENG_IMAGE_MODEL Default Jimeng model (jimeng_t2i_v40)
|
JIMENG_IMAGE_MODEL Default Jimeng model (jimeng_t2i_v40)
|
||||||
SEEDREAM_IMAGE_MODEL Default Seedream model (doubao-seedream-5-0-260128)
|
SEEDREAM_IMAGE_MODEL Default Seedream model (doubao-seedream-5-0-260128)
|
||||||
|
|
@ -130,6 +133,7 @@ Environment variables:
|
||||||
OPENROUTER_TITLE Optional app name for OpenRouter attribution
|
OPENROUTER_TITLE Optional app name for OpenRouter attribution
|
||||||
GOOGLE_BASE_URL Custom Google endpoint
|
GOOGLE_BASE_URL Custom Google endpoint
|
||||||
DASHSCOPE_BASE_URL Custom DashScope endpoint
|
DASHSCOPE_BASE_URL Custom DashScope endpoint
|
||||||
|
MINIMAX_BASE_URL Custom MiniMax endpoint
|
||||||
REPLICATE_BASE_URL Custom Replicate endpoint
|
REPLICATE_BASE_URL Custom Replicate endpoint
|
||||||
JIMENG_BASE_URL Custom Jimeng endpoint
|
JIMENG_BASE_URL Custom Jimeng endpoint
|
||||||
AZURE_OPENAI_API_KEY Azure OpenAI API key
|
AZURE_OPENAI_API_KEY Azure OpenAI API key
|
||||||
|
|
@ -235,6 +239,7 @@ export function parseArgs(argv: string[]): CliArgs {
|
||||||
v !== "openai" &&
|
v !== "openai" &&
|
||||||
v !== "openrouter" &&
|
v !== "openrouter" &&
|
||||||
v !== "dashscope" &&
|
v !== "dashscope" &&
|
||||||
|
v !== "minimax" &&
|
||||||
v !== "replicate" &&
|
v !== "replicate" &&
|
||||||
v !== "jimeng" &&
|
v !== "jimeng" &&
|
||||||
v !== "seedream" &&
|
v !== "seedream" &&
|
||||||
|
|
@ -390,6 +395,7 @@ export function parseSimpleYaml(yaml: string): Partial<ExtendConfig> {
|
||||||
openai: null,
|
openai: null,
|
||||||
openrouter: null,
|
openrouter: null,
|
||||||
dashscope: null,
|
dashscope: null,
|
||||||
|
minimax: null,
|
||||||
replicate: null,
|
replicate: null,
|
||||||
jimeng: null,
|
jimeng: null,
|
||||||
seedream: null,
|
seedream: null,
|
||||||
|
|
@ -417,6 +423,7 @@ export function parseSimpleYaml(yaml: string): Partial<ExtendConfig> {
|
||||||
key === "openai" ||
|
key === "openai" ||
|
||||||
key === "openrouter" ||
|
key === "openrouter" ||
|
||||||
key === "dashscope" ||
|
key === "dashscope" ||
|
||||||
|
key === "minimax" ||
|
||||||
key === "replicate" ||
|
key === "replicate" ||
|
||||||
key === "jimeng" ||
|
key === "jimeng" ||
|
||||||
key === "seedream" ||
|
key === "seedream" ||
|
||||||
|
|
@ -434,6 +441,7 @@ export function parseSimpleYaml(yaml: string): Partial<ExtendConfig> {
|
||||||
key === "openai" ||
|
key === "openai" ||
|
||||||
key === "openrouter" ||
|
key === "openrouter" ||
|
||||||
key === "dashscope" ||
|
key === "dashscope" ||
|
||||||
|
key === "minimax" ||
|
||||||
key === "replicate" ||
|
key === "replicate" ||
|
||||||
key === "jimeng" ||
|
key === "jimeng" ||
|
||||||
key === "seedream" ||
|
key === "seedream" ||
|
||||||
|
|
@ -528,12 +536,13 @@ export function getConfiguredProviderRateLimits(
|
||||||
openai: { ...DEFAULT_PROVIDER_RATE_LIMITS.openai },
|
openai: { ...DEFAULT_PROVIDER_RATE_LIMITS.openai },
|
||||||
openrouter: { ...DEFAULT_PROVIDER_RATE_LIMITS.openrouter },
|
openrouter: { ...DEFAULT_PROVIDER_RATE_LIMITS.openrouter },
|
||||||
dashscope: { ...DEFAULT_PROVIDER_RATE_LIMITS.dashscope },
|
dashscope: { ...DEFAULT_PROVIDER_RATE_LIMITS.dashscope },
|
||||||
|
minimax: { ...DEFAULT_PROVIDER_RATE_LIMITS.minimax },
|
||||||
jimeng: { ...DEFAULT_PROVIDER_RATE_LIMITS.jimeng },
|
jimeng: { ...DEFAULT_PROVIDER_RATE_LIMITS.jimeng },
|
||||||
seedream: { ...DEFAULT_PROVIDER_RATE_LIMITS.seedream },
|
seedream: { ...DEFAULT_PROVIDER_RATE_LIMITS.seedream },
|
||||||
azure: { ...DEFAULT_PROVIDER_RATE_LIMITS.azure },
|
azure: { ...DEFAULT_PROVIDER_RATE_LIMITS.azure },
|
||||||
};
|
};
|
||||||
|
|
||||||
for (const provider of ["replicate", "google", "openai", "openrouter", "dashscope", "jimeng", "seedream", "azure"] as Provider[]) {
|
for (const provider of ["replicate", "google", "openai", "openrouter", "dashscope", "minimax", "jimeng", "seedream", "azure"] as Provider[]) {
|
||||||
const envPrefix = `BAOYU_IMAGE_GEN_${provider.toUpperCase()}`;
|
const envPrefix = `BAOYU_IMAGE_GEN_${provider.toUpperCase()}`;
|
||||||
const extendLimit = extendConfig.batch?.provider_limits?.[provider];
|
const extendLimit = extendConfig.batch?.provider_limits?.[provider];
|
||||||
configured[provider] = {
|
configured[provider] = {
|
||||||
|
|
@ -582,7 +591,9 @@ export function normalizeOutputImagePath(p: string, defaultExtension = ".png"):
|
||||||
|
|
||||||
function inferProviderFromModel(model: string | null): Provider | null {
|
function inferProviderFromModel(model: string | null): Provider | null {
|
||||||
if (!model) return null;
|
if (!model) return null;
|
||||||
if (model.includes("seedream") || model.includes("seededit")) return "seedream";
|
const normalized = model.trim();
|
||||||
|
if (normalized.includes("seedream") || normalized.includes("seededit")) return "seedream";
|
||||||
|
if (normalized === "image-01" || normalized === "image-01-live") return "minimax";
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -595,10 +606,11 @@ export function detectProvider(args: CliArgs): Provider {
|
||||||
args.provider !== "azure" &&
|
args.provider !== "azure" &&
|
||||||
args.provider !== "openrouter" &&
|
args.provider !== "openrouter" &&
|
||||||
args.provider !== "replicate" &&
|
args.provider !== "replicate" &&
|
||||||
args.provider !== "seedream"
|
args.provider !== "seedream" &&
|
||||||
|
args.provider !== "minimax"
|
||||||
) {
|
) {
|
||||||
throw new Error(
|
throw new Error(
|
||||||
"Reference images require a ref-capable provider. Use --provider google (Gemini multimodal), --provider openai (GPT Image edits), --provider azure (Azure OpenAI), --provider openrouter (OpenRouter multimodal), --provider replicate, or --provider seedream for supported Seedream models."
|
"Reference images require a ref-capable provider. Use --provider google (Gemini multimodal), --provider openai (GPT Image edits), --provider azure (Azure OpenAI), --provider openrouter (OpenRouter multimodal), --provider replicate, --provider seedream for supported Seedream models, or --provider minimax for MiniMax subject-reference workflows."
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -609,6 +621,7 @@ export function detectProvider(args: CliArgs): Provider {
|
||||||
const hasOpenai = !!process.env.OPENAI_API_KEY;
|
const hasOpenai = !!process.env.OPENAI_API_KEY;
|
||||||
const hasOpenrouter = !!process.env.OPENROUTER_API_KEY;
|
const hasOpenrouter = !!process.env.OPENROUTER_API_KEY;
|
||||||
const hasDashscope = !!process.env.DASHSCOPE_API_KEY;
|
const hasDashscope = !!process.env.DASHSCOPE_API_KEY;
|
||||||
|
const hasMinimax = !!process.env.MINIMAX_API_KEY;
|
||||||
const hasReplicate = !!process.env.REPLICATE_API_TOKEN;
|
const hasReplicate = !!process.env.REPLICATE_API_TOKEN;
|
||||||
const hasJimeng = !!(process.env.JIMENG_ACCESS_KEY_ID && process.env.JIMENG_SECRET_ACCESS_KEY);
|
const hasJimeng = !!(process.env.JIMENG_ACCESS_KEY_ID && process.env.JIMENG_SECRET_ACCESS_KEY);
|
||||||
const hasSeedream = !!process.env.ARK_API_KEY;
|
const hasSeedream = !!process.env.ARK_API_KEY;
|
||||||
|
|
@ -621,6 +634,13 @@ export function detectProvider(args: CliArgs): Provider {
|
||||||
return "seedream";
|
return "seedream";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (modelProvider === "minimax") {
|
||||||
|
if (!hasMinimax) {
|
||||||
|
throw new Error("Model looks like a MiniMax image model, but MINIMAX_API_KEY is not set.");
|
||||||
|
}
|
||||||
|
return "minimax";
|
||||||
|
}
|
||||||
|
|
||||||
if (args.referenceImages.length > 0) {
|
if (args.referenceImages.length > 0) {
|
||||||
if (hasGoogle) return "google";
|
if (hasGoogle) return "google";
|
||||||
if (hasOpenai) return "openai";
|
if (hasOpenai) return "openai";
|
||||||
|
|
@ -628,8 +648,9 @@ export function detectProvider(args: CliArgs): Provider {
|
||||||
if (hasOpenrouter) return "openrouter";
|
if (hasOpenrouter) return "openrouter";
|
||||||
if (hasReplicate) return "replicate";
|
if (hasReplicate) return "replicate";
|
||||||
if (hasSeedream) return "seedream";
|
if (hasSeedream) return "seedream";
|
||||||
|
if (hasMinimax) return "minimax";
|
||||||
throw new Error(
|
throw new Error(
|
||||||
"Reference images require Google, OpenAI, Azure, OpenRouter, Replicate, or supported Seedream models. Set GOOGLE_API_KEY/GEMINI_API_KEY, OPENAI_API_KEY, AZURE_OPENAI_API_KEY+AZURE_OPENAI_BASE_URL, OPENROUTER_API_KEY, REPLICATE_API_TOKEN, or ARK_API_KEY, or remove --ref."
|
"Reference images require Google, OpenAI, Azure, OpenRouter, Replicate, supported Seedream models, or MiniMax. Set GOOGLE_API_KEY/GEMINI_API_KEY, OPENAI_API_KEY, AZURE_OPENAI_API_KEY+AZURE_OPENAI_BASE_URL, OPENROUTER_API_KEY, REPLICATE_API_TOKEN, ARK_API_KEY, or MINIMAX_API_KEY, or remove --ref."
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -639,6 +660,7 @@ export function detectProvider(args: CliArgs): Provider {
|
||||||
hasAzure && "azure",
|
hasAzure && "azure",
|
||||||
hasOpenrouter && "openrouter",
|
hasOpenrouter && "openrouter",
|
||||||
hasDashscope && "dashscope",
|
hasDashscope && "dashscope",
|
||||||
|
hasMinimax && "minimax",
|
||||||
hasReplicate && "replicate",
|
hasReplicate && "replicate",
|
||||||
hasJimeng && "jimeng",
|
hasJimeng && "jimeng",
|
||||||
hasSeedream && "seedream",
|
hasSeedream && "seedream",
|
||||||
|
|
@ -648,7 +670,7 @@ export function detectProvider(args: CliArgs): Provider {
|
||||||
if (available.length > 1) return available[0]!;
|
if (available.length > 1) return available[0]!;
|
||||||
|
|
||||||
throw new Error(
|
throw new Error(
|
||||||
"No API key found. Set GOOGLE_API_KEY, GEMINI_API_KEY, OPENAI_API_KEY, AZURE_OPENAI_API_KEY+AZURE_OPENAI_BASE_URL, OPENROUTER_API_KEY, DASHSCOPE_API_KEY, REPLICATE_API_TOKEN, JIMENG keys, or ARK_API_KEY.\n" +
|
"No API key found. Set GOOGLE_API_KEY, GEMINI_API_KEY, OPENAI_API_KEY, AZURE_OPENAI_API_KEY+AZURE_OPENAI_BASE_URL, OPENROUTER_API_KEY, DASHSCOPE_API_KEY, MINIMAX_API_KEY, REPLICATE_API_TOKEN, JIMENG keys, or ARK_API_KEY.\n" +
|
||||||
"Create ~/.baoyu-skills/.env or <cwd>/.baoyu-skills/.env with your keys."
|
"Create ~/.baoyu-skills/.env or <cwd>/.baoyu-skills/.env with your keys."
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
@ -687,6 +709,7 @@ export function isRetryableGenerationError(error: unknown): boolean {
|
||||||
async function loadProviderModule(provider: Provider): Promise<ProviderModule> {
|
async function loadProviderModule(provider: Provider): Promise<ProviderModule> {
|
||||||
if (provider === "google") return (await import("./providers/google")) as ProviderModule;
|
if (provider === "google") return (await import("./providers/google")) as ProviderModule;
|
||||||
if (provider === "dashscope") return (await import("./providers/dashscope")) as ProviderModule;
|
if (provider === "dashscope") return (await import("./providers/dashscope")) as ProviderModule;
|
||||||
|
if (provider === "minimax") return (await import("./providers/minimax")) as ProviderModule;
|
||||||
if (provider === "replicate") return (await import("./providers/replicate")) as ProviderModule;
|
if (provider === "replicate") return (await import("./providers/replicate")) as ProviderModule;
|
||||||
if (provider === "openrouter") return (await import("./providers/openrouter")) as ProviderModule;
|
if (provider === "openrouter") return (await import("./providers/openrouter")) as ProviderModule;
|
||||||
if (provider === "jimeng") return (await import("./providers/jimeng")) as ProviderModule;
|
if (provider === "jimeng") return (await import("./providers/jimeng")) as ProviderModule;
|
||||||
|
|
@ -717,6 +740,7 @@ function getModelForProvider(
|
||||||
return extendConfig.default_model.openrouter;
|
return extendConfig.default_model.openrouter;
|
||||||
}
|
}
|
||||||
if (provider === "dashscope" && extendConfig.default_model.dashscope) return extendConfig.default_model.dashscope;
|
if (provider === "dashscope" && extendConfig.default_model.dashscope) return extendConfig.default_model.dashscope;
|
||||||
|
if (provider === "minimax" && extendConfig.default_model.minimax) return extendConfig.default_model.minimax;
|
||||||
if (provider === "replicate" && extendConfig.default_model.replicate) return extendConfig.default_model.replicate;
|
if (provider === "replicate" && extendConfig.default_model.replicate) return extendConfig.default_model.replicate;
|
||||||
if (provider === "jimeng" && extendConfig.default_model.jimeng) return extendConfig.default_model.jimeng;
|
if (provider === "jimeng" && extendConfig.default_model.jimeng) return extendConfig.default_model.jimeng;
|
||||||
if (provider === "seedream" && extendConfig.default_model.seedream) return extendConfig.default_model.seedream;
|
if (provider === "seedream" && extendConfig.default_model.seedream) return extendConfig.default_model.seedream;
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,171 @@
|
||||||
|
import assert from "node:assert/strict";
|
||||||
|
import fs from "node:fs/promises";
|
||||||
|
import os from "node:os";
|
||||||
|
import path from "node:path";
|
||||||
|
import test, { type TestContext } from "node:test";
|
||||||
|
|
||||||
|
import type { CliArgs } from "../types.ts";
|
||||||
|
import {
|
||||||
|
buildMinimaxUrl,
|
||||||
|
buildRequestBody,
|
||||||
|
buildSubjectReference,
|
||||||
|
extractImageFromResponse,
|
||||||
|
parsePixelSize,
|
||||||
|
validateArgs,
|
||||||
|
} from "./minimax.ts";
|
||||||
|
|
||||||
|
/**
 * Overrides environment variables for the duration of a single test.
 *
 * Every key in `values` is written to `process.env` (a `null` value removes
 * the variable). The values that were present beforehand are captured and
 * put back by a hook registered through `t.after`.
 *
 * @param t - Test context used to register the restore hook.
 * @param values - Variables to set; `null` means "unset this variable".
 */
function useEnv(
  t: TestContext,
  values: Record<string, string | null>,
): void {
  // Single place that knows how to set-or-delete one variable.
  const assign = (name: string, next: string | null | undefined): void => {
    if (next == null) {
      delete process.env[name];
      return;
    }
    process.env[name] = next;
  };

  const saved = new Map<string, string | undefined>();
  Object.entries(values).forEach(([name, next]) => {
    saved.set(name, process.env[name]);
    assign(name, next);
  });

  t.after(() => {
    saved.forEach((original, name) => assign(name, original));
  });
}
|
||||||
|
|
||||||
|
/**
 * Builds a `CliArgs` fixture with neutral defaults (no prompt, no provider,
 * one image, no reference images) and applies the caller's overrides on top.
 *
 * @param overrides - Fields that should differ from the defaults.
 * @returns A complete `CliArgs` object.
 */
function makeArgs(overrides: Partial<CliArgs> = {}): CliArgs {
  const defaults: CliArgs = {
    prompt: null,
    promptFiles: [],
    imagePath: null,
    provider: null,
    model: null,
    aspectRatio: null,
    size: null,
    quality: null,
    imageSize: null,
    referenceImages: [],
    n: 1,
    batchFile: null,
    jobs: null,
    json: false,
    help: false,
  };

  return { ...defaults, ...overrides };
}
|
||||||
|
|
||||||
|
test("MiniMax URL builder normalizes /v1 suffixes", (t) => {
  // useEnv registers the restore hook; the later direct write to
  // process.env is undone by that same hook when the test finishes.
  useEnv(t, { MINIMAX_BASE_URL: "https://api.minimax.io" });
  const bareHostUrl = buildMinimaxUrl();
  assert.equal(bareHostUrl, "https://api.minimax.io/v1/image_generation");

  // A base URL that already ends in /v1/ must not get a second /v1 segment.
  process.env.MINIMAX_BASE_URL = "https://proxy.example.com/custom/v1/";
  const proxiedUrl = buildMinimaxUrl();
  assert.equal(proxiedUrl, "https://proxy.example.com/custom/v1/image_generation");
});
|
||||||
|
|
||||||
|
test("MiniMax size parsing and validation follow documented constraints", () => {
  // Both "x" and "*" separators parse to the same dimensions.
  const expectedSize = { width: 1536, height: 1024 };
  for (const raw of ["1536x1024", "1536*1024"]) {
    assert.deepEqual(parsePixelSize(raw), expectedSize);
  }
  assert.equal(parsePixelSize("wide"), null);

  // Upper bound of n and a valid custom size are accepted without throwing.
  validateArgs("image-01", makeArgs({ size: "1536x1024", n: 9 }));

  // [model, arg overrides, expected error pattern]
  const rejected: Array<[string, Partial<CliArgs>, RegExp]> = [
    ["image-01-live", { size: "1536x1024" }, /only supported with model image-01/],
    ["image-01", { size: "1537x1024" }, /divisible by 8/],
    ["image-01", { aspectRatio: "2.35:1" }, /aspect_ratio must be one of/],
    ["image-01", { n: 10 }, /at most 9 images/],
  ];
  for (const [model, overrides, pattern] of rejected) {
    assert.throws(() => validateArgs(model, makeArgs(overrides)), pattern);
  }
});
|
||||||
|
|
||||||
|
test("MiniMax request body maps aspect ratio, size, n, and subject references", async (t) => {
  const workDir = await fs.mkdtemp(path.join(os.tmpdir(), "minimax-test-"));
  t.after(() => fs.rm(workDir, { recursive: true, force: true }));

  // The reference file only needs a .png extension and some bytes.
  const referenceFile = path.join(workDir, "portrait.png");
  await fs.writeFile(referenceFile, Buffer.from("portrait"));

  const prompt = "A portrait by the window";
  const model = "image-01";

  // Aspect ratio + batch size + one subject reference.
  const withRatio = await buildRequestBody(
    prompt,
    model,
    makeArgs({ aspectRatio: "16:9", n: 2, referenceImages: [referenceFile] }),
  );
  assert.equal(withRatio.aspect_ratio, "16:9");
  assert.equal(withRatio.n, 2);
  assert.equal(withRatio.response_format, "base64");
  const firstReference = withRatio.subject_reference?.[0]?.image_file || "";
  assert.match(firstReference, /^data:image\/png;base64,/);

  // Custom pixel size maps to width/height and leaves aspect_ratio unset.
  const withSize = await buildRequestBody(prompt, model, makeArgs({ size: "1536x1024" }));
  assert.equal(withSize.width, 1536);
  assert.equal(withSize.height, 1024);
  assert.equal(withSize.aspect_ratio, undefined);
});
|
||||||
|
|
||||||
|
test("MiniMax subject references require supported file types", async (t) => {
  const workDir = await fs.mkdtemp(path.join(os.tmpdir(), "minimax-ref-"));
  t.after(() => fs.rm(workDir, { recursive: true, force: true }));

  const jpgFile = path.join(workDir, "portrait.jpg");
  const webpFile = path.join(workDir, "portrait.webp");
  for (const file of [jpgFile, webpFile]) {
    await fs.writeFile(file, Buffer.from("portrait"));
  }

  // A .jpg reference is accepted and tagged as a character reference.
  const accepted = await buildSubjectReference([jpgFile]);
  assert.equal(accepted?.[0]?.type, "character");

  // A .webp reference is rejected with the supported-formats message.
  await assert.rejects(
    () => buildSubjectReference([webpFile]),
    /only supports JPG, JPEG, or PNG/,
  );
});
|
||||||
|
|
||||||
|
test("MiniMax response extraction supports base64 and URL payloads", async (t) => {
  // fetch is stubbed below; always put the real one back.
  const realFetch = globalThis.fetch;
  t.after(() => {
    globalThis.fetch = realFetch;
  });

  // Inline base64 payload is decoded directly.
  const encoded = Buffer.from("hello").toString("base64");
  const decoded = await extractImageFromResponse({
    data: { image_base64: [encoded] },
  });
  assert.equal(Buffer.from(decoded).toString("utf8"), "hello");

  // URL payload is downloaded through (stubbed) fetch.
  globalThis.fetch = async () =>
    new Response(Uint8Array.from([1, 2, 3]), {
      status: 200,
      headers: { "Content-Type": "image/jpeg" },
    });
  const downloaded = await extractImageFromResponse({
    data: { image_urls: ["https://example.com/output.jpg"] },
  });
  assert.deepEqual([...downloaded], [1, 2, 3]);

  // A non-zero status code surfaces the API's status message.
  const failure = { base_resp: { status_code: 1001, status_msg: "blocked" } };
  await assert.rejects(() => extractImageFromResponse(failure), /blocked/);
});
|
||||||
|
|
@ -0,0 +1,220 @@
|
||||||
|
import path from "node:path";
|
||||||
|
import { readFile } from "node:fs/promises";
|
||||||
|
|
||||||
|
import type { CliArgs } from "../types";
|
||||||
|
|
||||||
|
/** Model name used when no MiniMax model is configured. */
const DEFAULT_MODEL = "image-01";

/** Upper bound for a single subject-reference file: 10 MiB. */
const MAX_REFERENCE_IMAGE_BYTES = 10 * 1024 ** 2;

/**
 * Aspect ratios accepted for the `aspect_ratio` request field.
 * Insertion order is preserved because the set is joined into an error message.
 */
const SUPPORTED_ASPECT_RATIOS = new Set([
  "1:1",
  "16:9",
  "4:3",
  "3:2",
  "2:3",
  "3:4",
  "9:16",
  "21:9",
]);
|
||||||
|
|
||||||
|
/** One entry of the `subject_reference` array in an image request. */
interface MinimaxSubjectReference {
  /** Only the "character" reference kind is modelled here. */
  type: "character";
  /** Reference image payload as a string (a base64 data URL when built by this module). */
  image_file: string;
}

/** JSON body sent to the image generation endpoint. */
interface MinimaxRequestBody {
  model: string;
  prompt: string;
  /** Images are always requested inline as base64. */
  response_format: "base64";
  /** Set when an aspect ratio is given; otherwise width/height may be set instead. */
  aspect_ratio?: string;
  width?: number;
  height?: number;
  /** Number of images; omitted when a single image is requested. */
  n?: number;
  subject_reference?: MinimaxSubjectReference[];
}

/** Image payload section of a response: inline base64 strings and/or URLs. */
interface MinimaxResponseData {
  image_urls?: string[];
  image_base64?: string[];
}

/** Status section of a response; a non-zero `status_code` is treated as a failure. */
interface MinimaxBaseResponse {
  status_code?: number;
  status_msg?: string;
}

/** Parsed JSON response of the image generation endpoint. */
interface MinimaxResponse {
  id?: string;
  data?: MinimaxResponseData;
  base_resp?: MinimaxBaseResponse;
}
|
||||||
|
|
||||||
|
/**
 * Resolves the model to use when none is chosen explicitly.
 *
 * @returns The `MINIMAX_IMAGE_MODEL` environment variable when it is set to a
 *   non-empty value, otherwise the built-in default model.
 */
export function getDefaultModel(): string {
  const configured = process.env.MINIMAX_IMAGE_MODEL;
  if (configured) {
    return configured;
  }
  return DEFAULT_MODEL;
}
|
||||||
|
|
||||||
|
/**
 * Reads the MiniMax API key from the environment.
 *
 * @returns The value of `MINIMAX_API_KEY`, or `null` when it is unset or empty.
 */
function getApiKey(): string | null {
  const key = process.env.MINIMAX_API_KEY;
  if (!key) {
    return null;
  }
  return key;
}
|
||||||
|
|
||||||
|
/**
 * Builds the image generation endpoint URL.
 *
 * The base comes from `MINIMAX_BASE_URL` (falling back to
 * `https://api.minimax.io`). Trailing slashes are stripped, and a `/v1`
 * segment is added only when the base does not already end with one.
 *
 * @returns The full `.../v1/image_generation` URL.
 */
export function buildMinimaxUrl(): string {
  const configured = process.env.MINIMAX_BASE_URL || "https://api.minimax.io";
  const trimmed = configured.replace(/\/+$/g, "");

  if (trimmed.endsWith("/v1")) {
    return `${trimmed}/image_generation`;
  }
  return `${trimmed}/v1/image_generation`;
}
|
||||||
|
|
||||||
|
/**
 * Maps a reference image's file extension to its MIME type.
 *
 * The extension is compared case-insensitively.
 *
 * @param filename - Path or file name of the reference image.
 * @returns `"image/jpeg"` for `.jpg`/`.jpeg`, `"image/png"` for `.png`.
 * @throws Error when the extension is anything else.
 */
function getMimeType(filename: string): "image/jpeg" | "image/png" {
  switch (path.extname(filename).toLowerCase()) {
    case ".jpg":
    case ".jpeg":
      return "image/jpeg";
    case ".png":
      return "image/png";
    default:
      throw new Error(
        `MiniMax subject_reference only supports JPG, JPEG, or PNG files: ${filename}`
      );
  }
}
|
||||||
|
|
||||||
|
/**
 * Parses a `WIDTHxHEIGHT` size string.
 *
 * Accepts `x`, `X` or `*` as the separator, with optional whitespace around
 * it and around the whole string.
 *
 * @param size - Raw size string, e.g. `"1536x1024"`.
 * @returns The parsed dimensions, or `null` when the string does not match
 *   the format or either dimension is not a positive finite number.
 */
export function parsePixelSize(size: string): { width: number; height: number } | null {
  const parts = /^(\d+)\s*[xX*]\s*(\d+)$/.exec(size.trim());
  if (parts === null) {
    return null;
  }

  const width = Number.parseInt(parts[1]!, 10);
  const height = Number.parseInt(parts[2]!, 10);

  const unusable = [width, height].some(
    (value) => !Number.isFinite(value) || value <= 0,
  );
  return unusable ? null : { width, height };
}
|
||||||
|
|
||||||
|
/**
 * Checks custom pixel dimensions against the limits enforced by this module.
 *
 * @param width - Requested width in pixels.
 * @param height - Requested height in pixels.
 * @throws Error when either side is outside 512..2048, or not divisible by 8.
 */
function validatePixelSize(width: number, height: number): void {
  const sides = [width, height];

  // Range is checked first so an out-of-range value reports the range error.
  if (sides.some((side) => side < 512 || side > 2048)) {
    throw new Error("MiniMax custom size must keep width and height between 512 and 2048.");
  }

  if (sides.some((side) => side % 8 !== 0)) {
    throw new Error("MiniMax custom size requires width and height divisible by 8.");
  }
}
|
||||||
|
|
||||||
|
/**
 * Validates CLI arguments against the MiniMax constraints known to this module.
 *
 * Checks, in order: image count (at most 9), aspect ratio (must be in the
 * supported set), and custom size. The size is only checked when no aspect
 * ratio is given, and is only allowed for model `image-01`.
 *
 * @param model - Model name the request will use.
 * @param args - Parsed CLI arguments.
 * @throws Error describing the first constraint that is violated.
 */
export function validateArgs(model: string, args: CliArgs): void {
  const { n, aspectRatio, size } = args;

  if (n > 9) {
    throw new Error("MiniMax supports at most 9 images per request.");
  }

  if (aspectRatio && !SUPPORTED_ASPECT_RATIOS.has(aspectRatio)) {
    throw new Error(
      `MiniMax aspect_ratio must be one of: ${Array.from(SUPPORTED_ASPECT_RATIOS).join(", ")}.`
    );
  }

  // Size only matters when it is present and no aspect ratio was supplied.
  if (!size || aspectRatio) {
    return;
  }

  if (model !== "image-01") {
    throw new Error("MiniMax custom --size is only supported with model image-01. Use --model image-01 or pass --ar instead.");
  }

  const dimensions = parsePixelSize(size);
  if (!dimensions) {
    throw new Error("MiniMax --size must be in WxH format, for example 1536x1024.");
  }

  validatePixelSize(dimensions.width, dimensions.height);
}
|
||||||
|
|
||||||
|
/**
 * Converts local reference image files into `subject_reference` entries.
 *
 * Every file's extension is validated before anything is read from disk, so
 * an unsupported file fails fast with the format error instead of after a
 * full (and possibly large) read. Files are then read in order, checked
 * against the size limit, and embedded as base64 data URLs.
 *
 * @param referenceImages - Paths of the reference images.
 * @returns One `"character"` entry per input path, or `undefined` when the
 *   list is empty.
 * @throws Error when a file has an unsupported extension or exceeds the size
 *   limit; read failures from `readFile` propagate unchanged.
 */
export async function buildSubjectReference(
  referenceImages: string[],
): Promise<MinimaxSubjectReference[] | undefined> {
  if (referenceImages.length === 0) return undefined;

  // Cheap, synchronous validation first: no disk access for bad extensions.
  const candidates = referenceImages.map((refPath) => ({
    refPath,
    mimeType: getMimeType(refPath),
  }));

  const subjectReference: MinimaxSubjectReference[] = [];
  for (const { refPath, mimeType } of candidates) {
    const bytes = await readFile(refPath);
    if (bytes.length > MAX_REFERENCE_IMAGE_BYTES) {
      throw new Error(`MiniMax subject_reference images must be smaller than 10MB: ${refPath}`);
    }

    subjectReference.push({
      type: "character",
      image_file: `data:${mimeType};base64,${bytes.toString("base64")}`,
    });
  }

  return subjectReference;
}
|
||||||
|
|
||||||
|
/**
 * Assembles the JSON body for an image generation request.
 *
 * Arguments are validated first. An aspect ratio, when given, wins over a
 * custom size; otherwise a custom size is mapped to `width`/`height`. `n` is
 * only included when more than one image is requested, and
 * `subject_reference` only when reference images are supplied.
 *
 * @param prompt - Text prompt.
 * @param model - Model name.
 * @param args - Parsed CLI arguments.
 * @returns The request body, always with `response_format: "base64"`.
 * @throws Error when validation fails or a reference image cannot be used.
 */
export async function buildRequestBody(
  prompt: string,
  model: string,
  args: CliArgs,
): Promise<MinimaxRequestBody> {
  validateArgs(model, args);

  const { aspectRatio, size, n, referenceImages } = args;
  const body: MinimaxRequestBody = { model, prompt, response_format: "base64" };

  if (aspectRatio) {
    body.aspect_ratio = aspectRatio;
  } else if (size) {
    const dimensions = parsePixelSize(size);
    if (!dimensions) {
      throw new Error("MiniMax --size must be in WxH format, for example 1536x1024.");
    }
    const { width, height } = dimensions;
    body.width = width;
    body.height = height;
  }

  if (n > 1) {
    body.n = n;
  }

  const references = await buildSubjectReference(referenceImages);
  if (references) {
    body.subject_reference = references;
  }

  return body;
}
|
||||||
|
|
||||||
|
/**
 * Fetches an image and returns its bytes.
 *
 * @param url - Location of the image.
 * @returns The response body as a `Uint8Array`.
 * @throws Error carrying the HTTP status when the response is not OK.
 */
async function downloadImage(url: string): Promise<Uint8Array> {
  const response = await fetch(url);

  if (response.ok) {
    const payload = await response.arrayBuffer();
    return new Uint8Array(payload);
  }

  throw new Error(`Failed to download image from MiniMax: ${response.status}`);
}
|
||||||
|
|
||||||
|
/**
 * Pulls the first image out of a MiniMax response.
 *
 * A defined, non-zero `base_resp.status_code` is treated as an API failure.
 * Otherwise the first inline base64 image is preferred; if there is none,
 * the first image URL is downloaded.
 *
 * @param result - Parsed response body.
 * @returns The image bytes.
 * @throws Error with the API's status message (or a status-code fallback) on
 *   failure, or when the response carries no image at all.
 */
export async function extractImageFromResponse(result: MinimaxResponse): Promise<Uint8Array> {
  const status = result.base_resp;
  const statusCode = status?.status_code;
  if (status && statusCode !== undefined && statusCode !== 0) {
    throw new Error(status.status_msg || `MiniMax API returned status_code=${statusCode}`);
  }

  const data = result.data;

  const inlineImage = data?.image_base64?.[0];
  if (inlineImage) {
    return Uint8Array.from(Buffer.from(inlineImage, "base64"));
  }

  const remoteUrl = data?.image_urls?.[0];
  if (!remoteUrl) {
    throw new Error("No image data in MiniMax response");
  }
  return downloadImage(remoteUrl);
}
|
||||||
|
|
||||||
|
/**
 * File extension used for generated images when the caller does not pick one.
 *
 * @returns Always `".jpg"`.
 */
export function getDefaultOutputExtension(): ".jpg" {
  const extension: ".jpg" = ".jpg";
  return extension;
}
|
||||||
|
|
||||||
|
/**
 * Generates an image through the MiniMax image generation endpoint.
 *
 * Requires `MINIMAX_API_KEY`. The request body is built (and validated) from
 * the arguments, POSTed as JSON with a bearer token, and the first image of
 * the response is returned.
 *
 * @param prompt - Text prompt.
 * @param model - Model name.
 * @param args - Parsed CLI arguments.
 * @returns The generated image bytes.
 * @throws Error when the API key is missing, the HTTP response is not OK, or
 *   the response contains an error status or no image.
 */
export async function generateImage(
  prompt: string,
  model: string,
  args: CliArgs
): Promise<Uint8Array> {
  const apiKey = getApiKey();
  if (!apiKey) {
    throw new Error("MINIMAX_API_KEY is required. Get one from https://platform.minimax.io/");
  }

  const requestBody = await buildRequestBody(prompt, model, args);
  const endpoint = buildMinimaxUrl();
  const headers = {
    "Content-Type": "application/json",
    Authorization: `Bearer ${apiKey}`,
  };

  const response = await fetch(endpoint, {
    method: "POST",
    headers,
    body: JSON.stringify(requestBody),
  });

  if (!response.ok) {
    // Include the raw response text so API-side errors are visible to the user.
    const detail = await response.text();
    throw new Error(`MiniMax API error (${response.status}): ${detail}`);
  }

  const parsed = (await response.json()) as MinimaxResponse;
  return extractImageFromResponse(parsed);
}
|
||||||
|
|
@ -1,4 +1,13 @@
|
||||||
export type Provider = "google" | "openai" | "openrouter" | "dashscope" | "replicate" | "jimeng" | "seedream" | "azure";
|
export type Provider =
|
||||||
|
| "google"
|
||||||
|
| "openai"
|
||||||
|
| "openrouter"
|
||||||
|
| "dashscope"
|
||||||
|
| "minimax"
|
||||||
|
| "replicate"
|
||||||
|
| "jimeng"
|
||||||
|
| "seedream"
|
||||||
|
| "azure";
|
||||||
export type Quality = "normal" | "2k";
|
export type Quality = "normal" | "2k";
|
||||||
|
|
||||||
export type CliArgs = {
|
export type CliArgs = {
|
||||||
|
|
@ -52,6 +61,7 @@ export type ExtendConfig = {
|
||||||
openai: string | null;
|
openai: string | null;
|
||||||
openrouter: string | null;
|
openrouter: string | null;
|
||||||
dashscope: string | null;
|
dashscope: string | null;
|
||||||
|
minimax: string | null;
|
||||||
replicate: string | null;
|
replicate: string | null;
|
||||||
jimeng: string | null;
|
jimeng: string | null;
|
||||||
seedream: string | null;
|
seedream: string | null;
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue