diff --git a/README.md b/README.md index a48b9b9..fda24d7 100644 --- a/README.md +++ b/README.md @@ -663,7 +663,7 @@ AI-powered generation backends. #### baoyu-image-gen -AI SDK-based image generation using OpenAI, Azure OpenAI, Google, OpenRouter, DashScope (Aliyun Tongyi Wanxiang), Jimeng (即梦), Seedream (豆包), and Replicate APIs. Supports text-to-image, reference images, aspect ratios, and quality presets. +AI SDK-based image generation using OpenAI, Azure OpenAI, Google, OpenRouter, DashScope (Aliyun Tongyi Wanxiang), MiniMax, Jimeng (即梦), Seedream (豆包), and Replicate APIs. Supports text-to-image, reference images, aspect ratios, custom sizes, batch generation, and quality presets. ```bash # Basic generation (auto-detect provider) @@ -684,9 +684,21 @@ AI SDK-based image generation using OpenAI, Azure OpenAI, Google, OpenRouter, Da # OpenRouter /baoyu-image-gen --prompt "A cat" --image cat.png --provider openrouter +# OpenRouter with reference images +/baoyu-image-gen --prompt "Make it blue" --image out.png --provider openrouter --model google/gemini-3.1-flash-image-preview --ref source.png + # DashScope (Aliyun Tongyi Wanxiang) /baoyu-image-gen --prompt "一只可爱的猫" --image cat.png --provider dashscope +# DashScope with custom size +/baoyu-image-gen --prompt "为咖啡品牌设计一张 21:9 横幅海报,包含清晰中文标题" --image banner.png --provider dashscope --model qwen-image-2.0-pro --size 2048x872 + +# MiniMax +/baoyu-image-gen --prompt "A fashion editorial portrait by a bright studio window" --image out.jpg --provider minimax + +# MiniMax with subject reference +/baoyu-image-gen --prompt "A girl stands by the library window, cinematic lighting" --image out.jpg --provider minimax --model image-01 --ref portrait.png --ar 16:9 + # Replicate /baoyu-image-gen --prompt "A cat" --image cat.png --provider replicate @@ -696,8 +708,11 @@ AI SDK-based image generation using OpenAI, Azure OpenAI, Google, OpenRouter, Da # Seedream (豆包) /baoyu-image-gen --prompt "一只可爱的猫" --image cat.png --provider seedream 
-# With reference images (Google, OpenAI, Azure OpenAI, OpenRouter, Replicate, or Seedream 5.0/4.5/4.0) +# With reference images (Google, OpenAI, Azure OpenAI, OpenRouter, Replicate, MiniMax, or Seedream 5.0/4.5/4.0) /baoyu-image-gen --prompt "Make it blue" --image out.png --ref source.png + +# Batch mode +/baoyu-image-gen --batchfile batch.json --jobs 4 --json ``` **Options**: @@ -706,44 +721,73 @@ AI SDK-based image generation using OpenAI, Azure OpenAI, Google, OpenRouter, Da | `--prompt`, `-p` | Prompt text | | `--promptfiles` | Read prompt from files (concatenated) | | `--image` | Output image path (required) | -| `--provider` | `google`, `openai`, `openrouter`, `dashscope`, `jimeng`, `seedream` or `replicate` (default: auto-detect; prefers google) | -| `--model`, `-m` | Model ID | +| `--batchfile` | JSON batch file for multi-image generation | +| `--jobs` | Worker count for batch mode | +| `--provider` | `google`, `openai`, `azure`, `openrouter`, `dashscope`, `minimax`, `jimeng`, `seedream`, or `replicate` | +| `--model`, `-m` | Model ID or deployment name. 
Azure uses deployment name; OpenRouter uses full model IDs; MiniMax uses `image-01` / `image-01-live` | | `--ar` | Aspect ratio (e.g., `16:9`, `1:1`, `4:3`) | | `--size` | Size (e.g., `1024x1024`) | | `--quality` | `normal` or `2k` (default: `2k`) | -| `--ref` | Reference images (Google, OpenAI, OpenRouter, Replicate, or Seedream 5.0/4.5/4.0) | +| `--imageSize` | `1K`, `2K`, or `4K` for Google/OpenRouter | +| `--ref` | Reference images (Google, OpenAI, Azure OpenAI, OpenRouter, Replicate, MiniMax, or Seedream 5.0/4.5/4.0) | +| `--n` | Number of images per request | +| `--json` | JSON output | **Environment Variables** (see [Environment Configuration](#environment-configuration) for setup): | Variable | Description | Default | |----------|-------------|---------| | `OPENAI_API_KEY` | OpenAI API key | - | +| `AZURE_OPENAI_API_KEY` | Azure OpenAI API key | - | | `OPENROUTER_API_KEY` | OpenRouter API key | - | | `GOOGLE_API_KEY` | Google API key | - | +| `GEMINI_API_KEY` | Alias for `GOOGLE_API_KEY` | - | | `DASHSCOPE_API_KEY` | DashScope API key (Aliyun) | - | +| `MINIMAX_API_KEY` | MiniMax API key | - | | `REPLICATE_API_TOKEN` | Replicate API token | - | | `JIMENG_ACCESS_KEY_ID` | Jimeng Volcengine access key | - | | `JIMENG_SECRET_ACCESS_KEY` | Jimeng Volcengine secret key | - | | `ARK_API_KEY` | Seedream Volcengine ARK API key | - | | `OPENAI_IMAGE_MODEL` | OpenAI model | `gpt-image-1.5` | +| `AZURE_OPENAI_DEPLOYMENT` | Azure default deployment name | - | +| `AZURE_OPENAI_IMAGE_MODEL` | Backward-compatible Azure deployment/model alias | `gpt-image-1.5` | | `OPENROUTER_IMAGE_MODEL` | OpenRouter model | `google/gemini-3.1-flash-image-preview` | | `GOOGLE_IMAGE_MODEL` | Google model | `gemini-3-pro-image-preview` | | `DASHSCOPE_IMAGE_MODEL` | DashScope model | `qwen-image-2.0-pro` | +| `MINIMAX_IMAGE_MODEL` | MiniMax model | `image-01` | | `REPLICATE_IMAGE_MODEL` | Replicate model | `google/nano-banana-pro` | | `JIMENG_IMAGE_MODEL` | Jimeng model | `jimeng_t2i_v40` | 
| `SEEDREAM_IMAGE_MODEL` | Seedream model | `doubao-seedream-5-0-260128` | | `OPENAI_BASE_URL` | Custom OpenAI endpoint | - | +| `OPENAI_IMAGE_USE_CHAT` | Use `/chat/completions` for OpenAI image generation | `false` | +| `AZURE_OPENAI_BASE_URL` | Azure resource or deployment endpoint | - | +| `AZURE_API_VERSION` | Azure image API version | `2025-04-01-preview` | | `OPENROUTER_BASE_URL` | Custom OpenRouter endpoint | `https://openrouter.ai/api/v1` | +| `OPENROUTER_HTTP_REFERER` | Optional app/site URL for OpenRouter attribution | - | +| `OPENROUTER_TITLE` | Optional app name for OpenRouter attribution | - | | `GOOGLE_BASE_URL` | Custom Google endpoint | - | | `DASHSCOPE_BASE_URL` | Custom DashScope endpoint | - | +| `MINIMAX_BASE_URL` | Custom MiniMax endpoint | `https://api.minimax.io` | | `REPLICATE_BASE_URL` | Custom Replicate endpoint | - | | `JIMENG_BASE_URL` | Custom Jimeng endpoint | `https://visual.volcengineapi.com` | | `JIMENG_REGION` | Jimeng region | `cn-north-1` | | `SEEDREAM_BASE_URL` | Custom Seedream endpoint | `https://ark.cn-beijing.volces.com/api/v3` | +| `BAOYU_IMAGE_GEN_MAX_WORKERS` | Override batch worker cap | `10` | +| `BAOYU_IMAGE_GEN_<PROVIDER>_CONCURRENCY` | Override provider concurrency | provider-specific | +| `BAOYU_IMAGE_GEN_<PROVIDER>_START_INTERVAL_MS` | Override provider request start gap | provider-specific | + +**Provider Notes**: +- Azure OpenAI: `--model` means Azure deployment name, not the underlying model family. +- DashScope: `qwen-image-2.0-pro` is the recommended default for custom `--size`, `21:9`, and strong Chinese/English text rendering. +- MiniMax: `image-01` supports documented custom `width` / `height`; `image-01-live` is lower latency and works best with `--ar`. +- MiniMax reference images are sent as `subject_reference`; the current API is specialized toward character / portrait consistency. +- Jimeng does not support reference images. +- Seedream reference images are supported by Seedream 5.0 / 4.5 / 4.0, not Seedream 3.0. 
**Provider Auto-Selection**: -1. If `--provider` specified → use it -2. If only one API key available → use that provider -3. If multiple available → default to Google +1. If `--provider` is specified → use it +2. If `--ref` is provided and no provider is specified → try Google, then OpenAI, Azure, OpenRouter, Replicate, Seedream, and finally MiniMax +3. If only one API key is available → use that provider +4. If multiple providers are available → default to Google #### baoyu-danger-gemini-web @@ -1018,11 +1062,20 @@ cat > ~/.baoyu-skills/.env << 'EOF' OPENAI_API_KEY=sk-xxx OPENAI_IMAGE_MODEL=gpt-image-1.5 # OPENAI_BASE_URL=https://api.openai.com/v1 +# OPENAI_IMAGE_USE_CHAT=false + +# Azure OpenAI +AZURE_OPENAI_API_KEY=xxx +AZURE_OPENAI_BASE_URL=https://your-resource.openai.azure.com +AZURE_OPENAI_DEPLOYMENT=gpt-image-1.5 +# AZURE_API_VERSION=2025-04-01-preview # OpenRouter OPENROUTER_API_KEY=sk-or-xxx OPENROUTER_IMAGE_MODEL=google/gemini-3.1-flash-image-preview # OPENROUTER_BASE_URL=https://openrouter.ai/api/v1 +# OPENROUTER_HTTP_REFERER=https://your-app.example.com +# OPENROUTER_TITLE=Your App Name # Google GOOGLE_API_KEY=xxx @@ -1034,6 +1087,11 @@ DASHSCOPE_API_KEY=sk-xxx DASHSCOPE_IMAGE_MODEL=qwen-image-2.0-pro # DASHSCOPE_BASE_URL=https://dashscope.aliyuncs.com/api/v1 +# MiniMax +MINIMAX_API_KEY=xxx +MINIMAX_IMAGE_MODEL=image-01 +# MINIMAX_BASE_URL=https://api.minimax.io + # Replicate REPLICATE_API_TOKEN=r8_xxx REPLICATE_IMAGE_MODEL=google/nano-banana-pro diff --git a/README.zh.md b/README.zh.md index 87a75f2..65168af 100644 --- a/README.zh.md +++ b/README.zh.md @@ -663,7 +663,7 @@ AI 驱动的生成后端。 #### baoyu-image-gen -基于 AI SDK 的图像生成,支持 OpenAI、Azure OpenAI、Google、OpenRouter、DashScope(阿里通义万相)、即梦(Jimeng)、豆包(Seedream)和 Replicate API。支持文生图、参考图、宽高比和质量预设。 +基于 AI SDK 的图像生成,支持 OpenAI、Azure OpenAI、Google、OpenRouter、DashScope(阿里通义万相)、MiniMax、即梦(Jimeng)、豆包(Seedream)和 Replicate API。支持文生图、参考图、宽高比、自定义尺寸、批量生成和质量预设。 ```bash # 基础生成(自动检测服务商) @@ -684,9 +684,21 @@ AI 驱动的生成后端。 # 
OpenRouter /baoyu-image-gen --prompt "一只猫" --image cat.png --provider openrouter +# OpenRouter + 参考图 +/baoyu-image-gen --prompt "把它变成蓝色" --image out.png --provider openrouter --model google/gemini-3.1-flash-image-preview --ref source.png + # DashScope(阿里通义万相) /baoyu-image-gen --prompt "一只可爱的猫" --image cat.png --provider dashscope +# DashScope 自定义尺寸 +/baoyu-image-gen --prompt "为咖啡品牌设计一张 21:9 横幅海报,包含清晰中文标题" --image banner.png --provider dashscope --model qwen-image-2.0-pro --size 2048x872 + +# MiniMax +/baoyu-image-gen --prompt "A fashion editorial portrait by a bright studio window" --image out.jpg --provider minimax + +# MiniMax + 角色参考图 +/baoyu-image-gen --prompt "A girl stands by the library window, cinematic lighting" --image out.jpg --provider minimax --model image-01 --ref portrait.png --ar 16:9 + # Replicate /baoyu-image-gen --prompt "一只猫" --image cat.png --provider replicate @@ -696,8 +708,11 @@ AI 驱动的生成后端。 # 豆包(Seedream) /baoyu-image-gen --prompt "一只可爱的猫" --image cat.png --provider seedream -# 带参考图(Google、OpenAI、Azure OpenAI、OpenRouter、Replicate 或 Seedream 5.0/4.5/4.0) +# 带参考图(Google、OpenAI、Azure OpenAI、OpenRouter、Replicate、MiniMax 或 Seedream 5.0/4.5/4.0) /baoyu-image-gen --prompt "把它变成蓝色" --image out.png --ref source.png + +# 批量模式 +/baoyu-image-gen --batchfile batch.json --jobs 4 --json ``` **选项**: @@ -706,44 +721,73 @@ AI 驱动的生成后端。 | `--prompt`, `-p` | 提示词文本 | | `--promptfiles` | 从文件读取提示词(多文件拼接) | | `--image` | 输出图片路径(必需) | -| `--provider` | `google`、`openai`、`openrouter`、`dashscope`、`jimeng`、`seedream` 或 `replicate`(默认:自动检测,优先 google) | -| `--model`, `-m` | 模型 ID | +| `--batchfile` | 多图批量生成的 JSON 文件 | +| `--jobs` | 批量模式的并发 worker 数 | +| `--provider` | `google`、`openai`、`azure`、`openrouter`、`dashscope`、`minimax`、`jimeng`、`seedream` 或 `replicate` | +| `--model`, `-m` | 模型 ID 或部署名。Azure 使用部署名;OpenRouter 使用完整模型 ID;MiniMax 使用 `image-01` / `image-01-live` | | `--ar` | 宽高比(如 `16:9`、`1:1`、`4:3`) | | `--size` | 尺寸(如 `1024x1024`) | | `--quality` | `normal` 或 
`2k`(默认:`2k`) | -| `--ref` | 参考图片(Google、OpenAI、OpenRouter、Replicate 或 Seedream 5.0/4.5/4.0) | +| `--imageSize` | Google/OpenRouter 使用的 `1K`、`2K`、`4K` | +| `--ref` | 参考图片(Google、OpenAI、Azure OpenAI、OpenRouter、Replicate、MiniMax 或 Seedream 5.0/4.5/4.0) | +| `--n` | 单次请求生成图片数量 | +| `--json` | 输出 JSON 结果 | **环境变量**(配置方法见[环境配置](#环境配置)): | 变量 | 说明 | 默认值 | |------|------|--------| | `OPENAI_API_KEY` | OpenAI API 密钥 | - | +| `AZURE_OPENAI_API_KEY` | Azure OpenAI API 密钥 | - | | `OPENROUTER_API_KEY` | OpenRouter API 密钥 | - | | `GOOGLE_API_KEY` | Google API 密钥 | - | +| `GEMINI_API_KEY` | `GOOGLE_API_KEY` 的别名 | - | | `DASHSCOPE_API_KEY` | DashScope API 密钥(阿里云) | - | +| `MINIMAX_API_KEY` | MiniMax API 密钥 | - | | `REPLICATE_API_TOKEN` | Replicate API Token | - | | `JIMENG_ACCESS_KEY_ID` | 即梦火山引擎 Access Key | - | | `JIMENG_SECRET_ACCESS_KEY` | 即梦火山引擎 Secret Key | - | | `ARK_API_KEY` | 豆包火山引擎 ARK API 密钥 | - | | `OPENAI_IMAGE_MODEL` | OpenAI 模型 | `gpt-image-1.5` | +| `AZURE_OPENAI_DEPLOYMENT` | Azure 默认部署名 | - | +| `AZURE_OPENAI_IMAGE_MODEL` | 兼容旧配置的 Azure 部署/模型别名 | `gpt-image-1.5` | | `OPENROUTER_IMAGE_MODEL` | OpenRouter 模型 | `google/gemini-3.1-flash-image-preview` | | `GOOGLE_IMAGE_MODEL` | Google 模型 | `gemini-3-pro-image-preview` | | `DASHSCOPE_IMAGE_MODEL` | DashScope 模型 | `qwen-image-2.0-pro` | +| `MINIMAX_IMAGE_MODEL` | MiniMax 模型 | `image-01` | | `REPLICATE_IMAGE_MODEL` | Replicate 模型 | `google/nano-banana-pro` | | `JIMENG_IMAGE_MODEL` | 即梦模型 | `jimeng_t2i_v40` | | `SEEDREAM_IMAGE_MODEL` | 豆包模型 | `doubao-seedream-5-0-260128` | | `OPENAI_BASE_URL` | 自定义 OpenAI 端点 | - | +| `OPENAI_IMAGE_USE_CHAT` | OpenAI 改走 `/chat/completions` | `false` | +| `AZURE_OPENAI_BASE_URL` | Azure 资源或部署端点 | - | +| `AZURE_API_VERSION` | Azure 图像 API 版本 | `2025-04-01-preview` | | `OPENROUTER_BASE_URL` | 自定义 OpenRouter 端点 | `https://openrouter.ai/api/v1` | +| `OPENROUTER_HTTP_REFERER` | OpenRouter 归因用站点 URL | - | +| `OPENROUTER_TITLE` | OpenRouter 归因用应用名 | - | | `GOOGLE_BASE_URL` | 自定义 Google 端点 | - | 
| `DASHSCOPE_BASE_URL` | 自定义 DashScope 端点 | - | +| `MINIMAX_BASE_URL` | 自定义 MiniMax 端点 | `https://api.minimax.io` | | `REPLICATE_BASE_URL` | 自定义 Replicate 端点 | - | | `JIMENG_BASE_URL` | 自定义即梦端点 | `https://visual.volcengineapi.com` | | `JIMENG_REGION` | 即梦区域 | `cn-north-1` | | `SEEDREAM_BASE_URL` | 自定义豆包端点 | `https://ark.cn-beijing.volces.com/api/v3` | +| `BAOYU_IMAGE_GEN_MAX_WORKERS` | 批量模式最大 worker 数 | `10` | +| `BAOYU_IMAGE_GEN_<PROVIDER>_CONCURRENCY` | 覆盖 provider 并发数 | provider 默认值 | +| `BAOYU_IMAGE_GEN_<PROVIDER>_START_INTERVAL_MS` | 覆盖 provider 请求启动间隔 | provider 默认值 | + +**Provider 说明**: +- Azure OpenAI:`--model` 表示 Azure deployment name,不是底层模型家族名。 +- DashScope:`qwen-image-2.0-pro` 是自定义 `--size`、`21:9` 和中英文排版的推荐默认模型。 +- MiniMax:`image-01` 支持官方文档里的自定义 `width` / `height`;`image-01-live` 更偏低延迟,适合配合 `--ar` 使用。 +- MiniMax 参考图会走 `subject_reference`,当前能力更偏角色 / 人像一致性。 +- 即梦不支持参考图。 +- 豆包参考图能力仅适用于 Seedream 5.0 / 4.5 / 4.0,不适用于 Seedream 3.0。 **服务商自动选择**: 1. 如果指定了 `--provider` → 使用指定的 -2. 如果只有一个 API 密钥 → 使用对应服务商 -3. 如果多个可用 → 默认使用 Google +2. 如果传了 `--ref` 且未指定 provider → 依次尝试 Google、OpenAI、Azure、OpenRouter、Replicate、Seedream,最后是 MiniMax +3. 如果只有一个 API 密钥 → 使用对应服务商 +4. 
如果多个可用 → 默认使用 Google #### baoyu-danger-gemini-web @@ -1018,11 +1062,20 @@ cat > ~/.baoyu-skills/.env << 'EOF' OPENAI_API_KEY=sk-xxx OPENAI_IMAGE_MODEL=gpt-image-1.5 # OPENAI_BASE_URL=https://api.openai.com/v1 +# OPENAI_IMAGE_USE_CHAT=false + +# Azure OpenAI +AZURE_OPENAI_API_KEY=xxx +AZURE_OPENAI_BASE_URL=https://your-resource.openai.azure.com +AZURE_OPENAI_DEPLOYMENT=gpt-image-1.5 +# AZURE_API_VERSION=2025-04-01-preview # OpenRouter OPENROUTER_API_KEY=sk-or-xxx OPENROUTER_IMAGE_MODEL=google/gemini-3.1-flash-image-preview # OPENROUTER_BASE_URL=https://openrouter.ai/api/v1 +# OPENROUTER_HTTP_REFERER=https://your-app.example.com +# OPENROUTER_TITLE=你的应用名 # Google GOOGLE_API_KEY=xxx @@ -1034,6 +1087,11 @@ DASHSCOPE_API_KEY=sk-xxx DASHSCOPE_IMAGE_MODEL=qwen-image-2.0-pro # DASHSCOPE_BASE_URL=https://dashscope.aliyuncs.com/api/v1 +# MiniMax +MINIMAX_API_KEY=xxx +MINIMAX_IMAGE_MODEL=image-01 +# MINIMAX_BASE_URL=https://api.minimax.io + # Replicate REPLICATE_API_TOKEN=r8_xxx REPLICATE_IMAGE_MODEL=google/nano-banana-pro diff --git a/skills/baoyu-image-gen/SKILL.md b/skills/baoyu-image-gen/SKILL.md index 9539036..d281d96 100644 --- a/skills/baoyu-image-gen/SKILL.md +++ b/skills/baoyu-image-gen/SKILL.md @@ -1,7 +1,7 @@ --- name: baoyu-image-gen -description: AI image generation with OpenAI, Azure OpenAI, Google, OpenRouter, DashScope, Jimeng, Seedream and Replicate APIs. Supports text-to-image, reference images, aspect ratios, and batch generation from saved prompt files. Sequential by default; use batch parallel generation when the user already has multiple prompts or wants stable multi-image throughput. Use when user asks to generate, create, or draw images. -version: 1.56.3 +description: AI image generation with OpenAI, Azure OpenAI, Google, OpenRouter, DashScope, MiniMax, Jimeng, Seedream and Replicate APIs. Supports text-to-image, reference images, aspect ratios, and batch generation from saved prompt files. 
Sequential by default; use batch parallel generation when the user already has multiple prompts or wants stable multi-image throughput. Use when user asks to generate, create, or draw images. +version: 1.56.4 metadata: openclaw: homepage: https://github.com/JimLiu/baoyu-skills#baoyu-image-gen @@ -13,7 +13,7 @@ metadata: # Image Generation (AI SDK) -Official API-based image generation. Supports OpenAI, Azure OpenAI, Google, OpenRouter, DashScope (阿里通义万象), Jimeng (即梦), Seedream (豆包) and Replicate providers. +Official API-based image generation. Supports OpenAI, Azure OpenAI, Google, OpenRouter, DashScope (阿里通义万相), MiniMax, Jimeng (即梦), Seedream (豆包) and Replicate providers. ## Script Directory @@ -74,7 +74,7 @@ ${BUN_X} {baseDir}/scripts/main.ts --prompt "A cat" --image out.png --quality 2k # From prompt files ${BUN_X} {baseDir}/scripts/main.ts --promptfiles system.md content.md --image out.png -# With reference images (Google, OpenAI, Azure OpenAI, OpenRouter, Replicate, or Seedream 4.0/4.5/5.0) +# With reference images (Google, OpenAI, Azure OpenAI, OpenRouter, Replicate, MiniMax, or Seedream 4.0/4.5/5.0) ${BUN_X} {baseDir}/scripts/main.ts --prompt "Make blue" --image out.png --ref source.png # With reference images (explicit provider/model) @@ -101,6 +101,15 @@ ${BUN_X} {baseDir}/scripts/main.ts --prompt "为咖啡品牌设计一张 21:9 # DashScope legacy Qwen fixed-size model ${BUN_X} {baseDir}/scripts/main.ts --prompt "一张电影感海报" --image out.png --provider dashscope --model qwen-image-max --size 1664x928 +# MiniMax +${BUN_X} {baseDir}/scripts/main.ts --prompt "A fashion editorial portrait by a bright studio window" --image out.jpg --provider minimax + +# MiniMax with subject reference (best for character/portrait consistency) +${BUN_X} {baseDir}/scripts/main.ts --prompt "A girl stands by the library window, cinematic lighting" --image out.jpg --provider minimax --model image-01 --ref portrait.png --ar 16:9 + +# MiniMax with custom size (documented for image-01) +${BUN_X} 
{baseDir}/scripts/main.ts --prompt "A cinematic poster" --image out.jpg --provider minimax --model image-01 --size 1536x1024 + # Replicate (google/nano-banana-pro) ${BUN_X} {baseDir}/scripts/main.ts --prompt "A cat" --image out.png --provider replicate @@ -150,13 +159,13 @@ Paths in `promptFiles`, `image`, and `ref` are resolved relative to the batch fi | `--image ` | Output image path (required in single-image mode) | | `--batchfile ` | JSON batch file for multi-image generation | | `--jobs ` | Worker count for batch mode (default: auto, max from config, built-in default 10) | -| `--provider google\|openai\|azure\|openrouter\|dashscope\|jimeng\|seedream\|replicate` | Force provider (default: auto-detect) | -| `--model `, `-m` | Model ID (Google: `gemini-3-pro-image-preview`; OpenAI: `gpt-image-1.5`; Azure: deployment name such as `gpt-image-1.5` or `image-prod`; OpenRouter: `google/gemini-3.1-flash-image-preview`; DashScope: `qwen-image-2.0-pro`) | +| `--provider google\|openai\|azure\|openrouter\|dashscope\|minimax\|jimeng\|seedream\|replicate` | Force provider (default: auto-detect) | +| `--model `, `-m` | Model ID (Google: `gemini-3-pro-image-preview`; OpenAI: `gpt-image-1.5`; Azure: deployment name such as `gpt-image-1.5` or `image-prod`; OpenRouter: `google/gemini-3.1-flash-image-preview`; DashScope: `qwen-image-2.0-pro`; MiniMax: `image-01`) | | `--ar ` | Aspect ratio (e.g., `16:9`, `1:1`, `4:3`) | | `--size ` | Size (e.g., `1024x1024`) | | `--quality normal\|2k` | Quality preset (default: `2k`) | | `--imageSize 1K\|2K\|4K` | Image size for Google/OpenRouter (default: from quality) | -| `--ref ` | Reference images. Supported by Google multimodal, OpenAI GPT Image edits, Azure OpenAI edits (PNG/JPG only), OpenRouter multimodal models, Replicate, and Seedream 5.0/4.5/4.0. Not supported by Jimeng, Seedream 3.0, or removed SeedEdit 3.0 | +| `--ref ` | Reference images. 
Supported by Google multimodal, OpenAI GPT Image edits, Azure OpenAI edits (PNG/JPG only), OpenRouter multimodal models, Replicate, MiniMax subject-reference, and Seedream 5.0/4.5/4.0. Not supported by Jimeng, Seedream 3.0, or removed SeedEdit 3.0 | | `--n ` | Number of images | | `--json` | JSON output | @@ -169,6 +178,7 @@ Paths in `promptFiles`, `image`, and `ref` are resolved relative to the batch fi | `OPENROUTER_API_KEY` | OpenRouter API key | | `GOOGLE_API_KEY` | Google API key | | `DASHSCOPE_API_KEY` | DashScope API key (阿里云) | +| `MINIMAX_API_KEY` | MiniMax API key | | `REPLICATE_API_TOKEN` | Replicate API token | | `JIMENG_ACCESS_KEY_ID` | Jimeng (即梦) Volcengine access key | | `JIMENG_SECRET_ACCESS_KEY` | Jimeng (即梦) Volcengine secret key | @@ -179,6 +189,7 @@ Paths in `promptFiles`, `image`, and `ref` are resolved relative to the batch fi | `OPENROUTER_IMAGE_MODEL` | OpenRouter model override (default: `google/gemini-3.1-flash-image-preview`) | | `GOOGLE_IMAGE_MODEL` | Google model override | | `DASHSCOPE_IMAGE_MODEL` | DashScope model override (default: `qwen-image-2.0-pro`) | +| `MINIMAX_IMAGE_MODEL` | MiniMax model override (default: `image-01`) | | `REPLICATE_IMAGE_MODEL` | Replicate model override (default: google/nano-banana-pro) | | `JIMENG_IMAGE_MODEL` | Jimeng model override (default: jimeng_t2i_v40) | | `SEEDREAM_IMAGE_MODEL` | Seedream model override (default: doubao-seedream-5-0-260128) | @@ -190,6 +201,7 @@ Paths in `promptFiles`, `image`, and `ref` are resolved relative to the batch fi | `OPENROUTER_TITLE` | Optional app name for OpenRouter attribution | | `GOOGLE_BASE_URL` | Custom Google endpoint | | `DASHSCOPE_BASE_URL` | Custom DashScope endpoint | +| `MINIMAX_BASE_URL` | Custom MiniMax endpoint (default: `https://api.minimax.io`) | | `REPLICATE_BASE_URL` | Custom Replicate endpoint | | `JIMENG_BASE_URL` | Custom Jimeng endpoint (default: `https://visual.volcengineapi.com`) | | `JIMENG_REGION` | Jimeng region (default: `cn-north-1`) | 
@@ -263,6 +275,34 @@ Official references: - [Text-to-image guide](https://help.aliyun.com/zh/model-studio/text-to-image) - [Qwen-Image Edit API](https://help.aliyun.com/zh/model-studio/qwen-image-edit-api) +### MiniMax Models + +Use `--model image-01` or set `default_model.minimax` / `MINIMAX_IMAGE_MODEL` when the user wants MiniMax image generation. + +Official MiniMax image model options currently documented in the API reference: + +- `image-01` (recommended default) + - Supports text-to-image and subject-reference image generation + - Supports official `aspect_ratio` values: `1:1`, `16:9`, `4:3`, `3:2`, `2:3`, `3:4`, `9:16`, `21:9` + - Supports documented custom `width` / `height` output sizes when using `--size ` + - `width` and `height` must both be between `512` and `2048`, and both must be divisible by `8` +- `image-01-live` + - Lower-latency variant + - Use `--ar` for sizing; MiniMax documents custom `width` / `height` as only effective for `image-01` + +MiniMax subject reference notes: + +- `--ref` files are sent as MiniMax `subject_reference` +- MiniMax docs currently describe `subject_reference[].type` as `character` +- Official docs say `image_file` supports public URLs or Base64 Data URLs; `baoyu-image-gen` sends local refs as Data URLs +- Official docs recommend front-facing portrait references in JPG/JPEG/PNG under 10MB + +Official references: + +- [MiniMax Image Generation Guide](https://platform.minimax.io/docs/guides/image-generation) +- [MiniMax Text-to-Image API](https://platform.minimax.io/docs/api-reference/image-generation-t2i) +- [MiniMax Image-to-Image API](https://platform.minimax.io/docs/api-reference/image-generation-i2i) + ### OpenRouter Models Use full OpenRouter model IDs, e.g.: @@ -297,8 +337,8 @@ ${BUN_X} {baseDir}/scripts/main.ts --prompt "A cat" --image out.png --provider r ## Provider Selection -1. 
`--ref` provided + no `--provider` → auto-select Google first, then OpenAI, then OpenRouter, then Replicate (Jimeng and Seedream do not support reference images) -2. `--provider` specified → use it (if `--ref`, must be `google`, `openai`, `openrouter`, or `replicate`) +1. `--ref` provided + no `--provider` → auto-select Google first, then OpenAI, then Azure, then OpenRouter, then Replicate, then Seedream, then MiniMax (MiniMax subject reference is more specialized toward character/portrait consistency) +2. `--provider` specified → use it (if `--ref`, must be `google`, `openai`, `azure`, `openrouter`, `replicate`, `seedream`, or `minimax`) 3. Only one API key available → use that provider 4. Multiple available → default to Google @@ -319,6 +359,7 @@ Supported: `1:1`, `16:9`, `9:16`, `4:3`, `3:4`, `2.35:1` - OpenAI: maps to closest supported size - OpenRouter: sends `imageGenerationOptions.aspect_ratio`; if only `--size ` is given, aspect ratio is inferred automatically - Replicate: passes `aspect_ratio` to model; when `--ref` is provided without `--ar`, defaults to `match_input_image` +- MiniMax: sends official `aspect_ratio` values directly; if `--size ` is given without `--ar`, `width` / `height` are sent for `image-01` ## Generation Mode diff --git a/skills/baoyu-image-gen/references/config/first-time-setup.md b/skills/baoyu-image-gen/references/config/first-time-setup.md index 46aa746..ed32317 100644 --- a/skills/baoyu-image-gen/references/config/first-time-setup.md +++ b/skills/baoyu-image-gen/references/config/first-time-setup.md @@ -53,6 +53,8 @@ options: description: "Router for Gemini/FLUX/OpenAI-compatible image models" - label: "DashScope" description: "Alibaba Cloud - Qwen-Image, strong Chinese/English text rendering" + - label: "MiniMax" + description: "MiniMax image generation with subject-reference character workflows" - label: "Replicate" description: "Community models - nano-banana-pro, flexible model selection" ``` @@ -103,6 +105,20 @@ options: 
description: "Previous GPT Image deployment name" ``` +### Question 2d: Default MiniMax Model + +Only show if user selected MiniMax. + +```yaml +header: "MiniMax Model" +question: "Default MiniMax image generation model?" +options: + - label: "image-01 (Recommended)" + description: "Best default, supports aspect ratios and custom width/height" + - label: "image-01-live" + description: "Faster variant, use aspect ratio instead of custom size" +``` + ### Question 3: Default Quality ```yaml @@ -149,6 +165,7 @@ default_model: azure: [selected azure deployment or null] openrouter: [selected openrouter model or null] dashscope: null + minimax: [selected minimax model or null] replicate: null --- ``` @@ -252,6 +269,24 @@ options: description: "Google's base image model on Replicate" ``` +### MiniMax Model Selection + +```yaml +header: "MiniMax Model" +question: "Choose a default MiniMax image generation model?" +options: + - label: "image-01 (Recommended)" + description: "Best general-purpose MiniMax image model with custom width/height support" + - label: "image-01-live" + description: "Lower-latency MiniMax image model using aspect ratios" +``` + +Notes for MiniMax setup: + +- `image-01` is the safest default. It supports official `aspect_ratio` values and documented custom `width` / `height` output sizes. +- `image-01-live` is useful when the user prefers faster generation and can work with aspect-ratio-based sizing. +- MiniMax subject reference currently uses `subject_reference[].type = character`; docs recommend front-facing portrait references in JPG/JPEG/PNG under 10MB. 
+ ### Update EXTEND.md After user selects a model: @@ -267,6 +302,7 @@ default_model: azure: [value or null] openrouter: [value or null] dashscope: [value or null] + minimax: [value or null] replicate: [value or null] ``` diff --git a/skills/baoyu-image-gen/references/config/preferences-schema.md b/skills/baoyu-image-gen/references/config/preferences-schema.md index cf2d497..9ff0a32 100644 --- a/skills/baoyu-image-gen/references/config/preferences-schema.md +++ b/skills/baoyu-image-gen/references/config/preferences-schema.md @@ -11,7 +11,7 @@ description: EXTEND.md YAML schema for baoyu-image-gen user preferences --- version: 1 -default_provider: null # google|openai|azure|openrouter|dashscope|replicate|null (null = auto-detect) +default_provider: null # google|openai|azure|openrouter|dashscope|minimax|replicate|null (null = auto-detect) default_quality: null # normal|2k|null (null = use default: 2k) @@ -25,6 +25,7 @@ default_model: azure: null # Azure deployment name, e.g., "gpt-image-1.5" or "image-prod" openrouter: null # e.g., "google/gemini-3.1-flash-image-preview" dashscope: null # e.g., "qwen-image-2.0-pro" + minimax: null # e.g., "image-01" replicate: null # e.g., "google/nano-banana-pro" batch: @@ -48,6 +49,9 @@ batch: dashscope: concurrency: 3 start_interval_ms: 1100 + minimax: + concurrency: 3 + start_interval_ms: 1100 --- ``` @@ -65,6 +69,7 @@ batch: | `default_model.azure` | string\|null | null | Azure default deployment name | | `default_model.openrouter` | string\|null | null | OpenRouter default model | | `default_model.dashscope` | string\|null | null | DashScope default model | +| `default_model.minimax` | string\|null | null | MiniMax default model | | `default_model.replicate` | string\|null | null | Replicate default model | | `batch.max_workers` | int\|null | 10 | Batch worker cap | | `batch.provider_limits.<provider>.concurrency` | int\|null | provider default | Max simultaneous requests per provider | @@ -95,6 +100,7 @@ default_model: azure: 
"gpt-image-1.5" openrouter: "google/gemini-3.1-flash-image-preview" dashscope: "qwen-image-2.0-pro" + minimax: "image-01" replicate: "google/nano-banana-pro" batch: max_workers: 10 @@ -108,5 +114,8 @@ batch: openrouter: concurrency: 3 start_interval_ms: 1100 + minimax: + concurrency: 3 + start_interval_ms: 1100 --- ``` diff --git a/skills/baoyu-image-gen/scripts/main.test.ts b/skills/baoyu-image-gen/scripts/main.test.ts index 4e4740d..48baa35 100644 --- a/skills/baoyu-image-gen/scripts/main.test.ts +++ b/skills/baoyu-image-gen/scripts/main.test.ts @@ -124,6 +124,7 @@ default_model: google: gemini-3-pro-image-preview openai: gpt-image-1.5 azure: image-prod + minimax: image-01 batch: max_workers: 8 provider_limits: @@ -132,6 +133,9 @@ batch: start_interval_ms: 900 openai: concurrency: 4 + minimax: + concurrency: 2 + start_interval_ms: 1400 azure: concurrency: 1 start_interval_ms: 1500 @@ -147,6 +151,7 @@ batch: assert.equal(config.default_model?.google, "gemini-3-pro-image-preview"); assert.equal(config.default_model?.openai, "gpt-image-1.5"); assert.equal(config.default_model?.azure, "image-prod"); + assert.equal(config.default_model?.minimax, "image-01"); assert.equal(config.batch?.max_workers, 8); assert.deepEqual(config.batch?.provider_limits?.google, { concurrency: 2, @@ -155,6 +160,10 @@ batch: assert.deepEqual(config.batch?.provider_limits?.openai, { concurrency: 4, }); + assert.deepEqual(config.batch?.provider_limits?.minimax, { + concurrency: 2, + start_interval_ms: 1400, + }); assert.deepEqual(config.batch?.provider_limits?.azure, { concurrency: 1, start_interval_ms: 1500, @@ -200,6 +209,7 @@ test("detectProvider rejects non-ref-capable providers and prefers Google first OPENAI_API_KEY: "openai-key", OPENROUTER_API_KEY: null, DASHSCOPE_API_KEY: null, + MINIMAX_API_KEY: null, REPLICATE_API_TOKEN: null, JIMENG_ACCESS_KEY_ID: null, JIMENG_SECRET_ACCESS_KEY: null, @@ -216,6 +226,7 @@ test("detectProvider selects an available ref-capable provider for 
reference-ima AZURE_OPENAI_BASE_URL: null, OPENROUTER_API_KEY: null, DASHSCOPE_API_KEY: null, + MINIMAX_API_KEY: null, REPLICATE_API_TOKEN: null, JIMENG_ACCESS_KEY_ID: null, JIMENG_SECRET_ACCESS_KEY: null, @@ -235,6 +246,7 @@ test("detectProvider selects Azure when only Azure credentials are configured", AZURE_OPENAI_BASE_URL: "https://example.openai.azure.com", OPENROUTER_API_KEY: null, DASHSCOPE_API_KEY: null, + MINIMAX_API_KEY: null, REPLICATE_API_TOKEN: null, JIMENG_ACCESS_KEY_ID: null, JIMENG_SECRET_ACCESS_KEY: null, @@ -254,6 +266,7 @@ test("detectProvider infers Seedream from model id and allows Seedream reference OPENAI_API_KEY: null, OPENROUTER_API_KEY: null, DASHSCOPE_API_KEY: null, + MINIMAX_API_KEY: null, REPLICATE_API_TOKEN: null, JIMENG_ACCESS_KEY_ID: null, JIMENG_SECRET_ACCESS_KEY: null, @@ -281,6 +294,26 @@ test("detectProvider infers Seedream from model id and allows Seedream reference ); }); +test("detectProvider selects MiniMax when only MiniMax credentials are configured or the model id matches", (t) => { + useEnv(t, { + GOOGLE_API_KEY: null, + OPENAI_API_KEY: null, + AZURE_OPENAI_API_KEY: null, + AZURE_OPENAI_BASE_URL: null, + OPENROUTER_API_KEY: null, + DASHSCOPE_API_KEY: null, + MINIMAX_API_KEY: "minimax-key", + REPLICATE_API_TOKEN: null, + JIMENG_ACCESS_KEY_ID: null, + JIMENG_SECRET_ACCESS_KEY: null, + ARK_API_KEY: null, + }); + + assert.equal(detectProvider(makeArgs()), "minimax"); + assert.equal(detectProvider(makeArgs({ referenceImages: ["ref.png"] })), "minimax"); + assert.equal(detectProvider(makeArgs({ model: "image-01-live" })), "minimax"); +}); + test("batch worker and provider-rate-limit configuration prefer env over EXTEND config", (t) => { useEnv(t, { BAOYU_IMAGE_GEN_MAX_WORKERS: "12", @@ -296,6 +329,10 @@ test("batch worker and provider-rate-limit configuration prefer env over EXTEND concurrency: 2, start_interval_ms: 900, }, + minimax: { + concurrency: 1, + start_interval_ms: 1500, + }, }, }, }; @@ -305,6 +342,10 @@ test("batch 
worker and provider-rate-limit configuration prefer env over EXTEND concurrency: 5, startIntervalMs: 450, }); + assert.deepEqual(getConfiguredProviderRateLimits(extendConfig).minimax, { + concurrency: 1, + startIntervalMs: 1500, + }); }); test("loadBatchTasks and createTaskArgs resolve batch-relative paths", async (t) => { diff --git a/skills/baoyu-image-gen/scripts/main.ts b/skills/baoyu-image-gen/scripts/main.ts index 9ff722a..d76341a 100644 --- a/skills/baoyu-image-gen/scripts/main.ts +++ b/skills/baoyu-image-gen/scripts/main.ts @@ -58,6 +58,7 @@ const DEFAULT_PROVIDER_RATE_LIMITS: Record = { openai: { concurrency: 3, startIntervalMs: 1100 }, openrouter: { concurrency: 3, startIntervalMs: 1100 }, dashscope: { concurrency: 3, startIntervalMs: 1100 }, + minimax: { concurrency: 3, startIntervalMs: 1100 }, jimeng: { concurrency: 3, startIntervalMs: 1100 }, seedream: { concurrency: 3, startIntervalMs: 1100 }, azure: { concurrency: 3, startIntervalMs: 1100 }, @@ -75,13 +76,13 @@ Options: --image Output image path (required in single-image mode) --batchfile JSON batch file for multi-image generation --jobs Worker count for batch mode (default: auto, max from config, built-in default 10) - --provider google|openai|openrouter|dashscope|replicate|jimeng|seedream|azure Force provider (auto-detect by default) + --provider google|openai|openrouter|dashscope|minimax|replicate|jimeng|seedream|azure Force provider (auto-detect by default) -m, --model Model ID --ar Aspect ratio (e.g., 16:9, 1:1, 4:3) --size Size (e.g., 1024x1024) --quality normal|2k Quality preset (default: 2k) --imageSize 1K|2K|4K Image size for Google/OpenRouter (default: from quality) - --ref Reference images (Google, OpenAI, Azure, OpenRouter, Replicate, or Seedream 4.0/4.5/5.0) + --ref Reference images (Google, OpenAI, Azure, OpenRouter, Replicate, MiniMax, or Seedream 4.0/4.5/5.0) --n Number of images for the current task (default: 1) --json JSON output -h, --help Show help @@ -112,6 +113,7 @@ Environment 
variables: GOOGLE_API_KEY Google API key GEMINI_API_KEY Gemini API key (alias for GOOGLE_API_KEY) DASHSCOPE_API_KEY DashScope API key + MINIMAX_API_KEY MiniMax API key REPLICATE_API_TOKEN Replicate API token JIMENG_ACCESS_KEY_ID Jimeng Access Key ID JIMENG_SECRET_ACCESS_KEY Jimeng Secret Access Key @@ -120,6 +122,7 @@ Environment variables: OPENROUTER_IMAGE_MODEL Default OpenRouter model (google/gemini-3.1-flash-image-preview) GOOGLE_IMAGE_MODEL Default Google model (gemini-3-pro-image-preview) DASHSCOPE_IMAGE_MODEL Default DashScope model (qwen-image-2.0-pro) + MINIMAX_IMAGE_MODEL Default MiniMax model (image-01) REPLICATE_IMAGE_MODEL Default Replicate model (google/nano-banana-pro) JIMENG_IMAGE_MODEL Default Jimeng model (jimeng_t2i_v40) SEEDREAM_IMAGE_MODEL Default Seedream model (doubao-seedream-5-0-260128) @@ -130,6 +133,7 @@ Environment variables: OPENROUTER_TITLE Optional app name for OpenRouter attribution GOOGLE_BASE_URL Custom Google endpoint DASHSCOPE_BASE_URL Custom DashScope endpoint + MINIMAX_BASE_URL Custom MiniMax endpoint REPLICATE_BASE_URL Custom Replicate endpoint JIMENG_BASE_URL Custom Jimeng endpoint AZURE_OPENAI_API_KEY Azure OpenAI API key @@ -235,6 +239,7 @@ export function parseArgs(argv: string[]): CliArgs { v !== "openai" && v !== "openrouter" && v !== "dashscope" && + v !== "minimax" && v !== "replicate" && v !== "jimeng" && v !== "seedream" && @@ -390,6 +395,7 @@ export function parseSimpleYaml(yaml: string): Partial { openai: null, openrouter: null, dashscope: null, + minimax: null, replicate: null, jimeng: null, seedream: null, @@ -417,6 +423,7 @@ export function parseSimpleYaml(yaml: string): Partial { key === "openai" || key === "openrouter" || key === "dashscope" || + key === "minimax" || key === "replicate" || key === "jimeng" || key === "seedream" || @@ -434,6 +441,7 @@ export function parseSimpleYaml(yaml: string): Partial { key === "openai" || key === "openrouter" || key === "dashscope" || + key === "minimax" || key === 
"replicate" || key === "jimeng" || key === "seedream" || @@ -528,12 +536,13 @@ export function getConfiguredProviderRateLimits( openai: { ...DEFAULT_PROVIDER_RATE_LIMITS.openai }, openrouter: { ...DEFAULT_PROVIDER_RATE_LIMITS.openrouter }, dashscope: { ...DEFAULT_PROVIDER_RATE_LIMITS.dashscope }, + minimax: { ...DEFAULT_PROVIDER_RATE_LIMITS.minimax }, jimeng: { ...DEFAULT_PROVIDER_RATE_LIMITS.jimeng }, seedream: { ...DEFAULT_PROVIDER_RATE_LIMITS.seedream }, azure: { ...DEFAULT_PROVIDER_RATE_LIMITS.azure }, }; - for (const provider of ["replicate", "google", "openai", "openrouter", "dashscope", "jimeng", "seedream", "azure"] as Provider[]) { + for (const provider of ["replicate", "google", "openai", "openrouter", "dashscope", "minimax", "jimeng", "seedream", "azure"] as Provider[]) { const envPrefix = `BAOYU_IMAGE_GEN_${provider.toUpperCase()}`; const extendLimit = extendConfig.batch?.provider_limits?.[provider]; configured[provider] = { @@ -582,7 +591,9 @@ export function normalizeOutputImagePath(p: string, defaultExtension = ".png"): function inferProviderFromModel(model: string | null): Provider | null { if (!model) return null; - if (model.includes("seedream") || model.includes("seededit")) return "seedream"; + const normalized = model.trim(); + if (normalized.includes("seedream") || normalized.includes("seededit")) return "seedream"; + if (normalized === "image-01" || normalized === "image-01-live") return "minimax"; return null; } @@ -595,10 +606,11 @@ export function detectProvider(args: CliArgs): Provider { args.provider !== "azure" && args.provider !== "openrouter" && args.provider !== "replicate" && - args.provider !== "seedream" + args.provider !== "seedream" && + args.provider !== "minimax" ) { throw new Error( - "Reference images require a ref-capable provider. 
Use --provider google (Gemini multimodal), --provider openai (GPT Image edits), --provider azure (Azure OpenAI), --provider openrouter (OpenRouter multimodal), --provider replicate, or --provider seedream for supported Seedream models." + "Reference images require a ref-capable provider. Use --provider google (Gemini multimodal), --provider openai (GPT Image edits), --provider azure (Azure OpenAI), --provider openrouter (OpenRouter multimodal), --provider replicate, --provider seedream for supported Seedream models, or --provider minimax for MiniMax subject-reference workflows." ); } @@ -609,6 +621,7 @@ export function detectProvider(args: CliArgs): Provider { const hasOpenai = !!process.env.OPENAI_API_KEY; const hasOpenrouter = !!process.env.OPENROUTER_API_KEY; const hasDashscope = !!process.env.DASHSCOPE_API_KEY; + const hasMinimax = !!process.env.MINIMAX_API_KEY; const hasReplicate = !!process.env.REPLICATE_API_TOKEN; const hasJimeng = !!(process.env.JIMENG_ACCESS_KEY_ID && process.env.JIMENG_SECRET_ACCESS_KEY); const hasSeedream = !!process.env.ARK_API_KEY; @@ -621,6 +634,13 @@ export function detectProvider(args: CliArgs): Provider { return "seedream"; } + if (modelProvider === "minimax") { + if (!hasMinimax) { + throw new Error("Model looks like a MiniMax image model, but MINIMAX_API_KEY is not set."); + } + return "minimax"; + } + if (args.referenceImages.length > 0) { if (hasGoogle) return "google"; if (hasOpenai) return "openai"; @@ -628,8 +648,9 @@ export function detectProvider(args: CliArgs): Provider { if (hasOpenrouter) return "openrouter"; if (hasReplicate) return "replicate"; if (hasSeedream) return "seedream"; + if (hasMinimax) return "minimax"; throw new Error( - "Reference images require Google, OpenAI, Azure, OpenRouter, Replicate, or supported Seedream models. Set GOOGLE_API_KEY/GEMINI_API_KEY, OPENAI_API_KEY, AZURE_OPENAI_API_KEY+AZURE_OPENAI_BASE_URL, OPENROUTER_API_KEY, REPLICATE_API_TOKEN, or ARK_API_KEY, or remove --ref." 
+ "Reference images require Google, OpenAI, Azure, OpenRouter, Replicate, supported Seedream models, or MiniMax. Set GOOGLE_API_KEY/GEMINI_API_KEY, OPENAI_API_KEY, AZURE_OPENAI_API_KEY+AZURE_OPENAI_BASE_URL, OPENROUTER_API_KEY, REPLICATE_API_TOKEN, ARK_API_KEY, or MINIMAX_API_KEY, or remove --ref." ); } @@ -639,6 +660,7 @@ export function detectProvider(args: CliArgs): Provider { hasAzure && "azure", hasOpenrouter && "openrouter", hasDashscope && "dashscope", + hasMinimax && "minimax", hasReplicate && "replicate", hasJimeng && "jimeng", hasSeedream && "seedream", @@ -648,7 +670,7 @@ export function detectProvider(args: CliArgs): Provider { if (available.length > 1) return available[0]!; throw new Error( - "No API key found. Set GOOGLE_API_KEY, GEMINI_API_KEY, OPENAI_API_KEY, AZURE_OPENAI_API_KEY+AZURE_OPENAI_BASE_URL, OPENROUTER_API_KEY, DASHSCOPE_API_KEY, REPLICATE_API_TOKEN, JIMENG keys, or ARK_API_KEY.\n" + + "No API key found. Set GOOGLE_API_KEY, GEMINI_API_KEY, OPENAI_API_KEY, AZURE_OPENAI_API_KEY+AZURE_OPENAI_BASE_URL, OPENROUTER_API_KEY, DASHSCOPE_API_KEY, MINIMAX_API_KEY, REPLICATE_API_TOKEN, JIMENG keys, or ARK_API_KEY.\n" + "Create ~/.baoyu-skills/.env or /.baoyu-skills/.env with your keys." 
); } @@ -687,6 +709,7 @@ export function isRetryableGenerationError(error: unknown): boolean { async function loadProviderModule(provider: Provider): Promise { if (provider === "google") return (await import("./providers/google")) as ProviderModule; if (provider === "dashscope") return (await import("./providers/dashscope")) as ProviderModule; + if (provider === "minimax") return (await import("./providers/minimax")) as ProviderModule; if (provider === "replicate") return (await import("./providers/replicate")) as ProviderModule; if (provider === "openrouter") return (await import("./providers/openrouter")) as ProviderModule; if (provider === "jimeng") return (await import("./providers/jimeng")) as ProviderModule; @@ -717,6 +740,7 @@ function getModelForProvider( return extendConfig.default_model.openrouter; } if (provider === "dashscope" && extendConfig.default_model.dashscope) return extendConfig.default_model.dashscope; + if (provider === "minimax" && extendConfig.default_model.minimax) return extendConfig.default_model.minimax; if (provider === "replicate" && extendConfig.default_model.replicate) return extendConfig.default_model.replicate; if (provider === "jimeng" && extendConfig.default_model.jimeng) return extendConfig.default_model.jimeng; if (provider === "seedream" && extendConfig.default_model.seedream) return extendConfig.default_model.seedream; diff --git a/skills/baoyu-image-gen/scripts/providers/minimax.test.ts b/skills/baoyu-image-gen/scripts/providers/minimax.test.ts new file mode 100644 index 0000000..c334634 --- /dev/null +++ b/skills/baoyu-image-gen/scripts/providers/minimax.test.ts @@ -0,0 +1,171 @@ +import assert from "node:assert/strict"; +import fs from "node:fs/promises"; +import os from "node:os"; +import path from "node:path"; +import test, { type TestContext } from "node:test"; + +import type { CliArgs } from "../types.ts"; +import { + buildMinimaxUrl, + buildRequestBody, + buildSubjectReference, + extractImageFromResponse, + 
parsePixelSize, + validateArgs, +} from "./minimax.ts"; + +function useEnv( + t: TestContext, + values: Record, +): void { + const previous = new Map(); + for (const [key, value] of Object.entries(values)) { + previous.set(key, process.env[key]); + if (value == null) { + delete process.env[key]; + } else { + process.env[key] = value; + } + } + + t.after(() => { + for (const [key, value] of previous.entries()) { + if (value == null) { + delete process.env[key]; + } else { + process.env[key] = value; + } + } + }); +} + +function makeArgs(overrides: Partial = {}): CliArgs { + return { + prompt: null, + promptFiles: [], + imagePath: null, + provider: null, + model: null, + aspectRatio: null, + size: null, + quality: null, + imageSize: null, + referenceImages: [], + n: 1, + batchFile: null, + jobs: null, + json: false, + help: false, + ...overrides, + }; +} + +test("MiniMax URL builder normalizes /v1 suffixes", (t) => { + useEnv(t, { MINIMAX_BASE_URL: "https://api.minimax.io" }); + assert.equal(buildMinimaxUrl(), "https://api.minimax.io/v1/image_generation"); + + process.env.MINIMAX_BASE_URL = "https://proxy.example.com/custom/v1/"; + assert.equal(buildMinimaxUrl(), "https://proxy.example.com/custom/v1/image_generation"); +}); + +test("MiniMax size parsing and validation follow documented constraints", () => { + assert.deepEqual(parsePixelSize("1536x1024"), { width: 1536, height: 1024 }); + assert.deepEqual(parsePixelSize("1536*1024"), { width: 1536, height: 1024 }); + assert.equal(parsePixelSize("wide"), null); + + validateArgs("image-01", makeArgs({ size: "1536x1024", n: 9 })); + + assert.throws( + () => validateArgs("image-01-live", makeArgs({ size: "1536x1024" })), + /only supported with model image-01/, + ); + assert.throws( + () => validateArgs("image-01", makeArgs({ size: "1537x1024" })), + /divisible by 8/, + ); + assert.throws( + () => validateArgs("image-01", makeArgs({ aspectRatio: "2.35:1" })), + /aspect_ratio must be one of/, + ); + assert.throws( + () => 
validateArgs("image-01", makeArgs({ n: 10 })), + /at most 9 images/, + ); +}); + +test("MiniMax request body maps aspect ratio, size, n, and subject references", async (t) => { + const dir = await fs.mkdtemp(path.join(os.tmpdir(), "minimax-test-")); + t.after(() => fs.rm(dir, { recursive: true, force: true })); + + const refPath = path.join(dir, "portrait.png"); + await fs.writeFile(refPath, Buffer.from("portrait")); + + const ratioBody = await buildRequestBody( + "A portrait by the window", + "image-01", + makeArgs({ aspectRatio: "16:9", n: 2, referenceImages: [refPath] }), + ); + assert.equal(ratioBody.aspect_ratio, "16:9"); + assert.equal(ratioBody.n, 2); + assert.equal(ratioBody.response_format, "base64"); + assert.match(ratioBody.subject_reference?.[0]?.image_file || "", /^data:image\/png;base64,/); + + const sizeBody = await buildRequestBody( + "A portrait by the window", + "image-01", + makeArgs({ size: "1536x1024" }), + ); + assert.equal(sizeBody.width, 1536); + assert.equal(sizeBody.height, 1024); + assert.equal(sizeBody.aspect_ratio, undefined); +}); + +test("MiniMax subject references require supported file types", async (t) => { + const dir = await fs.mkdtemp(path.join(os.tmpdir(), "minimax-ref-")); + t.after(() => fs.rm(dir, { recursive: true, force: true })); + + const good = path.join(dir, "portrait.jpg"); + const bad = path.join(dir, "portrait.webp"); + await fs.writeFile(good, Buffer.from("portrait")); + await fs.writeFile(bad, Buffer.from("portrait")); + + const subjectReference = await buildSubjectReference([good]); + assert.equal(subjectReference?.[0]?.type, "character"); + + await assert.rejects( + () => buildSubjectReference([bad]), + /only supports JPG, JPEG, or PNG/, + ); +}); + +test("MiniMax response extraction supports base64 and URL payloads", async (t) => { + const originalFetch = globalThis.fetch; + t.after(() => { + globalThis.fetch = originalFetch; + }); + + const fromBase64 = await extractImageFromResponse({ + data: { + 
image_base64: [Buffer.from("hello").toString("base64")], + }, + }); + assert.equal(Buffer.from(fromBase64).toString("utf8"), "hello"); + + globalThis.fetch = async () => + new Response(Uint8Array.from([1, 2, 3]), { + status: 200, + headers: { "Content-Type": "image/jpeg" }, + }); + + const fromUrl = await extractImageFromResponse({ + data: { + image_urls: ["https://example.com/output.jpg"], + }, + }); + assert.deepEqual([...fromUrl], [1, 2, 3]); + + await assert.rejects( + () => extractImageFromResponse({ base_resp: { status_code: 1001, status_msg: "blocked" } }), + /blocked/, + ); +}); diff --git a/skills/baoyu-image-gen/scripts/providers/minimax.ts b/skills/baoyu-image-gen/scripts/providers/minimax.ts new file mode 100644 index 0000000..67368b8 --- /dev/null +++ b/skills/baoyu-image-gen/scripts/providers/minimax.ts @@ -0,0 +1,220 @@ +import path from "node:path"; +import { readFile } from "node:fs/promises"; + +import type { CliArgs } from "../types"; + +const DEFAULT_MODEL = "image-01"; +const MAX_REFERENCE_IMAGE_BYTES = 10 * 1024 * 1024; +const SUPPORTED_ASPECT_RATIOS = new Set(["1:1", "16:9", "4:3", "3:2", "2:3", "3:4", "9:16", "21:9"]); + +type MinimaxSubjectReference = { + type: "character"; + image_file: string; +}; + +type MinimaxRequestBody = { + model: string; + prompt: string; + response_format: "base64"; + aspect_ratio?: string; + width?: number; + height?: number; + n?: number; + subject_reference?: MinimaxSubjectReference[]; +}; + +type MinimaxResponse = { + id?: string; + data?: { + image_urls?: string[]; + image_base64?: string[]; + }; + base_resp?: { + status_code?: number; + status_msg?: string; + }; +}; + +export function getDefaultModel(): string { + return process.env.MINIMAX_IMAGE_MODEL || DEFAULT_MODEL; +} + +function getApiKey(): string | null { + return process.env.MINIMAX_API_KEY || null; +} + +export function buildMinimaxUrl(): string { + const base = (process.env.MINIMAX_BASE_URL || "https://api.minimax.io").replace(/\/+$/g, ""); + 
return base.endsWith("/v1") ? `${base}/image_generation` : `${base}/v1/image_generation`; +} + +function getMimeType(filename: string): "image/jpeg" | "image/png" { + const ext = path.extname(filename).toLowerCase(); + if (ext === ".jpg" || ext === ".jpeg") return "image/jpeg"; + if (ext === ".png") return "image/png"; + throw new Error( + `MiniMax subject_reference only supports JPG, JPEG, or PNG files: ${filename}` + ); +} + +export function parsePixelSize(size: string): { width: number; height: number } | null { + const match = size.trim().match(/^(\d+)\s*[xX*]\s*(\d+)$/); + if (!match) return null; + + const width = parseInt(match[1]!, 10); + const height = parseInt(match[2]!, 10); + if (!Number.isFinite(width) || !Number.isFinite(height) || width <= 0 || height <= 0) { + return null; + } + + return { width, height }; +} + +function validatePixelSize(width: number, height: number): void { + if (width < 512 || width > 2048 || height < 512 || height > 2048) { + throw new Error("MiniMax custom size must keep width and height between 512 and 2048."); + } + if (width % 8 !== 0 || height % 8 !== 0) { + throw new Error("MiniMax custom size requires width and height divisible by 8."); + } +} + +export function validateArgs(model: string, args: CliArgs): void { + if (args.n > 9) { + throw new Error("MiniMax supports at most 9 images per request."); + } + + if (args.aspectRatio && !SUPPORTED_ASPECT_RATIOS.has(args.aspectRatio)) { + throw new Error( + `MiniMax aspect_ratio must be one of: ${Array.from(SUPPORTED_ASPECT_RATIOS).join(", ")}.` + ); + } + + if (args.size && !args.aspectRatio) { + if (model !== "image-01") { + throw new Error("MiniMax custom --size is only supported with model image-01. 
Use --model image-01 or pass --ar instead."); + } + const parsed = parsePixelSize(args.size); + if (!parsed) { + throw new Error("MiniMax --size must be in WxH format, for example 1536x1024."); + } + validatePixelSize(parsed.width, parsed.height); + } +} + +export async function buildSubjectReference( + referenceImages: string[], +): Promise { + if (referenceImages.length === 0) return undefined; + + const subjectReference: MinimaxSubjectReference[] = []; + for (const refPath of referenceImages) { + const bytes = await readFile(refPath); + if (bytes.length > MAX_REFERENCE_IMAGE_BYTES) { + throw new Error(`MiniMax subject_reference images must be smaller than 10MB: ${refPath}`); + } + + subjectReference.push({ + type: "character", + image_file: `data:${getMimeType(refPath)};base64,${bytes.toString("base64")}`, + }); + } + + return subjectReference; +} + +export async function buildRequestBody( + prompt: string, + model: string, + args: CliArgs, +): Promise { + validateArgs(model, args); + + const body: MinimaxRequestBody = { + model, + prompt, + response_format: "base64", + }; + + if (args.aspectRatio) { + body.aspect_ratio = args.aspectRatio; + } else if (args.size) { + const parsed = parsePixelSize(args.size); + if (!parsed) { + throw new Error("MiniMax --size must be in WxH format, for example 1536x1024."); + } + body.width = parsed.width; + body.height = parsed.height; + } + + if (args.n > 1) { + body.n = args.n; + } + + const subjectReference = await buildSubjectReference(args.referenceImages); + if (subjectReference) { + body.subject_reference = subjectReference; + } + + return body; +} + +async function downloadImage(url: string): Promise { + const response = await fetch(url); + if (!response.ok) { + throw new Error(`Failed to download image from MiniMax: ${response.status}`); + } + return new Uint8Array(await response.arrayBuffer()); +} + +export async function extractImageFromResponse(result: MinimaxResponse): Promise { + const baseResp = result.base_resp; 
+ if (baseResp && baseResp.status_code !== undefined && baseResp.status_code !== 0) { + throw new Error(baseResp.status_msg || `MiniMax API returned status_code=${baseResp.status_code}`); + } + + const base64Image = result.data?.image_base64?.[0]; + if (base64Image) { + return Uint8Array.from(Buffer.from(base64Image, "base64")); + } + + const url = result.data?.image_urls?.[0]; + if (url) { + return downloadImage(url); + } + + throw new Error("No image data in MiniMax response"); +} + +export function getDefaultOutputExtension(): ".jpg" { + return ".jpg"; +} + +export async function generateImage( + prompt: string, + model: string, + args: CliArgs +): Promise { + const apiKey = getApiKey(); + if (!apiKey) { + throw new Error("MINIMAX_API_KEY is required. Get one from https://platform.minimax.io/"); + } + + const body = await buildRequestBody(prompt, model, args); + const response = await fetch(buildMinimaxUrl(), { + method: "POST", + headers: { + "Content-Type": "application/json", + Authorization: `Bearer ${apiKey}`, + }, + body: JSON.stringify(body), + }); + + if (!response.ok) { + const err = await response.text(); + throw new Error(`MiniMax API error (${response.status}): ${err}`); + } + + const result = (await response.json()) as MinimaxResponse; + return extractImageFromResponse(result); +} diff --git a/skills/baoyu-image-gen/scripts/types.ts b/skills/baoyu-image-gen/scripts/types.ts index 227d13c..dd98213 100644 --- a/skills/baoyu-image-gen/scripts/types.ts +++ b/skills/baoyu-image-gen/scripts/types.ts @@ -1,4 +1,13 @@ -export type Provider = "google" | "openai" | "openrouter" | "dashscope" | "replicate" | "jimeng" | "seedream" | "azure"; +export type Provider = + | "google" + | "openai" + | "openrouter" + | "dashscope" + | "minimax" + | "replicate" + | "jimeng" + | "seedream" + | "azure"; export type Quality = "normal" | "2k"; export type CliArgs = { @@ -52,6 +61,7 @@ export type ExtendConfig = { openai: string | null; openrouter: string | null; dashscope: 
string | null; + minimax: string | null; replicate: string | null; jimeng: string | null; seedream: string | null;