From 67a45a57a03bec3a560f8370c1e7ec318d116b01 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jim=20Liu=20=E5=AE=9D=E7=8E=89?= Date: Sun, 12 Apr 2026 01:16:32 -0500 Subject: [PATCH] Improve baoyu-imagine Replicate compatibility (#125) * Align Replicate image behavior with the models we actually support Replicate image generation in baoyu-imagine no longer assumes that every model accepts the nano-banana request schema. The Replicate provider now defaults to google/nano-banana-2, routes supported model families through family-specific builders and validators, blocks misleading multi-output requests before they reach the API, and updates user-facing docs/config guidance to match the actual contract. Constraint: Replicate model families expose different input schemas Constraint: Current Replicate path only saves one output image per request Constraint: Must not change non-Replicate providers Rejected: Keep one nano-banana-style payload for all Replicate models | triggers remote schema errors on Seedream and Wan Rejected: Continue accepting multi-image Replicate requests and save only the first result | silently drops outputs Confidence: high Scope-risk: narrow Reversibility: clean Directive: Add a family-specific validator and input builder before exposing more Replicate model IDs or multi-output flags Tested: npm test Tested: node --test skills/baoyu-imagine/scripts/providers/replicate.test.ts skills/baoyu-imagine/scripts/main.test.ts Not-tested: Live Replicate API calls against production models Co-authored-by: justnode * Preserve Replicate compatibility when shared defaults leak across providers Addressed the new PR review findings by teaching baoyu-imagine to track where aspect-ratio defaults came from, mirroring the earlier imageSize fix, so unsupported Replicate models can still run prompt-only requests when the value was inherited from shared config. Also corrected Seedream 4.5 custom size encoding to use the API's custom width/height schema instead of sending literal WxH strings. Constraint: Shared EXTEND defaults still need to apply globally for providers that support them Constraint: Seedream 4.5 custom sizes must follow Replicate's documented custom size schema Rejected: Ignore all aspect ratios for unknown Replicate models | would hide explicit unsupported CLI/task input Rejected: Keep Seedream custom sizes as literal strings | validated locally but fails against the provider API Confidence: high Scope-risk: narrow Reversibility: clean Directive: Any future inherited-default validation for provider-specific flags should record the source explicitly before rejecting it Tested: node --import tsx --test skills/baoyu-imagine/scripts/main.test.ts skills/baoyu-imagine/scripts/providers/replicate.test.ts Tested: npm test Not-tested: Live Replicate API calls for Seedream 4.5 custom-size requests --------- Co-authored-by: justnode --- README.md | 19 +- README.zh.md | 19 +- skills/baoyu-imagine/SKILL.md | 50 +- .../references/config/first-time-setup.md | 14 +- .../references/config/preferences-schema.md | 4 +- skills/baoyu-imagine/scripts/main.test.ts | 24 + skills/baoyu-imagine/scripts/main.ts | 28 +- .../scripts/providers/replicate.test.ts | 230 ++++++++- .../scripts/providers/replicate.ts | 445 +++++++++++++++++- skills/baoyu-imagine/scripts/types.ts | 2 + 10 files changed, 772 insertions(+), 63 deletions(-) diff --git a/README.md b/README.md index 21d7f8c..39ca7bd 100644 --- a/README.md +++ b/README.md @@ -754,9 +754,15 @@ AI SDK-based image generation using OpenAI, Azure OpenAI, Google, OpenRouter, Da # MiniMax with subject reference /baoyu-imagine --prompt "A girl stands by the library window, cinematic lighting" --image out.jpg --provider minimax --model image-01 --ref portrait.png --ar 16:9 -# Replicate +# Replicate (default: google/nano-banana-2) /baoyu-imagine --prompt "A cat" --image cat.png --provider replicate +# Replicate Seedream 4.5 +/baoyu-imagine --prompt "A studio portrait" --image portrait.png --provider replicate --model bytedance/seedream-4.5 --ar 3:2 + +# Replicate Wan 2.7 Image Pro +/baoyu-imagine --prompt "A concept frame" --image frame.png --provider replicate --model wan-video/wan-2.7-image-pro --size 2048x1152 + # Jimeng (即梦) /baoyu-imagine --prompt "一只可爱的猫" --image cat.png --provider jimeng @@ -784,8 +790,8 @@ AI SDK-based image generation using OpenAI, Azure OpenAI, Google, OpenRouter, Da | `--size` | Size (e.g., `1024x1024`) | | `--quality` | `normal` or `2k` (default: `2k`) | | `--imageSize` | `1K`, `2K`, or `4K` for Google/OpenRouter | -| `--ref` | Reference images (Google, OpenAI, Azure OpenAI, OpenRouter, Replicate, MiniMax, or Seedream 5.0/4.5/4.0) | -| `--n` | Number of images per request | +| `--ref` | Reference images (Google, OpenAI, Azure OpenAI, OpenRouter, Replicate supported families, MiniMax, or Seedream 5.0/4.5/4.0) | +| `--n` | Number of images per request (`replicate` currently requires `--n 1`) | | `--json` | JSON output | **Environment Variables** (see [Environment Configuration](#environment-configuration) for setup): @@ -813,7 +819,7 @@ AI SDK-based image generation using OpenAI, Azure OpenAI, Google, OpenRouter, Da | `ZAI_IMAGE_MODEL` | Z.AI model | `glm-image` | | `BIGMODEL_IMAGE_MODEL` | Backward-compatible alias for Z.AI model | `glm-image` | | `MINIMAX_IMAGE_MODEL` | MiniMax model | `image-01` | -| `REPLICATE_IMAGE_MODEL` | Replicate model | `google/nano-banana-pro` | +| `REPLICATE_IMAGE_MODEL` | Replicate model | `google/nano-banana-2` | | `JIMENG_IMAGE_MODEL` | Jimeng model | `jimeng_t2i_v40` | | `SEEDREAM_IMAGE_MODEL` | Seedream model | `doubao-seedream-5-0-260128` | | `OPENAI_BASE_URL` | Custom OpenAI endpoint | - | @@ -844,6 +850,9 @@ AI SDK-based image generation using OpenAI, Azure OpenAI, Google, OpenRouter, Da - MiniMax reference images are sent as `subject_reference`; the current API is specialized toward character / portrait consistency. - Jimeng does not support reference images. - Seedream reference images are supported by Seedream 5.0 / 4.5 / 4.0, not Seedream 3.0. +- Replicate defaults to `google/nano-banana-2`. `baoyu-imagine` only enables Replicate advanced options for `google/nano-banana*`, `bytedance/seedream-4.5`, `bytedance/seedream-5-lite`, `wan-video/wan-2.7-image`, and `wan-video/wan-2.7-image-pro`. +- Replicate currently saves exactly one output image per request. `--n > 1` is blocked locally instead of silently dropping extra results. +- Replicate model behavior is family-specific: nano-banana uses `--quality` / `--ar`, Seedream uses validated `--size` / `--ar`, and Wan uses validated `--size` (with `--ar` converted locally to a concrete size). **Provider Auto-Selection**: 1. If `--provider` is specified → use it @@ -1161,7 +1170,7 @@ MINIMAX_IMAGE_MODEL=image-01 # Replicate REPLICATE_API_TOKEN=r8_xxx -REPLICATE_IMAGE_MODEL=google/nano-banana-pro +REPLICATE_IMAGE_MODEL=google/nano-banana-2 # REPLICATE_BASE_URL=https://api.replicate.com # Jimeng (即梦) diff --git a/README.zh.md b/README.zh.md index 36a9669..082ed21 100644 --- a/README.zh.md +++ b/README.zh.md @@ -754,9 +754,15 @@ AI 驱动的生成后端。 # MiniMax + 角色参考图 /baoyu-imagine --prompt "A girl stands by the library window, cinematic lighting" --image out.jpg --provider minimax --model image-01 --ref portrait.png --ar 16:9 -# Replicate +# Replicate(默认:google/nano-banana-2) /baoyu-imagine --prompt "一只猫" --image cat.png --provider replicate +# Replicate Seedream 4.5 +/baoyu-imagine --prompt "一张影棚人像" --image portrait.png --provider replicate --model bytedance/seedream-4.5 --ar 3:2 + +# Replicate Wan 2.7 Image Pro +/baoyu-imagine --prompt "一张概念分镜" --image frame.png --provider replicate --model wan-video/wan-2.7-image-pro --size 2048x1152 + # 即梦(Jimeng) /baoyu-imagine --prompt "一只可爱的猫" --image cat.png --provider jimeng @@ -784,8 +790,8 @@ AI 驱动的生成后端。 | `--size` | 尺寸(如 `1024x1024`) | | `--quality` | `normal` 或 `2k`(默认:`2k`) | | `--imageSize` | Google/OpenRouter 使用的 `1K`、`2K`、`4K` | -| `--ref` | 参考图片(Google、OpenAI、Azure OpenAI、OpenRouter、Replicate、MiniMax 或 Seedream 5.0/4.5/4.0) | -| `--n` | 单次请求生成图片数量 | +| `--ref` | 参考图片(Google、OpenAI、Azure OpenAI、OpenRouter、Replicate 支持的模型家族、MiniMax 或 Seedream 5.0/4.5/4.0) | +| `--n` | 单次请求生成图片数量(`replicate` 当前只支持 `--n 1`) | | `--json` | 输出 JSON 结果 | **环境变量**(配置方法见[环境配置](#环境配置)): @@ -813,7 +819,7 @@ AI 驱动的生成后端。 | `ZAI_IMAGE_MODEL` | Z.AI 模型 | `glm-image` | | `BIGMODEL_IMAGE_MODEL` | Z.AI 模型向后兼容别名 | `glm-image` | | `MINIMAX_IMAGE_MODEL` | MiniMax 模型 | `image-01` | -| `REPLICATE_IMAGE_MODEL` | Replicate 模型 | `google/nano-banana-pro` | +| `REPLICATE_IMAGE_MODEL` | Replicate 模型 | `google/nano-banana-2` | | `JIMENG_IMAGE_MODEL` | 即梦模型 | `jimeng_t2i_v40` | | `SEEDREAM_IMAGE_MODEL` | 豆包模型 | `doubao-seedream-5-0-260128` | | `OPENAI_BASE_URL` | 自定义 OpenAI 端点 | - | @@ -844,6 +850,9 @@ AI 驱动的生成后端。 - MiniMax 参考图会走 `subject_reference`,当前能力更偏角色 / 人像一致性。 - 即梦不支持参考图。 - 豆包参考图能力仅适用于 Seedream 5.0 / 4.5 / 4.0,不适用于 Seedream 3.0。 +- Replicate 默认模型改为 `google/nano-banana-2`。`baoyu-imagine` 目前只对 `google/nano-banana*`、`bytedance/seedream-4.5`、`bytedance/seedream-5-lite`、`wan-video/wan-2.7-image` 和 `wan-video/wan-2.7-image-pro` 开启本地能力识别与校验。 +- Replicate 当前只保存单张输出图,`--n > 1` 会在本地直接报错,避免多图结果被静默丢弃。 +- Replicate 的参数能力按模型家族区分:nano-banana 走 `--quality` / `--ar`,Seedream 走校验后的 `--size` / `--ar`,Wan 走校验后的 `--size`(`--ar` 会先在本地换算成具体尺寸)。 **服务商自动选择**: 1. 如果指定了 `--provider` → 使用指定的 @@ -1161,7 +1170,7 @@ MINIMAX_IMAGE_MODEL=image-01 # Replicate REPLICATE_API_TOKEN=r8_xxx -REPLICATE_IMAGE_MODEL=google/nano-banana-pro +REPLICATE_IMAGE_MODEL=google/nano-banana-2 # REPLICATE_BASE_URL=https://api.replicate.com # 即梦(Jimeng) diff --git a/skills/baoyu-imagine/SKILL.md b/skills/baoyu-imagine/SKILL.md index 0f53543..0e028a6 100644 --- a/skills/baoyu-imagine/SKILL.md +++ b/skills/baoyu-imagine/SKILL.md @@ -76,7 +76,7 @@ ${BUN_X} {baseDir}/scripts/main.ts --prompt "A cat" --image out.png --quality 2k # From prompt files ${BUN_X} {baseDir}/scripts/main.ts --promptfiles system.md content.md --image out.png -# With reference images (Google, OpenAI, Azure OpenAI, OpenRouter, Replicate, MiniMax, or Seedream 4.0/4.5/5.0) +# With reference images (Google, OpenAI, Azure OpenAI, OpenRouter, Replicate supported families, MiniMax, or Seedream 4.0/4.5/5.0) ${BUN_X} {baseDir}/scripts/main.ts --prompt "Make blue" --image out.png --ref source.png # With reference images (explicit provider/model) @@ -118,11 +118,14 @@ ${BUN_X} {baseDir}/scripts/main.ts --prompt "A girl stands by the library window # MiniMax with custom size (documented for image-01) ${BUN_X} {baseDir}/scripts/main.ts --prompt "A cinematic poster" --image out.jpg --provider minimax --model image-01 --size 1536x1024 -# Replicate (google/nano-banana-pro) +# Replicate (default: google/nano-banana-2) ${BUN_X} {baseDir}/scripts/main.ts --prompt "A cat" --image out.png --provider replicate -# Replicate with specific model -${BUN_X} {baseDir}/scripts/main.ts --prompt "A cat" --image out.png --provider replicate --model google/nano-banana +# Replicate Seedream 4.5 +${BUN_X} {baseDir}/scripts/main.ts --prompt "A cinematic portrait" --image out.png --provider replicate --model bytedance/seedream-4.5 --ar 3:2 + +# Replicate Wan 2.7 Image Pro +${BUN_X} {baseDir}/scripts/main.ts --prompt "A concept frame" --image out.png --provider replicate --model wan-video/wan-2.7-image-pro --size 2048x1152 # Batch mode with saved prompt files ${BUN_X} {baseDir}/scripts/main.ts --batchfile batch.json @@ -142,7 +145,7 @@ ${BUN_X} {baseDir}/scripts/main.ts --batchfile batch.json --jobs 4 --json "promptFiles": ["prompts/hero.md"], "image": "out/hero.png", "provider": "replicate", - "model": "google/nano-banana-pro", + "model": "google/nano-banana-2", "ar": "16:9", "quality": "2k" }, @@ -173,8 +176,8 @@ Paths in `promptFiles`, `image`, and `ref` are resolved relative to the batch fi | `--size ` | Size (e.g., `1024x1024`) | | `--quality normal\|2k` | Quality preset (default: `2k`) | | `--imageSize 1K\|2K\|4K` | Image size for Google/OpenRouter (default: from quality) | -| `--ref ` | Reference images. Supported by Google multimodal, OpenAI GPT Image edits, Azure OpenAI edits (PNG/JPG only), OpenRouter multimodal models, Replicate, MiniMax subject-reference, and Seedream 5.0/4.5/4.0. Not supported by Jimeng, Seedream 3.0, or removed SeedEdit 3.0 | -| `--n ` | Number of images | +| `--ref ` | Reference images. Supported by Google multimodal, OpenAI GPT Image edits, Azure OpenAI edits (PNG/JPG only), OpenRouter multimodal models, Replicate supported families, MiniMax subject-reference, and Seedream 5.0/4.5/4.0. Not supported by Jimeng, Seedream 3.0, or removed SeedEdit 3.0 | +| `--n ` | Number of images. Replicate currently supports only `--n 1` because this path saves exactly one output image | | `--json` | JSON output | ## Environment Variables @@ -202,7 +205,7 @@ Paths in `promptFiles`, `image`, and `ref` are resolved relative to the batch fi | `ZAI_IMAGE_MODEL` | Z.AI model override (default: `glm-image`) | | `BIGMODEL_IMAGE_MODEL` | Backward-compatible alias for Z.AI model override | | `MINIMAX_IMAGE_MODEL` | MiniMax model override (default: `image-01`) | -| `REPLICATE_IMAGE_MODEL` | Replicate model override (default: google/nano-banana-pro) | +| `REPLICATE_IMAGE_MODEL` | Replicate model override (default: google/nano-banana-2) | | `JIMENG_IMAGE_MODEL` | Jimeng model override (default: jimeng_t2i_v40) | | `SEEDREAM_IMAGE_MODEL` | Seedream model override (default: doubao-seedream-5-0-260128) | | `OPENAI_BASE_URL` | Custom OpenAI endpoint | @@ -360,10 +363,33 @@ Notes: ### Replicate Models -Supported model formats: +Replicate support in `baoyu-imagine` is intentionally scoped to the model families that the tool can validate locally and save without dropping outputs: -- `owner/name` (recommended for official models), e.g. `google/nano-banana-pro` -- `owner/name:version` (community models by version), e.g. `stability-ai/sdxl:` +- `google/nano-banana*` (default: `google/nano-banana-2`) + - Supports prompt-only and reference-image generation + - Uses Replicate `aspect_ratio`, `resolution`, and `output_format` + - `--size ` is accepted only as a shorthand for a documented aspect ratio plus `1K` / `2K` +- `bytedance/seedream-4.5` + - Supports prompt-only and reference-image generation + - Uses Replicate `size`, `aspect_ratio`, and `image_input` + - Local validation blocks unsupported `1K` requests before the API call +- `bytedance/seedream-5-lite` + - Supports prompt-only and reference-image generation + - Uses Replicate `size`, `aspect_ratio`, and `image_input` + - Local validation currently accepts `2K` / `3K` only +- `wan-video/wan-2.7-image` + - Supports prompt-only and reference-image generation + - Uses Replicate `size` and `images` + - Max output size is 2K +- `wan-video/wan-2.7-image-pro` + - Supports prompt-only and reference-image generation + - Uses Replicate `size` and `images` + - 4K is allowed only for text-to-image; local validation blocks `4K + --ref` + +Guardrails: + +- Replicate currently supports only single-output save semantics in this tool. Keep `--n 1`. +- If a Replicate model is outside the compatibility list above, `baoyu-imagine` only treats it as prompt-only and rejects advanced local options instead of guessing a nano-banana-style schema. Examples: @@ -398,7 +424,7 @@ Supported: `1:1`, `16:9`, `9:16`, `4:3`, `3:4`, `2.35:1` - Google multimodal: uses `imageConfig.aspectRatio` - OpenAI: maps to closest supported size - OpenRouter: sends `imageGenerationOptions.aspect_ratio`; if only `--size ` is given, aspect ratio is inferred automatically -- Replicate: passes `aspect_ratio` to model; when `--ref` is provided without `--ar`, defaults to `match_input_image` +- Replicate: behavior is model-family-specific. `google/nano-banana*` uses `aspect_ratio`; `bytedance/seedream-*` uses documented Replicate aspect ratios; Wan 2.7 maps `--ar` to a concrete `size` - MiniMax: sends official `aspect_ratio` values directly; if `--size ` is given without `--ar`, `width` / `height` are sent for `image-01` ## Generation Mode diff --git a/skills/baoyu-imagine/references/config/first-time-setup.md b/skills/baoyu-imagine/references/config/first-time-setup.md index 6feff5e..68e4efd 100644 --- a/skills/baoyu-imagine/references/config/first-time-setup.md +++ b/skills/baoyu-imagine/references/config/first-time-setup.md @@ -58,7 +58,7 @@ options: - label: "MiniMax" description: "MiniMax image generation with subject-reference character workflows" - label: "Replicate" - description: "Community models - nano-banana-pro, flexible model selection" + description: "Curated Replicate image families - nano-banana-2, Seedream, and Wan image models" ``` ### Question 2: Default Google Model @@ -298,10 +298,14 @@ Notes for Z.AI setup: header: "Replicate Model" question: "Choose a default Replicate image generation model?" options: - - label: "google/nano-banana-pro (Recommended)" - description: "Google's fast image model on Replicate" - - label: "google/nano-banana" - description: "Google's base image model on Replicate" + - label: "google/nano-banana-2 (Recommended)" + description: "Current default for general Replicate image generation in baoyu-imagine" + - label: "bytedance/seedream-4.5" + description: "Replicate Seedream 4.5 with validated local size/ref guardrails" + - label: "bytedance/seedream-5-lite" + description: "Replicate Seedream 5 Lite with validated local size/ref guardrails" + - label: "wan-video/wan-2.7-image-pro" + description: "Replicate Wan 2.7 Image Pro with 4K text-to-image support" ``` ### MiniMax Model Selection diff --git a/skills/baoyu-imagine/references/config/preferences-schema.md b/skills/baoyu-imagine/references/config/preferences-schema.md index b72cec1..cf35c9b 100644 --- a/skills/baoyu-imagine/references/config/preferences-schema.md +++ b/skills/baoyu-imagine/references/config/preferences-schema.md @@ -27,7 +27,7 @@ default_model: dashscope: null # e.g., "qwen-image-2.0-pro" zai: null # e.g., "glm-image" minimax: null # e.g., "image-01" - replicate: null # e.g., "google/nano-banana-pro" + replicate: null # e.g., "google/nano-banana-2" batch: max_workers: 10 @@ -107,7 +107,7 @@ default_model: dashscope: "qwen-image-2.0-pro" zai: "glm-image" minimax: "image-01" - replicate: "google/nano-banana-pro" + replicate: "google/nano-banana-2" batch: max_workers: 10 provider_limits: diff --git a/skills/baoyu-imagine/scripts/main.test.ts b/skills/baoyu-imagine/scripts/main.test.ts index 62fe505..4928367 100644 --- a/skills/baoyu-imagine/scripts/main.test.ts +++ b/skills/baoyu-imagine/scripts/main.test.ts @@ -28,9 +28,11 @@ function makeArgs(overrides: Partial = {}): CliArgs { provider: null, model: null, aspectRatio: null, + aspectRatioSource: null, size: null, quality: null, imageSize: null, + imageSizeSource: null, referenceImages: [], n: 1, batchFile: null, @@ -97,7 +99,9 @@ test("parseArgs parses the main baoyu-imagine CLI flags", () => { assert.equal(args.imagePath, "out/hero"); assert.equal(args.provider, "zai"); assert.equal(args.quality, "2k"); + assert.equal(args.aspectRatioSource, null); assert.equal(args.imageSize, "4K"); + assert.equal(args.imageSizeSource, "cli"); assert.deepEqual(args.referenceImages, ["ref/one.png", "ref/two.jpg"]); assert.equal(args.n, 3); assert.equal(args.jobs, 5); @@ -254,7 +258,21 @@ test("mergeConfig only fills values missing from CLI args", () => { assert.equal(merged.provider, "openai"); assert.equal(merged.quality, "2k"); assert.equal(merged.aspectRatio, "3:2"); + assert.equal(merged.aspectRatioSource, "config"); assert.equal(merged.imageSize, "4K"); + assert.equal(merged.imageSizeSource, "cli"); +}); + +test("mergeConfig tags inherited imageSize defaults so providers can ignore incompatible config", () => { + const merged = mergeConfig( + makeArgs(), + { + default_image_size: "2K", + } satisfies Partial, + ); + + assert.equal(merged.imageSize, "2K"); + assert.equal(merged.imageSizeSource, "config"); }); test("detectProvider rejects non-ref-capable providers and prefers Google first when multiple keys exist", (t) => { @@ -503,5 +521,11 @@ test("path normalization, worker count, and retry classification follow expected assert.equal(getWorkerCount(5, 0, 4), 1); assert.equal(isRetryableGenerationError(new Error("API error (401): denied")), false); + assert.equal( + isRetryableGenerationError( + new Error("Replicate returned 2 outputs, but baoyu-imagine currently supports saving exactly one image per request."), + ), + false, + ); assert.equal(isRetryableGenerationError(new Error("socket hang up")), true); }); diff --git a/skills/baoyu-imagine/scripts/main.ts b/skills/baoyu-imagine/scripts/main.ts index 7c7ca34..6be1b08 100644 --- a/skills/baoyu-imagine/scripts/main.ts +++ b/skills/baoyu-imagine/scripts/main.ts @@ -83,8 +83,8 @@ Options: --size Size (e.g., 1024x1024) --quality normal|2k Quality preset (default: 2k) --imageSize 1K|2K|4K Image size for Google/OpenRouter (default: from quality) - --ref Reference images (Google, OpenAI, Azure, OpenRouter, Replicate, MiniMax, or Seedream 4.0/4.5/5.0) - --n Number of images for the current task (default: 1) + --ref Reference images (Google, OpenAI, Azure, OpenRouter, Replicate supported families, MiniMax, or Seedream 4.0/4.5/5.0) + --n Number of images for the current task (default: 1; Replicate currently requires 1) --json JSON output -h, --help Show help @@ -97,7 +97,7 @@ Batch file format: "promptFiles": ["prompts/hero.md"], "image": "out/hero.png", "provider": "replicate", - "model": "google/nano-banana-pro", + "model": "google/nano-banana-2", "ar": "16:9" } ] @@ -107,6 +107,7 @@ Behavior: - Batch mode automatically runs in parallel when pending tasks >= 2 - Each image retries automatically up to 3 attempts - Batch summary reports success count, failure count, and per-image errors + - Replicate currently supports single-image save semantics only; --n must stay at 1 Environment variables: OPENAI_API_KEY OpenAI API key @@ -128,7 +129,7 @@ Environment variables: ZAI_IMAGE_MODEL Default Z.AI model (glm-image) BIGMODEL_IMAGE_MODEL Backward-compatible alias for Z.AI model (glm-image) MINIMAX_IMAGE_MODEL Default MiniMax model (image-01) - REPLICATE_IMAGE_MODEL Default Replicate model (google/nano-banana-pro) + REPLICATE_IMAGE_MODEL Default Replicate model (google/nano-banana-2) JIMENG_IMAGE_MODEL Default Jimeng model (jimeng_t2i_v40) SEEDREAM_IMAGE_MODEL Default Seedream model (doubao-seedream-5-0-260128) OPENAI_BASE_URL Custom OpenAI endpoint @@ -164,9 +165,11 @@ export function parseArgs(argv: string[]): CliArgs { provider: null, model: null, aspectRatio: null, + aspectRatioSource: null, size: null, quality: null, imageSize: null, + imageSizeSource: null, referenceImages: [], n: 1, batchFile: null, @@ -270,6 +273,7 @@ export function parseArgs(argv: string[]): CliArgs { const v = argv[++i]; if (!v) throw new Error("Missing value for --ar"); out.aspectRatio = v; + out.aspectRatioSource = "cli"; continue; } @@ -291,6 +295,7 @@ export function parseArgs(argv: string[]): CliArgs { const v = argv[++i]?.toUpperCase(); if (v !== "1K" && v !== "2K" && v !== "4K") throw new Error(`Invalid imageSize: ${v}`); out.imageSize = v; + out.imageSizeSource = "cli"; continue; } @@ -541,12 +546,20 @@ export async function loadExtendConfig( } export function mergeConfig(args: CliArgs, extend: Partial): CliArgs { + const aspectRatio = args.aspectRatio ?? extend.default_aspect_ratio ?? null; + const imageSize = args.imageSize ?? extend.default_image_size ?? null; return { ...args, provider: args.provider ?? extend.default_provider ?? null, quality: args.quality ?? extend.default_quality ?? null, - aspectRatio: args.aspectRatio ?? extend.default_aspect_ratio ?? null, - imageSize: args.imageSize ?? extend.default_image_size ?? null, + aspectRatio, + aspectRatioSource: + args.aspectRatioSource ?? + (args.aspectRatio !== null ? "cli" : (aspectRatio !== null ? "config" : null)), + imageSize, + imageSizeSource: + args.imageSizeSource ?? + (args.imageSize !== null ? "cli" : (imageSize !== null ? "config" : null)), }; } @@ -759,6 +772,7 @@ export function isRetryableGenerationError(error: unknown): boolean { "API error (403)", "API error (404)", "temporarily disabled", + "supports saving exactly one image", ]; return !nonRetryableMarkers.some((marker) => msg.includes(marker)); } @@ -872,9 +886,11 @@ export function createTaskArgs(baseArgs: CliArgs, task: BatchTaskInput, batchDir provider: task.provider ?? baseArgs.provider ?? null, model: task.model ?? baseArgs.model ?? null, aspectRatio: task.ar ?? baseArgs.aspectRatio ?? null, + aspectRatioSource: task.ar != null ? "task" : (baseArgs.aspectRatioSource ?? null), size: task.size ?? baseArgs.size ?? null, quality: task.quality ?? baseArgs.quality ?? null, imageSize: task.imageSize ?? baseArgs.imageSize ?? null, + imageSizeSource: task.imageSize != null ? "task" : (baseArgs.imageSizeSource ?? null), referenceImages: task.ref ? task.ref.map((filePath) => resolveBatchPath(batchDir, filePath)) : [], n: task.n ?? baseArgs.n, batchFile: null, diff --git a/skills/baoyu-imagine/scripts/providers/replicate.test.ts b/skills/baoyu-imagine/scripts/providers/replicate.test.ts index c52afb1..cd90def 100644 --- a/skills/baoyu-imagine/scripts/providers/replicate.test.ts +++ b/skills/baoyu-imagine/scripts/providers/replicate.test.ts @@ -5,7 +5,10 @@ import type { CliArgs } from "../types.ts"; import { buildInput, extractOutputUrl, + getDefaultModel, + getModelFamily, parseModelId, + validateArgs, } from "./replicate.ts"; function makeArgs(overrides: Partial = {}): CliArgs { @@ -16,9 +19,11 @@ function makeArgs(overrides: Partial = {}): CliArgs { provider: null, model: null, aspectRatio: null, + aspectRatioSource: null, size: null, quality: null, imageSize: null, + imageSizeSource: null, referenceImages: [], n: 1, batchFile: null, @@ -29,10 +34,24 @@ function makeArgs(overrides: Partial = {}): CliArgs { }; } -test("Replicate model parsing accepts official formats and rejects malformed ones", () => { - assert.deepEqual(parseModelId("google/nano-banana-pro"), { +test("Replicate default model now points at nano-banana-2", () => { + const previous = process.env.REPLICATE_IMAGE_MODEL; + delete process.env.REPLICATE_IMAGE_MODEL; + try { + assert.equal(getDefaultModel(), "google/nano-banana-2"); + } finally { + if (previous == null) { + delete process.env.REPLICATE_IMAGE_MODEL; + } else { + process.env.REPLICATE_IMAGE_MODEL = previous; + } + } +}); + +test("Replicate model parsing and family detection accept supported official ids", () => { + assert.deepEqual(parseModelId("google/nano-banana-2"), { owner: "google", - name: "nano-banana-pro", + name: "nano-banana-2", version: null, }); assert.deepEqual(parseModelId("owner/model:abc123"), { @@ -41,46 +60,224 @@ test("Replicate model parsing accepts official formats and rejects malformed one version: "abc123", }); + assert.equal(getModelFamily("google/nano-banana-pro"), "nano-banana"); + assert.equal(getModelFamily("bytedance/seedream-4.5"), "seedream45"); + assert.equal(getModelFamily("bytedance/seedream-5-lite"), "seedream5lite"); + assert.equal(getModelFamily("wan-video/wan-2.7-image"), "wan27image"); + assert.equal(getModelFamily("wan-video/wan-2.7-image-pro"), "wan27imagepro"); + assert.equal(getModelFamily("stability-ai/sdxl"), "unknown"); + assert.throws( () => parseModelId("just-a-model-name"), /Invalid Replicate model format/, ); }); -test("Replicate input builder maps aspect ratio, image count, quality, and refs", () => { +test("Replicate nano-banana input builder maps refs, aspect ratio, and quality presets", () => { assert.deepEqual( buildInput( + "google/nano-banana-2", "A robot painter", makeArgs({ aspectRatio: "16:9", quality: "2k", - n: 3, }), ["data:image/png;base64,AAAA"], ), { prompt: "A robot painter", - aspect_ratio: "16:9", - number_of_images: 3, resolution: "2K", output_format: "png", + aspect_ratio: "16:9", image_input: ["data:image/png;base64,AAAA"], }, ); assert.deepEqual( - buildInput("A robot painter", makeArgs({ quality: "normal" }), ["ref"]), + buildInput( + "google/nano-banana-2", + "A robot painter", + makeArgs({ size: "1024x1024", quality: "normal" }), + [], + ), { prompt: "A robot painter", - aspect_ratio: "match_input_image", resolution: "1K", output_format: "png", - image_input: ["ref"], + aspect_ratio: "1:1", }, ); }); -test("Replicate output extraction supports string, array, and object URLs", () => { +test("Replicate Seedream and Wan inputs use family-specific request fields", () => { + assert.deepEqual( + buildInput( + "bytedance/seedream-4.5", + "A cinematic portrait", + makeArgs({ quality: "2k", referenceImages: ["local.png"] }), + ["data:image/png;base64,AAAA"], + ), + { + prompt: "A cinematic portrait", + size: "4K", + image_input: ["data:image/png;base64,AAAA"], + aspect_ratio: "match_input_image", + }, + ); + + assert.deepEqual( + buildInput( + "bytedance/seedream-4.5", + "A cinematic portrait", + makeArgs({ size: "1536x1024" }), + [], + ), + { + prompt: "A cinematic portrait", + size: "custom", + width: 1536, + height: 1024, + }, + ); + + assert.deepEqual( + buildInput( + "bytedance/seedream-5-lite", + "A poster", + makeArgs({ aspectRatio: "21:9", quality: "2k" }), + [], + ), + { + prompt: "A poster", + size: "3K", + aspect_ratio: "21:9", + }, + ); + + assert.deepEqual( + buildInput( + "wan-video/wan-2.7-image", + "A storyboard frame", + makeArgs({ aspectRatio: "16:9", quality: "2k" }), + [], + ), + { + prompt: "A storyboard frame", + size: "2048*1152", + }, + ); + + assert.deepEqual( + buildInput( + "wan-video/wan-2.7-image-pro", + "Blend these references", + makeArgs({ size: "2K", referenceImages: ["a.png", "b.png"] }), + ["ref-a", "ref-b"], + ), + { + prompt: "Blend these references", + size: "2K", + images: ["ref-a", "ref-b"], + }, + ); +}); + +test("Replicate validateArgs blocks misleading multi-output and unsupported family options locally", () => { + assert.throws( + () => + validateArgs( + "google/nano-banana-2", + makeArgs({ n: 2 }), + ), + /exactly one output image/, + ); + + assert.throws( + () => + validateArgs( + "bytedance/seedream-4.5", + makeArgs({ size: "1K" }), + ), + /2K, 4K, or an explicit WxH size/, + ); + + assert.throws( + () => + validateArgs( + "bytedance/seedream-5-lite", + makeArgs({ size: "4K" }), + ), + /supports 2K or 3K output/, + ); + + assert.throws( + () => + validateArgs( + "wan-video/wan-2.7-image", + makeArgs({ referenceImages: new Array(10).fill("ref.png") }), + ), + /at most 9 reference images/, + ); + + assert.throws( + () => + validateArgs( + "wan-video/wan-2.7-image-pro", + makeArgs({ referenceImages: ["ref.png"], size: "4K" }), + ), + /only supports 4K text-to-image/, + ); + + assert.throws( + () => + validateArgs( + "stability-ai/sdxl", + makeArgs({ aspectRatio: "16:9" }), + ), + /compatibility list/, + ); + + assert.doesNotThrow(() => + validateArgs( + "google/nano-banana-2", + makeArgs({ imageSize: "2K", imageSizeSource: "config" }), + ), + ); + + assert.throws( + () => + validateArgs( + "google/nano-banana-2", + makeArgs({ imageSize: "2K", imageSizeSource: "cli" }), + ), + /do not use --imageSize/, + ); + + assert.doesNotThrow(() => + validateArgs( + "stability-ai/sdxl", + makeArgs({ aspectRatio: "16:9", aspectRatioSource: "config" }), + ), + ); + + assert.throws( + () => + validateArgs( + "stability-ai/sdxl", + makeArgs({ aspectRatio: "16:9", aspectRatioSource: "cli" }), + ), + /compatibility list/, + ); + + assert.doesNotThrow(() => + validateArgs( + "stability-ai/sdxl", + makeArgs(), + ), + ); +}); + +test("Replicate output extraction supports single outputs and rejects silent multi-image drops", () => { assert.equal( extractOutputUrl({ output: "https://example.com/a.png" } as never), "https://example.com/a.png", @@ -94,6 +291,17 @@ test("Replicate output extraction supports string, array, and object URLs", () = "https://example.com/c.png", ); + assert.throws( + () => + extractOutputUrl({ + output: [ + "https://example.com/one.png", + "https://example.com/two.png", + ], + } as never), + /supports saving exactly one image/, + ); + assert.throws( () => extractOutputUrl({ output: { invalid: true } } as never), /Unexpected Replicate output format/, diff --git a/skills/baoyu-imagine/scripts/providers/replicate.ts b/skills/baoyu-imagine/scripts/providers/replicate.ts index 611d24e..d10e5f1 100644 --- a/skills/baoyu-imagine/scripts/providers/replicate.ts +++ b/skills/baoyu-imagine/scripts/providers/replicate.ts @@ -2,10 +2,37 @@ import path from "node:path"; import { readFile } from "node:fs/promises"; import type { CliArgs } from "../types"; -const DEFAULT_MODEL = "google/nano-banana-pro"; +const DEFAULT_MODEL = "google/nano-banana-2"; const SYNC_WAIT_SECONDS = 60; const POLL_INTERVAL_MS = 2000; const MAX_POLL_MS = 300_000; +const DOCUMENTED_REPLICATE_ASPECT_RATIOS = new Set([ + "1:1", + "2:3", + "3:2", + "3:4", + "4:3", + "5:4", + "4:5", + "9:16", + "16:9", + "21:9", +]); + +export type ReplicateModelFamily = + | "nano-banana" + | "seedream45" + | "seedream5lite" + | "wan27image" + | "wan27imagepro" + | "unknown"; + +type PixelSize = { + width: number; + height: number; +}; + +type Seedream45Size = "2K" | "4K" | { width: number; height: number }; export function getDefaultModel(): string { return process.env.REPLICATE_IMAGE_MODEL || DEFAULT_MODEL; @@ -20,6 +47,40 @@ function getBaseUrl(): string { return base.replace(/\/+$/g, ""); } +function normalizeModelId(model: string): string { + return model.trim().toLowerCase().split(":")[0]!; +} + +export function getModelFamily(model: string): ReplicateModelFamily { + const normalized = normalizeModelId(model); + + if ( + normalized === "google/nano-banana" || + normalized === "google/nano-banana-pro" || + normalized === "google/nano-banana-2" + ) { + return "nano-banana"; + } + + if (normalized === "bytedance/seedream-4.5") { + return "seedream45"; + } + + if (normalized === "bytedance/seedream-5-lite") { + return "seedream5lite"; + } + + if (normalized === "wan-video/wan-2.7-image") { + return "wan27image"; + } + + if (normalized === "wan-video/wan-2.7-image-pro") { + return "wan27imagepro"; + } + + return "unknown"; +} + export function parseModelId(model: string): { owner: string; name: string; version: string | null } { const [ownerName, version] = model.split(":"); const parts = ownerName!.split("/"); @@ -31,27 +92,219 @@ export function parseModelId(model: string): { owner: string; name: string; vers return { owner: parts[0], name: parts[1], version: version || null }; } -export function buildInput(prompt: string, args: CliArgs, referenceImages: string[]): Record { - const input: Record = { prompt }; +function parsePixelSize(value: string): PixelSize | null { + const match = value.trim().match(/^(\d+)\s*[xX*]\s*(\d+)$/); + if (!match) return null; + + const width = parseInt(match[1]!, 10); + const height = parseInt(match[2]!, 10); + if (!Number.isFinite(width) || !Number.isFinite(height) || width <= 0 || height <= 0) { + return null; + } + + return { width, height }; +} + +function parseAspectRatio(value: string): PixelSize | null { + const match = value.trim().match(/^(\d+)\s*:\s*(\d+)$/); + if (!match) return null; + + const width = parseInt(match[1]!, 10); + const height = parseInt(match[2]!, 10); + if (!Number.isFinite(width) || !Number.isFinite(height) || width <= 0 || height <= 0) { + return null; + } + + return { width, height }; +} + +function gcd(a: number, b: number): number { + let x = Math.abs(a); + let y = Math.abs(b); + + while (y !== 0) { + const next = x % y; + x = y; + y = next; + } + + return x || 1; +} + +function inferAspectRatioFromSize(size: string): string | null { + const parsed = parsePixelSize(size); + if (!parsed) return null; + + const divisor = gcd(parsed.width, parsed.height); + const normalized = `${parsed.width / divisor}:${parsed.height / divisor}`; + if (!DOCUMENTED_REPLICATE_ASPECT_RATIOS.has(normalized)) { + return null; + } + + return normalized; +} + +function getQualityPreset(args: CliArgs): "normal" | "2k" { + return args.quality === "normal" ? "normal" : "2k"; +} + +function validateDocumentedAspectRatio(model: string, aspectRatio: string): void { + if (aspectRatio === "match_input_image") { + return; + } + + if (DOCUMENTED_REPLICATE_ASPECT_RATIOS.has(aspectRatio)) { + return; + } + + throw new Error( + `Replicate model ${model} does not support aspect ratio ${aspectRatio}. Supported values: ${Array.from(DOCUMENTED_REPLICATE_ASPECT_RATIOS).join(", ")}` + ); +} + +function getRequestedAspectRatio(model: string, args: CliArgs): string | null { + if (args.aspectRatio) { + validateDocumentedAspectRatio(model, args.aspectRatio); + return args.aspectRatio; + } + + if (!args.size) return null; + + const inferred = inferAspectRatioFromSize(args.size); + if (!inferred) { + throw new Error( + `Replicate model ${model} cannot derive a supported aspect ratio from --size ${args.size}. Use one of: ${Array.from(DOCUMENTED_REPLICATE_ASPECT_RATIOS).join(", ")}` + ); + } + + return inferred; +} + +function getNanoBananaResolution(args: CliArgs): "1K" | "2K" { + if (args.size) { + const parsed = parsePixelSize(args.size); + if (!parsed) { + throw new Error("Replicate nano-banana --size must be in WxH format, for example 1536x1024."); + } + + const longestEdge = Math.max(parsed.width, parsed.height); + if (longestEdge <= 1024) return "1K"; + if (longestEdge <= 2048) return "2K"; + throw new Error("Replicate nano-banana only supports sizes that map to 1K or 2K output."); + } + + return getQualityPreset(args) === "normal" ? "1K" : "2K"; +} + +function resolveSeedream45Size(args: CliArgs): Seedream45Size { + if (args.size) { + const upper = args.size.trim().toUpperCase(); + if (upper === "2K" || upper === "4K") { + return upper; + } + + const parsed = parsePixelSize(args.size); + if (!parsed) { + throw new Error("Replicate Seedream 4.5 --size must be 2K, 4K, or an explicit WxH size."); + } + if (parsed.width < 1024 || parsed.width > 4096 || parsed.height < 1024 || parsed.height > 4096) { + throw new Error("Replicate Seedream 4.5 custom --size must keep width and height between 1024 and 4096."); + } + return parsed; + } + + return getQualityPreset(args) === "normal" ? "2K" : "4K"; +} + +function resolveSeedream5LiteSize(args: CliArgs): "2K" | "3K" { + if (args.size) { + const upper = args.size.trim().toUpperCase(); + if (upper === "2K" || upper === "3K") { + return upper; + } + + throw new Error("Replicate Seedream 5 Lite currently supports 2K or 3K output in this tool."); + } + + return getQualityPreset(args) === "normal" ? "2K" : "3K"; +} + +function formatCustomWanSize(size: PixelSize): string { + return `${size.width}*${size.height}`; +} + +function resolveWanSizeFromAspectRatio( + aspectRatio: string, + maxDimension: number, +): string { + const parsedRatio = parseAspectRatio(aspectRatio); + if (!parsedRatio) { + throw new Error(`Replicate Wan aspect ratio must be in W:H format, got ${aspectRatio}.`); + } + + const scale = Math.min(maxDimension / parsedRatio.width, maxDimension / parsedRatio.height); + const width = Math.max(1, Math.floor(parsedRatio.width * scale)); + const height = Math.max(1, Math.floor(parsedRatio.height * scale)); + return formatCustomWanSize({ width, height }); +} + +function resolveWanSize(family: "wan27image" | "wan27imagepro", args: CliArgs): "1K" | "2K" | "4K" | string { + const referenceMode = args.referenceImages.length > 0; + const maxDimension = family === "wan27imagepro" && !referenceMode ? 4096 : 2048; + + if (args.size) { + const upper = args.size.trim().toUpperCase(); + if (upper === "1K" || upper === "2K" || upper === "4K") { + if (upper === "4K" && family !== "wan27imagepro") { + throw new Error("Replicate Wan 2.7 Image only supports 1K, 2K, or custom sizes up to 2048px."); + } + if (upper === "4K" && referenceMode) { + throw new Error("Replicate Wan 2.7 Image Pro only supports 4K text-to-image. Remove --ref or lower the size."); + } + return upper; + } + + const parsed = parsePixelSize(args.size); + if (!parsed) { + throw new Error("Replicate Wan --size must be 1K, 2K, 4K, or an explicit WxH size."); + } + if (parsed.width > maxDimension || parsed.height > maxDimension) { + throw new Error( + `Replicate ${family === "wan27imagepro" ? "Wan 2.7 Image Pro" : "Wan 2.7 Image"} custom --size must keep width and height at or below ${maxDimension}px in the current mode.` + ); + } + return formatCustomWanSize(parsed); + } if (args.aspectRatio) { - input.aspect_ratio = args.aspectRatio; + return resolveWanSizeFromAspectRatio( + args.aspectRatio, + getQualityPreset(args) === "normal" ? 1024 : 2048, + ); + } + + return getQualityPreset(args) === "normal" ? "1K" : "2K"; +} + +function buildNanoBananaInput( + prompt: string, + model: string, + args: CliArgs, + referenceImages: string[], +): Record { + const input: Record = { + prompt, + resolution: getNanoBananaResolution(args), + output_format: "png", + }; + + const aspectRatio = getRequestedAspectRatio(model, args); + if (aspectRatio) { + input.aspect_ratio = aspectRatio; } else if (referenceImages.length > 0) { input.aspect_ratio = "match_input_image"; } - if (args.n > 1) { - input.number_of_images = args.n; - } - - if (args.quality === "normal") { - input.resolution = "1K"; - } else if (args.quality === "2k") { - input.resolution = "2K"; - } - - input.output_format = "png"; - if (referenceImages.length > 0) { input.image_input = referenceImages; } @@ -59,6 +312,158 @@ export function buildInput(prompt: string, args: CliArgs, referenceImages: strin return input; } +function buildSeedreamInput( + family: "seedream45" | "seedream5lite", + prompt: string, + model: string, + args: CliArgs, + referenceImages: string[], +): Record { + const size = family === "seedream45" ? resolveSeedream45Size(args) : resolveSeedream5LiteSize(args); + const input: Record = { + prompt, + }; + + if (family === "seedream45" && typeof size === "object") { + input.size = "custom"; + input.width = size.width; + input.height = size.height; + } else { + input.size = size; + } + + if (referenceImages.length > 0) { + input.image_input = referenceImages; + } + + if (args.aspectRatio) { + validateDocumentedAspectRatio(model, args.aspectRatio); + input.aspect_ratio = args.aspectRatio; + } else if (referenceImages.length > 0 && family === "seedream45") { + input.aspect_ratio = "match_input_image"; + } + + return input; +} + +function buildWanInput( + family: "wan27image" | "wan27imagepro", + prompt: string, + args: CliArgs, + referenceImages: string[], +): Record { + const input: Record = { + prompt, + size: resolveWanSize(family, args), + }; + + if (referenceImages.length > 0) { + input.images = referenceImages; + } + + return input; +} + +export function validateArgs(model: string, args: CliArgs): void { + parseModelId(model); + + if (args.n !== 1) { + throw new Error("Replicate integration currently supports exactly one output image per request. Remove --n or use --n 1."); + } + + if (args.imageSize && args.imageSizeSource !== "config") { + throw new Error("Replicate models in baoyu-imagine do not use --imageSize. Use --quality, --ar, or --size instead."); + } + + const family = getModelFamily(model); + + if (family === "nano-banana") { + if (args.referenceImages.length > 14) { + throw new Error("Replicate nano-banana supports at most 14 reference images."); + } + if (args.aspectRatio) { + validateDocumentedAspectRatio(model, args.aspectRatio); + } + if (args.size) { + getRequestedAspectRatio(model, args); + getNanoBananaResolution(args); + } + return; + } + + if (family === "seedream45") { + if (args.referenceImages.length > 14) { + throw new Error("Replicate Seedream 4.5 supports at most 14 reference images."); + } + if (args.aspectRatio) { + validateDocumentedAspectRatio(model, args.aspectRatio); + } + resolveSeedream45Size(args); + return; + } + + if (family === "seedream5lite") { + if (args.referenceImages.length > 14) { + throw new Error("Replicate Seedream 5 Lite supports at most 14 reference images."); + } + if (args.aspectRatio) { + validateDocumentedAspectRatio(model, args.aspectRatio); + } + resolveSeedream5LiteSize(args); + return; + } + + if (family === "wan27image" || family === "wan27imagepro") { + if (args.referenceImages.length > 9) { + throw new Error("Replicate Wan 2.7 image models support at most 9 reference images."); + } + if (args.aspectRatio) { + const parsed = parseAspectRatio(args.aspectRatio); + if (!parsed) { + throw new Error(`Replicate Wan aspect ratio must be in W:H format, got ${args.aspectRatio}.`); + } + } + resolveWanSize(family, args); + return; + } + + const hasExplicitAspectRatio = !!args.aspectRatio && args.aspectRatioSource !== "config"; + + if (args.referenceImages.length > 0 || hasExplicitAspectRatio || args.size) { + throw new Error( + `Replicate model ${model} is not in the baoyu-imagine compatibility list. Supported families: google/nano-banana*, bytedance/seedream-4.5, bytedance/seedream-5-lite, wan-video/wan-2.7-image, wan-video/wan-2.7-image-pro.` + ); + } +} + +export function getDefaultOutputExtension(model: string): ".png" { + const _family = getModelFamily(model); + return ".png"; +} + +export function buildInput( + model: string, + prompt: string, + args: CliArgs, + referenceImages: string[], +): Record { + const family = getModelFamily(model); + + if (family === "nano-banana") { + return buildNanoBananaInput(prompt, model, args, referenceImages); + } + + if (family === "seedream45" || family === "seedream5lite") { + return buildSeedreamInput(family, prompt, model, args, referenceImages); + } + + if (family === "wan27image" || family === "wan27imagepro") { + return buildWanInput(family, prompt, args, referenceImages); + } + + return { prompt }; +} + async function readImageAsDataUrl(p: string): Promise { const buf = await readFile(p); const ext = path.extname(p).toLowerCase(); @@ -150,6 +555,11 @@ export function extractOutputUrl(prediction: PredictionResponse): string { if (typeof output === "string") return output; if (Array.isArray(output)) { + if (output.length !== 1) { + throw new Error( + `Replicate returned ${output.length} outputs, but baoyu-imagine currently supports saving exactly one image per request.` + ); + } const first = output[0]; if (typeof first === "string") return first; } @@ -178,13 +588,14 @@ export async function generateImage( if (!apiToken) throw new Error("REPLICATE_API_TOKEN is required. Get one at https://replicate.com/account/api-tokens"); const parsedModel = parseModelId(model); + validateArgs(model, args); const refDataUrls: string[] = []; for (const refPath of args.referenceImages) { refDataUrls.push(await readImageAsDataUrl(refPath)); } - const input = buildInput(prompt, args, refDataUrls); + const input = buildInput(model, prompt, args, refDataUrls); console.log(`Generating image with Replicate (${model})...`); diff --git a/skills/baoyu-imagine/scripts/types.ts b/skills/baoyu-imagine/scripts/types.ts index 7bb0c3f..b7c7640 100644 --- a/skills/baoyu-imagine/scripts/types.ts +++ b/skills/baoyu-imagine/scripts/types.ts @@ -18,9 +18,11 @@ export type CliArgs = { provider: Provider | null; model: string | null; aspectRatio: string | null; + aspectRatioSource?: "cli" | "task" | "config" | null; size: string | null; quality: Quality | null; imageSize: string | null; + imageSizeSource?: "cli" | "task" | "config" | null; referenceImages: string[]; n: number; batchFile: string | null;