From a60d8420530f7878d2fe01ef19afcfcacf9c6203 Mon Sep 17 00:00:00 2001 From: justnodejs Date: Sat, 11 Apr 2026 13:49:46 +0800 Subject: [PATCH] feat(baoyu-imagine): add schema-aware Replicate model family support --- README.md | 4 +- README.zh.md | 4 +- skills/baoyu-imagine/SKILL.md | 8 +- .../references/config/first-time-setup.md | 4 +- .../references/config/preferences-schema.md | 4 +- skills/baoyu-imagine/scripts/main.ts | 4 +- .../scripts/providers/replicate.test.ts | 171 +++++++++++++- .../scripts/providers/replicate.ts | 221 +++++++++++++++++- 8 files changed, 395 insertions(+), 25 deletions(-) diff --git a/README.md b/README.md index b034ca8..3a88d6b 100644 --- a/README.md +++ b/README.md @@ -768,7 +768,7 @@ AI SDK-based image generation using OpenAI, Azure OpenAI, Google, OpenRouter, Da | `GOOGLE_IMAGE_MODEL` | Google model | `gemini-3-pro-image-preview` | | `DASHSCOPE_IMAGE_MODEL` | DashScope model | `qwen-image-2.0-pro` | | `MINIMAX_IMAGE_MODEL` | MiniMax model | `image-01` | -| `REPLICATE_IMAGE_MODEL` | Replicate model | `google/nano-banana-pro` | +| `REPLICATE_IMAGE_MODEL` | Replicate model | `google/nano-banana-2` | | `JIMENG_IMAGE_MODEL` | Jimeng model | `jimeng_t2i_v40` | | `SEEDREAM_IMAGE_MODEL` | Seedream model | `doubao-seedream-5-0-260128` | | `OPENAI_BASE_URL` | Custom OpenAI endpoint | - | @@ -1108,7 +1108,7 @@ MINIMAX_IMAGE_MODEL=image-01 # Replicate REPLICATE_API_TOKEN=r8_xxx -REPLICATE_IMAGE_MODEL=google/nano-banana-pro +REPLICATE_IMAGE_MODEL=google/nano-banana-2 # REPLICATE_BASE_URL=https://api.replicate.com # Jimeng (即梦) diff --git a/README.zh.md b/README.zh.md index a1757a8..d66e5b3 100644 --- a/README.zh.md +++ b/README.zh.md @@ -768,7 +768,7 @@ AI 驱动的生成后端。 | `GOOGLE_IMAGE_MODEL` | Google 模型 | `gemini-3-pro-image-preview` | | `DASHSCOPE_IMAGE_MODEL` | DashScope 模型 | `qwen-image-2.0-pro` | | `MINIMAX_IMAGE_MODEL` | MiniMax 模型 | `image-01` | -| `REPLICATE_IMAGE_MODEL` | Replicate 模型 | `google/nano-banana-pro` | +| `REPLICATE_IMAGE_MODEL` | Replicate 模型 | `google/nano-banana-2` | | `JIMENG_IMAGE_MODEL` | 即梦模型 | `jimeng_t2i_v40` | | `SEEDREAM_IMAGE_MODEL` | 豆包模型 | `doubao-seedream-5-0-260128` | | `OPENAI_BASE_URL` | 自定义 OpenAI 端点 | - | @@ -1108,7 +1108,7 @@ MINIMAX_IMAGE_MODEL=image-01 # Replicate REPLICATE_API_TOKEN=r8_xxx -REPLICATE_IMAGE_MODEL=google/nano-banana-pro +REPLICATE_IMAGE_MODEL=google/nano-banana-2 # REPLICATE_BASE_URL=https://api.replicate.com # 即梦(Jimeng) diff --git a/skills/baoyu-imagine/SKILL.md b/skills/baoyu-imagine/SKILL.md index 4a9f960..13d159b 100644 --- a/skills/baoyu-imagine/SKILL.md +++ b/skills/baoyu-imagine/SKILL.md @@ -112,7 +112,7 @@ ${BUN_X} {baseDir}/scripts/main.ts --prompt "A girl stands by the library window # MiniMax with custom size (documented for image-01) ${BUN_X} {baseDir}/scripts/main.ts --prompt "A cinematic poster" --image out.jpg --provider minimax --model image-01 --size 1536x1024 -# Replicate (google/nano-banana-pro) +# Replicate (google/nano-banana-2) ${BUN_X} {baseDir}/scripts/main.ts --prompt "A cat" --image out.png --provider replicate # Replicate with specific model @@ -136,7 +136,7 @@ ${BUN_X} {baseDir}/scripts/main.ts --batchfile batch.json --jobs 4 --json "promptFiles": ["prompts/hero.md"], "image": "out/hero.png", "provider": "replicate", - "model": "google/nano-banana-pro", + "model": "google/nano-banana-2", "ar": "16:9", "quality": "2k" }, @@ -192,7 +192,7 @@ Paths in `promptFiles`, `image`, and `ref` are resolved relative to the batch fi | `GOOGLE_IMAGE_MODEL` | Google model override | | `DASHSCOPE_IMAGE_MODEL` | DashScope model override (default: `qwen-image-2.0-pro`) | | `MINIMAX_IMAGE_MODEL` | MiniMax model override (default: `image-01`) | -| `REPLICATE_IMAGE_MODEL` | Replicate model override (default: google/nano-banana-pro) | +| `REPLICATE_IMAGE_MODEL` | Replicate model override (default: google/nano-banana-2) | | `JIMENG_IMAGE_MODEL` | Jimeng model override (default: jimeng_t2i_v40) | | `SEEDREAM_IMAGE_MODEL` | Seedream model override (default: doubao-seedream-5-0-260128) | | `OPENAI_BASE_URL` | Custom OpenAI endpoint | @@ -324,7 +324,7 @@ Notes: Supported model formats: -- `owner/name` (recommended for official models), e.g. `google/nano-banana-pro` +- `owner/name` (recommended for official models), e.g. `google/nano-banana-2` - `owner/name:version` (community models by version), e.g. `stability-ai/sdxl:` Examples: diff --git a/skills/baoyu-imagine/references/config/first-time-setup.md b/skills/baoyu-imagine/references/config/first-time-setup.md index 44c17d7..da46fab 100644 --- a/skills/baoyu-imagine/references/config/first-time-setup.md +++ b/skills/baoyu-imagine/references/config/first-time-setup.md @@ -56,7 +56,7 @@ options: - label: "MiniMax" description: "MiniMax image generation with subject-reference character workflows" - label: "Replicate" - description: "Community models - nano-banana-pro, flexible model selection" + description: "Community models - nano-banana-2, flexible model selection" ``` ### Question 2: Default Google Model @@ -263,7 +263,7 @@ Notes for DashScope setup: header: "Replicate Model" question: "Choose a default Replicate image generation model?" options: - - label: "google/nano-banana-pro (Recommended)" + - label: "google/nano-banana-2 (Recommended)" description: "Google's fast image model on Replicate" - label: "google/nano-banana" description: "Google's base image model on Replicate" diff --git a/skills/baoyu-imagine/references/config/preferences-schema.md b/skills/baoyu-imagine/references/config/preferences-schema.md index 4d1a6f4..55480bc 100644 --- a/skills/baoyu-imagine/references/config/preferences-schema.md +++ b/skills/baoyu-imagine/references/config/preferences-schema.md @@ -26,7 +26,7 @@ default_model: openrouter: null # e.g., "google/gemini-3.1-flash-image-preview" dashscope: null # e.g., "qwen-image-2.0-pro" minimax: null # e.g., "image-01" - replicate: null # e.g., "google/nano-banana-pro" + replicate: null # e.g., "google/nano-banana-2" batch: max_workers: 10 @@ -101,7 +101,7 @@ default_model: openrouter: "google/gemini-3.1-flash-image-preview" dashscope: "qwen-image-2.0-pro" minimax: "image-01" - replicate: "google/nano-banana-pro" + replicate: "google/nano-banana-2" batch: max_workers: 10 provider_limits: diff --git a/skills/baoyu-imagine/scripts/main.ts b/skills/baoyu-imagine/scripts/main.ts index c61dfc8..0cf2a25 100644 --- a/skills/baoyu-imagine/scripts/main.ts +++ b/skills/baoyu-imagine/scripts/main.ts @@ -96,7 +96,7 @@ Batch file format: "promptFiles": ["prompts/hero.md"], "image": "out/hero.png", "provider": "replicate", - "model": "google/nano-banana-pro", + "model": "google/nano-banana-2", "ar": "16:9" } ] @@ -123,7 +123,7 @@ Environment variables: GOOGLE_IMAGE_MODEL Default Google model (gemini-3-pro-image-preview) DASHSCOPE_IMAGE_MODEL Default DashScope model (qwen-image-2.0-pro) MINIMAX_IMAGE_MODEL Default MiniMax model (image-01) - REPLICATE_IMAGE_MODEL Default Replicate model (google/nano-banana-pro) + REPLICATE_IMAGE_MODEL Default Replicate model (google/nano-banana-2) JIMENG_IMAGE_MODEL Default Jimeng model (jimeng_t2i_v40) SEEDREAM_IMAGE_MODEL Default Seedream model (doubao-seedream-5-0-260128) OPENAI_BASE_URL Custom OpenAI endpoint diff --git a/skills/baoyu-imagine/scripts/providers/replicate.test.ts b/skills/baoyu-imagine/scripts/providers/replicate.test.ts index c52afb1..c9a7972 100644 --- a/skills/baoyu-imagine/scripts/providers/replicate.test.ts +++ b/skills/baoyu-imagine/scripts/providers/replicate.test.ts @@ -5,7 +5,9 @@ import type { CliArgs } from "../types.ts"; import { buildInput, extractOutputUrl, + generateImage, parseModelId, + validateArgs, } from "./replicate.ts"; function makeArgs(overrides: Partial = {}): CliArgs { @@ -47,21 +49,20 @@ test("Replicate model parsing accepts official formats and rejects malformed one ); }); -test("Replicate input builder maps aspect ratio, image count, quality, and refs", () => { +test("Replicate input builder keeps nano-banana mapping for compatible models", () => { assert.deepEqual( buildInput( "A robot painter", + "google/nano-banana-2", makeArgs({ aspectRatio: "16:9", quality: "2k", - n: 3, }), ["data:image/png;base64,AAAA"], ), { prompt: "A robot painter", aspect_ratio: "16:9", - number_of_images: 3, resolution: "2K", output_format: "png", image_input: ["data:image/png;base64,AAAA"], @@ -69,7 +70,7 @@ test("Replicate input builder maps aspect ratio, image count, quality, and refs" ); assert.deepEqual( - buildInput("A robot painter", makeArgs({ quality: "normal" }), ["ref"]), + buildInput("A robot painter", "google/nano-banana-pro", makeArgs({ quality: "normal" }), ["ref"]), { prompt: "A robot painter", aspect_ratio: "match_input_image", @@ -80,6 +81,146 @@ test("Replicate input builder maps aspect ratio, image count, quality, and refs" ); }); +test("Replicate input builder maps Seedream models to size-based schema", () => { + assert.deepEqual( + buildInput( + "A robot painter", + "bytedance/seedream-4.5", + makeArgs({ + quality: "2k", + aspectRatio: "16:9", + n: 4, + }), + ["data:image/png;base64,AAAA"], + ), + { + prompt: "A robot painter", + size: "2K", + aspect_ratio: "16:9", + sequential_image_generation: "auto", + max_images: 4, + image_input: ["data:image/png;base64,AAAA"], + }, + ); + + assert.deepEqual( + buildInput( + "A robot painter", + "bytedance/seedream-5-lite", + makeArgs({ + size: "3K", + aspectRatio: "4:3", + }), + [], + ), + { + prompt: "A robot painter", + size: "3K", + aspect_ratio: "4:3", + output_format: "png", + }, + ); +}); + +test("Replicate input builder maps Wan models to their native schema", () => { + assert.deepEqual( + buildInput( + "A robot painter", + "wan-video/wan-2.7-image-pro", + makeArgs({ + quality: "2k", + n: 2, + }), + ["data:image/png;base64,AAAA"], + ), + { + prompt: "A robot painter", + size: "2K", + num_outputs: 2, + images: ["data:image/png;base64,AAAA"], + }, + ); + + assert.deepEqual( + buildInput( + "A robot painter", + "wan-video/wan-2.7-image", + makeArgs({ + size: "2048x1152", + }), + [], + ), + { + prompt: "A robot painter", + size: "2048*1152", + thinking_mode: true, + }, + ); +}); + +test("Replicate input builder falls back to nano-banana schema for unknown models", () => { + assert.deepEqual( + buildInput( + "A robot painter", + "unknown-owner/unknown-model", + makeArgs({ + aspectRatio: "16:9", + quality: "2k", + }), + ["ref"], + ), + { + prompt: "A robot painter", + aspect_ratio: "16:9", + resolution: "2K", + output_format: "png", + image_input: ["ref"], + }, + ); +}); + +test("Replicate validation catches unsupported Seedream and Wan argument combinations", () => { + assert.throws( + () => validateArgs("bytedance/seedream-4.5", makeArgs({ size: "large" })), + /Seedream on Replicate requires --size/, + ); + + assert.throws( + () => validateArgs("bytedance/seedream-5-lite", makeArgs({ size: "4K" })), + /Seedream on Replicate requires --size to be one of 2K, 3K/, + ); + + assert.throws( + () => validateArgs("google/nano-banana-2", makeArgs({ n: 2 })), + /Nano Banana models on Replicate do not support --n yet/, + ); + + assert.throws( + () => validateArgs("wan-video/wan-2.7-image-pro", makeArgs({ aspectRatio: "16:9" })), + /Wan image models on Replicate require --size when using --ar/, + ); + + assert.throws( + () => validateArgs("wan-video/wan-2.7-image", makeArgs({ size: "wide" })), + /Wan image models on Replicate require --size/, + ); + + assert.throws( + () => validateArgs("wan-video/wan-2.7-image", makeArgs({ size: "4K" })), + /Wan image models on Replicate require --size to be one of/, + ); + + assert.throws( + () => validateArgs("wan-video/wan-2.7-image-pro", makeArgs({ size: "4K", referenceImages: ["ref"] })), + /only supports 4K for text-to-image requests without input images/, + ); + + assert.throws( + () => validateArgs("wan-video/wan-2.7-image-pro", makeArgs({ n: 5 })), + /support --n values from 1 to 4/, + ); +}); + test("Replicate output extraction supports string, array, and object URLs", () => { assert.equal( extractOutputUrl({ output: "https://example.com/a.png" } as never), @@ -99,3 +240,25 @@ test("Replicate output extraction supports string, array, and object URLs", () = /Unexpected Replicate output format/, ); }); + +test("Replicate generateImage validates arguments before making API requests", async () => { + const previousToken = process.env.REPLICATE_API_TOKEN; + process.env.REPLICATE_API_TOKEN = "test-token"; + + try { + await assert.rejects( + generateImage( + "A robot painter", + "wan-video/wan-2.7-image-pro", + makeArgs({ aspectRatio: "16:9" }), + ), + /Wan image models on Replicate require --size when using --ar/, + ); + } finally { + if (previousToken === undefined) { + delete process.env.REPLICATE_API_TOKEN; + } else { + process.env.REPLICATE_API_TOKEN = previousToken; + } + } +}); diff --git a/skills/baoyu-imagine/scripts/providers/replicate.ts b/skills/baoyu-imagine/scripts/providers/replicate.ts index 611d24e..3481651 100644 --- a/skills/baoyu-imagine/scripts/providers/replicate.ts +++ b/skills/baoyu-imagine/scripts/providers/replicate.ts @@ -2,10 +2,31 @@ import path from "node:path"; import { readFile } from "node:fs/promises"; import type { CliArgs } from "../types"; -const DEFAULT_MODEL = "google/nano-banana-pro"; +const DEFAULT_MODEL = "google/nano-banana-2"; const SYNC_WAIT_SECONDS = 60; const POLL_INTERVAL_MS = 2000; const MAX_POLL_MS = 300_000; +const SIZE_PRESET_PATTERN = /^\d+K$/i; +const SEEDREAM_45_SIZES = new Set(["2K", "4K"]); +const SEEDREAM_5_LITE_SIZES = new Set(["2K", "3K"]); +const WAN_PRO_SIZES = new Set([ + "1K", "2K", "4K", + "1024*1024", "2048*2048", "4096*4096", + "1280*720", "720*1280", + "2048*1152", "1152*2048", + "4096*2304", "2304*4096", + "1024*768", "768*1024", + "2048*1536", "1536*2048", + "4096*3072", "3072*4096", +]); +const WAN_SIZES = new Set([ + "1K", "2K", + "1024*1024", "2048*2048", + "1280*720", "720*1280", + "2048*1152", "1152*2048", + "1024*768", "768*1024", + "2048*1536", "1536*2048", +]); export function getDefaultModel(): string { return process.env.REPLICATE_IMAGE_MODEL || DEFAULT_MODEL; @@ -31,7 +52,78 @@ export function parseModelId(model: string): { owner: string; name: string; vers return { owner: parts[0], name: parts[1], version: version || null }; } -export function buildInput(prompt: string, args: CliArgs, referenceImages: string[]): Record { +function isNanoBananaModel(model: string): boolean { + return model.startsWith("google/nano-banana"); +} + +function isSeedreamModel(model: string): boolean { + return model.startsWith("bytedance/seedream-4.5") || model.startsWith("bytedance/seedream-5-lite"); +} + +function isSeedream45Model(model: string): boolean { + return model.startsWith("bytedance/seedream-4.5"); +} + +function isSeedream5LiteModel(model: string): boolean { + return model.startsWith("bytedance/seedream-5-lite"); +} + +function isWanModel(model: string): boolean { + return model.startsWith("wan-video/wan-2.7-image"); +} + +function isWanProModel(model: string): boolean { + return model.startsWith("wan-video/wan-2.7-image-pro"); +} + +function parsePixelSize(size: string): { width: number; height: number } | null { + const match = size.trim().match(/^(\d+)\s*[xX*]\s*(\d+)$/); + if (!match) return null; + + const width = Number.parseInt(match[1]!, 10); + const height = Number.parseInt(match[2]!, 10); + + if (!Number.isFinite(width) || !Number.isFinite(height) || width <= 0 || height <= 0) { + return null; + } + + return { width, height }; +} + +function normalizePixelSize(size: string): string { + const parsed = parsePixelSize(size); + if (!parsed) return size; + return `${parsed.width}*${parsed.height}`; +} + +function isPresetSize(size: string): boolean { + return SIZE_PRESET_PATTERN.test(size.trim()); +} + +function getSeedreamDefaultSize(model: string, quality: CliArgs["quality"]): string | null { + if (!isSeedreamModel(model) || !quality) return null; + return "2K"; +} + +function getWanDefaultSize(quality: CliArgs["quality"]): string | null { + if (quality === "normal") return "1K"; + if (quality === "2k") return "2K"; + return null; +} + +function getAllowedSeedreamSizes(model: string): Set { + return isSeedream45Model(model) ? SEEDREAM_45_SIZES : SEEDREAM_5_LITE_SIZES; +} + +function getAllowedWanSizes(model: string): Set { + return isWanProModel(model) ? WAN_PRO_SIZES : WAN_SIZES; +} + +function normalizePresetSize(size: string): string { + return size.trim().toUpperCase(); +} + +function buildNanoBananaInput(prompt: string, args: CliArgs, referenceImages: string[]): Record { const input: Record = { prompt }; if (args.aspectRatio) { @@ -40,10 +132,6 @@ export function buildInput(prompt: string, args: CliArgs, referenceImages: strin input.aspect_ratio = "match_input_image"; } - if (args.n > 1) { - input.number_of_images = args.n; - } - if (args.quality === "normal") { input.resolution = "1K"; } else if (args.quality === "2k") { @@ -59,6 +147,123 @@ export function buildInput(prompt: string, args: CliArgs, referenceImages: strin return input; } +function buildSeedreamInput(prompt: string, model: string, args: CliArgs, referenceImages: string[]): Record { + const input: Record = { prompt }; + const requestedSize = args.size || getSeedreamDefaultSize(model, args.quality); + + if (requestedSize) { + input.size = normalizePresetSize(requestedSize); + } + + if (args.aspectRatio) { + input.aspect_ratio = args.aspectRatio; + } + + if (args.n > 1) { + input.sequential_image_generation = "auto"; + input.max_images = args.n; + } + + if (referenceImages.length > 0) { + input.image_input = referenceImages; + } + + if (isSeedream5LiteModel(model)) { + input.output_format = "png"; + } + + return input; +} + +function buildWanInput(prompt: string, model: string, args: CliArgs, referenceImages: string[]): Record { + const input: Record = { prompt }; + const requestedSize = args.size || getWanDefaultSize(args.quality); + + if (requestedSize) { + input.size = parsePixelSize(requestedSize) ? normalizePixelSize(requestedSize) : normalizePresetSize(requestedSize); + } + + if (args.n > 1) { + input.num_outputs = args.n; + } + + if (referenceImages.length > 0) { + input.images = referenceImages; + } + + // thinking_mode only applies to pure text-to-image. + // image_set_mode is not exposed by the current CLI, so no extra check is needed here yet. + if (referenceImages.length === 0) { + input.thinking_mode = true; + } + + return input; +} + +export function validateArgs(model: string, args: CliArgs): void { + if (isNanoBananaModel(model) && args.n > 1) { + throw new Error("Nano Banana models on Replicate do not support --n yet because their current schema does not expose a multi-image count field."); + } + + if (isSeedreamModel(model)) { + if (args.size) { + const normalizedSize = normalizePresetSize(args.size); + if (!getAllowedSeedreamSizes(model).has(normalizedSize)) { + throw new Error( + `Seedream on Replicate requires --size to be one of ${Array.from(getAllowedSeedreamSizes(model)).join(", ")}. Received: ${args.size}` + ); + } + } + + if (args.n < 1 || args.n > 15) { + throw new Error("Seedream on Replicate supports --n values from 1 to 15."); + } + } + + if (isWanModel(model)) { + if (args.aspectRatio && !args.size) { + throw new Error("Wan image models on Replicate require --size when using --ar. This provider does not infer size from aspect ratio."); + } + + if (args.size) { + const normalizedSize = parsePixelSize(args.size) ? normalizePixelSize(args.size) : normalizePresetSize(args.size); + if (!getAllowedWanSizes(model).has(normalizedSize)) { + throw new Error( + `Wan image models on Replicate require --size to be one of ${Array.from(getAllowedWanSizes(model)).join(", ")}. Received: ${args.size}` + ); + } + } + + if (args.n < 1 || args.n > 4) { + throw new Error("Wan image models on Replicate support --n values from 1 to 4 in standard mode."); + } + + if (args.size && normalizePresetSize(args.size) === "4K" && args.referenceImages.length > 0) { + throw new Error("Wan 2.7 Image Pro on Replicate only supports 4K for text-to-image requests without input images."); + } + } +} + +export function buildInput( + prompt: string, + model: string, + args: CliArgs, + referenceImages: string[] +): Record { + if (isSeedreamModel(model)) { + return buildSeedreamInput(prompt, model, args, referenceImages); + } + + if (isWanModel(model)) { + return buildWanInput(prompt, model, args, referenceImages); + } + + // Fall back to nano-banana schema for unknown Replicate models. + // This preserves backward compatibility; unsupported models will fail + // at API validation time if they reject nano-banana-style fields. + return buildNanoBananaInput(prompt, args, referenceImages); +} + async function readImageAsDataUrl(p: string): Promise { const buf = await readFile(p); const ext = path.extname(p).toLowerCase(); @@ -177,6 +382,8 @@ export async function generateImage( const apiToken = getApiToken(); if (!apiToken) throw new Error("REPLICATE_API_TOKEN is required. Get one at https://replicate.com/account/api-tokens"); + validateArgs(model, args); + const parsedModel = parseModelId(model); const refDataUrls: string[] = []; @@ -184,7 +391,7 @@ export async function generateImage( refDataUrls.push(await readImageAsDataUrl(refPath)); } - const input = buildInput(prompt, args, refDataUrls); + const input = buildInput(prompt, model, args, refDataUrls); console.log(`Generating image with Replicate (${model})...`);