Make Replicate image requests schema-aware without overpromising multi-image output
Replicate image models no longer share one assumed Nano Banana input shape. This change routes Seedream and Wan requests through model-family-specific input builders, switches the default Replicate model to nano-banana-2, and adds local validation for the most common schema mismatches before the API returns a remote error.

The implementation stays deliberately narrow. baoyu-imagine still writes one output file per Replicate request, so this commit blocks --n on the Replicate provider instead of exposing partial multi-image behavior that would silently drop outputs.

Constraint: baoyu-imagine currently persists a single output path per request
Constraint: Replicate model families use different request schemas and output defaults
Rejected: Port the previous broader PR as-is | it enabled multi-image request fields without completing multi-image output handling
Rejected: Add full multi-image output support here | larger behavioral surface than needed for a safe first merge
Confidence: high
Scope-risk: moderate
Reversibility: clean
Directive: Do not re-enable Replicate --n until the CLI can name, download, and report multiple output files end to end
Tested: node --import tsx --test skills/baoyu-imagine/scripts/providers/replicate.test.ts
Tested: node --import tsx --test skills/baoyu-imagine/scripts/main.test.ts
Tested: npm test
Not-tested: Live Replicate API calls against Seedream and Wan credentials
Co-authored-by: justnodejs <justnodejs@gmail.com>
This commit is contained in:
parent
ec5f4ffcc9
commit
648f563378
|
|
@ -806,7 +806,7 @@ AI SDK-based image generation using OpenAI, Azure OpenAI, Google, OpenRouter, Da
|
||||||
| `GOOGLE_IMAGE_MODEL` | Google model | `gemini-3-pro-image-preview` |
|
| `GOOGLE_IMAGE_MODEL` | Google model | `gemini-3-pro-image-preview` |
|
||||||
| `DASHSCOPE_IMAGE_MODEL` | DashScope model | `qwen-image-2.0-pro` |
|
| `DASHSCOPE_IMAGE_MODEL` | DashScope model | `qwen-image-2.0-pro` |
|
||||||
| `MINIMAX_IMAGE_MODEL` | MiniMax model | `image-01` |
|
| `MINIMAX_IMAGE_MODEL` | MiniMax model | `image-01` |
|
||||||
| `REPLICATE_IMAGE_MODEL` | Replicate model | `google/nano-banana-pro` |
|
| `REPLICATE_IMAGE_MODEL` | Replicate model | `google/nano-banana-2` |
|
||||||
| `JIMENG_IMAGE_MODEL` | Jimeng model | `jimeng_t2i_v40` |
|
| `JIMENG_IMAGE_MODEL` | Jimeng model | `jimeng_t2i_v40` |
|
||||||
| `SEEDREAM_IMAGE_MODEL` | Seedream model | `doubao-seedream-5-0-260128` |
|
| `SEEDREAM_IMAGE_MODEL` | Seedream model | `doubao-seedream-5-0-260128` |
|
||||||
| `OPENAI_BASE_URL` | Custom OpenAI endpoint | - |
|
| `OPENAI_BASE_URL` | Custom OpenAI endpoint | - |
|
||||||
|
|
@ -1146,7 +1146,7 @@ MINIMAX_IMAGE_MODEL=image-01
|
||||||
|
|
||||||
# Replicate
|
# Replicate
|
||||||
REPLICATE_API_TOKEN=r8_xxx
|
REPLICATE_API_TOKEN=r8_xxx
|
||||||
REPLICATE_IMAGE_MODEL=google/nano-banana-pro
|
REPLICATE_IMAGE_MODEL=google/nano-banana-2
|
||||||
# REPLICATE_BASE_URL=https://api.replicate.com
|
# REPLICATE_BASE_URL=https://api.replicate.com
|
||||||
|
|
||||||
# Jimeng (即梦)
|
# Jimeng (即梦)
|
||||||
|
|
|
||||||
|
|
@ -806,7 +806,7 @@ AI 驱动的生成后端。
|
||||||
| `GOOGLE_IMAGE_MODEL` | Google 模型 | `gemini-3-pro-image-preview` |
|
| `GOOGLE_IMAGE_MODEL` | Google 模型 | `gemini-3-pro-image-preview` |
|
||||||
| `DASHSCOPE_IMAGE_MODEL` | DashScope 模型 | `qwen-image-2.0-pro` |
|
| `DASHSCOPE_IMAGE_MODEL` | DashScope 模型 | `qwen-image-2.0-pro` |
|
||||||
| `MINIMAX_IMAGE_MODEL` | MiniMax 模型 | `image-01` |
|
| `MINIMAX_IMAGE_MODEL` | MiniMax 模型 | `image-01` |
|
||||||
| `REPLICATE_IMAGE_MODEL` | Replicate 模型 | `google/nano-banana-pro` |
|
| `REPLICATE_IMAGE_MODEL` | Replicate 模型 | `google/nano-banana-2` |
|
||||||
| `JIMENG_IMAGE_MODEL` | 即梦模型 | `jimeng_t2i_v40` |
|
| `JIMENG_IMAGE_MODEL` | 即梦模型 | `jimeng_t2i_v40` |
|
||||||
| `SEEDREAM_IMAGE_MODEL` | 豆包模型 | `doubao-seedream-5-0-260128` |
|
| `SEEDREAM_IMAGE_MODEL` | 豆包模型 | `doubao-seedream-5-0-260128` |
|
||||||
| `OPENAI_BASE_URL` | 自定义 OpenAI 端点 | - |
|
| `OPENAI_BASE_URL` | 自定义 OpenAI 端点 | - |
|
||||||
|
|
@ -1146,7 +1146,7 @@ MINIMAX_IMAGE_MODEL=image-01
|
||||||
|
|
||||||
# Replicate
|
# Replicate
|
||||||
REPLICATE_API_TOKEN=r8_xxx
|
REPLICATE_API_TOKEN=r8_xxx
|
||||||
REPLICATE_IMAGE_MODEL=google/nano-banana-pro
|
REPLICATE_IMAGE_MODEL=google/nano-banana-2
|
||||||
# REPLICATE_BASE_URL=https://api.replicate.com
|
# REPLICATE_BASE_URL=https://api.replicate.com
|
||||||
|
|
||||||
# 即梦(Jimeng)
|
# 即梦(Jimeng)
|
||||||
|
|
|
||||||
|
|
@ -112,7 +112,7 @@ ${BUN_X} {baseDir}/scripts/main.ts --prompt "A girl stands by the library window
|
||||||
# MiniMax with custom size (documented for image-01)
|
# MiniMax with custom size (documented for image-01)
|
||||||
${BUN_X} {baseDir}/scripts/main.ts --prompt "A cinematic poster" --image out.jpg --provider minimax --model image-01 --size 1536x1024
|
${BUN_X} {baseDir}/scripts/main.ts --prompt "A cinematic poster" --image out.jpg --provider minimax --model image-01 --size 1536x1024
|
||||||
|
|
||||||
# Replicate (google/nano-banana-pro)
|
# Replicate (google/nano-banana-2)
|
||||||
${BUN_X} {baseDir}/scripts/main.ts --prompt "A cat" --image out.png --provider replicate
|
${BUN_X} {baseDir}/scripts/main.ts --prompt "A cat" --image out.png --provider replicate
|
||||||
|
|
||||||
# Replicate with specific model
|
# Replicate with specific model
|
||||||
|
|
@ -136,7 +136,7 @@ ${BUN_X} {baseDir}/scripts/main.ts --batchfile batch.json --jobs 4 --json
|
||||||
"promptFiles": ["prompts/hero.md"],
|
"promptFiles": ["prompts/hero.md"],
|
||||||
"image": "out/hero.png",
|
"image": "out/hero.png",
|
||||||
"provider": "replicate",
|
"provider": "replicate",
|
||||||
"model": "google/nano-banana-pro",
|
"model": "google/nano-banana-2",
|
||||||
"ar": "16:9",
|
"ar": "16:9",
|
||||||
"quality": "2k"
|
"quality": "2k"
|
||||||
},
|
},
|
||||||
|
|
@ -192,7 +192,7 @@ Paths in `promptFiles`, `image`, and `ref` are resolved relative to the batch fi
|
||||||
| `GOOGLE_IMAGE_MODEL` | Google model override |
|
| `GOOGLE_IMAGE_MODEL` | Google model override |
|
||||||
| `DASHSCOPE_IMAGE_MODEL` | DashScope model override (default: `qwen-image-2.0-pro`) |
|
| `DASHSCOPE_IMAGE_MODEL` | DashScope model override (default: `qwen-image-2.0-pro`) |
|
||||||
| `MINIMAX_IMAGE_MODEL` | MiniMax model override (default: `image-01`) |
|
| `MINIMAX_IMAGE_MODEL` | MiniMax model override (default: `image-01`) |
|
||||||
| `REPLICATE_IMAGE_MODEL` | Replicate model override (default: google/nano-banana-pro) |
|
| `REPLICATE_IMAGE_MODEL` | Replicate model override (default: google/nano-banana-2) |
|
||||||
| `JIMENG_IMAGE_MODEL` | Jimeng model override (default: jimeng_t2i_v40) |
|
| `JIMENG_IMAGE_MODEL` | Jimeng model override (default: jimeng_t2i_v40) |
|
||||||
| `SEEDREAM_IMAGE_MODEL` | Seedream model override (default: doubao-seedream-5-0-260128) |
|
| `SEEDREAM_IMAGE_MODEL` | Seedream model override (default: doubao-seedream-5-0-260128) |
|
||||||
| `OPENAI_BASE_URL` | Custom OpenAI endpoint |
|
| `OPENAI_BASE_URL` | Custom OpenAI endpoint |
|
||||||
|
|
@ -324,7 +324,7 @@ Notes:
|
||||||
|
|
||||||
Supported model formats:
|
Supported model formats:
|
||||||
|
|
||||||
- `owner/name` (recommended for official models), e.g. `google/nano-banana-pro`
|
- `owner/name` (recommended for official models), e.g. `google/nano-banana-2`
|
||||||
- `owner/name:version` (community models by version), e.g. `stability-ai/sdxl:<version>`
|
- `owner/name:version` (community models by version), e.g. `stability-ai/sdxl:<version>`
|
||||||
|
|
||||||
Examples:
|
Examples:
|
||||||
|
|
|
||||||
|
|
@ -56,7 +56,7 @@ options:
|
||||||
- label: "MiniMax"
|
- label: "MiniMax"
|
||||||
description: "MiniMax image generation with subject-reference character workflows"
|
description: "MiniMax image generation with subject-reference character workflows"
|
||||||
- label: "Replicate"
|
- label: "Replicate"
|
||||||
description: "Community models - nano-banana-pro, flexible model selection"
|
description: "Community models - nano-banana-2, flexible model selection"
|
||||||
```
|
```
|
||||||
|
|
||||||
### Question 2: Default Google Model
|
### Question 2: Default Google Model
|
||||||
|
|
@ -263,7 +263,7 @@ Notes for DashScope setup:
|
||||||
header: "Replicate Model"
|
header: "Replicate Model"
|
||||||
question: "Choose a default Replicate image generation model?"
|
question: "Choose a default Replicate image generation model?"
|
||||||
options:
|
options:
|
||||||
- label: "google/nano-banana-pro (Recommended)"
|
- label: "google/nano-banana-2 (Recommended)"
|
||||||
description: "Google's fast image model on Replicate"
|
description: "Google's fast image model on Replicate"
|
||||||
- label: "google/nano-banana"
|
- label: "google/nano-banana"
|
||||||
description: "Google's base image model on Replicate"
|
description: "Google's base image model on Replicate"
|
||||||
|
|
|
||||||
|
|
@ -26,7 +26,7 @@ default_model:
|
||||||
openrouter: null # e.g., "google/gemini-3.1-flash-image-preview"
|
openrouter: null # e.g., "google/gemini-3.1-flash-image-preview"
|
||||||
dashscope: null # e.g., "qwen-image-2.0-pro"
|
dashscope: null # e.g., "qwen-image-2.0-pro"
|
||||||
minimax: null # e.g., "image-01"
|
minimax: null # e.g., "image-01"
|
||||||
replicate: null # e.g., "google/nano-banana-pro"
|
replicate: null # e.g., "google/nano-banana-2"
|
||||||
|
|
||||||
batch:
|
batch:
|
||||||
max_workers: 10
|
max_workers: 10
|
||||||
|
|
@ -101,7 +101,7 @@ default_model:
|
||||||
openrouter: "google/gemini-3.1-flash-image-preview"
|
openrouter: "google/gemini-3.1-flash-image-preview"
|
||||||
dashscope: "qwen-image-2.0-pro"
|
dashscope: "qwen-image-2.0-pro"
|
||||||
minimax: "image-01"
|
minimax: "image-01"
|
||||||
replicate: "google/nano-banana-pro"
|
replicate: "google/nano-banana-2"
|
||||||
batch:
|
batch:
|
||||||
max_workers: 10
|
max_workers: 10
|
||||||
provider_limits:
|
provider_limits:
|
||||||
|
|
|
||||||
|
|
@ -96,7 +96,7 @@ Batch file format:
|
||||||
"promptFiles": ["prompts/hero.md"],
|
"promptFiles": ["prompts/hero.md"],
|
||||||
"image": "out/hero.png",
|
"image": "out/hero.png",
|
||||||
"provider": "replicate",
|
"provider": "replicate",
|
||||||
"model": "google/nano-banana-pro",
|
"model": "google/nano-banana-2",
|
||||||
"ar": "16:9"
|
"ar": "16:9"
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
@ -123,7 +123,7 @@ Environment variables:
|
||||||
GOOGLE_IMAGE_MODEL Default Google model (gemini-3-pro-image-preview)
|
GOOGLE_IMAGE_MODEL Default Google model (gemini-3-pro-image-preview)
|
||||||
DASHSCOPE_IMAGE_MODEL Default DashScope model (qwen-image-2.0-pro)
|
DASHSCOPE_IMAGE_MODEL Default DashScope model (qwen-image-2.0-pro)
|
||||||
MINIMAX_IMAGE_MODEL Default MiniMax model (image-01)
|
MINIMAX_IMAGE_MODEL Default MiniMax model (image-01)
|
||||||
REPLICATE_IMAGE_MODEL Default Replicate model (google/nano-banana-pro)
|
REPLICATE_IMAGE_MODEL Default Replicate model (google/nano-banana-2)
|
||||||
JIMENG_IMAGE_MODEL Default Jimeng model (jimeng_t2i_v40)
|
JIMENG_IMAGE_MODEL Default Jimeng model (jimeng_t2i_v40)
|
||||||
SEEDREAM_IMAGE_MODEL Default Seedream model (doubao-seedream-5-0-260128)
|
SEEDREAM_IMAGE_MODEL Default Seedream model (doubao-seedream-5-0-260128)
|
||||||
OPENAI_BASE_URL Custom OpenAI endpoint
|
OPENAI_BASE_URL Custom OpenAI endpoint
|
||||||
|
|
|
||||||
|
|
@ -5,7 +5,10 @@ import type { CliArgs } from "../types.ts";
|
||||||
import {
|
import {
|
||||||
buildInput,
|
buildInput,
|
||||||
extractOutputUrl,
|
extractOutputUrl,
|
||||||
|
generateImage,
|
||||||
|
getDefaultOutputExtension,
|
||||||
parseModelId,
|
parseModelId,
|
||||||
|
validateArgs,
|
||||||
} from "./replicate.ts";
|
} from "./replicate.ts";
|
||||||
|
|
||||||
function makeArgs(overrides: Partial<CliArgs> = {}): CliArgs {
|
function makeArgs(overrides: Partial<CliArgs> = {}): CliArgs {
|
||||||
|
|
@ -47,21 +50,20 @@ test("Replicate model parsing accepts official formats and rejects malformed one
|
||||||
);
|
);
|
||||||
});
|
});
|
||||||
|
|
||||||
test("Replicate input builder maps aspect ratio, image count, quality, and refs", () => {
|
test("Replicate input builder keeps nano-banana mapping for compatible models", () => {
|
||||||
assert.deepEqual(
|
assert.deepEqual(
|
||||||
buildInput(
|
buildInput(
|
||||||
"A robot painter",
|
"A robot painter",
|
||||||
|
"google/nano-banana-2",
|
||||||
makeArgs({
|
makeArgs({
|
||||||
aspectRatio: "16:9",
|
aspectRatio: "16:9",
|
||||||
quality: "2k",
|
quality: "2k",
|
||||||
n: 3,
|
|
||||||
}),
|
}),
|
||||||
["data:image/png;base64,AAAA"],
|
["data:image/png;base64,AAAA"],
|
||||||
),
|
),
|
||||||
{
|
{
|
||||||
prompt: "A robot painter",
|
prompt: "A robot painter",
|
||||||
aspect_ratio: "16:9",
|
aspect_ratio: "16:9",
|
||||||
number_of_images: 3,
|
|
||||||
resolution: "2K",
|
resolution: "2K",
|
||||||
output_format: "png",
|
output_format: "png",
|
||||||
image_input: ["data:image/png;base64,AAAA"],
|
image_input: ["data:image/png;base64,AAAA"],
|
||||||
|
|
@ -69,7 +71,7 @@ test("Replicate input builder maps aspect ratio, image count, quality, and refs"
|
||||||
);
|
);
|
||||||
|
|
||||||
assert.deepEqual(
|
assert.deepEqual(
|
||||||
buildInput("A robot painter", makeArgs({ quality: "normal" }), ["ref"]),
|
buildInput("A robot painter", "google/nano-banana-pro", makeArgs({ quality: "normal" }), ["ref"]),
|
||||||
{
|
{
|
||||||
prompt: "A robot painter",
|
prompt: "A robot painter",
|
||||||
aspect_ratio: "match_input_image",
|
aspect_ratio: "match_input_image",
|
||||||
|
|
@ -80,6 +82,120 @@ test("Replicate input builder maps aspect ratio, image count, quality, and refs"
|
||||||
);
|
);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
test("Replicate input builder maps Seedream models to their native schema", () => {
|
||||||
|
assert.deepEqual(
|
||||||
|
buildInput(
|
||||||
|
"A robot painter",
|
||||||
|
"bytedance/seedream-4.5",
|
||||||
|
makeArgs({
|
||||||
|
size: "1536x1024",
|
||||||
|
aspectRatio: "16:9",
|
||||||
|
}),
|
||||||
|
["data:image/png;base64,AAAA"],
|
||||||
|
),
|
||||||
|
{
|
||||||
|
prompt: "A robot painter",
|
||||||
|
size: "custom",
|
||||||
|
width: 1536,
|
||||||
|
height: 1024,
|
||||||
|
image_input: ["data:image/png;base64,AAAA"],
|
||||||
|
},
|
||||||
|
);
|
||||||
|
|
||||||
|
assert.deepEqual(
|
||||||
|
buildInput(
|
||||||
|
"A robot painter",
|
||||||
|
"bytedance/seedream-5-lite",
|
||||||
|
makeArgs({
|
||||||
|
size: "3K",
|
||||||
|
aspectRatio: "4:3",
|
||||||
|
}),
|
||||||
|
[],
|
||||||
|
),
|
||||||
|
{
|
||||||
|
prompt: "A robot painter",
|
||||||
|
size: "3K",
|
||||||
|
output_format: "png",
|
||||||
|
aspect_ratio: "4:3",
|
||||||
|
},
|
||||||
|
);
|
||||||
|
});
|
||||||
|
|
||||||
|
test("Replicate input builder maps Wan models to their native schema", () => {
|
||||||
|
assert.deepEqual(
|
||||||
|
buildInput(
|
||||||
|
"A robot painter",
|
||||||
|
"wan-video/wan-2.7-image-pro",
|
||||||
|
makeArgs({
|
||||||
|
quality: "2k",
|
||||||
|
}),
|
||||||
|
["data:image/png;base64,AAAA"],
|
||||||
|
),
|
||||||
|
{
|
||||||
|
prompt: "A robot painter",
|
||||||
|
size: "2K",
|
||||||
|
images: ["data:image/png;base64,AAAA"],
|
||||||
|
thinking_mode: false,
|
||||||
|
},
|
||||||
|
);
|
||||||
|
|
||||||
|
assert.deepEqual(
|
||||||
|
buildInput(
|
||||||
|
"A robot painter",
|
||||||
|
"wan-video/wan-2.7-image",
|
||||||
|
makeArgs({
|
||||||
|
size: "1536x1024",
|
||||||
|
}),
|
||||||
|
[],
|
||||||
|
),
|
||||||
|
{
|
||||||
|
prompt: "A robot painter",
|
||||||
|
size: "1536*1024",
|
||||||
|
thinking_mode: true,
|
||||||
|
},
|
||||||
|
);
|
||||||
|
});
|
||||||
|
|
||||||
|
test("Replicate validation rejects unsupported schema combinations before the API call", () => {
|
||||||
|
assert.throws(
|
||||||
|
() => validateArgs("google/nano-banana-2", makeArgs({ n: 2 })),
|
||||||
|
/Replicate --n is not supported yet/,
|
||||||
|
);
|
||||||
|
|
||||||
|
assert.throws(
|
||||||
|
() => validateArgs("bytedance/seedream-4.5", makeArgs({ size: "8x8" })),
|
||||||
|
/must keep width and height between 1024 and 4096/,
|
||||||
|
);
|
||||||
|
|
||||||
|
assert.throws(
|
||||||
|
() => validateArgs("bytedance/seedream-5-lite", makeArgs({ size: "4K" })),
|
||||||
|
/requires --size to be 2K or 3K/,
|
||||||
|
);
|
||||||
|
|
||||||
|
assert.throws(
|
||||||
|
() => validateArgs("wan-video/wan-2.7-image-pro", makeArgs({ aspectRatio: "16:9" })),
|
||||||
|
/do not accept --ar/,
|
||||||
|
);
|
||||||
|
|
||||||
|
assert.throws(
|
||||||
|
() => validateArgs("wan-video/wan-2.7-image", makeArgs({ referenceImages: Array.from({ length: 10 }, () => "ref.png") })),
|
||||||
|
/at most 9 reference images/,
|
||||||
|
);
|
||||||
|
|
||||||
|
assert.throws(
|
||||||
|
() => validateArgs("wan-video/wan-2.7-image-pro", makeArgs({ size: "4K", referenceImages: ["ref.png"] })),
|
||||||
|
/only supports 4K for text-to-image requests/,
|
||||||
|
);
|
||||||
|
|
||||||
|
assert.doesNotThrow(
|
||||||
|
() => validateArgs("bytedance/seedream-4.5", makeArgs({ size: "1536x1024" })),
|
||||||
|
);
|
||||||
|
|
||||||
|
assert.doesNotThrow(
|
||||||
|
() => validateArgs("wan-video/wan-2.7-image", makeArgs({ size: "1920x1080" })),
|
||||||
|
);
|
||||||
|
});
|
||||||
|
|
||||||
test("Replicate output extraction supports string, array, and object URLs", () => {
|
test("Replicate output extraction supports string, array, and object URLs", () => {
|
||||||
assert.equal(
|
assert.equal(
|
||||||
extractOutputUrl({ output: "https://example.com/a.png" } as never),
|
extractOutputUrl({ output: "https://example.com/a.png" } as never),
|
||||||
|
|
@ -99,3 +215,31 @@ test("Replicate output extraction supports string, array, and object URLs", () =
|
||||||
/Unexpected Replicate output format/,
|
/Unexpected Replicate output format/,
|
||||||
);
|
);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
test("Replicate default output extension matches model family behavior", () => {
|
||||||
|
assert.equal(getDefaultOutputExtension("bytedance/seedream-4.5"), ".jpg");
|
||||||
|
assert.equal(getDefaultOutputExtension("bytedance/seedream-5-lite"), ".png");
|
||||||
|
assert.equal(getDefaultOutputExtension("google/nano-banana-2"), ".png");
|
||||||
|
});
|
||||||
|
|
||||||
|
test("Replicate generateImage validates arguments before making API requests", async () => {
|
||||||
|
const previousToken = process.env.REPLICATE_API_TOKEN;
|
||||||
|
process.env.REPLICATE_API_TOKEN = "test-token";
|
||||||
|
|
||||||
|
try {
|
||||||
|
await assert.rejects(
|
||||||
|
generateImage(
|
||||||
|
"A robot painter",
|
||||||
|
"wan-video/wan-2.7-image-pro",
|
||||||
|
makeArgs({ aspectRatio: "16:9" }),
|
||||||
|
),
|
||||||
|
/do not accept --ar/,
|
||||||
|
);
|
||||||
|
} finally {
|
||||||
|
if (previousToken === undefined) {
|
||||||
|
delete process.env.REPLICATE_API_TOKEN;
|
||||||
|
} else {
|
||||||
|
process.env.REPLICATE_API_TOKEN = previousToken;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
|
||||||
|
|
@ -2,10 +2,16 @@ import path from "node:path";
|
||||||
import { readFile } from "node:fs/promises";
|
import { readFile } from "node:fs/promises";
|
||||||
import type { CliArgs } from "../types";
|
import type { CliArgs } from "../types";
|
||||||
|
|
||||||
const DEFAULT_MODEL = "google/nano-banana-pro";
|
const DEFAULT_MODEL = "google/nano-banana-2";
|
||||||
const SYNC_WAIT_SECONDS = 60;
|
const SYNC_WAIT_SECONDS = 60;
|
||||||
const POLL_INTERVAL_MS = 2000;
|
const POLL_INTERVAL_MS = 2000;
|
||||||
const MAX_POLL_MS = 300_000;
|
const MAX_POLL_MS = 300_000;
|
||||||
|
const SEEDREAM_45_SIZES = new Set(["2K", "4K"]);
|
||||||
|
const SEEDREAM_5_LITE_SIZES = new Set(["2K", "3K"]);
|
||||||
|
const WAN_SIZES = new Set(["1K", "2K"]);
|
||||||
|
const WAN_PRO_SIZES = new Set(["1K", "2K", "4K"]);
|
||||||
|
const SEEDREAM_45_CUSTOM_MIN = 1024;
|
||||||
|
const SEEDREAM_45_CUSTOM_MAX = 4096;
|
||||||
|
|
||||||
export function getDefaultModel(): string {
|
export function getDefaultModel(): string {
|
||||||
return process.env.REPLICATE_IMAGE_MODEL || DEFAULT_MODEL;
|
return process.env.REPLICATE_IMAGE_MODEL || DEFAULT_MODEL;
|
||||||
|
|
@ -31,7 +37,63 @@ export function parseModelId(model: string): { owner: string; name: string; vers
|
||||||
return { owner: parts[0], name: parts[1], version: version || null };
|
return { owner: parts[0], name: parts[1], version: version || null };
|
||||||
}
|
}
|
||||||
|
|
||||||
export function buildInput(prompt: string, args: CliArgs, referenceImages: string[]): Record<string, unknown> {
|
function isSeedream45Model(model: string): boolean {
|
||||||
|
return model.startsWith("bytedance/seedream-4.5");
|
||||||
|
}
|
||||||
|
|
||||||
|
function isSeedream5LiteModel(model: string): boolean {
|
||||||
|
return model.startsWith("bytedance/seedream-5-lite");
|
||||||
|
}
|
||||||
|
|
||||||
|
function isSeedreamModel(model: string): boolean {
|
||||||
|
return isSeedream45Model(model) || isSeedream5LiteModel(model);
|
||||||
|
}
|
||||||
|
|
||||||
|
function isWanProModel(model: string): boolean {
|
||||||
|
return model.startsWith("wan-video/wan-2.7-image-pro");
|
||||||
|
}
|
||||||
|
|
||||||
|
function isWanModel(model: string): boolean {
|
||||||
|
return model.startsWith("wan-video/wan-2.7-image");
|
||||||
|
}
|
||||||
|
|
||||||
|
function parsePixelSize(size: string): { width: number; height: number } | null {
|
||||||
|
const match = size.trim().match(/^(\d+)\s*[xX*]\s*(\d+)$/);
|
||||||
|
if (!match) return null;
|
||||||
|
|
||||||
|
const width = Number.parseInt(match[1]!, 10);
|
||||||
|
const height = Number.parseInt(match[2]!, 10);
|
||||||
|
if (!Number.isFinite(width) || !Number.isFinite(height) || width <= 0 || height <= 0) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
return { width, height };
|
||||||
|
}
|
||||||
|
|
||||||
|
function normalizePixelSize(size: string): string {
|
||||||
|
const parsed = parsePixelSize(size);
|
||||||
|
if (!parsed) return size;
|
||||||
|
return `${parsed.width}*${parsed.height}`;
|
||||||
|
}
|
||||||
|
|
||||||
|
function normalizePresetSize(size: string): string {
|
||||||
|
return size.trim().toUpperCase();
|
||||||
|
}
|
||||||
|
|
||||||
|
function getSeedreamSize(model: string, args: CliArgs): string | null {
|
||||||
|
if (args.size) return args.size;
|
||||||
|
if (isSeedream45Model(model) || isSeedream5LiteModel(model)) return "2K";
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
function getWanSize(args: CliArgs): string | null {
|
||||||
|
if (args.size) return args.size;
|
||||||
|
if (args.quality === "normal") return "1K";
|
||||||
|
if (args.quality === "2k") return "2K";
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
function buildNanoBananaInput(prompt: string, args: CliArgs, referenceImages: string[]): Record<string, unknown> {
|
||||||
const input: Record<string, unknown> = { prompt };
|
const input: Record<string, unknown> = { prompt };
|
||||||
|
|
||||||
if (args.aspectRatio) {
|
if (args.aspectRatio) {
|
||||||
|
|
@ -59,11 +121,165 @@ export function buildInput(prompt: string, args: CliArgs, referenceImages: strin
|
||||||
return input;
|
return input;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function buildSeedreamInput(
|
||||||
|
prompt: string,
|
||||||
|
model: string,
|
||||||
|
args: CliArgs,
|
||||||
|
referenceImages: string[],
|
||||||
|
): Record<string, unknown> {
|
||||||
|
const input: Record<string, unknown> = { prompt };
|
||||||
|
const requestedSize = getSeedreamSize(model, args);
|
||||||
|
|
||||||
|
if (requestedSize) {
|
||||||
|
if (isSeedream45Model(model)) {
|
||||||
|
const customSize = parsePixelSize(requestedSize);
|
||||||
|
if (customSize) {
|
||||||
|
input.size = "custom";
|
||||||
|
input.width = customSize.width;
|
||||||
|
input.height = customSize.height;
|
||||||
|
} else {
|
||||||
|
input.size = normalizePresetSize(requestedSize);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
input.size = normalizePresetSize(requestedSize);
|
||||||
|
input.output_format = "png";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (args.aspectRatio && input.size !== "custom") {
|
||||||
|
input.aspect_ratio = args.aspectRatio;
|
||||||
|
} else if (!args.aspectRatio && referenceImages.length > 0 && input.size !== "custom") {
|
||||||
|
input.aspect_ratio = "match_input_image";
|
||||||
|
}
|
||||||
|
|
||||||
|
if (referenceImages.length > 0) {
|
||||||
|
input.image_input = referenceImages;
|
||||||
|
}
|
||||||
|
|
||||||
|
return input;
|
||||||
|
}
|
||||||
|
|
||||||
|
function buildWanInput(prompt: string, args: CliArgs, referenceImages: string[]): Record<string, unknown> {
|
||||||
|
const input: Record<string, unknown> = { prompt };
|
||||||
|
const requestedSize = getWanSize(args);
|
||||||
|
|
||||||
|
if (requestedSize) {
|
||||||
|
input.size = parsePixelSize(requestedSize)
|
||||||
|
? normalizePixelSize(requestedSize)
|
||||||
|
: normalizePresetSize(requestedSize);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (referenceImages.length > 0) {
|
||||||
|
input.images = referenceImages;
|
||||||
|
input.thinking_mode = false;
|
||||||
|
} else {
|
||||||
|
input.thinking_mode = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
return input;
|
||||||
|
}
|
||||||
|
|
||||||
|
export function getDefaultOutputExtension(model: string): ".png" | ".jpg" {
|
||||||
|
if (isSeedream45Model(model)) return ".jpg";
|
||||||
|
return ".png";
|
||||||
|
}
|
||||||
|
|
||||||
|
export function validateArgs(model: string, args: CliArgs): void {
|
||||||
|
if (args.n > 1) {
|
||||||
|
throw new Error(
|
||||||
|
"Replicate --n is not supported yet in baoyu-imagine because this provider currently writes a single output file per request."
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (isSeedream45Model(model)) {
|
||||||
|
const requestedSize = getSeedreamSize(model, args);
|
||||||
|
if (requestedSize) {
|
||||||
|
const customSize = parsePixelSize(requestedSize);
|
||||||
|
if (customSize) {
|
||||||
|
if (
|
||||||
|
customSize.width < SEEDREAM_45_CUSTOM_MIN ||
|
||||||
|
customSize.width > SEEDREAM_45_CUSTOM_MAX ||
|
||||||
|
customSize.height < SEEDREAM_45_CUSTOM_MIN ||
|
||||||
|
customSize.height > SEEDREAM_45_CUSTOM_MAX
|
||||||
|
) {
|
||||||
|
throw new Error("Seedream 4.5 on Replicate custom --size must keep width and height between 1024 and 4096.");
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
const normalizedSize = normalizePresetSize(requestedSize);
|
||||||
|
if (!SEEDREAM_45_SIZES.has(normalizedSize)) {
|
||||||
|
throw new Error("Seedream 4.5 on Replicate requires --size to be 2K, 4K, or custom dimensions like 1536x1024.");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (args.referenceImages.length > 14) {
|
||||||
|
throw new Error("Seedream 4.5 on Replicate supports at most 14 reference images per request.");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (isSeedream5LiteModel(model)) {
|
||||||
|
const requestedSize = getSeedreamSize(model, args);
|
||||||
|
if (requestedSize && !SEEDREAM_5_LITE_SIZES.has(normalizePresetSize(requestedSize))) {
|
||||||
|
throw new Error("Seedream 5 lite on Replicate requires --size to be 2K or 3K.");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (args.referenceImages.length > 14) {
|
||||||
|
throw new Error("Seedream 5 lite on Replicate supports at most 14 reference images per request.");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (isWanModel(model)) {
|
||||||
|
if (args.aspectRatio) {
|
||||||
|
throw new Error("Wan image models on Replicate do not accept --ar. Use --size with a preset like 2K or explicit dimensions like 1920x1080.");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (args.referenceImages.length > 9) {
|
||||||
|
throw new Error("Wan image models on Replicate support at most 9 reference images per request.");
|
||||||
|
}
|
||||||
|
|
||||||
|
const requestedSize = getWanSize(args);
|
||||||
|
if (requestedSize) {
|
||||||
|
const customSize = parsePixelSize(requestedSize);
|
||||||
|
if (!customSize) {
|
||||||
|
const normalizedSize = normalizePresetSize(requestedSize);
|
||||||
|
const allowedSizes = isWanProModel(model) ? WAN_PRO_SIZES : WAN_SIZES;
|
||||||
|
if (!allowedSizes.has(normalizedSize)) {
|
||||||
|
throw new Error(
|
||||||
|
`Wan image models on Replicate require --size to be one of ${Array.from(allowedSizes).join(", ")} or custom dimensions like 1920x1080.`
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (args.referenceImages.length > 0 && requestedSize && normalizePresetSize(requestedSize) === "4K") {
|
||||||
|
throw new Error("Wan 2.7 Image Pro on Replicate only supports 4K for text-to-image requests without input images.");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
export function buildInput(
|
||||||
|
prompt: string,
|
||||||
|
model: string,
|
||||||
|
args: CliArgs,
|
||||||
|
referenceImages: string[],
|
||||||
|
): Record<string, unknown> {
|
||||||
|
if (isSeedreamModel(model)) {
|
||||||
|
return buildSeedreamInput(prompt, model, args, referenceImages);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (isWanModel(model)) {
|
||||||
|
return buildWanInput(prompt, args, referenceImages);
|
||||||
|
}
|
||||||
|
|
||||||
|
return buildNanoBananaInput(prompt, args, referenceImages);
|
||||||
|
}
|
||||||
|
|
||||||
async function readImageAsDataUrl(p: string): Promise<string> {
|
async function readImageAsDataUrl(p: string): Promise<string> {
|
||||||
const buf = await readFile(p);
|
const buf = await readFile(p);
|
||||||
const ext = path.extname(p).toLowerCase();
|
const ext = path.extname(p).toLowerCase();
|
||||||
let mimeType = "image/png";
|
let mimeType = "image/png";
|
||||||
if (ext === ".jpg" || ext === ".jpeg") mimeType = "image/jpeg";
|
if (ext === ".jpg" || ext === ".jpeg") mimeType = "image/jpeg";
|
||||||
|
else if (ext === ".bmp") mimeType = "image/bmp";
|
||||||
else if (ext === ".gif") mimeType = "image/gif";
|
else if (ext === ".gif") mimeType = "image/gif";
|
||||||
else if (ext === ".webp") mimeType = "image/webp";
|
else if (ext === ".webp") mimeType = "image/webp";
|
||||||
return `data:${mimeType};base64,${buf.toString("base64")}`;
|
return `data:${mimeType};base64,${buf.toString("base64")}`;
|
||||||
|
|
@ -177,6 +393,8 @@ export async function generateImage(
|
||||||
const apiToken = getApiToken();
|
const apiToken = getApiToken();
|
||||||
if (!apiToken) throw new Error("REPLICATE_API_TOKEN is required. Get one at https://replicate.com/account/api-tokens");
|
if (!apiToken) throw new Error("REPLICATE_API_TOKEN is required. Get one at https://replicate.com/account/api-tokens");
|
||||||
|
|
||||||
|
validateArgs(model, args);
|
||||||
|
|
||||||
const parsedModel = parseModelId(model);
|
const parsedModel = parseModelId(model);
|
||||||
|
|
||||||
const refDataUrls: string[] = [];
|
const refDataUrls: string[] = [];
|
||||||
|
|
@ -184,7 +402,7 @@ export async function generateImage(
|
||||||
refDataUrls.push(await readImageAsDataUrl(refPath));
|
refDataUrls.push(await readImageAsDataUrl(refPath));
|
||||||
}
|
}
|
||||||
|
|
||||||
const input = buildInput(prompt, args, refDataUrls);
|
const input = buildInput(prompt, model, args, refDataUrls);
|
||||||
|
|
||||||
console.log(`Generating image with Replicate (${model})...`);
|
console.log(`Generating image with Replicate (${model})...`);
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue