feat(baoyu-image-gen): add Seedream reference image support and model-specific validation
This commit is contained in:
parent
cf76a0d4d5
commit
fc2f0d042a
|
|
@ -695,7 +695,7 @@ AI SDK-based image generation using OpenAI, Google, OpenRouter, DashScope (Aliyu
|
|||
# Seedream (豆包)
|
||||
/baoyu-image-gen --prompt "一只可爱的猫" --image cat.png --provider seedream
|
||||
|
||||
# With reference images (Google, OpenAI, OpenRouter, or Replicate)
|
||||
# With reference images (Google, OpenAI, OpenRouter, Replicate, or Seedream 5.0/4.5/4.0)
|
||||
/baoyu-image-gen --prompt "Make it blue" --image out.png --ref source.png
|
||||
```
|
||||
|
||||
|
|
@ -710,7 +710,7 @@ AI SDK-based image generation using OpenAI, Google, OpenRouter, DashScope (Aliyu
|
|||
| `--ar` | Aspect ratio (e.g., `16:9`, `1:1`, `4:3`) |
|
||||
| `--size` | Size (e.g., `1024x1024`) |
|
||||
| `--quality` | `normal` or `2k` (default: `2k`) |
|
||||
| `--ref` | Reference images (Google, OpenAI, OpenRouter or Replicate) |
|
||||
| `--ref` | Reference images (Google, OpenAI, OpenRouter, Replicate, or Seedream 5.0/4.5/4.0) |
|
||||
|
||||
**Environment Variables** (see [Environment Configuration](#environment-configuration) for setup):
|
||||
| Variable | Description | Default |
|
||||
|
|
|
|||
|
|
@ -695,7 +695,7 @@ AI 驱动的生成后端。
|
|||
# 豆包(Seedream)
|
||||
/baoyu-image-gen --prompt "一只可爱的猫" --image cat.png --provider seedream
|
||||
|
||||
# 带参考图(Google、OpenAI、OpenRouter 或 Replicate)
|
||||
# 带参考图(Google、OpenAI、OpenRouter、Replicate 或 Seedream 5.0/4.5/4.0)
|
||||
/baoyu-image-gen --prompt "把它变成蓝色" --image out.png --ref source.png
|
||||
```
|
||||
|
||||
|
|
@ -710,7 +710,7 @@ AI 驱动的生成后端。
|
|||
| `--ar` | 宽高比(如 `16:9`、`1:1`、`4:3`) |
|
||||
| `--size` | 尺寸(如 `1024x1024`) |
|
||||
| `--quality` | `normal` 或 `2k`(默认:`2k`) |
|
||||
| `--ref` | 参考图片(Google、OpenAI、OpenRouter 或 Replicate) |
|
||||
| `--ref` | 参考图片(Google、OpenAI、OpenRouter、Replicate 或 Seedream 5.0/4.5/4.0) |
|
||||
|
||||
**环境变量**(配置方法见[环境配置](#环境配置)):
|
||||
| 变量 | 说明 | 默认值 |
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
---
|
||||
name: baoyu-image-gen
|
||||
description: AI image generation with OpenAI, Google, OpenRouter, DashScope, Jimeng, Seedream and Replicate APIs. Supports text-to-image, reference images, aspect ratios, and batch generation from saved prompt files. Sequential by default; use batch parallel generation when the user already has multiple prompts or wants stable multi-image throughput. Use when user asks to generate, create, or draw images.
|
||||
version: 1.56.2
|
||||
version: 1.56.3
|
||||
metadata:
|
||||
openclaw:
|
||||
homepage: https://github.com/JimLiu/baoyu-skills#baoyu-image-gen
|
||||
|
|
@ -74,7 +74,7 @@ ${BUN_X} {baseDir}/scripts/main.ts --prompt "A cat" --image out.png --quality 2k
|
|||
# From prompt files
|
||||
${BUN_X} {baseDir}/scripts/main.ts --promptfiles system.md content.md --image out.png
|
||||
|
||||
# With reference images (Google, OpenAI, OpenRouter, or Replicate)
|
||||
# With reference images (Google, OpenAI, OpenRouter, Replicate, or Seedream 5.0/4.5/4.0)
|
||||
${BUN_X} {baseDir}/scripts/main.ts --prompt "Make blue" --image out.png --ref source.png
|
||||
|
||||
# With reference images (explicit provider/model)
|
||||
|
|
@ -153,7 +153,7 @@ Paths in `promptFiles`, `image`, and `ref` are resolved relative to the batch fi
|
|||
| `--size <WxH>` | Size (e.g., `1024x1024`) |
|
||||
| `--quality normal\|2k` | Quality preset (default: `2k`) |
|
||||
| `--imageSize 1K\|2K\|4K` | Image size for Google/OpenRouter (default: from quality) |
|
||||
| `--ref <files...>` | Reference images. Supported by Google multimodal, OpenAI GPT Image edits, OpenRouter multimodal models, and Replicate. Not supported by Jimeng or Seedream |
|
||||
| `--ref <files...>` | Reference images. Supported by Google multimodal, OpenAI GPT Image edits, OpenRouter multimodal models, Replicate, and Seedream 5.0/4.5/4.0. Not supported by Jimeng, Seedream 3.0, or removed SeedEdit 3.0 |
|
||||
| `--n <count>` | Number of images |
|
||||
| `--json` | JSON output |
|
||||
|
||||
|
|
|
|||
|
|
@ -216,6 +216,39 @@ test("detectProvider selects an available ref-capable provider for reference-ima
|
|||
);
|
||||
});
|
||||
|
||||
test("detectProvider infers Seedream from model id and allows Seedream reference-image workflows", (t) => {
|
||||
useEnv(t, {
|
||||
GOOGLE_API_KEY: null,
|
||||
OPENAI_API_KEY: null,
|
||||
OPENROUTER_API_KEY: null,
|
||||
DASHSCOPE_API_KEY: null,
|
||||
REPLICATE_API_TOKEN: null,
|
||||
JIMENG_ACCESS_KEY_ID: null,
|
||||
JIMENG_SECRET_ACCESS_KEY: null,
|
||||
ARK_API_KEY: "ark-key",
|
||||
});
|
||||
|
||||
assert.equal(
|
||||
detectProvider(
|
||||
makeArgs({
|
||||
model: "doubao-seedream-4-5-251128",
|
||||
referenceImages: ["ref.png"],
|
||||
}),
|
||||
),
|
||||
"seedream",
|
||||
);
|
||||
|
||||
assert.equal(
|
||||
detectProvider(
|
||||
makeArgs({
|
||||
provider: "seedream",
|
||||
referenceImages: ["ref.png"],
|
||||
}),
|
||||
),
|
||||
"seedream",
|
||||
);
|
||||
});
|
||||
|
||||
test("batch worker and provider-rate-limit configuration prefer env over EXTEND config", (t) => {
|
||||
useEnv(t, {
|
||||
BAOYU_IMAGE_GEN_MAX_WORKERS: "12",
|
||||
|
|
@ -294,6 +327,7 @@ test("loadBatchTasks and createTaskArgs resolve batch-relative paths", async (t)
|
|||
|
||||
test("path normalization, worker count, and retry classification follow expected rules", () => {
|
||||
assert.match(normalizeOutputImagePath("out/sample"), /out[\\/]+sample\.png$/);
|
||||
assert.match(normalizeOutputImagePath("out/sample", ".jpg"), /out[\\/]+sample\.jpg$/);
|
||||
assert.match(normalizeOutputImagePath("out/sample.webp"), /out[\\/]+sample\.webp$/);
|
||||
|
||||
assert.equal(getWorkerCount(8, null, 3), 3);
|
||||
|
|
|
|||
|
|
@ -14,6 +14,8 @@ import type {
|
|||
type ProviderModule = {
|
||||
getDefaultModel: () => string;
|
||||
generateImage: (prompt: string, model: string, args: CliArgs) => Promise<Uint8Array>;
|
||||
validateArgs?: (model: string, args: CliArgs) => void;
|
||||
getDefaultOutputExtension?: (model: string, args: CliArgs) => string;
|
||||
};
|
||||
|
||||
type PreparedTask = {
|
||||
|
|
@ -78,7 +80,7 @@ Options:
|
|||
--size <WxH> Size (e.g., 1024x1024)
|
||||
--quality normal|2k Quality preset (default: 2k)
|
||||
--imageSize 1K|2K|4K Image size for Google/OpenRouter (default: from quality)
|
||||
--ref <files...> Reference images (Google multimodal, OpenAI GPT Image edits, OpenRouter multimodal, or Replicate)
|
||||
--ref <files...> Reference images (Google, OpenAI, OpenRouter, Replicate, or Seedream 4.0/4.5/5.0)
|
||||
--n <count> Number of images for the current task (default: 1)
|
||||
--json JSON output
|
||||
-h, --help Show help
|
||||
|
|
@ -560,11 +562,17 @@ async function readPromptFromStdin(): Promise<string | null> {
|
|||
}
|
||||
}
|
||||
|
||||
export function normalizeOutputImagePath(p: string): string {
|
||||
export function normalizeOutputImagePath(p: string, defaultExtension = ".png"): string {
|
||||
const full = path.resolve(p);
|
||||
const ext = path.extname(full);
|
||||
if (ext) return full;
|
||||
return `${full}.png`;
|
||||
return `${full}${defaultExtension}`;
|
||||
}
|
||||
|
||||
/**
 * Guess the provider from a model ID alone.
 *
 * Only Volcengine ARK image models are recognised: any ID containing
 * "seedream" or "seededit" maps to the "seedream" provider. The match is
 * case-insensitive so IDs typed with different capitalisation are still
 * routed to the same provider.
 *
 * @param model - Model ID from the CLI/config, or null when none was given.
 * @returns "seedream" for ARK-looking IDs, otherwise null (nothing inferred).
 */
function inferProviderFromModel(model: string | null): Provider | null {
  if (!model) return null;

  const id = model.toLowerCase();
  return id.includes("seedream") || id.includes("seededit") ? "seedream" : null;
}
|
||||
|
||||
export function detectProvider(args: CliArgs): Provider {
|
||||
|
|
@ -574,10 +582,11 @@ export function detectProvider(args: CliArgs): Provider {
|
|||
args.provider !== "google" &&
|
||||
args.provider !== "openai" &&
|
||||
args.provider !== "openrouter" &&
|
||||
args.provider !== "replicate"
|
||||
args.provider !== "replicate" &&
|
||||
args.provider !== "seedream"
|
||||
) {
|
||||
throw new Error(
|
||||
"Reference images require a ref-capable provider. Use --provider google (Gemini multimodal), --provider openai (GPT Image edits), --provider openrouter (OpenRouter multimodal), or --provider replicate."
|
||||
"Reference images require a ref-capable provider. Use --provider google (Gemini multimodal), --provider openai (GPT Image edits), --provider openrouter (OpenRouter multimodal), --provider replicate, or --provider seedream for supported Seedream models."
|
||||
);
|
||||
}
|
||||
|
||||
|
|
@ -590,14 +599,23 @@ export function detectProvider(args: CliArgs): Provider {
|
|||
const hasReplicate = !!process.env.REPLICATE_API_TOKEN;
|
||||
const hasJimeng = !!(process.env.JIMENG_ACCESS_KEY_ID && process.env.JIMENG_SECRET_ACCESS_KEY);
|
||||
const hasSeedream = !!process.env.ARK_API_KEY;
|
||||
const modelProvider = inferProviderFromModel(args.model);
|
||||
|
||||
if (modelProvider === "seedream") {
|
||||
if (!hasSeedream) {
|
||||
throw new Error("Model looks like a Volcengine ARK image model, but ARK_API_KEY is not set.");
|
||||
}
|
||||
return "seedream";
|
||||
}
|
||||
|
||||
if (args.referenceImages.length > 0) {
|
||||
if (hasGoogle) return "google";
|
||||
if (hasOpenai) return "openai";
|
||||
if (hasOpenrouter) return "openrouter";
|
||||
if (hasReplicate) return "replicate";
|
||||
if (hasSeedream) return "seedream";
|
||||
throw new Error(
|
||||
"Reference images require Google, OpenAI, OpenRouter, or Replicate. Set GOOGLE_API_KEY/GEMINI_API_KEY, OPENAI_API_KEY, OPENROUTER_API_KEY, or REPLICATE_API_TOKEN, or remove --ref."
|
||||
"Reference images require Google, OpenAI, OpenRouter, Replicate, or supported Seedream models. Set GOOGLE_API_KEY/GEMINI_API_KEY, OPENAI_API_KEY, OPENROUTER_API_KEY, REPLICATE_API_TOKEN, or ARK_API_KEY, or remove --ref."
|
||||
);
|
||||
}
|
||||
|
||||
|
|
@ -701,6 +719,8 @@ async function prepareSingleTask(args: CliArgs, extendConfig: Partial<ExtendConf
|
|||
const provider = detectProvider(args);
|
||||
const providerModule = await loadProviderModule(provider);
|
||||
const model = getModelForProvider(provider, args.model, extendConfig, providerModule);
|
||||
providerModule.validateArgs?.(model, args);
|
||||
const defaultOutputExtension = providerModule.getDefaultOutputExtension?.(model, args) ?? ".png";
|
||||
|
||||
return {
|
||||
id: "single",
|
||||
|
|
@ -708,7 +728,7 @@ async function prepareSingleTask(args: CliArgs, extendConfig: Partial<ExtendConf
|
|||
args,
|
||||
provider,
|
||||
model,
|
||||
outputPath: normalizeOutputImagePath(args.imagePath),
|
||||
outputPath: normalizeOutputImagePath(args.imagePath, defaultOutputExtension),
|
||||
providerModule,
|
||||
};
|
||||
}
|
||||
|
|
@ -784,13 +804,15 @@ async function prepareBatchTasks(
|
|||
const provider = detectProvider(taskArgs);
|
||||
const providerModule = await loadProviderModule(provider);
|
||||
const model = getModelForProvider(provider, taskArgs.model, extendConfig, providerModule);
|
||||
providerModule.validateArgs?.(model, taskArgs);
|
||||
const defaultOutputExtension = providerModule.getDefaultOutputExtension?.(model, taskArgs) ?? ".png";
|
||||
prepared.push({
|
||||
id: task.id || `task-${String(i + 1).padStart(2, "0")}`,
|
||||
prompt,
|
||||
args: taskArgs,
|
||||
provider,
|
||||
model,
|
||||
outputPath: normalizeOutputImagePath(taskArgs.imagePath),
|
||||
outputPath: normalizeOutputImagePath(taskArgs.imagePath, defaultOutputExtension),
|
||||
providerModule,
|
||||
});
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,244 @@
|
|||
import assert from "node:assert/strict";
|
||||
import fs from "node:fs/promises";
|
||||
import os from "node:os";
|
||||
import path from "node:path";
|
||||
import test, { type TestContext } from "node:test";
|
||||
|
||||
import type { CliArgs } from "../types.ts";
|
||||
import {
|
||||
buildImageInput,
|
||||
buildRequestBody,
|
||||
generateImage,
|
||||
getDefaultOutputExtension,
|
||||
resolveSeedreamSize,
|
||||
validateArgs,
|
||||
} from "./seedream.ts";
|
||||
|
||||
/**
 * Build a CliArgs fixture for tests.
 *
 * Every field starts at its "nothing specified" value (null, empty array,
 * false, or n = 1); entries in `overrides` replace the matching defaults.
 * A fresh object (and fresh arrays) is produced on every call.
 */
function makeArgs(overrides: Partial<CliArgs> = {}): CliArgs {
  const defaults: CliArgs = {
    prompt: null,
    promptFiles: [],
    imagePath: null,
    provider: null,
    model: null,
    aspectRatio: null,
    size: null,
    quality: null,
    imageSize: null,
    referenceImages: [],
    n: 1,
    batchFile: null,
    jobs: null,
    json: false,
    help: false,
  };

  return { ...defaults, ...overrides };
}
|
||||
|
||||
/**
 * Temporarily override environment variables for a single test.
 *
 * For each entry in `values`, a string sets the variable and null deletes
 * it. The previous values are captured first and restored (or deleted, if
 * the variable was originally unset) by a hook registered with `t.after`.
 */
function useEnv(
  t: TestContext,
  values: Record<string, string | null>,
): void {
  // Shared by the apply and restore phases: null/undefined means "unset".
  const assign = (name: string, next: string | null | undefined): void => {
    if (next == null) {
      delete process.env[name];
      return;
    }
    process.env[name] = next;
  };

  const snapshot = new Map<string, string | undefined>();
  for (const name of Object.keys(values)) {
    snapshot.set(name, process.env[name]);
    assign(name, values[name]);
  }

  t.after(() => {
    snapshot.forEach((original, name) => assign(name, original));
  });
}
|
||||
|
||||
/**
 * Write a tiny PNG fixture into a new temporary directory.
 *
 * The directory is created under the OS temp dir with the prefix
 * "seedream-test-" and is removed recursively by a `t.after` hook.
 *
 * @param name - File name to create inside the temporary directory.
 * @returns Absolute path of the written file.
 */
async function makeTempPng(t: TestContext, name: string): Promise<string> {
  const tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "seedream-test-"));
  t.after(() => fs.rm(tempDir, { recursive: true, force: true }));

  // Base64 payload of the PNG fixture written to disk.
  const pixelBase64 =
    "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/x8AAwMCAO+a7m0AAAAASUVORK5CYII=";
  const target = path.join(tempDir, name);
  await fs.writeFile(target, Buffer.from(pixelBase64, "base64"));

  return target;
}
|
||||
|
||||
test("Seedream request body and default extensions follow official model capabilities", () => {
|
||||
const five = buildRequestBody(
|
||||
"A robot illustrator",
|
||||
"doubao-seedream-5-0-260128",
|
||||
makeArgs(),
|
||||
);
|
||||
assert.equal(five.size, "2K");
|
||||
assert.equal(five.response_format, "url");
|
||||
assert.equal(five.output_format, "png");
|
||||
assert.equal(getDefaultOutputExtension("doubao-seedream-5-0-260128"), ".png");
|
||||
|
||||
const fourFive = buildRequestBody(
|
||||
"A robot illustrator",
|
||||
"doubao-seedream-4-5-251128",
|
||||
makeArgs(),
|
||||
);
|
||||
assert.equal(fourFive.size, "2K");
|
||||
assert.equal(fourFive.response_format, "url");
|
||||
assert.ok(!("output_format" in fourFive));
|
||||
assert.equal(getDefaultOutputExtension("doubao-seedream-4-5-251128"), ".jpg");
|
||||
|
||||
assert.throws(
|
||||
() =>
|
||||
buildRequestBody(
|
||||
"Change the bubbles into hearts",
|
||||
"doubao-seededit-3-0-i2i-250628",
|
||||
makeArgs({ referenceImages: ["ref.png"] }),
|
||||
"data:image/png;base64,AAAA",
|
||||
),
|
||||
/no longer supported/,
|
||||
);
|
||||
});
|
||||
|
||||
test("Seedream size selection validates model-specific presets", () => {
|
||||
assert.equal(
|
||||
resolveSeedreamSize("doubao-seedream-4-0-250828", makeArgs({ quality: "normal" })),
|
||||
"1K",
|
||||
);
|
||||
assert.equal(
|
||||
resolveSeedreamSize("doubao-seedream-3-0-t2i-250415", makeArgs({ quality: "2k" })),
|
||||
"2048x2048",
|
||||
);
|
||||
|
||||
assert.throws(
|
||||
() =>
|
||||
resolveSeedreamSize("doubao-seedream-5-0-260128", makeArgs({ size: "4K" })),
|
||||
/only supports 2K, 3K/,
|
||||
);
|
||||
assert.throws(
|
||||
() =>
|
||||
resolveSeedreamSize("doubao-seedream-3-0-t2i-250415", makeArgs({ imageSize: "2K" })),
|
||||
/only supports explicit WxH sizes/,
|
||||
);
|
||||
assert.throws(
|
||||
() =>
|
||||
resolveSeedreamSize("doubao-seededit-3-0-i2i-250628", makeArgs({ size: "1024x1024" })),
|
||||
/no longer supported/,
|
||||
);
|
||||
});
|
||||
|
||||
test("Seedream reference-image support is model-specific", () => {
|
||||
assert.doesNotThrow(() =>
|
||||
validateArgs(
|
||||
"doubao-seedream-5-0-260128",
|
||||
makeArgs({ referenceImages: ["a.png", "b.png"] }),
|
||||
),
|
||||
);
|
||||
|
||||
assert.throws(
|
||||
() =>
|
||||
validateArgs(
|
||||
"doubao-seedream-3-0-t2i-250415",
|
||||
makeArgs({ referenceImages: ["a.png"] }),
|
||||
),
|
||||
/does not support reference images/,
|
||||
);
|
||||
|
||||
assert.throws(
|
||||
() =>
|
||||
validateArgs(
|
||||
"doubao-seededit-3-0-i2i-250628",
|
||||
makeArgs(),
|
||||
),
|
||||
/no longer supported/,
|
||||
);
|
||||
|
||||
assert.throws(
|
||||
() =>
|
||||
validateArgs(
|
||||
"ep-20260315171508-t8br2",
|
||||
makeArgs({ referenceImages: ["a.png"] }),
|
||||
),
|
||||
/require a known model ID/,
|
||||
);
|
||||
});
|
||||
|
||||
test("Seedream image input encodes local references as data URLs", async (t) => {
|
||||
const refOne = await makeTempPng(t, "one.png");
|
||||
const refTwo = await makeTempPng(t, "two.png");
|
||||
|
||||
const single = await buildImageInput("doubao-seedream-4-5-251128", [refOne]);
|
||||
assert.match(String(single), /^data:image\/png;base64,/);
|
||||
|
||||
const multiple = await buildImageInput("doubao-seedream-5-0-260128", [refOne, refTwo]);
|
||||
assert.ok(Array.isArray(multiple));
|
||||
assert.equal(multiple.length, 2);
|
||||
});
|
||||
|
||||
test("Seedream generateImage posts the documented response_format and downloads the returned URL", async (t) => {
|
||||
useEnv(t, { ARK_API_KEY: "test-key", SEEDREAM_BASE_URL: null });
|
||||
|
||||
const originalFetch = globalThis.fetch;
|
||||
t.after(() => {
|
||||
globalThis.fetch = originalFetch;
|
||||
});
|
||||
|
||||
const calls: Array<{
|
||||
input: string;
|
||||
init?: RequestInit;
|
||||
}> = [];
|
||||
|
||||
globalThis.fetch = async (input, init) => {
|
||||
calls.push({
|
||||
input: String(input),
|
||||
init,
|
||||
});
|
||||
|
||||
if (calls.length === 1) {
|
||||
return Response.json({
|
||||
model: "doubao-seedream-4-5-251128",
|
||||
created: 1740000000,
|
||||
data: [
|
||||
{
|
||||
url: "https://example.com/generated-image",
|
||||
size: "2048x2048",
|
||||
},
|
||||
],
|
||||
usage: {
|
||||
generated_images: 1,
|
||||
output_tokens: 1,
|
||||
total_tokens: 1,
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
return new Response(Uint8Array.from([7, 8, 9]), {
|
||||
status: 200,
|
||||
headers: { "Content-Type": "image/jpeg" },
|
||||
});
|
||||
};
|
||||
|
||||
const image = await generateImage(
|
||||
"A robot illustrator",
|
||||
"doubao-seedream-4-5-251128",
|
||||
makeArgs(),
|
||||
);
|
||||
|
||||
assert.deepEqual([...image], [7, 8, 9]);
|
||||
assert.equal(calls.length, 2);
|
||||
assert.equal(
|
||||
calls[0]?.input,
|
||||
"https://ark.cn-beijing.volces.com/api/v3/images/generations",
|
||||
);
|
||||
|
||||
const requestBody = JSON.parse(String(calls[0]?.init?.body)) as Record<string, unknown>;
|
||||
assert.equal(requestBody.model, "doubao-seedream-4-5-251128");
|
||||
assert.equal(requestBody.size, "2K");
|
||||
assert.equal(requestBody.response_format, "url");
|
||||
assert.ok(!("output_format" in requestBody));
|
||||
assert.equal(calls[1]?.input, "https://example.com/generated-image");
|
||||
});
|
||||
|
|
@ -1,5 +1,50 @@
|
|||
import path from "node:path";
|
||||
import { readFile } from "node:fs/promises";
|
||||
|
||||
import type { CliArgs } from "../types";
|
||||
|
||||
export type SeedreamModelFamily =
|
||||
| "seedream5"
|
||||
| "seedream45"
|
||||
| "seedream40"
|
||||
| "seedream30"
|
||||
| "unknown";
|
||||
|
||||
type SeedreamRequestImage = string | string[];
|
||||
|
||||
type SeedreamRequestBody = {
|
||||
model: string;
|
||||
prompt: string;
|
||||
size: string;
|
||||
response_format: "url";
|
||||
watermark: boolean;
|
||||
image?: SeedreamRequestImage;
|
||||
output_format?: "png";
|
||||
};
|
||||
|
||||
type SeedreamImageResponse = {
|
||||
model?: string;
|
||||
created?: number;
|
||||
data?: Array<{
|
||||
url?: string;
|
||||
b64_json?: string;
|
||||
size?: string;
|
||||
error?: {
|
||||
code?: string;
|
||||
message?: string;
|
||||
};
|
||||
}>;
|
||||
usage?: {
|
||||
generated_images: number;
|
||||
output_tokens: number;
|
||||
total_tokens: number;
|
||||
};
|
||||
error?: {
|
||||
code?: string;
|
||||
message?: string;
|
||||
};
|
||||
};
|
||||
|
||||
/**
 * Default Seedream model ID.
 *
 * @returns The SEEDREAM_IMAGE_MODEL environment variable when it is set to a
 * non-empty value, otherwise the built-in default model ID.
 */
export function getDefaultModel(): string {
  const configured = process.env.SEEDREAM_IMAGE_MODEL;
  if (configured) return configured;
  return "doubao-seedream-5-0-260128";
}
|
||||
|
|
@ -12,46 +57,252 @@ function getBaseUrl(): string {
|
|||
return process.env.SEEDREAM_BASE_URL || "https://ark.cn-beijing.volces.com/api/v3";
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert aspect ratio to Seedream size format
|
||||
* Seedream API accepts: "2k" (default), "3k", or WIDTHxHEIGHT format
|
||||
* Note: API uses lowercase "2k"/"3k", not "2K"/"3K"
|
||||
*/
|
||||
function getSeedreamSize(ar: string | null, quality: CliArgs["quality"], imageSize?: string | null): string {
|
||||
// If explicit size is provided
|
||||
if (imageSize) {
|
||||
const upper = imageSize.toUpperCase();
|
||||
if (upper === "2K" || upper === "3K") {
|
||||
return upper.toLowerCase(); // API expects "2k" or "3k"
|
||||
}
|
||||
// For widthxheight format, pass through as-is
|
||||
if (imageSize.includes("x")) {
|
||||
return imageSize;
|
||||
}
|
||||
function parsePixelSize(value: string): { width: number; height: number } | null {
|
||||
const match = value.trim().match(/^(\d+)\s*[xX]\s*(\d+)$/);
|
||||
if (!match) return null;
|
||||
|
||||
const width = parseInt(match[1]!, 10);
|
||||
const height = parseInt(match[2]!, 10);
|
||||
if (!Number.isFinite(width) || !Number.isFinite(height) || width <= 0 || height <= 0) {
|
||||
return null;
|
||||
}
|
||||
|
||||
// Default to 2k (smallest option supported by API)
|
||||
return "2k";
|
||||
return { width, height };
|
||||
}
|
||||
|
||||
type SeedreamImageResponse = {
|
||||
model: string;
|
||||
created: number;
|
||||
data: Array<{
|
||||
url: string;
|
||||
size: string;
|
||||
}>;
|
||||
usage: {
|
||||
generated_images: number;
|
||||
output_tokens: number;
|
||||
total_tokens: number;
|
||||
/**
 * Canonicalise a pixel size string to "WIDTHxHEIGHT".
 *
 * @returns The size rebuilt from the dimensions parsed by parsePixelSize, or
 * null when parsePixelSize rejects the input.
 */
function normalizePixelSize(value: string): string | null {
  const dims = parsePixelSize(value);
  return dims ? `${dims.width}x${dims.height}` : null;
}
|
||||
|
||||
/**
 * Canonicalise a named size preset.
 *
 * Matching ignores case and surrounding whitespace. "adaptive" is returned
 * in lowercase; 1K/2K/3K/4K are returned in uppercase.
 *
 * @returns The canonical preset, or null when `value` is not a known preset.
 */
function normalizeSizePreset(value: string): string | null {
  const token = value.trim().toUpperCase();

  switch (token) {
    case "ADAPTIVE":
      return "adaptive";
    case "1K":
    case "2K":
    case "3K":
    case "4K":
      return token;
    default:
      return null;
  }
}
|
||||
|
||||
/**
 * Canonicalise a user-supplied size: named presets are tried first, then
 * explicit pixel dimensions.
 *
 * @returns The canonical size string, or null when neither form matches.
 */
function normalizeSizeValue(value: string): string | null {
  const preset = normalizeSizePreset(value);
  if (preset !== null) return preset;
  return normalizePixelSize(value);
}
|
||||
|
||||
/**
 * Map a file name to an image MIME type using its extension
 * (case-insensitive).
 *
 * @returns The MIME type for jpg/jpeg, webp, gif, bmp and tif/tiff; any other
 * or missing extension yields "image/png".
 */
function getMimeType(filename: string): string {
  switch (path.extname(filename).toLowerCase()) {
    case ".jpg":
    case ".jpeg":
      return "image/jpeg";
    case ".webp":
      return "image/webp";
    case ".gif":
      return "image/gif";
    case ".bmp":
      return "image/bmp";
    case ".tiff":
    case ".tif":
      return "image/tiff";
    default:
      return "image/png";
  }
}
|
||||
|
||||
/**
 * Read a local file and encode it as a base64 `data:` URL.
 *
 * The MIME type comes from getMimeType, i.e. from the file extension rather
 * than the file contents.
 */
async function readImageAsDataUrl(filePath: string): Promise<string> {
  const contents = await readFile(filePath);
  const mime = getMimeType(filePath);
  const payload = contents.toString("base64");
  return `data:${mime};base64,${payload}`;
}
|
||||
|
||||
/**
 * Classify a model ID into a Seedream family.
 *
 * The ID is trimmed and must match one of the anchored patterns below in
 * full; anything else (for example an endpoint ID) is "unknown".
 */
export function getModelFamily(model: string): SeedreamModelFamily {
  const id = model.trim();

  // Checked in order; the first full match wins.
  const families: Array<[RegExp, SeedreamModelFamily]> = [
    [/^doubao-seedream-5-0(?:-lite)?-\d+$/, "seedream5"],
    [/^doubao-seedream-4-5-\d+$/, "seedream45"],
    [/^doubao-seedream-4-0-\d+$/, "seedream40"],
    [/^doubao-seedream-3-0-t2i-\d+$/, "seedream30"],
  ];

  for (const [pattern, family] of families) {
    if (pattern.test(id)) return family;
  }
  return "unknown";
}
|
||||
|
||||
/**
 * Report whether `model` (after trimming) is a SeedEdit 3.0 image-to-image
 * model ID of the form `doubao-seededit-3-0-i2i-<digits>`.
 */
function isRemovedSeededitModel(model: string): boolean {
  const id = model.trim();
  return id.match(/^doubao-seededit-3-0-i2i-\d+$/) !== null;
}
|
||||
|
||||
/**
 * Reject model IDs this tool has dropped.
 *
 * @throws Error when `model` is a removed SeedEdit 3.0 ID; every other ID
 * passes through untouched.
 */
function assertSupportedModel(model: string): void {
  if (!isRemovedSeededitModel(model)) return;

  throw new Error(
    `${model} is no longer supported. SeedEdit 3.0 support has been removed from this tool; use Seedream 5.0/4.5/4.0/3.0 instead.`
  );
}
|
||||
|
||||
/**
 * Whether the model accepts reference images: true for the Seedream 5.0,
 * 4.5 and 4.0 families, false for Seedream 3.0 and unknown IDs.
 */
export function supportsReferenceImages(model: string): boolean {
  switch (getModelFamily(model)) {
    case "seedream5":
    case "seedream45":
    case "seedream40":
      return true;
    default:
      return false;
  }
}
|
||||
|
||||
/**
 * Whether the request may carry an `output_format` field; only the
 * Seedream 5.0 family qualifies.
 */
function supportsOutputFormat(model: string): boolean {
  const family = getModelFamily(model);
  return family === "seedream5";
}
|
||||
|
||||
/**
 * Default file extension for images produced by `model`.
 *
 * @returns ".png" when the model supports `output_format` (this tool then
 * requests PNG), otherwise ".jpg".
 * @throws Error for removed SeedEdit 3.0 model IDs.
 */
export function getDefaultOutputExtension(model: string): ".png" | ".jpg" {
  assertSupportedModel(model);

  if (supportsOutputFormat(model)) {
    return ".png";
  }
  return ".jpg";
}
|
||||
|
||||
/**
 * Size to request when the user gave no explicit size.
 *
 * - Seedream 4.0: "1K" for `quality: "normal"`, otherwise "2K".
 * - Seedream 3.0: "2048x2048" for `quality: "2k"`, otherwise "1024x1024".
 * - Seedream 5.0, 4.5 and unknown IDs: "2K".
 *
 * @throws Error for removed SeedEdit 3.0 model IDs.
 */
export function getDefaultSeedreamSize(model: string, args: CliArgs): string {
  assertSupportedModel(model);

  switch (getModelFamily(model)) {
    case "seedream40":
      return args.quality === "normal" ? "1K" : "2K";
    case "seedream30":
      return args.quality === "2k" ? "2048x2048" : "1024x1024";
    default:
      return "2K";
  }
}
|
||||
|
||||
/**
 * Resolve the `size` value to send to the Seedream API.
 *
 * `args.size` takes precedence over `args.imageSize`. When neither is given
 * the model's default (see getDefaultSeedreamSize) is used. A value that is
 * given but is neither a known preset nor a WxH size is rejected instead of
 * being silently replaced by the default, so a typo cannot quietly produce
 * an image of the wrong size.
 *
 * Accepted values per family:
 * - Seedream 3.0: explicit WxH only.
 * - Seedream 5.0: 2K, 3K, or WxH.
 * - Seedream 4.5: 2K, 4K, or WxH.
 * - Seedream 4.0: 1K, 2K, 4K, or WxH.
 * - Unknown IDs: 2K or WxH.
 *
 * @param model - Seedream model ID (or endpoint ID).
 * @param args - Parsed CLI arguments; reads `size`, `imageSize`, `quality`.
 * @returns Canonical size string (preset or "WIDTHxHEIGHT").
 * @throws Error for removed SeedEdit models, unparseable sizes, and sizes the
 * model family does not accept.
 */
export function resolveSeedreamSize(model: string, args: CliArgs): string {
  assertSupportedModel(model);

  const requested = args.size || args.imageSize || null;
  if (!requested) {
    return getDefaultSeedreamSize(model, args);
  }

  const normalized = normalizeSizeValue(requested);
  if (!normalized) {
    throw new Error(
      `Invalid size "${requested}". Use a preset such as 2K or an explicit WxH size such as 1024x1024.`
    );
  }

  const family = getModelFamily(model);

  if (family === "seedream30") {
    // Presets normalise fine but are not accepted here; only WxH survives.
    const pixelSize = normalizePixelSize(normalized);
    if (!pixelSize) {
      throw new Error("Seedream 3.0 only supports explicit WxH sizes such as 1024x1024.");
    }
    return pixelSize;
  }

  if (family === "seedream5") {
    if (normalized === "4K" || normalized === "1K" || normalized === "adaptive") {
      throw new Error("Seedream 5.0 only supports 2K, 3K, or explicit WxH sizes.");
    }
    return normalized;
  }

  if (family === "seedream45") {
    if (normalized === "1K" || normalized === "3K" || normalized === "adaptive") {
      throw new Error("Seedream 4.5 only supports 2K, 4K, or explicit WxH sizes.");
    }
    return normalized;
  }

  if (family === "seedream40") {
    if (normalized === "3K" || normalized === "adaptive") {
      throw new Error("Seedream 4.0 only supports 1K, 2K, 4K, or explicit WxH sizes.");
    }
    return normalized;
  }

  // Unknown model ID (e.g. an endpoint ID): only 2K and explicit WxH sizes
  // are passed through.
  if (normalized === "adaptive") {
    throw new Error("Adaptive size is not supported by Seedream image generation.");
  }

  if (normalized === "1K" || normalized === "3K" || normalized === "4K") {
    throw new Error(
      "Unknown Seedream model ID. Use a documented model ID or pass an explicit WxH size instead of preset imageSize."
    );
  }

  return normalized;
}
|
||||
|
||||
/**
 * Validate CLI arguments against what `model` supports.
 *
 * Checks, in order: the model is not a removed SeedEdit ID; when reference
 * images are present, the model family is known, supports references, and
 * the count does not exceed 14; finally the requested size resolves for the
 * model (resolveSeedreamSize throws otherwise).
 *
 * @throws Error describing the first violated constraint.
 */
export function validateArgs(model: string, args: CliArgs): void {
  assertSupportedModel(model);

  const refCount = args.referenceImages.length;
  if (refCount > 0) {
    if (getModelFamily(model) === "unknown") {
      throw new Error(
        "Reference images with Seedream require a known model ID. Use Seedream 5.0/4.5/4.0 model IDs instead of an endpoint ID."
      );
    }

    if (!supportsReferenceImages(model)) {
      throw new Error(`${model} does not support reference images.`);
    }

    // Past this point the family is one of 5.0/4.5/4.0, all capped at 14.
    if (refCount > 14) {
      throw new Error(`${model} supports at most 14 reference images.`);
    }
  }

  // Called for its validation side effect; the resolved value is unused.
  resolveSeedreamSize(model, args);
}
|
||||
|
||||
/**
 * Turn local reference image paths into the request's `image` value.
 *
 * @returns undefined when there are no references, a single data URL for
 * one reference, or an array of data URLs (same order as the input) for
 * several.
 * @throws Error for removed SeedEdit models (only when references are
 * present), or when a file cannot be read.
 */
export async function buildImageInput(
  model: string,
  referenceImages: string[],
): Promise<SeedreamRequestImage | undefined> {
  if (referenceImages.length === 0) {
    return undefined;
  }
  assertSupportedModel(model);

  const pending = referenceImages.map((refPath) => readImageAsDataUrl(refPath));
  const dataUrls = await Promise.all(pending);

  if (dataUrls.length === 1) {
    return dataUrls[0]!;
  }
  return dataUrls;
}
|
||||
|
||||
export function buildRequestBody(
|
||||
prompt: string,
|
||||
model: string,
|
||||
args: CliArgs,
|
||||
imageInput?: SeedreamRequestImage,
|
||||
): SeedreamRequestBody {
|
||||
validateArgs(model, args);
|
||||
|
||||
const requestBody: SeedreamRequestBody = {
|
||||
model,
|
||||
prompt,
|
||||
size: resolveSeedreamSize(model, args),
|
||||
response_format: "url",
|
||||
watermark: false,
|
||||
};
|
||||
};
|
||||
|
||||
if (imageInput) {
|
||||
requestBody.image = imageInput;
|
||||
}
|
||||
|
||||
if (supportsOutputFormat(model)) {
|
||||
requestBody.output_format = "png";
|
||||
}
|
||||
|
||||
return requestBody;
|
||||
}
|
||||
|
||||
/**
 * Download the generated image bytes from `url`.
 *
 * @param url - Image URL taken from the API response.
 * @returns The response body as raw bytes.
 * @throws Error when the response status is not OK. The message keeps the
 * original "Failed to download image from <url>" prefix and appends the
 * HTTP status so failures can be told apart.
 */
async function downloadImage(url: string): Promise<Uint8Array> {
  const imgResponse = await fetch(url);
  if (!imgResponse.ok) {
    const status = `${imgResponse.status} ${imgResponse.statusText}`.trim();
    throw new Error(`Failed to download image from ${url} (HTTP ${status})`);
  }

  return new Uint8Array(await imgResponse.arrayBuffer());
}
|
||||
|
||||
/**
 * Pull the image bytes out of a Seedream API response.
 *
 * The first `data` entry that carries image content wins, so an error entry
 * earlier in the list does not hide a later successful image. Within an
 * entry, inline base64 is preferred over a URL (the URL is downloaded). An
 * error is raised only when no entry carries an image.
 *
 * @param result - Parsed JSON body of the image generation response.
 * @returns Raw image bytes.
 * @throws Error with the API's per-image error message when every entry
 * failed, or "No image data in Seedream response" when `data` is missing,
 * empty, or has no usable entry.
 */
export async function extractImageFromResponse(result: SeedreamImageResponse): Promise<Uint8Array> {
  const items = result.data ?? [];

  const image = items.find((item) => item.b64_json || item.url);
  if (image?.b64_json) {
    return Uint8Array.from(Buffer.from(image.b64_json, "base64"));
  }
  if (image?.url) {
    console.error(`Downloading image from ${image.url}...`);
    return downloadImage(image.url);
  }

  const failed = items.find((item) => item.error);
  if (failed?.error) {
    throw new Error(failed.error.message || "Seedream returned an image generation error");
  }

  throw new Error("No image data in Seedream response");
}
|
||||
|
||||
export async function generateImage(
|
||||
prompt: string,
|
||||
model: string,
|
||||
args: CliArgs
|
||||
args: CliArgs,
|
||||
): Promise<Uint8Array> {
|
||||
const apiKey = getApiKey();
|
||||
if (!apiKey) {
|
||||
|
|
@ -61,20 +312,13 @@ export async function generateImage(
|
|||
);
|
||||
}
|
||||
|
||||
const baseUrl = getBaseUrl();
|
||||
const size = getSeedreamSize(args.aspectRatio, args.quality, args.imageSize);
|
||||
validateArgs(model, args);
|
||||
const imageInput = await buildImageInput(model, args.referenceImages);
|
||||
const requestBody = buildRequestBody(prompt, model, args, imageInput);
|
||||
|
||||
console.error(`Calling Seedream API (${model}) with size: ${size}`);
|
||||
console.error(`Calling Seedream API (${model}) with size: ${requestBody.size}`);
|
||||
|
||||
const requestBody = {
|
||||
model,
|
||||
prompt,
|
||||
size,
|
||||
output_format: "png",
|
||||
watermark: false,
|
||||
};
|
||||
|
||||
const response = await fetch(`${baseUrl}/images/generations`, {
|
||||
const response = await fetch(`${getBaseUrl()}/images/generations`, {
|
||||
method: "POST",
|
||||
headers: {
|
||||
"Content-Type": "application/json",
|
||||
|
|
@ -89,23 +333,9 @@ export async function generateImage(
|
|||
}
|
||||
|
||||
const result = (await response.json()) as SeedreamImageResponse;
|
||||
|
||||
if (!result.data || result.data.length === 0) {
|
||||
throw new Error("No image data in Seedream response");
|
||||
if (result.error) {
|
||||
throw new Error(result.error.message || "Seedream API returned an error");
|
||||
}
|
||||
|
||||
const imageUrl = result.data[0].url;
|
||||
if (!imageUrl) {
|
||||
throw new Error("No image URL in Seedream response");
|
||||
}
|
||||
|
||||
// Download image from URL
|
||||
console.error(`Downloading image from ${imageUrl}...`);
|
||||
const imgResponse = await fetch(imageUrl);
|
||||
if (!imgResponse.ok) {
|
||||
throw new Error(`Failed to download image from ${imageUrl}`);
|
||||
}
|
||||
|
||||
const buffer = await imgResponse.arrayBuffer();
|
||||
return new Uint8Array(buffer);
|
||||
return extractImageFromResponse(result);
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue