feat(baoyu-image-gen): add replicate provider

This commit is contained in:
justnodejs 2026-02-24 19:12:36 +08:00
parent 7b2c02a007
commit 65a561e654
5 changed files with 241 additions and 16 deletions

View File

@ -1,11 +1,11 @@
---
name: baoyu-image-gen
description: AI image generation with OpenAI, Google and DashScope APIs. Supports text-to-image, reference images, aspect ratios. Sequential by default; parallel generation available on request. Use when user asks to generate, create, or draw images.
description: AI image generation with OpenAI, Google, DashScope and Replicate APIs. Supports text-to-image, reference images, aspect ratios. Sequential by default; parallel generation available on request. Use when user asks to generate, create, or draw images.
---
# Image Generation (AI SDK)
Official API-based image generation. Supports OpenAI, Google and DashScope (阿里通义万象) providers.
Official API-based image generation. Supports OpenAI, Google, DashScope (阿里通义万象) and Replicate providers.
## Script Directory
@ -71,6 +71,12 @@ npx -y bun ${SKILL_DIR}/scripts/main.ts --prompt "A cat" --image out.png --provi
# DashScope (阿里通义万象)
npx -y bun ${SKILL_DIR}/scripts/main.ts --prompt "一只可爱的猫" --image out.png --provider dashscope
# Replicate (google/nano-banana-pro)
npx -y bun ${SKILL_DIR}/scripts/main.ts --prompt "A cat" --image out.png --provider replicate
# Replicate with specific model
npx -y bun ${SKILL_DIR}/scripts/main.ts --prompt "A cat" --image out.png --provider replicate --model google/nano-banana
```
## Options
@ -80,7 +86,7 @@ npx -y bun ${SKILL_DIR}/scripts/main.ts --prompt "一只可爱的猫" --image ou
| `--prompt <text>`, `-p` | Prompt text |
| `--promptfiles <files...>` | Read prompt from files (concatenated) |
| `--image <path>` | Output image path (required) |
| `--provider google\|openai\|dashscope` | Force provider (default: google) |
| `--provider google\|openai\|dashscope\|replicate` | Force provider (default: google) |
| `--model <id>`, `-m` | Model ID (`--ref` with OpenAI requires GPT Image model, e.g. `gpt-image-1.5`) |
| `--ar <ratio>` | Aspect ratio (e.g., `16:9`, `1:1`, `4:3`) |
| `--size <WxH>` | Size (e.g., `1024x1024`) |
@ -97,19 +103,22 @@ npx -y bun ${SKILL_DIR}/scripts/main.ts --prompt "一只可爱的猫" --image ou
| `OPENAI_API_KEY` | OpenAI API key |
| `GOOGLE_API_KEY` | Google API key |
| `DASHSCOPE_API_KEY` | DashScope API key (阿里云) |
| `REPLICATE_API_TOKEN` | Replicate API token |
| `OPENAI_IMAGE_MODEL` | OpenAI model override |
| `GOOGLE_IMAGE_MODEL` | Google model override |
| `DASHSCOPE_IMAGE_MODEL` | DashScope model override (default: z-image-turbo) |
| `REPLICATE_IMAGE_MODEL` | Replicate model override (default: google/nano-banana-pro) |
| `OPENAI_BASE_URL` | Custom OpenAI endpoint |
| `GOOGLE_BASE_URL` | Custom Google endpoint |
| `DASHSCOPE_BASE_URL` | Custom DashScope endpoint |
| `REPLICATE_BASE_URL` | Custom Replicate endpoint |
**Load Priority**: CLI args > EXTEND.md > env vars > `<cwd>/.baoyu-skills/.env` > `~/.baoyu-skills/.env`
## Provider Selection
1. `--ref` provided + no `--provider` → auto-select Google first, then OpenAI
2. `--provider` specified → use it (if `--ref`, must be `google` or `openai`)
1. `--ref` provided + no `--provider` → auto-select Google first, then OpenAI, then Replicate
2. `--provider` specified → use it (if `--ref`, must be `google`, `openai`, or `replicate`)
3. Only one API key available → use that provider
4. Multiple available → default to Google

View File

@ -11,7 +11,7 @@ description: EXTEND.md YAML schema for baoyu-image-gen user preferences
---
version: 1
default_provider: null # google|openai|dashscope|null (null = auto-detect)
default_provider: null # google|openai|dashscope|replicate|null (null = auto-detect)
default_quality: null # normal|2k|null (null = use default: 2k)
@ -23,6 +23,7 @@ default_model:
google: null # e.g., "gemini-3-pro-image-preview"
openai: null # e.g., "gpt-image-1.5"
dashscope: null # e.g., "z-image-turbo"
replicate: null # e.g., "google/nano-banana-pro"
---
```
@ -38,6 +39,7 @@ default_model:
| `default_model.google` | string\|null | null | Google default model |
| `default_model.openai` | string\|null | null | OpenAI default model |
| `default_model.dashscope` | string\|null | null | DashScope default model |
| `default_model.replicate` | string\|null | null | Replicate default model |
## Examples
@ -62,5 +64,6 @@ default_model:
google: "gemini-3-pro-image-preview"
openai: "gpt-image-1.5"
dashscope: "z-image-turbo"
replicate: "google/nano-banana-pro"
---
```

View File

@ -14,7 +14,7 @@ Options:
-p, --prompt <text> Prompt text
--promptfiles <files...> Read prompt from files (concatenated)
--image <path> Output image path (required)
--provider google|openai|dashscope Force provider (auto-detect by default)
--provider google|openai|dashscope|replicate Force provider (auto-detect by default)
-m, --model <id> Model ID
--ar <ratio> Aspect ratio (e.g., 16:9, 1:1, 4:3)
--size <WxH> Size (e.g., 1024x1024)
@ -30,12 +30,15 @@ Environment variables:
GOOGLE_API_KEY Google API key
GEMINI_API_KEY Gemini API key (alias for GOOGLE_API_KEY)
DASHSCOPE_API_KEY DashScope API key (阿里云)
REPLICATE_API_TOKEN Replicate API token
OPENAI_IMAGE_MODEL Default OpenAI model (gpt-image-1.5)
GOOGLE_IMAGE_MODEL Default Google model (gemini-3-pro-image-preview)
DASHSCOPE_IMAGE_MODEL Default DashScope model (z-image-turbo)
REPLICATE_IMAGE_MODEL Default Replicate model (google/nano-banana-pro)
OPENAI_BASE_URL Custom OpenAI endpoint
GOOGLE_BASE_URL Custom Google endpoint
DASHSCOPE_BASE_URL Custom DashScope endpoint
REPLICATE_BASE_URL Custom Replicate endpoint
Env file load order: CLI args > EXTEND.md > process.env > <cwd>/.baoyu-skills/.env > ~/.baoyu-skills/.env`);
}
@ -108,7 +111,7 @@ function parseArgs(argv: string[]): CliArgs {
if (a === "--provider") {
const v = argv[++i];
if (v !== "google" && v !== "openai" && v !== "dashscope") throw new Error(`Invalid provider: ${v}`);
if (v !== "google" && v !== "openai" && v !== "dashscope" && v !== "replicate") throw new Error(`Invalid provider: ${v}`);
out.provider = v;
continue;
}
@ -250,9 +253,9 @@ function parseSimpleYaml(yaml: string): Partial<ExtendConfig> {
} else if (key === "default_image_size") {
config.default_image_size = value === "null" ? null : (value as "1K" | "2K" | "4K");
} else if (key === "default_model") {
config.default_model = { google: null, openai: null, dashscope: null };
config.default_model = { google: null, openai: null, dashscope: null, replicate: null };
currentKey = "default_model";
} else if (currentKey === "default_model" && (key === "google" || key === "openai" || key === "dashscope")) {
} else if (currentKey === "default_model" && (key === "google" || key === "openai" || key === "dashscope" || key === "replicate")) {
const cleaned = value.replace(/['"]/g, "");
config.default_model![key] = cleaned === "null" ? null : cleaned;
}
@ -323,9 +326,9 @@ function normalizeOutputImagePath(p: string): string {
}
function detectProvider(args: CliArgs): Provider {
if (args.referenceImages.length > 0 && args.provider && args.provider !== "google" && args.provider !== "openai") {
if (args.referenceImages.length > 0 && args.provider && args.provider !== "google" && args.provider !== "openai" && args.provider !== "replicate") {
throw new Error(
"Reference images require a ref-capable provider. Use --provider google (Gemini multimodal) or --provider openai (GPT Image edits)."
"Reference images require a ref-capable provider. Use --provider google (Gemini multimodal), --provider openai (GPT Image edits), or --provider replicate."
);
}
@ -334,22 +337,24 @@ function detectProvider(args: CliArgs): Provider {
const hasGoogle = !!(process.env.GOOGLE_API_KEY || process.env.GEMINI_API_KEY);
const hasOpenai = !!process.env.OPENAI_API_KEY;
const hasDashscope = !!process.env.DASHSCOPE_API_KEY;
const hasReplicate = !!process.env.REPLICATE_API_TOKEN;
if (args.referenceImages.length > 0) {
if (hasGoogle) return "google";
if (hasOpenai) return "openai";
if (hasReplicate) return "replicate";
throw new Error(
"Reference images require Google or OpenAI. Set GOOGLE_API_KEY/GEMINI_API_KEY or OPENAI_API_KEY, or remove --ref."
"Reference images require Google, OpenAI or Replicate. Set GOOGLE_API_KEY/GEMINI_API_KEY, OPENAI_API_KEY, or REPLICATE_API_TOKEN, or remove --ref."
);
}
const available = [hasGoogle && "google", hasOpenai && "openai", hasDashscope && "dashscope"].filter(Boolean) as Provider[];
const available = [hasGoogle && "google", hasOpenai && "openai", hasDashscope && "dashscope", hasReplicate && "replicate"].filter(Boolean) as Provider[];
if (available.length === 1) return available[0]!;
if (available.length > 1) return available[0]!;
throw new Error(
"No API key found. Set GOOGLE_API_KEY, GEMINI_API_KEY, OPENAI_API_KEY, or DASHSCOPE_API_KEY.\n" +
"No API key found. Set GOOGLE_API_KEY, GEMINI_API_KEY, OPENAI_API_KEY, DASHSCOPE_API_KEY, or REPLICATE_API_TOKEN.\n" +
"Create ~/.baoyu-skills/.env or <cwd>/.baoyu-skills/.env with your keys."
);
}
@ -389,6 +394,9 @@ async function loadProviderModule(provider: Provider): Promise<ProviderModule> {
if (provider === "dashscope") {
return (await import("./providers/dashscope")) as ProviderModule;
}
if (provider === "replicate") {
return (await import("./providers/replicate")) as ProviderModule;
}
return (await import("./providers/openai")) as ProviderModule;
}
@ -436,6 +444,7 @@ async function main(): Promise<void> {
if (provider === "google") model = extendConfig.default_model.google ?? null;
if (provider === "openai") model = extendConfig.default_model.openai ?? null;
if (provider === "dashscope") model = extendConfig.default_model.dashscope ?? null;
if (provider === "replicate") model = extendConfig.default_model.replicate ?? null;
}
model = model || providerModule.getDefaultModel();

View File

@ -0,0 +1,203 @@
import path from "node:path";
import { readFile } from "node:fs/promises";
import type { CliArgs } from "../types";
/**
 * Model used when neither the caller nor `REPLICATE_IMAGE_MODEL` names one.
 * Kept in sync with the documented default (`google/nano-banana-pro`).
 */
const DEFAULT_MODEL = "google/nano-banana-pro";
/** Seconds sent in the `Prefer: wait=…` header when creating a prediction synchronously. */
const SYNC_WAIT_SECONDS = 60;
/** Delay, in milliseconds, between two consecutive status polls. */
const POLL_INTERVAL_MS = 2000;
/** Upper bound, in milliseconds, on the total time spent polling a prediction. */
const MAX_POLL_MS = 300_000;
/**
 * Resolves the Replicate model ID to use when the caller did not pick one.
 *
 * @returns The `REPLICATE_IMAGE_MODEL` environment variable when it holds a
 *   non-empty value, otherwise the module's built-in default model.
 */
export function getDefaultModel(): string {
  const override = process.env.REPLICATE_IMAGE_MODEL;
  if (override) {
    return override;
  }
  return DEFAULT_MODEL;
}
/**
 * Reads the Replicate API token from the environment.
 *
 * @returns The value of `REPLICATE_API_TOKEN`, or `null` when the variable is
 *   unset or empty.
 */
function getApiToken(): string | null {
  const token = process.env.REPLICATE_API_TOKEN;
  return token ? token : null;
}
/**
 * Resolves the root URL of the Replicate API.
 *
 * @returns `REPLICATE_BASE_URL` when set to a non-empty value, otherwise
 *   `https://api.replicate.com`; any trailing slashes are removed so callers
 *   can append paths that start with `/`.
 */
function getBaseUrl(): string {
  const configured = process.env.REPLICATE_BASE_URL;
  let root = configured ? configured : "https://api.replicate.com";
  while (root.endsWith("/")) {
    root = root.slice(0, -1);
  }
  return root;
}
/**
 * Splits a Replicate model identifier into its parts.
 *
 * Accepts `owner/name` or `owner/name:version`. Only the text between the
 * first and second `:` is used as the version; an empty version is reported
 * as `null`.
 *
 * @param model - Model identifier as supplied by the user or configuration.
 * @returns The owner, model name, and version (or `null` when none was given).
 * @throws Error when the part before `:` is not exactly two non-empty
 *   `/`-separated segments.
 */
function parseModelId(model: string): { owner: string; name: string; version: string | null } {
  const colonSegments = model.split(":");
  const slug = colonSegments[0] ?? "";
  const versionPart = colonSegments[1];

  const slugSegments = slug.split("/");
  const owner = slugSegments[0];
  const name = slugSegments[1];

  if (slugSegments.length !== 2 || !owner || !name) {
    throw new Error(
      `Invalid Replicate model format: "${model}". Expected "owner/name" or "owner/name:version".`
    );
  }

  return { owner, name, version: versionPart ? versionPart : null };
}
/**
 * Assembles the `input` object sent with a Replicate prediction request.
 *
 * Always includes `prompt` and `output_format: "png"`. `aspect_ratio` is added
 * when `args.aspectRatio` is set and `number_of_images` when `args.n > 1`.
 * Reference images are attached as `image` for the first one and `image2`,
 * `image3`, … for the following ones.
 *
 * NOTE(review): input field names are defined per model on Replicate; confirm
 * that the target model's schema actually accepts these keys.
 *
 * @param prompt - Text prompt for the model.
 * @param args - Parsed CLI arguments; only `aspectRatio` and `n` are read here.
 * @param referenceImages - Reference images, already encoded as strings the
 *   API can consume (the caller passes data URLs).
 * @returns The input payload, with keys in a stable insertion order.
 */
function buildInput(prompt: string, args: CliArgs, referenceImages: string[]): Record<string, unknown> {
  const payload: Record<string, unknown> = {
    prompt,
    ...(args.aspectRatio ? { aspect_ratio: args.aspectRatio } : {}),
    ...(args.n > 1 ? { number_of_images: args.n } : {}),
    output_format: "png",
  };

  // First reference goes to `image`; later ones are numbered from 2 upward.
  for (const [index, reference] of referenceImages.entries()) {
    const key = index === 0 ? "image" : `image${index + 1}`;
    payload[key] = reference;
  }

  return payload;
}
/**
 * Loads an image file and encodes it as a base64 `data:` URL.
 *
 * The MIME type is chosen from the file extension (case-insensitive):
 * `.jpg`/`.jpeg`, `.gif` and `.webp` map to their image types; every other
 * extension, or none, is labelled `image/png`.
 *
 * @param p - Path of the image file to read.
 * @returns A `data:<mime>;base64,<payload>` string.
 * @throws Whatever `readFile` rejects with when the file cannot be read.
 */
async function readImageAsDataUrl(p: string): Promise<string> {
  const mimeByExtension = new Map<string, string>([
    [".jpg", "image/jpeg"],
    [".jpeg", "image/jpeg"],
    [".gif", "image/gif"],
    [".webp", "image/webp"],
  ]);

  const bytes = await readFile(p);
  const extension = path.extname(p).toLowerCase();
  const mimeType = mimeByExtension.get(extension) ?? "image/png";
  const payload = bytes.toString("base64");

  return `data:${mimeType};base64,${payload}`;
}
/**
 * Subset of a Replicate prediction object that this module reads.
 */
interface PredictionResponse {
  /** Identifier of the prediction. */
  id: string;
  /** Lifecycle state; this module checks for `"succeeded"`, `"failed"` and `"canceled"`. */
  status: string;
  /** Model output; expected to be a URL string, an array of URL strings, or an object with a `url` field. */
  output: unknown;
  /** Error text included in the failure message when a prediction fails or is canceled. */
  error: string | null;
  /** Related endpoints; `get` is the URL polled for status updates. */
  urls?: { get?: string };
}
/**
 * Creates a prediction through the Replicate HTTP API.
 *
 * When the model carries a version, the request goes to `/v1/predictions`
 * with the version in the body; otherwise it goes to the model-scoped
 * `/v1/models/{owner}/{name}/predictions` endpoint.
 *
 * @param apiToken - Replicate API token sent as a Bearer credential.
 * @param model - Parsed model identifier.
 * @param input - Model input payload.
 * @param sync - When true, adds a `Prefer: wait=<seconds>` header to the
 *   request.
 * @returns The prediction object parsed from the response body.
 * @throws Error carrying the HTTP status and response text when the response
 *   is not OK.
 */
async function createPrediction(
  apiToken: string,
  model: { owner: string; name: string; version: string | null },
  input: Record<string, unknown>,
  sync: boolean
): Promise<PredictionResponse> {
  const apiRoot = getBaseUrl();

  const endpoint = model.version
    ? `${apiRoot}/v1/predictions`
    : `${apiRoot}/v1/models/${model.owner}/${model.name}/predictions`;

  const payload: Record<string, unknown> = model.version
    ? { input, version: model.version }
    : { input };

  const requestHeaders: Record<string, string> = {
    Authorization: `Bearer ${apiToken}`,
    "Content-Type": "application/json",
    ...(sync ? { Prefer: `wait=${SYNC_WAIT_SECONDS}` } : {}),
  };

  const response = await fetch(endpoint, {
    method: "POST",
    headers: requestHeaders,
    body: JSON.stringify(payload),
  });

  if (response.ok) {
    return (await response.json()) as PredictionResponse;
  }

  const detail = await response.text();
  throw new Error(`Replicate API error (${response.status}): ${detail}`);
}
/**
 * Repeatedly fetches a prediction until it reaches a terminal state or the
 * polling budget is exhausted.
 *
 * @param apiToken - Replicate API token sent as a Bearer credential.
 * @param getUrl - URL that returns the current state of the prediction.
 * @returns The prediction once its status is `"succeeded"`.
 * @throws Error when a poll response is not OK, when the prediction is
 *   `"failed"` or `"canceled"`, or when `MAX_POLL_MS` elapses first.
 */
async function pollPrediction(apiToken: string, getUrl: string): Promise<PredictionResponse> {
  const startedAt = Date.now();
  const authHeaders = { Authorization: `Bearer ${apiToken}` };
  const pause = (): Promise<void> =>
    new Promise<void>((resolve) => {
      setTimeout(resolve, POLL_INTERVAL_MS);
    });

  for (;;) {
    // The budget is checked before each request, never in the middle of one.
    if (Date.now() - startedAt >= MAX_POLL_MS) break;

    const response = await fetch(getUrl, { headers: authHeaders });
    if (!response.ok) {
      const detail = await response.text();
      throw new Error(`Replicate poll error (${response.status}): ${detail}`);
    }

    const snapshot = (await response.json()) as PredictionResponse;
    switch (snapshot.status) {
      case "succeeded":
        return snapshot;
      case "failed":
      case "canceled":
        throw new Error(`Replicate prediction ${snapshot.status}: ${snapshot.error || "unknown error"}`);
      default:
        // Any other status: wait, then ask again.
        await pause();
    }
  }

  throw new Error(`Replicate prediction timed out after ${MAX_POLL_MS / 1000}s`);
}
/**
 * Picks the image URL out of a prediction's `output`.
 *
 * Three shapes are recognised, tried in this order: a plain string, an array
 * whose first element is a string, and an object with a string `url` property.
 *
 * @param prediction - Prediction whose output should be read.
 * @returns The first URL found.
 * @throws Error containing the JSON-serialised output when none of the
 *   recognised shapes matches.
 */
function extractOutputUrl(prediction: PredictionResponse): string {
  const { output } = prediction;

  const candidates: unknown[] = [
    output,
    Array.isArray(output) ? output[0] : undefined,
    output !== null && typeof output === "object" && "url" in output
      ? (output as Record<string, unknown>).url
      : undefined,
  ];

  const match = candidates.find((candidate): candidate is string => typeof candidate === "string");
  if (match !== undefined) {
    return match;
  }

  throw new Error(`Unexpected Replicate output format: ${JSON.stringify(output)}`);
}
/**
 * Downloads the generated image.
 *
 * @param url - Location of the image, as reported in the prediction output.
 * @returns The raw bytes of the response body.
 * @throws Error carrying the HTTP status when the response is not OK.
 */
async function downloadImage(url: string): Promise<Uint8Array> {
  const response = await fetch(url);
  if (response.ok) {
    const body = await response.arrayBuffer();
    return new Uint8Array(body);
  }
  throw new Error(`Failed to download image from Replicate: ${response.status}`);
}
/**
 * Generates an image with a Replicate-hosted model and returns its bytes.
 *
 * Flow: encode the reference images as data URLs, create a prediction in
 * synchronous mode, fall back to polling when the prediction has not finished
 * by the time the create call returns, then download the first output URL.
 *
 * @param prompt - Text prompt for the model.
 * @param model - Model identifier (`owner/name` or `owner/name:version`).
 * @param args - Parsed CLI arguments; `referenceImages` lists local files to
 *   attach, the remaining fields are forwarded to the input builder.
 * @returns The downloaded image bytes.
 * @throws Error when `REPLICATE_API_TOKEN` is missing, the model ID is
 *   malformed, the API reports an error, the prediction fails, is canceled or
 *   times out, or the output cannot be interpreted or downloaded.
 */
export async function generateImage(
  prompt: string,
  model: string,
  args: CliArgs
): Promise<Uint8Array> {
  const apiToken = getApiToken();
  if (!apiToken) throw new Error("REPLICATE_API_TOKEN is required. Get one at https://replicate.com/account/api-tokens");

  const parsedModel = parseModelId(model);

  // The files are independent, so read them concurrently; Promise.all keeps
  // the results in the same order as the input paths.
  const refDataUrls = await Promise.all(
    args.referenceImages.map((refPath) => readImageAsDataUrl(refPath))
  );

  const input = buildInput(prompt, args, refDataUrls);

  console.log(`Generating image with Replicate (${model})...`);

  let prediction = await createPrediction(apiToken, parsedModel, input, true);

  // The create call can already return a terminal failure. Report it directly
  // instead of issuing a pointless poll request, or masking the real error
  // behind "did not return a poll URL".
  if (prediction.status === "failed" || prediction.status === "canceled") {
    throw new Error(`Replicate prediction ${prediction.status}: ${prediction.error || "unknown error"}`);
  }

  if (prediction.status !== "succeeded") {
    const pollUrl = prediction.urls?.get;
    if (!pollUrl) {
      throw new Error("Replicate prediction did not return a poll URL");
    }
    console.log("Waiting for prediction to complete...");
    prediction = await pollPrediction(apiToken, pollUrl);
  }

  console.log("Generation completed.");

  const outputUrl = extractOutputUrl(prediction);
  return downloadImage(outputUrl);
}

View File

@ -1,4 +1,4 @@
export type Provider = "google" | "openai" | "dashscope";
export type Provider = "google" | "openai" | "dashscope" | "replicate";
export type Quality = "normal" | "2k";
export type CliArgs = {
@ -27,5 +27,6 @@ export type ExtendConfig = {
google: string | null;
openai: string | null;
dashscope: string | null;
replicate: string | null;
};
};