feat(baoyu-image-gen): add replicate provider
This commit is contained in:
parent
7b2c02a007
commit
65a561e654
|
|
@ -1,11 +1,11 @@
|
|||
---
|
||||
name: baoyu-image-gen
|
||||
description: AI image generation with OpenAI, Google and DashScope APIs. Supports text-to-image, reference images, aspect ratios. Sequential by default; parallel generation available on request. Use when user asks to generate, create, or draw images.
|
||||
description: AI image generation with OpenAI, Google, DashScope and Replicate APIs. Supports text-to-image, reference images, aspect ratios. Sequential by default; parallel generation available on request. Use when user asks to generate, create, or draw images.
|
||||
---
|
||||
|
||||
# Image Generation (AI SDK)
|
||||
|
||||
Official API-based image generation. Supports OpenAI, Google and DashScope (阿里通义万象) providers.
|
||||
Official API-based image generation. Supports OpenAI, Google, DashScope (阿里通义万象) and Replicate providers.
|
||||
|
||||
## Script Directory
|
||||
|
||||
|
|
@ -71,6 +71,12 @@ npx -y bun ${SKILL_DIR}/scripts/main.ts --prompt "A cat" --image out.png --provi
|
|||
|
||||
# DashScope (阿里通义万象)
|
||||
npx -y bun ${SKILL_DIR}/scripts/main.ts --prompt "一只可爱的猫" --image out.png --provider dashscope
|
||||
|
||||
# Replicate (google/nano-banana-pro)
|
||||
npx -y bun ${SKILL_DIR}/scripts/main.ts --prompt "A cat" --image out.png --provider replicate
|
||||
|
||||
# Replicate with specific model
|
||||
npx -y bun ${SKILL_DIR}/scripts/main.ts --prompt "A cat" --image out.png --provider replicate --model google/nano-banana
|
||||
```
|
||||
|
||||
## Options
|
||||
|
|
@ -80,7 +86,7 @@ npx -y bun ${SKILL_DIR}/scripts/main.ts --prompt "一只可爱的猫" --image ou
|
|||
| `--prompt <text>`, `-p` | Prompt text |
|
||||
| `--promptfiles <files...>` | Read prompt from files (concatenated) |
|
||||
| `--image <path>` | Output image path (required) |
|
||||
| `--provider google\|openai\|dashscope` | Force provider (default: google) |
|
||||
| `--provider google\|openai\|dashscope\|replicate` | Force provider (default: google) |
|
||||
| `--model <id>`, `-m` | Model ID (`--ref` with OpenAI requires GPT Image model, e.g. `gpt-image-1.5`) |
|
||||
| `--ar <ratio>` | Aspect ratio (e.g., `16:9`, `1:1`, `4:3`) |
|
||||
| `--size <WxH>` | Size (e.g., `1024x1024`) |
|
||||
|
|
@ -97,19 +103,22 @@ npx -y bun ${SKILL_DIR}/scripts/main.ts --prompt "一只可爱的猫" --image ou
|
|||
| `OPENAI_API_KEY` | OpenAI API key |
|
||||
| `GOOGLE_API_KEY` | Google API key |
|
||||
| `DASHSCOPE_API_KEY` | DashScope API key (阿里云) |
|
||||
| `REPLICATE_API_TOKEN` | Replicate API token |
|
||||
| `OPENAI_IMAGE_MODEL` | OpenAI model override |
|
||||
| `GOOGLE_IMAGE_MODEL` | Google model override |
|
||||
| `DASHSCOPE_IMAGE_MODEL` | DashScope model override (default: z-image-turbo) |
|
||||
| `REPLICATE_IMAGE_MODEL` | Replicate model override (default: google/nano-banana-pro) |
|
||||
| `OPENAI_BASE_URL` | Custom OpenAI endpoint |
|
||||
| `GOOGLE_BASE_URL` | Custom Google endpoint |
|
||||
| `DASHSCOPE_BASE_URL` | Custom DashScope endpoint |
|
||||
| `REPLICATE_BASE_URL` | Custom Replicate endpoint |
|
||||
|
||||
**Load Priority**: CLI args > EXTEND.md > env vars > `<cwd>/.baoyu-skills/.env` > `~/.baoyu-skills/.env`
|
||||
|
||||
## Provider Selection
|
||||
|
||||
1. `--ref` provided + no `--provider` → auto-select Google first, then OpenAI
|
||||
2. `--provider` specified → use it (if `--ref`, must be `google` or `openai`)
|
||||
1. `--ref` provided + no `--provider` → auto-select Google first, then OpenAI, then Replicate
|
||||
2. `--provider` specified → use it (if `--ref`, must be `google`, `openai`, or `replicate`)
|
||||
3. Only one API key available → use that provider
|
||||
4. Multiple available → default to Google
|
||||
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ description: EXTEND.md YAML schema for baoyu-image-gen user preferences
|
|||
---
|
||||
version: 1
|
||||
|
||||
default_provider: null # google|openai|dashscope|null (null = auto-detect)
|
||||
default_provider: null # google|openai|dashscope|replicate|null (null = auto-detect)
|
||||
|
||||
default_quality: null # normal|2k|null (null = use default: 2k)
|
||||
|
||||
|
|
@ -23,6 +23,7 @@ default_model:
|
|||
google: null # e.g., "gemini-3-pro-image-preview"
|
||||
openai: null # e.g., "gpt-image-1.5"
|
||||
dashscope: null # e.g., "z-image-turbo"
|
||||
replicate: null # e.g., "google/nano-banana-pro"
|
||||
---
|
||||
```
|
||||
|
||||
|
|
@ -38,6 +39,7 @@ default_model:
|
|||
| `default_model.google` | string\|null | null | Google default model |
|
||||
| `default_model.openai` | string\|null | null | OpenAI default model |
|
||||
| `default_model.dashscope` | string\|null | null | DashScope default model |
|
||||
| `default_model.replicate` | string\|null | null | Replicate default model |
|
||||
|
||||
## Examples
|
||||
|
||||
|
|
@ -62,5 +64,6 @@ default_model:
|
|||
google: "gemini-3-pro-image-preview"
|
||||
openai: "gpt-image-1.5"
|
||||
dashscope: "z-image-turbo"
|
||||
replicate: "google/nano-banana-pro"
|
||||
---
|
||||
```
|
||||
|
|
|
|||
|
|
@ -14,7 +14,7 @@ Options:
|
|||
-p, --prompt <text> Prompt text
|
||||
--promptfiles <files...> Read prompt from files (concatenated)
|
||||
--image <path> Output image path (required)
|
||||
--provider google|openai|dashscope Force provider (auto-detect by default)
|
||||
--provider google|openai|dashscope|replicate Force provider (auto-detect by default)
|
||||
-m, --model <id> Model ID
|
||||
--ar <ratio> Aspect ratio (e.g., 16:9, 1:1, 4:3)
|
||||
--size <WxH> Size (e.g., 1024x1024)
|
||||
|
|
@ -30,12 +30,15 @@ Environment variables:
|
|||
GOOGLE_API_KEY Google API key
|
||||
GEMINI_API_KEY Gemini API key (alias for GOOGLE_API_KEY)
|
||||
DASHSCOPE_API_KEY DashScope API key (阿里云通义万象)
|
||||
REPLICATE_API_TOKEN Replicate API token
|
||||
OPENAI_IMAGE_MODEL Default OpenAI model (gpt-image-1.5)
|
||||
GOOGLE_IMAGE_MODEL Default Google model (gemini-3-pro-image-preview)
|
||||
DASHSCOPE_IMAGE_MODEL Default DashScope model (z-image-turbo)
|
||||
REPLICATE_IMAGE_MODEL Default Replicate model (google/nano-banana-pro)
|
||||
OPENAI_BASE_URL Custom OpenAI endpoint
|
||||
GOOGLE_BASE_URL Custom Google endpoint
|
||||
DASHSCOPE_BASE_URL Custom DashScope endpoint
|
||||
REPLICATE_BASE_URL Custom Replicate endpoint
|
||||
|
||||
Env file load order: CLI args > EXTEND.md > process.env > <cwd>/.baoyu-skills/.env > ~/.baoyu-skills/.env`);
|
||||
}
|
||||
|
|
@ -108,7 +111,7 @@ function parseArgs(argv: string[]): CliArgs {
|
|||
|
||||
if (a === "--provider") {
|
||||
const v = argv[++i];
|
||||
if (v !== "google" && v !== "openai" && v !== "dashscope") throw new Error(`Invalid provider: ${v}`);
|
||||
if (v !== "google" && v !== "openai" && v !== "dashscope" && v !== "replicate") throw new Error(`Invalid provider: ${v}`);
|
||||
out.provider = v;
|
||||
continue;
|
||||
}
|
||||
|
|
@ -250,9 +253,9 @@ function parseSimpleYaml(yaml: string): Partial<ExtendConfig> {
|
|||
} else if (key === "default_image_size") {
|
||||
config.default_image_size = value === "null" ? null : (value as "1K" | "2K" | "4K");
|
||||
} else if (key === "default_model") {
|
||||
config.default_model = { google: null, openai: null, dashscope: null };
|
||||
config.default_model = { google: null, openai: null, dashscope: null, replicate: null };
|
||||
currentKey = "default_model";
|
||||
} else if (currentKey === "default_model" && (key === "google" || key === "openai" || key === "dashscope")) {
|
||||
} else if (currentKey === "default_model" && (key === "google" || key === "openai" || key === "dashscope" || key === "replicate")) {
|
||||
const cleaned = value.replace(/['"]/g, "");
|
||||
config.default_model![key] = cleaned === "null" ? null : cleaned;
|
||||
}
|
||||
|
|
@ -323,9 +326,9 @@ function normalizeOutputImagePath(p: string): string {
|
|||
}
|
||||
|
||||
function detectProvider(args: CliArgs): Provider {
|
||||
if (args.referenceImages.length > 0 && args.provider && args.provider !== "google" && args.provider !== "openai") {
|
||||
if (args.referenceImages.length > 0 && args.provider && args.provider !== "google" && args.provider !== "openai" && args.provider !== "replicate") {
|
||||
throw new Error(
|
||||
"Reference images require a ref-capable provider. Use --provider google (Gemini multimodal) or --provider openai (GPT Image edits)."
|
||||
"Reference images require a ref-capable provider. Use --provider google (Gemini multimodal), --provider openai (GPT Image edits), or --provider replicate."
|
||||
);
|
||||
}
|
||||
|
||||
|
|
@ -334,22 +337,24 @@ function detectProvider(args: CliArgs): Provider {
|
|||
const hasGoogle = !!(process.env.GOOGLE_API_KEY || process.env.GEMINI_API_KEY);
|
||||
const hasOpenai = !!process.env.OPENAI_API_KEY;
|
||||
const hasDashscope = !!process.env.DASHSCOPE_API_KEY;
|
||||
const hasReplicate = !!process.env.REPLICATE_API_TOKEN;
|
||||
|
||||
if (args.referenceImages.length > 0) {
|
||||
if (hasGoogle) return "google";
|
||||
if (hasOpenai) return "openai";
|
||||
if (hasReplicate) return "replicate";
|
||||
throw new Error(
|
||||
"Reference images require Google or OpenAI. Set GOOGLE_API_KEY/GEMINI_API_KEY or OPENAI_API_KEY, or remove --ref."
|
||||
"Reference images require Google, OpenAI or Replicate. Set GOOGLE_API_KEY/GEMINI_API_KEY, OPENAI_API_KEY, or REPLICATE_API_TOKEN, or remove --ref."
|
||||
);
|
||||
}
|
||||
|
||||
const available = [hasGoogle && "google", hasOpenai && "openai", hasDashscope && "dashscope"].filter(Boolean) as Provider[];
|
||||
const available = [hasGoogle && "google", hasOpenai && "openai", hasDashscope && "dashscope", hasReplicate && "replicate"].filter(Boolean) as Provider[];
|
||||
|
||||
if (available.length === 1) return available[0]!;
|
||||
if (available.length > 1) return available[0]!;
|
||||
|
||||
throw new Error(
|
||||
"No API key found. Set GOOGLE_API_KEY, GEMINI_API_KEY, OPENAI_API_KEY, or DASHSCOPE_API_KEY.\n" +
|
||||
"No API key found. Set GOOGLE_API_KEY, GEMINI_API_KEY, OPENAI_API_KEY, DASHSCOPE_API_KEY, or REPLICATE_API_TOKEN.\n" +
|
||||
"Create ~/.baoyu-skills/.env or <cwd>/.baoyu-skills/.env with your keys."
|
||||
);
|
||||
}
|
||||
|
|
@ -389,6 +394,9 @@ async function loadProviderModule(provider: Provider): Promise<ProviderModule> {
|
|||
if (provider === "dashscope") {
|
||||
return (await import("./providers/dashscope")) as ProviderModule;
|
||||
}
|
||||
if (provider === "replicate") {
|
||||
return (await import("./providers/replicate")) as ProviderModule;
|
||||
}
|
||||
return (await import("./providers/openai")) as ProviderModule;
|
||||
}
|
||||
|
||||
|
|
@ -436,6 +444,7 @@ async function main(): Promise<void> {
|
|||
if (provider === "google") model = extendConfig.default_model.google ?? null;
|
||||
if (provider === "openai") model = extendConfig.default_model.openai ?? null;
|
||||
if (provider === "dashscope") model = extendConfig.default_model.dashscope ?? null;
|
||||
if (provider === "replicate") model = extendConfig.default_model.replicate ?? null;
|
||||
}
|
||||
model = model || providerModule.getDefaultModel();
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,203 @@
|
|||
import path from "node:path";
|
||||
import { readFile } from "node:fs/promises";
|
||||
import type { CliArgs } from "../types";
|
||||
|
||||
/**
 * Fallback model id. Matches the default advertised in the CLI help text and
 * SKILL.md (`google/nano-banana-pro`).
 */
const DEFAULT_MODEL = "google/nano-banana-pro";
/** Value sent as `Prefer: wait=<n>` when a prediction is created in sync mode. */
const SYNC_WAIT_SECONDS = 60;
/** Pause between two consecutive status polls, in milliseconds. */
const POLL_INTERVAL_MS = 2000;
/** Overall polling budget in milliseconds before a timeout error is raised. */
const MAX_POLL_MS = 300_000;

/**
 * Returns the model to use when the caller did not pick one.
 *
 * @returns `REPLICATE_IMAGE_MODEL` when it is set to a non-empty value,
 *          otherwise the built-in default model id.
 */
export function getDefaultModel(): string {
  return process.env.REPLICATE_IMAGE_MODEL || DEFAULT_MODEL;
}
|
||||
|
||||
/** Reads `REPLICATE_API_TOKEN` from the environment; yields `null` when it is unset or empty. */
function getApiToken(): string | null {
  const token = process.env.REPLICATE_API_TOKEN;
  return token ? token : null;
}
|
||||
|
||||
/**
 * Resolves the API root: `REPLICATE_BASE_URL` when set to a non-empty value,
 * otherwise the public endpoint. Any trailing slashes are removed so callers
 * can append `/v1/...` paths directly.
 */
function getBaseUrl(): string {
  const configured = process.env.REPLICATE_BASE_URL;
  const root = configured ? configured : "https://api.replicate.com";

  // Walk back over every trailing "/" and cut the string there.
  let end = root.length;
  while (end > 0 && root.charAt(end - 1) === "/") {
    end -= 1;
  }
  return root.slice(0, end);
}
|
||||
|
||||
/**
 * Splits a model reference of the form `owner/name` or `owner/name:version`.
 *
 * Only the text before the first `:` is treated as the `owner/name` path and
 * only the segment right after it as the version; an empty version is
 * reported as `null`.
 *
 * @param model Model reference as given on the CLI / config.
 * @returns The owner, the model name and the version (or `null`).
 * @throws Error when the path is not exactly two non-empty `/`-separated parts.
 */
function parseModelId(model: string): { owner: string; name: string; version: string | null } {
  const segments = model.split(":");
  const repoPath = segments[0] ?? "";
  const versionTag = segments[1];

  const pathParts = repoPath.split("/");
  const owner = pathParts[0];
  const name = pathParts[1];

  if (pathParts.length !== 2 || !owner || !name) {
    throw new Error(
      `Invalid Replicate model format: "${model}". Expected "owner/name" or "owner/name:version".`
    );
  }

  return { owner, name, version: versionTag ? versionTag : null };
}
|
||||
|
||||
/**
 * Assembles the `input` object sent with a prediction request.
 *
 * - `prompt` is always included and `output_format` is always `"png"`.
 * - `aspect_ratio` is added only when `args.aspectRatio` is truthy.
 * - `number_of_images` is added only when more than one image is requested.
 * - Reference images are attached as `image`, `image2`, `image3`, ...
 *
 * NOTE(review): these field names are passed through verbatim; confirm they
 * match the input schema of the model actually being called.
 *
 * @param prompt          Text prompt.
 * @param args            Parsed CLI arguments (`aspectRatio` and `n` are read).
 * @param referenceImages Reference images, already encoded for transport.
 */
function buildInput(prompt: string, args: CliArgs, referenceImages: string[]): Record<string, unknown> {
  const payload: Record<string, unknown> = { prompt };

  if (args.aspectRatio) payload.aspect_ratio = args.aspectRatio;
  if (args.n > 1) payload.number_of_images = args.n;
  payload.output_format = "png";

  // First image uses the bare key, later ones carry a 1-based suffix.
  referenceImages.forEach((ref, idx) => {
    const key = idx === 0 ? "image" : `image${idx + 1}`;
    payload[key] = ref;
  });

  return payload;
}
|
||||
|
||||
/**
 * Loads a file and returns it as a base64 `data:` URL.
 *
 * The MIME type is chosen from the (case-insensitive) file extension:
 * `.jpg`/`.jpeg`, `.gif` and `.webp` are recognised; anything else is
 * labelled `image/png`.
 *
 * @param p Path of the image file to read.
 */
async function readImageAsDataUrl(p: string): Promise<string> {
  const mimeByExtension: Record<string, string> = {
    ".jpg": "image/jpeg",
    ".jpeg": "image/jpeg",
    ".gif": "image/gif",
    ".webp": "image/webp",
  };

  const bytes = await readFile(p);
  const extension = path.extname(p).toLowerCase();
  const mimeType = mimeByExtension[extension] ?? "image/png";

  return `data:${mimeType};base64,${bytes.toString("base64")}`;
}
|
||||
|
||||
/** Subset of the prediction JSON returned by the API that this module reads. */
interface PredictionResponse {
  /** Prediction identifier. */
  id: string;
  /** Lifecycle state; this module reacts to "succeeded", "failed" and "canceled". */
  status: string;
  /** Model output; its shape varies, so it is narrowed at the point of use. */
  output: unknown;
  /** Error text reported for an unsuccessful prediction, if any. */
  error: string | null;
  /** Related endpoints; `get` is the URL polled for status updates. */
  urls?: { get?: string };
}
|
||||
|
||||
/**
 * Creates a prediction via HTTP POST.
 *
 * When the model carries a version, the request goes to `/v1/predictions`
 * with `version` in the body; otherwise it goes to the model-scoped
 * `/v1/models/{owner}/{name}/predictions` endpoint.
 *
 * @param apiToken Bearer token for the `Authorization` header.
 * @param model    Parsed model reference.
 * @param input    Model input payload.
 * @param sync     When true, adds a `Prefer: wait=<seconds>` header.
 * @returns The parsed JSON response body.
 * @throws Error containing the HTTP status and response text on a non-OK reply.
 */
async function createPrediction(
  apiToken: string,
  model: { owner: string; name: string; version: string | null },
  input: Record<string, unknown>,
  sync: boolean
): Promise<PredictionResponse> {
  const root = getBaseUrl();

  const endpoint = model.version
    ? `${root}/v1/predictions`
    : `${root}/v1/models/${model.owner}/${model.name}/predictions`;

  const payload: Record<string, unknown> = model.version
    ? { input, version: model.version }
    : { input };

  const requestHeaders: Record<string, string> = {
    Authorization: `Bearer ${apiToken}`,
    "Content-Type": "application/json",
    ...(sync ? { Prefer: `wait=${SYNC_WAIT_SECONDS}` } : {}),
  };

  const response = await fetch(endpoint, {
    method: "POST",
    headers: requestHeaders,
    body: JSON.stringify(payload),
  });

  if (response.ok) {
    return (await response.json()) as PredictionResponse;
  }

  const detail = await response.text();
  throw new Error(`Replicate API error (${response.status}): ${detail}`);
}
|
||||
|
||||
/**
 * Repeatedly fetches a prediction until it reaches a terminal state or the
 * polling budget (`MAX_POLL_MS`) is used up.
 *
 * @param apiToken Bearer token for the `Authorization` header.
 * @param getUrl   URL that returns the current prediction state.
 * @returns The prediction once its status is "succeeded".
 * @throws Error on a non-OK HTTP reply, on a "failed"/"canceled" status, or on timeout.
 */
async function pollPrediction(apiToken: string, getUrl: string): Promise<PredictionResponse> {
  const authHeaders = { Authorization: `Bearer ${apiToken}` };
  const pause = (): Promise<void> =>
    new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL_MS));

  for (const startedAt = Date.now(); Date.now() - startedAt < MAX_POLL_MS; ) {
    const response = await fetch(getUrl, { headers: authHeaders });

    if (!response.ok) {
      const detail = await response.text();
      throw new Error(`Replicate poll error (${response.status}): ${detail}`);
    }

    const snapshot = (await response.json()) as PredictionResponse;

    switch (snapshot.status) {
      case "succeeded":
        return snapshot;
      case "failed":
      case "canceled":
        throw new Error(`Replicate prediction ${snapshot.status}: ${snapshot.error || "unknown error"}`);
    }

    // Still in progress: wait before asking again.
    await pause();
  }

  throw new Error(`Replicate prediction timed out after ${MAX_POLL_MS / 1000}s`);
}
|
||||
|
||||
/**
 * Pulls the image URL out of a prediction's `output`.
 *
 * Accepted shapes, checked in this order: a plain string, an array whose
 * first element is a string, or an object with a string `url` property.
 *
 * @throws Error (including the JSON-serialised output) for any other shape.
 */
function extractOutputUrl(prediction: PredictionResponse): string {
  const { output } = prediction;

  let candidate: unknown = output;
  if (Array.isArray(output) && typeof output[0] === "string") {
    candidate = output[0];
  } else if (typeof output === "object" && output !== null && "url" in output) {
    candidate = (output as Record<string, unknown>).url;
  }

  if (typeof candidate === "string") return candidate;

  throw new Error(`Unexpected Replicate output format: ${JSON.stringify(output)}`);
}
|
||||
|
||||
/**
 * Fetches the generated image and returns its raw bytes.
 *
 * @param url Location of the image to download.
 * @throws Error carrying the HTTP status when the response is not OK.
 */
async function downloadImage(url: string): Promise<Uint8Array> {
  const response = await fetch(url);

  if (response.ok) {
    return new Uint8Array(await response.arrayBuffer());
  }

  throw new Error(`Failed to download image from Replicate: ${response.status}`);
}
|
||||
|
||||
/**
 * Generates one image through Replicate and returns its bytes.
 *
 * Flow: validate the token and model id, encode any reference images as data
 * URLs, create the prediction in sync mode, fall back to polling when the
 * create call returns before completion, then download the first output.
 *
 * @param prompt Text prompt.
 * @param model  Model reference, `owner/name` or `owner/name:version`.
 * @param args   Parsed CLI arguments; `referenceImages` are read from disk here.
 * @returns Raw bytes of the generated image.
 * @throws Error when the token is missing, the model id is malformed, the API
 *         reports an error, the prediction fails/is canceled/times out, or
 *         the output cannot be interpreted or downloaded.
 */
export async function generateImage(
  prompt: string,
  model: string,
  args: CliArgs
): Promise<Uint8Array> {
  const apiToken = getApiToken();
  if (!apiToken) throw new Error("REPLICATE_API_TOKEN is required. Get one at https://replicate.com/account/api-tokens");

  const parsedModel = parseModelId(model);

  // The reads are independent, so run them concurrently; result order follows input order.
  const refDataUrls = await Promise.all(
    args.referenceImages.map((refPath) => readImageAsDataUrl(refPath))
  );

  const input = buildInput(prompt, args, refDataUrls);

  console.log(`Generating image with Replicate (${model})...`);

  let prediction = await createPrediction(apiToken, parsedModel, input, true);

  // The create call itself can come back already failed or canceled. Report
  // that directly instead of polling again or complaining about a poll URL.
  if (prediction.status === "failed" || prediction.status === "canceled") {
    throw new Error(`Replicate prediction ${prediction.status}: ${prediction.error || "unknown error"}`);
  }

  if (prediction.status !== "succeeded") {
    const pollUrl = prediction.urls?.get;
    if (!pollUrl) {
      throw new Error("Replicate prediction did not return a poll URL");
    }
    console.log("Waiting for prediction to complete...");
    prediction = await pollPrediction(apiToken, pollUrl);
  }

  console.log("Generation completed.");

  const outputUrl = extractOutputUrl(prediction);
  return downloadImage(outputUrl);
}
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
export type Provider = "google" | "openai" | "dashscope";
|
||||
export type Provider = "google" | "openai" | "dashscope" | "replicate";
|
||||
export type Quality = "normal" | "2k";
|
||||
|
||||
export type CliArgs = {
|
||||
|
|
@ -27,5 +27,6 @@ export type ExtendConfig = {
|
|||
google: string | null;
|
||||
openai: string | null;
|
||||
dashscope: string | null;
|
||||
replicate: string | null;
|
||||
};
|
||||
};
|
||||
|
|
|
|||
Loading…
Reference in New Issue