diff --git a/skills/baoyu-image-gen/SKILL.md b/skills/baoyu-image-gen/SKILL.md index 1a900c4..d4730ff 100644 --- a/skills/baoyu-image-gen/SKILL.md +++ b/skills/baoyu-image-gen/SKILL.md @@ -1,11 +1,11 @@ --- name: baoyu-image-gen -description: AI image generation with OpenAI, Google and DashScope APIs. Supports text-to-image, reference images, aspect ratios. Sequential by default; parallel generation available on request. Use when user asks to generate, create, or draw images. +description: AI image generation with OpenAI, Google, DashScope and Replicate APIs. Supports text-to-image, reference images, aspect ratios. Sequential by default; parallel generation available on request. Use when user asks to generate, create, or draw images. --- # Image Generation (AI SDK) -Official API-based image generation. Supports OpenAI, Google and DashScope (阿里通义万象) providers. +Official API-based image generation. Supports OpenAI, Google, DashScope (阿里通义万象) and Replicate providers. ## Script Directory @@ -71,6 +71,12 @@ npx -y bun ${SKILL_DIR}/scripts/main.ts --prompt "A cat" --image out.png --provi # DashScope (阿里通义万象) npx -y bun ${SKILL_DIR}/scripts/main.ts --prompt "一只可爱的猫" --image out.png --provider dashscope + +# Replicate (google/nano-banana-pro) +npx -y bun ${SKILL_DIR}/scripts/main.ts --prompt "A cat" --image out.png --provider replicate + +# Replicate with specific model +npx -y bun ${SKILL_DIR}/scripts/main.ts --prompt "A cat" --image out.png --provider replicate --model google/nano-banana ``` ## Options @@ -80,7 +86,7 @@ npx -y bun ${SKILL_DIR}/scripts/main.ts --prompt "一只可爱的猫" --image ou | `--prompt `, `-p` | Prompt text | | `--promptfiles ` | Read prompt from files (concatenated) | | `--image ` | Output image path (required) | -| `--provider google\|openai\|dashscope` | Force provider (default: google) | +| `--provider google\|openai\|dashscope\|replicate` | Force provider (default: google) | | `--model `, `-m` | Model ID (`--ref` with OpenAI requires GPT Image model, 
e.g. `gpt-image-1.5`) | | `--ar ` | Aspect ratio (e.g., `16:9`, `1:1`, `4:3`) | | `--size ` | Size (e.g., `1024x1024`) | @@ -97,19 +103,22 @@ npx -y bun ${SKILL_DIR}/scripts/main.ts --prompt "一只可爱的猫" --image ou | `OPENAI_API_KEY` | OpenAI API key | | `GOOGLE_API_KEY` | Google API key | | `DASHSCOPE_API_KEY` | DashScope API key (阿里云) | +| `REPLICATE_API_TOKEN` | Replicate API token | | `OPENAI_IMAGE_MODEL` | OpenAI model override | | `GOOGLE_IMAGE_MODEL` | Google model override | | `DASHSCOPE_IMAGE_MODEL` | DashScope model override (default: z-image-turbo) | +| `REPLICATE_IMAGE_MODEL` | Replicate model override (default: google/nano-banana-pro) | | `OPENAI_BASE_URL` | Custom OpenAI endpoint | | `GOOGLE_BASE_URL` | Custom Google endpoint | | `DASHSCOPE_BASE_URL` | Custom DashScope endpoint | +| `REPLICATE_BASE_URL` | Custom Replicate endpoint | **Load Priority**: CLI args > EXTEND.md > env vars > `/.baoyu-skills/.env` > `~/.baoyu-skills/.env` ## Provider Selection -1. `--ref` provided + no `--provider` → auto-select Google first, then OpenAI -2. `--provider` specified → use it (if `--ref`, must be `google` or `openai`) +1. `--ref` provided + no `--provider` → auto-select Google first, then OpenAI, then Replicate +2. `--provider` specified → use it (if `--ref`, must be `google`, `openai`, or `replicate`) 3. Only one API key available → use that provider 4. 
Multiple available → default to Google diff --git a/skills/baoyu-image-gen/references/config/preferences-schema.md b/skills/baoyu-image-gen/references/config/preferences-schema.md index ba840ee..ce7696a 100644 --- a/skills/baoyu-image-gen/references/config/preferences-schema.md +++ b/skills/baoyu-image-gen/references/config/preferences-schema.md @@ -11,7 +11,7 @@ description: EXTEND.md YAML schema for baoyu-image-gen user preferences --- version: 1 -default_provider: null # google|openai|dashscope|null (null = auto-detect) +default_provider: null # google|openai|dashscope|replicate|null (null = auto-detect) default_quality: null # normal|2k|null (null = use default: 2k) @@ -23,6 +23,7 @@ default_model: google: null # e.g., "gemini-3-pro-image-preview" openai: null # e.g., "gpt-image-1.5" dashscope: null # e.g., "z-image-turbo" + replicate: null # e.g., "google/nano-banana-pro" --- ``` @@ -38,6 +39,7 @@ default_model: | `default_model.google` | string\|null | null | Google default model | | `default_model.openai` | string\|null | null | OpenAI default model | | `default_model.dashscope` | string\|null | null | DashScope default model | +| `default_model.replicate` | string\|null | null | Replicate default model | ## Examples @@ -62,5 +64,6 @@ default_model: google: "gemini-3-pro-image-preview" openai: "gpt-image-1.5" dashscope: "z-image-turbo" + replicate: "google/nano-banana-pro" --- ``` diff --git a/skills/baoyu-image-gen/scripts/main.ts b/skills/baoyu-image-gen/scripts/main.ts index ce0620c..82274b2 100644 --- a/skills/baoyu-image-gen/scripts/main.ts +++ b/skills/baoyu-image-gen/scripts/main.ts @@ -14,7 +14,7 @@ Options: -p, --prompt Prompt text --promptfiles Read prompt from files (concatenated) --image Output image path (required) - --provider google|openai|dashscope Force provider (auto-detect by default) + --provider google|openai|dashscope|replicate Force provider (auto-detect by default) -m, --model Model ID --ar Aspect ratio (e.g., 16:9, 1:1, 4:3) --size 
Size (e.g., 1024x1024) @@ -30,12 +30,15 @@ Environment variables: GOOGLE_API_KEY Google API key GEMINI_API_KEY Gemini API key (alias for GOOGLE_API_KEY) DASHSCOPE_API_KEY DashScope API key (阿里云通义万象) + REPLICATE_API_TOKEN Replicate API token OPENAI_IMAGE_MODEL Default OpenAI model (gpt-image-1.5) GOOGLE_IMAGE_MODEL Default Google model (gemini-3-pro-image-preview) DASHSCOPE_IMAGE_MODEL Default DashScope model (z-image-turbo) + REPLICATE_IMAGE_MODEL Default Replicate model (google/nano-banana-pro) OPENAI_BASE_URL Custom OpenAI endpoint GOOGLE_BASE_URL Custom Google endpoint DASHSCOPE_BASE_URL Custom DashScope endpoint + REPLICATE_BASE_URL Custom Replicate endpoint Env file load order: CLI args > EXTEND.md > process.env > /.baoyu-skills/.env > ~/.baoyu-skills/.env`); } @@ -108,7 +111,7 @@ function parseArgs(argv: string[]): CliArgs { if (a === "--provider") { const v = argv[++i]; - if (v !== "google" && v !== "openai" && v !== "dashscope") throw new Error(`Invalid provider: ${v}`); + if (v !== "google" && v !== "openai" && v !== "dashscope" && v !== "replicate") throw new Error(`Invalid provider: ${v}`); out.provider = v; continue; } @@ -250,9 +253,9 @@ function parseSimpleYaml(yaml: string): Partial { } else if (key === "default_image_size") { config.default_image_size = value === "null" ? null : (value as "1K" | "2K" | "4K"); } else if (key === "default_model") { - config.default_model = { google: null, openai: null, dashscope: null }; + config.default_model = { google: null, openai: null, dashscope: null, replicate: null }; currentKey = "default_model"; - } else if (currentKey === "default_model" && (key === "google" || key === "openai" || key === "dashscope")) { + } else if (currentKey === "default_model" && (key === "google" || key === "openai" || key === "dashscope" || key === "replicate")) { const cleaned = value.replace(/['"]/g, ""); config.default_model![key] = cleaned === "null" ? 
null : cleaned; } @@ -323,9 +326,9 @@ function normalizeOutputImagePath(p: string): string { } function detectProvider(args: CliArgs): Provider { - if (args.referenceImages.length > 0 && args.provider && args.provider !== "google" && args.provider !== "openai") { + if (args.referenceImages.length > 0 && args.provider && args.provider !== "google" && args.provider !== "openai" && args.provider !== "replicate") { throw new Error( - "Reference images require a ref-capable provider. Use --provider google (Gemini multimodal) or --provider openai (GPT Image edits)." + "Reference images require a ref-capable provider. Use --provider google (Gemini multimodal), --provider openai (GPT Image edits), or --provider replicate." ); } @@ -334,22 +337,24 @@ function detectProvider(args: CliArgs): Provider { const hasGoogle = !!(process.env.GOOGLE_API_KEY || process.env.GEMINI_API_KEY); const hasOpenai = !!process.env.OPENAI_API_KEY; const hasDashscope = !!process.env.DASHSCOPE_API_KEY; + const hasReplicate = !!process.env.REPLICATE_API_TOKEN; if (args.referenceImages.length > 0) { if (hasGoogle) return "google"; if (hasOpenai) return "openai"; + if (hasReplicate) return "replicate"; throw new Error( - "Reference images require Google or OpenAI. Set GOOGLE_API_KEY/GEMINI_API_KEY or OPENAI_API_KEY, or remove --ref." + "Reference images require Google, OpenAI or Replicate. Set GOOGLE_API_KEY/GEMINI_API_KEY, OPENAI_API_KEY, or REPLICATE_API_TOKEN, or remove --ref." ); } - const available = [hasGoogle && "google", hasOpenai && "openai", hasDashscope && "dashscope"].filter(Boolean) as Provider[]; + const available = [hasGoogle && "google", hasOpenai && "openai", hasDashscope && "dashscope", hasReplicate && "replicate"].filter(Boolean) as Provider[]; if (available.length === 1) return available[0]!; if (available.length > 1) return available[0]!; throw new Error( - "No API key found. Set GOOGLE_API_KEY, GEMINI_API_KEY, OPENAI_API_KEY, or DASHSCOPE_API_KEY.\n" + + "No API key found. 
Set GOOGLE_API_KEY, GEMINI_API_KEY, OPENAI_API_KEY, DASHSCOPE_API_KEY, or REPLICATE_API_TOKEN.\n" +
       "Create ~/.baoyu-skills/.env or /.baoyu-skills/.env with your keys."
   );
 }
@@ -389,6 +394,9 @@ async function loadProviderModule(provider: Provider): Promise {
   if (provider === "dashscope") {
     return (await import("./providers/dashscope")) as ProviderModule;
   }
+  if (provider === "replicate") {
+    return (await import("./providers/replicate")) as ProviderModule;
+  }
   return (await import("./providers/openai")) as ProviderModule;
 }
 
@@ -436,6 +444,7 @@ async function main(): Promise {
     if (provider === "google") model = extendConfig.default_model.google ?? null;
     if (provider === "openai") model = extendConfig.default_model.openai ?? null;
     if (provider === "dashscope") model = extendConfig.default_model.dashscope ?? null;
+    if (provider === "replicate") model = extendConfig.default_model.replicate ?? null;
   }
 
   model = model || providerModule.getDefaultModel();
diff --git a/skills/baoyu-image-gen/scripts/providers/replicate.ts b/skills/baoyu-image-gen/scripts/providers/replicate.ts
new file mode 100644
index 0000000..ca58917
--- /dev/null
+++ b/skills/baoyu-image-gen/scripts/providers/replicate.ts
@@ -0,0 +1,260 @@
+import path from "node:path";
+import { readFile } from "node:fs/promises";
+import type { CliArgs } from "../types";
+
+// Default model; matches the default documented in SKILL.md and the CLI help text.
+const DEFAULT_MODEL = "google/nano-banana-pro";
+// Sent as `Prefer: wait=N` on the create request.
+const SYNC_WAIT_SECONDS = 60;
+// Delay between status polls and the total time allowed for polling.
+const POLL_INTERVAL_MS = 2000;
+const MAX_POLL_MS = 300_000;
+
+/** Returns REPLICATE_IMAGE_MODEL when set, otherwise the built-in default model. */
+export function getDefaultModel(): string {
+  return process.env.REPLICATE_IMAGE_MODEL || DEFAULT_MODEL;
+}
+
+/** Returns REPLICATE_API_TOKEN, or null when it is unset or empty. */
+function getApiToken(): string | null {
+  return process.env.REPLICATE_API_TOKEN || null;
+}
+
+/** Returns REPLICATE_BASE_URL (or the public API endpoint) with trailing slashes removed. */
+function getBaseUrl(): string {
+  const base = process.env.REPLICATE_BASE_URL || "https://api.replicate.com";
+  return base.replace(/\/+$/g, "");
+}
+
+/**
+ * Splits a model identifier into owner, name and optional version.
+ *
+ * @param model - "owner/name" or "owner/name:version".
+ * @returns The parsed parts; `version` is null when none is given.
+ * @throws Error when the part before ":" is not exactly "owner/name".
+ */
+function parseModelId(model: string): { owner: string; name: string; version: string | null } {
+  const [ownerName = "", version] = model.split(":");
+  const parts = ownerName.split("/");
+  if (parts.length !== 2 || !parts[0] || !parts[1]) {
+    throw new Error(
+      `Invalid Replicate model format: "${model}". Expected "owner/name" or "owner/name:version".`
+    );
+  }
+  return { owner: parts[0], name: parts[1], version: version || null };
+}
+
+/**
+ * Builds the `input` object for a prediction request.
+ *
+ * @param prompt - Prompt text.
+ * @param args - CLI arguments; `aspectRatio` and `n` are read.
+ * @param referenceImages - Reference images, already encoded as data URLs.
+ * @returns The input payload.
+ */
+function buildInput(prompt: string, args: CliArgs, referenceImages: string[]): Record<string, unknown> {
+  const input: Record<string, unknown> = { prompt };
+
+  if (args.aspectRatio) {
+    input.aspect_ratio = args.aspectRatio;
+  }
+
+  if (args.n > 1) {
+    input.number_of_images = args.n;
+  }
+
+  input.output_format = "png";
+
+  // The first reference goes in "image", later ones in "image2", "image3", ...
+  // NOTE(review): input field names are model-specific; confirm against the target model's schema.
+  referenceImages.forEach((dataUrl, i) => {
+    input[i === 0 ? "image" : `image${i + 1}`] = dataUrl;
+  });
+
+  return input;
+}
+
+/** Reads a file and returns it as a base64 data URL; the MIME type comes from the extension (PNG by default). */
+async function readImageAsDataUrl(p: string): Promise<string> {
+  const buf = await readFile(p);
+  const ext = path.extname(p).toLowerCase();
+  let mimeType = "image/png";
+  if (ext === ".jpg" || ext === ".jpeg") mimeType = "image/jpeg";
+  else if (ext === ".gif") mimeType = "image/gif";
+  else if (ext === ".webp") mimeType = "image/webp";
+  return `data:${mimeType};base64,${buf.toString("base64")}`;
+}
+
+/** Fields of a Replicate prediction response that this module reads. */
+type PredictionResponse = {
+  id: string;
+  status: string;
+  output: unknown;
+  error: string | null;
+  urls?: { get?: string };
+};
+
+/**
+ * Creates a prediction.
+ *
+ * @param apiToken - Bearer token for the Authorization header.
+ * @param model - Parsed model; a version selects the generic predictions endpoint.
+ * @param input - Model input payload.
+ * @param sync - When true, sends `Prefer: wait=N` with the request.
+ * @returns The prediction as returned by the API.
+ * @throws Error when the response status is not OK.
+ */
+async function createPrediction(
+  apiToken: string,
+  model: { owner: string; name: string; version: string | null },
+  input: Record<string, unknown>,
+  sync: boolean
+): Promise<PredictionResponse> {
+  const baseUrl = getBaseUrl();
+
+  let url: string;
+  const body: Record<string, unknown> = { input };
+
+  if (model.version) {
+    url = `${baseUrl}/v1/predictions`;
+    body.version = model.version;
+  } else {
+    url = `${baseUrl}/v1/models/${model.owner}/${model.name}/predictions`;
+  }
+
+  const headers: Record<string, string> = {
+    Authorization: `Bearer ${apiToken}`,
+    "Content-Type": "application/json",
+  };
+
+  if (sync) {
+    headers["Prefer"] = `wait=${SYNC_WAIT_SECONDS}`;
+  }
+
+  const res = await fetch(url, {
+    method: "POST",
+    headers,
+    body: JSON.stringify(body),
+  });
+
+  if (!res.ok) {
+    const err = await res.text();
+    throw new Error(`Replicate API error (${res.status}): ${err}`);
+  }
+
+  return (await res.json()) as PredictionResponse;
+}
+
+/**
+ * Polls a prediction until it succeeds.
+ *
+ * @param apiToken - Bearer token for the Authorization header.
+ * @param getUrl - Status URL of the prediction.
+ * @returns The succeeded prediction.
+ * @throws Error on a non-OK response, a failed/canceled prediction, or when MAX_POLL_MS elapses.
+ */
+async function pollPrediction(apiToken: string, getUrl: string): Promise<PredictionResponse> {
+  const start = Date.now();
+
+  while (Date.now() - start < MAX_POLL_MS) {
+    const res = await fetch(getUrl, {
+      headers: { Authorization: `Bearer ${apiToken}` },
+    });
+
+    if (!res.ok) {
+      const err = await res.text();
+      throw new Error(`Replicate poll error (${res.status}): ${err}`);
+    }
+
+    const prediction = (await res.json()) as PredictionResponse;
+
+    if (prediction.status === "succeeded") return prediction;
+    if (prediction.status === "failed" || prediction.status === "canceled") {
+      throw new Error(`Replicate prediction ${prediction.status}: ${prediction.error || "unknown error"}`);
+    }
+
+    await new Promise((r) => setTimeout(r, POLL_INTERVAL_MS));
+  }
+
+  throw new Error(`Replicate prediction timed out after ${MAX_POLL_MS / 1000}s`);
+}
+
+/**
+ * Extracts the image URL from a prediction's output.
+ *
+ * Accepts a string, an array whose first element is a string, or an object with a string `url`.
+ *
+ * @throws Error when the output has none of these shapes.
+ */
+function extractOutputUrl(prediction: PredictionResponse): string {
+  const output = prediction.output;
+
+  if (typeof output === "string") return output;
+
+  if (Array.isArray(output)) {
+    const first = output[0];
+    if (typeof first === "string") return first;
+  }
+
+  if (output && typeof output === "object" && "url" in output) {
+    const url = (output as Record<string, unknown>).url;
+    if (typeof url === "string") return url;
+  }
+
+  throw new Error(`Unexpected Replicate output format: ${JSON.stringify(output)}`);
+}
+
+/** Downloads the image at `url` and returns its bytes; throws on a non-OK response. */
+async function downloadImage(url: string): Promise<Uint8Array> {
+  const res = await fetch(url);
+  if (!res.ok) throw new Error(`Failed to download image from Replicate: ${res.status}`);
+  const buf = await res.arrayBuffer();
+  return new Uint8Array(buf);
+}
+
+/**
+ * Generates an image with Replicate and returns the image bytes.
+ *
+ * @param prompt - Prompt text.
+ * @param model - "owner/name" or "owner/name:version".
+ * @param args - CLI arguments; `referenceImages`, `aspectRatio` and `n` are used.
+ * @returns The bytes of the first output image.
+ * @throws Error when the token is missing, the model ID is invalid, or the prediction fails or times out.
+ */
+export async function generateImage(
+  prompt: string,
+  model: string,
+  args: CliArgs
+): Promise<Uint8Array> {
+  const apiToken = getApiToken();
+  if (!apiToken) throw new Error("REPLICATE_API_TOKEN is required. Get one at https://replicate.com/account/api-tokens");
+
+  const parsedModel = parseModelId(model);
+
+  // Promise.all returns results in the order of args.referenceImages.
+  const refDataUrls = await Promise.all(args.referenceImages.map((refPath) => readImageAsDataUrl(refPath)));
+
+  const input = buildInput(prompt, args, refDataUrls);
+
+  console.log(`Generating image with Replicate (${model})...`);
+
+  let prediction = await createPrediction(apiToken, parsedModel, input, true);
+
+  // Report a failure returned by the create call directly instead of polling for it.
+  if (prediction.status === "failed" || prediction.status === "canceled") {
+    throw new Error(`Replicate prediction ${prediction.status}: ${prediction.error || "unknown error"}`);
+  }
+
+  if (prediction.status !== "succeeded") {
+    if (!prediction.urls?.get) {
+      throw new Error("Replicate prediction did not return a poll URL");
+    }
+    console.log("Waiting for prediction to complete...");
+    prediction = await pollPrediction(apiToken, prediction.urls.get);
+  }
+
+  console.log("Generation completed.");
+
+  const outputUrl = extractOutputUrl(prediction);
+  return downloadImage(outputUrl);
+}
diff --git a/skills/baoyu-image-gen/scripts/types.ts b/skills/baoyu-image-gen/scripts/types.ts
index a595e9a..23b3f70 100644
--- a/skills/baoyu-image-gen/scripts/types.ts
+++ b/skills/baoyu-image-gen/scripts/types.ts
@@ -1,4 +1,4 @@
-export type Provider = "google" | "openai" | "dashscope";
+export type Provider = "google" | "openai" | "dashscope" | "replicate";
 export type Quality = "normal" | "2k";
 
 export type CliArgs = {
@@ -27,5 +27,6 @@ export type ExtendConfig = {
     google: string | null;
     openai: string | null;
     dashscope: string | null;
+    replicate: string | null;
   };
 };