feat(baoyu-image-gen): add replicate provider

2026-02-24 19:12:36 +08:00 · 2026-02-24 19:12:36 +08:00 · 65a561e654
parent 7b2c02a007
commit 65a561e654
5 changed files with 241 additions and 16 deletions
--- a/skills/baoyu-image-gen/SKILL.md
+++ b/skills/baoyu-image-gen/SKILL.md
@ -1,11 +1,11 @@
 ---
 name: baoyu-image-gen
-description: AI image generation with OpenAI, Google and DashScope APIs. Supports text-to-image, reference images, aspect ratios. Sequential by default; parallel generation available on request. Use when user asks to generate, create, or draw images.
+description: AI image generation with OpenAI, Google, DashScope and Replicate APIs. Supports text-to-image, reference images, aspect ratios. Sequential by default; parallel generation available on request. Use when user asks to generate, create, or draw images.
 ---

 # Image Generation (AI SDK)

-Official API-based image generation. Supports OpenAI, Google and DashScope (阿里通义万象) providers.
+Official API-based image generation. Supports OpenAI, Google, DashScope (阿里通义万象) and Replicate providers.

 ## Script Directory

@ -71,6 +71,12 @@ npx -y bun ${SKILL_DIR}/scripts/main.ts --prompt "A cat" --image out.png --provi

 # DashScope (阿里通义万象)
 npx -y bun ${SKILL_DIR}/scripts/main.ts --prompt "一只可爱的猫" --image out.png --provider dashscope
+
+# Replicate (google/nano-banana-pro)
+npx -y bun ${SKILL_DIR}/scripts/main.ts --prompt "A cat" --image out.png --provider replicate
+
+# Replicate with specific model
+npx -y bun ${SKILL_DIR}/scripts/main.ts --prompt "A cat" --image out.png --provider replicate --model google/nano-banana
 ```

 ## Options
@ -80,7 +86,7 @@ npx -y bun ${SKILL_DIR}/scripts/main.ts --prompt "一只可爱的猫" --image ou
 | `--prompt <text>`, `-p` | Prompt text |
 | `--promptfiles <files...>` | Read prompt from files (concatenated) |
 | `--image <path>` | Output image path (required) |
-| `--provider google\|openai\|dashscope` | Force provider (default: google) |
+| `--provider google\|openai\|dashscope\|replicate` | Force provider (default: google) |
 | `--model <id>`, `-m` | Model ID (`--ref` with OpenAI requires GPT Image model, e.g. `gpt-image-1.5`) |
 | `--ar <ratio>` | Aspect ratio (e.g., `16:9`, `1:1`, `4:3`) |
 | `--size <WxH>` | Size (e.g., `1024x1024`) |
@ -97,19 +103,22 @@ npx -y bun ${SKILL_DIR}/scripts/main.ts --prompt "一只可爱的猫" --image ou
 | `OPENAI_API_KEY` | OpenAI API key |
 | `GOOGLE_API_KEY` | Google API key |
 | `DASHSCOPE_API_KEY` | DashScope API key (阿里云) |
+| `REPLICATE_API_TOKEN` | Replicate API token |
 | `OPENAI_IMAGE_MODEL` | OpenAI model override |
 | `GOOGLE_IMAGE_MODEL` | Google model override |
 | `DASHSCOPE_IMAGE_MODEL` | DashScope model override (default: z-image-turbo) |
+| `REPLICATE_IMAGE_MODEL` | Replicate model override (default: google/nano-banana-pro) |
 | `OPENAI_BASE_URL` | Custom OpenAI endpoint |
 | `GOOGLE_BASE_URL` | Custom Google endpoint |
 | `DASHSCOPE_BASE_URL` | Custom DashScope endpoint |
+| `REPLICATE_BASE_URL` | Custom Replicate endpoint |

 **Load Priority**: CLI args > EXTEND.md > env vars > `<cwd>/.baoyu-skills/.env` > `~/.baoyu-skills/.env`

 ## Provider Selection

-1. `--ref` provided + no `--provider` → auto-select Google first, then OpenAI
-2. `--provider` specified → use it (if `--ref`, must be `google` or `openai`)
+1. `--ref` provided + no `--provider` → auto-select Google first, then OpenAI, then Replicate
+2. `--provider` specified → use it (if `--ref`, must be `google`, `openai`, or `replicate`)
 3. Only one API key available → use that provider
 4. Multiple available → default to Google

--- a/skills/baoyu-image-gen/references/config/preferences-schema.md
+++ b/skills/baoyu-image-gen/references/config/preferences-schema.md
@ -11,7 +11,7 @@ description: EXTEND.md YAML schema for baoyu-image-gen user preferences
 ---
 version: 1

-default_provider: null      # google|openai|dashscope|null (null = auto-detect)
+default_provider: null      # google|openai|dashscope|replicate|null (null = auto-detect)

 default_quality: null       # normal|2k|null (null = use default: 2k)

@ -23,6 +23,7 @@ default_model:
  google: null              # e.g., "gemini-3-pro-image-preview"
  openai: null              # e.g., "gpt-image-1.5"
  dashscope: null           # e.g., "z-image-turbo"
+  replicate: null           # e.g., "google/nano-banana-pro"
 ---
 ```

@ -38,6 +39,7 @@ default_model:
 | `default_model.google` | string\|null | null | Google default model |
 | `default_model.openai` | string\|null | null | OpenAI default model |
 | `default_model.dashscope` | string\|null | null | DashScope default model |
+| `default_model.replicate` | string\|null | null | Replicate default model |

 ## Examples

@ -62,5 +64,6 @@ default_model:
  google: "gemini-3-pro-image-preview"
  openai: "gpt-image-1.5"
  dashscope: "z-image-turbo"
+  replicate: "google/nano-banana-pro"
 ---
 ```
--- a/skills/baoyu-image-gen/scripts/main.ts
+++ b/skills/baoyu-image-gen/scripts/main.ts
@ -14,7 +14,7 @@ Options:
  -p, --prompt <text>       Prompt text
  --promptfiles <files...>  Read prompt from files (concatenated)
  --image <path>            Output image path (required)
-  --provider google|openai|dashscope  Force provider (auto-detect by default)
+  --provider google|openai|dashscope|replicate  Force provider (auto-detect by default)
  -m, --model <id>          Model ID
  --ar <ratio>              Aspect ratio (e.g., 16:9, 1:1, 4:3)
  --size <WxH>              Size (e.g., 1024x1024)
@ -30,12 +30,15 @@ Environment variables:
  GOOGLE_API_KEY            Google API key
  GEMINI_API_KEY            Gemini API key (alias for GOOGLE_API_KEY)
  DASHSCOPE_API_KEY         DashScope API key (阿里云通义万象)
+  REPLICATE_API_TOKEN       Replicate API token
  OPENAI_IMAGE_MODEL        Default OpenAI model (gpt-image-1.5)
  GOOGLE_IMAGE_MODEL        Default Google model (gemini-3-pro-image-preview)
  DASHSCOPE_IMAGE_MODEL     Default DashScope model (z-image-turbo)
+  REPLICATE_IMAGE_MODEL     Default Replicate model (google/nano-banana-pro)
  OPENAI_BASE_URL           Custom OpenAI endpoint
  GOOGLE_BASE_URL           Custom Google endpoint
  DASHSCOPE_BASE_URL        Custom DashScope endpoint
+  REPLICATE_BASE_URL        Custom Replicate endpoint

 Env file load order: CLI args > EXTEND.md > process.env > <cwd>/.baoyu-skills/.env > ~/.baoyu-skills/.env`);
 }
@ -108,7 +111,7 @@ function parseArgs(argv: string[]): CliArgs {

    if (a === "--provider") {
      const v = argv[++i];
-      if (v !== "google" && v !== "openai" && v !== "dashscope") throw new Error(`Invalid provider: ${v}`);
+      if (v !== "google" && v !== "openai" && v !== "dashscope" && v !== "replicate") throw new Error(`Invalid provider: ${v}`);
      out.provider = v;
      continue;
    }
@ -250,9 +253,9 @@ function parseSimpleYaml(yaml: string): Partial<ExtendConfig> {
      } else if (key === "default_image_size") {
        config.default_image_size = value === "null" ? null : (value as "1K" | "2K" | "4K");
      } else if (key === "default_model") {
-        config.default_model = { google: null, openai: null, dashscope: null };
+        config.default_model = { google: null, openai: null, dashscope: null, replicate: null };
        currentKey = "default_model";
-      } else if (currentKey === "default_model" && (key === "google" || key === "openai" || key === "dashscope")) {
+      } else if (currentKey === "default_model" && (key === "google" || key === "openai" || key === "dashscope" || key === "replicate")) {
        const cleaned = value.replace(/['"]/g, "");
        config.default_model![key] = cleaned === "null" ? null : cleaned;
      }
@ -323,9 +326,9 @@ function normalizeOutputImagePath(p: string): string {
 }

 function detectProvider(args: CliArgs): Provider {
-  if (args.referenceImages.length > 0 && args.provider && args.provider !== "google" && args.provider !== "openai") {
+  if (args.referenceImages.length > 0 && args.provider && args.provider !== "google" && args.provider !== "openai" && args.provider !== "replicate") {
    throw new Error(
-      "Reference images require a ref-capable provider. Use --provider google (Gemini multimodal) or --provider openai (GPT Image edits)."
+      "Reference images require a ref-capable provider. Use --provider google (Gemini multimodal), --provider openai (GPT Image edits), or --provider replicate."
    );
  }

@ -334,22 +337,24 @@ function detectProvider(args: CliArgs): Provider {
  const hasGoogle = !!(process.env.GOOGLE_API_KEY || process.env.GEMINI_API_KEY);
  const hasOpenai = !!process.env.OPENAI_API_KEY;
  const hasDashscope = !!process.env.DASHSCOPE_API_KEY;
+  const hasReplicate = !!process.env.REPLICATE_API_TOKEN;

  if (args.referenceImages.length > 0) {
    if (hasGoogle) return "google";
    if (hasOpenai) return "openai";
+    if (hasReplicate) return "replicate";
    throw new Error(
-      "Reference images require Google or OpenAI. Set GOOGLE_API_KEY/GEMINI_API_KEY or OPENAI_API_KEY, or remove --ref."
+      "Reference images require Google, OpenAI or Replicate. Set GOOGLE_API_KEY/GEMINI_API_KEY, OPENAI_API_KEY, or REPLICATE_API_TOKEN, or remove --ref."
    );
  }

-  const available = [hasGoogle && "google", hasOpenai && "openai", hasDashscope && "dashscope"].filter(Boolean) as Provider[];
+  const available = [hasGoogle && "google", hasOpenai && "openai", hasDashscope && "dashscope", hasReplicate && "replicate"].filter(Boolean) as Provider[];

  if (available.length === 1) return available[0]!;
  if (available.length > 1) return available[0]!;

  throw new Error(
-    "No API key found. Set GOOGLE_API_KEY, GEMINI_API_KEY, OPENAI_API_KEY, or DASHSCOPE_API_KEY.\n" +
+    "No API key found. Set GOOGLE_API_KEY, GEMINI_API_KEY, OPENAI_API_KEY, DASHSCOPE_API_KEY, or REPLICATE_API_TOKEN.\n" +
      "Create ~/.baoyu-skills/.env or <cwd>/.baoyu-skills/.env with your keys."
  );
 }
@ -389,6 +394,9 @@ async function loadProviderModule(provider: Provider): Promise<ProviderModule> {
  if (provider === "dashscope") {
    return (await import("./providers/dashscope")) as ProviderModule;
  }
+  if (provider === "replicate") {
+    return (await import("./providers/replicate")) as ProviderModule;
+  }
  return (await import("./providers/openai")) as ProviderModule;
 }

@ -436,6 +444,7 @@ async function main(): Promise<void> {
    if (provider === "google") model = extendConfig.default_model.google ?? null;
    if (provider === "openai") model = extendConfig.default_model.openai ?? null;
    if (provider === "dashscope") model = extendConfig.default_model.dashscope ?? null;
+    if (provider === "replicate") model = extendConfig.default_model.replicate ?? null;
  }
  model = model || providerModule.getDefaultModel();

--- a/skills/baoyu-image-gen/scripts/providers/replicate.ts
+++ b/skills/baoyu-image-gen/scripts/providers/replicate.ts
@ -0,0 +1,203 @@
+import path from "node:path";
+import { readFile } from "node:fs/promises";
+import type { CliArgs } from "../types";
+
+const DEFAULT_MODEL = "google/nano-banana-pro";
+const SYNC_WAIT_SECONDS = 60;
+const POLL_INTERVAL_MS = 2000;
+const MAX_POLL_MS = 300_000;
+
+export function getDefaultModel(): string {
+  return process.env.REPLICATE_IMAGE_MODEL || DEFAULT_MODEL;
+}
+
+function getApiToken(): string | null {
+  return process.env.REPLICATE_API_TOKEN || null;
+}
+
+function getBaseUrl(): string {
+  const base = process.env.REPLICATE_BASE_URL || "https://api.replicate.com";
+  return base.replace(/\/+$/g, "");
+}
+
+function parseModelId(model: string): { owner: string; name: string; version: string | null } {
+  const [ownerName, version] = model.split(":");
+  const parts = ownerName!.split("/");
+  if (parts.length !== 2 || !parts[0] || !parts[1]) {
+    throw new Error(
+      `Invalid Replicate model format: "${model}". Expected "owner/name" or "owner/name:version".`
+    );
+  }
+  return { owner: parts[0], name: parts[1], version: version || null };
+}
+
+function buildInput(prompt: string, args: CliArgs, referenceImages: string[]): Record<string, unknown> {
+  const input: Record<string, unknown> = { prompt };
+
+  if (args.aspectRatio) {
+    input.aspect_ratio = args.aspectRatio;
+  }
+
+  if (args.n > 1) {
+    input.number_of_images = args.n;
+  }
+
+  input.output_format = "png";
+
+  if (referenceImages.length > 0) {
+    if (referenceImages.length === 1) {
+      input.image = referenceImages[0];
+    } else {
+      for (let i = 0; i < referenceImages.length; i++) {
+        input[`image${i > 0 ? i + 1 : ""}`] = referenceImages[i];
+      }
+    }
+  }
+
+  return input;
+}
+
+async function readImageAsDataUrl(p: string): Promise<string> {
+  const buf = await readFile(p);
+  const ext = path.extname(p).toLowerCase();
+  let mimeType = "image/png";
+  if (ext === ".jpg" || ext === ".jpeg") mimeType = "image/jpeg";
+  else if (ext === ".gif") mimeType = "image/gif";
+  else if (ext === ".webp") mimeType = "image/webp";
+  return `data:${mimeType};base64,${buf.toString("base64")}`;
+}
+
+type PredictionResponse = {
+  id: string;
+  status: string;
+  output: unknown;
+  error: string | null;
+  urls?: { get?: string };
+};
+
+async function createPrediction(
+  apiToken: string,
+  model: { owner: string; name: string; version: string | null },
+  input: Record<string, unknown>,
+  sync: boolean
+): Promise<PredictionResponse> {
+  const baseUrl = getBaseUrl();
+
+  let url: string;
+  const body: Record<string, unknown> = { input };
+
+  if (model.version) {
+    url = `${baseUrl}/v1/predictions`;
+    body.version = model.version;
+  } else {
+    url = `${baseUrl}/v1/models/${model.owner}/${model.name}/predictions`;
+  }
+
+  const headers: Record<string, string> = {
+    Authorization: `Bearer ${apiToken}`,
+    "Content-Type": "application/json",
+  };
+
+  if (sync) {
+    headers["Prefer"] = `wait=${SYNC_WAIT_SECONDS}`;
+  }
+
+  const res = await fetch(url, {
+    method: "POST",
+    headers,
+    body: JSON.stringify(body),
+  });
+
+  if (!res.ok) {
+    const err = await res.text();
+    throw new Error(`Replicate API error (${res.status}): ${err}`);
+  }
+
+  return (await res.json()) as PredictionResponse;
+}
+
+async function pollPrediction(apiToken: string, getUrl: string): Promise<PredictionResponse> {
+  const start = Date.now();
+
+  while (Date.now() - start < MAX_POLL_MS) {
+    const res = await fetch(getUrl, {
+      headers: { Authorization: `Bearer ${apiToken}` },
+    });
+
+    if (!res.ok) {
+      const err = await res.text();
+      throw new Error(`Replicate poll error (${res.status}): ${err}`);
+    }
+
+    const prediction = (await res.json()) as PredictionResponse;
+
+    if (prediction.status === "succeeded") return prediction;
+    if (prediction.status === "failed" || prediction.status === "canceled") {
+      throw new Error(`Replicate prediction ${prediction.status}: ${prediction.error || "unknown error"}`);
+    }
+
+    await new Promise((r) => setTimeout(r, POLL_INTERVAL_MS));
+  }
+
+  throw new Error(`Replicate prediction timed out after ${MAX_POLL_MS / 1000}s`);
+}
+
+function extractOutputUrl(prediction: PredictionResponse): string {
+  const output = prediction.output;
+
+  if (typeof output === "string") return output;
+
+  if (Array.isArray(output)) {
+    const first = output[0];
+    if (typeof first === "string") return first;
+  }
+
+  if (output && typeof output === "object" && "url" in output) {
+    const url = (output as Record<string, unknown>).url;
+    if (typeof url === "string") return url;
+  }
+
+  throw new Error(`Unexpected Replicate output format: ${JSON.stringify(output)}`);
+}
+
+async function downloadImage(url: string): Promise<Uint8Array> {
+  const res = await fetch(url);
+  if (!res.ok) throw new Error(`Failed to download image from Replicate: ${res.status}`);
+  const buf = await res.arrayBuffer();
+  return new Uint8Array(buf);
+}
+
+export async function generateImage(
+  prompt: string,
+  model: string,
+  args: CliArgs
+): Promise<Uint8Array> {
+  const apiToken = getApiToken();
+  if (!apiToken) throw new Error("REPLICATE_API_TOKEN is required. Get one at https://replicate.com/account/api-tokens");
+
+  const parsedModel = parseModelId(model);
+
+  const refDataUrls: string[] = [];
+  for (const refPath of args.referenceImages) {
+    refDataUrls.push(await readImageAsDataUrl(refPath));
+  }
+
+  const input = buildInput(prompt, args, refDataUrls);
+
+  console.log(`Generating image with Replicate (${model})...`);
+
+  let prediction = await createPrediction(apiToken, parsedModel, input, true);
+
+  if (prediction.status !== "succeeded") {
+    if (!prediction.urls?.get) {
+      throw new Error("Replicate prediction did not return a poll URL");
+    }
+    console.log("Waiting for prediction to complete...");
+    prediction = await pollPrediction(apiToken, prediction.urls.get);
+  }
+
+  console.log("Generation completed.");
+
+  const outputUrl = extractOutputUrl(prediction);
+  return downloadImage(outputUrl);
+}
--- a/skills/baoyu-image-gen/scripts/types.ts
+++ b/skills/baoyu-image-gen/scripts/types.ts
@ -1,4 +1,4 @@
-export type Provider = "google" | "openai" | "dashscope";
+export type Provider = "google" | "openai" | "dashscope" | "replicate";
 export type Quality = "normal" | "2k";

 export type CliArgs = {
@ -27,5 +27,6 @@ export type ExtendConfig = {
    google: string | null;
    openai: string | null;
    dashscope: string | null;
+    replicate: string | null;
  };
 };