Adapting baoyu-image-gen to the Tongyi Text-to-Image Model

2026-01-28 09:04:57 +08:00 · 2026-01-28 09:04:57 +08:00 · 907c8ab852
parent 64945f3341
commit 907c8ab852
5 changed files with 167 additions and 11 deletions
--- a/.gitignore
+++ b/.gitignore
@ -152,3 +152,8 @@ slide-deck/
 infographic/
 illustrations/
 comic/
 ### IntelliJ IDEA ###
 .idea
 *.iws
 *.iml
 *.ipr
--- a/skills/baoyu-image-gen/SKILL.md
+++ b/skills/baoyu-image-gen/SKILL.md
@ -1,11 +1,11 @@
 ---
 name: baoyu-image-gen
-description: AI image generation with OpenAI and Google APIs. Supports text-to-image, reference images, aspect ratios, and parallel generation (recommended 4 concurrent subagents). Use when user asks to generate, create, or draw images.
+description: AI image generation with OpenAI, Google and DashScope APIs. Supports text-to-image, reference images, aspect ratios, and parallel generation (recommended 4 concurrent subagents). Use when user asks to generate, create, or draw images.
 ---
 # Image Generation (AI SDK)
-Official API-based image generation. Supports OpenAI and Google providers.
+Official API-based image generation. Supports OpenAI, Google and DashScope (阿里通义万象) providers.
 ## Script Directory
@ -63,6 +63,9 @@ npx -y bun ${SKILL_DIR}/scripts/main.ts --prompt "Make blue" --image out.png --r
 # Specific provider
 npx -y bun ${SKILL_DIR}/scripts/main.ts --prompt "A cat" --image out.png --provider openai
 # DashScope (阿里通义万象)
 npx -y bun ${SKILL_DIR}/scripts/main.ts --prompt "一只可爱的猫" --image out.png --provider dashscope
 ```
 ## Options
@ -72,7 +75,7 @@ npx -y bun ${SKILL_DIR}/scripts/main.ts --prompt "A cat" --image out.png --provi
 | `--prompt <text>`, `-p` | Prompt text |
 | `--promptfiles <files...>` | Read prompt from files (concatenated) |
 | `--image <path>` | Output image path (required) |
-| `--provider google\|openai` | Force provider (default: google) |
+| `--provider google\|openai\|dashscope` | Force provider (default: google) |
 | `--model <id>`, `-m` | Model ID |
 | `--ar <ratio>` | Aspect ratio (e.g., `16:9`, `1:1`, `4:3`) |
 | `--size <WxH>` | Size (e.g., `1024x1024`) |
@ -88,10 +91,13 @@ npx -y bun ${SKILL_DIR}/scripts/main.ts --prompt "A cat" --image out.png --provi
 |----------|-------------|
 | `OPENAI_API_KEY` | OpenAI API key |
 | `GOOGLE_API_KEY` | Google API key |
 | `DASHSCOPE_API_KEY` | DashScope API key (阿里云) |
 | `OPENAI_IMAGE_MODEL` | OpenAI model override |
 | `GOOGLE_IMAGE_MODEL` | Google model override |
 | `DASHSCOPE_IMAGE_MODEL` | DashScope model override (default: z-image-turbo) |
 | `OPENAI_BASE_URL` | Custom OpenAI endpoint |
 | `GOOGLE_BASE_URL` | Custom Google endpoint |
 | `DASHSCOPE_BASE_URL` | Custom DashScope endpoint |
 **Load Priority**: CLI args > env vars > `<cwd>/.baoyu-skills/.env` > `~/.baoyu-skills/.env`
@ -99,7 +105,7 @@ npx -y bun ${SKILL_DIR}/scripts/main.ts --prompt "A cat" --image out.png --provi
 1. `--provider` specified → use it
 2. Only one API key available → use that provider
-3. Both available → default to Google
+3. Multiple available → first available in priority order: Google, OpenAI, DashScope
 ## Quality Presets
--- a/skills/baoyu-image-gen/scripts/main.ts
+++ b/skills/baoyu-image-gen/scripts/main.ts
@ -14,7 +14,7 @@ Options:
  -p, --prompt <text>       Prompt text
  --promptfiles <files...>  Read prompt from files (concatenated)
  --image <path>            Output image path (required)
-  --provider google|openai  Force provider (auto-detect by default)
+  --provider google|openai|dashscope  Force provider (auto-detect by default)
  -m, --model <id>          Model ID
  --ar <ratio>              Aspect ratio (e.g., 16:9, 1:1, 4:3)
  --size <WxH>              Size (e.g., 1024x1024)
@ -29,10 +29,13 @@ Environment variables:
  OPENAI_API_KEY            OpenAI API key
  GOOGLE_API_KEY            Google API key
  GEMINI_API_KEY            Gemini API key (alias for GOOGLE_API_KEY)
  DASHSCOPE_API_KEY         DashScope API key (阿里云通义万象)
  OPENAI_IMAGE_MODEL        Default OpenAI model (gpt-image-1.5)
  GOOGLE_IMAGE_MODEL        Default Google model (gemini-3-pro-image-preview)
  DASHSCOPE_IMAGE_MODEL     Default DashScope model (z-image-turbo)
  OPENAI_BASE_URL           Custom OpenAI endpoint
  GOOGLE_BASE_URL           Custom Google endpoint
  DASHSCOPE_BASE_URL        Custom DashScope endpoint
 Env file load order: CLI args > process.env > <cwd>/.baoyu-skills/.env > ~/.baoyu-skills/.env`);
 }
@ -105,7 +108,7 @@ function parseArgs(argv: string[]): CliArgs {
    if (a === "--provider") {
      const v = argv[++i];
-      if (v !== "google" && v !== "openai") throw new Error(`Invalid provider: ${v}`);
+      if (v !== "google" && v !== "openai" && v !== "dashscope") throw new Error(`Invalid provider: ${v}`);
      out.provider = v;
      continue;
    }
@ -243,13 +246,15 @@ function detectProvider(args: CliArgs): Provider {
  const hasGoogle = !!(process.env.GOOGLE_API_KEY || process.env.GEMINI_API_KEY);
  const hasOpenai = !!process.env.OPENAI_API_KEY;
  const hasDashscope = !!process.env.DASHSCOPE_API_KEY;
-  if (hasGoogle && !hasOpenai) return "google";
+  const available = [hasGoogle && "google", hasOpenai && "openai", hasDashscope && "dashscope"].filter(Boolean) as Provider[];
-  if (hasOpenai && !hasGoogle) return "openai";
+
-  if (hasGoogle && hasOpenai) return "google";
+  if (available.length === 1) return available[0]!;
  if (available.length > 1) return available[0]!;
  throw new Error(
-    "No API key found. Set GOOGLE_API_KEY, GEMINI_API_KEY, or OPENAI_API_KEY.\n" +
+    "No API key found. Set GOOGLE_API_KEY, GEMINI_API_KEY, OPENAI_API_KEY, or DASHSCOPE_API_KEY.\n" +
      "Create ~/.baoyu-skills/.env or <cwd>/.baoyu-skills/.env with your keys."
  );
 }
@ -263,6 +268,9 @@ async function loadProviderModule(provider: Provider): Promise<ProviderModule> {
  if (provider === "google") {
    return (await import("./providers/google")) as ProviderModule;
  }
  if (provider === "dashscope") {
    return (await import("./providers/dashscope")) as ProviderModule;
  }
  return (await import("./providers/openai")) as ProviderModule;
 }
--- a/skills/baoyu-image-gen/scripts/providers/dashscope.ts
+++ b/skills/baoyu-image-gen/scripts/providers/dashscope.ts
@ -0,0 +1,137 @@
 import type { CliArgs } from "../types";
 /**
  * Resolves the DashScope model ID to use when no explicit model is supplied.
  *
  * @returns The value of the `DASHSCOPE_IMAGE_MODEL` environment variable when
  *   it is set to a non-empty string, otherwise `"z-image-turbo"`.
  */
 export function getDefaultModel(): string {
  const override = process.env.DASHSCOPE_IMAGE_MODEL;
  // An empty string counts as "not set" and falls through to the built-in default.
  if (override) return override;
  return "z-image-turbo";
 }
 /**
  * Reads the DashScope API key from the environment.
  *
  * @returns The value of `DASHSCOPE_API_KEY`, or `null` when the variable is
  *   unset or empty.
  */
 function getApiKey(): string | null {
  const key = process.env.DASHSCOPE_API_KEY;
  return key ? key : null;
 }
 /**
  * Resolves the DashScope base URL without any trailing slashes.
  *
  * @returns `DASHSCOPE_BASE_URL` when it is set to a non-empty string,
  *   otherwise `"https://dashscope.aliyuncs.com"`, with every trailing `/`
  *   removed so a path can be appended directly.
  */
 function getBaseUrl(): string {
  let url = process.env.DASHSCOPE_BASE_URL || "https://dashscope.aliyuncs.com";
  // Drop the whole run of trailing slashes, not just the last one.
  while (url.endsWith("/")) {
    url = url.slice(0, -1);
  }
  return url;
 }
 /**
  * Parses an aspect ratio written as `W:H`, where each side is a non-negative
  * decimal number such as `16:9` or `1.5:1`.
  *
  * @param ar - The aspect ratio string to parse.
  * @returns The two sides as numbers, or `null` when the string does not match
  *   the `W:H` form or either side is not strictly positive.
  */
 function parseAspectRatio(ar: string): { width: number; height: number } | null {
  const parts = /^(\d+(?:\.\d+)?):(\d+(?:\.\d+)?)$/.exec(ar);
  if (parts === null) return null;

  const width = Number(parts[1]);
  const height = Number(parts[2]);
  // A zero side would give a zero or infinite ratio downstream, so reject it.
  return width > 0 && height > 0 ? { width, height } : null;
 }
 /**
  * Derives a DashScope `W*H` size string from an aspect ratio and quality preset.
  *
  * The base edge is 1440 for quality `"2k"` and 1024 otherwise. A missing or
  * unparseable ratio, or one within 0.1 of 1, yields a square. Landscape ratios
  * keep the height at the base edge and widen the width; portrait ratios keep
  * the width at the base edge and lengthen the height.
  *
  * @param ar - Aspect ratio in `W:H` form, or `null` when none was given.
  * @param quality - Quality preset selecting the base edge length.
  * @returns The size formatted as `<width>*<height>`.
  */
 function getSizeFromAspectRatio(ar: string | null, quality: CliArgs["quality"]): string {
  const edge = quality === "2k" ? 1440 : 1024;
  const square = `${edge}*${edge}`;

  const dims = ar ? parseAspectRatio(ar) : null;
  if (!dims) return square;

  const ratio = dims.width / dims.height;
  // Near-square ratios are rounded to an exact square.
  if (Math.abs(ratio - 1) < 0.1) return square;

  return ratio > 1
    ? `${Math.round(edge * ratio)}*${edge}`
    : `${edge}*${Math.round(edge / ratio)}`;
 }
 /**
  * Converts a CLI size such as `1024x1024` into the `1024*1024` form this
  * provider sends as the `size` request parameter.
  *
  * The separator is matched case-insensitively and surrounding whitespace is
  * dropped, so `1024X1024` and ` 1024x1024 ` are also converted. A value that
  * already uses `*` is returned unchanged apart from the trim.
  *
  * @param size - Size string as supplied by the user.
  * @returns The size with its first `x`/`X` replaced by `*`.
  */
 function normalizeSize(size: string): string {
  return size.trim().replace(/x/i, "*");
 }
 /**
  * Generates a single image with DashScope and returns its raw bytes.
  *
  * Sends one user message containing `prompt` to the multimodal-generation
  * endpoint under the configured base URL. The requested size comes from
  * `args.size` when present, otherwise from `args.aspectRatio` and
  * `args.quality`. Reference images are not sent; a warning is printed instead.
  *
  * The image is taken from `output.result_image` if present, otherwise from the
  * first content item of the first choice that carries an `image` field. A
  * value starting with `http://` or `https://` is downloaded; anything else is
  * decoded as base64.
  *
  * @param prompt - Text prompt for the image.
  * @param model - DashScope model ID to request.
  * @param args - Parsed CLI arguments (size, aspect ratio, quality, reference images).
  * @returns The image bytes.
  * @throws Error when the API key is missing, the API responds with a non-OK
  *   status, the response contains no image, or the image download fails.
  */
 export async function generateImage(
  prompt: string,
  model: string,
  args: CliArgs
 ): Promise<Uint8Array> {
  type DashScopeResponse = {
    output?: {
      result_image?: string;
      choices?: Array<{
        message?: {
          content?: Array<{ image?: string }>;
        };
      }>;
    };
  };

  const token = getApiKey();
  if (!token) throw new Error("DASHSCOPE_API_KEY is required");

  if (args.referenceImages.length > 0) {
    console.error("Warning: Reference images not yet supported with DashScope, ignoring.");
  }

  // An explicit size wins; otherwise derive one from aspect ratio and quality.
  const size = args.size
    ? normalizeSize(args.size)
    : getSizeFromAspectRatio(args.aspectRatio, args.quality);
  const endpoint = `${getBaseUrl()}/api/v1/services/aigc/multimodal-generation/generation`;

  const userMessage = { role: "user", content: [{ text: prompt }] };
  const requestBody = {
    model,
    input: { messages: [userMessage] },
    parameters: { prompt_extend: false, size },
  };

  console.log(`Generating image with DashScope (${model})...`, { size });

  const response = await fetch(endpoint, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      Authorization: `Bearer ${token}`,
    },
    body: JSON.stringify(requestBody),
  });
  if (!response.ok) {
    const detail = await response.text();
    throw new Error(`DashScope API error (${response.status}): ${detail}`);
  }

  const parsed = (await response.json()) as DashScopeResponse;
  const output = parsed.output;

  // Prefer the top-level result image; otherwise scan the first choice's content.
  let imageRef: string | null = null;
  if (output?.result_image) {
    imageRef = output.result_image;
  } else {
    const parts = output?.choices?.[0]?.message?.content;
    if (parts) {
      for (const part of parts) {
        if (part.image) {
          imageRef = part.image;
          break;
        }
      }
    }
  }

  if (!imageRef) {
    // Dump the full response so the unexpected shape can be diagnosed.
    console.error("Response:", JSON.stringify(parsed, null, 2));
    throw new Error("No image in response");
  }

  const isRemote = imageRef.startsWith("http://") || imageRef.startsWith("https://");
  if (!isRemote) {
    return new Uint8Array(Buffer.from(imageRef, "base64"));
  }

  const download = await fetch(imageRef);
  if (!download.ok) throw new Error("Failed to download image");
  return new Uint8Array(await download.arrayBuffer());
 }
--- a/skills/baoyu-image-gen/scripts/types.ts
+++ b/skills/baoyu-image-gen/scripts/types.ts
@ -1,4 +1,4 @@
-export type Provider = "google" | "openai";
+export type Provider = "google" | "openai" | "dashscope";
 export type Quality = "normal" | "2k";
 export type CliArgs = {