Adapting baoyu-image-gen to the Tongyi Text-to-Image Model

2026-01-28 09:04:57 +08:00 · 2026-01-28 09:04:57 +08:00 · 907c8ab852
parent 64945f3341
commit 907c8ab852
5 changed files with 167 additions and 11 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -152,3 +152,8 @@ slide-deck/
 infographic/
 illustrations/
 comic/
+### IntelliJ IDEA ###
+.idea
+*.iws
+*.iml
+*.ipr
--- a/skills/baoyu-image-gen/SKILL.md
+++ b/skills/baoyu-image-gen/SKILL.md
@@ -1,11 +1,11 @@
 ---
 name: baoyu-image-gen
-description: AI image generation with OpenAI and Google APIs. Supports text-to-image, reference images, aspect ratios, and parallel generation (recommended 4 concurrent subagents). Use when user asks to generate, create, or draw images.
+description: AI image generation with OpenAI, Google and DashScope APIs. Supports text-to-image, reference images, aspect ratios, and parallel generation (recommended 4 concurrent subagents). Use when user asks to generate, create, or draw images.
 ---

 # Image Generation (AI SDK)

-Official API-based image generation. Supports OpenAI and Google providers.
+Official API-based image generation. Supports OpenAI, Google and DashScope (阿里通义万象) providers.

 ## Script Directory

@@ -63,6 +63,9 @@ npx -y bun ${SKILL_DIR}/scripts/main.ts --prompt "Make blue" --image out.png --r

 # Specific provider
 npx -y bun ${SKILL_DIR}/scripts/main.ts --prompt "A cat" --image out.png --provider openai
+
+# DashScope (阿里通义万象)
+npx -y bun ${SKILL_DIR}/scripts/main.ts --prompt "一只可爱的猫" --image out.png --provider dashscope
 ```

 ## Options
@@ -72,7 +75,7 @@ npx -y bun ${SKILL_DIR}/scripts/main.ts --prompt "A cat" --image out.png --provi
 | `--prompt <text>`, `-p` | Prompt text |
 | `--promptfiles <files...>` | Read prompt from files (concatenated) |
 | `--image <path>` | Output image path (required) |
-| `--provider google\|openai` | Force provider (default: google) |
+| `--provider google\|openai\|dashscope` | Force provider (default: auto-detect from available API keys, preferring Google) |
 | `--model <id>`, `-m` | Model ID |
 | `--ar <ratio>` | Aspect ratio (e.g., `16:9`, `1:1`, `4:3`) |
 | `--size <WxH>` | Size (e.g., `1024x1024`) |
@@ -88,10 +91,13 @@ npx -y bun ${SKILL_DIR}/scripts/main.ts --prompt "A cat" --image out.png --provi
 |----------|-------------|
 | `OPENAI_API_KEY` | OpenAI API key |
 | `GOOGLE_API_KEY` | Google API key |
+| `DASHSCOPE_API_KEY` | DashScope API key (阿里云) |
 | `OPENAI_IMAGE_MODEL` | OpenAI model override |
 | `GOOGLE_IMAGE_MODEL` | Google model override |
+| `DASHSCOPE_IMAGE_MODEL` | DashScope model override (default: z-image-turbo) |
 | `OPENAI_BASE_URL` | Custom OpenAI endpoint |
 | `GOOGLE_BASE_URL` | Custom Google endpoint |
+| `DASHSCOPE_BASE_URL` | Custom DashScope endpoint |

 **Load Priority**: CLI args > env vars > `<cwd>/.baoyu-skills/.env` > `~/.baoyu-skills/.env`

@@ -99,7 +105,7 @@ npx -y bun ${SKILL_DIR}/scripts/main.ts --prompt "A cat" --image out.png --provi

 1. `--provider` specified → use it
 2. Only one API key available → use that provider
-3. Both available → default to Google
+3. Multiple available → use the first available in this order: Google, OpenAI, DashScope

 ## Quality Presets

--- a/skills/baoyu-image-gen/scripts/main.ts
+++ b/skills/baoyu-image-gen/scripts/main.ts
@@ -14,7 +14,7 @@ Options:
  -p, --prompt <text>       Prompt text
  --promptfiles <files...>  Read prompt from files (concatenated)
  --image <path>            Output image path (required)
-  --provider google|openai  Force provider (auto-detect by default)
+  --provider google|openai|dashscope  Force provider (auto-detect by default)
  -m, --model <id>          Model ID
  --ar <ratio>              Aspect ratio (e.g., 16:9, 1:1, 4:3)
  --size <WxH>              Size (e.g., 1024x1024)
@@ -29,10 +29,13 @@ Environment variables:
  OPENAI_API_KEY            OpenAI API key
  GOOGLE_API_KEY            Google API key
  GEMINI_API_KEY            Gemini API key (alias for GOOGLE_API_KEY)
+  DASHSCOPE_API_KEY         DashScope API key (阿里云通义万象)
  OPENAI_IMAGE_MODEL        Default OpenAI model (gpt-image-1.5)
  GOOGLE_IMAGE_MODEL        Default Google model (gemini-3-pro-image-preview)
+  DASHSCOPE_IMAGE_MODEL     Default DashScope model (z-image-turbo)
  OPENAI_BASE_URL           Custom OpenAI endpoint
  GOOGLE_BASE_URL           Custom Google endpoint
+  DASHSCOPE_BASE_URL        Custom DashScope endpoint

 Env file load order: CLI args > process.env > <cwd>/.baoyu-skills/.env > ~/.baoyu-skills/.env`);
 }
@@ -105,7 +108,7 @@ function parseArgs(argv: string[]): CliArgs {

    if (a === "--provider") {
      const v = argv[++i];
-      if (v !== "google" && v !== "openai") throw new Error(`Invalid provider: ${v}`);
+      if (v !== "google" && v !== "openai" && v !== "dashscope") throw new Error(`Invalid provider: ${v}`);
      out.provider = v;
      continue;
    }
@@ -243,13 +246,15 @@ function detectProvider(args: CliArgs): Provider {

  const hasGoogle = !!(process.env.GOOGLE_API_KEY || process.env.GEMINI_API_KEY);
  const hasOpenai = !!process.env.OPENAI_API_KEY;
+  const hasDashscope = !!process.env.DASHSCOPE_API_KEY;

-  if (hasGoogle && !hasOpenai) return "google";
-  if (hasOpenai && !hasGoogle) return "openai";
-  if (hasGoogle && hasOpenai) return "google";
+  const available = [hasGoogle && "google", hasOpenai && "openai", hasDashscope && "dashscope"].filter(Boolean) as Provider[];
+
+  if (available.length === 1) return available[0]!;
+  if (available.length > 1) return available[0]!;

  throw new Error(
-    "No API key found. Set GOOGLE_API_KEY, GEMINI_API_KEY, or OPENAI_API_KEY.\n" +
+    "No API key found. Set GOOGLE_API_KEY, GEMINI_API_KEY, OPENAI_API_KEY, or DASHSCOPE_API_KEY.\n" +
      "Create ~/.baoyu-skills/.env or <cwd>/.baoyu-skills/.env with your keys."
  );
 }
@@ -263,6 +268,9 @@ async function loadProviderModule(provider: Provider): Promise<ProviderModule> {
  if (provider === "google") {
    return (await import("./providers/google")) as ProviderModule;
  }
+  if (provider === "dashscope") {
+    return (await import("./providers/dashscope")) as ProviderModule;
+  }
  return (await import("./providers/openai")) as ProviderModule;
 }

--- a/skills/baoyu-image-gen/scripts/providers/dashscope.ts
+++ b/skills/baoyu-image-gen/scripts/providers/dashscope.ts
@@ -0,0 +1,137 @@
+import type { CliArgs } from "../types";
+
+export function getDefaultModel(): string { // Returns the DashScope model ID to use as the default.
+  return process.env.DASHSCOPE_IMAGE_MODEL || "z-image-turbo"; // Env override wins; an unset or empty value falls back to "z-image-turbo".
+}
+
+function getApiKey(): string | null { // Reads the DashScope credential from the environment.
+  return process.env.DASHSCOPE_API_KEY || null; // Unset or empty becomes null so the caller can test for absence.
+}
+
+function getBaseUrl(): string { // Resolves the API origin that the request path is appended to.
+  const base = process.env.DASHSCOPE_BASE_URL || "https://dashscope.aliyuncs.com"; // DASHSCOPE_BASE_URL overrides the default host.
+  return base.replace(/\/+$/g, ""); // Strip trailing slashes so appending "/api/..." never produces "//".
+}
+
+function parseAspectRatio(ar: string): { width: number; height: number } | null { // Parses a "W:H" string (integers or decimals) into its two numbers; null when invalid.
+  const match = ar.match(/^(\d+(?:\.\d+)?):(\d+(?:\.\d+)?)$/); // Anchored pattern: signs, whitespace, and extra characters are rejected.
+  if (!match) return null;
+  const w = parseFloat(match[1]!);
+  const h = parseFloat(match[2]!);
+  if (w <= 0 || h <= 0) return null; // A zero side would make the ratio 0 or Infinity downstream.
+  return { width: w, height: h };
+}
+
+function getSizeFromAspectRatio(ar: string | null, quality: CliArgs["quality"]): string { // Builds a "W*H" size string from an aspect ratio and quality preset.
+  const baseSize = quality === "2k" ? 1440 : 1024; // Length of the shorter side: 1440 for "2k", 1024 for any other quality.
+
+  if (!ar) return `${baseSize}*${baseSize}`; // No ratio supplied: square output.
+
+  const parsed = parseAspectRatio(ar);
+  if (!parsed) return `${baseSize}*${baseSize}`; // Unparseable ratio: fall back to square instead of throwing.
+
+  const ratio = parsed.width / parsed.height;
+
+  if (Math.abs(ratio - 1) < 0.1) { // Ratios within 0.1 of 1:1 are treated as square.
+    return `${baseSize}*${baseSize}`;
+  }
+
+  if (ratio > 1) { // Landscape: height stays at baseSize and width scales up.
+    const w = Math.round(baseSize * ratio); // NOTE(review): the result is not clamped — confirm the model accepts sizes this large.
+    return `${w}*${baseSize}`;
+  }
+
+  const h = Math.round(baseSize / ratio); // Portrait: width stays at baseSize and height scales up.
+  return `${baseSize}*${h}`;
+}
+
+function normalizeSize(size: string): string { // Converts a "WxH" size into the "W*H" form placed in the request parameters.
+  return size.replace("x", "*"); // String pattern: only the first lowercase "x" is replaced; an uppercase "X" passes through unchanged.
+}
+
+export async function generateImage( // Requests one image from DashScope for the prompt and resolves to the image bytes.
+  prompt: string,
+  model: string,
+  args: CliArgs
+): Promise<Uint8Array> {
+  const apiKey = getApiKey();
+  if (!apiKey) throw new Error("DASHSCOPE_API_KEY is required"); // Fail before any network call when the key is missing.
+
+  if (args.referenceImages.length > 0) {
+    console.error("Warning: Reference images not yet supported with DashScope, ignoring."); // Warn and continue; the reference images are never added to the request.
+  }
+
+  const size = args.size ? normalizeSize(args.size) : getSizeFromAspectRatio(args.aspectRatio, args.quality); // An explicit size wins over aspect ratio and quality.
+  const url = `${getBaseUrl()}/api/v1/services/aigc/multimodal-generation/generation`;
+
+  const body = {
+    model,
+    input: {
+      messages: [
+        {
+          role: "user",
+          content: [{ text: prompt }], // The prompt is sent as a single text part of one user message.
+        },
+      ],
+    },
+    parameters: {
+      prompt_extend: false, // NOTE(review): presumably turns off server-side prompt rewriting — confirm against the DashScope API docs.
+      size,
+    },
+  };
+
+  console.log(`Generating image with DashScope (${model})...`, { size }); // Progress message goes to stdout.
+
+  const res = await fetch(url, {
+    method: "POST",
+    headers: {
+      "Content-Type": "application/json",
+      Authorization: `Bearer ${apiKey}`,
+    },
+    body: JSON.stringify(body),
+  });
+
+  if (!res.ok) {
+    const err = await res.text(); // Include the raw response body in the error so the API's own message is visible.
+    throw new Error(`DashScope API error (${res.status}): ${err}`);
+  }
+
+  const result = await res.json() as { // Type assertion only: the response shape is not validated at runtime.
+    output?: {
+      result_image?: string;
+      choices?: Array<{
+        message?: {
+          content?: Array<{ image?: string }>;
+        };
+      }>;
+    };
+  };
+
+  let imageData: string | null = null;
+
+  if (result.output?.result_image) { // First accepted shape: image directly on output.result_image.
+    imageData = result.output.result_image;
+  } else if (result.output?.choices?.[0]?.message?.content) { // Second accepted shape: first content part of the first choice that has an image.
+    const content = result.output.choices[0].message.content;
+    for (const item of content) {
+      if (item.image) {
+        imageData = item.image;
+        break;
+      }
+    }
+  }
+
+  if (!imageData) {
+    console.error("Response:", JSON.stringify(result, null, 2)); // Dump the full response to stderr to help diagnose an unexpected shape.
+    throw new Error("No image in response");
+  }
+
+  if (imageData.startsWith("http://") || imageData.startsWith("https://")) { // A URL value is downloaded; anything else is decoded as base64 below.
+    const imgRes = await fetch(imageData);
+    if (!imgRes.ok) throw new Error("Failed to download image");
+    const buf = await imgRes.arrayBuffer();
+    return new Uint8Array(buf);
+  }
+
+  return Uint8Array.from(Buffer.from(imageData, "base64")); // Non-URL payloads are decoded as base64 image data.
+}
--- a/skills/baoyu-image-gen/scripts/types.ts
+++ b/skills/baoyu-image-gen/scripts/types.ts
@@ -1,4 +1,4 @@
-export type Provider = "google" | "openai";
+export type Provider = "google" | "openai" | "dashscope"; // Image backends; the same three values are what --provider accepts.
 export type Quality = "normal" | "2k";

 export type CliArgs = {