diff --git a/.gitignore b/.gitignore
index e1a4812..cb5bd8f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -152,3 +152,8 @@ slide-deck/
 infographic/
 illustrations/
 comic/
+### IntelliJ IDEA ###
+.idea
+*.iws
+*.iml
+*.ipr
diff --git a/skills/baoyu-image-gen/SKILL.md b/skills/baoyu-image-gen/SKILL.md
index 49f40ed..7a24a10 100644
--- a/skills/baoyu-image-gen/SKILL.md
+++ b/skills/baoyu-image-gen/SKILL.md
@@ -1,11 +1,11 @@
 ---
 name: baoyu-image-gen
-description: AI image generation with OpenAI and Google APIs. Supports text-to-image, reference images, aspect ratios, and parallel generation (recommended 4 concurrent subagents). Use when user asks to generate, create, or draw images.
+description: AI image generation with OpenAI, Google and DashScope APIs. Supports text-to-image, reference images, aspect ratios, and parallel generation (recommended 4 concurrent subagents). Use when user asks to generate, create, or draw images.
 ---
 
 # Image Generation (AI SDK)
 
-Official API-based image generation. Supports OpenAI and Google providers.
+Official API-based image generation. Supports OpenAI, Google and DashScope (阿里通义万象) providers.
 
 ## Script Directory
 
@@ -63,6 +63,9 @@ npx -y bun ${SKILL_DIR}/scripts/main.ts --prompt "Make blue" --image out.png --r
 
 # Specific provider
 npx -y bun ${SKILL_DIR}/scripts/main.ts --prompt "A cat" --image out.png --provider openai
+
+# DashScope (阿里通义万象)
+npx -y bun ${SKILL_DIR}/scripts/main.ts --prompt "一只可爱的猫" --image out.png --provider dashscope
 ```
 
 ## Options
@@ -72,7 +75,7 @@ npx -y bun ${SKILL_DIR}/scripts/main.ts --prompt "A cat" --image out.png --provi
 | `--prompt `, `-p` | Prompt text |
 | `--promptfiles ` | Read prompt from files (concatenated) |
 | `--image ` | Output image path (required) |
-| `--provider google\|openai` | Force provider (default: google) |
+| `--provider google\|openai\|dashscope` | Force provider (default: auto-detect) |
 | `--model `, `-m` | Model ID |
 | `--ar ` | Aspect ratio (e.g., `16:9`, `1:1`, `4:3`) |
 | `--size ` | Size (e.g., `1024x1024`) |
@@ -88,10 +91,13 @@ npx -y bun ${SKILL_DIR}/scripts/main.ts --prompt "A cat" --image out.png --provi
 |----------|-------------|
 | `OPENAI_API_KEY` | OpenAI API key |
 | `GOOGLE_API_KEY` | Google API key |
+| `DASHSCOPE_API_KEY` | DashScope API key (阿里云) |
 | `OPENAI_IMAGE_MODEL` | OpenAI model override |
 | `GOOGLE_IMAGE_MODEL` | Google model override |
+| `DASHSCOPE_IMAGE_MODEL` | DashScope model override (default: z-image-turbo) |
 | `OPENAI_BASE_URL` | Custom OpenAI endpoint |
 | `GOOGLE_BASE_URL` | Custom Google endpoint |
+| `DASHSCOPE_BASE_URL` | Custom DashScope endpoint |
 
 **Load Priority**: CLI args > env vars > `/.baoyu-skills/.env` > `~/.baoyu-skills/.env`
 
@@ -99,7 +105,7 @@ npx -y bun ${SKILL_DIR}/scripts/main.ts --prompt "A cat" --image out.png --provi
 
 1. `--provider` specified → use it
 2. Only one API key available → use that provider
-3. Both available → default to Google
+3. Multiple available → first in priority order: Google > OpenAI > DashScope
 
 ## Quality Presets
 
diff --git a/skills/baoyu-image-gen/scripts/main.ts b/skills/baoyu-image-gen/scripts/main.ts
index 88bd331..1dddcd1 100644
--- a/skills/baoyu-image-gen/scripts/main.ts
+++ b/skills/baoyu-image-gen/scripts/main.ts
@@ -14,7 +14,7 @@ Options:
   -p, --prompt Prompt text
   --promptfiles Read prompt from files (concatenated)
   --image Output image path (required)
-  --provider google|openai Force provider (auto-detect by default)
+  --provider google|openai|dashscope Force provider (auto-detect by default)
   -m, --model Model ID
   --ar Aspect ratio (e.g., 16:9, 1:1, 4:3)
   --size Size (e.g., 1024x1024)
@@ -29,10 +29,13 @@ Environment variables:
   OPENAI_API_KEY OpenAI API key
   GOOGLE_API_KEY Google API key
   GEMINI_API_KEY Gemini API key (alias for GOOGLE_API_KEY)
+  DASHSCOPE_API_KEY DashScope API key (阿里云通义万象)
   OPENAI_IMAGE_MODEL Default OpenAI model (gpt-image-1.5)
   GOOGLE_IMAGE_MODEL Default Google model (gemini-3-pro-image-preview)
+  DASHSCOPE_IMAGE_MODEL Default DashScope model (z-image-turbo)
   OPENAI_BASE_URL Custom OpenAI endpoint
   GOOGLE_BASE_URL Custom Google endpoint
+  DASHSCOPE_BASE_URL Custom DashScope endpoint
 
 Env file load order: CLI args > process.env > /.baoyu-skills/.env > ~/.baoyu-skills/.env`);
 }
@@ -105,7 +108,7 @@ function parseArgs(argv: string[]): CliArgs {
 
     if (a === "--provider") {
       const v = argv[++i];
-      if (v !== "google" && v !== "openai") throw new Error(`Invalid provider: ${v}`);
+      if (v !== "google" && v !== "openai" && v !== "dashscope") throw new Error(`Invalid provider: ${v}`);
       out.provider = v;
       continue;
     }
@@ -243,13 +246,15 @@ function detectProvider(args: CliArgs): Provider {
 
   const hasGoogle = !!(process.env.GOOGLE_API_KEY || process.env.GEMINI_API_KEY);
   const hasOpenai = !!process.env.OPENAI_API_KEY;
+  const hasDashscope = !!process.env.DASHSCOPE_API_KEY;
 
-  if (hasGoogle && !hasOpenai) return "google";
-  if (hasOpenai && !hasGoogle) return "openai";
-  if (hasGoogle && hasOpenai) return "google";
+  // Array order is the priority when several keys are set: google > openai > dashscope.
+  const available = [hasGoogle && "google", hasOpenai && "openai", hasDashscope && "dashscope"].filter(Boolean) as Provider[];
+
+  if (available.length > 0) return available[0]!;
 
   throw new Error(
-    "No API key found. Set GOOGLE_API_KEY, GEMINI_API_KEY, or OPENAI_API_KEY.\n" +
+    "No API key found. Set GOOGLE_API_KEY, GEMINI_API_KEY, OPENAI_API_KEY, or DASHSCOPE_API_KEY.\n" +
     "Create ~/.baoyu-skills/.env or /.baoyu-skills/.env with your keys."
   );
 }
@@ -263,6 +268,9 @@ async function loadProviderModule(provider: Provider): Promise {
   if (provider === "google") {
     return (await import("./providers/google")) as ProviderModule;
   }
+  if (provider === "dashscope") {
+    return (await import("./providers/dashscope")) as ProviderModule;
+  }
   return (await import("./providers/openai")) as ProviderModule;
 }
 
diff --git a/skills/baoyu-image-gen/scripts/providers/dashscope.ts b/skills/baoyu-image-gen/scripts/providers/dashscope.ts
new file mode 100644
index 0000000..aff55b5
--- /dev/null
+++ b/skills/baoyu-image-gen/scripts/providers/dashscope.ts
@@ -0,0 +1,147 @@
+import type { CliArgs } from "../types";
+
+/** Subset of the DashScope generation response that this provider reads. */
+type DashScopeResponse = {
+  output?: {
+    result_image?: string;
+    choices?: Array<{
+      message?: {
+        content?: Array<{ image?: string }>;
+      };
+    }>;
+  };
+};
+
+/** Returns the model ID from DASHSCOPE_IMAGE_MODEL, falling back to "z-image-turbo". */
+export function getDefaultModel(): string {
+  return process.env.DASHSCOPE_IMAGE_MODEL || "z-image-turbo";
+}
+
+/** Returns DASHSCOPE_API_KEY, or null when it is unset or empty. */
+function getApiKey(): string | null {
+  return process.env.DASHSCOPE_API_KEY || null;
+}
+
+/** Returns DASHSCOPE_BASE_URL (or the default host) with trailing slashes removed. */
+function getBaseUrl(): string {
+  const base = process.env.DASHSCOPE_BASE_URL || "https://dashscope.aliyuncs.com";
+  return base.replace(/\/+$/g, "");
+}
+
+/**
+ * Parses a "W:H" aspect ratio such as "16:9" or "1.5:1".
+ * Returns null when the text is malformed or either side is not positive.
+ */
+function parseAspectRatio(ar: string): { width: number; height: number } | null {
+  const match = ar.match(/^(\d+(?:\.\d+)?):(\d+(?:\.\d+)?)$/);
+  if (!match) return null;
+  const w = parseFloat(match[1]!);
+  const h = parseFloat(match[2]!);
+  if (w <= 0 || h <= 0) return null;
+  return { width: w, height: h };
+}
+
+/**
+ * Builds a "W*H" size string from an aspect ratio and a quality preset.
+ * The shorter side is 1440 for "2k" and 1024 otherwise; the longer side is scaled by the ratio.
+ * A missing, unparseable or near-square ratio yields a square.
+ */
+function getSizeFromAspectRatio(ar: string | null, quality: CliArgs["quality"]): string {
+  const baseSize = quality === "2k" ? 1440 : 1024;
+  const square = `${baseSize}*${baseSize}`;
+
+  const parsed = ar ? parseAspectRatio(ar) : null;
+  if (!parsed) return square;
+
+  const ratio = parsed.width / parsed.height;
+  // Ratios within 0.1 of 1:1 are treated as square.
+  if (Math.abs(ratio - 1) < 0.1) return square;
+
+  if (ratio > 1) return `${Math.round(baseSize * ratio)}*${baseSize}`;
+  return `${baseSize}*${Math.round(baseSize / ratio)}`;
+}
+
+/** Rewrites a CLI size such as "1024x1024" or "1024X1024" to the "1024*1024" form used in the request. */
+function normalizeSize(size: string): string {
+  return size.replace(/x/i, "*");
+}
+
+/**
+ * Generates one image through the DashScope multimodal-generation endpoint.
+ *
+ * @param prompt Prompt text, sent as the single user message.
+ * @param model DashScope model ID.
+ * @param args Parsed CLI arguments; size, aspectRatio, quality and referenceImages are read.
+ * @returns The image bytes.
+ * @throws Error when the API key is missing, a request fails, or the response contains no image.
+ */
+export async function generateImage(
+  prompt: string,
+  model: string,
+  args: CliArgs
+): Promise<Uint8Array> {
+  const apiKey = getApiKey();
+  if (!apiKey) throw new Error("DASHSCOPE_API_KEY is required");
+
+  if (args.referenceImages.length > 0) {
+    console.error("Warning: Reference images not yet supported with DashScope, ignoring.");
+  }
+
+  const size = args.size ? normalizeSize(args.size) : getSizeFromAspectRatio(args.aspectRatio, args.quality);
+  const url = `${getBaseUrl()}/api/v1/services/aigc/multimodal-generation/generation`;
+
+  const body = {
+    model,
+    input: {
+      messages: [
+        {
+          role: "user",
+          content: [{ text: prompt }],
+        },
+      ],
+    },
+    parameters: {
+      prompt_extend: false,
+      size,
+    },
+  };
+
+  console.log(`Generating image with DashScope (${model})...`, { size });
+
+  const res = await fetch(url, {
+    method: "POST",
+    headers: {
+      "Content-Type": "application/json",
+      Authorization: `Bearer ${apiKey}`,
+    },
+    body: JSON.stringify(body),
+  });
+
+  if (!res.ok) {
+    const err = await res.text();
+    throw new Error(`DashScope API error (${res.status}): ${err}`);
+  }
+
+  const result = (await res.json()) as DashScopeResponse;
+
+  // Prefer output.result_image; otherwise take the first content item of the first choice that has an image.
+  const imageData =
+    result.output?.result_image ||
+    result.output?.choices?.[0]?.message?.content?.find((item) => item.image)?.image ||
+    null;
+
+  if (!imageData) {
+    console.error("Response:", JSON.stringify(result, null, 2));
+    throw new Error("No image in response");
+  }
+
+  // Values starting with http(s) are downloaded; anything else is decoded as base64.
+  if (imageData.startsWith("http://") || imageData.startsWith("https://")) {
+    const imgRes = await fetch(imageData);
+    if (!imgRes.ok) throw new Error(`Failed to download image (${imgRes.status})`);
+    const buf = await imgRes.arrayBuffer();
+    return new Uint8Array(buf);
+  }
+
+  return Uint8Array.from(Buffer.from(imageData, "base64"));
+}
diff --git a/skills/baoyu-image-gen/scripts/types.ts b/skills/baoyu-image-gen/scripts/types.ts
index 6121391..cdd3c8c 100644
--- a/skills/baoyu-image-gen/scripts/types.ts
+++ b/skills/baoyu-image-gen/scripts/types.ts
@@ -1,4 +1,4 @@
-export type Provider = "google" | "openai";
+export type Provider = "google" | "openai" | "dashscope";
 export type Quality = "normal" | "2k";
 
 export type CliArgs = {