Add qwen-image-2.0-pro support for baoyu-image-gen
This commit is contained in:
parent
de7dc85361
commit
ac2ce0b8b6
|
|
@ -726,7 +726,7 @@ AI SDK-based image generation using OpenAI, Google, OpenRouter, DashScope (Aliyu
|
|||
| `OPENAI_IMAGE_MODEL` | OpenAI model | `gpt-image-1.5` |
|
||||
| `OPENROUTER_IMAGE_MODEL` | OpenRouter model | `google/gemini-3.1-flash-image-preview` |
|
||||
| `GOOGLE_IMAGE_MODEL` | Google model | `gemini-3-pro-image-preview` |
|
||||
| `DASHSCOPE_IMAGE_MODEL` | DashScope model | `z-image-turbo` |
|
||||
| `DASHSCOPE_IMAGE_MODEL` | DashScope model | `qwen-image-2.0-pro` |
|
||||
| `REPLICATE_IMAGE_MODEL` | Replicate model | `google/nano-banana-pro` |
|
||||
| `JIMENG_IMAGE_MODEL` | Jimeng model | `jimeng_t2i_v40` |
|
||||
| `SEEDREAM_IMAGE_MODEL` | Seedream model | `doubao-seedream-5-0-260128` |
|
||||
|
|
@ -996,7 +996,7 @@ GOOGLE_IMAGE_MODEL=gemini-3-pro-image-preview
|
|||
|
||||
# DashScope (Aliyun Tongyi Wanxiang)
|
||||
DASHSCOPE_API_KEY=sk-xxx
|
||||
DASHSCOPE_IMAGE_MODEL=z-image-turbo
|
||||
DASHSCOPE_IMAGE_MODEL=qwen-image-2.0-pro
|
||||
# DASHSCOPE_BASE_URL=https://dashscope.aliyuncs.com/api/v1
|
||||
|
||||
# Replicate
|
||||
|
|
|
|||
|
|
@ -726,7 +726,7 @@ AI 驱动的生成后端。
|
|||
| `OPENAI_IMAGE_MODEL` | OpenAI 模型 | `gpt-image-1.5` |
|
||||
| `OPENROUTER_IMAGE_MODEL` | OpenRouter 模型 | `google/gemini-3.1-flash-image-preview` |
|
||||
| `GOOGLE_IMAGE_MODEL` | Google 模型 | `gemini-3-pro-image-preview` |
|
||||
| `DASHSCOPE_IMAGE_MODEL` | DashScope 模型 | `z-image-turbo` |
|
||||
| `DASHSCOPE_IMAGE_MODEL` | DashScope 模型 | `qwen-image-2.0-pro` |
|
||||
| `REPLICATE_IMAGE_MODEL` | Replicate 模型 | `google/nano-banana-pro` |
|
||||
| `JIMENG_IMAGE_MODEL` | 即梦模型 | `jimeng_t2i_v40` |
|
||||
| `SEEDREAM_IMAGE_MODEL` | 豆包模型 | `doubao-seedream-5-0-260128` |
|
||||
|
|
@ -996,7 +996,7 @@ GOOGLE_IMAGE_MODEL=gemini-3-pro-image-preview
|
|||
|
||||
# DashScope(阿里通义万相)
|
||||
DASHSCOPE_API_KEY=sk-xxx
|
||||
DASHSCOPE_IMAGE_MODEL=z-image-turbo
|
||||
DASHSCOPE_IMAGE_MODEL=qwen-image-2.0-pro
|
||||
# DASHSCOPE_BASE_URL=https://dashscope.aliyuncs.com/api/v1
|
||||
|
||||
# Replicate
|
||||
|
|
|
|||
|
|
@ -92,6 +92,12 @@ ${BUN_X} {baseDir}/scripts/main.ts --prompt "A cat" --image out.png --provider o
|
|||
# DashScope (阿里通义万象)
|
||||
${BUN_X} {baseDir}/scripts/main.ts --prompt "一只可爱的猫" --image out.png --provider dashscope
|
||||
|
||||
# DashScope Qwen-Image 2.0 Pro (recommended for custom sizes and text rendering)
|
||||
${BUN_X} {baseDir}/scripts/main.ts --prompt "为咖啡品牌设计一张 21:9 横幅海报,包含清晰中文标题" --image out.png --provider dashscope --model qwen-image-2.0-pro --size 2048x872
|
||||
|
||||
# DashScope legacy Qwen fixed-size model
|
||||
${BUN_X} {baseDir}/scripts/main.ts --prompt "一张电影感海报" --image out.png --provider dashscope --model qwen-image-max --size 1664x928
|
||||
|
||||
# Replicate (google/nano-banana-pro)
|
||||
${BUN_X} {baseDir}/scripts/main.ts --prompt "A cat" --image out.png --provider replicate
|
||||
|
||||
|
|
@ -142,7 +148,7 @@ Paths in `promptFiles`, `image`, and `ref` are resolved relative to the batch fi
|
|||
| `--batchfile <path>` | JSON batch file for multi-image generation |
|
||||
| `--jobs <count>` | Worker count for batch mode (default: auto, max from config, built-in default 10) |
|
||||
| `--provider google\|openai\|openrouter\|dashscope\|jimeng\|seedream\|replicate` | Force provider (default: auto-detect) |
|
||||
| `--model <id>`, `-m` | Model ID (Google: `gemini-3-pro-image-preview`; OpenAI: `gpt-image-1.5`; OpenRouter: `google/gemini-3.1-flash-image-preview`) |
|
||||
| `--model <id>`, `-m` | Model ID (Google: `gemini-3-pro-image-preview`; OpenAI: `gpt-image-1.5`; OpenRouter: `google/gemini-3.1-flash-image-preview`; DashScope: `qwen-image-2.0-pro`) |
|
||||
| `--ar <ratio>` | Aspect ratio (e.g., `16:9`, `1:1`, `4:3`) |
|
||||
| `--size <WxH>` | Size (e.g., `1024x1024`) |
|
||||
| `--quality normal\|2k` | Quality preset (default: `2k`) |
|
||||
|
|
@ -166,7 +172,7 @@ Paths in `promptFiles`, `image`, and `ref` are resolved relative to the batch fi
|
|||
| `OPENAI_IMAGE_MODEL` | OpenAI model override |
|
||||
| `OPENROUTER_IMAGE_MODEL` | OpenRouter model override (default: `google/gemini-3.1-flash-image-preview`) |
|
||||
| `GOOGLE_IMAGE_MODEL` | Google model override |
|
||||
| `DASHSCOPE_IMAGE_MODEL` | DashScope model override (default: z-image-turbo) |
|
||||
| `DASHSCOPE_IMAGE_MODEL` | DashScope model override (default: `qwen-image-2.0-pro`) |
|
||||
| `REPLICATE_IMAGE_MODEL` | Replicate model override (default: google/nano-banana-pro) |
|
||||
| `JIMENG_IMAGE_MODEL` | Jimeng model override (default: jimeng_t2i_v40) |
|
||||
| `SEEDREAM_IMAGE_MODEL` | Seedream model override (default: doubao-seedream-5-0-260128) |
|
||||
|
|
@ -201,6 +207,52 @@ Model priority (highest → lowest), applies to all providers:
|
|||
- Show: `Using [provider] / [model]`
|
||||
- Show switch hint: `Switch model: --model <id> | EXTEND.md default_model.[provider] | env <PROVIDER>_IMAGE_MODEL`
|
||||
|
||||
### DashScope Models
|
||||
|
||||
Use `--model qwen-image-2.0-pro` or set `default_model.dashscope` / `DASHSCOPE_IMAGE_MODEL` when the user wants official Qwen-Image behavior.
|
||||
|
||||
Official DashScope model families:
|
||||
|
||||
- `qwen-image-2.0-pro`, `qwen-image-2.0-pro-2026-03-03`, `qwen-image-2.0`, `qwen-image-2.0-2026-03-03`
|
||||
- Free-form `size` in `宽*高` format
|
||||
- Total pixels must stay between `512*512` and `2048*2048`
|
||||
- Default size is approximately `1024*1024`
|
||||
- Best choice for custom ratios such as `21:9` and text-heavy Chinese/English layouts
|
||||
- `qwen-image-max`, `qwen-image-max-2025-12-30`, `qwen-image-plus`, `qwen-image-plus-2026-01-09`, `qwen-image`
|
||||
- Fixed sizes only: `1664*928`, `1472*1104`, `1328*1328`, `1104*1472`, `928*1664`
|
||||
- Default size is `1664*928`
|
||||
- `qwen-image` currently has the same capability as `qwen-image-plus`
|
||||
- Legacy DashScope models such as `z-image-turbo`, `z-image-ultra`, `wanx-v1`
|
||||
- Keep using them only when the user explicitly asks for legacy behavior or compatibility
|
||||
|
||||
When translating CLI args into DashScope behavior:
|
||||
|
||||
- `--size` wins over `--ar`
|
||||
- For `qwen-image-2.0*`, prefer explicit `--size`; otherwise infer from `--ar` and use the official recommended resolutions below
|
||||
- For `qwen-image-max/plus/image`, only use the five official fixed sizes; if the requested ratio is not covered, switch to `qwen-image-2.0-pro`
|
||||
- `--quality` is a baoyu-image-gen compatibility preset, not a native DashScope API field. Mapping `normal` / `2k` onto the `qwen-image-2.0*` table below is an implementation inference, not an official API guarantee
|
||||
|
||||
Recommended `qwen-image-2.0*` sizes for common aspect ratios:
|
||||
|
||||
| Ratio | `normal` | `2k` |
|
||||
|-------|----------|------|
|
||||
| `1:1` | `1024*1024` | `1536*1536` |
|
||||
| `2:3` | `768*1152` | `1024*1536` |
|
||||
| `3:2` | `1152*768` | `1536*1024` |
|
||||
| `3:4` | `960*1280` | `1080*1440` |
|
||||
| `4:3` | `1280*960` | `1440*1080` |
|
||||
| `9:16` | `720*1280` | `1080*1920` |
|
||||
| `16:9` | `1280*720` | `1920*1080` |
|
||||
| `21:9` | `1344*576` | `2048*872` |
|
||||
|
||||
DashScope official APIs also expose `negative_prompt`, `prompt_extend`, and `watermark`, but `baoyu-image-gen` does not expose them as dedicated CLI flags today.
|
||||
|
||||
Official references:
|
||||
|
||||
- [Qwen-Image API](https://help.aliyun.com/zh/model-studio/qwen-image-api)
|
||||
- [Text-to-image guide](https://help.aliyun.com/zh/model-studio/text-to-image)
|
||||
- [Qwen-Image Edit API](https://help.aliyun.com/zh/model-studio/qwen-image-edit-api)
|
||||
|
||||
### OpenRouter Models
|
||||
|
||||
Use full OpenRouter model IDs, e.g.:
|
||||
|
|
|
|||
|
|
@ -50,7 +50,7 @@ options:
|
|||
- label: "OpenRouter"
|
||||
description: "Router for Gemini/FLUX/OpenAI-compatible image models"
|
||||
- label: "DashScope"
|
||||
description: "Alibaba Cloud - z-image-turbo, good for Chinese content"
|
||||
description: "Alibaba Cloud - Qwen-Image, strong Chinese/English text rendering"
|
||||
- label: "Replicate"
|
||||
description: "Community models - nano-banana-pro, flexible model selection"
|
||||
```
|
||||
|
|
@ -186,12 +186,26 @@ options:
|
|||
header: "DashScope Model"
|
||||
question: "Choose a default DashScope image generation model?"
|
||||
options:
|
||||
- label: "z-image-turbo (Recommended)"
|
||||
description: "Fast generation, good quality"
|
||||
- label: "qwen-image-2.0-pro (Recommended)"
|
||||
description: "Best DashScope model for text rendering and custom sizes"
|
||||
- label: "qwen-image-2.0"
|
||||
description: "Faster 2.0 variant with flexible output size"
|
||||
- label: "qwen-image-max"
|
||||
description: "Legacy Qwen model with five fixed output sizes"
|
||||
- label: "qwen-image-plus"
|
||||
description: "Legacy Qwen model, same current capability as qwen-image"
|
||||
- label: "z-image-turbo"
|
||||
description: "Legacy DashScope model for compatibility"
|
||||
- label: "z-image-ultra"
|
||||
description: "Higher quality, slower generation"
|
||||
description: "Legacy DashScope model, higher quality but slower"
|
||||
```
|
||||
|
||||
Notes for DashScope setup:
|
||||
|
||||
- Prefer `qwen-image-2.0-pro` when the user needs custom `--size`, uncommon ratios like `21:9`, or strong Chinese/English text rendering.
|
||||
- `qwen-image-max` / `qwen-image-plus` / `qwen-image` only support five fixed sizes: `1664*928`, `1472*1104`, `1328*1328`, `1104*1472`, `928*1664`.
|
||||
- In `baoyu-image-gen`, `quality` is a compatibility preset. It is not a native DashScope parameter.
|
||||
|
||||
### Replicate Model Selection
|
||||
|
||||
```yaml
|
||||
|
|
|
|||
|
|
@ -23,7 +23,7 @@ default_model:
|
|||
google: null # e.g., "gemini-3-pro-image-preview", "gemini-3.1-flash-image-preview"
|
||||
openai: null # e.g., "gpt-image-1.5", "gpt-image-1"
|
||||
openrouter: null # e.g., "google/gemini-3.1-flash-image-preview"
|
||||
dashscope: null # e.g., "z-image-turbo"
|
||||
dashscope: null # e.g., "qwen-image-2.0-pro"
|
||||
replicate: null # e.g., "google/nano-banana-pro"
|
||||
|
||||
batch:
|
||||
|
|
@ -88,7 +88,7 @@ default_model:
|
|||
google: "gemini-3-pro-image-preview"
|
||||
openai: "gpt-image-1.5"
|
||||
openrouter: "google/gemini-3.1-flash-image-preview"
|
||||
dashscope: "z-image-turbo"
|
||||
dashscope: "qwen-image-2.0-pro"
|
||||
replicate: "google/nano-banana-pro"
|
||||
batch:
|
||||
max_workers: 10
|
||||
|
|
|
|||
|
|
@ -116,7 +116,7 @@ Environment variables:
|
|||
OPENAI_IMAGE_MODEL Default OpenAI model (gpt-image-1.5)
|
||||
OPENROUTER_IMAGE_MODEL Default OpenRouter model (google/gemini-3.1-flash-image-preview)
|
||||
GOOGLE_IMAGE_MODEL Default Google model (gemini-3-pro-image-preview)
|
||||
DASHSCOPE_IMAGE_MODEL Default DashScope model (z-image-turbo)
|
||||
DASHSCOPE_IMAGE_MODEL Default DashScope model (qwen-image-2.0-pro)
|
||||
REPLICATE_IMAGE_MODEL Default Replicate model (google/nano-banana-pro)
|
||||
JIMENG_IMAGE_MODEL Default Jimeng model (jimeng_t2i_v40)
|
||||
SEEDREAM_IMAGE_MODEL Default Seedream model (doubao-seedream-5-0-260128)
|
||||
|
|
|
|||
|
|
@ -1,25 +1,147 @@
|
|||
import assert from "node:assert/strict";
|
||||
import test from "node:test";
|
||||
import test, { type TestContext } from "node:test";
|
||||
|
||||
import {
|
||||
getDefaultModel,
|
||||
getModelFamily,
|
||||
getQwen2SizeFromAspectRatio,
|
||||
getSizeFromAspectRatio,
|
||||
normalizeSize,
|
||||
parseAspectRatio,
|
||||
parseSize,
|
||||
resolveSizeForModel,
|
||||
} from "./dashscope.ts";
|
||||
|
||||
// Temporarily override environment variables for one test.
// `values` maps variable name -> new value, or null to unset it.
// Original values are captured first and restored in t.after(), so tests
// that mutate process.env do not leak state into later tests.
function useEnv(
  t: TestContext,
  values: Record<string, string | null>,
): void {
  // Snapshot the pre-test value of every key we are about to touch
  // (undefined means "was not set").
  const previous = new Map<string, string | undefined>();
  for (const [key, value] of Object.entries(values)) {
    previous.set(key, process.env[key]);
    if (value == null) {
      delete process.env[key];
    } else {
      process.env[key] = value;
    }
  }

  // Restore the snapshot once the owning test finishes.
  t.after(() => {
    for (const [key, value] of previous.entries()) {
      if (value == null) {
        // Key was absent before the test: remove it again.
        delete process.env[key];
      } else {
        process.env[key] = value;
      }
    }
  });
}
|
||||
|
||||
test("DashScope default model prefers env override and otherwise uses qwen-image-2.0-pro", (t) => {
|
||||
useEnv(t, { DASHSCOPE_IMAGE_MODEL: null });
|
||||
assert.equal(getDefaultModel(), "qwen-image-2.0-pro");
|
||||
|
||||
process.env.DASHSCOPE_IMAGE_MODEL = "qwen-image-max";
|
||||
assert.equal(getDefaultModel(), "qwen-image-max");
|
||||
});
|
||||
|
||||
test("DashScope aspect-ratio parsing accepts numeric ratios only", () => {
|
||||
assert.deepEqual(parseAspectRatio("3:2"), { width: 3, height: 2 });
|
||||
assert.equal(parseAspectRatio("square"), null);
|
||||
assert.equal(parseAspectRatio("-1:2"), null);
|
||||
});
|
||||
|
||||
test("DashScope size selection picks the closest supported size per quality preset", () => {
|
||||
test("DashScope model family routing distinguishes qwen-2.0, fixed-size qwen, and legacy models", () => {
|
||||
assert.equal(getModelFamily("qwen-image-2.0-pro"), "qwen2");
|
||||
assert.equal(getModelFamily("qwen-image"), "qwenFixed");
|
||||
assert.equal(getModelFamily("z-image-turbo"), "legacy");
|
||||
assert.equal(getModelFamily("wanx-v1"), "legacy");
|
||||
});
|
||||
|
||||
test("Legacy DashScope size selection keeps the previous quality-based heuristic", () => {
|
||||
assert.equal(getSizeFromAspectRatio(null, "normal"), "1024*1024");
|
||||
assert.equal(getSizeFromAspectRatio("16:9", "normal"), "1280*720");
|
||||
assert.equal(getSizeFromAspectRatio("16:9", "2k"), "2048*1152");
|
||||
assert.equal(getSizeFromAspectRatio("invalid", "2k"), "1536*1536");
|
||||
});
|
||||
|
||||
test("Qwen 2.0 recommended sizes follow the official common-ratio table", () => {
|
||||
assert.equal(getQwen2SizeFromAspectRatio(null, "normal"), "1024*1024");
|
||||
assert.equal(getQwen2SizeFromAspectRatio(null, "2k"), "1536*1536");
|
||||
assert.equal(getQwen2SizeFromAspectRatio("16:9", "normal"), "1280*720");
|
||||
assert.equal(getQwen2SizeFromAspectRatio("21:9", "2k"), "2048*872");
|
||||
});
|
||||
|
||||
test("Qwen 2.0 derives free-form sizes within pixel budget for uncommon ratios", () => {
|
||||
const size = getQwen2SizeFromAspectRatio("5:2", "normal");
|
||||
const parsed = parseSize(size);
|
||||
assert.ok(parsed);
|
||||
assert.ok(parsed.width * parsed.height >= 512 * 512);
|
||||
assert.ok(parsed.width * parsed.height <= 2048 * 2048);
|
||||
assert.ok(Math.abs(parsed.width / parsed.height - 2.5) < 0.08);
|
||||
});
|
||||
|
||||
test("resolveSizeForModel validates explicit qwen-image-2.0 sizes by total pixels", () => {
|
||||
assert.equal(
|
||||
resolveSizeForModel("qwen-image-2.0-pro", {
|
||||
size: "2048x872",
|
||||
aspectRatio: null,
|
||||
quality: "2k",
|
||||
}),
|
||||
"2048*872",
|
||||
);
|
||||
|
||||
assert.throws(
|
||||
() =>
|
||||
resolveSizeForModel("qwen-image-2.0-pro", {
|
||||
size: "4096x4096",
|
||||
aspectRatio: null,
|
||||
quality: "2k",
|
||||
}),
|
||||
/total pixels between/,
|
||||
);
|
||||
});
|
||||
|
||||
test("resolveSizeForModel enforces fixed sizes for qwen-image-max/plus/image", () => {
|
||||
assert.equal(
|
||||
resolveSizeForModel("qwen-image-max", {
|
||||
size: null,
|
||||
aspectRatio: "1:1",
|
||||
quality: "2k",
|
||||
}),
|
||||
"1328*1328",
|
||||
);
|
||||
|
||||
assert.equal(
|
||||
resolveSizeForModel("qwen-image", {
|
||||
size: "1664x928",
|
||||
aspectRatio: "9:16",
|
||||
quality: "normal",
|
||||
}),
|
||||
"1664*928",
|
||||
);
|
||||
|
||||
assert.throws(
|
||||
() =>
|
||||
resolveSizeForModel("qwen-image-max", {
|
||||
size: null,
|
||||
aspectRatio: "21:9",
|
||||
quality: "2k",
|
||||
}),
|
||||
/supports only fixed ratios/,
|
||||
);
|
||||
|
||||
assert.throws(
|
||||
() =>
|
||||
resolveSizeForModel("qwen-image-plus", {
|
||||
size: "1024x1024",
|
||||
aspectRatio: null,
|
||||
quality: "2k",
|
||||
}),
|
||||
/support only these sizes/,
|
||||
);
|
||||
});
|
||||
|
||||
test("DashScope size normalization converts WxH into provider format", () => {
|
||||
assert.equal(normalizeSize("1024x1024"), "1024*1024");
|
||||
assert.equal(normalizeSize("2048*1152"), "2048*1152");
|
||||
|
|
|
|||
|
|
@ -1,28 +1,46 @@
|
|||
import type { CliArgs } from "../types";
|
||||
import type { CliArgs, Quality } from "../types";
|
||||
|
||||
export function getDefaultModel(): string {
|
||||
return process.env.DASHSCOPE_IMAGE_MODEL || "z-image-turbo";
|
||||
}
|
||||
type DashScopeModelFamily = "qwen2" | "qwenFixed" | "legacy";
|
||||
|
||||
function getApiKey(): string | null {
|
||||
return process.env.DASHSCOPE_API_KEY || null;
|
||||
}
|
||||
type DashScopeModelSpec = {
|
||||
family: DashScopeModelFamily;
|
||||
defaultSize: string;
|
||||
};
|
||||
|
||||
function getBaseUrl(): string {
|
||||
const base = process.env.DASHSCOPE_BASE_URL || "https://dashscope.aliyuncs.com";
|
||||
return base.replace(/\/+$/g, "");
|
||||
}
|
||||
const DEFAULT_MODEL = "qwen-image-2.0-pro";
|
||||
const MIN_QWEN_2_TOTAL_PIXELS = 512 * 512;
|
||||
const MAX_QWEN_2_TOTAL_PIXELS = 2048 * 2048;
|
||||
const SIZE_STEP = 16;
|
||||
const QWEN_NEGATIVE_PROMPT =
|
||||
"低分辨率,低画质,肢体畸形,手指畸形,画面过饱和,蜡像感,人脸无细节,过度光滑,画面具有AI感,构图混乱,文字模糊,扭曲";
|
||||
|
||||
export function parseAspectRatio(ar: string): { width: number; height: number } | null {
|
||||
const match = ar.match(/^(\d+(?:\.\d+)?):(\d+(?:\.\d+)?)$/);
|
||||
if (!match) return null;
|
||||
const w = parseFloat(match[1]!);
|
||||
const h = parseFloat(match[2]!);
|
||||
if (w <= 0 || h <= 0) return null;
|
||||
return { width: w, height: h };
|
||||
}
|
||||
const QWEN_2_TARGET_PIXELS: Record<Quality, number> = {
|
||||
normal: 1024 * 1024,
|
||||
"2k": 1536 * 1536,
|
||||
};
|
||||
|
||||
const STANDARD_SIZES: [number, number][] = [
|
||||
const QWEN_2_RECOMMENDED: Record<string, Record<Quality, string>> = {
|
||||
"1:1": { normal: "1024*1024", "2k": "1536*1536" },
|
||||
"2:3": { normal: "768*1152", "2k": "1024*1536" },
|
||||
"3:2": { normal: "1152*768", "2k": "1536*1024" },
|
||||
"3:4": { normal: "960*1280", "2k": "1080*1440" },
|
||||
"4:3": { normal: "1280*960", "2k": "1440*1080" },
|
||||
"9:16": { normal: "720*1280", "2k": "1080*1920" },
|
||||
"16:9": { normal: "1280*720", "2k": "1920*1080" },
|
||||
"21:9": { normal: "1344*576", "2k": "2048*872" },
|
||||
};
|
||||
|
||||
const QWEN_FIXED_SIZES_BY_RATIO: Record<string, string> = {
|
||||
"16:9": "1664*928",
|
||||
"4:3": "1472*1104",
|
||||
"1:1": "1328*1328",
|
||||
"3:4": "1104*1472",
|
||||
"9:16": "928*1664",
|
||||
};
|
||||
|
||||
const QWEN_FIXED_SIZES = Object.values(QWEN_FIXED_SIZES_BY_RATIO);
|
||||
|
||||
const LEGACY_STANDARD_SIZES: [number, number][] = [
|
||||
[1024, 1024],
|
||||
[1280, 720],
|
||||
[720, 1280],
|
||||
|
|
@ -34,7 +52,7 @@ const STANDARD_SIZES: [number, number][] = [
|
|||
[864, 1536],
|
||||
];
|
||||
|
||||
const STANDARD_SIZES_2K: [number, number][] = [
|
||||
const LEGACY_STANDARD_SIZES_2K: [number, number][] = [
|
||||
[1536, 1536],
|
||||
[2048, 1152],
|
||||
[1152, 2048],
|
||||
|
|
@ -45,9 +63,167 @@ const STANDARD_SIZES_2K: [number, number][] = [
|
|||
[2048, 2048],
|
||||
];
|
||||
|
||||
const QWEN_2_SPEC: DashScopeModelSpec = {
|
||||
family: "qwen2",
|
||||
defaultSize: "1024*1024",
|
||||
};
|
||||
|
||||
const QWEN_FIXED_SPEC: DashScopeModelSpec = {
|
||||
family: "qwenFixed",
|
||||
defaultSize: QWEN_FIXED_SIZES_BY_RATIO["16:9"],
|
||||
};
|
||||
|
||||
const LEGACY_SPEC: DashScopeModelSpec = {
|
||||
family: "legacy",
|
||||
defaultSize: "1536*1536",
|
||||
};
|
||||
|
||||
const MODEL_SPEC_ALIASES: Record<string, DashScopeModelSpec> = {
|
||||
"qwen-image-2.0-pro": QWEN_2_SPEC,
|
||||
"qwen-image-2.0-pro-2026-03-03": QWEN_2_SPEC,
|
||||
"qwen-image-2.0": QWEN_2_SPEC,
|
||||
"qwen-image-2.0-2026-03-03": QWEN_2_SPEC,
|
||||
"qwen-image-max": QWEN_FIXED_SPEC,
|
||||
"qwen-image-max-2025-12-30": QWEN_FIXED_SPEC,
|
||||
"qwen-image-plus": QWEN_FIXED_SPEC,
|
||||
"qwen-image-plus-2026-01-09": QWEN_FIXED_SPEC,
|
||||
"qwen-image": QWEN_FIXED_SPEC,
|
||||
};
|
||||
|
||||
export function getDefaultModel(): string {
|
||||
return process.env.DASHSCOPE_IMAGE_MODEL || DEFAULT_MODEL;
|
||||
}
|
||||
|
||||
function getApiKey(): string | null {
|
||||
return process.env.DASHSCOPE_API_KEY || null;
|
||||
}
|
||||
|
||||
function getBaseUrl(): string {
|
||||
const base = process.env.DASHSCOPE_BASE_URL || "https://dashscope.aliyuncs.com";
|
||||
return base.replace(/\/+$/g, "");
|
||||
}
|
||||
|
||||
function getModelSpec(model: string): DashScopeModelSpec {
|
||||
return MODEL_SPEC_ALIASES[model.trim().toLowerCase()] || LEGACY_SPEC;
|
||||
}
|
||||
|
||||
export function getModelFamily(model: string): DashScopeModelFamily {
|
||||
return getModelSpec(model).family;
|
||||
}
|
||||
|
||||
function normalizeQuality(quality: CliArgs["quality"]): Quality {
|
||||
return quality === "normal" ? "normal" : "2k";
|
||||
}
|
||||
|
||||
export function parseAspectRatio(ar: string): { width: number; height: number } | null {
|
||||
const match = ar.match(/^(\d+(?:\.\d+)?):(\d+(?:\.\d+)?)$/);
|
||||
if (!match) return null;
|
||||
const w = parseFloat(match[1]!);
|
||||
const h = parseFloat(match[2]!);
|
||||
if (w <= 0 || h <= 0) return null;
|
||||
return { width: w, height: h };
|
||||
}
|
||||
|
||||
export function normalizeSize(size: string): string {
|
||||
return size.replace("x", "*");
|
||||
}
|
||||
|
||||
export function parseSize(size: string): { width: number; height: number } | null {
|
||||
const match = normalizeSize(size).match(/^(\d+)\*(\d+)$/);
|
||||
if (!match) return null;
|
||||
const width = Number(match[1]);
|
||||
const height = Number(match[2]);
|
||||
if (!Number.isFinite(width) || !Number.isFinite(height) || width <= 0 || height <= 0) {
|
||||
return null;
|
||||
}
|
||||
return { width, height };
|
||||
}
|
||||
|
||||
function formatSize(width: number, height: number): string {
|
||||
return `${width}*${height}`;
|
||||
}
|
||||
|
||||
function getRatioValue(ar: string): number | null {
|
||||
const parsed = parseAspectRatio(ar);
|
||||
if (!parsed) return null;
|
||||
return parsed.width / parsed.height;
|
||||
}
|
||||
|
||||
function findKnownRatioKey(ar: string, candidates: string[], tolerance = 0.02): string | null {
|
||||
const targetRatio = getRatioValue(ar);
|
||||
if (targetRatio == null) return null;
|
||||
|
||||
let bestKey: string | null = null;
|
||||
let bestDiff = Infinity;
|
||||
|
||||
for (const candidate of candidates) {
|
||||
const candidateRatio = getRatioValue(candidate);
|
||||
if (candidateRatio == null) continue;
|
||||
const diff = Math.abs(candidateRatio - targetRatio);
|
||||
if (diff < bestDiff) {
|
||||
bestDiff = diff;
|
||||
bestKey = candidate;
|
||||
}
|
||||
}
|
||||
|
||||
return bestDiff <= tolerance ? bestKey : null;
|
||||
}
|
||||
|
||||
function roundToStep(value: number): number {
|
||||
return Math.max(SIZE_STEP, Math.round(value / SIZE_STEP) * SIZE_STEP);
|
||||
}
|
||||
|
||||
// Scale a (width, height) pair so its total pixel count lands inside
// [minPixels, maxPixels] while approximately preserving the aspect ratio,
// then snap both sides to SIZE_STEP multiples. Snapping can push the area
// back outside the budget, so two corrective loops nudge one side at a time
// until the area is legal again.
function fitToPixelBudget(
  width: number,
  height: number,
  minPixels: number,
  maxPixels: number,
): { width: number; height: number } {
  let nextWidth = width;
  let nextHeight = height;
  let pixels = nextWidth * nextHeight;

  // Uniform scale toward the budget: sqrt keeps the aspect ratio because the
  // same factor is applied to both sides.
  if (pixels > maxPixels) {
    const scale = Math.sqrt(maxPixels / pixels);
    nextWidth *= scale;
    nextHeight *= scale;
  } else if (pixels < minPixels) {
    const scale = Math.sqrt(minPixels / pixels);
    nextWidth *= scale;
    nextHeight *= scale;
  }

  // Snap to the provider's step grid; this may re-violate the budget.
  let roundedWidth = roundToStep(nextWidth);
  let roundedHeight = roundToStep(nextHeight);
  pixels = roundedWidth * roundedHeight;

  // Too large after rounding: shrink the longer side one step at a time,
  // never dropping a side below one step.
  while (pixels > maxPixels && (roundedWidth > SIZE_STEP || roundedHeight > SIZE_STEP)) {
    if (roundedWidth >= roundedHeight && roundedWidth > SIZE_STEP) {
      roundedWidth -= SIZE_STEP;
    } else if (roundedHeight > SIZE_STEP) {
      roundedHeight -= SIZE_STEP;
    } else {
      break;
    }
    pixels = roundedWidth * roundedHeight;
  }

  // Too small after rounding: grow the shorter side one step at a time.
  while (pixels < minPixels) {
    if (roundedWidth <= roundedHeight) {
      roundedWidth += SIZE_STEP;
    } else {
      roundedHeight += SIZE_STEP;
    }
    pixels = roundedWidth * roundedHeight;
  }

  return { width: roundedWidth, height: roundedHeight };
}
|
||||
|
||||
export function getSizeFromAspectRatio(ar: string | null, quality: CliArgs["quality"]): string {
|
||||
const is2k = quality === "2k";
|
||||
const defaultSize = is2k ? "1536*1536" : "1024*1024";
|
||||
const normalizedQuality = normalizeQuality(quality);
|
||||
const sizes = normalizedQuality === "2k" ? LEGACY_STANDARD_SIZES_2K : LEGACY_STANDARD_SIZES;
|
||||
const defaultSize = normalizedQuality === "2k" ? "1536*1536" : "1024*1024";
|
||||
|
||||
if (!ar) return defaultSize;
|
||||
|
||||
|
|
@ -55,86 +231,157 @@ export function getSizeFromAspectRatio(ar: string | null, quality: CliArgs["qual
|
|||
if (!parsed) return defaultSize;
|
||||
|
||||
const targetRatio = parsed.width / parsed.height;
|
||||
const sizes = is2k ? STANDARD_SIZES_2K : STANDARD_SIZES;
|
||||
|
||||
let best = defaultSize;
|
||||
let bestDiff = Infinity;
|
||||
|
||||
for (const [w, h] of sizes) {
|
||||
const diff = Math.abs(w / h - targetRatio);
|
||||
for (const [width, height] of sizes) {
|
||||
const diff = Math.abs(width / height - targetRatio);
|
||||
if (diff < bestDiff) {
|
||||
bestDiff = diff;
|
||||
best = `${w}*${h}`;
|
||||
best = formatSize(width, height);
|
||||
}
|
||||
}
|
||||
|
||||
return best;
|
||||
}
|
||||
|
||||
export function normalizeSize(size: string): string {
|
||||
return size.replace("x", "*");
|
||||
export function getQwen2SizeFromAspectRatio(ar: string | null, quality: CliArgs["quality"]): string {
|
||||
const normalizedQuality = normalizeQuality(quality);
|
||||
|
||||
if (!ar) {
|
||||
return QWEN_2_RECOMMENDED["1:1"][normalizedQuality];
|
||||
}
|
||||
|
||||
const recommendedRatio = findKnownRatioKey(ar, Object.keys(QWEN_2_RECOMMENDED));
|
||||
if (recommendedRatio) {
|
||||
return QWEN_2_RECOMMENDED[recommendedRatio][normalizedQuality];
|
||||
}
|
||||
|
||||
const parsed = parseAspectRatio(ar);
|
||||
if (!parsed) {
|
||||
return QWEN_2_RECOMMENDED["1:1"][normalizedQuality];
|
||||
}
|
||||
|
||||
const targetRatio = parsed.width / parsed.height;
|
||||
const targetPixels = QWEN_2_TARGET_PIXELS[normalizedQuality];
|
||||
const rawWidth = Math.sqrt(targetPixels * targetRatio);
|
||||
const rawHeight = Math.sqrt(targetPixels / targetRatio);
|
||||
const fitted = fitToPixelBudget(
|
||||
rawWidth,
|
||||
rawHeight,
|
||||
MIN_QWEN_2_TOTAL_PIXELS,
|
||||
MAX_QWEN_2_TOTAL_PIXELS,
|
||||
);
|
||||
|
||||
return formatSize(fitted.width, fitted.height);
|
||||
}
|
||||
|
||||
export async function generateImage(
|
||||
prompt: string,
|
||||
model: string,
|
||||
args: CliArgs
|
||||
): Promise<Uint8Array> {
|
||||
const apiKey = getApiKey();
|
||||
if (!apiKey) throw new Error("DASHSCOPE_API_KEY is required");
|
||||
|
||||
if (args.referenceImages.length > 0) {
|
||||
throw new Error(
|
||||
"Reference images are not supported with DashScope provider in baoyu-image-gen. Use --provider google with a Gemini multimodal model."
|
||||
function getQwenFixedSizeFromAspectRatio(ar: string | null, quality: CliArgs["quality"]): string {
|
||||
if (quality === "normal") {
|
||||
console.warn(
|
||||
"DashScope qwen-image-max/plus/image models use fixed output sizes; --quality normal does not change the generated resolution."
|
||||
);
|
||||
}
|
||||
|
||||
const size = args.size ? normalizeSize(args.size) : getSizeFromAspectRatio(args.aspectRatio, args.quality);
|
||||
const url = `${getBaseUrl()}/api/v1/services/aigc/multimodal-generation/generation`;
|
||||
if (!ar) return QWEN_FIXED_SPEC.defaultSize;
|
||||
|
||||
const body = {
|
||||
model,
|
||||
input: {
|
||||
messages: [
|
||||
{
|
||||
role: "user",
|
||||
content: [{ text: prompt }],
|
||||
},
|
||||
],
|
||||
},
|
||||
parameters: {
|
||||
prompt_extend: false,
|
||||
size,
|
||||
},
|
||||
};
|
||||
|
||||
console.log(`Generating image with DashScope (${model})...`, { size });
|
||||
|
||||
const res = await fetch(url, {
|
||||
method: "POST",
|
||||
headers: {
|
||||
"Content-Type": "application/json",
|
||||
Authorization: `Bearer ${apiKey}`,
|
||||
},
|
||||
body: JSON.stringify(body),
|
||||
});
|
||||
|
||||
if (!res.ok) {
|
||||
const err = await res.text();
|
||||
throw new Error(`DashScope API error (${res.status}): ${err}`);
|
||||
const ratioKey = findKnownRatioKey(ar, Object.keys(QWEN_FIXED_SIZES_BY_RATIO));
|
||||
if (!ratioKey) {
|
||||
throw new Error(
|
||||
`DashScope model supports only fixed ratios ${Object.keys(QWEN_FIXED_SIZES_BY_RATIO).join(", ")}. ` +
|
||||
`For custom ratios like "${ar}", use --model qwen-image-2.0-pro.`
|
||||
);
|
||||
}
|
||||
|
||||
const result = await res.json() as {
|
||||
output?: {
|
||||
result_image?: string;
|
||||
choices?: Array<{
|
||||
message?: {
|
||||
content?: Array<{ image?: string }>;
|
||||
};
|
||||
}>;
|
||||
};
|
||||
return QWEN_FIXED_SIZES_BY_RATIO[ratioKey]!;
|
||||
}
|
||||
|
||||
function validateSizeFormat(size: string): { width: number; height: number } {
|
||||
const parsed = parseSize(size);
|
||||
if (!parsed) {
|
||||
throw new Error(`Invalid DashScope size "${size}". Expected <width>x<height> or <width>*<height>.`);
|
||||
}
|
||||
return parsed;
|
||||
}
|
||||
|
||||
function validateQwen2Size(size: string): string {
|
||||
const normalized = normalizeSize(size);
|
||||
const parsed = validateSizeFormat(normalized);
|
||||
const totalPixels = parsed.width * parsed.height;
|
||||
if (totalPixels < MIN_QWEN_2_TOTAL_PIXELS || totalPixels > MAX_QWEN_2_TOTAL_PIXELS) {
|
||||
throw new Error(
|
||||
`DashScope qwen-image-2.0* models require total pixels between ${MIN_QWEN_2_TOTAL_PIXELS} ` +
|
||||
`and ${MAX_QWEN_2_TOTAL_PIXELS}. Received ${normalized} (${totalPixels} pixels).`
|
||||
);
|
||||
}
|
||||
return normalized;
|
||||
}
|
||||
|
||||
function validateQwenFixedSize(size: string): string {
|
||||
const normalized = normalizeSize(size);
|
||||
validateSizeFormat(normalized);
|
||||
if (!QWEN_FIXED_SIZES.includes(normalized)) {
|
||||
throw new Error(
|
||||
`DashScope qwen-image-max/plus/image models support only these sizes: ${QWEN_FIXED_SIZES.join(", ")}. ` +
|
||||
`Received ${normalized}.`
|
||||
);
|
||||
}
|
||||
return normalized;
|
||||
}
|
||||
|
||||
export function resolveSizeForModel(
|
||||
model: string,
|
||||
args: Pick<CliArgs, "size" | "aspectRatio" | "quality">,
|
||||
): string {
|
||||
const spec = getModelSpec(model);
|
||||
|
||||
if (args.size) {
|
||||
if (spec.family === "qwen2") return validateQwen2Size(args.size);
|
||||
if (spec.family === "qwenFixed") return validateQwenFixedSize(args.size);
|
||||
validateSizeFormat(args.size);
|
||||
return normalizeSize(args.size);
|
||||
}
|
||||
|
||||
if (spec.family === "qwen2") {
|
||||
return getQwen2SizeFromAspectRatio(args.aspectRatio, args.quality);
|
||||
}
|
||||
|
||||
if (spec.family === "qwenFixed") {
|
||||
return getQwenFixedSizeFromAspectRatio(args.aspectRatio, args.quality);
|
||||
}
|
||||
|
||||
return getSizeFromAspectRatio(args.aspectRatio, args.quality);
|
||||
}
|
||||
|
||||
function buildParameters(
|
||||
family: DashScopeModelFamily,
|
||||
size: string,
|
||||
): Record<string, unknown> {
|
||||
const parameters: Record<string, unknown> = {
|
||||
prompt_extend: false,
|
||||
size,
|
||||
};
|
||||
|
||||
if (family === "qwen2" || family === "qwenFixed") {
|
||||
parameters.watermark = false;
|
||||
parameters.negative_prompt = QWEN_NEGATIVE_PROMPT;
|
||||
}
|
||||
|
||||
return parameters;
|
||||
}
|
||||
|
||||
// Relevant subset of the DashScope generation response. The image can arrive
// either directly as `output.result_image` or nested inside
// `output.choices[].message.content[].image`; extractImageFromResponse
// checks both locations.
type DashScopeResponse = {
  output?: {
    result_image?: string;
    choices?: Array<{
      message?: {
        content?: Array<{ image?: string }>;
      };
    }>;
  };
};
|
||||
|
||||
async function extractImageFromResponse(result: DashScopeResponse): Promise<Uint8Array> {
|
||||
let imageData: string | null = null;
|
||||
|
||||
if (result.output?.result_image) {
|
||||
|
|
@ -163,3 +410,54 @@ export async function generateImage(
|
|||
|
||||
return Uint8Array.from(Buffer.from(imageData, "base64"));
|
||||
}
|
||||
|
||||
// Generate one image via DashScope's multimodal-generation HTTP endpoint.
// Requires DASHSCOPE_API_KEY; rejects reference images (this provider path
// has no image-input support). The output size is resolved per model family
// before the request is built. Returns the raw image bytes.
export async function generateImage(
  prompt: string,
  model: string,
  args: CliArgs
): Promise<Uint8Array> {
  const apiKey = getApiKey();
  if (!apiKey) throw new Error("DASHSCOPE_API_KEY is required");

  // Fail fast instead of silently ignoring --ref inputs.
  if (args.referenceImages.length > 0) {
    throw new Error(
      "Reference images are not supported with DashScope provider in baoyu-image-gen. Use --provider google with a Gemini multimodal model."
    );
  }

  // Family drives both size validation and which request parameters are sent.
  const spec = getModelSpec(model);
  const size = resolveSizeForModel(model, args);
  const url = `${getBaseUrl()}/api/v1/services/aigc/multimodal-generation/generation`;

  // Chat-style payload: a single user message carrying the text prompt.
  const body = {
    model,
    input: {
      messages: [
        {
          role: "user",
          content: [{ text: prompt }],
        },
      ],
    },
    parameters: buildParameters(spec.family, size),
  };

  console.log(`Generating image with DashScope (${model})...`, { family: spec.family, size });

  const res = await fetch(url, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      Authorization: `Bearer ${apiKey}`,
    },
    body: JSON.stringify(body),
  });

  // Surface the provider's error body verbatim for easier debugging.
  if (!res.ok) {
    const err = await res.text();
    throw new Error(`DashScope API error (${res.status}): ${err}`);
  }

  const result = await res.json() as DashScopeResponse;
  return extractImageFromResponse(result);
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue