Add qwen-image-2.0-pro support for baoyu-image-gen

This commit is contained in:
JianJang2017 2026-03-13 19:09:54 -05:00 committed by Jim Liu 宝玉
parent de7dc85361
commit ac2ce0b8b6
8 changed files with 583 additions and 97 deletions

View File

@ -726,7 +726,7 @@ AI SDK-based image generation using OpenAI, Google, OpenRouter, DashScope (Aliyu
| `OPENAI_IMAGE_MODEL` | OpenAI model | `gpt-image-1.5` |
| `OPENROUTER_IMAGE_MODEL` | OpenRouter model | `google/gemini-3.1-flash-image-preview` |
| `GOOGLE_IMAGE_MODEL` | Google model | `gemini-3-pro-image-preview` |
| `DASHSCOPE_IMAGE_MODEL` | DashScope model | `z-image-turbo` |
| `DASHSCOPE_IMAGE_MODEL` | DashScope model | `qwen-image-2.0-pro` |
| `REPLICATE_IMAGE_MODEL` | Replicate model | `google/nano-banana-pro` |
| `JIMENG_IMAGE_MODEL` | Jimeng model | `jimeng_t2i_v40` |
| `SEEDREAM_IMAGE_MODEL` | Seedream model | `doubao-seedream-5-0-260128` |
@ -996,7 +996,7 @@ GOOGLE_IMAGE_MODEL=gemini-3-pro-image-preview
# DashScope (Aliyun Tongyi Wanxiang)
DASHSCOPE_API_KEY=sk-xxx
DASHSCOPE_IMAGE_MODEL=z-image-turbo
DASHSCOPE_IMAGE_MODEL=qwen-image-2.0-pro
# DASHSCOPE_BASE_URL=https://dashscope.aliyuncs.com/api/v1
# Replicate

View File

@ -726,7 +726,7 @@ AI 驱动的生成后端。
| `OPENAI_IMAGE_MODEL` | OpenAI 模型 | `gpt-image-1.5` |
| `OPENROUTER_IMAGE_MODEL` | OpenRouter 模型 | `google/gemini-3.1-flash-image-preview` |
| `GOOGLE_IMAGE_MODEL` | Google 模型 | `gemini-3-pro-image-preview` |
| `DASHSCOPE_IMAGE_MODEL` | DashScope 模型 | `z-image-turbo` |
| `DASHSCOPE_IMAGE_MODEL` | DashScope 模型 | `qwen-image-2.0-pro` |
| `REPLICATE_IMAGE_MODEL` | Replicate 模型 | `google/nano-banana-pro` |
| `JIMENG_IMAGE_MODEL` | 即梦模型 | `jimeng_t2i_v40` |
| `SEEDREAM_IMAGE_MODEL` | 豆包模型 | `doubao-seedream-5-0-260128` |
@ -996,7 +996,7 @@ GOOGLE_IMAGE_MODEL=gemini-3-pro-image-preview
# DashScope阿里通义万相
DASHSCOPE_API_KEY=sk-xxx
DASHSCOPE_IMAGE_MODEL=z-image-turbo
DASHSCOPE_IMAGE_MODEL=qwen-image-2.0-pro
# DASHSCOPE_BASE_URL=https://dashscope.aliyuncs.com/api/v1
# Replicate

View File

@ -92,6 +92,12 @@ ${BUN_X} {baseDir}/scripts/main.ts --prompt "A cat" --image out.png --provider o
# DashScope (阿里通义万相)
${BUN_X} {baseDir}/scripts/main.ts --prompt "一只可爱的猫" --image out.png --provider dashscope
# DashScope Qwen-Image 2.0 Pro (recommended for custom sizes and text rendering)
${BUN_X} {baseDir}/scripts/main.ts --prompt "为咖啡品牌设计一张 21:9 横幅海报,包含清晰中文标题" --image out.png --provider dashscope --model qwen-image-2.0-pro --size 2048x872
# DashScope legacy Qwen fixed-size model
${BUN_X} {baseDir}/scripts/main.ts --prompt "一张电影感海报" --image out.png --provider dashscope --model qwen-image-max --size 1664x928
# Replicate (google/nano-banana-pro)
${BUN_X} {baseDir}/scripts/main.ts --prompt "A cat" --image out.png --provider replicate
@ -142,7 +148,7 @@ Paths in `promptFiles`, `image`, and `ref` are resolved relative to the batch fi
| `--batchfile <path>` | JSON batch file for multi-image generation |
| `--jobs <count>` | Worker count for batch mode (default: auto, max from config, built-in default 10) |
| `--provider google\|openai\|openrouter\|dashscope\|jimeng\|seedream\|replicate` | Force provider (default: auto-detect) |
| `--model <id>`, `-m` | Model ID (Google: `gemini-3-pro-image-preview`; OpenAI: `gpt-image-1.5`; OpenRouter: `google/gemini-3.1-flash-image-preview`) |
| `--model <id>`, `-m` | Model ID (Google: `gemini-3-pro-image-preview`; OpenAI: `gpt-image-1.5`; OpenRouter: `google/gemini-3.1-flash-image-preview`; DashScope: `qwen-image-2.0-pro`) |
| `--ar <ratio>` | Aspect ratio (e.g., `16:9`, `1:1`, `4:3`) |
| `--size <WxH>` | Size (e.g., `1024x1024`) |
| `--quality normal\|2k` | Quality preset (default: `2k`) |
@ -166,7 +172,7 @@ Paths in `promptFiles`, `image`, and `ref` are resolved relative to the batch fi
| `OPENAI_IMAGE_MODEL` | OpenAI model override |
| `OPENROUTER_IMAGE_MODEL` | OpenRouter model override (default: `google/gemini-3.1-flash-image-preview`) |
| `GOOGLE_IMAGE_MODEL` | Google model override |
| `DASHSCOPE_IMAGE_MODEL` | DashScope model override (default: z-image-turbo) |
| `DASHSCOPE_IMAGE_MODEL` | DashScope model override (default: `qwen-image-2.0-pro`) |
| `REPLICATE_IMAGE_MODEL` | Replicate model override (default: google/nano-banana-pro) |
| `JIMENG_IMAGE_MODEL` | Jimeng model override (default: jimeng_t2i_v40) |
| `SEEDREAM_IMAGE_MODEL` | Seedream model override (default: doubao-seedream-5-0-260128) |
@ -201,6 +207,52 @@ Model priority (highest → lowest), applies to all providers:
- Show: `Using [provider] / [model]`
- Show switch hint: `Switch model: --model <id> | EXTEND.md default_model.[provider] | env <PROVIDER>_IMAGE_MODEL`
### DashScope Models
Use `--model qwen-image-2.0-pro` or set `default_model.dashscope` / `DASHSCOPE_IMAGE_MODEL` when the user wants official Qwen-Image behavior.
Official DashScope model families:
- `qwen-image-2.0-pro`, `qwen-image-2.0-pro-2026-03-03`, `qwen-image-2.0`, `qwen-image-2.0-2026-03-03`
- Free-form `size` in `宽*高` format
- Total pixels must stay between `512*512` and `2048*2048`
- Default size is approximately `1024*1024`
- Best choice for custom ratios such as `21:9` and text-heavy Chinese/English layouts
- `qwen-image-max`, `qwen-image-max-2025-12-30`, `qwen-image-plus`, `qwen-image-plus-2026-01-09`, `qwen-image`
- Fixed sizes only: `1664*928`, `1472*1104`, `1328*1328`, `1104*1472`, `928*1664`
- Default size is `1664*928`
- `qwen-image` currently has the same capability as `qwen-image-plus`
- Legacy DashScope models such as `z-image-turbo`, `z-image-ultra`, `wanx-v1`
- Keep using them only when the user explicitly asks for legacy behavior or compatibility
When translating CLI args into DashScope behavior:
- `--size` wins over `--ar`
- For `qwen-image-2.0*`, prefer explicit `--size`; otherwise infer from `--ar` and use the official recommended resolutions below
- For `qwen-image-max/plus/image`, only use the five official fixed sizes; if the requested ratio is not covered, switch to `qwen-image-2.0-pro`
- `--quality` is a baoyu-image-gen compatibility preset, not a native DashScope API field. Mapping `normal` / `2k` onto the `qwen-image-2.0*` table below is an implementation inference, not an official API guarantee
Recommended `qwen-image-2.0*` sizes for common aspect ratios:
| Ratio | `normal` | `2k` |
|-------|----------|------|
| `1:1` | `1024*1024` | `1536*1536` |
| `2:3` | `768*1152` | `1024*1536` |
| `3:2` | `1152*768` | `1536*1024` |
| `3:4` | `960*1280` | `1080*1440` |
| `4:3` | `1280*960` | `1440*1080` |
| `9:16` | `720*1280` | `1080*1920` |
| `16:9` | `1280*720` | `1920*1080` |
| `21:9` | `1344*576` | `2048*872` |
DashScope official APIs also expose `negative_prompt`, `prompt_extend`, and `watermark`, but `baoyu-image-gen` does not expose them as dedicated CLI flags today.
Official references:
- [Qwen-Image API](https://help.aliyun.com/zh/model-studio/qwen-image-api)
- [Text-to-image guide](https://help.aliyun.com/zh/model-studio/text-to-image)
- [Qwen-Image Edit API](https://help.aliyun.com/zh/model-studio/qwen-image-edit-api)
### OpenRouter Models
Use full OpenRouter model IDs, e.g.:

View File

@ -50,7 +50,7 @@ options:
- label: "OpenRouter"
description: "Router for Gemini/FLUX/OpenAI-compatible image models"
- label: "DashScope"
description: "Alibaba Cloud - z-image-turbo, good for Chinese content"
description: "Alibaba Cloud - Qwen-Image, strong Chinese/English text rendering"
- label: "Replicate"
description: "Community models - nano-banana-pro, flexible model selection"
```
@ -186,12 +186,26 @@ options:
header: "DashScope Model"
question: "Choose a default DashScope image generation model?"
options:
- label: "z-image-turbo (Recommended)"
description: "Fast generation, good quality"
- label: "qwen-image-2.0-pro (Recommended)"
description: "Best DashScope model for text rendering and custom sizes"
- label: "qwen-image-2.0"
description: "Faster 2.0 variant with flexible output size"
- label: "qwen-image-max"
description: "Legacy Qwen model with five fixed output sizes"
- label: "qwen-image-plus"
description: "Legacy Qwen model, same current capability as qwen-image"
- label: "z-image-turbo"
description: "Legacy DashScope model for compatibility"
- label: "z-image-ultra"
description: "Higher quality, slower generation"
description: "Legacy DashScope model, higher quality but slower"
```
Notes for DashScope setup:
- Prefer `qwen-image-2.0-pro` when the user needs custom `--size`, uncommon ratios like `21:9`, or strong Chinese/English text rendering.
- `qwen-image-max` / `qwen-image-plus` / `qwen-image` only support five fixed sizes: `1664*928`, `1472*1104`, `1328*1328`, `1104*1472`, `928*1664`.
- In `baoyu-image-gen`, `quality` is a compatibility preset. It is not a native DashScope parameter.
### Replicate Model Selection
```yaml

View File

@ -23,7 +23,7 @@ default_model:
google: null # e.g., "gemini-3-pro-image-preview", "gemini-3.1-flash-image-preview"
openai: null # e.g., "gpt-image-1.5", "gpt-image-1"
openrouter: null # e.g., "google/gemini-3.1-flash-image-preview"
dashscope: null # e.g., "z-image-turbo"
dashscope: null # e.g., "qwen-image-2.0-pro"
replicate: null # e.g., "google/nano-banana-pro"
batch:
@ -88,7 +88,7 @@ default_model:
google: "gemini-3-pro-image-preview"
openai: "gpt-image-1.5"
openrouter: "google/gemini-3.1-flash-image-preview"
dashscope: "z-image-turbo"
dashscope: "qwen-image-2.0-pro"
replicate: "google/nano-banana-pro"
batch:
max_workers: 10

View File

@ -116,7 +116,7 @@ Environment variables:
OPENAI_IMAGE_MODEL Default OpenAI model (gpt-image-1.5)
OPENROUTER_IMAGE_MODEL Default OpenRouter model (google/gemini-3.1-flash-image-preview)
GOOGLE_IMAGE_MODEL Default Google model (gemini-3-pro-image-preview)
DASHSCOPE_IMAGE_MODEL Default DashScope model (z-image-turbo)
DASHSCOPE_IMAGE_MODEL Default DashScope model (qwen-image-2.0-pro)
REPLICATE_IMAGE_MODEL Default Replicate model (google/nano-banana-pro)
JIMENG_IMAGE_MODEL Default Jimeng model (jimeng_t2i_v40)
SEEDREAM_IMAGE_MODEL Default Seedream model (doubao-seedream-5-0-260128)

View File

@ -1,25 +1,147 @@
import assert from "node:assert/strict";
import test from "node:test";
import test, { type TestContext } from "node:test";
import {
getDefaultModel,
getModelFamily,
getQwen2SizeFromAspectRatio,
getSizeFromAspectRatio,
normalizeSize,
parseAspectRatio,
parseSize,
resolveSizeForModel,
} from "./dashscope.ts";
/**
 * Temporarily sets (or unsets, for null values) environment variables for a
 * single test, and registers a TestContext cleanup hook that restores every
 * variable to its prior state once the test finishes.
 */
function useEnv(
t: TestContext,
values: Record<string, string | null>,
): void {
const saved: Array<[string, string | undefined]> = Object.keys(values).map(
(key) => [key, process.env[key]],
);
// Shared setter: null/undefined removes the variable, anything else assigns it.
const apply = (key: string, value: string | null | undefined): void => {
if (value == null) {
delete process.env[key];
} else {
process.env[key] = value;
}
};
for (const [key, value] of Object.entries(values)) {
apply(key, value);
}
t.after(() => {
for (const [key, value] of saved) {
apply(key, value);
}
});
}
// Env override must win; with no override the new qwen default applies.
test("DashScope default model prefers env override and otherwise uses qwen-image-2.0-pro", (t) => {
useEnv(t, { DASHSCOPE_IMAGE_MODEL: null });
assert.equal(getDefaultModel(), "qwen-image-2.0-pro");
process.env.DASHSCOPE_IMAGE_MODEL = "qwen-image-max";
assert.equal(getDefaultModel(), "qwen-image-max");
});
// Only positive numeric "W:H" strings parse; words and negatives return null.
test("DashScope aspect-ratio parsing accepts numeric ratios only", () => {
assert.deepEqual(parseAspectRatio("3:2"), { width: 3, height: 2 });
assert.equal(parseAspectRatio("square"), null);
assert.equal(parseAspectRatio("-1:2"), null);
});
test("DashScope size selection picks the closest supported size per quality preset", () => {
// Model-id routing: qwen-2.0* vs fixed-size qwen vs everything else (legacy).
test("DashScope model family routing distinguishes qwen-2.0, fixed-size qwen, and legacy models", () => {
assert.equal(getModelFamily("qwen-image-2.0-pro"), "qwen2");
assert.equal(getModelFamily("qwen-image"), "qwenFixed");
assert.equal(getModelFamily("z-image-turbo"), "legacy");
assert.equal(getModelFamily("wanx-v1"), "legacy");
});
// Legacy models keep the pre-existing closest-standard-size behavior.
test("Legacy DashScope size selection keeps the previous quality-based heuristic", () => {
assert.equal(getSizeFromAspectRatio(null, "normal"), "1024*1024");
assert.equal(getSizeFromAspectRatio("16:9", "normal"), "1280*720");
assert.equal(getSizeFromAspectRatio("16:9", "2k"), "2048*1152");
assert.equal(getSizeFromAspectRatio("invalid", "2k"), "1536*1536");
});
// Known ratios map straight onto the official qwen-2.0 recommendation table.
test("Qwen 2.0 recommended sizes follow the official common-ratio table", () => {
assert.equal(getQwen2SizeFromAspectRatio(null, "normal"), "1024*1024");
assert.equal(getQwen2SizeFromAspectRatio(null, "2k"), "1536*1536");
assert.equal(getQwen2SizeFromAspectRatio("16:9", "normal"), "1280*720");
assert.equal(getQwen2SizeFromAspectRatio("21:9", "2k"), "2048*872");
});
// Uncommon ratios get a derived size: checked only for pixel budget and ratio drift.
test("Qwen 2.0 derives free-form sizes within pixel budget for uncommon ratios", () => {
const size = getQwen2SizeFromAspectRatio("5:2", "normal");
const parsed = parseSize(size);
assert.ok(parsed);
assert.ok(parsed.width * parsed.height >= 512 * 512);
assert.ok(parsed.width * parsed.height <= 2048 * 2048);
assert.ok(Math.abs(parsed.width / parsed.height - 2.5) < 0.08);
});
// Explicit sizes for qwen-2.0* are accepted/rejected purely by total pixel count.
test("resolveSizeForModel validates explicit qwen-image-2.0 sizes by total pixels", () => {
assert.equal(
resolveSizeForModel("qwen-image-2.0-pro", {
size: "2048x872",
aspectRatio: null,
quality: "2k",
}),
"2048*872",
);
assert.throws(
() =>
resolveSizeForModel("qwen-image-2.0-pro", {
size: "4096x4096",
aspectRatio: null,
quality: "2k",
}),
/total pixels between/,
);
});
// Fixed-size qwen models: ratio snaps to a fixed size, anything else throws.
test("resolveSizeForModel enforces fixed sizes for qwen-image-max/plus/image", () => {
assert.equal(
resolveSizeForModel("qwen-image-max", {
size: null,
aspectRatio: "1:1",
quality: "2k",
}),
"1328*1328",
);
assert.equal(
resolveSizeForModel("qwen-image", {
size: "1664x928",
aspectRatio: "9:16",
quality: "normal",
}),
"1664*928",
);
assert.throws(
() =>
resolveSizeForModel("qwen-image-max", {
size: null,
aspectRatio: "21:9",
quality: "2k",
}),
/supports only fixed ratios/,
);
assert.throws(
() =>
resolveSizeForModel("qwen-image-plus", {
size: "1024x1024",
aspectRatio: null,
quality: "2k",
}),
/support only these sizes/,
);
});
test("DashScope size normalization converts WxH into provider format", () => {
assert.equal(normalizeSize("1024x1024"), "1024*1024");
assert.equal(normalizeSize("2048*1152"), "2048*1152");

View File

@ -1,28 +1,46 @@
import type { CliArgs } from "../types";
import type { CliArgs, Quality } from "../types";
export function getDefaultModel(): string {
return process.env.DASHSCOPE_IMAGE_MODEL || "z-image-turbo";
}
type DashScopeModelFamily = "qwen2" | "qwenFixed" | "legacy";
function getApiKey(): string | null {
return process.env.DASHSCOPE_API_KEY || null;
}
type DashScopeModelSpec = {
family: DashScopeModelFamily;
defaultSize: string;
};
function getBaseUrl(): string {
const base = process.env.DASHSCOPE_BASE_URL || "https://dashscope.aliyuncs.com";
return base.replace(/\/+$/g, "");
}
// Default model when DASHSCOPE_IMAGE_MODEL is not set.
const DEFAULT_MODEL = "qwen-image-2.0-pro";
// qwen-image-2.0* accepts free-form sizes as long as width*height stays in this budget.
const MIN_QWEN_2_TOTAL_PIXELS = 512 * 512;
const MAX_QWEN_2_TOTAL_PIXELS = 2048 * 2048;
// Granularity used when deriving free-form sizes (both sides snap to multiples of this).
const SIZE_STEP = 16;
// Shared negative prompt sent for qwen-family models to suppress common artifacts.
// NOTE(review): the string reads as one unbroken run of Chinese phrases — it may
// have lost its original delimiters in transit; confirm against the source before editing.
const QWEN_NEGATIVE_PROMPT =
"低分辨率低画质肢体畸形手指畸形画面过饱和蜡像感人脸无细节过度光滑画面具有AI感构图混乱文字模糊扭曲";
export function parseAspectRatio(ar: string): { width: number; height: number } | null {
const match = ar.match(/^(\d+(?:\.\d+)?):(\d+(?:\.\d+)?)$/);
if (!match) return null;
const w = parseFloat(match[1]!);
const h = parseFloat(match[2]!);
if (w <= 0 || h <= 0) return null;
return { width: w, height: h };
}
const QWEN_2_TARGET_PIXELS: Record<Quality, number> = {
normal: 1024 * 1024,
"2k": 1536 * 1536,
};
const STANDARD_SIZES: [number, number][] = [
const QWEN_2_RECOMMENDED: Record<string, Record<Quality, string>> = {
"1:1": { normal: "1024*1024", "2k": "1536*1536" },
"2:3": { normal: "768*1152", "2k": "1024*1536" },
"3:2": { normal: "1152*768", "2k": "1536*1024" },
"3:4": { normal: "960*1280", "2k": "1080*1440" },
"4:3": { normal: "1280*960", "2k": "1440*1080" },
"9:16": { normal: "720*1280", "2k": "1080*1920" },
"16:9": { normal: "1280*720", "2k": "1920*1080" },
"21:9": { normal: "1344*576", "2k": "2048*872" },
};
const QWEN_FIXED_SIZES_BY_RATIO: Record<string, string> = {
"16:9": "1664*928",
"4:3": "1472*1104",
"1:1": "1328*1328",
"3:4": "1104*1472",
"9:16": "928*1664",
};
const QWEN_FIXED_SIZES = Object.values(QWEN_FIXED_SIZES_BY_RATIO);
const LEGACY_STANDARD_SIZES: [number, number][] = [
[1024, 1024],
[1280, 720],
[720, 1280],
@ -34,7 +52,7 @@ const STANDARD_SIZES: [number, number][] = [
[864, 1536],
];
const STANDARD_SIZES_2K: [number, number][] = [
const LEGACY_STANDARD_SIZES_2K: [number, number][] = [
[1536, 1536],
[2048, 1152],
[1152, 2048],
@ -45,9 +63,167 @@ const STANDARD_SIZES_2K: [number, number][] = [
[2048, 2048],
];
// Spec for flexible-size qwen-image-2.0* models (free-form size within the pixel budget).
const QWEN_2_SPEC: DashScopeModelSpec = {
family: "qwen2",
defaultSize: "1024*1024",
};
// Spec for fixed-size qwen-image-max/plus/image models (five official sizes only).
const QWEN_FIXED_SPEC: DashScopeModelSpec = {
family: "qwenFixed",
defaultSize: QWEN_FIXED_SIZES_BY_RATIO["16:9"],
};
// Fallback spec for older DashScope models (z-image-*, wanx-*) and any unknown id.
const LEGACY_SPEC: DashScopeModelSpec = {
family: "legacy",
defaultSize: "1536*1536",
};
// Maps lower-cased model ids — including dated snapshot aliases — to their spec.
// Ids not listed here fall through to LEGACY_SPEC (see getModelSpec).
const MODEL_SPEC_ALIASES: Record<string, DashScopeModelSpec> = {
"qwen-image-2.0-pro": QWEN_2_SPEC,
"qwen-image-2.0-pro-2026-03-03": QWEN_2_SPEC,
"qwen-image-2.0": QWEN_2_SPEC,
"qwen-image-2.0-2026-03-03": QWEN_2_SPEC,
"qwen-image-max": QWEN_FIXED_SPEC,
"qwen-image-max-2025-12-30": QWEN_FIXED_SPEC,
"qwen-image-plus": QWEN_FIXED_SPEC,
"qwen-image-plus-2026-01-09": QWEN_FIXED_SPEC,
"qwen-image": QWEN_FIXED_SPEC,
};
/** DashScope model id to use: DASHSCOPE_IMAGE_MODEL env override, else the built-in default. */
export function getDefaultModel(): string {
  const override = process.env.DASHSCOPE_IMAGE_MODEL;
  return override ? override : DEFAULT_MODEL;
}
/** Reads the DashScope API key from the environment; null when unset or empty. */
function getApiKey(): string | null {
  const key = process.env.DASHSCOPE_API_KEY;
  return key ? key : null;
}
/** Base URL for DashScope API calls (env override or default), with trailing slashes removed. */
function getBaseUrl(): string {
  let base = process.env.DASHSCOPE_BASE_URL;
  if (!base) base = "https://dashscope.aliyuncs.com";
  while (base.endsWith("/")) base = base.slice(0, -1);
  return base;
}
/** Spec lookup for a model id (whitespace/case-insensitive); unknown ids resolve as legacy. */
function getModelSpec(model: string): DashScopeModelSpec {
  const key = model.trim().toLowerCase();
  return MODEL_SPEC_ALIASES[key] ?? LEGACY_SPEC;
}
/** Routing family ("qwen2" | "qwenFixed" | "legacy") for a DashScope model id. */
export function getModelFamily(model: string): DashScopeModelFamily {
  const { family } = getModelSpec(model);
  return family;
}
/** Collapses the CLI quality flag onto the two presets; anything but "normal" means "2k". */
function normalizeQuality(quality: CliArgs["quality"]): Quality {
  if (quality === "normal") return "normal";
  return "2k";
}
/**
 * Parses a "W:H" aspect-ratio string (decimals allowed) into numeric parts.
 * Returns null for malformed strings or non-positive components.
 */
export function parseAspectRatio(ar: string): { width: number; height: number } | null {
  const match = /^(\d+(?:\.\d+)?):(\d+(?:\.\d+)?)$/.exec(ar);
  if (match == null) return null;
  const width = Number(match[1]);
  const height = Number(match[2]);
  return width > 0 && height > 0 ? { width, height } : null;
}
/**
 * Converts a user-supplied size such as "1024x1024" into DashScope's
 * "<width>*<height>" wire format.
 *
 * Generalized from the previous `replace("x", "*")`: also accepts an
 * upper-case "X" separator and trims surrounding whitespace, so values pasted
 * from docs or shells normalize the same way. Strings already using "*" pass
 * through unchanged, preserving all previous behavior.
 */
export function normalizeSize(size: string): string {
  return size.trim().replace(/[xX]/, "*");
}
/**
 * Parses "<w>*<h>" (or "<w>x<h>") into positive integer dimensions.
 * Returns null when the string is malformed or either side is non-positive.
 */
export function parseSize(size: string): { width: number; height: number } | null {
  const normalized = size.replace("x", "*");
  const match = /^(\d+)\*(\d+)$/.exec(normalized);
  if (match == null) return null;
  const width = parseInt(match[1]!, 10);
  const height = parseInt(match[2]!, 10);
  if (width > 0 && height > 0 && Number.isFinite(width) && Number.isFinite(height)) {
    return { width, height };
  }
  return null;
}
/** Renders dimensions in DashScope's "<width>*<height>" wire format. */
function formatSize(width: number, height: number): string {
  return [width, height].join("*");
}
/** Numeric width/height ratio for a "W:H" string, or null if it cannot be parsed. */
function getRatioValue(ar: string): number | null {
  const parts = parseAspectRatio(ar);
  return parts ? parts.width / parts.height : null;
}
/**
 * Finds the candidate ratio key (e.g. "16:9") whose numeric ratio is nearest
 * to `ar`, but only when that nearest match is within `tolerance`.
 * Returns null for unparsable input, an empty candidate list, or when even
 * the closest candidate is too far away.
 */
function findKnownRatioKey(ar: string, candidates: string[], tolerance = 0.02): string | null {
  const target = getRatioValue(ar);
  if (target == null) return null;
  let winner: { key: string; diff: number } | null = null;
  for (const key of candidates) {
    const ratio = getRatioValue(key);
    if (ratio == null) continue;
    const diff = Math.abs(ratio - target);
    // Strict "<" keeps the earliest candidate on exact ties, like the original.
    if (winner == null || diff < winner.diff) {
      winner = { key, diff };
    }
  }
  return winner != null && winner.diff <= tolerance ? winner.key : null;
}
/** Rounds to the nearest SIZE_STEP multiple, never dropping below one step. */
function roundToStep(value: number): number {
  const stepped = Math.round(value / SIZE_STEP) * SIZE_STEP;
  return stepped < SIZE_STEP ? SIZE_STEP : stepped;
}
/**
 * Scales (width, height) so the total pixel count lands inside
 * [minPixels, maxPixels], then snaps both sides to SIZE_STEP multiples.
 * The sqrt-based scaling preserves the aspect ratio exactly; the snapping
 * adjustment loops below may let it drift by up to one step per side.
 */
function fitToPixelBudget(
width: number,
height: number,
minPixels: number,
maxPixels: number,
): { width: number; height: number } {
let nextWidth = width;
let nextHeight = height;
let pixels = nextWidth * nextHeight;
// Uniformly scale toward whichever bound is violated (sqrt keeps the ratio).
if (pixels > maxPixels) {
const scale = Math.sqrt(maxPixels / pixels);
nextWidth *= scale;
nextHeight *= scale;
} else if (pixels < minPixels) {
const scale = Math.sqrt(minPixels / pixels);
nextWidth *= scale;
nextHeight *= scale;
}
// Snapping to SIZE_STEP can push the product back outside the budget,
// so nudge one side at a time until the constraint holds again.
let roundedWidth = roundToStep(nextWidth);
let roundedHeight = roundToStep(nextHeight);
pixels = roundedWidth * roundedHeight;
// Over budget: shrink the longer side first, never going below one step.
while (pixels > maxPixels && (roundedWidth > SIZE_STEP || roundedHeight > SIZE_STEP)) {
if (roundedWidth >= roundedHeight && roundedWidth > SIZE_STEP) {
roundedWidth -= SIZE_STEP;
} else if (roundedHeight > SIZE_STEP) {
roundedHeight -= SIZE_STEP;
} else {
break;
}
pixels = roundedWidth * roundedHeight;
}
// Under budget: grow the shorter side. NOTE(review): assumes the
// [minPixels, maxPixels] window is wider than a single SIZE_STEP nudge so
// this loop cannot re-overshoot maxPixels — true for the qwen
// 512*512..2048*2048 budget; confirm before reusing with tighter bounds.
while (pixels < minPixels) {
if (roundedWidth <= roundedHeight) {
roundedWidth += SIZE_STEP;
} else {
roundedHeight += SIZE_STEP;
}
pixels = roundedWidth * roundedHeight;
}
return { width: roundedWidth, height: roundedHeight };
}
export function getSizeFromAspectRatio(ar: string | null, quality: CliArgs["quality"]): string {
const is2k = quality === "2k";
const defaultSize = is2k ? "1536*1536" : "1024*1024";
const normalizedQuality = normalizeQuality(quality);
const sizes = normalizedQuality === "2k" ? LEGACY_STANDARD_SIZES_2K : LEGACY_STANDARD_SIZES;
const defaultSize = normalizedQuality === "2k" ? "1536*1536" : "1024*1024";
if (!ar) return defaultSize;
@ -55,86 +231,157 @@ export function getSizeFromAspectRatio(ar: string | null, quality: CliArgs["qual
if (!parsed) return defaultSize;
const targetRatio = parsed.width / parsed.height;
const sizes = is2k ? STANDARD_SIZES_2K : STANDARD_SIZES;
let best = defaultSize;
let bestDiff = Infinity;
for (const [w, h] of sizes) {
const diff = Math.abs(w / h - targetRatio);
for (const [width, height] of sizes) {
const diff = Math.abs(width / height - targetRatio);
if (diff < bestDiff) {
bestDiff = diff;
best = `${w}*${h}`;
best = formatSize(width, height);
}
}
return best;
}
export function normalizeSize(size: string): string {
return size.replace("x", "*");
/**
 * Chooses a "<w>*<h>" size for qwen-image-2.0* models from an optional aspect
 * ratio and quality preset. Ratios close to an official recommended ratio use
 * the recommendation table; other valid ratios get a free-form size derived
 * from the quality's target pixel count, clamped to the model's pixel budget.
 * Null or unparsable ratios fall back to the square default.
 */
export function getQwen2SizeFromAspectRatio(ar: string | null, quality: CliArgs["quality"]): string {
const normalizedQuality = normalizeQuality(quality);
if (!ar) {
return QWEN_2_RECOMMENDED["1:1"][normalizedQuality];
}
// Prefer the official table when the ratio is (close to) a known one.
const recommendedRatio = findKnownRatioKey(ar, Object.keys(QWEN_2_RECOMMENDED));
if (recommendedRatio) {
return QWEN_2_RECOMMENDED[recommendedRatio][normalizedQuality];
}
const parsed = parseAspectRatio(ar);
if (!parsed) {
return QWEN_2_RECOMMENDED["1:1"][normalizedQuality];
}
// Derive width/height so that w*h ≈ target pixels and w/h ≈ requested ratio.
const targetRatio = parsed.width / parsed.height;
const targetPixels = QWEN_2_TARGET_PIXELS[normalizedQuality];
const rawWidth = Math.sqrt(targetPixels * targetRatio);
const rawHeight = Math.sqrt(targetPixels / targetRatio);
const fitted = fitToPixelBudget(
rawWidth,
rawHeight,
MIN_QWEN_2_TOTAL_PIXELS,
MAX_QWEN_2_TOTAL_PIXELS,
);
return formatSize(fitted.width, fitted.height);
}
export async function generateImage(
prompt: string,
model: string,
args: CliArgs
): Promise<Uint8Array> {
const apiKey = getApiKey();
if (!apiKey) throw new Error("DASHSCOPE_API_KEY is required");
if (args.referenceImages.length > 0) {
throw new Error(
"Reference images are not supported with DashScope provider in baoyu-image-gen. Use --provider google with a Gemini multimodal model."
function getQwenFixedSizeFromAspectRatio(ar: string | null, quality: CliArgs["quality"]): string {
if (quality === "normal") {
console.warn(
"DashScope qwen-image-max/plus/image models use fixed output sizes; --quality normal does not change the generated resolution."
);
}
const size = args.size ? normalizeSize(args.size) : getSizeFromAspectRatio(args.aspectRatio, args.quality);
const url = `${getBaseUrl()}/api/v1/services/aigc/multimodal-generation/generation`;
if (!ar) return QWEN_FIXED_SPEC.defaultSize;
const body = {
model,
input: {
messages: [
{
role: "user",
content: [{ text: prompt }],
},
],
},
parameters: {
prompt_extend: false,
size,
},
};
console.log(`Generating image with DashScope (${model})...`, { size });
const res = await fetch(url, {
method: "POST",
headers: {
"Content-Type": "application/json",
Authorization: `Bearer ${apiKey}`,
},
body: JSON.stringify(body),
});
if (!res.ok) {
const err = await res.text();
throw new Error(`DashScope API error (${res.status}): ${err}`);
const ratioKey = findKnownRatioKey(ar, Object.keys(QWEN_FIXED_SIZES_BY_RATIO));
if (!ratioKey) {
throw new Error(
`DashScope model supports only fixed ratios ${Object.keys(QWEN_FIXED_SIZES_BY_RATIO).join(", ")}. ` +
`For custom ratios like "${ar}", use --model qwen-image-2.0-pro.`
);
}
const result = await res.json() as {
output?: {
result_image?: string;
choices?: Array<{
message?: {
content?: Array<{ image?: string }>;
};
}>;
};
return QWEN_FIXED_SIZES_BY_RATIO[ratioKey]!;
}
/** Parses a size string, throwing a descriptive error instead of returning null. */
function validateSizeFormat(size: string): { width: number; height: number } {
  const parsed = parseSize(size);
  if (parsed == null) {
    throw new Error(`Invalid DashScope size "${size}". Expected <width>x<height> or <width>*<height>.`);
  }
  return parsed;
}
/**
 * Normalizes an explicit size for qwen-image-2.0* models and enforces the
 * total-pixel budget; throws when the product falls outside it.
 */
function validateQwen2Size(size: string): string {
  const normalized = normalizeSize(size);
  const { width, height } = validateSizeFormat(normalized);
  const totalPixels = width * height;
  const withinBudget =
    totalPixels >= MIN_QWEN_2_TOTAL_PIXELS && totalPixels <= MAX_QWEN_2_TOTAL_PIXELS;
  if (!withinBudget) {
    throw new Error(
      `DashScope qwen-image-2.0* models require total pixels between ${MIN_QWEN_2_TOTAL_PIXELS} ` +
      `and ${MAX_QWEN_2_TOTAL_PIXELS}. Received ${normalized} (${totalPixels} pixels).`
    );
  }
  return normalized;
}
/**
 * Normalizes an explicit size for qwen-image-max/plus/image and rejects any
 * value outside the five officially supported fixed sizes.
 */
function validateQwenFixedSize(size: string): string {
  const normalized = normalizeSize(size);
  validateSizeFormat(normalized);
  const isSupported = QWEN_FIXED_SIZES.some((candidate) => candidate === normalized);
  if (!isSupported) {
    throw new Error(
      `DashScope qwen-image-max/plus/image models support only these sizes: ${QWEN_FIXED_SIZES.join(", ")}. ` +
      `Received ${normalized}.`
    );
  }
  return normalized;
}
/**
 * Resolves the effective generation size for a model. An explicit --size is
 * validated per model family; otherwise the size is inferred from the aspect
 * ratio and quality preset using the family-specific strategy.
 */
export function resolveSizeForModel(
  model: string,
  args: Pick<CliArgs, "size" | "aspectRatio" | "quality">,
): string {
  const { family } = getModelSpec(model);
  if (args.size) {
    switch (family) {
      case "qwen2":
        return validateQwen2Size(args.size);
      case "qwenFixed":
        return validateQwenFixedSize(args.size);
      default:
        validateSizeFormat(args.size);
        return normalizeSize(args.size);
    }
  }
  switch (family) {
    case "qwen2":
      return getQwen2SizeFromAspectRatio(args.aspectRatio, args.quality);
    case "qwenFixed":
      return getQwenFixedSizeFromAspectRatio(args.aspectRatio, args.quality);
    default:
      return getSizeFromAspectRatio(args.aspectRatio, args.quality);
  }
}
/**
 * Builds the DashScope `parameters` payload. Prompt expansion is always
 * disabled; qwen families additionally disable the watermark and attach the
 * shared negative prompt.
 */
function buildParameters(
  family: DashScopeModelFamily,
  size: string,
): Record<string, unknown> {
  const base: Record<string, unknown> = { prompt_extend: false, size };
  const isQwen = family === "qwen2" || family === "qwenFixed";
  if (!isQwen) return base;
  return {
    ...base,
    watermark: false,
    negative_prompt: QWEN_NEGATIVE_PROMPT,
  };
}
// Minimal shape of a DashScope multimodal-generation response. The image may
// arrive either as output.result_image or nested inside
// output.choices[].message.content[].image, depending on the model.
type DashScopeResponse = {
output?: {
result_image?: string;
choices?: Array<{
message?: {
content?: Array<{ image?: string }>;
};
}>;
};
};
async function extractImageFromResponse(result: DashScopeResponse): Promise<Uint8Array> {
let imageData: string | null = null;
if (result.output?.result_image) {
@ -163,3 +410,54 @@ export async function generateImage(
return Uint8Array.from(Buffer.from(imageData, "base64"));
}
/**
 * Generates one image via the DashScope multimodal-generation endpoint and
 * returns the decoded image bytes.
 *
 * Requires DASHSCOPE_API_KEY. Reference images are rejected up front because
 * this provider path only sends a text prompt. Size resolution and request
 * parameters are delegated to the model-family helpers in this module.
 * @throws Error on a missing API key, unsupported reference images, an
 *         invalid/unsupported size, or a non-2xx API response (status and
 *         response body are included in the message).
 */
export async function generateImage(
prompt: string,
model: string,
args: CliArgs
): Promise<Uint8Array> {
const apiKey = getApiKey();
if (!apiKey) throw new Error("DASHSCOPE_API_KEY is required");
if (args.referenceImages.length > 0) {
throw new Error(
"Reference images are not supported with DashScope provider in baoyu-image-gen. Use --provider google with a Gemini multimodal model."
);
}
// The model family drives both size validation and the parameter payload.
const spec = getModelSpec(model);
const size = resolveSizeForModel(model, args);
const url = `${getBaseUrl()}/api/v1/services/aigc/multimodal-generation/generation`;
const body = {
model,
input: {
messages: [
{
role: "user",
content: [{ text: prompt }],
},
],
},
parameters: buildParameters(spec.family, size),
};
console.log(`Generating image with DashScope (${model})...`, { family: spec.family, size });
const res = await fetch(url, {
method: "POST",
headers: {
"Content-Type": "application/json",
Authorization: `Bearer ${apiKey}`,
},
body: JSON.stringify(body),
});
if (!res.ok) {
const err = await res.text();
throw new Error(`DashScope API error (${res.status}): ${err}`);
}
const result = await res.json() as DashScopeResponse;
return extractImageFromResponse(result);
}