Add qwen-image-2.0-pro support for baoyu-image-gen
This commit is contained in:
parent
de7dc85361
commit
ac2ce0b8b6
|
|
@ -726,7 +726,7 @@ AI SDK-based image generation using OpenAI, Google, OpenRouter, DashScope (Aliyu
|
|||
| `OPENAI_IMAGE_MODEL` | OpenAI model | `gpt-image-1.5` |
|
||||
| `OPENROUTER_IMAGE_MODEL` | OpenRouter model | `google/gemini-3.1-flash-image-preview` |
|
||||
| `GOOGLE_IMAGE_MODEL` | Google model | `gemini-3-pro-image-preview` |
|
||||
| `DASHSCOPE_IMAGE_MODEL` | DashScope model | `z-image-turbo` |
|
||||
| `DASHSCOPE_IMAGE_MODEL` | DashScope model | `qwen-image-2.0-pro` |
|
||||
| `REPLICATE_IMAGE_MODEL` | Replicate model | `google/nano-banana-pro` |
|
||||
| `JIMENG_IMAGE_MODEL` | Jimeng model | `jimeng_t2i_v40` |
|
||||
| `SEEDREAM_IMAGE_MODEL` | Seedream model | `doubao-seedream-5-0-260128` |
|
||||
|
|
@ -996,7 +996,7 @@ GOOGLE_IMAGE_MODEL=gemini-3-pro-image-preview
|
|||
|
||||
# DashScope (Aliyun Tongyi Wanxiang)
|
||||
DASHSCOPE_API_KEY=sk-xxx
|
||||
DASHSCOPE_IMAGE_MODEL=z-image-turbo
|
||||
DASHSCOPE_IMAGE_MODEL=qwen-image-2.0-pro
|
||||
# DASHSCOPE_BASE_URL=https://dashscope.aliyuncs.com/api/v1
|
||||
|
||||
# Replicate
|
||||
|
|
|
|||
|
|
@ -726,7 +726,7 @@ AI 驱动的生成后端。
|
|||
| `OPENAI_IMAGE_MODEL` | OpenAI 模型 | `gpt-image-1.5` |
|
||||
| `OPENROUTER_IMAGE_MODEL` | OpenRouter 模型 | `google/gemini-3.1-flash-image-preview` |
|
||||
| `GOOGLE_IMAGE_MODEL` | Google 模型 | `gemini-3-pro-image-preview` |
|
||||
| `DASHSCOPE_IMAGE_MODEL` | DashScope 模型 | `z-image-turbo` |
|
||||
| `DASHSCOPE_IMAGE_MODEL` | DashScope 模型 | `qwen-image-2.0-pro` |
|
||||
| `REPLICATE_IMAGE_MODEL` | Replicate 模型 | `google/nano-banana-pro` |
|
||||
| `JIMENG_IMAGE_MODEL` | 即梦模型 | `jimeng_t2i_v40` |
|
||||
| `SEEDREAM_IMAGE_MODEL` | 豆包模型 | `doubao-seedream-5-0-260128` |
|
||||
|
|
@ -996,7 +996,7 @@ GOOGLE_IMAGE_MODEL=gemini-3-pro-image-preview
|
|||
|
||||
# DashScope(阿里通义万相)
|
||||
DASHSCOPE_API_KEY=sk-xxx
|
||||
DASHSCOPE_IMAGE_MODEL=z-image-turbo
|
||||
DASHSCOPE_IMAGE_MODEL=qwen-image-2.0-pro
|
||||
# DASHSCOPE_BASE_URL=https://dashscope.aliyuncs.com/api/v1
|
||||
|
||||
# Replicate
|
||||
|
|
|
|||
|
|
@ -92,6 +92,12 @@ ${BUN_X} {baseDir}/scripts/main.ts --prompt "A cat" --image out.png --provider o
|
|||
# DashScope (阿里通义万象)
|
||||
${BUN_X} {baseDir}/scripts/main.ts --prompt "一只可爱的猫" --image out.png --provider dashscope
|
||||
|
||||
# DashScope Qwen-Image 2.0 Pro (recommended for custom sizes and text rendering)
|
||||
${BUN_X} {baseDir}/scripts/main.ts --prompt "为咖啡品牌设计一张 21:9 横幅海报,包含清晰中文标题" --image out.png --provider dashscope --model qwen-image-2.0-pro --size 2048x872
|
||||
|
||||
# DashScope legacy Qwen fixed-size model
|
||||
${BUN_X} {baseDir}/scripts/main.ts --prompt "一张电影感海报" --image out.png --provider dashscope --model qwen-image-max --size 1664x928
|
||||
|
||||
# Replicate (google/nano-banana-pro)
|
||||
${BUN_X} {baseDir}/scripts/main.ts --prompt "A cat" --image out.png --provider replicate
|
||||
|
||||
|
|
@ -142,7 +148,7 @@ Paths in `promptFiles`, `image`, and `ref` are resolved relative to the batch fi
|
|||
| `--batchfile <path>` | JSON batch file for multi-image generation |
|
||||
| `--jobs <count>` | Worker count for batch mode (default: auto, max from config, built-in default 10) |
|
||||
| `--provider google\|openai\|openrouter\|dashscope\|jimeng\|seedream\|replicate` | Force provider (default: auto-detect) |
|
||||
| `--model <id>`, `-m` | Model ID (Google: `gemini-3-pro-image-preview`; OpenAI: `gpt-image-1.5`; OpenRouter: `google/gemini-3.1-flash-image-preview`) |
|
||||
| `--model <id>`, `-m` | Model ID (Google: `gemini-3-pro-image-preview`; OpenAI: `gpt-image-1.5`; OpenRouter: `google/gemini-3.1-flash-image-preview`; DashScope: `qwen-image-2.0-pro`) |
|
||||
| `--ar <ratio>` | Aspect ratio (e.g., `16:9`, `1:1`, `4:3`) |
|
||||
| `--size <WxH>` | Size (e.g., `1024x1024`) |
|
||||
| `--quality normal\|2k` | Quality preset (default: `2k`) |
|
||||
|
|
@ -166,7 +172,7 @@ Paths in `promptFiles`, `image`, and `ref` are resolved relative to the batch fi
|
|||
| `OPENAI_IMAGE_MODEL` | OpenAI model override |
|
||||
| `OPENROUTER_IMAGE_MODEL` | OpenRouter model override (default: `google/gemini-3.1-flash-image-preview`) |
|
||||
| `GOOGLE_IMAGE_MODEL` | Google model override |
|
||||
| `DASHSCOPE_IMAGE_MODEL` | DashScope model override (default: z-image-turbo) |
|
||||
| `DASHSCOPE_IMAGE_MODEL` | DashScope model override (default: `qwen-image-2.0-pro`) |
|
||||
| `REPLICATE_IMAGE_MODEL` | Replicate model override (default: google/nano-banana-pro) |
|
||||
| `JIMENG_IMAGE_MODEL` | Jimeng model override (default: jimeng_t2i_v40) |
|
||||
| `SEEDREAM_IMAGE_MODEL` | Seedream model override (default: doubao-seedream-5-0-260128) |
|
||||
|
|
@ -201,6 +207,52 @@ Model priority (highest → lowest), applies to all providers:
|
|||
- Show: `Using [provider] / [model]`
|
||||
- Show switch hint: `Switch model: --model <id> | EXTEND.md default_model.[provider] | env <PROVIDER>_IMAGE_MODEL`
|
||||
|
||||
### DashScope Models
|
||||
|
||||
Use `--model qwen-image-2.0-pro` or set `default_model.dashscope` / `DASHSCOPE_IMAGE_MODEL` when the user wants official Qwen-Image behavior.
|
||||
|
||||
Official DashScope model families:
|
||||
|
||||
- `qwen-image-2.0-pro`, `qwen-image-2.0-pro-2026-03-03`, `qwen-image-2.0`, `qwen-image-2.0-2026-03-03`
|
||||
- Free-form `size` in `宽*高` format
|
||||
- Total pixels must stay between `512*512` and `2048*2048`
|
||||
- Default size is approximately `1024*1024`
|
||||
- Best choice for custom ratios such as `21:9` and text-heavy Chinese/English layouts
|
||||
- `qwen-image-max`, `qwen-image-max-2025-12-30`, `qwen-image-plus`, `qwen-image-plus-2026-01-09`, `qwen-image`
|
||||
- Fixed sizes only: `1664*928`, `1472*1104`, `1328*1328`, `1104*1472`, `928*1664`
|
||||
- Default size is `1664*928`
|
||||
- `qwen-image` currently has the same capability as `qwen-image-plus`
|
||||
- Legacy DashScope models such as `z-image-turbo`, `z-image-ultra`, `wanx-v1`
|
||||
- Keep using them only when the user explicitly asks for legacy behavior or compatibility
|
||||
|
||||
When translating CLI args into DashScope behavior:
|
||||
|
||||
- `--size` wins over `--ar`
|
||||
- For `qwen-image-2.0*`, prefer explicit `--size`; otherwise infer from `--ar` and use the official recommended resolutions below
|
||||
- For `qwen-image-max/plus/image`, only use the five official fixed sizes; if the requested ratio is not covered, switch to `qwen-image-2.0-pro`
|
||||
- `--quality` is a baoyu-image-gen compatibility preset, not a native DashScope API field. Mapping `normal` / `2k` onto the `qwen-image-2.0*` table below is an implementation inference, not an official API guarantee
|
||||
|
||||
Recommended `qwen-image-2.0*` sizes for common aspect ratios:
|
||||
|
||||
| Ratio | `normal` | `2k` |
|
||||
|-------|----------|------|
|
||||
| `1:1` | `1024*1024` | `1536*1536` |
|
||||
| `2:3` | `768*1152` | `1024*1536` |
|
||||
| `3:2` | `1152*768` | `1536*1024` |
|
||||
| `3:4` | `960*1280` | `1080*1440` |
|
||||
| `4:3` | `1280*960` | `1440*1080` |
|
||||
| `9:16` | `720*1280` | `1080*1920` |
|
||||
| `16:9` | `1280*720` | `1920*1080` |
|
||||
| `21:9` | `1344*576` | `2048*872` |
|
||||
|
||||
DashScope official APIs also expose `negative_prompt`, `prompt_extend`, and `watermark`, but `baoyu-image-gen` does not expose them as dedicated CLI flags today.
|
||||
|
||||
Official references:
|
||||
|
||||
- [Qwen-Image API](https://help.aliyun.com/zh/model-studio/qwen-image-api)
|
||||
- [Text-to-image guide](https://help.aliyun.com/zh/model-studio/text-to-image)
|
||||
- [Qwen-Image Edit API](https://help.aliyun.com/zh/model-studio/qwen-image-edit-api)
|
||||
|
||||
### OpenRouter Models
|
||||
|
||||
Use full OpenRouter model IDs, e.g.:
|
||||
|
|
|
|||
|
|
@ -50,7 +50,7 @@ options:
|
|||
- label: "OpenRouter"
|
||||
description: "Router for Gemini/FLUX/OpenAI-compatible image models"
|
||||
- label: "DashScope"
|
||||
description: "Alibaba Cloud - z-image-turbo, good for Chinese content"
|
||||
description: "Alibaba Cloud - Qwen-Image, strong Chinese/English text rendering"
|
||||
- label: "Replicate"
|
||||
description: "Community models - nano-banana-pro, flexible model selection"
|
||||
```
|
||||
|
|
@ -186,12 +186,26 @@ options:
|
|||
header: "DashScope Model"
|
||||
question: "Choose a default DashScope image generation model?"
|
||||
options:
|
||||
- label: "z-image-turbo (Recommended)"
|
||||
description: "Fast generation, good quality"
|
||||
- label: "qwen-image-2.0-pro (Recommended)"
|
||||
description: "Best DashScope model for text rendering and custom sizes"
|
||||
- label: "qwen-image-2.0"
|
||||
description: "Faster 2.0 variant with flexible output size"
|
||||
- label: "qwen-image-max"
|
||||
description: "Legacy Qwen model with five fixed output sizes"
|
||||
- label: "qwen-image-plus"
|
||||
description: "Legacy Qwen model, same current capability as qwen-image"
|
||||
- label: "z-image-turbo"
|
||||
description: "Legacy DashScope model for compatibility"
|
||||
- label: "z-image-ultra"
|
||||
description: "Higher quality, slower generation"
|
||||
description: "Legacy DashScope model, higher quality but slower"
|
||||
```
|
||||
|
||||
Notes for DashScope setup:
|
||||
|
||||
- Prefer `qwen-image-2.0-pro` when the user needs custom `--size`, uncommon ratios like `21:9`, or strong Chinese/English text rendering.
|
||||
- `qwen-image-max` / `qwen-image-plus` / `qwen-image` only support five fixed sizes: `1664*928`, `1472*1104`, `1328*1328`, `1104*1472`, `928*1664`.
|
||||
- In `baoyu-image-gen`, `quality` is a compatibility preset. It is not a native DashScope parameter.
|
||||
|
||||
### Replicate Model Selection
|
||||
|
||||
```yaml
|
||||
|
|
|
|||
|
|
@ -23,7 +23,7 @@ default_model:
|
|||
google: null # e.g., "gemini-3-pro-image-preview", "gemini-3.1-flash-image-preview"
|
||||
openai: null # e.g., "gpt-image-1.5", "gpt-image-1"
|
||||
openrouter: null # e.g., "google/gemini-3.1-flash-image-preview"
|
||||
dashscope: null # e.g., "z-image-turbo"
|
||||
dashscope: null # e.g., "qwen-image-2.0-pro"
|
||||
replicate: null # e.g., "google/nano-banana-pro"
|
||||
|
||||
batch:
|
||||
|
|
@ -88,7 +88,7 @@ default_model:
|
|||
google: "gemini-3-pro-image-preview"
|
||||
openai: "gpt-image-1.5"
|
||||
openrouter: "google/gemini-3.1-flash-image-preview"
|
||||
dashscope: "z-image-turbo"
|
||||
dashscope: "qwen-image-2.0-pro"
|
||||
replicate: "google/nano-banana-pro"
|
||||
batch:
|
||||
max_workers: 10
|
||||
|
|
|
|||
|
|
@ -116,7 +116,7 @@ Environment variables:
|
|||
OPENAI_IMAGE_MODEL Default OpenAI model (gpt-image-1.5)
|
||||
OPENROUTER_IMAGE_MODEL Default OpenRouter model (google/gemini-3.1-flash-image-preview)
|
||||
GOOGLE_IMAGE_MODEL Default Google model (gemini-3-pro-image-preview)
|
||||
DASHSCOPE_IMAGE_MODEL Default DashScope model (z-image-turbo)
|
||||
DASHSCOPE_IMAGE_MODEL Default DashScope model (qwen-image-2.0-pro)
|
||||
REPLICATE_IMAGE_MODEL Default Replicate model (google/nano-banana-pro)
|
||||
JIMENG_IMAGE_MODEL Default Jimeng model (jimeng_t2i_v40)
|
||||
SEEDREAM_IMAGE_MODEL Default Seedream model (doubao-seedream-5-0-260128)
|
||||
|
|
|
|||
|
|
@ -1,25 +1,147 @@
|
|||
import assert from "node:assert/strict";
|
||||
import test from "node:test";
|
||||
import test, { type TestContext } from "node:test";
|
||||
|
||||
import {
|
||||
getDefaultModel,
|
||||
getModelFamily,
|
||||
getQwen2SizeFromAspectRatio,
|
||||
getSizeFromAspectRatio,
|
||||
normalizeSize,
|
||||
parseAspectRatio,
|
||||
parseSize,
|
||||
resolveSizeForModel,
|
||||
} from "./dashscope.ts";
|
||||
|
||||
// Temporarily override environment variables for one test.
// `values` maps variable name -> new value, or null to unset it.
// Original values are captured first and restored in t.after(), so tests
// that mutate process.env do not leak state into later tests.
function useEnv(
  t: TestContext,
  values: Record<string, string | null>,
): void {
  // Snapshot the pre-test value of every key we are about to touch
  // (undefined means "was not set").
  const previous = new Map<string, string | undefined>();
  for (const [key, value] of Object.entries(values)) {
    previous.set(key, process.env[key]);
    if (value == null) {
      delete process.env[key];
    } else {
      process.env[key] = value;
    }
  }

  // Restore the snapshot once the owning test finishes.
  t.after(() => {
    for (const [key, value] of previous.entries()) {
      if (value == null) {
        // Key was absent before the test: remove it again.
        delete process.env[key];
      } else {
        process.env[key] = value;
      }
    }
  });
}
|
||||
|
||||
test("DashScope default model prefers env override and otherwise uses qwen-image-2.0-pro", (t) => {
|
||||
useEnv(t, { DASHSCOPE_IMAGE_MODEL: null });
|
||||
assert.equal(getDefaultModel(), "qwen-image-2.0-pro");
|
||||
|
||||
process.env.DASHSCOPE_IMAGE_MODEL = "qwen-image-max";
|
||||
assert.equal(getDefaultModel(), "qwen-image-max");
|
||||
});
|
||||
|
||||
test("DashScope aspect-ratio parsing accepts numeric ratios only", () => {
|
||||
assert.deepEqual(parseAspectRatio("3:2"), { width: 3, height: 2 });
|
||||
assert.equal(parseAspectRatio("square"), null);
|
||||
assert.equal(parseAspectRatio("-1:2"), null);
|
||||
});
|
||||
|
||||
test("DashScope size selection picks the closest supported size per quality preset", () => {
|
||||
test("DashScope model family routing distinguishes qwen-2.0, fixed-size qwen, and legacy models", () => {
|
||||
assert.equal(getModelFamily("qwen-image-2.0-pro"), "qwen2");
|
||||
assert.equal(getModelFamily("qwen-image"), "qwenFixed");
|
||||
assert.equal(getModelFamily("z-image-turbo"), "legacy");
|
||||
assert.equal(getModelFamily("wanx-v1"), "legacy");
|
||||
});
|
||||
|
||||
test("Legacy DashScope size selection keeps the previous quality-based heuristic", () => {
|
||||
assert.equal(getSizeFromAspectRatio(null, "normal"), "1024*1024");
|
||||
assert.equal(getSizeFromAspectRatio("16:9", "normal"), "1280*720");
|
||||
assert.equal(getSizeFromAspectRatio("16:9", "2k"), "2048*1152");
|
||||
assert.equal(getSizeFromAspectRatio("invalid", "2k"), "1536*1536");
|
||||
});
|
||||
|
||||
test("Qwen 2.0 recommended sizes follow the official common-ratio table", () => {
|
||||
assert.equal(getQwen2SizeFromAspectRatio(null, "normal"), "1024*1024");
|
||||
assert.equal(getQwen2SizeFromAspectRatio(null, "2k"), "1536*1536");
|
||||
assert.equal(getQwen2SizeFromAspectRatio("16:9", "normal"), "1280*720");
|
||||
assert.equal(getQwen2SizeFromAspectRatio("21:9", "2k"), "2048*872");
|
||||
});
|
||||
|
||||
test("Qwen 2.0 derives free-form sizes within pixel budget for uncommon ratios", () => {
|
||||
const size = getQwen2SizeFromAspectRatio("5:2", "normal");
|
||||
const parsed = parseSize(size);
|
||||
assert.ok(parsed);
|
||||
assert.ok(parsed.width * parsed.height >= 512 * 512);
|
||||
assert.ok(parsed.width * parsed.height <= 2048 * 2048);
|
||||
assert.ok(Math.abs(parsed.width / parsed.height - 2.5) < 0.08);
|
||||
});
|
||||
|
||||
test("resolveSizeForModel validates explicit qwen-image-2.0 sizes by total pixels", () => {
|
||||
assert.equal(
|
||||
resolveSizeForModel("qwen-image-2.0-pro", {
|
||||
size: "2048x872",
|
||||
aspectRatio: null,
|
||||
quality: "2k",
|
||||
}),
|
||||
"2048*872",
|
||||
);
|
||||
|
||||
assert.throws(
|
||||
() =>
|
||||
resolveSizeForModel("qwen-image-2.0-pro", {
|
||||
size: "4096x4096",
|
||||
aspectRatio: null,
|
||||
quality: "2k",
|
||||
}),
|
||||
/total pixels between/,
|
||||
);
|
||||
});
|
||||
|
||||
test("resolveSizeForModel enforces fixed sizes for qwen-image-max/plus/image", () => {
|
||||
assert.equal(
|
||||
resolveSizeForModel("qwen-image-max", {
|
||||
size: null,
|
||||
aspectRatio: "1:1",
|
||||
quality: "2k",
|
||||
}),
|
||||
"1328*1328",
|
||||
);
|
||||
|
||||
assert.equal(
|
||||
resolveSizeForModel("qwen-image", {
|
||||
size: "1664x928",
|
||||
aspectRatio: "9:16",
|
||||
quality: "normal",
|
||||
}),
|
||||
"1664*928",
|
||||
);
|
||||
|
||||
assert.throws(
|
||||
() =>
|
||||
resolveSizeForModel("qwen-image-max", {
|
||||
size: null,
|
||||
aspectRatio: "21:9",
|
||||
quality: "2k",
|
||||
}),
|
||||
/supports only fixed ratios/,
|
||||
);
|
||||
|
||||
assert.throws(
|
||||
() =>
|
||||
resolveSizeForModel("qwen-image-plus", {
|
||||
size: "1024x1024",
|
||||
aspectRatio: null,
|
||||
quality: "2k",
|
||||
}),
|
||||
/support only these sizes/,
|
||||
);
|
||||
});
|
||||
|
||||
test("DashScope size normalization converts WxH into provider format", () => {
|
||||
assert.equal(normalizeSize("1024x1024"), "1024*1024");
|
||||
assert.equal(normalizeSize("2048*1152"), "2048*1152");
|
||||
|
|
|
|||
|
|
@ -1,28 +1,46 @@
|
|||
import type { CliArgs } from "../types";
|
||||
import type { CliArgs, Quality } from "../types";
|
||||
|
||||
export function getDefaultModel(): string {
|
||||
return process.env.DASHSCOPE_IMAGE_MODEL || "z-image-turbo";
|
||||
}
|
||||
type DashScopeModelFamily = "qwen2" | "qwenFixed" | "legacy";
|
||||
|
||||
function getApiKey(): string | null {
|
||||
return process.env.DASHSCOPE_API_KEY || null;
|
||||
}
|
||||
type DashScopeModelSpec = {
|
||||
family: DashScopeModelFamily;
|
||||
defaultSize: string;
|
||||
};
|
||||
|
||||
function getBaseUrl(): string {
|
||||
const base = process.env.DASHSCOPE_BASE_URL || "https://dashscope.aliyuncs.com";
|
||||
return base.replace(/\/+$/g, "");
|
||||
}
|
||||
const DEFAULT_MODEL = "qwen-image-2.0-pro";
|
||||
const MIN_QWEN_2_TOTAL_PIXELS = 512 * 512;
|
||||
const MAX_QWEN_2_TOTAL_PIXELS = 2048 * 2048;
|
||||
const SIZE_STEP = 16;
|
||||
const QWEN_NEGATIVE_PROMPT =
|
||||
"低分辨率,低画质,肢体畸形,手指畸形,画面过饱和,蜡像感,人脸无细节,过度光滑,画面具有AI感,构图混乱,文字模糊,扭曲";
|
||||
|
||||
export function parseAspectRatio(ar: string): { width: number; height: number } | null {
|
||||
const match = ar.match(/^(\d+(?:\.\d+)?):(\d+(?:\.\d+)?)$/);
|
||||
if (!match) return null;
|
||||
const w = parseFloat(match[1]!);
|
||||
const h = parseFloat(match[2]!);
|
||||
if (w <= 0 || h <= 0) return null;
|
||||
return { width: w, height: h };
|
||||
}
|
||||
const QWEN_2_TARGET_PIXELS: Record<Quality, number> = {
|
||||
normal: 1024 * 1024,
|
||||
"2k": 1536 * 1536,
|
||||
};
|
||||
|
||||
const STANDARD_SIZES: [number, number][] = [
|
||||
const QWEN_2_RECOMMENDED: Record<string, Record<Quality, string>> = {
|
||||
"1:1": { normal: "1024*1024", "2k": "1536*1536" },
|
||||
"2:3": { normal: "768*1152", "2k": "1024*1536" },
|
||||
"3:2": { normal: "1152*768", "2k": "1536*1024" },
|
||||
"3:4": { normal: "960*1280", "2k": "1080*1440" },
|
||||
"4:3": { normal: "1280*960", "2k": "1440*1080" },
|
||||
"9:16": { normal: "720*1280", "2k": "1080*1920" },
|
||||
"16:9": { normal: "1280*720", "2k": "1920*1080" },
|
||||
"21:9": { normal: "1344*576", "2k": "2048*872" },
|
||||
};
|
||||
|
||||
const QWEN_FIXED_SIZES_BY_RATIO: Record<string, string> = {
|
||||
"16:9": "1664*928",
|
||||
"4:3": "1472*1104",
|
||||
"1:1": "1328*1328",
|
||||
"3:4": "1104*1472",
|
||||
"9:16": "928*1664",
|
||||
};
|
||||
|
||||
const QWEN_FIXED_SIZES = Object.values(QWEN_FIXED_SIZES_BY_RATIO);
|
||||
|
||||
const LEGACY_STANDARD_SIZES: [number, number][] = [
|
||||
[1024, 1024],
|
||||
[1280, 720],
|
||||
[720, 1280],
|
||||
|
|
@ -34,7 +52,7 @@ const STANDARD_SIZES: [number, number][] = [
|
|||
[864, 1536],
|
||||
];
|
||||
|
||||
const STANDARD_SIZES_2K: [number, number][] = [
|
||||
const LEGACY_STANDARD_SIZES_2K: [number, number][] = [
|
||||
[1536, 1536],
|
||||
[2048, 1152],
|
||||
[1152, 2048],
|
||||
|
|
@ -45,9 +63,167 @@ const STANDARD_SIZES_2K: [number, number][] = [
|
|||
[2048, 2048],
|
||||
];
|
||||
|
||||
const QWEN_2_SPEC: DashScopeModelSpec = {
|
||||
family: "qwen2",
|
||||
defaultSize: "1024*1024",
|
||||
};
|
||||
|
||||
const QWEN_FIXED_SPEC: DashScopeModelSpec = {
|
||||
family: "qwenFixed",
|
||||
defaultSize: QWEN_FIXED_SIZES_BY_RATIO["16:9"],
|
||||
};
|
||||
|
||||
const LEGACY_SPEC: DashScopeModelSpec = {
|
||||
family: "legacy",
|
||||
defaultSize: "1536*1536",
|
||||
};
|
||||
|
||||
const MODEL_SPEC_ALIASES: Record<string, DashScopeModelSpec> = {
|
||||
"qwen-image-2.0-pro": QWEN_2_SPEC,
|
||||
"qwen-image-2.0-pro-2026-03-03": QWEN_2_SPEC,
|
||||
"qwen-image-2.0": QWEN_2_SPEC,
|
||||
"qwen-image-2.0-2026-03-03": QWEN_2_SPEC,
|
||||
"qwen-image-max": QWEN_FIXED_SPEC,
|
||||
"qwen-image-max-2025-12-30": QWEN_FIXED_SPEC,
|
||||
"qwen-image-plus": QWEN_FIXED_SPEC,
|
||||
"qwen-image-plus-2026-01-09": QWEN_FIXED_SPEC,
|
||||
"qwen-image": QWEN_FIXED_SPEC,
|
||||
};
|
||||
|
||||
export function getDefaultModel(): string {
|
||||
return process.env.DASHSCOPE_IMAGE_MODEL || DEFAULT_MODEL;
|
||||
}
|
||||
|
||||
function getApiKey(): string | null {
|
||||
return process.env.DASHSCOPE_API_KEY || null;
|
||||
}
|
||||
|
||||
function getBaseUrl(): string {
|
||||
const base = process.env.DASHSCOPE_BASE_URL || "https://dashscope.aliyuncs.com";
|
||||
return base.replace(/\/+$/g, "");
|
||||
}
|
||||
|
||||
function getModelSpec(model: string): DashScopeModelSpec {
|
||||
return MODEL_SPEC_ALIASES[model.trim().toLowerCase()] || LEGACY_SPEC;
|
||||
}
|
||||
|
||||
export function getModelFamily(model: string): DashScopeModelFamily {
|
||||
return getModelSpec(model).family;
|
||||
}
|
||||
|
||||
function normalizeQuality(quality: CliArgs["quality"]): Quality {
|
||||
return quality === "normal" ? "normal" : "2k";
|
||||
}
|
||||
|
||||
export function parseAspectRatio(ar: string): { width: number; height: number } | null {
|
||||
const match = ar.match(/^(\d+(?:\.\d+)?):(\d+(?:\.\d+)?)$/);
|
||||
if (!match) return null;
|
||||
const w = parseFloat(match[1]!);
|
||||
const h = parseFloat(match[2]!);
|
||||
if (w <= 0 || h <= 0) return null;
|
||||
return { width: w, height: h };
|
||||
}
|
||||
|
||||
export function normalizeSize(size: string): string {
|
||||
return size.replace("x", "*");
|
||||
}
|
||||
|
||||
export function parseSize(size: string): { width: number; height: number } | null {
|
||||
const match = normalizeSize(size).match(/^(\d+)\*(\d+)$/);
|
||||
if (!match) return null;
|
||||
const width = Number(match[1]);
|
||||
const height = Number(match[2]);
|
||||
if (!Number.isFinite(width) || !Number.isFinite(height) || width <= 0 || height <= 0) {
|
||||
return null;
|
||||
}
|
||||
return { width, height };
|
||||
}
|
||||
|
||||
function formatSize(width: number, height: number): string {
|
||||
return `${width}*${height}`;
|
||||
}
|
||||
|
||||
function getRatioValue(ar: string): number | null {
|
||||
const parsed = parseAspectRatio(ar);
|
||||
if (!parsed) return null;
|
||||
return parsed.width / parsed.height;
|
||||
}
|
||||
|
||||
function findKnownRatioKey(ar: string, candidates: string[], tolerance = 0.02): string | null {
|
||||
const targetRatio = getRatioValue(ar);
|
||||
if (targetRatio == null) return null;
|
||||
|
||||
let bestKey: string | null = null;
|
||||
let bestDiff = Infinity;
|
||||
|
||||
for (const candidate of candidates) {
|
||||
const candidateRatio = getRatioValue(candidate);
|
||||
if (candidateRatio == null) continue;
|
||||
const diff = Math.abs(candidateRatio - targetRatio);
|
||||
if (diff < bestDiff) {
|
||||
bestDiff = diff;
|
||||
bestKey = candidate;
|
||||
}
|
||||
}
|
||||
|
||||
return bestDiff <= tolerance ? bestKey : null;
|
||||
}
|
||||
|
||||
function roundToStep(value: number): number {
|
||||
return Math.max(SIZE_STEP, Math.round(value / SIZE_STEP) * SIZE_STEP);
|
||||
}
|
||||
|
||||
// Scale a (width, height) pair so its total pixel count lands inside
// [minPixels, maxPixels] while approximately preserving the aspect ratio,
// then snap both sides to SIZE_STEP multiples. Snapping can push the area
// back outside the budget, so two corrective loops nudge one side at a time
// until the area is legal again.
function fitToPixelBudget(
  width: number,
  height: number,
  minPixels: number,
  maxPixels: number,
): { width: number; height: number } {
  let nextWidth = width;
  let nextHeight = height;
  let pixels = nextWidth * nextHeight;

  // Uniform scale toward the budget: sqrt keeps the aspect ratio because the
  // same factor is applied to both sides.
  if (pixels > maxPixels) {
    const scale = Math.sqrt(maxPixels / pixels);
    nextWidth *= scale;
    nextHeight *= scale;
  } else if (pixels < minPixels) {
    const scale = Math.sqrt(minPixels / pixels);
    nextWidth *= scale;
    nextHeight *= scale;
  }

  // Snap to the provider's step grid; this may re-violate the budget.
  let roundedWidth = roundToStep(nextWidth);
  let roundedHeight = roundToStep(nextHeight);
  pixels = roundedWidth * roundedHeight;

  // Too large after rounding: shrink the longer side one step at a time,
  // never dropping a side below one step.
  while (pixels > maxPixels && (roundedWidth > SIZE_STEP || roundedHeight > SIZE_STEP)) {
    if (roundedWidth >= roundedHeight && roundedWidth > SIZE_STEP) {
      roundedWidth -= SIZE_STEP;
    } else if (roundedHeight > SIZE_STEP) {
      roundedHeight -= SIZE_STEP;
    } else {
      break;
    }
    pixels = roundedWidth * roundedHeight;
  }

  // Too small after rounding: grow the shorter side one step at a time.
  while (pixels < minPixels) {
    if (roundedWidth <= roundedHeight) {
      roundedWidth += SIZE_STEP;
    } else {
      roundedHeight += SIZE_STEP;
    }
    pixels = roundedWidth * roundedHeight;
  }

  return { width: roundedWidth, height: roundedHeight };
}
|
||||
|
||||
export function getSizeFromAspectRatio(ar: string | null, quality: CliArgs["quality"]): string {
|
||||
const is2k = quality === "2k";
|
||||
const defaultSize = is2k ? "1536*1536" : "1024*1024";
|
||||
const normalizedQuality = normalizeQuality(quality);
|
||||
const sizes = normalizedQuality === "2k" ? LEGACY_STANDARD_SIZES_2K : LEGACY_STANDARD_SIZES;
|
||||
const defaultSize = normalizedQuality === "2k" ? "1536*1536" : "1024*1024";
|
||||
|
||||
if (!ar) return defaultSize;
|
||||
|
||||
|
|
@ -55,86 +231,157 @@ export function getSizeFromAspectRatio(ar: string | null, quality: CliArgs["qual
|
|||
if (!parsed) return defaultSize;
|
||||
|
||||
const targetRatio = parsed.width / parsed.height;
|
||||
const sizes = is2k ? STANDARD_SIZES_2K : STANDARD_SIZES;
|
||||
|
||||
let best = defaultSize;
|
||||
let bestDiff = Infinity;
|
||||
|
||||
for (const [w, h] of sizes) {
|
||||
const diff = Math.abs(w / h - targetRatio);
|
||||
for (const [width, height] of sizes) {
|
||||
const diff = Math.abs(width / height - targetRatio);
|
||||
if (diff < bestDiff) {
|
||||
bestDiff = diff;
|
||||
best = `${w}*${h}`;
|
||||
best = formatSize(width, height);
|
||||
}
|
||||
}
|
||||
|
||||
return best;
|
||||
}
|
||||
|
||||
export function normalizeSize(size: string): string {
|
||||
return size.replace("x", "*");
|
||||
export function getQwen2SizeFromAspectRatio(ar: string | null, quality: CliArgs["quality"]): string {
|
||||
const normalizedQuality = normalizeQuality(quality);
|
||||
|
||||
if (!ar) {
|
||||
return QWEN_2_RECOMMENDED["1:1"][normalizedQuality];
|
||||
}
|
||||
|
||||
const recommendedRatio = findKnownRatioKey(ar, Object.keys(QWEN_2_RECOMMENDED));
|
||||
if (recommendedRatio) {
|
||||
return QWEN_2_RECOMMENDED[recommendedRatio][normalizedQuality];
|
||||
}
|
||||
|
||||
const parsed = parseAspectRatio(ar);
|
||||
if (!parsed) {
|
||||
return QWEN_2_RECOMMENDED["1:1"][normalizedQuality];
|
||||
}
|
||||
|
||||
const targetRatio = parsed.width / parsed.height;
|
||||
const targetPixels = QWEN_2_TARGET_PIXELS[normalizedQuality];
|
||||
const rawWidth = Math.sqrt(targetPixels * targetRatio);
|
||||
const rawHeight = Math.sqrt(targetPixels / targetRatio);
|
||||
const fitted = fitToPixelBudget(
|
||||
rawWidth,
|
||||
rawHeight,
|
||||
MIN_QWEN_2_TOTAL_PIXELS,
|
||||
MAX_QWEN_2_TOTAL_PIXELS,
|
||||
);
|
||||
|
||||
return formatSize(fitted.width, fitted.height);
|
||||
}
|
||||
|
||||
export async function generateImage(
|
||||
prompt: string,
|
||||
model: string,
|
||||
args: CliArgs
|
||||
): Promise<Uint8Array> {
|
||||
const apiKey = getApiKey();
|
||||
if (!apiKey) throw new Error("DASHSCOPE_API_KEY is required");
|
||||
|
||||
if (args.referenceImages.length > 0) {
|
||||
throw new Error(
|
||||
"Reference images are not supported with DashScope provider in baoyu-image-gen. Use --provider google with a Gemini multimodal model."
|
||||
function getQwenFixedSizeFromAspectRatio(ar: string | null, quality: CliArgs["quality"]): string {
|
||||
if (quality === "normal") {
|
||||
console.warn(
|
||||
"DashScope qwen-image-max/plus/image models use fixed output sizes; --quality normal does not change the generated resolution."
|
||||
);
|
||||
}
|
||||
|
||||
const size = args.size ? normalizeSize(args.size) : getSizeFromAspectRatio(args.aspectRatio, args.quality);
|
||||
const url = `${getBaseUrl()}/api/v1/services/aigc/multimodal-generation/generation`;
|
||||
if (!ar) return QWEN_FIXED_SPEC.defaultSize;
|
||||
|
||||
const body = {
|
||||
model,
|
||||
input: {
|
||||
messages: [
|
||||
{
|
||||
role: "user",
|
||||
content: [{ text: prompt }],
|
||||
},
|
||||
],
|
||||
},
|
||||
parameters: {
|
||||
prompt_extend: false,
|
||||
size,
|
||||
},
|
||||
};
|
||||
|
||||
console.log(`Generating image with DashScope (${model})...`, { size });
|
||||
|
||||
const res = await fetch(url, {
|
||||
method: "POST",
|
||||
headers: {
|
||||
"Content-Type": "application/json",
|
||||
Authorization: `Bearer ${apiKey}`,
|
||||
},
|
||||
body: JSON.stringify(body),
|
||||
});
|
||||
|
||||
if (!res.ok) {
|
||||
const err = await res.text();
|
||||
throw new Error(`DashScope API error (${res.status}): ${err}`);
|
||||
const ratioKey = findKnownRatioKey(ar, Object.keys(QWEN_FIXED_SIZES_BY_RATIO));
|
||||
if (!ratioKey) {
|
||||
throw new Error(
|
||||
`DashScope model supports only fixed ratios ${Object.keys(QWEN_FIXED_SIZES_BY_RATIO).join(", ")}. ` +
|
||||
`For custom ratios like "${ar}", use --model qwen-image-2.0-pro.`
|
||||
);
|
||||
}
|
||||
|
||||
const result = await res.json() as {
|
||||
output?: {
|
||||
result_image?: string;
|
||||
choices?: Array<{
|
||||
message?: {
|
||||
content?: Array<{ image?: string }>;
|
||||
};
|
||||
}>;
|
||||
};
|
||||
return QWEN_FIXED_SIZES_BY_RATIO[ratioKey]!;
|
||||
}
|
||||
|
||||
function validateSizeFormat(size: string): { width: number; height: number } {
|
||||
const parsed = parseSize(size);
|
||||
if (!parsed) {
|
||||
throw new Error(`Invalid DashScope size "${size}". Expected <width>x<height> or <width>*<height>.`);
|
||||
}
|
||||
return parsed;
|
||||
}
|
||||
|
||||
function validateQwen2Size(size: string): string {
|
||||
const normalized = normalizeSize(size);
|
||||
const parsed = validateSizeFormat(normalized);
|
||||
const totalPixels = parsed.width * parsed.height;
|
||||
if (totalPixels < MIN_QWEN_2_TOTAL_PIXELS || totalPixels > MAX_QWEN_2_TOTAL_PIXELS) {
|
||||
throw new Error(
|
||||
`DashScope qwen-image-2.0* models require total pixels between ${MIN_QWEN_2_TOTAL_PIXELS} ` +
|
||||
`and ${MAX_QWEN_2_TOTAL_PIXELS}. Received ${normalized} (${totalPixels} pixels).`
|
||||
);
|
||||
}
|
||||
return normalized;
|
||||
}
|
||||
|
||||
function validateQwenFixedSize(size: string): string {
|
||||
const normalized = normalizeSize(size);
|
||||
validateSizeFormat(normalized);
|
||||
if (!QWEN_FIXED_SIZES.includes(normalized)) {
|
||||
throw new Error(
|
||||
`DashScope qwen-image-max/plus/image models support only these sizes: ${QWEN_FIXED_SIZES.join(", ")}. ` +
|
||||
`Received ${normalized}.`
|
||||
);
|
||||
}
|
||||
return normalized;
|
||||
}
|
||||
|
||||
export function resolveSizeForModel(
|
||||
model: string,
|
||||
args: Pick<CliArgs, "size" | "aspectRatio" | "quality">,
|
||||
): string {
|
||||
const spec = getModelSpec(model);
|
||||
|
||||
if (args.size) {
|
||||
if (spec.family === "qwen2") return validateQwen2Size(args.size);
|
||||
if (spec.family === "qwenFixed") return validateQwenFixedSize(args.size);
|
||||
validateSizeFormat(args.size);
|
||||
return normalizeSize(args.size);
|
||||
}
|
||||
|
||||
if (spec.family === "qwen2") {
|
||||
return getQwen2SizeFromAspectRatio(args.aspectRatio, args.quality);
|
||||
}
|
||||
|
||||
if (spec.family === "qwenFixed") {
|
||||
return getQwenFixedSizeFromAspectRatio(args.aspectRatio, args.quality);
|
||||
}
|
||||
|
||||
return getSizeFromAspectRatio(args.aspectRatio, args.quality);
|
||||
}
|
||||
|
||||
function buildParameters(
|
||||
family: DashScopeModelFamily,
|
||||
size: string,
|
||||
): Record<string, unknown> {
|
||||
const parameters: Record<string, unknown> = {
|
||||
prompt_extend: false,
|
||||
size,
|
||||
};
|
||||
|
||||
if (family === "qwen2" || family === "qwenFixed") {
|
||||
parameters.watermark = false;
|
||||
parameters.negative_prompt = QWEN_NEGATIVE_PROMPT;
|
||||
}
|
||||
|
||||
return parameters;
|
||||
}
|
||||
|
||||
// Relevant subset of the DashScope generation response. The image can arrive
// either directly as `output.result_image` or nested inside
// `output.choices[].message.content[].image`; extractImageFromResponse
// checks both locations.
type DashScopeResponse = {
  output?: {
    result_image?: string;
    choices?: Array<{
      message?: {
        content?: Array<{ image?: string }>;
      };
    }>;
  };
};
|
||||
|
||||
async function extractImageFromResponse(result: DashScopeResponse): Promise<Uint8Array> {
|
||||
let imageData: string | null = null;
|
||||
|
||||
if (result.output?.result_image) {
|
||||
|
|
@ -163,3 +410,54 @@ export async function generateImage(
|
|||
|
||||
return Uint8Array.from(Buffer.from(imageData, "base64"));
|
||||
}
|
||||
|
||||
// Generate one image via DashScope's multimodal-generation HTTP endpoint.
// Requires DASHSCOPE_API_KEY; rejects reference images (this provider path
// has no image-input support). The output size is resolved per model family
// before the request is built. Returns the raw image bytes.
export async function generateImage(
  prompt: string,
  model: string,
  args: CliArgs
): Promise<Uint8Array> {
  const apiKey = getApiKey();
  if (!apiKey) throw new Error("DASHSCOPE_API_KEY is required");

  // Fail fast instead of silently ignoring --ref inputs.
  if (args.referenceImages.length > 0) {
    throw new Error(
      "Reference images are not supported with DashScope provider in baoyu-image-gen. Use --provider google with a Gemini multimodal model."
    );
  }

  // Family drives both size validation and which request parameters are sent.
  const spec = getModelSpec(model);
  const size = resolveSizeForModel(model, args);
  const url = `${getBaseUrl()}/api/v1/services/aigc/multimodal-generation/generation`;

  // Chat-style payload: a single user message carrying the text prompt.
  const body = {
    model,
    input: {
      messages: [
        {
          role: "user",
          content: [{ text: prompt }],
        },
      ],
    },
    parameters: buildParameters(spec.family, size),
  };

  console.log(`Generating image with DashScope (${model})...`, { family: spec.family, size });

  const res = await fetch(url, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      Authorization: `Bearer ${apiKey}`,
    },
    body: JSON.stringify(body),
  });

  // Surface the provider's error body verbatim for easier debugging.
  if (!res.ok) {
    const err = await res.text();
    throw new Error(`DashScope API error (${res.status}): ${err}`);
  }

  const result = await res.json() as DashScopeResponse;
  return extractImageFromResponse(result);
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue