JimLiu-baoyu-skills/skills/baoyu-imagine/scripts/providers/dashscope.ts

464 lines
13 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import type { CliArgs, Quality } from "../types";
type DashScopeModelFamily = "qwen2" | "qwenFixed" | "legacy";
type DashScopeModelSpec = {
family: DashScopeModelFamily;
defaultSize: string;
};
const DEFAULT_MODEL = "qwen-image-2.0-pro";
const MIN_QWEN_2_TOTAL_PIXELS = 512 * 512;
const MAX_QWEN_2_TOTAL_PIXELS = 2048 * 2048;
const SIZE_STEP = 16;
const QWEN_NEGATIVE_PROMPT =
"低分辨率低画质肢体畸形手指畸形画面过饱和蜡像感人脸无细节过度光滑画面具有AI感构图混乱文字模糊扭曲";
const QWEN_2_TARGET_PIXELS: Record<Quality, number> = {
normal: 1024 * 1024,
"2k": 1536 * 1536,
};
const QWEN_2_RECOMMENDED: Record<string, Record<Quality, string>> = {
"1:1": { normal: "1024*1024", "2k": "1536*1536" },
"2:3": { normal: "768*1152", "2k": "1024*1536" },
"3:2": { normal: "1152*768", "2k": "1536*1024" },
"3:4": { normal: "960*1280", "2k": "1080*1440" },
"4:3": { normal: "1280*960", "2k": "1440*1080" },
"9:16": { normal: "720*1280", "2k": "1080*1920" },
"16:9": { normal: "1280*720", "2k": "1920*1080" },
"21:9": { normal: "1344*576", "2k": "2048*872" },
};
const QWEN_FIXED_SIZES_BY_RATIO: Record<string, string> = {
"16:9": "1664*928",
"4:3": "1472*1104",
"1:1": "1328*1328",
"3:4": "1104*1472",
"9:16": "928*1664",
};
const QWEN_FIXED_SIZES = Object.values(QWEN_FIXED_SIZES_BY_RATIO);
const LEGACY_STANDARD_SIZES: [number, number][] = [
[1024, 1024],
[1280, 720],
[720, 1280],
[1024, 768],
[768, 1024],
[1536, 1024],
[1024, 1536],
[1536, 864],
[864, 1536],
];
const LEGACY_STANDARD_SIZES_2K: [number, number][] = [
[1536, 1536],
[2048, 1152],
[1152, 2048],
[1536, 1024],
[1024, 1536],
[1536, 864],
[864, 1536],
[2048, 2048],
];
const QWEN_2_SPEC: DashScopeModelSpec = {
family: "qwen2",
defaultSize: "1024*1024",
};
const QWEN_FIXED_SPEC: DashScopeModelSpec = {
family: "qwenFixed",
defaultSize: QWEN_FIXED_SIZES_BY_RATIO["16:9"],
};
const LEGACY_SPEC: DashScopeModelSpec = {
family: "legacy",
defaultSize: "1536*1536",
};
const MODEL_SPEC_ALIASES: Record<string, DashScopeModelSpec> = {
"qwen-image-2.0-pro": QWEN_2_SPEC,
"qwen-image-2.0-pro-2026-03-03": QWEN_2_SPEC,
"qwen-image-2.0": QWEN_2_SPEC,
"qwen-image-2.0-2026-03-03": QWEN_2_SPEC,
"qwen-image-max": QWEN_FIXED_SPEC,
"qwen-image-max-2025-12-30": QWEN_FIXED_SPEC,
"qwen-image-plus": QWEN_FIXED_SPEC,
"qwen-image-plus-2026-01-09": QWEN_FIXED_SPEC,
"qwen-image": QWEN_FIXED_SPEC,
};
export function getDefaultModel(): string {
return process.env.DASHSCOPE_IMAGE_MODEL || DEFAULT_MODEL;
}
function getApiKey(): string | null {
return process.env.DASHSCOPE_API_KEY || null;
}
function getBaseUrl(): string {
const base = process.env.DASHSCOPE_BASE_URL || "https://dashscope.aliyuncs.com";
return base.replace(/\/+$/g, "");
}
function getModelSpec(model: string): DashScopeModelSpec {
return MODEL_SPEC_ALIASES[model.trim().toLowerCase()] || LEGACY_SPEC;
}
export function getModelFamily(model: string): DashScopeModelFamily {
return getModelSpec(model).family;
}
function normalizeQuality(quality: CliArgs["quality"]): Quality {
return quality === "normal" ? "normal" : "2k";
}
export function parseAspectRatio(ar: string): { width: number; height: number } | null {
const match = ar.match(/^(\d+(?:\.\d+)?):(\d+(?:\.\d+)?)$/);
if (!match) return null;
const w = parseFloat(match[1]!);
const h = parseFloat(match[2]!);
if (w <= 0 || h <= 0) return null;
return { width: w, height: h };
}
export function normalizeSize(size: string): string {
return size.replace("x", "*");
}
export function parseSize(size: string): { width: number; height: number } | null {
const match = normalizeSize(size).match(/^(\d+)\*(\d+)$/);
if (!match) return null;
const width = Number(match[1]);
const height = Number(match[2]);
if (!Number.isFinite(width) || !Number.isFinite(height) || width <= 0 || height <= 0) {
return null;
}
return { width, height };
}
function formatSize(width: number, height: number): string {
return `${width}*${height}`;
}
function getRatioValue(ar: string): number | null {
const parsed = parseAspectRatio(ar);
if (!parsed) return null;
return parsed.width / parsed.height;
}
function findKnownRatioKey(ar: string, candidates: string[], tolerance = 0.02): string | null {
const targetRatio = getRatioValue(ar);
if (targetRatio == null) return null;
let bestKey: string | null = null;
let bestDiff = Infinity;
for (const candidate of candidates) {
const candidateRatio = getRatioValue(candidate);
if (candidateRatio == null) continue;
const diff = Math.abs(candidateRatio - targetRatio);
if (diff < bestDiff) {
bestDiff = diff;
bestKey = candidate;
}
}
return bestDiff <= tolerance ? bestKey : null;
}
function roundToStep(value: number): number {
return Math.max(SIZE_STEP, Math.round(value / SIZE_STEP) * SIZE_STEP);
}
function fitToPixelBudget(
width: number,
height: number,
minPixels: number,
maxPixels: number,
): { width: number; height: number } {
let nextWidth = width;
let nextHeight = height;
let pixels = nextWidth * nextHeight;
if (pixels > maxPixels) {
const scale = Math.sqrt(maxPixels / pixels);
nextWidth *= scale;
nextHeight *= scale;
} else if (pixels < minPixels) {
const scale = Math.sqrt(minPixels / pixels);
nextWidth *= scale;
nextHeight *= scale;
}
let roundedWidth = roundToStep(nextWidth);
let roundedHeight = roundToStep(nextHeight);
pixels = roundedWidth * roundedHeight;
while (pixels > maxPixels && (roundedWidth > SIZE_STEP || roundedHeight > SIZE_STEP)) {
if (roundedWidth >= roundedHeight && roundedWidth > SIZE_STEP) {
roundedWidth -= SIZE_STEP;
} else if (roundedHeight > SIZE_STEP) {
roundedHeight -= SIZE_STEP;
} else {
break;
}
pixels = roundedWidth * roundedHeight;
}
while (pixels < minPixels) {
if (roundedWidth <= roundedHeight) {
roundedWidth += SIZE_STEP;
} else {
roundedHeight += SIZE_STEP;
}
pixels = roundedWidth * roundedHeight;
}
return { width: roundedWidth, height: roundedHeight };
}
export function getSizeFromAspectRatio(ar: string | null, quality: CliArgs["quality"]): string {
const normalizedQuality = normalizeQuality(quality);
const sizes = normalizedQuality === "2k" ? LEGACY_STANDARD_SIZES_2K : LEGACY_STANDARD_SIZES;
const defaultSize = normalizedQuality === "2k" ? "1536*1536" : "1024*1024";
if (!ar) return defaultSize;
const parsed = parseAspectRatio(ar);
if (!parsed) return defaultSize;
const targetRatio = parsed.width / parsed.height;
let best = defaultSize;
let bestDiff = Infinity;
for (const [width, height] of sizes) {
const diff = Math.abs(width / height - targetRatio);
if (diff < bestDiff) {
bestDiff = diff;
best = formatSize(width, height);
}
}
return best;
}
export function getQwen2SizeFromAspectRatio(ar: string | null, quality: CliArgs["quality"]): string {
const normalizedQuality = normalizeQuality(quality);
if (!ar) {
return QWEN_2_RECOMMENDED["1:1"][normalizedQuality];
}
const recommendedRatio = findKnownRatioKey(ar, Object.keys(QWEN_2_RECOMMENDED));
if (recommendedRatio) {
return QWEN_2_RECOMMENDED[recommendedRatio][normalizedQuality];
}
const parsed = parseAspectRatio(ar);
if (!parsed) {
return QWEN_2_RECOMMENDED["1:1"][normalizedQuality];
}
const targetRatio = parsed.width / parsed.height;
const targetPixels = QWEN_2_TARGET_PIXELS[normalizedQuality];
const rawWidth = Math.sqrt(targetPixels * targetRatio);
const rawHeight = Math.sqrt(targetPixels / targetRatio);
const fitted = fitToPixelBudget(
rawWidth,
rawHeight,
MIN_QWEN_2_TOTAL_PIXELS,
MAX_QWEN_2_TOTAL_PIXELS,
);
return formatSize(fitted.width, fitted.height);
}
function getQwenFixedSizeFromAspectRatio(ar: string | null, quality: CliArgs["quality"]): string {
if (quality === "normal") {
console.warn(
"DashScope qwen-image-max/plus/image models use fixed output sizes; --quality normal does not change the generated resolution."
);
}
if (!ar) return QWEN_FIXED_SPEC.defaultSize;
const ratioKey = findKnownRatioKey(ar, Object.keys(QWEN_FIXED_SIZES_BY_RATIO));
if (!ratioKey) {
throw new Error(
`DashScope model supports only fixed ratios ${Object.keys(QWEN_FIXED_SIZES_BY_RATIO).join(", ")}. ` +
`For custom ratios like "${ar}", use --model qwen-image-2.0-pro.`
);
}
return QWEN_FIXED_SIZES_BY_RATIO[ratioKey]!;
}
function validateSizeFormat(size: string): { width: number; height: number } {
const parsed = parseSize(size);
if (!parsed) {
throw new Error(`Invalid DashScope size "${size}". Expected <width>x<height> or <width>*<height>.`);
}
return parsed;
}
function validateQwen2Size(size: string): string {
const normalized = normalizeSize(size);
const parsed = validateSizeFormat(normalized);
const totalPixels = parsed.width * parsed.height;
if (totalPixels < MIN_QWEN_2_TOTAL_PIXELS || totalPixels > MAX_QWEN_2_TOTAL_PIXELS) {
throw new Error(
`DashScope qwen-image-2.0* models require total pixels between ${MIN_QWEN_2_TOTAL_PIXELS} ` +
`and ${MAX_QWEN_2_TOTAL_PIXELS}. Received ${normalized} (${totalPixels} pixels).`
);
}
return normalized;
}
function validateQwenFixedSize(size: string): string {
const normalized = normalizeSize(size);
validateSizeFormat(normalized);
if (!QWEN_FIXED_SIZES.includes(normalized)) {
throw new Error(
`DashScope qwen-image-max/plus/image models support only these sizes: ${QWEN_FIXED_SIZES.join(", ")}. ` +
`Received ${normalized}.`
);
}
return normalized;
}
export function resolveSizeForModel(
model: string,
args: Pick<CliArgs, "size" | "aspectRatio" | "quality">,
): string {
const spec = getModelSpec(model);
if (args.size) {
if (spec.family === "qwen2") return validateQwen2Size(args.size);
if (spec.family === "qwenFixed") return validateQwenFixedSize(args.size);
validateSizeFormat(args.size);
return normalizeSize(args.size);
}
if (spec.family === "qwen2") {
return getQwen2SizeFromAspectRatio(args.aspectRatio, args.quality);
}
if (spec.family === "qwenFixed") {
return getQwenFixedSizeFromAspectRatio(args.aspectRatio, args.quality);
}
return getSizeFromAspectRatio(args.aspectRatio, args.quality);
}
function buildParameters(
family: DashScopeModelFamily,
size: string,
): Record<string, unknown> {
const parameters: Record<string, unknown> = {
prompt_extend: false,
size,
};
if (family === "qwen2" || family === "qwenFixed") {
parameters.watermark = false;
parameters.negative_prompt = QWEN_NEGATIVE_PROMPT;
}
return parameters;
}
type DashScopeResponse = {
output?: {
result_image?: string;
choices?: Array<{
message?: {
content?: Array<{ image?: string }>;
};
}>;
};
};
async function extractImageFromResponse(result: DashScopeResponse): Promise<Uint8Array> {
let imageData: string | null = null;
if (result.output?.result_image) {
imageData = result.output.result_image;
} else if (result.output?.choices?.[0]?.message?.content) {
const content = result.output.choices[0].message.content;
for (const item of content) {
if (item.image) {
imageData = item.image;
break;
}
}
}
if (!imageData) {
console.error("Response:", JSON.stringify(result, null, 2));
throw new Error("No image in response");
}
if (imageData.startsWith("http://") || imageData.startsWith("https://")) {
const imgRes = await fetch(imageData);
if (!imgRes.ok) throw new Error("Failed to download image");
const buf = await imgRes.arrayBuffer();
return new Uint8Array(buf);
}
return Uint8Array.from(Buffer.from(imageData, "base64"));
}
export async function generateImage(
prompt: string,
model: string,
args: CliArgs
): Promise<Uint8Array> {
const apiKey = getApiKey();
if (!apiKey) throw new Error("DASHSCOPE_API_KEY is required");
if (args.referenceImages.length > 0) {
throw new Error(
"Reference images are not supported with DashScope provider in baoyu-imagine. Use --provider google with a Gemini multimodal model."
);
}
const spec = getModelSpec(model);
const size = resolveSizeForModel(model, args);
const url = `${getBaseUrl()}/api/v1/services/aigc/multimodal-generation/generation`;
const body = {
model,
input: {
messages: [
{
role: "user",
content: [{ text: prompt }],
},
],
},
parameters: buildParameters(spec.family, size),
};
console.log(`Generating image with DashScope (${model})...`, { family: spec.family, size });
const res = await fetch(url, {
method: "POST",
headers: {
"Content-Type": "application/json",
Authorization: `Bearer ${apiKey}`,
},
body: JSON.stringify(body),
});
if (!res.ok) {
const err = await res.text();
throw new Error(`DashScope API error (${res.status}): ${err}`);
}
const result = await res.json() as DashScopeResponse;
return extractImageFromResponse(result);
}