From 11d80eeaa95c0cc785720ca631d6719b10460821 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jim=20Liu=20=E5=AE=9D=E7=8E=89?= Date: Sun, 12 Apr 2026 02:14:18 -0500 Subject: [PATCH] feat(baoyu-imagine): add OpenAI-compatible image API dialect support Add --imageApiDialect flag, OPENAI_IMAGE_API_DIALECT env var, and default_image_api_dialect config for gateways that expect aspect-ratio size plus metadata.resolution instead of pixel size. --- README.md | 2 + README.zh.md | 2 + skills/baoyu-imagine/SKILL.md | 20 ++- .../references/config/first-time-setup.md | 3 + .../references/config/preferences-schema.md | 5 + skills/baoyu-imagine/scripts/main.test.ts | 30 ++++ skills/baoyu-imagine/scripts/main.ts | 31 ++++ .../scripts/providers/azure.test.ts | 1 + .../scripts/providers/google.test.ts | 1 + .../scripts/providers/jimeng.test.ts | 1 + .../scripts/providers/minimax.test.ts | 1 + .../scripts/providers/openai.test.ts | 70 +++++++++ .../baoyu-imagine/scripts/providers/openai.ts | 137 ++++++++++++++++-- .../scripts/providers/openrouter.test.ts | 1 + .../scripts/providers/replicate.test.ts | 1 + .../scripts/providers/seedream.test.ts | 1 + .../scripts/providers/zai.test.ts | 1 + skills/baoyu-imagine/scripts/types.ts | 4 + 18 files changed, 298 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index 39ca7bd..a1a1eb8 100644 --- a/README.md +++ b/README.md @@ -790,6 +790,7 @@ AI SDK-based image generation using OpenAI, Azure OpenAI, Google, OpenRouter, Da | `--size` | Size (e.g., `1024x1024`) | | `--quality` | `normal` or `2k` (default: `2k`) | | `--imageSize` | `1K`, `2K`, or `4K` for Google/OpenRouter | +| `--imageApiDialect` | `openai-native` or `ratio-metadata` for OpenAI-compatible gateways | | `--ref` | Reference images (Google, OpenAI, Azure OpenAI, OpenRouter, Replicate supported families, MiniMax, or Seedream 5.0/4.5/4.0) | | `--n` | Number of images per request (`replicate` currently requires `--n 1`) | | `--json` | JSON output | @@ -823,6 +824,7 @@ AI SDK-based image generation using OpenAI, Azure OpenAI, Google, OpenRouter, Da | `JIMENG_IMAGE_MODEL` | Jimeng model | `jimeng_t2i_v40` | | `SEEDREAM_IMAGE_MODEL` | Seedream model | `doubao-seedream-5-0-260128` | | `OPENAI_BASE_URL` | Custom OpenAI endpoint | - | +| `OPENAI_IMAGE_API_DIALECT` | OpenAI-compatible image API dialect (`openai-native` or `ratio-metadata`) | `openai-native` | | `OPENAI_IMAGE_USE_CHAT` | Use `/chat/completions` for OpenAI image generation | `false` | | `AZURE_OPENAI_BASE_URL` | Azure resource or deployment endpoint | - | | `AZURE_API_VERSION` | Azure image API version | `2025-04-01-preview` | diff --git a/README.zh.md b/README.zh.md index 082ed21..58922f4 100644 --- a/README.zh.md +++ b/README.zh.md @@ -790,6 +790,7 @@ AI 驱动的生成后端。 | `--size` | 尺寸(如 `1024x1024`) | | `--quality` | `normal` 或 `2k`(默认:`2k`) | | `--imageSize` | Google/OpenRouter 使用的 `1K`、`2K`、`4K` | +| `--imageApiDialect` | OpenAI 兼容网关的图像 API 方言(`openai-native` 或 `ratio-metadata`) | | `--ref` | 参考图片(Google、OpenAI、Azure OpenAI、OpenRouter、Replicate 支持的模型家族、MiniMax 或 Seedream 5.0/4.5/4.0) | | `--n` | 单次请求生成图片数量(`replicate` 当前只支持 `--n 1`) | | `--json` | 输出 JSON 结果 | @@ -823,6 +824,7 @@ AI 驱动的生成后端。 | `JIMENG_IMAGE_MODEL` | 即梦模型 | `jimeng_t2i_v40` | | `SEEDREAM_IMAGE_MODEL` | 豆包模型 | `doubao-seedream-5-0-260128` | | `OPENAI_BASE_URL` | 自定义 OpenAI 端点 | - | +| `OPENAI_IMAGE_API_DIALECT` | OpenAI 兼容图像 API 方言(`openai-native` 或 `ratio-metadata`) | `openai-native` | | `OPENAI_IMAGE_USE_CHAT` | OpenAI 改走 `/chat/completions` | `false` | | `AZURE_OPENAI_BASE_URL` | Azure 资源或部署端点 | - | | `AZURE_API_VERSION` | Azure 图像 API 版本 | `2025-04-01-preview` | diff --git a/skills/baoyu-imagine/SKILL.md b/skills/baoyu-imagine/SKILL.md index 0e028a6..43ac0e1 100644 --- a/skills/baoyu-imagine/SKILL.md +++ b/skills/baoyu-imagine/SKILL.md @@ -57,7 +57,7 @@ if (Test-Path "$HOME/.baoyu-skills/baoyu-imagine/EXTEND.md") { "user" } Legacy compatibility: if `.baoyu-skills/baoyu-image-gen/EXTEND.md` exists and the new path does not, runtime renames it to `baoyu-imagine`. If both files exist, runtime leaves them unchanged and uses the new path. -**EXTEND.md Supports**: Default provider | Default quality | Default aspect ratio | Default image size | Default models | Batch worker cap | Provider-specific batch limits +**EXTEND.md Supports**: Default provider | Default quality | Default aspect ratio | Default image size | OpenAI image API dialect | Default models | Batch worker cap | Provider-specific batch limits Schema: `references/config/preferences-schema.md` @@ -176,6 +176,7 @@ Paths in `promptFiles`, `image`, and `ref` are resolved relative to the batch fi | `--size ` | Size (e.g., `1024x1024`) | | `--quality normal\|2k` | Quality preset (default: `2k`) | | `--imageSize 1K\|2K\|4K` | Image size for Google/OpenRouter (default: from quality) | +| `--imageApiDialect openai-native\|ratio-metadata` | OpenAI-compatible image API dialect. Use `ratio-metadata` when the endpoint is OpenAI-compatible but expects aspect-ratio `size` plus `metadata.resolution` instead of pixel `size` | | `--ref ` | Reference images. Supported by Google multimodal, OpenAI GPT Image edits, Azure OpenAI edits (PNG/JPG only), OpenRouter multimodal models, Replicate supported families, MiniMax subject-reference, and Seedream 5.0/4.5/4.0. Not supported by Jimeng, Seedream 3.0, or removed SeedEdit 3.0 | | `--n ` | Number of images. Replicate currently supports only `--n 1` because this path saves exactly one output image | | `--json` | JSON output | @@ -209,6 +210,7 @@ Paths in `promptFiles`, `image`, and `ref` are resolved relative to the batch fi | `JIMENG_IMAGE_MODEL` | Jimeng model override (default: jimeng_t2i_v40) | | `SEEDREAM_IMAGE_MODEL` | Seedream model override (default: doubao-seedream-5-0-260128) | | `OPENAI_BASE_URL` | Custom OpenAI endpoint | +| `OPENAI_IMAGE_API_DIALECT` | OpenAI-compatible image API dialect override (`openai-native` or `ratio-metadata`) | | `AZURE_OPENAI_BASE_URL` | Azure resource endpoint or deployment endpoint | | `AZURE_API_VERSION` | Azure image API version (default: `2025-04-01-preview`) | | `OPENROUTER_BASE_URL` | Custom OpenRouter endpoint (default: `https://openrouter.ai/api/v1`) | @@ -242,6 +244,22 @@ For Azure, `--model` / `default_model.azure` should be the Azure deployment name **EXTEND.md overrides env vars**. If both EXTEND.md `default_model.google: "gemini-3-pro-image-preview"` and env var `GOOGLE_IMAGE_MODEL=gemini-3.1-flash-image-preview` exist, EXTEND.md wins. +### OpenAI-Compatible Gateway Dialects + +`provider=openai` means the auth and routing entrypoint is OpenAI-compatible. It does **not** guarantee that the upstream image API uses OpenAI native image-request semantics. + +Use `default_image_api_dialect` in `EXTEND.md`, `OPENAI_IMAGE_API_DIALECT`, or `--imageApiDialect` when the endpoint expects a different wire format: + +- `openai-native`: Sends pixel `size` such as `1536x1024` and native OpenAI quality fields when supported +- `ratio-metadata`: Sends aspect-ratio `size` such as `16:9` and maps quality/size intent into `metadata.resolution` (`1K|2K|4K`) plus `metadata.orientation` + +Recommended use: + +- OpenAI native Images API or strict clones: keep `openai-native` +- OpenAI-compatible gateways in front of Gemini or similar models: try `ratio-metadata` + +Current limitation: `ratio-metadata` only applies to text-to-image generation. Reference-image edit flows still require `openai-native` or another provider with first-class edit support. + **Agent MUST display model info** before each generation: - Show: `Using [provider] / [model]` - Show switch hint: `Switch model: --model | EXTEND.md default_model.[provider] | env _IMAGE_MODEL` diff --git a/skills/baoyu-imagine/references/config/first-time-setup.md b/skills/baoyu-imagine/references/config/first-time-setup.md index 68e4efd..0d73811 100644 --- a/skills/baoyu-imagine/references/config/first-time-setup.md +++ b/skills/baoyu-imagine/references/config/first-time-setup.md @@ -175,6 +175,7 @@ default_provider: [selected provider or null] default_quality: [selected quality] default_aspect_ratio: null default_image_size: null +default_image_api_dialect: null default_model: google: [selected google model or null] openai: null @@ -187,6 +188,8 @@ default_model: --- ``` +If the user selects `OpenAI` but says their endpoint is only OpenAI-compatible and fronts another image model family, save `default_image_api_dialect: ratio-metadata` when they explicitly confirm the gateway expects aspect-ratio `size` plus metadata-based resolution. Otherwise leave it `null` / `openai-native`. + ## Flow 2: EXTEND.md Exists, Model Null When EXTEND.md exists but `default_model.[current_provider]` is null, ask ONLY the model question for the current provider. diff --git a/skills/baoyu-imagine/references/config/preferences-schema.md b/skills/baoyu-imagine/references/config/preferences-schema.md index cf35c9b..617ac76 100644 --- a/skills/baoyu-imagine/references/config/preferences-schema.md +++ b/skills/baoyu-imagine/references/config/preferences-schema.md @@ -19,6 +19,8 @@ default_aspect_ratio: null # "16:9"|"1:1"|"4:3"|"3:4"|"2.35:1"|null default_image_size: null # 1K|2K|4K|null (Google/OpenRouter, overrides quality) +default_image_api_dialect: null # openai-native|ratio-metadata|null (OpenAI-compatible gateways; null = use env/default) + default_model: google: null # e.g., "gemini-3-pro-image-preview", "gemini-3.1-flash-image-preview" openai: null # e.g., "gpt-image-1.5", "gpt-image-1" @@ -68,6 +70,7 @@ batch: | `default_quality` | string\|null | null | Default quality (null = 2k) | | `default_aspect_ratio` | string\|null | null | Default aspect ratio | | `default_image_size` | string\|null | null | Google/OpenRouter image size (overrides quality) | +| `default_image_api_dialect` | string\|null | null | OpenAI-compatible image dialect (`openai-native` or `ratio-metadata`) | | `default_model.google` | string\|null | null | Google default model | | `default_model.openai` | string\|null | null | OpenAI default model | | `default_model.azure` | string\|null | null | Azure default deployment name | @@ -88,6 +91,7 @@ batch: version: 1 default_provider: google default_quality: 2k +default_image_api_dialect: null --- ``` @@ -99,6 +103,7 @@ default_provider: google default_quality: 2k default_aspect_ratio: "16:9" default_image_size: 2K +default_image_api_dialect: null default_model: google: "gemini-3-pro-image-preview" openai: "gpt-image-1.5" diff --git a/skills/baoyu-imagine/scripts/main.test.ts b/skills/baoyu-imagine/scripts/main.test.ts index 4928367..6a8bbf4 100644 --- a/skills/baoyu-imagine/scripts/main.test.ts +++ b/skills/baoyu-imagine/scripts/main.test.ts @@ -17,6 +17,7 @@ import { mergeConfig, normalizeOutputImagePath, parseArgs, + parseOpenAIImageApiDialect, parseSimpleYaml, } from "./main.ts"; @@ -33,6 +34,7 @@ function makeArgs(overrides: Partial = {}): CliArgs { quality: null, imageSize: null, imageSizeSource: null, + imageApiDialect: null, referenceImages: [], n: 1, batchFile: null, @@ -85,6 +87,8 @@ test("parseArgs parses the main baoyu-imagine CLI flags", () => { "2k", "--imageSize", "4k", + "--imageApiDialect", + "ratio-metadata", "--ref", "ref/one.png", "ref/two.jpg", @@ -102,6 +106,7 @@ test("parseArgs parses the main baoyu-imagine CLI flags", () => { assert.equal(args.aspectRatioSource, null); assert.equal(args.imageSize, "4K"); assert.equal(args.imageSizeSource, "cli"); + assert.equal(args.imageApiDialect, "ratio-metadata"); assert.deepEqual(args.referenceImages, ["ref/one.png", "ref/two.jpg"]); assert.equal(args.n, 3); assert.equal(args.jobs, 5); @@ -125,6 +130,7 @@ default_provider: openrouter default_quality: normal default_aspect_ratio: '16:9' default_image_size: 2K +default_image_api_dialect: ratio-metadata default_model: google: gemini-3-pro-image-preview openai: gpt-image-1.5 @@ -157,6 +163,7 @@ batch: assert.equal(config.default_quality, "normal"); assert.equal(config.default_aspect_ratio, "16:9"); assert.equal(config.default_image_size, "2K"); + assert.equal(config.default_image_api_dialect, "ratio-metadata"); assert.equal(config.default_model?.google, "gemini-3-pro-image-preview"); assert.equal(config.default_model?.openai, "gpt-image-1.5"); assert.equal(config.default_model?.zai, "glm-image"); @@ -252,6 +259,7 @@ test("mergeConfig only fills values missing from CLI args", () => { default_quality: "2k", default_aspect_ratio: "3:2", default_image_size: "2K", + default_image_api_dialect: "ratio-metadata", } satisfies Partial, ); @@ -261,6 +269,7 @@ test("mergeConfig only fills values missing from CLI args", () => { assert.equal(merged.aspectRatioSource, "config"); assert.equal(merged.imageSize, "4K"); assert.equal(merged.imageSizeSource, "cli"); + assert.equal(merged.imageApiDialect, "ratio-metadata"); }); test("mergeConfig tags inherited imageSize defaults so providers can ignore incompatible config", () => { @@ -275,6 +284,25 @@ test("mergeConfig tags inherited imageSize defaults so providers can ignore inco assert.equal(merged.imageSizeSource, "config"); }); +test("mergeConfig falls back to OPENAI_IMAGE_API_DIALECT when CLI and EXTEND are unset", (t) => { + useEnv(t, { + OPENAI_IMAGE_API_DIALECT: "ratio-metadata", + }); + + const merged = mergeConfig(makeArgs(), {}); + assert.equal(merged.imageApiDialect, "ratio-metadata"); +}); + +test("parseOpenAIImageApiDialect validates supported values", () => { + assert.equal(parseOpenAIImageApiDialect("openai-native"), "openai-native"); + assert.equal(parseOpenAIImageApiDialect("ratio-metadata"), "ratio-metadata"); + assert.equal(parseOpenAIImageApiDialect(null), null); + assert.throws( + () => parseOpenAIImageApiDialect("gateway-magic"), + /Invalid OpenAI image API dialect/, + ); +}); + test("detectProvider rejects non-ref-capable providers and prefers Google first when multiple keys exist", (t) => { assert.throws( () => @@ -492,6 +520,7 @@ test("loadBatchTasks and createTaskArgs resolve batch-relative paths", async (t) makeArgs({ provider: "replicate", quality: "2k", + imageApiDialect: "ratio-metadata", json: true, }), loaded.tasks[0]!, @@ -508,6 +537,7 @@ test("loadBatchTasks and createTaskArgs resolve batch-relative paths", async (t) assert.equal(taskArgs.provider, "replicate"); assert.equal(taskArgs.aspectRatio, "16:9"); assert.equal(taskArgs.quality, "2k"); + assert.equal(taskArgs.imageApiDialect, "ratio-metadata"); assert.equal(taskArgs.json, true); }); diff --git a/skills/baoyu-imagine/scripts/main.ts b/skills/baoyu-imagine/scripts/main.ts index 6be1b08..a101bb0 100644 --- a/skills/baoyu-imagine/scripts/main.ts +++ b/skills/baoyu-imagine/scripts/main.ts @@ -8,6 +8,7 @@ import type { BatchTaskInput, CliArgs, ExtendConfig, + OpenAIImageApiDialect, Provider, } from "./types"; @@ -83,6 +84,7 @@ Options: --size Size (e.g., 1024x1024) --quality normal|2k Quality preset (default: 2k) --imageSize 1K|2K|4K Image size for Google/OpenRouter (default: from quality) + --imageApiDialect OpenAI-compatible image dialect: openai-native|ratio-metadata --ref Reference images (Google, OpenAI, Azure, OpenRouter, Replicate supported families, MiniMax, or Seedream 4.0/4.5/5.0) --n Number of images for the current task (default: 1; Replicate currently requires 1) --json JSON output @@ -133,6 +135,7 @@ Environment variables: JIMENG_IMAGE_MODEL Default Jimeng model (jimeng_t2i_v40) SEEDREAM_IMAGE_MODEL Default Seedream model (doubao-seedream-5-0-260128) OPENAI_BASE_URL Custom OpenAI endpoint + OPENAI_IMAGE_API_DIALECT OpenAI-compatible image dialect (openai-native|ratio-metadata) OPENAI_IMAGE_USE_CHAT Use /chat/completions instead of /images/generations (true|false) OPENROUTER_BASE_URL Custom OpenRouter endpoint OPENROUTER_HTTP_REFERER Optional app URL for OpenRouter attribution @@ -170,6 +173,7 @@ export function parseArgs(argv: string[]): CliArgs { quality: null, imageSize: null, imageSizeSource: null, + imageApiDialect: null, referenceImages: [], n: 1, batchFile: null, @@ -299,6 +303,15 @@ export function parseArgs(argv: string[]): CliArgs { continue; } + if (a === "--imageApiDialect") { + const v = argv[++i]; + if (v !== "openai-native" && v !== "ratio-metadata") { + throw new Error(`Invalid imageApiDialect: ${v}`); + } + out.imageApiDialect = v; + continue; + } + if (a === "--ref" || a === "--reference") { const { items, next } = takeMany(i); if (items.length === 0) throw new Error(`Missing files for ${a}`); @@ -402,6 +415,9 @@ export function parseSimpleYaml(yaml: string): Partial { config.default_aspect_ratio = cleaned === "null" ? null : cleaned; } else if (key === "default_image_size") { config.default_image_size = value === "null" ? null : value as "1K" | "2K" | "4K"; + } else if (key === "default_image_api_dialect") { + config.default_image_api_dialect = + value === "null" ? null : parseOpenAIImageApiDialect(value); } else if (key === "default_model") { config.default_model = { google: null, @@ -487,6 +503,15 @@ export function parseSimpleYaml(yaml: string): Partial { return config; } +export function parseOpenAIImageApiDialect( + value: string | undefined | null +): OpenAIImageApiDialect | null { + if (!value) return null; + const normalized = value.replace(/['"]/g, "").trim(); + if (normalized === "openai-native" || normalized === "ratio-metadata") return normalized; + throw new Error(`Invalid OpenAI image API dialect: ${value}`); +} + type ExtendConfigPathPair = { current: string; legacy: string; @@ -548,6 +573,10 @@ export async function loadExtendConfig( export function mergeConfig(args: CliArgs, extend: Partial): CliArgs { const aspectRatio = args.aspectRatio ?? extend.default_aspect_ratio ?? null; const imageSize = args.imageSize ?? extend.default_image_size ?? null; + const imageApiDialect = + args.imageApiDialect ?? + extend.default_image_api_dialect ?? + parseOpenAIImageApiDialect(process.env.OPENAI_IMAGE_API_DIALECT); return { ...args, provider: args.provider ?? extend.default_provider ?? null, @@ -560,6 +589,7 @@ export function mergeConfig(args: CliArgs, extend: Partial): CliAr imageSizeSource: args.imageSizeSource ?? (args.imageSize !== null ? "cli" : (imageSize !== null ? "config" : null)), + imageApiDialect, }; } @@ -891,6 +921,7 @@ export function createTaskArgs(baseArgs: CliArgs, task: BatchTaskInput, batchDir quality: task.quality ?? baseArgs.quality ?? null, imageSize: task.imageSize ?? baseArgs.imageSize ?? null, imageSizeSource: task.imageSize != null ? "task" : (baseArgs.imageSizeSource ?? null), + imageApiDialect: task.imageApiDialect ?? baseArgs.imageApiDialect ?? null, referenceImages: task.ref ? task.ref.map((filePath) => resolveBatchPath(batchDir, filePath)) : [], n: task.n ?? baseArgs.n, batchFile: null, diff --git a/skills/baoyu-imagine/scripts/providers/azure.test.ts b/skills/baoyu-imagine/scripts/providers/azure.test.ts index 26cb0a5..cd85919 100644 --- a/skills/baoyu-imagine/scripts/providers/azure.test.ts +++ b/skills/baoyu-imagine/scripts/providers/azure.test.ts @@ -48,6 +48,7 @@ function makeArgs(overrides: Partial = {}): CliArgs { size: null, quality: null, imageSize: null, + imageApiDialect: null, referenceImages: [], n: 1, batchFile: null, diff --git a/skills/baoyu-imagine/scripts/providers/google.test.ts b/skills/baoyu-imagine/scripts/providers/google.test.ts index aec3372..88d4e00 100644 --- a/skills/baoyu-imagine/scripts/providers/google.test.ts +++ b/skills/baoyu-imagine/scripts/providers/google.test.ts @@ -50,6 +50,7 @@ function makeArgs(overrides: Partial = {}): CliArgs { size: null, quality: null, imageSize: null, + imageApiDialect: null, referenceImages: [], n: 1, batchFile: null, diff --git a/skills/baoyu-imagine/scripts/providers/jimeng.test.ts b/skills/baoyu-imagine/scripts/providers/jimeng.test.ts index ed38fb9..811844b 100644 --- a/skills/baoyu-imagine/scripts/providers/jimeng.test.ts +++ b/skills/baoyu-imagine/scripts/providers/jimeng.test.ts @@ -15,6 +15,7 @@ function makeArgs(overrides: Partial = {}): CliArgs { size: null, quality: null, imageSize: null, + imageApiDialect: null, referenceImages: [], n: 1, batchFile: null, diff --git a/skills/baoyu-imagine/scripts/providers/minimax.test.ts b/skills/baoyu-imagine/scripts/providers/minimax.test.ts index c334634..7a1179f 100644 --- a/skills/baoyu-imagine/scripts/providers/minimax.test.ts +++ b/skills/baoyu-imagine/scripts/providers/minimax.test.ts @@ -50,6 +50,7 @@ function makeArgs(overrides: Partial = {}): CliArgs { size: null, quality: null, imageSize: null, + imageApiDialect: null, referenceImages: [], n: 1, batchFile: null, diff --git a/skills/baoyu-imagine/scripts/providers/openai.test.ts b/skills/baoyu-imagine/scripts/providers/openai.test.ts index c4dcd79..b6b44f8 100644 --- a/skills/baoyu-imagine/scripts/providers/openai.test.ts +++ b/skills/baoyu-imagine/scripts/providers/openai.test.ts @@ -2,9 +2,16 @@ import assert from "node:assert/strict"; import test from "node:test"; import { + buildOpenAIGenerationsBody, extractImageFromResponse, + getOpenAIAspectRatio, + getOpenAIImageApiDialect, + getOpenAIResolution, getMimeType, getOpenAISize, + getOrientationFromAspectRatio, + inferAspectRatioFromSize, + inferResolutionFromSize, parseAspectRatio, } from "./openai.ts"; @@ -18,6 +25,69 @@ test("OpenAI aspect-ratio parsing and size selection match model families", () = assert.equal(getOpenAISize("dall-e-2", "16:9", "2k"), "1024x1024"); assert.equal(getOpenAISize("gpt-image-1.5", "16:9", "2k"), "1536x1024"); assert.equal(getOpenAISize("gpt-image-1.5", "4:3", "2k"), "1024x1024"); + assert.equal(inferAspectRatioFromSize("1536x1024"), "3:2"); + assert.equal(inferResolutionFromSize("1536x1024"), "2K"); + assert.equal(getOpenAIAspectRatio({ aspectRatio: null, size: "2048x1152" }), "16:9"); + assert.equal(getOpenAIResolution({ imageSize: null, size: "2048x1152", quality: "normal" }), "2K"); + assert.equal(getOrientationFromAspectRatio("16:9"), "landscape"); + assert.equal(getOrientationFromAspectRatio("9:16"), "portrait"); + assert.equal(getOrientationFromAspectRatio("1:1"), null); + assert.equal(getOpenAIImageApiDialect({ imageApiDialect: null }), "openai-native"); +}); + +test("OpenAI generations body switches between native and ratio-metadata dialects", () => { + assert.deepEqual( + buildOpenAIGenerationsBody("Draw a skyline", "gpt-image-1.5", { + aspectRatio: "16:9", + size: null, + quality: "2k", + imageSize: null, + imageApiDialect: null, + }), + { + model: "gpt-image-1.5", + prompt: "Draw a skyline", + size: "1536x1024", + }, + ); + + assert.deepEqual( + buildOpenAIGenerationsBody("Draw a skyline", "gemini-3-pro-image-preview", { + aspectRatio: "16:9", + size: null, + quality: "2k", + imageSize: null, + imageApiDialect: "ratio-metadata", + }), + { + model: "gemini-3-pro-image-preview", + prompt: "Draw a skyline", + size: "16:9", + metadata: { + resolution: "2K", + orientation: "landscape", + }, + }, + ); + + assert.deepEqual( + buildOpenAIGenerationsBody("Draw a portrait", "gemini-3-pro-image-preview", { + aspectRatio: null, + size: "1152x2048", + quality: "normal", + imageSize: null, + imageApiDialect: "ratio-metadata", + }), + { + model: "gemini-3-pro-image-preview", + prompt: "Draw a portrait", + size: "9:16", + metadata: { + resolution: "2K", + orientation: "portrait", + }, + }, + ); }); test("OpenAI mime-type detection covers supported reference image extensions", () => { diff --git a/skills/baoyu-imagine/scripts/providers/openai.ts b/skills/baoyu-imagine/scripts/providers/openai.ts index 875631d..2777682 100644 --- a/skills/baoyu-imagine/scripts/providers/openai.ts +++ b/skills/baoyu-imagine/scripts/providers/openai.ts @@ -1,6 +1,6 @@ import path from "node:path"; import { readFile } from "node:fs/promises"; -import type { CliArgs } from "../types"; +import type { CliArgs, OpenAIImageApiDialect } from "../types"; export function getDefaultModel(): string { return process.env.OPENAI_IMAGE_MODEL || "gpt-image-1.5"; @@ -23,6 +23,8 @@ type SizeMapping = { portrait: string; }; +type OpenAIGenerationsBody = Record; + export function getOpenAISize( model: string, ar: string | null, @@ -60,6 +62,114 @@ export function getOpenAISize( return sizes.square; } +function parsePixelSize(value: string): { width: number; height: number } | null { + const match = value.match(/^(\d+)\s*[xX]\s*(\d+)$/); + if (!match) return null; + + const width = parseInt(match[1]!, 10); + const height = parseInt(match[2]!, 10); + if (!Number.isFinite(width) || !Number.isFinite(height) || width <= 0 || height <= 0) { + return null; + } + + return { width, height }; +} + +function gcd(a: number, b: number): number { + let x = Math.abs(a); + let y = Math.abs(b); + while (y !== 0) { + const next = x % y; + x = y; + y = next; + } + return x || 1; +} + +export function getOpenAIImageApiDialect(args: Pick): OpenAIImageApiDialect { + return args.imageApiDialect ?? "openai-native"; +} + +export function inferAspectRatioFromSize(size: string | null): string | null { + if (!size) return null; + const parsed = parsePixelSize(size); + if (!parsed) return null; + + const divisor = gcd(parsed.width, parsed.height); + return `${parsed.width / divisor}:${parsed.height / divisor}`; +} + +export function inferResolutionFromSize(size: string | null): "1K" | "2K" | "4K" | null { + if (!size) return null; + const parsed = parsePixelSize(size); + if (!parsed) return null; + + const longestEdge = Math.max(parsed.width, parsed.height); + if (longestEdge <= 1024) return "1K"; + if (longestEdge <= 2048) return "2K"; + return "4K"; +} + +export function getOpenAIAspectRatio(args: Pick): string { + return args.aspectRatio ?? inferAspectRatioFromSize(args.size) ?? "1:1"; +} + +export function getOpenAIResolution( + args: Pick +): "1K" | "2K" | "4K" { + if (args.imageSize === "1K" || args.imageSize === "2K" || args.imageSize === "4K") { + return args.imageSize; + } + + const inferred = inferResolutionFromSize(args.size); + if (inferred) return inferred; + + return args.quality === "normal" ? "1K" : "2K"; +} + +export function getOrientationFromAspectRatio(ar: string): "landscape" | "portrait" | null { + const parsed = parseAspectRatio(ar); + if (!parsed) return null; + + const ratio = parsed.width / parsed.height; + if (Math.abs(ratio - 1) < 0.1) return null; + return ratio > 1 ? "landscape" : "portrait"; +} + +export function buildOpenAIGenerationsBody( + prompt: string, + model: string, + args: Pick +): OpenAIGenerationsBody { + if (getOpenAIImageApiDialect(args) === "ratio-metadata") { + const aspectRatio = getOpenAIAspectRatio(args); + const metadata: Record = { + resolution: getOpenAIResolution(args), + }; + const orientation = getOrientationFromAspectRatio(aspectRatio); + if (orientation) metadata.orientation = orientation; + + return { + model, + prompt, + size: aspectRatio, + metadata, + }; + } + + const body: OpenAIGenerationsBody = { + model, + prompt, + size: args.size || getOpenAISize(model, args.aspectRatio, args.quality), + }; + + if (model.includes("dall-e-3")) { + body.quality = args.quality === "2k" ? "hd" : "standard"; + } + + return body; +} + export async function generateImage( prompt: string, model: string, @@ -78,18 +188,28 @@ export async function generateImage( return generateWithChatCompletions(baseURL, apiKey, prompt, model); } - const size = args.size || getOpenAISize(model, args.aspectRatio, args.quality); + const imageApiDialect = getOpenAIImageApiDialect(args); if (args.referenceImages.length > 0) { + if (imageApiDialect !== "openai-native") { + throw new Error( + "Reference images are not supported with the ratio-metadata OpenAI dialect yet. Use openai-native, Google, Azure, OpenRouter, MiniMax, Seedream, or Replicate for image-edit workflows." + ); + } if (model.includes("dall-e-2") || model.includes("dall-e-3")) { throw new Error( "Reference images with OpenAI in this skill require GPT Image models. Use --model gpt-image-1.5 (or another gpt-image model)." ); } + const size = args.size || getOpenAISize(model, args.aspectRatio, args.quality); return generateWithOpenAIEdits(baseURL, apiKey, prompt, model, size, args.referenceImages, args.quality); } - return generateWithOpenAIGenerations(baseURL, apiKey, prompt, model, size, args.quality); + return generateWithOpenAIGenerations( + baseURL, + apiKey, + buildOpenAIGenerationsBody(prompt, model, args) + ); } async function generateWithChatCompletions( @@ -129,17 +249,8 @@ async function generateWithChatCompletions( async function generateWithOpenAIGenerations( baseURL: string, apiKey: string, - prompt: string, - model: string, - size: string, - quality: CliArgs["quality"] + body: OpenAIGenerationsBody ): Promise { - const body: Record = { model, prompt, size }; - - if (model.includes("dall-e-3")) { - body.quality = quality === "2k" ? "hd" : "standard"; - } - const res = await fetch(`${baseURL}/images/generations`, { method: "POST", headers: { diff --git a/skills/baoyu-imagine/scripts/providers/openrouter.test.ts b/skills/baoyu-imagine/scripts/providers/openrouter.test.ts index 415122e..8878e14 100644 --- a/skills/baoyu-imagine/scripts/providers/openrouter.test.ts +++ b/skills/baoyu-imagine/scripts/providers/openrouter.test.ts @@ -28,6 +28,7 @@ function makeArgs(overrides: Partial = {}): CliArgs { size: null, quality: null, imageSize: null, + imageApiDialect: null, referenceImages: [], n: 1, batchFile: null, diff --git a/skills/baoyu-imagine/scripts/providers/replicate.test.ts b/skills/baoyu-imagine/scripts/providers/replicate.test.ts index cd90def..0b35590 100644 --- a/skills/baoyu-imagine/scripts/providers/replicate.test.ts +++ b/skills/baoyu-imagine/scripts/providers/replicate.test.ts @@ -24,6 +24,7 @@ function makeArgs(overrides: Partial = {}): CliArgs { quality: null, imageSize: null, imageSizeSource: null, + imageApiDialect: null, referenceImages: [], n: 1, batchFile: null, diff --git a/skills/baoyu-imagine/scripts/providers/seedream.test.ts b/skills/baoyu-imagine/scripts/providers/seedream.test.ts index 5ec94d6..7176278 100644 --- a/skills/baoyu-imagine/scripts/providers/seedream.test.ts +++ b/skills/baoyu-imagine/scripts/providers/seedream.test.ts @@ -25,6 +25,7 @@ function makeArgs(overrides: Partial = {}): CliArgs { size: null, quality: null, imageSize: null, + imageApiDialect: null, referenceImages: [], n: 1, batchFile: null, diff --git a/skills/baoyu-imagine/scripts/providers/zai.test.ts b/skills/baoyu-imagine/scripts/providers/zai.test.ts index 59dcef4..ea9681f 100644 --- a/skills/baoyu-imagine/scripts/providers/zai.test.ts +++ b/skills/baoyu-imagine/scripts/providers/zai.test.ts @@ -25,6 +25,7 @@ function makeArgs(overrides: Partial = {}): CliArgs { size: null, quality: null, imageSize: null, + imageApiDialect: null, referenceImages: [], n: 1, batchFile: null, diff --git a/skills/baoyu-imagine/scripts/types.ts b/skills/baoyu-imagine/scripts/types.ts index b7c7640..ead86e6 100644 --- a/skills/baoyu-imagine/scripts/types.ts +++ b/skills/baoyu-imagine/scripts/types.ts @@ -10,6 +10,7 @@ export type Provider = | "seedream" | "azure"; export type Quality = "normal" | "2k"; +export type OpenAIImageApiDialect = "openai-native" | "ratio-metadata"; export type CliArgs = { prompt: string | null; @@ -23,6 +24,7 @@ export type CliArgs = { quality: Quality | null; imageSize: string | null; imageSizeSource?: "cli" | "task" | "config" | null; + imageApiDialect: OpenAIImageApiDialect | null; referenceImages: string[]; n: number; batchFile: string | null; @@ -42,6 +44,7 @@ export type BatchTaskInput = { size?: string | null; quality?: Quality | null; imageSize?: "1K" | "2K" | "4K" | null; + imageApiDialect?: OpenAIImageApiDialect | null; ref?: string[]; n?: number; }; @@ -59,6 +62,7 @@ export type ExtendConfig = { default_quality: Quality | null; default_aspect_ratio: string | null; default_image_size: "1K" | "2K" | "4K" | null; + default_image_api_dialect: OpenAIImageApiDialect | null; default_model: { google: string | null; openai: string | null;