From 11d80eeaa95c0cc785720ca631d6719b10460821 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jim=20Liu=20=E5=AE=9D=E7=8E=89?= <junminliu@gmail.com>
Date: Sun, 12 Apr 2026 02:14:18 -0500
Subject: [PATCH] feat(baoyu-imagine): add OpenAI-compatible image API dialect
 support

Add --imageApiDialect flag, OPENAI_IMAGE_API_DIALECT env var, and
default_image_api_dialect config for gateways that expect aspect-ratio
size plus metadata.resolution instead of pixel size.
---
 README.md                                     |   2 +
 README.zh.md                                  |   2 +
 skills/baoyu-imagine/SKILL.md                 |  20 ++-
 .../references/config/first-time-setup.md     |   3 +
 .../references/config/preferences-schema.md   |   5 +
 skills/baoyu-imagine/scripts/main.test.ts     |  30 ++++
 skills/baoyu-imagine/scripts/main.ts          |  31 ++++
 .../scripts/providers/azure.test.ts           |   1 +
 .../scripts/providers/google.test.ts          |   1 +
 .../scripts/providers/jimeng.test.ts          |   1 +
 .../scripts/providers/minimax.test.ts         |   1 +
 .../scripts/providers/openai.test.ts          |  70 +++++++++
 .../baoyu-imagine/scripts/providers/openai.ts | 137 ++++++++++++++++--
 .../scripts/providers/openrouter.test.ts      |   1 +
 .../scripts/providers/replicate.test.ts       |   1 +
 .../scripts/providers/seedream.test.ts        |   1 +
 .../scripts/providers/zai.test.ts             |   1 +
 skills/baoyu-imagine/scripts/types.ts         |   4 +
 18 files changed, 298 insertions(+), 14 deletions(-)

diff --git a/README.md b/README.md
index 39ca7bd..a1a1eb8 100644
--- a/README.md
+++ b/README.md
@@ -790,6 +790,7 @@ AI SDK-based image generation using OpenAI, Azure OpenAI, Google, OpenRouter, Da
 | `--size` | Size (e.g., `1024x1024`) |
 | `--quality` | `normal` or `2k` (default: `2k`) |
 | `--imageSize` | `1K`, `2K`, or `4K` for Google/OpenRouter |
+| `--imageApiDialect` | `openai-native` or `ratio-metadata` for OpenAI-compatible gateways |
 | `--ref` | Reference images (Google, OpenAI, Azure OpenAI, OpenRouter, Replicate supported families, MiniMax, or Seedream 5.0/4.5/4.0) |
 | `--n` | Number of images per request (`replicate` currently requires `--n 1`) |
 | `--json` | JSON output |
@@ -823,6 +824,7 @@ AI SDK-based image generation using OpenAI, Azure OpenAI, Google, OpenRouter, Da
 | `JIMENG_IMAGE_MODEL` | Jimeng model | `jimeng_t2i_v40` |
 | `SEEDREAM_IMAGE_MODEL` | Seedream model | `doubao-seedream-5-0-260128` |
 | `OPENAI_BASE_URL` | Custom OpenAI endpoint | - |
+| `OPENAI_IMAGE_API_DIALECT` | OpenAI-compatible image API dialect (`openai-native` or `ratio-metadata`) | `openai-native` |
 | `OPENAI_IMAGE_USE_CHAT` | Use `/chat/completions` for OpenAI image generation | `false` |
 | `AZURE_OPENAI_BASE_URL` | Azure resource or deployment endpoint | - |
 | `AZURE_API_VERSION` | Azure image API version | `2025-04-01-preview` |
diff --git a/README.zh.md b/README.zh.md
index 082ed21..58922f4 100644
--- a/README.zh.md
+++ b/README.zh.md
@@ -790,6 +790,7 @@ AI 驱动的生成后端。
 | `--size` | 尺寸（如 `1024x1024`） |
 | `--quality` | `normal` 或 `2k`（默认：`2k`） |
 | `--imageSize` | Google/OpenRouter 使用的 `1K`、`2K`、`4K` |
+| `--imageApiDialect` | OpenAI 兼容网关的图像 API 方言（`openai-native` 或 `ratio-metadata`） |
 | `--ref` | 参考图片（Google、OpenAI、Azure OpenAI、OpenRouter、Replicate 支持的模型家族、MiniMax 或 Seedream 5.0/4.5/4.0） |
 | `--n` | 单次请求生成图片数量（`replicate` 当前只支持 `--n 1`） |
 | `--json` | 输出 JSON 结果 |
@@ -823,6 +824,7 @@ AI 驱动的生成后端。
 | `JIMENG_IMAGE_MODEL` | 即梦模型 | `jimeng_t2i_v40` |
 | `SEEDREAM_IMAGE_MODEL` | 豆包模型 | `doubao-seedream-5-0-260128` |
 | `OPENAI_BASE_URL` | 自定义 OpenAI 端点 | - |
+| `OPENAI_IMAGE_API_DIALECT` | OpenAI 兼容图像 API 方言（`openai-native` 或 `ratio-metadata`） | `openai-native` |
 | `OPENAI_IMAGE_USE_CHAT` | OpenAI 改走 `/chat/completions` | `false` |
 | `AZURE_OPENAI_BASE_URL` | Azure 资源或部署端点 | - |
 | `AZURE_API_VERSION` | Azure 图像 API 版本 | `2025-04-01-preview` |
diff --git a/skills/baoyu-imagine/SKILL.md b/skills/baoyu-imagine/SKILL.md
index 0e028a6..43ac0e1 100644
--- a/skills/baoyu-imagine/SKILL.md
+++ b/skills/baoyu-imagine/SKILL.md
@@ -57,7 +57,7 @@ if (Test-Path "$HOME/.baoyu-skills/baoyu-imagine/EXTEND.md") { "user" }
 
 Legacy compatibility: if `.baoyu-skills/baoyu-image-gen/EXTEND.md` exists and the new path does not, runtime renames it to `baoyu-imagine`. If both files exist, runtime leaves them unchanged and uses the new path.
 
-**EXTEND.md Supports**: Default provider | Default quality | Default aspect ratio | Default image size | Default models | Batch worker cap | Provider-specific batch limits
+**EXTEND.md Supports**: Default provider | Default quality | Default aspect ratio | Default image size | OpenAI image API dialect | Default models | Batch worker cap | Provider-specific batch limits
 
 Schema: `references/config/preferences-schema.md`
 
@@ -176,6 +176,7 @@ Paths in `promptFiles`, `image`, and `ref` are resolved relative to the batch fi
 | `--size <WxH>` | Size (e.g., `1024x1024`) |
 | `--quality normal\|2k` | Quality preset (default: `2k`) |
 | `--imageSize 1K\|2K\|4K` | Image size for Google/OpenRouter (default: from quality) |
+| `--imageApiDialect openai-native\|ratio-metadata` | OpenAI-compatible image API dialect. Use `ratio-metadata` when the endpoint is OpenAI-compatible but expects aspect-ratio `size` plus `metadata.resolution` instead of pixel `size` |
 | `--ref <files...>` | Reference images. Supported by Google multimodal, OpenAI GPT Image edits, Azure OpenAI edits (PNG/JPG only), OpenRouter multimodal models, Replicate supported families, MiniMax subject-reference, and Seedream 5.0/4.5/4.0. Not supported by Jimeng, Seedream 3.0, or removed SeedEdit 3.0 |
 | `--n <count>` | Number of images. Replicate currently supports only `--n 1` because this path saves exactly one output image |
 | `--json` | JSON output |
@@ -209,6 +210,7 @@ Paths in `promptFiles`, `image`, and `ref` are resolved relative to the batch fi
 | `JIMENG_IMAGE_MODEL` | Jimeng model override (default: jimeng_t2i_v40) |
 | `SEEDREAM_IMAGE_MODEL` | Seedream model override (default: doubao-seedream-5-0-260128) |
 | `OPENAI_BASE_URL` | Custom OpenAI endpoint |
+| `OPENAI_IMAGE_API_DIALECT` | OpenAI-compatible image API dialect override (`openai-native` or `ratio-metadata`; default: `openai-native`) |
 | `AZURE_OPENAI_BASE_URL` | Azure resource endpoint or deployment endpoint |
 | `AZURE_API_VERSION` | Azure image API version (default: `2025-04-01-preview`) |
 | `OPENROUTER_BASE_URL` | Custom OpenRouter endpoint (default: `https://openrouter.ai/api/v1`) |
@@ -242,6 +244,22 @@ For Azure, `--model` / `default_model.azure` should be the Azure deployment name
 
 **EXTEND.md overrides env vars**. If both EXTEND.md `default_model.google: "gemini-3-pro-image-preview"` and env var `GOOGLE_IMAGE_MODEL=gemini-3.1-flash-image-preview` exist, EXTEND.md wins.
 
+### OpenAI-Compatible Gateway Dialects
+
+`provider=openai` means the auth and routing entrypoint is OpenAI-compatible. It does **not** guarantee that the upstream image API uses OpenAI native image-request semantics.
+
+Use `--imageApiDialect`, `default_image_api_dialect` in `EXTEND.md`, or `OPENAI_IMAGE_API_DIALECT` (checked in that order; the first one that is set wins) when the endpoint expects a different wire format:
+
+- `openai-native`: Sends pixel `size` such as `1536x1024` and native OpenAI quality fields when supported
+- `ratio-metadata`: Sends aspect-ratio `size` such as `16:9` and maps quality/size intent into `metadata.resolution` (`1K|2K|4K`) plus `metadata.orientation` (`landscape` or `portrait`; omitted for near-square ratios)
+
+Recommended use:
+
+- OpenAI native Images API or strict clones: keep `openai-native`
+- OpenAI-compatible gateways in front of Gemini or similar models: try `ratio-metadata`
+
+Current limitation: `ratio-metadata` only applies to text-to-image generation. Reference-image edit flows still require `openai-native` or another provider with first-class edit support.
+
 **Agent MUST display model info** before each generation:
 - Show: `Using [provider] / [model]`
 - Show switch hint: `Switch model: --model <id> | EXTEND.md default_model.[provider] | env <PROVIDER>_IMAGE_MODEL`
diff --git a/skills/baoyu-imagine/references/config/first-time-setup.md b/skills/baoyu-imagine/references/config/first-time-setup.md
index 68e4efd..0d73811 100644
--- a/skills/baoyu-imagine/references/config/first-time-setup.md
+++ b/skills/baoyu-imagine/references/config/first-time-setup.md
@@ -175,6 +175,7 @@ default_provider: [selected provider or null]
 default_quality: [selected quality]
 default_aspect_ratio: null
 default_image_size: null
+default_image_api_dialect: null
 default_model:
   google: [selected google model or null]
   openai: null
@@ -187,6 +188,8 @@ default_model:
 ---
 ```
 
+If the user selects `OpenAI` but says their endpoint is only OpenAI-compatible and fronts another image model family, save `default_image_api_dialect: ratio-metadata` only after they explicitly confirm that the gateway expects aspect-ratio `size` plus metadata-based resolution. Otherwise leave it `null`, which falls back to `OPENAI_IMAGE_API_DIALECT` and then to `openai-native`.
+
 ## Flow 2: EXTEND.md Exists, Model Null
 
 When EXTEND.md exists but `default_model.[current_provider]` is null, ask ONLY the model question for the current provider.
diff --git a/skills/baoyu-imagine/references/config/preferences-schema.md b/skills/baoyu-imagine/references/config/preferences-schema.md
index cf35c9b..617ac76 100644
--- a/skills/baoyu-imagine/references/config/preferences-schema.md
+++ b/skills/baoyu-imagine/references/config/preferences-schema.md
@@ -19,6 +19,8 @@ default_aspect_ratio: null  # "16:9"|"1:1"|"4:3"|"3:4"|"2.35:1"|null
 
 default_image_size: null    # 1K|2K|4K|null (Google/OpenRouter, overrides quality)
 
+default_image_api_dialect: null  # openai-native|ratio-metadata|null (OpenAI-compatible gateways; null = use env/default)
+
 default_model:
   google: null              # e.g., "gemini-3-pro-image-preview", "gemini-3.1-flash-image-preview"
   openai: null              # e.g., "gpt-image-1.5", "gpt-image-1"
@@ -68,6 +70,7 @@ batch:
 | `default_quality` | string\|null | null | Default quality (null = 2k) |
 | `default_aspect_ratio` | string\|null | null | Default aspect ratio |
 | `default_image_size` | string\|null | null | Google/OpenRouter image size (overrides quality) |
+| `default_image_api_dialect` | string\|null | null | OpenAI-compatible image dialect (`openai-native` or `ratio-metadata`) |
 | `default_model.google` | string\|null | null | Google default model |
 | `default_model.openai` | string\|null | null | OpenAI default model |
 | `default_model.azure` | string\|null | null | Azure default deployment name |
@@ -88,6 +91,7 @@ batch:
 version: 1
 default_provider: google
 default_quality: 2k
+default_image_api_dialect: null
 ---
 ```
 
@@ -99,6 +103,7 @@ default_provider: google
 default_quality: 2k
 default_aspect_ratio: "16:9"
 default_image_size: 2K
+default_image_api_dialect: null
 default_model:
   google: "gemini-3-pro-image-preview"
   openai: "gpt-image-1.5"
diff --git a/skills/baoyu-imagine/scripts/main.test.ts b/skills/baoyu-imagine/scripts/main.test.ts
index 4928367..6a8bbf4 100644
--- a/skills/baoyu-imagine/scripts/main.test.ts
+++ b/skills/baoyu-imagine/scripts/main.test.ts
@@ -17,6 +17,7 @@ import {
   mergeConfig,
   normalizeOutputImagePath,
   parseArgs,
+  parseOpenAIImageApiDialect,
   parseSimpleYaml,
 } from "./main.ts";
 
@@ -33,6 +34,7 @@ function makeArgs(overrides: Partial<CliArgs> = {}): CliArgs {
     quality: null,
     imageSize: null,
     imageSizeSource: null,
+    imageApiDialect: null,
     referenceImages: [],
     n: 1,
     batchFile: null,
@@ -85,6 +87,8 @@ test("parseArgs parses the main baoyu-imagine CLI flags", () => {
     "2k",
     "--imageSize",
     "4k",
+    "--imageApiDialect",
+    "ratio-metadata",
     "--ref",
     "ref/one.png",
     "ref/two.jpg",
@@ -102,6 +106,7 @@ test("parseArgs parses the main baoyu-imagine CLI flags", () => {
   assert.equal(args.aspectRatioSource, null);
   assert.equal(args.imageSize, "4K");
   assert.equal(args.imageSizeSource, "cli");
+  assert.equal(args.imageApiDialect, "ratio-metadata");
   assert.deepEqual(args.referenceImages, ["ref/one.png", "ref/two.jpg"]);
   assert.equal(args.n, 3);
   assert.equal(args.jobs, 5);
@@ -125,6 +130,7 @@ default_provider: openrouter
 default_quality: normal
 default_aspect_ratio: '16:9'
 default_image_size: 2K
+default_image_api_dialect: ratio-metadata
 default_model:
   google: gemini-3-pro-image-preview
   openai: gpt-image-1.5
@@ -157,6 +163,7 @@ batch:
   assert.equal(config.default_quality, "normal");
   assert.equal(config.default_aspect_ratio, "16:9");
   assert.equal(config.default_image_size, "2K");
+  assert.equal(config.default_image_api_dialect, "ratio-metadata");
   assert.equal(config.default_model?.google, "gemini-3-pro-image-preview");
   assert.equal(config.default_model?.openai, "gpt-image-1.5");
   assert.equal(config.default_model?.zai, "glm-image");
@@ -252,6 +259,7 @@ test("mergeConfig only fills values missing from CLI args", () => {
       default_quality: "2k",
       default_aspect_ratio: "3:2",
       default_image_size: "2K",
+      default_image_api_dialect: "ratio-metadata",
     } satisfies Partial<ExtendConfig>,
   );
 
@@ -261,6 +269,7 @@ test("mergeConfig only fills values missing from CLI args", () => {
   assert.equal(merged.aspectRatioSource, "config");
   assert.equal(merged.imageSize, "4K");
   assert.equal(merged.imageSizeSource, "cli");
+  assert.equal(merged.imageApiDialect, "ratio-metadata");
 });
 
 test("mergeConfig tags inherited imageSize defaults so providers can ignore incompatible config", () => {
@@ -275,6 +284,25 @@ test("mergeConfig tags inherited imageSize defaults so providers can ignore inco
   assert.equal(merged.imageSizeSource, "config");
 });
 
+test("mergeConfig falls back to OPENAI_IMAGE_API_DIALECT when CLI and EXTEND are unset", (t) => {
+  useEnv(t, {
+    OPENAI_IMAGE_API_DIALECT: "ratio-metadata",
+  });
+
+  const merged = mergeConfig(makeArgs(), {}); // makeArgs() leaves imageApiDialect null and the EXTEND config is empty
+  assert.equal(merged.imageApiDialect, "ratio-metadata"); // so the env var is the only source left
+});
+
+test("parseOpenAIImageApiDialect validates supported values", () => {
+  assert.equal(parseOpenAIImageApiDialect("openai-native"), "openai-native");
+  assert.equal(parseOpenAIImageApiDialect("ratio-metadata"), "ratio-metadata");
+  assert.equal(parseOpenAIImageApiDialect(null), null); // unset input means "not configured", not an error
+  assert.throws(
+    () => parseOpenAIImageApiDialect("gateway-magic"), // any other non-empty string is rejected
+    /Invalid OpenAI image API dialect/,
+  );
+});
+
 test("detectProvider rejects non-ref-capable providers and prefers Google first when multiple keys exist", (t) => {
   assert.throws(
     () =>
@@ -492,6 +520,7 @@ test("loadBatchTasks and createTaskArgs resolve batch-relative paths", async (t)
     makeArgs({
       provider: "replicate",
       quality: "2k",
+      imageApiDialect: "ratio-metadata",
       json: true,
     }),
     loaded.tasks[0]!,
@@ -508,6 +537,7 @@ test("loadBatchTasks and createTaskArgs resolve batch-relative paths", async (t)
   assert.equal(taskArgs.provider, "replicate");
   assert.equal(taskArgs.aspectRatio, "16:9");
   assert.equal(taskArgs.quality, "2k");
+  assert.equal(taskArgs.imageApiDialect, "ratio-metadata");
   assert.equal(taskArgs.json, true);
 });
 
diff --git a/skills/baoyu-imagine/scripts/main.ts b/skills/baoyu-imagine/scripts/main.ts
index 6be1b08..a101bb0 100644
--- a/skills/baoyu-imagine/scripts/main.ts
+++ b/skills/baoyu-imagine/scripts/main.ts
@@ -8,6 +8,7 @@ import type {
   BatchTaskInput,
   CliArgs,
   ExtendConfig,
+  OpenAIImageApiDialect,
   Provider,
 } from "./types";
 
@@ -83,6 +84,7 @@ Options:
   --size <WxH>              Size (e.g., 1024x1024)
   --quality normal|2k       Quality preset (default: 2k)
   --imageSize 1K|2K|4K      Image size for Google/OpenRouter (default: from quality)
+  --imageApiDialect <id>    OpenAI-compatible image dialect: openai-native|ratio-metadata
   --ref <files...>          Reference images (Google, OpenAI, Azure, OpenRouter, Replicate supported families, MiniMax, or Seedream 4.0/4.5/5.0)
   --n <count>               Number of images for the current task (default: 1; Replicate currently requires 1)
   --json                    JSON output
@@ -133,6 +135,7 @@ Environment variables:
   JIMENG_IMAGE_MODEL        Default Jimeng model (jimeng_t2i_v40)
   SEEDREAM_IMAGE_MODEL      Default Seedream model (doubao-seedream-5-0-260128)
   OPENAI_BASE_URL           Custom OpenAI endpoint
+  OPENAI_IMAGE_API_DIALECT  OpenAI-compatible image dialect (openai-native|ratio-metadata)
   OPENAI_IMAGE_USE_CHAT     Use /chat/completions instead of /images/generations (true|false)
   OPENROUTER_BASE_URL       Custom OpenRouter endpoint
   OPENROUTER_HTTP_REFERER   Optional app URL for OpenRouter attribution
@@ -170,6 +173,7 @@ export function parseArgs(argv: string[]): CliArgs {
     quality: null,
     imageSize: null,
     imageSizeSource: null,
+    imageApiDialect: null,
     referenceImages: [],
     n: 1,
     batchFile: null,
@@ -299,6 +303,15 @@ export function parseArgs(argv: string[]): CliArgs {
       continue;
     }
 
+    if (a === "--imageApiDialect") {
+      const v = argv[++i];
+      if (v !== "openai-native" && v !== "ratio-metadata") {
+        throw new Error(`Invalid imageApiDialect: ${v}`);
+      }
+      out.imageApiDialect = v;
+      continue;
+    }
+
     if (a === "--ref" || a === "--reference") {
       const { items, next } = takeMany(i);
       if (items.length === 0) throw new Error(`Missing files for ${a}`);
@@ -402,6 +415,9 @@ export function parseSimpleYaml(yaml: string): Partial<ExtendConfig> {
         config.default_aspect_ratio = cleaned === "null" ? null : cleaned;
       } else if (key === "default_image_size") {
         config.default_image_size = value === "null" ? null : value as "1K" | "2K" | "4K";
+      } else if (key === "default_image_api_dialect") {
+        config.default_image_api_dialect =
+          value === "null" ? null : parseOpenAIImageApiDialect(value);
       } else if (key === "default_model") {
         config.default_model = {
           google: null,
@@ -487,6 +503,15 @@ export function parseSimpleYaml(yaml: string): Partial<ExtendConfig> {
   return config;
 }
 
+export function parseOpenAIImageApiDialect(
+  value: string | undefined | null
+): OpenAIImageApiDialect | null {
+  if (!value) return null; // null, undefined, and "" all mean "not configured"
+  const normalized = value.replace(/['"]/g, "").trim(); // drop every quote character, then surrounding whitespace
+  if (normalized === "openai-native" || normalized === "ratio-metadata") return normalized;
+  throw new Error(`Invalid OpenAI image API dialect: ${value}`); // message shows the raw input, not the normalized form
+}
+
 type ExtendConfigPathPair = {
   current: string;
   legacy: string;
@@ -548,6 +573,10 @@ export async function loadExtendConfig(
 export function mergeConfig(args: CliArgs, extend: Partial<ExtendConfig>): CliArgs {
   const aspectRatio = args.aspectRatio ?? extend.default_aspect_ratio ?? null;
   const imageSize = args.imageSize ?? extend.default_image_size ?? null;
+  const imageApiDialect =
+    args.imageApiDialect ??
+    extend.default_image_api_dialect ??
+    parseOpenAIImageApiDialect(process.env.OPENAI_IMAGE_API_DIALECT);
   return {
     ...args,
     provider: args.provider ?? extend.default_provider ?? null,
@@ -560,6 +589,7 @@ export function mergeConfig(args: CliArgs, extend: Partial<ExtendConfig>): CliAr
     imageSizeSource:
       args.imageSizeSource ??
       (args.imageSize !== null ? "cli" : (imageSize !== null ? "config" : null)),
+    imageApiDialect,
   };
 }
 
@@ -891,6 +921,7 @@ export function createTaskArgs(baseArgs: CliArgs, task: BatchTaskInput, batchDir
     quality: task.quality ?? baseArgs.quality ?? null,
     imageSize: task.imageSize ?? baseArgs.imageSize ?? null,
     imageSizeSource: task.imageSize != null ? "task" : (baseArgs.imageSizeSource ?? null),
+    imageApiDialect: task.imageApiDialect ?? baseArgs.imageApiDialect ?? null,
     referenceImages: task.ref ? task.ref.map((filePath) => resolveBatchPath(batchDir, filePath)) : [],
     n: task.n ?? baseArgs.n,
     batchFile: null,
diff --git a/skills/baoyu-imagine/scripts/providers/azure.test.ts b/skills/baoyu-imagine/scripts/providers/azure.test.ts
index 26cb0a5..cd85919 100644
--- a/skills/baoyu-imagine/scripts/providers/azure.test.ts
+++ b/skills/baoyu-imagine/scripts/providers/azure.test.ts
@@ -48,6 +48,7 @@ function makeArgs(overrides: Partial<CliArgs> = {}): CliArgs {
     size: null,
     quality: null,
     imageSize: null,
+    imageApiDialect: null,
     referenceImages: [],
     n: 1,
     batchFile: null,
diff --git a/skills/baoyu-imagine/scripts/providers/google.test.ts b/skills/baoyu-imagine/scripts/providers/google.test.ts
index aec3372..88d4e00 100644
--- a/skills/baoyu-imagine/scripts/providers/google.test.ts
+++ b/skills/baoyu-imagine/scripts/providers/google.test.ts
@@ -50,6 +50,7 @@ function makeArgs(overrides: Partial<CliArgs> = {}): CliArgs {
     size: null,
     quality: null,
     imageSize: null,
+    imageApiDialect: null,
     referenceImages: [],
     n: 1,
     batchFile: null,
diff --git a/skills/baoyu-imagine/scripts/providers/jimeng.test.ts b/skills/baoyu-imagine/scripts/providers/jimeng.test.ts
index ed38fb9..811844b 100644
--- a/skills/baoyu-imagine/scripts/providers/jimeng.test.ts
+++ b/skills/baoyu-imagine/scripts/providers/jimeng.test.ts
@@ -15,6 +15,7 @@ function makeArgs(overrides: Partial<CliArgs> = {}): CliArgs {
     size: null,
     quality: null,
     imageSize: null,
+    imageApiDialect: null,
     referenceImages: [],
     n: 1,
     batchFile: null,
diff --git a/skills/baoyu-imagine/scripts/providers/minimax.test.ts b/skills/baoyu-imagine/scripts/providers/minimax.test.ts
index c334634..7a1179f 100644
--- a/skills/baoyu-imagine/scripts/providers/minimax.test.ts
+++ b/skills/baoyu-imagine/scripts/providers/minimax.test.ts
@@ -50,6 +50,7 @@ function makeArgs(overrides: Partial<CliArgs> = {}): CliArgs {
     size: null,
     quality: null,
     imageSize: null,
+    imageApiDialect: null,
     referenceImages: [],
     n: 1,
     batchFile: null,
diff --git a/skills/baoyu-imagine/scripts/providers/openai.test.ts b/skills/baoyu-imagine/scripts/providers/openai.test.ts
index c4dcd79..b6b44f8 100644
--- a/skills/baoyu-imagine/scripts/providers/openai.test.ts
+++ b/skills/baoyu-imagine/scripts/providers/openai.test.ts
@@ -2,9 +2,16 @@ import assert from "node:assert/strict";
 import test from "node:test";
 
 import {
+  buildOpenAIGenerationsBody,
   extractImageFromResponse,
+  getOpenAIAspectRatio,
+  getOpenAIImageApiDialect,
+  getOpenAIResolution,
   getMimeType,
   getOpenAISize,
+  getOrientationFromAspectRatio,
+  inferAspectRatioFromSize,
+  inferResolutionFromSize,
   parseAspectRatio,
 } from "./openai.ts";
 
@@ -18,6 +25,69 @@ test("OpenAI aspect-ratio parsing and size selection match model families", () =
   assert.equal(getOpenAISize("dall-e-2", "16:9", "2k"), "1024x1024");
   assert.equal(getOpenAISize("gpt-image-1.5", "16:9", "2k"), "1536x1024");
   assert.equal(getOpenAISize("gpt-image-1.5", "4:3", "2k"), "1024x1024");
+  assert.equal(inferAspectRatioFromSize("1536x1024"), "3:2");
+  assert.equal(inferResolutionFromSize("1536x1024"), "2K");
+  assert.equal(getOpenAIAspectRatio({ aspectRatio: null, size: "2048x1152" }), "16:9");
+  assert.equal(getOpenAIResolution({ imageSize: null, size: "2048x1152", quality: "normal" }), "2K");
+  assert.equal(getOrientationFromAspectRatio("16:9"), "landscape");
+  assert.equal(getOrientationFromAspectRatio("9:16"), "portrait");
+  assert.equal(getOrientationFromAspectRatio("1:1"), null);
+  assert.equal(getOpenAIImageApiDialect({ imageApiDialect: null }), "openai-native");
+});
+
+test("OpenAI generations body switches between native and ratio-metadata dialects", () => {
+  assert.deepEqual( // case 1: dialect unset -> native body with a pixel size and no metadata
+    buildOpenAIGenerationsBody("Draw a skyline", "gpt-image-1.5", {
+      aspectRatio: "16:9",
+      size: null,
+      quality: "2k",
+      imageSize: null,
+      imageApiDialect: null,
+    }),
+    {
+      model: "gpt-image-1.5",
+      prompt: "Draw a skyline",
+      size: "1536x1024",
+    },
+  );
+
+  assert.deepEqual( // case 2: ratio-metadata with an explicit aspect ratio; resolution comes from quality
+    buildOpenAIGenerationsBody("Draw a skyline", "gemini-3-pro-image-preview", {
+      aspectRatio: "16:9",
+      size: null,
+      quality: "2k",
+      imageSize: null,
+      imageApiDialect: "ratio-metadata",
+    }),
+    {
+      model: "gemini-3-pro-image-preview",
+      prompt: "Draw a skyline",
+      size: "16:9",
+      metadata: {
+        resolution: "2K",
+        orientation: "landscape",
+      },
+    },
+  );
+
+  assert.deepEqual( // case 3: ratio-metadata with only a pixel size; ratio and resolution are inferred from it
+    buildOpenAIGenerationsBody("Draw a portrait", "gemini-3-pro-image-preview", {
+      aspectRatio: null,
+      size: "1152x2048",
+      quality: "normal",
+      imageSize: null,
+      imageApiDialect: "ratio-metadata",
+    }),
+    {
+      model: "gemini-3-pro-image-preview",
+      prompt: "Draw a portrait",
+      size: "9:16",
+      metadata: {
+        resolution: "2K", // inferred from the 2048 px edge; takes precedence over quality "normal"
+        orientation: "portrait",
+      },
+    },
+  );
+});
 
 test("OpenAI mime-type detection covers supported reference image extensions", () => {
diff --git a/skills/baoyu-imagine/scripts/providers/openai.ts b/skills/baoyu-imagine/scripts/providers/openai.ts
index 875631d..2777682 100644
--- a/skills/baoyu-imagine/scripts/providers/openai.ts
+++ b/skills/baoyu-imagine/scripts/providers/openai.ts
@@ -1,6 +1,6 @@
 import path from "node:path";
 import { readFile } from "node:fs/promises";
-import type { CliArgs } from "../types";
+import type { CliArgs, OpenAIImageApiDialect } from "../types";
 
 export function getDefaultModel(): string {
   return process.env.OPENAI_IMAGE_MODEL || "gpt-image-1.5";
@@ -23,6 +23,8 @@ type SizeMapping = {
   portrait: string;
 };
 
+type OpenAIGenerationsBody = Record<string, unknown>; // request body handed to generateWithOpenAIGenerations
+
 export function getOpenAISize(
   model: string,
   ar: string | null,
@@ -60,6 +62,114 @@ export function getOpenAISize(
   return sizes.square;
 }
 
+function parsePixelSize(value: string): { width: number; height: number } | null {
+  const match = value.match(/^(\d+)\s*[xX]\s*(\d+)$/); // "<width>x<height>", x or X, optional whitespace around it
+  if (!match) return null;
+
+  const width = parseInt(match[1]!, 10);
+  const height = parseInt(match[2]!, 10);
+  if (!Number.isFinite(width) || !Number.isFinite(height) || width <= 0 || height <= 0) { // reject zero or non-finite dimensions
+    return null;
+  }
+
+  return { width, height };
+}
+
+function gcd(a: number, b: number): number {
+  let x = Math.abs(a);
+  let y = Math.abs(b);
+  while (y !== 0) { // Euclidean algorithm: replace (x, y) with (y, x mod y) until y reaches 0
+    const next = x % y;
+    x = y;
+    y = next;
+  }
+  return x || 1; // gcd(0, 0) would be 0; return 1 instead so callers can divide by the result
+}
+
+export function getOpenAIImageApiDialect(args: Pick<CliArgs, "imageApiDialect">): OpenAIImageApiDialect {
+  return args.imageApiDialect ?? "openai-native"; // an unset (null/undefined) dialect means openai-native
+}
+
+export function inferAspectRatioFromSize(size: string | null): string | null {
+  if (!size) return null;
+  const parsed = parsePixelSize(size);
+  if (!parsed) return null; // size is not a "<width>x<height>" pixel size
+
+  const divisor = gcd(parsed.width, parsed.height); // reduce to lowest terms, e.g. 1536x1024 -> 3:2
+  return `${parsed.width / divisor}:${parsed.height / divisor}`;
+}
+
+export function inferResolutionFromSize(size: string | null): "1K" | "2K" | "4K" | null {
+  if (!size) return null;
+  const parsed = parsePixelSize(size);
+  if (!parsed) return null;
+
+  const longestEdge = Math.max(parsed.width, parsed.height); // bucket by the longer side, whatever the orientation
+  if (longestEdge <= 1024) return "1K";
+  if (longestEdge <= 2048) return "2K";
+  return "4K"; // any longest edge above 2048
+}
+
+export function getOpenAIAspectRatio(args: Pick<CliArgs, "aspectRatio" | "size">): string {
+  return args.aspectRatio ?? inferAspectRatioFromSize(args.size) ?? "1:1"; // explicit ratio, else ratio derived from size, else square
+}
+
+export function getOpenAIResolution(
+  args: Pick<CliArgs, "imageSize" | "size" | "quality">
+): "1K" | "2K" | "4K" {
+  if (args.imageSize === "1K" || args.imageSize === "2K" || args.imageSize === "4K") { // an explicit, recognized imageSize wins
+    return args.imageSize;
+  }
+
+  const inferred = inferResolutionFromSize(args.size); // next: derive the bucket from a pixel size, if one parses
+  if (inferred) return inferred;
+
+  return args.quality === "normal" ? "1K" : "2K"; // last resort: "normal" -> 1K, anything else (including null) -> 2K
+}
+
+export function getOrientationFromAspectRatio(ar: string): "landscape" | "portrait" | null {
+  const parsed = parseAspectRatio(ar);
+  if (!parsed) return null; // parseAspectRatio produced no result for this string
+
+  const ratio = parsed.width / parsed.height;
+  if (Math.abs(ratio - 1) < 0.1) return null; // near-square (ratio within 0.1 of 1): no orientation
+  return ratio > 1 ? "landscape" : "portrait"; // wider than tall -> landscape
+}
+
+export function buildOpenAIGenerationsBody(
+  prompt: string,
+  model: string,
+  args: Pick<CliArgs, "aspectRatio" | "size" | "quality" | "imageSize" | "imageApiDialect">
+): OpenAIGenerationsBody {
+  if (getOpenAIImageApiDialect(args) === "ratio-metadata") { // gateway dialect: ratio-style size plus a metadata object
+    const aspectRatio = getOpenAIAspectRatio(args);
+    const metadata: Record<string, string> = {
+      resolution: getOpenAIResolution(args),
+    };
+    const orientation = getOrientationFromAspectRatio(aspectRatio);
+    if (orientation) metadata.orientation = orientation; // key is left out when no orientation was derived
+
+    return {
+      model,
+      prompt,
+      size: aspectRatio, // an aspect ratio such as "16:9", not a pixel size
+      metadata,
+    };
+  }
+
+  const body: OpenAIGenerationsBody = {
+    model,
+    prompt,
+    size: args.size || getOpenAISize(model, args.aspectRatio, args.quality), // a non-empty args.size wins over the derived size
+  };
+
+  if (model.includes("dall-e-3")) { // only dall-e-3 models get a quality field: "2k" -> hd, otherwise standard
+    body.quality = args.quality === "2k" ? "hd" : "standard";
+  }
+
+  return body;
+}
+
 export async function generateImage(
   prompt: string,
   model: string,
@@ -78,18 +188,28 @@ export async function generateImage(
     return generateWithChatCompletions(baseURL, apiKey, prompt, model);
   }
 
-  const size = args.size || getOpenAISize(model, args.aspectRatio, args.quality);
+  const imageApiDialect = getOpenAIImageApiDialect(args);
 
   if (args.referenceImages.length > 0) {
+    if (imageApiDialect !== "openai-native") {
+      throw new Error(
+        "Reference images are not supported with the ratio-metadata OpenAI dialect yet. Use openai-native, Google, Azure, OpenRouter, MiniMax, Seedream, or Replicate for image-edit workflows."
+      );
+    }
     if (model.includes("dall-e-2") || model.includes("dall-e-3")) {
       throw new Error(
         "Reference images with OpenAI in this skill require GPT Image models. Use --model gpt-image-1.5 (or another gpt-image model)."
       );
     }
+    const size = args.size || getOpenAISize(model, args.aspectRatio, args.quality);
     return generateWithOpenAIEdits(baseURL, apiKey, prompt, model, size, args.referenceImages, args.quality);
   }
 
-  return generateWithOpenAIGenerations(baseURL, apiKey, prompt, model, size, args.quality);
+  return generateWithOpenAIGenerations(
+    baseURL,
+    apiKey,
+    buildOpenAIGenerationsBody(prompt, model, args)
+  );
 }
 
 async function generateWithChatCompletions(
@@ -129,17 +249,8 @@ async function generateWithChatCompletions(
 async function generateWithOpenAIGenerations(
   baseURL: string,
   apiKey: string,
-  prompt: string,
-  model: string,
-  size: string,
-  quality: CliArgs["quality"]
+  body: OpenAIGenerationsBody
 ): Promise<Uint8Array> {
-  const body: Record<string, any> = { model, prompt, size };
-
-  if (model.includes("dall-e-3")) {
-    body.quality = quality === "2k" ? "hd" : "standard";
-  }
-
   const res = await fetch(`${baseURL}/images/generations`, {
     method: "POST",
     headers: {
diff --git a/skills/baoyu-imagine/scripts/providers/openrouter.test.ts b/skills/baoyu-imagine/scripts/providers/openrouter.test.ts
index 415122e..8878e14 100644
--- a/skills/baoyu-imagine/scripts/providers/openrouter.test.ts
+++ b/skills/baoyu-imagine/scripts/providers/openrouter.test.ts
@@ -28,6 +28,7 @@ function makeArgs(overrides: Partial<CliArgs> = {}): CliArgs {
     size: null,
     quality: null,
     imageSize: null,
+    imageApiDialect: null,
     referenceImages: [],
     n: 1,
     batchFile: null,
diff --git a/skills/baoyu-imagine/scripts/providers/replicate.test.ts b/skills/baoyu-imagine/scripts/providers/replicate.test.ts
index cd90def..0b35590 100644
--- a/skills/baoyu-imagine/scripts/providers/replicate.test.ts
+++ b/skills/baoyu-imagine/scripts/providers/replicate.test.ts
@@ -24,6 +24,7 @@ function makeArgs(overrides: Partial<CliArgs> = {}): CliArgs {
     quality: null,
     imageSize: null,
     imageSizeSource: null,
+    imageApiDialect: null,
     referenceImages: [],
     n: 1,
     batchFile: null,
diff --git a/skills/baoyu-imagine/scripts/providers/seedream.test.ts b/skills/baoyu-imagine/scripts/providers/seedream.test.ts
index 5ec94d6..7176278 100644
--- a/skills/baoyu-imagine/scripts/providers/seedream.test.ts
+++ b/skills/baoyu-imagine/scripts/providers/seedream.test.ts
@@ -25,6 +25,7 @@ function makeArgs(overrides: Partial<CliArgs> = {}): CliArgs {
     size: null,
     quality: null,
     imageSize: null,
+    imageApiDialect: null,
     referenceImages: [],
     n: 1,
     batchFile: null,
diff --git a/skills/baoyu-imagine/scripts/providers/zai.test.ts b/skills/baoyu-imagine/scripts/providers/zai.test.ts
index 59dcef4..ea9681f 100644
--- a/skills/baoyu-imagine/scripts/providers/zai.test.ts
+++ b/skills/baoyu-imagine/scripts/providers/zai.test.ts
@@ -25,6 +25,7 @@ function makeArgs(overrides: Partial<CliArgs> = {}): CliArgs {
     size: null,
     quality: null,
     imageSize: null,
+    imageApiDialect: null,
     referenceImages: [],
     n: 1,
     batchFile: null,
diff --git a/skills/baoyu-imagine/scripts/types.ts b/skills/baoyu-imagine/scripts/types.ts
index b7c7640..ead86e6 100644
--- a/skills/baoyu-imagine/scripts/types.ts
+++ b/skills/baoyu-imagine/scripts/types.ts
@@ -10,6 +10,7 @@ export type Provider =
   | "seedream"
   | "azure";
 export type Quality = "normal" | "2k";
+export type OpenAIImageApiDialect = "openai-native" | "ratio-metadata"; // request shape used for the OpenAI provider's image generation calls
 
 export type CliArgs = {
   prompt: string | null;
@@ -23,6 +24,7 @@ export type CliArgs = {
   quality: Quality | null;
   imageSize: string | null;
   imageSizeSource?: "cli" | "task" | "config" | null;
+  imageApiDialect: OpenAIImageApiDialect | null;
   referenceImages: string[];
   n: number;
   batchFile: string | null;
@@ -42,6 +44,7 @@ export type BatchTaskInput = {
   size?: string | null;
   quality?: Quality | null;
   imageSize?: "1K" | "2K" | "4K" | null;
+  imageApiDialect?: OpenAIImageApiDialect | null;
   ref?: string[];
   n?: number;
 };
@@ -59,6 +62,7 @@ export type ExtendConfig = {
   default_quality: Quality | null;
   default_aspect_ratio: string | null;
   default_image_size: "1K" | "2K" | "4K" | null;
+  default_image_api_dialect: OpenAIImageApiDialect | null;
   default_model: {
     google: string | null;
     openai: string | null;