feat(baoyu-imagine): add OpenAI-compatible image API dialect support

Add --imageApiDialect flag, OPENAI_IMAGE_API_DIALECT env var, and
default_image_api_dialect config for gateways that expect aspect-ratio
size plus metadata.resolution instead of pixel size.
This commit is contained in:
Jim Liu 宝玉 2026-04-12 02:14:18 -05:00
parent 58ba4579ef
commit 11d80eeaa9
18 changed files with 298 additions and 14 deletions

View File

@ -790,6 +790,7 @@ AI SDK-based image generation using OpenAI, Azure OpenAI, Google, OpenRouter, Da
| `--size` | Size (e.g., `1024x1024`) | | `--size` | Size (e.g., `1024x1024`) |
| `--quality` | `normal` or `2k` (default: `2k`) | | `--quality` | `normal` or `2k` (default: `2k`) |
| `--imageSize` | `1K`, `2K`, or `4K` for Google/OpenRouter | | `--imageSize` | `1K`, `2K`, or `4K` for Google/OpenRouter |
| `--imageApiDialect` | `openai-native` or `ratio-metadata` for OpenAI-compatible gateways |
| `--ref` | Reference images (Google, OpenAI, Azure OpenAI, OpenRouter, Replicate supported families, MiniMax, or Seedream 5.0/4.5/4.0) | | `--ref` | Reference images (Google, OpenAI, Azure OpenAI, OpenRouter, Replicate supported families, MiniMax, or Seedream 5.0/4.5/4.0) |
| `--n` | Number of images per request (`replicate` currently requires `--n 1`) | | `--n` | Number of images per request (`replicate` currently requires `--n 1`) |
| `--json` | JSON output | | `--json` | JSON output |
@ -823,6 +824,7 @@ AI SDK-based image generation using OpenAI, Azure OpenAI, Google, OpenRouter, Da
| `JIMENG_IMAGE_MODEL` | Jimeng model | `jimeng_t2i_v40` | | `JIMENG_IMAGE_MODEL` | Jimeng model | `jimeng_t2i_v40` |
| `SEEDREAM_IMAGE_MODEL` | Seedream model | `doubao-seedream-5-0-260128` | | `SEEDREAM_IMAGE_MODEL` | Seedream model | `doubao-seedream-5-0-260128` |
| `OPENAI_BASE_URL` | Custom OpenAI endpoint | - | | `OPENAI_BASE_URL` | Custom OpenAI endpoint | - |
| `OPENAI_IMAGE_API_DIALECT` | OpenAI-compatible image API dialect (`openai-native` or `ratio-metadata`) | `openai-native` |
| `OPENAI_IMAGE_USE_CHAT` | Use `/chat/completions` for OpenAI image generation | `false` | | `OPENAI_IMAGE_USE_CHAT` | Use `/chat/completions` for OpenAI image generation | `false` |
| `AZURE_OPENAI_BASE_URL` | Azure resource or deployment endpoint | - | | `AZURE_OPENAI_BASE_URL` | Azure resource or deployment endpoint | - |
| `AZURE_API_VERSION` | Azure image API version | `2025-04-01-preview` | | `AZURE_API_VERSION` | Azure image API version | `2025-04-01-preview` |

View File

@ -790,6 +790,7 @@ AI 驱动的生成后端。
| `--size` | 尺寸(如 `1024x1024` | | `--size` | 尺寸(如 `1024x1024` |
| `--quality` | `normal``2k`(默认:`2k` | | `--quality` | `normal``2k`(默认:`2k` |
| `--imageSize` | Google/OpenRouter 使用的 `1K`、`2K`、`4K` | | `--imageSize` | Google/OpenRouter 使用的 `1K`、`2K`、`4K` |
| `--imageApiDialect` | OpenAI 兼容网关的图像 API 方言(`openai-native` 或 `ratio-metadata` |
| `--ref` | 参考图片Google、OpenAI、Azure OpenAI、OpenRouter、Replicate 支持的模型家族、MiniMax 或 Seedream 5.0/4.5/4.0 | | `--ref` | 参考图片Google、OpenAI、Azure OpenAI、OpenRouter、Replicate 支持的模型家族、MiniMax 或 Seedream 5.0/4.5/4.0 |
| `--n` | 单次请求生成图片数量(`replicate` 当前只支持 `--n 1` | | `--n` | 单次请求生成图片数量(`replicate` 当前只支持 `--n 1` |
| `--json` | 输出 JSON 结果 | | `--json` | 输出 JSON 结果 |
@ -823,6 +824,7 @@ AI 驱动的生成后端。
| `JIMENG_IMAGE_MODEL` | 即梦模型 | `jimeng_t2i_v40` | | `JIMENG_IMAGE_MODEL` | 即梦模型 | `jimeng_t2i_v40` |
| `SEEDREAM_IMAGE_MODEL` | 豆包模型 | `doubao-seedream-5-0-260128` | | `SEEDREAM_IMAGE_MODEL` | 豆包模型 | `doubao-seedream-5-0-260128` |
| `OPENAI_BASE_URL` | 自定义 OpenAI 端点 | - | | `OPENAI_BASE_URL` | 自定义 OpenAI 端点 | - |
| `OPENAI_IMAGE_API_DIALECT` | OpenAI 兼容图像 API 方言(`openai-native` 或 `ratio-metadata` | `openai-native` |
| `OPENAI_IMAGE_USE_CHAT` | OpenAI 改走 `/chat/completions` | `false` | | `OPENAI_IMAGE_USE_CHAT` | OpenAI 改走 `/chat/completions` | `false` |
| `AZURE_OPENAI_BASE_URL` | Azure 资源或部署端点 | - | | `AZURE_OPENAI_BASE_URL` | Azure 资源或部署端点 | - |
| `AZURE_API_VERSION` | Azure 图像 API 版本 | `2025-04-01-preview` | | `AZURE_API_VERSION` | Azure 图像 API 版本 | `2025-04-01-preview` |

View File

@ -57,7 +57,7 @@ if (Test-Path "$HOME/.baoyu-skills/baoyu-imagine/EXTEND.md") { "user" }
Legacy compatibility: if `.baoyu-skills/baoyu-image-gen/EXTEND.md` exists and the new path does not, runtime renames it to `baoyu-imagine`. If both files exist, runtime leaves them unchanged and uses the new path. Legacy compatibility: if `.baoyu-skills/baoyu-image-gen/EXTEND.md` exists and the new path does not, runtime renames it to `baoyu-imagine`. If both files exist, runtime leaves them unchanged and uses the new path.
**EXTEND.md Supports**: Default provider | Default quality | Default aspect ratio | Default image size | Default models | Batch worker cap | Provider-specific batch limits **EXTEND.md Supports**: Default provider | Default quality | Default aspect ratio | Default image size | OpenAI image API dialect | Default models | Batch worker cap | Provider-specific batch limits
Schema: `references/config/preferences-schema.md` Schema: `references/config/preferences-schema.md`
@ -176,6 +176,7 @@ Paths in `promptFiles`, `image`, and `ref` are resolved relative to the batch fi
| `--size <WxH>` | Size (e.g., `1024x1024`) | | `--size <WxH>` | Size (e.g., `1024x1024`) |
| `--quality normal\|2k` | Quality preset (default: `2k`) | | `--quality normal\|2k` | Quality preset (default: `2k`) |
| `--imageSize 1K\|2K\|4K` | Image size for Google/OpenRouter (default: from quality) | | `--imageSize 1K\|2K\|4K` | Image size for Google/OpenRouter (default: from quality) |
| `--imageApiDialect openai-native\|ratio-metadata` | OpenAI-compatible image API dialect. Use `ratio-metadata` when the endpoint is OpenAI-compatible but expects aspect-ratio `size` plus `metadata.resolution` instead of pixel `size` |
| `--ref <files...>` | Reference images. Supported by Google multimodal, OpenAI GPT Image edits, Azure OpenAI edits (PNG/JPG only), OpenRouter multimodal models, Replicate supported families, MiniMax subject-reference, and Seedream 5.0/4.5/4.0. Not supported by Jimeng, Seedream 3.0, or removed SeedEdit 3.0 | | `--ref <files...>` | Reference images. Supported by Google multimodal, OpenAI GPT Image edits, Azure OpenAI edits (PNG/JPG only), OpenRouter multimodal models, Replicate supported families, MiniMax subject-reference, and Seedream 5.0/4.5/4.0. Not supported by Jimeng, Seedream 3.0, or removed SeedEdit 3.0 |
| `--n <count>` | Number of images. Replicate currently supports only `--n 1` because this path saves exactly one output image | | `--n <count>` | Number of images. Replicate currently supports only `--n 1` because this path saves exactly one output image |
| `--json` | JSON output | | `--json` | JSON output |
@ -209,6 +210,7 @@ Paths in `promptFiles`, `image`, and `ref` are resolved relative to the batch fi
| `JIMENG_IMAGE_MODEL` | Jimeng model override (default: jimeng_t2i_v40) | | `JIMENG_IMAGE_MODEL` | Jimeng model override (default: jimeng_t2i_v40) |
| `SEEDREAM_IMAGE_MODEL` | Seedream model override (default: doubao-seedream-5-0-260128) | | `SEEDREAM_IMAGE_MODEL` | Seedream model override (default: doubao-seedream-5-0-260128) |
| `OPENAI_BASE_URL` | Custom OpenAI endpoint | | `OPENAI_BASE_URL` | Custom OpenAI endpoint |
| `OPENAI_IMAGE_API_DIALECT` | OpenAI-compatible image API dialect override (`openai-native` or `ratio-metadata`) |
| `AZURE_OPENAI_BASE_URL` | Azure resource endpoint or deployment endpoint | | `AZURE_OPENAI_BASE_URL` | Azure resource endpoint or deployment endpoint |
| `AZURE_API_VERSION` | Azure image API version (default: `2025-04-01-preview`) | | `AZURE_API_VERSION` | Azure image API version (default: `2025-04-01-preview`) |
| `OPENROUTER_BASE_URL` | Custom OpenRouter endpoint (default: `https://openrouter.ai/api/v1`) | | `OPENROUTER_BASE_URL` | Custom OpenRouter endpoint (default: `https://openrouter.ai/api/v1`) |
@ -242,6 +244,22 @@ For Azure, `--model` / `default_model.azure` should be the Azure deployment name
**EXTEND.md overrides env vars**. If both EXTEND.md `default_model.google: "gemini-3-pro-image-preview"` and env var `GOOGLE_IMAGE_MODEL=gemini-3.1-flash-image-preview` exist, EXTEND.md wins. **EXTEND.md overrides env vars**. If both EXTEND.md `default_model.google: "gemini-3-pro-image-preview"` and env var `GOOGLE_IMAGE_MODEL=gemini-3.1-flash-image-preview` exist, EXTEND.md wins.
### OpenAI-Compatible Gateway Dialects
`provider=openai` means the auth and routing entrypoint is OpenAI-compatible. It does **not** guarantee that the upstream image API uses OpenAI native image-request semantics.
Use `default_image_api_dialect` in `EXTEND.md`, `OPENAI_IMAGE_API_DIALECT`, or `--imageApiDialect` when the endpoint expects a different wire format:
- `openai-native`: Sends pixel `size` such as `1536x1024` and native OpenAI quality fields when supported
- `ratio-metadata`: Sends aspect-ratio `size` such as `16:9` and maps quality/size intent into `metadata.resolution` (`1K|2K|4K`) plus `metadata.orientation`
Recommended use:
- OpenAI native Images API or strict clones: keep `openai-native`
- OpenAI-compatible gateways in front of Gemini or similar models: try `ratio-metadata`
Current limitation: `ratio-metadata` only applies to text-to-image generation. Reference-image edit flows still require `openai-native` or another provider with first-class edit support.
**Agent MUST display model info** before each generation: **Agent MUST display model info** before each generation:
- Show: `Using [provider] / [model]` - Show: `Using [provider] / [model]`
- Show switch hint: `Switch model: --model <id> | EXTEND.md default_model.[provider] | env <PROVIDER>_IMAGE_MODEL` - Show switch hint: `Switch model: --model <id> | EXTEND.md default_model.[provider] | env <PROVIDER>_IMAGE_MODEL`

View File

@ -175,6 +175,7 @@ default_provider: [selected provider or null]
default_quality: [selected quality] default_quality: [selected quality]
default_aspect_ratio: null default_aspect_ratio: null
default_image_size: null default_image_size: null
default_image_api_dialect: null
default_model: default_model:
google: [selected google model or null] google: [selected google model or null]
openai: null openai: null
@ -187,6 +188,8 @@ default_model:
--- ---
``` ```
If the user selects `OpenAI` but says their endpoint is only OpenAI-compatible and fronts another image model family, ask whether the gateway expects aspect-ratio `size` plus metadata-based resolution. Save `default_image_api_dialect: ratio-metadata` only when they explicitly confirm this; otherwise leave it `null` / `openai-native`.
## Flow 2: EXTEND.md Exists, Model Null ## Flow 2: EXTEND.md Exists, Model Null
When EXTEND.md exists but `default_model.[current_provider]` is null, ask ONLY the model question for the current provider. When EXTEND.md exists but `default_model.[current_provider]` is null, ask ONLY the model question for the current provider.

View File

@ -19,6 +19,8 @@ default_aspect_ratio: null # "16:9"|"1:1"|"4:3"|"3:4"|"2.35:1"|null
default_image_size: null # 1K|2K|4K|null (Google/OpenRouter, overrides quality) default_image_size: null # 1K|2K|4K|null (Google/OpenRouter, overrides quality)
default_image_api_dialect: null # openai-native|ratio-metadata|null (OpenAI-compatible gateways; null = use env/default)
default_model: default_model:
google: null # e.g., "gemini-3-pro-image-preview", "gemini-3.1-flash-image-preview" google: null # e.g., "gemini-3-pro-image-preview", "gemini-3.1-flash-image-preview"
openai: null # e.g., "gpt-image-1.5", "gpt-image-1" openai: null # e.g., "gpt-image-1.5", "gpt-image-1"
@ -68,6 +70,7 @@ batch:
| `default_quality` | string\|null | null | Default quality (null = 2k) | | `default_quality` | string\|null | null | Default quality (null = 2k) |
| `default_aspect_ratio` | string\|null | null | Default aspect ratio | | `default_aspect_ratio` | string\|null | null | Default aspect ratio |
| `default_image_size` | string\|null | null | Google/OpenRouter image size (overrides quality) | | `default_image_size` | string\|null | null | Google/OpenRouter image size (overrides quality) |
| `default_image_api_dialect` | string\|null | null | OpenAI-compatible image dialect (`openai-native` or `ratio-metadata`) |
| `default_model.google` | string\|null | null | Google default model | | `default_model.google` | string\|null | null | Google default model |
| `default_model.openai` | string\|null | null | OpenAI default model | | `default_model.openai` | string\|null | null | OpenAI default model |
| `default_model.azure` | string\|null | null | Azure default deployment name | | `default_model.azure` | string\|null | null | Azure default deployment name |
@ -88,6 +91,7 @@ batch:
version: 1 version: 1
default_provider: google default_provider: google
default_quality: 2k default_quality: 2k
default_image_api_dialect: null
--- ---
``` ```
@ -99,6 +103,7 @@ default_provider: google
default_quality: 2k default_quality: 2k
default_aspect_ratio: "16:9" default_aspect_ratio: "16:9"
default_image_size: 2K default_image_size: 2K
default_image_api_dialect: null
default_model: default_model:
google: "gemini-3-pro-image-preview" google: "gemini-3-pro-image-preview"
openai: "gpt-image-1.5" openai: "gpt-image-1.5"

View File

@ -17,6 +17,7 @@ import {
mergeConfig, mergeConfig,
normalizeOutputImagePath, normalizeOutputImagePath,
parseArgs, parseArgs,
parseOpenAIImageApiDialect,
parseSimpleYaml, parseSimpleYaml,
} from "./main.ts"; } from "./main.ts";
@ -33,6 +34,7 @@ function makeArgs(overrides: Partial<CliArgs> = {}): CliArgs {
quality: null, quality: null,
imageSize: null, imageSize: null,
imageSizeSource: null, imageSizeSource: null,
imageApiDialect: null,
referenceImages: [], referenceImages: [],
n: 1, n: 1,
batchFile: null, batchFile: null,
@ -85,6 +87,8 @@ test("parseArgs parses the main baoyu-imagine CLI flags", () => {
"2k", "2k",
"--imageSize", "--imageSize",
"4k", "4k",
"--imageApiDialect",
"ratio-metadata",
"--ref", "--ref",
"ref/one.png", "ref/one.png",
"ref/two.jpg", "ref/two.jpg",
@ -102,6 +106,7 @@ test("parseArgs parses the main baoyu-imagine CLI flags", () => {
assert.equal(args.aspectRatioSource, null); assert.equal(args.aspectRatioSource, null);
assert.equal(args.imageSize, "4K"); assert.equal(args.imageSize, "4K");
assert.equal(args.imageSizeSource, "cli"); assert.equal(args.imageSizeSource, "cli");
assert.equal(args.imageApiDialect, "ratio-metadata");
assert.deepEqual(args.referenceImages, ["ref/one.png", "ref/two.jpg"]); assert.deepEqual(args.referenceImages, ["ref/one.png", "ref/two.jpg"]);
assert.equal(args.n, 3); assert.equal(args.n, 3);
assert.equal(args.jobs, 5); assert.equal(args.jobs, 5);
@ -125,6 +130,7 @@ default_provider: openrouter
default_quality: normal default_quality: normal
default_aspect_ratio: '16:9' default_aspect_ratio: '16:9'
default_image_size: 2K default_image_size: 2K
default_image_api_dialect: ratio-metadata
default_model: default_model:
google: gemini-3-pro-image-preview google: gemini-3-pro-image-preview
openai: gpt-image-1.5 openai: gpt-image-1.5
@ -157,6 +163,7 @@ batch:
assert.equal(config.default_quality, "normal"); assert.equal(config.default_quality, "normal");
assert.equal(config.default_aspect_ratio, "16:9"); assert.equal(config.default_aspect_ratio, "16:9");
assert.equal(config.default_image_size, "2K"); assert.equal(config.default_image_size, "2K");
assert.equal(config.default_image_api_dialect, "ratio-metadata");
assert.equal(config.default_model?.google, "gemini-3-pro-image-preview"); assert.equal(config.default_model?.google, "gemini-3-pro-image-preview");
assert.equal(config.default_model?.openai, "gpt-image-1.5"); assert.equal(config.default_model?.openai, "gpt-image-1.5");
assert.equal(config.default_model?.zai, "glm-image"); assert.equal(config.default_model?.zai, "glm-image");
@ -252,6 +259,7 @@ test("mergeConfig only fills values missing from CLI args", () => {
default_quality: "2k", default_quality: "2k",
default_aspect_ratio: "3:2", default_aspect_ratio: "3:2",
default_image_size: "2K", default_image_size: "2K",
default_image_api_dialect: "ratio-metadata",
} satisfies Partial<ExtendConfig>, } satisfies Partial<ExtendConfig>,
); );
@ -261,6 +269,7 @@ test("mergeConfig only fills values missing from CLI args", () => {
assert.equal(merged.aspectRatioSource, "config"); assert.equal(merged.aspectRatioSource, "config");
assert.equal(merged.imageSize, "4K"); assert.equal(merged.imageSize, "4K");
assert.equal(merged.imageSizeSource, "cli"); assert.equal(merged.imageSizeSource, "cli");
assert.equal(merged.imageApiDialect, "ratio-metadata");
}); });
test("mergeConfig tags inherited imageSize defaults so providers can ignore incompatible config", () => { test("mergeConfig tags inherited imageSize defaults so providers can ignore incompatible config", () => {
@ -275,6 +284,25 @@ test("mergeConfig tags inherited imageSize defaults so providers can ignore inco
assert.equal(merged.imageSizeSource, "config"); assert.equal(merged.imageSizeSource, "config");
}); });
// Env-var fallback: when neither the CLI flag nor the EXTEND.md config sets a
// dialect, mergeConfig sources it from OPENAI_IMAGE_API_DIALECT.
test("mergeConfig falls back to OPENAI_IMAGE_API_DIALECT when CLI and EXTEND are unset", (t) => {
useEnv(t, {
OPENAI_IMAGE_API_DIALECT: "ratio-metadata",
});
const merged = mergeConfig(makeArgs(), {});
assert.equal(merged.imageApiDialect, "ratio-metadata");
});
// parseOpenAIImageApiDialect is the shared validator for env/YAML dialect
// strings: it passes through the two supported ids, maps empty input to null,
// and throws on anything else.
test("parseOpenAIImageApiDialect validates supported values", () => {
assert.equal(parseOpenAIImageApiDialect("openai-native"), "openai-native");
assert.equal(parseOpenAIImageApiDialect("ratio-metadata"), "ratio-metadata");
assert.equal(parseOpenAIImageApiDialect(null), null);
assert.throws(
() => parseOpenAIImageApiDialect("gateway-magic"),
/Invalid OpenAI image API dialect/,
);
});
test("detectProvider rejects non-ref-capable providers and prefers Google first when multiple keys exist", (t) => { test("detectProvider rejects non-ref-capable providers and prefers Google first when multiple keys exist", (t) => {
assert.throws( assert.throws(
() => () =>
@ -492,6 +520,7 @@ test("loadBatchTasks and createTaskArgs resolve batch-relative paths", async (t)
makeArgs({ makeArgs({
provider: "replicate", provider: "replicate",
quality: "2k", quality: "2k",
imageApiDialect: "ratio-metadata",
json: true, json: true,
}), }),
loaded.tasks[0]!, loaded.tasks[0]!,
@ -508,6 +537,7 @@ test("loadBatchTasks and createTaskArgs resolve batch-relative paths", async (t)
assert.equal(taskArgs.provider, "replicate"); assert.equal(taskArgs.provider, "replicate");
assert.equal(taskArgs.aspectRatio, "16:9"); assert.equal(taskArgs.aspectRatio, "16:9");
assert.equal(taskArgs.quality, "2k"); assert.equal(taskArgs.quality, "2k");
assert.equal(taskArgs.imageApiDialect, "ratio-metadata");
assert.equal(taskArgs.json, true); assert.equal(taskArgs.json, true);
}); });

View File

@ -8,6 +8,7 @@ import type {
BatchTaskInput, BatchTaskInput,
CliArgs, CliArgs,
ExtendConfig, ExtendConfig,
OpenAIImageApiDialect,
Provider, Provider,
} from "./types"; } from "./types";
@ -83,6 +84,7 @@ Options:
--size <WxH> Size (e.g., 1024x1024) --size <WxH> Size (e.g., 1024x1024)
--quality normal|2k Quality preset (default: 2k) --quality normal|2k Quality preset (default: 2k)
--imageSize 1K|2K|4K Image size for Google/OpenRouter (default: from quality) --imageSize 1K|2K|4K Image size for Google/OpenRouter (default: from quality)
--imageApiDialect <id> OpenAI-compatible image dialect: openai-native|ratio-metadata
--ref <files...> Reference images (Google, OpenAI, Azure, OpenRouter, Replicate supported families, MiniMax, or Seedream 4.0/4.5/5.0) --ref <files...> Reference images (Google, OpenAI, Azure, OpenRouter, Replicate supported families, MiniMax, or Seedream 4.0/4.5/5.0)
--n <count> Number of images for the current task (default: 1; Replicate currently requires 1) --n <count> Number of images for the current task (default: 1; Replicate currently requires 1)
--json JSON output --json JSON output
@ -133,6 +135,7 @@ Environment variables:
JIMENG_IMAGE_MODEL Default Jimeng model (jimeng_t2i_v40) JIMENG_IMAGE_MODEL Default Jimeng model (jimeng_t2i_v40)
SEEDREAM_IMAGE_MODEL Default Seedream model (doubao-seedream-5-0-260128) SEEDREAM_IMAGE_MODEL Default Seedream model (doubao-seedream-5-0-260128)
OPENAI_BASE_URL Custom OpenAI endpoint OPENAI_BASE_URL Custom OpenAI endpoint
OPENAI_IMAGE_API_DIALECT OpenAI-compatible image dialect (openai-native|ratio-metadata)
OPENAI_IMAGE_USE_CHAT Use /chat/completions instead of /images/generations (true|false) OPENAI_IMAGE_USE_CHAT Use /chat/completions instead of /images/generations (true|false)
OPENROUTER_BASE_URL Custom OpenRouter endpoint OPENROUTER_BASE_URL Custom OpenRouter endpoint
OPENROUTER_HTTP_REFERER Optional app URL for OpenRouter attribution OPENROUTER_HTTP_REFERER Optional app URL for OpenRouter attribution
@ -170,6 +173,7 @@ export function parseArgs(argv: string[]): CliArgs {
quality: null, quality: null,
imageSize: null, imageSize: null,
imageSizeSource: null, imageSizeSource: null,
imageApiDialect: null,
referenceImages: [], referenceImages: [],
n: 1, n: 1,
batchFile: null, batchFile: null,
@ -299,6 +303,15 @@ export function parseArgs(argv: string[]): CliArgs {
continue; continue;
} }
if (a === "--imageApiDialect") {
const v = argv[++i];
if (v !== "openai-native" && v !== "ratio-metadata") {
throw new Error(`Invalid imageApiDialect: ${v}`);
}
out.imageApiDialect = v;
continue;
}
if (a === "--ref" || a === "--reference") { if (a === "--ref" || a === "--reference") {
const { items, next } = takeMany(i); const { items, next } = takeMany(i);
if (items.length === 0) throw new Error(`Missing files for ${a}`); if (items.length === 0) throw new Error(`Missing files for ${a}`);
@ -402,6 +415,9 @@ export function parseSimpleYaml(yaml: string): Partial<ExtendConfig> {
config.default_aspect_ratio = cleaned === "null" ? null : cleaned; config.default_aspect_ratio = cleaned === "null" ? null : cleaned;
} else if (key === "default_image_size") { } else if (key === "default_image_size") {
config.default_image_size = value === "null" ? null : value as "1K" | "2K" | "4K"; config.default_image_size = value === "null" ? null : value as "1K" | "2K" | "4K";
} else if (key === "default_image_api_dialect") {
config.default_image_api_dialect =
value === "null" ? null : parseOpenAIImageApiDialect(value);
} else if (key === "default_model") { } else if (key === "default_model") {
config.default_model = { config.default_model = {
google: null, google: null,
@ -487,6 +503,15 @@ export function parseSimpleYaml(yaml: string): Partial<ExtendConfig> {
return config; return config;
} }
/**
 * Normalize a raw dialect string (possibly quoted YAML or env text) to a
 * supported OpenAI image API dialect.
 *
 * Returns null for empty/undefined/null input. Throws when the value, after
 * stripping quote characters and trimming whitespace, is neither
 * "openai-native" nor "ratio-metadata".
 */
export function parseOpenAIImageApiDialect(
  value: string | undefined | null
): OpenAIImageApiDialect | null {
  if (!value) return null;
  // YAML scalars and env values may arrive wrapped in single or double quotes.
  const cleaned = value.replace(/['"]/g, "").trim();
  switch (cleaned) {
    case "openai-native":
    case "ratio-metadata":
      return cleaned;
    default:
      throw new Error(`Invalid OpenAI image API dialect: ${value}`);
  }
}
type ExtendConfigPathPair = { type ExtendConfigPathPair = {
current: string; current: string;
legacy: string; legacy: string;
@ -548,6 +573,10 @@ export async function loadExtendConfig(
export function mergeConfig(args: CliArgs, extend: Partial<ExtendConfig>): CliArgs { export function mergeConfig(args: CliArgs, extend: Partial<ExtendConfig>): CliArgs {
const aspectRatio = args.aspectRatio ?? extend.default_aspect_ratio ?? null; const aspectRatio = args.aspectRatio ?? extend.default_aspect_ratio ?? null;
const imageSize = args.imageSize ?? extend.default_image_size ?? null; const imageSize = args.imageSize ?? extend.default_image_size ?? null;
const imageApiDialect =
args.imageApiDialect ??
extend.default_image_api_dialect ??
parseOpenAIImageApiDialect(process.env.OPENAI_IMAGE_API_DIALECT);
return { return {
...args, ...args,
provider: args.provider ?? extend.default_provider ?? null, provider: args.provider ?? extend.default_provider ?? null,
@ -560,6 +589,7 @@ export function mergeConfig(args: CliArgs, extend: Partial<ExtendConfig>): CliAr
imageSizeSource: imageSizeSource:
args.imageSizeSource ?? args.imageSizeSource ??
(args.imageSize !== null ? "cli" : (imageSize !== null ? "config" : null)), (args.imageSize !== null ? "cli" : (imageSize !== null ? "config" : null)),
imageApiDialect,
}; };
} }
@ -891,6 +921,7 @@ export function createTaskArgs(baseArgs: CliArgs, task: BatchTaskInput, batchDir
quality: task.quality ?? baseArgs.quality ?? null, quality: task.quality ?? baseArgs.quality ?? null,
imageSize: task.imageSize ?? baseArgs.imageSize ?? null, imageSize: task.imageSize ?? baseArgs.imageSize ?? null,
imageSizeSource: task.imageSize != null ? "task" : (baseArgs.imageSizeSource ?? null), imageSizeSource: task.imageSize != null ? "task" : (baseArgs.imageSizeSource ?? null),
imageApiDialect: task.imageApiDialect ?? baseArgs.imageApiDialect ?? null,
referenceImages: task.ref ? task.ref.map((filePath) => resolveBatchPath(batchDir, filePath)) : [], referenceImages: task.ref ? task.ref.map((filePath) => resolveBatchPath(batchDir, filePath)) : [],
n: task.n ?? baseArgs.n, n: task.n ?? baseArgs.n,
batchFile: null, batchFile: null,

View File

@ -48,6 +48,7 @@ function makeArgs(overrides: Partial<CliArgs> = {}): CliArgs {
size: null, size: null,
quality: null, quality: null,
imageSize: null, imageSize: null,
imageApiDialect: null,
referenceImages: [], referenceImages: [],
n: 1, n: 1,
batchFile: null, batchFile: null,

View File

@ -50,6 +50,7 @@ function makeArgs(overrides: Partial<CliArgs> = {}): CliArgs {
size: null, size: null,
quality: null, quality: null,
imageSize: null, imageSize: null,
imageApiDialect: null,
referenceImages: [], referenceImages: [],
n: 1, n: 1,
batchFile: null, batchFile: null,

View File

@ -15,6 +15,7 @@ function makeArgs(overrides: Partial<CliArgs> = {}): CliArgs {
size: null, size: null,
quality: null, quality: null,
imageSize: null, imageSize: null,
imageApiDialect: null,
referenceImages: [], referenceImages: [],
n: 1, n: 1,
batchFile: null, batchFile: null,

View File

@ -50,6 +50,7 @@ function makeArgs(overrides: Partial<CliArgs> = {}): CliArgs {
size: null, size: null,
quality: null, quality: null,
imageSize: null, imageSize: null,
imageApiDialect: null,
referenceImages: [], referenceImages: [],
n: 1, n: 1,
batchFile: null, batchFile: null,

View File

@ -2,9 +2,16 @@ import assert from "node:assert/strict";
import test from "node:test"; import test from "node:test";
import { import {
buildOpenAIGenerationsBody,
extractImageFromResponse, extractImageFromResponse,
getOpenAIAspectRatio,
getOpenAIImageApiDialect,
getOpenAIResolution,
getMimeType, getMimeType,
getOpenAISize, getOpenAISize,
getOrientationFromAspectRatio,
inferAspectRatioFromSize,
inferResolutionFromSize,
parseAspectRatio, parseAspectRatio,
} from "./openai.ts"; } from "./openai.ts";
@ -18,6 +25,69 @@ test("OpenAI aspect-ratio parsing and size selection match model families", () =
assert.equal(getOpenAISize("dall-e-2", "16:9", "2k"), "1024x1024"); assert.equal(getOpenAISize("dall-e-2", "16:9", "2k"), "1024x1024");
assert.equal(getOpenAISize("gpt-image-1.5", "16:9", "2k"), "1536x1024"); assert.equal(getOpenAISize("gpt-image-1.5", "16:9", "2k"), "1536x1024");
assert.equal(getOpenAISize("gpt-image-1.5", "4:3", "2k"), "1024x1024"); assert.equal(getOpenAISize("gpt-image-1.5", "4:3", "2k"), "1024x1024");
assert.equal(inferAspectRatioFromSize("1536x1024"), "3:2");
assert.equal(inferResolutionFromSize("1536x1024"), "2K");
assert.equal(getOpenAIAspectRatio({ aspectRatio: null, size: "2048x1152" }), "16:9");
assert.equal(getOpenAIResolution({ imageSize: null, size: "2048x1152", quality: "normal" }), "2K");
assert.equal(getOrientationFromAspectRatio("16:9"), "landscape");
assert.equal(getOrientationFromAspectRatio("9:16"), "portrait");
assert.equal(getOrientationFromAspectRatio("1:1"), null);
assert.equal(getOpenAIImageApiDialect({ imageApiDialect: null }), "openai-native");
});
test("OpenAI generations body switches between native and ratio-metadata dialects", () => {
// Default (null) dialect: native OpenAI semantics -> pixel `size`, no metadata.
assert.deepEqual(
buildOpenAIGenerationsBody("Draw a skyline", "gpt-image-1.5", {
aspectRatio: "16:9",
size: null,
quality: "2k",
imageSize: null,
imageApiDialect: null,
}),
{
model: "gpt-image-1.5",
prompt: "Draw a skyline",
size: "1536x1024",
},
);
// ratio-metadata dialect: `size` carries the aspect ratio and the
// quality/size intent moves into metadata.resolution + metadata.orientation.
assert.deepEqual(
buildOpenAIGenerationsBody("Draw a skyline", "gemini-3-pro-image-preview", {
aspectRatio: "16:9",
size: null,
quality: "2k",
imageSize: null,
imageApiDialect: "ratio-metadata",
}),
{
model: "gemini-3-pro-image-preview",
prompt: "Draw a skyline",
size: "16:9",
metadata: {
resolution: "2K",
orientation: "landscape",
},
},
);
// With no explicit aspect ratio, ratio-metadata infers both the ratio and
// the resolution from the pixel `size` (1152x2048 -> 9:16 portrait, 2K).
assert.deepEqual(
buildOpenAIGenerationsBody("Draw a portrait", "gemini-3-pro-image-preview", {
aspectRatio: null,
size: "1152x2048",
quality: "normal",
imageSize: null,
imageApiDialect: "ratio-metadata",
}),
{
model: "gemini-3-pro-image-preview",
prompt: "Draw a portrait",
size: "9:16",
metadata: {
resolution: "2K",
orientation: "portrait",
},
},
);
}); });
test("OpenAI mime-type detection covers supported reference image extensions", () => { test("OpenAI mime-type detection covers supported reference image extensions", () => {

View File

@ -1,6 +1,6 @@
import path from "node:path"; import path from "node:path";
import { readFile } from "node:fs/promises"; import { readFile } from "node:fs/promises";
import type { CliArgs } from "../types"; import type { CliArgs, OpenAIImageApiDialect } from "../types";
export function getDefaultModel(): string { export function getDefaultModel(): string {
return process.env.OPENAI_IMAGE_MODEL || "gpt-image-1.5"; return process.env.OPENAI_IMAGE_MODEL || "gpt-image-1.5";
@ -23,6 +23,8 @@ type SizeMapping = {
portrait: string; portrait: string;
}; };
type OpenAIGenerationsBody = Record<string, unknown>;
export function getOpenAISize( export function getOpenAISize(
model: string, model: string,
ar: string | null, ar: string | null,
@ -60,6 +62,114 @@ export function getOpenAISize(
return sizes.square; return sizes.square;
} }
// Parse a "WxH" string (case-insensitive separator, optional spaces around it)
// into positive integer dimensions; returns null when the input is not
// pixel-shaped (e.g. an aspect ratio like "16:9").
function parsePixelSize(value: string): { width: number; height: number } | null {
  const m = /^(\d+)\s*[xX]\s*(\d+)$/.exec(value);
  if (m === null) return null;
  const width = Number.parseInt(m[1]!, 10);
  const height = Number.parseInt(m[2]!, 10);
  const valid =
    Number.isFinite(width) && Number.isFinite(height) && width > 0 && height > 0;
  return valid ? { width, height } : null;
}

// Greatest common divisor via iterative Euclid. Returns 1 when both inputs
// are 0 so callers can divide by the result safely.
function gcd(a: number, b: number): number {
  let m = Math.abs(a);
  let n = Math.abs(b);
  while (n !== 0) {
    [m, n] = [n, m % n];
  }
  return m === 0 ? 1 : m;
}

/**
 * Resolve the effective OpenAI image API dialect for a request.
 * A null/undefined value means "not configured anywhere", which defaults to
 * native OpenAI wire semantics.
 */
export function getOpenAIImageApiDialect(args: Pick<CliArgs, "imageApiDialect">): OpenAIImageApiDialect {
  return args.imageApiDialect == null ? "openai-native" : args.imageApiDialect;
}

/**
 * Derive a reduced aspect-ratio string ("3:2", "16:9", ...) from a pixel size
 * such as "1536x1024"; null when the size is absent or not pixel-shaped.
 */
export function inferAspectRatioFromSize(size: string | null): string | null {
  const dims = size ? parsePixelSize(size) : null;
  if (!dims) return null;
  const d = gcd(dims.width, dims.height);
  return [dims.width / d, dims.height / d].join(":");
}

/**
 * Map a pixel size to a coarse resolution bucket by its longest edge:
 * <=1024 -> "1K", <=2048 -> "2K", otherwise "4K". Null for non-pixel input.
 */
export function inferResolutionFromSize(size: string | null): "1K" | "2K" | "4K" | null {
  const dims = size ? parsePixelSize(size) : null;
  if (!dims) return null;
  const edge = Math.max(dims.width, dims.height);
  if (edge > 2048) return "4K";
  return edge > 1024 ? "2K" : "1K";
}

/**
 * Aspect ratio for the ratio-metadata dialect: explicit --aspectRatio wins,
 * then a ratio inferred from --size, then the "1:1" square fallback.
 */
export function getOpenAIAspectRatio(args: Pick<CliArgs, "aspectRatio" | "size">): string {
  if (args.aspectRatio != null) return args.aspectRatio;
  return inferAspectRatioFromSize(args.size) ?? "1:1";
}

/**
 * Resolution for the ratio-metadata dialect. Precedence: an explicit
 * --imageSize (1K/2K/4K), then a bucket inferred from the pixel --size, then
 * the quality preset ("normal" -> 1K, anything else -> 2K).
 */
export function getOpenAIResolution(
  args: Pick<CliArgs, "imageSize" | "size" | "quality">
): "1K" | "2K" | "4K" {
  switch (args.imageSize) {
    case "1K":
    case "2K":
    case "4K":
      return args.imageSize;
  }
  const fromSize = inferResolutionFromSize(args.size);
  if (fromSize !== null) return fromSize;
  return args.quality === "normal" ? "1K" : "2K";
}
/**
 * Classify an aspect-ratio string as "landscape" or "portrait".
 * Returns null when the ratio cannot be parsed (delegated to
 * parseAspectRatio) or is near-square (within 10% of 1:1), where an
 * orientation hint adds no signal.
 */
export function getOrientationFromAspectRatio(ar: string): "landscape" | "portrait" | null {
  const dims = parseAspectRatio(ar);
  if (!dims) return null;
  const r = dims.width / dims.height;
  if (Math.abs(r - 1) < 0.1) return null;
  return r > 1 ? "landscape" : "portrait";
}
/**
 * Build the POST body for /images/generations according to the active dialect.
 *
 * ratio-metadata dialect: `size` carries the aspect ratio (e.g. "16:9") and a
 * `metadata` object carries the resolution bucket, plus orientation when the
 * ratio is not near-square.
 *
 * openai-native dialect: `size` carries a pixel size (explicit --size or a
 * model-appropriate default), with dall-e-3 quality mapped to "hd"/"standard".
 */
export function buildOpenAIGenerationsBody(
  prompt: string,
  model: string,
  args: Pick<CliArgs, "aspectRatio" | "size" | "quality" | "imageSize" | "imageApiDialect">
): OpenAIGenerationsBody {
  const dialect = getOpenAIImageApiDialect(args);
  if (dialect === "ratio-metadata") {
    const ratio = getOpenAIAspectRatio(args);
    const meta: Record<string, string> = { resolution: getOpenAIResolution(args) };
    const orient = getOrientationFromAspectRatio(ratio);
    if (orient) meta.orientation = orient;
    return { model, prompt, size: ratio, metadata: meta };
  }
  const request: OpenAIGenerationsBody = {
    model,
    prompt,
    size: args.size || getOpenAISize(model, args.aspectRatio, args.quality),
  };
  if (model.includes("dall-e-3")) {
    request.quality = args.quality === "2k" ? "hd" : "standard";
  }
  return request;
}
export async function generateImage( export async function generateImage(
prompt: string, prompt: string,
model: string, model: string,
@ -78,18 +188,28 @@ export async function generateImage(
return generateWithChatCompletions(baseURL, apiKey, prompt, model); return generateWithChatCompletions(baseURL, apiKey, prompt, model);
} }
const size = args.size || getOpenAISize(model, args.aspectRatio, args.quality); const imageApiDialect = getOpenAIImageApiDialect(args);
if (args.referenceImages.length > 0) { if (args.referenceImages.length > 0) {
if (imageApiDialect !== "openai-native") {
throw new Error(
"Reference images are not supported with the ratio-metadata OpenAI dialect yet. Use openai-native, Google, Azure, OpenRouter, MiniMax, Seedream, or Replicate for image-edit workflows."
);
}
if (model.includes("dall-e-2") || model.includes("dall-e-3")) { if (model.includes("dall-e-2") || model.includes("dall-e-3")) {
throw new Error( throw new Error(
"Reference images with OpenAI in this skill require GPT Image models. Use --model gpt-image-1.5 (or another gpt-image model)." "Reference images with OpenAI in this skill require GPT Image models. Use --model gpt-image-1.5 (or another gpt-image model)."
); );
} }
const size = args.size || getOpenAISize(model, args.aspectRatio, args.quality);
return generateWithOpenAIEdits(baseURL, apiKey, prompt, model, size, args.referenceImages, args.quality); return generateWithOpenAIEdits(baseURL, apiKey, prompt, model, size, args.referenceImages, args.quality);
} }
return generateWithOpenAIGenerations(baseURL, apiKey, prompt, model, size, args.quality); return generateWithOpenAIGenerations(
baseURL,
apiKey,
buildOpenAIGenerationsBody(prompt, model, args)
);
} }
async function generateWithChatCompletions( async function generateWithChatCompletions(
@ -129,17 +249,8 @@ async function generateWithChatCompletions(
async function generateWithOpenAIGenerations( async function generateWithOpenAIGenerations(
baseURL: string, baseURL: string,
apiKey: string, apiKey: string,
prompt: string, body: OpenAIGenerationsBody
model: string,
size: string,
quality: CliArgs["quality"]
): Promise<Uint8Array> { ): Promise<Uint8Array> {
const body: Record<string, any> = { model, prompt, size };
if (model.includes("dall-e-3")) {
body.quality = quality === "2k" ? "hd" : "standard";
}
const res = await fetch(`${baseURL}/images/generations`, { const res = await fetch(`${baseURL}/images/generations`, {
method: "POST", method: "POST",
headers: { headers: {

View File

@ -28,6 +28,7 @@ function makeArgs(overrides: Partial<CliArgs> = {}): CliArgs {
size: null, size: null,
quality: null, quality: null,
imageSize: null, imageSize: null,
imageApiDialect: null,
referenceImages: [], referenceImages: [],
n: 1, n: 1,
batchFile: null, batchFile: null,

View File

@ -24,6 +24,7 @@ function makeArgs(overrides: Partial<CliArgs> = {}): CliArgs {
quality: null, quality: null,
imageSize: null, imageSize: null,
imageSizeSource: null, imageSizeSource: null,
imageApiDialect: null,
referenceImages: [], referenceImages: [],
n: 1, n: 1,
batchFile: null, batchFile: null,

View File

@ -25,6 +25,7 @@ function makeArgs(overrides: Partial<CliArgs> = {}): CliArgs {
size: null, size: null,
quality: null, quality: null,
imageSize: null, imageSize: null,
imageApiDialect: null,
referenceImages: [], referenceImages: [],
n: 1, n: 1,
batchFile: null, batchFile: null,

View File

@ -25,6 +25,7 @@ function makeArgs(overrides: Partial<CliArgs> = {}): CliArgs {
size: null, size: null,
quality: null, quality: null,
imageSize: null, imageSize: null,
imageApiDialect: null,
referenceImages: [], referenceImages: [],
n: 1, n: 1,
batchFile: null, batchFile: null,

View File

@ -10,6 +10,7 @@ export type Provider =
| "seedream" | "seedream"
| "azure"; | "azure";
export type Quality = "normal" | "2k"; export type Quality = "normal" | "2k";
export type OpenAIImageApiDialect = "openai-native" | "ratio-metadata";
export type CliArgs = { export type CliArgs = {
prompt: string | null; prompt: string | null;
@ -23,6 +24,7 @@ export type CliArgs = {
quality: Quality | null; quality: Quality | null;
imageSize: string | null; imageSize: string | null;
imageSizeSource?: "cli" | "task" | "config" | null; imageSizeSource?: "cli" | "task" | "config" | null;
imageApiDialect: OpenAIImageApiDialect | null;
referenceImages: string[]; referenceImages: string[];
n: number; n: number;
batchFile: string | null; batchFile: string | null;
@ -42,6 +44,7 @@ export type BatchTaskInput = {
size?: string | null; size?: string | null;
quality?: Quality | null; quality?: Quality | null;
imageSize?: "1K" | "2K" | "4K" | null; imageSize?: "1K" | "2K" | "4K" | null;
imageApiDialect?: OpenAIImageApiDialect | null;
ref?: string[]; ref?: string[];
n?: number; n?: number;
}; };
@ -59,6 +62,7 @@ export type ExtendConfig = {
default_quality: Quality | null; default_quality: Quality | null;
default_aspect_ratio: string | null; default_aspect_ratio: string | null;
default_image_size: "1K" | "2K" | "4K" | null; default_image_size: "1K" | "2K" | "4K" | null;
default_image_api_dialect: OpenAIImageApiDialect | null;
default_model: { default_model: {
google: string | null; google: string | null;
openai: string | null; openai: string | null;