feat(baoyu-imagine): add OpenAI-compatible image API dialect support
Add --imageApiDialect flag, OPENAI_IMAGE_API_DIALECT env var, and default_image_api_dialect config for gateways that expect aspect-ratio size plus metadata.resolution instead of pixel size.
This commit is contained in:
parent
58ba4579ef
commit
11d80eeaa9
|
|
@ -790,6 +790,7 @@ AI SDK-based image generation using OpenAI, Azure OpenAI, Google, OpenRouter, Da
|
|||
| `--size` | Size (e.g., `1024x1024`) |
|
||||
| `--quality` | `normal` or `2k` (default: `2k`) |
|
||||
| `--imageSize` | `1K`, `2K`, or `4K` for Google/OpenRouter |
|
||||
| `--imageApiDialect` | `openai-native` or `ratio-metadata` for OpenAI-compatible gateways |
|
||||
| `--ref` | Reference images (Google, OpenAI, Azure OpenAI, OpenRouter, Replicate supported families, MiniMax, or Seedream 5.0/4.5/4.0) |
|
||||
| `--n` | Number of images per request (`replicate` currently requires `--n 1`) |
|
||||
| `--json` | JSON output |
|
||||
|
|
@ -823,6 +824,7 @@ AI SDK-based image generation using OpenAI, Azure OpenAI, Google, OpenRouter, Da
|
|||
| `JIMENG_IMAGE_MODEL` | Jimeng model | `jimeng_t2i_v40` |
|
||||
| `SEEDREAM_IMAGE_MODEL` | Seedream model | `doubao-seedream-5-0-260128` |
|
||||
| `OPENAI_BASE_URL` | Custom OpenAI endpoint | - |
|
||||
| `OPENAI_IMAGE_API_DIALECT` | OpenAI-compatible image API dialect (`openai-native` or `ratio-metadata`) | `openai-native` |
|
||||
| `OPENAI_IMAGE_USE_CHAT` | Use `/chat/completions` for OpenAI image generation | `false` |
|
||||
| `AZURE_OPENAI_BASE_URL` | Azure resource or deployment endpoint | - |
|
||||
| `AZURE_API_VERSION` | Azure image API version | `2025-04-01-preview` |
|
||||
|
|
|
|||
|
|
@ -790,6 +790,7 @@ AI 驱动的生成后端。
|
|||
| `--size` | 尺寸(如 `1024x1024`) |
|
||||
| `--quality` | `normal` 或 `2k`(默认:`2k`) |
|
||||
| `--imageSize` | Google/OpenRouter 使用的 `1K`、`2K`、`4K` |
|
||||
| `--imageApiDialect` | OpenAI 兼容网关的图像 API 方言(`openai-native` 或 `ratio-metadata`) |
|
||||
| `--ref` | 参考图片(Google、OpenAI、Azure OpenAI、OpenRouter、Replicate 支持的模型家族、MiniMax 或 Seedream 5.0/4.5/4.0) |
|
||||
| `--n` | 单次请求生成图片数量(`replicate` 当前只支持 `--n 1`) |
|
||||
| `--json` | 输出 JSON 结果 |
|
||||
|
|
@ -823,6 +824,7 @@ AI 驱动的生成后端。
|
|||
| `JIMENG_IMAGE_MODEL` | 即梦模型 | `jimeng_t2i_v40` |
|
||||
| `SEEDREAM_IMAGE_MODEL` | 豆包模型 | `doubao-seedream-5-0-260128` |
|
||||
| `OPENAI_BASE_URL` | 自定义 OpenAI 端点 | - |
|
||||
| `OPENAI_IMAGE_API_DIALECT` | OpenAI 兼容图像 API 方言(`openai-native` 或 `ratio-metadata`) | `openai-native` |
|
||||
| `OPENAI_IMAGE_USE_CHAT` | OpenAI 改走 `/chat/completions` | `false` |
|
||||
| `AZURE_OPENAI_BASE_URL` | Azure 资源或部署端点 | - |
|
||||
| `AZURE_API_VERSION` | Azure 图像 API 版本 | `2025-04-01-preview` |
|
||||
|
|
|
|||
|
|
@ -57,7 +57,7 @@ if (Test-Path "$HOME/.baoyu-skills/baoyu-imagine/EXTEND.md") { "user" }
|
|||
|
||||
Legacy compatibility: if `.baoyu-skills/baoyu-image-gen/EXTEND.md` exists and the new path does not, runtime renames it to `baoyu-imagine`. If both files exist, runtime leaves them unchanged and uses the new path.
|
||||
|
||||
**EXTEND.md Supports**: Default provider | Default quality | Default aspect ratio | Default image size | Default models | Batch worker cap | Provider-specific batch limits
|
||||
**EXTEND.md Supports**: Default provider | Default quality | Default aspect ratio | Default image size | OpenAI image API dialect | Default models | Batch worker cap | Provider-specific batch limits
|
||||
|
||||
Schema: `references/config/preferences-schema.md`
|
||||
|
||||
|
|
@ -176,6 +176,7 @@ Paths in `promptFiles`, `image`, and `ref` are resolved relative to the batch fi
|
|||
| `--size <WxH>` | Size (e.g., `1024x1024`) |
|
||||
| `--quality normal\|2k` | Quality preset (default: `2k`) |
|
||||
| `--imageSize 1K\|2K\|4K` | Image size for Google/OpenRouter (default: from quality) |
|
||||
| `--imageApiDialect openai-native\|ratio-metadata` | OpenAI-compatible image API dialect. Use `ratio-metadata` when the endpoint is OpenAI-compatible but expects aspect-ratio `size` plus `metadata.resolution` instead of pixel `size` |
|
||||
| `--ref <files...>` | Reference images. Supported by Google multimodal, OpenAI GPT Image edits, Azure OpenAI edits (PNG/JPG only), OpenRouter multimodal models, Replicate supported families, MiniMax subject-reference, and Seedream 5.0/4.5/4.0. Not supported by Jimeng, Seedream 3.0, or removed SeedEdit 3.0 |
|
||||
| `--n <count>` | Number of images. Replicate currently supports only `--n 1` because this path saves exactly one output image |
|
||||
| `--json` | JSON output |
|
||||
|
|
@ -209,6 +210,7 @@ Paths in `promptFiles`, `image`, and `ref` are resolved relative to the batch fi
|
|||
| `JIMENG_IMAGE_MODEL` | Jimeng model override (default: jimeng_t2i_v40) |
|
||||
| `SEEDREAM_IMAGE_MODEL` | Seedream model override (default: doubao-seedream-5-0-260128) |
|
||||
| `OPENAI_BASE_URL` | Custom OpenAI endpoint |
|
||||
| `OPENAI_IMAGE_API_DIALECT` | OpenAI-compatible image API dialect override (`openai-native` or `ratio-metadata`) |
|
||||
| `AZURE_OPENAI_BASE_URL` | Azure resource endpoint or deployment endpoint |
|
||||
| `AZURE_API_VERSION` | Azure image API version (default: `2025-04-01-preview`) |
|
||||
| `OPENROUTER_BASE_URL` | Custom OpenRouter endpoint (default: `https://openrouter.ai/api/v1`) |
|
||||
|
|
@ -242,6 +244,22 @@ For Azure, `--model` / `default_model.azure` should be the Azure deployment name
|
|||
|
||||
**EXTEND.md overrides env vars**. If both EXTEND.md `default_model.google: "gemini-3-pro-image-preview"` and env var `GOOGLE_IMAGE_MODEL=gemini-3.1-flash-image-preview` exist, EXTEND.md wins.
|
||||
|
||||
### OpenAI-Compatible Gateway Dialects
|
||||
|
||||
`provider=openai` means the auth and routing entrypoint is OpenAI-compatible. It does **not** guarantee that the upstream image API uses OpenAI native image-request semantics.
|
||||
|
||||
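Use `default_image_api_dialect` in `EXTEND.md`, `OPENAI_IMAGE_API_DIALECT`, or `--imageApiDialect` when the endpoint expects a different wire format (precedence: `--imageApiDialect` > `EXTEND.md` > environment variable; if none is set, `openai-native` is used):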
|
||||
|
||||
- `openai-native`: Sends pixel `size` such as `1536x1024` and native OpenAI quality fields when supported
|
||||
- `ratio-metadata`: Sends aspect-ratio `size` such as `16:9` and maps quality/size intent into `metadata.resolution` (`1K|2K|4K`) plus `metadata.orientation` (`landscape` or `portrait`; omitted for square or near-square ratios)
|
||||
|
||||
Recommended use:
|
||||
|
||||
- OpenAI native Images API or strict clones: keep `openai-native`
|
||||
- OpenAI-compatible gateways in front of Gemini or similar models: try `ratio-metadata`
|
||||
|
||||
Current limitation: `ratio-metadata` only applies to text-to-image generation. Reference-image edit flows still require `openai-native` or another provider with first-class edit support.
|
||||
|
||||
**Agent MUST display model info** before each generation:
|
||||
- Show: `Using [provider] / [model]`
|
||||
- Show switch hint: `Switch model: --model <id> | EXTEND.md default_model.[provider] | env <PROVIDER>_IMAGE_MODEL`
|
||||
|
|
|
|||
|
|
@ -175,6 +175,7 @@ default_provider: [selected provider or null]
|
|||
default_quality: [selected quality]
|
||||
default_aspect_ratio: null
|
||||
default_image_size: null
|
||||
default_image_api_dialect: null
|
||||
default_model:
|
||||
google: [selected google model or null]
|
||||
openai: null
|
||||
|
|
@ -187,6 +188,8 @@ default_model:
|
|||
---
|
||||
```
|
||||
|
||||
If the user selects `OpenAI` but says their endpoint is only OpenAI-compatible and fronts another image model family, save `default_image_api_dialect: ratio-metadata` only when they explicitly confirm the gateway expects aspect-ratio `size` plus metadata-based resolution. Otherwise leave it `null`, which falls back to `OPENAI_IMAGE_API_DIALECT` and then to `openai-native`.
|
||||
|
||||
## Flow 2: EXTEND.md Exists, Model Null
|
||||
|
||||
When EXTEND.md exists but `default_model.[current_provider]` is null, ask ONLY the model question for the current provider.
|
||||
|
|
|
|||
|
|
@ -19,6 +19,8 @@ default_aspect_ratio: null # "16:9"|"1:1"|"4:3"|"3:4"|"2.35:1"|null
|
|||
|
||||
default_image_size: null # 1K|2K|4K|null (Google/OpenRouter, overrides quality)
|
||||
|
||||
default_image_api_dialect: null # openai-native|ratio-metadata|null (OpenAI-compatible gateways; null = use env/default)
|
||||
|
||||
default_model:
|
||||
google: null # e.g., "gemini-3-pro-image-preview", "gemini-3.1-flash-image-preview"
|
||||
openai: null # e.g., "gpt-image-1.5", "gpt-image-1"
|
||||
|
|
@ -68,6 +70,7 @@ batch:
|
|||
| `default_quality` | string\|null | null | Default quality (null = 2k) |
|
||||
| `default_aspect_ratio` | string\|null | null | Default aspect ratio |
|
||||
| `default_image_size` | string\|null | null | Google/OpenRouter image size (overrides quality) |
|
||||
| `default_image_api_dialect` | string\|null | null | OpenAI-compatible image dialect (`openai-native` or `ratio-metadata`) |
|
||||
| `default_model.google` | string\|null | null | Google default model |
|
||||
| `default_model.openai` | string\|null | null | OpenAI default model |
|
||||
| `default_model.azure` | string\|null | null | Azure default deployment name |
|
||||
|
|
@ -88,6 +91,7 @@ batch:
|
|||
version: 1
|
||||
default_provider: google
|
||||
default_quality: 2k
|
||||
default_image_api_dialect: null
|
||||
---
|
||||
```
|
||||
|
||||
|
|
@ -99,6 +103,7 @@ default_provider: google
|
|||
default_quality: 2k
|
||||
default_aspect_ratio: "16:9"
|
||||
default_image_size: 2K
|
||||
default_image_api_dialect: null
|
||||
default_model:
|
||||
google: "gemini-3-pro-image-preview"
|
||||
openai: "gpt-image-1.5"
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@ import {
|
|||
mergeConfig,
|
||||
normalizeOutputImagePath,
|
||||
parseArgs,
|
||||
parseOpenAIImageApiDialect,
|
||||
parseSimpleYaml,
|
||||
} from "./main.ts";
|
||||
|
||||
|
|
@ -33,6 +34,7 @@ function makeArgs(overrides: Partial<CliArgs> = {}): CliArgs {
|
|||
quality: null,
|
||||
imageSize: null,
|
||||
imageSizeSource: null,
|
||||
imageApiDialect: null,
|
||||
referenceImages: [],
|
||||
n: 1,
|
||||
batchFile: null,
|
||||
|
|
@ -85,6 +87,8 @@ test("parseArgs parses the main baoyu-imagine CLI flags", () => {
|
|||
"2k",
|
||||
"--imageSize",
|
||||
"4k",
|
||||
"--imageApiDialect",
|
||||
"ratio-metadata",
|
||||
"--ref",
|
||||
"ref/one.png",
|
||||
"ref/two.jpg",
|
||||
|
|
@ -102,6 +106,7 @@ test("parseArgs parses the main baoyu-imagine CLI flags", () => {
|
|||
assert.equal(args.aspectRatioSource, null);
|
||||
assert.equal(args.imageSize, "4K");
|
||||
assert.equal(args.imageSizeSource, "cli");
|
||||
assert.equal(args.imageApiDialect, "ratio-metadata");
|
||||
assert.deepEqual(args.referenceImages, ["ref/one.png", "ref/two.jpg"]);
|
||||
assert.equal(args.n, 3);
|
||||
assert.equal(args.jobs, 5);
|
||||
|
|
@ -125,6 +130,7 @@ default_provider: openrouter
|
|||
default_quality: normal
|
||||
default_aspect_ratio: '16:9'
|
||||
default_image_size: 2K
|
||||
default_image_api_dialect: ratio-metadata
|
||||
default_model:
|
||||
google: gemini-3-pro-image-preview
|
||||
openai: gpt-image-1.5
|
||||
|
|
@ -157,6 +163,7 @@ batch:
|
|||
assert.equal(config.default_quality, "normal");
|
||||
assert.equal(config.default_aspect_ratio, "16:9");
|
||||
assert.equal(config.default_image_size, "2K");
|
||||
assert.equal(config.default_image_api_dialect, "ratio-metadata");
|
||||
assert.equal(config.default_model?.google, "gemini-3-pro-image-preview");
|
||||
assert.equal(config.default_model?.openai, "gpt-image-1.5");
|
||||
assert.equal(config.default_model?.zai, "glm-image");
|
||||
|
|
@ -252,6 +259,7 @@ test("mergeConfig only fills values missing from CLI args", () => {
|
|||
default_quality: "2k",
|
||||
default_aspect_ratio: "3:2",
|
||||
default_image_size: "2K",
|
||||
default_image_api_dialect: "ratio-metadata",
|
||||
} satisfies Partial<ExtendConfig>,
|
||||
);
|
||||
|
||||
|
|
@ -261,6 +269,7 @@ test("mergeConfig only fills values missing from CLI args", () => {
|
|||
assert.equal(merged.aspectRatioSource, "config");
|
||||
assert.equal(merged.imageSize, "4K");
|
||||
assert.equal(merged.imageSizeSource, "cli");
|
||||
assert.equal(merged.imageApiDialect, "ratio-metadata");
|
||||
});
|
||||
|
||||
test("mergeConfig tags inherited imageSize defaults so providers can ignore incompatible config", () => {
|
||||
|
|
@ -275,6 +284,25 @@ test("mergeConfig tags inherited imageSize defaults so providers can ignore inco
|
|||
assert.equal(merged.imageSizeSource, "config");
|
||||
});
|
||||
|
||||
test("mergeConfig falls back to OPENAI_IMAGE_API_DIALECT when CLI and EXTEND are unset", (t) => {
|
||||
useEnv(t, {
|
||||
OPENAI_IMAGE_API_DIALECT: "ratio-metadata",
|
||||
});
|
||||
|
||||
const merged = mergeConfig(makeArgs(), {});
|
||||
assert.equal(merged.imageApiDialect, "ratio-metadata");
|
||||
});
|
||||
|
||||
test("parseOpenAIImageApiDialect validates supported values", () => {
|
||||
assert.equal(parseOpenAIImageApiDialect("openai-native"), "openai-native");
|
||||
assert.equal(parseOpenAIImageApiDialect("ratio-metadata"), "ratio-metadata");
|
||||
assert.equal(parseOpenAIImageApiDialect(null), null);
|
||||
assert.throws(
|
||||
() => parseOpenAIImageApiDialect("gateway-magic"),
|
||||
/Invalid OpenAI image API dialect/,
|
||||
);
|
||||
});
|
||||
|
||||
test("detectProvider rejects non-ref-capable providers and prefers Google first when multiple keys exist", (t) => {
|
||||
assert.throws(
|
||||
() =>
|
||||
|
|
@ -492,6 +520,7 @@ test("loadBatchTasks and createTaskArgs resolve batch-relative paths", async (t)
|
|||
makeArgs({
|
||||
provider: "replicate",
|
||||
quality: "2k",
|
||||
imageApiDialect: "ratio-metadata",
|
||||
json: true,
|
||||
}),
|
||||
loaded.tasks[0]!,
|
||||
|
|
@ -508,6 +537,7 @@ test("loadBatchTasks and createTaskArgs resolve batch-relative paths", async (t)
|
|||
assert.equal(taskArgs.provider, "replicate");
|
||||
assert.equal(taskArgs.aspectRatio, "16:9");
|
||||
assert.equal(taskArgs.quality, "2k");
|
||||
assert.equal(taskArgs.imageApiDialect, "ratio-metadata");
|
||||
assert.equal(taskArgs.json, true);
|
||||
});
|
||||
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@ import type {
|
|||
BatchTaskInput,
|
||||
CliArgs,
|
||||
ExtendConfig,
|
||||
OpenAIImageApiDialect,
|
||||
Provider,
|
||||
} from "./types";
|
||||
|
||||
|
|
@ -83,6 +84,7 @@ Options:
|
|||
--size <WxH> Size (e.g., 1024x1024)
|
||||
--quality normal|2k Quality preset (default: 2k)
|
||||
--imageSize 1K|2K|4K Image size for Google/OpenRouter (default: from quality)
|
||||
--imageApiDialect <id> OpenAI-compatible image dialect: openai-native|ratio-metadata
|
||||
--ref <files...> Reference images (Google, OpenAI, Azure, OpenRouter, Replicate supported families, MiniMax, or Seedream 4.0/4.5/5.0)
|
||||
--n <count> Number of images for the current task (default: 1; Replicate currently requires 1)
|
||||
--json JSON output
|
||||
|
|
@ -133,6 +135,7 @@ Environment variables:
|
|||
JIMENG_IMAGE_MODEL Default Jimeng model (jimeng_t2i_v40)
|
||||
SEEDREAM_IMAGE_MODEL Default Seedream model (doubao-seedream-5-0-260128)
|
||||
OPENAI_BASE_URL Custom OpenAI endpoint
|
||||
OPENAI_IMAGE_API_DIALECT OpenAI-compatible image dialect (openai-native|ratio-metadata)
|
||||
OPENAI_IMAGE_USE_CHAT Use /chat/completions instead of /images/generations (true|false)
|
||||
OPENROUTER_BASE_URL Custom OpenRouter endpoint
|
||||
OPENROUTER_HTTP_REFERER Optional app URL for OpenRouter attribution
|
||||
|
|
@ -170,6 +173,7 @@ export function parseArgs(argv: string[]): CliArgs {
|
|||
quality: null,
|
||||
imageSize: null,
|
||||
imageSizeSource: null,
|
||||
imageApiDialect: null,
|
||||
referenceImages: [],
|
||||
n: 1,
|
||||
batchFile: null,
|
||||
|
|
@ -299,6 +303,15 @@ export function parseArgs(argv: string[]): CliArgs {
|
|||
continue;
|
||||
}
|
||||
|
||||
if (a === "--imageApiDialect") {
|
||||
const v = argv[++i];
|
||||
if (v !== "openai-native" && v !== "ratio-metadata") {
|
||||
throw new Error(`Invalid imageApiDialect: ${v}`);
|
||||
}
|
||||
out.imageApiDialect = v;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (a === "--ref" || a === "--reference") {
|
||||
const { items, next } = takeMany(i);
|
||||
if (items.length === 0) throw new Error(`Missing files for ${a}`);
|
||||
|
|
@ -402,6 +415,9 @@ export function parseSimpleYaml(yaml: string): Partial<ExtendConfig> {
|
|||
config.default_aspect_ratio = cleaned === "null" ? null : cleaned;
|
||||
} else if (key === "default_image_size") {
|
||||
config.default_image_size = value === "null" ? null : value as "1K" | "2K" | "4K";
|
||||
} else if (key === "default_image_api_dialect") {
|
||||
config.default_image_api_dialect =
|
||||
value === "null" ? null : parseOpenAIImageApiDialect(value);
|
||||
} else if (key === "default_model") {
|
||||
config.default_model = {
|
||||
google: null,
|
||||
|
|
@ -487,6 +503,15 @@ export function parseSimpleYaml(yaml: string): Partial<ExtendConfig> {
|
|||
return config;
|
||||
}
|
||||
|
||||
/**
 * Parses a user-supplied OpenAI image API dialect identifier.
 *
 * Quote characters are removed, surrounding whitespace is trimmed and the
 * comparison is case-insensitive, so values such as `"ratio-metadata"` or
 * ` Ratio-Metadata ` are accepted.
 *
 * @param value - Raw dialect string (for example from an environment variable
 *   or a config value); may be missing.
 * @returns The canonical dialect id, or `null` when the value is missing or
 *   blank (including a quoted empty string such as `""`).
 * @throws Error when the value is non-blank but not a supported dialect.
 */
export function parseOpenAIImageApiDialect(
  value: string | undefined | null
): OpenAIImageApiDialect | null {
  if (value == null) return null;

  const normalized = value.replace(/['"]/g, "").trim().toLowerCase();
  // A blank or quoted-empty value means "not configured", not "invalid".
  if (normalized === "") return null;

  if (normalized === "openai-native" || normalized === "ratio-metadata") return normalized;

  // Report the untouched input so the user can find it in their env/config.
  throw new Error(`Invalid OpenAI image API dialect: ${value}`);
}
|
||||
|
||||
type ExtendConfigPathPair = {
|
||||
current: string;
|
||||
legacy: string;
|
||||
|
|
@ -548,6 +573,10 @@ export async function loadExtendConfig(
|
|||
export function mergeConfig(args: CliArgs, extend: Partial<ExtendConfig>): CliArgs {
|
||||
const aspectRatio = args.aspectRatio ?? extend.default_aspect_ratio ?? null;
|
||||
const imageSize = args.imageSize ?? extend.default_image_size ?? null;
|
||||
const imageApiDialect =
|
||||
args.imageApiDialect ??
|
||||
extend.default_image_api_dialect ??
|
||||
parseOpenAIImageApiDialect(process.env.OPENAI_IMAGE_API_DIALECT);
|
||||
return {
|
||||
...args,
|
||||
provider: args.provider ?? extend.default_provider ?? null,
|
||||
|
|
@ -560,6 +589,7 @@ export function mergeConfig(args: CliArgs, extend: Partial<ExtendConfig>): CliAr
|
|||
imageSizeSource:
|
||||
args.imageSizeSource ??
|
||||
(args.imageSize !== null ? "cli" : (imageSize !== null ? "config" : null)),
|
||||
imageApiDialect,
|
||||
};
|
||||
}
|
||||
|
||||
|
|
@ -891,6 +921,7 @@ export function createTaskArgs(baseArgs: CliArgs, task: BatchTaskInput, batchDir
|
|||
quality: task.quality ?? baseArgs.quality ?? null,
|
||||
imageSize: task.imageSize ?? baseArgs.imageSize ?? null,
|
||||
imageSizeSource: task.imageSize != null ? "task" : (baseArgs.imageSizeSource ?? null),
|
||||
imageApiDialect: task.imageApiDialect ?? baseArgs.imageApiDialect ?? null,
|
||||
referenceImages: task.ref ? task.ref.map((filePath) => resolveBatchPath(batchDir, filePath)) : [],
|
||||
n: task.n ?? baseArgs.n,
|
||||
batchFile: null,
|
||||
|
|
|
|||
|
|
@ -48,6 +48,7 @@ function makeArgs(overrides: Partial<CliArgs> = {}): CliArgs {
|
|||
size: null,
|
||||
quality: null,
|
||||
imageSize: null,
|
||||
imageApiDialect: null,
|
||||
referenceImages: [],
|
||||
n: 1,
|
||||
batchFile: null,
|
||||
|
|
|
|||
|
|
@ -50,6 +50,7 @@ function makeArgs(overrides: Partial<CliArgs> = {}): CliArgs {
|
|||
size: null,
|
||||
quality: null,
|
||||
imageSize: null,
|
||||
imageApiDialect: null,
|
||||
referenceImages: [],
|
||||
n: 1,
|
||||
batchFile: null,
|
||||
|
|
|
|||
|
|
@ -15,6 +15,7 @@ function makeArgs(overrides: Partial<CliArgs> = {}): CliArgs {
|
|||
size: null,
|
||||
quality: null,
|
||||
imageSize: null,
|
||||
imageApiDialect: null,
|
||||
referenceImages: [],
|
||||
n: 1,
|
||||
batchFile: null,
|
||||
|
|
|
|||
|
|
@ -50,6 +50,7 @@ function makeArgs(overrides: Partial<CliArgs> = {}): CliArgs {
|
|||
size: null,
|
||||
quality: null,
|
||||
imageSize: null,
|
||||
imageApiDialect: null,
|
||||
referenceImages: [],
|
||||
n: 1,
|
||||
batchFile: null,
|
||||
|
|
|
|||
|
|
@ -2,9 +2,16 @@ import assert from "node:assert/strict";
|
|||
import test from "node:test";
|
||||
|
||||
import {
|
||||
buildOpenAIGenerationsBody,
|
||||
extractImageFromResponse,
|
||||
getOpenAIAspectRatio,
|
||||
getOpenAIImageApiDialect,
|
||||
getOpenAIResolution,
|
||||
getMimeType,
|
||||
getOpenAISize,
|
||||
getOrientationFromAspectRatio,
|
||||
inferAspectRatioFromSize,
|
||||
inferResolutionFromSize,
|
||||
parseAspectRatio,
|
||||
} from "./openai.ts";
|
||||
|
||||
|
|
@ -18,6 +25,69 @@ test("OpenAI aspect-ratio parsing and size selection match model families", () =
|
|||
assert.equal(getOpenAISize("dall-e-2", "16:9", "2k"), "1024x1024");
|
||||
assert.equal(getOpenAISize("gpt-image-1.5", "16:9", "2k"), "1536x1024");
|
||||
assert.equal(getOpenAISize("gpt-image-1.5", "4:3", "2k"), "1024x1024");
|
||||
assert.equal(inferAspectRatioFromSize("1536x1024"), "3:2");
|
||||
assert.equal(inferResolutionFromSize("1536x1024"), "2K");
|
||||
assert.equal(getOpenAIAspectRatio({ aspectRatio: null, size: "2048x1152" }), "16:9");
|
||||
assert.equal(getOpenAIResolution({ imageSize: null, size: "2048x1152", quality: "normal" }), "2K");
|
||||
assert.equal(getOrientationFromAspectRatio("16:9"), "landscape");
|
||||
assert.equal(getOrientationFromAspectRatio("9:16"), "portrait");
|
||||
assert.equal(getOrientationFromAspectRatio("1:1"), null);
|
||||
assert.equal(getOpenAIImageApiDialect({ imageApiDialect: null }), "openai-native");
|
||||
});
|
||||
|
||||
test("OpenAI generations body switches between native and ratio-metadata dialects", () => {
|
||||
assert.deepEqual(
|
||||
buildOpenAIGenerationsBody("Draw a skyline", "gpt-image-1.5", {
|
||||
aspectRatio: "16:9",
|
||||
size: null,
|
||||
quality: "2k",
|
||||
imageSize: null,
|
||||
imageApiDialect: null,
|
||||
}),
|
||||
{
|
||||
model: "gpt-image-1.5",
|
||||
prompt: "Draw a skyline",
|
||||
size: "1536x1024",
|
||||
},
|
||||
);
|
||||
|
||||
assert.deepEqual(
|
||||
buildOpenAIGenerationsBody("Draw a skyline", "gemini-3-pro-image-preview", {
|
||||
aspectRatio: "16:9",
|
||||
size: null,
|
||||
quality: "2k",
|
||||
imageSize: null,
|
||||
imageApiDialect: "ratio-metadata",
|
||||
}),
|
||||
{
|
||||
model: "gemini-3-pro-image-preview",
|
||||
prompt: "Draw a skyline",
|
||||
size: "16:9",
|
||||
metadata: {
|
||||
resolution: "2K",
|
||||
orientation: "landscape",
|
||||
},
|
||||
},
|
||||
);
|
||||
|
||||
assert.deepEqual(
|
||||
buildOpenAIGenerationsBody("Draw a portrait", "gemini-3-pro-image-preview", {
|
||||
aspectRatio: null,
|
||||
size: "1152x2048",
|
||||
quality: "normal",
|
||||
imageSize: null,
|
||||
imageApiDialect: "ratio-metadata",
|
||||
}),
|
||||
{
|
||||
model: "gemini-3-pro-image-preview",
|
||||
prompt: "Draw a portrait",
|
||||
size: "9:16",
|
||||
metadata: {
|
||||
resolution: "2K",
|
||||
orientation: "portrait",
|
||||
},
|
||||
},
|
||||
);
|
||||
});
|
||||
|
||||
test("OpenAI mime-type detection covers supported reference image extensions", () => {
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
import path from "node:path";
|
||||
import { readFile } from "node:fs/promises";
|
||||
import type { CliArgs } from "../types";
|
||||
import type { CliArgs, OpenAIImageApiDialect } from "../types";
|
||||
|
||||
export function getDefaultModel(): string {
|
||||
return process.env.OPENAI_IMAGE_MODEL || "gpt-image-1.5";
|
||||
|
|
@ -23,6 +23,8 @@ type SizeMapping = {
|
|||
portrait: string;
|
||||
};
|
||||
|
||||
type OpenAIGenerationsBody = Record<string, unknown>;
|
||||
|
||||
export function getOpenAISize(
|
||||
model: string,
|
||||
ar: string | null,
|
||||
|
|
@ -60,6 +62,114 @@ export function getOpenAISize(
|
|||
return sizes.square;
|
||||
}
|
||||
|
||||
/**
 * Parses a pixel size string of the form `<width>x<height>`.
 *
 * Whitespace around the whole value and around the `x` / `X` separator is
 * ignored.
 *
 * @param value - Candidate size string, e.g. `1536x1024`.
 * @returns The positive integer dimensions, or `null` when the value is not a
 *   pixel size or a dimension is zero or too large to represent exactly.
 */
function parsePixelSize(value: string): { width: number; height: number } | null {
  const match = /^(\d+)\s*[xX]\s*(\d+)$/.exec(value.trim());
  if (!match) return null;

  const width = Number.parseInt(match[1] ?? "", 10);
  const height = Number.parseInt(match[2] ?? "", 10);

  // Safe-integer check rejects NaN/Infinity and digit runs that would lose
  // precision, which would corrupt the ratio derived from these numbers.
  if (!Number.isSafeInteger(width) || !Number.isSafeInteger(height)) return null;
  if (width <= 0 || height <= 0) return null;

  return { width, height };
}
|
||||
|
||||
/**
 * Greatest common divisor of two numbers (Euclid's algorithm on absolute
 * values).
 *
 * @param a - First operand.
 * @param b - Second operand.
 * @returns The GCD, or `1` when it would be `0` (both operands zero) or when
 *   either operand is NaN or infinite, so the result is always safe to divide by.
 */
function gcd(a: number, b: number): number {
  // NaN/Infinity make `x % y` NaN, so `y !== 0` would never become false.
  if (!Number.isFinite(a) || !Number.isFinite(b)) return 1;

  let x = Math.abs(a);
  let y = Math.abs(b);
  while (y !== 0) {
    [x, y] = [y, x % y];
  }
  return x || 1;
}
|
||||
|
||||
/**
 * Resolves the dialect to use for an OpenAI image request.
 *
 * @param args - Arguments carrying the (possibly unset) `imageApiDialect`.
 * @returns The configured dialect, or `"openai-native"` when none is set.
 */
export function getOpenAIImageApiDialect(args: Pick<CliArgs, "imageApiDialect">): OpenAIImageApiDialect {
  const { imageApiDialect } = args;
  if (imageApiDialect === null || imageApiDialect === undefined) {
    return "openai-native";
  }
  return imageApiDialect;
}
|
||||
|
||||
/**
 * Derives a reduced `W:H` aspect ratio from a pixel size string.
 *
 * @param size - Pixel size such as `1536x1024`, or `null`.
 * @returns The ratio in lowest terms (e.g. `3:2`), or `null` when `size` is
 *   empty or is not a parseable pixel size.
 */
export function inferAspectRatioFromSize(size: string | null): string | null {
  const dims = size ? parsePixelSize(size) : null;
  if (!dims) return null;

  const { width, height } = dims;
  const common = gcd(width, height);
  return [width / common, height / common].join(":");
}
|
||||
|
||||
/**
 * Buckets a pixel size string into a resolution tier based on its longest edge.
 *
 * @param size - Pixel size such as `1536x1024`, or `null`.
 * @returns `"1K"` for a longest edge up to 1024, `"2K"` up to 2048, `"4K"`
 *   above that; `null` when `size` is empty or is not a parseable pixel size.
 */
export function inferResolutionFromSize(size: string | null): "1K" | "2K" | "4K" | null {
  const dims = size ? parsePixelSize(size) : null;
  if (!dims) return null;

  const longest = dims.width >= dims.height ? dims.width : dims.height;
  if (longest > 2048) return "4K";
  return longest > 1024 ? "2K" : "1K";
}
|
||||
|
||||
/**
 * Chooses the aspect ratio for a request.
 *
 * @param args - Arguments carrying the explicit `aspectRatio` and pixel `size`.
 * @returns The explicit aspect ratio if set, otherwise the ratio inferred from
 *   `size`, otherwise `"1:1"`.
 */
export function getOpenAIAspectRatio(args: Pick<CliArgs, "aspectRatio" | "size">): string {
  if (args.aspectRatio != null) return args.aspectRatio;

  const fromSize = inferAspectRatioFromSize(args.size);
  return fromSize != null ? fromSize : "1:1";
}
|
||||
|
||||
/**
 * Chooses the resolution tier for a request.
 *
 * Priority: an explicit `imageSize` (matched case-insensitively, ignoring
 * surrounding whitespace), then the tier inferred from pixel `size`, then the
 * quality preset (`normal` gives `1K`, anything else gives `2K`).
 *
 * @param args - Arguments carrying `imageSize`, `size` and `quality`.
 * @returns The resolution tier: `"1K"`, `"2K"` or `"4K"`.
 */
export function getOpenAIResolution(
  args: Pick<CliArgs, "imageSize" | "size" | "quality">
): "1K" | "2K" | "4K" {
  // `imageSize` is typed as a plain string, so a lower-case value such as
  // `2k` can arrive here; normalise it instead of silently ignoring it.
  const explicit = typeof args.imageSize === "string" ? args.imageSize.trim().toUpperCase() : null;
  if (explicit === "1K" || explicit === "2K" || explicit === "4K") {
    return explicit;
  }

  const inferred = inferResolutionFromSize(args.size);
  if (inferred) return inferred;

  return args.quality === "normal" ? "1K" : "2K";
}
|
||||
|
||||
/**
 * Classifies an aspect ratio as landscape or portrait.
 *
 * @param ar - Aspect ratio string such as `16:9`.
 * @returns `"landscape"` when width exceeds height, `"portrait"` otherwise;
 *   `null` when the ratio cannot be parsed or is within 0.1 of square.
 */
export function getOrientationFromAspectRatio(ar: string): "landscape" | "portrait" | null {
  const dims = parseAspectRatio(ar);
  if (dims) {
    const ratio = dims.width / dims.height;
    // Near-square ratios carry no meaningful orientation.
    const nearSquare = Math.abs(ratio - 1) < 0.1;
    if (!nearSquare) {
      return ratio > 1 ? "landscape" : "portrait";
    }
  }
  return null;
}
|
||||
|
||||
/**
 * Builds the JSON body for an image generation request in the selected dialect.
 *
 * - `openai-native`: `size` is a pixel size (explicit `args.size`, else the
 *   value from `getOpenAISize`); `quality` (`hd` / `standard`) is added only
 *   for models whose id contains `dall-e-3`.
 * - `ratio-metadata`: `size` is an aspect ratio and `metadata` carries
 *   `resolution` plus, when one can be determined, `orientation`.
 *
 * @param prompt - Text prompt.
 * @param model - Model identifier sent as-is.
 * @param args - Size, quality and dialect settings for the request.
 * @returns The request body object.
 */
export function buildOpenAIGenerationsBody(
  prompt: string,
  model: string,
  args: Pick<CliArgs, "aspectRatio" | "size" | "quality" | "imageSize" | "imageApiDialect">
): OpenAIGenerationsBody {
  const dialect = getOpenAIImageApiDialect(args);

  if (dialect !== "ratio-metadata") {
    const nativeBody: OpenAIGenerationsBody = {
      model,
      prompt,
      size: args.size || getOpenAISize(model, args.aspectRatio, args.quality),
    };
    if (model.includes("dall-e-3")) {
      nativeBody.quality = args.quality === "2k" ? "hd" : "standard";
    }
    return nativeBody;
  }

  const ratio = getOpenAIAspectRatio(args);
  const resolution = getOpenAIResolution(args);
  const orientation = getOrientationFromAspectRatio(ratio);

  // Only include `orientation` when one was determined.
  const metadata: Record<string, string> = orientation
    ? { resolution, orientation }
    : { resolution };

  return { model, prompt, size: ratio, metadata };
}
|
||||
|
||||
export async function generateImage(
|
||||
prompt: string,
|
||||
model: string,
|
||||
|
|
@ -78,18 +188,28 @@ export async function generateImage(
|
|||
return generateWithChatCompletions(baseURL, apiKey, prompt, model);
|
||||
}
|
||||
|
||||
const size = args.size || getOpenAISize(model, args.aspectRatio, args.quality);
|
||||
const imageApiDialect = getOpenAIImageApiDialect(args);
|
||||
|
||||
if (args.referenceImages.length > 0) {
|
||||
if (imageApiDialect !== "openai-native") {
|
||||
throw new Error(
|
||||
"Reference images are not supported with the ratio-metadata OpenAI dialect yet. Use openai-native, Google, Azure, OpenRouter, MiniMax, Seedream, or Replicate for image-edit workflows."
|
||||
);
|
||||
}
|
||||
if (model.includes("dall-e-2") || model.includes("dall-e-3")) {
|
||||
throw new Error(
|
||||
"Reference images with OpenAI in this skill require GPT Image models. Use --model gpt-image-1.5 (or another gpt-image model)."
|
||||
);
|
||||
}
|
||||
const size = args.size || getOpenAISize(model, args.aspectRatio, args.quality);
|
||||
return generateWithOpenAIEdits(baseURL, apiKey, prompt, model, size, args.referenceImages, args.quality);
|
||||
}
|
||||
|
||||
return generateWithOpenAIGenerations(baseURL, apiKey, prompt, model, size, args.quality);
|
||||
return generateWithOpenAIGenerations(
|
||||
baseURL,
|
||||
apiKey,
|
||||
buildOpenAIGenerationsBody(prompt, model, args)
|
||||
);
|
||||
}
|
||||
|
||||
async function generateWithChatCompletions(
|
||||
|
|
@ -129,17 +249,8 @@ async function generateWithChatCompletions(
|
|||
async function generateWithOpenAIGenerations(
|
||||
baseURL: string,
|
||||
apiKey: string,
|
||||
prompt: string,
|
||||
model: string,
|
||||
size: string,
|
||||
quality: CliArgs["quality"]
|
||||
body: OpenAIGenerationsBody
|
||||
): Promise<Uint8Array> {
|
||||
const body: Record<string, any> = { model, prompt, size };
|
||||
|
||||
if (model.includes("dall-e-3")) {
|
||||
body.quality = quality === "2k" ? "hd" : "standard";
|
||||
}
|
||||
|
||||
const res = await fetch(`${baseURL}/images/generations`, {
|
||||
method: "POST",
|
||||
headers: {
|
||||
|
|
|
|||
|
|
@ -28,6 +28,7 @@ function makeArgs(overrides: Partial<CliArgs> = {}): CliArgs {
|
|||
size: null,
|
||||
quality: null,
|
||||
imageSize: null,
|
||||
imageApiDialect: null,
|
||||
referenceImages: [],
|
||||
n: 1,
|
||||
batchFile: null,
|
||||
|
|
|
|||
|
|
@ -24,6 +24,7 @@ function makeArgs(overrides: Partial<CliArgs> = {}): CliArgs {
|
|||
quality: null,
|
||||
imageSize: null,
|
||||
imageSizeSource: null,
|
||||
imageApiDialect: null,
|
||||
referenceImages: [],
|
||||
n: 1,
|
||||
batchFile: null,
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@ function makeArgs(overrides: Partial<CliArgs> = {}): CliArgs {
|
|||
size: null,
|
||||
quality: null,
|
||||
imageSize: null,
|
||||
imageApiDialect: null,
|
||||
referenceImages: [],
|
||||
n: 1,
|
||||
batchFile: null,
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@ function makeArgs(overrides: Partial<CliArgs> = {}): CliArgs {
|
|||
size: null,
|
||||
quality: null,
|
||||
imageSize: null,
|
||||
imageApiDialect: null,
|
||||
referenceImages: [],
|
||||
n: 1,
|
||||
batchFile: null,
|
||||
|
|
|
|||
|
|
@ -10,6 +10,7 @@ export type Provider =
|
|||
| "seedream"
|
||||
| "azure";
|
||||
export type Quality = "normal" | "2k";
|
||||
export type OpenAIImageApiDialect = "openai-native" | "ratio-metadata";
|
||||
|
||||
export type CliArgs = {
|
||||
prompt: string | null;
|
||||
|
|
@ -23,6 +24,7 @@ export type CliArgs = {
|
|||
quality: Quality | null;
|
||||
imageSize: string | null;
|
||||
imageSizeSource?: "cli" | "task" | "config" | null;
|
||||
imageApiDialect: OpenAIImageApiDialect | null;
|
||||
referenceImages: string[];
|
||||
n: number;
|
||||
batchFile: string | null;
|
||||
|
|
@ -42,6 +44,7 @@ export type BatchTaskInput = {
|
|||
size?: string | null;
|
||||
quality?: Quality | null;
|
||||
imageSize?: "1K" | "2K" | "4K" | null;
|
||||
imageApiDialect?: OpenAIImageApiDialect | null;
|
||||
ref?: string[];
|
||||
n?: number;
|
||||
};
|
||||
|
|
@ -59,6 +62,7 @@ export type ExtendConfig = {
|
|||
default_quality: Quality | null;
|
||||
default_aspect_ratio: string | null;
|
||||
default_image_size: "1K" | "2K" | "4K" | null;
|
||||
default_image_api_dialect: OpenAIImageApiDialect | null;
|
||||
default_model: {
|
||||
google: string | null;
|
||||
openai: string | null;
|
||||
|
|
|
|||
Loading…
Reference in New Issue