feat(baoyu-imagine): add OpenAI-compatible image API dialect support
Add --imageApiDialect flag, OPENAI_IMAGE_API_DIALECT env var, and default_image_api_dialect config for gateways that expect aspect-ratio size plus metadata.resolution instead of pixel size.
This commit is contained in:
parent
58ba4579ef
commit
11d80eeaa9
|
|
@ -790,6 +790,7 @@ AI SDK-based image generation using OpenAI, Azure OpenAI, Google, OpenRouter, Da
|
||||||
| `--size` | Size (e.g., `1024x1024`) |
|
| `--size` | Size (e.g., `1024x1024`) |
|
||||||
| `--quality` | `normal` or `2k` (default: `2k`) |
|
| `--quality` | `normal` or `2k` (default: `2k`) |
|
||||||
| `--imageSize` | `1K`, `2K`, or `4K` for Google/OpenRouter |
|
| `--imageSize` | `1K`, `2K`, or `4K` for Google/OpenRouter |
|
||||||
|
| `--imageApiDialect` | `openai-native` or `ratio-metadata` for OpenAI-compatible gateways |
|
||||||
| `--ref` | Reference images (Google, OpenAI, Azure OpenAI, OpenRouter, Replicate supported families, MiniMax, or Seedream 5.0/4.5/4.0) |
|
| `--ref` | Reference images (Google, OpenAI, Azure OpenAI, OpenRouter, Replicate supported families, MiniMax, or Seedream 5.0/4.5/4.0) |
|
||||||
| `--n` | Number of images per request (`replicate` currently requires `--n 1`) |
|
| `--n` | Number of images per request (`replicate` currently requires `--n 1`) |
|
||||||
| `--json` | JSON output |
|
| `--json` | JSON output |
|
||||||
|
|
@ -823,6 +824,7 @@ AI SDK-based image generation using OpenAI, Azure OpenAI, Google, OpenRouter, Da
|
||||||
| `JIMENG_IMAGE_MODEL` | Jimeng model | `jimeng_t2i_v40` |
|
| `JIMENG_IMAGE_MODEL` | Jimeng model | `jimeng_t2i_v40` |
|
||||||
| `SEEDREAM_IMAGE_MODEL` | Seedream model | `doubao-seedream-5-0-260128` |
|
| `SEEDREAM_IMAGE_MODEL` | Seedream model | `doubao-seedream-5-0-260128` |
|
||||||
| `OPENAI_BASE_URL` | Custom OpenAI endpoint | - |
|
| `OPENAI_BASE_URL` | Custom OpenAI endpoint | - |
|
||||||
|
| `OPENAI_IMAGE_API_DIALECT` | OpenAI-compatible image API dialect (`openai-native` or `ratio-metadata`) | `openai-native` |
|
||||||
| `OPENAI_IMAGE_USE_CHAT` | Use `/chat/completions` for OpenAI image generation | `false` |
|
| `OPENAI_IMAGE_USE_CHAT` | Use `/chat/completions` for OpenAI image generation | `false` |
|
||||||
| `AZURE_OPENAI_BASE_URL` | Azure resource or deployment endpoint | - |
|
| `AZURE_OPENAI_BASE_URL` | Azure resource or deployment endpoint | - |
|
||||||
| `AZURE_API_VERSION` | Azure image API version | `2025-04-01-preview` |
|
| `AZURE_API_VERSION` | Azure image API version | `2025-04-01-preview` |
|
||||||
|
|
|
||||||
|
|
@ -790,6 +790,7 @@ AI 驱动的生成后端。
|
||||||
| `--size` | 尺寸(如 `1024x1024`) |
|
| `--size` | 尺寸(如 `1024x1024`) |
|
||||||
| `--quality` | `normal` 或 `2k`(默认:`2k`) |
|
| `--quality` | `normal` 或 `2k`(默认:`2k`) |
|
||||||
| `--imageSize` | Google/OpenRouter 使用的 `1K`、`2K`、`4K` |
|
| `--imageSize` | Google/OpenRouter 使用的 `1K`、`2K`、`4K` |
|
||||||
|
| `--imageApiDialect` | OpenAI 兼容网关的图像 API 方言(`openai-native` 或 `ratio-metadata`) |
|
||||||
| `--ref` | 参考图片(Google、OpenAI、Azure OpenAI、OpenRouter、Replicate 支持的模型家族、MiniMax 或 Seedream 5.0/4.5/4.0) |
|
| `--ref` | 参考图片(Google、OpenAI、Azure OpenAI、OpenRouter、Replicate 支持的模型家族、MiniMax 或 Seedream 5.0/4.5/4.0) |
|
||||||
| `--n` | 单次请求生成图片数量(`replicate` 当前只支持 `--n 1`) |
|
| `--n` | 单次请求生成图片数量(`replicate` 当前只支持 `--n 1`) |
|
||||||
| `--json` | 输出 JSON 结果 |
|
| `--json` | 输出 JSON 结果 |
|
||||||
|
|
@ -823,6 +824,7 @@ AI 驱动的生成后端。
|
||||||
| `JIMENG_IMAGE_MODEL` | 即梦模型 | `jimeng_t2i_v40` |
|
| `JIMENG_IMAGE_MODEL` | 即梦模型 | `jimeng_t2i_v40` |
|
||||||
| `SEEDREAM_IMAGE_MODEL` | 豆包模型 | `doubao-seedream-5-0-260128` |
|
| `SEEDREAM_IMAGE_MODEL` | 豆包模型 | `doubao-seedream-5-0-260128` |
|
||||||
| `OPENAI_BASE_URL` | 自定义 OpenAI 端点 | - |
|
| `OPENAI_BASE_URL` | 自定义 OpenAI 端点 | - |
|
||||||
|
| `OPENAI_IMAGE_API_DIALECT` | OpenAI 兼容图像 API 方言(`openai-native` 或 `ratio-metadata`) | `openai-native` |
|
||||||
| `OPENAI_IMAGE_USE_CHAT` | OpenAI 改走 `/chat/completions` | `false` |
|
| `OPENAI_IMAGE_USE_CHAT` | OpenAI 改走 `/chat/completions` | `false` |
|
||||||
| `AZURE_OPENAI_BASE_URL` | Azure 资源或部署端点 | - |
|
| `AZURE_OPENAI_BASE_URL` | Azure 资源或部署端点 | - |
|
||||||
| `AZURE_API_VERSION` | Azure 图像 API 版本 | `2025-04-01-preview` |
|
| `AZURE_API_VERSION` | Azure 图像 API 版本 | `2025-04-01-preview` |
|
||||||
|
|
|
||||||
|
|
@ -57,7 +57,7 @@ if (Test-Path "$HOME/.baoyu-skills/baoyu-imagine/EXTEND.md") { "user" }
|
||||||
|
|
||||||
Legacy compatibility: if `.baoyu-skills/baoyu-image-gen/EXTEND.md` exists and the new path does not, runtime renames it to `baoyu-imagine`. If both files exist, runtime leaves them unchanged and uses the new path.
|
Legacy compatibility: if `.baoyu-skills/baoyu-image-gen/EXTEND.md` exists and the new path does not, runtime renames it to `baoyu-imagine`. If both files exist, runtime leaves them unchanged and uses the new path.
|
||||||
|
|
||||||
**EXTEND.md Supports**: Default provider | Default quality | Default aspect ratio | Default image size | Default models | Batch worker cap | Provider-specific batch limits
|
**EXTEND.md Supports**: Default provider | Default quality | Default aspect ratio | Default image size | OpenAI image API dialect | Default models | Batch worker cap | Provider-specific batch limits
|
||||||
|
|
||||||
Schema: `references/config/preferences-schema.md`
|
Schema: `references/config/preferences-schema.md`
|
||||||
|
|
||||||
|
|
@ -176,6 +176,7 @@ Paths in `promptFiles`, `image`, and `ref` are resolved relative to the batch fi
|
||||||
| `--size <WxH>` | Size (e.g., `1024x1024`) |
|
| `--size <WxH>` | Size (e.g., `1024x1024`) |
|
||||||
| `--quality normal\|2k` | Quality preset (default: `2k`) |
|
| `--quality normal\|2k` | Quality preset (default: `2k`) |
|
||||||
| `--imageSize 1K\|2K\|4K` | Image size for Google/OpenRouter (default: from quality) |
|
| `--imageSize 1K\|2K\|4K` | Image size for Google/OpenRouter (default: from quality) |
|
||||||
|
| `--imageApiDialect openai-native\|ratio-metadata` | OpenAI-compatible image API dialect. Use `ratio-metadata` when the endpoint is OpenAI-compatible but expects aspect-ratio `size` plus `metadata.resolution` instead of pixel `size` |
|
||||||
| `--ref <files...>` | Reference images. Supported by Google multimodal, OpenAI GPT Image edits, Azure OpenAI edits (PNG/JPG only), OpenRouter multimodal models, Replicate supported families, MiniMax subject-reference, and Seedream 5.0/4.5/4.0. Not supported by Jimeng, Seedream 3.0, or removed SeedEdit 3.0 |
|
| `--ref <files...>` | Reference images. Supported by Google multimodal, OpenAI GPT Image edits, Azure OpenAI edits (PNG/JPG only), OpenRouter multimodal models, Replicate supported families, MiniMax subject-reference, and Seedream 5.0/4.5/4.0. Not supported by Jimeng, Seedream 3.0, or removed SeedEdit 3.0 |
|
||||||
| `--n <count>` | Number of images. Replicate currently supports only `--n 1` because this path saves exactly one output image |
|
| `--n <count>` | Number of images. Replicate currently supports only `--n 1` because this path saves exactly one output image |
|
||||||
| `--json` | JSON output |
|
| `--json` | JSON output |
|
||||||
|
|
@ -209,6 +210,7 @@ Paths in `promptFiles`, `image`, and `ref` are resolved relative to the batch fi
|
||||||
| `JIMENG_IMAGE_MODEL` | Jimeng model override (default: jimeng_t2i_v40) |
|
| `JIMENG_IMAGE_MODEL` | Jimeng model override (default: jimeng_t2i_v40) |
|
||||||
| `SEEDREAM_IMAGE_MODEL` | Seedream model override (default: doubao-seedream-5-0-260128) |
|
| `SEEDREAM_IMAGE_MODEL` | Seedream model override (default: doubao-seedream-5-0-260128) |
|
||||||
| `OPENAI_BASE_URL` | Custom OpenAI endpoint |
|
| `OPENAI_BASE_URL` | Custom OpenAI endpoint |
|
||||||
|
| `OPENAI_IMAGE_API_DIALECT` | OpenAI-compatible image API dialect override (`openai-native` or `ratio-metadata`; default: `openai-native`) |
|
||||||
| `AZURE_OPENAI_BASE_URL` | Azure resource endpoint or deployment endpoint |
|
| `AZURE_OPENAI_BASE_URL` | Azure resource endpoint or deployment endpoint |
|
||||||
| `AZURE_API_VERSION` | Azure image API version (default: `2025-04-01-preview`) |
|
| `AZURE_API_VERSION` | Azure image API version (default: `2025-04-01-preview`) |
|
||||||
| `OPENROUTER_BASE_URL` | Custom OpenRouter endpoint (default: `https://openrouter.ai/api/v1`) |
|
| `OPENROUTER_BASE_URL` | Custom OpenRouter endpoint (default: `https://openrouter.ai/api/v1`) |
|
||||||
|
|
@ -242,6 +244,22 @@ For Azure, `--model` / `default_model.azure` should be the Azure deployment name
|
||||||
|
|
||||||
**EXTEND.md overrides env vars**. If both EXTEND.md `default_model.google: "gemini-3-pro-image-preview"` and env var `GOOGLE_IMAGE_MODEL=gemini-3.1-flash-image-preview` exist, EXTEND.md wins.
|
**EXTEND.md overrides env vars**. If both EXTEND.md `default_model.google: "gemini-3-pro-image-preview"` and env var `GOOGLE_IMAGE_MODEL=gemini-3.1-flash-image-preview` exist, EXTEND.md wins.
|
||||||
|
|
||||||
|
### OpenAI-Compatible Gateway Dialects
|
||||||
|
|
||||||
|
`provider=openai` means the auth and routing entrypoint is OpenAI-compatible. It does **not** guarantee that the upstream image API uses OpenAI native image-request semantics.
|
||||||
|
|
||||||
|
Use `--imageApiDialect`, `default_image_api_dialect` in `EXTEND.md`, or `OPENAI_IMAGE_API_DIALECT` when the endpoint expects a different wire format (precedence: CLI flag > `EXTEND.md` > env var; default `openai-native`):
|
||||||
|
|
||||||
|
- `openai-native`: Sends pixel `size` such as `1536x1024` and native OpenAI quality fields when supported
|
||||||
|
- `ratio-metadata`: Sends aspect-ratio `size` such as `16:9` and maps quality/size intent into `metadata.resolution` (`1K|2K|4K`) plus `metadata.orientation`
|
||||||
|
|
||||||
|
Recommended use:
|
||||||
|
|
||||||
|
- OpenAI native Images API or strict clones: keep `openai-native`
|
||||||
|
- OpenAI-compatible gateways in front of Gemini or similar models: try `ratio-metadata`
|
||||||
|
|
||||||
|
Current limitation: `ratio-metadata` only applies to text-to-image generation. Reference-image edit flows still require `openai-native` or another provider with first-class edit support.
|
||||||
|
|
||||||
**Agent MUST display model info** before each generation:
|
**Agent MUST display model info** before each generation:
|
||||||
- Show: `Using [provider] / [model]`
|
- Show: `Using [provider] / [model]`
|
||||||
- Show switch hint: `Switch model: --model <id> | EXTEND.md default_model.[provider] | env <PROVIDER>_IMAGE_MODEL`
|
- Show switch hint: `Switch model: --model <id> | EXTEND.md default_model.[provider] | env <PROVIDER>_IMAGE_MODEL`
|
||||||
|
|
|
||||||
|
|
@ -175,6 +175,7 @@ default_provider: [selected provider or null]
|
||||||
default_quality: [selected quality]
|
default_quality: [selected quality]
|
||||||
default_aspect_ratio: null
|
default_aspect_ratio: null
|
||||||
default_image_size: null
|
default_image_size: null
|
||||||
|
default_image_api_dialect: null
|
||||||
default_model:
|
default_model:
|
||||||
google: [selected google model or null]
|
google: [selected google model or null]
|
||||||
openai: null
|
openai: null
|
||||||
|
|
@ -187,6 +188,8 @@ default_model:
|
||||||
---
|
---
|
||||||
```
|
```
|
||||||
|
|
||||||
|
If the user selects `OpenAI` but says their endpoint is only OpenAI-compatible and fronts another image model family, save `default_image_api_dialect: ratio-metadata` when they explicitly confirm the gateway expects aspect-ratio `size` plus metadata-based resolution. Otherwise leave it `null` so the runtime falls back to `OPENAI_IMAGE_API_DIALECT` or the `openai-native` default.
|
||||||
|
|
||||||
## Flow 2: EXTEND.md Exists, Model Null
|
## Flow 2: EXTEND.md Exists, Model Null
|
||||||
|
|
||||||
When EXTEND.md exists but `default_model.[current_provider]` is null, ask ONLY the model question for the current provider.
|
When EXTEND.md exists but `default_model.[current_provider]` is null, ask ONLY the model question for the current provider.
|
||||||
|
|
|
||||||
|
|
@ -19,6 +19,8 @@ default_aspect_ratio: null # "16:9"|"1:1"|"4:3"|"3:4"|"2.35:1"|null
|
||||||
|
|
||||||
default_image_size: null # 1K|2K|4K|null (Google/OpenRouter, overrides quality)
|
default_image_size: null # 1K|2K|4K|null (Google/OpenRouter, overrides quality)
|
||||||
|
|
||||||
|
default_image_api_dialect: null # openai-native|ratio-metadata|null (OpenAI-compatible gateways; null = use env/default)
|
||||||
|
|
||||||
default_model:
|
default_model:
|
||||||
google: null # e.g., "gemini-3-pro-image-preview", "gemini-3.1-flash-image-preview"
|
google: null # e.g., "gemini-3-pro-image-preview", "gemini-3.1-flash-image-preview"
|
||||||
openai: null # e.g., "gpt-image-1.5", "gpt-image-1"
|
openai: null # e.g., "gpt-image-1.5", "gpt-image-1"
|
||||||
|
|
@ -68,6 +70,7 @@ batch:
|
||||||
| `default_quality` | string\|null | null | Default quality (null = 2k) |
|
| `default_quality` | string\|null | null | Default quality (null = 2k) |
|
||||||
| `default_aspect_ratio` | string\|null | null | Default aspect ratio |
|
| `default_aspect_ratio` | string\|null | null | Default aspect ratio |
|
||||||
| `default_image_size` | string\|null | null | Google/OpenRouter image size (overrides quality) |
|
| `default_image_size` | string\|null | null | Google/OpenRouter image size (overrides quality) |
|
||||||
|
| `default_image_api_dialect` | string\|null | null | OpenAI-compatible image dialect (`openai-native` or `ratio-metadata`) |
|
||||||
| `default_model.google` | string\|null | null | Google default model |
|
| `default_model.google` | string\|null | null | Google default model |
|
||||||
| `default_model.openai` | string\|null | null | OpenAI default model |
|
| `default_model.openai` | string\|null | null | OpenAI default model |
|
||||||
| `default_model.azure` | string\|null | null | Azure default deployment name |
|
| `default_model.azure` | string\|null | null | Azure default deployment name |
|
||||||
|
|
@ -88,6 +91,7 @@ batch:
|
||||||
version: 1
|
version: 1
|
||||||
default_provider: google
|
default_provider: google
|
||||||
default_quality: 2k
|
default_quality: 2k
|
||||||
|
default_image_api_dialect: null
|
||||||
---
|
---
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
@ -99,6 +103,7 @@ default_provider: google
|
||||||
default_quality: 2k
|
default_quality: 2k
|
||||||
default_aspect_ratio: "16:9"
|
default_aspect_ratio: "16:9"
|
||||||
default_image_size: 2K
|
default_image_size: 2K
|
||||||
|
default_image_api_dialect: null
|
||||||
default_model:
|
default_model:
|
||||||
google: "gemini-3-pro-image-preview"
|
google: "gemini-3-pro-image-preview"
|
||||||
openai: "gpt-image-1.5"
|
openai: "gpt-image-1.5"
|
||||||
|
|
|
||||||
|
|
@ -17,6 +17,7 @@ import {
|
||||||
mergeConfig,
|
mergeConfig,
|
||||||
normalizeOutputImagePath,
|
normalizeOutputImagePath,
|
||||||
parseArgs,
|
parseArgs,
|
||||||
|
parseOpenAIImageApiDialect,
|
||||||
parseSimpleYaml,
|
parseSimpleYaml,
|
||||||
} from "./main.ts";
|
} from "./main.ts";
|
||||||
|
|
||||||
|
|
@ -33,6 +34,7 @@ function makeArgs(overrides: Partial<CliArgs> = {}): CliArgs {
|
||||||
quality: null,
|
quality: null,
|
||||||
imageSize: null,
|
imageSize: null,
|
||||||
imageSizeSource: null,
|
imageSizeSource: null,
|
||||||
|
imageApiDialect: null,
|
||||||
referenceImages: [],
|
referenceImages: [],
|
||||||
n: 1,
|
n: 1,
|
||||||
batchFile: null,
|
batchFile: null,
|
||||||
|
|
@ -85,6 +87,8 @@ test("parseArgs parses the main baoyu-imagine CLI flags", () => {
|
||||||
"2k",
|
"2k",
|
||||||
"--imageSize",
|
"--imageSize",
|
||||||
"4k",
|
"4k",
|
||||||
|
"--imageApiDialect",
|
||||||
|
"ratio-metadata",
|
||||||
"--ref",
|
"--ref",
|
||||||
"ref/one.png",
|
"ref/one.png",
|
||||||
"ref/two.jpg",
|
"ref/two.jpg",
|
||||||
|
|
@ -102,6 +106,7 @@ test("parseArgs parses the main baoyu-imagine CLI flags", () => {
|
||||||
assert.equal(args.aspectRatioSource, null);
|
assert.equal(args.aspectRatioSource, null);
|
||||||
assert.equal(args.imageSize, "4K");
|
assert.equal(args.imageSize, "4K");
|
||||||
assert.equal(args.imageSizeSource, "cli");
|
assert.equal(args.imageSizeSource, "cli");
|
||||||
|
assert.equal(args.imageApiDialect, "ratio-metadata");
|
||||||
assert.deepEqual(args.referenceImages, ["ref/one.png", "ref/two.jpg"]);
|
assert.deepEqual(args.referenceImages, ["ref/one.png", "ref/two.jpg"]);
|
||||||
assert.equal(args.n, 3);
|
assert.equal(args.n, 3);
|
||||||
assert.equal(args.jobs, 5);
|
assert.equal(args.jobs, 5);
|
||||||
|
|
@ -125,6 +130,7 @@ default_provider: openrouter
|
||||||
default_quality: normal
|
default_quality: normal
|
||||||
default_aspect_ratio: '16:9'
|
default_aspect_ratio: '16:9'
|
||||||
default_image_size: 2K
|
default_image_size: 2K
|
||||||
|
default_image_api_dialect: ratio-metadata
|
||||||
default_model:
|
default_model:
|
||||||
google: gemini-3-pro-image-preview
|
google: gemini-3-pro-image-preview
|
||||||
openai: gpt-image-1.5
|
openai: gpt-image-1.5
|
||||||
|
|
@ -157,6 +163,7 @@ batch:
|
||||||
assert.equal(config.default_quality, "normal");
|
assert.equal(config.default_quality, "normal");
|
||||||
assert.equal(config.default_aspect_ratio, "16:9");
|
assert.equal(config.default_aspect_ratio, "16:9");
|
||||||
assert.equal(config.default_image_size, "2K");
|
assert.equal(config.default_image_size, "2K");
|
||||||
|
assert.equal(config.default_image_api_dialect, "ratio-metadata");
|
||||||
assert.equal(config.default_model?.google, "gemini-3-pro-image-preview");
|
assert.equal(config.default_model?.google, "gemini-3-pro-image-preview");
|
||||||
assert.equal(config.default_model?.openai, "gpt-image-1.5");
|
assert.equal(config.default_model?.openai, "gpt-image-1.5");
|
||||||
assert.equal(config.default_model?.zai, "glm-image");
|
assert.equal(config.default_model?.zai, "glm-image");
|
||||||
|
|
@ -252,6 +259,7 @@ test("mergeConfig only fills values missing from CLI args", () => {
|
||||||
default_quality: "2k",
|
default_quality: "2k",
|
||||||
default_aspect_ratio: "3:2",
|
default_aspect_ratio: "3:2",
|
||||||
default_image_size: "2K",
|
default_image_size: "2K",
|
||||||
|
default_image_api_dialect: "ratio-metadata",
|
||||||
} satisfies Partial<ExtendConfig>,
|
} satisfies Partial<ExtendConfig>,
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|
@ -261,6 +269,7 @@ test("mergeConfig only fills values missing from CLI args", () => {
|
||||||
assert.equal(merged.aspectRatioSource, "config");
|
assert.equal(merged.aspectRatioSource, "config");
|
||||||
assert.equal(merged.imageSize, "4K");
|
assert.equal(merged.imageSize, "4K");
|
||||||
assert.equal(merged.imageSizeSource, "cli");
|
assert.equal(merged.imageSizeSource, "cli");
|
||||||
|
assert.equal(merged.imageApiDialect, "ratio-metadata");
|
||||||
});
|
});
|
||||||
|
|
||||||
test("mergeConfig tags inherited imageSize defaults so providers can ignore incompatible config", () => {
|
test("mergeConfig tags inherited imageSize defaults so providers can ignore incompatible config", () => {
|
||||||
|
|
@ -275,6 +284,25 @@ test("mergeConfig tags inherited imageSize defaults so providers can ignore inco
|
||||||
assert.equal(merged.imageSizeSource, "config");
|
assert.equal(merged.imageSizeSource, "config");
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// With no CLI flag and an empty EXTEND config, the dialect is taken from the
// OPENAI_IMAGE_API_DIALECT environment variable.
test("mergeConfig falls back to OPENAI_IMAGE_API_DIALECT when CLI and EXTEND are unset", (t) => {
  const envOverrides = { OPENAI_IMAGE_API_DIALECT: "ratio-metadata" };
  useEnv(t, envOverrides);

  const cliArgs = makeArgs();
  const { imageApiDialect } = mergeConfig(cliArgs, {});
  assert.equal(imageApiDialect, "ratio-metadata");
});
|
||||||
|
|
||||||
|
// Accepts both supported dialects, maps null to null, and rejects unknown identifiers.
test("parseOpenAIImageApiDialect validates supported values", () => {
  for (const dialect of ["openai-native", "ratio-metadata"]) {
    assert.equal(parseOpenAIImageApiDialect(dialect), dialect);
  }
  assert.equal(parseOpenAIImageApiDialect(null), null);

  const parseUnknown = () => parseOpenAIImageApiDialect("gateway-magic");
  assert.throws(parseUnknown, /Invalid OpenAI image API dialect/);
});
|
||||||
|
|
||||||
test("detectProvider rejects non-ref-capable providers and prefers Google first when multiple keys exist", (t) => {
|
test("detectProvider rejects non-ref-capable providers and prefers Google first when multiple keys exist", (t) => {
|
||||||
assert.throws(
|
assert.throws(
|
||||||
() =>
|
() =>
|
||||||
|
|
@ -492,6 +520,7 @@ test("loadBatchTasks and createTaskArgs resolve batch-relative paths", async (t)
|
||||||
makeArgs({
|
makeArgs({
|
||||||
provider: "replicate",
|
provider: "replicate",
|
||||||
quality: "2k",
|
quality: "2k",
|
||||||
|
imageApiDialect: "ratio-metadata",
|
||||||
json: true,
|
json: true,
|
||||||
}),
|
}),
|
||||||
loaded.tasks[0]!,
|
loaded.tasks[0]!,
|
||||||
|
|
@ -508,6 +537,7 @@ test("loadBatchTasks and createTaskArgs resolve batch-relative paths", async (t)
|
||||||
assert.equal(taskArgs.provider, "replicate");
|
assert.equal(taskArgs.provider, "replicate");
|
||||||
assert.equal(taskArgs.aspectRatio, "16:9");
|
assert.equal(taskArgs.aspectRatio, "16:9");
|
||||||
assert.equal(taskArgs.quality, "2k");
|
assert.equal(taskArgs.quality, "2k");
|
||||||
|
assert.equal(taskArgs.imageApiDialect, "ratio-metadata");
|
||||||
assert.equal(taskArgs.json, true);
|
assert.equal(taskArgs.json, true);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -8,6 +8,7 @@ import type {
|
||||||
BatchTaskInput,
|
BatchTaskInput,
|
||||||
CliArgs,
|
CliArgs,
|
||||||
ExtendConfig,
|
ExtendConfig,
|
||||||
|
OpenAIImageApiDialect,
|
||||||
Provider,
|
Provider,
|
||||||
} from "./types";
|
} from "./types";
|
||||||
|
|
||||||
|
|
@ -83,6 +84,7 @@ Options:
|
||||||
--size <WxH> Size (e.g., 1024x1024)
|
--size <WxH> Size (e.g., 1024x1024)
|
||||||
--quality normal|2k Quality preset (default: 2k)
|
--quality normal|2k Quality preset (default: 2k)
|
||||||
--imageSize 1K|2K|4K Image size for Google/OpenRouter (default: from quality)
|
--imageSize 1K|2K|4K Image size for Google/OpenRouter (default: from quality)
|
||||||
|
--imageApiDialect <id> OpenAI-compatible image dialect: openai-native|ratio-metadata
|
||||||
--ref <files...> Reference images (Google, OpenAI, Azure, OpenRouter, Replicate supported families, MiniMax, or Seedream 4.0/4.5/5.0)
|
--ref <files...> Reference images (Google, OpenAI, Azure, OpenRouter, Replicate supported families, MiniMax, or Seedream 4.0/4.5/5.0)
|
||||||
--n <count> Number of images for the current task (default: 1; Replicate currently requires 1)
|
--n <count> Number of images for the current task (default: 1; Replicate currently requires 1)
|
||||||
--json JSON output
|
--json JSON output
|
||||||
|
|
@ -133,6 +135,7 @@ Environment variables:
|
||||||
JIMENG_IMAGE_MODEL Default Jimeng model (jimeng_t2i_v40)
|
JIMENG_IMAGE_MODEL Default Jimeng model (jimeng_t2i_v40)
|
||||||
SEEDREAM_IMAGE_MODEL Default Seedream model (doubao-seedream-5-0-260128)
|
SEEDREAM_IMAGE_MODEL Default Seedream model (doubao-seedream-5-0-260128)
|
||||||
OPENAI_BASE_URL Custom OpenAI endpoint
|
OPENAI_BASE_URL Custom OpenAI endpoint
|
||||||
|
OPENAI_IMAGE_API_DIALECT OpenAI-compatible image dialect (openai-native|ratio-metadata)
|
||||||
OPENAI_IMAGE_USE_CHAT Use /chat/completions instead of /images/generations (true|false)
|
OPENAI_IMAGE_USE_CHAT Use /chat/completions instead of /images/generations (true|false)
|
||||||
OPENROUTER_BASE_URL Custom OpenRouter endpoint
|
OPENROUTER_BASE_URL Custom OpenRouter endpoint
|
||||||
OPENROUTER_HTTP_REFERER Optional app URL for OpenRouter attribution
|
OPENROUTER_HTTP_REFERER Optional app URL for OpenRouter attribution
|
||||||
|
|
@ -170,6 +173,7 @@ export function parseArgs(argv: string[]): CliArgs {
|
||||||
quality: null,
|
quality: null,
|
||||||
imageSize: null,
|
imageSize: null,
|
||||||
imageSizeSource: null,
|
imageSizeSource: null,
|
||||||
|
imageApiDialect: null,
|
||||||
referenceImages: [],
|
referenceImages: [],
|
||||||
n: 1,
|
n: 1,
|
||||||
batchFile: null,
|
batchFile: null,
|
||||||
|
|
@ -299,6 +303,15 @@ export function parseArgs(argv: string[]): CliArgs {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (a === "--imageApiDialect") {
|
||||||
|
const v = argv[++i];
|
||||||
|
if (v !== "openai-native" && v !== "ratio-metadata") {
|
||||||
|
throw new Error(`Invalid imageApiDialect: ${v}`);
|
||||||
|
}
|
||||||
|
out.imageApiDialect = v;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
if (a === "--ref" || a === "--reference") {
|
if (a === "--ref" || a === "--reference") {
|
||||||
const { items, next } = takeMany(i);
|
const { items, next } = takeMany(i);
|
||||||
if (items.length === 0) throw new Error(`Missing files for ${a}`);
|
if (items.length === 0) throw new Error(`Missing files for ${a}`);
|
||||||
|
|
@ -402,6 +415,9 @@ export function parseSimpleYaml(yaml: string): Partial<ExtendConfig> {
|
||||||
config.default_aspect_ratio = cleaned === "null" ? null : cleaned;
|
config.default_aspect_ratio = cleaned === "null" ? null : cleaned;
|
||||||
} else if (key === "default_image_size") {
|
} else if (key === "default_image_size") {
|
||||||
config.default_image_size = value === "null" ? null : value as "1K" | "2K" | "4K";
|
config.default_image_size = value === "null" ? null : value as "1K" | "2K" | "4K";
|
||||||
|
} else if (key === "default_image_api_dialect") {
|
||||||
|
config.default_image_api_dialect =
|
||||||
|
value === "null" ? null : parseOpenAIImageApiDialect(value);
|
||||||
} else if (key === "default_model") {
|
} else if (key === "default_model") {
|
||||||
config.default_model = {
|
config.default_model = {
|
||||||
google: null,
|
google: null,
|
||||||
|
|
@ -487,6 +503,15 @@ export function parseSimpleYaml(yaml: string): Partial<ExtendConfig> {
|
||||||
return config;
|
return config;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Parses and validates an OpenAI-compatible image API dialect identifier.
 *
 * Single and double quote characters are removed and surrounding whitespace
 * is trimmed before matching, so a quoted value such as `"ratio-metadata"`
 * is accepted.
 *
 * @param value - Raw dialect string (for example from EXTEND.md or the
 *   `OPENAI_IMAGE_API_DIALECT` env var); may be `null` or `undefined`.
 * @returns The validated dialect, or `null` when the value is unset or blank.
 * @throws Error when the value is non-blank and is not a supported dialect.
 */
export function parseOpenAIImageApiDialect(
  value: string | undefined | null
): OpenAIImageApiDialect | null {
  if (value == null) return null;
  const normalized = value.replace(/['"]/g, "").trim();
  // A value that is blank after normalization (whitespace only, or a literal
  // `""` left over from an env file) means "not configured", same as "".
  if (normalized === "") return null;
  if (normalized === "openai-native" || normalized === "ratio-metadata") return normalized;
  throw new Error(`Invalid OpenAI image API dialect: ${value}`);
}
|
||||||
|
|
||||||
type ExtendConfigPathPair = {
|
type ExtendConfigPathPair = {
|
||||||
current: string;
|
current: string;
|
||||||
legacy: string;
|
legacy: string;
|
||||||
|
|
@ -548,6 +573,10 @@ export async function loadExtendConfig(
|
||||||
export function mergeConfig(args: CliArgs, extend: Partial<ExtendConfig>): CliArgs {
|
export function mergeConfig(args: CliArgs, extend: Partial<ExtendConfig>): CliArgs {
|
||||||
const aspectRatio = args.aspectRatio ?? extend.default_aspect_ratio ?? null;
|
const aspectRatio = args.aspectRatio ?? extend.default_aspect_ratio ?? null;
|
||||||
const imageSize = args.imageSize ?? extend.default_image_size ?? null;
|
const imageSize = args.imageSize ?? extend.default_image_size ?? null;
|
||||||
|
const imageApiDialect =
|
||||||
|
args.imageApiDialect ??
|
||||||
|
extend.default_image_api_dialect ??
|
||||||
|
parseOpenAIImageApiDialect(process.env.OPENAI_IMAGE_API_DIALECT);
|
||||||
return {
|
return {
|
||||||
...args,
|
...args,
|
||||||
provider: args.provider ?? extend.default_provider ?? null,
|
provider: args.provider ?? extend.default_provider ?? null,
|
||||||
|
|
@ -560,6 +589,7 @@ export function mergeConfig(args: CliArgs, extend: Partial<ExtendConfig>): CliAr
|
||||||
imageSizeSource:
|
imageSizeSource:
|
||||||
args.imageSizeSource ??
|
args.imageSizeSource ??
|
||||||
(args.imageSize !== null ? "cli" : (imageSize !== null ? "config" : null)),
|
(args.imageSize !== null ? "cli" : (imageSize !== null ? "config" : null)),
|
||||||
|
imageApiDialect,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -891,6 +921,7 @@ export function createTaskArgs(baseArgs: CliArgs, task: BatchTaskInput, batchDir
|
||||||
quality: task.quality ?? baseArgs.quality ?? null,
|
quality: task.quality ?? baseArgs.quality ?? null,
|
||||||
imageSize: task.imageSize ?? baseArgs.imageSize ?? null,
|
imageSize: task.imageSize ?? baseArgs.imageSize ?? null,
|
||||||
imageSizeSource: task.imageSize != null ? "task" : (baseArgs.imageSizeSource ?? null),
|
imageSizeSource: task.imageSize != null ? "task" : (baseArgs.imageSizeSource ?? null),
|
||||||
|
imageApiDialect: task.imageApiDialect ?? baseArgs.imageApiDialect ?? null,
|
||||||
referenceImages: task.ref ? task.ref.map((filePath) => resolveBatchPath(batchDir, filePath)) : [],
|
referenceImages: task.ref ? task.ref.map((filePath) => resolveBatchPath(batchDir, filePath)) : [],
|
||||||
n: task.n ?? baseArgs.n,
|
n: task.n ?? baseArgs.n,
|
||||||
batchFile: null,
|
batchFile: null,
|
||||||
|
|
|
||||||
|
|
@ -48,6 +48,7 @@ function makeArgs(overrides: Partial<CliArgs> = {}): CliArgs {
|
||||||
size: null,
|
size: null,
|
||||||
quality: null,
|
quality: null,
|
||||||
imageSize: null,
|
imageSize: null,
|
||||||
|
imageApiDialect: null,
|
||||||
referenceImages: [],
|
referenceImages: [],
|
||||||
n: 1,
|
n: 1,
|
||||||
batchFile: null,
|
batchFile: null,
|
||||||
|
|
|
||||||
|
|
@ -50,6 +50,7 @@ function makeArgs(overrides: Partial<CliArgs> = {}): CliArgs {
|
||||||
size: null,
|
size: null,
|
||||||
quality: null,
|
quality: null,
|
||||||
imageSize: null,
|
imageSize: null,
|
||||||
|
imageApiDialect: null,
|
||||||
referenceImages: [],
|
referenceImages: [],
|
||||||
n: 1,
|
n: 1,
|
||||||
batchFile: null,
|
batchFile: null,
|
||||||
|
|
|
||||||
|
|
@ -15,6 +15,7 @@ function makeArgs(overrides: Partial<CliArgs> = {}): CliArgs {
|
||||||
size: null,
|
size: null,
|
||||||
quality: null,
|
quality: null,
|
||||||
imageSize: null,
|
imageSize: null,
|
||||||
|
imageApiDialect: null,
|
||||||
referenceImages: [],
|
referenceImages: [],
|
||||||
n: 1,
|
n: 1,
|
||||||
batchFile: null,
|
batchFile: null,
|
||||||
|
|
|
||||||
|
|
@ -50,6 +50,7 @@ function makeArgs(overrides: Partial<CliArgs> = {}): CliArgs {
|
||||||
size: null,
|
size: null,
|
||||||
quality: null,
|
quality: null,
|
||||||
imageSize: null,
|
imageSize: null,
|
||||||
|
imageApiDialect: null,
|
||||||
referenceImages: [],
|
referenceImages: [],
|
||||||
n: 1,
|
n: 1,
|
||||||
batchFile: null,
|
batchFile: null,
|
||||||
|
|
|
||||||
|
|
@ -2,9 +2,16 @@ import assert from "node:assert/strict";
|
||||||
import test from "node:test";
|
import test from "node:test";
|
||||||
|
|
||||||
import {
|
import {
|
||||||
|
buildOpenAIGenerationsBody,
|
||||||
extractImageFromResponse,
|
extractImageFromResponse,
|
||||||
|
getOpenAIAspectRatio,
|
||||||
|
getOpenAIImageApiDialect,
|
||||||
|
getOpenAIResolution,
|
||||||
getMimeType,
|
getMimeType,
|
||||||
getOpenAISize,
|
getOpenAISize,
|
||||||
|
getOrientationFromAspectRatio,
|
||||||
|
inferAspectRatioFromSize,
|
||||||
|
inferResolutionFromSize,
|
||||||
parseAspectRatio,
|
parseAspectRatio,
|
||||||
} from "./openai.ts";
|
} from "./openai.ts";
|
||||||
|
|
||||||
|
|
@ -18,6 +25,69 @@ test("OpenAI aspect-ratio parsing and size selection match model families", () =
|
||||||
assert.equal(getOpenAISize("dall-e-2", "16:9", "2k"), "1024x1024");
|
assert.equal(getOpenAISize("dall-e-2", "16:9", "2k"), "1024x1024");
|
||||||
assert.equal(getOpenAISize("gpt-image-1.5", "16:9", "2k"), "1536x1024");
|
assert.equal(getOpenAISize("gpt-image-1.5", "16:9", "2k"), "1536x1024");
|
||||||
assert.equal(getOpenAISize("gpt-image-1.5", "4:3", "2k"), "1024x1024");
|
assert.equal(getOpenAISize("gpt-image-1.5", "4:3", "2k"), "1024x1024");
|
||||||
|
assert.equal(inferAspectRatioFromSize("1536x1024"), "3:2");
|
||||||
|
assert.equal(inferResolutionFromSize("1536x1024"), "2K");
|
||||||
|
assert.equal(getOpenAIAspectRatio({ aspectRatio: null, size: "2048x1152" }), "16:9");
|
||||||
|
assert.equal(getOpenAIResolution({ imageSize: null, size: "2048x1152", quality: "normal" }), "2K");
|
||||||
|
assert.equal(getOrientationFromAspectRatio("16:9"), "landscape");
|
||||||
|
assert.equal(getOrientationFromAspectRatio("9:16"), "portrait");
|
||||||
|
assert.equal(getOrientationFromAspectRatio("1:1"), null);
|
||||||
|
assert.equal(getOpenAIImageApiDialect({ imageApiDialect: null }), "openai-native");
|
||||||
|
});
|
||||||
|
|
||||||
|
test("OpenAI generations body switches between native and ratio-metadata dialects", () => {
|
||||||
|
assert.deepEqual(
|
||||||
|
buildOpenAIGenerationsBody("Draw a skyline", "gpt-image-1.5", {
|
||||||
|
aspectRatio: "16:9",
|
||||||
|
size: null,
|
||||||
|
quality: "2k",
|
||||||
|
imageSize: null,
|
||||||
|
imageApiDialect: null,
|
||||||
|
}),
|
||||||
|
{
|
||||||
|
model: "gpt-image-1.5",
|
||||||
|
prompt: "Draw a skyline",
|
||||||
|
size: "1536x1024",
|
||||||
|
},
|
||||||
|
);
|
||||||
|
|
||||||
|
assert.deepEqual(
|
||||||
|
buildOpenAIGenerationsBody("Draw a skyline", "gemini-3-pro-image-preview", {
|
||||||
|
aspectRatio: "16:9",
|
||||||
|
size: null,
|
||||||
|
quality: "2k",
|
||||||
|
imageSize: null,
|
||||||
|
imageApiDialect: "ratio-metadata",
|
||||||
|
}),
|
||||||
|
{
|
||||||
|
model: "gemini-3-pro-image-preview",
|
||||||
|
prompt: "Draw a skyline",
|
||||||
|
size: "16:9",
|
||||||
|
metadata: {
|
||||||
|
resolution: "2K",
|
||||||
|
orientation: "landscape",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
);
|
||||||
|
|
||||||
|
assert.deepEqual(
|
||||||
|
buildOpenAIGenerationsBody("Draw a portrait", "gemini-3-pro-image-preview", {
|
||||||
|
aspectRatio: null,
|
||||||
|
size: "1152x2048",
|
||||||
|
quality: "normal",
|
||||||
|
imageSize: null,
|
||||||
|
imageApiDialect: "ratio-metadata",
|
||||||
|
}),
|
||||||
|
{
|
||||||
|
model: "gemini-3-pro-image-preview",
|
||||||
|
prompt: "Draw a portrait",
|
||||||
|
size: "9:16",
|
||||||
|
metadata: {
|
||||||
|
resolution: "2K",
|
||||||
|
orientation: "portrait",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
);
|
||||||
});
|
});
|
||||||
|
|
||||||
test("OpenAI mime-type detection covers supported reference image extensions", () => {
|
test("OpenAI mime-type detection covers supported reference image extensions", () => {
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
import path from "node:path";
|
import path from "node:path";
|
||||||
import { readFile } from "node:fs/promises";
|
import { readFile } from "node:fs/promises";
|
||||||
import type { CliArgs } from "../types";
|
import type { CliArgs, OpenAIImageApiDialect } from "../types";
|
||||||
|
|
||||||
export function getDefaultModel(): string {
|
export function getDefaultModel(): string {
|
||||||
return process.env.OPENAI_IMAGE_MODEL || "gpt-image-1.5";
|
return process.env.OPENAI_IMAGE_MODEL || "gpt-image-1.5";
|
||||||
|
|
@ -23,6 +23,8 @@ type SizeMapping = {
|
||||||
portrait: string;
|
portrait: string;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
 * JSON payload sent to the `/images/generations` endpoint.
 *
 * Kept as a loose record because the fields differ by dialect: the native
 * body may carry `quality`, while the ratio-metadata body carries `metadata`.
 */
type OpenAIGenerationsBody = Record<string, unknown>;
|
||||||
|
|
||||||
export function getOpenAISize(
|
export function getOpenAISize(
|
||||||
model: string,
|
model: string,
|
||||||
ar: string | null,
|
ar: string | null,
|
||||||
|
|
@ -60,6 +62,114 @@ export function getOpenAISize(
|
||||||
return sizes.square;
|
return sizes.square;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Parses a pixel-dimension string such as `"1536x1024"` into numeric width
 * and height.
 *
 * The separator may be `x` or `X` with optional whitespace around it.
 * Leading and trailing whitespace is ignored so that values copied from
 * config files or quoted shell arguments still parse.
 *
 * @param value - Raw size string.
 * @returns The parsed dimensions, or `null` when the value is not a
 *   `<width>x<height>` pair of positive, finite integers.
 */
function parsePixelSize(value: string): { width: number; height: number } | null {
  const match = /^(\d+)\s*[xX]\s*(\d+)$/.exec(value.trim());
  if (!match) return null;

  const width = Number.parseInt(match[1]!, 10);
  const height = Number.parseInt(match[2]!, 10);
  // Extremely long digit runs overflow to Infinity, and zero is not a usable dimension.
  if (!Number.isFinite(width) || !Number.isFinite(height) || width <= 0 || height <= 0) {
    return null;
  }

  return { width, height };
}
|
||||||
|
|
||||||
|
/**
 * Greatest common divisor of two numbers, computed with the Euclidean
 * algorithm on their absolute values.
 *
 * @param a - First operand.
 * @param b - Second operand.
 * @returns The GCD, or `1` when both operands are zero or either operand is
 *   not finite, so the result is always safe to divide by.
 */
function gcd(a: number, b: number): number {
  let x = Math.abs(a);
  let y = Math.abs(b);

  // NaN/Infinity would make `x % y` NaN forever and the loop would never end.
  if (!Number.isFinite(x) || !Number.isFinite(y)) return 1;

  while (y !== 0) {
    [x, y] = [y, x % y];
  }

  // gcd(0, 0) is 0 mathematically; return 1 so callers can divide by the result.
  return x || 1;
}
|
||||||
|
|
||||||
|
/**
 * Resolves the image API dialect to use for a request.
 *
 * @param args - Arguments carrying the optional `imageApiDialect` choice.
 * @returns The chosen dialect, or `"openai-native"` when none was set.
 */
export function getOpenAIImageApiDialect(args: Pick<CliArgs, "imageApiDialect">): OpenAIImageApiDialect {
  const { imageApiDialect } = args;
  if (imageApiDialect === null || imageApiDialect === undefined) {
    return "openai-native";
  }
  return imageApiDialect;
}
|
||||||
|
|
||||||
|
/**
 * Derives a reduced `W:H` aspect-ratio string from a pixel size.
 *
 * @param size - Pixel size such as `"1536x1024"`, or `null`.
 * @returns The ratio in lowest terms (e.g. `"3:2"`), or `null` when `size`
 *   is empty or cannot be parsed as pixel dimensions.
 */
export function inferAspectRatioFromSize(size: string | null): string | null {
  const dims = size ? parsePixelSize(size) : null;
  if (!dims) return null;

  const { width, height } = dims;
  const common = gcd(width, height);
  return [width / common, height / common].join(":");
}
|
||||||
|
|
||||||
|
/**
 * Buckets a pixel size into a resolution tier by its longest edge.
 *
 * Up to 1024 px maps to `"1K"`, up to 2048 px to `"2K"`, and anything larger
 * to `"4K"`.
 *
 * @param size - Pixel size such as `"2048x1152"`, or `null`.
 * @returns The tier, or `null` when `size` is empty or cannot be parsed.
 */
export function inferResolutionFromSize(size: string | null): "1K" | "2K" | "4K" | null {
  if (!size) return null;

  const dims = parsePixelSize(size);
  if (!dims) return null;

  const longest = dims.width >= dims.height ? dims.width : dims.height;
  if (longest > 2048) return "4K";
  return longest > 1024 ? "2K" : "1K";
}
|
||||||
|
|
||||||
|
/**
 * Picks the aspect ratio to send to the gateway.
 *
 * Order of preference: an explicit `aspectRatio`, then a ratio inferred from
 * the pixel `size`, then the square default `"1:1"`.
 *
 * @param args - Arguments carrying `aspectRatio` and `size`.
 * @returns The aspect-ratio string to use.
 */
export function getOpenAIAspectRatio(args: Pick<CliArgs, "aspectRatio" | "size">): string {
  if (args.aspectRatio != null) return args.aspectRatio;

  const fromSize = inferAspectRatioFromSize(args.size);
  return fromSize != null ? fromSize : "1:1";
}
|
||||||
|
|
||||||
|
/**
 * Picks the resolution tier to send to the gateway.
 *
 * Order of preference: an explicit `imageSize` of `1K`/`2K`/`4K`, then a tier
 * inferred from the pixel `size`, then a fallback based on `quality`
 * (`"normal"` gives `1K`, anything else gives `2K`).
 *
 * @param args - Arguments carrying `imageSize`, `size` and `quality`.
 * @returns The resolution tier.
 */
export function getOpenAIResolution(
  args: Pick<CliArgs, "imageSize" | "size" | "quality">
): "1K" | "2K" | "4K" {
  const requested = args.imageSize;
  switch (requested) {
    case "1K":
    case "2K":
    case "4K":
      return requested;
  }

  const fromSize = inferResolutionFromSize(args.size);
  if (fromSize) return fromSize;

  if (args.quality === "normal") return "1K";
  return "2K";
}
|
||||||
|
|
||||||
|
/**
 * Classifies an aspect ratio as landscape or portrait.
 *
 * @param ar - Aspect-ratio string handed to `parseAspectRatio`.
 * @returns `"landscape"` or `"portrait"`, or `null` when the ratio cannot be
 *   parsed or is within 0.1 of square.
 */
export function getOrientationFromAspectRatio(ar: string): "landscape" | "portrait" | null {
  const dims = parseAspectRatio(ar);
  if (!dims) return null;

  const ratio = dims.width / dims.height;
  // Near-square ratios have no meaningful orientation.
  const nearSquare = Math.abs(ratio - 1) < 0.1;
  if (nearSquare) return null;

  if (ratio > 1) return "landscape";
  return "portrait";
}
|
||||||
|
|
||||||
|
/**
 * Builds the JSON body for an image generations request in the selected
 * dialect.
 *
 * - Native dialect: `size` is the explicit pixel size, or one chosen by
 *   `getOpenAISize`. `quality` is added only when the model name contains
 *   `dall-e-3`.
 * - `ratio-metadata` dialect: `size` is an aspect ratio, and `metadata`
 *   carries `resolution` plus `orientation` when the ratio is not near-square.
 *
 * @param prompt - Text prompt for the image.
 * @param model - Model identifier sent as-is.
 * @param args - Size, quality and dialect options.
 * @returns The request body object.
 */
export function buildOpenAIGenerationsBody(
  prompt: string,
  model: string,
  args: Pick<CliArgs, "aspectRatio" | "size" | "quality" | "imageSize" | "imageApiDialect">
): OpenAIGenerationsBody {
  const dialect = getOpenAIImageApiDialect(args);

  if (dialect !== "ratio-metadata") {
    const nativeBody: OpenAIGenerationsBody = {
      model,
      prompt,
      size: args.size || getOpenAISize(model, args.aspectRatio, args.quality),
    };

    const isDallE3 = model.includes("dall-e-3");
    if (isDallE3) {
      nativeBody.quality = args.quality === "2k" ? "hd" : "standard";
    }

    return nativeBody;
  }

  const ratio = getOpenAIAspectRatio(args);
  const resolution = getOpenAIResolution(args);
  const orientation = getOrientationFromAspectRatio(ratio);
  // Orientation is left out entirely when it cannot be determined.
  const metadata: Record<string, string> = orientation
    ? { resolution, orientation }
    : { resolution };

  return { model, prompt, size: ratio, metadata };
}
|
||||||
|
|
||||||
export async function generateImage(
|
export async function generateImage(
|
||||||
prompt: string,
|
prompt: string,
|
||||||
model: string,
|
model: string,
|
||||||
|
|
@ -78,18 +188,28 @@ export async function generateImage(
|
||||||
return generateWithChatCompletions(baseURL, apiKey, prompt, model);
|
return generateWithChatCompletions(baseURL, apiKey, prompt, model);
|
||||||
}
|
}
|
||||||
|
|
||||||
const size = args.size || getOpenAISize(model, args.aspectRatio, args.quality);
|
const imageApiDialect = getOpenAIImageApiDialect(args);
|
||||||
|
|
||||||
if (args.referenceImages.length > 0) {
|
if (args.referenceImages.length > 0) {
|
||||||
|
if (imageApiDialect !== "openai-native") {
|
||||||
|
throw new Error(
|
||||||
|
"Reference images are not supported with the ratio-metadata OpenAI dialect yet. Use openai-native, Google, Azure, OpenRouter, MiniMax, Seedream, or Replicate for image-edit workflows."
|
||||||
|
);
|
||||||
|
}
|
||||||
if (model.includes("dall-e-2") || model.includes("dall-e-3")) {
|
if (model.includes("dall-e-2") || model.includes("dall-e-3")) {
|
||||||
throw new Error(
|
throw new Error(
|
||||||
"Reference images with OpenAI in this skill require GPT Image models. Use --model gpt-image-1.5 (or another gpt-image model)."
|
"Reference images with OpenAI in this skill require GPT Image models. Use --model gpt-image-1.5 (or another gpt-image model)."
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
const size = args.size || getOpenAISize(model, args.aspectRatio, args.quality);
|
||||||
return generateWithOpenAIEdits(baseURL, apiKey, prompt, model, size, args.referenceImages, args.quality);
|
return generateWithOpenAIEdits(baseURL, apiKey, prompt, model, size, args.referenceImages, args.quality);
|
||||||
}
|
}
|
||||||
|
|
||||||
return generateWithOpenAIGenerations(baseURL, apiKey, prompt, model, size, args.quality);
|
return generateWithOpenAIGenerations(
|
||||||
|
baseURL,
|
||||||
|
apiKey,
|
||||||
|
buildOpenAIGenerationsBody(prompt, model, args)
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
async function generateWithChatCompletions(
|
async function generateWithChatCompletions(
|
||||||
|
|
@ -129,17 +249,8 @@ async function generateWithChatCompletions(
|
||||||
async function generateWithOpenAIGenerations(
|
async function generateWithOpenAIGenerations(
|
||||||
baseURL: string,
|
baseURL: string,
|
||||||
apiKey: string,
|
apiKey: string,
|
||||||
prompt: string,
|
body: OpenAIGenerationsBody
|
||||||
model: string,
|
|
||||||
size: string,
|
|
||||||
quality: CliArgs["quality"]
|
|
||||||
): Promise<Uint8Array> {
|
): Promise<Uint8Array> {
|
||||||
const body: Record<string, any> = { model, prompt, size };
|
|
||||||
|
|
||||||
if (model.includes("dall-e-3")) {
|
|
||||||
body.quality = quality === "2k" ? "hd" : "standard";
|
|
||||||
}
|
|
||||||
|
|
||||||
const res = await fetch(`${baseURL}/images/generations`, {
|
const res = await fetch(`${baseURL}/images/generations`, {
|
||||||
method: "POST",
|
method: "POST",
|
||||||
headers: {
|
headers: {
|
||||||
|
|
|
||||||
|
|
@ -28,6 +28,7 @@ function makeArgs(overrides: Partial<CliArgs> = {}): CliArgs {
|
||||||
size: null,
|
size: null,
|
||||||
quality: null,
|
quality: null,
|
||||||
imageSize: null,
|
imageSize: null,
|
||||||
|
imageApiDialect: null,
|
||||||
referenceImages: [],
|
referenceImages: [],
|
||||||
n: 1,
|
n: 1,
|
||||||
batchFile: null,
|
batchFile: null,
|
||||||
|
|
|
||||||
|
|
@ -24,6 +24,7 @@ function makeArgs(overrides: Partial<CliArgs> = {}): CliArgs {
|
||||||
quality: null,
|
quality: null,
|
||||||
imageSize: null,
|
imageSize: null,
|
||||||
imageSizeSource: null,
|
imageSizeSource: null,
|
||||||
|
imageApiDialect: null,
|
||||||
referenceImages: [],
|
referenceImages: [],
|
||||||
n: 1,
|
n: 1,
|
||||||
batchFile: null,
|
batchFile: null,
|
||||||
|
|
|
||||||
|
|
@ -25,6 +25,7 @@ function makeArgs(overrides: Partial<CliArgs> = {}): CliArgs {
|
||||||
size: null,
|
size: null,
|
||||||
quality: null,
|
quality: null,
|
||||||
imageSize: null,
|
imageSize: null,
|
||||||
|
imageApiDialect: null,
|
||||||
referenceImages: [],
|
referenceImages: [],
|
||||||
n: 1,
|
n: 1,
|
||||||
batchFile: null,
|
batchFile: null,
|
||||||
|
|
|
||||||
|
|
@ -25,6 +25,7 @@ function makeArgs(overrides: Partial<CliArgs> = {}): CliArgs {
|
||||||
size: null,
|
size: null,
|
||||||
quality: null,
|
quality: null,
|
||||||
imageSize: null,
|
imageSize: null,
|
||||||
|
imageApiDialect: null,
|
||||||
referenceImages: [],
|
referenceImages: [],
|
||||||
n: 1,
|
n: 1,
|
||||||
batchFile: null,
|
batchFile: null,
|
||||||
|
|
|
||||||
|
|
@ -10,6 +10,7 @@ export type Provider =
|
||||||
| "seedream"
|
| "seedream"
|
||||||
| "azure";
|
| "azure";
|
||||||
export type Quality = "normal" | "2k";
|
export type Quality = "normal" | "2k";
|
||||||
|
/**
 * Request dialect used for OpenAI-compatible image endpoints.
 *
 * - `"openai-native"`: the request carries a pixel `size`.
 * - `"ratio-metadata"`: for gateways that expect an aspect-ratio `size` plus
 *   `metadata.resolution` instead of a pixel size.
 */
export type OpenAIImageApiDialect = "openai-native" | "ratio-metadata";
|
||||||
|
|
||||||
export type CliArgs = {
|
export type CliArgs = {
|
||||||
prompt: string | null;
|
prompt: string | null;
|
||||||
|
|
@ -23,6 +24,7 @@ export type CliArgs = {
|
||||||
quality: Quality | null;
|
quality: Quality | null;
|
||||||
imageSize: string | null;
|
imageSize: string | null;
|
||||||
imageSizeSource?: "cli" | "task" | "config" | null;
|
imageSizeSource?: "cli" | "task" | "config" | null;
|
||||||
|
imageApiDialect: OpenAIImageApiDialect | null;
|
||||||
referenceImages: string[];
|
referenceImages: string[];
|
||||||
n: number;
|
n: number;
|
||||||
batchFile: string | null;
|
batchFile: string | null;
|
||||||
|
|
@ -42,6 +44,7 @@ export type BatchTaskInput = {
|
||||||
size?: string | null;
|
size?: string | null;
|
||||||
quality?: Quality | null;
|
quality?: Quality | null;
|
||||||
imageSize?: "1K" | "2K" | "4K" | null;
|
imageSize?: "1K" | "2K" | "4K" | null;
|
||||||
|
imageApiDialect?: OpenAIImageApiDialect | null;
|
||||||
ref?: string[];
|
ref?: string[];
|
||||||
n?: number;
|
n?: number;
|
||||||
};
|
};
|
||||||
|
|
@ -59,6 +62,7 @@ export type ExtendConfig = {
|
||||||
default_quality: Quality | null;
|
default_quality: Quality | null;
|
||||||
default_aspect_ratio: string | null;
|
default_aspect_ratio: string | null;
|
||||||
default_image_size: "1K" | "2K" | "4K" | null;
|
default_image_size: "1K" | "2K" | "4K" | null;
|
||||||
|
default_image_api_dialect: OpenAIImageApiDialect | null;
|
||||||
default_model: {
|
default_model: {
|
||||||
google: string | null;
|
google: string | null;
|
||||||
openai: string | null;
|
openai: string | null;
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue