diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index 50ff76e..659d1f5 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -6,7 +6,7 @@ }, "metadata": { "description": "Skills shared by Baoyu for improving daily work efficiency", - "version": "1.99.1" + "version": "1.100.0" }, "plugins": [ { diff --git a/CHANGELOG.md b/CHANGELOG.md index b80d5e9..af2bdad 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,11 @@ English | [中文](./CHANGELOG.zh.md) +## 1.100.0 - 2026-04-12 + +### Features +- `baoyu-imagine`: add Z.AI GLM-Image provider — supports `glm-image` and `cogview-4-250304` models via the Z.AI sync image API; configure with `ZAI_API_KEY` (or `BIGMODEL_API_KEY` for backward compatibility) + ## 1.99.1 - 2026-04-11 ### Fixes diff --git a/CHANGELOG.zh.md b/CHANGELOG.zh.md index fa188a0..e45ff35 100644 --- a/CHANGELOG.zh.md +++ b/CHANGELOG.zh.md @@ -2,6 +2,11 @@ [English](./CHANGELOG.md) | 中文 +## 1.100.0 - 2026-04-12 + +### 新功能 +- `baoyu-imagine`:新增 Z.AI GLM-Image 服务商支持,支持 `glm-image` 和 `cogview-4-250304` 模型,通过 Z.AI 同步图像 API 调用;配置 `ZAI_API_KEY`(或 `BIGMODEL_API_KEY` 向后兼容) + ## 1.99.1 - 2026-04-11 ### 修复 diff --git a/README.md b/README.md index fd93a19..39ca7bd 100644 --- a/README.md +++ b/README.md @@ -745,6 +745,9 @@ AI SDK-based image generation using OpenAI, Azure OpenAI, Google, OpenRouter, Da # DashScope with custom size /baoyu-imagine --prompt "为咖啡品牌设计一张 21:9 横幅海报,包含清晰中文标题" --image banner.png --provider dashscope --model qwen-image-2.0-pro --size 2048x872 +# Z.AI GLM-Image +/baoyu-imagine --prompt "一张带清晰中文标题的科技海报" --image out.png --provider zai + # MiniMax /baoyu-imagine --prompt "A fashion editorial portrait by a bright studio window" --image out.jpg --provider minimax @@ -781,8 +784,8 @@ AI SDK-based image generation using OpenAI, Azure OpenAI, Google, OpenRouter, Da | `--image` | Output image path (required) | | `--batchfile` | JSON batch file for multi-image generation | | `--jobs` | Worker 
count for batch mode | -| `--provider` | `google`, `openai`, `azure`, `openrouter`, `dashscope`, `minimax`, `jimeng`, `seedream`, or `replicate` | -| `--model`, `-m` | Model ID or deployment name. Azure uses deployment name; OpenRouter uses full model IDs; MiniMax uses `image-01` / `image-01-live` | +| `--provider` | `google`, `openai`, `azure`, `openrouter`, `dashscope`, `zai`, `minimax`, `jimeng`, `seedream`, or `replicate` | +| `--model`, `-m` | Model ID or deployment name. Azure uses deployment name; OpenRouter uses full model IDs; Z.AI uses `glm-image`; MiniMax uses `image-01` / `image-01-live` | | `--ar` | Aspect ratio (e.g., `16:9`, `1:1`, `4:3`) | | `--size` | Size (e.g., `1024x1024`) | | `--quality` | `normal` or `2k` (default: `2k`) | @@ -800,6 +803,8 @@ AI SDK-based image generation using OpenAI, Azure OpenAI, Google, OpenRouter, Da | `GOOGLE_API_KEY` | Google API key | - | | `GEMINI_API_KEY` | Alias for `GOOGLE_API_KEY` | - | | `DASHSCOPE_API_KEY` | DashScope API key (Aliyun) | - | +| `ZAI_API_KEY` | Z.AI API key | - | +| `BIGMODEL_API_KEY` | Backward-compatible alias for Z.AI API key | - | | `MINIMAX_API_KEY` | MiniMax API key | - | | `REPLICATE_API_TOKEN` | Replicate API token | - | | `JIMENG_ACCESS_KEY_ID` | Jimeng Volcengine access key | - | @@ -811,6 +816,8 @@ AI SDK-based image generation using OpenAI, Azure OpenAI, Google, OpenRouter, Da | `OPENROUTER_IMAGE_MODEL` | OpenRouter model | `google/gemini-3.1-flash-image-preview` | | `GOOGLE_IMAGE_MODEL` | Google model | `gemini-3-pro-image-preview` | | `DASHSCOPE_IMAGE_MODEL` | DashScope model | `qwen-image-2.0-pro` | +| `ZAI_IMAGE_MODEL` | Z.AI model | `glm-image` | +| `BIGMODEL_IMAGE_MODEL` | Backward-compatible alias for Z.AI model | `glm-image` | | `MINIMAX_IMAGE_MODEL` | MiniMax model | `image-01` | | `REPLICATE_IMAGE_MODEL` | Replicate model | `google/nano-banana-2` | | `JIMENG_IMAGE_MODEL` | Jimeng model | `jimeng_t2i_v40` | @@ -824,6 +831,8 @@ AI SDK-based image generation using OpenAI, Azure 
OpenAI, Google, OpenRouter, Da | `OPENROUTER_TITLE` | Optional app name for OpenRouter attribution | - | | `GOOGLE_BASE_URL` | Custom Google endpoint | - | | `DASHSCOPE_BASE_URL` | Custom DashScope endpoint | - | +| `ZAI_BASE_URL` | Custom Z.AI endpoint | `https://api.z.ai/api/paas/v4` | +| `BIGMODEL_BASE_URL` | Backward-compatible alias for Z.AI endpoint | - | | `MINIMAX_BASE_URL` | Custom MiniMax endpoint | `https://api.minimax.io` | | `REPLICATE_BASE_URL` | Custom Replicate endpoint | - | | `JIMENG_BASE_URL` | Custom Jimeng endpoint | `https://visual.volcengineapi.com` | @@ -836,6 +845,7 @@ AI SDK-based image generation using OpenAI, Azure OpenAI, Google, OpenRouter, Da **Provider Notes**: - Azure OpenAI: `--model` means Azure deployment name, not the underlying model family. - DashScope: `qwen-image-2.0-pro` is the recommended default for custom `--size`, `21:9`, and strong Chinese/English text rendering. +- Z.AI: `glm-image` is recommended for posters, diagrams, and text-heavy Chinese/English images. Reference images are not supported. - MiniMax: `image-01` supports documented custom `width` / `height`; `image-01-live` is lower latency and works best with `--ar`. - MiniMax reference images are sent as `subject_reference`; the current API is specialized toward character / portrait consistency. - Jimeng does not support reference images. @@ -848,7 +858,7 @@ AI SDK-based image generation using OpenAI, Azure OpenAI, Google, OpenRouter, Da 1. If `--provider` is specified → use it 2. If `--ref` is provided and no provider is specified → try Google, then OpenAI, Azure, OpenRouter, Replicate, Seedream, and finally MiniMax 3. If only one API key is available → use that provider -4. If multiple providers are available → default to Google +4. 
If multiple providers are available → default to Google, then OpenAI, Azure, OpenRouter, DashScope, Z.AI, MiniMax, Replicate, Jimeng, Seedream #### baoyu-danger-gemini-web @@ -1148,6 +1158,11 @@ DASHSCOPE_API_KEY=sk-xxx DASHSCOPE_IMAGE_MODEL=qwen-image-2.0-pro # DASHSCOPE_BASE_URL=https://dashscope.aliyuncs.com/api/v1 +# Z.AI +ZAI_API_KEY=xxx +ZAI_IMAGE_MODEL=glm-image +# ZAI_BASE_URL=https://api.z.ai/api/paas/v4 + # MiniMax MINIMAX_API_KEY=xxx MINIMAX_IMAGE_MODEL=image-01 diff --git a/README.zh.md b/README.zh.md index cd41231..082ed21 100644 --- a/README.zh.md +++ b/README.zh.md @@ -745,6 +745,9 @@ AI 驱动的生成后端。 # DashScope 自定义尺寸 /baoyu-imagine --prompt "为咖啡品牌设计一张 21:9 横幅海报,包含清晰中文标题" --image banner.png --provider dashscope --model qwen-image-2.0-pro --size 2048x872 +# Z.AI GLM-Image +/baoyu-imagine --prompt "一张带清晰中文标题的科技海报" --image out.png --provider zai + # MiniMax /baoyu-imagine --prompt "A fashion editorial portrait by a bright studio window" --image out.jpg --provider minimax @@ -781,8 +784,8 @@ AI 驱动的生成后端。 | `--image` | 输出图片路径(必需) | | `--batchfile` | 多图批量生成的 JSON 文件 | | `--jobs` | 批量模式的并发 worker 数 | -| `--provider` | `google`、`openai`、`azure`、`openrouter`、`dashscope`、`minimax`、`jimeng`、`seedream` 或 `replicate` | -| `--model`, `-m` | 模型 ID 或部署名。Azure 使用部署名;OpenRouter 使用完整模型 ID;MiniMax 使用 `image-01` / `image-01-live` | +| `--provider` | `google`、`openai`、`azure`、`openrouter`、`dashscope`、`zai`、`minimax`、`jimeng`、`seedream` 或 `replicate` | +| `--model`, `-m` | 模型 ID 或部署名。Azure 使用部署名;OpenRouter 使用完整模型 ID;Z.AI 使用 `glm-image`;MiniMax 使用 `image-01` / `image-01-live` | | `--ar` | 宽高比(如 `16:9`、`1:1`、`4:3`) | | `--size` | 尺寸(如 `1024x1024`) | | `--quality` | `normal` 或 `2k`(默认:`2k`) | @@ -800,6 +803,8 @@ AI 驱动的生成后端。 | `GOOGLE_API_KEY` | Google API 密钥 | - | | `GEMINI_API_KEY` | `GOOGLE_API_KEY` 的别名 | - | | `DASHSCOPE_API_KEY` | DashScope API 密钥(阿里云) | - | +| `ZAI_API_KEY` | Z.AI API 密钥 | - | +| `BIGMODEL_API_KEY` | Z.AI API 密钥向后兼容别名 | - | | `MINIMAX_API_KEY` | MiniMax API 
密钥 | - | | `REPLICATE_API_TOKEN` | Replicate API Token | - | | `JIMENG_ACCESS_KEY_ID` | 即梦火山引擎 Access Key | - | @@ -811,6 +816,8 @@ AI 驱动的生成后端。 | `OPENROUTER_IMAGE_MODEL` | OpenRouter 模型 | `google/gemini-3.1-flash-image-preview` | | `GOOGLE_IMAGE_MODEL` | Google 模型 | `gemini-3-pro-image-preview` | | `DASHSCOPE_IMAGE_MODEL` | DashScope 模型 | `qwen-image-2.0-pro` | +| `ZAI_IMAGE_MODEL` | Z.AI 模型 | `glm-image` | +| `BIGMODEL_IMAGE_MODEL` | Z.AI 模型向后兼容别名 | `glm-image` | | `MINIMAX_IMAGE_MODEL` | MiniMax 模型 | `image-01` | | `REPLICATE_IMAGE_MODEL` | Replicate 模型 | `google/nano-banana-2` | | `JIMENG_IMAGE_MODEL` | 即梦模型 | `jimeng_t2i_v40` | @@ -824,6 +831,8 @@ AI 驱动的生成后端。 | `OPENROUTER_TITLE` | OpenRouter 归因用应用名 | - | | `GOOGLE_BASE_URL` | 自定义 Google 端点 | - | | `DASHSCOPE_BASE_URL` | 自定义 DashScope 端点 | - | +| `ZAI_BASE_URL` | 自定义 Z.AI 端点 | `https://api.z.ai/api/paas/v4` | +| `BIGMODEL_BASE_URL` | Z.AI 端点向后兼容别名 | - | | `MINIMAX_BASE_URL` | 自定义 MiniMax 端点 | `https://api.minimax.io` | | `REPLICATE_BASE_URL` | 自定义 Replicate 端点 | - | | `JIMENG_BASE_URL` | 自定义即梦端点 | `https://visual.volcengineapi.com` | @@ -836,6 +845,7 @@ AI 驱动的生成后端。 **Provider 说明**: - Azure OpenAI:`--model` 表示 Azure deployment name,不是底层模型家族名。 - DashScope:`qwen-image-2.0-pro` 是自定义 `--size`、`21:9` 和中英文排版的推荐默认模型。 +- Z.AI:`glm-image` 适合海报、图表和中英文排版密集的图片生成,暂不支持参考图。 - MiniMax:`image-01` 支持官方文档里的自定义 `width` / `height`;`image-01-live` 更偏低延迟,适合配合 `--ar` 使用。 - MiniMax 参考图会走 `subject_reference`,当前能力更偏角色 / 人像一致性。 - 即梦不支持参考图。 @@ -848,7 +858,7 @@ AI 驱动的生成后端。 1. 如果指定了 `--provider` → 使用指定的 2. 如果传了 `--ref` 且未指定 provider → 依次尝试 Google、OpenAI、Azure、OpenRouter、Replicate、Seedream,最后是 MiniMax 3. 如果只有一个 API 密钥 → 使用对应服务商 -4. 如果多个可用 → 默认使用 Google +4. 
如果多个可用 → 默认使用 Google,然后依次为 OpenAI、Azure、OpenRouter、DashScope、Z.AI、MiniMax、Replicate、即梦、豆包 #### baoyu-danger-gemini-web @@ -1148,6 +1158,11 @@ DASHSCOPE_API_KEY=sk-xxx DASHSCOPE_IMAGE_MODEL=qwen-image-2.0-pro # DASHSCOPE_BASE_URL=https://dashscope.aliyuncs.com/api/v1 +# Z.AI +ZAI_API_KEY=xxx +ZAI_IMAGE_MODEL=glm-image +# ZAI_BASE_URL=https://api.z.ai/api/paas/v4 + # MiniMax MINIMAX_API_KEY=xxx MINIMAX_IMAGE_MODEL=image-01 diff --git a/skills/baoyu-imagine/SKILL.md b/skills/baoyu-imagine/SKILL.md index ff4e9ac..0e028a6 100644 --- a/skills/baoyu-imagine/SKILL.md +++ b/skills/baoyu-imagine/SKILL.md @@ -1,7 +1,7 @@ --- name: baoyu-imagine -description: AI image generation with OpenAI, Azure OpenAI, Google, OpenRouter, DashScope, MiniMax, Jimeng, Seedream and Replicate APIs. Supports text-to-image, reference images, aspect ratios, and batch generation from saved prompt files. Sequential by default; use batch parallel generation when the user already has multiple prompts or wants stable multi-image throughput. Use when user asks to generate, create, or draw images. -version: 1.56.5 +description: AI image generation with OpenAI, Azure OpenAI, Google, OpenRouter, DashScope, Z.AI GLM-Image, MiniMax, Jimeng, Seedream and Replicate APIs. Supports text-to-image, reference images, aspect ratios, and batch generation from saved prompt files. Sequential by default; use batch parallel generation when the user already has multiple prompts or wants stable multi-image throughput. Use when user asks to generate, create, or draw images. +version: 1.57.0 metadata: openclaw: homepage: https://github.com/JimLiu/baoyu-skills#baoyu-imagine @@ -13,7 +13,7 @@ metadata: # Image Generation (AI SDK) -Official API-based image generation. Supports OpenAI, Azure OpenAI, Google, OpenRouter, DashScope (阿里通义万象), MiniMax, Jimeng (即梦), Seedream (豆包) and Replicate providers. +Official API-based image generation. 
Supports OpenAI, Azure OpenAI, Google, OpenRouter, DashScope (阿里通义万象), Z.AI GLM-Image, MiniMax, Jimeng (即梦), Seedream (豆包) and Replicate providers. ## Script Directory @@ -103,6 +103,12 @@ ${BUN_X} {baseDir}/scripts/main.ts --prompt "为咖啡品牌设计一张 21:9 # DashScope legacy Qwen fixed-size model ${BUN_X} {baseDir}/scripts/main.ts --prompt "一张电影感海报" --image out.png --provider dashscope --model qwen-image-max --size 1664x928 +# Z.AI GLM-image +${BUN_X} {baseDir}/scripts/main.ts --prompt "一张带清晰中文标题的科技海报" --image out.png --provider zai + +# Z.AI GLM-image with explicit custom size +${BUN_X} {baseDir}/scripts/main.ts --prompt "A science illustration with labels" --image out.png --provider zai --model glm-image --size 1472x1088 + # MiniMax ${BUN_X} {baseDir}/scripts/main.ts --prompt "A fashion editorial portrait by a bright studio window" --image out.jpg --provider minimax @@ -164,8 +170,8 @@ Paths in `promptFiles`, `image`, and `ref` are resolved relative to the batch fi | `--image ` | Output image path (required in single-image mode) | | `--batchfile ` | JSON batch file for multi-image generation | | `--jobs ` | Worker count for batch mode (default: auto, max from config, built-in default 10) | -| `--provider google\|openai\|azure\|openrouter\|dashscope\|minimax\|jimeng\|seedream\|replicate` | Force provider (default: auto-detect) | -| `--model `, `-m` | Model ID (Google: `gemini-3-pro-image-preview`; OpenAI: `gpt-image-1.5`; Azure: deployment name such as `gpt-image-1.5` or `image-prod`; OpenRouter: `google/gemini-3.1-flash-image-preview`; DashScope: `qwen-image-2.0-pro`; MiniMax: `image-01`) | +| `--provider google\|openai\|azure\|openrouter\|dashscope\|zai\|minimax\|jimeng\|seedream\|replicate` | Force provider (default: auto-detect) | +| `--model `, `-m` | Model ID (Google: `gemini-3-pro-image-preview`; OpenAI: `gpt-image-1.5`; Azure: deployment name such as `gpt-image-1.5` or `image-prod`; OpenRouter: `google/gemini-3.1-flash-image-preview`; DashScope: 
`qwen-image-2.0-pro`; Z.AI: `glm-image`; MiniMax: `image-01`) | | `--ar ` | Aspect ratio (e.g., `16:9`, `1:1`, `4:3`) | | `--size ` | Size (e.g., `1024x1024`) | | `--quality normal\|2k` | Quality preset (default: `2k`) | @@ -183,6 +189,8 @@ Paths in `promptFiles`, `image`, and `ref` are resolved relative to the batch fi | `OPENROUTER_API_KEY` | OpenRouter API key | | `GOOGLE_API_KEY` | Google API key | | `DASHSCOPE_API_KEY` | DashScope API key (阿里云) | +| `ZAI_API_KEY` | Z.AI API key | +| `BIGMODEL_API_KEY` | Backward-compatible alias for Z.AI API key | | `MINIMAX_API_KEY` | MiniMax API key | | `REPLICATE_API_TOKEN` | Replicate API token | | `JIMENG_ACCESS_KEY_ID` | Jimeng (即梦) Volcengine access key | @@ -194,6 +202,8 @@ Paths in `promptFiles`, `image`, and `ref` are resolved relative to the batch fi | `OPENROUTER_IMAGE_MODEL` | OpenRouter model override (default: `google/gemini-3.1-flash-image-preview`) | | `GOOGLE_IMAGE_MODEL` | Google model override | | `DASHSCOPE_IMAGE_MODEL` | DashScope model override (default: `qwen-image-2.0-pro`) | +| `ZAI_IMAGE_MODEL` | Z.AI model override (default: `glm-image`) | +| `BIGMODEL_IMAGE_MODEL` | Backward-compatible alias for Z.AI model override | | `MINIMAX_IMAGE_MODEL` | MiniMax model override (default: `image-01`) | | `REPLICATE_IMAGE_MODEL` | Replicate model override (default: google/nano-banana-2) | | `JIMENG_IMAGE_MODEL` | Jimeng model override (default: jimeng_t2i_v40) | @@ -206,6 +216,8 @@ Paths in `promptFiles`, `image`, and `ref` are resolved relative to the batch fi | `OPENROUTER_TITLE` | Optional app name for OpenRouter attribution | | `GOOGLE_BASE_URL` | Custom Google endpoint | | `DASHSCOPE_BASE_URL` | Custom DashScope endpoint | +| `ZAI_BASE_URL` | Custom Z.AI endpoint (default: `https://api.z.ai/api/paas/v4`) | +| `BIGMODEL_BASE_URL` | Backward-compatible alias for Z.AI endpoint | | `MINIMAX_BASE_URL` | Custom MiniMax endpoint (default: `https://api.minimax.io`) | | `REPLICATE_BASE_URL` | Custom Replicate 
endpoint | | `JIMENG_BASE_URL` | Custom Jimeng endpoint (default: `https://visual.volcengineapi.com`) | @@ -280,6 +292,32 @@ Official references: - [Text-to-image guide](https://help.aliyun.com/zh/model-studio/text-to-image) - [Qwen-Image Edit API](https://help.aliyun.com/zh/model-studio/qwen-image-edit-api) +### Z.AI Models + +Use `--model glm-image` or set `default_model.zai` / `ZAI_IMAGE_MODEL` when the user wants GLM-image output. + +Official Z.AI image model options currently documented in the sync image API: + +- `glm-image` (recommended default) + - Text-to-image only in `baoyu-imagine` + - Native `quality` options are `hd` and `standard`; this skill maps `2k -> hd` and `normal -> standard` + - Recommended sizes: `1280x1280`, `1568x1056`, `1056x1568`, `1472x1088`, `1088x1472`, `1728x960`, `960x1728` + - Custom `--size` requires width and height between `1024` and `2048`, divisible by `32`, with total pixels <= `2^22` +- `cogview-4-250304` + - Legacy Z.AI image model family exposed by the same endpoint + - Custom `--size` requires width and height between `512` and `2048`, divisible by `16`, with total pixels <= `2^21` + +Notes: + +- The official sync API returns a temporary image URL; `baoyu-imagine` downloads that URL and writes the image locally +- `--ref` is not supported for Z.AI in this skill yet +- The sync API currently returns a single image, so `--n > 1` is rejected + +Official references: + +- [GLM-Image Guide](https://docs.z.ai/guides/image/glm-image) +- [Generate Image API](https://docs.z.ai/api-reference/image/generate-image) + ### MiniMax Models Use `--model image-01` or set `default_model.minimax` / `MINIMAX_IMAGE_MODEL` when the user wants MiniMax image generation. @@ -368,7 +406,7 @@ ${BUN_X} {baseDir}/scripts/main.ts --prompt "A cat" --image out.png --provider r 1. 
`--ref` provided + no `--provider` → auto-select Google first, then OpenAI, then Azure, then OpenRouter, then Replicate, then Seedream, then MiniMax (MiniMax subject reference is more specialized toward character/portrait consistency) 2. `--provider` specified → use it (if `--ref`, must be `google`, `openai`, `azure`, `openrouter`, `replicate`, `seedream`, or `minimax`) 3. Only one API key available → use that provider -4. Multiple available → default to Google +4. Multiple available → default to Google, then OpenAI, Azure, OpenRouter, DashScope, Z.AI, MiniMax, Replicate, Jimeng, Seedream ## Quality Presets diff --git a/skills/baoyu-imagine/references/config/first-time-setup.md b/skills/baoyu-imagine/references/config/first-time-setup.md index 865272b..68e4efd 100644 --- a/skills/baoyu-imagine/references/config/first-time-setup.md +++ b/skills/baoyu-imagine/references/config/first-time-setup.md @@ -53,6 +53,8 @@ options: description: "Router for Gemini/FLUX/OpenAI-compatible image models" - label: "DashScope" description: "Alibaba Cloud - Qwen-Image, strong Chinese/English text rendering" + - label: "Z.AI" + description: "GLM-image, strong poster and text-heavy image generation" - label: "MiniMax" description: "MiniMax image generation with subject-reference character workflows" - label: "Replicate" @@ -119,6 +121,20 @@ options: description: "Faster variant, use aspect ratio instead of custom size" ``` +### Question 2e: Default Z.AI Model + +Only show if user selected Z.AI. + +```yaml +header: "Z.AI Model" +question: "Default Z.AI image generation model?" 
+options: + - label: "glm-image (Recommended)" + description: "Best default for posters, diagrams, and text-heavy images" + - label: "cogview-4-250304" + description: "Legacy Z.AI image model on the same endpoint" +``` + ### Question 3: Default Quality ```yaml @@ -165,6 +181,7 @@ default_model: azure: [selected azure deployment or null] openrouter: [selected openrouter model or null] dashscope: null + zai: [selected Z.AI model or null] minimax: [selected minimax model or null] replicate: null --- @@ -257,6 +274,24 @@ Notes for DashScope setup: - `qwen-image-max` / `qwen-image-plus` / `qwen-image` only support five fixed sizes: `1664*928`, `1472*1104`, `1328*1328`, `1104*1472`, `928*1664`. - In `baoyu-imagine`, `quality` is a compatibility preset. It is not a native DashScope parameter. +### Z.AI Model Selection + +```yaml +header: "Z.AI Model" +question: "Choose a default Z.AI image generation model?" +options: + - label: "glm-image (Recommended)" + description: "Current flagship image model with better text rendering and poster layouts" + - label: "cogview-4-250304" + description: "Legacy model on the sync image endpoint" +``` + +Notes for Z.AI setup: + +- Prefer `glm-image` for posters, diagrams, and Chinese/English text-heavy layouts. +- In `baoyu-imagine`, Z.AI currently exposes text-to-image only; reference images (`--ref`) are not supported for this provider yet. +- The sync Z.AI image API returns a downloadable image URL, which the runtime saves locally after download.
+ ### Replicate Model Selection ```yaml @@ -306,6 +341,7 @@ default_model: azure: [value or null] openrouter: [value or null] dashscope: [value or null] + zai: [value or null] minimax: [value or null] replicate: [value or null] ``` diff --git a/skills/baoyu-imagine/references/config/preferences-schema.md b/skills/baoyu-imagine/references/config/preferences-schema.md index 55480bc..cf35c9b 100644 --- a/skills/baoyu-imagine/references/config/preferences-schema.md +++ b/skills/baoyu-imagine/references/config/preferences-schema.md @@ -11,7 +11,7 @@ description: EXTEND.md YAML schema for baoyu-imagine user preferences --- version: 1 -default_provider: null # google|openai|azure|openrouter|dashscope|minimax|replicate|null (null = auto-detect) +default_provider: null # google|openai|azure|openrouter|dashscope|zai|minimax|replicate|null (null = auto-detect) default_quality: null # normal|2k|null (null = use default: 2k) @@ -25,6 +25,7 @@ default_model: azure: null # Azure deployment name, e.g., "gpt-image-1.5" or "image-prod" openrouter: null # e.g., "google/gemini-3.1-flash-image-preview" dashscope: null # e.g., "qwen-image-2.0-pro" + zai: null # e.g., "glm-image" minimax: null # e.g., "image-01" replicate: null # e.g., "google/nano-banana-2" @@ -49,6 +50,9 @@ batch: dashscope: concurrency: 3 start_interval_ms: 1100 + zai: + concurrency: 3 + start_interval_ms: 1100 minimax: concurrency: 3 start_interval_ms: 1100 @@ -69,6 +73,7 @@ batch: | `default_model.azure` | string\|null | null | Azure default deployment name | | `default_model.openrouter` | string\|null | null | OpenRouter default model | | `default_model.dashscope` | string\|null | null | DashScope default model | +| `default_model.zai` | string\|null | null | Z.AI default model | | `default_model.minimax` | string\|null | null | MiniMax default model | | `default_model.replicate` | string\|null | null | Replicate default model | | `batch.max_workers` | int\|null | 10 | Batch worker cap | @@ -100,6 +105,7 @@ 
default_model: azure: "gpt-image-1.5" openrouter: "google/gemini-3.1-flash-image-preview" dashscope: "qwen-image-2.0-pro" + zai: "glm-image" minimax: "image-01" replicate: "google/nano-banana-2" batch: @@ -111,6 +117,9 @@ batch: azure: concurrency: 3 start_interval_ms: 1100 + zai: + concurrency: 3 + start_interval_ms: 1100 openrouter: concurrency: 3 start_interval_ms: 1100 diff --git a/skills/baoyu-imagine/scripts/main.test.ts b/skills/baoyu-imagine/scripts/main.test.ts index 29928f6..f7123ee 100644 --- a/skills/baoyu-imagine/scripts/main.test.ts +++ b/skills/baoyu-imagine/scripts/main.test.ts @@ -31,6 +31,7 @@ function makeArgs(overrides: Partial = {}): CliArgs { size: null, quality: null, imageSize: null, + imageSizeSource: null, referenceImages: [], n: 1, batchFile: null, @@ -78,7 +79,7 @@ test("parseArgs parses the main baoyu-imagine CLI flags", () => { "--image", "out/hero", "--provider", - "openai", + "zai", "--quality", "2k", "--imageSize", @@ -95,9 +96,10 @@ test("parseArgs parses the main baoyu-imagine CLI flags", () => { assert.deepEqual(args.promptFiles, ["prompts/system.md", "prompts/content.md"]); assert.equal(args.imagePath, "out/hero"); - assert.equal(args.provider, "openai"); + assert.equal(args.provider, "zai"); assert.equal(args.quality, "2k"); assert.equal(args.imageSize, "4K"); + assert.equal(args.imageSizeSource, "cli"); assert.deepEqual(args.referenceImages, ["ref/one.png", "ref/two.jpg"]); assert.equal(args.n, 3); assert.equal(args.jobs, 5); @@ -124,6 +126,7 @@ default_image_size: 2K default_model: google: gemini-3-pro-image-preview openai: gpt-image-1.5 + zai: glm-image azure: image-prod minimax: image-01 batch: @@ -134,6 +137,9 @@ batch: start_interval_ms: 900 openai: concurrency: 4 + zai: + concurrency: 2 + start_interval_ms: 1000 minimax: concurrency: 2 start_interval_ms: 1400 @@ -151,6 +157,7 @@ batch: assert.equal(config.default_image_size, "2K"); assert.equal(config.default_model?.google, "gemini-3-pro-image-preview"); 
assert.equal(config.default_model?.openai, "gpt-image-1.5"); + assert.equal(config.default_model?.zai, "glm-image"); assert.equal(config.default_model?.azure, "image-prod"); assert.equal(config.default_model?.minimax, "image-01"); assert.equal(config.batch?.max_workers, 8); @@ -161,6 +168,10 @@ batch: assert.deepEqual(config.batch?.provider_limits?.openai, { concurrency: 4, }); + assert.deepEqual(config.batch?.provider_limits?.zai, { + concurrency: 2, + start_interval_ms: 1000, + }); assert.deepEqual(config.batch?.provider_limits?.minimax, { concurrency: 2, start_interval_ms: 1400, @@ -246,6 +257,19 @@ test("mergeConfig only fills values missing from CLI args", () => { assert.equal(merged.quality, "2k"); assert.equal(merged.aspectRatio, "3:2"); assert.equal(merged.imageSize, "4K"); + assert.equal(merged.imageSizeSource, "cli"); +}); + +test("mergeConfig tags inherited imageSize defaults so providers can ignore incompatible config", () => { + const merged = mergeConfig( + makeArgs(), + { + default_image_size: "2K", + } satisfies Partial, + ); + + assert.equal(merged.imageSize, "2K"); + assert.equal(merged.imageSizeSource, "config"); }); test("detectProvider rejects non-ref-capable providers and prefers Google first when multiple keys exist", (t) => { @@ -316,6 +340,27 @@ test("detectProvider selects Azure when only Azure credentials are configured", ); }); +test("detectProvider selects Z.AI when credentials are present or the model id matches", (t) => { + useEnv(t, { + GOOGLE_API_KEY: null, + OPENAI_API_KEY: null, + AZURE_OPENAI_API_KEY: null, + AZURE_OPENAI_BASE_URL: null, + OPENROUTER_API_KEY: null, + DASHSCOPE_API_KEY: null, + ZAI_API_KEY: "zai-key", + BIGMODEL_API_KEY: null, + MINIMAX_API_KEY: null, + REPLICATE_API_TOKEN: null, + JIMENG_ACCESS_KEY_ID: null, + JIMENG_SECRET_ACCESS_KEY: null, + ARK_API_KEY: null, + }); + + assert.equal(detectProvider(makeArgs()), "zai"); + assert.equal(detectProvider(makeArgs({ model: "glm-image" })), "zai"); +}); + 
test("detectProvider infers Seedream from model id and allows Seedream reference-image workflows", (t) => { useEnv(t, { GOOGLE_API_KEY: null, @@ -375,6 +420,7 @@ test("batch worker and provider-rate-limit configuration prefer env over EXTEND BAOYU_IMAGE_GEN_MAX_WORKERS: "12", BAOYU_IMAGE_GEN_GOOGLE_CONCURRENCY: "5", BAOYU_IMAGE_GEN_GOOGLE_START_INTERVAL_MS: "450", + BAOYU_IMAGE_GEN_ZAI_CONCURRENCY: "4", }); const extendConfig: Partial = { @@ -385,6 +431,10 @@ test("batch worker and provider-rate-limit configuration prefer env over EXTEND concurrency: 2, start_interval_ms: 900, }, + zai: { + concurrency: 1, + start_interval_ms: 1200, + }, minimax: { concurrency: 1, start_interval_ms: 1500, @@ -398,6 +448,10 @@ test("batch worker and provider-rate-limit configuration prefer env over EXTEND concurrency: 5, startIntervalMs: 450, }); + assert.deepEqual(getConfiguredProviderRateLimits(extendConfig).zai, { + concurrency: 4, + startIntervalMs: 1200, + }); assert.deepEqual(getConfiguredProviderRateLimits(extendConfig).minimax, { concurrency: 1, startIntervalMs: 1500, @@ -464,5 +518,11 @@ test("path normalization, worker count, and retry classification follow expected assert.equal(getWorkerCount(5, 0, 4), 1); assert.equal(isRetryableGenerationError(new Error("API error (401): denied")), false); + assert.equal( + isRetryableGenerationError( + new Error("Replicate returned 2 outputs, but baoyu-imagine currently supports saving exactly one image per request."), + ), + false, + ); assert.equal(isRetryableGenerationError(new Error("socket hang up")), true); }); diff --git a/skills/baoyu-imagine/scripts/main.ts b/skills/baoyu-imagine/scripts/main.ts index 3b6ee72..59bce7d 100644 --- a/skills/baoyu-imagine/scripts/main.ts +++ b/skills/baoyu-imagine/scripts/main.ts @@ -58,6 +58,7 @@ const DEFAULT_PROVIDER_RATE_LIMITS: Record = { openai: { concurrency: 3, startIntervalMs: 1100 }, openrouter: { concurrency: 3, startIntervalMs: 1100 }, dashscope: { concurrency: 3, startIntervalMs: 1100 
}, + zai: { concurrency: 3, startIntervalMs: 1100 }, minimax: { concurrency: 3, startIntervalMs: 1100 }, jimeng: { concurrency: 3, startIntervalMs: 1100 }, seedream: { concurrency: 3, startIntervalMs: 1100 }, @@ -76,7 +77,7 @@ Options: --image Output image path (required in single-image mode) --batchfile JSON batch file for multi-image generation --jobs Worker count for batch mode (default: auto, max from config, built-in default 10) - --provider google|openai|openrouter|dashscope|minimax|replicate|jimeng|seedream|azure Force provider (auto-detect by default) + --provider google|openai|openrouter|dashscope|zai|minimax|replicate|jimeng|seedream|azure Force provider (auto-detect by default) -m, --model Model ID --ar Aspect ratio (e.g., 16:9, 1:1, 4:3) --size Size (e.g., 1024x1024) @@ -114,6 +115,8 @@ Environment variables: GOOGLE_API_KEY Google API key GEMINI_API_KEY Gemini API key (alias for GOOGLE_API_KEY) DASHSCOPE_API_KEY DashScope API key + ZAI_API_KEY Z.AI API key + BIGMODEL_API_KEY Backward-compatible alias for Z.AI API key MINIMAX_API_KEY MiniMax API key REPLICATE_API_TOKEN Replicate API token JIMENG_ACCESS_KEY_ID Jimeng Access Key ID @@ -123,6 +126,8 @@ Environment variables: OPENROUTER_IMAGE_MODEL Default OpenRouter model (google/gemini-3.1-flash-image-preview) GOOGLE_IMAGE_MODEL Default Google model (gemini-3-pro-image-preview) DASHSCOPE_IMAGE_MODEL Default DashScope model (qwen-image-2.0-pro) + ZAI_IMAGE_MODEL Default Z.AI model (glm-image) + BIGMODEL_IMAGE_MODEL Backward-compatible alias for Z.AI model (glm-image) MINIMAX_IMAGE_MODEL Default MiniMax model (image-01) REPLICATE_IMAGE_MODEL Default Replicate model (google/nano-banana-2) JIMENG_IMAGE_MODEL Default Jimeng model (jimeng_t2i_v40) @@ -134,6 +139,8 @@ Environment variables: OPENROUTER_TITLE Optional app name for OpenRouter attribution GOOGLE_BASE_URL Custom Google endpoint DASHSCOPE_BASE_URL Custom DashScope endpoint + ZAI_BASE_URL Custom Z.AI endpoint + BIGMODEL_BASE_URL Backward-compatible 
alias for Z.AI endpoint MINIMAX_BASE_URL Custom MiniMax endpoint REPLICATE_BASE_URL Custom Replicate endpoint JIMENG_BASE_URL Custom Jimeng endpoint @@ -161,6 +168,7 @@ export function parseArgs(argv: string[]): CliArgs { size: null, quality: null, imageSize: null, + imageSizeSource: null, referenceImages: [], n: 1, batchFile: null, @@ -240,6 +248,7 @@ export function parseArgs(argv: string[]): CliArgs { v !== "openai" && v !== "openrouter" && v !== "dashscope" && + v !== "zai" && v !== "minimax" && v !== "replicate" && v !== "jimeng" && @@ -284,6 +293,7 @@ export function parseArgs(argv: string[]): CliArgs { const v = argv[++i]?.toUpperCase(); if (v !== "1K" && v !== "2K" && v !== "4K") throw new Error(`Invalid imageSize: ${v}`); out.imageSize = v; + out.imageSizeSource = "cli"; continue; } @@ -396,6 +406,7 @@ export function parseSimpleYaml(yaml: string): Partial { openai: null, openrouter: null, dashscope: null, + zai: null, minimax: null, replicate: null, jimeng: null, @@ -424,6 +435,7 @@ export function parseSimpleYaml(yaml: string): Partial { key === "openai" || key === "openrouter" || key === "dashscope" || + key === "zai" || key === "minimax" || key === "replicate" || key === "jimeng" || @@ -442,6 +454,7 @@ export function parseSimpleYaml(yaml: string): Partial { key === "openai" || key === "openrouter" || key === "dashscope" || + key === "zai" || key === "minimax" || key === "replicate" || key === "jimeng" || @@ -531,12 +544,16 @@ export async function loadExtendConfig( } export function mergeConfig(args: CliArgs, extend: Partial): CliArgs { + const imageSize = args.imageSize ?? extend.default_image_size ?? null; return { ...args, provider: args.provider ?? extend.default_provider ?? null, quality: args.quality ?? extend.default_quality ?? null, aspectRatio: args.aspectRatio ?? extend.default_aspect_ratio ?? null, - imageSize: args.imageSize ?? extend.default_image_size ?? null, + imageSize, + imageSizeSource: + args.imageSizeSource ?? 
+ (args.imageSize !== null ? "cli" : (imageSize !== null ? "config" : null)), }; } @@ -572,13 +589,14 @@ export function getConfiguredProviderRateLimits( openai: { ...DEFAULT_PROVIDER_RATE_LIMITS.openai }, openrouter: { ...DEFAULT_PROVIDER_RATE_LIMITS.openrouter }, dashscope: { ...DEFAULT_PROVIDER_RATE_LIMITS.dashscope }, + zai: { ...DEFAULT_PROVIDER_RATE_LIMITS.zai }, minimax: { ...DEFAULT_PROVIDER_RATE_LIMITS.minimax }, jimeng: { ...DEFAULT_PROVIDER_RATE_LIMITS.jimeng }, seedream: { ...DEFAULT_PROVIDER_RATE_LIMITS.seedream }, azure: { ...DEFAULT_PROVIDER_RATE_LIMITS.azure }, }; - for (const provider of ["replicate", "google", "openai", "openrouter", "dashscope", "minimax", "jimeng", "seedream", "azure"] as Provider[]) { + for (const provider of ["replicate", "google", "openai", "openrouter", "dashscope", "zai", "minimax", "jimeng", "seedream", "azure"] as Provider[]) { const envPrefix = `BAOYU_IMAGE_GEN_${provider.toUpperCase()}`; const extendLimit = extendConfig.batch?.provider_limits?.[provider]; configured[provider] = { @@ -630,6 +648,7 @@ function inferProviderFromModel(model: string | null): Provider | null { const normalized = model.trim(); if (normalized.includes("seedream") || normalized.includes("seededit")) return "seedream"; if (normalized === "image-01" || normalized === "image-01-live") return "minimax"; + if (normalized === "glm-image" || normalized === "cogview-4-250304") return "zai"; return null; } @@ -657,6 +676,7 @@ export function detectProvider(args: CliArgs): Provider { const hasOpenai = !!process.env.OPENAI_API_KEY; const hasOpenrouter = !!process.env.OPENROUTER_API_KEY; const hasDashscope = !!process.env.DASHSCOPE_API_KEY; + const hasZai = !!(process.env.ZAI_API_KEY || process.env.BIGMODEL_API_KEY); const hasMinimax = !!process.env.MINIMAX_API_KEY; const hasReplicate = !!process.env.REPLICATE_API_TOKEN; const hasJimeng = !!(process.env.JIMENG_ACCESS_KEY_ID && process.env.JIMENG_SECRET_ACCESS_KEY); @@ -677,6 +697,13 @@ export function 
detectProvider(args: CliArgs): Provider { return "minimax"; } + if (modelProvider === "zai") { + if (!hasZai) { + throw new Error("Model looks like a Z.AI image model, but ZAI_API_KEY is not set."); + } + return "zai"; + } + if (args.referenceImages.length > 0) { if (hasGoogle) return "google"; if (hasOpenai) return "openai"; @@ -696,6 +723,7 @@ export function detectProvider(args: CliArgs): Provider { hasAzure && "azure", hasOpenrouter && "openrouter", hasDashscope && "dashscope", + hasZai && "zai", hasMinimax && "minimax", hasReplicate && "replicate", hasJimeng && "jimeng", @@ -706,7 +734,7 @@ export function detectProvider(args: CliArgs): Provider { if (available.length > 1) return available[0]!; throw new Error( - "No API key found. Set GOOGLE_API_KEY, GEMINI_API_KEY, OPENAI_API_KEY, AZURE_OPENAI_API_KEY+AZURE_OPENAI_BASE_URL, OPENROUTER_API_KEY, DASHSCOPE_API_KEY, MINIMAX_API_KEY, REPLICATE_API_TOKEN, JIMENG keys, or ARK_API_KEY.\n" + + "No API key found. Set GOOGLE_API_KEY, GEMINI_API_KEY, OPENAI_API_KEY, AZURE_OPENAI_API_KEY+AZURE_OPENAI_BASE_URL, OPENROUTER_API_KEY, DASHSCOPE_API_KEY, ZAI_API_KEY, MINIMAX_API_KEY, REPLICATE_API_TOKEN, JIMENG keys, or ARK_API_KEY.\n" + "Create ~/.baoyu-skills/.env or /.baoyu-skills/.env with your keys." 
); } @@ -738,6 +766,7 @@ export function isRetryableGenerationError(error: unknown): boolean { "API error (403)", "API error (404)", "temporarily disabled", + "supports saving exactly one image", ]; return !nonRetryableMarkers.some((marker) => msg.includes(marker)); } @@ -745,6 +774,7 @@ export function isRetryableGenerationError(error: unknown): boolean { async function loadProviderModule(provider: Provider): Promise { if (provider === "google") return (await import("./providers/google")) as ProviderModule; if (provider === "dashscope") return (await import("./providers/dashscope")) as ProviderModule; + if (provider === "zai") return (await import("./providers/zai")) as ProviderModule; if (provider === "minimax") return (await import("./providers/minimax")) as ProviderModule; if (provider === "replicate") return (await import("./providers/replicate")) as ProviderModule; if (provider === "openrouter") return (await import("./providers/openrouter")) as ProviderModule; @@ -776,6 +806,7 @@ function getModelForProvider( return extendConfig.default_model.openrouter; } if (provider === "dashscope" && extendConfig.default_model.dashscope) return extendConfig.default_model.dashscope; + if (provider === "zai" && extendConfig.default_model.zai) return extendConfig.default_model.zai; if (provider === "minimax" && extendConfig.default_model.minimax) return extendConfig.default_model.minimax; if (provider === "replicate" && extendConfig.default_model.replicate) return extendConfig.default_model.replicate; if (provider === "jimeng" && extendConfig.default_model.jimeng) return extendConfig.default_model.jimeng; @@ -852,6 +883,7 @@ export function createTaskArgs(baseArgs: CliArgs, task: BatchTaskInput, batchDir size: task.size ?? baseArgs.size ?? null, quality: task.quality ?? baseArgs.quality ?? null, imageSize: task.imageSize ?? baseArgs.imageSize ?? null, + imageSizeSource: task.imageSize != null ? "task" : (baseArgs.imageSizeSource ?? null), referenceImages: task.ref ? 
task.ref.map((filePath) => resolveBatchPath(batchDir, filePath)) : [], n: task.n ?? baseArgs.n, batchFile: null, @@ -1000,7 +1032,7 @@ async function runBatchTasks( const acquireProvider = createProviderGate(providerRateLimits); const workerCount = getWorkerCount(tasks.length, jobs, maxWorkers); console.error(`Batch mode: ${tasks.length} tasks, ${workerCount} workers, parallel mode enabled.`); - for (const provider of ["replicate", "google", "openai", "openrouter", "dashscope", "jimeng", "seedream", "azure"] as Provider[]) { + for (const provider of ["replicate", "google", "openai", "openrouter", "dashscope", "zai", "minimax", "jimeng", "seedream", "azure"] as Provider[]) { const limit = providerRateLimits[provider]; console.error(`- ${provider}: concurrency=${limit.concurrency}, startIntervalMs=${limit.startIntervalMs}`); } diff --git a/skills/baoyu-imagine/scripts/providers/replicate.test.ts b/skills/baoyu-imagine/scripts/providers/replicate.test.ts index 50a2eaa..f6a5327 100644 --- a/skills/baoyu-imagine/scripts/providers/replicate.test.ts +++ b/skills/baoyu-imagine/scripts/providers/replicate.test.ts @@ -22,6 +22,7 @@ function makeArgs(overrides: Partial = {}): CliArgs { size: null, quality: null, imageSize: null, + imageSizeSource: null, referenceImages: [], n: 1, batchFile: null, @@ -220,6 +221,22 @@ test("Replicate validateArgs blocks misleading multi-output and unsupported fami /compatibility list/, ); + assert.doesNotThrow(() => + validateArgs( + "google/nano-banana-2", + makeArgs({ imageSize: "2K", imageSizeSource: "config" }), + ), + ); + + assert.throws( + () => + validateArgs( + "google/nano-banana-2", + makeArgs({ imageSize: "2K", imageSizeSource: "cli" }), + ), + /do not use --imageSize/, + ); + assert.doesNotThrow(() => validateArgs( "stability-ai/sdxl", diff --git a/skills/baoyu-imagine/scripts/providers/replicate.ts b/skills/baoyu-imagine/scripts/providers/replicate.ts index 197ca6d..aea8b52 100644 --- 
a/skills/baoyu-imagine/scripts/providers/replicate.ts +++ b/skills/baoyu-imagine/scripts/providers/replicate.ts @@ -361,7 +361,7 @@ export function validateArgs(model: string, args: CliArgs): void { throw new Error("Replicate integration currently supports exactly one output image per request. Remove --n or use --n 1."); } - if (args.imageSize) { + if (args.imageSize && args.imageSizeSource !== "config") { throw new Error("Replicate models in baoyu-imagine do not use --imageSize. Use --quality, --ar, or --size instead."); } diff --git a/skills/baoyu-imagine/scripts/providers/zai.test.ts b/skills/baoyu-imagine/scripts/providers/zai.test.ts new file mode 100644 index 0000000..59dcef4 --- /dev/null +++ b/skills/baoyu-imagine/scripts/providers/zai.test.ts @@ -0,0 +1,180 @@ +import assert from "node:assert/strict"; +import test, { type TestContext } from "node:test"; + +import type { CliArgs } from "../types.ts"; +import { + buildRequestBody, + buildZaiUrl, + extractImageFromResponse, + getDefaultModel, + getModelFamily, + parseAspectRatio, + parseSize, + resolveSizeForModel, + validateArgs, +} from "./zai.ts"; + +function makeArgs(overrides: Partial = {}): CliArgs { + return { + prompt: null, + promptFiles: [], + imagePath: null, + provider: null, + model: null, + aspectRatio: null, + size: null, + quality: null, + imageSize: null, + referenceImages: [], + n: 1, + batchFile: null, + jobs: null, + json: false, + help: false, + ...overrides, + }; +} + +function useEnv( + t: TestContext, + values: Record, +): void { + const previous = new Map(); + for (const [key, value] of Object.entries(values)) { + previous.set(key, process.env[key]); + if (value == null) { + delete process.env[key]; + } else { + process.env[key] = value; + } + } + + t.after(() => { + for (const [key, value] of previous.entries()) { + if (value == null) { + delete process.env[key]; + } else { + process.env[key] = value; + } + } + }); +} + +test("Z.AI default model prefers env override and otherwise 
uses glm-image", (t) => { + useEnv(t, { + ZAI_IMAGE_MODEL: null, + BIGMODEL_IMAGE_MODEL: null, + }); + assert.equal(getDefaultModel(), "glm-image"); + + process.env.BIGMODEL_IMAGE_MODEL = "cogview-4-250304"; + assert.equal(getDefaultModel(), "cogview-4-250304"); +}); + +test("Z.AI URL builder normalizes host, v4 base, and full endpoint inputs", (t) => { + useEnv(t, { ZAI_BASE_URL: "https://api.z.ai" }); + assert.equal(buildZaiUrl(), "https://api.z.ai/api/paas/v4/images/generations"); + + process.env.ZAI_BASE_URL = "https://proxy.example.com/api/paas/v4/"; + assert.equal(buildZaiUrl(), "https://proxy.example.com/api/paas/v4/images/generations"); + + process.env.ZAI_BASE_URL = "https://proxy.example.com/custom/images/generations"; + assert.equal(buildZaiUrl(), "https://proxy.example.com/custom/images/generations"); +}); + +test("Z.AI model family and parsing helpers recognize documented formats", () => { + assert.equal(getModelFamily("glm-image"), "glm"); + assert.equal(getModelFamily("cogview-4-250304"), "legacy"); + assert.deepEqual(parseAspectRatio("16:9"), { width: 16, height: 9 }); + assert.equal(parseAspectRatio("wide"), null); + assert.deepEqual(parseSize("1280x1280"), { width: 1280, height: 1280 }); + assert.deepEqual(parseSize("1472*1088"), { width: 1472, height: 1088 }); + assert.equal(parseSize("big"), null); +}); + +test("Z.AI size resolution follows documented recommended ratios and validates custom sizes", () => { + assert.equal( + resolveSizeForModel("glm-image", makeArgs({ aspectRatio: "16:9", quality: "2k" })), + "1728x960", + ); + assert.equal( + resolveSizeForModel("cogview-4-250304", makeArgs({ aspectRatio: "4:3", quality: "normal" })), + "1152x864", + ); + assert.equal( + resolveSizeForModel("glm-image", makeArgs({ size: "1568x1056", quality: "2k" })), + "1568x1056", + ); + + const uncommon = resolveSizeForModel( + "glm-image", + makeArgs({ aspectRatio: "5:2", quality: "normal" }), + ); + const parsed = parseSize(uncommon); + assert.ok(parsed); + 
assert.ok(parsed.width % 32 === 0); + assert.ok(parsed.height % 32 === 0); + assert.ok(parsed.width * parsed.height <= 2 ** 22); + + assert.throws( + () => resolveSizeForModel("glm-image", makeArgs({ size: "1000x1000", quality: "2k" })), + /between 1024 and 2048/, + ); + assert.throws( + () => resolveSizeForModel("glm-image", makeArgs({ size: "1280x1260", quality: "2k" })), + /divisible by 32/, + ); + assert.throws( + () => resolveSizeForModel("cogview-4-250304", makeArgs({ size: "2048x2048", quality: "2k" })), + /must not exceed 2\^21 total pixels/, + ); +}); + +test("Z.AI validation rejects unsupported refs and multi-image requests", () => { + assert.throws( + () => validateArgs("glm-image", makeArgs({ referenceImages: ["ref.png"] })), + /text-to-image only/, + ); + assert.throws( + () => validateArgs("glm-image", makeArgs({ n: 2 })), + /single image per request/, + ); +}); + +test("Z.AI request body maps skill quality and resolved size into provider fields", () => { + const body = buildRequestBody( + "A cinematic science poster", + "glm-image", + makeArgs({ aspectRatio: "4:3", quality: "normal" }), + ); + + assert.deepEqual(body, { + model: "glm-image", + prompt: "A cinematic science poster", + quality: "standard", + size: "1472x1088", + }); +}); + +test("Z.AI response extraction downloads the returned image URL", async (t) => { + const originalFetch = globalThis.fetch; + t.after(() => { + globalThis.fetch = originalFetch; + }); + + globalThis.fetch = async () => + new Response(Uint8Array.from([1, 2, 3]), { + status: 200, + headers: { "Content-Type": "image/png" }, + }); + + const image = await extractImageFromResponse({ + data: [{ url: "https://cdn.example.com/glm-image.png" }], + }); + assert.deepEqual([...image], [1, 2, 3]); + + await assert.rejects( + () => extractImageFromResponse({ data: [{}] }), + /No image URL/, + ); +}); diff --git a/skills/baoyu-imagine/scripts/providers/zai.ts b/skills/baoyu-imagine/scripts/providers/zai.ts new file mode 100644 index 
0000000..e170135 --- /dev/null +++ b/skills/baoyu-imagine/scripts/providers/zai.ts @@ -0,0 +1,306 @@ +import type { CliArgs, Quality } from "../types"; + +type ZaiModelFamily = "glm" | "legacy"; + +type ZaiRequestBody = { + model: string; + prompt: string; + quality: "hd" | "standard"; + size: string; +}; + +type ZaiResponse = { + data?: Array<{ url?: string }>; +}; + +const DEFAULT_MODEL = "glm-image"; +const GLM_MAX_PIXELS = 2 ** 22; +const LEGACY_MAX_PIXELS = 2 ** 21; +const GLM_SIZE_STEP = 32; +const LEGACY_SIZE_STEP = 16; + +const GLM_RECOMMENDED_SIZES: Record = { + "1:1": "1280x1280", + "3:2": "1568x1056", + "2:3": "1056x1568", + "4:3": "1472x1088", + "3:4": "1088x1472", + "16:9": "1728x960", + "9:16": "960x1728", +}; + +const LEGACY_RECOMMENDED_SIZES: Record = { + "1:1": "1024x1024", + "9:16": "768x1344", + "3:4": "864x1152", + "16:9": "1344x768", + "4:3": "1152x864", + "2:1": "1440x720", + "1:2": "720x1440", +}; + +export function getDefaultModel(): string { + return process.env.ZAI_IMAGE_MODEL || process.env.BIGMODEL_IMAGE_MODEL || DEFAULT_MODEL; +} + +function getApiKey(): string | null { + return process.env.ZAI_API_KEY || process.env.BIGMODEL_API_KEY || null; +} + +export function buildZaiUrl(): string { + const base = (process.env.ZAI_BASE_URL || process.env.BIGMODEL_BASE_URL || "https://api.z.ai/api/paas/v4") + .replace(/\/+$/g, ""); + if (base.endsWith("/images/generations")) return base; + if (base.endsWith("/api/paas/v4")) return `${base}/images/generations`; + if (base.endsWith("/v4")) return `${base}/images/generations`; + return `${base}/api/paas/v4/images/generations`; +} + +export function getModelFamily(model: string): ZaiModelFamily { + return model.trim().toLowerCase() === "glm-image" ? 
"glm" : "legacy"; +} + +export function parseAspectRatio(ar: string): { width: number; height: number } | null { + const match = ar.match(/^(\d+(?:\.\d+)?):(\d+(?:\.\d+)?)$/); + if (!match) return null; + const width = Number(match[1]); + const height = Number(match[2]); + if (!Number.isFinite(width) || !Number.isFinite(height) || width <= 0 || height <= 0) { + return null; + } + return { width, height }; +} + +export function parseSize(size: string): { width: number; height: number } | null { + const match = size.trim().match(/^(\d+)\s*[xX*]\s*(\d+)$/); + if (!match) return null; + const width = parseInt(match[1]!, 10); + const height = parseInt(match[2]!, 10); + if (!Number.isFinite(width) || !Number.isFinite(height) || width <= 0 || height <= 0) { + return null; + } + return { width, height }; +} + +function formatSize(width: number, height: number): string { + return `${width}x${height}`; +} + +function roundToStep(value: number, step: number): number { + return Math.max(step, Math.round(value / step) * step); +} + +function getRatioValue(ar: string): number | null { + const parsed = parseAspectRatio(ar); + if (!parsed) return null; + return parsed.width / parsed.height; +} + +function findClosestRatioKey(ar: string, candidates: string[]): string | null { + const targetRatio = getRatioValue(ar); + if (targetRatio == null) return null; + + let bestKey: string | null = null; + let bestDiff = Infinity; + for (const candidate of candidates) { + const candidateRatio = getRatioValue(candidate); + if (candidateRatio == null) continue; + const diff = Math.abs(candidateRatio - targetRatio); + if (diff < bestDiff) { + bestDiff = diff; + bestKey = candidate; + } + } + + return bestDiff <= 0.05 ? bestKey : null; +} + +function getTargetPixels(quality: Quality): number { + return quality === "normal" ? 
1024 * 1024 : 1536 * 1536; +} + +function fitToPixelBudget( + width: number, + height: number, + targetPixels: number, + maxPixels: number, + step: number, +): { width: number; height: number } { + let nextWidth = width; + let nextHeight = height; + const pixels = nextWidth * nextHeight; + + if (pixels > maxPixels) { + const scale = Math.sqrt(maxPixels / pixels); + nextWidth *= scale; + nextHeight *= scale; + } else { + const scale = Math.sqrt(targetPixels / pixels); + nextWidth *= scale; + nextHeight *= scale; + } + + let roundedWidth = roundToStep(nextWidth, step); + let roundedHeight = roundToStep(nextHeight, step); + let roundedPixels = roundedWidth * roundedHeight; + + while (roundedPixels > maxPixels && (roundedWidth > step || roundedHeight > step)) { + if (roundedWidth >= roundedHeight && roundedWidth > step) { + roundedWidth -= step; + } else if (roundedHeight > step) { + roundedHeight -= step; + } else { + break; + } + roundedPixels = roundedWidth * roundedHeight; + } + + return { width: roundedWidth, height: roundedHeight }; +} + +function validateCustomSize( + size: string, + family: ZaiModelFamily, +): string { + const parsed = parseSize(size); + if (!parsed) { + throw new Error("Z.AI --size must be in WxH format, for example 1280x1280."); + } + + const widthStep = family === "glm" ? GLM_SIZE_STEP : LEGACY_SIZE_STEP; + const minEdge = family === "glm" ? 1024 : 512; + const maxPixels = family === "glm" ? GLM_MAX_PIXELS : LEGACY_MAX_PIXELS; + + if (parsed.width < minEdge || parsed.width > 2048 || parsed.height < minEdge || parsed.height > 2048) { + throw new Error( + family === "glm" + ? "GLM-image custom size requires width and height between 1024 and 2048." + : "Z.AI legacy image models require width and height between 512 and 2048." + ); + } + + if (parsed.width % widthStep !== 0 || parsed.height % widthStep !== 0) { + throw new Error( + family === "glm" + ? "GLM-image custom size requires width and height divisible by 32." 
+ : "Z.AI legacy image models require width and height divisible by 16." + ); + } + + if (parsed.width * parsed.height > maxPixels) { + throw new Error( + family === "glm" + ? "GLM-image custom size must not exceed 2^22 total pixels." + : "Z.AI legacy image size must not exceed 2^21 total pixels." + ); + } + + return formatSize(parsed.width, parsed.height); +} + +export function resolveSizeForModel( + model: string, + args: Pick, +): string { + const family = getModelFamily(model); + const quality = args.quality === "normal" ? "normal" : "2k"; + + if (args.size) { + return validateCustomSize(args.size, family); + } + + const recommended = family === "glm" ? GLM_RECOMMENDED_SIZES : LEGACY_RECOMMENDED_SIZES; + const defaultSize = family === "glm" ? "1280x1280" : "1024x1024"; + + if (!args.aspectRatio) return defaultSize; + + const recommendedRatio = findClosestRatioKey(args.aspectRatio, Object.keys(recommended)); + if (recommendedRatio) { + return recommended[recommendedRatio]!; + } + + const parsedRatio = parseAspectRatio(args.aspectRatio); + if (!parsedRatio) return defaultSize; + + const targetPixels = getTargetPixels(quality); + const maxPixels = family === "glm" ? GLM_MAX_PIXELS : LEGACY_MAX_PIXELS; + const step = family === "glm" ? GLM_SIZE_STEP : LEGACY_SIZE_STEP; + const fit = fitToPixelBudget( + parsedRatio.width, + parsedRatio.height, + targetPixels, + maxPixels, + step, + ); + return formatSize(fit.width, fit.height); +} + +function getZaiQuality(quality: CliArgs["quality"]): "hd" | "standard" { + return quality === "normal" ? "standard" : "hd"; +} + +export function validateArgs(_model: string, args: CliArgs): void { + if (args.referenceImages.length > 0) { + throw new Error("Z.AI GLM-image currently supports text-to-image only in baoyu-imagine. 
Remove --ref or choose another provider."); + } + + if (args.n > 1) { + throw new Error("Z.AI image generation currently returns a single image per request in baoyu-imagine."); + } +} + +export function buildRequestBody( + prompt: string, + model: string, + args: CliArgs, +): ZaiRequestBody { + validateArgs(model, args); + return { + model, + prompt, + quality: getZaiQuality(args.quality), + size: resolveSizeForModel(model, args), + }; +} + +export async function extractImageFromResponse(result: ZaiResponse): Promise { + const url = result.data?.[0]?.url; + if (!url) { + throw new Error("No image URL in Z.AI response"); + } + + const imageResponse = await fetch(url); + if (!imageResponse.ok) { + throw new Error(`Failed to download image from Z.AI: ${imageResponse.status}`); + } + + return new Uint8Array(await imageResponse.arrayBuffer()); +} + +export async function generateImage( + prompt: string, + model: string, + args: CliArgs, +): Promise { + const apiKey = getApiKey(); + if (!apiKey) { + throw new Error("ZAI_API_KEY is required. 
Get one from https://docs.z.ai/."); + } + + const response = await fetch(buildZaiUrl(), { + method: "POST", + headers: { + "Content-Type": "application/json", + Authorization: `Bearer ${apiKey}`, + }, + body: JSON.stringify(buildRequestBody(prompt, model, args)), + }); + + if (!response.ok) { + const err = await response.text(); + throw new Error(`Z.AI API error (${response.status}): ${err}`); + } + + const result = (await response.json()) as ZaiResponse; + return extractImageFromResponse(result); +} diff --git a/skills/baoyu-imagine/scripts/types.ts b/skills/baoyu-imagine/scripts/types.ts index dd98213..994db36 100644 --- a/skills/baoyu-imagine/scripts/types.ts +++ b/skills/baoyu-imagine/scripts/types.ts @@ -3,6 +3,7 @@ export type Provider = | "openai" | "openrouter" | "dashscope" + | "zai" | "minimax" | "replicate" | "jimeng" @@ -20,6 +21,7 @@ export type CliArgs = { size: string | null; quality: Quality | null; imageSize: string | null; + imageSizeSource?: "cli" | "task" | "config" | null; referenceImages: string[]; n: number; batchFile: string | null; @@ -61,6 +63,7 @@ export type ExtendConfig = { openai: string | null; openrouter: string | null; dashscope: string | null; + zai: string | null; minimax: string | null; replicate: string | null; jimeng: string | null;