From 3bd5fdeb1b9a1fb750c9b2909932f9721765391e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jim=20Liu=20=E5=AE=9D=E7=8E=89?= Date: Fri, 27 Feb 2026 10:12:52 -0600 Subject: [PATCH] feat(baoyu-image-gen): add gemini-3.1-flash-image-preview model and improve first-time setup - Add gemini-3.1-flash-image-preview to supported Google multimodal models - Improve preferences loading with blocking first-time setup flow - Add first-time-setup.md reference for guided configuration - Update model references in SKILL.md and preferences schema --- skills/baoyu-image-gen/SKILL.md | 39 ++-- .../references/config/first-time-setup.md | 197 ++++++++++++++++++ .../references/config/preferences-schema.md | 2 +- .../scripts/providers/google.ts | 5 +- 4 files changed, 218 insertions(+), 25 deletions(-) create mode 100644 skills/baoyu-image-gen/references/config/first-time-setup.md diff --git a/skills/baoyu-image-gen/SKILL.md b/skills/baoyu-image-gen/SKILL.md index 4ef6fff..b669bf1 100644 --- a/skills/baoyu-image-gen/SKILL.md +++ b/skills/baoyu-image-gen/SKILL.md @@ -13,33 +13,28 @@ Official API-based image generation. Supports OpenAI, Google, DashScope (阿里 1. `SKILL_DIR` = this SKILL.md file's directory 2. Script path = `${SKILL_DIR}/scripts/main.ts` -## Preferences (EXTEND.md) +## Step 0: Load Preferences ⛔ BLOCKING -Use Bash to check EXTEND.md existence (priority order): +**CRITICAL**: This step MUST complete BEFORE any image generation. Do NOT skip or defer. 
+ +Check EXTEND.md existence (priority: project → user): ```bash -# Check project-level first test -f .baoyu-skills/baoyu-image-gen/EXTEND.md && echo "project" - -# Then user-level (cross-platform: $HOME works on macOS/Linux/WSL) test -f "$HOME/.baoyu-skills/baoyu-image-gen/EXTEND.md" && echo "user" ``` -┌──────────────────────────────────────────────────┬───────────────────┐ -│ Path │ Location │ -├──────────────────────────────────────────────────┼───────────────────┤ -│ .baoyu-skills/baoyu-image-gen/EXTEND.md │ Project directory │ -├──────────────────────────────────────────────────┼───────────────────┤ -│ $HOME/.baoyu-skills/baoyu-image-gen/EXTEND.md │ User home │ -└──────────────────────────────────────────────────┴───────────────────┘ +| Result | Action | +|--------|--------| +| Found | Load, parse, apply settings. If `default_model.[provider]` is null → ask model only (Flow 2) | +| Not found | ⛔ Run first-time setup ([references/config/first-time-setup.md](references/config/first-time-setup.md)) → Save EXTEND.md → Then continue | -┌───────────┬───────────────────────────────────────────────────────────────────────────┐ -│ Result │ Action │ -├───────────┼───────────────────────────────────────────────────────────────────────────┤ -│ Found │ Read, parse, apply settings │ -├───────────┼───────────────────────────────────────────────────────────────────────────┤ -│ Not found │ Use defaults │ -└───────────┴───────────────────────────────────────────────────────────────────────────┘ +**CRITICAL**: If not found, complete the full setup (provider + model + quality + save location) using AskUserQuestion BEFORE generating any images. Generation is BLOCKED until EXTEND.md is created. 
+ +| Path | Location | +|------|----------| +| `.baoyu-skills/baoyu-image-gen/EXTEND.md` | Project directory | +| `$HOME/.baoyu-skills/baoyu-image-gen/EXTEND.md` | User home | **EXTEND.md Supports**: Default provider | Default quality | Default aspect ratio | Default image size | Default models @@ -87,12 +82,12 @@ npx -y bun ${SKILL_DIR}/scripts/main.ts --prompt "A cat" --image out.png --provi | `--promptfiles ` | Read prompt from files (concatenated) | | `--image ` | Output image path (required) | | `--provider google\|openai\|dashscope\|replicate` | Force provider (default: google) | -| `--model `, `-m` | Model ID (`--ref` with OpenAI requires GPT Image model, e.g. `gpt-image-1.5`) | +| `--model `, `-m` | Model ID (Google: `gemini-3-pro-image-preview`, `gemini-3.1-flash-image-preview`; OpenAI: `gpt-image-1.5`) | | `--ar ` | Aspect ratio (e.g., `16:9`, `1:1`, `4:3`) | | `--size ` | Size (e.g., `1024x1024`) | | `--quality normal\|2k` | Quality preset (default: 2k) | | `--imageSize 1K\|2K\|4K` | Image size for Google (default: from quality) | -| `--ref ` | Reference images. Supported by Google multimodal and OpenAI edits (GPT Image models). If provider omitted: Google first, then OpenAI | +| `--ref ` | Reference images. Supported by Google multimodal (`gemini-3-pro-image-preview`, `gemini-3-flash-preview`, `gemini-3.1-flash-image-preview`) and OpenAI edits (GPT Image models). 
If provider omitted: Google first, then OpenAI | | `--n ` | Number of images | | `--json` | JSON output | @@ -194,7 +189,7 @@ Supported: `1:1`, `16:9`, `9:16`, `4:3`, `3:4`, `2.35:1` - Missing API key → error with setup instructions - Generation failure → auto-retry once - Invalid aspect ratio → warning, proceed with default -- Reference images with unsupported provider/model → error with fix hint (switch to Google multimodal or OpenAI GPT Image edits) +- Reference images with unsupported provider/model → error with fix hint (switch to Google multimodal: `gemini-3-pro-image-preview`, `gemini-3.1-flash-image-preview`; or OpenAI GPT Image edits) ## Extension Support diff --git a/skills/baoyu-image-gen/references/config/first-time-setup.md b/skills/baoyu-image-gen/references/config/first-time-setup.md new file mode 100644 index 0000000..18415c4 --- /dev/null +++ b/skills/baoyu-image-gen/references/config/first-time-setup.md @@ -0,0 +1,197 @@ +--- +name: first-time-setup +description: First-time setup and default model selection flow for baoyu-image-gen +--- + +# First-Time Setup + +## Overview + +Triggered when: +1. No EXTEND.md found → full setup (provider + model + preferences) +2. EXTEND.md found but `default_model.[provider]` is null → model selection only + +## Setup Flow + +``` +No EXTEND.md found EXTEND.md found, model null + │ │ + ▼ ▼ +┌─────────────────────┐ ┌──────────────────────┐ +│ AskUserQuestion │ │ AskUserQuestion │ +│ (full setup) │ │ (model only) │ +└─────────────────────┘ └──────────────────────┘ + │ │ + ▼ ▼ +┌─────────────────────┐ ┌──────────────────────┐ +│ Create EXTEND.md │ │ Update EXTEND.md │ +└─────────────────────┘ └──────────────────────┘ + │ │ + ▼ ▼ + Continue Continue +``` + +## Flow 1: No EXTEND.md (Full Setup) + +**Language**: Use user's input language or saved language preference. 
+ +Use AskUserQuestion with ALL questions in ONE call: + +### Question 1: Default Provider + +```yaml +header: "Provider" +question: "Default image generation provider?" +options: + - label: "Google (Recommended)" + description: "Gemini multimodal - high quality, reference images, flexible sizes" + - label: "OpenAI" + description: "GPT Image - consistent quality, reliable output" + - label: "DashScope" + description: "Alibaba Cloud - z-image-turbo, good for Chinese content" + - label: "Replicate" + description: "Community models - nano-banana-pro, flexible model selection" +``` + +### Question 2: Default Google Model + +Only show this question if the user selected Google, or if no explicit provider was given (auto-detect). + +```yaml +header: "Google Model" +question: "Default Google image generation model?" +options: + - label: "gemini-3-pro-image-preview (Recommended)" + description: "Highest quality, best for production use" + - label: "gemini-3.1-flash-image-preview" + description: "Fast generation, good quality, lower cost" + - label: "gemini-3-flash-preview" + description: "Fast generation, balanced quality and speed" +``` + +### Question 3: Default Quality + +```yaml +header: "Quality" +question: "Default image quality?" +options: + - label: "2k (Recommended)" + description: "2048px - covers, illustrations, infographics" + - label: "normal" + description: "1024px - quick previews, drafts" +``` + +### Question 4: Save Location + +```yaml +header: "Save" +question: "Where to save preferences?" 
+options: + - label: "Project (Recommended)" + description: ".baoyu-skills/ (this project only)" + - label: "User" + description: "~/.baoyu-skills/ (all projects)" +``` + +### Save Locations + +| Choice | Path | Scope | +|--------|------|-------| +| Project | `.baoyu-skills/baoyu-image-gen/EXTEND.md` | Current project | +| User | `$HOME/.baoyu-skills/baoyu-image-gen/EXTEND.md` | All projects | + +### EXTEND.md Template + +```yaml +--- +version: 1 +default_provider: [selected provider or null] +default_quality: [selected quality] +default_aspect_ratio: null +default_image_size: null +default_model: + google: [selected google model or null] + openai: null + dashscope: null + replicate: null +--- +``` + +## Flow 2: EXTEND.md Exists, Model Null + +When EXTEND.md exists but `default_model.[current_provider]` is null, ask ONLY the model question for the current provider. + +### Google Model Selection + +```yaml +header: "Google Model" +question: "Choose a default Google image generation model?" +options: + - label: "gemini-3-pro-image-preview (Recommended)" + description: "Highest quality, best for production use" + - label: "gemini-3.1-flash-image-preview" + description: "Fast generation, good quality, lower cost" + - label: "gemini-3-flash-preview" + description: "Fast generation, balanced quality and speed" +``` + +### OpenAI Model Selection + +```yaml +header: "OpenAI Model" +question: "Choose a default OpenAI image generation model?" +options: + - label: "gpt-image-1.5 (Recommended)" + description: "Latest GPT Image model, high quality" + - label: "gpt-image-1" + description: "Previous generation GPT Image model" +``` + +### DashScope Model Selection + +```yaml +header: "DashScope Model" +question: "Choose a default DashScope image generation model?" 
+options: + - label: "z-image-turbo (Recommended)" + description: "Fast generation, good quality" + - label: "z-image-ultra" + description: "Higher quality, slower generation" +``` + +### Replicate Model Selection + +```yaml +header: "Replicate Model" +question: "Choose a default Replicate image generation model?" +options: + - label: "google/nano-banana-pro (Recommended)" + description: "Google's fast image model on Replicate" + - label: "google/nano-banana" + description: "Google's base image model on Replicate" +``` + +### Update EXTEND.md + +After user selects a model: + +1. Read existing EXTEND.md +2. If `default_model:` section exists → update the provider-specific key +3. If `default_model:` section missing → add the full section: + +```yaml +default_model: + google: [value or null] + openai: [value or null] + dashscope: [value or null] + replicate: [value or null] +``` + +Only set the selected provider's model; leave others as their current value or null. + +## After Setup + +1. Create directory if needed +2. Write/update EXTEND.md with frontmatter +3. Confirm: "Preferences saved to [path]" +4. 
Continue with image generation diff --git a/skills/baoyu-image-gen/references/config/preferences-schema.md b/skills/baoyu-image-gen/references/config/preferences-schema.md index ce7696a..362c7b9 100644 --- a/skills/baoyu-image-gen/references/config/preferences-schema.md +++ b/skills/baoyu-image-gen/references/config/preferences-schema.md @@ -20,7 +20,7 @@ default_aspect_ratio: null # "16:9"|"1:1"|"4:3"|"3:4"|"2.35:1"|null default_image_size: null # 1K|2K|4K|null (Google only, overrides quality) default_model: - google: null # e.g., "gemini-3-pro-image-preview" + google: null # e.g., "gemini-3-pro-image-preview", "gemini-3.1-flash-image-preview" openai: null # e.g., "gpt-image-1.5" dashscope: null # e.g., "z-image-turbo" replicate: null # e.g., "google/nano-banana-pro" diff --git a/skills/baoyu-image-gen/scripts/providers/google.ts b/skills/baoyu-image-gen/scripts/providers/google.ts index 1d8bcaa..021e58a 100644 --- a/skills/baoyu-image-gen/scripts/providers/google.ts +++ b/skills/baoyu-image-gen/scripts/providers/google.ts @@ -6,6 +6,7 @@ import type { CliArgs } from "../types"; const GOOGLE_MULTIMODAL_MODELS = [ "gemini-3-pro-image-preview", "gemini-3-flash-preview", + "gemini-3.1-flash-image-preview", ]; const GOOGLE_IMAGEN_MODELS = [ "imagen-3.0-generate-002", @@ -303,7 +304,7 @@ export async function generateImage( if (isGoogleImagen(model)) { if (args.referenceImages.length > 0) { throw new Error( - "Reference images are not supported with Imagen models. Use gemini-3-pro-image-preview or gemini-3-flash-preview.", + "Reference images are not supported with Imagen models. Use gemini-3-pro-image-preview, gemini-3-flash-preview, or gemini-3.1-flash-image-preview.", ); } return generateWithImagen(prompt, model, args); @@ -311,7 +312,7 @@ export async function generateImage( if (!isGoogleMultimodal(model) && args.referenceImages.length > 0) { throw new Error( - "Reference images are only supported with Gemini multimodal models. 
Use gemini-3-pro-image-preview or gemini-3-flash-preview.", + "Reference images are only supported with Gemini multimodal models. Use gemini-3-pro-image-preview, gemini-3-flash-preview, or gemini-3.1-flash-image-preview.", ); }