feat: add batch parallel image generation and provider-level throttling

- Add --batchfile and --jobs flags for multi-image parallel generation
  with per-provider concurrency control and rate limiting
- Refactor main.ts into prepareSingleTask/prepareBatchTasks/runBatchTasks
  with worker pool pattern and up to 3 retries per image
- Fix Replicate provider: use image_input array (nano-banana-pro schema),
  add match_input_image aspect ratio, add quality-to-resolution mapping
- Improve OpenAI error message for missing API key (Codex auth hint)
- Expand non-retryable error detection (4xx codes, disabled models)
- Add batch config to EXTEND.md schema (max_workers, provider_limits)
- Add build-batch.ts for article-illustrator batch workflow integration
- Add image-language awareness pass to baoyu-translate

Co-authored-by: 敖氏 <aoshi@MacBook-Air.local>
This commit is contained in:
Jim Liu 宝玉 2026-03-09 00:07:45 -05:00
parent e4d4ec8334
commit 5acef7151b
8 changed files with 788 additions and 139 deletions

View File

@ -0,0 +1,156 @@
import path from "node:path";
import process from "node:process";
import { readdir, readFile, writeFile } from "node:fs/promises";
// Parsed command-line options for this batch-file builder.
// Null path fields mean "not provided"; the required ones are validated in main().
type CliArgs = {
outlinePath: string | null; // --outline: outline.md describing the illustrations
promptsDir: string | null; // --prompts: directory holding per-illustration prompt files
outputPath: string | null; // --output: where batch.json is written
imagesDir: string | null; // --images-dir: destination directory for generated images
provider: string; // --provider (default "replicate")
model: string; // --model (default "google/nano-banana-pro")
aspectRatio: string; // --ar (default "16:9")
quality: string; // --quality (default "2k")
jobs: number | null; // --jobs: advisory worker count copied into batch.json
help: boolean; // -h / --help
};
// One "## Illustration N" section parsed out of outline.md.
type OutlineEntry = {
index: number; // illustration number taken from the section heading
filename: string; // target image filename from the "**Filename**:" line
};
// Print CLI usage and option help for this script to stdout.
function printUsage(): void {
  const helpLines = [
    "Usage:",
    "npx -y tsx scripts/build-batch.ts --outline outline.md --prompts prompts --output batch.json --images-dir attachments",
    "Options:",
    "--outline <path> Path to outline.md",
    "--prompts <path> Path to prompts directory",
    "--output <path> Path to output batch.json",
    "--images-dir <path> Directory for generated images",
    "--provider <name> Provider for baoyu-image-gen batch tasks (default: replicate)",
    "--model <id> Model for baoyu-image-gen batch tasks (default: google/nano-banana-pro)",
    "--ar <ratio> Aspect ratio for all tasks (default: 16:9)",
    "--quality <level> Quality for all tasks (default: 2k)",
    "--jobs <count> Recommended worker count metadata (optional)",
    "-h, --help Show help",
  ];
  console.log(helpLines.join("\n"));
}
// Parse raw argv into a CliArgs record.
// Unknown flags are ignored; a flag missing its value falls back to null or
// the built-in default. Fix: an invalid --jobs value (e.g. "--jobs abc")
// previously stored NaN in a field typed `number | null`; it is now
// normalized to null, as is any non-positive count.
function parseArgs(argv: string[]): CliArgs {
  const args: CliArgs = {
    outlinePath: null,
    promptsDir: null,
    outputPath: null,
    imagesDir: null,
    provider: "replicate",
    model: "google/nano-banana-pro",
    aspectRatio: "16:9",
    quality: "2k",
    jobs: null,
    help: false,
  };
  for (let i = 0; i < argv.length; i++) {
    const current = argv[i]!;
    if (current === "--outline") args.outlinePath = argv[++i] ?? null;
    else if (current === "--prompts") args.promptsDir = argv[++i] ?? null;
    else if (current === "--output") args.outputPath = argv[++i] ?? null;
    else if (current === "--images-dir") args.imagesDir = argv[++i] ?? null;
    else if (current === "--provider") args.provider = argv[++i] ?? args.provider;
    else if (current === "--model") args.model = argv[++i] ?? args.model;
    else if (current === "--ar") args.aspectRatio = argv[++i] ?? args.aspectRatio;
    else if (current === "--quality") args.quality = argv[++i] ?? args.quality;
    else if (current === "--jobs") {
      const value = argv[++i];
      // Never let NaN or a non-positive count escape into args.jobs.
      const parsed = value ? parseInt(value, 10) : NaN;
      args.jobs = Number.isFinite(parsed) && parsed > 0 ? parsed : null;
    } else if (current === "--help" || current === "-h") {
      args.help = true;
    }
  }
  return args;
}
// Extract illustration entries (index + target filename) from outline.md.
// Sections are introduced by "## Illustration <N>" headings; sections missing
// either the index or a "**Filename**:" line are silently dropped.
function parseOutline(content: string): OutlineEntry[] {
  return content
    .split(/^## Illustration\s+/m)
    .slice(1)
    .flatMap((section) => {
      const indexMatch = section.match(/^(\d+)/);
      const filenameMatch = section.match(/\*\*Filename\*\*:\s*(.+)/);
      if (!indexMatch || !filenameMatch) return [];
      return [
        {
          index: parseInt(indexMatch[1]!, 10),
          filename: filenameMatch[1]!.trim(),
        },
      ];
    });
}
// Locate the prompt markdown file whose name starts with the zero-padded
// illustration index (e.g. "03-*.md" for index 3). Returns the first match
// in directory-listing order, or null when none exists.
async function findPromptFile(promptsDir: string, entry: OutlineEntry): Promise<string | null> {
  const wantedPrefix = String(entry.index).padStart(2, "0");
  const candidates = await readdir(promptsDir);
  for (const name of candidates) {
    if (name.startsWith(wantedPrefix) && name.endsWith(".md")) {
      return path.join(promptsDir, name);
    }
  }
  return null;
}
// Entry point: read the outline, pair each illustration with its prompt
// file, and write a batch.json consumable by baoyu-image-gen --batchfile.
// Exits non-zero when required flags are missing, when the outline has no
// illustration sections, or when every entry had to be skipped (an empty
// batch file would silently do nothing downstream).
async function main(): Promise<void> {
  const args = parseArgs(process.argv.slice(2));
  if (args.help) {
    printUsage();
    return;
  }
  if (!args.outlinePath) {
    console.error("Error: --outline is required");
    process.exit(1);
  }
  if (!args.promptsDir) {
    console.error("Error: --prompts is required");
    process.exit(1);
  }
  if (!args.outputPath) {
    console.error("Error: --output is required");
    process.exit(1);
  }
  const outlineContent = await readFile(args.outlinePath, "utf8");
  const entries = parseOutline(outlineContent);
  if (entries.length === 0) {
    console.error("No illustration entries found in outline.");
    process.exit(1);
  }
  // Loop-invariant: every task shares the same image directory.
  const imageDir = args.imagesDir ?? path.dirname(args.outputPath);
  // Shape of one record in the batch.json "tasks" array (mirrors
  // baoyu-image-gen's BatchTaskInput).
  type BatchTask = {
    id: string;
    promptFiles: string[];
    image: string;
    provider: string;
    model: string;
    ar: string;
    quality: string;
  };
  const tasks: BatchTask[] = [];
  for (const entry of entries) {
    const promptFile = await findPromptFile(args.promptsDir, entry);
    if (!promptFile) {
      console.error(`Warning: No prompt file found for illustration ${entry.index}, skipping.`);
      continue;
    }
    tasks.push({
      id: `illustration-${String(entry.index).padStart(2, "0")}`,
      promptFiles: [promptFile],
      image: path.join(imageDir, entry.filename),
      provider: args.provider,
      model: args.model,
      ar: args.aspectRatio,
      quality: args.quality,
    });
  }
  // All entries skipped: refuse to write a useless empty batch file.
  if (tasks.length === 0) {
    console.error("No tasks could be built (all illustrations were skipped).");
    process.exit(1);
  }
  const output: Record<string, unknown> = { tasks };
  if (args.jobs) output.jobs = args.jobs;
  await writeFile(args.outputPath, JSON.stringify(output, null, 2) + "\n");
  console.log(`Batch file written: ${args.outputPath} (${tasks.length} tasks)`);
}
// Top-level error trap: print a concise message and exit non-zero.
main().catch((error: unknown) => {
  const message = error instanceof Error ? error.message : String(error);
  console.error(message);
  process.exit(1);
});

View File

@ -55,7 +55,7 @@ if (Test-Path "$HOME/.baoyu-skills/baoyu-image-gen/EXTEND.md") { "user" }
| `.baoyu-skills/baoyu-image-gen/EXTEND.md` | Project directory | | `.baoyu-skills/baoyu-image-gen/EXTEND.md` | Project directory |
| `$HOME/.baoyu-skills/baoyu-image-gen/EXTEND.md` | User home | | `$HOME/.baoyu-skills/baoyu-image-gen/EXTEND.md` | User home |
**EXTEND.md Supports**: Default provider | Default quality | Default aspect ratio | Default image size | Default models **EXTEND.md Supports**: Default provider | Default quality | Default aspect ratio | Default image size | Default models | Batch worker cap | Provider-specific batch limits
Schema: `references/config/preferences-schema.md` Schema: `references/config/preferences-schema.md`
@ -91,6 +91,12 @@ ${BUN_X} {baseDir}/scripts/main.ts --prompt "A cat" --image out.png --provider r
# Replicate with specific model # Replicate with specific model
${BUN_X} {baseDir}/scripts/main.ts --prompt "A cat" --image out.png --provider replicate --model google/nano-banana ${BUN_X} {baseDir}/scripts/main.ts --prompt "A cat" --image out.png --provider replicate --model google/nano-banana
# Batch mode with saved prompt files
${BUN_X} {baseDir}/scripts/main.ts --batchfile batch.json
# Batch mode with explicit worker count
${BUN_X} {baseDir}/scripts/main.ts --batchfile batch.json --jobs 4 --json
``` ```
## Options ## Options
@ -99,14 +105,16 @@ ${BUN_X} {baseDir}/scripts/main.ts --prompt "A cat" --image out.png --provider r
|--------|-------------| |--------|-------------|
| `--prompt <text>`, `-p` | Prompt text | | `--prompt <text>`, `-p` | Prompt text |
| `--promptfiles <files...>` | Read prompt from files (concatenated) | | `--promptfiles <files...>` | Read prompt from files (concatenated) |
| `--image <path>` | Output image path (required) | | `--image <path>` | Output image path (required in single-image mode) |
| `--provider google\|openai\|dashscope\|replicate` | Force provider (default: google) | | `--batchfile <path>` | JSON batch file for multi-image generation |
| `--model <id>`, `-m` | Model ID (Google: `gemini-3-pro-image-preview`, `gemini-3.1-flash-image-preview`; OpenAI: `gpt-image-1.5`) | | `--jobs <count>` | Worker count for batch mode (default: auto, max from config, built-in default 10) |
| `--provider google\|openai\|dashscope\|replicate` | Force provider (default: auto-detect) |
| `--model <id>`, `-m` | Model ID (Google: `gemini-3-pro-image-preview`, `gemini-3.1-flash-image-preview`; OpenAI: `gpt-image-1.5`, `gpt-image-1`) |
| `--ar <ratio>` | Aspect ratio (e.g., `16:9`, `1:1`, `4:3`) | | `--ar <ratio>` | Aspect ratio (e.g., `16:9`, `1:1`, `4:3`) |
| `--size <WxH>` | Size (e.g., `1024x1024`) | | `--size <WxH>` | Size (e.g., `1024x1024`) |
| `--quality normal\|2k` | Quality preset (default: 2k) | | `--quality normal\|2k` | Quality preset (default: `2k`) |
| `--imageSize 1K\|2K\|4K` | Image size for Google (default: from quality) | | `--imageSize 1K\|2K\|4K` | Image size for Google (default: from quality) |
| `--ref <files...>` | Reference images. Supported by Google multimodal (`gemini-3-pro-image-preview`, `gemini-3-flash-preview`, `gemini-3.1-flash-image-preview`) and OpenAI edits (GPT Image models). If provider omitted: Google first, then OpenAI | | `--ref <files...>` | Reference images. Supported by Google multimodal, OpenAI GPT Image edits, and Replicate |
| `--n <count>` | Number of images | | `--n <count>` | Number of images |
| `--json` | JSON output | | `--json` | JSON output |
@ -126,6 +134,9 @@ ${BUN_X} {baseDir}/scripts/main.ts --prompt "A cat" --image out.png --provider r
| `GOOGLE_BASE_URL` | Custom Google endpoint | | `GOOGLE_BASE_URL` | Custom Google endpoint |
| `DASHSCOPE_BASE_URL` | Custom DashScope endpoint | | `DASHSCOPE_BASE_URL` | Custom DashScope endpoint |
| `REPLICATE_BASE_URL` | Custom Replicate endpoint | | `REPLICATE_BASE_URL` | Custom Replicate endpoint |
| `BAOYU_IMAGE_GEN_MAX_WORKERS` | Override batch worker cap |
| `BAOYU_IMAGE_GEN_<PROVIDER>_CONCURRENCY` | Override provider concurrency, e.g. `BAOYU_IMAGE_GEN_REPLICATE_CONCURRENCY` |
| `BAOYU_IMAGE_GEN_<PROVIDER>_START_INTERVAL_MS` | Override provider start gap, e.g. `BAOYU_IMAGE_GEN_REPLICATE_START_INTERVAL_MS` |
**Load Priority**: CLI args > EXTEND.md > env vars > `<cwd>/.baoyu-skills/.env` > `~/.baoyu-skills/.env` **Load Priority**: CLI args > EXTEND.md > env vars > `<cwd>/.baoyu-skills/.env` > `~/.baoyu-skills/.env`
@ -187,36 +198,29 @@ Supported: `1:1`, `16:9`, `9:16`, `4:3`, `3:4`, `2.35:1`
## Generation Mode ## Generation Mode
**Default**: Sequential generation (one image at a time). This ensures stable output and easier debugging. **Default**: Sequential generation.
**Parallel Generation**: Only use when user explicitly requests parallel/concurrent generation. **Batch Parallel Generation**: When `--batchfile` contains 2 or more pending tasks, the script automatically enables parallel generation.
| Mode | When to Use | | Mode | When to Use |
|------|-------------| |------|-------------|
| Sequential (default) | Normal usage, single images, small batches | | Sequential (default) | Normal usage, single images, small batches |
| Parallel | User explicitly requests, large batches (10+) | | Parallel batch | Batch mode with 2+ tasks |
**Parallel Settings** (when requested): Parallel behavior:
| Setting | Value | - Default worker count is automatic, capped by config, built-in default 10
|---------|-------| - Provider-specific throttling is applied only in batch mode, and the built-in defaults are tuned for faster throughput while still avoiding obvious RPM bursts
| Recommended concurrency | 4 subagents | - You can override worker count with `--jobs <count>`
| Max concurrency | 8 subagents | - Each image retries automatically up to 3 attempts
| Use case | Large batch generation when user requests parallel | - Final output includes success count, failure count, and per-image failure reasons
**Agent Implementation** (parallel mode only):
```
# Launch multiple generations in parallel using Task tool
# Each Task runs as background subagent with run_in_background=true
# Collect results via TaskOutput when all complete
```
## Error Handling ## Error Handling
- Missing API key → error with setup instructions - Missing API key → error with setup instructions
- Generation failure → auto-retry once - Generation failure → auto-retry up to 3 attempts per image
- Invalid aspect ratio → warning, proceed with default - Invalid aspect ratio → warning, proceed with default
- Reference images with unsupported provider/model → error with fix hint (switch to Google multimodal: `gemini-3-pro-image-preview`, `gemini-3.1-flash-image-preview`; or OpenAI GPT Image edits) - Reference images with unsupported provider/model → error with fix hint
## Extension Support ## Extension Support

View File

@ -21,9 +21,25 @@ default_image_size: null # 1K|2K|4K|null (Google only, overrides quality)
default_model: default_model:
google: null # e.g., "gemini-3-pro-image-preview", "gemini-3.1-flash-image-preview" google: null # e.g., "gemini-3-pro-image-preview", "gemini-3.1-flash-image-preview"
openai: null # e.g., "gpt-image-1.5" openai: null # e.g., "gpt-image-1.5", "gpt-image-1"
dashscope: null # e.g., "z-image-turbo" dashscope: null # e.g., "z-image-turbo"
replicate: null # e.g., "google/nano-banana-pro" replicate: null # e.g., "google/nano-banana-pro"
batch:
max_workers: 10
provider_limits:
replicate:
concurrency: 5
start_interval_ms: 700
google:
concurrency: 3
start_interval_ms: 1100
openai:
concurrency: 3
start_interval_ms: 1100
dashscope:
concurrency: 3
start_interval_ms: 1100
--- ---
``` ```
@ -40,6 +56,9 @@ default_model:
| `default_model.openai` | string\|null | null | OpenAI default model | | `default_model.openai` | string\|null | null | OpenAI default model |
| `default_model.dashscope` | string\|null | null | DashScope default model | | `default_model.dashscope` | string\|null | null | DashScope default model |
| `default_model.replicate` | string\|null | null | Replicate default model | | `default_model.replicate` | string\|null | null | Replicate default model |
| `batch.max_workers` | int\|null | 10 | Batch worker cap |
| `batch.provider_limits.<provider>.concurrency` | int\|null | provider default | Max simultaneous requests per provider |
| `batch.provider_limits.<provider>.start_interval_ms` | int\|null | provider default | Minimum gap between request starts per provider |
## Examples ## Examples
@ -65,5 +84,11 @@ default_model:
openai: "gpt-image-1.5" openai: "gpt-image-1.5"
dashscope: "z-image-turbo" dashscope: "z-image-turbo"
replicate: "google/nano-banana-pro" replicate: "google/nano-banana-pro"
batch:
max_workers: 10
provider_limits:
replicate:
concurrency: 5
start_interval_ms: 700
--- ---
``` ```

View File

@ -2,34 +2,99 @@ import path from "node:path";
import process from "node:process"; import process from "node:process";
import { homedir } from "node:os"; import { homedir } from "node:os";
import { access, mkdir, readFile, writeFile } from "node:fs/promises"; import { access, mkdir, readFile, writeFile } from "node:fs/promises";
import type { CliArgs, Provider, ExtendConfig } from "./types"; import type {
BatchFile,
BatchTaskInput,
CliArgs,
ExtendConfig,
Provider,
} from "./types";
// Shape of a provider implementation module (providers/google, openai, ...).
type ProviderModule = {
getDefaultModel: () => string;
generateImage: (prompt: string, model: string, args: CliArgs) => Promise<Uint8Array>;
};
// A fully-resolved generation task: prompt loaded, provider/model chosen,
// output path normalized. Produced by prepareSingleTask / prepareBatchTasks.
type PreparedTask = {
id: string;
prompt: string;
args: CliArgs;
provider: Provider;
model: string;
outputPath: string;
providerModule: ProviderModule;
};
// Outcome of one batch task after all retry attempts.
type TaskResult = {
id: string;
provider: Provider;
model: string;
outputPath: string;
success: boolean;
attempts: number; // total generation attempts made (1..MAX_ATTEMPTS)
error: string | null; // last error message; null when success is true
};
// Per-provider throttling knobs applied in batch mode.
type ProviderRateLimit = {
concurrency: number; // max simultaneous in-flight requests
startIntervalMs: number; // minimum gap between request starts, in ms
};
const MAX_ATTEMPTS = 3; // per-image attempt cap (total attempts, not extra retries)
const DEFAULT_MAX_WORKERS = 10; // built-in worker cap when neither env nor EXTEND.md overrides it
const POLL_WAIT_MS = 250; // NOTE(review): presumably the worker-pool idle polling interval — usage not visible here, confirm
// Built-in provider rate limits; overridable via EXTEND.md batch config or
// BAOYU_IMAGE_GEN_<PROVIDER>_CONCURRENCY / _START_INTERVAL_MS env vars.
const DEFAULT_PROVIDER_RATE_LIMITS: Record<Provider, ProviderRateLimit> = {
replicate: { concurrency: 5, startIntervalMs: 700 },
google: { concurrency: 3, startIntervalMs: 1100 },
openai: { concurrency: 3, startIntervalMs: 1100 },
dashscope: { concurrency: 3, startIntervalMs: 1100 },
};
function printUsage(): void { function printUsage(): void {
console.log(`Usage: console.log(`Usage:
npx -y bun scripts/main.ts --prompt "A cat" --image cat.png npx -y bun scripts/main.ts --prompt "A cat" --image cat.png
npx -y bun scripts/main.ts --prompt "A landscape" --image landscape.png --ar 16:9
npx -y bun scripts/main.ts --promptfiles system.md content.md --image out.png npx -y bun scripts/main.ts --promptfiles system.md content.md --image out.png
npx -y bun scripts/main.ts --batchfile batch.json
Options: Options:
-p, --prompt <text> Prompt text -p, --prompt <text> Prompt text
--promptfiles <files...> Read prompt from files (concatenated) --promptfiles <files...> Read prompt from files (concatenated)
--image <path> Output image path (required) --image <path> Output image path (required in single-image mode)
--batchfile <path> JSON batch file for multi-image generation
--jobs <count> Worker count for batch mode (default: auto, max from config, built-in default 10)
--provider google|openai|dashscope|replicate Force provider (auto-detect by default) --provider google|openai|dashscope|replicate Force provider (auto-detect by default)
-m, --model <id> Model ID -m, --model <id> Model ID
--ar <ratio> Aspect ratio (e.g., 16:9, 1:1, 4:3) --ar <ratio> Aspect ratio (e.g., 16:9, 1:1, 4:3)
--size <WxH> Size (e.g., 1024x1024) --size <WxH> Size (e.g., 1024x1024)
--quality normal|2k Quality preset (default: 2k) --quality normal|2k Quality preset (default: 2k)
--imageSize 1K|2K|4K Image size for Google (default: from quality) --imageSize 1K|2K|4K Image size for Google (default: from quality)
--ref <files...> Reference images (Google multimodal or OpenAI edits) --ref <files...> Reference images (Google multimodal, OpenAI GPT Image edits, or Replicate)
--n <count> Number of images (default: 1) --n <count> Number of images for the current task (default: 1)
--json JSON output --json JSON output
-h, --help Show help -h, --help Show help
Batch file format:
[
{
"id": "hero",
"promptFiles": ["prompts/hero.md"],
"image": "out/hero.png",
"provider": "replicate",
"model": "google/nano-banana-pro",
"ar": "16:9"
}
]
Behavior:
- Batch mode automatically runs in parallel when pending tasks >= 2
- Each image retries automatically up to 3 attempts
- Batch summary reports success count, failure count, and per-image errors
Environment variables: Environment variables:
OPENAI_API_KEY OpenAI API key OPENAI_API_KEY OpenAI API key
GOOGLE_API_KEY Google API key GOOGLE_API_KEY Google API key
GEMINI_API_KEY Gemini API key (alias for GOOGLE_API_KEY) GEMINI_API_KEY Gemini API key (alias for GOOGLE_API_KEY)
DASHSCOPE_API_KEY DashScope API key () DASHSCOPE_API_KEY DashScope API key
REPLICATE_API_TOKEN Replicate API token REPLICATE_API_TOKEN Replicate API token
OPENAI_IMAGE_MODEL Default OpenAI model (gpt-image-1.5) OPENAI_IMAGE_MODEL Default OpenAI model (gpt-image-1.5)
GOOGLE_IMAGE_MODEL Default Google model (gemini-3-pro-image-preview) GOOGLE_IMAGE_MODEL Default Google model (gemini-3-pro-image-preview)
@ -40,6 +105,9 @@ Environment variables:
GOOGLE_BASE_URL Custom Google endpoint GOOGLE_BASE_URL Custom Google endpoint
DASHSCOPE_BASE_URL Custom DashScope endpoint DASHSCOPE_BASE_URL Custom DashScope endpoint
REPLICATE_BASE_URL Custom Replicate endpoint REPLICATE_BASE_URL Custom Replicate endpoint
BAOYU_IMAGE_GEN_MAX_WORKERS Override batch worker cap
BAOYU_IMAGE_GEN_<PROVIDER>_CONCURRENCY Override provider concurrency
BAOYU_IMAGE_GEN_<PROVIDER>_START_INTERVAL_MS Override provider start gap in ms
Env file load order: CLI args > EXTEND.md > process.env > <cwd>/.baoyu-skills/.env > ~/.baoyu-skills/.env`); Env file load order: CLI args > EXTEND.md > process.env > <cwd>/.baoyu-skills/.env > ~/.baoyu-skills/.env`);
} }
@ -57,6 +125,8 @@ function parseArgs(argv: string[]): CliArgs {
imageSize: null, imageSize: null,
referenceImages: [], referenceImages: [],
n: 1, n: 1,
batchFile: null,
jobs: null,
json: false, json: false,
help: false, help: false,
}; };
@ -110,9 +180,26 @@ function parseArgs(argv: string[]): CliArgs {
continue; continue;
} }
if (a === "--batchfile") {
const v = argv[++i];
if (!v) throw new Error("Missing value for --batchfile");
out.batchFile = v;
continue;
}
if (a === "--jobs") {
const v = argv[++i];
if (!v) throw new Error("Missing value for --jobs");
out.jobs = parseInt(v, 10);
if (isNaN(out.jobs) || out.jobs < 1) throw new Error(`Invalid worker count: ${v}`);
continue;
}
if (a === "--provider") { if (a === "--provider") {
const v = argv[++i]; const v = argv[++i];
if (v !== "google" && v !== "openai" && v !== "dashscope" && v !== "replicate") throw new Error(`Invalid provider: ${v}`); if (v !== "google" && v !== "openai" && v !== "dashscope" && v !== "replicate") {
throw new Error(`Invalid provider: ${v}`);
}
out.provider = v; out.provider = v;
continue; continue;
} }
@ -228,9 +315,11 @@ function parseSimpleYaml(yaml: string): Partial<ExtendConfig> {
const config: Partial<ExtendConfig> = {}; const config: Partial<ExtendConfig> = {};
const lines = yaml.split("\n"); const lines = yaml.split("\n");
let currentKey: string | null = null; let currentKey: string | null = null;
let currentProvider: Provider | null = null;
for (const line of lines) { for (const line of lines) {
const trimmed = line.trim(); const trimmed = line.trim();
const indent = line.match(/^\s*/)?.[0].length ?? 0;
if (!trimmed || trimmed.startsWith("#")) continue; if (!trimmed || trimmed.startsWith("#")) continue;
if (trimmed.includes(":") && !trimmed.startsWith("-")) { if (trimmed.includes(":") && !trimmed.startsWith("-")) {
@ -247,18 +336,57 @@ function parseSimpleYaml(yaml: string): Partial<ExtendConfig> {
} else if (key === "default_provider") { } else if (key === "default_provider") {
config.default_provider = value === "null" ? null : (value as Provider); config.default_provider = value === "null" ? null : (value as Provider);
} else if (key === "default_quality") { } else if (key === "default_quality") {
config.default_quality = value === "null" ? null : (value as "normal" | "2k"); config.default_quality = value === "null" ? null : value as "normal" | "2k";
} else if (key === "default_aspect_ratio") { } else if (key === "default_aspect_ratio") {
const cleaned = value.replace(/['"]/g, ""); const cleaned = value.replace(/['"]/g, "");
config.default_aspect_ratio = cleaned === "null" ? null : cleaned; config.default_aspect_ratio = cleaned === "null" ? null : cleaned;
} else if (key === "default_image_size") { } else if (key === "default_image_size") {
config.default_image_size = value === "null" ? null : (value as "1K" | "2K" | "4K"); config.default_image_size = value === "null" ? null : value as "1K" | "2K" | "4K";
} else if (key === "default_model") { } else if (key === "default_model") {
config.default_model = { google: null, openai: null, dashscope: null, replicate: null }; config.default_model = { google: null, openai: null, dashscope: null, replicate: null };
currentKey = "default_model"; currentKey = "default_model";
} else if (currentKey === "default_model" && (key === "google" || key === "openai" || key === "dashscope" || key === "replicate")) { currentProvider = null;
} else if (key === "batch") {
config.batch = {};
currentKey = "batch";
currentProvider = null;
} else if (currentKey === "batch" && indent >= 2 && key === "max_workers") {
config.batch ??= {};
config.batch.max_workers = value === "null" ? null : parseInt(value, 10);
} else if (currentKey === "batch" && indent >= 2 && key === "provider_limits") {
config.batch ??= {};
config.batch.provider_limits ??= {};
currentKey = "provider_limits";
currentProvider = null;
} else if (
currentKey === "provider_limits" &&
indent >= 4 &&
(key === "google" || key === "openai" || key === "dashscope" || key === "replicate")
) {
config.batch ??= {};
config.batch.provider_limits ??= {};
config.batch.provider_limits[key] ??= {};
currentProvider = key;
} else if (
currentKey === "default_model" &&
(key === "google" || key === "openai" || key === "dashscope" || key === "replicate")
) {
const cleaned = value.replace(/['"]/g, ""); const cleaned = value.replace(/['"]/g, "");
config.default_model![key] = cleaned === "null" ? null : cleaned; config.default_model![key] = cleaned === "null" ? null : cleaned;
} else if (
currentKey === "provider_limits" &&
currentProvider &&
indent >= 6 &&
(key === "concurrency" || key === "start_interval_ms")
) {
config.batch ??= {};
config.batch.provider_limits ??= {};
const providerLimit = (config.batch.provider_limits[currentProvider] ??= {});
if (key === "concurrency") {
providerLimit.concurrency = value === "null" ? null : parseInt(value, 10);
} else {
providerLimit.start_interval_ms = value === "null" ? null : parseInt(value, 10);
}
} }
} }
} }
@ -280,7 +408,6 @@ async function loadExtendConfig(): Promise<Partial<ExtendConfig>> {
const content = await readFile(p, "utf8"); const content = await readFile(p, "utf8");
const yaml = extractYamlFrontMatter(content); const yaml = extractYamlFrontMatter(content);
if (!yaml) continue; if (!yaml) continue;
return parseSimpleYaml(yaml); return parseSimpleYaml(yaml);
} catch { } catch {
continue; continue;
@ -300,6 +427,46 @@ function mergeConfig(args: CliArgs, extend: Partial<ExtendConfig>): CliArgs {
}; };
} }
// Parse a string into a strictly positive integer.
// Returns null when the input is absent, not a number, zero, or negative.
function parsePositiveInt(value: string | undefined): number | null {
  if (!value) return null;
  const n = parseInt(value, 10);
  if (!Number.isFinite(n)) return null;
  return n > 0 ? n : null;
}
// Resolve the batch worker cap. Precedence: BAOYU_IMAGE_GEN_MAX_WORKERS env
// var > EXTEND.md batch.max_workers > built-in default. Never below 1.
function getConfiguredMaxWorkers(extendConfig: Partial<ExtendConfig>): number {
  const fromEnv = parsePositiveInt(process.env.BAOYU_IMAGE_GEN_MAX_WORKERS);
  const fromConfig = extendConfig.batch?.max_workers ?? null;
  const resolved = fromEnv ?? fromConfig ?? DEFAULT_MAX_WORKERS;
  return Math.max(1, resolved);
}
// Compute the effective per-provider rate limits for batch mode.
// Per-field precedence: BAOYU_IMAGE_GEN_<PROVIDER>_* env var >
// EXTEND.md batch.provider_limits > built-in default.
function getConfiguredProviderRateLimits(
  extendConfig: Partial<ExtendConfig>
): Record<Provider, ProviderRateLimit> {
  const resolved = {} as Record<Provider, ProviderRateLimit>;
  // Derive the provider list from the defaults table instead of repeating it.
  for (const provider of Object.keys(DEFAULT_PROVIDER_RATE_LIMITS) as Provider[]) {
    const defaults = DEFAULT_PROVIDER_RATE_LIMITS[provider];
    const fromConfig = extendConfig.batch?.provider_limits?.[provider];
    const envPrefix = `BAOYU_IMAGE_GEN_${provider.toUpperCase()}`;
    resolved[provider] = {
      concurrency:
        parsePositiveInt(process.env[`${envPrefix}_CONCURRENCY`]) ??
        fromConfig?.concurrency ??
        defaults.concurrency,
      startIntervalMs:
        parsePositiveInt(process.env[`${envPrefix}_START_INTERVAL_MS`]) ??
        fromConfig?.start_interval_ms ??
        defaults.startIntervalMs,
    };
  }
  return resolved;
}
async function readPromptFromFiles(files: string[]): Promise<string> { async function readPromptFromFiles(files: string[]): Promise<string> {
const parts: string[] = []; const parts: string[] = [];
for (const f of files) { for (const f of files) {
@ -311,9 +478,12 @@ async function readPromptFromFiles(files: string[]): Promise<string> {
async function readPromptFromStdin(): Promise<string | null> { async function readPromptFromStdin(): Promise<string | null> {
if (process.stdin.isTTY) return null; if (process.stdin.isTTY) return null;
try { try {
const t = await Bun.stdin.text(); const chunks: Buffer[] = [];
const v = t.trim(); for await (const chunk of process.stdin) {
return v.length > 0 ? v : null; chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk));
}
const value = Buffer.concat(chunks).toString("utf8").trim();
return value.length > 0 ? value : null;
} catch { } catch {
return null; return null;
} }
@ -327,7 +497,13 @@ function normalizeOutputImagePath(p: string): string {
} }
function detectProvider(args: CliArgs): Provider { function detectProvider(args: CliArgs): Provider {
if (args.referenceImages.length > 0 && args.provider && args.provider !== "google" && args.provider !== "openai" && args.provider !== "replicate") { if (
args.referenceImages.length > 0 &&
args.provider &&
args.provider !== "google" &&
args.provider !== "openai" &&
args.provider !== "replicate"
) {
throw new Error( throw new Error(
"Reference images require a ref-capable provider. Use --provider google (Gemini multimodal), --provider openai (GPT Image edits), or --provider replicate." "Reference images require a ref-capable provider. Use --provider google (Gemini multimodal), --provider openai (GPT Image edits), or --provider replicate."
); );
@ -349,13 +525,18 @@ function detectProvider(args: CliArgs): Provider {
); );
} }
const available = [hasGoogle && "google", hasOpenai && "openai", hasDashscope && "dashscope", hasReplicate && "replicate"].filter(Boolean) as Provider[]; const available = [
hasReplicate && "replicate",
hasGoogle && "google",
hasOpenai && "openai",
hasDashscope && "dashscope",
].filter(Boolean) as Provider[];
if (available.length === 1) return available[0]!; if (available.length === 1) return available[0]!;
if (available.length > 1) return available[0]!; if (available.length > 1) return available[0]!;
throw new Error( throw new Error(
"No API key found. Set GOOGLE_API_KEY, GEMINI_API_KEY, OPENAI_API_KEY, DASHSCOPE_API_KEY, or REPLICATE_API_TOKEN.\n" + "No API key found. Set REPLICATE_API_TOKEN, GOOGLE_API_KEY, GEMINI_API_KEY, OPENAI_API_KEY, or DASHSCOPE_API_KEY.\n" +
"Create ~/.baoyu-skills/.env or <cwd>/.baoyu-skills/.env with your keys." "Create ~/.baoyu-skills/.env or <cwd>/.baoyu-skills/.env with your keys."
); );
} }
@ -371,11 +552,6 @@ async function validateReferenceImages(referenceImages: string[]): Promise<void>
} }
} }
type ProviderModule = {
getDefaultModel: () => string;
generateImage: (prompt: string, model: string, args: CliArgs) => Promise<Uint8Array>;
};
function isRetryableGenerationError(error: unknown): boolean { function isRetryableGenerationError(error: unknown): boolean {
const msg = error instanceof Error ? error.message : String(error); const msg = error instanceof Error ? error.message : String(error);
const nonRetryableMarkers = [ const nonRetryableMarkers = [
@ -384,26 +560,328 @@ function isRetryableGenerationError(error: unknown): boolean {
"only supported", "only supported",
"No API key found", "No API key found",
"is required", "is required",
"Invalid ",
"Unexpected ",
"API error (400)",
"API error (401)",
"API error (402)",
"API error (403)",
"API error (404)",
"temporarily disabled",
]; ];
return !nonRetryableMarkers.some((marker) => msg.includes(marker)); return !nonRetryableMarkers.some((marker) => msg.includes(marker));
} }
async function loadProviderModule(provider: Provider): Promise<ProviderModule> { async function loadProviderModule(provider: Provider): Promise<ProviderModule> {
if (provider === "google") { if (provider === "google") return (await import("./providers/google")) as ProviderModule;
return (await import("./providers/google")) as ProviderModule; if (provider === "dashscope") return (await import("./providers/dashscope")) as ProviderModule;
} if (provider === "replicate") return (await import("./providers/replicate")) as ProviderModule;
if (provider === "dashscope") {
return (await import("./providers/dashscope")) as ProviderModule;
}
if (provider === "replicate") {
return (await import("./providers/replicate")) as ProviderModule;
}
return (await import("./providers/openai")) as ProviderModule; return (await import("./providers/openai")) as ProviderModule;
} }
// Resolve the prompt text for a task: an inline --prompt value wins;
// otherwise --promptfiles are concatenated; null when neither is given.
async function loadPromptForArgs(args: CliArgs): Promise<string | null> {
  if (args.prompt) return args.prompt;
  if (args.promptFiles.length > 0) return readPromptFromFiles(args.promptFiles);
  return null;
}
// Pick the model for a task. Precedence: explicitly requested model >
// EXTEND.md default_model entry for the provider > the provider module's
// built-in default.
function getModelForProvider(
  provider: Provider,
  requestedModel: string | null,
  extendConfig: Partial<ExtendConfig>,
  providerModule: ProviderModule
): string {
  if (requestedModel) return requestedModel;
  // default_model is keyed by provider name, so index it directly instead
  // of repeating one branch per provider (the original four-way if-chain).
  const configured = extendConfig.default_model?.[provider];
  if (configured) return configured;
  return providerModule.getDefaultModel();
}
/**
 * Builds the single PreparedTask for non-batch mode: resolves the prompt
 * (args, prompt files, or stdin), validates required inputs, and selects
 * provider + model. Note: applies the "2k" quality default by mutating args,
 * since the same args object travels with the task.
 */
async function prepareSingleTask(args: CliArgs, extendConfig: Partial<ExtendConfig>): Promise<PreparedTask> {
  if (!args.quality) {
    args.quality = "2k";
  }
  const prompt = (await loadPromptForArgs(args)) ?? (await readPromptFromStdin());
  if (!prompt) {
    throw new Error("Prompt is required");
  }
  if (!args.imagePath) {
    throw new Error("--image is required");
  }
  if (args.referenceImages.length > 0) {
    await validateReferenceImages(args.referenceImages);
  }
  const provider = detectProvider(args);
  const providerModule = await loadProviderModule(provider);
  const model = getModelForProvider(provider, args.model, extendConfig, providerModule);
  const outputPath = normalizeOutputImagePath(args.imagePath);
  return { id: "single", prompt, args, provider, model, outputPath, providerModule };
}
/**
 * Reads and parses a batch task file (JSON). Accepts either a bare array of
 * tasks or an object with a `tasks` array; a leading UTF-8 BOM is stripped
 * before parsing.
 * @throws when the parsed content is neither accepted shape.
 */
async function loadBatchTasks(batchFilePath: string): Promise<BatchTaskInput[]> {
  const raw = await readFile(path.resolve(batchFilePath), "utf8");
  const parsed = JSON.parse(raw.replace(/^\uFEFF/, "")) as BatchFile;
  if (Array.isArray(parsed)) {
    return parsed;
  }
  if (parsed && typeof parsed === "object" && Array.isArray(parsed.tasks)) {
    return parsed.tasks;
  }
  throw new Error("Invalid batch file. Expected an array of tasks or an object with a tasks array.");
}
/**
 * Derives the effective CLI arguments for one batch task by overlaying the
 * task's fields on the base (global) arguments. Per-task arrays are copied so
 * tasks never share mutable state, and batchFile is cleared so a task can
 * never recursively re-enter batch mode.
 */
function createTaskArgs(baseArgs: CliArgs, task: BatchTaskInput): CliArgs {
  const overlay = {
    prompt: task.prompt ?? null,
    promptFiles: task.promptFiles ? [...task.promptFiles] : [],
    imagePath: task.image ?? null,
    provider: task.provider ?? baseArgs.provider ?? null,
    model: task.model ?? baseArgs.model ?? null,
    aspectRatio: task.ar ?? baseArgs.aspectRatio ?? null,
    size: task.size ?? baseArgs.size ?? null,
    quality: task.quality ?? baseArgs.quality ?? null,
    imageSize: task.imageSize ?? baseArgs.imageSize ?? null,
    referenceImages: task.ref ? [...task.ref] : [],
    n: task.n ?? baseArgs.n,
    batchFile: null,
    jobs: baseArgs.jobs,
    json: baseArgs.json,
    help: false,
  };
  return { ...baseArgs, ...overlay };
}
/**
 * Loads and validates every task from the batch file, resolving each task's
 * prompt, provider, model, and normalized output path.
 *
 * @throws when --batchfile is missing, the file is empty, a task lacks a
 *   prompt or output image path, or two tasks resolve to the same output
 *   file (parallel workers writing one path would silently clobber each
 *   other's result, so we fail fast instead).
 */
async function prepareBatchTasks(
  args: CliArgs,
  extendConfig: Partial<ExtendConfig>
): Promise<PreparedTask[]> {
  if (!args.batchFile) throw new Error("--batchfile is required in batch mode");
  const taskInputs = await loadBatchTasks(args.batchFile);
  if (taskInputs.length === 0) throw new Error("Batch file does not contain any tasks.");
  const prepared: PreparedTask[] = [];
  // Normalized output path -> task id, used to detect output collisions.
  const seenOutputs = new Map<string, string>();
  for (let i = 0; i < taskInputs.length; i++) {
    const task = taskInputs[i]!;
    const taskArgs = createTaskArgs(args, task);
    const prompt = await loadPromptForArgs(taskArgs);
    if (!prompt) throw new Error(`Task ${i + 1} is missing prompt or promptFiles.`);
    if (!taskArgs.imagePath) throw new Error(`Task ${i + 1} is missing image output path.`);
    if (taskArgs.referenceImages.length > 0) await validateReferenceImages(taskArgs.referenceImages);
    const provider = detectProvider(taskArgs);
    const providerModule = await loadProviderModule(provider);
    const model = getModelForProvider(provider, taskArgs.model, extendConfig, providerModule);
    const id = task.id || `task-${String(i + 1).padStart(2, "0")}`;
    const outputPath = normalizeOutputImagePath(taskArgs.imagePath);
    const conflictingId = seenOutputs.get(outputPath);
    if (conflictingId) {
      throw new Error(`Task ${id} and task ${conflictingId} write to the same output path: ${outputPath}`);
    }
    seenOutputs.set(outputPath, id);
    prepared.push({
      id,
      prompt,
      args: taskArgs,
      provider,
      model,
      outputPath,
      providerModule,
    });
  }
  return prepared;
}
/**
 * Persists generated image bytes to disk, creating the parent directory
 * (recursively) if it does not exist yet.
 */
async function writeImage(outputPath: string, imageData: Uint8Array): Promise<void> {
  const parentDir = path.dirname(outputPath);
  await mkdir(parentDir, { recursive: true });
  await writeFile(outputPath, imageData);
}
/**
 * Runs one prepared generation task with up to MAX_ATTEMPTS attempts.
 * Transient failures (per isRetryableGenerationError) are retried; all
 * outcomes are reported as a TaskResult rather than thrown, so batch mode
 * can aggregate successes and failures.
 *
 * Fix: retry log now includes the underlying error message — previously the
 * cause of each failed attempt was discarded, making transient failures
 * impossible to diagnose from logs.
 */
async function generatePreparedTask(task: PreparedTask): Promise<TaskResult> {
  console.error(`Using ${task.provider} / ${task.model} for ${task.id}`);
  console.error(
    `Switch model: --model <id> | EXTEND.md default_model.${task.provider} | env ${task.provider.toUpperCase()}_IMAGE_MODEL`
  );
  let attempts = 0;
  while (attempts < MAX_ATTEMPTS) {
    attempts += 1;
    try {
      const imageData = await task.providerModule.generateImage(task.prompt, task.model, task.args);
      await writeImage(task.outputPath, imageData);
      return {
        id: task.id,
        provider: task.provider,
        model: task.model,
        outputPath: task.outputPath,
        success: true,
        attempts,
        error: null,
      };
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      const canRetry = attempts < MAX_ATTEMPTS && isRetryableGenerationError(error);
      if (canRetry) {
        // Surface the cause so transient provider errors are diagnosable.
        console.error(`[${task.id}] Attempt ${attempts}/${MAX_ATTEMPTS} failed (${message}), retrying...`);
        continue;
      }
      return {
        id: task.id,
        provider: task.provider,
        model: task.model,
        outputPath: task.outputPath,
        success: false,
        attempts,
        error: message,
      };
    }
  }
  // Defensive guard: the loop body always returns, so this should be unreachable.
  return {
    id: task.id,
    provider: task.provider,
    model: task.model,
    outputPath: task.outputPath,
    success: false,
    attempts: MAX_ATTEMPTS,
    error: "Unknown failure",
  };
}
/**
 * Creates a per-provider admission gate enforcing both a concurrency cap and
 * a minimum interval between task starts for each provider.
 *
 * The returned acquire(provider) resolves once the provider has spare
 * capacity AND enough time has elapsed since its last task start; it yields a
 * release callback the caller must invoke when the task finishes.
 *
 * NOTE(review): waiters poll every POLL_WAIT_MS rather than queueing, so
 * wake-up order between concurrent waiters is not FIFO — presumably
 * acceptable for this workload.
 */
function createProviderGate(providerRateLimits: Record<Provider, ProviderRateLimit>) {
  // Per-provider bookkeeping: tasks currently in flight + last start timestamp.
  const state = new Map<Provider, { active: number; lastStartedAt: number }>();
  return async function acquire(provider: Provider): Promise<() => void> {
    const limit = providerRateLimits[provider];
    while (true) {
      const current = state.get(provider) ?? { active: 0, lastStartedAt: 0 };
      const now = Date.now();
      const enoughCapacity = current.active < limit.concurrency;
      const enoughGap = now - current.lastStartedAt >= limit.startIntervalMs;
      if (enoughCapacity && enoughGap) {
        // Claim a slot and stamp the start time before handing out release.
        state.set(provider, { active: current.active + 1, lastStartedAt: now });
        return () => {
          const latest = state.get(provider) ?? { active: 1, lastStartedAt: now };
          // Release decrements the active count but preserves lastStartedAt,
          // so the start-interval throttle still applies to the next acquire.
          state.set(provider, {
            active: Math.max(0, latest.active - 1),
            lastStartedAt: latest.lastStartedAt,
          });
        };
      }
      // No capacity or rate headroom yet — back off and re-check.
      await new Promise((resolve) => setTimeout(resolve, POLL_WAIT_MS));
    }
  };
}
/**
 * Computes how many parallel workers to spawn: an explicit --jobs value when
 * given, otherwise one per task capped by max_workers; the result is always
 * clamped to [1, min(taskCount, maxWorkers)].
 */
function getWorkerCount(taskCount: number, jobs: number | null, maxWorkers: number): number {
  const fallback = Math.min(taskCount, maxWorkers);
  const requested = jobs ?? fallback;
  const clampedAbove = Math.min(requested, taskCount, maxWorkers);
  return Math.max(1, clampedAbove);
}
/**
 * Executes prepared tasks with a fixed-size worker pool plus a per-provider
 * admission gate (concurrency cap + start-interval throttle).
 *
 * Results are written into a slot array indexed by task position, so the
 * returned array order always matches the input order regardless of
 * completion order.
 */
async function runBatchTasks(
  tasks: PreparedTask[],
  jobs: number | null,
  extendConfig: Partial<ExtendConfig>
): Promise<TaskResult[]> {
  // Single task: skip the pool and gate machinery entirely.
  if (tasks.length === 1) {
    return [await generatePreparedTask(tasks[0]!)];
  }
  const maxWorkers = getConfiguredMaxWorkers(extendConfig);
  const providerRateLimits = getConfiguredProviderRateLimits(extendConfig);
  const acquireProvider = createProviderGate(providerRateLimits);
  const workerCount = getWorkerCount(tasks.length, jobs, maxWorkers);
  console.error(`Batch mode: ${tasks.length} tasks, ${workerCount} workers, parallel mode enabled.`);
  for (const provider of ["replicate", "google", "openai", "dashscope"] as Provider[]) {
    const limit = providerRateLimits[provider];
    console.error(`- ${provider}: concurrency=${limit.concurrency}, startIntervalMs=${limit.startIntervalMs}`);
  }
  // Shared work-queue cursor. The claim (read + increment) is synchronous with
  // no await in between, so two workers cannot claim the same index on Node's
  // single-threaded event loop.
  let nextIndex = 0;
  const results: TaskResult[] = new Array(tasks.length);
  const worker = async (): Promise<void> => {
    while (true) {
      const currentIndex = nextIndex;
      nextIndex += 1;
      if (currentIndex >= tasks.length) return;
      const task = tasks[currentIndex]!;
      // Block until this task's provider has capacity and rate headroom.
      const release = await acquireProvider(task.provider);
      try {
        results[currentIndex] = await generatePreparedTask(task);
      } finally {
        // Always free the provider slot, even if generation threw.
        release();
      }
    }
  };
  await Promise.all(Array.from({ length: workerCount }, () => worker()));
  return results;
}
/**
 * Writes a human-readable batch summary to stderr, listing per-task failure
 * reasons when any task failed.
 */
function printBatchSummary(results: TaskResult[]): void {
  const failures = results.filter((item) => !item.success);
  const succeededCount = results.length - failures.length;
  console.error("");
  console.error("Batch generation summary:");
  console.error(`- Total: ${results.length}`);
  console.error(`- Succeeded: ${succeededCount}`);
  console.error(`- Failed: ${failures.length}`);
  if (failures.length > 0) {
    console.error("Failure reasons:");
    for (const failure of failures) {
      console.error(`- ${failure.id}: ${failure.error}`);
    }
  }
}
/**
 * Prints a machine-readable JSON payload to stdout, 2-space indented.
 */
function emitJson(payload: unknown): void {
  const serialized = JSON.stringify(payload, null, 2);
  console.log(serialized);
}
/**
 * Single-image mode: prepares one task, generates it, and reports the result
 * (JSON to stdout with --json, otherwise just the saved path).
 * @throws on generation failure so main() surfaces the error and exits non-zero.
 */
async function runSingleMode(args: CliArgs, extendConfig: Partial<ExtendConfig>): Promise<void> {
  const task = await prepareSingleTask(args, extendConfig);
  const result = await generatePreparedTask(task);
  if (!result.success) {
    throw new Error(result.error || "Generation failed");
  }
  if (!args.json) {
    console.log(result.outputPath);
    return;
  }
  emitJson({
    savedImage: result.outputPath,
    provider: result.provider,
    model: result.model,
    attempts: result.attempts,
    prompt: task.prompt.slice(0, 200),
  });
}
/**
 * Batch mode: prepares all tasks from the batch file, runs them through the
 * worker pool, prints a summary, optionally emits a JSON report, and sets a
 * non-zero exit code when any task failed.
 */
async function runBatchMode(args: CliArgs, extendConfig: Partial<ExtendConfig>): Promise<void> {
  const tasks = await prepareBatchTasks(args, extendConfig);
  const results = await runBatchTasks(tasks, args.jobs, extendConfig);
  printBatchSummary(results);
  const failures = results.filter((item) => !item.success);
  if (args.json) {
    emitJson({
      mode: "batch",
      total: results.length,
      succeeded: results.length - failures.length,
      failed: failures.length,
      results,
    });
  }
  if (failures.length > 0) {
    process.exitCode = 1;
  }
}
async function main(): Promise<void> { async function main(): Promise<void> {
const args = parseArgs(process.argv.slice(2)); const args = parseArgs(process.argv.slice(2));
if (args.help) { if (args.help) {
printUsage(); printUsage();
return; return;
@ -412,86 +890,18 @@ async function main(): Promise<void> {
await loadEnv(); await loadEnv();
const extendConfig = await loadExtendConfig(); const extendConfig = await loadExtendConfig();
const mergedArgs = mergeConfig(args, extendConfig); const mergedArgs = mergeConfig(args, extendConfig);
if (!mergedArgs.quality) mergedArgs.quality = "2k"; if (!mergedArgs.quality) mergedArgs.quality = "2k";
let prompt: string | null = mergedArgs.prompt; if (mergedArgs.batchFile) {
if (!prompt && mergedArgs.promptFiles.length > 0) prompt = await readPromptFromFiles(mergedArgs.promptFiles); await runBatchMode(mergedArgs, extendConfig);
if (!prompt) prompt = await readPromptFromStdin();
if (!prompt) {
console.error("Error: Prompt is required");
printUsage();
process.exitCode = 1;
return; return;
} }
if (!mergedArgs.imagePath) { await runSingleMode(mergedArgs, extendConfig);
console.error("Error: --image is required");
printUsage();
process.exitCode = 1;
return;
}
if (mergedArgs.referenceImages.length > 0) {
await validateReferenceImages(mergedArgs.referenceImages);
}
const provider = detectProvider(mergedArgs);
const providerModule = await loadProviderModule(provider);
let model = mergedArgs.model;
if (!model && extendConfig.default_model) {
if (provider === "google") model = extendConfig.default_model.google ?? null;
if (provider === "openai") model = extendConfig.default_model.openai ?? null;
if (provider === "dashscope") model = extendConfig.default_model.dashscope ?? null;
if (provider === "replicate") model = extendConfig.default_model.replicate ?? null;
}
model = model || providerModule.getDefaultModel();
const outputPath = normalizeOutputImagePath(mergedArgs.imagePath);
let imageData: Uint8Array;
let retried = false;
while (true) {
try {
imageData = await providerModule.generateImage(prompt, model, mergedArgs);
break;
} catch (e) {
if (!retried && isRetryableGenerationError(e)) {
retried = true;
console.error("Generation failed, retrying...");
continue;
}
throw e;
}
}
const dir = path.dirname(outputPath);
await mkdir(dir, { recursive: true });
await writeFile(outputPath, imageData);
if (mergedArgs.json) {
console.log(
JSON.stringify(
{
savedImage: outputPath,
provider,
model,
prompt: prompt.slice(0, 200),
},
null,
2
)
);
} else {
console.log(outputPath);
}
} }
main().catch((e) => { main().catch((error) => {
const msg = e instanceof Error ? e.message : String(e); const message = error instanceof Error ? error.message : String(error);
console.error(msg); console.error(message);
process.exit(1); process.exit(1);
}); });

View File

@ -68,7 +68,11 @@ export async function generateImage(
const baseURL = process.env.OPENAI_BASE_URL || "https://api.openai.com/v1"; const baseURL = process.env.OPENAI_BASE_URL || "https://api.openai.com/v1";
const apiKey = process.env.OPENAI_API_KEY; const apiKey = process.env.OPENAI_API_KEY;
if (!apiKey) throw new Error("OPENAI_API_KEY is required"); if (!apiKey) {
throw new Error(
"OPENAI_API_KEY is required. Codex/ChatGPT desktop login does not automatically grant OpenAI Images API access to this script."
);
}
if (process.env.OPENAI_IMAGE_USE_CHAT === "true") { if (process.env.OPENAI_IMAGE_USE_CHAT === "true") {
return generateWithChatCompletions(baseURL, apiKey, prompt, model); return generateWithChatCompletions(baseURL, apiKey, prompt, model);

View File

@ -36,22 +36,24 @@ function buildInput(prompt: string, args: CliArgs, referenceImages: string[]): R
if (args.aspectRatio) { if (args.aspectRatio) {
input.aspect_ratio = args.aspectRatio; input.aspect_ratio = args.aspectRatio;
} else if (referenceImages.length > 0) {
input.aspect_ratio = "match_input_image";
} }
if (args.n > 1) { if (args.n > 1) {
input.number_of_images = args.n; input.number_of_images = args.n;
} }
if (args.quality === "normal") {
input.resolution = "1K";
} else if (args.quality === "2k") {
input.resolution = "2K";
}
input.output_format = "png"; input.output_format = "png";
if (referenceImages.length > 0) { if (referenceImages.length > 0) {
if (referenceImages.length === 1) { input.image_input = referenceImages;
input.image = referenceImages[0];
} else {
for (let i = 0; i < referenceImages.length; i++) {
input[`image${i > 0 ? i + 1 : ""}`] = referenceImages[i];
}
}
} }
return input; return input;

View File

@ -13,10 +13,29 @@ export type CliArgs = {
imageSize: string | null; imageSize: string | null;
referenceImages: string[]; referenceImages: string[];
n: number; n: number;
batchFile: string | null;
jobs: number | null;
json: boolean; json: boolean;
help: boolean; help: boolean;
}; };
/**
 * One entry in a batch file. Per-task fields override the CLI-level defaults;
 * anything left unset falls back to the global arguments.
 */
export type BatchTaskInput = {
  // Stable identifier used in logs and results; auto-generated ("task-NN") when omitted.
  id?: string;
  // Inline prompt text; takes precedence over promptFiles when both are set.
  prompt?: string | null;
  // Files whose contents supply the prompt when no inline prompt is given.
  promptFiles?: string[];
  // Output image path for this task.
  image?: string;
  provider?: Provider | null;
  model?: string | null;
  // Aspect ratio, e.g. "16:9".
  ar?: string | null;
  size?: string | null;
  quality?: Quality | null;
  imageSize?: "1K" | "2K" | "4K" | null;
  // Reference image paths passed to the provider.
  ref?: string[];
  // Number of images to generate for this task.
  n?: number;
};
// A batch file is either a bare array of tasks or an object wrapping one.
export type BatchFile = BatchTaskInput[] | { tasks: BatchTaskInput[] };
export type ExtendConfig = { export type ExtendConfig = {
version: number; version: number;
default_provider: Provider | null; default_provider: Provider | null;
@ -29,4 +48,16 @@ export type ExtendConfig = {
dashscope: string | null; dashscope: string | null;
replicate: string | null; replicate: string | null;
}; };
batch?: {
max_workers?: number | null;
provider_limits?: Partial<
Record<
Provider,
{
concurrency?: number | null;
start_interval_ms?: number | null;
}
>
>;
};
}; };

View File

@ -212,6 +212,7 @@ Before translating chunks:
- **Natural flow**: Use idiomatic target language word order and sentence patterns; break or restructure sentences freely when the source structure doesn't work naturally in the target language - **Natural flow**: Use idiomatic target language word order and sentence patterns; break or restructure sentences freely when the source structure doesn't work naturally in the target language
- **Terminology**: Use standard translations; annotate with original term in parentheses on first occurrence - **Terminology**: Use standard translations; annotate with original term in parentheses on first occurrence
- **Preserve format**: Keep all markdown formatting (headings, bold, italic, images, links, code blocks) - **Preserve format**: Keep all markdown formatting (headings, bold, italic, images, links, code blocks)
- **Image-language awareness**: Preserve image references exactly during translation, but after the translation is complete, review referenced images and check whether their likely main text language still matches the translated article language
- **Frontmatter transformation**: If the source has YAML frontmatter, preserve it in the translation with these changes: (1) Rename metadata fields that describe the *source* article — `url`→`sourceUrl`, `title`→`sourceTitle`, `description`→`sourceDescription`, `author`→`sourceAuthor`, `date`→`sourceDate`, and any similar origin-metadata fields — by adding a `source` prefix (camelCase). (2) Translate the values of text fields (title, description, etc.) and add them as new top-level fields. (3) Keep other fields (tags, categories, custom fields) as-is, translating their values where appropriate - **Frontmatter transformation**: If the source has YAML frontmatter, preserve it in the translation with these changes: (1) Rename metadata fields that describe the *source* article — `url`→`sourceUrl`, `title`→`sourceTitle`, `description`→`sourceDescription`, `author`→`sourceAuthor`, `date`→`sourceDate`, and any similar origin-metadata fields — by adding a `source` prefix (camelCase). (2) Translate the values of text fields (title, description, etc.) and add them as new top-level fields. (3) Keep other fields (tags, categories, custom fields) as-is, translating their values where appropriate
- **Respect original**: Maintain original meaning and intent; do not add, remove, or editorialize — but sentence structure and imagery may be adapted freely to serve the meaning - **Respect original**: Maintain original meaning and intent; do not add, remove, or editorialize — but sentence structure and imagery may be adapted freely to serve the meaning
- **Translator's notes**: For terms, concepts, or cultural references that target readers may not understand — due to jargon, cultural gaps, or domain-specific knowledge — add a concise explanatory note in parentheses immediately after the term. The note should explain *what it means* in plain language, not just provide the English original. Format: `译文English original通俗解释`. Calibrate annotation depth to the target audience: general readers need more notes than technical readers. Only add notes where genuinely needed; do not over-annotate obvious terms. - **Translator's notes**: For terms, concepts, or cultural references that target readers may not understand — due to jargon, cultural gaps, or domain-specific knowledge — add a concise explanatory note in parentheses immediately after the term. The note should explain *what it means* in plain language, not just provide the English original. Format: `译文English original通俗解释`. Calibrate annotation depth to the target audience: general readers need more notes than technical readers. Only add notes where genuinely needed; do not over-annotate obvious terms.
@ -250,6 +251,20 @@ Each step reads the previous step's file and builds on it.
Final translation is always at `translation.md` in the output directory. Final translation is always at `translation.md` in the output directory.
After the final translation is written, do a lightweight image-language pass:
1. Collect image references from the translated article
2. Identify likely text-heavy images such as covers, screenshots, diagrams, charts, frameworks, and infographics
3. If the primary text language of any image likely does not match the language of the translated article, proactively remind the user
4. The reminder must be a list only. Do not automatically localize those images unless the user asks
Reminder format:
```text
Possible image localization needed:
- ![[attachments/example-cover.png]]: likely still contains source-language text while the article is now in target language
- ![[attachments/example-diagram.png]]: likely text-heavy framework graphic, check whether labels need translation
```
Display summary: Display summary:
``` ```
**Translation complete** ({mode} mode) **Translation complete** ({mode} mode)
@ -261,6 +276,8 @@ Final: {output-dir}/translation.md
Glossary terms applied: {count} Glossary terms applied: {count}
``` ```
If mismatched image-language candidates were found, append a short note after the summary telling the user that some embedded images may still need image-text localization, followed by the candidate list.
## Extension Support ## Extension Support
Custom configurations via EXTEND.md. See **Preferences** section for paths and supported options. Custom configurations via EXTEND.md. See **Preferences** section for paths and supported options.