feat: add batch parallel image generation and provider-level throttling

- Add --batchfile and --jobs flags for multi-image parallel generation
  with per-provider concurrency control and rate limiting
- Refactor main.ts into prepareSingleTask/prepareBatchTasks/runBatchTasks
  with worker pool pattern and up to 3 retries per image
- Fix Replicate provider: use image_input array (nano-banana-pro schema),
  add match_input_image aspect ratio, add quality-to-resolution mapping
- Improve OpenAI error message for missing API key (Codex auth hint)
- Expand non-retryable error detection (4xx codes, disabled models)
- Add batch config to EXTEND.md schema (max_workers, provider_limits)
- Add build-batch.ts for article-illustrator batch workflow integration
- Add image-language awareness pass to baoyu-translate

Co-authored-by: 敖氏 <aoshi@MacBook-Air.local>
This commit is contained in:
Jim Liu 宝玉 2026-03-09 00:07:45 -05:00
parent e4d4ec8334
commit 5acef7151b
8 changed files with 788 additions and 139 deletions

View File

@ -0,0 +1,156 @@
import path from "node:path";
import process from "node:process";
import { readdir, readFile, writeFile } from "node:fs/promises";
// CLI argument bag for build-batch; populated by parseArgs. Required
// paths are nullable until validated in main(); option fields carry
// their CLI defaults.
type CliArgs = {
outlinePath: string | null; // --outline (required)
promptsDir: string | null; // --prompts (required)
outputPath: string | null; // --output (required)
imagesDir: string | null; // --images-dir (defaults to the output file's directory)
provider: string; // --provider (default "replicate")
model: string; // --model (default "google/nano-banana-pro")
aspectRatio: string; // --ar (default "16:9")
quality: string; // --quality (default "2k")
jobs: number | null; // --jobs, recommended worker-count metadata (optional)
help: boolean; // -h / --help
};
// One "## Illustration N" section parsed from outline.md.
type OutlineEntry = {
index: number; // illustration number taken from the section heading
filename: string; // target image filename from the "**Filename**:" line
};
// Print CLI usage text to stdout (shown for -h/--help).
function printUsage(): void {
console.log(`Usage:
npx -y tsx scripts/build-batch.ts --outline outline.md --prompts prompts --output batch.json --images-dir attachments
Options:
--outline <path> Path to outline.md
--prompts <path> Path to prompts directory
--output <path> Path to output batch.json
--images-dir <path> Directory for generated images
--provider <name> Provider for baoyu-image-gen batch tasks (default: replicate)
--model <id> Model for baoyu-image-gen batch tasks (default: google/nano-banana-pro)
--ar <ratio> Aspect ratio for all tasks (default: 16:9)
--quality <level> Quality for all tasks (default: 2k)
--jobs <count> Recommended worker count metadata (optional)
-h, --help Show help`);
}
/**
 * Parse CLI arguments into a CliArgs record.
 *
 * Unknown flags are ignored; a repeated flag keeps its last value.
 * `--jobs` must be a positive integer — a missing, non-numeric, zero,
 * or negative value is normalized to null so a bogus worker count
 * (previously NaN for "abc", or a raw negative number) can never leak
 * into the generated batch.json.
 */
function parseArgs(argv: string[]): CliArgs {
const args: CliArgs = {
outlinePath: null,
promptsDir: null,
outputPath: null,
imagesDir: null,
provider: "replicate",
model: "google/nano-banana-pro",
aspectRatio: "16:9",
quality: "2k",
jobs: null,
help: false,
};
for (let i = 0; i < argv.length; i++) {
const current = argv[i]!;
if (current === "--outline") args.outlinePath = argv[++i] ?? null;
else if (current === "--prompts") args.promptsDir = argv[++i] ?? null;
else if (current === "--output") args.outputPath = argv[++i] ?? null;
else if (current === "--images-dir") args.imagesDir = argv[++i] ?? null;
else if (current === "--provider") args.provider = argv[++i] ?? args.provider;
else if (current === "--model") args.model = argv[++i] ?? args.model;
else if (current === "--ar") args.aspectRatio = argv[++i] ?? args.aspectRatio;
else if (current === "--quality") args.quality = argv[++i] ?? args.quality;
else if (current === "--jobs") {
const value = argv[++i];
// Reject NaN / zero / negative instead of storing them verbatim.
const parsed = value ? parseInt(value, 10) : NaN;
args.jobs = Number.isInteger(parsed) && parsed > 0 ? parsed : null;
} else if (current === "--help" || current === "-h") {
args.help = true;
}
}
return args;
}
/**
 * Extract illustration entries from outline.md content.
 * Each "## Illustration N" section must carry a "**Filename**: ..."
 * line; sections missing either piece are silently ignored.
 */
function parseOutline(content: string): OutlineEntry[] {
const sections = content.split(/^## Illustration\s+/m).slice(1);
const result: OutlineEntry[] = [];
for (const section of sections) {
const indexMatch = /^(\d+)/.exec(section);
const nameMatch = /\*\*Filename\*\*:\s*(.+)/.exec(section);
if (!indexMatch || !nameMatch) continue;
result.push({
index: parseInt(indexMatch[1]!, 10),
filename: nameMatch[1]!.trim(),
});
}
return result;
}
/**
 * Locate the prompt file for an outline entry inside promptsDir.
 * Returns the first (in readdir order) "<NN>*.md" file whose name
 * starts with the zero-padded illustration index, or null when none
 * exists.
 */
async function findPromptFile(promptsDir: string, entry: OutlineEntry): Promise<string | null> {
const padded = String(entry.index).padStart(2, "0");
const candidates = await readdir(promptsDir);
for (const name of candidates) {
if (name.startsWith(padded) && name.endsWith(".md")) {
return path.join(promptsDir, name);
}
}
return null;
}
/**
 * Entry point: read outline.md, pair each illustration with its prompt
 * file, and emit a batch.json consumable by baoyu-image-gen --batchfile.
 * Exits non-zero when a required flag is missing or the outline has no
 * illustration entries; entries without a prompt file are skipped with
 * a warning on stderr.
 */
async function main(): Promise<void> {
const args = parseArgs(process.argv.slice(2));
if (args.help) {
printUsage();
return;
}
if (!args.outlinePath) {
console.error("Error: --outline is required");
process.exit(1);
}
if (!args.promptsDir) {
console.error("Error: --prompts is required");
process.exit(1);
}
if (!args.outputPath) {
console.error("Error: --output is required");
process.exit(1);
}
const outlineContent = await readFile(args.outlinePath, "utf8");
const entries = parseOutline(outlineContent);
if (entries.length === 0) {
console.error("No illustration entries found in outline.");
process.exit(1);
}
// Images default to the batch file's directory; this is loop-invariant,
// so compute it once instead of once per entry.
const imageDir = args.imagesDir ?? path.dirname(args.outputPath);
const tasks: Array<Record<string, unknown>> = [];
for (const entry of entries) {
const promptFile = await findPromptFile(args.promptsDir, entry);
if (!promptFile) {
console.error(`Warning: No prompt file found for illustration ${entry.index}, skipping.`);
continue;
}
tasks.push({
id: `illustration-${String(entry.index).padStart(2, "0")}`,
promptFiles: [promptFile],
image: path.join(imageDir, entry.filename),
provider: args.provider,
model: args.model,
ar: args.aspectRatio,
quality: args.quality,
});
}
const output: Record<string, unknown> = { tasks };
if (args.jobs) output.jobs = args.jobs;
// Trailing newline keeps the JSON file friendly to line-based tools.
await writeFile(args.outputPath, JSON.stringify(output, null, 2) + "\n");
console.log(`Batch file written: ${args.outputPath} (${tasks.length} tasks)`);
}
// Kick off the CLI; on any unhandled failure print a concise message
// (not a stack trace) and exit non-zero.
main().catch((error) => {
console.error(error instanceof Error ? error.message : String(error));
process.exit(1);
});

View File

@ -55,7 +55,7 @@ if (Test-Path "$HOME/.baoyu-skills/baoyu-image-gen/EXTEND.md") { "user" }
| `.baoyu-skills/baoyu-image-gen/EXTEND.md` | Project directory |
| `$HOME/.baoyu-skills/baoyu-image-gen/EXTEND.md` | User home |
**EXTEND.md Supports**: Default provider | Default quality | Default aspect ratio | Default image size | Default models
**EXTEND.md Supports**: Default provider | Default quality | Default aspect ratio | Default image size | Default models | Batch worker cap | Provider-specific batch limits
Schema: `references/config/preferences-schema.md`
@ -91,6 +91,12 @@ ${BUN_X} {baseDir}/scripts/main.ts --prompt "A cat" --image out.png --provider r
# Replicate with specific model
${BUN_X} {baseDir}/scripts/main.ts --prompt "A cat" --image out.png --provider replicate --model google/nano-banana
# Batch mode with saved prompt files
${BUN_X} {baseDir}/scripts/main.ts --batchfile batch.json
# Batch mode with explicit worker count
${BUN_X} {baseDir}/scripts/main.ts --batchfile batch.json --jobs 4 --json
```
## Options
@ -99,14 +105,16 @@ ${BUN_X} {baseDir}/scripts/main.ts --prompt "A cat" --image out.png --provider r
|--------|-------------|
| `--prompt <text>`, `-p` | Prompt text |
| `--promptfiles <files...>` | Read prompt from files (concatenated) |
| `--image <path>` | Output image path (required) |
| `--provider google\|openai\|dashscope\|replicate` | Force provider (default: google) |
| `--model <id>`, `-m` | Model ID (Google: `gemini-3-pro-image-preview`, `gemini-3.1-flash-image-preview`; OpenAI: `gpt-image-1.5`) |
| `--image <path>` | Output image path (required in single-image mode) |
| `--batchfile <path>` | JSON batch file for multi-image generation |
| `--jobs <count>` | Worker count for batch mode (default: auto, max from config, built-in default 10) |
| `--provider google\|openai\|dashscope\|replicate` | Force provider (default: auto-detect) |
| `--model <id>`, `-m` | Model ID (Google: `gemini-3-pro-image-preview`, `gemini-3.1-flash-image-preview`; OpenAI: `gpt-image-1.5`, `gpt-image-1`) |
| `--ar <ratio>` | Aspect ratio (e.g., `16:9`, `1:1`, `4:3`) |
| `--size <WxH>` | Size (e.g., `1024x1024`) |
| `--quality normal\|2k` | Quality preset (default: 2k) |
| `--quality normal\|2k` | Quality preset (default: `2k`) |
| `--imageSize 1K\|2K\|4K` | Image size for Google (default: from quality) |
| `--ref <files...>` | Reference images. Supported by Google multimodal (`gemini-3-pro-image-preview`, `gemini-3-flash-preview`, `gemini-3.1-flash-image-preview`) and OpenAI edits (GPT Image models). If provider omitted: Google first, then OpenAI |
| `--ref <files...>` | Reference images. Supported by Google multimodal, OpenAI GPT Image edits, and Replicate |
| `--n <count>` | Number of images |
| `--json` | JSON output |
@ -126,6 +134,9 @@ ${BUN_X} {baseDir}/scripts/main.ts --prompt "A cat" --image out.png --provider r
| `GOOGLE_BASE_URL` | Custom Google endpoint |
| `DASHSCOPE_BASE_URL` | Custom DashScope endpoint |
| `REPLICATE_BASE_URL` | Custom Replicate endpoint |
| `BAOYU_IMAGE_GEN_MAX_WORKERS` | Override batch worker cap |
| `BAOYU_IMAGE_GEN_<PROVIDER>_CONCURRENCY` | Override provider concurrency, e.g. `BAOYU_IMAGE_GEN_REPLICATE_CONCURRENCY` |
| `BAOYU_IMAGE_GEN_<PROVIDER>_START_INTERVAL_MS` | Override provider start gap, e.g. `BAOYU_IMAGE_GEN_REPLICATE_START_INTERVAL_MS` |
**Load Priority**: CLI args > EXTEND.md > env vars > `<cwd>/.baoyu-skills/.env` > `~/.baoyu-skills/.env`
@ -187,36 +198,29 @@ Supported: `1:1`, `16:9`, `9:16`, `4:3`, `3:4`, `2.35:1`
## Generation Mode
**Default**: Sequential generation (one image at a time). This ensures stable output and easier debugging.
**Default**: Sequential generation.
**Parallel Generation**: Only use when user explicitly requests parallel/concurrent generation.
**Batch Parallel Generation**: When `--batchfile` contains 2 or more pending tasks, the script automatically enables parallel generation.
| Mode | When to Use |
|------|-------------|
| Sequential (default) | Normal usage, single images, small batches |
| Parallel | User explicitly requests, large batches (10+) |
| Parallel batch | Batch mode with 2+ tasks |
**Parallel Settings** (when requested):
Parallel behavior:
| Setting | Value |
|---------|-------|
| Recommended concurrency | 4 subagents |
| Max concurrency | 8 subagents |
| Use case | Large batch generation when user requests parallel |
**Agent Implementation** (parallel mode only):
```
# Launch multiple generations in parallel using Task tool
# Each Task runs as background subagent with run_in_background=true
# Collect results via TaskOutput when all complete
```
- Default worker count is automatic, capped by config, built-in default 10
- Provider-specific throttling is applied only in batch mode; the built-in defaults favor throughput while still avoiding obvious RPM bursts
- You can override worker count with `--jobs <count>`
- Each image retries automatically up to 3 attempts
- Final output includes success count, failure count, and per-image failure reasons
## Error Handling
- Missing API key → error with setup instructions
- Generation failure → auto-retry once
- Generation failure → auto-retry up to 3 attempts per image
- Invalid aspect ratio → warning, proceed with default
- Reference images with unsupported provider/model → error with fix hint (switch to Google multimodal: `gemini-3-pro-image-preview`, `gemini-3.1-flash-image-preview`; or OpenAI GPT Image edits)
- Reference images with unsupported provider/model → error with fix hint
## Extension Support

View File

@ -21,9 +21,25 @@ default_image_size: null # 1K|2K|4K|null (Google only, overrides quality)
default_model:
google: null # e.g., "gemini-3-pro-image-preview", "gemini-3.1-flash-image-preview"
openai: null # e.g., "gpt-image-1.5"
openai: null # e.g., "gpt-image-1.5", "gpt-image-1"
dashscope: null # e.g., "z-image-turbo"
replicate: null # e.g., "google/nano-banana-pro"
batch:
max_workers: 10
provider_limits:
replicate:
concurrency: 5
start_interval_ms: 700
google:
concurrency: 3
start_interval_ms: 1100
openai:
concurrency: 3
start_interval_ms: 1100
dashscope:
concurrency: 3
start_interval_ms: 1100
---
```
@ -40,6 +56,9 @@ default_model:
| `default_model.openai` | string\|null | null | OpenAI default model |
| `default_model.dashscope` | string\|null | null | DashScope default model |
| `default_model.replicate` | string\|null | null | Replicate default model |
| `batch.max_workers` | int\|null | 10 | Batch worker cap |
| `batch.provider_limits.<provider>.concurrency` | int\|null | provider default | Max simultaneous requests per provider |
| `batch.provider_limits.<provider>.start_interval_ms` | int\|null | provider default | Minimum gap between request starts per provider |
## Examples
@ -65,5 +84,11 @@ default_model:
openai: "gpt-image-1.5"
dashscope: "z-image-turbo"
replicate: "google/nano-banana-pro"
batch:
max_workers: 10
provider_limits:
replicate:
concurrency: 5
start_interval_ms: 700
---
```

View File

@ -2,34 +2,99 @@ import path from "node:path";
import process from "node:process";
import { homedir } from "node:os";
import { access, mkdir, readFile, writeFile } from "node:fs/promises";
import type { CliArgs, Provider, ExtendConfig } from "./types";
import type {
BatchFile,
BatchTaskInput,
CliArgs,
ExtendConfig,
Provider,
} from "./types";
// Shape every provider module (./providers/*) must export.
type ProviderModule = {
getDefaultModel: () => string;
generateImage: (prompt: string, model: string, args: CliArgs) => Promise<Uint8Array>;
};
// A fully-resolved generation task: prompt loaded, provider detected,
// model chosen, output path normalized, provider module imported.
type PreparedTask = {
id: string;
prompt: string;
args: CliArgs;
provider: Provider;
model: string;
outputPath: string;
providerModule: ProviderModule;
};
// Outcome of one task after all retry attempts.
type TaskResult = {
id: string;
provider: Provider;
model: string;
outputPath: string;
success: boolean;
attempts: number;
error: string | null;
};
// Per-provider throttle: max in-flight requests plus a minimum gap
// between request starts (overridable via EXTEND.md or env vars).
type ProviderRateLimit = {
concurrency: number;
startIntervalMs: number;
};
// Retry ceiling per image.
const MAX_ATTEMPTS = 3;
// Built-in batch worker cap (see BAOYU_IMAGE_GEN_MAX_WORKERS / EXTEND.md batch.max_workers).
const DEFAULT_MAX_WORKERS = 10;
// Poll interval while a worker waits for provider-gate capacity.
const POLL_WAIT_MS = 250;
// Built-in throttle defaults per provider. NOTE(review): tuned values —
// confirm against each provider's published rate limits.
const DEFAULT_PROVIDER_RATE_LIMITS: Record<Provider, ProviderRateLimit> = {
replicate: { concurrency: 5, startIntervalMs: 700 },
google: { concurrency: 3, startIntervalMs: 1100 },
openai: { concurrency: 3, startIntervalMs: 1100 },
dashscope: { concurrency: 3, startIntervalMs: 1100 },
};
function printUsage(): void {
console.log(`Usage:
npx -y bun scripts/main.ts --prompt "A cat" --image cat.png
npx -y bun scripts/main.ts --prompt "A landscape" --image landscape.png --ar 16:9
npx -y bun scripts/main.ts --promptfiles system.md content.md --image out.png
npx -y bun scripts/main.ts --batchfile batch.json
Options:
-p, --prompt <text> Prompt text
--promptfiles <files...> Read prompt from files (concatenated)
--image <path> Output image path (required)
--image <path> Output image path (required in single-image mode)
--batchfile <path> JSON batch file for multi-image generation
--jobs <count> Worker count for batch mode (default: auto, max from config, built-in default 10)
--provider google|openai|dashscope|replicate Force provider (auto-detect by default)
-m, --model <id> Model ID
--ar <ratio> Aspect ratio (e.g., 16:9, 1:1, 4:3)
--size <WxH> Size (e.g., 1024x1024)
--quality normal|2k Quality preset (default: 2k)
--imageSize 1K|2K|4K Image size for Google (default: from quality)
--ref <files...> Reference images (Google multimodal or OpenAI edits)
--n <count> Number of images (default: 1)
--ref <files...> Reference images (Google multimodal, OpenAI GPT Image edits, or Replicate)
--n <count> Number of images for the current task (default: 1)
--json JSON output
-h, --help Show help
Batch file format:
[
{
"id": "hero",
"promptFiles": ["prompts/hero.md"],
"image": "out/hero.png",
"provider": "replicate",
"model": "google/nano-banana-pro",
"ar": "16:9"
}
]
Behavior:
- Batch mode automatically runs in parallel when pending tasks >= 2
- Each image retries automatically up to 3 attempts
- Batch summary reports success count, failure count, and per-image errors
Environment variables:
OPENAI_API_KEY OpenAI API key
GOOGLE_API_KEY Google API key
GEMINI_API_KEY Gemini API key (alias for GOOGLE_API_KEY)
DASHSCOPE_API_KEY DashScope API key ()
DASHSCOPE_API_KEY DashScope API key
REPLICATE_API_TOKEN Replicate API token
OPENAI_IMAGE_MODEL Default OpenAI model (gpt-image-1.5)
GOOGLE_IMAGE_MODEL Default Google model (gemini-3-pro-image-preview)
@ -40,6 +105,9 @@ Environment variables:
GOOGLE_BASE_URL Custom Google endpoint
DASHSCOPE_BASE_URL Custom DashScope endpoint
REPLICATE_BASE_URL Custom Replicate endpoint
BAOYU_IMAGE_GEN_MAX_WORKERS Override batch worker cap
BAOYU_IMAGE_GEN_<PROVIDER>_CONCURRENCY Override provider concurrency
BAOYU_IMAGE_GEN_<PROVIDER>_START_INTERVAL_MS Override provider start gap in ms
Env file load order: CLI args > EXTEND.md > process.env > <cwd>/.baoyu-skills/.env > ~/.baoyu-skills/.env`);
}
@ -57,6 +125,8 @@ function parseArgs(argv: string[]): CliArgs {
imageSize: null,
referenceImages: [],
n: 1,
batchFile: null,
jobs: null,
json: false,
help: false,
};
@ -110,9 +180,26 @@ function parseArgs(argv: string[]): CliArgs {
continue;
}
if (a === "--batchfile") {
const v = argv[++i];
if (!v) throw new Error("Missing value for --batchfile");
out.batchFile = v;
continue;
}
if (a === "--jobs") {
const v = argv[++i];
if (!v) throw new Error("Missing value for --jobs");
out.jobs = parseInt(v, 10);
if (isNaN(out.jobs) || out.jobs < 1) throw new Error(`Invalid worker count: ${v}`);
continue;
}
if (a === "--provider") {
const v = argv[++i];
if (v !== "google" && v !== "openai" && v !== "dashscope" && v !== "replicate") throw new Error(`Invalid provider: ${v}`);
if (v !== "google" && v !== "openai" && v !== "dashscope" && v !== "replicate") {
throw new Error(`Invalid provider: ${v}`);
}
out.provider = v;
continue;
}
@ -228,9 +315,11 @@ function parseSimpleYaml(yaml: string): Partial<ExtendConfig> {
const config: Partial<ExtendConfig> = {};
const lines = yaml.split("\n");
let currentKey: string | null = null;
let currentProvider: Provider | null = null;
for (const line of lines) {
const trimmed = line.trim();
const indent = line.match(/^\s*/)?.[0].length ?? 0;
if (!trimmed || trimmed.startsWith("#")) continue;
if (trimmed.includes(":") && !trimmed.startsWith("-")) {
@ -247,18 +336,57 @@ function parseSimpleYaml(yaml: string): Partial<ExtendConfig> {
} else if (key === "default_provider") {
config.default_provider = value === "null" ? null : (value as Provider);
} else if (key === "default_quality") {
config.default_quality = value === "null" ? null : (value as "normal" | "2k");
config.default_quality = value === "null" ? null : value as "normal" | "2k";
} else if (key === "default_aspect_ratio") {
const cleaned = value.replace(/['"]/g, "");
config.default_aspect_ratio = cleaned === "null" ? null : cleaned;
} else if (key === "default_image_size") {
config.default_image_size = value === "null" ? null : (value as "1K" | "2K" | "4K");
config.default_image_size = value === "null" ? null : value as "1K" | "2K" | "4K";
} else if (key === "default_model") {
config.default_model = { google: null, openai: null, dashscope: null, replicate: null };
currentKey = "default_model";
} else if (currentKey === "default_model" && (key === "google" || key === "openai" || key === "dashscope" || key === "replicate")) {
currentProvider = null;
} else if (key === "batch") {
config.batch = {};
currentKey = "batch";
currentProvider = null;
} else if (currentKey === "batch" && indent >= 2 && key === "max_workers") {
config.batch ??= {};
config.batch.max_workers = value === "null" ? null : parseInt(value, 10);
} else if (currentKey === "batch" && indent >= 2 && key === "provider_limits") {
config.batch ??= {};
config.batch.provider_limits ??= {};
currentKey = "provider_limits";
currentProvider = null;
} else if (
currentKey === "provider_limits" &&
indent >= 4 &&
(key === "google" || key === "openai" || key === "dashscope" || key === "replicate")
) {
config.batch ??= {};
config.batch.provider_limits ??= {};
config.batch.provider_limits[key] ??= {};
currentProvider = key;
} else if (
currentKey === "default_model" &&
(key === "google" || key === "openai" || key === "dashscope" || key === "replicate")
) {
const cleaned = value.replace(/['"]/g, "");
config.default_model![key] = cleaned === "null" ? null : cleaned;
} else if (
currentKey === "provider_limits" &&
currentProvider &&
indent >= 6 &&
(key === "concurrency" || key === "start_interval_ms")
) {
config.batch ??= {};
config.batch.provider_limits ??= {};
const providerLimit = (config.batch.provider_limits[currentProvider] ??= {});
if (key === "concurrency") {
providerLimit.concurrency = value === "null" ? null : parseInt(value, 10);
} else {
providerLimit.start_interval_ms = value === "null" ? null : parseInt(value, 10);
}
}
}
}
@ -280,7 +408,6 @@ async function loadExtendConfig(): Promise<Partial<ExtendConfig>> {
const content = await readFile(p, "utf8");
const yaml = extractYamlFrontMatter(content);
if (!yaml) continue;
return parseSimpleYaml(yaml);
} catch {
continue;
@ -300,6 +427,46 @@ function mergeConfig(args: CliArgs, extend: Partial<ExtendConfig>): CliArgs {
};
}
/** Parse a strictly positive base-10 integer; null for anything else. */
function parsePositiveInt(value: string | undefined): number | null {
if (value === undefined || value === "") return null;
const n = parseInt(value, 10);
if (!Number.isFinite(n)) return null;
return n > 0 ? n : null;
}
/**
 * Resolve the batch worker cap: env override, then EXTEND.md
 * batch.max_workers, then the built-in default; never below 1.
 */
function getConfiguredMaxWorkers(extendConfig: Partial<ExtendConfig>): number {
const fromEnv = parsePositiveInt(process.env.BAOYU_IMAGE_GEN_MAX_WORKERS);
const fromConfig = extendConfig.batch?.max_workers ?? null;
const resolved = fromEnv ?? fromConfig ?? DEFAULT_MAX_WORKERS;
return Math.max(1, resolved);
}
/**
 * Build the effective per-provider throttle table. Precedence per
 * field: env var > EXTEND.md provider_limits > built-in default.
 */
function getConfiguredProviderRateLimits(
extendConfig: Partial<ExtendConfig>
): Record<Provider, ProviderRateLimit> {
const resolve = (provider: Provider): ProviderRateLimit => {
const defaults = DEFAULT_PROVIDER_RATE_LIMITS[provider];
const fromExtend = extendConfig.batch?.provider_limits?.[provider];
const envPrefix = `BAOYU_IMAGE_GEN_${provider.toUpperCase()}`;
return {
concurrency:
parsePositiveInt(process.env[`${envPrefix}_CONCURRENCY`]) ??
fromExtend?.concurrency ??
defaults.concurrency,
startIntervalMs:
parsePositiveInt(process.env[`${envPrefix}_START_INTERVAL_MS`]) ??
fromExtend?.start_interval_ms ??
defaults.startIntervalMs,
};
};
return {
replicate: resolve("replicate"),
google: resolve("google"),
openai: resolve("openai"),
dashscope: resolve("dashscope"),
};
}
async function readPromptFromFiles(files: string[]): Promise<string> {
const parts: string[] = [];
for (const f of files) {
@ -311,9 +478,12 @@ async function readPromptFromFiles(files: string[]): Promise<string> {
async function readPromptFromStdin(): Promise<string | null> {
if (process.stdin.isTTY) return null;
try {
const t = await Bun.stdin.text();
const v = t.trim();
return v.length > 0 ? v : null;
const chunks: Buffer[] = [];
for await (const chunk of process.stdin) {
chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk));
}
const value = Buffer.concat(chunks).toString("utf8").trim();
return value.length > 0 ? value : null;
} catch {
return null;
}
@ -327,7 +497,13 @@ function normalizeOutputImagePath(p: string): string {
}
function detectProvider(args: CliArgs): Provider {
if (args.referenceImages.length > 0 && args.provider && args.provider !== "google" && args.provider !== "openai" && args.provider !== "replicate") {
if (
args.referenceImages.length > 0 &&
args.provider &&
args.provider !== "google" &&
args.provider !== "openai" &&
args.provider !== "replicate"
) {
throw new Error(
"Reference images require a ref-capable provider. Use --provider google (Gemini multimodal), --provider openai (GPT Image edits), or --provider replicate."
);
@ -349,13 +525,18 @@ function detectProvider(args: CliArgs): Provider {
);
}
const available = [hasGoogle && "google", hasOpenai && "openai", hasDashscope && "dashscope", hasReplicate && "replicate"].filter(Boolean) as Provider[];
const available = [
hasReplicate && "replicate",
hasGoogle && "google",
hasOpenai && "openai",
hasDashscope && "dashscope",
].filter(Boolean) as Provider[];
if (available.length === 1) return available[0]!;
if (available.length > 1) return available[0]!;
throw new Error(
"No API key found. Set GOOGLE_API_KEY, GEMINI_API_KEY, OPENAI_API_KEY, DASHSCOPE_API_KEY, or REPLICATE_API_TOKEN.\n" +
"No API key found. Set REPLICATE_API_TOKEN, GOOGLE_API_KEY, GEMINI_API_KEY, OPENAI_API_KEY, or DASHSCOPE_API_KEY.\n" +
"Create ~/.baoyu-skills/.env or <cwd>/.baoyu-skills/.env with your keys."
);
}
@ -371,11 +552,6 @@ async function validateReferenceImages(referenceImages: string[]): Promise<void>
}
}
type ProviderModule = {
getDefaultModel: () => string;
generateImage: (prompt: string, model: string, args: CliArgs) => Promise<Uint8Array>;
};
function isRetryableGenerationError(error: unknown): boolean {
const msg = error instanceof Error ? error.message : String(error);
const nonRetryableMarkers = [
@ -384,26 +560,328 @@ function isRetryableGenerationError(error: unknown): boolean {
"only supported",
"No API key found",
"is required",
"Invalid ",
"Unexpected ",
"API error (400)",
"API error (401)",
"API error (402)",
"API error (403)",
"API error (404)",
"temporarily disabled",
];
return !nonRetryableMarkers.some((marker) => msg.includes(marker));
}
/**
 * Dynamically import the provider implementation for the given
 * provider. Any value not matched explicitly falls through to the
 * OpenAI provider.
 *
 * Fix: the block previously contained two copies of the google /
 * dashscope / replicate branches (multi-line and single-line); the
 * second set was unreachable dead code and has been removed.
 */
async function loadProviderModule(provider: Provider): Promise<ProviderModule> {
if (provider === "google") return (await import("./providers/google")) as ProviderModule;
if (provider === "dashscope") return (await import("./providers/dashscope")) as ProviderModule;
if (provider === "replicate") return (await import("./providers/replicate")) as ProviderModule;
return (await import("./providers/openai")) as ProviderModule;
}
/**
 * Resolve the prompt text for a task: an inline --prompt value wins;
 * otherwise --promptfiles are read and concatenated. Returns the
 * (possibly null) inline value when neither source yields text.
 */
async function loadPromptForArgs(args: CliArgs): Promise<string | null> {
const inline = args.prompt;
if (inline || args.promptFiles.length === 0) return inline;
return readPromptFromFiles(args.promptFiles);
}
/**
 * Pick the model for a provider: explicit request > EXTEND.md
 * default_model entry for that provider > provider module's built-in
 * default.
 */
function getModelForProvider(
provider: Provider,
requestedModel: string | null,
extendConfig: Partial<ExtendConfig>,
providerModule: ProviderModule
): string {
if (requestedModel) return requestedModel;
const configured = extendConfig.default_model?.[provider];
if (configured) return configured;
return providerModule.getDefaultModel();
}
async function prepareSingleTask(args: CliArgs, extendConfig: Partial<ExtendConfig>): Promise<PreparedTask> {
if (!args.quality) args.quality = "2k";
const prompt = (await loadPromptForArgs(args)) ?? (await readPromptFromStdin());
if (!prompt) throw new Error("Prompt is required");
if (!args.imagePath) throw new Error("--image is required");
if (args.referenceImages.length > 0) await validateReferenceImages(args.referenceImages);
const provider = detectProvider(args);
const providerModule = await loadProviderModule(provider);
const model = getModelForProvider(provider, args.model, extendConfig, providerModule);
return {
id: "single",
prompt,
args,
provider,
model,
outputPath: normalizeOutputImagePath(args.imagePath),
providerModule,
};
}
/**
 * Read and parse a batch file. Accepts either a bare task array or an
 * object with a `tasks` array; a leading UTF-8 BOM is stripped before
 * parsing.
 */
async function loadBatchTasks(batchFilePath: string): Promise<BatchTaskInput[]> {
const raw = await readFile(path.resolve(batchFilePath), "utf8");
const withoutBom = raw.replace(/^\uFEFF/, "");
const parsed = JSON.parse(withoutBom) as BatchFile;
if (Array.isArray(parsed)) {
return parsed;
}
if (parsed && typeof parsed === "object" && Array.isArray(parsed.tasks)) {
return parsed.tasks;
}
throw new Error("Invalid batch file. Expected an array of tasks or an object with a tasks array.");
}
/**
 * Derive per-task CliArgs by overlaying one batch entry on the CLI
 * baseline: task fields win, CLI values act as fallbacks. batchFile is
 * always cleared so a task can never recurse into batch mode, and
 * promptFiles/referenceImages are copied so tasks never share arrays.
 */
function createTaskArgs(baseArgs: CliArgs, task: BatchTaskInput): CliArgs {
return {
...baseArgs,
prompt: task.prompt ?? null,
promptFiles: task.promptFiles ? [...task.promptFiles] : [],
imagePath: task.image ?? null,
provider: task.provider ?? baseArgs.provider ?? null,
model: task.model ?? baseArgs.model ?? null,
aspectRatio: task.ar ?? baseArgs.aspectRatio ?? null,
size: task.size ?? baseArgs.size ?? null,
quality: task.quality ?? baseArgs.quality ?? null,
imageSize: task.imageSize ?? baseArgs.imageSize ?? null,
referenceImages: task.ref ? [...task.ref] : [],
n: task.n ?? baseArgs.n,
batchFile: null,
jobs: baseArgs.jobs,
json: baseArgs.json,
help: false,
};
}
/**
 * Expand batch-file entries into fully-resolved PreparedTasks. Fails
 * fast with a 1-based task number when an entry lacks a prompt or an
 * output path. Tasks are prepared sequentially so error messages stay
 * in entry order.
 */
async function prepareBatchTasks(
args: CliArgs,
extendConfig: Partial<ExtendConfig>
): Promise<PreparedTask[]> {
if (!args.batchFile) throw new Error("--batchfile is required in batch mode");
const taskInputs = await loadBatchTasks(args.batchFile);
if (taskInputs.length === 0) throw new Error("Batch file does not contain any tasks.");
const prepared: PreparedTask[] = [];
let ordinal = 0;
for (const task of taskInputs) {
ordinal += 1;
const taskArgs = createTaskArgs(args, task);
const prompt = await loadPromptForArgs(taskArgs);
if (!prompt) throw new Error(`Task ${ordinal} is missing prompt or promptFiles.`);
if (!taskArgs.imagePath) throw new Error(`Task ${ordinal} is missing image output path.`);
if (taskArgs.referenceImages.length > 0) await validateReferenceImages(taskArgs.referenceImages);
const provider = detectProvider(taskArgs);
const providerModule = await loadProviderModule(provider);
const model = getModelForProvider(provider, taskArgs.model, extendConfig, providerModule);
const fallbackId = `task-${String(ordinal).padStart(2, "0")}`;
prepared.push({
id: task.id || fallbackId,
prompt,
args: taskArgs,
provider,
model,
outputPath: normalizeOutputImagePath(taskArgs.imagePath),
providerModule,
});
}
return prepared;
}
/** Write image bytes to disk, creating parent directories as needed. */
async function writeImage(outputPath: string, imageData: Uint8Array): Promise<void> {
const parentDir = path.dirname(outputPath);
await mkdir(parentDir, { recursive: true });
await writeFile(outputPath, imageData);
}
/**
 * Generate one image for a prepared task, retrying up to MAX_ATTEMPTS.
 * Never throws — always resolves to a TaskResult — so a batch worker
 * can keep draining the queue after individual failures.
 */
async function generatePreparedTask(task: PreparedTask): Promise<TaskResult> {
// Progress/diagnostics go to stderr so stdout stays clean for --json.
console.error(`Using ${task.provider} / ${task.model} for ${task.id}`);
console.error(
`Switch model: --model <id> | EXTEND.md default_model.${task.provider} | env ${task.provider.toUpperCase()}_IMAGE_MODEL`
);
let attempts = 0;
while (attempts < MAX_ATTEMPTS) {
attempts += 1;
try {
const imageData = await task.providerModule.generateImage(task.prompt, task.model, task.args);
await writeImage(task.outputPath, imageData);
return {
id: task.id,
provider: task.provider,
model: task.model,
outputPath: task.outputPath,
success: true,
attempts,
error: null,
};
} catch (error) {
const message = error instanceof Error ? error.message : String(error);
// Retry only errors classified as transient; non-retryable markers
// (e.g. 4xx API errors) abort immediately with the failure recorded.
const canRetry = attempts < MAX_ATTEMPTS && isRetryableGenerationError(error);
if (canRetry) {
console.error(`[${task.id}] Attempt ${attempts}/${MAX_ATTEMPTS} failed, retrying...`);
continue;
}
return {
id: task.id,
provider: task.provider,
model: task.model,
outputPath: task.outputPath,
success: false,
attempts,
error: message,
};
}
}
// Unreachable in practice (the loop always returns); kept as a safety net.
return {
id: task.id,
provider: task.provider,
model: task.model,
outputPath: task.outputPath,
success: false,
attempts: MAX_ATTEMPTS,
error: "Unknown failure",
};
}
/**
 * Create a per-provider admission gate enforcing both a concurrency cap
 * and a minimum interval between request starts. `acquire` resolves
 * with a release callback the caller MUST invoke (e.g. in finally) when
 * the request finishes.
 *
 * Implementation note: waiting workers poll every POLL_WAIT_MS rather
 * than queueing. The check-then-set below is safe without locks because
 * JS is single-threaded and there is no await between check and set.
 */
function createProviderGate(providerRateLimits: Record<Provider, ProviderRateLimit>) {
const state = new Map<Provider, { active: number; lastStartedAt: number }>();
return async function acquire(provider: Provider): Promise<() => void> {
const limit = providerRateLimits[provider];
while (true) {
const current = state.get(provider) ?? { active: 0, lastStartedAt: 0 };
const now = Date.now();
const enoughCapacity = current.active < limit.concurrency;
const enoughGap = now - current.lastStartedAt >= limit.startIntervalMs;
if (enoughCapacity && enoughGap) {
state.set(provider, { active: current.active + 1, lastStartedAt: now });
return () => {
// Release decrements the active count but keeps lastStartedAt,
// so the start-interval throttle still applies to the next acquire.
const latest = state.get(provider) ?? { active: 1, lastStartedAt: now };
state.set(provider, {
active: Math.max(0, latest.active - 1),
lastStartedAt: latest.lastStartedAt,
});
};
}
await new Promise((resolve) => setTimeout(resolve, POLL_WAIT_MS));
}
};
}
/**
 * Decide how many parallel workers to run for a batch.
 * Defaults to one worker per task (capped at maxWorkers) when jobs is not
 * given; an explicit jobs value is clamped to [1, min(taskCount, maxWorkers)].
 */
function getWorkerCount(taskCount: number, jobs: number | null, maxWorkers: number): number {
  const desired = jobs ?? Math.min(taskCount, maxWorkers);
  const bounded = Math.min(desired, taskCount, maxWorkers);
  return Math.max(1, bounded);
}
/**
 * Run all prepared tasks through a bounded worker pool, honoring per-provider
 * concurrency/start-interval limits. Results are returned in task order.
 */
async function runBatchTasks(
  tasks: PreparedTask[],
  jobs: number | null,
  extendConfig: Partial<ExtendConfig>
): Promise<TaskResult[]> {
  // A single task needs no pool or rate-limit machinery.
  if (tasks.length === 1) {
    const onlyResult = await generatePreparedTask(tasks[0]!);
    return [onlyResult];
  }
  const maxWorkers = getConfiguredMaxWorkers(extendConfig);
  const providerRateLimits = getConfiguredProviderRateLimits(extendConfig);
  const acquireProvider = createProviderGate(providerRateLimits);
  const workerCount = getWorkerCount(tasks.length, jobs, maxWorkers);
  console.error(`Batch mode: ${tasks.length} tasks, ${workerCount} workers, parallel mode enabled.`);
  const providers: Provider[] = ["replicate", "google", "openai", "dashscope"];
  for (const provider of providers) {
    const limit = providerRateLimits[provider];
    console.error(`- ${provider}: concurrency=${limit.concurrency}, startIntervalMs=${limit.startIntervalMs}`);
  }
  const results: TaskResult[] = new Array(tasks.length);
  let cursor = 0;
  const runWorker = async (): Promise<void> => {
    // Each worker repeatedly claims the next unclaimed index until the queue
    // drains. Claiming (read + increment) has no await in between, so it is
    // race-free on the single-threaded event loop.
    while (cursor < tasks.length) {
      const index = cursor;
      cursor += 1;
      const task = tasks[index]!;
      const release = await acquireProvider(task.provider);
      try {
        results[index] = await generatePreparedTask(task);
      } finally {
        release();
      }
    }
  };
  const workers = Array.from({ length: workerCount }, () => runWorker());
  await Promise.all(workers);
  return results;
}
/** Print a human-readable batch summary (totals plus per-task failure reasons) to stderr. */
function printBatchSummary(results: TaskResult[]): void {
  const failures = results.filter((item) => !item.success);
  const succeededCount = results.length - failures.length;
  console.error("");
  console.error("Batch generation summary:");
  console.error(`- Total: ${results.length}`);
  console.error(`- Succeeded: ${succeededCount}`);
  console.error(`- Failed: ${failures.length}`);
  if (failures.length > 0) {
    console.error("Failure reasons:");
    for (const failure of failures) {
      console.error(`- ${failure.id}: ${failure.error}`);
    }
  }
}
/** Serialize payload as pretty-printed JSON (2-space indent) and write it to stdout. */
function emitJson(payload: unknown): void {
  const serialized = JSON.stringify(payload, null, 2);
  console.log(serialized);
}
/**
 * Generate a single image from CLI args. Throws on failure; on success prints
 * either the saved image path or a JSON summary, depending on --json.
 */
async function runSingleMode(args: CliArgs, extendConfig: Partial<ExtendConfig>): Promise<void> {
  const task = await prepareSingleTask(args, extendConfig);
  const outcome = await generatePreparedTask(task);
  if (!outcome.success) {
    throw new Error(outcome.error || "Generation failed");
  }
  if (!args.json) {
    console.log(outcome.outputPath);
    return;
  }
  emitJson({
    savedImage: outcome.outputPath,
    provider: outcome.provider,
    model: outcome.model,
    attempts: outcome.attempts,
    // Truncate the prompt so the JSON summary stays compact.
    prompt: task.prompt.slice(0, 200),
  });
}
/**
 * Generate all images described by the batch file, print a summary, and set a
 * non-zero exit code if any task failed. Emits a JSON report when --json is set.
 */
async function runBatchMode(args: CliArgs, extendConfig: Partial<ExtendConfig>): Promise<void> {
  const tasks = await prepareBatchTasks(args, extendConfig);
  const results = await runBatchTasks(tasks, args.jobs, extendConfig);
  printBatchSummary(results);
  const failedCount = results.filter((item) => !item.success).length;
  if (args.json) {
    emitJson({
      mode: "batch",
      total: results.length,
      succeeded: results.length - failedCount,
      failed: failedCount,
      results,
    });
  }
  // Partial failure still produces output, but callers should see a failing exit code.
  if (failedCount > 0) {
    process.exitCode = 1;
  }
}
async function main(): Promise<void> {
const args = parseArgs(process.argv.slice(2));
if (args.help) {
printUsage();
return;
@ -412,86 +890,18 @@ async function main(): Promise<void> {
await loadEnv();
const extendConfig = await loadExtendConfig();
const mergedArgs = mergeConfig(args, extendConfig);
if (!mergedArgs.quality) mergedArgs.quality = "2k";
let prompt: string | null = mergedArgs.prompt;
if (!prompt && mergedArgs.promptFiles.length > 0) prompt = await readPromptFromFiles(mergedArgs.promptFiles);
if (!prompt) prompt = await readPromptFromStdin();
if (!prompt) {
console.error("Error: Prompt is required");
printUsage();
process.exitCode = 1;
if (mergedArgs.batchFile) {
await runBatchMode(mergedArgs, extendConfig);
return;
}
if (!mergedArgs.imagePath) {
console.error("Error: --image is required");
printUsage();
process.exitCode = 1;
return;
}
if (mergedArgs.referenceImages.length > 0) {
await validateReferenceImages(mergedArgs.referenceImages);
}
const provider = detectProvider(mergedArgs);
const providerModule = await loadProviderModule(provider);
let model = mergedArgs.model;
if (!model && extendConfig.default_model) {
if (provider === "google") model = extendConfig.default_model.google ?? null;
if (provider === "openai") model = extendConfig.default_model.openai ?? null;
if (provider === "dashscope") model = extendConfig.default_model.dashscope ?? null;
if (provider === "replicate") model = extendConfig.default_model.replicate ?? null;
}
model = model || providerModule.getDefaultModel();
const outputPath = normalizeOutputImagePath(mergedArgs.imagePath);
let imageData: Uint8Array;
let retried = false;
while (true) {
try {
imageData = await providerModule.generateImage(prompt, model, mergedArgs);
break;
} catch (e) {
if (!retried && isRetryableGenerationError(e)) {
retried = true;
console.error("Generation failed, retrying...");
continue;
}
throw e;
}
}
const dir = path.dirname(outputPath);
await mkdir(dir, { recursive: true });
await writeFile(outputPath, imageData);
if (mergedArgs.json) {
console.log(
JSON.stringify(
{
savedImage: outputPath,
provider,
model,
prompt: prompt.slice(0, 200),
},
null,
2
)
);
} else {
console.log(outputPath);
}
await runSingleMode(mergedArgs, extendConfig);
}
main().catch((e) => {
const msg = e instanceof Error ? e.message : String(e);
console.error(msg);
main().catch((error) => {
const message = error instanceof Error ? error.message : String(error);
console.error(message);
process.exit(1);
});

View File

@ -68,7 +68,11 @@ export async function generateImage(
const baseURL = process.env.OPENAI_BASE_URL || "https://api.openai.com/v1";
const apiKey = process.env.OPENAI_API_KEY;
if (!apiKey) throw new Error("OPENAI_API_KEY is required");
if (!apiKey) {
throw new Error(
"OPENAI_API_KEY is required. Codex/ChatGPT desktop login does not automatically grant OpenAI Images API access to this script."
);
}
if (process.env.OPENAI_IMAGE_USE_CHAT === "true") {
return generateWithChatCompletions(baseURL, apiKey, prompt, model);

View File

@ -36,22 +36,24 @@ function buildInput(prompt: string, args: CliArgs, referenceImages: string[]): R
if (args.aspectRatio) {
input.aspect_ratio = args.aspectRatio;
} else if (referenceImages.length > 0) {
input.aspect_ratio = "match_input_image";
}
if (args.n > 1) {
input.number_of_images = args.n;
}
if (args.quality === "normal") {
input.resolution = "1K";
} else if (args.quality === "2k") {
input.resolution = "2K";
}
input.output_format = "png";
if (referenceImages.length > 0) {
if (referenceImages.length === 1) {
input.image = referenceImages[0];
} else {
for (let i = 0; i < referenceImages.length; i++) {
input[`image${i > 0 ? i + 1 : ""}`] = referenceImages[i];
}
}
input.image_input = referenceImages;
}
return input;

View File

@ -13,10 +13,29 @@ export type CliArgs = {
imageSize: string | null;
referenceImages: string[];
n: number;
batchFile: string | null;
jobs: number | null;
json: boolean;
help: boolean;
};
// One entry in a batch file: per-task overrides for a single image generation.
// All fields are optional; presumably missing values fall back to CLI/EXTEND.md
// defaults during task preparation — confirm against the batch loader.
export type BatchTaskInput = {
  // Task identifier, used in logs and per-task results.
  id?: string;
  // Inline prompt text; alternative to promptFiles.
  prompt?: string | null;
  // Files whose contents supply the prompt.
  promptFiles?: string[];
  // Output image path for this task.
  image?: string;
  provider?: Provider | null;
  model?: string | null;
  // Aspect ratio, e.g. "16:9".
  ar?: string | null;
  size?: string | null;
  quality?: Quality | null;
  imageSize?: "1K" | "2K" | "4K" | null;
  // Reference image paths.
  ref?: string[];
  // Number of images to generate.
  n?: number;
};

// A batch file is either a bare array of tasks or an object wrapping them in `tasks`.
export type BatchFile = BatchTaskInput[] | { tasks: BatchTaskInput[] };
export type ExtendConfig = {
version: number;
default_provider: Provider | null;
@ -29,4 +48,16 @@ export type ExtendConfig = {
dashscope: string | null;
replicate: string | null;
};
batch?: {
max_workers?: number | null;
provider_limits?: Partial<
Record<
Provider,
{
concurrency?: number | null;
start_interval_ms?: number | null;
}
>
>;
};
};

View File

@ -212,6 +212,7 @@ Before translating chunks:
- **Natural flow**: Use idiomatic target language word order and sentence patterns; break or restructure sentences freely when the source structure doesn't work naturally in the target language
- **Terminology**: Use standard translations; annotate with original term in parentheses on first occurrence
- **Preserve format**: Keep all markdown formatting (headings, bold, italic, images, links, code blocks)
- **Image-language awareness**: Preserve image references exactly during translation, but after the translation is complete, review referenced images and check whether their likely main text language still matches the translated article language
- **Frontmatter transformation**: If the source has YAML frontmatter, preserve it in the translation with these changes: (1) Rename metadata fields that describe the *source* article — `url`→`sourceUrl`, `title`→`sourceTitle`, `description`→`sourceDescription`, `author`→`sourceAuthor`, `date`→`sourceDate`, and any similar origin-metadata fields — by adding a `source` prefix (camelCase). (2) Translate the values of text fields (title, description, etc.) and add them as new top-level fields. (3) Keep other fields (tags, categories, custom fields) as-is, translating their values where appropriate
- **Respect original**: Maintain original meaning and intent; do not add, remove, or editorialize — but sentence structure and imagery may be adapted freely to serve the meaning
- **Translator's notes**: For terms, concepts, or cultural references that target readers may not understand — due to jargon, cultural gaps, or domain-specific knowledge — add a concise explanatory note in parentheses immediately after the term. The note should explain *what it means* in plain language, not just provide the English original. Format: `译文English original通俗解释`. Calibrate annotation depth to the target audience: general readers need more notes than technical readers. Only add notes where genuinely needed; do not over-annotate obvious terms.
@ -250,6 +251,20 @@ Each step reads the previous step's file and builds on it.
Final translation is always at `translation.md` in the output directory.
After the final translation is written, do a lightweight image-language pass:
1. Collect image references from the translated article
2. Identify likely text-heavy images such as covers, screenshots, diagrams, charts, frameworks, and infographics
3. If any image likely contains a main text language that does not match the translated article language, proactively remind the user
4. The reminder must be a list only. Do not automatically localize those images unless the user asks
Reminder format:
```text
Possible image localization needed:
- ![[attachments/example-cover.png]]: likely still contains source-language text while the article is now in the target language
- ![[attachments/example-diagram.png]]: likely text-heavy framework graphic, check whether labels need translation
```
Display summary:
```
**Translation complete** ({mode} mode)
@ -261,6 +276,8 @@ Final: {output-dir}/translation.md
Glossary terms applied: {count}
```
If mismatched image-language candidates were found, append a short note after the summary telling the user that some embedded images may still need image-text localization, followed by the candidate list.
## Extension Support
Custom configurations via EXTEND.md. See **Preferences** section for paths and supported options.