feat: add batch parallel image generation and provider-level throttling
- Add --batchfile and --jobs flags for multi-image parallel generation with per-provider concurrency control and rate limiting - Refactor main.ts into prepareSingleTask/prepareBatchTasks/runBatchTasks with worker pool pattern and up to 3 retries per image - Fix Replicate provider: use image_input array (nano-banana-pro schema), add match_input_image aspect ratio, add quality-to-resolution mapping - Improve OpenAI error message for missing API key (Codex auth hint) - Expand non-retryable error detection (4xx codes, disabled models) - Add batch config to EXTEND.md schema (max_workers, provider_limits) - Add build-batch.ts for article-illustrator batch workflow integration - Add image-language awareness pass to baoyu-translate Co-authored-by: 敖氏 <aoshi@MacBook-Air.local>
This commit is contained in:
parent
e4d4ec8334
commit
5acef7151b
|
|
@ -0,0 +1,156 @@
|
|||
import path from "node:path";
|
||||
import process from "node:process";
|
||||
import { readdir, readFile, writeFile } from "node:fs/promises";
|
||||
|
||||
// Parsed command-line options for the batch-file builder.
type CliArgs = {
  outlinePath: string | null; // --outline: path to outline.md
  promptsDir: string | null; // --prompts: directory holding per-illustration prompt files
  outputPath: string | null; // --output: destination batch.json
  imagesDir: string | null; // --images-dir: directory generated images should be written to
  provider: string; // --provider (default "replicate")
  model: string; // --model (default "google/nano-banana-pro")
  aspectRatio: string; // --ar (default "16:9")
  quality: string; // --quality (default "2k")
  jobs: number | null; // --jobs: recommended worker-count metadata, optional
  help: boolean; // -h / --help
};
|
||||
|
||||
// One "## Illustration N" section parsed out of outline.md.
type OutlineEntry = {
  index: number; // illustration number taken from the section heading
  filename: string; // image filename from the "**Filename**:" field
};
|
||||
|
||||
// Print CLI usage/help text to stdout.
function printUsage(): void {
  console.log(`Usage:
  npx -y tsx scripts/build-batch.ts --outline outline.md --prompts prompts --output batch.json --images-dir attachments

Options:
  --outline <path>     Path to outline.md
  --prompts <path>     Path to prompts directory
  --output <path>      Path to output batch.json
  --images-dir <path>  Directory for generated images
  --provider <name>    Provider for baoyu-image-gen batch tasks (default: replicate)
  --model <id>         Model for baoyu-image-gen batch tasks (default: google/nano-banana-pro)
  --ar <ratio>         Aspect ratio for all tasks (default: 16:9)
  --quality <level>    Quality for all tasks (default: 2k)
  --jobs <count>       Recommended worker count metadata (optional)
  -h, --help           Show help`);
}
|
||||
|
||||
function parseArgs(argv: string[]): CliArgs {
|
||||
const args: CliArgs = {
|
||||
outlinePath: null,
|
||||
promptsDir: null,
|
||||
outputPath: null,
|
||||
imagesDir: null,
|
||||
provider: "replicate",
|
||||
model: "google/nano-banana-pro",
|
||||
aspectRatio: "16:9",
|
||||
quality: "2k",
|
||||
jobs: null,
|
||||
help: false,
|
||||
};
|
||||
|
||||
for (let i = 0; i < argv.length; i++) {
|
||||
const current = argv[i]!;
|
||||
if (current === "--outline") args.outlinePath = argv[++i] ?? null;
|
||||
else if (current === "--prompts") args.promptsDir = argv[++i] ?? null;
|
||||
else if (current === "--output") args.outputPath = argv[++i] ?? null;
|
||||
else if (current === "--images-dir") args.imagesDir = argv[++i] ?? null;
|
||||
else if (current === "--provider") args.provider = argv[++i] ?? args.provider;
|
||||
else if (current === "--model") args.model = argv[++i] ?? args.model;
|
||||
else if (current === "--ar") args.aspectRatio = argv[++i] ?? args.aspectRatio;
|
||||
else if (current === "--quality") args.quality = argv[++i] ?? args.quality;
|
||||
else if (current === "--jobs") {
|
||||
const value = argv[++i];
|
||||
args.jobs = value ? parseInt(value, 10) : null;
|
||||
} else if (current === "--help" || current === "-h") {
|
||||
args.help = true;
|
||||
}
|
||||
}
|
||||
return args;
|
||||
}
|
||||
|
||||
function parseOutline(content: string): OutlineEntry[] {
|
||||
const entries: OutlineEntry[] = [];
|
||||
const blocks = content.split(/^## Illustration\s+/m).slice(1);
|
||||
|
||||
for (const block of blocks) {
|
||||
const indexMatch = block.match(/^(\d+)/);
|
||||
const filenameMatch = block.match(/\*\*Filename\*\*:\s*(.+)/);
|
||||
if (indexMatch && filenameMatch) {
|
||||
entries.push({
|
||||
index: parseInt(indexMatch[1]!, 10),
|
||||
filename: filenameMatch[1]!.trim(),
|
||||
});
|
||||
}
|
||||
}
|
||||
return entries;
|
||||
}
|
||||
|
||||
async function findPromptFile(promptsDir: string, entry: OutlineEntry): Promise<string | null> {
|
||||
const files = await readdir(promptsDir);
|
||||
const prefix = String(entry.index).padStart(2, "0");
|
||||
const match = files.find((f) => f.startsWith(prefix) && f.endsWith(".md"));
|
||||
return match ? path.join(promptsDir, match) : null;
|
||||
}
|
||||
|
||||
async function main(): Promise<void> {
|
||||
const args = parseArgs(process.argv.slice(2));
|
||||
if (args.help) {
|
||||
printUsage();
|
||||
return;
|
||||
}
|
||||
|
||||
if (!args.outlinePath) {
|
||||
console.error("Error: --outline is required");
|
||||
process.exit(1);
|
||||
}
|
||||
if (!args.promptsDir) {
|
||||
console.error("Error: --prompts is required");
|
||||
process.exit(1);
|
||||
}
|
||||
if (!args.outputPath) {
|
||||
console.error("Error: --output is required");
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
const outlineContent = await readFile(args.outlinePath, "utf8");
|
||||
const entries = parseOutline(outlineContent);
|
||||
|
||||
if (entries.length === 0) {
|
||||
console.error("No illustration entries found in outline.");
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
const tasks = [];
|
||||
for (const entry of entries) {
|
||||
const promptFile = await findPromptFile(args.promptsDir, entry);
|
||||
if (!promptFile) {
|
||||
console.error(`Warning: No prompt file found for illustration ${entry.index}, skipping.`);
|
||||
continue;
|
||||
}
|
||||
|
||||
const imageDir = args.imagesDir ?? path.dirname(args.outputPath);
|
||||
tasks.push({
|
||||
id: `illustration-${String(entry.index).padStart(2, "0")}`,
|
||||
promptFiles: [promptFile],
|
||||
image: path.join(imageDir, entry.filename),
|
||||
provider: args.provider,
|
||||
model: args.model,
|
||||
ar: args.aspectRatio,
|
||||
quality: args.quality,
|
||||
});
|
||||
}
|
||||
|
||||
const output: Record<string, unknown> = { tasks };
|
||||
if (args.jobs) output.jobs = args.jobs;
|
||||
|
||||
await writeFile(args.outputPath, JSON.stringify(output, null, 2) + "\n");
|
||||
console.log(`Batch file written: ${args.outputPath} (${tasks.length} tasks)`);
|
||||
}
|
||||
|
||||
// Top-level error handler: print a concise message (no stack trace) and
// exit non-zero so callers can detect failure.
main().catch((error) => {
  console.error(error instanceof Error ? error.message : String(error));
  process.exit(1);
});
|
||||
|
|
@ -55,7 +55,7 @@ if (Test-Path "$HOME/.baoyu-skills/baoyu-image-gen/EXTEND.md") { "user" }
|
|||
| `.baoyu-skills/baoyu-image-gen/EXTEND.md` | Project directory |
|
||||
| `$HOME/.baoyu-skills/baoyu-image-gen/EXTEND.md` | User home |
|
||||
|
||||
**EXTEND.md Supports**: Default provider | Default quality | Default aspect ratio | Default image size | Default models
|
||||
**EXTEND.md Supports**: Default provider | Default quality | Default aspect ratio | Default image size | Default models | Batch worker cap | Provider-specific batch limits
|
||||
|
||||
Schema: `references/config/preferences-schema.md`
|
||||
|
||||
|
|
@ -91,6 +91,12 @@ ${BUN_X} {baseDir}/scripts/main.ts --prompt "A cat" --image out.png --provider r
|
|||
|
||||
# Replicate with specific model
|
||||
${BUN_X} {baseDir}/scripts/main.ts --prompt "A cat" --image out.png --provider replicate --model google/nano-banana
|
||||
|
||||
# Batch mode with saved prompt files
|
||||
${BUN_X} {baseDir}/scripts/main.ts --batchfile batch.json
|
||||
|
||||
# Batch mode with explicit worker count
|
||||
${BUN_X} {baseDir}/scripts/main.ts --batchfile batch.json --jobs 4 --json
|
||||
```
|
||||
|
||||
## Options
|
||||
|
|
@ -99,14 +105,16 @@ ${BUN_X} {baseDir}/scripts/main.ts --prompt "A cat" --image out.png --provider r
|
|||
|--------|-------------|
|
||||
| `--prompt <text>`, `-p` | Prompt text |
|
||||
| `--promptfiles <files...>` | Read prompt from files (concatenated) |
|
||||
| `--image <path>` | Output image path (required) |
|
||||
| `--provider google\|openai\|dashscope\|replicate` | Force provider (default: google) |
|
||||
| `--model <id>`, `-m` | Model ID (Google: `gemini-3-pro-image-preview`, `gemini-3.1-flash-image-preview`; OpenAI: `gpt-image-1.5`) |
|
||||
| `--image <path>` | Output image path (required in single-image mode) |
|
||||
| `--batchfile <path>` | JSON batch file for multi-image generation |
|
||||
| `--jobs <count>` | Worker count for batch mode (default: auto, max from config, built-in default 10) |
|
||||
| `--provider google\|openai\|dashscope\|replicate` | Force provider (default: auto-detect) |
|
||||
| `--model <id>`, `-m` | Model ID (Google: `gemini-3-pro-image-preview`, `gemini-3.1-flash-image-preview`; OpenAI: `gpt-image-1.5`, `gpt-image-1`) |
|
||||
| `--ar <ratio>` | Aspect ratio (e.g., `16:9`, `1:1`, `4:3`) |
|
||||
| `--size <WxH>` | Size (e.g., `1024x1024`) |
|
||||
| `--quality normal\|2k` | Quality preset (default: 2k) |
|
||||
| `--quality normal\|2k` | Quality preset (default: `2k`) |
|
||||
| `--imageSize 1K\|2K\|4K` | Image size for Google (default: from quality) |
|
||||
| `--ref <files...>` | Reference images. Supported by Google multimodal (`gemini-3-pro-image-preview`, `gemini-3-flash-preview`, `gemini-3.1-flash-image-preview`) and OpenAI edits (GPT Image models). If provider omitted: Google first, then OpenAI |
|
||||
| `--ref <files...>` | Reference images. Supported by Google multimodal, OpenAI GPT Image edits, and Replicate |
|
||||
| `--n <count>` | Number of images |
|
||||
| `--json` | JSON output |
|
||||
|
||||
|
|
@ -126,6 +134,9 @@ ${BUN_X} {baseDir}/scripts/main.ts --prompt "A cat" --image out.png --provider r
|
|||
| `GOOGLE_BASE_URL` | Custom Google endpoint |
|
||||
| `DASHSCOPE_BASE_URL` | Custom DashScope endpoint |
|
||||
| `REPLICATE_BASE_URL` | Custom Replicate endpoint |
|
||||
| `BAOYU_IMAGE_GEN_MAX_WORKERS` | Override batch worker cap |
|
||||
| `BAOYU_IMAGE_GEN_<PROVIDER>_CONCURRENCY` | Override provider concurrency, e.g. `BAOYU_IMAGE_GEN_REPLICATE_CONCURRENCY` |
|
||||
| `BAOYU_IMAGE_GEN_<PROVIDER>_START_INTERVAL_MS` | Override provider start gap, e.g. `BAOYU_IMAGE_GEN_REPLICATE_START_INTERVAL_MS` |
|
||||
|
||||
**Load Priority**: CLI args > EXTEND.md > env vars > `<cwd>/.baoyu-skills/.env` > `~/.baoyu-skills/.env`
|
||||
|
||||
|
|
@ -187,36 +198,29 @@ Supported: `1:1`, `16:9`, `9:16`, `4:3`, `3:4`, `2.35:1`
|
|||
|
||||
## Generation Mode
|
||||
|
||||
**Default**: Sequential generation (one image at a time). This ensures stable output and easier debugging.
|
||||
**Default**: Sequential generation.
|
||||
|
||||
**Parallel Generation**: Only use when user explicitly requests parallel/concurrent generation.
|
||||
**Batch Parallel Generation**: When `--batchfile` contains 2 or more pending tasks, the script automatically enables parallel generation.
|
||||
|
||||
| Mode | When to Use |
|
||||
|------|-------------|
|
||||
| Sequential (default) | Normal usage, single images, small batches |
|
||||
| Parallel | User explicitly requests, large batches (10+) |
|
||||
| Parallel batch | Batch mode with 2+ tasks |
|
||||
|
||||
**Parallel Settings** (when requested):
|
||||
Parallel behavior:
|
||||
|
||||
| Setting | Value |
|
||||
|---------|-------|
|
||||
| Recommended concurrency | 4 subagents |
|
||||
| Max concurrency | 8 subagents |
|
||||
| Use case | Large batch generation when user requests parallel |
|
||||
|
||||
**Agent Implementation** (parallel mode only):
|
||||
```
|
||||
# Launch multiple generations in parallel using Task tool
|
||||
# Each Task runs as background subagent with run_in_background=true
|
||||
# Collect results via TaskOutput when all complete
|
||||
```
|
||||
- Default worker count is automatic, capped by config, built-in default 10
|
||||
- Provider-specific throttling is applied only in batch mode, and the built-in defaults are tuned for faster throughput while still avoiding obvious RPM bursts
|
||||
- You can override worker count with `--jobs <count>`
|
||||
- Each image retries automatically up to 3 attempts
|
||||
- Final output includes success count, failure count, and per-image failure reasons
|
||||
|
||||
## Error Handling
|
||||
|
||||
- Missing API key → error with setup instructions
|
||||
- Generation failure → auto-retry once
|
||||
- Generation failure → auto-retry up to 3 attempts per image
|
||||
- Invalid aspect ratio → warning, proceed with default
|
||||
- Reference images with unsupported provider/model → error with fix hint (switch to Google multimodal: `gemini-3-pro-image-preview`, `gemini-3.1-flash-image-preview`; or OpenAI GPT Image edits)
|
||||
- Reference images with unsupported provider/model → error with fix hint
|
||||
|
||||
## Extension Support
|
||||
|
||||
|
|
|
|||
|
|
@ -21,9 +21,25 @@ default_image_size: null # 1K|2K|4K|null (Google only, overrides quality)
|
|||
|
||||
default_model:
|
||||
google: null # e.g., "gemini-3-pro-image-preview", "gemini-3.1-flash-image-preview"
|
||||
openai: null # e.g., "gpt-image-1.5"
|
||||
openai: null # e.g., "gpt-image-1.5", "gpt-image-1"
|
||||
dashscope: null # e.g., "z-image-turbo"
|
||||
replicate: null # e.g., "google/nano-banana-pro"
|
||||
|
||||
batch:
|
||||
max_workers: 10
|
||||
provider_limits:
|
||||
replicate:
|
||||
concurrency: 5
|
||||
start_interval_ms: 700
|
||||
google:
|
||||
concurrency: 3
|
||||
start_interval_ms: 1100
|
||||
openai:
|
||||
concurrency: 3
|
||||
start_interval_ms: 1100
|
||||
dashscope:
|
||||
concurrency: 3
|
||||
start_interval_ms: 1100
|
||||
---
|
||||
```
|
||||
|
||||
|
|
@ -40,6 +56,9 @@ default_model:
|
|||
| `default_model.openai` | string\|null | null | OpenAI default model |
|
||||
| `default_model.dashscope` | string\|null | null | DashScope default model |
|
||||
| `default_model.replicate` | string\|null | null | Replicate default model |
|
||||
| `batch.max_workers` | int\|null | 10 | Batch worker cap |
|
||||
| `batch.provider_limits.<provider>.concurrency` | int\|null | provider default | Max simultaneous requests per provider |
|
||||
| `batch.provider_limits.<provider>.start_interval_ms` | int\|null | provider default | Minimum gap between request starts per provider |
|
||||
|
||||
## Examples
|
||||
|
||||
|
|
@ -65,5 +84,11 @@ default_model:
|
|||
openai: "gpt-image-1.5"
|
||||
dashscope: "z-image-turbo"
|
||||
replicate: "google/nano-banana-pro"
|
||||
batch:
|
||||
max_workers: 10
|
||||
provider_limits:
|
||||
replicate:
|
||||
concurrency: 5
|
||||
start_interval_ms: 700
|
||||
---
|
||||
```
|
||||
|
|
|
|||
|
|
@ -2,34 +2,99 @@ import path from "node:path";
|
|||
import process from "node:process";
|
||||
import { homedir } from "node:os";
|
||||
import { access, mkdir, readFile, writeFile } from "node:fs/promises";
|
||||
import type { CliArgs, Provider, ExtendConfig } from "./types";
|
||||
import type {
|
||||
BatchFile,
|
||||
BatchTaskInput,
|
||||
CliArgs,
|
||||
ExtendConfig,
|
||||
Provider,
|
||||
} from "./types";
|
||||
|
||||
// Minimal surface each provider module must expose.
type ProviderModule = {
  getDefaultModel: () => string;
  generateImage: (prompt: string, model: string, args: CliArgs) => Promise<Uint8Array>;
};

// A fully resolved generation task: prompt loaded, provider/model chosen,
// output path normalized — ready to execute.
type PreparedTask = {
  id: string;
  prompt: string;
  args: CliArgs;
  provider: Provider;
  model: string;
  outputPath: string;
  providerModule: ProviderModule;
};

// Outcome of one task after all retry attempts.
type TaskResult = {
  id: string;
  provider: Provider;
  model: string;
  outputPath: string;
  success: boolean;
  attempts: number; // attempts actually made (at most MAX_ATTEMPTS)
  error: string | null; // final error message when success is false
};

// Per-provider throttling knobs used in batch mode.
type ProviderRateLimit = {
  concurrency: number; // max simultaneous in-flight requests
  startIntervalMs: number; // minimum gap between request starts (ms)
};

const MAX_ATTEMPTS = 3; // per-image retry budget in batch mode
const DEFAULT_MAX_WORKERS = 10; // built-in worker cap (config/env can override)
const POLL_WAIT_MS = 250; // NOTE(review): presumably the worker-pool poll interval — usage not visible here, confirm against runBatchTasks
// Built-in throttling defaults; overridable via EXTEND.md batch.provider_limits
// or BAOYU_IMAGE_GEN_<PROVIDER>_* environment variables.
const DEFAULT_PROVIDER_RATE_LIMITS: Record<Provider, ProviderRateLimit> = {
  replicate: { concurrency: 5, startIntervalMs: 700 },
  google: { concurrency: 3, startIntervalMs: 1100 },
  openai: { concurrency: 3, startIntervalMs: 1100 },
  dashscope: { concurrency: 3, startIntervalMs: 1100 },
};
|
||||
|
||||
function printUsage(): void {
|
||||
console.log(`Usage:
|
||||
npx -y bun scripts/main.ts --prompt "A cat" --image cat.png
|
||||
npx -y bun scripts/main.ts --prompt "A landscape" --image landscape.png --ar 16:9
|
||||
npx -y bun scripts/main.ts --promptfiles system.md content.md --image out.png
|
||||
npx -y bun scripts/main.ts --batchfile batch.json
|
||||
|
||||
Options:
|
||||
-p, --prompt <text> Prompt text
|
||||
--promptfiles <files...> Read prompt from files (concatenated)
|
||||
--image <path> Output image path (required)
|
||||
--image <path> Output image path (required in single-image mode)
|
||||
--batchfile <path> JSON batch file for multi-image generation
|
||||
--jobs <count> Worker count for batch mode (default: auto, max from config, built-in default 10)
|
||||
--provider google|openai|dashscope|replicate Force provider (auto-detect by default)
|
||||
-m, --model <id> Model ID
|
||||
--ar <ratio> Aspect ratio (e.g., 16:9, 1:1, 4:3)
|
||||
--size <WxH> Size (e.g., 1024x1024)
|
||||
--quality normal|2k Quality preset (default: 2k)
|
||||
--imageSize 1K|2K|4K Image size for Google (default: from quality)
|
||||
--ref <files...> Reference images (Google multimodal or OpenAI edits)
|
||||
--n <count> Number of images (default: 1)
|
||||
--ref <files...> Reference images (Google multimodal, OpenAI GPT Image edits, or Replicate)
|
||||
--n <count> Number of images for the current task (default: 1)
|
||||
--json JSON output
|
||||
-h, --help Show help
|
||||
|
||||
Batch file format:
|
||||
[
|
||||
{
|
||||
"id": "hero",
|
||||
"promptFiles": ["prompts/hero.md"],
|
||||
"image": "out/hero.png",
|
||||
"provider": "replicate",
|
||||
"model": "google/nano-banana-pro",
|
||||
"ar": "16:9"
|
||||
}
|
||||
]
|
||||
|
||||
Behavior:
|
||||
- Batch mode automatically runs in parallel when pending tasks >= 2
|
||||
- Each image retries automatically up to 3 attempts
|
||||
- Batch summary reports success count, failure count, and per-image errors
|
||||
|
||||
Environment variables:
|
||||
OPENAI_API_KEY OpenAI API key
|
||||
GOOGLE_API_KEY Google API key
|
||||
GEMINI_API_KEY Gemini API key (alias for GOOGLE_API_KEY)
|
||||
DASHSCOPE_API_KEY DashScope API key (阿里云通义万象)
|
||||
DASHSCOPE_API_KEY DashScope API key
|
||||
REPLICATE_API_TOKEN Replicate API token
|
||||
OPENAI_IMAGE_MODEL Default OpenAI model (gpt-image-1.5)
|
||||
GOOGLE_IMAGE_MODEL Default Google model (gemini-3-pro-image-preview)
|
||||
|
|
@ -40,6 +105,9 @@ Environment variables:
|
|||
GOOGLE_BASE_URL Custom Google endpoint
|
||||
DASHSCOPE_BASE_URL Custom DashScope endpoint
|
||||
REPLICATE_BASE_URL Custom Replicate endpoint
|
||||
BAOYU_IMAGE_GEN_MAX_WORKERS Override batch worker cap
|
||||
BAOYU_IMAGE_GEN_<PROVIDER>_CONCURRENCY Override provider concurrency
|
||||
BAOYU_IMAGE_GEN_<PROVIDER>_START_INTERVAL_MS Override provider start gap in ms
|
||||
|
||||
Env file load order: CLI args > EXTEND.md > process.env > <cwd>/.baoyu-skills/.env > ~/.baoyu-skills/.env`);
|
||||
}
|
||||
|
|
@ -57,6 +125,8 @@ function parseArgs(argv: string[]): CliArgs {
|
|||
imageSize: null,
|
||||
referenceImages: [],
|
||||
n: 1,
|
||||
batchFile: null,
|
||||
jobs: null,
|
||||
json: false,
|
||||
help: false,
|
||||
};
|
||||
|
|
@ -110,9 +180,26 @@ function parseArgs(argv: string[]): CliArgs {
|
|||
continue;
|
||||
}
|
||||
|
||||
if (a === "--batchfile") {
|
||||
const v = argv[++i];
|
||||
if (!v) throw new Error("Missing value for --batchfile");
|
||||
out.batchFile = v;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (a === "--jobs") {
|
||||
const v = argv[++i];
|
||||
if (!v) throw new Error("Missing value for --jobs");
|
||||
out.jobs = parseInt(v, 10);
|
||||
if (isNaN(out.jobs) || out.jobs < 1) throw new Error(`Invalid worker count: ${v}`);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (a === "--provider") {
|
||||
const v = argv[++i];
|
||||
if (v !== "google" && v !== "openai" && v !== "dashscope" && v !== "replicate") throw new Error(`Invalid provider: ${v}`);
|
||||
if (v !== "google" && v !== "openai" && v !== "dashscope" && v !== "replicate") {
|
||||
throw new Error(`Invalid provider: ${v}`);
|
||||
}
|
||||
out.provider = v;
|
||||
continue;
|
||||
}
|
||||
|
|
@ -228,9 +315,11 @@ function parseSimpleYaml(yaml: string): Partial<ExtendConfig> {
|
|||
const config: Partial<ExtendConfig> = {};
|
||||
const lines = yaml.split("\n");
|
||||
let currentKey: string | null = null;
|
||||
let currentProvider: Provider | null = null;
|
||||
|
||||
for (const line of lines) {
|
||||
const trimmed = line.trim();
|
||||
const indent = line.match(/^\s*/)?.[0].length ?? 0;
|
||||
if (!trimmed || trimmed.startsWith("#")) continue;
|
||||
|
||||
if (trimmed.includes(":") && !trimmed.startsWith("-")) {
|
||||
|
|
@ -247,18 +336,57 @@ function parseSimpleYaml(yaml: string): Partial<ExtendConfig> {
|
|||
} else if (key === "default_provider") {
|
||||
config.default_provider = value === "null" ? null : (value as Provider);
|
||||
} else if (key === "default_quality") {
|
||||
config.default_quality = value === "null" ? null : (value as "normal" | "2k");
|
||||
config.default_quality = value === "null" ? null : value as "normal" | "2k";
|
||||
} else if (key === "default_aspect_ratio") {
|
||||
const cleaned = value.replace(/['"]/g, "");
|
||||
config.default_aspect_ratio = cleaned === "null" ? null : cleaned;
|
||||
} else if (key === "default_image_size") {
|
||||
config.default_image_size = value === "null" ? null : (value as "1K" | "2K" | "4K");
|
||||
config.default_image_size = value === "null" ? null : value as "1K" | "2K" | "4K";
|
||||
} else if (key === "default_model") {
|
||||
config.default_model = { google: null, openai: null, dashscope: null, replicate: null };
|
||||
currentKey = "default_model";
|
||||
} else if (currentKey === "default_model" && (key === "google" || key === "openai" || key === "dashscope" || key === "replicate")) {
|
||||
currentProvider = null;
|
||||
} else if (key === "batch") {
|
||||
config.batch = {};
|
||||
currentKey = "batch";
|
||||
currentProvider = null;
|
||||
} else if (currentKey === "batch" && indent >= 2 && key === "max_workers") {
|
||||
config.batch ??= {};
|
||||
config.batch.max_workers = value === "null" ? null : parseInt(value, 10);
|
||||
} else if (currentKey === "batch" && indent >= 2 && key === "provider_limits") {
|
||||
config.batch ??= {};
|
||||
config.batch.provider_limits ??= {};
|
||||
currentKey = "provider_limits";
|
||||
currentProvider = null;
|
||||
} else if (
|
||||
currentKey === "provider_limits" &&
|
||||
indent >= 4 &&
|
||||
(key === "google" || key === "openai" || key === "dashscope" || key === "replicate")
|
||||
) {
|
||||
config.batch ??= {};
|
||||
config.batch.provider_limits ??= {};
|
||||
config.batch.provider_limits[key] ??= {};
|
||||
currentProvider = key;
|
||||
} else if (
|
||||
currentKey === "default_model" &&
|
||||
(key === "google" || key === "openai" || key === "dashscope" || key === "replicate")
|
||||
) {
|
||||
const cleaned = value.replace(/['"]/g, "");
|
||||
config.default_model![key] = cleaned === "null" ? null : cleaned;
|
||||
} else if (
|
||||
currentKey === "provider_limits" &&
|
||||
currentProvider &&
|
||||
indent >= 6 &&
|
||||
(key === "concurrency" || key === "start_interval_ms")
|
||||
) {
|
||||
config.batch ??= {};
|
||||
config.batch.provider_limits ??= {};
|
||||
const providerLimit = (config.batch.provider_limits[currentProvider] ??= {});
|
||||
if (key === "concurrency") {
|
||||
providerLimit.concurrency = value === "null" ? null : parseInt(value, 10);
|
||||
} else {
|
||||
providerLimit.start_interval_ms = value === "null" ? null : parseInt(value, 10);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -280,7 +408,6 @@ async function loadExtendConfig(): Promise<Partial<ExtendConfig>> {
|
|||
const content = await readFile(p, "utf8");
|
||||
const yaml = extractYamlFrontMatter(content);
|
||||
if (!yaml) continue;
|
||||
|
||||
return parseSimpleYaml(yaml);
|
||||
} catch {
|
||||
continue;
|
||||
|
|
@ -300,6 +427,46 @@ function mergeConfig(args: CliArgs, extend: Partial<ExtendConfig>): CliArgs {
|
|||
};
|
||||
}
|
||||
|
||||
function parsePositiveInt(value: string | undefined): number | null {
|
||||
if (!value) return null;
|
||||
const parsed = parseInt(value, 10);
|
||||
return Number.isFinite(parsed) && parsed > 0 ? parsed : null;
|
||||
}
|
||||
|
||||
function getConfiguredMaxWorkers(extendConfig: Partial<ExtendConfig>): number {
|
||||
const envValue = parsePositiveInt(process.env.BAOYU_IMAGE_GEN_MAX_WORKERS);
|
||||
const configValue = extendConfig.batch?.max_workers ?? null;
|
||||
return Math.max(1, envValue ?? configValue ?? DEFAULT_MAX_WORKERS);
|
||||
}
|
||||
|
||||
function getConfiguredProviderRateLimits(
|
||||
extendConfig: Partial<ExtendConfig>
|
||||
): Record<Provider, ProviderRateLimit> {
|
||||
const configured: Record<Provider, ProviderRateLimit> = {
|
||||
replicate: { ...DEFAULT_PROVIDER_RATE_LIMITS.replicate },
|
||||
google: { ...DEFAULT_PROVIDER_RATE_LIMITS.google },
|
||||
openai: { ...DEFAULT_PROVIDER_RATE_LIMITS.openai },
|
||||
dashscope: { ...DEFAULT_PROVIDER_RATE_LIMITS.dashscope },
|
||||
};
|
||||
|
||||
for (const provider of ["replicate", "google", "openai", "dashscope"] as Provider[]) {
|
||||
const envPrefix = `BAOYU_IMAGE_GEN_${provider.toUpperCase()}`;
|
||||
const extendLimit = extendConfig.batch?.provider_limits?.[provider];
|
||||
configured[provider] = {
|
||||
concurrency:
|
||||
parsePositiveInt(process.env[`${envPrefix}_CONCURRENCY`]) ??
|
||||
extendLimit?.concurrency ??
|
||||
configured[provider].concurrency,
|
||||
startIntervalMs:
|
||||
parsePositiveInt(process.env[`${envPrefix}_START_INTERVAL_MS`]) ??
|
||||
extendLimit?.start_interval_ms ??
|
||||
configured[provider].startIntervalMs,
|
||||
};
|
||||
}
|
||||
|
||||
return configured;
|
||||
}
|
||||
|
||||
async function readPromptFromFiles(files: string[]): Promise<string> {
|
||||
const parts: string[] = [];
|
||||
for (const f of files) {
|
||||
|
|
@ -311,9 +478,12 @@ async function readPromptFromFiles(files: string[]): Promise<string> {
|
|||
async function readPromptFromStdin(): Promise<string | null> {
|
||||
if (process.stdin.isTTY) return null;
|
||||
try {
|
||||
const t = await Bun.stdin.text();
|
||||
const v = t.trim();
|
||||
return v.length > 0 ? v : null;
|
||||
const chunks: Buffer[] = [];
|
||||
for await (const chunk of process.stdin) {
|
||||
chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk));
|
||||
}
|
||||
const value = Buffer.concat(chunks).toString("utf8").trim();
|
||||
return value.length > 0 ? value : null;
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
|
|
@ -327,7 +497,13 @@ function normalizeOutputImagePath(p: string): string {
|
|||
}
|
||||
|
||||
function detectProvider(args: CliArgs): Provider {
|
||||
if (args.referenceImages.length > 0 && args.provider && args.provider !== "google" && args.provider !== "openai" && args.provider !== "replicate") {
|
||||
if (
|
||||
args.referenceImages.length > 0 &&
|
||||
args.provider &&
|
||||
args.provider !== "google" &&
|
||||
args.provider !== "openai" &&
|
||||
args.provider !== "replicate"
|
||||
) {
|
||||
throw new Error(
|
||||
"Reference images require a ref-capable provider. Use --provider google (Gemini multimodal), --provider openai (GPT Image edits), or --provider replicate."
|
||||
);
|
||||
|
|
@ -349,13 +525,18 @@ function detectProvider(args: CliArgs): Provider {
|
|||
);
|
||||
}
|
||||
|
||||
const available = [hasGoogle && "google", hasOpenai && "openai", hasDashscope && "dashscope", hasReplicate && "replicate"].filter(Boolean) as Provider[];
|
||||
const available = [
|
||||
hasReplicate && "replicate",
|
||||
hasGoogle && "google",
|
||||
hasOpenai && "openai",
|
||||
hasDashscope && "dashscope",
|
||||
].filter(Boolean) as Provider[];
|
||||
|
||||
if (available.length === 1) return available[0]!;
|
||||
if (available.length > 1) return available[0]!;
|
||||
|
||||
throw new Error(
|
||||
"No API key found. Set GOOGLE_API_KEY, GEMINI_API_KEY, OPENAI_API_KEY, DASHSCOPE_API_KEY, or REPLICATE_API_TOKEN.\n" +
|
||||
"No API key found. Set REPLICATE_API_TOKEN, GOOGLE_API_KEY, GEMINI_API_KEY, OPENAI_API_KEY, or DASHSCOPE_API_KEY.\n" +
|
||||
"Create ~/.baoyu-skills/.env or <cwd>/.baoyu-skills/.env with your keys."
|
||||
);
|
||||
}
|
||||
|
|
@ -371,11 +552,6 @@ async function validateReferenceImages(referenceImages: string[]): Promise<void>
|
|||
}
|
||||
}
|
||||
|
||||
type ProviderModule = {
|
||||
getDefaultModel: () => string;
|
||||
generateImage: (prompt: string, model: string, args: CliArgs) => Promise<Uint8Array>;
|
||||
};
|
||||
|
||||
function isRetryableGenerationError(error: unknown): boolean {
|
||||
const msg = error instanceof Error ? error.message : String(error);
|
||||
const nonRetryableMarkers = [
|
||||
|
|
@ -384,26 +560,328 @@ function isRetryableGenerationError(error: unknown): boolean {
|
|||
"only supported",
|
||||
"No API key found",
|
||||
"is required",
|
||||
"Invalid ",
|
||||
"Unexpected ",
|
||||
"API error (400)",
|
||||
"API error (401)",
|
||||
"API error (402)",
|
||||
"API error (403)",
|
||||
"API error (404)",
|
||||
"temporarily disabled",
|
||||
];
|
||||
return !nonRetryableMarkers.some((marker) => msg.includes(marker));
|
||||
}
|
||||
|
||||
async function loadProviderModule(provider: Provider): Promise<ProviderModule> {
|
||||
if (provider === "google") {
|
||||
return (await import("./providers/google")) as ProviderModule;
|
||||
}
|
||||
if (provider === "dashscope") {
|
||||
return (await import("./providers/dashscope")) as ProviderModule;
|
||||
}
|
||||
if (provider === "replicate") {
|
||||
return (await import("./providers/replicate")) as ProviderModule;
|
||||
}
|
||||
if (provider === "google") return (await import("./providers/google")) as ProviderModule;
|
||||
if (provider === "dashscope") return (await import("./providers/dashscope")) as ProviderModule;
|
||||
if (provider === "replicate") return (await import("./providers/replicate")) as ProviderModule;
|
||||
return (await import("./providers/openai")) as ProviderModule;
|
||||
}
|
||||
|
||||
async function loadPromptForArgs(args: CliArgs): Promise<string | null> {
|
||||
let prompt: string | null = args.prompt;
|
||||
if (!prompt && args.promptFiles.length > 0) {
|
||||
prompt = await readPromptFromFiles(args.promptFiles);
|
||||
}
|
||||
return prompt;
|
||||
}
|
||||
|
||||
function getModelForProvider(
|
||||
provider: Provider,
|
||||
requestedModel: string | null,
|
||||
extendConfig: Partial<ExtendConfig>,
|
||||
providerModule: ProviderModule
|
||||
): string {
|
||||
if (requestedModel) return requestedModel;
|
||||
if (extendConfig.default_model) {
|
||||
if (provider === "google" && extendConfig.default_model.google) return extendConfig.default_model.google;
|
||||
if (provider === "openai" && extendConfig.default_model.openai) return extendConfig.default_model.openai;
|
||||
if (provider === "dashscope" && extendConfig.default_model.dashscope) return extendConfig.default_model.dashscope;
|
||||
if (provider === "replicate" && extendConfig.default_model.replicate) return extendConfig.default_model.replicate;
|
||||
}
|
||||
return providerModule.getDefaultModel();
|
||||
}
|
||||
|
||||
async function prepareSingleTask(args: CliArgs, extendConfig: Partial<ExtendConfig>): Promise<PreparedTask> {
|
||||
if (!args.quality) args.quality = "2k";
|
||||
|
||||
const prompt = (await loadPromptForArgs(args)) ?? (await readPromptFromStdin());
|
||||
if (!prompt) throw new Error("Prompt is required");
|
||||
if (!args.imagePath) throw new Error("--image is required");
|
||||
if (args.referenceImages.length > 0) await validateReferenceImages(args.referenceImages);
|
||||
|
||||
const provider = detectProvider(args);
|
||||
const providerModule = await loadProviderModule(provider);
|
||||
const model = getModelForProvider(provider, args.model, extendConfig, providerModule);
|
||||
|
||||
return {
|
||||
id: "single",
|
||||
prompt,
|
||||
args,
|
||||
provider,
|
||||
model,
|
||||
outputPath: normalizeOutputImagePath(args.imagePath),
|
||||
providerModule,
|
||||
};
|
||||
}
|
||||
|
||||
async function loadBatchTasks(batchFilePath: string): Promise<BatchTaskInput[]> {
|
||||
const content = await readFile(path.resolve(batchFilePath), "utf8");
|
||||
const parsed = JSON.parse(content.replace(/^\uFEFF/, "")) as BatchFile;
|
||||
if (Array.isArray(parsed)) return parsed;
|
||||
if (parsed && typeof parsed === "object" && Array.isArray(parsed.tasks)) return parsed.tasks;
|
||||
throw new Error("Invalid batch file. Expected an array of tasks or an object with a tasks array.");
|
||||
}
|
||||
|
||||
function createTaskArgs(baseArgs: CliArgs, task: BatchTaskInput): CliArgs {
|
||||
return {
|
||||
...baseArgs,
|
||||
prompt: task.prompt ?? null,
|
||||
promptFiles: task.promptFiles ? [...task.promptFiles] : [],
|
||||
imagePath: task.image ?? null,
|
||||
provider: task.provider ?? baseArgs.provider ?? null,
|
||||
model: task.model ?? baseArgs.model ?? null,
|
||||
aspectRatio: task.ar ?? baseArgs.aspectRatio ?? null,
|
||||
size: task.size ?? baseArgs.size ?? null,
|
||||
quality: task.quality ?? baseArgs.quality ?? null,
|
||||
imageSize: task.imageSize ?? baseArgs.imageSize ?? null,
|
||||
referenceImages: task.ref ? [...task.ref] : [],
|
||||
n: task.n ?? baseArgs.n,
|
||||
batchFile: null,
|
||||
jobs: baseArgs.jobs,
|
||||
json: baseArgs.json,
|
||||
help: false,
|
||||
};
|
||||
}
|
||||
|
||||
async function prepareBatchTasks(
|
||||
args: CliArgs,
|
||||
extendConfig: Partial<ExtendConfig>
|
||||
): Promise<PreparedTask[]> {
|
||||
if (!args.batchFile) throw new Error("--batchfile is required in batch mode");
|
||||
const taskInputs = await loadBatchTasks(args.batchFile);
|
||||
if (taskInputs.length === 0) throw new Error("Batch file does not contain any tasks.");
|
||||
|
||||
const prepared: PreparedTask[] = [];
|
||||
for (let i = 0; i < taskInputs.length; i++) {
|
||||
const task = taskInputs[i]!;
|
||||
const taskArgs = createTaskArgs(args, task);
|
||||
const prompt = await loadPromptForArgs(taskArgs);
|
||||
if (!prompt) throw new Error(`Task ${i + 1} is missing prompt or promptFiles.`);
|
||||
if (!taskArgs.imagePath) throw new Error(`Task ${i + 1} is missing image output path.`);
|
||||
if (taskArgs.referenceImages.length > 0) await validateReferenceImages(taskArgs.referenceImages);
|
||||
|
||||
const provider = detectProvider(taskArgs);
|
||||
const providerModule = await loadProviderModule(provider);
|
||||
const model = getModelForProvider(provider, taskArgs.model, extendConfig, providerModule);
|
||||
prepared.push({
|
||||
id: task.id || `task-${String(i + 1).padStart(2, "0")}`,
|
||||
prompt,
|
||||
args: taskArgs,
|
||||
provider,
|
||||
model,
|
||||
outputPath: normalizeOutputImagePath(taskArgs.imagePath),
|
||||
providerModule,
|
||||
});
|
||||
}
|
||||
|
||||
return prepared;
|
||||
}
|
||||
|
||||
async function writeImage(outputPath: string, imageData: Uint8Array): Promise<void> {
|
||||
await mkdir(path.dirname(outputPath), { recursive: true });
|
||||
await writeFile(outputPath, imageData);
|
||||
}
|
||||
|
||||
async function generatePreparedTask(task: PreparedTask): Promise<TaskResult> {
|
||||
console.error(`Using ${task.provider} / ${task.model} for ${task.id}`);
|
||||
console.error(
|
||||
`Switch model: --model <id> | EXTEND.md default_model.${task.provider} | env ${task.provider.toUpperCase()}_IMAGE_MODEL`
|
||||
);
|
||||
|
||||
let attempts = 0;
|
||||
while (attempts < MAX_ATTEMPTS) {
|
||||
attempts += 1;
|
||||
try {
|
||||
const imageData = await task.providerModule.generateImage(task.prompt, task.model, task.args);
|
||||
await writeImage(task.outputPath, imageData);
|
||||
return {
|
||||
id: task.id,
|
||||
provider: task.provider,
|
||||
model: task.model,
|
||||
outputPath: task.outputPath,
|
||||
success: true,
|
||||
attempts,
|
||||
error: null,
|
||||
};
|
||||
} catch (error) {
|
||||
const message = error instanceof Error ? error.message : String(error);
|
||||
const canRetry = attempts < MAX_ATTEMPTS && isRetryableGenerationError(error);
|
||||
if (canRetry) {
|
||||
console.error(`[${task.id}] Attempt ${attempts}/${MAX_ATTEMPTS} failed, retrying...`);
|
||||
continue;
|
||||
}
|
||||
return {
|
||||
id: task.id,
|
||||
provider: task.provider,
|
||||
model: task.model,
|
||||
outputPath: task.outputPath,
|
||||
success: false,
|
||||
attempts,
|
||||
error: message,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
id: task.id,
|
||||
provider: task.provider,
|
||||
model: task.model,
|
||||
outputPath: task.outputPath,
|
||||
success: false,
|
||||
attempts: MAX_ATTEMPTS,
|
||||
error: "Unknown failure",
|
||||
};
|
||||
}
|
||||
|
||||
function createProviderGate(providerRateLimits: Record<Provider, ProviderRateLimit>) {
|
||||
const state = new Map<Provider, { active: number; lastStartedAt: number }>();
|
||||
|
||||
return async function acquire(provider: Provider): Promise<() => void> {
|
||||
const limit = providerRateLimits[provider];
|
||||
while (true) {
|
||||
const current = state.get(provider) ?? { active: 0, lastStartedAt: 0 };
|
||||
const now = Date.now();
|
||||
const enoughCapacity = current.active < limit.concurrency;
|
||||
const enoughGap = now - current.lastStartedAt >= limit.startIntervalMs;
|
||||
if (enoughCapacity && enoughGap) {
|
||||
state.set(provider, { active: current.active + 1, lastStartedAt: now });
|
||||
return () => {
|
||||
const latest = state.get(provider) ?? { active: 1, lastStartedAt: now };
|
||||
state.set(provider, {
|
||||
active: Math.max(0, latest.active - 1),
|
||||
lastStartedAt: latest.lastStartedAt,
|
||||
});
|
||||
};
|
||||
}
|
||||
await new Promise((resolve) => setTimeout(resolve, POLL_WAIT_MS));
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
function getWorkerCount(taskCount: number, jobs: number | null, maxWorkers: number): number {
|
||||
const requested = jobs ?? Math.min(taskCount, maxWorkers);
|
||||
return Math.max(1, Math.min(requested, taskCount, maxWorkers));
|
||||
}
|
||||
|
||||
async function runBatchTasks(
|
||||
tasks: PreparedTask[],
|
||||
jobs: number | null,
|
||||
extendConfig: Partial<ExtendConfig>
|
||||
): Promise<TaskResult[]> {
|
||||
if (tasks.length === 1) {
|
||||
return [await generatePreparedTask(tasks[0]!)];
|
||||
}
|
||||
|
||||
const maxWorkers = getConfiguredMaxWorkers(extendConfig);
|
||||
const providerRateLimits = getConfiguredProviderRateLimits(extendConfig);
|
||||
const acquireProvider = createProviderGate(providerRateLimits);
|
||||
const workerCount = getWorkerCount(tasks.length, jobs, maxWorkers);
|
||||
console.error(`Batch mode: ${tasks.length} tasks, ${workerCount} workers, parallel mode enabled.`);
|
||||
for (const provider of ["replicate", "google", "openai", "dashscope"] as Provider[]) {
|
||||
const limit = providerRateLimits[provider];
|
||||
console.error(`- ${provider}: concurrency=${limit.concurrency}, startIntervalMs=${limit.startIntervalMs}`);
|
||||
}
|
||||
|
||||
let nextIndex = 0;
|
||||
const results: TaskResult[] = new Array(tasks.length);
|
||||
|
||||
const worker = async (): Promise<void> => {
|
||||
while (true) {
|
||||
const currentIndex = nextIndex;
|
||||
nextIndex += 1;
|
||||
if (currentIndex >= tasks.length) return;
|
||||
|
||||
const task = tasks[currentIndex]!;
|
||||
const release = await acquireProvider(task.provider);
|
||||
try {
|
||||
results[currentIndex] = await generatePreparedTask(task);
|
||||
} finally {
|
||||
release();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
await Promise.all(Array.from({ length: workerCount }, () => worker()));
|
||||
return results;
|
||||
}
|
||||
|
||||
function printBatchSummary(results: TaskResult[]): void {
|
||||
const successCount = results.filter((result) => result.success).length;
|
||||
const failureCount = results.length - successCount;
|
||||
|
||||
console.error("");
|
||||
console.error("Batch generation summary:");
|
||||
console.error(`- Total: ${results.length}`);
|
||||
console.error(`- Succeeded: ${successCount}`);
|
||||
console.error(`- Failed: ${failureCount}`);
|
||||
|
||||
if (failureCount > 0) {
|
||||
console.error("Failure reasons:");
|
||||
for (const result of results.filter((item) => !item.success)) {
|
||||
console.error(`- ${result.id}: ${result.error}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
function emitJson(payload: unknown): void {
|
||||
console.log(JSON.stringify(payload, null, 2));
|
||||
}
|
||||
|
||||
async function runSingleMode(args: CliArgs, extendConfig: Partial<ExtendConfig>): Promise<void> {
|
||||
const task = await prepareSingleTask(args, extendConfig);
|
||||
const result = await generatePreparedTask(task);
|
||||
if (!result.success) {
|
||||
throw new Error(result.error || "Generation failed");
|
||||
}
|
||||
|
||||
if (args.json) {
|
||||
emitJson({
|
||||
savedImage: result.outputPath,
|
||||
provider: result.provider,
|
||||
model: result.model,
|
||||
attempts: result.attempts,
|
||||
prompt: task.prompt.slice(0, 200),
|
||||
});
|
||||
return;
|
||||
}
|
||||
|
||||
console.log(result.outputPath);
|
||||
}
|
||||
|
||||
async function runBatchMode(args: CliArgs, extendConfig: Partial<ExtendConfig>): Promise<void> {
|
||||
const tasks = await prepareBatchTasks(args, extendConfig);
|
||||
const results = await runBatchTasks(tasks, args.jobs, extendConfig);
|
||||
printBatchSummary(results);
|
||||
|
||||
if (args.json) {
|
||||
emitJson({
|
||||
mode: "batch",
|
||||
total: results.length,
|
||||
succeeded: results.filter((item) => item.success).length,
|
||||
failed: results.filter((item) => !item.success).length,
|
||||
results,
|
||||
});
|
||||
}
|
||||
|
||||
if (results.some((item) => !item.success)) {
|
||||
process.exitCode = 1;
|
||||
}
|
||||
}
|
||||
|
||||
async function main(): Promise<void> {
|
||||
const args = parseArgs(process.argv.slice(2));
|
||||
|
||||
if (args.help) {
|
||||
printUsage();
|
||||
return;
|
||||
|
|
@ -412,86 +890,18 @@ async function main(): Promise<void> {
|
|||
await loadEnv();
|
||||
const extendConfig = await loadExtendConfig();
|
||||
const mergedArgs = mergeConfig(args, extendConfig);
|
||||
|
||||
if (!mergedArgs.quality) mergedArgs.quality = "2k";
|
||||
|
||||
let prompt: string | null = mergedArgs.prompt;
|
||||
if (!prompt && mergedArgs.promptFiles.length > 0) prompt = await readPromptFromFiles(mergedArgs.promptFiles);
|
||||
if (!prompt) prompt = await readPromptFromStdin();
|
||||
|
||||
if (!prompt) {
|
||||
console.error("Error: Prompt is required");
|
||||
printUsage();
|
||||
process.exitCode = 1;
|
||||
if (mergedArgs.batchFile) {
|
||||
await runBatchMode(mergedArgs, extendConfig);
|
||||
return;
|
||||
}
|
||||
|
||||
if (!mergedArgs.imagePath) {
|
||||
console.error("Error: --image is required");
|
||||
printUsage();
|
||||
process.exitCode = 1;
|
||||
return;
|
||||
}
|
||||
|
||||
if (mergedArgs.referenceImages.length > 0) {
|
||||
await validateReferenceImages(mergedArgs.referenceImages);
|
||||
}
|
||||
|
||||
const provider = detectProvider(mergedArgs);
|
||||
const providerModule = await loadProviderModule(provider);
|
||||
|
||||
let model = mergedArgs.model;
|
||||
if (!model && extendConfig.default_model) {
|
||||
if (provider === "google") model = extendConfig.default_model.google ?? null;
|
||||
if (provider === "openai") model = extendConfig.default_model.openai ?? null;
|
||||
if (provider === "dashscope") model = extendConfig.default_model.dashscope ?? null;
|
||||
if (provider === "replicate") model = extendConfig.default_model.replicate ?? null;
|
||||
}
|
||||
model = model || providerModule.getDefaultModel();
|
||||
|
||||
const outputPath = normalizeOutputImagePath(mergedArgs.imagePath);
|
||||
|
||||
let imageData: Uint8Array;
|
||||
let retried = false;
|
||||
|
||||
while (true) {
|
||||
try {
|
||||
imageData = await providerModule.generateImage(prompt, model, mergedArgs);
|
||||
break;
|
||||
} catch (e) {
|
||||
if (!retried && isRetryableGenerationError(e)) {
|
||||
retried = true;
|
||||
console.error("Generation failed, retrying...");
|
||||
continue;
|
||||
}
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
|
||||
const dir = path.dirname(outputPath);
|
||||
await mkdir(dir, { recursive: true });
|
||||
await writeFile(outputPath, imageData);
|
||||
|
||||
if (mergedArgs.json) {
|
||||
console.log(
|
||||
JSON.stringify(
|
||||
{
|
||||
savedImage: outputPath,
|
||||
provider,
|
||||
model,
|
||||
prompt: prompt.slice(0, 200),
|
||||
},
|
||||
null,
|
||||
2
|
||||
)
|
||||
);
|
||||
} else {
|
||||
console.log(outputPath);
|
||||
}
|
||||
await runSingleMode(mergedArgs, extendConfig);
|
||||
}
|
||||
|
||||
main().catch((e) => {
|
||||
const msg = e instanceof Error ? e.message : String(e);
|
||||
console.error(msg);
|
||||
main().catch((error) => {
|
||||
const message = error instanceof Error ? error.message : String(error);
|
||||
console.error(message);
|
||||
process.exit(1);
|
||||
});
|
||||
|
|
|
|||
|
|
@ -68,7 +68,11 @@ export async function generateImage(
|
|||
const baseURL = process.env.OPENAI_BASE_URL || "https://api.openai.com/v1";
|
||||
const apiKey = process.env.OPENAI_API_KEY;
|
||||
|
||||
if (!apiKey) throw new Error("OPENAI_API_KEY is required");
|
||||
if (!apiKey) {
|
||||
throw new Error(
|
||||
"OPENAI_API_KEY is required. Codex/ChatGPT desktop login does not automatically grant OpenAI Images API access to this script."
|
||||
);
|
||||
}
|
||||
|
||||
if (process.env.OPENAI_IMAGE_USE_CHAT === "true") {
|
||||
return generateWithChatCompletions(baseURL, apiKey, prompt, model);
|
||||
|
|
|
|||
|
|
@ -36,22 +36,24 @@ function buildInput(prompt: string, args: CliArgs, referenceImages: string[]): R
|
|||
|
||||
if (args.aspectRatio) {
|
||||
input.aspect_ratio = args.aspectRatio;
|
||||
} else if (referenceImages.length > 0) {
|
||||
input.aspect_ratio = "match_input_image";
|
||||
}
|
||||
|
||||
if (args.n > 1) {
|
||||
input.number_of_images = args.n;
|
||||
}
|
||||
|
||||
if (args.quality === "normal") {
|
||||
input.resolution = "1K";
|
||||
} else if (args.quality === "2k") {
|
||||
input.resolution = "2K";
|
||||
}
|
||||
|
||||
input.output_format = "png";
|
||||
|
||||
if (referenceImages.length > 0) {
|
||||
if (referenceImages.length === 1) {
|
||||
input.image = referenceImages[0];
|
||||
} else {
|
||||
for (let i = 0; i < referenceImages.length; i++) {
|
||||
input[`image${i > 0 ? i + 1 : ""}`] = referenceImages[i];
|
||||
}
|
||||
}
|
||||
input.image_input = referenceImages;
|
||||
}
|
||||
|
||||
return input;
|
||||
|
|
|
|||
|
|
@ -13,10 +13,29 @@ export type CliArgs = {
|
|||
imageSize: string | null;
|
||||
referenceImages: string[];
|
||||
n: number;
|
||||
batchFile: string | null;
|
||||
jobs: number | null;
|
||||
json: boolean;
|
||||
help: boolean;
|
||||
};
|
||||
|
||||
/**
 * One entry in a --batchfile task list. Unset fields fall back to the
 * CLI-level defaults when the per-task args are assembled (see createTaskArgs).
 */
export type BatchTaskInput = {
  id?: string; // stable task identifier; auto-generated as "task-NN" when omitted
  prompt?: string | null; // inline prompt text
  promptFiles?: string[]; // prompt file paths, used when `prompt` is absent
  image?: string; // output image path — required for every task
  provider?: Provider | null; // per-task provider override
  model?: string | null; // per-task model override
  ar?: string | null; // aspect ratio (mapped onto CliArgs.aspectRatio)
  size?: string | null;
  quality?: Quality | null;
  imageSize?: "1K" | "2K" | "4K" | null;
  ref?: string[]; // reference image paths (mapped onto CliArgs.referenceImages)
  n?: number; // number of images to generate
};
|
||||
|
||||
/** A batch file is either a bare array of tasks or an object wrapping a `tasks` array. */
export type BatchFile = BatchTaskInput[] | { tasks: BatchTaskInput[] };
|
||||
|
||||
export type ExtendConfig = {
|
||||
version: number;
|
||||
default_provider: Provider | null;
|
||||
|
|
@ -29,4 +48,16 @@ export type ExtendConfig = {
|
|||
dashscope: string | null;
|
||||
replicate: string | null;
|
||||
};
|
||||
batch?: {
|
||||
max_workers?: number | null;
|
||||
provider_limits?: Partial<
|
||||
Record<
|
||||
Provider,
|
||||
{
|
||||
concurrency?: number | null;
|
||||
start_interval_ms?: number | null;
|
||||
}
|
||||
>
|
||||
>;
|
||||
};
|
||||
};
|
||||
|
|
|
|||
|
|
@ -212,6 +212,7 @@ Before translating chunks:
|
|||
- **Natural flow**: Use idiomatic target language word order and sentence patterns; break or restructure sentences freely when the source structure doesn't work naturally in the target language
|
||||
- **Terminology**: Use standard translations; annotate with original term in parentheses on first occurrence
|
||||
- **Preserve format**: Keep all markdown formatting (headings, bold, italic, images, links, code blocks)
|
||||
- **Image-language awareness**: Preserve image references exactly during translation. After the translation is complete, review the referenced images and check whether the main text language each image likely contains still matches the language of the translated article
|
||||
- **Frontmatter transformation**: If the source has YAML frontmatter, preserve it in the translation with these changes: (1) Rename metadata fields that describe the *source* article — `url`→`sourceUrl`, `title`→`sourceTitle`, `description`→`sourceDescription`, `author`→`sourceAuthor`, `date`→`sourceDate`, and any similar origin-metadata fields — by adding a `source` prefix (camelCase). (2) Translate the values of text fields (title, description, etc.) and add them as new top-level fields. (3) Keep other fields (tags, categories, custom fields) as-is, translating their values where appropriate
|
||||
- **Respect original**: Maintain original meaning and intent; do not add, remove, or editorialize — but sentence structure and imagery may be adapted freely to serve the meaning
|
||||
- **Translator's notes**: For terms, concepts, or cultural references that target readers may not understand — due to jargon, cultural gaps, or domain-specific knowledge — add a concise explanatory note in parentheses immediately after the term. The note should explain *what it means* in plain language, not just provide the English original. Format: `译文(English original,通俗解释)`. Calibrate annotation depth to the target audience: general readers need more notes than technical readers. Only add notes where genuinely needed; do not over-annotate obvious terms.
|
||||
|
|
@ -250,6 +251,20 @@ Each step reads the previous step's file and builds on it.
|
|||
|
||||
Final translation is always at `translation.md` in the output directory.
|
||||
|
||||
After the final translation is written, do a lightweight image-language pass:
|
||||
|
||||
1. Collect image references from the translated article
|
||||
2. Identify likely text-heavy images such as covers, screenshots, diagrams, charts, frameworks, and infographics
|
||||
3. If any image likely contains a main text language that does not match the translated article language, proactively remind the user
|
||||
4. The reminder must be a list only. Do not automatically localize those images unless the user asks
|
||||
|
||||
Reminder format:
|
||||
```text
|
||||
Possible image localization needed:
|
||||
- ![[attachments/example-cover.png]]: likely still contains source-language text while the article is now in target language
|
||||
- ![[attachments/example-diagram.png]]: likely text-heavy framework graphic, check whether labels need translation
|
||||
```
|
||||
|
||||
Display summary:
|
||||
```
|
||||
**Translation complete** ({mode} mode)
|
||||
|
|
@ -261,6 +276,8 @@ Final: {output-dir}/translation.md
|
|||
Glossary terms applied: {count}
|
||||
```
|
||||
|
||||
If mismatched image-language candidates were found, append a short note after the summary telling the user that some embedded images may still need image-text localization, followed by the candidate list.
|
||||
|
||||
## Extension Support
|
||||
|
||||
Custom configurations via EXTEND.md. See **Preferences** section for paths and supported options.
|
||||
|
|
|
|||
Loading…
Reference in New Issue