Review revision of the "add Azure OpenAI provider" patch.

Changes relative to the submitted patch (providers/azure.ts unless noted):
- Restored the type arguments on Promise/Record annotations so the file compiles;
  the image return type is derived from extractImageFromResponse.
- Default api-version is now 2025-04-01-preview (help text in main.ts updated to
  match); 2024-02-01 predates the GPT-image models and the /images/edits route.
- buildURL parses AZURE_OPENAI_BASE_URL instead of concatenating strings, so a
  base URL that already has a query string (or its own api-version) still works.
- Both request paths share one response handler whose errors include the HTTP status.
- Dropped the unused model/quality parameters from the two private helpers.
The index lines keep the blob ids of the submitted patch.

diff --git a/skills/baoyu-image-gen/scripts/main.ts b/skills/baoyu-image-gen/scripts/main.ts
index 4f746ce..38591c5 100644
--- a/skills/baoyu-image-gen/scripts/main.ts
+++ b/skills/baoyu-image-gen/scripts/main.ts
@@ -60,6 +60,7 @@ const DEFAULT_PROVIDER_RATE_LIMITS: Record = {
   dashscope: { concurrency: 3, startIntervalMs: 1100 },
   jimeng: { concurrency: 3, startIntervalMs: 1100 },
   seedream: { concurrency: 3, startIntervalMs: 1100 },
+  azure: { concurrency: 3, startIntervalMs: 1100 },
 };
 
 function printUsage(): void {
@@ -74,13 +75,13 @@ Options:
   --image Output image path (required in single-image mode)
   --batchfile JSON batch file for multi-image generation
   --jobs Worker count for batch mode (default: auto, max from config, built-in default 10)
-  --provider google|openai|openrouter|dashscope|replicate|jimeng|seedream Force provider (auto-detect by default)
+  --provider google|openai|openrouter|dashscope|replicate|jimeng|seedream|azure Force provider (auto-detect by default)
   -m, --model Model ID
   --ar Aspect ratio (e.g., 16:9, 1:1, 4:3)
   --size Size (e.g., 1024x1024)
   --quality normal|2k Quality preset (default: 2k)
   --imageSize 1K|2K|4K Image size for Google/OpenRouter (default: from quality)
-  --ref Reference images (Google, OpenAI, OpenRouter, Replicate, or Seedream 4.0/4.5/5.0)
+  --ref Reference images (Google, OpenAI, Azure, OpenRouter, Replicate, or Seedream 4.0/4.5/5.0)
   --n Number of images for the current task (default: 1)
   --json JSON output
   -h, --help Show help
@@ -131,6 +132,10 @@ Environment variables:
   DASHSCOPE_BASE_URL Custom DashScope endpoint
   REPLICATE_BASE_URL Custom Replicate endpoint
   JIMENG_BASE_URL Custom Jimeng endpoint
+  AZURE_OPENAI_API_KEY Azure OpenAI API key
+  AZURE_OPENAI_BASE_URL Azure OpenAI deployment endpoint
+  AZURE_API_VERSION Azure API version (default: 2025-04-01-preview)
+  AZURE_OPENAI_IMAGE_MODEL Default Azure model (gpt-image-1.5)
   SEEDREAM_BASE_URL Custom Seedream endpoint
   BAOYU_IMAGE_GEN_MAX_WORKERS Override batch worker cap
   BAOYU_IMAGE_GEN__CONCURRENCY Override provider concurrency
@@ -231,7 +236,8 @@ export function parseArgs(argv: string[]): CliArgs {
         v !== "dashscope" &&
         v !== "replicate" &&
         v !== "jimeng" &&
-        v !== "seedream"
+        v !== "seedream" &&
+        v !== "azure"
       ) {
         throw new Error(`Invalid provider: ${v}`);
       }
@@ -386,6 +392,7 @@ export function parseSimpleYaml(yaml: string): Partial {
         replicate: null,
         jimeng: null,
         seedream: null,
+        azure: null,
       };
       currentKey = "default_model";
       currentProvider = null;
@@ -411,7 +418,8 @@ export function parseSimpleYaml(yaml: string): Partial {
           key === "dashscope" ||
           key === "replicate" ||
           key === "jimeng" ||
-          key === "seedream"
+          key === "seedream" ||
+          key === "azure"
         )
       ) {
         config.batch ??= {};
@@ -427,7 +435,8 @@ export function parseSimpleYaml(yaml: string): Partial {
           key === "dashscope" ||
           key === "replicate" ||
           key === "jimeng" ||
-          key === "seedream"
+          key === "seedream" ||
+          key === "azure"
         )
       ) {
         const cleaned = value.replace(/['"]/g, "");
@@ -520,9 +529,10 @@ export function getConfiguredProviderRateLimits(
     dashscope: { ...DEFAULT_PROVIDER_RATE_LIMITS.dashscope },
     jimeng: { ...DEFAULT_PROVIDER_RATE_LIMITS.jimeng },
     seedream: { ...DEFAULT_PROVIDER_RATE_LIMITS.seedream },
+    azure: { ...DEFAULT_PROVIDER_RATE_LIMITS.azure },
   };
 
-  for (const provider of ["replicate", "google", "openai", "openrouter", "dashscope", "jimeng", "seedream"] as Provider[]) {
+  for (const provider of ["replicate", "google", "openai", "openrouter", "dashscope", "jimeng", "seedream", "azure"] as Provider[]) {
     const envPrefix = `BAOYU_IMAGE_GEN_${provider.toUpperCase()}`;
     const extendLimit = extendConfig.batch?.provider_limits?.[provider];
     configured[provider] = {
@@ -581,18 +591,20 @@ export function detectProvider(args: CliArgs): Provider {
     args.provider &&
     args.provider !== "google" &&
     args.provider !== "openai" &&
+    args.provider !== "azure" &&
     args.provider !== "openrouter" &&
     args.provider !== "replicate" &&
     args.provider !== "seedream"
   ) {
     throw new Error(
-      "Reference images require a ref-capable provider. Use --provider google (Gemini multimodal), --provider openai (GPT Image edits), --provider openrouter (OpenRouter multimodal), --provider replicate, or --provider seedream for supported Seedream models."
+      "Reference images require a ref-capable provider. Use --provider google (Gemini multimodal), --provider openai (GPT Image edits), --provider azure (Azure OpenAI), --provider openrouter (OpenRouter multimodal), --provider replicate, or --provider seedream for supported Seedream models."
     );
   }
 
   if (args.provider) return args.provider;
 
   const hasGoogle = !!(process.env.GOOGLE_API_KEY || process.env.GEMINI_API_KEY);
+  const hasAzure = !!(process.env.AZURE_OPENAI_API_KEY && process.env.AZURE_OPENAI_BASE_URL);
   const hasOpenai = !!process.env.OPENAI_API_KEY;
   const hasOpenrouter = !!process.env.OPENROUTER_API_KEY;
   const hasDashscope = !!process.env.DASHSCOPE_API_KEY;
@@ -611,17 +623,19 @@ export function detectProvider(args: CliArgs): Provider {
   if (args.referenceImages.length > 0) {
     if (hasGoogle) return "google";
     if (hasOpenai) return "openai";
+    if (hasAzure) return "azure";
     if (hasOpenrouter) return "openrouter";
     if (hasReplicate) return "replicate";
     if (hasSeedream) return "seedream";
     throw new Error(
-      "Reference images require Google, OpenAI, OpenRouter, Replicate, or supported Seedream models. Set GOOGLE_API_KEY/GEMINI_API_KEY, OPENAI_API_KEY, OPENROUTER_API_KEY, REPLICATE_API_TOKEN, or ARK_API_KEY, or remove --ref."
+      "Reference images require Google, OpenAI, Azure, OpenRouter, Replicate, or supported Seedream models. Set GOOGLE_API_KEY/GEMINI_API_KEY, OPENAI_API_KEY, AZURE_OPENAI_API_KEY+AZURE_OPENAI_BASE_URL, OPENROUTER_API_KEY, REPLICATE_API_TOKEN, or ARK_API_KEY, or remove --ref."
     );
   }
 
   const available = [
     hasGoogle && "google",
     hasOpenai && "openai",
+    hasAzure && "azure",
     hasOpenrouter && "openrouter",
     hasDashscope && "dashscope",
     hasReplicate && "replicate",
@@ -633,7 +647,7 @@ export function detectProvider(args: CliArgs): Provider {
   if (available.length > 1) return available[0]!;
 
   throw new Error(
-    "No API key found. Set GOOGLE_API_KEY, GEMINI_API_KEY, OPENAI_API_KEY, OPENROUTER_API_KEY, DASHSCOPE_API_KEY, REPLICATE_API_TOKEN, JIMENG keys, or ARK_API_KEY.\n" +
+    "No API key found. Set GOOGLE_API_KEY, GEMINI_API_KEY, OPENAI_API_KEY, AZURE_OPENAI_API_KEY+AZURE_OPENAI_BASE_URL, OPENROUTER_API_KEY, DASHSCOPE_API_KEY, REPLICATE_API_TOKEN, JIMENG keys, or ARK_API_KEY.\n" +
     "Create ~/.baoyu-skills/.env or /.baoyu-skills/.env with your keys."
   );
 }
@@ -676,6 +690,7 @@ async function loadProviderModule(provider: Provider): Promise {
   if (provider === "openrouter") return (await import("./providers/openrouter")) as ProviderModule;
   if (provider === "jimeng") return (await import("./providers/jimeng")) as ProviderModule;
   if (provider === "seedream") return (await import("./providers/seedream")) as ProviderModule;
+  if (provider === "azure") return (await import("./providers/azure")) as ProviderModule;
   return (await import("./providers/openai")) as ProviderModule;
 }
 
@@ -704,6 +719,7 @@ function getModelForProvider(
     if (provider === "replicate" && extendConfig.default_model.replicate) return extendConfig.default_model.replicate;
     if (provider === "jimeng" && extendConfig.default_model.jimeng) return extendConfig.default_model.jimeng;
     if (provider === "seedream" && extendConfig.default_model.seedream) return extendConfig.default_model.seedream;
+    if (provider === "azure" && extendConfig.default_model.azure) return extendConfig.default_model.azure;
   }
   return providerModule.getDefaultModel();
 }
@@ -923,7 +939,7 @@ async function runBatchTasks(
   const acquireProvider = createProviderGate(providerRateLimits);
   const workerCount = getWorkerCount(tasks.length, jobs, maxWorkers);
   console.error(`Batch mode: ${tasks.length} tasks, ${workerCount} workers, parallel mode enabled.`);
-  for (const provider of ["replicate", "google", "openai", "openrouter", "dashscope", "jimeng", "seedream"] as Provider[]) {
+  for (const provider of ["replicate", "google", "openai", "openrouter", "dashscope", "jimeng", "seedream", "azure"] as Provider[]) {
     const limit = providerRateLimits[provider];
     console.error(`- ${provider}: concurrency=${limit.concurrency}, startIntervalMs=${limit.startIntervalMs}`);
   }
diff --git a/skills/baoyu-image-gen/scripts/providers/azure.ts b/skills/baoyu-image-gen/scripts/providers/azure.ts
new file mode 100644
index 0000000..a3a9732
--- /dev/null
+++ b/skills/baoyu-image-gen/scripts/providers/azure.ts
@@ -0,0 +1,152 @@
+import path from "node:path";
+import { readFile } from "node:fs/promises";
+import type { CliArgs } from "../types";
+import { getOpenAISize, parseAspectRatio, getMimeType, extractImageFromResponse } from "./openai";
+
+type OpenAIImageResponse = { data: Array<{ url?: string; b64_json?: string }> };
+
+/** Image payload type, taken from whatever the shared OpenAI extractor resolves to. */
+type ImageBytes = Awaited<ReturnType<typeof extractImageFromResponse>>;
+
+/**
+ * API version used when AZURE_API_VERSION is unset. The GPT-image deployments
+ * and the /images/edits route need 2025-04-01-preview or later according to the
+ * Azure image-generation docs; 2024-02-01 predates both.
+ */
+const DEFAULT_API_VERSION = "2025-04-01-preview";
+
+/** Returns AZURE_OPENAI_IMAGE_MODEL, or "gpt-image-1.5" when it is unset or empty. */
+export function getDefaultModel(): string {
+  return process.env.AZURE_OPENAI_IMAGE_MODEL || "gpt-image-1.5";
+}
+
+/**
+ * Reads the deployment endpoint from AZURE_OPENAI_BASE_URL with trailing slashes removed.
+ * @throws Error when the variable is missing or empty.
+ */
+function getBaseURL(): string {
+  const url = process.env.AZURE_OPENAI_BASE_URL;
+  if (!url) {
+    throw new Error(
+      "AZURE_OPENAI_BASE_URL is required. Set it to your Azure deployment endpoint, e.g.: https://your-resource.openai.azure.com/openai/deployments/your-deployment"
+    );
+  }
+  return url.replace(/\/+$/, "");
+}
+
+/**
+ * Reads the API key from AZURE_OPENAI_API_KEY.
+ * @throws Error when the variable is missing or empty.
+ */
+function getApiKey(): string {
+  const key = process.env.AZURE_OPENAI_API_KEY;
+  if (!key) {
+    throw new Error(
+      "AZURE_OPENAI_API_KEY is required. Get it from Azure Portal → your OpenAI resource → Keys and Endpoint."
+    );
+  }
+  return key;
+}
+
+/** Returns AZURE_API_VERSION, or DEFAULT_API_VERSION when it is unset or empty. */
+function getApiVersion(): string {
+  return process.env.AZURE_API_VERSION || DEFAULT_API_VERSION;
+}
+
+/**
+ * Builds the request URL for an operation under the deployment endpoint.
+ * The base URL is parsed rather than concatenated so that a query string already
+ * present on AZURE_OPENAI_BASE_URL stays after the path, and an api-version given
+ * there is kept instead of being duplicated.
+ */
+function buildURL(pathSuffix: string): string {
+  const url = new URL(getBaseURL());
+  url.pathname = `${url.pathname.replace(/\/+$/, "")}${pathSuffix}`;
+  if (!url.searchParams.has("api-version")) {
+    url.searchParams.set("api-version", getApiVersion());
+  }
+  return url.toString();
+}
+
+/** Authentication header sent with every request: the key goes in `api-key`. */
+function authHeaders(): Record<string, string> {
+  return { "api-key": getApiKey() };
+}
+
+/**
+ * Turns an images API response into image data.
+ * @throws Error carrying the HTTP status and response body when the request failed.
+ */
+async function readImageResponse(res: Response, errorLabel: string): Promise<ImageBytes> {
+  if (!res.ok) {
+    const detail = await res.text();
+    throw new Error(`${errorLabel} (HTTP ${res.status}): ${detail || res.statusText}`);
+  }
+
+  const result = (await res.json()) as OpenAIImageResponse;
+  return extractImageFromResponse(result);
+}
+
+/**
+ * Generates one image through the Azure OpenAI deployment. Uses the edits
+ * endpoint when reference images are supplied, the generations endpoint otherwise.
+ * `model` and `args.quality` only feed the size lookup; neither is sent in the
+ * request itself.
+ */
+export async function generateImage(
+  prompt: string,
+  model: string,
+  args: CliArgs
+): Promise<ImageBytes> {
+  const size = args.size || getOpenAISize(model, args.aspectRatio, args.quality);
+
+  if (args.referenceImages.length > 0) {
+    return generateWithAzureEdits(prompt, size, args.referenceImages);
+  }
+
+  return generateWithAzureGenerations(prompt, size);
+}
+
+/** POSTs a JSON body to /images/generations requesting a single image. */
+async function generateWithAzureGenerations(prompt: string, size: string): Promise<ImageBytes> {
+  const body: Record<string, unknown> = { prompt, size, n: 1 };
+
+  const res = await fetch(buildURL("/images/generations"), {
+    method: "POST",
+    headers: {
+      "Content-Type": "application/json",
+      ...authHeaders(),
+    },
+    body: JSON.stringify(body),
+  });
+
+  return readImageResponse(res, "Azure OpenAI API error");
+}
+
+/** POSTs prompt, size and every reference image as multipart form data to /images/edits. */
+async function generateWithAzureEdits(
+  prompt: string,
+  size: string,
+  referenceImages: string[]
+): Promise<ImageBytes> {
+  const form = new FormData();
+  form.append("prompt", prompt);
+  form.append("size", size);
+
+  for (const refPath of referenceImages) {
+    const bytes = await readFile(refPath);
+    const filename = path.basename(refPath);
+    const mimeType = getMimeType(filename);
+    const blob = new Blob([bytes], { type: mimeType });
+    form.append("image[]", blob, filename);
+  }
+
+  const res = await fetch(buildURL("/images/edits"), {
+    method: "POST",
+    // Content-Type is left unset so fetch can add the multipart boundary itself.
+    headers: authHeaders(),
+    body: form,
+  });
+
+  return readImageResponse(res, "Azure OpenAI edits API error");
+}
diff --git a/skills/baoyu-image-gen/scripts/types.ts b/skills/baoyu-image-gen/scripts/types.ts
index e84421a..227d13c 100644
--- a/skills/baoyu-image-gen/scripts/types.ts
+++ b/skills/baoyu-image-gen/scripts/types.ts
@@ -1,4 +1,4 @@
-export type Provider = "google" | "openai" | "openrouter" | "dashscope" | "replicate" | "jimeng" | "seedream";
+export type Provider = "google" | "openai" | "openrouter" | "dashscope" | "replicate" | "jimeng" | "seedream" | "azure";
 export type Quality = "normal" | "2k";
 
 export type CliArgs = {
@@ -55,6 +55,7 @@ export type ExtendConfig = {
     replicate: string | null;
     jimeng: string | null;
     seedream: string | null;
+    azure: string | null;
   };
   batch?: {
     max_workers?: number | null;