Update gemini-web to support adding images as references

This commit is contained in:
Jim Liu 宝玉 2026-01-16 19:06:14 -06:00
parent a42137ff13
commit 259baff413
6 changed files with 456 additions and 89 deletions

View File

@ -5,6 +5,13 @@ description: Image generation skill using Gemini Web. Generates images from text
# Gemini Web Client
Supports:
- Text generation
- Image generation (download + save)
- Reference image upload (attach images for vision tasks)
- Multi-turn conversations within the same executor instance (`keepSession`)
- Experimental video generation (`generateVideo`) — Gemini may return an async placeholder, in which case downloading the result may require the Gemini web UI
## Quick start
```bash
@ -14,6 +21,19 @@ npx -y bun scripts/main.ts --prompt "A cute cat" --image cat.png
npx -y bun scripts/main.ts --promptfiles system.md content.md --image out.png
```
## Executor options (programmatic)
This skill is typically consumed via `createGeminiWebExecutor(geminiOptions)` (see `scripts/executor.ts`).
Key options in `GeminiWebOptions`:
- `referenceImages?: string | string[]` Upload local images as references (vision input).
- `keepSession?: boolean` Reuse Gemini `chatMetadata` to continue the same conversation across calls (required if you want reference images to persist across multiple messages).
- `generateVideo?: string` Generate a video and (best-effort) download to the given path. Gemini may return `video_gen_chip` (async); in that case you must open Gemini web UI to download the result.
Notes:
- `generateVideo` cannot be combined with `generateImage` / `editImage`.
- When `keepSession=true` and `referenceImages` is set, reference images are uploaded once per executor instance.
## Commands
### Text generation
@ -70,6 +90,8 @@ npx -y bun scripts/main.ts "Hello" --json
| `--profile-dir <path>` | | Chrome profile directory |
| `--help` | `-h` | Show help |
CLI note: `scripts/main.ts` currently supports text and image generation, plus reference images via `--reference`. Multi-turn sessions and video generation are only exposed via the executor options above.
## Models
- `gemini-3-pro` - Default, latest model

View File

@ -216,6 +216,7 @@ class CdpConnection {
export async function getGeminiCookieMapViaChrome(options?: {
timeoutMs?: number;
debugConnectTimeoutMs?: number;
tokenCheckTimeoutMs?: number;
pollIntervalMs?: number;
log?: GeminiWebLog;
userDataDir?: string;
@ -224,6 +225,7 @@ export async function getGeminiCookieMapViaChrome(options?: {
const log = options?.log;
const timeoutMs = options?.timeoutMs ?? 5 * 60_000;
const debugConnectTimeoutMs = options?.debugConnectTimeoutMs ?? 30_000;
const tokenCheckTimeoutMs = options?.tokenCheckTimeoutMs ?? 30_000;
const pollIntervalMs = options?.pollIntervalMs ?? 2_000;
const userDataDir = options?.userDataDir ?? resolveGeminiWebChromeProfileDir();
@ -290,7 +292,7 @@ export async function getGeminiCookieMapViaChrome(options?: {
if (hasRequiredGeminiCookies(cookieMap)) {
try {
const controller = new AbortController();
const timer = setTimeout(() => controller.abort(), 10_000);
const timer = setTimeout(() => controller.abort(), tokenCheckTimeoutMs);
try {
await fetchGeminiAccessToken(cookieMap, controller.signal);
} finally {

View File

@ -67,19 +67,69 @@ function buildCookieHeader(cookieMap: Record<string, string>): string {
.join('; ');
}
/**
 * Collects every `Set-Cookie` header value from a fetch response.
 *
 * Uses `Headers.getSetCookie()` when the runtime provides it. Otherwise it
 * falls back to `headers.get('set-cookie')`, which folds all cookies into a
 * single comma-joined string; that string is split back into one entry per
 * cookie so that later cookies are not silently dropped.
 *
 * @param res - Response whose headers are inspected.
 * @returns One string per cookie. Empty when the response sets no cookies or
 *   when `getSetCookie()` throws.
 */
function getSetCookieHeaders(res: Response): string[] {
  const headers = res.headers as unknown as { getSetCookie?: () => string[] };
  if (typeof headers.getSetCookie === 'function') {
    try {
      return headers.getSetCookie();
    } catch {
      // Best effort: a failing runtime implementation is treated as "no cookies".
      return [];
    }
  }

  const combined = res.headers.get('set-cookie');
  if (!combined) return [];

  // A comma only starts a new cookie when it is followed by `name=`. Commas
  // inside an attribute value such as `Expires=Wed, 21 Oct 2015 ...` are not
  // followed by a `token=` pair, so they are left alone.
  return combined
    .split(/,(?=\s*[^;,=\s]+=)/)
    .map((cookie) => cookie.trim())
    .filter((cookie) => cookie.length > 0);
}
/**
 * Merges `Set-Cookie` header values into a name → value cookie map, in place.
 *
 * Only the leading `name=value` pair of each header is used; attributes after
 * the first `;` are ignored. Headers without a non-empty name before `=` are
 * skipped. A later header for the same name overwrites the earlier value.
 *
 * @param setCookies - Raw `Set-Cookie` header values, one cookie per entry.
 * @param cookieMap - Map that is mutated with the parsed cookies.
 */
function applySetCookiesToMap(setCookies: string[], cookieMap: Record<string, string>): void {
  setCookies.forEach((header) => {
    const [pair = ''] = header.split(';');
    const nameValue = pair.trim();
    const separator = nameValue.indexOf('=');
    // No `=` at all (-1) or an empty name (0): nothing usable in this header.
    if (separator < 1) return;
    const cookieName = nameValue.slice(0, separator).trim();
    if (cookieName.length === 0) return;
    cookieMap[cookieName] = nameValue.slice(separator + 1).trim();
  });
}
/**
 * Performs a fetch while following redirects manually, so that cookies set by
 * intermediate responses are recorded in `cookieMap` and sent on the next hop.
 *
 * Each request carries the caller's headers, a `cookie` header built from the
 * current `cookieMap` (omitted when empty), and the module's `user-agent`.
 * `cookieMap` is mutated as `Set-Cookie` headers arrive.
 *
 * @param url - Initial URL to request.
 * @param init - Fetch options; `redirect` is always forced to `'manual'`.
 * @param cookieMap - Cookie jar that is read for each request and updated from each response.
 * @param signal - Optional abort signal passed to every request.
 * @param maxRedirects - Maximum number of redirects to follow before giving up.
 * @returns The first non-redirect response, or a 3xx response that has no `location` header.
 * @throws Error when more than `maxRedirects` redirects are encountered.
 */
async function fetchWithCookieJar(
  url: string,
  init: Omit<RequestInit, 'redirect' | 'headers'> & { headers?: Record<string, string> },
  cookieMap: Record<string, string>,
  signal?: AbortSignal,
  maxRedirects = 20,
): Promise<Response> {
  let target = url;
  let hops = 0;
  while (hops <= maxRedirects) {
    // Rebuilt on every hop so cookies learned from the previous response are sent.
    const requestHeaders: Record<string, string> = { ...(init.headers ?? {}) };
    const jarCookies = buildCookieHeader(cookieMap);
    if (jarCookies) requestHeaders.cookie = jarCookies;
    requestHeaders['user-agent'] = USER_AGENT;

    const response = await fetch(target, {
      ...init,
      redirect: 'manual',
      signal,
      headers: requestHeaders,
    });
    applySetCookiesToMap(getSetCookieHeaders(response), cookieMap);

    const isRedirect = response.status >= 300 && response.status < 400;
    const nextLocation = isRedirect ? response.headers.get('location') : null;
    if (!nextLocation) return response;

    // `location` may be relative; resolve it against the URL just requested.
    target = new URL(nextLocation, target).toString();
    hops += 1;
  }
  throw new Error(`Too many redirects while fetching ${url} (>${maxRedirects}).`);
}
export async function fetchGeminiAccessToken(
cookieMap: Record<string, string>,
signal?: AbortSignal,
): Promise<string> {
const cookieHeader = buildCookieHeader(cookieMap);
const res = await fetch(GEMINI_APP_URL, {
redirect: 'follow',
signal,
headers: {
cookie: cookieHeader,
'user-agent': USER_AGENT,
},
});
const res = await fetchWithCookieJar(GEMINI_APP_URL, { method: 'GET' }, cookieMap, signal);
const html = await res.text();
const tokens = ['SNlM0e', 'thykhd'] as const;
@ -107,7 +157,21 @@ function extractErrorCode(responseJson: unknown): number | undefined {
}
function extractGgdlUrls(rawText: string): string[] {
const matches = rawText.match(/https:\/\/lh3\.googleusercontent\.com\/gg-dl\/[^\s"']+/g) ?? [];
const matches =
rawText.match(/https?:\/\/[^/\s"']*googleusercontent\.com\/gg-dl\/[^\s"']+/g) ?? [];
const seen = new Set<string>();
const urls: string[] = [];
for (const match of matches) {
if (seen.has(match)) continue;
seen.add(match);
urls.push(match);
}
return urls;
}
function extractImageGenerationContentUrls(rawText: string): string[] {
const matches =
rawText.match(/https?:\/\/googleusercontent\.com\/image_generation_content\/\d+/g) ?? [];
const seen = new Set<string>();
const urls: string[] = [];
for (const match of matches) {
@ -119,9 +183,17 @@ function extractGgdlUrls(rawText: string): string[] {
}
function ensureFullSizeImageUrl(url: string): string {
if (url.includes('=s2048')) return url;
if (url.includes('=s')) return url;
return `${url}=s2048`;
const trimmed = url.trim();
let normalized = trimmed;
const backslashIndex = normalized.indexOf('\\');
if (backslashIndex >= 0) normalized = normalized.slice(0, backslashIndex);
// Some Gemini responses embed a size suffix as "/=s2048" which breaks downloads.
normalized = normalized.replace(/\/=s(?=\d+(?:$|[?#]))/, '=s');
normalized = normalized.replace(/\/=s(?=$|[?#])/, '=s');
if (normalized.endsWith('/')) normalized = normalized.slice(0, -1);
if (normalized.includes('=s2048')) return normalized;
if (normalized.includes('=s')) return normalized;
return `${normalized}=s2048`;
}
async function fetchWithCookiePreservingRedirects(
@ -190,6 +262,29 @@ async function uploadGeminiFile(filePath: string, signal?: AbortSignal): Promise
return { id: text, name: fileName };
}
/**
 * Maps a file name to a MIME type based on its extension (case-insensitive).
 *
 * @param fileName - File name or path; only the extension is inspected.
 * @returns The MIME type for the known image/video extensions, or
 *   `application/octet-stream` for anything else.
 */
function guessMimeType(fileName: string): string {
  const mimeByExtension = new Map<string, string>([
    ['.png', 'image/png'],
    ['.jpg', 'image/jpeg'],
    ['.jpeg', 'image/jpeg'],
    ['.webp', 'image/webp'],
    ['.gif', 'image/gif'],
    ['.mp4', 'video/mp4'],
    ['.mov', 'video/quicktime'],
    ['.webm', 'video/webm'],
  ]);
  const extension = path.extname(fileName).toLowerCase();
  return mimeByExtension.get(extension) ?? 'application/octet-stream';
}
function buildGeminiFReqPayload(
prompt: string,
uploaded: Array<{ id: string; name: string }>,
@ -201,9 +296,8 @@ function buildGeminiFReqPayload(
prompt,
0,
null,
// Matches gemini-webapi payload format: [[[fileId, 1]]] for a single attachment.
// Keep it extensible for multiple uploads by emitting one [[id, 1]] entry per file.
uploaded.map((file) => [[file.id, 1]]),
// Matches gemini-web payload format: [[[fileId, 1, null, mimeType], fileName]] for an attachment.
uploaded.map((file) => [[file.id, 1, null, guessMimeType(file.name)], file.name]),
]
: [prompt];
@ -248,7 +342,19 @@ export function parseGeminiStreamGenerateResponse(rawText: string): {
? (getNestedValue<string | null>(firstCandidate, [22, 0], null) ?? textRaw)
: textRaw;
const thoughts = getNestedValue<string | null>(firstCandidate, [37, 0, 0], null);
const metadata = getNestedValue<unknown>(body, [1], []);
const conversationMeta = getNestedValue<unknown[]>(body, [1], []);
const conversationId =
typeof conversationMeta[0] === 'string' && conversationMeta[0].length > 0
? conversationMeta[0]
: null;
const responseId =
typeof conversationMeta[1] === 'string' && conversationMeta[1].length > 0
? conversationMeta[1]
: null;
const choiceIdRaw = getNestedValue<string | null>(firstCandidate, [0], null);
const choiceId = typeof choiceIdRaw === 'string' && choiceIdRaw.length > 0 ? choiceIdRaw : null;
const metadata =
conversationId && responseId && choiceId ? [conversationId, responseId, choiceId] : conversationMeta;
const images: GeminiWebCandidateImage[] = [];
@ -305,8 +411,8 @@ export function isGeminiModelUnavailable(errorCode: number | undefined): boolean
}
export async function runGeminiWebOnce(input: GeminiWebRunInput): Promise<GeminiWebRunOutput> {
const cookieHeader = buildCookieHeader(input.cookieMap);
const at = await fetchGeminiAccessToken(input.cookieMap, input.signal);
const cookieHeader = buildCookieHeader(input.cookieMap);
const uploaded: Array<{ id: string; name: string }> = [];
for (const file of input.files ?? []) {
@ -403,11 +509,19 @@ export async function saveFirstGeminiImageFromOutput(
return { saved: true, imageCount: output.images.length };
}
const ggdl = extractGgdlUrls(output.rawResponseText);
if (ggdl[0]) {
await downloadGeminiImage(ggdl[0], cookieMap, outputPath, signal);
const ggdl = extractGgdlUrls(`${output.text}\n${output.rawResponseText}`);
const preferred = ggdl.length > 0 ? ggdl[ggdl.length - 1] : null;
if (preferred) {
await downloadGeminiImage(preferred, cookieMap, outputPath, signal);
return { saved: true, imageCount: ggdl.length };
}
const imageGen = extractImageGenerationContentUrls(`${output.text}\n${output.rawResponseText}`);
const imageGenPreferred = imageGen.length > 0 ? imageGen[imageGen.length - 1] : null;
if (imageGenPreferred) {
await downloadGeminiImage(imageGenPreferred, cookieMap, outputPath, signal);
return { saved: true, imageCount: imageGen.length };
}
return { saved: false, imageCount: 0 };
}

View File

@ -1,7 +1,8 @@
import { mkdir, writeFile } from 'node:fs/promises';
import path from 'node:path';
import type { BrowserRunOptions, BrowserRunResult, BrowserLogger, CookieParam } from '../browser/types.js';
import { runGeminiWebWithFallback, saveFirstGeminiImageFromOutput } from './client.js';
import type { GeminiWebModelId } from './client.js';
import type { GeminiWebModelId, GeminiWebRunOutput } from './client.js';
import {
buildGeminiCookieMap,
hasRequiredGeminiCookies,
@ -11,6 +12,9 @@ import type { GeminiWebOptions, GeminiWebResponse } from './types.js';
export { hasRequiredGeminiCookies } from './cookie-store.js';
const USER_AGENT =
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36';
/**
 * Rough token estimate: one token per four characters of `text`, rounded up.
 *
 * @param text - Text to measure (by `text.length`).
 * @returns The estimated token count; `0` for an empty string.
 */
function estimateTokenCount(text: string): number {
  const charsPerToken = 4;
  const { length } = text;
  return Math.ceil(length / charsPerToken);
}
@ -22,6 +26,115 @@ function resolveInvocationPath(value: string | undefined): string | undefined {
return path.isAbsolute(trimmed) ? trimmed : path.resolve(process.cwd(), trimmed);
}
/**
 * Normalizes a single path or a list of paths into an array of resolved paths.
 *
 * Each string entry is passed through `resolveInvocationPath`; entries that are
 * not strings, or for which that helper returns a falsy value, are dropped.
 *
 * @param value - One path, several paths, or nothing.
 * @returns Resolved paths in their original order; empty when `value` is falsy.
 */
function normalizePathList(value: string | string[] | undefined): string[] {
  if (!value) return [];
  const candidates = Array.isArray(value) ? value : [value];
  return candidates.flatMap((candidate) => {
    // Runtime guard: callers may pass untyped data despite the signature.
    if (typeof candidate !== 'string') return [];
    const resolvedPath = resolveInvocationPath(candidate);
    return resolvedPath ? [resolvedPath] : [];
  });
}
/**
 * Trims each path and removes blanks and duplicates, keeping first-seen order.
 *
 * @param paths - Paths to clean up; the input array is not modified.
 * @returns The distinct, trimmed, non-empty paths.
 */
function dedupePaths(paths: string[]): string[] {
  // A Set iterates in insertion order, and re-adding an existing value is a no-op.
  const unique = new Set<string>();
  paths.forEach((entry) => {
    const cleaned = entry.trim();
    if (cleaned) unique.add(cleaned);
  });
  return Array.from(unique);
}
/**
 * Serializes a cookie map into a `Cookie` request-header value.
 *
 * @param cookieMap - Cookie name → value pairs.
 * @returns `name=value` pairs joined with `; `. Entries whose value is empty
 *   or not a string are left out; the result is `''` when nothing remains.
 */
function buildCookieHeader(cookieMap: Record<string, string>): string {
  const pairs: string[] = [];
  for (const [cookieName, cookieValue] of Object.entries(cookieMap)) {
    if (typeof cookieValue !== 'string' || cookieValue.length === 0) continue;
    pairs.push(`${cookieName}=${cookieValue}`);
  }
  return pairs.join('; ');
}
/**
 * Fetches a URL while following redirects manually, re-sending the same
 * `init` (including its headers) on every hop.
 *
 * @param url - Initial URL to request.
 * @param init - Fetch options; `redirect` is always forced to `'manual'`.
 * @param signal - Optional abort signal passed to every request.
 * @param maxRedirects - Maximum number of redirects to follow before giving up.
 * @returns The first non-redirect response, or a 3xx response that has no `location` header.
 * @throws Error when more than `maxRedirects` redirects are encountered.
 */
async function fetchWithCookiePreservingRedirects(
  url: string,
  init: Omit<RequestInit, 'redirect'>,
  signal?: AbortSignal,
  maxRedirects = 10,
): Promise<Response> {
  let target = url;
  let hops = 0;
  while (hops <= maxRedirects) {
    const response = await fetch(target, { ...init, redirect: 'manual', signal });
    const isRedirect = response.status >= 300 && response.status < 400;
    const nextLocation = isRedirect ? response.headers.get('location') : null;
    if (!nextLocation) return response;

    // `location` may be relative; resolve it against the URL just requested.
    target = new URL(nextLocation, target).toString();
    hops += 1;
  }
  throw new Error(`Too many redirects while downloading media (>${maxRedirects}).`);
}
/**
 * Downloads a media URL using the given cookies and writes it to disk.
 *
 * The parent directory of `outputPath` is created if it does not exist.
 *
 * @param url - Media URL to download.
 * @param cookieMap - Cookies sent with the request.
 * @param outputPath - Destination file path.
 * @param signal - Optional abort signal for the download request.
 * @throws Error when the final response status is not OK.
 */
async function downloadGeminiMedia(
  url: string,
  cookieMap: Record<string, string>,
  outputPath: string,
  signal?: AbortSignal,
): Promise<void> {
  const requestInit = {
    headers: {
      cookie: buildCookieHeader(cookieMap),
      'user-agent': USER_AGENT,
    },
  };
  const response = await fetchWithCookiePreservingRedirects(url, requestInit, signal);
  if (!response.ok) {
    throw new Error(
      `Failed to download media: ${response.status} ${response.statusText} (${response.url})`,
    );
  }

  const bytes = new Uint8Array(await response.arrayBuffer());
  const targetDir = path.dirname(outputPath);
  await mkdir(targetDir, { recursive: true });
  await writeFile(outputPath, bytes);
}
/**
 * Extracts the distinct `googleusercontent.com/gg-dl/` download URLs found in
 * a response body, in order of first appearance.
 *
 * The host must be `googleusercontent.com` itself or a dot-separated subdomain
 * of it. Look-alike hosts (`evilgoogleusercontent.com`) and hosts that merely
 * carry the name in a query string (`evil.com?.googleusercontent.com/...`) are
 * rejected, because the extracted URL is later fetched with the user's cookies.
 *
 * The URL ends at the first whitespace, quote, or backslash. Stopping at a
 * backslash keeps the `\` of a JSON-escaped closing quote (`\"`) out of the URL.
 *
 * @param rawText - Text to scan (for example the raw response body).
 * @returns Unique matching URLs; empty when none are found.
 */
function extractGgdlUrls(rawText: string): string[] {
  const ggdlPattern =
    /https?:\/\/(?:[A-Za-z0-9-]+\.)*googleusercontent\.com\/gg-dl\/[^\s"'\\]+/g;
  const matches = rawText.match(ggdlPattern) ?? [];
  // A Set iterates in insertion order, so first-seen order is preserved.
  return Array.from(new Set(matches));
}
/**
 * Picks one download URL from a Gemini response and saves it as a video file.
 *
 * Candidates are the `gg-dl` URLs found in `output.rawResponseText`. Preference
 * order: the last URL ending in `.mp4`/`.webm`/`.mov` (optionally followed by
 * a query or fragment), then the first URL containing "video", then the last
 * URL overall.
 *
 * @param output - Gemini run output whose raw response text is scanned.
 * @param cookieMap - Cookies sent with the download request.
 * @param outputPath - Destination file path.
 * @param signal - Optional abort signal for the download.
 * @returns `saved: false` with a zero count when no URL is found; otherwise
 *   `saved: true` and the number of candidate URLs.
 */
async function saveFirstGeminiVideoFromOutput(
  output: GeminiWebRunOutput,
  cookieMap: Record<string, string>,
  outputPath: string,
  signal?: AbortSignal,
): Promise<{ saved: boolean; videoCount: number }> {
  const candidates = extractGgdlUrls(output.rawResponseText);
  if (!candidates[0]) return { saved: false, videoCount: 0 };

  const hasVideoExtension = /\.(mp4|webm|mov)(?:$|[?#])/i;
  const mentionsVideo = /video/i;

  // `filter` returns a fresh array, so `pop` does not disturb `candidates`.
  const lastWithExtension = candidates.filter((url) => hasVideoExtension.test(url)).pop();
  const chosen =
    lastWithExtension ??
    candidates.find((url) => mentionsVideo.test(url)) ??
    candidates[candidates.length - 1];

  await downloadGeminiMedia(chosen, cookieMap, outputPath, signal);
  return { saved: true, videoCount: candidates.length };
}
function resolveGeminiWebModel(
desiredModel: string | null | undefined,
log?: BrowserLogger,
@ -115,6 +228,9 @@ export async function loadGeminiCookieMap(log?: BrowserLogger): Promise<Record<s
export function createGeminiWebExecutor(
geminiOptions: GeminiWebOptions,
): (runOptions: BrowserRunOptions) => Promise<BrowserRunResult> {
let persistedChatMetadata: unknown | null = null;
let referenceImagesUploaded = false;
return async (runOptions: BrowserRunOptions): Promise<BrowserRunResult> => {
const startTime = Date.now();
const log = runOptions.log;
@ -133,23 +249,38 @@ export function createGeminiWebExecutor(
? Math.max(1_000, runOptions.config.timeoutMs)
: null;
const generateVideoPath = resolveInvocationPath(geminiOptions.generateVideo);
const defaultTimeoutMs = geminiOptions.youtube
? 240_000
: geminiOptions.generateImage || geminiOptions.editImage
: generateVideoPath
? 900_000
: geminiOptions.generateImage || geminiOptions.editImage
? 300_000
: 120_000;
const timeoutMs = Math.min(configTimeout ?? defaultTimeoutMs, 600_000);
const timeoutCapMs = generateVideoPath ? 1_800_000 : 600_000;
const timeoutMs = Math.min(configTimeout ?? defaultTimeoutMs, timeoutCapMs);
const controller = new AbortController();
const timeout = setTimeout(() => controller.abort(), timeoutMs);
const keepSession = geminiOptions.keepSession === true;
const generateImagePath = resolveInvocationPath(geminiOptions.generateImage);
const editImagePath = resolveInvocationPath(geminiOptions.editImage);
const outputPath = resolveInvocationPath(geminiOptions.outputPath);
const attachmentPaths = (runOptions.attachments ?? []).map((attachment) => attachment.path);
const referenceImagePaths = normalizePathList(geminiOptions.referenceImages);
const requestFilePaths = dedupePaths(
keepSession ? attachmentPaths : [...referenceImagePaths, ...attachmentPaths],
);
if (generateVideoPath && (generateImagePath || editImagePath)) {
throw new Error('Gemini web executor: generateVideo cannot be combined with generateImage/editImage options.');
}
let prompt = runOptions.prompt;
if (geminiOptions.aspectRatio && (generateImagePath || editImagePath)) {
if (geminiOptions.aspectRatio && (generateImagePath || editImagePath || generateVideoPath)) {
prompt = `${prompt} (aspect ratio: ${geminiOptions.aspectRatio})`;
}
if (geminiOptions.youtube) {
@ -158,29 +289,50 @@ export function createGeminiWebExecutor(
if (generateImagePath && !editImagePath) {
prompt = `Generate an image: ${prompt}`;
}
if (generateVideoPath) {
prompt = `Generate a video: ${prompt}`;
}
const model: GeminiWebModelId = resolveGeminiWebModel(runOptions.config?.desiredModel, log);
let response: GeminiWebResponse;
let videoSaveSummary: { saved: boolean; videoCount: number; outputPath: string } | null = null;
try {
let chatMetadata: unknown = keepSession ? persistedChatMetadata : null;
if (keepSession && referenceImagePaths.length > 0 && !referenceImagesUploaded) {
const intro = await runGeminiWebWithFallback({
prompt: 'Here are reference images for future messages.',
files: referenceImagePaths,
model,
cookieMap,
chatMetadata,
signal: controller.signal,
});
chatMetadata = intro.metadata;
persistedChatMetadata = intro.metadata;
referenceImagesUploaded = true;
}
if (editImagePath) {
const intro = await runGeminiWebWithFallback({
prompt: 'Here is an image to edit',
files: [editImagePath],
model,
cookieMap,
chatMetadata: null,
chatMetadata,
signal: controller.signal,
});
const editPrompt = `Use image generation tool to ${prompt}`;
const out = await runGeminiWebWithFallback({
prompt: editPrompt,
files: attachmentPaths,
files: requestFilePaths,
model,
cookieMap,
chatMetadata: intro.metadata,
signal: controller.signal,
});
if (keepSession) persistedChatMetadata = out.metadata;
response = {
text: out.text ?? null,
thoughts: geminiOptions.showThoughts ? out.thoughts : null,
@ -198,12 +350,13 @@ export function createGeminiWebExecutor(
} else if (generateImagePath) {
const out = await runGeminiWebWithFallback({
prompt,
files: attachmentPaths,
files: requestFilePaths,
model,
cookieMap,
chatMetadata: null,
chatMetadata,
signal: controller.signal,
});
if (keepSession) persistedChatMetadata = out.metadata;
response = {
text: out.text ?? null,
thoughts: geminiOptions.showThoughts ? out.thoughts : null,
@ -216,15 +369,36 @@ export function createGeminiWebExecutor(
if (!imageSave.saved) {
throw new Error(`No images generated. Response text:\n${out.text || '(empty response)'}`);
}
} else if (generateVideoPath) {
const out = await runGeminiWebWithFallback({
prompt,
files: requestFilePaths,
model,
cookieMap,
chatMetadata,
signal: controller.signal,
});
if (keepSession) persistedChatMetadata = out.metadata;
response = {
text: out.text ?? null,
thoughts: geminiOptions.showThoughts ? out.thoughts : null,
has_images: false,
image_count: 0,
};
const resolvedOutputPath = generateVideoPath ?? outputPath ?? 'generated.mp4';
const save = await saveFirstGeminiVideoFromOutput(out, cookieMap, resolvedOutputPath, controller.signal);
videoSaveSummary = { ...save, outputPath: resolvedOutputPath };
} else {
const out = await runGeminiWebWithFallback({
prompt,
files: attachmentPaths,
files: requestFilePaths,
model,
cookieMap,
chatMetadata: null,
chatMetadata,
signal: controller.signal,
});
if (keepSession) persistedChatMetadata = out.metadata;
response = {
text: out.text ?? null,
thoughts: geminiOptions.showThoughts ? out.thoughts : null,
@ -247,6 +421,15 @@ export function createGeminiWebExecutor(
const imagePath = generateImagePath || outputPath || 'generated.png';
answerMarkdown += `\n\n*Generated ${response.image_count} image(s). Saved to: ${imagePath}*`;
}
if (videoSaveSummary) {
if (videoSaveSummary.saved) {
answerMarkdown += `\n\n*Generated ${videoSaveSummary.videoCount || 1} video(s). Saved to: ${videoSaveSummary.outputPath}*`;
} else if (/video_gen_chip/.test(answerMarkdown) || /video_gen_chip/.test(response.text ?? '')) {
answerMarkdown += '\n\n*Video generation is asynchronous. Check Gemini web UI to download the result.*';
} else {
answerMarkdown += '\n\n*No downloadable video URL found in Gemini response.*';
}
}
const tookMs = Date.now() - startTime;
log?.(`[gemini-web] Completed in ${tookMs}ms`);

View File

@ -27,6 +27,7 @@ Options:
-m, --model <id> gemini-3-pro | gemini-2.5-pro | gemini-2.5-flash (default: gemini-3-pro)
--json Output JSON
--image [path] Generate an image and save it (default: ./generated.png)
--reference <files...> Reference images for vision input
--login Only refresh cookies, then exit
--cookie-path <path> Cookie file path (default: ${cookiePath})
--profile-dir <path> Chrome profile dir (default: ${profileDir})
@ -75,6 +76,7 @@ function parseArgs(argv: string[]): {
loginOnly?: boolean;
cookiePath?: string;
profileDir?: string;
referenceImages?: string[];
} {
const out: ReturnType<typeof parseArgs> = {};
const positional: string[] = [];
@ -157,6 +159,19 @@ function parseArgs(argv: string[]): {
out.profileDir = arg.slice('--profile-dir='.length);
continue;
}
if (arg === '--reference' || arg === '--ref') {
out.referenceImages = [];
while (i + 1 < argv.length) {
const next = argv[i + 1];
if (next && !next.startsWith('-')) {
out.referenceImages.push(next);
i += 1;
} else {
break;
}
}
continue;
}
if (arg.startsWith('-')) {
throw new Error(`Unknown option: ${arg}`);
@ -178,6 +193,7 @@ function parseArgs(argv: string[]): {
if (out.cookiePath === '') delete out.cookiePath;
if (out.profileDir === '') delete out.profileDir;
if (out.promptFiles?.length === 0) delete out.promptFiles;
if (out.referenceImages?.length === 0) delete out.referenceImages;
return out;
}
@ -186,7 +202,7 @@ async function isCookieMapValid(cookieMap: Record<string, string>): Promise<bool
if (!hasRequiredGeminiCookies(cookieMap)) return false;
const controller = new AbortController();
const timer = setTimeout(() => controller.abort(), 10_000);
const timer = setTimeout(() => controller.abort(), 30_000);
try {
await fetchGeminiAccessToken(cookieMap, controller.signal);
return true;
@ -201,7 +217,7 @@ async function ensureGeminiCookieMap(options: {
cookiePath: string;
profileDir: string;
}): Promise<Record<string, string>> {
const log = (msg: string) => console.log(msg);
const log = (msg: string) => console.error(msg);
let cookieMap = await readGeminiCookieMapFromDisk({ cookiePath: options.cookiePath, log });
if (await isCookieMapValid(cookieMap)) return cookieMap;
@ -256,80 +272,39 @@ async function main(): Promise<void> {
return;
}
const promptFromStdin = await readPromptFromStdin();
const promptFromFiles = args.promptFiles ? readPromptFiles(args.promptFiles) : null;
const prompt = promptFromFiles || args.prompt || promptFromStdin;
const promptFromArgs = promptFromFiles || args.prompt;
const prompt = promptFromArgs || (await readPromptFromStdin());
if (!prompt) printUsage(1);
let cookieMap = await ensureGeminiCookieMap({ cookiePath, profileDir });
const desiredModel = resolveModel(args.model || 'gemini-3-pro');
const imagePath = resolveImageOutputPath(args.imagePath);
const referenceImages = (args.referenceImages ?? []).map((p) =>
path.isAbsolute(p) ? p : path.resolve(process.cwd(), p),
);
try {
const effectivePrompt = imagePath ? `Generate an image: ${prompt}` : prompt;
const out = await runGeminiWebWithFallback({
prompt: effectivePrompt,
files: [],
model: desiredModel,
cookieMap,
chatMetadata: null,
});
let imageSaved = false;
let imageCount = 0;
if (imagePath) {
const save = await saveFirstGeminiImageFromOutput(out, cookieMap, imagePath);
imageSaved = save.saved;
imageCount = save.imageCount;
if (!imageSaved) {
throw new Error(`No images generated. Response text:\n${out.text || '(empty response)'}`);
}
}
if (args.json) {
process.stdout.write(
`${JSON.stringify(
imagePath ? { ...out, imageSaved, imageCount, imagePath } : out,
null,
2,
)}\n`,
);
if (out.errorMessage) process.exit(1);
return;
}
if (out.errorMessage) {
throw new Error(out.errorMessage);
}
process.stdout.write(out.text ?? '');
if (!out.text?.endsWith('\n')) process.stdout.write('\n');
if (imagePath) {
process.stdout.write(`Saved image (${imageCount || 1}) to: ${imagePath}\n`);
}
return;
} catch (error) {
const message = error instanceof Error ? error.message : String(error);
if (message.includes('Unable to locate Gemini access token')) {
console.error('[gemini-web] Cookies may be expired. Re-opening browser to refresh cookies...');
await sleep(500);
cookieMap = await getGeminiCookieMapViaChrome({ userDataDir: profileDir, log: (m) => console.log(m) });
await writeGeminiCookieMapToDisk(cookieMap, { cookiePath, log: (m) => console.log(m) });
const controller = new AbortController();
const timeoutMs = imagePath ? 300_000 : 120_000;
const timeout = setTimeout(() => controller.abort(), timeoutMs);
try {
const effectivePrompt = imagePath ? `Generate an image: ${prompt}` : prompt;
const out = await runGeminiWebWithFallback({
prompt: imagePath ? `Generate an image: ${prompt}` : prompt,
files: [],
prompt: effectivePrompt,
files: referenceImages,
model: desiredModel,
cookieMap,
chatMetadata: null,
signal: controller.signal,
});
let imageSaved = false;
let imageCount = 0;
if (imagePath) {
const save = await saveFirstGeminiImageFromOutput(out, cookieMap, imagePath);
const save = await saveFirstGeminiImageFromOutput(out, cookieMap, imagePath, controller.signal);
imageSaved = save.saved;
imageCount = save.imageCount;
if (!imageSaved) {
@ -359,6 +334,68 @@ async function main(): Promise<void> {
process.stdout.write(`Saved image (${imageCount || 1}) to: ${imagePath}\n`);
}
return;
} finally {
clearTimeout(timeout);
}
} catch (error) {
const message = error instanceof Error ? error.message : String(error);
if (message.includes('Unable to locate Gemini access token')) {
console.error('[gemini-web] Cookies may be expired. Re-opening browser to refresh cookies...');
await sleep(500);
cookieMap = await getGeminiCookieMapViaChrome({ userDataDir: profileDir, log: (m) => console.error(m) });
await writeGeminiCookieMapToDisk(cookieMap, { cookiePath, log: (m) => console.error(m) });
const controller = new AbortController();
const timeoutMs = imagePath ? 300_000 : 120_000;
const timeout = setTimeout(() => controller.abort(), timeoutMs);
try {
const out = await runGeminiWebWithFallback({
prompt: imagePath ? `Generate an image: ${prompt}` : prompt,
files: referenceImages,
model: desiredModel,
cookieMap,
chatMetadata: null,
signal: controller.signal,
});
let imageSaved = false;
let imageCount = 0;
if (imagePath) {
const save = await saveFirstGeminiImageFromOutput(out, cookieMap, imagePath, controller.signal);
imageSaved = save.saved;
imageCount = save.imageCount;
if (!imageSaved) {
throw new Error(`No images generated. Response text:\n${out.text || '(empty response)'}`);
}
}
if (args.json) {
process.stdout.write(
`${JSON.stringify(
imagePath ? { ...out, imageSaved, imageCount, imagePath } : out,
null,
2,
)}\n`,
);
if (out.errorMessage) process.exit(1);
return;
}
if (out.errorMessage) {
throw new Error(out.errorMessage);
}
process.stdout.write(out.text ?? '');
if (!out.text?.endsWith('\n')) process.stdout.write('\n');
if (imagePath) {
process.stdout.write(`Saved image (${imageCount || 1}) to: ${imagePath}\n`);
}
return;
} finally {
clearTimeout(timeout);
}
}
throw error;

View File

@ -2,9 +2,18 @@ export interface GeminiWebOptions {
youtube?: string;
generateImage?: string;
editImage?: string;
generateVideo?: string;
outputPath?: string;
showThoughts?: boolean;
aspectRatio?: string;
/**
* One or more local image paths to upload as persistent reference images.
* - If `keepSession` is enabled, they are uploaded once per executor session.
* - Otherwise, they are attached to each request.
*/
referenceImages?: string | string[];
/** Preserve Gemini chat metadata to continue multi-turn conversations within the same executor instance. */
keepSession?: boolean;
}
export interface GeminiWebResponse {