update gemini-web to support adding images as references
This commit is contained in:
parent
a42137ff13
commit
259baff413
|
|
@ -5,6 +5,13 @@ description: Image generation skill using Gemini Web. Generates images from text
|
|||
|
||||
# Gemini Web Client
|
||||
|
||||
Supports:
|
||||
- Text generation
|
||||
- Image generation (download + save)
|
||||
- Reference image upload (attach images for vision tasks)
|
||||
- Multi-turn conversations within the same executor instance (`keepSession`)
|
||||
- Experimental video generation (`generateVideo`) — Gemini may return an async placeholder; download might require Gemini web UI
|
||||
|
||||
## Quick start
|
||||
|
||||
```bash
|
||||
|
|
@ -14,6 +21,19 @@ npx -y bun scripts/main.ts --prompt "A cute cat" --image cat.png
|
|||
npx -y bun scripts/main.ts --promptfiles system.md content.md --image out.png
|
||||
```
|
||||
|
||||
## Executor options (programmatic)
|
||||
|
||||
This skill is typically consumed via `createGeminiWebExecutor(geminiOptions)` (see `scripts/executor.ts`).
|
||||
|
||||
Key options in `GeminiWebOptions`:
|
||||
- `referenceImages?: string | string[]` Upload local images as references (vision input).
|
||||
- `keepSession?: boolean` Reuse Gemini `chatMetadata` to continue the same conversation across calls (required if you want reference images to persist across multiple messages).
|
||||
- `generateVideo?: string` Generate a video and (best-effort) download to the given path. Gemini may return `video_gen_chip` (async); in that case you must open Gemini web UI to download the result.
|
||||
|
||||
Notes:
|
||||
- `generateVideo` cannot be combined with `generateImage` / `editImage`.
|
||||
- When `keepSession=true` and `referenceImages` is set, reference images are uploaded once per executor instance.
|
||||
|
||||
## Commands
|
||||
|
||||
### Text generation
|
||||
|
|
@ -70,6 +90,8 @@ npx -y bun scripts/main.ts "Hello" --json
|
|||
| `--profile-dir <path>` | | Chrome profile directory |
|
||||
| `--help` | `-h` | Show help |
|
||||
|
||||
CLI note: `scripts/main.ts` supports text + image generation, plus reference images via `--reference <files...>`. Multi-turn conversations and video generation are only exposed via the executor options above.
|
||||
|
||||
## Models
|
||||
|
||||
- `gemini-3-pro` - Default, latest model
|
||||
|
|
|
|||
|
|
@ -216,6 +216,7 @@ class CdpConnection {
|
|||
export async function getGeminiCookieMapViaChrome(options?: {
|
||||
timeoutMs?: number;
|
||||
debugConnectTimeoutMs?: number;
|
||||
tokenCheckTimeoutMs?: number;
|
||||
pollIntervalMs?: number;
|
||||
log?: GeminiWebLog;
|
||||
userDataDir?: string;
|
||||
|
|
@ -224,6 +225,7 @@ export async function getGeminiCookieMapViaChrome(options?: {
|
|||
const log = options?.log;
|
||||
const timeoutMs = options?.timeoutMs ?? 5 * 60_000;
|
||||
const debugConnectTimeoutMs = options?.debugConnectTimeoutMs ?? 30_000;
|
||||
const tokenCheckTimeoutMs = options?.tokenCheckTimeoutMs ?? 30_000;
|
||||
const pollIntervalMs = options?.pollIntervalMs ?? 2_000;
|
||||
const userDataDir = options?.userDataDir ?? resolveGeminiWebChromeProfileDir();
|
||||
|
||||
|
|
@ -290,7 +292,7 @@ export async function getGeminiCookieMapViaChrome(options?: {
|
|||
if (hasRequiredGeminiCookies(cookieMap)) {
|
||||
try {
|
||||
const controller = new AbortController();
|
||||
const timer = setTimeout(() => controller.abort(), 10_000);
|
||||
const timer = setTimeout(() => controller.abort(), tokenCheckTimeoutMs);
|
||||
try {
|
||||
await fetchGeminiAccessToken(cookieMap, controller.signal);
|
||||
} finally {
|
||||
|
|
|
|||
|
|
@ -67,19 +67,69 @@ function buildCookieHeader(cookieMap: Record<string, string>): string {
|
|||
.join('; ');
|
||||
}
|
||||
|
||||
/**
 * Collects every `Set-Cookie` header value carried by a fetch response.
 *
 * Prefers `Headers.getSetCookie()` when the runtime provides it, because it
 * returns one entry per cookie. Otherwise falls back to
 * `headers.get('set-cookie')`, which folds all cookies into a single
 * comma-joined string; that string is split back into individual cookies.
 *
 * @param res - Response whose headers are inspected.
 * @returns One string per `Set-Cookie` header; empty when there are none.
 */
function getSetCookieHeaders(res: Response): string[] {
  const headers = res.headers as unknown as { getSetCookie?: () => string[] };
  if (typeof headers.getSetCookie === 'function') {
    try {
      return headers.getSetCookie();
    } catch {
      // Best-effort: fall through to the combined-header path below.
    }
  }

  const combined = res.headers.get('set-cookie');
  if (!combined) return [];

  // Split only on commas that introduce a new `name=value` pair, so commas
  // inside attribute values (e.g. `Expires=Wed, 21 Oct 2015 ...`) are kept.
  return combined
    .split(/,(?=\s*[^\s;,=]+=)/)
    .map((part) => part.trim())
    .filter((part) => part.length > 0);
}
|
||||
|
||||
/**
 * Merges `Set-Cookie` header values into a mutable name → value cookie map.
 *
 * Only the leading `name=value` pair of each header is used; attributes after
 * the first `;` are ignored. Headers without a usable name are skipped, and a
 * later header for the same name overwrites the earlier value.
 *
 * @param setCookies - Raw `Set-Cookie` header values.
 * @param cookieMap - Map updated in place.
 */
function applySetCookiesToMap(setCookies: string[], cookieMap: Record<string, string>): void {
  setCookies.forEach((header) => {
    const [leadingSegment] = header.split(';');
    const pair = leadingSegment?.trim();
    if (!pair) return;

    // A separator at index 0 (or none at all) means there is no cookie name.
    const separatorAt = pair.indexOf('=');
    if (separatorAt < 1) return;

    const cookieName = pair.slice(0, separatorAt).trim();
    if (cookieName === '') return;

    cookieMap[cookieName] = pair.slice(separatorAt + 1).trim();
  });
}
|
||||
|
||||
/**
 * Fetches `url` while following redirects manually, so that cookies set by
 * intermediate responses are recorded in `cookieMap` and replayed on the next hop.
 *
 * On every hop the `cookie` header is rebuilt from `cookieMap` and the
 * `user-agent` header is forced to `USER_AGENT`; both override same-named
 * entries in `init.headers`.
 *
 * @param url - Initial request URL.
 * @param init - Request options (without `redirect`; headers as a plain record).
 * @param cookieMap - Cookie jar; mutated in place with every `Set-Cookie` received.
 * @param signal - Optional abort signal applied to every hop.
 * @param maxRedirects - Maximum number of redirects to follow (default 20).
 * @returns The first non-redirect response, or a 3xx response that has no `Location`.
 * @throws Error when more than `maxRedirects` redirects are encountered.
 */
async function fetchWithCookieJar(
  url: string,
  init: Omit<RequestInit, 'redirect' | 'headers'> & { headers?: Record<string, string> },
  cookieMap: Record<string, string>,
  signal?: AbortSignal,
  maxRedirects = 20,
): Promise<Response> {
  let current = url;
  let requestInit = init;

  for (let hop = 0; hop <= maxRedirects; hop += 1) {
    const cookieHeader = buildCookieHeader(cookieMap);
    const headers: Record<string, string> = {
      ...(requestInit.headers ?? {}),
      ...(cookieHeader ? { cookie: cookieHeader } : {}),
      'user-agent': USER_AGENT,
    };

    const res = await fetch(current, { ...requestInit, redirect: 'manual', signal, headers });
    applySetCookiesToMap(getSetCookieHeaders(res), cookieMap);

    const isRedirect = res.status >= 300 && res.status < 400;
    const location = isRedirect ? res.headers.get('location') : null;
    if (!location) return res;

    // The redirect body is never read; cancel it so the connection is released
    // instead of waiting for garbage collection.
    try {
      await res.body?.cancel();
    } catch {
      // Best-effort cleanup only.
    }

    // Mirror standard redirect semantics: 303 (non-GET/HEAD) and 301/302 (POST)
    // continue as a body-less GET instead of replaying the original request.
    const method = (requestInit.method ?? 'GET').toUpperCase();
    const switchToGet =
      (res.status === 303 && method !== 'GET' && method !== 'HEAD') ||
      ((res.status === 301 || res.status === 302) && method === 'POST');
    if (switchToGet) {
      requestInit = {
        ...requestInit,
        method: 'GET',
        body: undefined,
        headers: Object.fromEntries(
          Object.entries(requestInit.headers ?? {}).filter(([name]) => !/^content-/i.test(name)),
        ),
      };
    }

    current = new URL(location, current).toString();
  }

  throw new Error(`Too many redirects while fetching ${url} (>${maxRedirects}).`);
}
|
||||
|
||||
export async function fetchGeminiAccessToken(
|
||||
cookieMap: Record<string, string>,
|
||||
signal?: AbortSignal,
|
||||
): Promise<string> {
|
||||
const cookieHeader = buildCookieHeader(cookieMap);
|
||||
const res = await fetch(GEMINI_APP_URL, {
|
||||
redirect: 'follow',
|
||||
signal,
|
||||
headers: {
|
||||
cookie: cookieHeader,
|
||||
'user-agent': USER_AGENT,
|
||||
},
|
||||
});
|
||||
const res = await fetchWithCookieJar(GEMINI_APP_URL, { method: 'GET' }, cookieMap, signal);
|
||||
const html = await res.text();
|
||||
|
||||
const tokens = ['SNlM0e', 'thykhd'] as const;
|
||||
|
|
@ -107,7 +157,21 @@ function extractErrorCode(responseJson: unknown): number | undefined {
|
|||
}
|
||||
|
||||
/**
 * Extracts the unique Gemini `gg-dl` download URLs found in `rawText`,
 * in first-seen order.
 *
 * The host must be `googleusercontent.com` or one of its subdomains. The host
 * is matched label by label so that look-alike hosts such as
 * `evil-googleusercontent.com` or `evil.example?x=googleusercontent.com/...`
 * are rejected; extracted URLs are later fetched with the user's cookies.
 *
 * @param rawText - Response text to scan.
 * @returns De-duplicated matching URLs; empty when none are found.
 */
function extractGgdlUrls(rawText: string): string[] {
  const matches =
    rawText.match(/https?:\/\/(?:[A-Za-z0-9-]+\.)*googleusercontent\.com\/gg-dl\/[^\s"']+/g) ?? [];
  // A Set keeps insertion order, so this preserves first-seen ordering.
  return [...new Set(matches)];
}
|
||||
|
||||
function extractImageGenerationContentUrls(rawText: string): string[] {
|
||||
const matches =
|
||||
rawText.match(/https?:\/\/googleusercontent\.com\/image_generation_content\/\d+/g) ?? [];
|
||||
const seen = new Set<string>();
|
||||
const urls: string[] = [];
|
||||
for (const match of matches) {
|
||||
|
|
@ -119,9 +183,17 @@ function extractGgdlUrls(rawText: string): string[] {
|
|||
}
|
||||
|
||||
function ensureFullSizeImageUrl(url: string): string {
|
||||
if (url.includes('=s2048')) return url;
|
||||
if (url.includes('=s')) return url;
|
||||
return `${url}=s2048`;
|
||||
const trimmed = url.trim();
|
||||
let normalized = trimmed;
|
||||
const backslashIndex = normalized.indexOf('\\');
|
||||
if (backslashIndex >= 0) normalized = normalized.slice(0, backslashIndex);
|
||||
// Some Gemini responses embed a size suffix as "/=s2048" which breaks downloads.
|
||||
normalized = normalized.replace(/\/=s(?=\d+(?:$|[?#]))/, '=s');
|
||||
normalized = normalized.replace(/\/=s(?=$|[?#])/, '=s');
|
||||
if (normalized.endsWith('/')) normalized = normalized.slice(0, -1);
|
||||
if (normalized.includes('=s2048')) return normalized;
|
||||
if (normalized.includes('=s')) return normalized;
|
||||
return `${normalized}=s2048`;
|
||||
}
|
||||
|
||||
async function fetchWithCookiePreservingRedirects(
|
||||
|
|
@ -190,6 +262,29 @@ async function uploadGeminiFile(filePath: string, signal?: AbortSignal): Promise
|
|||
return { id: text, name: fileName };
|
||||
}
|
||||
|
||||
/** MIME types keyed by lower-cased file extension (including the leading dot). */
const MIME_TYPE_BY_EXTENSION: ReadonlyMap<string, string> = new Map([
  ['.png', 'image/png'],
  ['.jpg', 'image/jpeg'],
  ['.jpeg', 'image/jpeg'],
  ['.webp', 'image/webp'],
  ['.gif', 'image/gif'],
  // HEIC/HEIF are common for phone photos used as reference images.
  ['.heic', 'image/heic'],
  ['.heif', 'image/heif'],
  ['.mp4', 'video/mp4'],
  ['.mov', 'video/quicktime'],
  ['.webm', 'video/webm'],
]);

/**
 * Guesses a MIME type from a file name's extension (case-insensitive).
 *
 * @param fileName - File name or path; only the extension is inspected.
 * @returns The mapped MIME type, or `application/octet-stream` when the
 *   extension is missing or unknown.
 */
function guessMimeType(fileName: string): string {
  const ext = path.extname(fileName).toLowerCase();
  return MIME_TYPE_BY_EXTENSION.get(ext) ?? 'application/octet-stream';
}
|
||||
|
||||
function buildGeminiFReqPayload(
|
||||
prompt: string,
|
||||
uploaded: Array<{ id: string; name: string }>,
|
||||
|
|
@ -201,9 +296,8 @@ function buildGeminiFReqPayload(
|
|||
prompt,
|
||||
0,
|
||||
null,
|
||||
// Matches gemini-webapi payload format: [[[fileId, 1]]] for a single attachment.
|
||||
// Keep it extensible for multiple uploads by emitting one [[id, 1]] entry per file.
|
||||
uploaded.map((file) => [[file.id, 1]]),
|
||||
// Matches gemini-web payload format: [[[fileId, 1, null, mimeType], fileName]] for an attachment.
|
||||
uploaded.map((file) => [[file.id, 1, null, guessMimeType(file.name)], file.name]),
|
||||
]
|
||||
: [prompt];
|
||||
|
||||
|
|
@ -248,7 +342,19 @@ export function parseGeminiStreamGenerateResponse(rawText: string): {
|
|||
? (getNestedValue<string | null>(firstCandidate, [22, 0], null) ?? textRaw)
|
||||
: textRaw;
|
||||
const thoughts = getNestedValue<string | null>(firstCandidate, [37, 0, 0], null);
|
||||
const metadata = getNestedValue<unknown>(body, [1], []);
|
||||
const conversationMeta = getNestedValue<unknown[]>(body, [1], []);
|
||||
const conversationId =
|
||||
typeof conversationMeta[0] === 'string' && conversationMeta[0].length > 0
|
||||
? conversationMeta[0]
|
||||
: null;
|
||||
const responseId =
|
||||
typeof conversationMeta[1] === 'string' && conversationMeta[1].length > 0
|
||||
? conversationMeta[1]
|
||||
: null;
|
||||
const choiceIdRaw = getNestedValue<string | null>(firstCandidate, [0], null);
|
||||
const choiceId = typeof choiceIdRaw === 'string' && choiceIdRaw.length > 0 ? choiceIdRaw : null;
|
||||
const metadata =
|
||||
conversationId && responseId && choiceId ? [conversationId, responseId, choiceId] : conversationMeta;
|
||||
|
||||
const images: GeminiWebCandidateImage[] = [];
|
||||
|
||||
|
|
@ -305,8 +411,8 @@ export function isGeminiModelUnavailable(errorCode: number | undefined): boolean
|
|||
}
|
||||
|
||||
export async function runGeminiWebOnce(input: GeminiWebRunInput): Promise<GeminiWebRunOutput> {
|
||||
const cookieHeader = buildCookieHeader(input.cookieMap);
|
||||
const at = await fetchGeminiAccessToken(input.cookieMap, input.signal);
|
||||
const cookieHeader = buildCookieHeader(input.cookieMap);
|
||||
|
||||
const uploaded: Array<{ id: string; name: string }> = [];
|
||||
for (const file of input.files ?? []) {
|
||||
|
|
@ -403,11 +509,19 @@ export async function saveFirstGeminiImageFromOutput(
|
|||
return { saved: true, imageCount: output.images.length };
|
||||
}
|
||||
|
||||
const ggdl = extractGgdlUrls(output.rawResponseText);
|
||||
if (ggdl[0]) {
|
||||
await downloadGeminiImage(ggdl[0], cookieMap, outputPath, signal);
|
||||
const ggdl = extractGgdlUrls(`${output.text}\n${output.rawResponseText}`);
|
||||
const preferred = ggdl.length > 0 ? ggdl[ggdl.length - 1] : null;
|
||||
if (preferred) {
|
||||
await downloadGeminiImage(preferred, cookieMap, outputPath, signal);
|
||||
return { saved: true, imageCount: ggdl.length };
|
||||
}
|
||||
|
||||
const imageGen = extractImageGenerationContentUrls(`${output.text}\n${output.rawResponseText}`);
|
||||
const imageGenPreferred = imageGen.length > 0 ? imageGen[imageGen.length - 1] : null;
|
||||
if (imageGenPreferred) {
|
||||
await downloadGeminiImage(imageGenPreferred, cookieMap, outputPath, signal);
|
||||
return { saved: true, imageCount: imageGen.length };
|
||||
}
|
||||
|
||||
return { saved: false, imageCount: 0 };
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,7 +1,8 @@
|
|||
import { mkdir, writeFile } from 'node:fs/promises';
|
||||
import path from 'node:path';
|
||||
import type { BrowserRunOptions, BrowserRunResult, BrowserLogger, CookieParam } from '../browser/types.js';
|
||||
import { runGeminiWebWithFallback, saveFirstGeminiImageFromOutput } from './client.js';
|
||||
import type { GeminiWebModelId } from './client.js';
|
||||
import type { GeminiWebModelId, GeminiWebRunOutput } from './client.js';
|
||||
import {
|
||||
buildGeminiCookieMap,
|
||||
hasRequiredGeminiCookies,
|
||||
|
|
@ -11,6 +12,9 @@ import type { GeminiWebOptions, GeminiWebResponse } from './types.js';
|
|||
|
||||
export { hasRequiredGeminiCookies } from './cookie-store.js';
|
||||
|
||||
const USER_AGENT =
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36';
|
||||
|
||||
function estimateTokenCount(text: string): number {
|
||||
return Math.ceil(text.length / 4);
|
||||
}
|
||||
|
|
@ -22,6 +26,115 @@ function resolveInvocationPath(value: string | undefined): string | undefined {
|
|||
return path.isAbsolute(trimmed) ? trimmed : path.resolve(process.cwd(), trimmed);
|
||||
}
|
||||
|
||||
/**
 * Normalizes a single path or a list of paths into an array of resolved paths.
 *
 * Each string entry is passed through `resolveInvocationPath`; entries that
 * are not strings, or that resolve to a falsy value, are dropped.
 *
 * @param value - One path, several paths, or nothing.
 * @returns Resolved paths in input order (possibly empty).
 */
function normalizePathList(value: string | string[] | undefined): string[] {
  if (!value) return [];

  const candidates = Array.isArray(value) ? value : [value];
  return candidates.flatMap((candidate) => {
    // Defensive: the value may come from untyped option objects at runtime.
    if (typeof candidate !== 'string') return [];
    const absolute = resolveInvocationPath(candidate);
    return absolute ? [absolute] : [];
  });
}
|
||||
|
||||
/**
 * Trims every path and removes blanks and duplicates, keeping the first
 * occurrence of each path in its original position.
 *
 * @param paths - Paths to clean up.
 * @returns Unique, trimmed, non-empty paths in first-seen order.
 */
function dedupePaths(paths: string[]): string[] {
  // A Set iterates in insertion order, which gives first-seen ordering for free.
  const unique = new Set<string>();
  paths.forEach((entry) => {
    const cleaned = entry.trim();
    if (cleaned) unique.add(cleaned);
  });
  return Array.from(unique);
}
|
||||
|
||||
/**
 * Serializes a cookie map into a `Cookie` request-header value.
 *
 * Entries whose value is not a non-empty string are skipped.
 *
 * @param cookieMap - Cookie name → value pairs.
 * @returns `name=value` pairs joined with `; ` (empty string when nothing qualifies).
 */
function buildCookieHeader(cookieMap: Record<string, string>): string {
  const pairs: string[] = [];
  for (const [name, value] of Object.entries(cookieMap)) {
    if (typeof value !== 'string' || value.length === 0) continue;
    pairs.push(`${name}=${value}`);
  }
  return pairs.join('; ');
}
|
||||
|
||||
/**
 * Fetches `url` following redirects manually, re-sending the same `init`
 * (including any `cookie` header it carries) on every hop.
 *
 * NOTE(review): the same headers are sent to every redirect target, whatever
 * its origin — confirm callers only pass trusted URLs.
 *
 * @param url - Initial URL.
 * @param init - Request options applied unchanged to every hop.
 * @param signal - Optional abort signal applied to every hop.
 * @param maxRedirects - Maximum number of redirects to follow (default 10).
 * @returns The first non-redirect response, or a 3xx response that has no `Location`.
 * @throws Error when more than `maxRedirects` redirects are encountered.
 */
async function fetchWithCookiePreservingRedirects(
  url: string,
  init: Omit<RequestInit, 'redirect'>,
  signal?: AbortSignal,
  maxRedirects = 10,
): Promise<Response> {
  let current = url;

  for (let hop = 0; hop <= maxRedirects; hop += 1) {
    const res = await fetch(current, { ...init, redirect: 'manual', signal });

    const isRedirect = res.status >= 300 && res.status < 400;
    const location = isRedirect ? res.headers.get('location') : null;
    if (!location) return res;

    // The redirect body is never read; cancel it so the connection is released
    // instead of waiting for garbage collection.
    try {
      await res.body?.cancel();
    } catch {
      // Best-effort cleanup only.
    }

    // `Location` may be relative; resolve it against the URL just requested.
    current = new URL(location, current).toString();
  }

  throw new Error(`Too many redirects while downloading media (>${maxRedirects}).`);
}
|
||||
|
||||
/**
 * Downloads a media URL using the given cookies and writes it to `outputPath`.
 *
 * The request goes through `fetchWithCookiePreservingRedirects` with a
 * `cookie` header built from `cookieMap` and the module's `USER_AGENT`.
 * The parent directory of `outputPath` is created if it does not exist.
 *
 * @param url - Media URL to download.
 * @param cookieMap - Cookies to send with the request.
 * @param outputPath - Destination file path.
 * @param signal - Optional abort signal for the download.
 * @throws Error when the final response is not OK (`res.ok` is false).
 */
async function downloadGeminiMedia(
  url: string,
  cookieMap: Record<string, string>,
  outputPath: string,
  signal?: AbortSignal,
): Promise<void> {
  const requestInit = {
    headers: {
      cookie: buildCookieHeader(cookieMap),
      'user-agent': USER_AGENT,
    },
  };

  const response = await fetchWithCookiePreservingRedirects(url, requestInit, signal);
  if (!response.ok) {
    throw new Error(
      `Failed to download media: ${response.status} ${response.statusText} (${response.url})`,
    );
  }

  // Buffer the whole payload before touching the filesystem.
  const bytes = new Uint8Array(await response.arrayBuffer());
  const targetDir = path.dirname(outputPath);
  await mkdir(targetDir, { recursive: true });
  await writeFile(outputPath, bytes);
}
|
||||
|
||||
/**
 * Extracts the unique Gemini `gg-dl` download URLs found in `rawText`,
 * in first-seen order.
 *
 * The host must be `googleusercontent.com` or one of its subdomains. The host
 * is matched label by label so that look-alike hosts such as
 * `evil-googleusercontent.com` or `evil.example?x=googleusercontent.com/...`
 * are rejected; extracted URLs are later fetched with the user's cookies.
 *
 * @param rawText - Response text to scan.
 * @returns De-duplicated matching URLs; empty when none are found.
 */
function extractGgdlUrls(rawText: string): string[] {
  const matches =
    rawText.match(/https?:\/\/(?:[A-Za-z0-9-]+\.)*googleusercontent\.com\/gg-dl\/[^\s"']+/g) ?? [];
  // A Set keeps insertion order, so this preserves first-seen ordering.
  return [...new Set(matches)];
}
|
||||
|
||||
/**
 * Best-effort download of a generated video referenced in a Gemini response.
 *
 * Scans both the parsed text and the raw response for `gg-dl` URLs (the same
 * inputs the image saver uses) and picks, in order of preference:
 *   1. the last URL whose path ends in `.mp4`, `.webm` or `.mov`;
 *   2. the first URL containing "video";
 *   3. the last `gg-dl` URL of any kind.
 *
 * @param output - Gemini run output to inspect.
 * @param cookieMap - Cookies sent with the download request.
 * @param outputPath - Destination file path for the video.
 * @param signal - Optional abort signal for the download.
 * @returns `saved: false` with `videoCount: 0` when no URL is found; otherwise
 *   `saved: true` with the number of distinct `gg-dl` URLs seen.
 */
async function saveFirstGeminiVideoFromOutput(
  output: GeminiWebRunOutput,
  cookieMap: Record<string, string>,
  outputPath: string,
  signal?: AbortSignal,
): Promise<{ saved: boolean; videoCount: number }> {
  const ggdl = extractGgdlUrls(`${output.text ?? ''}\n${output.rawResponseText}`);
  if (ggdl.length === 0) return { saved: false, videoCount: 0 };

  const videoCandidates = ggdl.filter((url) => /\.(mp4|webm|mov)(?:$|[?#])/i.test(url));
  const preferred =
    videoCandidates[videoCandidates.length - 1] ??
    ggdl.find((url) => /video/i.test(url)) ??
    ggdl[ggdl.length - 1];
  // Unreachable given the length check above; keeps the type narrowed to string.
  if (!preferred) return { saved: false, videoCount: 0 };

  await downloadGeminiMedia(preferred, cookieMap, outputPath, signal);
  return { saved: true, videoCount: ggdl.length };
}
|
||||
|
||||
function resolveGeminiWebModel(
|
||||
desiredModel: string | null | undefined,
|
||||
log?: BrowserLogger,
|
||||
|
|
@ -115,6 +228,9 @@ export async function loadGeminiCookieMap(log?: BrowserLogger): Promise<Record<s
|
|||
export function createGeminiWebExecutor(
|
||||
geminiOptions: GeminiWebOptions,
|
||||
): (runOptions: BrowserRunOptions) => Promise<BrowserRunResult> {
|
||||
let persistedChatMetadata: unknown | null = null;
|
||||
let referenceImagesUploaded = false;
|
||||
|
||||
return async (runOptions: BrowserRunOptions): Promise<BrowserRunResult> => {
|
||||
const startTime = Date.now();
|
||||
const log = runOptions.log;
|
||||
|
|
@ -133,23 +249,38 @@ export function createGeminiWebExecutor(
|
|||
? Math.max(1_000, runOptions.config.timeoutMs)
|
||||
: null;
|
||||
|
||||
const generateVideoPath = resolveInvocationPath(geminiOptions.generateVideo);
|
||||
|
||||
const defaultTimeoutMs = geminiOptions.youtube
|
||||
? 240_000
|
||||
: geminiOptions.generateImage || geminiOptions.editImage
|
||||
: generateVideoPath
|
||||
? 900_000
|
||||
: geminiOptions.generateImage || geminiOptions.editImage
|
||||
? 300_000
|
||||
: 120_000;
|
||||
|
||||
const timeoutMs = Math.min(configTimeout ?? defaultTimeoutMs, 600_000);
|
||||
const timeoutCapMs = generateVideoPath ? 1_800_000 : 600_000;
|
||||
const timeoutMs = Math.min(configTimeout ?? defaultTimeoutMs, timeoutCapMs);
|
||||
const controller = new AbortController();
|
||||
const timeout = setTimeout(() => controller.abort(), timeoutMs);
|
||||
|
||||
const keepSession = geminiOptions.keepSession === true;
|
||||
|
||||
const generateImagePath = resolveInvocationPath(geminiOptions.generateImage);
|
||||
const editImagePath = resolveInvocationPath(geminiOptions.editImage);
|
||||
const outputPath = resolveInvocationPath(geminiOptions.outputPath);
|
||||
const attachmentPaths = (runOptions.attachments ?? []).map((attachment) => attachment.path);
|
||||
const referenceImagePaths = normalizePathList(geminiOptions.referenceImages);
|
||||
const requestFilePaths = dedupePaths(
|
||||
keepSession ? attachmentPaths : [...referenceImagePaths, ...attachmentPaths],
|
||||
);
|
||||
|
||||
if (generateVideoPath && (generateImagePath || editImagePath)) {
|
||||
throw new Error('Gemini web executor: generateVideo cannot be combined with generateImage/editImage options.');
|
||||
}
|
||||
|
||||
let prompt = runOptions.prompt;
|
||||
if (geminiOptions.aspectRatio && (generateImagePath || editImagePath)) {
|
||||
if (geminiOptions.aspectRatio && (generateImagePath || editImagePath || generateVideoPath)) {
|
||||
prompt = `${prompt} (aspect ratio: ${geminiOptions.aspectRatio})`;
|
||||
}
|
||||
if (geminiOptions.youtube) {
|
||||
|
|
@ -158,29 +289,50 @@ export function createGeminiWebExecutor(
|
|||
if (generateImagePath && !editImagePath) {
|
||||
prompt = `Generate an image: ${prompt}`;
|
||||
}
|
||||
if (generateVideoPath) {
|
||||
prompt = `Generate a video: ${prompt}`;
|
||||
}
|
||||
|
||||
const model: GeminiWebModelId = resolveGeminiWebModel(runOptions.config?.desiredModel, log);
|
||||
let response: GeminiWebResponse;
|
||||
let videoSaveSummary: { saved: boolean; videoCount: number; outputPath: string } | null = null;
|
||||
|
||||
try {
|
||||
let chatMetadata: unknown = keepSession ? persistedChatMetadata : null;
|
||||
|
||||
if (keepSession && referenceImagePaths.length > 0 && !referenceImagesUploaded) {
|
||||
const intro = await runGeminiWebWithFallback({
|
||||
prompt: 'Here are reference images for future messages.',
|
||||
files: referenceImagePaths,
|
||||
model,
|
||||
cookieMap,
|
||||
chatMetadata,
|
||||
signal: controller.signal,
|
||||
});
|
||||
chatMetadata = intro.metadata;
|
||||
persistedChatMetadata = intro.metadata;
|
||||
referenceImagesUploaded = true;
|
||||
}
|
||||
|
||||
if (editImagePath) {
|
||||
const intro = await runGeminiWebWithFallback({
|
||||
prompt: 'Here is an image to edit',
|
||||
files: [editImagePath],
|
||||
model,
|
||||
cookieMap,
|
||||
chatMetadata: null,
|
||||
chatMetadata,
|
||||
signal: controller.signal,
|
||||
});
|
||||
const editPrompt = `Use image generation tool to ${prompt}`;
|
||||
const out = await runGeminiWebWithFallback({
|
||||
prompt: editPrompt,
|
||||
files: attachmentPaths,
|
||||
files: requestFilePaths,
|
||||
model,
|
||||
cookieMap,
|
||||
chatMetadata: intro.metadata,
|
||||
signal: controller.signal,
|
||||
});
|
||||
if (keepSession) persistedChatMetadata = out.metadata;
|
||||
response = {
|
||||
text: out.text ?? null,
|
||||
thoughts: geminiOptions.showThoughts ? out.thoughts : null,
|
||||
|
|
@ -198,12 +350,13 @@ export function createGeminiWebExecutor(
|
|||
} else if (generateImagePath) {
|
||||
const out = await runGeminiWebWithFallback({
|
||||
prompt,
|
||||
files: attachmentPaths,
|
||||
files: requestFilePaths,
|
||||
model,
|
||||
cookieMap,
|
||||
chatMetadata: null,
|
||||
chatMetadata,
|
||||
signal: controller.signal,
|
||||
});
|
||||
if (keepSession) persistedChatMetadata = out.metadata;
|
||||
response = {
|
||||
text: out.text ?? null,
|
||||
thoughts: geminiOptions.showThoughts ? out.thoughts : null,
|
||||
|
|
@ -216,15 +369,36 @@ export function createGeminiWebExecutor(
|
|||
if (!imageSave.saved) {
|
||||
throw new Error(`No images generated. Response text:\n${out.text || '(empty response)'}`);
|
||||
}
|
||||
} else if (generateVideoPath) {
|
||||
const out = await runGeminiWebWithFallback({
|
||||
prompt,
|
||||
files: requestFilePaths,
|
||||
model,
|
||||
cookieMap,
|
||||
chatMetadata,
|
||||
signal: controller.signal,
|
||||
});
|
||||
if (keepSession) persistedChatMetadata = out.metadata;
|
||||
response = {
|
||||
text: out.text ?? null,
|
||||
thoughts: geminiOptions.showThoughts ? out.thoughts : null,
|
||||
has_images: false,
|
||||
image_count: 0,
|
||||
};
|
||||
|
||||
const resolvedOutputPath = generateVideoPath ?? outputPath ?? 'generated.mp4';
|
||||
const save = await saveFirstGeminiVideoFromOutput(out, cookieMap, resolvedOutputPath, controller.signal);
|
||||
videoSaveSummary = { ...save, outputPath: resolvedOutputPath };
|
||||
} else {
|
||||
const out = await runGeminiWebWithFallback({
|
||||
prompt,
|
||||
files: attachmentPaths,
|
||||
files: requestFilePaths,
|
||||
model,
|
||||
cookieMap,
|
||||
chatMetadata: null,
|
||||
chatMetadata,
|
||||
signal: controller.signal,
|
||||
});
|
||||
if (keepSession) persistedChatMetadata = out.metadata;
|
||||
response = {
|
||||
text: out.text ?? null,
|
||||
thoughts: geminiOptions.showThoughts ? out.thoughts : null,
|
||||
|
|
@ -247,6 +421,15 @@ export function createGeminiWebExecutor(
|
|||
const imagePath = generateImagePath || outputPath || 'generated.png';
|
||||
answerMarkdown += `\n\n*Generated ${response.image_count} image(s). Saved to: ${imagePath}*`;
|
||||
}
|
||||
if (videoSaveSummary) {
|
||||
if (videoSaveSummary.saved) {
|
||||
answerMarkdown += `\n\n*Generated ${videoSaveSummary.videoCount || 1} video(s). Saved to: ${videoSaveSummary.outputPath}*`;
|
||||
} else if (/video_gen_chip/.test(answerMarkdown) || /video_gen_chip/.test(response.text ?? '')) {
|
||||
answerMarkdown += '\n\n*Video generation is asynchronous. Check Gemini web UI to download the result.*';
|
||||
} else {
|
||||
answerMarkdown += '\n\n*No downloadable video URL found in Gemini response.*';
|
||||
}
|
||||
}
|
||||
|
||||
const tookMs = Date.now() - startTime;
|
||||
log?.(`[gemini-web] Completed in ${tookMs}ms`);
|
||||
|
|
|
|||
|
|
@ -27,6 +27,7 @@ Options:
|
|||
-m, --model <id> gemini-3-pro | gemini-2.5-pro | gemini-2.5-flash (default: gemini-3-pro)
|
||||
--json Output JSON
|
||||
--image [path] Generate an image and save it (default: ./generated.png)
|
||||
--reference <files...> Reference images for vision input
|
||||
--login Only refresh cookies, then exit
|
||||
--cookie-path <path> Cookie file path (default: ${cookiePath})
|
||||
--profile-dir <path> Chrome profile dir (default: ${profileDir})
|
||||
|
|
@ -75,6 +76,7 @@ function parseArgs(argv: string[]): {
|
|||
loginOnly?: boolean;
|
||||
cookiePath?: string;
|
||||
profileDir?: string;
|
||||
referenceImages?: string[];
|
||||
} {
|
||||
const out: ReturnType<typeof parseArgs> = {};
|
||||
const positional: string[] = [];
|
||||
|
|
@ -157,6 +159,19 @@ function parseArgs(argv: string[]): {
|
|||
out.profileDir = arg.slice('--profile-dir='.length);
|
||||
continue;
|
||||
}
|
||||
if (arg === '--reference' || arg === '--ref') {
|
||||
out.referenceImages = [];
|
||||
while (i + 1 < argv.length) {
|
||||
const next = argv[i + 1];
|
||||
if (next && !next.startsWith('-')) {
|
||||
out.referenceImages.push(next);
|
||||
i += 1;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if (arg.startsWith('-')) {
|
||||
throw new Error(`Unknown option: ${arg}`);
|
||||
|
|
@ -178,6 +193,7 @@ function parseArgs(argv: string[]): {
|
|||
if (out.cookiePath === '') delete out.cookiePath;
|
||||
if (out.profileDir === '') delete out.profileDir;
|
||||
if (out.promptFiles?.length === 0) delete out.promptFiles;
|
||||
if (out.referenceImages?.length === 0) delete out.referenceImages;
|
||||
|
||||
return out;
|
||||
}
|
||||
|
|
@ -186,7 +202,7 @@ async function isCookieMapValid(cookieMap: Record<string, string>): Promise<bool
|
|||
if (!hasRequiredGeminiCookies(cookieMap)) return false;
|
||||
|
||||
const controller = new AbortController();
|
||||
const timer = setTimeout(() => controller.abort(), 10_000);
|
||||
const timer = setTimeout(() => controller.abort(), 30_000);
|
||||
try {
|
||||
await fetchGeminiAccessToken(cookieMap, controller.signal);
|
||||
return true;
|
||||
|
|
@ -201,7 +217,7 @@ async function ensureGeminiCookieMap(options: {
|
|||
cookiePath: string;
|
||||
profileDir: string;
|
||||
}): Promise<Record<string, string>> {
|
||||
const log = (msg: string) => console.log(msg);
|
||||
const log = (msg: string) => console.error(msg);
|
||||
|
||||
let cookieMap = await readGeminiCookieMapFromDisk({ cookiePath: options.cookiePath, log });
|
||||
if (await isCookieMapValid(cookieMap)) return cookieMap;
|
||||
|
|
@ -256,80 +272,39 @@ async function main(): Promise<void> {
|
|||
return;
|
||||
}
|
||||
|
||||
const promptFromStdin = await readPromptFromStdin();
|
||||
const promptFromFiles = args.promptFiles ? readPromptFiles(args.promptFiles) : null;
|
||||
const prompt = promptFromFiles || args.prompt || promptFromStdin;
|
||||
const promptFromArgs = promptFromFiles || args.prompt;
|
||||
const prompt = promptFromArgs || (await readPromptFromStdin());
|
||||
if (!prompt) printUsage(1);
|
||||
|
||||
let cookieMap = await ensureGeminiCookieMap({ cookiePath, profileDir });
|
||||
|
||||
const desiredModel = resolveModel(args.model || 'gemini-3-pro');
|
||||
const imagePath = resolveImageOutputPath(args.imagePath);
|
||||
const referenceImages = (args.referenceImages ?? []).map((p) =>
|
||||
path.isAbsolute(p) ? p : path.resolve(process.cwd(), p),
|
||||
);
|
||||
|
||||
try {
|
||||
const effectivePrompt = imagePath ? `Generate an image: ${prompt}` : prompt;
|
||||
const out = await runGeminiWebWithFallback({
|
||||
prompt: effectivePrompt,
|
||||
files: [],
|
||||
model: desiredModel,
|
||||
cookieMap,
|
||||
chatMetadata: null,
|
||||
});
|
||||
|
||||
let imageSaved = false;
|
||||
let imageCount = 0;
|
||||
if (imagePath) {
|
||||
const save = await saveFirstGeminiImageFromOutput(out, cookieMap, imagePath);
|
||||
imageSaved = save.saved;
|
||||
imageCount = save.imageCount;
|
||||
if (!imageSaved) {
|
||||
throw new Error(`No images generated. Response text:\n${out.text || '(empty response)'}`);
|
||||
}
|
||||
}
|
||||
|
||||
if (args.json) {
|
||||
process.stdout.write(
|
||||
`${JSON.stringify(
|
||||
imagePath ? { ...out, imageSaved, imageCount, imagePath } : out,
|
||||
null,
|
||||
2,
|
||||
)}\n`,
|
||||
);
|
||||
if (out.errorMessage) process.exit(1);
|
||||
return;
|
||||
}
|
||||
|
||||
if (out.errorMessage) {
|
||||
throw new Error(out.errorMessage);
|
||||
}
|
||||
|
||||
process.stdout.write(out.text ?? '');
|
||||
if (!out.text?.endsWith('\n')) process.stdout.write('\n');
|
||||
if (imagePath) {
|
||||
process.stdout.write(`Saved image (${imageCount || 1}) to: ${imagePath}\n`);
|
||||
}
|
||||
return;
|
||||
} catch (error) {
|
||||
const message = error instanceof Error ? error.message : String(error);
|
||||
|
||||
if (message.includes('Unable to locate Gemini access token')) {
|
||||
console.error('[gemini-web] Cookies may be expired. Re-opening browser to refresh cookies...');
|
||||
await sleep(500);
|
||||
cookieMap = await getGeminiCookieMapViaChrome({ userDataDir: profileDir, log: (m) => console.log(m) });
|
||||
await writeGeminiCookieMapToDisk(cookieMap, { cookiePath, log: (m) => console.log(m) });
|
||||
const controller = new AbortController();
|
||||
const timeoutMs = imagePath ? 300_000 : 120_000;
|
||||
const timeout = setTimeout(() => controller.abort(), timeoutMs);
|
||||
|
||||
try {
|
||||
const effectivePrompt = imagePath ? `Generate an image: ${prompt}` : prompt;
|
||||
const out = await runGeminiWebWithFallback({
|
||||
prompt: imagePath ? `Generate an image: ${prompt}` : prompt,
|
||||
files: [],
|
||||
prompt: effectivePrompt,
|
||||
files: referenceImages,
|
||||
model: desiredModel,
|
||||
cookieMap,
|
||||
chatMetadata: null,
|
||||
signal: controller.signal,
|
||||
});
|
||||
|
||||
let imageSaved = false;
|
||||
let imageCount = 0;
|
||||
if (imagePath) {
|
||||
const save = await saveFirstGeminiImageFromOutput(out, cookieMap, imagePath);
|
||||
const save = await saveFirstGeminiImageFromOutput(out, cookieMap, imagePath, controller.signal);
|
||||
imageSaved = save.saved;
|
||||
imageCount = save.imageCount;
|
||||
if (!imageSaved) {
|
||||
|
|
@ -359,6 +334,68 @@ async function main(): Promise<void> {
|
|||
process.stdout.write(`Saved image (${imageCount || 1}) to: ${imagePath}\n`);
|
||||
}
|
||||
return;
|
||||
} finally {
|
||||
clearTimeout(timeout);
|
||||
}
|
||||
} catch (error) {
|
||||
const message = error instanceof Error ? error.message : String(error);
|
||||
|
||||
if (message.includes('Unable to locate Gemini access token')) {
|
||||
console.error('[gemini-web] Cookies may be expired. Re-opening browser to refresh cookies...');
|
||||
await sleep(500);
|
||||
cookieMap = await getGeminiCookieMapViaChrome({ userDataDir: profileDir, log: (m) => console.error(m) });
|
||||
await writeGeminiCookieMapToDisk(cookieMap, { cookiePath, log: (m) => console.error(m) });
|
||||
|
||||
const controller = new AbortController();
|
||||
const timeoutMs = imagePath ? 300_000 : 120_000;
|
||||
const timeout = setTimeout(() => controller.abort(), timeoutMs);
|
||||
|
||||
try {
|
||||
const out = await runGeminiWebWithFallback({
|
||||
prompt: imagePath ? `Generate an image: ${prompt}` : prompt,
|
||||
files: referenceImages,
|
||||
model: desiredModel,
|
||||
cookieMap,
|
||||
chatMetadata: null,
|
||||
signal: controller.signal,
|
||||
});
|
||||
|
||||
let imageSaved = false;
|
||||
let imageCount = 0;
|
||||
if (imagePath) {
|
||||
const save = await saveFirstGeminiImageFromOutput(out, cookieMap, imagePath, controller.signal);
|
||||
imageSaved = save.saved;
|
||||
imageCount = save.imageCount;
|
||||
if (!imageSaved) {
|
||||
throw new Error(`No images generated. Response text:\n${out.text || '(empty response)'}`);
|
||||
}
|
||||
}
|
||||
|
||||
if (args.json) {
|
||||
process.stdout.write(
|
||||
`${JSON.stringify(
|
||||
imagePath ? { ...out, imageSaved, imageCount, imagePath } : out,
|
||||
null,
|
||||
2,
|
||||
)}\n`,
|
||||
);
|
||||
if (out.errorMessage) process.exit(1);
|
||||
return;
|
||||
}
|
||||
|
||||
if (out.errorMessage) {
|
||||
throw new Error(out.errorMessage);
|
||||
}
|
||||
|
||||
process.stdout.write(out.text ?? '');
|
||||
if (!out.text?.endsWith('\n')) process.stdout.write('\n');
|
||||
if (imagePath) {
|
||||
process.stdout.write(`Saved image (${imageCount || 1}) to: ${imagePath}\n`);
|
||||
}
|
||||
return;
|
||||
} finally {
|
||||
clearTimeout(timeout);
|
||||
}
|
||||
}
|
||||
|
||||
throw error;
|
||||
|
|
|
|||
|
|
@ -2,9 +2,18 @@ export interface GeminiWebOptions {
|
|||
youtube?: string;
|
||||
generateImage?: string;
|
||||
editImage?: string;
|
||||
generateVideo?: string;
|
||||
outputPath?: string;
|
||||
showThoughts?: boolean;
|
||||
aspectRatio?: string;
|
||||
/**
|
||||
* One or more local image paths to upload as persistent reference images.
|
||||
* - If `keepSession` is enabled, they are uploaded once per executor session.
|
||||
* - Otherwise, they are attached to each request.
|
||||
*/
|
||||
referenceImages?: string | string[];
|
||||
/** Preserve Gemini chat metadata to continue multi-turn conversations within the same executor instance. */
|
||||
keepSession?: boolean;
|
||||
}
|
||||
|
||||
export interface GeminiWebResponse {
|
||||
|
|
|
|||
Loading…
Reference in New Issue