From 8e88cf4a8b9047b9d5ef6620d47c92a91d59ead6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jim=20Liu=20=E5=AE=9D=E7=8E=89?= Date: Fri, 16 Jan 2026 20:20:07 -0600 Subject: [PATCH] feat: implement session management for image generation skills and add session handling functions --- skills/baoyu-comic/SKILL.md | 6 ++ skills/baoyu-gemini-web/SKILL.md | 49 +++++++--- skills/baoyu-gemini-web/scripts/main.ts | 69 ++++++++++---- skills/baoyu-gemini-web/scripts/paths.ts | 9 ++ .../baoyu-gemini-web/scripts/session-store.ts | 90 +++++++++++++++++++ skills/baoyu-slide-deck/SKILL.md | 11 ++- skills/baoyu-xhs-images/SKILL.md | 8 +- 7 files changed, 211 insertions(+), 31 deletions(-) create mode 100644 skills/baoyu-gemini-web/scripts/session-store.ts diff --git a/skills/baoyu-comic/SKILL.md b/skills/baoyu-comic/SKILL.md index 1abf147..230c009 100644 --- a/skills/baoyu-comic/SKILL.md +++ b/skills/baoyu-comic/SKILL.md @@ -104,6 +104,12 @@ For each page (cover + pages): - If text-only: concatenate prompts into single text - If multiple skills available, ask user preference +**Session Management**: +If the image generation skill supports `--sessionId`: +1. Generate a unique session ID at the start (e.g., `comic-{topic-slug}-{timestamp}`) +2. Use the same session ID for character sheet and all pages +3. This ensures visual consistency (character appearance, style) across all generated images + 3. 
Report progress after each generation ### Step 5: Completion Report diff --git a/skills/baoyu-gemini-web/SKILL.md b/skills/baoyu-gemini-web/SKILL.md index 260583b..7410c42 100644 --- a/skills/baoyu-gemini-web/SKILL.md +++ b/skills/baoyu-gemini-web/SKILL.md @@ -19,6 +19,10 @@ npx -y bun scripts/main.ts "Hello, Gemini" npx -y bun scripts/main.ts --prompt "Explain quantum computing" npx -y bun scripts/main.ts --prompt "A cute cat" --image cat.png npx -y bun scripts/main.ts --promptfiles system.md content.md --image out.png + +# Multi-turn conversation (agent generates unique sessionId) +npx -y bun scripts/main.ts "Remember this: 42" --sessionId my-unique-id-123 +npx -y bun scripts/main.ts "What number?" --sessionId my-unique-id-123 ``` ## Executor options (programmatic) @@ -78,19 +82,21 @@ npx -y bun scripts/main.ts "Hello" --json ## Options -| Option | Short | Description | -|--------|-------|-------------| -| `--prompt <text>` | `-p` | Prompt text | -| `--promptfiles <files...>` | | Read prompt from files (concatenated in order) | -| `--model <id>` | `-m` | Model: gemini-3-pro (default), gemini-2.5-pro, gemini-2.5-flash | -| `--image [path]` | | Generate image, save to path (default: generated.png) | -| `--json` | | Output as JSON | -| `--login` | | Refresh cookies only, then exit | -| `--cookie-path <path>` | | Custom cookie file path | -| `--profile-dir <path>` | | Chrome profile directory | -| `--help` | `-h` | Show help | +| Option | Description | +|--------|-------------| +| `--prompt <text>`, `-p` | Prompt text | +| `--promptfiles <files...>` | Read prompt from files (concatenated in order) | +| `--model <id>`, `-m` | Model: gemini-3-pro (default), gemini-2.5-pro, gemini-2.5-flash | +| `--image [path]` | Generate image, save to path (default: generated.png) | +| `--sessionId <id>` | Session ID for multi-turn conversation (agent generates unique ID) | +| `--list-sessions` | List saved sessions (max 100, sorted by update time) | +| `--json` | Output as JSON | +| `--login` | Refresh cookies only, then exit | +| 
`--cookie-path <path>` | Custom cookie file path | +| `--profile-dir <path>` | Chrome profile directory | +| `--help`, `-h` | Show help | -CLI note: `scripts/main.ts` currently supports text + image generation. Reference images / multi-turn / video generation are exposed via the executor options above. +CLI note: `scripts/main.ts` supports text generation, image generation, and multi-turn conversations via `--sessionId`. Reference images and video generation are exposed via the executor API. ## Models @@ -138,3 +144,22 @@ npx -y bun scripts/main.ts "Hello" --json | jq '.text' # Concatenate system.md + content.md as prompt npx -y bun scripts/main.ts --promptfiles system.md content.md --image output.png ``` + +### Multi-turn conversation +```bash +# Start a session with unique ID (agent generates this) +npx -y bun scripts/main.ts "You are a helpful math tutor." --sessionId task-abc123 + +# Continue the conversation (remembers context) +npx -y bun scripts/main.ts "What is 2+2?" --sessionId task-abc123 +npx -y bun scripts/main.ts "Now multiply that by 10" --sessionId task-abc123 + +# List recent sessions (max 100, sorted by update time) +npx -y bun scripts/main.ts --list-sessions +``` + +Session files are stored in `~/Library/Application Support/baoyu-skills/gemini-web/sessions/<id>.json` and contain: +- `id`: Session ID +- `metadata`: Gemini chat metadata for continuation +- `messages`: Array of `{role, content, timestamp, error?}` +- `createdAt`, `updatedAt`: Timestamps diff --git a/skills/baoyu-gemini-web/scripts/main.ts b/skills/baoyu-gemini-web/scripts/main.ts index 7b8a185..8e3f06c 100644 --- a/skills/baoyu-gemini-web/scripts/main.ts +++ b/skills/baoyu-gemini-web/scripts/main.ts @@ -10,6 +10,7 @@ import { writeGeminiCookieMapToDisk, } from './cookie-store.js'; import { resolveGeminiWebChromeProfileDir, resolveGeminiWebCookiePath } from './paths.js'; +import { readSession, writeSession, listSessions } from './session-store.js'; function printUsage(exitCode = 0): never { const 
cookiePath = resolveGeminiWebCookiePath(); @@ -21,6 +22,10 @@ function printUsage(exitCode = 0): never { npx -y bun skills/baoyu-gemini-web/scripts/main.ts --prompt "A cute cat" --image generated.png npx -y bun skills/baoyu-gemini-web/scripts/main.ts --promptfiles system.md content.md --image out.png +Multi-turn conversation (agent generates unique sessionId): + npx -y bun skills/baoyu-gemini-web/scripts/main.ts "Remember 42" --sessionId abc123 + npx -y bun skills/baoyu-gemini-web/scripts/main.ts "What number?" --sessionId abc123 + Options: -p, --prompt Prompt text --promptfiles Read prompt from one or more files (concatenated in order) @@ -28,6 +33,8 @@ Options: --json Output JSON --image [path] Generate an image and save it (default: ./generated.png) --reference Reference images for vision input + --sessionId Session ID for multi-turn conversation (agent should generate unique ID) + --list-sessions List saved sessions (max 100, sorted by update time) --login Only refresh cookies, then exit --cookie-path Cookie file path (default: ${cookiePath}) --profile-dir Chrome profile dir (default: ${profileDir}) @@ -77,6 +84,8 @@ function parseArgs(argv: string[]): { cookiePath?: string; profileDir?: string; referenceImages?: string[]; + sessionId?: string; + listSessions?: boolean; } { const out: ReturnType = {}; const positional: string[] = []; @@ -172,6 +181,19 @@ function parseArgs(argv: string[]): { } continue; } + if (arg === '--sessionId' || arg === '--session-id') { + out.sessionId = argv[i + 1] ?? ''; + i += 1; + continue; + } + if (arg.startsWith('--sessionId=') || arg.startsWith('--session-id=')) { + out.sessionId = arg.split('=')[1] ?? 
''; + continue; + } + if (arg === '--list-sessions') { + out.listSessions = true; + continue; + } if (arg.startsWith('-')) { throw new Error(`Unknown option: ${arg}`); @@ -194,6 +216,8 @@ function parseArgs(argv: string[]): { if (out.profileDir === '') delete out.profileDir; if (out.promptFiles?.length === 0) delete out.promptFiles; if (out.referenceImages?.length === 0) delete out.referenceImages; + if (out.sessionId != null) out.sessionId = out.sessionId.trim(); + if (out.sessionId === '') delete out.sessionId; return out; } @@ -267,6 +291,18 @@ async function main(): Promise { const cookiePath = args.cookiePath ?? resolveGeminiWebCookiePath(); const profileDir = args.profileDir ?? resolveGeminiWebChromeProfileDir(); + if (args.listSessions) { + const sessions = await listSessions(); + if (sessions.length === 0) { + console.log('No saved sessions.'); + } else { + for (const { id, updatedAt } of sessions) { + console.log(`${id}\t${updatedAt}`); + } + } + return; + } + if (args.loginOnly) { await ensureGeminiCookieMap({ cookiePath, profileDir }); return; @@ -277,6 +313,9 @@ async function main(): Promise { const prompt = promptFromArgs || (await readPromptFromStdin()); if (!prompt) printUsage(1); + const sessionData = args.sessionId ? await readSession(args.sessionId) : null; + const chatMetadata = sessionData?.metadata ?? null; + let cookieMap = await ensureGeminiCookieMap({ cookiePath, profileDir }); const desiredModel = resolveModel(args.model || 'gemini-3-pro'); @@ -297,10 +336,14 @@ async function main(): Promise { files: referenceImages, model: desiredModel, cookieMap, - chatMetadata: null, + chatMetadata, signal: controller.signal, }); + if (args.sessionId && out.metadata) { + await writeSession(args.sessionId, out.metadata, prompt, out.text ?? 
'', out.errorMessage); + } + let imageSaved = false; let imageCount = 0; if (imagePath) { @@ -313,13 +356,8 @@ async function main(): Promise { } if (args.json) { - process.stdout.write( - `${JSON.stringify( - imagePath ? { ...out, imageSaved, imageCount, imagePath } : out, - null, - 2, - )}\n`, - ); + const jsonOut = { ...out, ...(imagePath && { imageSaved, imageCount, imagePath }), ...(args.sessionId && { sessionId: args.sessionId }) }; + process.stdout.write(`${JSON.stringify(jsonOut, null, 2)}\n`); if (out.errorMessage) process.exit(1); return; } @@ -356,10 +394,14 @@ async function main(): Promise { files: referenceImages, model: desiredModel, cookieMap, - chatMetadata: null, + chatMetadata, signal: controller.signal, }); + if (args.sessionId && out.metadata) { + await writeSession(args.sessionId, out.metadata, prompt, out.text ?? '', out.errorMessage); + } + let imageSaved = false; let imageCount = 0; if (imagePath) { @@ -372,13 +414,8 @@ async function main(): Promise { } if (args.json) { - process.stdout.write( - `${JSON.stringify( - imagePath ? 
{ ...out, imageSaved, imageCount, imagePath } : out, - null, - 2, - )}\n`, - ); + const jsonOut = { ...out, ...(imagePath && { imageSaved, imageCount, imagePath }), ...(args.sessionId && { sessionId: args.sessionId }) }; + process.stdout.write(`${JSON.stringify(jsonOut, null, 2)}\n`); if (out.errorMessage) process.exit(1); return; } diff --git a/skills/baoyu-gemini-web/scripts/paths.ts b/skills/baoyu-gemini-web/scripts/paths.ts index 2b4a2b6..406bd1f 100644 --- a/skills/baoyu-gemini-web/scripts/paths.ts +++ b/skills/baoyu-gemini-web/scripts/paths.ts @@ -34,3 +34,12 @@ export function resolveGeminiWebChromeProfileDir(): string { if (override) return path.resolve(override); return path.join(resolveGeminiWebDataDir(), PROFILE_DIR_NAME); } + +export function resolveGeminiWebSessionsDir(): string { + return path.join(resolveGeminiWebDataDir(), 'sessions'); +} + +export function resolveGeminiWebSessionPath(name: string): string { + const sanitized = name.replace(/[^a-zA-Z0-9_-]/g, '_'); + return path.join(resolveGeminiWebSessionsDir(), `${sanitized}.json`); +} diff --git a/skills/baoyu-gemini-web/scripts/session-store.ts b/skills/baoyu-gemini-web/scripts/session-store.ts new file mode 100644 index 0000000..fa3836e --- /dev/null +++ b/skills/baoyu-gemini-web/scripts/session-store.ts @@ -0,0 +1,90 @@ +import { mkdir, readFile, writeFile, readdir, stat } from 'node:fs/promises'; +import path from 'node:path'; +import { resolveGeminiWebSessionsDir, resolveGeminiWebSessionPath } from './paths.js'; + +export interface SessionMessage { + role: 'user' | 'assistant'; + content: string; + timestamp: string; + error?: string; +} + +export interface SessionData { + id: string; + metadata: unknown; + messages: SessionMessage[]; + createdAt: string; + updatedAt: string; +} + +export interface SessionListItem { + id: string; + updatedAt: string; +} + +export async function readSession(id: string): Promise { + const sessionPath = resolveGeminiWebSessionPath(id); + try { + const content 
= await readFile(sessionPath, 'utf8'); + return JSON.parse(content) as SessionData; + } catch { + return null; + } +} + +export async function writeSession( + id: string, + metadata: unknown, + userMessage: string, + assistantMessage: string, + error?: string, +): Promise { + const sessionPath = resolveGeminiWebSessionPath(id); + const sessionsDir = resolveGeminiWebSessionsDir(); + await mkdir(sessionsDir, { recursive: true }); + + const existing = await readSession(id); + const now = new Date().toISOString(); + + const newMessages: SessionMessage[] = [ + { role: 'user', content: userMessage, timestamp: now }, + { role: 'assistant', content: assistantMessage, timestamp: now, ...(error && { error }) }, + ]; + + const data: SessionData = { + id, + metadata, + messages: [...(existing?.messages ?? []), ...newMessages], + createdAt: existing?.createdAt ?? now, + updatedAt: now, + }; + await writeFile(sessionPath, JSON.stringify(data, null, 2)); +} + +export async function listSessions(limit = 100): Promise { + const sessionsDir = resolveGeminiWebSessionsDir(); + try { + const files = await readdir(sessionsDir); + const jsonFiles = files.filter((f) => f.endsWith('.json')); + + const items: { id: string; updatedAt: string; mtime: number }[] = []; + for (const file of jsonFiles) { + const filePath = path.join(sessionsDir, file); + try { + const stats = await stat(filePath); + items.push({ + id: file.slice(0, -5), + updatedAt: stats.mtime.toISOString(), + mtime: stats.mtime.getTime(), + }); + } catch { + continue; + } + } + + items.sort((a, b) => b.mtime - a.mtime); + return items.slice(0, limit).map(({ id, updatedAt }) => ({ id, updatedAt })); + } catch { + return []; + } +} diff --git a/skills/baoyu-slide-deck/SKILL.md b/skills/baoyu-slide-deck/SKILL.md index 7b6364d..18b17fb 100644 --- a/skills/baoyu-slide-deck/SKILL.md +++ b/skills/baoyu-slide-deck/SKILL.md @@ -232,14 +232,21 @@ Style notes: [specific style characteristics to emphasize] ### Step 5: Generate Images 
+**Session Management**: +If the image generation skill supports `--sessionId`: +1. Generate a unique session ID at the start (e.g., `slides-{topic-slug}-{timestamp}`) +2. Use the same session ID for all slides +3. This ensures visual consistency (color scheme, style, typography) across all slides + For each slide, generate using: ```bash -/baoyu-gemini-web --promptfiles [SKILL_ROOT]/skills/baoyu-slide-deck/prompts/system.md [TARGET_DIR]/prompts/01-cover.md --image [TARGET_DIR]/01-cover.png +# With session support +/baoyu-gemini-web --promptfiles [SKILL_ROOT]/skills/baoyu-slide-deck/prompts/system.md [TARGET_DIR]/prompts/01-cover.md --image [TARGET_DIR]/01-cover.png --sessionId slides-topic-20260117 ``` Generation flow: -1. Generate images sequentially +1. Generate images sequentially with the same session ID 2. After each image, output progress: "Generated X/N" 3. On failure, auto-retry once 4. If retry fails, log reason, continue to next diff --git a/skills/baoyu-xhs-images/SKILL.md b/skills/baoyu-xhs-images/SKILL.md index 9ac196a..44850b7 100644 --- a/skills/baoyu-xhs-images/SKILL.md +++ b/skills/baoyu-xhs-images/SKILL.md @@ -261,8 +261,14 @@ Style notes: [style-specific characteristics] 1. Check available image generation skills 2. If multiple skills available, ask user to choose +**Session Management**: +If the image generation skill supports `--sessionId`: +1. Generate a unique session ID at the start (e.g., `xhs-{topic-slug}-{timestamp}`) +2. Use the same session ID for all images in the series +3. This ensures style consistency across all generated images + **Generation Flow**: -1. Call selected image generation skill with prompt file and output path +1. Call selected image generation skill with prompt file, output path, and session ID 2. Confirm generation success 3. Report progress: "Generated X/N" 4. Continue to next