/**
 * Parses a TCP port from text that begins with decimal digits.
 *
 * @param text Raw text (for example a file line or a regex capture); may be undefined.
 * @returns The port when it is an integer in 1..65535, otherwise null.
 */
function parseDebugPort(text: string | undefined): number | null {
  const port = Number.parseInt((text ?? "").trim(), 10);
  return Number.isInteger(port) && port >= 1 && port <= 65_535 ? port : null;
}

/**
 * Checks whether an HTTP endpoint answers `/json/version` on the given loopback port.
 *
 * A thrown request error (refused connection, timeout, ...) is reported as
 * "not alive" instead of propagating, so one dead candidate cannot abort the
 * caller's search for a live one.
 *
 * @param port Loopback TCP port to probe.
 * @returns true when the request completes with an OK status.
 */
async function isDebugPortAlive(port: number): Promise<boolean> {
  try {
    const res = await fetchWithTimeout(`http://127.0.0.1:${port}/json/version`, { timeoutMs: 3_000 });
    return res.ok;
  } catch {
    return false;
  }
}

/**
 * Looks for an already-running Chrome that uses this tool's profile directory
 * and exposes a remote-debugging port, so it can be reused instead of
 * launching a new browser.
 *
 * Two strategies are tried in order:
 *  1. Read `DevToolsActivePort` inside the profile directory; its first line
 *     is interpreted as the port.
 *  2. On non-Windows platforms, scan `ps aux` for processes whose command line
 *     mentions the profile directory and a `--remote-debugging-port=<n>` flag.
 *
 * Every candidate port is verified with a live HTTP probe before it is returned.
 *
 * @returns The verified debugging port, or null when no reusable instance is found.
 */
export async function findExistingChromePort(): Promise<number | null> {
  const profileDir = resolveUrlToMarkdownChromeProfileDir();

  // Strategy 1: port file inside the profile directory.
  try {
    const content = await readFile(path.join(profileDir, "DevToolsActivePort"), "utf-8");
    const port = parseDebugPort(content.split("\n")[0]);
    if (port !== null && (await isDebugPortAlive(port))) return port;
  } catch {
    // Missing or unreadable port file: fall through to the process scan.
  }

  // Strategy 2 relies on `ps`, which is not available on Windows.
  if (process.platform === "win32") return null;

  let psOutput: string;
  try {
    const { execSync } = await import("node:child_process");
    psOutput = execSync("ps aux", { encoding: "utf-8", timeout: 5_000 });
  } catch {
    // Best effort: without a process list there is nothing more to check.
    return null;
  }

  // Collect distinct ports first so that each one is probed at most once.
  // NOTE(review): the profile match is a plain substring test, so a sibling
  // directory sharing this path as a prefix would also match — confirm whether
  // a stricter match on the launch flag is wanted.
  const candidates = new Set<number>();
  for (const line of psOutput.split("\n")) {
    if (!line.includes(profileDir)) continue;
    const port = parseDebugPort(/--remote-debugging-port=(\d+)/.exec(line)?.[1]);
    if (port !== null) candidates.add(port);
  }

  for (const port of candidates) {
    if (await isDebugPortAlive(port)) return port;
  }

  return null;
}
= await findExistingChromePort(); + const reusing = existingPort !== null; + const port = existingPort ?? await getFreePort(); + const chrome = reusing ? null : await launchChrome(args.url, port, false); + + if (reusing) console.log(`Reusing existing Chrome on port ${port}`); let cdp: CdpConnection | null = null; + let targetId: string | null = null; try { const wsUrl = await waitForChromeDebugPort(port, 30_000); cdp = await CdpConnection.connect(wsUrl, CDP_CONNECT_TIMEOUT_MS); - const targets = await cdp.send<{ targetInfos: Array<{ targetId: string; type: string; url: string }> }>("Target.getTargets"); - const pageTarget = targets.targetInfos.find(t => t.type === "page" && t.url.startsWith("http")); - if (!pageTarget) throw new Error("No page target found"); - - const { sessionId } = await cdp.send<{ sessionId: string }>("Target.attachToTarget", { targetId: pageTarget.targetId, flatten: true }); - await cdp.send("Network.enable", {}, { sessionId }); - await cdp.send("Page.enable", {}, { sessionId }); + let sessionId: string; + if (reusing) { + const created = await cdp.send<{ targetId: string }>("Target.createTarget", { url: args.url }); + targetId = created.targetId; + const attached = await cdp.send<{ sessionId: string }>("Target.attachToTarget", { targetId, flatten: true }); + sessionId = attached.sessionId; + await cdp.send("Network.enable", {}, { sessionId }); + await cdp.send("Page.enable", {}, { sessionId }); + } else { + const targets = await cdp.send<{ targetInfos: Array<{ targetId: string; type: string; url: string }> }>("Target.getTargets"); + const pageTarget = targets.targetInfos.find(t => t.type === "page" && t.url.startsWith("http")); + if (!pageTarget) throw new Error("No page target found"); + targetId = pageTarget.targetId; + const attached = await cdp.send<{ sessionId: string }>("Target.attachToTarget", { targetId, flatten: true }); + sessionId = attached.sessionId; + await cdp.send("Network.enable", {}, { sessionId }); + await 
cdp.send("Page.enable", {}, { sessionId }); + } if (args.wait) { await waitForUserSignal(); @@ -136,11 +152,18 @@ async function captureUrl(args: Args): Promise { return await extractContent(html, args.url); } finally { - if (cdp) { - try { await cdp.send("Browser.close", {}, { timeoutMs: 5_000 }); } catch {} - cdp.close(); + if (reusing) { + if (cdp && targetId) { + try { await cdp.send("Target.closeTarget", { targetId }, { timeoutMs: 5_000 }); } catch {} + } + if (cdp) cdp.close(); + } else { + if (cdp) { + try { await cdp.send("Browser.close", {}, { timeoutMs: 5_000 }); } catch {} + cdp.close(); + } + if (chrome) killChrome(chrome); } - killChrome(chrome); } }