支持复用已有 Chrome CDP 实例,修复端口检测顺序问题

This commit is contained in:
Jim Liu 宝玉 2026-03-11 17:24:18 -05:00
parent 1cb54420e0
commit 00bf946403
2 changed files with 74 additions and 15 deletions

View File

@ -1,7 +1,8 @@
import { spawn, type ChildProcess } from "node:child_process";
import fs from "node:fs";
import { mkdir } from "node:fs/promises";
import { mkdir, readFile } from "node:fs/promises";
import net from "node:net";
import path from "node:path";
import process from "node:process";
import { resolveUrlToMarkdownChromeProfileDir } from "./paths.js";
@ -121,6 +122,41 @@ export async function getFreePort(): Promise<number> {
});
}
/**
 * Parses a TCP port number from text.
 *
 * @param text - Raw text expected to start with a decimal port; may be `undefined`.
 * @returns The port when it lies in 1..65535, otherwise `null`.
 */
function parseChromeDebugPort(text: string | undefined): number | null {
  if (text === undefined) return null;
  const port = Number.parseInt(text.trim(), 10);
  // parseInt yields NaN for non-numeric text; the range check also rejects 0
  // and values that cannot be a TCP port.
  return Number.isInteger(port) && port > 0 && port <= 65_535 ? port : null;
}

/**
 * Probes `http://127.0.0.1:<port>/json/version` and reports whether it answered
 * with a successful HTTP status.
 *
 * Never throws: a refused connection, timeout, or any other failure is reported
 * as `false` so callers can move on to the next candidate port.
 *
 * @param port - Local TCP port to probe.
 * @returns `true` when the endpoint responded with an OK status.
 */
async function isChromeDebugPortAlive(port: number): Promise<boolean> {
  try {
    const res = await fetchWithTimeout(`http://127.0.0.1:${port}/json/version`, { timeoutMs: 3_000 });
    return res.ok;
  } catch {
    return false;
  }
}

/**
 * Looks for an already-running Chrome that uses this tool's profile directory
 * and exposes a reachable remote-debugging endpoint.
 *
 * Two sources are consulted, in order:
 * 1. The first line of the `DevToolsActivePort` file inside the profile directory.
 * 2. On non-Windows platforms, the `ps aux` process list: every line that
 *    mentions the profile directory and a `--remote-debugging-port=<n>` flag.
 *
 * Each candidate port is probed independently, so one unreachable candidate
 * does not stop the remaining ones from being tried, and no port is probed twice.
 *
 * @returns The first port whose `/json/version` endpoint answers OK, or `null`
 *          when no reusable instance is found.
 */
export async function findExistingChromePort(): Promise<number | null> {
  const profileDir = resolveUrlToMarkdownChromeProfileDir();
  // Ports already probed without success; avoids paying the probe timeout twice.
  const probed = new Set<number>();

  try {
    const content = await readFile(path.join(profileDir, "DevToolsActivePort"), "utf-8");
    const port = parseChromeDebugPort(content.split("\n")[0]);
    if (port !== null) {
      if (await isChromeDebugPortAlive(port)) return port;
      probed.add(port);
    }
  } catch {
    // File missing or unreadable: fall through to the process-list scan.
  }

  // The process-list fallback relies on `ps`, which is not available on Windows.
  if (process.platform === "win32") return null;

  let psOutput: string;
  try {
    const { execSync } = await import("node:child_process");
    psOutput = execSync("ps aux", { encoding: "utf-8", timeout: 5_000 });
  } catch {
    // `ps` failed or timed out: nothing more to inspect.
    return null;
  }

  for (const line of psOutput.split("\n")) {
    if (!line.includes(profileDir)) continue;
    const flag = /--remote-debugging-port=(\d+)/.exec(line);
    const port = parseChromeDebugPort(flag?.[1]);
    if (port === null || probed.has(port)) continue;
    probed.add(port);
    if (await isChromeDebugPortAlive(port)) return port;
  }

  return null;
}
export function findChromeExecutable(): string | null {
const override = process.env.URL_CHROME_PATH?.trim();
if (override && fs.existsSync(override)) return override;

View File

@ -3,7 +3,7 @@ import { writeFile, mkdir, access } from "node:fs/promises";
import path from "node:path";
import process from "node:process";
import { CdpConnection, getFreePort, launchChrome, waitForChromeDebugPort, waitForNetworkIdle, waitForPageLoad, autoScroll, evaluateScript, killChrome } from "./cdp.js";
import { CdpConnection, getFreePort, findExistingChromePort, launchChrome, waitForChromeDebugPort, waitForNetworkIdle, waitForPageLoad, autoScroll, evaluateScript, killChrome } from "./cdp.js";
import { absolutizeUrlsScript, extractContent, createMarkdownDocument, type ConversionResult } from "./html-to-markdown.js";
import { localizeMarkdownMedia, countRemoteMedia } from "./media-localizer.js";
import { resolveUrlToMarkdownDataDir } from "./paths.js";
@ -98,21 +98,37 @@ async function waitForUserSignal(): Promise<void> {
}
async function captureUrl(args: Args): Promise<ConversionResult> {
const port = await getFreePort();
const chrome = await launchChrome(args.url, port, false);
const existingPort = await findExistingChromePort();
const reusing = existingPort !== null;
const port = existingPort ?? await getFreePort();
const chrome = reusing ? null : await launchChrome(args.url, port, false);
if (reusing) console.log(`Reusing existing Chrome on port ${port}`);
let cdp: CdpConnection | null = null;
let targetId: string | null = null;
try {
const wsUrl = await waitForChromeDebugPort(port, 30_000);
cdp = await CdpConnection.connect(wsUrl, CDP_CONNECT_TIMEOUT_MS);
let sessionId: string;
if (reusing) {
const created = await cdp.send<{ targetId: string }>("Target.createTarget", { url: args.url });
targetId = created.targetId;
const attached = await cdp.send<{ sessionId: string }>("Target.attachToTarget", { targetId, flatten: true });
sessionId = attached.sessionId;
await cdp.send("Network.enable", {}, { sessionId });
await cdp.send("Page.enable", {}, { sessionId });
} else {
const targets = await cdp.send<{ targetInfos: Array<{ targetId: string; type: string; url: string }> }>("Target.getTargets");
const pageTarget = targets.targetInfos.find(t => t.type === "page" && t.url.startsWith("http"));
if (!pageTarget) throw new Error("No page target found");
const { sessionId } = await cdp.send<{ sessionId: string }>("Target.attachToTarget", { targetId: pageTarget.targetId, flatten: true });
targetId = pageTarget.targetId;
const attached = await cdp.send<{ sessionId: string }>("Target.attachToTarget", { targetId, flatten: true });
sessionId = attached.sessionId;
await cdp.send("Network.enable", {}, { sessionId });
await cdp.send("Page.enable", {}, { sessionId });
}
if (args.wait) {
await waitForUserSignal();
@ -136,11 +152,18 @@ async function captureUrl(args: Args): Promise<ConversionResult> {
return await extractContent(html, args.url);
} finally {
if (reusing) {
if (cdp && targetId) {
try { await cdp.send("Target.closeTarget", { targetId }, { timeoutMs: 5_000 }); } catch {}
}
if (cdp) cdp.close();
} else {
if (cdp) {
try { await cdp.send("Browser.close", {}, { timeoutMs: 5_000 }); } catch {}
cdp.close();
}
killChrome(chrome);
if (chrome) killChrome(chrome);
}
}
}