支持复用已有 Chrome CDP 实例,修复端口检测顺序问题
This commit is contained in:
parent
1cb54420e0
commit
00bf946403
|
|
@ -1,7 +1,8 @@
|
|||
import { spawn, type ChildProcess } from "node:child_process";
|
||||
import fs from "node:fs";
|
||||
import { mkdir } from "node:fs/promises";
|
||||
import { mkdir, readFile } from "node:fs/promises";
|
||||
import net from "node:net";
|
||||
import path from "node:path";
|
||||
import process from "node:process";
|
||||
|
||||
import { resolveUrlToMarkdownChromeProfileDir } from "./paths.js";
|
||||
|
|
@ -121,6 +122,41 @@ export async function getFreePort(): Promise<number> {
|
|||
});
|
||||
}
|
||||
|
||||
/**
 * Looks for an already-running Chrome instance that uses this tool's profile
 * directory and exposes a reachable DevTools (CDP) HTTP endpoint.
 *
 * Two strategies are tried in order:
 *  1. Read the first line of `<profileDir>/DevToolsActivePort` as a port number.
 *  2. On non-Windows platforms, scan `ps aux` output for processes whose command
 *     line mentions the profile directory and a `--remote-debugging-port=<n>` flag.
 *
 * Every candidate port is verified with a GET to
 * `http://127.0.0.1:<port>/json/version` (3 s timeout) before it is returned.
 * The lookup is best-effort: any I/O, process, or network failure is treated as
 * "not found" rather than surfaced to the caller.
 *
 * @returns The verified debugging port, or `null` when no usable instance is found.
 */
export async function findExistingChromePort(): Promise<number | null> {
  const profileDir = resolveUrlToMarkdownChromeProfileDir();

  // Ports already probed during this call; avoids paying the 3 s timeout twice
  // when the port file and several process lines all name the same dead port.
  const probedPorts = new Set<number>();

  // Returns true only when `port` is a valid, not-yet-probed TCP port whose
  // /json/version endpoint answers with an OK status. Never throws, so a single
  // unreachable candidate cannot abort the remaining candidates.
  const isLiveDebugPort = async (port: number): Promise<boolean> => {
    if (!Number.isInteger(port) || port < 1 || port > 65_535) return false;
    if (probedPorts.has(port)) return false;
    probedPorts.add(port);
    try {
      const res = await fetchWithTimeout(`http://127.0.0.1:${port}/json/version`, { timeoutMs: 3_000 });
      return res.ok;
    } catch {
      return false;
    }
  };

  // Strategy 1: port recorded in the profile directory.
  try {
    const activePortPath = path.join(profileDir, "DevToolsActivePort");
    const content = await readFile(activePortPath, "utf-8");
    const firstLine = content.split("\n", 1)[0] ?? "";
    const port = Number.parseInt(firstLine.trim(), 10);
    if (await isLiveDebugPort(port)) return port;
  } catch {
    // Missing or unreadable port file: fall through to the process scan.
  }

  // Strategy 2: inspect running processes (no `ps` on Windows).
  if (process.platform !== "win32") {
    try {
      const { execSync } = await import("node:child_process");
      // NOTE(review): some `ps` implementations may truncate long command lines
      // depending on terminal width — confirm whether `ps auxww` is needed.
      const ps = execSync("ps aux", { encoding: "utf-8", timeout: 5_000 });
      const portFlag = /--remote-debugging-port=(\d+)/;

      for (const line of ps.split("\n")) {
        if (!line.includes(profileDir)) continue;
        const match = portFlag.exec(line);
        if (!match) continue;
        const port = Number.parseInt(match[1] ?? "", 10);
        if (await isLiveDebugPort(port)) return port;
      }
    } catch {
      // `ps` unavailable, timed out, or failed: treat as no existing instance.
    }
  }

  return null;
}
|
||||
|
||||
export function findChromeExecutable(): string | null {
|
||||
const override = process.env.URL_CHROME_PATH?.trim();
|
||||
if (override && fs.existsSync(override)) return override;
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@ import { writeFile, mkdir, access } from "node:fs/promises";
|
|||
import path from "node:path";
|
||||
import process from "node:process";
|
||||
|
||||
import { CdpConnection, getFreePort, launchChrome, waitForChromeDebugPort, waitForNetworkIdle, waitForPageLoad, autoScroll, evaluateScript, killChrome } from "./cdp.js";
|
||||
import { CdpConnection, getFreePort, findExistingChromePort, launchChrome, waitForChromeDebugPort, waitForNetworkIdle, waitForPageLoad, autoScroll, evaluateScript, killChrome } from "./cdp.js";
|
||||
import { absolutizeUrlsScript, extractContent, createMarkdownDocument, type ConversionResult } from "./html-to-markdown.js";
|
||||
import { localizeMarkdownMedia, countRemoteMedia } from "./media-localizer.js";
|
||||
import { resolveUrlToMarkdownDataDir } from "./paths.js";
|
||||
|
|
@ -98,21 +98,37 @@ async function waitForUserSignal(): Promise<void> {
|
|||
}
|
||||
|
||||
async function captureUrl(args: Args): Promise<ConversionResult> {
|
||||
const port = await getFreePort();
|
||||
const chrome = await launchChrome(args.url, port, false);
|
||||
const existingPort = await findExistingChromePort();
|
||||
const reusing = existingPort !== null;
|
||||
const port = existingPort ?? await getFreePort();
|
||||
const chrome = reusing ? null : await launchChrome(args.url, port, false);
|
||||
|
||||
if (reusing) console.log(`Reusing existing Chrome on port ${port}`);
|
||||
|
||||
let cdp: CdpConnection | null = null;
|
||||
let targetId: string | null = null;
|
||||
try {
|
||||
const wsUrl = await waitForChromeDebugPort(port, 30_000);
|
||||
cdp = await CdpConnection.connect(wsUrl, CDP_CONNECT_TIMEOUT_MS);
|
||||
|
||||
let sessionId: string;
|
||||
if (reusing) {
|
||||
const created = await cdp.send<{ targetId: string }>("Target.createTarget", { url: args.url });
|
||||
targetId = created.targetId;
|
||||
const attached = await cdp.send<{ sessionId: string }>("Target.attachToTarget", { targetId, flatten: true });
|
||||
sessionId = attached.sessionId;
|
||||
await cdp.send("Network.enable", {}, { sessionId });
|
||||
await cdp.send("Page.enable", {}, { sessionId });
|
||||
} else {
|
||||
const targets = await cdp.send<{ targetInfos: Array<{ targetId: string; type: string; url: string }> }>("Target.getTargets");
|
||||
const pageTarget = targets.targetInfos.find(t => t.type === "page" && t.url.startsWith("http"));
|
||||
if (!pageTarget) throw new Error("No page target found");
|
||||
|
||||
const { sessionId } = await cdp.send<{ sessionId: string }>("Target.attachToTarget", { targetId: pageTarget.targetId, flatten: true });
|
||||
targetId = pageTarget.targetId;
|
||||
const attached = await cdp.send<{ sessionId: string }>("Target.attachToTarget", { targetId, flatten: true });
|
||||
sessionId = attached.sessionId;
|
||||
await cdp.send("Network.enable", {}, { sessionId });
|
||||
await cdp.send("Page.enable", {}, { sessionId });
|
||||
}
|
||||
|
||||
if (args.wait) {
|
||||
await waitForUserSignal();
|
||||
|
|
@ -136,11 +152,18 @@ async function captureUrl(args: Args): Promise<ConversionResult> {
|
|||
|
||||
return await extractContent(html, args.url);
|
||||
} finally {
|
||||
if (reusing) {
|
||||
if (cdp && targetId) {
|
||||
try { await cdp.send("Target.closeTarget", { targetId }, { timeoutMs: 5_000 }); } catch {}
|
||||
}
|
||||
if (cdp) cdp.close();
|
||||
} else {
|
||||
if (cdp) {
|
||||
try { await cdp.send("Browser.close", {}, { timeoutMs: 5_000 }); } catch {}
|
||||
cdp.close();
|
||||
}
|
||||
killChrome(chrome);
|
||||
if (chrome) killChrome(chrome);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue