641 lines
19 KiB
TypeScript
641 lines
19 KiB
TypeScript
import fs from "node:fs";
|
|
import path from "node:path";
|
|
import readline from "node:readline";
|
|
import process from "node:process";
|
|
import { mkdir, readFile, writeFile } from "node:fs/promises";
|
|
|
|
import { fetchXArticle } from "./graphql.js";
|
|
import { formatArticleMarkdown } from "./markdown.js";
|
|
import { localizeMarkdownMedia, type LocalizeMarkdownMediaResult } from "./media-localizer.js";
|
|
import { resolveReferencedTweetsFromArticle } from "./referenced-tweets.js";
|
|
import { hasRequiredXCookies, loadXCookies, refreshXCookies } from "./cookies.js";
|
|
import { resolveXToMarkdownConsentPath } from "./paths.js";
|
|
import { tweetToMarkdown } from "./tweet-to-markdown.js";
|
|
|
|
/** Parsed command-line options for the x-to-markdown CLI. */
type CliArgs = {
  /** Target URL, taken from `--url <url>` or, failing that, the first positional argument. */
  url: string | null;
  /** Value of `--output` / `-o` (file or directory path); `null` when not given. */
  output: string | null;
  /** `--json`: print a JSON summary on stdout instead of just the markdown path. */
  json: boolean;
  /** `--login`: refresh cookies only, then exit. */
  login: boolean;
  /** `--download-media`: localize remote media referenced by the markdown. */
  downloadMedia: boolean;
  /** `--help` / `-h`: show usage and exit. */
  help: boolean;
};
|
|
|
|
/** Shape of the JSON consent file written after the user accepts the disclaimer. */
type ConsentRecord = {
  /** Schema version of the record; written as `1`. */
  version: number;
  /** Must be `true` for the record to count as consent. */
  accepted: boolean;
  /** ISO-8601 timestamp (from `Date#toISOString`) of when consent was given. */
  acceptedAt: string;
  /** Disclaimer version the user accepted; compared against `DISCLAIMER_VERSION`. */
  disclaimerVersion: string;
};
|
|
|
|
/**
 * Version of the disclaimer text. A stored consent record is only honored when
 * its `disclaimerVersion` equals this value, and new records are written with it.
 */
const DISCLAIMER_VERSION = "1.0";
|
|
|
|
/**
 * Builds the command line shown in usage text for invoking this script.
 *
 * The script path comes from `process.argv[1]`; it is shown relative to the
 * current working directory when it lives inside it, otherwise as-is. When no
 * script path is available, `fallback` is used.
 *
 * @param fallback Path to display when `process.argv[1]` is empty/undefined.
 * @returns `npx -y bun <path>`, with the path double-quoted if it contains a space.
 */
function formatScriptCommand(fallback: string): string {
  const scriptPath = process.argv[1];

  let shown = fallback;
  if (scriptPath) {
    const fromCwd = path.relative(process.cwd(), scriptPath);
    // An empty result or one that climbs out of cwd is less readable than the raw path.
    shown = fromCwd && !fromCwd.startsWith("..") ? fromCwd : scriptPath;
  }

  const needsQuoting = shown.includes(" ");
  const rendered = needsQuoting ? `"${shown.replace(/"/g, '\\"')}"` : shown;
  return `npx -y bun ${rendered}`;
}
|
|
|
|
/**
 * Prints the CLI usage text and terminates the process.
 *
 * Help that was explicitly requested (`exitCode === 0`) goes to stdout. Usage
 * shown because of a bad invocation (non-zero exit code) goes to stderr so it
 * never contaminates stdout, which callers may be piping (e.g. `--json | jq`).
 *
 * @param exitCode Process exit status to terminate with.
 */
function printUsage(exitCode: number): never {
  const cmd = formatScriptCommand("scripts/main.ts");
  const usage = `X (Twitter) to Markdown

Usage:
${cmd} <url>
${cmd} --url <url>

Options:
--output <path>, -o Output path (file or dir). Default: ./x-to-markdown/<slug>/
--json Output as JSON
--download-media Download images/videos to local ./imgs and ./videos next to markdown
--login Refresh cookies only, then exit
--help, -h Show help

Examples:
${cmd} https://x.com/username/status/1234567890
${cmd} https://x.com/i/article/1234567890 -o ./article.md
${cmd} https://x.com/username/status/1234567890 -o ./out/
${cmd} https://x.com/username/status/1234567890 --download-media
${cmd} https://x.com/username/status/1234567890 --json | jq -r '.markdownPath'
${cmd} --login
`;

  if (exitCode === 0) {
    console.log(usage);
  } else {
    // Error path: keep stdout clean for machine consumers.
    console.error(usage);
  }
  process.exit(exitCode);
}
|
|
|
|
/**
 * Parses raw CLI arguments (without the node/script prefix) into {@link CliArgs}.
 *
 * Boolean flags: `--help`/`-h`, `--json`, `--login`, `--download-media`.
 * Value options: `--url <url>`, `--output <path>`/`-o <path>`.
 * Any other token starting with `-` is rejected; remaining tokens are
 * positionals, and the first positional is used as the URL when `--url`
 * was not supplied.
 *
 * @param argv Argument list, typically `process.argv.slice(2)`.
 * @throws Error when a value option has no (or an empty) value, or on an unknown option.
 */
function parseArgs(argv: string[]): CliArgs {
  const parsed: CliArgs = {
    url: null,
    output: null,
    json: false,
    login: false,
    downloadMedia: false,
    help: false,
  };
  const positionals: string[] = [];

  let index = 0;

  // Consumes the token following the current option as its value.
  const takeValue = (flag: string): string => {
    index += 1;
    const value = argv[index];
    if (!value) throw new Error(`Missing value for ${flag}`);
    return value;
  };

  while (index < argv.length) {
    const token = argv[index]!;
    switch (token) {
      case "--help":
      case "-h":
        parsed.help = true;
        break;
      case "--json":
        parsed.json = true;
        break;
      case "--login":
        parsed.login = true;
        break;
      case "--download-media":
        parsed.downloadMedia = true;
        break;
      case "--url":
        parsed.url = takeValue(token);
        break;
      case "--output":
      case "-o":
        parsed.output = takeValue(token);
        break;
      default:
        if (token.startsWith("-")) {
          throw new Error(`Unknown option: ${token}`);
        }
        positionals.push(token);
    }
    index += 1;
  }

  // An explicit --url wins over a positional URL.
  if (!parsed.url && positionals.length > 0) {
    parsed.url = positionals[0]!;
  }

  return parsed;
}
|
|
|
|
/**
 * Trims the input and, when it parses as a URL, returns its serialized form.
 *
 * @param input Raw user-supplied URL or identifier.
 * @returns `""` for blank input, the `URL#toString()` form for parseable
 *          URLs, otherwise the trimmed input unchanged.
 */
function normalizeInputUrl(input: string): string {
  const candidate = input.trim();
  if (candidate.length === 0) return "";

  let normalized = candidate;
  try {
    normalized = new URL(candidate).toString();
  } catch {
    // Not a parseable URL (e.g. a bare ID): keep the trimmed text.
  }
  return normalized;
}
|
|
|
|
/**
 * Extracts the numeric article ID from an X article URL.
 *
 * Recognizes `/article/<digits>` and `/i/article/<digits>` in the URL path.
 *
 * @param input URL text; surrounding whitespace is ignored.
 * @returns The ID digits, or `null` if the input is blank, not a URL, or has no article path.
 */
function parseArticleId(input: string): string | null {
  const candidate = input.trim();
  if (candidate === "") return null;

  let pathname: string;
  try {
    pathname = new URL(candidate).pathname;
  } catch {
    return null;
  }

  const id = /\/(?:i\/)?article\/(\d+)/.exec(pathname)?.[1];
  return id ? id : null;
}
|
|
|
|
/**
 * Extracts a tweet ID from either a bare numeric ID or a status URL.
 *
 * @param input A string of digits, or a URL whose path contains
 *              `/status/<digits>` or `/statuses/<digits>`.
 * @returns The ID digits, or `null` when none can be determined.
 */
function parseTweetId(input: string): string | null {
  const candidate = input.trim();
  if (candidate === "") return null;

  // A bare numeric ID is accepted as-is.
  if (/^\d+$/.test(candidate)) return candidate;

  let pathname: string;
  try {
    pathname = new URL(candidate).pathname;
  } catch {
    return null;
  }

  const id = /\/status(?:es)?\/(\d+)/.exec(pathname)?.[1];
  return id ? id : null;
}
|
|
|
|
/**
 * Extracts the first path segment (the account handle) from a status URL of
 * the form `/<segment>/status/<digits>` or `/<segment>/statuses/<digits>`.
 *
 * @param input URL text; surrounding whitespace is ignored.
 * @returns The path segment preceding `/status`, or `null` when the input is
 *          blank, not a URL, or does not have that path shape.
 */
function parseTweetUsername(input: string): string | null {
  const candidate = input.trim();
  if (candidate === "") return null;

  let pathname: string;
  try {
    pathname = new URL(candidate).pathname;
  } catch {
    return null;
  }

  const handle = /^\/([^/]+)\/status(?:es)?\/\d+/.exec(pathname)?.[1];
  return handle ? handle : null;
}
|
|
|
|
/**
 * Reduces arbitrary text to a filesystem-friendly slug.
 *
 * Steps: trim, drop one leading `@`, replace every run of characters outside
 * `[a-zA-Z0-9_-]` with `-`, collapse repeated `-`, strip leading/trailing
 * `-`/`_`, and cap the length at 120 characters.
 *
 * @param input Text to slugify (e.g. a username or a title fragment).
 * @returns The slug; may be `""` when the input has no allowed characters.
 */
function sanitizeSlug(input: string): string {
  return (
    input
      .trim()
      .replace(/^@/, "")
      .replace(/[^a-zA-Z0-9_-]+/g, "-")
      .replace(/-+/g, "-")
      .replace(/^[-_]+|[-_]+$/g, "")
      .slice(0, 120)
      // Truncation can expose a separator at the cut point; strip it again.
      .replace(/[-_]+$/, "")
  );
}

/**
 * Derives a lowercase file-name slug from generated markdown.
 *
 * Preference order:
 *  1. the first level-1 heading (`# ...`);
 *  2. the first non-blank line outside `---` fenced frontmatter;
 *  3. the literal `"untitled"`.
 *
 * Only the first 60 characters of a candidate are considered. A candidate
 * whose slug comes out empty (for example a title written entirely in
 * non-ASCII characters) is skipped rather than returned, so the result is
 * never an empty string.
 *
 * @param markdown Full markdown document, optionally starting with frontmatter.
 * @returns A non-empty slug.
 */
function extractContentSlug(markdown: string): string {
  const toSlug = (text: string): string => sanitizeSlug(text.slice(0, 60)).toLowerCase();

  const headingMatch = markdown.match(/^#\s+(.+)$/m);
  if (headingMatch?.[1]) {
    const headingSlug = toSlug(headingMatch[1]);
    if (headingSlug) return headingSlug;
  }

  let inFrontmatter = false;
  for (const line of markdown.split("\n")) {
    if (line === "---") {
      // Each fence line toggles frontmatter on/off.
      inFrontmatter = !inFrontmatter;
      continue;
    }
    if (inFrontmatter) continue;

    const trimmed = line.trim();
    if (!trimmed) continue;

    const lineSlug = toSlug(trimmed);
    if (lineSlug) return lineSlug;
  }

  return "untitled";
}
|
|
|
|
/**
 * Computes the two path components used for output directories.
 *
 * - `idPart`: the article or tweet ID parsed from the URL, or the current
 *   timestamp (`Date.now()`) when no ID can be parsed.
 * - `slug`: for tweets with a parseable username, the sanitized username;
 *   otherwise the same value as `idPart`.
 *
 * @param normalizedUrl URL (or bare tweet ID) to inspect.
 * @param kind Whether the target is a tweet or an article.
 */
function resolveSlugAndId(normalizedUrl: string, kind: "tweet" | "article"): { slug: string; idPart: string } {
  let parsedId: string | null;
  let handle: string | null = null;

  if (kind === "article") {
    parsedId = parseArticleId(normalizedUrl);
  } else {
    parsedId = parseTweetId(normalizedUrl);
    handle = parseTweetUsername(normalizedUrl);
  }

  const idPart = parsedId ?? String(Date.now());
  const slug = handle ? sanitizeSlug(handle) : idPart;
  return { slug, idPart };
}
|
|
|
|
/**
 * Collects the quoted values of `url:` and `requestedUrl:` entries from the
 * frontmatter block at the very start of a markdown document.
 *
 * @param markdown Document text; frontmatter must begin on the first line with `---`.
 * @returns Values in document order; empty when there is no frontmatter or no matching keys.
 */
function extractFrontmatterUrls(markdown: string): string[] {
  const frontmatter = /^---\n([\s\S]*?)\n---/.exec(markdown)?.[1];
  if (!frontmatter) return [];

  return frontmatter.split("\n").flatMap((entry) => {
    const value = /^(url|requestedUrl):\s*["']([^"']+)["']\s*$/.exec(entry)?.[2];
    return value ? [value] : [];
  });
}
|
|
|
|
/**
 * Tells whether a markdown file's frontmatter refers to the same tweet/article
 * as `normalizedUrl`, by comparing parsed IDs.
 *
 * @param markdown Markdown document text.
 * @param normalizedUrl Target URL (or bare tweet ID).
 * @param kind Selects article-ID or tweet-ID parsing for both sides.
 * @returns `true` when any frontmatter `url`/`requestedUrl` yields the target's ID;
 *          `false` when there are no frontmatter URLs or the target has no parseable ID.
 */
function frontmatterMatchesTarget(
  markdown: string,
  normalizedUrl: string,
  kind: "tweet" | "article"
): boolean {
  const parseId = kind === "article" ? parseArticleId : parseTweetId;

  const recordedUrls = extractFrontmatterUrls(markdown);
  if (recordedUrls.length === 0) return false;

  const wantedId = parseId(normalizedUrl);
  if (!wantedId) return false;

  return recordedUrls.some((recorded) => parseId(recorded) === wantedId);
}
|
|
|
|
/**
 * Lists the `.md` entries (case-insensitive extension) directly inside a directory.
 *
 * @param dirPath Directory to read.
 * @returns Joined paths sorted ascending; empty if the directory cannot be read.
 */
function listMarkdownFiles(dirPath: string): string[] {
  let entries: string[];
  try {
    entries = fs.readdirSync(dirPath);
  } catch {
    return [];
  }

  const found: string[] = [];
  for (const entry of entries) {
    if (entry.toLowerCase().endsWith(".md")) {
      found.push(path.join(dirPath, entry));
    }
  }
  found.sort();
  return found;
}
|
|
|
|
/**
 * Looks for a previously written markdown file for the same tweet/article.
 *
 * Candidate locations:
 * - with `argsOutput` pointing at an existing file: that file;
 * - with `argsOutput` pointing at an existing directory: `<dir>/<slug>/<id>` then `<dir>`;
 * - with `argsOutput` not existing but ending in a path separator: `<dir>/<slug>/<id>`;
 * - without `argsOutput`: `<cwd>/x-to-markdown/<slug>/<id>`.
 *
 * Explicit files are checked first (only if they end in `.md`), then each
 * candidate directory's `.md` files in sorted order. A file matches when its
 * frontmatter identifies the same target. Filesystem errors are treated as
 * "no match" for the affected candidate.
 *
 * @returns Path of the first matching file, or `null`.
 */
function resolveExistingMarkdownPath(
  normalizedUrl: string,
  kind: "tweet" | "article",
  argsOutput: string | null
): string | null {
  const { slug, idPart } = resolveSlugAndId(normalizedUrl, kind);
  const dirsToScan = new Set<string>();
  const filesToCheck = new Set<string>();

  if (!argsOutput) {
    dirsToScan.add(path.resolve(process.cwd(), "x-to-markdown", slug, idPart));
  } else {
    const target = path.resolve(argsOutput);
    const endsWithSeparator = argsOutput.endsWith("/") || argsOutput.endsWith("\\");
    try {
      if (!fs.existsSync(target)) {
        if (endsWithSeparator) {
          dirsToScan.add(path.join(target, slug, idPart));
        }
      } else {
        const info = fs.statSync(target);
        if (info.isFile()) {
          filesToCheck.add(target);
        } else if (info.isDirectory()) {
          dirsToScan.add(path.join(target, slug, idPart));
          dirsToScan.add(target);
        }
      }
    } catch {
      // ignore and continue
    }
  }

  // Reads a file and checks its frontmatter; unreadable files simply don't match.
  const matchesTarget = (filePath: string): boolean => {
    try {
      return frontmatterMatchesTarget(fs.readFileSync(filePath, "utf8"), normalizedUrl, kind);
    } catch {
      return false;
    }
  };

  for (const filePath of filesToCheck) {
    if (filePath.toLowerCase().endsWith(".md") && matchesTarget(filePath)) {
      return filePath;
    }
  }

  for (const dirPath of dirsToScan) {
    if (!fs.existsSync(dirPath)) continue;

    let isDirectory: boolean;
    try {
      isDirectory = fs.statSync(dirPath).isDirectory();
    } catch {
      continue;
    }
    if (!isDirectory) continue;

    const hit = listMarkdownFiles(dirPath).find((filePath) => matchesTarget(filePath));
    if (hit) return hit;
  }

  return null;
}
|
|
|
|
/**
 * Decides where the markdown file is written and creates the parent directory.
 *
 * - `argsOutput` ending in `/` or `\`, or naming an existing directory:
 *   write to `<argsOutput>/<slug>/<id>/<contentSlug>.md`.
 * - any other `argsOutput`: treat it as the markdown file path itself.
 * - no `argsOutput`: write to `<cwd>/x-to-markdown/<slug>/<id>/<contentSlug>.md`.
 *
 * `slug` is the sanitized tweet username when available, otherwise the ID;
 * the ID falls back to `Date.now()` when none can be parsed from the URL.
 *
 * Only the "is this an existing directory?" probe is best-effort. Failures to
 * create the output directory are propagated: previously they were swallowed
 * and the directory path was silently reinterpreted as a file path, which
 * could overwrite an unrelated file (e.g. `-o out/` where `out` is a file).
 *
 * @param normalizedUrl Target URL (or bare tweet ID).
 * @param kind Whether the target is a tweet or an article.
 * @param argsOutput Value of `--output`, or `null`.
 * @param contentSlug Base file name (without extension) for directory-style outputs.
 * @param log Logger; currently unused, kept for interface stability.
 * @returns The created output directory, the markdown file path, and the slug.
 * @throws Whatever `mkdir` rejects with when the directory cannot be created.
 */
async function resolveOutputPath(
  normalizedUrl: string,
  kind: "tweet" | "article",
  argsOutput: string | null,
  contentSlug: string,
  log: (message: string) => void
): Promise<{ outputDir: string; markdownPath: string; slug: string }> {
  const articleId = kind === "article" ? parseArticleId(normalizedUrl) : null;
  const tweetId = kind === "tweet" ? parseTweetId(normalizedUrl) : null;
  const username = kind === "tweet" ? parseTweetUsername(normalizedUrl) : null;

  const userSlug = username ? sanitizeSlug(username) : null;
  const idPart = articleId ?? tweetId ?? String(Date.now());
  const slug = userSlug ?? idPart;

  const defaultFileName = `${contentSlug}.md`;

  if (argsOutput) {
    const wantsDir = argsOutput.endsWith("/") || argsOutput.endsWith("\\");
    const resolved = path.resolve(argsOutput);

    let isExistingDir = false;
    if (!wantsDir) {
      try {
        isExistingDir = fs.existsSync(resolved) && fs.statSync(resolved).isDirectory();
      } catch {
        // Cannot stat the path: treat it as a file path.
      }
    }

    if (wantsDir || isExistingDir) {
      const outputDir = path.join(resolved, slug, idPart);
      await mkdir(outputDir, { recursive: true });
      return { outputDir, markdownPath: path.join(outputDir, defaultFileName), slug };
    }

    const outputDir = path.dirname(resolved);
    await mkdir(outputDir, { recursive: true });
    return { outputDir, markdownPath: resolved, slug };
  }

  const outputDir = path.resolve(process.cwd(), "x-to-markdown", slug, idPart);
  await mkdir(outputDir, { recursive: true });
  return { outputDir, markdownPath: path.join(outputDir, defaultFileName), slug };
}
|
|
|
|
/**
 * Renders a frontmatter block (`---` fenced `key: value` lines).
 *
 * Entries whose value is `undefined`, `null`, or `""` are omitted. Numbers are
 * written bare; strings are written via `JSON.stringify` (double-quoted, escaped).
 *
 * @param meta Key/value pairs in the order they should appear.
 * @returns The block without a trailing newline.
 */
function formatMetaMarkdown(meta: Record<string, string | number | null | undefined>): string {
  const entries = Object.entries(meta)
    .filter(([, value]) => value !== undefined && value !== null && value !== "")
    .map(([key, value]) => `${key}: ${typeof value === "number" ? value : JSON.stringify(value)}`);

  return ["---", ...entries, "---"].join("\n");
}
|
|
|
|
/**
 * Asks a yes/no question on the terminal (prompt is written to stderr).
 *
 * @param question Prompt text.
 * @returns `true` only when the reply, trimmed and lowercased, is `y` or `yes`;
 *          `false` for any other reply or when stdin is not a TTY.
 */
async function promptYesNo(question: string): Promise<boolean> {
  if (!process.stdin.isTTY) return false;

  const prompt = readline.createInterface({ input: process.stdin, output: process.stderr });
  try {
    const reply = await new Promise<string>((done) => {
      prompt.question(question, done);
    });
    return ["y", "yes"].includes(reply.trim().toLowerCase());
  } finally {
    // Always release stdin so the process can exit.
    prompt.close();
  }
}
|
|
|
|
/**
 * Type guard for a stored consent record that is still in force.
 *
 * Requires an object with `accepted === true`, a `disclaimerVersion` equal to
 * `DISCLAIMER_VERSION`, and a non-empty string `acceptedAt`. The `version`
 * field is not inspected.
 */
function isValidConsent(value: unknown): value is ConsentRecord {
  if (typeof value !== "object" || value === null) return false;

  const { accepted, disclaimerVersion, acceptedAt } = value as Partial<ConsentRecord>;
  if (accepted !== true) return false;
  if (disclaimerVersion !== DISCLAIMER_VERSION) return false;
  return typeof acceptedAt === "string" && acceptedAt.length > 0;
}
|
|
|
|
/**
 * Makes sure the user has accepted the current disclaimer before continuing.
 *
 * If a valid consent file exists, a short warning is logged and the function
 * returns. Otherwise the disclaimer is logged and the user is prompted; on
 * acceptance a consent record is written to the consent path.
 *
 * @param log Sink for warnings and status messages.
 * @throws Error when stdin is not a TTY (consent cannot be collected) or the user declines.
 */
async function ensureConsent(log: (message: string) => void): Promise<void> {
  const consentPath = resolveXToMarkdownConsentPath();

  try {
    const hasConsentFile = fs.existsSync(consentPath) && fs.statSync(consentPath).isFile();
    if (hasConsentFile) {
      const stored: unknown = JSON.parse(await readFile(consentPath, "utf8"));
      if (isValidConsent(stored)) {
        log(`⚠️ Warning: Using reverse-engineered X API (not official). Accepted on: ${stored.acceptedAt}`);
        return;
      }
    }
  } catch {
    // Unreadable or malformed consent file: ask again below.
  }

  const disclaimer = [
    "⚠️ DISCLAIMER",
    "",
    "This tool uses a reverse-engineered X (Twitter) API, NOT an official API.",
    "",
    "Risks:",
    "- May break without notice if X changes their API",
    "- No official support or guarantees",
    "- Account restrictions possible if API usage detected",
    "- Use at your own risk",
    "",
  ].join("\n");
  log(disclaimer);

  if (!process.stdin.isTTY) {
    throw new Error(
      `Consent required. Run in a TTY or create ${consentPath} with accepted: true and disclaimerVersion: ${DISCLAIMER_VERSION}`
    );
  }

  const userAgreed = await promptYesNo("Do you accept these terms and wish to continue? (y/N): ");
  if (!userAgreed) {
    throw new Error("User declined the disclaimer. Exiting.");
  }

  await mkdir(path.dirname(consentPath), { recursive: true });
  const record: ConsentRecord = {
    version: 1,
    accepted: true,
    acceptedAt: new Date().toISOString(),
    disclaimerVersion: DISCLAIMER_VERSION,
  };
  await writeFile(consentPath, JSON.stringify(record, null, 2), "utf8");
  log(`[x-to-markdown] Consent saved to: ${consentPath}`);
}
|
|
|
|
/**
 * Fetches an X article and renders it as markdown with a frontmatter header.
 *
 * Loads cookies, fetches the article, resolves tweets it references, formats
 * the body, and prepends frontmatter containing the canonical article URL,
 * the requested URL, the title (when the article has a string `title`), and
 * the cover image URL.
 *
 * @param inputUrl URL the user asked for; recorded as `requestedUrl`.
 * @param articleId Numeric article ID.
 * @param log Sink for progress messages.
 * @returns Markdown text without trailing whitespace.
 * @throws Error when the required auth cookies are missing.
 */
async function convertArticleToMarkdown(
  inputUrl: string,
  articleId: string,
  log: (message: string) => void
): Promise<string> {
  log("[x-to-markdown] Loading cookies...");
  const cookieMap = await loadXCookies(log);
  if (!hasRequiredXCookies(cookieMap)) {
    throw new Error("Missing auth cookies. Provide X_AUTH_TOKEN and X_CT0 or log in via Chrome.");
  }

  log(`[x-to-markdown] Fetching article ${articleId}...`);
  const article = await fetchXArticle(articleId, cookieMap, false);
  const referencedTweets = await resolveReferencedTweetsFromArticle(article, cookieMap, { log });
  const formatted = formatArticleMarkdown(article, { referencedTweets });

  // The article's static shape is not visible here, so read `title` defensively.
  const rawTitle: unknown = (article as any)?.title;
  const title = typeof rawTitle === "string" ? rawTitle.trim() : "";

  const header = formatMetaMarkdown({
    url: `https://x.com/i/article/${articleId}`,
    requestedUrl: inputUrl,
    title: title || null,
    coverImage: formatted.coverUrl,
  });

  const sections = [header, formatted.markdown.trimEnd()].filter(Boolean);
  return sections.join("\n\n").trimEnd();
}
|
|
|
|
/**
 * CLI entry point.
 *
 * Flow: parse arguments → show usage if requested/invalid → ensure disclaimer
 * consent → either refresh cookies (`--login`) or convert the given tweet /
 * article URL to markdown, optionally localizing media, then print the
 * markdown path (or a JSON summary with `--json`) on stdout. Progress
 * messages go to stderr.
 *
 * With `--download-media`, an existing markdown file for the same target is
 * reused: if localizing it changes anything, it is rewritten in place and the
 * run ends; otherwise the content is rebuilt from scratch.
 *
 * @throws Error on invalid URL or missing cookies after login, plus anything
 *         thrown by the conversion helpers.
 */
async function main(): Promise<void> {
  const args = parseArgs(process.argv.slice(2));
  if (args.help) printUsage(0);
  if (!args.login && !args.url) printUsage(1);

  const log = (message: string) => console.error(message);
  await ensureConsent(log);

  if (args.login) {
    log("[x-to-markdown] Refreshing cookies via browser login...");
    const cookieMap = await refreshXCookies(log);
    if (!hasRequiredXCookies(cookieMap)) {
      throw new Error("Missing auth cookies after login. Please ensure you are logged in to X.");
    }
    log("[x-to-markdown] Cookies refreshed.");
    return;
  }

  const normalizedUrl = normalizeInputUrl(args.url ?? "");
  const articleId = parseArticleId(normalizedUrl);
  const tweetId = parseTweetId(normalizedUrl);
  if (!articleId && !tweetId) {
    throw new Error("Invalid X url. Examples: https://x.com/<user>/status/<id> or https://x.com/i/article/<id>");
  }

  const kind: "tweet" | "article" = articleId ? "article" : "tweet";
  const canonicalUrl = articleId ? `https://x.com/i/article/${articleId}` : normalizedUrl;

  // Prints the final result on stdout: JSON summary with --json, else the path.
  // The summary is built lazily so it is only computed when actually needed.
  const report = (markdownFile: string, buildSummary: () => Record<string, unknown>): void => {
    if (args.json) {
      console.log(JSON.stringify(buildSummary(), null, 2));
    } else {
      console.log(markdownFile);
    }
  };

  const existingMarkdownPath = args.downloadMedia
    ? resolveExistingMarkdownPath(normalizedUrl, kind, args.output)
    : null;

  if (existingMarkdownPath) {
    log(`[x-to-markdown] Reusing existing markdown: ${existingMarkdownPath}`);
    const existingMarkdown = await readFile(existingMarkdownPath, "utf8");
    const localized = await localizeMarkdownMedia(existingMarkdown, {
      markdownPath: existingMarkdownPath,
      log,
    });

    const didLocalize =
      localized.downloadedImages > 0 ||
      localized.downloadedVideos > 0 ||
      localized.markdown !== existingMarkdown;

    if (!didLocalize) {
      log("[x-to-markdown] Existing markdown already localized; rebuilding content to refresh placement.");
    } else {
      await writeFile(existingMarkdownPath, localized.markdown, "utf8");
      log(
        `[x-to-markdown] Media localized: images=${localized.downloadedImages}, videos=${localized.downloadedVideos}`
      );
      log(`[x-to-markdown] Saved: ${existingMarkdownPath}`);

      const { slug } = resolveSlugAndId(normalizedUrl, kind);
      report(existingMarkdownPath, () => ({
        url: canonicalUrl,
        requestedUrl: normalizedUrl,
        type: kind,
        slug,
        outputDir: path.dirname(existingMarkdownPath),
        markdownPath: existingMarkdownPath,
        downloadMedia: true,
        downloadedImages: localized.downloadedImages,
        downloadedVideos: localized.downloadedVideos,
        imageDir: localized.imageDir,
        videoDir: localized.videoDir,
      }));
      return;
    }
  }

  let markdown: string;
  if (kind === "article" && articleId) {
    markdown = await convertArticleToMarkdown(normalizedUrl, articleId, log);
  } else {
    markdown = await tweetToMarkdown(normalizedUrl, { log });
  }

  const contentSlug = extractContentSlug(markdown);
  const { outputDir, markdownPath, slug } = await resolveOutputPath(
    normalizedUrl,
    kind,
    args.output,
    contentSlug,
    log
  );

  let mediaResult: LocalizeMarkdownMediaResult | null = null;
  if (args.downloadMedia) {
    mediaResult = await localizeMarkdownMedia(markdown, { markdownPath, log });
    markdown = mediaResult.markdown;
    log(
      `[x-to-markdown] Media localized: images=${mediaResult.downloadedImages}, videos=${mediaResult.downloadedVideos}`
    );
  }

  await writeFile(markdownPath, markdown, "utf8");
  log(`[x-to-markdown] Saved: ${markdownPath}`);

  report(markdownPath, () => ({
    url: canonicalUrl,
    requestedUrl: normalizedUrl,
    type: kind,
    slug,
    outputDir,
    markdownPath,
    downloadMedia: args.downloadMedia,
    downloadedImages: mediaResult?.downloadedImages ?? 0,
    downloadedVideos: mediaResult?.downloadedVideos ?? 0,
    imageDir: mediaResult?.imageDir ?? null,
    videoDir: mediaResult?.videoDir ?? null,
  }));
}
|
|
|
|
// Run the CLI; on any failure print just the message to stderr and exit with status 1.
try {
  await main();
} catch (error) {
  console.error(error instanceof Error ? error.message : String(error ?? ""));
  process.exit(1);
}
|