JimLiu-baoyu-skills/skills/baoyu-danger-x-to-markdown/scripts/tweet-to-markdown.ts

213 lines
6.2 KiB
TypeScript

#!/usr/bin/env npx tsx
import * as path from "node:path";
import { fileURLToPath } from "node:url";
import { hasRequiredXCookies, loadXCookies } from "./cookies.js";
import { fetchTweetThread } from "./thread.js";
import { formatArticleMarkdown } from "./markdown.js";
import { resolveReferencedTweetsFromArticle } from "./referenced-tweets.js";
import { formatThreadTweetsMarkdown } from "./thread-markdown.js";
import { resolveArticleEntityFromTweet } from "./tweet-article.js";
type TweetToMarkdownOptions = {
log?: (message: string) => void;
};
function parseArgs(): { url?: string } {
const args = process.argv.slice(2);
let url: string | undefined;
for (const arg of args) {
if (!arg.startsWith("-") && !url) {
url = arg;
}
}
return { url };
}
function normalizeInputUrl(input: string): string {
const trimmed = input.trim();
if (!trimmed) return "";
try {
return new URL(trimmed).toString();
} catch {
return trimmed;
}
}
function formatScriptCommand(fallback: string): string {
const raw = process.argv[1];
const displayPath = raw
? (() => {
const relative = path.relative(process.cwd(), raw);
return relative && !relative.startsWith("..") ? relative : raw;
})()
: fallback;
const quotedPath = displayPath.includes(" ")
? `"${displayPath.replace(/"/g, '\\"')}"`
: displayPath;
return `npx -y bun ${quotedPath}`;
}
function parseTweetId(input: string): string | null {
const trimmed = input.trim();
if (!trimmed) return null;
if (/^\d+$/.test(trimmed)) return trimmed;
try {
const parsed = new URL(trimmed);
const match = parsed.pathname.match(/\/status(?:es)?\/(\d+)/);
if (match?.[1]) return match[1];
} catch {
return null;
}
return null;
}
function buildTweetUrl(username: string | undefined, tweetId: string | undefined): string | null {
if (!tweetId) return null;
if (username) {
return `https://x.com/${username}/status/${tweetId}`;
}
return `https://x.com/i/web/status/${tweetId}`;
}
function formatMetaMarkdown(meta: Record<string, string | number | null | undefined>): string {
const lines = ["---"];
for (const [key, value] of Object.entries(meta)) {
if (value === undefined || value === null || value === "") continue;
if (typeof value === "number") {
lines.push(`${key}: ${value}`);
} else {
lines.push(`${key}: ${JSON.stringify(value)}`);
}
}
lines.push("---");
return lines.join("\n");
}
function extractTweetText(tweet: any): string {
const noteText = tweet?.note_tweet?.note_tweet_results?.result?.text;
const legacyText = tweet?.legacy?.full_text ?? tweet?.legacy?.text ?? "";
return (noteText ?? legacyText ?? "").trim();
}
function isOnlyUrl(text: string): boolean {
const trimmed = text.trim();
if (!trimmed) return true;
return /^https?:\/\/\S+$/.test(trimmed);
}
export async function tweetToMarkdown(
inputUrl: string,
options: TweetToMarkdownOptions = {}
): Promise<string> {
const normalizedUrl = normalizeInputUrl(inputUrl);
const tweetId = parseTweetId(normalizedUrl);
if (!tweetId) {
throw new Error("Invalid tweet url. Example: https://x.com/<user>/status/<tweet_id>");
}
const log = options.log ?? (() => {});
log("[tweet-to-markdown] Loading cookies...");
const cookieMap = await loadXCookies(log);
if (!hasRequiredXCookies(cookieMap)) {
throw new Error("Missing auth cookies. Provide X_AUTH_TOKEN and X_CT0 or log in via Chrome.");
}
log(`[tweet-to-markdown] Fetching thread for ${tweetId}...`);
const thread = await fetchTweetThread(tweetId, cookieMap);
if (!thread) {
throw new Error("Failed to fetch thread.");
}
const tweets = thread.tweets ?? [];
if (tweets.length === 0) {
throw new Error("No tweets found in thread.");
}
const firstTweet = tweets[0] as any;
const user = thread.user ?? firstTweet?.core?.user_results?.result?.legacy;
const username = user?.screen_name;
const name = user?.name;
const author =
username && name ? `${name} (@${username})` : username ? `@${username}` : name ?? null;
const authorUrl = username ? `https://x.com/${username}` : undefined;
const requestedUrl = normalizedUrl || buildTweetUrl(username, tweetId) || inputUrl.trim();
const rootUrl = buildTweetUrl(username, thread.rootId ?? tweetId) ?? requestedUrl;
const articleEntity = await resolveArticleEntityFromTweet(firstTweet, cookieMap);
let coverImage: string | null = null;
let remainingTweets = tweets;
const parts: string[] = [];
if (articleEntity) {
const referencedTweets = await resolveReferencedTweetsFromArticle(articleEntity, cookieMap, { log });
const articleResult = formatArticleMarkdown(articleEntity, { referencedTweets });
coverImage = articleResult.coverUrl;
const articleMarkdown = articleResult.markdown.trimEnd();
if (articleMarkdown) {
parts.push(articleMarkdown);
const firstTweetText = extractTweetText(firstTweet);
if (isOnlyUrl(firstTweetText)) {
remainingTweets = tweets.slice(1);
}
}
}
const meta = formatMetaMarkdown({
url: rootUrl,
requestedUrl: requestedUrl,
author,
authorName: name ?? null,
authorUsername: username ?? null,
authorUrl: authorUrl ?? null,
tweetCount: thread.totalTweets ?? tweets.length,
coverImage,
});
parts.unshift(meta);
if (remainingTweets.length > 0) {
const hasArticle = parts.length > 1;
if (hasArticle) {
parts.push("## Thread");
}
const tweetMarkdown = formatThreadTweetsMarkdown(remainingTweets, {
username,
headingLevel: hasArticle ? 3 : 2,
startIndex: 1,
includeTweetUrls: true,
});
if (tweetMarkdown) {
parts.push(tweetMarkdown);
}
}
return parts.join("\n\n").trimEnd();
}
async function main() {
const { url } = parseArgs();
if (!url) {
console.error("Usage:");
console.error(` ${formatScriptCommand("scripts/tweet-to-markdown.ts")} <tweet url>`);
process.exit(1);
}
const markdown = await tweetToMarkdown(url, { log: console.log });
console.log(markdown);
}
const isCliExecution =
process.argv[1] && fileURLToPath(import.meta.url) === path.resolve(process.argv[1]);
if (isCliExecution) {
main().catch((error) => {
console.error(error instanceof Error ? error.message : error);
process.exit(1);
});
}