From 4998eaf8c22f37d86e9f25aed4d4e35217242f02 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jim=20Liu=20=E5=AE=9D=E7=8E=89?= Date: Sun, 18 Jan 2026 17:42:25 -0600 Subject: [PATCH] feat: add baoyu-danger-x-to-markdown skill Ignore generated x-to-markdown output directory. --- .gitignore | 1 + skills/baoyu-danger-x-to-markdown/SKILL.md | 177 ++++++++ .../scripts/constants.ts | 143 ++++++ .../scripts/cookie-file.ts | 85 ++++ .../scripts/cookies.ts | 423 ++++++++++++++++++ .../scripts/graphql.ts | 389 ++++++++++++++++ .../scripts/http.ts | 117 +++++ .../scripts/main.ts | 419 +++++++++++++++++ .../scripts/markdown.ts | 306 +++++++++++++ .../scripts/paths.ts | 41 ++ .../scripts/thread-markdown.ts | 295 ++++++++++++ .../scripts/thread.ts | 311 +++++++++++++ .../scripts/tweet-article.ts | 96 ++++ .../scripts/tweet-to-markdown.ts | 190 ++++++++ .../scripts/types.ts | 79 ++++ 15 files changed, 3072 insertions(+) create mode 100644 skills/baoyu-danger-x-to-markdown/SKILL.md create mode 100644 skills/baoyu-danger-x-to-markdown/scripts/constants.ts create mode 100644 skills/baoyu-danger-x-to-markdown/scripts/cookie-file.ts create mode 100644 skills/baoyu-danger-x-to-markdown/scripts/cookies.ts create mode 100644 skills/baoyu-danger-x-to-markdown/scripts/graphql.ts create mode 100644 skills/baoyu-danger-x-to-markdown/scripts/http.ts create mode 100644 skills/baoyu-danger-x-to-markdown/scripts/main.ts create mode 100644 skills/baoyu-danger-x-to-markdown/scripts/markdown.ts create mode 100644 skills/baoyu-danger-x-to-markdown/scripts/paths.ts create mode 100644 skills/baoyu-danger-x-to-markdown/scripts/thread-markdown.ts create mode 100644 skills/baoyu-danger-x-to-markdown/scripts/thread.ts create mode 100644 skills/baoyu-danger-x-to-markdown/scripts/tweet-article.ts create mode 100644 skills/baoyu-danger-x-to-markdown/scripts/tweet-to-markdown.ts create mode 100644 skills/baoyu-danger-x-to-markdown/scripts/types.ts diff --git a/.gitignore b/.gitignore index c98f656..9fbeaae 100644 --- 
a/.gitignore +++ b/.gitignore @@ -143,3 +143,4 @@ tests-data/ # Skill extensions (user customization) .baoyu-skills/ +x-to-markdown/ diff --git a/skills/baoyu-danger-x-to-markdown/SKILL.md b/skills/baoyu-danger-x-to-markdown/SKILL.md new file mode 100644 index 0000000..23a3cb5 --- /dev/null +++ b/skills/baoyu-danger-x-to-markdown/SKILL.md @@ -0,0 +1,177 @@ +--- +name: baoyu-danger-x-to-markdown +description: Convert X (Twitter) tweet or article URL to markdown. Uses reverse-engineered X API (private). Requires user consent before use. +--- + +# X to Markdown + +Converts X (Twitter) content to markdown format: +- Tweet threads → Markdown with YAML front matter +- X Articles → Full article content extraction + +## Script Directory + +**Important**: All scripts are located in the `scripts/` subdirectory of this skill. + +**Agent Execution Instructions**: +1. Determine this SKILL.md file's directory path as `SKILL_DIR` +2. Script path = `${SKILL_DIR}/scripts/<script-name>.ts` +3. Replace all `${SKILL_DIR}` in this document with the actual path + +**Script Reference**: +| Script | Purpose | +|--------|---------| +| `scripts/main.ts` | CLI entry point for URL conversion | + +## ⚠️ Disclaimer (REQUIRED) + +**Before using this skill**, the consent check MUST be performed. + +### Consent Check Flow + +**Step 1**: Check consent file + +```bash +# macOS +cat ~/Library/Application\ Support/baoyu-skills/x-to-markdown/consent.json 2>/dev/null + +# Linux +cat ~/.local/share/baoyu-skills/x-to-markdown/consent.json 2>/dev/null + +# Windows (PowerShell) +Get-Content "$env:APPDATA\baoyu-skills\x-to-markdown\consent.json" 2>$null +``` + +**Step 2**: If consent exists and `accepted: true` with matching `disclaimerVersion: "1.0"`: + +Print warning and proceed: +``` +⚠️ Warning: Using reverse-engineered X API (not official). 
Accepted on: <acceptedAt date> +``` + +**Step 3**: If consent file doesn't exist or `disclaimerVersion` mismatch: + +Display disclaimer and ask user: + +``` +⚠️ DISCLAIMER + +This tool uses a reverse-engineered X (Twitter) API, NOT an official API. + +Risks: +- May break without notice if X changes their API +- No official support or guarantees +- Account restrictions possible if API usage detected +- Use at your own risk + +Do you accept these terms and wish to continue? +``` + +Use `AskUserQuestion` tool with options: +- **Yes, I accept** - Continue and save consent +- **No, I decline** - Exit immediately + +**Step 4**: On acceptance, create consent file: + +```bash +# macOS +mkdir -p ~/Library/Application\ Support/baoyu-skills/x-to-markdown +cat > ~/Library/Application\ Support/baoyu-skills/x-to-markdown/consent.json << 'EOF' +{ + "version": 1, + "accepted": true, + "acceptedAt": "<ISO timestamp>", + "disclaimerVersion": "1.0" +} +EOF + +# Linux +mkdir -p ~/.local/share/baoyu-skills/x-to-markdown +cat > ~/.local/share/baoyu-skills/x-to-markdown/consent.json << 'EOF' +{ + "version": 1, + "accepted": true, + "acceptedAt": "<ISO timestamp>", + "disclaimerVersion": "1.0" +} +EOF +``` + +**Step 5**: On decline, output message and stop: +``` +User declined the disclaimer. Exiting. 
+``` + +--- + +## Usage + +```bash +# Convert tweet (outputs markdown path) +npx -y bun ${SKILL_DIR}/scripts/main.ts <url> + +# Save to specific file +npx -y bun ${SKILL_DIR}/scripts/main.ts <url> -o output.md + +# JSON output +npx -y bun ${SKILL_DIR}/scripts/main.ts <url> --json +``` + +## Options + +| Option | Description | +|--------|-------------| +| `<url>` | Tweet or article URL | +| `-o <path>` | Output path (file or dir) | +| `--json` | Output as JSON | +| `--login` | Refresh cookies only | + +## File Structure + +``` +x-to-markdown/ +└── {username}/ + └── {tweet-id}.md +``` + +## Supported URLs + +- `https://x.com/<user>/status/<id>` +- `https://twitter.com/<user>/status/<id>` +- `https://x.com/i/article/<id>` + +## Output Format + +```markdown +--- +url: https://x.com/username/status/123 +author: "Display Name (@username)" +tweet_count: 3 +--- + +Tweet content... + +--- + +Thread continuation... +``` + +## Authentication + +**Option 1**: Environment variables (recommended) +- `X_AUTH_TOKEN` - auth_token cookie +- `X_CT0` - ct0 cookie + +**Option 2**: Chrome login (auto if env vars not set) +- First run opens Chrome for login +- Cookies cached locally + +## Extension Support + +Custom configurations via EXTEND.md. + +**Check paths** (priority order): +1. `.baoyu-skills/baoyu-danger-x-to-markdown/EXTEND.md` (project) +2. `~/.baoyu-skills/baoyu-danger-x-to-markdown/EXTEND.md` (user) + +If found, load before workflow. Extension content overrides defaults. 
diff --git a/skills/baoyu-danger-x-to-markdown/scripts/constants.ts b/skills/baoyu-danger-x-to-markdown/scripts/constants.ts
new file mode 100644
index 0000000..325a012
--- /dev/null
+++ b/skills/baoyu-danger-x-to-markdown/scripts/constants.ts
@@ -0,0 +1,143 @@
+import { resolveXToMarkdownChromeProfileDir } from "./paths.js";
+
+/** Default `Bearer …` credential string (consumed by graphql.ts). */
+export const DEFAULT_BEARER_TOKEN =
+  "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA";
+/** Default browser user-agent string sent with requests. */
+export const DEFAULT_USER_AGENT =
+  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36";
+/** Page opened in Chrome when cookies are collected through the DevTools protocol. */
+export const X_LOGIN_URL = "https://x.com/home";
+/** Chrome profile directory used for the login session; resolved once at module load. */
+export const X_USER_DATA_DIR = resolveXToMarkdownChromeProfileDir();
+
+/** Cookie names that are kept when building a cookie map. */
+export const X_COOKIE_NAMES = ["auth_token", "ct0", "gt", "twid"] as const;
+/** Cookie names that must all be present before a cookie map counts as usable. */
+export const X_REQUIRED_COOKIES = ["auth_token", "ct0"] as const;
+
+// Fallbacks for the `ArticleEntityResultByRestId` operation, used when the
+// live values cannot be extracted from the web bundle.
+export const FALLBACK_QUERY_ID = "id8pHQbQi7eZ6P9mA1th1Q";
+export const FALLBACK_FEATURE_SWITCHES = [
+  "profile_label_improvements_pcf_label_in_post_enabled",
+  "responsive_web_profile_redirect_enabled",
+  "rweb_tipjar_consumption_enabled",
+  "verified_phone_label_enabled",
+  "responsive_web_graphql_skip_user_profile_image_extensions_enabled",
+  "responsive_web_graphql_timeline_navigation_enabled",
+];
+export const FALLBACK_FIELD_TOGGLES = ["withPayments", "withAuxiliaryUserLabels"];
+
+// Fallbacks for the `TweetResultByRestId` operation.
+export const FALLBACK_TWEET_QUERY_ID = "HJ9lpOL-ZlOk5CkCw0JW6Q";
+export const FALLBACK_TWEET_FEATURE_SWITCHES = [
+  "creator_subscriptions_tweet_preview_api_enabled",
+  "premium_content_api_read_enabled",
+  "communities_web_enable_tweet_community_results_fetch",
+  "c9s_tweet_anatomy_moderator_badge_enabled",
+  "responsive_web_grok_analyze_button_fetch_trends_enabled",
+  "responsive_web_grok_analyze_post_followups_enabled",
+  "responsive_web_jetfuel_frame",
+  "responsive_web_grok_share_attachment_enabled",
+  "responsive_web_grok_annotations_enabled",
+  "articles_preview_enabled",
+  "responsive_web_edit_tweet_api_enabled",
+  "graphql_is_translatable_rweb_tweet_is_translatable_enabled",
+  "view_counts_everywhere_api_enabled",
+  "longform_notetweets_consumption_enabled",
+  "responsive_web_twitter_article_tweet_consumption_enabled",
+  "tweet_awards_web_tipping_enabled",
+  "responsive_web_grok_show_grok_translated_post",
+  "responsive_web_grok_analysis_button_from_backend",
+  "post_ctas_fetch_enabled",
+  "creator_subscriptions_quote_tweet_preview_enabled",
+  "freedom_of_speech_not_reach_fetch_enabled",
+  "standardized_nudges_misinfo",
+  "tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled",
+  "longform_notetweets_rich_text_read_enabled",
+  "longform_notetweets_inline_media_enabled",
+  "profile_label_improvements_pcf_label_in_post_enabled",
+  "responsive_web_profile_redirect_enabled",
+  "rweb_tipjar_consumption_enabled",
+  "verified_phone_label_enabled",
+  "responsive_web_grok_image_annotation_enabled",
+  "responsive_web_grok_imagine_annotation_enabled",
+  "responsive_web_grok_community_note_auto_translation_is_enabled",
+  "responsive_web_graphql_skip_user_profile_image_extensions_enabled",
+  "responsive_web_graphql_timeline_navigation_enabled",
+  "responsive_web_enhance_cards_enabled",
+];
+export const FALLBACK_TWEET_FIELD_TOGGLES = [
+  "withArticleRichContentState",
+  "withArticlePlainText",
+  "withGrokAnalyze",
+  "withDisallowedReplyControls",
+  "withPayments",
+  "withAuxiliaryUserLabels",
+];
+
+// Fallbacks for the `TweetDetail` operation.
+export const FALLBACK_TWEET_DETAIL_QUERY_ID = "_8aYOgEDz35BrBcBal1-_w";
+/** Default on/off value for every `TweetDetail` feature switch. */
+export const FALLBACK_TWEET_DETAIL_FEATURE_DEFAULTS: Record<string, boolean> = {
+  rweb_video_screen_enabled: false,
+  profile_label_improvements_pcf_label_in_post_enabled: true,
+  rweb_tipjar_consumption_enabled: true,
+  verified_phone_label_enabled: false,
+  creator_subscriptions_tweet_preview_api_enabled: true,
+  responsive_web_graphql_timeline_navigation_enabled: true,
+  responsive_web_graphql_skip_user_profile_image_extensions_enabled: false,
+  premium_content_api_read_enabled: false,
+  communities_web_enable_tweet_community_results_fetch: true,
+  c9s_tweet_anatomy_moderator_badge_enabled: true,
+  responsive_web_grok_analyze_button_fetch_trends_enabled: false,
+  responsive_web_grok_analyze_post_followups_enabled: true,
+  responsive_web_jetfuel_frame: false,
+  responsive_web_grok_share_attachment_enabled: true,
+  articles_preview_enabled: true,
+  responsive_web_edit_tweet_api_enabled: true,
+  graphql_is_translatable_rweb_tweet_is_translatable_enabled: true,
+  view_counts_everywhere_api_enabled: true,
+  longform_notetweets_consumption_enabled: true,
+  responsive_web_twitter_article_tweet_consumption_enabled: true,
+  tweet_awards_web_tipping_enabled: false,
+  responsive_web_grok_show_grok_translated_post: false,
+  responsive_web_grok_analysis_button_from_backend: true,
+  creator_subscriptions_quote_tweet_preview_enabled: false,
+  freedom_of_speech_not_reach_fetch_enabled: true,
+  standardized_nudges_misinfo: true,
+  tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled: true,
+  longform_notetweets_rich_text_read_enabled: true,
+  longform_notetweets_inline_media_enabled: true,
+  responsive_web_grok_image_annotation_enabled: true,
+  responsive_web_enhance_cards_enabled: false,
+};
+/**
+ * `TweetDetail` feature-switch names. Derived from the defaults object (same
+ * names, same order as the previously hand-maintained list) so the two cannot
+ * drift apart.
+ */
+export const FALLBACK_TWEET_DETAIL_FEATURE_SWITCHES = Object.keys(FALLBACK_TWEET_DETAIL_FEATURE_DEFAULTS);
+export const FALLBACK_TWEET_DETAIL_FIELD_TOGGLES = [
+  "withArticleRichContentState",
+  "withArticlePlainText",
+  "withGrokAnalyze",
+  "withDisallowedReplyControls",
+];
diff --git a/skills/baoyu-danger-x-to-markdown/scripts/cookie-file.ts b/skills/baoyu-danger-x-to-markdown/scripts/cookie-file.ts
new file mode 100644
index 0000000..1c67463
--- /dev/null
+++ b/skills/baoyu-danger-x-to-markdown/scripts/cookie-file.ts
@@ -0,0 +1,85 @@
+import fs from "node:fs";
+import path from "node:path";
+import { mkdir, readFile, writeFile } from "node:fs/promises";
+
+import { resolveXToMarkdownCookiePath } from "./paths.js";
+
+/** Cookie name → cookie value. */
+export type CookieMap = Record<string, string>;
+
+/** On-disk layouts accepted by the reader; the writer always emits the second one. */
+export type CookieFileData =
+  | {
+      cookies: CookieMap;
+      updated_at: number;
+      source?: string;
+    }
+  | {
+      version: number;
+      updatedAt: string;
+      cookieMap: CookieMap;
+      source?: string;
+    };
+
+/** Keeps only the string-valued entries of `value`; returns null when none remain. */
+function toCookieMap(value: object): CookieMap | null {
+  const out: CookieMap = {};
+  for (const [k, v] of Object.entries(value as Record<string, unknown>)) {
+    if (typeof v === "string") out[k] = v;
+  }
+  return Object.keys(out).length > 0 ? out : null;
+}
+
+/**
+ * Reads a cookie file and returns its cookies.
+ *
+ * Accepts `{ cookies: {...} }`, `{ cookieMap: {...} }`, or a bare object whose
+ * string-valued properties are treated as cookies (checked in that order).
+ *
+ * @param p Cookie file path; defaults to `resolveXToMarkdownCookiePath()`.
+ * @returns The cookie map, or null when the file is missing, is not a regular
+ *   file, cannot be read or parsed, or contains no string-valued entries.
+ */
+export async function read_cookie_file(
+  p: string = resolveXToMarkdownCookiePath()
+): Promise<CookieMap | null> {
+  try {
+    if (!fs.existsSync(p) || !fs.statSync(p).isFile()) return null;
+    const raw = await readFile(p, "utf8");
+    const data = JSON.parse(raw) as unknown;
+    if (!data || typeof data !== "object") return null;
+
+    const record = data as Record<string, unknown>;
+    for (const key of ["cookies", "cookieMap"]) {
+      if (!(key in record)) continue;
+      const nested = record[key];
+      // A wrapper key holding a non-object falls through to the next layout.
+      if (nested && typeof nested === "object") return toCookieMap(nested);
+    }
+    return toCookieMap(record);
+  } catch {
+    // Best effort: any read or parse failure is reported as "no cookies".
+    return null;
+  }
+}
+
+/**
+ * Writes `cookies` to `p` as pretty-printed JSON in the versioned
+ * `{ version, updatedAt, cookieMap, source }` layout, creating the parent
+ * directory if needed.
+ *
+ * @param cookies Cookie map to persist.
+ * @param p Destination path; defaults to `resolveXToMarkdownCookiePath()`.
+ * @param source Optional label stored alongside the cookies.
+ */
+export async function write_cookie_file(
+  cookies: CookieMap,
+  p: string = resolveXToMarkdownCookiePath(),
+  source?: string
+): Promise<void> {
+  const dir = path.dirname(p);
+  await mkdir(dir, { recursive: true });
+
+  const payload: CookieFileData = {
+    version: 1,
+    updatedAt: new Date().toISOString(),
+    cookieMap: cookies,
+    source,
+  };
+  // The file holds session credentials: create it readable by the owner only.
+  // (`mode` applies only when the file is newly created.)
+  await writeFile(p, JSON.stringify(payload, null, 2), { encoding: "utf8", mode: 0o600 });
+}
+
+export const readCookieFile = read_cookie_file;
+export const writeCookieFile = write_cookie_file;
+
diff --git a/skills/baoyu-danger-x-to-markdown/scripts/cookies.ts b/skills/baoyu-danger-x-to-markdown/scripts/cookies.ts
new file mode 100644
index 0000000..578cd54
--- /dev/null
+++ b/skills/baoyu-danger-x-to-markdown/scripts/cookies.ts
@@ -0,0 +1,423 @@
+import { spawn, type ChildProcess } from "node:child_process";
+import fs from "node:fs";
+import { mkdir } from "node:fs/promises";
+import net from "node:net";
+import process from "node:process";
+
+import {
read_cookie_file, write_cookie_file } from "./cookie-file.js";
+import { resolveXToMarkdownCookiePath } from "./paths.js";
+import { X_COOKIE_NAMES, X_REQUIRED_COOKIES, X_LOGIN_URL, X_USER_DATA_DIR } from "./constants.js";
+import type { CookieLike } from "./types.js";
+
+type CdpSendOptions = { sessionId?: string; timeoutMs?: number };
+
+/** Resolves after `ms` milliseconds. */
+function sleep(ms: number): Promise<void> {
+  return new Promise((resolve) => setTimeout(resolve, ms));
+}
+
+/**
+ * `fetch` with an optional timeout. When `timeoutMs` is missing or not
+ * positive the request is issued unchanged; otherwise it is aborted through an
+ * AbortController once the timeout elapses (this replaces any caller `signal`).
+ */
+async function fetchWithTimeout(
+  url: string,
+  init: RequestInit & { timeoutMs?: number } = {}
+): Promise<Response> {
+  const { timeoutMs, ...rest } = init;
+  if (!timeoutMs || timeoutMs <= 0) return fetch(url, rest);
+
+  const ctl = new AbortController();
+  const t = setTimeout(() => ctl.abort(), timeoutMs);
+  try {
+    return await fetch(url, { ...rest, signal: ctl.signal });
+  } finally {
+    clearTimeout(t);
+  }
+}
+
+/**
+ * Minimal JSON-RPC client over a DevTools WebSocket: every request gets an
+ * incrementing id, and the matching response settles the pending promise.
+ */
+class CdpConnection {
+  private ws: WebSocket;
+  private nextId = 0;
+  private pending = new Map<
+    number,
+    { resolve: (v: unknown) => void; reject: (e: Error) => void; timer: ReturnType<typeof setTimeout> | null }
+  >();
+
+  private constructor(ws: WebSocket) {
+    this.ws = ws;
+    this.ws.addEventListener("message", (event) => {
+      try {
+        const data =
+          typeof event.data === "string"
+            ? event.data
+            : new TextDecoder().decode(event.data as ArrayBuffer);
+        const msg = JSON.parse(data) as { id?: number; result?: unknown; error?: { message?: string } };
+        // Ids start at 1, so a falsy id means the frame is not a reply to `send`.
+        if (msg.id) {
+          const p = this.pending.get(msg.id);
+          if (p) {
+            this.pending.delete(msg.id);
+            if (p.timer) clearTimeout(p.timer);
+            if (msg.error?.message) p.reject(new Error(msg.error.message));
+            else p.resolve(msg.result);
+          }
+        }
+      } catch {
+        // Deliberately ignore frames that fail to decode or parse.
+      }
+    });
+    this.ws.addEventListener("close", () => {
+      // Fail every in-flight request so callers do not wait for their timeout.
+      for (const [id, p] of this.pending.entries()) {
+        this.pending.delete(id);
+        if (p.timer) clearTimeout(p.timer);
+        p.reject(new Error("CDP connection closed."));
+      }
+    });
+  }
+
+  /**
+   * Opens a WebSocket to `url` and resolves once it is open.
+   *
+   * @throws Error("CDP connection timeout.") if it is not open within `timeoutMs`.
+   * @throws Error("CDP connection failed.") if the socket reports an error first.
+   */
+  static async connect(url: string, timeoutMs: number): Promise<CdpConnection> {
+    const ws = new WebSocket(url);
+    await new Promise<void>((resolve, reject) => {
+      let settled = false;
+      const fail = (message: string) => {
+        if (settled) return;
+        settled = true;
+        clearTimeout(t);
+        // Do not leave a half-open socket behind when giving up.
+        try {
+          ws.close();
+        } catch {}
+        reject(new Error(message));
+      };
+      const t = setTimeout(() => fail("CDP connection timeout."), timeoutMs);
+      ws.addEventListener("open", () => {
+        if (settled) return;
+        settled = true;
+        clearTimeout(t);
+        resolve();
+      });
+      ws.addEventListener("error", () => fail("CDP connection failed."));
+    });
+    return new CdpConnection(ws);
+  }
+
+  /**
+   * Sends one CDP command and resolves with its `result`.
+   *
+   * @param method CDP method name.
+   * @param params Optional command parameters.
+   * @param opts `sessionId` to target an attached session; `timeoutMs`
+   *   (default 15000, `<= 0` disables the timeout).
+   * @throws Error when the reply carries an error message, the timeout elapses,
+   *   the connection closes, or the frame cannot be sent.
+   */
+  async send<T = unknown>(
+    method: string,
+    params?: Record<string, unknown>,
+    opts?: CdpSendOptions
+  ): Promise<T> {
+    const id = ++this.nextId;
+    const msg: Record<string, unknown> = { id, method };
+    if (params) msg.params = params;
+    if (opts?.sessionId) msg.sessionId = opts.sessionId;
+
+    const timeoutMs = opts?.timeoutMs ?? 15_000;
+    const out = await new Promise<unknown>((resolve, reject) => {
+      const t =
+        timeoutMs > 0
+          ? setTimeout(() => {
+              this.pending.delete(id);
+              reject(new Error(`CDP timeout: ${method}`));
+            }, timeoutMs)
+          : null;
+      this.pending.set(id, { resolve, reject, timer: t });
+      try {
+        this.ws.send(JSON.stringify(msg));
+      } catch (error) {
+        // Sending failed: drop the bookkeeping instead of waiting for the timer.
+        this.pending.delete(id);
+        if (t) clearTimeout(t);
+        reject(error instanceof Error ? error : new Error(String(error)));
+      }
+    });
+    return out as T;
+  }
+
+  /** Closes the socket, ignoring any error raised while closing. */
+  close(): void {
+    try {
+      this.ws.close();
+    } catch {}
+  }
+}
+
+/** Asks the OS for an unused TCP port on 127.0.0.1 by binding port 0 and releasing it. */
+async function getFreePort(): Promise<number> {
+  return await new Promise((resolve, reject) => {
+    const srv = net.createServer();
+    srv.unref();
+    srv.on("error", reject);
+    srv.listen(0, "127.0.0.1", () => {
+      const addr = srv.address();
+      if (!addr || typeof addr === "string") {
+        srv.close(() => reject(new Error("Unable to allocate a free TCP port.")));
+        return;
+      }
+      const port = addr.port;
+      srv.close((err) => (err ? reject(err) : resolve(port)));
+    });
+  });
+}
+
+/**
+ * Locates a Chromium-family browser binary.
+ *
+ * `X_CHROME_PATH` wins when it points at an existing path; otherwise the first
+ * existing entry of a per-platform candidate list is returned.
+ *
+ * @returns The executable path, or null when nothing was found.
+ */
+function findChromeExecutable(): string | null {
+  const override = process.env.X_CHROME_PATH?.trim();
+  if (override && fs.existsSync(override)) return override;
+
+  const candidates: string[] = [];
+  switch (process.platform) {
+    case "darwin":
+      candidates.push(
+        "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
+        "/Applications/Google Chrome Canary.app/Contents/MacOS/Google Chrome Canary",
+        "/Applications/Google Chrome Beta.app/Contents/MacOS/Google Chrome Beta",
+        "/Applications/Chromium.app/Contents/MacOS/Chromium",
+        "/Applications/Microsoft Edge.app/Contents/MacOS/Microsoft Edge"
+      );
+      break;
+    case "win32":
+      // One escaped backslash per separator (the paths were double-escaped before).
+      candidates.push(
+        "C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe",
+        "C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe",
+        "C:\\Program Files\\Microsoft\\Edge\\Application\\msedge.exe",
+        "C:\\Program Files (x86)\\Microsoft\\Edge\\Application\\msedge.exe"
+      );
+      break;
+    default:
+      candidates.push(
+        "/usr/bin/google-chrome",
+        "/usr/bin/google-chrome-stable",
+        "/usr/bin/chromium",
+        "/usr/bin/chromium-browser",
+        "/snap/bin/chromium",
+        "/usr/bin/microsoft-edge"
+      );
+      break;
+  }
+
+  for (const p of
candidates) {
+    if (fs.existsSync(p)) return p;
+  }
+  return null;
+}
+
+/**
+ * Polls `http://127.0.0.1:<port>/json/version` every 200 ms until it reports a
+ * `webSocketDebuggerUrl`.
+ *
+ * @returns The browser-level DevTools WebSocket URL.
+ * @throws Error("Chrome debug port not ready") once `timeoutMs` has elapsed.
+ */
+async function waitForChromeDebugPort(port: number, timeoutMs: number): Promise<string> {
+  const start = Date.now();
+  while (Date.now() - start < timeoutMs) {
+    try {
+      const res = await fetchWithTimeout(`http://127.0.0.1:${port}/json/version`, { timeoutMs: 5_000 });
+      if (!res.ok) throw new Error(`status=${res.status}`);
+      const j = (await res.json()) as { webSocketDebuggerUrl?: string };
+      if (j.webSocketDebuggerUrl) return j.webSocketDebuggerUrl;
+    } catch {
+      // Expected while Chrome is still starting; retry until the deadline.
+    }
+    await sleep(200);
+  }
+  throw new Error("Chrome debug port not ready");
+}
+
+/**
+ * Spawns Chrome with remote debugging enabled on `port`, using `profileDir` as
+ * its user-data directory and `X_LOGIN_URL` as the start page.
+ *
+ * @throws Error("Chrome executable not found.") when no browser binary is found.
+ */
+async function launchChrome(profileDir: string, port: number): Promise<ChildProcess> {
+  const chrome = findChromeExecutable();
+  if (!chrome) throw new Error("Chrome executable not found.");
+
+  const args = [
+    `--remote-debugging-port=${port}`,
+    `--user-data-dir=${profileDir}`,
+    "--no-first-run",
+    "--no-default-browser-check",
+    "--disable-popup-blocking",
+    X_LOGIN_URL,
+  ];
+
+  return spawn(chrome, args, { stdio: "ignore" });
+}
+
+/**
+ * Launches Chrome, opens the X login page and polls `Network.getCookies` once
+ * per second until the required cookies are present.
+ *
+ * @param profileDir Chrome user-data directory (created if missing).
+ * @param timeoutMs How long to wait for the required cookies.
+ * @param verbose When true, the "waiting for cookies" notice is sent to `log`.
+ * @param log Optional message sink.
+ * @returns The cookie map as soon as it has every required cookie.
+ * @throws Error when the cookies do not appear within `timeoutMs`, or when
+ *   launching or connecting to Chrome fails.
+ */
+async function fetchXCookiesViaCdp(
+  profileDir: string,
+  timeoutMs: number,
+  verbose: boolean,
+  log?: (message: string) => void
+): Promise<Record<string, string>> {
+  await mkdir(profileDir, { recursive: true });
+
+  const port = await getFreePort();
+  const chrome = await launchChrome(profileDir, port);
+
+  let cdp: CdpConnection | null = null;
+  try {
+    const wsUrl = await waitForChromeDebugPort(port, 30_000);
+    cdp = await CdpConnection.connect(wsUrl, 15_000);
+
+    const { targetId } = await cdp.send<{ targetId: string }>("Target.createTarget", {
+      url: X_LOGIN_URL,
+      newWindow: true,
+    });
+    const { sessionId } = await cdp.send<{ sessionId: string }>("Target.attachToTarget", { targetId, flatten: true });
+    await cdp.send("Network.enable", {}, { sessionId });
+
+    if (verbose) {
+      log?.("[x-cookies] Chrome opened. If needed, complete X login in the window. Waiting for cookies...");
+    }
+
+    const start = Date.now();
+    let last: Record<string, string> = {};
+
+    while (Date.now() - start < timeoutMs) {
+      const { cookies } = await cdp.send<{ cookies: CookieLike[] }>(
+        "Network.getCookies",
+        { urls: ["https://x.com/", "https://twitter.com/"] },
+        { sessionId, timeoutMs: 10_000 }
+      );
+
+      const m = buildXCookieMap((cookies ?? []).filter(Boolean));
+      last = m;
+      if (hasRequiredXCookies(m)) {
+        return m;
+      }
+
+      await sleep(1000);
+    }
+
+    throw new Error(`Timed out waiting for X cookies. Last keys: ${Object.keys(last).join(", ")}`);
+  } finally {
+    // Always tear the browser down: polite CDP close first, then signals.
+    if (cdp) {
+      try {
+        await cdp.send("Browser.close", {}, { timeoutMs: 5_000 });
+      } catch {}
+      cdp.close();
+    }
+
+    try {
+      chrome.kill("SIGTERM");
+    } catch {}
+    // Escalate after 2 s; unref so this timer does not keep the process alive.
+    setTimeout(() => {
+      if (!chrome.killed) {
+        try {
+          chrome.kill("SIGKILL");
+        } catch {}
+      }
+    }, 2_000).unref?.();
+  }
+}
+
+/**
+ * Returns the host a cookie belongs to: its `domain` without a leading dot,
+ * else the hostname of its `url`, else null (also when `url` is unparsable).
+ */
+function resolveCookieDomain(cookie: CookieLike): string | null {
+  const rawDomain = cookie.domain?.trim();
+  if (rawDomain) {
+    return rawDomain.startsWith(".") ? rawDomain.slice(1) : rawDomain;
+  }
+  const rawUrl = cookie.url?.trim();
+  if (rawUrl) {
+    try {
+      return new URL(rawUrl).hostname;
+    } catch {
+      return null;
+    }
+  }
+  return null;
+}
+
+/**
+ * Picks the value of the cookie called `name`. Preference order: exact
+ * `x.com` domain with path "/", any domain ending in `x.com`, any domain
+ * ending in `twitter.com`, then the first match.
+ */
+function pickCookieValue<T extends CookieLike>(cookies: T[], name: string): string | undefined {
+  const matches = cookies.filter((cookie) => cookie.name === name && typeof cookie.value === "string");
+  if (matches.length === 0) return undefined;
+
+  const preferred = matches.find((cookie) => {
+    const domain = resolveCookieDomain(cookie);
+    return domain === "x.com" && (cookie.path ?? "/") === "/";
+  });
+  const xDomain = matches.find((cookie) => (resolveCookieDomain(cookie) ?? "").endsWith("x.com"));
+  const twitterDomain = matches.find((cookie) => (resolveCookieDomain(cookie) ?? "").endsWith("twitter.com"));
+  return (preferred ?? xDomain ?? twitterDomain ??
matches[0])?.value;
+}
+
+/** Builds a name → value map restricted to `X_COOKIE_NAMES`, skipping empty values. */
+function buildXCookieMap<T extends CookieLike>(cookies: T[]): Record<string, string> {
+  const cookieMap: Record<string, string> = {};
+  for (const name of X_COOKIE_NAMES) {
+    const value = pickCookieValue(cookies, name);
+    if (value) cookieMap[name] = value;
+  }
+  return cookieMap;
+}
+
+/** True when every cookie in `X_REQUIRED_COOKIES` has a non-empty value. */
+export function hasRequiredXCookies(cookieMap: Record<string, string>): boolean {
+  return X_REQUIRED_COOKIES.every((name) => Boolean(cookieMap[name]));
+}
+
+/** Copies only the non-empty `X_COOKIE_NAMES` entries out of `cookieMap`. */
+function filterXCookieMap(cookieMap: Record<string, string>): Record<string, string> {
+  const filtered: Record<string, string> = {};
+  for (const name of X_COOKIE_NAMES) {
+    const value = cookieMap[name];
+    if (value) filtered[name] = value;
+  }
+  return filtered;
+}
+
+/**
+ * Builds cookie records from the environment: `X_AUTH_TOKEN`, `X_CT0`,
+ * `X_GUEST_TOKEN` and `X_TWID` (trimmed; unset or blank variables are skipped).
+ */
+function buildInlineCookiesFromEnv(): CookieLike[] {
+  const envByCookie: Array<[string, string]> = [
+    ["auth_token", "X_AUTH_TOKEN"],
+    ["ct0", "X_CT0"],
+    ["gt", "X_GUEST_TOKEN"],
+    ["twid", "X_TWID"],
+  ];
+
+  const cookies: CookieLike[] = [];
+  for (const [name, envVar] of envByCookie) {
+    const value = process.env[envVar]?.trim();
+    if (value) cookies.push({ name, value, domain: "x.com", path: "/" });
+  }
+  return cookies;
+}
+
+/** Loads cookies from the environment; returns `{}` when none are set. */
+async function loadXCookiesFromInline(log?: (message: string) => void): Promise<Record<string, string>> {
+  const inline = buildInlineCookiesFromEnv();
+  if (inline.length === 0) return {};
+
+  const cookieMap = buildXCookieMap(
+    inline.filter((cookie): cookie is CookieLike => Boolean(cookie?.name && typeof cookie.value === "string"))
+  );
+
+  if (Object.keys(cookieMap).length > 0) {
+    log?.(`[x-cookies] Loaded X cookies from env: ${Object.keys(cookieMap).length} cookie(s).`);
+  } else {
+    log?.("[x-cookies] Env cookies provided but no X cookies matched.");
+  }
+
+  return cookieMap;
+}
+
+/** Loads cookies from the cookie file; returns `{}` when it is missing or empty. */
+async function loadXCookiesFromFile(log?: (message: string) => void): Promise<Record<string, string>> {
+  const cookiePath = resolveXToMarkdownCookiePath();
+  const fileMap = filterXCookieMap((await read_cookie_file(cookiePath)) ?? {});
+  if (Object.keys(fileMap).length > 0) {
+    log?.(`[x-cookies] Loaded X cookies from file: ${cookiePath} (${Object.keys(fileMap).length} cookie(s))`);
+  }
+  return fileMap;
+}
+
+/**
+ * Collects cookies through a Chrome login session (5-minute wait) and saves
+ * them to the cookie file. Never throws: any failure is logged and `{}` is
+ * returned; a failed save is logged but the cookies are still returned.
+ */
+async function loadXCookiesFromCdp(log?: (message: string) => void): Promise<Record<string, string>> {
+  try {
+    const cookieMap = await fetchXCookiesViaCdp(X_USER_DATA_DIR, 5 * 60 * 1000, true, log);
+    if (!hasRequiredXCookies(cookieMap)) return cookieMap;
+
+    const cookiePath = resolveXToMarkdownCookiePath();
+    try {
+      await write_cookie_file(cookieMap, cookiePath, "cdp");
+      log?.(`[x-cookies] Cookies saved to ${cookiePath}`);
+    } catch (error) {
+      log?.(
+        `[x-cookies] Failed to write cookie file (${cookiePath}): ${
+          error instanceof Error ? error.message : String(error ?? "")
+        }`
+      );
+    }
+    // These values are session credentials: log only a short prefix.
+    if (cookieMap.auth_token) log?.(`[x-cookies] auth_token: ${cookieMap.auth_token.slice(0, 4)}...`);
+    if (cookieMap.ct0) log?.(`[x-cookies] ct0: ${cookieMap.ct0.slice(0, 4)}...`);
+    return cookieMap;
+  } catch (error) {
+    log?.(
+      `[x-cookies] Failed to load cookies via Chrome DevTools Protocol: ${
+        error instanceof Error ? error.message : String(error ?? "")
+      }`
+    );
+    return {};
+  }
+}
+
+/**
+ * Resolves the cookies to use. Environment values override file values; Chrome
+ * is only launched when env + file together lack a required cookie, and its
+ * cookies then rank between file and env values.
+ */
+export async function loadXCookies(log?: (message: string) => void): Promise<Record<string, string>> {
+  const inlineMap = await loadXCookiesFromInline(log);
+  const fileMap = await loadXCookiesFromFile(log);
+  const combined = { ...fileMap, ...inlineMap };
+
+  if (hasRequiredXCookies(combined)) return combined;
+
+  const cdpMap = await loadXCookiesFromCdp(log);
+  return { ...fileMap, ...cdpMap, ...inlineMap };
+}
+
+/** Forces a fresh Chrome login session and returns the cookies it produced. */
+export async function refreshXCookies(log?: (message: string) => void): Promise<Record<string, string>> {
+  return loadXCookiesFromCdp(log);
+}
+
+/**
+ * Serialises a cookie map as a `Cookie` header value (`k=v; k2=v2`).
+ *
+ * @returns The header value, or undefined when no entry has a non-empty value.
+ */
+export function buildCookieHeader(cookieMap: Record<string, string>): string | undefined {
+  const entries = Object.entries(cookieMap).filter(([, value]) => value);
+  if (entries.length === 0) return undefined;
+  return entries.map(([key, value]) => `${key}=${value}`).join("; ");
+}
diff --git a/skills/baoyu-danger-x-to-markdown/scripts/graphql.ts b/skills/baoyu-danger-x-to-markdown/scripts/graphql.ts
new file mode 100644
index 0000000..d20694e
--- /dev/null
+++ b/skills/baoyu-danger-x-to-markdown/scripts/graphql.ts
@@ -0,0 +1,389 @@
+import {
+  DEFAULT_BEARER_TOKEN,
+  DEFAULT_USER_AGENT,
+  FALLBACK_FEATURE_SWITCHES,
+  FALLBACK_FIELD_TOGGLES,
+  FALLBACK_QUERY_ID,
+  FALLBACK_TWEET_DETAIL_FEATURE_DEFAULTS,
+  FALLBACK_TWEET_DETAIL_FEATURE_SWITCHES,
+  FALLBACK_TWEET_DETAIL_FIELD_TOGGLES,
+  FALLBACK_TWEET_DETAIL_QUERY_ID,
+  FALLBACK_TWEET_FEATURE_SWITCHES,
+  FALLBACK_TWEET_FIELD_TOGGLES,
+  FALLBACK_TWEET_QUERY_ID,
+} from "./constants.js";
+import {
+  buildFeatureMap,
+  buildFieldToggleMap,
+  buildRequestHeaders,
+  buildTweetFieldToggleMap,
+  fetchHomeHtml,
+  fetchText,
+  parseStringList,
+} from "./http.js";
+import type { ArticleQueryInfo } from "./types.js";
+
+/** Type guard: true for a non-null object that has at least one own enumerable key. */
+function isNonEmptyObject(value: unknown): value is Record<string, unknown> {
+  return Boolean(value && typeof value === "object" && Object.keys(value as Record<string, unknown>).length > 0);
+}
+
+/** Returns the inner `tweet` of a `TweetWithVisibilityResults` wrapper, else the value itself (null when falsy). */
+function unwrapTweetResult(result: any): any {
+  if (!result) return null;
+  if (result.__typename === "TweetWithVisibilityResults" && result.tweet) {
+    return result.tweet;
+  }
+  return result;
+}
+
+/**
+ * Extracts the article result embedded in a tweet payload, trying the known
+ * locations in order; returns null when none is present.
+ */
+function extractArticleFromTweet(payload: unknown): unknown {
+  const root = (payload as { data?: any }).data ?? payload;
+  const result = root?.tweetResult?.result ?? root?.tweet_result?.result ?? root?.tweet_result;
+  const tweet = unwrapTweetResult(result);
+  const legacy = tweet?.legacy ?? {};
+  const article = legacy?.article ?? tweet?.article;
+  return (
+    article?.article_results?.result ??
+    legacy?.article_results?.result ??
+    tweet?.article_results?.result ??
+    null
+  );
+}
+
+/** Extracts the (unwrapped) tweet object from a tweet payload. */
+function extractTweetFromPayload(payload: unknown): unknown {
+  const root = (payload as { data?: any }).data ?? payload;
+  const result = root?.tweetResult?.result ?? root?.tweet_result?.result ?? root?.tweet_result;
+  return unwrapTweetResult(result);
+}
+
+/** Extracts the article from an article-entity payload; null when absent. */
+function extractArticleFromEntity(payload: unknown): unknown {
+  const root = (payload as { data?: any }).data ?? payload;
+  return (
+    root?.article_result_by_rest_id?.result ??
+    root?.article_result_by_rest_id ??
+    root?.article_entity_result?.result ??
+ null + ); +} + +async function resolveArticleQueryInfo(userAgent: string): Promise { + const html = await fetchHomeHtml(userAgent); + + const bundleMatch = html.match(/"bundle\\.TwitterArticles":"([a-z0-9]+)"/); + if (!bundleMatch) { + return { + queryId: FALLBACK_QUERY_ID, + featureSwitches: FALLBACK_FEATURE_SWITCHES, + fieldToggles: FALLBACK_FIELD_TOGGLES, + html, + }; + } + + const bundleHash = bundleMatch[1]; + const chunkUrl = `https://abs.twimg.com/responsive-web/client-web/bundle.TwitterArticles.${bundleHash}a.js`; + const chunk = await fetchText(chunkUrl, { + headers: { + "user-agent": userAgent, + }, + }); + + const queryIdMatch = chunk.match(/queryId:\"([^\"]+)\",operationName:\"ArticleEntityResultByRestId\"/); + const featureMatch = chunk.match( + /operationName:\"ArticleEntityResultByRestId\"[\s\S]*?featureSwitches:\[(.*?)\]/ + ); + const fieldToggleMatch = chunk.match( + /operationName:\"ArticleEntityResultByRestId\"[\s\S]*?fieldToggles:\[(.*?)\]/ + ); + + const featureSwitches = parseStringList(featureMatch?.[1]); + const fieldToggles = parseStringList(fieldToggleMatch?.[1]); + + return { + queryId: queryIdMatch?.[1] ?? FALLBACK_QUERY_ID, + featureSwitches: featureSwitches.length > 0 ? featureSwitches : FALLBACK_FEATURE_SWITCHES, + fieldToggles: fieldToggles.length > 0 ? fieldToggles : FALLBACK_FIELD_TOGGLES, + html, + }; +} + +function resolveMainChunkHash(html: string): string | null { + const match = html.match(/main\\.([a-z0-9]+)\\.js/); + return match?.[1] ?? null; +} + +function resolveApiChunkHash(html: string): string | null { + const match = html.match(/api:\"([a-zA-Z0-9_-]+)\"/); + return match?.[1] ?? 
null; +} + +async function resolveTweetDetailQueryInfo(userAgent: string): Promise { + const html = await fetchHomeHtml(userAgent); + const apiHash = resolveApiChunkHash(html); + if (!apiHash) { + return { + queryId: FALLBACK_TWEET_DETAIL_QUERY_ID, + featureSwitches: FALLBACK_TWEET_DETAIL_FEATURE_SWITCHES, + fieldToggles: FALLBACK_TWEET_DETAIL_FIELD_TOGGLES, + html, + }; + } + + const chunkUrl = `https://abs.twimg.com/responsive-web/client-web/api.${apiHash}a.js`; + const chunk = await fetchText(chunkUrl, { + headers: { + "user-agent": userAgent, + }, + }); + + const queryIdMatch = chunk.match(/queryId:\"([^\"]+)\",operationName:\"TweetDetail\"/); + const featureMatch = chunk.match( + /operationName:\"TweetDetail\"[\s\S]*?featureSwitches:\[(.*?)\]/ + ); + const fieldToggleMatch = chunk.match( + /operationName:\"TweetDetail\"[\s\S]*?fieldToggles:\[(.*?)\]/ + ); + + const featureSwitches = parseStringList(featureMatch?.[1]); + const fieldToggles = parseStringList(fieldToggleMatch?.[1]); + + return { + queryId: queryIdMatch?.[1] ?? FALLBACK_TWEET_DETAIL_QUERY_ID, + featureSwitches: featureSwitches.length > 0 ? featureSwitches : FALLBACK_TWEET_DETAIL_FEATURE_SWITCHES, + fieldToggles: fieldToggles.length > 0 ? 
/**
 * Builds the field-toggle map sent with the `TweetDetail` query.
 *
 * Starts from `buildFieldToggleMap(keys)` and then switches off a fixed set of toggles, but only
 * when they are already present in that map (absent toggles are never added).
 *
 * @param keys - Field-toggle names to include.
 * @returns The toggle map with the listed toggles forced to `false`.
 */
function buildTweetDetailFieldToggleMap(keys: string[]): Record {
  const fieldToggles = buildFieldToggleMap(keys);
  const forcedOff = ["withArticlePlainText", "withGrokAnalyze", "withDisallowedReplyControls"];
  for (const name of forcedOff) {
    if (Object.prototype.hasOwnProperty.call(fieldToggles, name)) {
      fieldToggles[name] = false;
    }
  }
  return fieldToggles;
}
fieldToggles : FALLBACK_TWEET_FIELD_TOGGLES, + html, + }; +} + +async function fetchTweetResult( + tweetId: string, + cookieMap: Record, + userAgent: string, + bearerToken: string +): Promise { + const queryInfo = await resolveTweetQueryInfo(userAgent); + const features = buildFeatureMap(queryInfo.html, queryInfo.featureSwitches); + const fieldToggles = buildTweetFieldToggleMap(queryInfo.fieldToggles); + + const url = new URL(`https://x.com/i/api/graphql/${queryInfo.queryId}/TweetResultByRestId`); + url.searchParams.set( + "variables", + JSON.stringify({ + tweetId, + withCommunity: false, + includePromotedContent: false, + withVoice: true, + }) + ); + if (Object.keys(features).length > 0) { + url.searchParams.set("features", JSON.stringify(features)); + } + if (Object.keys(fieldToggles).length > 0) { + url.searchParams.set("fieldToggles", JSON.stringify(fieldToggles)); + } + + const response = await fetch(url.toString(), { + headers: buildRequestHeaders(cookieMap, userAgent, bearerToken), + }); + + const text = await response.text(); + if (!response.ok) { + throw new Error(`X API error (${response.status}): ${text.slice(0, 400)}`); + } + + try { + return JSON.parse(text); + } catch (error) { + throw new Error(`Failed to parse response JSON: ${error instanceof Error ? 
error.message : String(error)}`); + } +} + +export async function fetchTweetDetail( + tweetId: string, + cookieMap: Record, + cursor?: string +): Promise { + const userAgent = process.env.X_USER_AGENT?.trim() || DEFAULT_USER_AGENT; + const bearerToken = process.env.X_BEARER_TOKEN?.trim() || DEFAULT_BEARER_TOKEN; + const queryInfo = await resolveTweetDetailQueryInfo(userAgent); + const features = buildFeatureMap( + queryInfo.html, + queryInfo.featureSwitches, + FALLBACK_TWEET_DETAIL_FEATURE_DEFAULTS + ); + const fieldToggles = buildTweetDetailFieldToggleMap(queryInfo.fieldToggles); + + const url = new URL(`https://x.com/i/api/graphql/${queryInfo.queryId}/TweetDetail`); + url.searchParams.set( + "variables", + JSON.stringify({ + focalTweetId: tweetId, + cursor, + referrer: cursor ? "tweet" : undefined, + with_rux_injections: false, + includePromotedContent: true, + withCommunity: true, + withQuickPromoteEligibilityTweetFields: true, + withBirdwatchNotes: true, + withVoice: true, + withV2Timeline: true, + withDownvotePerspective: false, + withReactionsMetadata: false, + withReactionsPerspective: false, + withSuperFollowsTweetFields: false, + withSuperFollowsUserFields: false, + }) + ); + if (Object.keys(features).length > 0) { + url.searchParams.set("features", JSON.stringify(features)); + } + if (Object.keys(fieldToggles).length > 0) { + url.searchParams.set("fieldToggles", JSON.stringify(fieldToggles)); + } + + const response = await fetch(url.toString(), { + headers: buildRequestHeaders(cookieMap, userAgent, bearerToken), + }); + + const text = await response.text(); + if (!response.ok) { + throw new Error(`X API error (${response.status}): ${text.slice(0, 400)}`); + } + + try { + return JSON.parse(text); + } catch (error) { + throw new Error(`Failed to parse response JSON: ${error instanceof Error ? 
error.message : String(error)}`); + } +} + +async function fetchArticleEntityById( + articleEntityId: string, + cookieMap: Record, + userAgent: string, + bearerToken: string +): Promise { + const queryInfo = await resolveArticleQueryInfo(userAgent); + const features = buildFeatureMap(queryInfo.html, queryInfo.featureSwitches); + const fieldToggles = buildFieldToggleMap(queryInfo.fieldToggles); + + const url = new URL(`https://x.com/i/api/graphql/${queryInfo.queryId}/ArticleEntityResultByRestId`); + url.searchParams.set("variables", JSON.stringify({ articleEntityId })); + if (Object.keys(features).length > 0) { + url.searchParams.set("features", JSON.stringify(features)); + } + if (Object.keys(fieldToggles).length > 0) { + url.searchParams.set("fieldToggles", JSON.stringify(fieldToggles)); + } + + const response = await fetch(url.toString(), { + headers: buildRequestHeaders(cookieMap, userAgent, bearerToken), + }); + + const text = await response.text(); + if (!response.ok) { + throw new Error(`X API error (${response.status}): ${text.slice(0, 400)}`); + } + + try { + return JSON.parse(text); + } catch (error) { + throw new Error(`Failed to parse response JSON: ${error instanceof Error ? 
/**
 * Fetches an X article, preferring the copy embedded in the tweet payload.
 *
 * The user agent and bearer token come from `X_USER_AGENT` / `X_BEARER_TOKEN` when those are set
 * to a non-blank value, otherwise from the module defaults.
 *
 * Lookup order:
 * 1. `TweetResultByRestId` for `articleId`; returned untouched when `raw` is true.
 * 2. The article extracted from that tweet payload, when `isNonEmptyObject` accepts it.
 * 3. The article-entity query; its extracted article, or the whole entity payload when the
 *    extraction yields `null`/`undefined`.
 *
 * @param articleId - Rest id used for both the tweet query and the article-entity query.
 * @param cookieMap - Cookies forwarded to the request helpers.
 * @param raw - When true, return the unprocessed tweet payload.
 * @returns The article object, or a raw payload as described above.
 */
export async function fetchXArticle(
  articleId: string,
  cookieMap: Record,
  raw: boolean
): Promise {
  const agent = process.env.X_USER_AGENT?.trim() || DEFAULT_USER_AGENT;
  const token = process.env.X_BEARER_TOKEN?.trim() || DEFAULT_BEARER_TOKEN;

  const tweetResponse = await fetchTweetResult(articleId, cookieMap, agent, token);
  if (raw) return tweetResponse;

  const embedded = extractArticleFromTweet(tweetResponse);
  if (isNonEmptyObject(embedded)) return embedded;

  // The tweet did not carry the article inline: query the article entity directly.
  const entityResponse = await fetchArticleEntityById(articleId, cookieMap, agent, token);
  const standalone = extractArticleFromEntity(entityResponse);
  return isNonEmptyObject(standalone) ? standalone : standalone ?? entityResponse;
}
/**
 * Looks up the boolean value of a feature switch inside page markup.
 *
 * Two encodings of `"<key>":{"value":true|false}` are recognised, tried in this order:
 * - plain JSON: `"key":{"value":true}`
 * - JSON embedded in a string literal, where every quote is backslash-escaped:
 *   `\"key\":{\"value\":true}`
 *
 * @param html - Markup to search.
 * @param key - Feature name; regex metacharacters in it are matched literally.
 * @returns `true`/`false` when the feature is found, otherwise `undefined`.
 */
export function resolveFeatureValue(html: string, key: string): boolean | undefined {
  const keyPattern = key.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  const unescaped = new RegExp(`"${keyPattern}"\\s*:\\s*\\{"value"\\s*:\\s*(true|false)`);
  // In escaped JSON only the quotes carry a backslash; the brace does not. The earlier pattern
  // demanded `\{`, so this branch could never match. The backslash before `{` is now optional,
  // which also keeps accepting the old form.
  const escaped = new RegExp(
    `\\\\"${keyPattern}\\\\"\\s*:\\s*\\\\?\\{\\\\"value\\\\"\\s*:\\s*(true|false)`
  );
  const match = unescaped.exec(html) ?? escaped.exec(html);
  if (!match) return undefined;
  return match[1] === "true";
}
a/skills/baoyu-danger-x-to-markdown/scripts/main.ts b/skills/baoyu-danger-x-to-markdown/scripts/main.ts new file mode 100644 index 0000000..3d34280 --- /dev/null +++ b/skills/baoyu-danger-x-to-markdown/scripts/main.ts @@ -0,0 +1,419 @@ +import fs from "node:fs"; +import path from "node:path"; +import readline from "node:readline"; +import process from "node:process"; +import { mkdir, readFile, rename, writeFile } from "node:fs/promises"; + +import { fetchXArticle } from "./graphql.js"; +import { formatArticleMarkdown } from "./markdown.js"; +import { hasRequiredXCookies, loadXCookies, refreshXCookies } from "./cookies.js"; +import { resolveXToMarkdownConsentPath } from "./paths.js"; +import { tweetToMarkdown } from "./tweet-to-markdown.js"; + +type CliArgs = { + url: string | null; + output: string | null; + json: boolean; + login: boolean; + help: boolean; +}; + +type ConsentRecord = { + version: number; + accepted: boolean; + acceptedAt: string; + disclaimerVersion: string; +}; + +const DISCLAIMER_VERSION = "1.0"; + +function printUsage(exitCode: number): never { + const cmd = "npx -y bun skills/baoyu-danger-x-to-markdown/scripts/main.ts"; + console.log(`X (Twitter) to Markdown + +Usage: + ${cmd} + ${cmd} --url + +Options: + --output , -o Output path (file or dir). 
/**
 * Parses the command-line arguments (without the node/script prefix) into a `CliArgs` record.
 *
 * Recognised options: `--help`/`-h`, `--json`, `--login`, `--url <value>` and
 * `--output`/`-o <value>`. Anything else starting with `-` is rejected. When `--url` is not
 * given, the first positional argument is used as the URL.
 *
 * @param argv - Raw argument list.
 * @returns The parsed options.
 * @throws Error when an option that needs a value has none, or when an option is unknown.
 */
function parseArgs(argv: string[]): CliArgs {
  const parsed: CliArgs = { url: null, output: null, json: false, login: false, help: false };
  const positional: string[] = [];

  let index = 0;
  while (index < argv.length) {
    const token = argv[index]!;
    index += 1;

    switch (token) {
      case "--help":
      case "-h":
        parsed.help = true;
        break;
      case "--json":
        parsed.json = true;
        break;
      case "--login":
        parsed.login = true;
        break;
      case "--url": {
        // The value is the next token; consume it even when it turns out to be missing.
        const value = argv[index];
        index += 1;
        if (!value) throw new Error("Missing value for --url");
        parsed.url = value;
        break;
      }
      case "--output":
      case "-o": {
        const value = argv[index];
        index += 1;
        if (!value) throw new Error(`Missing value for ${token}`);
        parsed.output = value;
        break;
      }
      default:
        if (token.startsWith("-")) {
          throw new Error(`Unknown option: ${token}`);
        }
        positional.push(token);
    }
  }

  if (!parsed.url && positional.length > 0) {
    parsed.url = positional[0]!;
  }
  return parsed;
}
/**
 * Decides where the generated markdown file is written and creates the target directory.
 *
 * Resolution rules:
 * - `argsOutput` ends with a path separator, or names an existing directory: the file goes to
 *   `<argsOutput>/<slug>/<id>.md`; an existing `<slug>` directory is first moved aside by
 *   `backupDirIfExists`.
 * - any other `argsOutput`: it is used as the markdown file path itself.
 * - no `argsOutput`: `resolveDefaultOutputDir(slug)` is used, with the same backup step.
 *
 * The slug is the sanitised tweet username when one can be parsed, otherwise the article/tweet
 * id, otherwise the current timestamp.
 *
 * @param normalizedUrl - URL from which the ids and the username are parsed.
 * @param kind - Whether the URL was classified as a tweet or an article.
 * @param argsOutput - Requested output path, or `null` when none was given.
 * @param log - Sink for progress messages (forwarded to `backupDirIfExists`).
 * @returns The created directory, the markdown file path and the slug.
 * @throws When backing up or creating the output directory fails.
 */
async function resolveOutputPath(
  normalizedUrl: string,
  kind: "tweet" | "article",
  argsOutput: string | null,
  log: (message: string) => void
): Promise<{ outputDir: string; markdownPath: string; slug: string }> {
  const articleId = kind === "article" ? parseArticleId(normalizedUrl) : null;
  const tweetId = kind === "tweet" ? parseTweetId(normalizedUrl) : null;
  const username = kind === "tweet" ? parseTweetUsername(normalizedUrl) : null;

  const userSlug = username ? sanitizeSlug(username) : null;
  const idPart = articleId ?? tweetId ?? String(Date.now());
  // `||`, not `??`: a username made only of stripped characters sanitises to "". An empty slug
  // would make `<dir>/<slug>` collapse to `<dir>` itself, and the backup step would then move
  // the whole output root aside.
  const slug = userSlug || idPart;

  const defaultFileName = `${idPart}.md`;

  if (argsOutput) {
    const wantsDir = argsOutput.endsWith("/") || argsOutput.endsWith("\\");
    const resolved = path.resolve(argsOutput);

    let isDirectoryTarget = wantsDir;
    if (!isDirectoryTarget) {
      try {
        isDirectoryTarget = fs.existsSync(resolved) && fs.statSync(resolved).isDirectory();
      } catch {
        // The path cannot be inspected: treat it as a file path.
        isDirectoryTarget = false;
      }
    }

    if (isDirectoryTarget) {
      // Only the probe above is best-effort. A failure to back up or create the directory must
      // surface instead of silently degrading into "write a file at the directory path".
      const outputDir = path.join(resolved, slug);
      await backupDirIfExists(outputDir, log);
      await mkdir(outputDir, { recursive: true });
      return { outputDir, markdownPath: path.join(outputDir, defaultFileName), slug };
    }

    const outputDir = path.dirname(resolved);
    await mkdir(outputDir, { recursive: true });
    return { outputDir, markdownPath: resolved, slug };
  }

  const outputDir = resolveDefaultOutputDir(slug);
  await backupDirIfExists(outputDir, log);
  await mkdir(outputDir, { recursive: true });
  return { outputDir, markdownPath: path.join(outputDir, defaultFileName), slug };
}
const normalized = answer.trim().toLowerCase(); + return normalized === "y" || normalized === "yes"; + } finally { + rl.close(); + } +} + +function isValidConsent(value: unknown): value is ConsentRecord { + if (!value || typeof value !== "object") return false; + const record = value as Partial; + return ( + record.accepted === true && + record.disclaimerVersion === DISCLAIMER_VERSION && + typeof record.acceptedAt === "string" && + record.acceptedAt.length > 0 + ); +} + +async function ensureConsent(log: (message: string) => void): Promise { + const consentPath = resolveXToMarkdownConsentPath(); + + try { + if (fs.existsSync(consentPath) && fs.statSync(consentPath).isFile()) { + const raw = await readFile(consentPath, "utf8"); + const parsed = JSON.parse(raw) as unknown; + if (isValidConsent(parsed)) { + log( + `⚠️ Warning: Using reverse-engineered X API (not official). Accepted on: ${(parsed as ConsentRecord).acceptedAt}` + ); + return; + } + } + } catch { + // fall through to prompt + } + + log(`⚠️ DISCLAIMER + +This tool uses a reverse-engineered X (Twitter) API, NOT an official API. + +Risks: +- May break without notice if X changes their API +- No official support or guarantees +- Account restrictions possible if API usage detected +- Use at your own risk +`); + + if (!process.stdin.isTTY) { + throw new Error( + `Consent required. Run in a TTY or create ${consentPath} with accepted: true and disclaimerVersion: ${DISCLAIMER_VERSION}` + ); + } + + const accepted = await promptYesNo("Do you accept these terms and wish to continue? (y/N): "); + if (!accepted) { + throw new Error("User declined the disclaimer. 
Exiting."); + } + + await mkdir(path.dirname(consentPath), { recursive: true }); + const payload: ConsentRecord = { + version: 1, + accepted: true, + acceptedAt: new Date().toISOString(), + disclaimerVersion: DISCLAIMER_VERSION, + }; + await writeFile(consentPath, JSON.stringify(payload, null, 2), "utf8"); + log(`[x-to-markdown] Consent saved to: ${consentPath}`); +} + +async function convertArticleToMarkdown( + inputUrl: string, + articleId: string, + log: (message: string) => void +): Promise { + log("[x-to-markdown] Loading cookies..."); + const cookieMap = await loadXCookies(log); + if (!hasRequiredXCookies(cookieMap)) { + throw new Error("Missing auth cookies. Provide X_AUTH_TOKEN and X_CT0 or log in via Chrome."); + } + + log(`[x-to-markdown] Fetching article ${articleId}...`); + const article = await fetchXArticle(articleId, cookieMap, false); + const body = formatArticleMarkdown(article).trimEnd(); + + const title = typeof (article as any)?.title === "string" ? String((article as any).title).trim() : ""; + const meta = formatMetaMarkdown({ + url: `https://x.com/i/article/${articleId}`, + requested_url: inputUrl, + title: title || null, + }); + + return [meta, body].filter(Boolean).join("\n\n").trimEnd(); +} + +async function main(): Promise { + const args = parseArgs(process.argv.slice(2)); + if (args.help) printUsage(0); + if (!args.login && !args.url) printUsage(1); + + const log = (message: string) => console.error(message); + await ensureConsent(log); + + if (args.login) { + log("[x-to-markdown] Refreshing cookies via browser login..."); + const cookieMap = await refreshXCookies(log); + if (!hasRequiredXCookies(cookieMap)) { + throw new Error("Missing auth cookies after login. Please ensure you are logged in to X."); + } + log("[x-to-markdown] Cookies refreshed."); + return; + } + + const normalizedUrl = normalizeInputUrl(args.url ?? 
""); + const articleId = parseArticleId(normalizedUrl); + const tweetId = parseTweetId(normalizedUrl); + if (!articleId && !tweetId) { + throw new Error("Invalid X url. Examples: https://x.com//status/ or https://x.com/i/article/"); + } + + const kind = articleId ? ("article" as const) : ("tweet" as const); + const { outputDir, markdownPath, slug } = await resolveOutputPath(normalizedUrl, kind, args.output, log); + + const markdown = + kind === "article" && articleId + ? await convertArticleToMarkdown(normalizedUrl, articleId, log) + : await tweetToMarkdown(normalizedUrl, { log }); + + await writeFile(markdownPath, markdown, "utf8"); + log(`[x-to-markdown] Saved: ${markdownPath}`); + + if (args.json) { + console.log( + JSON.stringify( + { + url: articleId ? `https://x.com/i/article/${articleId}` : normalizedUrl, + requested_url: normalizedUrl, + type: kind, + slug, + outputDir, + markdownPath, + }, + null, + 2 + ) + ); + } else { + console.log(markdownPath); + } +} + +await main().catch((error) => { + console.error(error instanceof Error ? error.message : String(error ?? 
"")); + process.exit(1); +}); diff --git a/skills/baoyu-danger-x-to-markdown/scripts/markdown.ts b/skills/baoyu-danger-x-to-markdown/scripts/markdown.ts new file mode 100644 index 0000000..ae62469 --- /dev/null +++ b/skills/baoyu-danger-x-to-markdown/scripts/markdown.ts @@ -0,0 +1,306 @@ +import type { + ArticleBlock, + ArticleContentState, + ArticleEntity, + ArticleMediaInfo, +} from "./types.js"; + +function coerceArticleEntity(value: unknown): ArticleEntity | null { + if (!value || typeof value !== "object") return null; + const candidate = value as ArticleEntity; + if ( + typeof candidate.title === "string" || + typeof candidate.plain_text === "string" || + typeof candidate.preview_text === "string" || + candidate.content_state + ) { + return candidate; + } + return null; +} + +function escapeMarkdownAlt(text: string): string { + return text.replace(/[\[\]]/g, "\\$&"); +} + +function normalizeCaption(caption?: string): string { + const trimmed = caption?.trim(); + if (!trimmed) return ""; + return trimmed.replace(/\s+/g, " "); +} + +function resolveMediaUrl(info?: ArticleMediaInfo): string | undefined { + if (!info) return undefined; + if (info.original_img_url) return info.original_img_url; + if (info.preview_image?.original_img_url) return info.preview_image.original_img_url; + const variants = info.variants ?? []; + const mp4 = variants + .filter((variant) => variant?.content_type?.includes("video")) + .sort((a, b) => (b.bit_rate ?? 0) - (a.bit_rate ?? 0))[0]; + return mp4?.url ?? variants[0]?.url; +} + +function buildMediaById(article: ArticleEntity): Map { + const map = new Map(); + for (const entity of article.media_entities ?? 
/**
 * Lists the article's media URLs that have not been emitted yet.
 *
 * Walks `article.media_entities` in order and resolves each entry with `resolveMediaUrl`.
 * Every resolved URL is recorded in `usedUrls`; it is returned only when it was not already in
 * that set and is not the excluded URL.
 *
 * @param article - Article whose media entities are scanned.
 * @param usedUrls - URLs already emitted; mutated to include every URL seen here.
 * @param excludeUrl - URL to record as used but never return.
 * @returns The newly seen URLs, in entity order.
 */
function collectMediaUrls(
  article: ArticleEntity,
  usedUrls: Set,
  excludeUrl?: string
): string[] {
  const fresh: string[] = [];

  for (const entity of article.media_entities ?? []) {
    const candidate = resolveMediaUrl(entity?.media_info);
    if (!candidate) continue;

    const isExcluded = Boolean(excludeUrl) && candidate === excludeUrl;
    const alreadyUsed = usedUrls.has(candidate);
    // Recorded in every case: re-adding a known URL is a no-op for the set.
    usedUrls.add(candidate);

    if (isExcluded || alreadyUsed) continue;
    fresh.push(candidate);
  }

  return fresh;
}
value.data.url : undefined; + if (fallbackUrl && !usedUrls.has(fallbackUrl)) { + usedUrls.add(fallbackUrl); + lines.push(`![${altText}](${fallbackUrl})`); + } + + return lines; +} + +function renderContentBlocks( + blocks: ArticleBlock[], + entityMap: ArticleContentState["entityMap"] | undefined, + mediaById: Map, + usedUrls: Set +): string[] { + const lines: string[] = []; + let previousKind: "list" | "quote" | "heading" | "text" | "code" | "media" | null = null; + let listKind: "ordered" | "unordered" | null = null; + let orderedIndex = 0; + let inCodeBlock = false; + + const pushBlock = ( + blockLines: string[], + kind: "list" | "quote" | "heading" | "text" | "media" + ) => { + if (blockLines.length === 0) return; + if ( + lines.length > 0 && + previousKind && + !(previousKind === kind && (kind === "list" || kind === "quote" || kind === "media")) + ) { + lines.push(""); + } + lines.push(...blockLines); + previousKind = kind; + }; + + const collectMediaLines = (block: ArticleBlock): string[] => { + const ranges = Array.isArray(block.entityRanges) ? block.entityRanges : []; + const mediaLines: string[] = []; + for (const range of ranges) { + if (typeof range?.key !== "number") continue; + mediaLines.push(...resolveEntityMediaLines(range.key, entityMap, mediaById, usedUrls)); + } + return mediaLines; + }; + + for (const block of blocks) { + const type = typeof block?.type === "string" ? block.type : "unstyled"; + const text = typeof block?.text === "string" ? 
block.text : ""; + + if (type === "code-block") { + if (!inCodeBlock) { + if (lines.length > 0) { + lines.push(""); + } + lines.push("```"); + inCodeBlock = true; + } + lines.push(text); + previousKind = "code"; + listKind = null; + orderedIndex = 0; + continue; + } + + if (type === "atomic") { + if (inCodeBlock) { + lines.push("```"); + inCodeBlock = false; + previousKind = "code"; + } + listKind = null; + orderedIndex = 0; + const mediaLines = collectMediaLines(block); + if (mediaLines.length > 0) { + pushBlock(mediaLines, "media"); + } + continue; + } + + if (inCodeBlock) { + lines.push("```"); + inCodeBlock = false; + previousKind = "code"; + } + + if (type === "unordered-list-item") { + listKind = "unordered"; + orderedIndex = 0; + pushBlock([`- ${text}`], "list"); + continue; + } + + if (type === "ordered-list-item") { + if (listKind !== "ordered") { + orderedIndex = 0; + } + listKind = "ordered"; + orderedIndex += 1; + pushBlock([`${orderedIndex}. ${text}`], "list"); + continue; + } + + listKind = null; + orderedIndex = 0; + + switch (type) { + case "header-one": + pushBlock([`# ${text}`], "heading"); + break; + case "header-two": + pushBlock([`## ${text}`], "heading"); + break; + case "header-three": + pushBlock([`### ${text}`], "heading"); + break; + case "header-four": + pushBlock([`#### ${text}`], "heading"); + break; + case "header-five": + pushBlock([`##### ${text}`], "heading"); + break; + case "header-six": + pushBlock([`###### ${text}`], "heading"); + break; + case "blockquote": { + const quoteLines = text.length > 0 ? 
/**
 * Renders an article object as markdown.
 *
 * Output order: `# title`, cover image, body, then a `## Media` section with any media URL not
 * already emitted. The body comes from `content_state.blocks` when that is a non-empty array,
 * otherwise from `plain_text`, otherwise from `preview_text`. Values that
 * `coerceArticleEntity` rejects are dumped as a fenced JSON block instead.
 *
 * @param article - Article payload of unknown shape.
 * @returns The markdown text with trailing whitespace removed.
 */
export function formatArticleMarkdown(article: unknown): string {
  const entity = coerceArticleEntity(article);
  if (!entity) {
    return `\`\`\`json\n${JSON.stringify(article, null, 2)}\n\`\`\``;
  }

  const out: string[] = [];
  const usedUrls = new Set();
  // Appends a section, separated from earlier output by one blank line (none before the first).
  const appendSection = (...sectionLines: string[]) => {
    if (out.length > 0) out.push("");
    out.push(...sectionLines);
  };

  const mediaById = buildMediaById(entity);

  const heading = typeof entity.title === "string" ? entity.title.trim() : "";
  if (heading) {
    out.push(`# ${heading}`);
  }

  const coverUrl = resolveMediaUrl(entity.cover_media?.media_info);
  if (coverUrl) {
    appendSection(`![](${coverUrl})`);
    usedUrls.add(coverUrl);
  }

  const contentBlocks = entity.content_state?.blocks;
  if (Array.isArray(contentBlocks) && contentBlocks.length > 0) {
    const bodyLines = renderContentBlocks(
      contentBlocks,
      entity.content_state?.entityMap,
      mediaById,
      usedUrls
    );
    if (bodyLines.length > 0) {
      appendSection(...bodyLines);
    }
  } else {
    const fallbackText =
      typeof entity.plain_text === "string"
        ? entity.plain_text
        : typeof entity.preview_text === "string"
          ? entity.preview_text
          : undefined;
    if (fallbackText !== undefined) {
      appendSection(fallbackText.trim());
    }
  }

  // The cover is passed as the excluded URL so it is not repeated in the media section.
  const leftoverMedia = collectMediaUrls(entity, usedUrls, coverUrl);
  if (leftoverMedia.length > 0) {
    out.push("", "## Media", "", ...leftoverMedia.map((url) => `![](${url})`));
  }

  return out.join("\n").trimEnd();
}
const APP_DATA_DIR = "baoyu-skills";
const X_TO_MARKDOWN_DATA_DIR = "x-to-markdown";
const COOKIE_FILE_NAME = "cookies.json";
const PROFILE_DIR_NAME = "chrome-profile";
const CONSENT_FILE_NAME = "consent.json";

/**
 * Read a path-valued environment variable.
 * Unset, empty and whitespace-only values all count as "not provided".
 */
function readEnvPath(name: string): string | undefined {
  const value = process.env[name]?.trim();
  return value ? value : undefined;
}

/**
 * Resolve the per-user data root for the current platform.
 *
 * - win32: `%APPDATA%`, else `~/AppData/Roaming`
 * - darwin: `~/Library/Application Support`
 * - other: `$XDG_DATA_HOME`, else `~/.local/share`
 *
 * An empty variable falls back to the default instead of producing a path
 * relative to the working directory. A relative `XDG_DATA_HOME` is ignored,
 * as the XDG Base Directory specification requires.
 */
export function resolveUserDataRoot(): string {
  if (process.platform === "win32") {
    return readEnvPath("APPDATA") ?? path.join(os.homedir(), "AppData", "Roaming");
  }
  if (process.platform === "darwin") {
    return path.join(os.homedir(), "Library", "Application Support");
  }
  const xdgDataHome = readEnvPath("XDG_DATA_HOME");
  if (xdgDataHome && path.isAbsolute(xdgDataHome)) {
    return xdgDataHome;
  }
  return path.join(os.homedir(), ".local", "share");
}

/** Data directory for this skill; `X_DATA_DIR` overrides the default location. */
export function resolveXToMarkdownDataDir(): string {
  const override = readEnvPath("X_DATA_DIR");
  if (override) return path.resolve(override);
  return path.join(resolveUserDataRoot(), APP_DATA_DIR, X_TO_MARKDOWN_DATA_DIR);
}

/** Cookie file path; `X_COOKIE_PATH` overrides the default inside the data directory. */
export function resolveXToMarkdownCookiePath(): string {
  const override = readEnvPath("X_COOKIE_PATH");
  if (override) return path.resolve(override);
  return path.join(resolveXToMarkdownDataDir(), COOKIE_FILE_NAME);
}

/** Chrome profile directory; `X_CHROME_PROFILE_DIR` overrides the default inside the data directory. */
export function resolveXToMarkdownChromeProfileDir(): string {
  const override = readEnvPath("X_CHROME_PROFILE_DIR");
  if (override) return path.resolve(override);
  return path.join(resolveXToMarkdownDataDir(), PROFILE_DIR_NAME);
}

/** Consent file path inside the data directory (no environment override). */
export function resolveXToMarkdownConsentPath(): string {
  return path.join(resolveXToMarkdownDataDir(), CONSENT_FILE_NAME);
}
/** Narrow an arbitrary value to a thread-shaped object; it must carry an array `tweets` field. */
function coerceThread(value: unknown): ThreadLike | null {
  const isObject = Boolean(value) && typeof value === "object";
  if (!isObject) return null;
  const thread = value as ThreadLike;
  return Array.isArray(thread.tweets) ? thread : null;
}

/** Backslash-escape square brackets so the text can sit inside `![...]`. */
function escapeMarkdownAlt(text: string): string {
  return text.replace(/[\[\]]/g, "\\$&");
}

/** Trim alt text and collapse every whitespace run to a single space; nullish input yields "". */
function normalizeAlt(text?: string | null): string {
  return (text ?? "").trim().replace(/\s+/g, " ");
}

/** Tweet body text: the note-tweet text when present, else legacy `full_text`, else legacy `text`. */
function parseTweetText(tweet: any): string {
  const legacy = tweet?.legacy;
  const raw =
    tweet?.note_tweet?.note_tweet_results?.result?.text ?? legacy?.full_text ?? legacy?.text ?? "";
  return raw.trim();
}

/** Collect `photo` media items that have a source URL, together with their normalized alt text. */
function parsePhotos(tweet: any): TweetPhoto[] {
  const mediaItems = tweet?.legacy?.extended_entities?.media ?? [];
  return mediaItems.flatMap((item: any): TweetPhoto[] => {
    if (item?.type !== "photo") return [];
    const src = item.media_url_https ?? item.media_url;
    return src ? [{ src, alt: normalizeAlt(item.ext_alt_text) }] : [];
  });
}
/**
 * Strip the `TweetWithVisibilityResults` wrapper when it carries a tweet.
 * Falsy input becomes `null`; everything else is returned unchanged.
 */
function unwrapTweetResult(result: any): any {
  if (!result) return null;
  const isVisibilityWrapper = result.__typename === "TweetWithVisibilityResults";
  return isVisibilityWrapper && result.tweet ? result.tweet : result;
}

/** Tweet id: legacy `id_str` when present, otherwise `rest_id`. */
function resolveTweetId(tweet: any): string | undefined {
  const legacyId = tweet?.legacy?.id_str;
  return legacyId ?? tweet?.rest_id;
}

/**
 * Build a status URL. Without a username the account-agnostic `/i/web/status/`
 * form is used; without a tweet id there is no URL at all.
 */
function buildTweetUrl(username: string | undefined, tweetId: string | undefined): string | null {
  if (!tweetId) return null;
  return username
    ? `https://x.com/${username}/status/${tweetId}`
    : `https://x.com/i/web/status/${tweetId}`;
}
/**
 * Render a quoted tweet as a Markdown blockquote: author line, URL line, a
 * blank quote line, then the text (or a placeholder when there is none).
 * Returns an empty array when there is no quoted tweet.
 */
function formatQuotedTweetMarkdown(quoted: any): string[] {
  if (!quoted) return [];

  const author = quoted?.core?.user_results?.result?.legacy;
  const handle = author?.screen_name;
  const displayName = author?.name;

  let authorLabel: string;
  if (handle && displayName) {
    authorLabel = `${displayName} (@${handle})`;
  } else if (handle) {
    authorLabel = `@${handle}`;
  } else {
    authorLabel = displayName ?? "Unknown";
  }

  const id = resolveTweetId(quoted);
  let url = buildTweetUrl(handle, id);
  if (url == null) {
    url = id ? `https://x.com/i/web/status/${id}` : "unavailable";
  }

  const text = parseTweetText(quoted);
  const contentLines = text ? text.split(/\r?\n/) : ["(no content)"];

  // trimEnd turns the "> " produced for blank lines into a bare ">".
  return [`Author: ${authorLabel}`, `URL: ${url}`, "", ...contentLines].map((line) =>
    `> ${line}`.trimEnd()
  );
}

/**
 * Render a list of tweets as numbered Markdown sections separated by one
 * blank line. Numbering starts at `options.startIndex` (default 1).
 *
 * @returns The joined Markdown, or "" when `tweets` is empty or not an array.
 */
export function formatThreadTweetsMarkdown(
  tweets: unknown[],
  options: ThreadTweetsMarkdownOptions = {}
): string {
  const firstNumber = options.startIndex ?? 1;
  if (!Array.isArray(tweets) || tweets.length === 0) {
    return "";
  }

  const output = tweets.reduce<string[]>((acc, tweet, offset) => {
    if (acc.length > 0) {
      acc.push("");
    }
    acc.push(...formatTweetMarkdown(tweet, firstNumber + offset, options));
    return acc;
  }, []);

  return output.join("\n").trimEnd();
}
/**
 * Strip the `TweetWithVisibilityResults` wrapper when it carries a tweet.
 * Falsy input becomes `null`; everything else is returned unchanged.
 */
function unwrapTweetResult(result: any): any {
  if (!result) return null;
  const isVisibilityWrapper = result.__typename === "TweetWithVisibilityResults";
  return isVisibilityWrapper && result.tweet ? result.tweet : result;
}

/** Pull the tweet (and its author's legacy user object, if any) out of a timeline `itemContent`. */
function extractTweetEntry(itemContent: any): TweetEntry | null {
  const rawResult = itemContent?.tweet_results?.result;
  const tweet = rawResult ? unwrapTweetResult(rawResult.tweet ?? rawResult) : null;
  if (!tweet) return null;
  return { tweet, user: tweet.core?.user_results?.result?.legacy };
}

/**
 * Walk one timeline instruction and collect its tweets and pagination cursors.
 *
 * `moduleItems` are visited first, then each entry of `entries`; entries whose
 * component is `you_might_also_like` are ignored. Cursor precedence follows
 * the assignments below: an entry-level Bottom cursor always overwrites, the
 * other entry-loop cursors keep the first value seen, and a ShowMore cursor
 * found inside an item list overwrites whatever was there.
 */
function parseInstruction(instruction?: any): ParsedEntries {
  const { entries: timelineEntries, moduleItems } = instruction || {};
  const collected: TweetEntry[] = [];
  let moreCursor: string | undefined;
  let topCursor: string | undefined;
  let bottomCursor: string | undefined;

  const CURSOR_KIND = "TimelineTimelineCursor";
  const isShowMoreCursor = (content: any): boolean =>
    ["ShowMore", "ShowMoreThreads"].includes(content.cursorType) &&
    content.itemType === CURSOR_KIND;

  const collectTweet = (content: any): void => {
    const entry = extractTweetEntry(content);
    if (entry) {
      collected.push(entry);
    }
  };

  const visitItems = (items: any[]): void => {
    for (const item of items ?? []) {
      const content = item?.item?.itemContent ?? item?.itemContent;
      if (!content) continue;
      if (isShowMoreCursor(content)) {
        moreCursor = content.value;
        continue;
      }
      collectTweet(content);
    }
  };

  if (moduleItems) {
    visitItems(moduleItems);
  }

  for (const timelineEntry of timelineEntries ?? []) {
    const content = timelineEntry?.content;
    if (content?.clientEventInfo?.component === "you_might_also_like") {
      continue;
    }

    const { itemContent, items, cursorType, entryType, value } = content ?? {};
    const isEntryCursor = entryType === CURSOR_KIND;
    const isItemCursor = itemContent?.itemType === CURSOR_KIND;

    if (isEntryCursor && cursorType === "Bottom") {
      bottomCursor = value;
    }
    if (isItemCursor && itemContent?.cursorType === "Bottom") {
      bottomCursor = bottomCursor ?? itemContent?.value;
    }
    if (isEntryCursor && cursorType === "Top") {
      topCursor = topCursor ?? value;
    }

    if (itemContent) {
      collectTweet(itemContent);
      if (isShowMoreCursor(itemContent)) {
        moreCursor = moreCursor ?? itemContent.value;
      }
      if (isItemCursor && itemContent.cursorType === "Top") {
        topCursor = topCursor ?? itemContent.value;
      }
    }

    if (items) {
      visitItems(items);
    }
  }

  return { entries: collected, moreCursor, topCursor, bottomCursor };
}

/**
 * Locate the first `TimelineAddEntries` / `TimelineAddToModule` instruction in
 * a TweetDetail response (v2 container first, then the older one) and parse it.
 */
function parseTweetsAndToken(response: any): ParsedEntries {
  const isAddInstruction = (ins: any): boolean =>
    ins?.type === "TimelineAddEntries" || ins?.type === "TimelineAddToModule";
  const data = response?.data;
  const instruction =
    data?.threaded_conversation_with_injections_v2?.instructions?.find(isAddInstruction) ??
    data?.threaded_conversation_with_injections?.instructions?.find(isAddInstruction);

  return parseInstruction(instruction);
}

/** Parse a date string to epoch milliseconds; missing or unparseable input yields 0. */
function toTimestamp(value: string | undefined): number {
  const millis = value ? Date.parse(value) : 0;
  return Number.isNaN(millis) ? 0 : millis;
}
rootEntry.user_id_str || + tweet.in_reply_to_status_id_str === rootEntry.conversation_id_str || + !tweet.in_reply_to_user_id_str) + ); + }; + + const inThread = (items: TweetEntry[]) => items.some(isSameThread); + + let hasThread = inThread(entries); + let maxRequestCount = 1000; + let topHasThread = true; + + while (topCursor && topHasThread && maxRequestCount > 0) { + const newRes = await fetchTweetDetail(tweetId, cookieMap, topCursor); + if (includeResponses) { + responses.push(newRes); + } + + const parsed = parseTweetsAndToken(newRes); + topHasThread = inThread(parsed.entries); + topCursor = parsed.topCursor; + allEntries = parsed.entries.concat(allEntries); + maxRequestCount--; + } + + async function checkMoreTweets(focalId: string) { + while (moreCursor && hasThread && maxRequestCount > 0) { + const newRes = await fetchTweetDetail(focalId, cookieMap, moreCursor); + if (includeResponses) { + responses.push(newRes); + } + + const parsed = parseTweetsAndToken(newRes); + moreCursor = parsed.moreCursor; + bottomCursor = bottomCursor ?? 
parsed.bottomCursor; + + hasThread = inThread(parsed.entries); + allEntries = allEntries.concat(parsed.entries); + maxRequestCount--; + } + + if (bottomCursor) { + const newRes = await fetchTweetDetail(focalId, cookieMap, bottomCursor); + if (includeResponses) { + responses.push(newRes); + } + + const parsed = parseTweetsAndToken(newRes); + allEntries = allEntries.concat(parsed.entries); + bottomCursor = undefined; + } + } + + await checkMoreTweets(tweetId); + + const allThreadEntries = allEntries.filter( + (entry) => entry.tweet?.legacy?.id_str === tweetId || isSameThread(entry) + ); + const lastEntity = allThreadEntries[allThreadEntries.length - 1]; + if (lastEntity?.tweet?.legacy?.id_str) { + const lastRes = await fetchTweetDetail(lastEntity.tweet.legacy.id_str, cookieMap); + if (includeResponses) { + responses.push(lastRes); + } + + const parsed = parseTweetsAndToken(lastRes); + hasThread = inThread(parsed.entries); + allEntries = allEntries.concat(parsed.entries); + moreCursor = parsed.moreCursor; + bottomCursor = parsed.bottomCursor; + maxRequestCount--; + + await checkMoreTweets(lastEntity.tweet.legacy.id_str); + } + + const distinctEntries: TweetEntry[] = []; + const entriesMap = allEntries.reduce((acc, entry) => { + const id = entry.tweet?.legacy?.id_str ?? 
entry.tweet?.rest_id; + if (id && !acc.has(id)) { + distinctEntries.push(entry); + acc.set(id, entry); + } + return acc; + }, new Map()); + allEntries = distinctEntries; + + while (rootEntry.in_reply_to_status_id_str) { + const parent = entriesMap.get(rootEntry.in_reply_to_status_id_str)?.tweet?.legacy; + if ( + parent && + parent.user_id_str === rootEntry.user_id_str && + parent.conversation_id_str === rootEntry.conversation_id_str && + parent.id_str !== rootEntry.id_str + ) { + rootEntry = parent; + } else { + break; + } + } + + allEntries = allEntries.sort((a, b) => { + const aTime = toTimestamp(a.tweet?.legacy?.created_at); + const bTime = toTimestamp(b.tweet?.legacy?.created_at); + return aTime - bTime; + }); + + const rootIndex = allEntries.findIndex( + (entry) => entry.tweet?.legacy?.id_str === rootEntry.id_str + ); + if (rootIndex > 0) { + allEntries = allEntries.slice(rootIndex); + } + + const threadEntries = allEntries.filter( + (entry) => entry.tweet?.legacy?.id_str === tweetId || isSameThread(entry) + ); + + if (!threadEntries.length) { + return null; + } + + const tweets = threadEntries.map((entry) => entry.tweet); + const user = threadEntries[0].user ?? threadEntries[0].tweet?.core?.user_results?.result?.legacy; + const result: ThreadResult = { + requestedId: tweetId, + rootId: rootEntry.id_str ?? 
/**
 * Accept a value as an article entity when it is an object exposing at least
 * one of the string fields `title` / `plain_text` / `preview_text`, or a
 * truthy `content_state`.
 */
function coerceArticleEntity(value: unknown): ArticleEntity | null {
  if (!value || typeof value !== "object") return null;
  const candidate = value as ArticleEntity;
  const textFields = [candidate.title, candidate.plain_text, candidate.preview_text];
  const hasTextField = textFields.some((field) => typeof field === "string");
  return hasTextField || candidate.content_state ? candidate : null;
}

/** True when the article has content blocks or non-blank plain/preview text. */
function hasArticleContent(article: ArticleEntity): boolean {
  const isNonBlank = (text: unknown): boolean => typeof text === "string" && text.trim() !== "";
  const blocks = article.content_state?.blocks;
  return (
    (Array.isArray(blocks) && blocks.length > 0) ||
    isNonBlank(article.plain_text) ||
    isNonBlank(article.preview_text)
  );
}

/** Extract the numeric id from an `/article/<id>` or `/i/article/<id>` URL path. */
function parseArticleIdFromUrl(raw: string | undefined): string | null {
  if (!raw) return null;
  let pathname: string;
  try {
    pathname = new URL(raw).pathname;
  } catch {
    // Not a parseable URL, so it cannot reference an article.
    return null;
  }
  return pathname.match(/\/(?:i\/)?article\/(\d+)/)?.[1] ?? null;
}

/**
 * Return the first article id found in a list of tweet URL entities. For each
 * entity only its preferred URL is inspected: `expanded_url`, else `url`,
 * else `display_url` prefixed with `https://`.
 */
function extractArticleIdFromUrls(urls: any[] | undefined): string | null {
  if (!Array.isArray(urls)) return null;
  for (let index = 0; index < urls.length; index += 1) {
    const entity = urls[index];
    const displayUrl = entity?.display_url ? `https://${entity.display_url}` : undefined;
    const id = parseArticleIdFromUrl(entity?.expanded_url ?? entity?.url ?? displayUrl);
    if (id) return id;
  }
  return null;
}

/** Return the article payload embedded in a tweet, probing each known location in order. */
export function extractArticleEntityFromTweet(tweet: any): unknown | null {
  const locations = [
    tweet?.article?.article_results?.result,
    tweet?.article?.result,
    tweet?.legacy?.article?.article_results?.result,
    tweet?.legacy?.article?.result,
    tweet?.article_results?.result,
  ];
  const found = locations.find((location) => location != null);
  return found ?? null;
}

/**
 * Resolve the article id for a tweet: the embedded article's `rest_id` when
 * available, otherwise the first article link among the note-tweet URLs and
 * then the legacy entity URLs.
 */
export function extractArticleIdFromTweet(tweet: any): string | null {
  const embeddedArticle = extractArticleEntityFromTweet(tweet) as { rest_id?: string } | null;
  if (embeddedArticle?.rest_id) {
    return embeddedArticle.rest_id;
  }

  const fromNote = extractArticleIdFromUrls(
    tweet?.note_tweet?.note_tweet_results?.result?.entity_set?.urls
  );
  return fromNote ?? extractArticleIdFromUrls(tweet?.legacy?.entities?.urls);
}
/** Pick the URL argument from the command line: the first positional (non `-` prefixed) argument. */
function parseArgs(): { url?: string } {
  const url = process.argv
    .slice(2)
    .reduce<string | undefined>(
      (found, arg) => (found || arg.startsWith("-") ? found : arg),
      undefined
    );
  return { url };
}

/** Trim the input and, when it parses as a URL, return its serialized form; otherwise the trimmed text. */
function normalizeInputUrl(input: string): string {
  const candidate = input.trim();
  if (candidate === "") return "";
  try {
    return new URL(candidate).toString();
  } catch {
    // Not a URL (for example a bare tweet id): hand back the trimmed text.
    return candidate;
  }
}

/** Accept either a bare numeric id or a URL containing `/status/<id>` or `/statuses/<id>`. */
function parseTweetId(input: string): string | null {
  const candidate = input.trim();
  if (candidate === "") return null;
  if (/^\d+$/.test(candidate)) return candidate;

  let pathname: string;
  try {
    pathname = new URL(candidate).pathname;
  } catch {
    return null;
  }
  return pathname.match(/\/status(?:es)?\/(\d+)/)?.[1] ?? null;
}

/**
 * Build a status URL. Without a username the account-agnostic `/i/web/status/`
 * form is used; without a tweet id there is no URL at all.
 */
function buildTweetUrl(username: string | undefined, tweetId: string | undefined): string | null {
  if (!tweetId) return null;
  const owner = username ? username : "i/web";
  return `https://x.com/${owner}/status/${tweetId}`;
}
/** Tweet body text: the note-tweet text when present, else legacy `full_text`, else legacy `text`. */
function extractTweetText(tweet: any): string {
  const noteText = tweet?.note_tweet?.note_tweet_results?.result?.text;
  const legacyText = tweet?.legacy?.full_text ?? tweet?.legacy?.text ?? "";
  return (noteText ?? legacyText ?? "").trim();
}

/** True when the text is blank or consists of a single http(s) URL. */
function isOnlyUrl(text: string): boolean {
  const trimmed = text.trim();
  if (!trimmed) return true;
  return /^https?:\/\/\S+$/.test(trimmed);
}

/**
 * Convert a tweet (and the author's thread around it) to Markdown with a
 * front-matter header.
 *
 * When the first tweet carries an article, the article is rendered first and
 * the remaining tweets follow under a `## Thread` heading; a first tweet whose
 * text is only a link is then dropped from the thread section.
 *
 * @param inputUrl - Tweet URL or bare numeric tweet id.
 * @param options - `log` receives progress messages (default: no-op).
 * @returns The Markdown document.
 * @throws Error when the input has no tweet id, auth cookies are missing, or
 *   the thread cannot be fetched or is empty.
 */
export async function tweetToMarkdown(
  inputUrl: string,
  options: TweetToMarkdownOptions = {}
): Promise<string> {
  const normalizedUrl = normalizeInputUrl(inputUrl);
  const tweetId = parseTweetId(normalizedUrl);
  if (!tweetId) {
    throw new Error("Invalid tweet url. Example: https://x.com/<user>/status/<id>");
  }

  const log = options.log ?? (() => {});
  log("[tweet-to-markdown] Loading cookies...");
  const cookieMap = await loadXCookies(log);
  if (!hasRequiredXCookies(cookieMap)) {
    throw new Error("Missing auth cookies. Provide X_AUTH_TOKEN and X_CT0 or log in via Chrome.");
  }

  log(`[tweet-to-markdown] Fetching thread for ${tweetId}...`);
  const thread = await fetchTweetThread(tweetId, cookieMap);
  if (!thread) {
    throw new Error("Failed to fetch thread.");
  }

  const tweets = thread.tweets ?? [];
  if (tweets.length === 0) {
    throw new Error("No tweets found in thread.");
  }

  const firstTweet = tweets[0] as any;
  const user = thread.user ?? firstTweet?.core?.user_results?.result?.legacy;
  const username = user?.screen_name;
  const name = user?.name;
  const author =
    username && name ? `${name} (@${username})` : username ? `@${username}` : name ?? null;
  const authorUrl = username ? `https://x.com/${username}` : undefined;

  // A bare numeric id is valid input but not a URL; record the URL it maps to
  // so `requested_url` in the metadata is always an actual link.
  const isBareId = /^\d+$/.test(normalizedUrl);
  const requestedUrl = isBareId
    ? buildTweetUrl(username, tweetId) ?? normalizedUrl
    : normalizedUrl;
  const rootUrl = buildTweetUrl(username, thread.rootId ?? tweetId) ?? requestedUrl;

  const meta = formatMetaMarkdown({
    url: rootUrl,
    requested_url: requestedUrl,
    author,
    author_name: name ?? null,
    author_username: username ?? null,
    author_url: authorUrl ?? null,
    tweet_count: thread.totalTweets ?? tweets.length,
  });

  const parts: string[] = [meta];

  const articleEntity = await resolveArticleEntityFromTweet(firstTweet, cookieMap);
  let remainingTweets = tweets;
  if (articleEntity) {
    const articleMarkdown = formatArticleMarkdown(articleEntity).trimEnd();
    if (articleMarkdown) {
      parts.push(articleMarkdown);
      // A first tweet that is only the article link adds nothing after the article.
      if (isOnlyUrl(extractTweetText(firstTweet))) {
        remainingTweets = tweets.slice(1);
      }
    }
  }

  if (remainingTweets.length > 0) {
    const hasArticle = parts.length > 1;
    if (hasArticle) {
      parts.push("## Thread");
    }
    const tweetMarkdown = formatThreadTweetsMarkdown(remainingTweets, {
      username,
      headingLevel: hasArticle ? 3 : 2,
      startIndex: 1,
      includeTweetUrls: true,
    });
    if (tweetMarkdown) {
      parts.push(tweetMarkdown);
    }
  }

  return parts.join("\n\n").trimEnd();
}

/** CLI entry point: convert the URL given on the command line and print the Markdown to stdout. */
async function main(): Promise<void> {
  const { url } = parseArgs();
  if (!url) {
    console.error("Usage:");
    console.error("  npx -y bun skills/baoyu-danger-x-to-markdown/scripts/tweet-to-markdown.ts <url>");
    process.exit(1);
  }

  // Progress messages go to stderr so stdout holds only the Markdown and can
  // be redirected to a file safely.
  const markdown = await tweetToMarkdown(url, { log: (message) => console.error(message) });
  console.log(markdown);
}
/** Cookie-shaped record; every field is optional. */
export type CookieLike = {
  name?: string;
  value?: string;
  domain?: string;
  path?: string;
  url?: string;
};

/**
 * Parameters describing an article query.
 * NOTE(review): presumably scraped from a page or bundle (`html`); confirm against graphql.ts.
 */
export type ArticleQueryInfo = {
  queryId: string;
  featureSwitches: string[];
  fieldToggles: string[];
  html: string;
};

/** A span inside a block's text (`offset`, `length`) tied to an entity by `key`. */
export type ArticleEntityRange = {
  key?: number;
  offset?: number;
  length?: number;
};

/** One content block of an article: a type tag, its text, and entity ranges within that text. */
export type ArticleBlock = {
  type?: string;
  text?: string;
  entityRanges?: ArticleEntityRange[];
};

/** Media reference inside an entity; the id may appear under any of these field names. */
export type ArticleEntityMapMediaItem = {
  mediaId?: string;
  media_id?: string;
  localMediaId?: string;
};

/** Entry of the content-state entity map: a key plus the entity's type, mutability and data. */
export type ArticleEntityMapEntry = {
  key?: string;
  value?: {
    type?: string;
    mutability?: string;
    data?: {
      caption?: string;
      mediaItems?: ArticleEntityMapMediaItem[];
      url?: string;
    };
  };
};

/** Structured article body: ordered blocks plus the entities they reference. */
export type ArticleContentState = {
  blocks?: ArticleBlock[];
  // `Record` needs both type arguments to type-check; string keys are assumed — TODO confirm.
  entityMap?: Record<string, ArticleEntityMapEntry>;
};

/** Media details: an image URL, an optional preview image, and optional encoded variants. */
export type ArticleMediaInfo = {
  __typename?: string;
  original_img_url?: string;
  preview_image?: {
    original_img_url?: string;
  };
  variants?: Array<{
    content_type?: string;
    url?: string;
    bit_rate?: number;
  }>;
};

/** Media attached to an article: an id and its media details. */
export type ArticleMediaEntity = {
  media_id?: string;
  media_info?: ArticleMediaInfo;
};

/** Article payload; every field is optional because responses may be partial. */
export type ArticleEntity = {
  title?: string;
  plain_text?: string;
  preview_text?: string;
  content_state?: ArticleContentState;
  cover_media?: {
    media_info?: ArticleMediaInfo;
  };
  media_entities?: ArticleMediaEntity[];
};