feat: add baoyu-danger-x-to-markdown skill

Ignore generated x-to-markdown output directory.
This commit is contained in:
Jim Liu 宝玉 2026-01-18 17:42:25 -06:00
parent 5993b6969d
commit 4998eaf8c2
15 changed files with 3072 additions and 0 deletions

1
.gitignore vendored
View File

@ -143,3 +143,4 @@ tests-data/
# Skill extensions (user customization)
.baoyu-skills/
x-to-markdown/

View File

@ -0,0 +1,177 @@
---
name: baoyu-danger-x-to-markdown
description: Convert X (Twitter) tweet or article URL to markdown. Uses reverse-engineered X API (private). Requires user consent before use.
---
# X to Markdown
Converts X (Twitter) content to markdown format:
- Tweet threads → Markdown with YAML front matter
- X Articles → Full article content extraction
## Script Directory
**Important**: All scripts are located in the `scripts/` subdirectory of this skill.
**Agent Execution Instructions**:
1. Determine this SKILL.md file's directory path as `SKILL_DIR`
2. Script path = `${SKILL_DIR}/scripts/<script-name>.ts`
3. Replace all `${SKILL_DIR}` in this document with the actual path
**Script Reference**:
| Script | Purpose |
|--------|---------|
| `scripts/main.ts` | CLI entry point for URL conversion |
## ⚠️ Disclaimer (REQUIRED)
**Before using this skill**, the consent check MUST be performed.
### Consent Check Flow
**Step 1**: Check consent file
```bash
# macOS
cat ~/Library/Application\ Support/baoyu-skills/x-to-markdown/consent.json 2>/dev/null
# Linux
cat ~/.local/share/baoyu-skills/x-to-markdown/consent.json 2>/dev/null
# Windows (PowerShell)
Get-Content "$env:APPDATA\baoyu-skills\x-to-markdown\consent.json" 2>$null
```
**Step 2**: If consent exists and `accepted: true` with matching `disclaimerVersion: "1.0"`:
Print warning and proceed:
```
⚠️ Warning: Using reverse-engineered X API (not official). Accepted on: <acceptedAt date>
```
**Step 3**: If consent file doesn't exist or `disclaimerVersion` mismatch:
Display disclaimer and ask user:
```
⚠️ DISCLAIMER
This tool uses a reverse-engineered X (Twitter) API, NOT an official API.
Risks:
- May break without notice if X changes their API
- No official support or guarantees
- Account restrictions possible if API usage detected
- Use at your own risk
Do you accept these terms and wish to continue?
```
Use `AskUserQuestion` tool with options:
- **Yes, I accept** - Continue and save consent
- **No, I decline** - Exit immediately
**Step 4**: On acceptance, create consent file:
```bash
# macOS
mkdir -p ~/Library/Application\ Support/baoyu-skills/x-to-markdown
cat > ~/Library/Application\ Support/baoyu-skills/x-to-markdown/consent.json << 'EOF'
{
"version": 1,
"accepted": true,
"acceptedAt": "<ISO timestamp>",
"disclaimerVersion": "1.0"
}
EOF
# Linux
mkdir -p ~/.local/share/baoyu-skills/x-to-markdown
cat > ~/.local/share/baoyu-skills/x-to-markdown/consent.json << 'EOF'
{
"version": 1,
"accepted": true,
"acceptedAt": "<ISO timestamp>",
"disclaimerVersion": "1.0"
}
EOF
# Windows (PowerShell): write the same JSON to "$env:APPDATA\baoyu-skills\x-to-markdown\consent.json"
```
**Step 5**: On decline, output message and stop:
```
User declined the disclaimer. Exiting.
```
---
## Usage
```bash
# Convert tweet (outputs markdown path)
npx -y bun ${SKILL_DIR}/scripts/main.ts <url>
# Save to specific file
npx -y bun ${SKILL_DIR}/scripts/main.ts <url> -o output.md
# JSON output
npx -y bun ${SKILL_DIR}/scripts/main.ts <url> --json
```
## Options
| Option | Description |
|--------|-------------|
| `<url>` | Tweet or article URL |
| `-o <path>` | Output path (file or dir) |
| `--json` | Output as JSON |
| `--login` | Refresh cookies only |
## File Structure
```
x-to-markdown/
└── {username}/
└── {tweet-id}.md
```
## Supported URLs
- `https://x.com/<user>/status/<id>`
- `https://twitter.com/<user>/status/<id>`
- `https://x.com/i/article/<id>`
## Output Format
```markdown
---
url: https://x.com/username/status/123
author: "Display Name (@username)"
tweet_count: 3
---
Tweet content...
---
Thread continuation...
```
## Authentication
**Option 1**: Environment variables (recommended)
- `X_AUTH_TOKEN` - auth_token cookie
- `X_CT0` - ct0 cookie
**Option 2**: Chrome login (auto if env vars not set)
- First run opens Chrome for login
- Cookies cached locally
## Extension Support
Custom configurations via EXTEND.md.
**Check paths** (priority order):
1. `.baoyu-skills/baoyu-danger-x-to-markdown/EXTEND.md` (project)
2. `~/.baoyu-skills/baoyu-danger-x-to-markdown/EXTEND.md` (user)
If found, load before workflow. Extension content overrides defaults.

View File

@ -0,0 +1,143 @@
import { resolveXToMarkdownChromeProfileDir } from "./paths.js";
export const DEFAULT_BEARER_TOKEN =
"Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA";
export const DEFAULT_USER_AGENT =
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36";
export const X_LOGIN_URL = "https://x.com/home";
export const X_USER_DATA_DIR = resolveXToMarkdownChromeProfileDir();
export const X_COOKIE_NAMES = ["auth_token", "ct0", "gt", "twid"] as const;
export const X_REQUIRED_COOKIES = ["auth_token", "ct0"] as const;
export const FALLBACK_QUERY_ID = "id8pHQbQi7eZ6P9mA1th1Q";
export const FALLBACK_FEATURE_SWITCHES = [
"profile_label_improvements_pcf_label_in_post_enabled",
"responsive_web_profile_redirect_enabled",
"rweb_tipjar_consumption_enabled",
"verified_phone_label_enabled",
"responsive_web_graphql_skip_user_profile_image_extensions_enabled",
"responsive_web_graphql_timeline_navigation_enabled",
];
export const FALLBACK_FIELD_TOGGLES = ["withPayments", "withAuxiliaryUserLabels"];
export const FALLBACK_TWEET_QUERY_ID = "HJ9lpOL-ZlOk5CkCw0JW6Q";
export const FALLBACK_TWEET_FEATURE_SWITCHES = [
"creator_subscriptions_tweet_preview_api_enabled",
"premium_content_api_read_enabled",
"communities_web_enable_tweet_community_results_fetch",
"c9s_tweet_anatomy_moderator_badge_enabled",
"responsive_web_grok_analyze_button_fetch_trends_enabled",
"responsive_web_grok_analyze_post_followups_enabled",
"responsive_web_jetfuel_frame",
"responsive_web_grok_share_attachment_enabled",
"responsive_web_grok_annotations_enabled",
"articles_preview_enabled",
"responsive_web_edit_tweet_api_enabled",
"graphql_is_translatable_rweb_tweet_is_translatable_enabled",
"view_counts_everywhere_api_enabled",
"longform_notetweets_consumption_enabled",
"responsive_web_twitter_article_tweet_consumption_enabled",
"tweet_awards_web_tipping_enabled",
"responsive_web_grok_show_grok_translated_post",
"responsive_web_grok_analysis_button_from_backend",
"post_ctas_fetch_enabled",
"creator_subscriptions_quote_tweet_preview_enabled",
"freedom_of_speech_not_reach_fetch_enabled",
"standardized_nudges_misinfo",
"tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled",
"longform_notetweets_rich_text_read_enabled",
"longform_notetweets_inline_media_enabled",
"profile_label_improvements_pcf_label_in_post_enabled",
"responsive_web_profile_redirect_enabled",
"rweb_tipjar_consumption_enabled",
"verified_phone_label_enabled",
"responsive_web_grok_image_annotation_enabled",
"responsive_web_grok_imagine_annotation_enabled",
"responsive_web_grok_community_note_auto_translation_is_enabled",
"responsive_web_graphql_skip_user_profile_image_extensions_enabled",
"responsive_web_graphql_timeline_navigation_enabled",
"responsive_web_enhance_cards_enabled",
];
export const FALLBACK_TWEET_FIELD_TOGGLES = [
"withArticleRichContentState",
"withArticlePlainText",
"withGrokAnalyze",
"withDisallowedReplyControls",
"withPayments",
"withAuxiliaryUserLabels",
];
export const FALLBACK_TWEET_DETAIL_QUERY_ID = "_8aYOgEDz35BrBcBal1-_w";
export const FALLBACK_TWEET_DETAIL_FEATURE_SWITCHES = [
"rweb_video_screen_enabled",
"profile_label_improvements_pcf_label_in_post_enabled",
"rweb_tipjar_consumption_enabled",
"verified_phone_label_enabled",
"creator_subscriptions_tweet_preview_api_enabled",
"responsive_web_graphql_timeline_navigation_enabled",
"responsive_web_graphql_skip_user_profile_image_extensions_enabled",
"premium_content_api_read_enabled",
"communities_web_enable_tweet_community_results_fetch",
"c9s_tweet_anatomy_moderator_badge_enabled",
"responsive_web_grok_analyze_button_fetch_trends_enabled",
"responsive_web_grok_analyze_post_followups_enabled",
"responsive_web_jetfuel_frame",
"responsive_web_grok_share_attachment_enabled",
"articles_preview_enabled",
"responsive_web_edit_tweet_api_enabled",
"graphql_is_translatable_rweb_tweet_is_translatable_enabled",
"view_counts_everywhere_api_enabled",
"longform_notetweets_consumption_enabled",
"responsive_web_twitter_article_tweet_consumption_enabled",
"tweet_awards_web_tipping_enabled",
"responsive_web_grok_show_grok_translated_post",
"responsive_web_grok_analysis_button_from_backend",
"creator_subscriptions_quote_tweet_preview_enabled",
"freedom_of_speech_not_reach_fetch_enabled",
"standardized_nudges_misinfo",
"tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled",
"longform_notetweets_rich_text_read_enabled",
"longform_notetweets_inline_media_enabled",
"responsive_web_grok_image_annotation_enabled",
"responsive_web_enhance_cards_enabled",
];
export const FALLBACK_TWEET_DETAIL_FEATURE_DEFAULTS: Record<string, boolean> = {
rweb_video_screen_enabled: false,
profile_label_improvements_pcf_label_in_post_enabled: true,
rweb_tipjar_consumption_enabled: true,
verified_phone_label_enabled: false,
creator_subscriptions_tweet_preview_api_enabled: true,
responsive_web_graphql_timeline_navigation_enabled: true,
responsive_web_graphql_skip_user_profile_image_extensions_enabled: false,
premium_content_api_read_enabled: false,
communities_web_enable_tweet_community_results_fetch: true,
c9s_tweet_anatomy_moderator_badge_enabled: true,
responsive_web_grok_analyze_button_fetch_trends_enabled: false,
responsive_web_grok_analyze_post_followups_enabled: true,
responsive_web_jetfuel_frame: false,
responsive_web_grok_share_attachment_enabled: true,
articles_preview_enabled: true,
responsive_web_edit_tweet_api_enabled: true,
graphql_is_translatable_rweb_tweet_is_translatable_enabled: true,
view_counts_everywhere_api_enabled: true,
longform_notetweets_consumption_enabled: true,
responsive_web_twitter_article_tweet_consumption_enabled: true,
tweet_awards_web_tipping_enabled: false,
responsive_web_grok_show_grok_translated_post: false,
responsive_web_grok_analysis_button_from_backend: true,
creator_subscriptions_quote_tweet_preview_enabled: false,
freedom_of_speech_not_reach_fetch_enabled: true,
standardized_nudges_misinfo: true,
tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled: true,
longform_notetweets_rich_text_read_enabled: true,
longform_notetweets_inline_media_enabled: true,
responsive_web_grok_image_annotation_enabled: true,
responsive_web_enhance_cards_enabled: false,
};
export const FALLBACK_TWEET_DETAIL_FIELD_TOGGLES = [
"withArticleRichContentState",
"withArticlePlainText",
"withGrokAnalyze",
"withDisallowedReplyControls",
];

View File

@ -0,0 +1,85 @@
import fs from "node:fs";
import path from "node:path";
import { mkdir, readFile, writeFile } from "node:fs/promises";
import { resolveXToMarkdownCookiePath } from "./paths.js";
export type CookieMap = Record<string, string>;
export type CookieFileData =
| {
cookies: CookieMap;
updated_at: number;
source?: string;
}
| {
version: number;
updatedAt: string;
cookieMap: CookieMap;
source?: string;
};
export async function read_cookie_file(
p: string = resolveXToMarkdownCookiePath()
): Promise<CookieMap | null> {
try {
if (!fs.existsSync(p) || !fs.statSync(p).isFile()) return null;
const raw = await readFile(p, "utf8");
const data = JSON.parse(raw) as unknown;
if (data && typeof data === "object" && "cookies" in (data as any)) {
const cookies = (data as any).cookies as unknown;
if (cookies && typeof cookies === "object") {
const out: CookieMap = {};
for (const [k, v] of Object.entries(cookies as Record<string, unknown>)) {
if (typeof v === "string") out[k] = v;
}
return Object.keys(out).length > 0 ? out : null;
}
}
if (data && typeof data === "object" && "cookieMap" in (data as any)) {
const cookies = (data as any).cookieMap as unknown;
if (cookies && typeof cookies === "object") {
const out: CookieMap = {};
for (const [k, v] of Object.entries(cookies as Record<string, unknown>)) {
if (typeof v === "string") out[k] = v;
}
return Object.keys(out).length > 0 ? out : null;
}
}
if (data && typeof data === "object") {
const out: CookieMap = {};
for (const [k, v] of Object.entries(data as Record<string, unknown>)) {
if (typeof v === "string") out[k] = v;
}
return Object.keys(out).length > 0 ? out : null;
}
return null;
} catch {
return null;
}
}
export async function write_cookie_file(
cookies: CookieMap,
p: string = resolveXToMarkdownCookiePath(),
source?: string
): Promise<void> {
const dir = path.dirname(p);
await mkdir(dir, { recursive: true });
const payload: CookieFileData = {
version: 1,
updatedAt: new Date().toISOString(),
cookieMap: cookies,
source,
};
await writeFile(p, JSON.stringify(payload, null, 2), "utf8");
}
export const readCookieFile = read_cookie_file;
export const writeCookieFile = write_cookie_file;

View File

@ -0,0 +1,423 @@
import { spawn, type ChildProcess } from "node:child_process";
import fs from "node:fs";
import { mkdir } from "node:fs/promises";
import net from "node:net";
import process from "node:process";
import { read_cookie_file, write_cookie_file } from "./cookie-file.js";
import { resolveXToMarkdownCookiePath } from "./paths.js";
import { X_COOKIE_NAMES, X_REQUIRED_COOKIES, X_LOGIN_URL, X_USER_DATA_DIR } from "./constants.js";
import type { CookieLike } from "./types.js";
type CdpSendOptions = { sessionId?: string; timeoutMs?: number };
function sleep(ms: number): Promise<void> {
return new Promise((resolve) => setTimeout(resolve, ms));
}
async function fetchWithTimeout(
url: string,
init: RequestInit & { timeoutMs?: number } = {}
): Promise<Response> {
const { timeoutMs, ...rest } = init;
if (!timeoutMs || timeoutMs <= 0) return fetch(url, rest);
const ctl = new AbortController();
const t = setTimeout(() => ctl.abort(), timeoutMs);
try {
return await fetch(url, { ...rest, signal: ctl.signal });
} finally {
clearTimeout(t);
}
}
class CdpConnection {
private ws: WebSocket;
private nextId = 0;
private pending = new Map<
number,
{ resolve: (v: unknown) => void; reject: (e: Error) => void; timer: ReturnType<typeof setTimeout> | null }
>();
private constructor(ws: WebSocket) {
this.ws = ws;
this.ws.addEventListener("message", (event) => {
try {
const data =
typeof event.data === "string"
? event.data
: new TextDecoder().decode(event.data as ArrayBuffer);
const msg = JSON.parse(data) as { id?: number; result?: unknown; error?: { message?: string } };
if (msg.id) {
const p = this.pending.get(msg.id);
if (p) {
this.pending.delete(msg.id);
if (p.timer) clearTimeout(p.timer);
if (msg.error?.message) p.reject(new Error(msg.error.message));
else p.resolve(msg.result);
}
}
} catch {}
});
this.ws.addEventListener("close", () => {
for (const [id, p] of this.pending.entries()) {
this.pending.delete(id);
if (p.timer) clearTimeout(p.timer);
p.reject(new Error("CDP connection closed."));
}
});
}
static async connect(url: string, timeoutMs: number): Promise<CdpConnection> {
const ws = new WebSocket(url);
await new Promise<void>((resolve, reject) => {
const t = setTimeout(() => reject(new Error("CDP connection timeout.")), timeoutMs);
ws.addEventListener("open", () => {
clearTimeout(t);
resolve();
});
ws.addEventListener("error", () => {
clearTimeout(t);
reject(new Error("CDP connection failed."));
});
});
return new CdpConnection(ws);
}
async send<T = unknown>(
method: string,
params?: Record<string, unknown>,
opts?: CdpSendOptions
): Promise<T> {
const id = ++this.nextId;
const msg: Record<string, unknown> = { id, method };
if (params) msg.params = params;
if (opts?.sessionId) msg.sessionId = opts.sessionId;
const timeoutMs = opts?.timeoutMs ?? 15_000;
const out = await new Promise<unknown>((resolve, reject) => {
const t =
timeoutMs > 0
? setTimeout(() => {
this.pending.delete(id);
reject(new Error(`CDP timeout: ${method}`));
}, timeoutMs)
: null;
this.pending.set(id, { resolve, reject, timer: t });
this.ws.send(JSON.stringify(msg));
});
return out as T;
}
close(): void {
try {
this.ws.close();
} catch {}
}
}
async function getFreePort(): Promise<number> {
return await new Promise((resolve, reject) => {
const srv = net.createServer();
srv.unref();
srv.on("error", reject);
srv.listen(0, "127.0.0.1", () => {
const addr = srv.address();
if (!addr || typeof addr === "string") {
srv.close(() => reject(new Error("Unable to allocate a free TCP port.")));
return;
}
const port = addr.port;
srv.close((err) => (err ? reject(err) : resolve(port)));
});
});
}
function findChromeExecutable(): string | null {
const override = process.env.X_CHROME_PATH?.trim();
if (override && fs.existsSync(override)) return override;
const candidates: string[] = [];
switch (process.platform) {
case "darwin":
candidates.push(
"/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
"/Applications/Google Chrome Canary.app/Contents/MacOS/Google Chrome Canary",
"/Applications/Google Chrome Beta.app/Contents/MacOS/Google Chrome Beta",
"/Applications/Chromium.app/Contents/MacOS/Chromium",
"/Applications/Microsoft Edge.app/Contents/MacOS/Microsoft Edge"
);
break;
case "win32":
candidates.push(
"C:\\\\Program Files\\\\Google\\\\Chrome\\\\Application\\\\chrome.exe",
"C:\\\\Program Files (x86)\\\\Google\\\\Chrome\\\\Application\\\\chrome.exe",
"C:\\\\Program Files\\\\Microsoft\\\\Edge\\\\Application\\\\msedge.exe",
"C:\\\\Program Files (x86)\\\\Microsoft\\\\Edge\\\\Application\\\\msedge.exe"
);
break;
default:
candidates.push(
"/usr/bin/google-chrome",
"/usr/bin/google-chrome-stable",
"/usr/bin/chromium",
"/usr/bin/chromium-browser",
"/snap/bin/chromium",
"/usr/bin/microsoft-edge"
);
break;
}
for (const p of candidates) {
if (fs.existsSync(p)) return p;
}
return null;
}
async function waitForChromeDebugPort(port: number, timeoutMs: number): Promise<string> {
const start = Date.now();
while (Date.now() - start < timeoutMs) {
try {
const res = await fetchWithTimeout(`http://127.0.0.1:${port}/json/version`, { timeoutMs: 5_000 });
if (!res.ok) throw new Error(`status=${res.status}`);
const j = (await res.json()) as { webSocketDebuggerUrl?: string };
if (j.webSocketDebuggerUrl) return j.webSocketDebuggerUrl;
} catch {}
await sleep(200);
}
throw new Error("Chrome debug port not ready");
}
async function launchChrome(profileDir: string, port: number): Promise<ChildProcess> {
const chrome = findChromeExecutable();
if (!chrome) throw new Error("Chrome executable not found.");
const args = [
`--remote-debugging-port=${port}`,
`--user-data-dir=${profileDir}`,
"--no-first-run",
"--no-default-browser-check",
"--disable-popup-blocking",
X_LOGIN_URL,
];
return spawn(chrome, args, { stdio: "ignore" });
}
async function fetchXCookiesViaCdp(
profileDir: string,
timeoutMs: number,
verbose: boolean,
log?: (message: string) => void
): Promise<Record<string, string>> {
await mkdir(profileDir, { recursive: true });
const port = await getFreePort();
const chrome = await launchChrome(profileDir, port);
let cdp: CdpConnection | null = null;
try {
const wsUrl = await waitForChromeDebugPort(port, 30_000);
cdp = await CdpConnection.connect(wsUrl, 15_000);
const { targetId } = await cdp.send<{ targetId: string }>("Target.createTarget", {
url: X_LOGIN_URL,
newWindow: true,
});
const { sessionId } = await cdp.send<{ sessionId: string }>("Target.attachToTarget", { targetId, flatten: true });
await cdp.send("Network.enable", {}, { sessionId });
if (verbose) {
log?.("[x-cookies] Chrome opened. If needed, complete X login in the window. Waiting for cookies...");
}
const start = Date.now();
let last: Record<string, string> = {};
while (Date.now() - start < timeoutMs) {
const { cookies } = await cdp.send<{ cookies: CookieLike[] }>(
"Network.getCookies",
{ urls: ["https://x.com/", "https://twitter.com/"] },
{ sessionId, timeoutMs: 10_000 }
);
const m = buildXCookieMap((cookies ?? []).filter(Boolean));
last = m;
if (hasRequiredXCookies(m)) {
return m;
}
await sleep(1000);
}
throw new Error(`Timed out waiting for X cookies. Last keys: ${Object.keys(last).join(", ")}`);
} finally {
if (cdp) {
try {
await cdp.send("Browser.close", {}, { timeoutMs: 5_000 });
} catch {}
cdp.close();
}
try {
chrome.kill("SIGTERM");
} catch {}
setTimeout(() => {
if (!chrome.killed) {
try {
chrome.kill("SIGKILL");
} catch {}
}
}, 2_000).unref?.();
}
}
function resolveCookieDomain(cookie: CookieLike): string | null {
const rawDomain = cookie.domain?.trim();
if (rawDomain) {
return rawDomain.startsWith(".") ? rawDomain.slice(1) : rawDomain;
}
const rawUrl = cookie.url?.trim();
if (rawUrl) {
try {
return new URL(rawUrl).hostname;
} catch {
return null;
}
}
return null;
}
function pickCookieValue<T extends CookieLike>(cookies: T[], name: string): string | undefined {
const matches = cookies.filter((cookie) => cookie.name === name && typeof cookie.value === "string");
if (matches.length === 0) return undefined;
const preferred = matches.find((cookie) => {
const domain = resolveCookieDomain(cookie);
return domain === "x.com" && (cookie.path ?? "/") === "/";
});
const xDomain = matches.find((cookie) => (resolveCookieDomain(cookie) ?? "").endsWith("x.com"));
const twitterDomain = matches.find((cookie) => (resolveCookieDomain(cookie) ?? "").endsWith("twitter.com"));
return (preferred ?? xDomain ?? twitterDomain ?? matches[0])?.value;
}
function buildXCookieMap<T extends CookieLike>(cookies: T[]): Record<string, string> {
const cookieMap: Record<string, string> = {};
for (const name of X_COOKIE_NAMES) {
const value = pickCookieValue(cookies, name);
if (value) cookieMap[name] = value;
}
return cookieMap;
}
export function hasRequiredXCookies(cookieMap: Record<string, string>): boolean {
return X_REQUIRED_COOKIES.every((name) => Boolean(cookieMap[name]));
}
function filterXCookieMap(cookieMap: Record<string, string>): Record<string, string> {
const filtered: Record<string, string> = {};
for (const name of X_COOKIE_NAMES) {
const value = cookieMap[name];
if (value) filtered[name] = value;
}
return filtered;
}
function buildInlineCookiesFromEnv(): CookieLike[] {
const cookies: CookieLike[] = [];
const authToken = process.env.X_AUTH_TOKEN?.trim();
const ct0 = process.env.X_CT0?.trim();
const gt = process.env.X_GUEST_TOKEN?.trim();
const twid = process.env.X_TWID?.trim();
if (authToken) {
cookies.push({ name: "auth_token", value: authToken, domain: "x.com", path: "/" });
}
if (ct0) {
cookies.push({ name: "ct0", value: ct0, domain: "x.com", path: "/" });
}
if (gt) {
cookies.push({ name: "gt", value: gt, domain: "x.com", path: "/" });
}
if (twid) {
cookies.push({ name: "twid", value: twid, domain: "x.com", path: "/" });
}
return cookies;
}
async function loadXCookiesFromInline(log?: (message: string) => void): Promise<Record<string, string>> {
const inline = buildInlineCookiesFromEnv();
if (inline.length === 0) return {};
const cookieMap = buildXCookieMap(
inline.filter((cookie): cookie is CookieLike => Boolean(cookie?.name && typeof cookie.value === "string"))
);
if (Object.keys(cookieMap).length > 0) {
log?.(`[x-cookies] Loaded X cookies from env: ${Object.keys(cookieMap).length} cookie(s).`);
} else {
log?.("[x-cookies] Env cookies provided but no X cookies matched.");
}
return cookieMap;
}
async function loadXCookiesFromFile(log?: (message: string) => void): Promise<Record<string, string>> {
const cookiePath = resolveXToMarkdownCookiePath();
const fileMap = filterXCookieMap((await read_cookie_file(cookiePath)) ?? {});
if (Object.keys(fileMap).length > 0) {
log?.(`[x-cookies] Loaded X cookies from file: ${cookiePath} (${Object.keys(fileMap).length} cookie(s))`);
}
return fileMap;
}
async function loadXCookiesFromCdp(log?: (message: string) => void): Promise<Record<string, string>> {
try {
const cookieMap = await fetchXCookiesViaCdp(X_USER_DATA_DIR, 5 * 60 * 1000, true, log);
if (!hasRequiredXCookies(cookieMap)) return cookieMap;
const cookiePath = resolveXToMarkdownCookiePath();
try {
await write_cookie_file(cookieMap, cookiePath, "cdp");
log?.(`[x-cookies] Cookies saved to ${cookiePath}`);
} catch (error) {
log?.(
`[x-cookies] Failed to write cookie file (${cookiePath}): ${
error instanceof Error ? error.message : String(error ?? "")
}`
);
}
if (cookieMap.auth_token) log?.(`[x-cookies] auth_token: ${cookieMap.auth_token.slice(0, 20)}...`);
if (cookieMap.ct0) log?.(`[x-cookies] ct0: ${cookieMap.ct0.slice(0, 20)}...`);
return cookieMap;
} catch (error) {
log?.(
`[x-cookies] Failed to load cookies via Chrome DevTools Protocol: ${
error instanceof Error ? error.message : String(error ?? "")
}`
);
return {};
}
}
export async function loadXCookies(log?: (message: string) => void): Promise<Record<string, string>> {
const inlineMap = await loadXCookiesFromInline(log);
const fileMap = await loadXCookiesFromFile(log);
const combined = { ...fileMap, ...inlineMap };
if (hasRequiredXCookies(combined)) return combined;
const cdpMap = await loadXCookiesFromCdp(log);
return { ...fileMap, ...cdpMap, ...inlineMap };
}
export async function refreshXCookies(log?: (message: string) => void): Promise<Record<string, string>> {
return loadXCookiesFromCdp(log);
}
export function buildCookieHeader(cookieMap: Record<string, string>): string | undefined {
const entries = Object.entries(cookieMap).filter(([, value]) => value);
if (entries.length === 0) return undefined;
return entries.map(([key, value]) => `${key}=${value}`).join("; ");
}

View File

@ -0,0 +1,389 @@
import {
DEFAULT_BEARER_TOKEN,
DEFAULT_USER_AGENT,
FALLBACK_FEATURE_SWITCHES,
FALLBACK_FIELD_TOGGLES,
FALLBACK_QUERY_ID,
FALLBACK_TWEET_DETAIL_FEATURE_DEFAULTS,
FALLBACK_TWEET_DETAIL_FEATURE_SWITCHES,
FALLBACK_TWEET_DETAIL_FIELD_TOGGLES,
FALLBACK_TWEET_DETAIL_QUERY_ID,
FALLBACK_TWEET_FEATURE_SWITCHES,
FALLBACK_TWEET_FIELD_TOGGLES,
FALLBACK_TWEET_QUERY_ID,
} from "./constants.js";
import {
buildFeatureMap,
buildFieldToggleMap,
buildRequestHeaders,
buildTweetFieldToggleMap,
fetchHomeHtml,
fetchText,
parseStringList,
} from "./http.js";
import type { ArticleQueryInfo } from "./types.js";
function isNonEmptyObject(value: unknown): value is Record<string, unknown> {
return Boolean(value && typeof value === "object" && Object.keys(value as Record<string, unknown>).length > 0);
}
function unwrapTweetResult(result: any): any {
if (!result) return null;
if (result.__typename === "TweetWithVisibilityResults" && result.tweet) {
return result.tweet;
}
return result;
}
function extractArticleFromTweet(payload: unknown): unknown {
const root = (payload as { data?: any }).data ?? payload;
const result = root?.tweetResult?.result ?? root?.tweet_result?.result ?? root?.tweet_result;
const tweet = unwrapTweetResult(result);
const legacy = tweet?.legacy ?? {};
const article = legacy?.article ?? tweet?.article;
return (
article?.article_results?.result ??
legacy?.article_results?.result ??
tweet?.article_results?.result ??
null
);
}
function extractTweetFromPayload(payload: unknown): unknown {
const root = (payload as { data?: any }).data ?? payload;
const result = root?.tweetResult?.result ?? root?.tweet_result?.result ?? root?.tweet_result;
return unwrapTweetResult(result);
}
function extractArticleFromEntity(payload: unknown): unknown {
const root = (payload as { data?: any }).data ?? payload;
return (
root?.article_result_by_rest_id?.result ??
root?.article_result_by_rest_id ??
root?.article_entity_result?.result ??
null
);
}
async function resolveArticleQueryInfo(userAgent: string): Promise<ArticleQueryInfo> {
const html = await fetchHomeHtml(userAgent);
const bundleMatch = html.match(/"bundle\\.TwitterArticles":"([a-z0-9]+)"/);
if (!bundleMatch) {
return {
queryId: FALLBACK_QUERY_ID,
featureSwitches: FALLBACK_FEATURE_SWITCHES,
fieldToggles: FALLBACK_FIELD_TOGGLES,
html,
};
}
const bundleHash = bundleMatch[1];
const chunkUrl = `https://abs.twimg.com/responsive-web/client-web/bundle.TwitterArticles.${bundleHash}a.js`;
const chunk = await fetchText(chunkUrl, {
headers: {
"user-agent": userAgent,
},
});
const queryIdMatch = chunk.match(/queryId:\"([^\"]+)\",operationName:\"ArticleEntityResultByRestId\"/);
const featureMatch = chunk.match(
/operationName:\"ArticleEntityResultByRestId\"[\s\S]*?featureSwitches:\[(.*?)\]/
);
const fieldToggleMatch = chunk.match(
/operationName:\"ArticleEntityResultByRestId\"[\s\S]*?fieldToggles:\[(.*?)\]/
);
const featureSwitches = parseStringList(featureMatch?.[1]);
const fieldToggles = parseStringList(fieldToggleMatch?.[1]);
return {
queryId: queryIdMatch?.[1] ?? FALLBACK_QUERY_ID,
featureSwitches: featureSwitches.length > 0 ? featureSwitches : FALLBACK_FEATURE_SWITCHES,
fieldToggles: fieldToggles.length > 0 ? fieldToggles : FALLBACK_FIELD_TOGGLES,
html,
};
}
function resolveMainChunkHash(html: string): string | null {
const match = html.match(/main\\.([a-z0-9]+)\\.js/);
return match?.[1] ?? null;
}
function resolveApiChunkHash(html: string): string | null {
const match = html.match(/api:\"([a-zA-Z0-9_-]+)\"/);
return match?.[1] ?? null;
}
async function resolveTweetDetailQueryInfo(userAgent: string): Promise<ArticleQueryInfo> {
const html = await fetchHomeHtml(userAgent);
const apiHash = resolveApiChunkHash(html);
if (!apiHash) {
return {
queryId: FALLBACK_TWEET_DETAIL_QUERY_ID,
featureSwitches: FALLBACK_TWEET_DETAIL_FEATURE_SWITCHES,
fieldToggles: FALLBACK_TWEET_DETAIL_FIELD_TOGGLES,
html,
};
}
const chunkUrl = `https://abs.twimg.com/responsive-web/client-web/api.${apiHash}a.js`;
const chunk = await fetchText(chunkUrl, {
headers: {
"user-agent": userAgent,
},
});
const queryIdMatch = chunk.match(/queryId:\"([^\"]+)\",operationName:\"TweetDetail\"/);
const featureMatch = chunk.match(
/operationName:\"TweetDetail\"[\s\S]*?featureSwitches:\[(.*?)\]/
);
const fieldToggleMatch = chunk.match(
/operationName:\"TweetDetail\"[\s\S]*?fieldToggles:\[(.*?)\]/
);
const featureSwitches = parseStringList(featureMatch?.[1]);
const fieldToggles = parseStringList(fieldToggleMatch?.[1]);
return {
queryId: queryIdMatch?.[1] ?? FALLBACK_TWEET_DETAIL_QUERY_ID,
featureSwitches: featureSwitches.length > 0 ? featureSwitches : FALLBACK_TWEET_DETAIL_FEATURE_SWITCHES,
fieldToggles: fieldToggles.length > 0 ? fieldToggles : FALLBACK_TWEET_DETAIL_FIELD_TOGGLES,
html,
};
}
function buildTweetDetailFieldToggleMap(keys: string[]): Record<string, boolean> {
const toggles = buildFieldToggleMap(keys);
if (Object.prototype.hasOwnProperty.call(toggles, "withArticlePlainText")) {
toggles.withArticlePlainText = false;
}
if (Object.prototype.hasOwnProperty.call(toggles, "withGrokAnalyze")) {
toggles.withGrokAnalyze = false;
}
if (Object.prototype.hasOwnProperty.call(toggles, "withDisallowedReplyControls")) {
toggles.withDisallowedReplyControls = false;
}
return toggles;
}
async function resolveTweetQueryInfo(userAgent: string): Promise<ArticleQueryInfo> {
const html = await fetchHomeHtml(userAgent);
const mainHash = resolveMainChunkHash(html);
if (!mainHash) {
return {
queryId: FALLBACK_TWEET_QUERY_ID,
featureSwitches: FALLBACK_TWEET_FEATURE_SWITCHES,
fieldToggles: FALLBACK_TWEET_FIELD_TOGGLES,
html,
};
}
const chunkUrl = `https://abs.twimg.com/responsive-web/client-web/main.${mainHash}.js`;
const chunk = await fetchText(chunkUrl, {
headers: {
"user-agent": userAgent,
},
});
const queryIdMatch = chunk.match(/queryId:\"([^\"]+)\",operationName:\"TweetResultByRestId\"/);
const featureMatch = chunk.match(
/operationName:\"TweetResultByRestId\"[\s\S]*?featureSwitches:\[(.*?)\]/
);
const fieldToggleMatch = chunk.match(
/operationName:\"TweetResultByRestId\"[\s\S]*?fieldToggles:\[(.*?)\]/
);
const featureSwitches = parseStringList(featureMatch?.[1]);
const fieldToggles = parseStringList(fieldToggleMatch?.[1]);
return {
queryId: queryIdMatch?.[1] ?? FALLBACK_TWEET_QUERY_ID,
featureSwitches: featureSwitches.length > 0 ? featureSwitches : FALLBACK_TWEET_FEATURE_SWITCHES,
fieldToggles: fieldToggles.length > 0 ? fieldToggles : FALLBACK_TWEET_FIELD_TOGGLES,
html,
};
}
// Calls the TweetResultByRestId GraphQL endpoint for a single tweet.
// Query id / feature switches / field toggles are resolved dynamically from
// the x.com bundle (with fallbacks). Returns the parsed JSON payload;
// throws on non-2xx responses or unparsable bodies.
async function fetchTweetResult(
  tweetId: string,
  cookieMap: Record<string, string>,
  userAgent: string,
  bearerToken: string
): Promise<unknown> {
  const queryInfo = await resolveTweetQueryInfo(userAgent);
  const features = buildFeatureMap(queryInfo.html, queryInfo.featureSwitches);
  const fieldToggles = buildTweetFieldToggleMap(queryInfo.fieldToggles);
  const url = new URL(`https://x.com/i/api/graphql/${queryInfo.queryId}/TweetResultByRestId`);
  url.searchParams.set(
    "variables",
    JSON.stringify({
      tweetId,
      withCommunity: false,
      includePromotedContent: false,
      withVoice: true,
    })
  );
  if (Object.keys(features).length > 0) {
    url.searchParams.set("features", JSON.stringify(features));
  }
  if (Object.keys(fieldToggles).length > 0) {
    url.searchParams.set("fieldToggles", JSON.stringify(fieldToggles));
  }
  const response = await fetch(url.toString(), {
    headers: buildRequestHeaders(cookieMap, userAgent, bearerToken),
  });
  const text = await response.text();
  if (!response.ok) {
    // Truncate the body so the error stays readable.
    throw new Error(`X API error (${response.status}): ${text.slice(0, 400)}`);
  }
  try {
    return JSON.parse(text);
  } catch (error) {
    throw new Error(`Failed to parse response JSON: ${error instanceof Error ? error.message : String(error)}`);
  }
}
// Calls the TweetDetail GraphQL endpoint for a tweet's conversation thread.
// `cursor` pages through long threads; the referrer is set to "tweet" only
// on cursor (continuation) requests. Returns the parsed JSON payload.
export async function fetchTweetDetail(
  tweetId: string,
  cookieMap: Record<string, string>,
  cursor?: string
): Promise<unknown> {
  // Env overrides allow pinning the UA / bearer token; otherwise defaults are used.
  const userAgent = process.env.X_USER_AGENT?.trim() || DEFAULT_USER_AGENT;
  const bearerToken = process.env.X_BEARER_TOKEN?.trim() || DEFAULT_BEARER_TOKEN;
  const queryInfo = await resolveTweetDetailQueryInfo(userAgent);
  const features = buildFeatureMap(
    queryInfo.html,
    queryInfo.featureSwitches,
    FALLBACK_TWEET_DETAIL_FEATURE_DEFAULTS
  );
  const fieldToggles = buildTweetDetailFieldToggleMap(queryInfo.fieldToggles);
  const url = new URL(`https://x.com/i/api/graphql/${queryInfo.queryId}/TweetDetail`);
  url.searchParams.set(
    "variables",
    JSON.stringify({
      focalTweetId: tweetId,
      cursor,
      referrer: cursor ? "tweet" : undefined,
      with_rux_injections: false,
      includePromotedContent: true,
      withCommunity: true,
      withQuickPromoteEligibilityTweetFields: true,
      withBirdwatchNotes: true,
      withVoice: true,
      withV2Timeline: true,
      withDownvotePerspective: false,
      withReactionsMetadata: false,
      withReactionsPerspective: false,
      withSuperFollowsTweetFields: false,
      withSuperFollowsUserFields: false,
    })
  );
  if (Object.keys(features).length > 0) {
    url.searchParams.set("features", JSON.stringify(features));
  }
  if (Object.keys(fieldToggles).length > 0) {
    url.searchParams.set("fieldToggles", JSON.stringify(fieldToggles));
  }
  const response = await fetch(url.toString(), {
    headers: buildRequestHeaders(cookieMap, userAgent, bearerToken),
  });
  const text = await response.text();
  if (!response.ok) {
    throw new Error(`X API error (${response.status}): ${text.slice(0, 400)}`);
  }
  try {
    return JSON.parse(text);
  } catch (error) {
    throw new Error(`Failed to parse response JSON: ${error instanceof Error ? error.message : String(error)}`);
  }
}
// Calls the ArticleEntityResultByRestId GraphQL endpoint for a standalone
// article entity. Used as a fallback when the article cannot be extracted
// from the tweet payload (see fetchXArticle).
async function fetchArticleEntityById(
  articleEntityId: string,
  cookieMap: Record<string, string>,
  userAgent: string,
  bearerToken: string
): Promise<unknown> {
  const queryInfo = await resolveArticleQueryInfo(userAgent);
  const features = buildFeatureMap(queryInfo.html, queryInfo.featureSwitches);
  const fieldToggles = buildFieldToggleMap(queryInfo.fieldToggles);
  const url = new URL(`https://x.com/i/api/graphql/${queryInfo.queryId}/ArticleEntityResultByRestId`);
  url.searchParams.set("variables", JSON.stringify({ articleEntityId }));
  if (Object.keys(features).length > 0) {
    url.searchParams.set("features", JSON.stringify(features));
  }
  if (Object.keys(fieldToggles).length > 0) {
    url.searchParams.set("fieldToggles", JSON.stringify(fieldToggles));
  }
  const response = await fetch(url.toString(), {
    headers: buildRequestHeaders(cookieMap, userAgent, bearerToken),
  });
  const text = await response.text();
  if (!response.ok) {
    throw new Error(`X API error (${response.status}): ${text.slice(0, 400)}`);
  }
  try {
    return JSON.parse(text);
  } catch (error) {
    throw new Error(`Failed to parse response JSON: ${error instanceof Error ? error.message : String(error)}`);
  }
}
// Fetches an X article by id. Tries the tweet payload first (articles are
// attached to their tweet), then falls back to the article-entity endpoint.
// With `raw` set, returns the untouched tweet payload for debugging.
export async function fetchXArticle(
  articleId: string,
  cookieMap: Record<string, string>,
  raw: boolean
): Promise<unknown> {
  const userAgent = process.env.X_USER_AGENT?.trim() || DEFAULT_USER_AGENT;
  const bearerToken = process.env.X_BEARER_TOKEN?.trim() || DEFAULT_BEARER_TOKEN;
  const tweetPayload = await fetchTweetResult(articleId, cookieMap, userAgent, bearerToken);
  if (raw) {
    return tweetPayload;
  }
  const articleFromTweet = extractArticleFromTweet(tweetPayload);
  if (isNonEmptyObject(articleFromTweet)) {
    return articleFromTweet;
  }
  const articlePayload = await fetchArticleEntityById(articleId, cookieMap, userAgent, bearerToken);
  const articleFromEntity = extractArticleFromEntity(articlePayload);
  if (isNonEmptyObject(articleFromEntity)) {
    return articleFromEntity;
  }
  // Last resort: hand back whatever was extracted, else the raw payload.
  return articleFromEntity ?? articlePayload;
}
// Fetches a single tweet by id. Returns the extracted tweet object, or the
// whole payload when `raw` is set or extraction yields nothing usable.
export async function fetchXTweet(
  tweetId: string,
  cookieMap: Record<string, string>,
  raw: boolean
): Promise<unknown> {
  const userAgent = process.env.X_USER_AGENT?.trim() || DEFAULT_USER_AGENT;
  const bearerToken = process.env.X_BEARER_TOKEN?.trim() || DEFAULT_BEARER_TOKEN;
  const tweetPayload = await fetchTweetResult(tweetId, cookieMap, userAgent, bearerToken);
  if (raw) {
    return tweetPayload;
  }
  const tweet = extractTweetFromPayload(tweetPayload);
  if (isNonEmptyObject(tweet)) {
    return tweet;
  }
  return tweet ?? tweetPayload;
}

View File

@ -0,0 +1,117 @@
import { buildCookieHeader } from "./cookies.js";
// Module-level cache of the x.com home page HTML, keyed by user agent, so
// repeated query-info resolutions reuse a single network fetch per process.
let cachedHomeHtml: { userAgent: string; html: string } | null = null;
/**
 * Fetches `url` and returns the response body as text.
 * Throws with a truncated body excerpt when the response status is not ok.
 */
export async function fetchText(url: string, init?: RequestInit): Promise<string> {
  const response = await fetch(url, init);
  const body = await response.text();
  if (response.ok) {
    return body;
  }
  throw new Error(`Request failed (${response.status}) for ${url}: ${body.slice(0, 200)}`);
}
// Fetches https://x.com with the given user agent, using the module-level
// per-user-agent cache so repeated lookups cost at most one request.
export async function fetchHomeHtml(userAgent: string): Promise<string> {
  if (cachedHomeHtml?.userAgent === userAgent) {
    return cachedHomeHtml.html;
  }
  const html = await fetchText("https://x.com", {
    headers: {
      "user-agent": userAgent,
    },
  });
  cachedHomeHtml = { userAgent, html };
  return html;
}
/**
 * Splits a comma-separated list captured from a JS bundle into clean entries:
 * trims whitespace, drops empty pieces, and strips surrounding double quotes.
 */
export function parseStringList(raw: string | undefined): string[] {
  if (!raw) return [];
  const entries: string[] = [];
  for (const piece of raw.split(",")) {
    const trimmed = piece.trim();
    if (!trimmed) continue;
    entries.push(trimmed.replace(/^\"|\"$/g, ""));
  }
  return entries;
}
/**
 * Looks up a feature-switch boolean (`"key":{"value":true}`) in the page HTML.
 * Tries the plain JSON form first, then the backslash-escaped form used when
 * the config is embedded inside a JS string. Returns undefined when absent.
 */
export function resolveFeatureValue(html: string, key: string): boolean | undefined {
  const escapedKey = key.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  const patterns = [
    new RegExp(`"${escapedKey}"\\s*:\\s*\\{"value"\\s*:\\s*(true|false)`),
    new RegExp(`\\\\"${escapedKey}\\\\"\\s*:\\s*\\\\{\\\\"value\\\\"\\s*:\\s*(true|false)`),
  ];
  for (const pattern of patterns) {
    const match = html.match(pattern);
    if (match) return match[1] === "true";
  }
  return undefined;
}
/**
 * Resolves each feature-switch key against the page HTML. When the page does
 * not define a value, falls back to the per-operation default (when given)
 * or `true`. Always forces `responsive_web_graphql_exclude_directive_enabled`
 * on when it was not otherwise present.
 */
export function buildFeatureMap(
  html: string,
  keys: string[],
  defaults?: Record<string, boolean>
): Record<string, boolean> {
  const features: Record<string, boolean> = {};
  for (const key of keys) {
    const resolved = resolveFeatureValue(html, key);
    if (resolved !== undefined) {
      features[key] = resolved;
      continue;
    }
    const hasDefault = defaults !== undefined && Object.prototype.hasOwnProperty.call(defaults, key);
    features[key] = hasDefault ? defaults[key] ?? true : true;
  }
  if (!Object.prototype.hasOwnProperty.call(features, "responsive_web_graphql_exclude_directive_enabled")) {
    features.responsive_web_graphql_exclude_directive_enabled = true;
  }
  return features;
}
/** Maps every field-toggle key to `true`. */
export function buildFieldToggleMap(keys: string[]): Record<string, boolean> {
  return Object.fromEntries(keys.map((key) => [key, true] as const));
}
/**
 * Same as buildFieldToggleMap, but forces the Grok / reply-control toggles
 * off for TweetResultByRestId requests.
 */
export function buildTweetFieldToggleMap(keys: string[]): Record<string, boolean> {
  const toggles: Record<string, boolean> = {};
  for (const key of keys) {
    toggles[key] = !(key === "withGrokAnalyze" || key === "withDisallowedReplyControls");
  }
  return toggles;
}
// Assembles the HTTP headers for X GraphQL requests: bearer authorization,
// user agent, cookie header, CSRF token (from the ct0 cookie), the OAuth2
// session marker when an auth_token cookie exists, and an optional client
// transaction id taken from the environment.
export function buildRequestHeaders(
  cookieMap: Record<string, string>,
  userAgent: string,
  bearerToken: string
): Record<string, string> {
  const headers: Record<string, string> = {
    authorization: bearerToken,
    "user-agent": userAgent,
    accept: "application/json",
    "x-twitter-active-user": "yes",
    "x-twitter-client-language": "en",
    "accept-language": "en",
  };
  // An auth_token cookie marks a logged-in OAuth2 web session.
  if (cookieMap.auth_token) {
    headers["x-twitter-auth-type"] = "OAuth2Session";
  }
  const cookieHeader = buildCookieHeader(cookieMap);
  if (cookieHeader) {
    headers.cookie = cookieHeader;
  }
  // X expects the ct0 cookie value echoed back as the CSRF token header.
  if (cookieMap.ct0) {
    headers["x-csrf-token"] = cookieMap.ct0;
  }
  if (process.env.X_CLIENT_TRANSACTION_ID?.trim()) {
    headers["x-client-transaction-id"] = process.env.X_CLIENT_TRANSACTION_ID.trim();
  }
  return headers;
}

View File

@ -0,0 +1,419 @@
import fs from "node:fs";
import path from "node:path";
import readline from "node:readline";
import process from "node:process";
import { mkdir, readFile, rename, writeFile } from "node:fs/promises";
import { fetchXArticle } from "./graphql.js";
import { formatArticleMarkdown } from "./markdown.js";
import { hasRequiredXCookies, loadXCookies, refreshXCookies } from "./cookies.js";
import { resolveXToMarkdownConsentPath } from "./paths.js";
import { tweetToMarkdown } from "./tweet-to-markdown.js";
// Parsed command-line arguments for the CLI.
type CliArgs = {
  url: string | null;    // positional argument or --url
  output: string | null; // --output / -o (file or directory path)
  json: boolean;         // --json: emit a machine-readable result on stdout
  login: boolean;        // --login: refresh cookies only, then exit
  help: boolean;         // --help / -h
};
// Persisted record of the user's acceptance of the disclaimer.
type ConsentRecord = {
  version: number;
  accepted: boolean;
  acceptedAt: string;        // ISO timestamp of acceptance
  disclaimerVersion: string; // must equal DISCLAIMER_VERSION to remain valid
};
// Bump this to force users to re-accept after the disclaimer text changes.
const DISCLAIMER_VERSION = "1.0";
// Prints CLI usage to stdout and exits with the given code.
// Typed `never` because it always terminates the process.
function printUsage(exitCode: number): never {
  const cmd = "npx -y bun skills/baoyu-danger-x-to-markdown/scripts/main.ts";
  console.log(`X (Twitter) to Markdown
Usage:
  ${cmd} <url>
  ${cmd} --url <url>
Options:
  --output <path>, -o  Output path (file or dir). Default: ./x-to-markdown/<slug>/
  --json               Output as JSON
  --login              Refresh cookies only, then exit
  --help, -h           Show help
Examples:
  ${cmd} https://x.com/username/status/1234567890
  ${cmd} https://x.com/i/article/1234567890 -o ./article.md
  ${cmd} https://x.com/username/status/1234567890 -o ./out/
  ${cmd} https://x.com/username/status/1234567890 --json | jq -r '.markdownPath'
  ${cmd} --login
`);
  process.exit(exitCode);
}
/**
 * Parses CLI argv into CliArgs. Throws on unknown options or missing option
 * values. The first positional argument is used as the URL when --url is
 * not given.
 */
function parseArgs(argv: string[]): CliArgs {
  const parsed: CliArgs = { url: null, output: null, json: false, login: false, help: false };
  const positional: string[] = [];
  let i = 0;
  while (i < argv.length) {
    const arg = argv[i]!;
    i += 1;
    switch (arg) {
      case "--help":
      case "-h":
        parsed.help = true;
        break;
      case "--json":
        parsed.json = true;
        break;
      case "--login":
        parsed.login = true;
        break;
      case "--url": {
        const value = argv[i];
        i += 1;
        if (!value) throw new Error("Missing value for --url");
        parsed.url = value;
        break;
      }
      case "--output":
      case "-o": {
        const value = argv[i];
        i += 1;
        if (!value) throw new Error(`Missing value for ${arg}`);
        parsed.output = value;
        break;
      }
      default:
        if (arg.startsWith("-")) {
          throw new Error(`Unknown option: ${arg}`);
        }
        positional.push(arg);
        break;
    }
  }
  if (!parsed.url && positional.length > 0) {
    parsed.url = positional[0]!;
  }
  return parsed;
}
/** Normalizes raw CLI input: canonical URL form when parseable, else the trimmed text. */
function normalizeInputUrl(input: string): string {
  const trimmed = input.trim();
  if (!trimmed) return "";
  try {
    return new URL(trimmed).toString();
  } catch {
    return trimmed;
  }
}
/** Extracts the numeric article id from an x.com article URL, or null. */
function parseArticleId(input: string): string | null {
  const candidate = input.trim();
  if (!candidate) return null;
  let pathname: string;
  try {
    pathname = new URL(candidate).pathname;
  } catch {
    return null;
  }
  const match = pathname.match(/\/(?:i\/)?article\/(\d+)/);
  return match?.[1] ?? null;
}
/** Extracts a tweet id from a status URL or a bare numeric id, or null. */
function parseTweetId(input: string): string | null {
  const candidate = input.trim();
  if (!candidate) return null;
  if (/^\d+$/.test(candidate)) return candidate;
  let pathname: string;
  try {
    pathname = new URL(candidate).pathname;
  } catch {
    return null;
  }
  const match = pathname.match(/\/status(?:es)?\/(\d+)/);
  return match?.[1] ?? null;
}
/** Extracts the username from an x.com /<user>/status/<id> URL, or null. */
function parseTweetUsername(input: string): string | null {
  const candidate = input.trim();
  if (!candidate) return null;
  let pathname: string;
  try {
    pathname = new URL(candidate).pathname;
  } catch {
    return null;
  }
  const match = pathname.match(/^\/([^/]+)\/status(?:es)?\/\d+/);
  return match?.[1] ?? null;
}
/**
 * Turns arbitrary input (e.g. an @username) into a filesystem-safe slug:
 * strips a leading "@", collapses runs of unsafe characters into "-",
 * trims leading/trailing separators, and caps the length at 120.
 */
function sanitizeSlug(input: string): string {
  const collapsed = input
    .trim()
    .replace(/^@/, "")
    .replace(/[^a-zA-Z0-9_-]+/g, "-")
    .replace(/-+/g, "-");
  return collapsed.replace(/^[-_]+|[-_]+$/g, "").slice(0, 120);
}
/** Formats a local timestamp as YYYYMMDD-HHMMSS for backup directory names. */
function formatBackupTimestamp(date: Date = new Date()): string {
  const pad2 = (n: number) => String(n).padStart(2, "0");
  const day = `${date.getFullYear()}${pad2(date.getMonth() + 1)}${pad2(date.getDate())}`;
  const time = `${pad2(date.getHours())}${pad2(date.getMinutes())}${pad2(date.getSeconds())}`;
  return `${day}-${time}`;
}
/**
 * Moves `dir` aside to `<dir>-backup-<timestamp>` when it exists and is a
 * directory; no-op otherwise. Wraps any failure in a contextual error.
 */
async function backupDirIfExists(dir: string, log: (message: string) => void): Promise<void> {
  try {
    if (!fs.existsSync(dir) || !fs.statSync(dir).isDirectory()) {
      return;
    }
    const backupPath = `${dir}-backup-${formatBackupTimestamp()}`;
    await rename(dir, backupPath);
    log(`[x-to-markdown] Existing directory moved to: ${backupPath}`);
  } catch (error) {
    throw new Error(
      `Failed to backup existing directory (${dir}): ${error instanceof Error ? error.message : String(error ?? "")}`
    );
  }
}
/** Default output directory: ./x-to-markdown/<slug> under the current working directory. */
function resolveDefaultOutputDir(slug: string): string {
  return path.resolve(process.cwd(), "x-to-markdown", slug);
}
/**
 * Decides where the markdown file goes.
 *
 * - No --output: ./x-to-markdown/<slug>/<id>.md (an existing dir is backed up).
 * - --output naming a directory (existing, or ending in a path separator):
 *   <output>/<slug>/<id>.md.
 * - Otherwise --output is treated as the target file path.
 *
 * Returns the output directory, the markdown file path, and the slug used.
 */
async function resolveOutputPath(
  normalizedUrl: string,
  kind: "tweet" | "article",
  argsOutput: string | null,
  log: (message: string) => void
): Promise<{ outputDir: string; markdownPath: string; slug: string }> {
  const articleId = kind === "article" ? parseArticleId(normalizedUrl) : null;
  const tweetId = kind === "tweet" ? parseTweetId(normalizedUrl) : null;
  const username = kind === "tweet" ? parseTweetUsername(normalizedUrl) : null;
  const userSlug = username ? sanitizeSlug(username) : null;
  // Prefer the content id; fall back to a timestamp so a name always exists.
  const idPart = articleId ?? tweetId ?? String(Date.now());
  const slug = userSlug ?? idPart;
  // The file is named after the id for both tweets and articles (the original
  // ternary had two identical branches, so it is a plain template here).
  const defaultFileName = `${idPart}.md`;
  if (argsOutput) {
    const wantsDir = argsOutput.endsWith("/") || argsOutput.endsWith("\\");
    const resolved = path.resolve(argsOutput);
    try {
      if (wantsDir || (fs.existsSync(resolved) && fs.statSync(resolved).isDirectory())) {
        const outputDir = path.join(resolved, slug);
        await backupDirIfExists(outputDir, log);
        await mkdir(outputDir, { recursive: true });
        return { outputDir, markdownPath: path.join(outputDir, defaultFileName), slug };
      }
    } catch {
      // Stat failed — treat the argument as a file path below.
    }
    const outputDir = path.dirname(resolved);
    await mkdir(outputDir, { recursive: true });
    return { outputDir, markdownPath: resolved, slug };
  }
  const outputDir = resolveDefaultOutputDir(slug);
  await backupDirIfExists(outputDir, log);
  await mkdir(outputDir, { recursive: true });
  return { outputDir, markdownPath: path.join(outputDir, defaultFileName), slug };
}
/**
 * Renders a YAML front-matter block from the given metadata, skipping
 * null/undefined/empty-string values. Strings are JSON-quoted (which is
 * valid YAML); numbers are emitted bare.
 */
function formatMetaMarkdown(meta: Record<string, string | number | null | undefined>): string {
  const body = Object.entries(meta)
    .filter(([, value]) => value !== undefined && value !== null && value !== "")
    .map(([key, value]) =>
      typeof value === "number" ? `${key}: ${value}` : `${key}: ${JSON.stringify(value)}`
    );
  return ["---", ...body, "---"].join("\n");
}
/**
 * Asks a yes/no question on stderr and resolves true only for "y"/"yes"
 * (case-insensitive). Resolves false immediately when stdin is not a TTY.
 * The readline interface is always closed, even if the prompt throws.
 */
async function promptYesNo(question: string): Promise<boolean> {
  if (!process.stdin.isTTY) return false;
  const rl = readline.createInterface({
    input: process.stdin,
    output: process.stderr,
  });
  try {
    const answer = await new Promise<string>((resolve) => rl.question(question, resolve));
    return ["y", "yes"].includes(answer.trim().toLowerCase());
  } finally {
    rl.close();
  }
}
/**
 * Type guard for a stored consent record: it must be accepted, carry the
 * current disclaimer version, and have a non-empty acceptance timestamp.
 */
function isValidConsent(value: unknown): value is ConsentRecord {
  if (!value || typeof value !== "object") return false;
  const record = value as Partial<ConsentRecord>;
  if (record.accepted !== true) return false;
  if (record.disclaimerVersion !== DISCLAIMER_VERSION) return false;
  return typeof record.acceptedAt === "string" && record.acceptedAt.length > 0;
}
// Ensures the user has accepted the reverse-engineered-API disclaimer.
// A valid consent file short-circuits with a warning; otherwise the
// disclaimer is printed and (in a TTY) the user is prompted. Acceptance is
// persisted so subsequent runs skip the prompt.
async function ensureConsent(log: (message: string) => void): Promise<void> {
  const consentPath = resolveXToMarkdownConsentPath();
  try {
    if (fs.existsSync(consentPath) && fs.statSync(consentPath).isFile()) {
      const raw = await readFile(consentPath, "utf8");
      const parsed = JSON.parse(raw) as unknown;
      if (isValidConsent(parsed)) {
        log(
          `⚠️ Warning: Using reverse-engineered X API (not official). Accepted on: ${(parsed as ConsentRecord).acceptedAt}`
        );
        return;
      }
    }
  } catch {
    // Unreadable or corrupt consent file — fall through to prompt again.
  }
  log(`⚠️ DISCLAIMER
This tool uses a reverse-engineered X (Twitter) API, NOT an official API.
Risks:
- May break without notice if X changes their API
- No official support or guarantees
- Account restrictions possible if API usage detected
- Use at your own risk
`);
  if (!process.stdin.isTTY) {
    // Non-interactive runs cannot consent; explain how to pre-seed the file.
    throw new Error(
      `Consent required. Run in a TTY or create ${consentPath} with accepted: true and disclaimerVersion: ${DISCLAIMER_VERSION}`
    );
  }
  const accepted = await promptYesNo("Do you accept these terms and wish to continue? (y/N): ");
  if (!accepted) {
    throw new Error("User declined the disclaimer. Exiting.");
  }
  await mkdir(path.dirname(consentPath), { recursive: true });
  const payload: ConsentRecord = {
    version: 1,
    accepted: true,
    acceptedAt: new Date().toISOString(),
    disclaimerVersion: DISCLAIMER_VERSION,
  };
  await writeFile(consentPath, JSON.stringify(payload, null, 2), "utf8");
  log(`[x-to-markdown] Consent saved to: ${consentPath}`);
}
// Fetches an X article by id and renders it as markdown with YAML front
// matter (canonical url, originally requested url, and title when present).
// Requires auth cookies (X_AUTH_TOKEN/X_CT0 env vars or a Chrome login).
async function convertArticleToMarkdown(
  inputUrl: string,
  articleId: string,
  log: (message: string) => void
): Promise<string> {
  log("[x-to-markdown] Loading cookies...");
  const cookieMap = await loadXCookies(log);
  if (!hasRequiredXCookies(cookieMap)) {
    throw new Error("Missing auth cookies. Provide X_AUTH_TOKEN and X_CT0 or log in via Chrome.");
  }
  log(`[x-to-markdown] Fetching article ${articleId}...`);
  const article = await fetchXArticle(articleId, cookieMap, false);
  const body = formatArticleMarkdown(article).trimEnd();
  // The article payload is loosely typed; read the title defensively.
  const title = typeof (article as any)?.title === "string" ? String((article as any).title).trim() : "";
  const meta = formatMetaMarkdown({
    url: `https://x.com/i/article/${articleId}`,
    requested_url: inputUrl,
    title: title || null,
  });
  return [meta, body].filter(Boolean).join("\n\n").trimEnd();
}
// CLI entry: parse args, enforce disclaimer consent, then either refresh
// cookies (--login) or convert the given tweet/article URL to markdown.
async function main(): Promise<void> {
  const args = parseArgs(process.argv.slice(2));
  if (args.help) printUsage(0);
  if (!args.login && !args.url) printUsage(1);
  // Logs go to stderr so stdout stays clean for --json / path output.
  const log = (message: string) => console.error(message);
  await ensureConsent(log);
  if (args.login) {
    log("[x-to-markdown] Refreshing cookies via browser login...");
    const cookieMap = await refreshXCookies(log);
    if (!hasRequiredXCookies(cookieMap)) {
      throw new Error("Missing auth cookies after login. Please ensure you are logged in to X.");
    }
    log("[x-to-markdown] Cookies refreshed.");
    return;
  }
  const normalizedUrl = normalizeInputUrl(args.url ?? "");
  const articleId = parseArticleId(normalizedUrl);
  const tweetId = parseTweetId(normalizedUrl);
  if (!articleId && !tweetId) {
    throw new Error("Invalid X url. Examples: https://x.com/<user>/status/<id> or https://x.com/i/article/<id>");
  }
  // An article id wins over a tweet id when both happen to match.
  const kind = articleId ? ("article" as const) : ("tweet" as const);
  const { outputDir, markdownPath, slug } = await resolveOutputPath(normalizedUrl, kind, args.output, log);
  const markdown =
    kind === "article" && articleId
      ? await convertArticleToMarkdown(normalizedUrl, articleId, log)
      : await tweetToMarkdown(normalizedUrl, { log });
  await writeFile(markdownPath, markdown, "utf8");
  log(`[x-to-markdown] Saved: ${markdownPath}`);
  if (args.json) {
    // Machine-readable result on stdout for scripting (e.g. piping to jq).
    console.log(
      JSON.stringify(
        {
          url: articleId ? `https://x.com/i/article/${articleId}` : normalizedUrl,
          requested_url: normalizedUrl,
          type: kind,
          slug,
          outputDir,
          markdownPath,
        },
        null,
        2
      )
    );
  } else {
    console.log(markdownPath);
  }
}
// Entry point: print a clean error message and exit non-zero on any failure.
await main().catch((error) => {
  console.error(error instanceof Error ? error.message : String(error ?? ""));
  process.exit(1);
});

View File

@ -0,0 +1,306 @@
import type {
ArticleBlock,
ArticleContentState,
ArticleEntity,
ArticleMediaInfo,
} from "./types.js";
/**
 * Treats `value` as an ArticleEntity when it carries at least one
 * article-shaped field (title / plain_text / preview_text / content_state);
 * otherwise returns null.
 */
function coerceArticleEntity(value: unknown): ArticleEntity | null {
  if (!value || typeof value !== "object") return null;
  const candidate = value as ArticleEntity;
  const looksLikeArticle =
    typeof candidate.title === "string" ||
    typeof candidate.plain_text === "string" ||
    typeof candidate.preview_text === "string" ||
    Boolean(candidate.content_state);
  return looksLikeArticle ? candidate : null;
}
/** Escapes square brackets so captions are safe inside markdown alt text. */
function escapeMarkdownAlt(text: string): string {
  return text.replace(/[\[\]]/g, "\\$&");
}
/** Trims a caption and collapses internal whitespace; "" when absent. */
function normalizeCaption(caption?: string): string {
  const trimmed = caption?.trim();
  return trimmed ? trimmed.replace(/\s+/g, " ") : "";
}
/**
 * Picks the best URL from a media-info record: the original image first,
 * then the preview image, then the highest-bit_rate video variant, and
 * finally any variant at all.
 */
function resolveMediaUrl(info?: ArticleMediaInfo): string | undefined {
  if (!info) return undefined;
  if (info.original_img_url) return info.original_img_url;
  if (info.preview_image?.original_img_url) return info.preview_image.original_img_url;
  const variants = info.variants ?? [];
  const bestVideo = variants
    .filter((variant) => variant?.content_type?.includes("video"))
    .sort((a, b) => (b.bit_rate ?? 0) - (a.bit_rate ?? 0))[0];
  return bestVideo?.url ?? variants[0]?.url;
}
/** Maps media_id -> resolved URL for every media entity on the article. */
function buildMediaById(article: ArticleEntity): Map<string, string> {
  const byId = new Map<string, string>();
  for (const entity of article.media_entities ?? []) {
    const id = entity?.media_id;
    if (!id) continue;
    const url = resolveMediaUrl(entity.media_info);
    if (url) byId.set(id, url);
  }
  return byId;
}
/**
 * Collects media URLs that were not already rendered inline. `excludeUrl`
 * (e.g. the cover image) is marked used without being returned. Mutates
 * `usedUrls` so callers keep deduplicating across sections.
 */
function collectMediaUrls(
  article: ArticleEntity,
  usedUrls: Set<string>,
  excludeUrl?: string
): string[] {
  const remaining: string[] = [];
  for (const entity of article.media_entities ?? []) {
    const url = resolveMediaUrl(entity?.media_info);
    if (!url) continue;
    if (excludeUrl && url === excludeUrl) {
      usedUrls.add(url);
      continue;
    }
    if (!usedUrls.has(url)) {
      usedUrls.add(url);
      remaining.push(url);
    }
  }
  return remaining;
}
// Renders markdown image lines for a draft-js entity reference. Only MEDIA
// and IMAGE entities produce output. URLs are resolved via the media-id map
// first, then the entity's own url field; `usedUrls` deduplicates across
// the whole document.
function resolveEntityMediaLines(
  entityKey: number | undefined,
  entityMap: ArticleContentState["entityMap"] | undefined,
  mediaById: Map<string, string>,
  usedUrls: Set<string>
): string[] {
  if (entityKey === undefined || !entityMap) return [];
  const entry = entityMap[String(entityKey)];
  const value = entry?.value;
  if (!value) return [];
  const type = value.type;
  if (type !== "MEDIA" && type !== "IMAGE") return [];
  const caption = normalizeCaption(value.data?.caption);
  const altText = caption ? escapeMarkdownAlt(caption) : "";
  const lines: string[] = [];
  const mediaItems = value.data?.mediaItems ?? [];
  for (const item of mediaItems) {
    // The payload is inconsistent about casing; accept mediaId or media_id.
    const mediaId =
      typeof item?.mediaId === "string"
        ? item.mediaId
        : typeof item?.media_id === "string"
        ? item.media_id
        : undefined;
    const url = mediaId ? mediaById.get(mediaId) : undefined;
    if (url && !usedUrls.has(url)) {
      usedUrls.add(url);
      lines.push(`![${altText}](${url})`);
    }
  }
  // Some entities carry a direct url instead of (or besides) media items.
  const fallbackUrl = typeof value.data?.url === "string" ? value.data.url : undefined;
  if (fallbackUrl && !usedUrls.has(fallbackUrl)) {
    usedUrls.add(fallbackUrl);
    lines.push(`![${altText}](${fallbackUrl})`);
  }
  return lines;
}
// Converts draft-js content blocks to markdown lines.
//
// Handles headings, lists (with ordered-list renumbering), blockquotes,
// fenced code blocks (adjacent code-block entries share one fence), atomic
// media blocks, and inline media attached to any block via entityRanges.
// Blank separator lines are inserted between blocks except between
// consecutive items of the same list / quote / media run.
function renderContentBlocks(
  blocks: ArticleBlock[],
  entityMap: ArticleContentState["entityMap"] | undefined,
  mediaById: Map<string, string>,
  usedUrls: Set<string>
): string[] {
  const lines: string[] = [];
  // The previous block's kind drives blank-line insertion in pushBlock.
  let previousKind: "list" | "quote" | "heading" | "text" | "code" | "media" | null = null;
  let listKind: "ordered" | "unordered" | null = null;
  let orderedIndex = 0;
  let inCodeBlock = false;
  const pushBlock = (
    blockLines: string[],
    kind: "list" | "quote" | "heading" | "text" | "media"
  ) => {
    if (blockLines.length === 0) return;
    // Separate from the previous block unless continuing a list/quote/media run.
    if (
      lines.length > 0 &&
      previousKind &&
      !(previousKind === kind && (kind === "list" || kind === "quote" || kind === "media"))
    ) {
      lines.push("");
    }
    lines.push(...blockLines);
    previousKind = kind;
  };
  // Media referenced by this block's entity ranges, rendered as image lines.
  const collectMediaLines = (block: ArticleBlock): string[] => {
    const ranges = Array.isArray(block.entityRanges) ? block.entityRanges : [];
    const mediaLines: string[] = [];
    for (const range of ranges) {
      if (typeof range?.key !== "number") continue;
      mediaLines.push(...resolveEntityMediaLines(range.key, entityMap, mediaById, usedUrls));
    }
    return mediaLines;
  };
  for (const block of blocks) {
    const type = typeof block?.type === "string" ? block.type : "unstyled";
    const text = typeof block?.text === "string" ? block.text : "";
    if (type === "code-block") {
      // Open a fence on the first code line; keep it open across siblings.
      if (!inCodeBlock) {
        if (lines.length > 0) {
          lines.push("");
        }
        lines.push("```");
        inCodeBlock = true;
      }
      lines.push(text);
      previousKind = "code";
      listKind = null;
      orderedIndex = 0;
      continue;
    }
    if (type === "atomic") {
      // Atomic blocks carry only media; close any open code fence first.
      if (inCodeBlock) {
        lines.push("```");
        inCodeBlock = false;
        previousKind = "code";
      }
      listKind = null;
      orderedIndex = 0;
      const mediaLines = collectMediaLines(block);
      if (mediaLines.length > 0) {
        pushBlock(mediaLines, "media");
      }
      continue;
    }
    // Any non-code block terminates an open fence.
    if (inCodeBlock) {
      lines.push("```");
      inCodeBlock = false;
      previousKind = "code";
    }
    if (type === "unordered-list-item") {
      listKind = "unordered";
      orderedIndex = 0;
      pushBlock([`- ${text}`], "list");
      continue;
    }
    if (type === "ordered-list-item") {
      // Restart numbering whenever a new ordered list begins.
      if (listKind !== "ordered") {
        orderedIndex = 0;
      }
      listKind = "ordered";
      orderedIndex += 1;
      pushBlock([`${orderedIndex}. ${text}`], "list");
      continue;
    }
    listKind = null;
    orderedIndex = 0;
    switch (type) {
      case "header-one":
        pushBlock([`# ${text}`], "heading");
        break;
      case "header-two":
        pushBlock([`## ${text}`], "heading");
        break;
      case "header-three":
        pushBlock([`### ${text}`], "heading");
        break;
      case "header-four":
        pushBlock([`#### ${text}`], "heading");
        break;
      case "header-five":
        pushBlock([`##### ${text}`], "heading");
        break;
      case "header-six":
        pushBlock([`###### ${text}`], "heading");
        break;
      case "blockquote": {
        const quoteLines = text.length > 0 ? text.split("\n") : [""];
        pushBlock(quoteLines.map((line) => `> ${line}`), "quote");
        break;
      }
      default:
        pushBlock([text], "text");
        break;
    }
    // Inline media attached to a text-like block renders right after it.
    const trailingMediaLines = collectMediaLines(block);
    if (trailingMediaLines.length > 0) {
      pushBlock(trailingMediaLines, "media");
    }
  }
  // Close a fence left open by a trailing run of code blocks.
  if (inCodeBlock) {
    lines.push("```");
  }
  return lines;
}
// Renders an article payload as markdown: title heading, cover image, body
// (draft-js blocks, else plain/preview text), and a trailing "## Media"
// section for any media not already used inline. Unrecognized payloads are
// dumped as a fenced JSON block.
export function formatArticleMarkdown(article: unknown): string {
  const candidate = coerceArticleEntity(article);
  if (!candidate) {
    return `\`\`\`json\n${JSON.stringify(article, null, 2)}\n\`\`\``;
  }
  const lines: string[] = [];
  // Tracks every URL already emitted so media is never rendered twice.
  const usedUrls = new Set<string>();
  const mediaById = buildMediaById(candidate);
  const title = typeof candidate.title === "string" ? candidate.title.trim() : "";
  if (title) {
    lines.push(`# ${title}`);
  }
  const coverUrl = resolveMediaUrl(candidate.cover_media?.media_info);
  if (coverUrl) {
    if (lines.length > 0) lines.push("");
    lines.push(`![](${coverUrl})`);
    usedUrls.add(coverUrl);
  }
  const blocks = candidate.content_state?.blocks;
  const entityMap = candidate.content_state?.entityMap;
  if (Array.isArray(blocks) && blocks.length > 0) {
    const rendered = renderContentBlocks(blocks, entityMap, mediaById, usedUrls);
    if (rendered.length > 0) {
      if (lines.length > 0) lines.push("");
      lines.push(...rendered);
    }
  } else if (typeof candidate.plain_text === "string") {
    // No structured blocks — fall back to plain text, then preview text.
    if (lines.length > 0) lines.push("");
    lines.push(candidate.plain_text.trim());
  } else if (typeof candidate.preview_text === "string") {
    if (lines.length > 0) lines.push("");
    lines.push(candidate.preview_text.trim());
  }
  // The cover is excluded here because it already appeared at the top.
  const mediaUrls = collectMediaUrls(candidate, usedUrls, coverUrl);
  if (mediaUrls.length > 0) {
    lines.push("", "## Media", "");
    for (const url of mediaUrls) {
      lines.push(`![](${url})`);
    }
  }
  return lines.join("\n").trimEnd();
}

View File

@ -0,0 +1,41 @@
import os from "node:os";
import path from "node:path";
import process from "node:process";
// Directory and file names used under the per-user data root.
const APP_DATA_DIR = "baoyu-skills";
const X_TO_MARKDOWN_DATA_DIR = "x-to-markdown";
const COOKIE_FILE_NAME = "cookies.json";
const PROFILE_DIR_NAME = "chrome-profile";
const CONSENT_FILE_NAME = "consent.json";
/**
 * Platform-appropriate per-user data root:
 * - Windows: %APPDATA% (fallback ~/AppData/Roaming)
 * - macOS:   ~/Library/Application Support
 * - other:   $XDG_DATA_HOME (fallback ~/.local/share)
 */
export function resolveUserDataRoot(): string {
  switch (process.platform) {
    case "win32":
      return process.env.APPDATA ?? path.join(os.homedir(), "AppData", "Roaming");
    case "darwin":
      return path.join(os.homedir(), "Library", "Application Support");
    default:
      return process.env.XDG_DATA_HOME ?? path.join(os.homedir(), ".local", "share");
  }
}
/** Data dir for this skill; the X_DATA_DIR env var overrides the platform default. */
export function resolveXToMarkdownDataDir(): string {
  const override = process.env.X_DATA_DIR?.trim();
  return override
    ? path.resolve(override)
    : path.join(resolveUserDataRoot(), APP_DATA_DIR, X_TO_MARKDOWN_DATA_DIR);
}
/** Cookie jar path; X_COOKIE_PATH overrides. */
export function resolveXToMarkdownCookiePath(): string {
  const override = process.env.X_COOKIE_PATH?.trim();
  return override ? path.resolve(override) : path.join(resolveXToMarkdownDataDir(), COOKIE_FILE_NAME);
}
/** Chrome profile dir used for browser login; X_CHROME_PROFILE_DIR overrides. */
export function resolveXToMarkdownChromeProfileDir(): string {
  const override = process.env.X_CHROME_PROFILE_DIR?.trim();
  return override ? path.resolve(override) : path.join(resolveXToMarkdownDataDir(), PROFILE_DIR_NAME);
}
/** Consent record path inside the data dir (no env override). */
export function resolveXToMarkdownConsentPath(): string {
  return path.join(resolveXToMarkdownDataDir(), CONSENT_FILE_NAME);
}

View File

@ -0,0 +1,295 @@
// Minimal shape of a fetched thread payload (only the fields read here).
type ThreadLike = {
  requestedId?: string;
  rootId?: string;
  tweets?: unknown[];
  totalTweets?: number;
  user?: any;
};
// A photo attachment resolved to a direct image URL.
type TweetPhoto = {
  src: string;
  alt?: string;
};
// A video/GIF attachment: best variant URL plus an optional poster image.
type TweetVideo = {
  url: string;
  poster?: string;
  alt?: string;
  type?: string;
};
// Options controlling per-tweet markdown rendering.
export type ThreadTweetsMarkdownOptions = {
  username?: string;          // used to build canonical tweet URLs
  headingLevel?: number;      // markdown heading depth (default 2, clamped 1-6)
  startIndex?: number;
  includeTweetUrls?: boolean; // default true
};
// Options for rendering a whole thread document.
export type ThreadMarkdownOptions = ThreadTweetsMarkdownOptions & {
  includeHeader?: boolean;
  title?: string;
  sourceUrl?: string;
};
/** Returns the value as a ThreadLike when it carries a tweets array; else null. */
function coerceThread(value: unknown): ThreadLike | null {
  if (!value || typeof value !== "object") return null;
  const candidate = value as ThreadLike;
  return Array.isArray(candidate.tweets) ? candidate : null;
}
/** Escapes square brackets for use inside markdown alt text. */
function escapeMarkdownAlt(text: string): string {
  return text.replace(/[\[\]]/g, "\\$&");
}
/** Trims alt text and collapses internal whitespace; "" when absent. */
function normalizeAlt(text?: string | null): string {
  const trimmed = text?.trim();
  if (!trimmed) return "";
  return trimmed.replace(/\s+/g, " ");
}
/** Tweet text: prefers the long-form note text over the legacy full_text/text. */
function parseTweetText(tweet: any): string {
  const noteText = tweet?.note_tweet?.note_tweet_results?.result?.text;
  const legacyText = tweet?.legacy?.full_text ?? tweet?.legacy?.text ?? "";
  return (noteText ?? legacyText ?? "").trim();
}
/**
 * Extracts photo attachments from a tweet's extended media.
 *
 * Only items of type "photo" with a resolvable URL are kept; the https URL
 * is preferred over the plain one, and alt text is normalized. Entries
 * without a usable URL are skipped up front, so the original trailing
 * `.filter(Boolean(photo.src))` pass (which could never remove anything)
 * is gone.
 */
function parsePhotos(tweet: any): TweetPhoto[] {
  const media = tweet?.legacy?.extended_entities?.media ?? [];
  const photos: TweetPhoto[] = [];
  for (const item of media) {
    if (item?.type !== "photo") continue;
    const src = item.media_url_https ?? item.media_url;
    if (!src) continue;
    photos.push({ src, alt: normalizeAlt(item.ext_alt_text) });
  }
  return photos;
}
/**
 * Extracts video/GIF attachments. For each media item the highest-bitrate
 * video/* variant is chosen (falling back to any variant with a URL), along
 * with the still image as a poster and normalized alt text.
 */
function parseVideos(tweet: any): TweetVideo[] {
  const media = tweet?.legacy?.extended_entities?.media ?? [];
  const videos: TweetVideo[] = [];
  for (const item of media) {
    if (item?.type !== "animated_gif" && item?.type !== "video") continue;
    const candidates = (item?.video_info?.variants ?? [])
      .map((variant: any) => ({
        contentType: variant?.content_type,
        url: variant?.url,
        bitrate: variant?.bitrate ?? 0,
      }))
      .filter((variant: any) => Boolean(variant.url));
    const videoOnly = candidates.filter((variant: any) =>
      String(variant.contentType ?? "").includes("video")
    );
    const pool = videoOnly.length > 0 ? videoOnly : candidates;
    const best = pool.sort((a: any, b: any) => (b.bitrate ?? 0) - (a.bitrate ?? 0))[0];
    if (!best?.url) continue;
    videos.push({
      url: best.url,
      poster: item.media_url_https ?? item.media_url ?? undefined,
      alt: normalizeAlt(item.ext_alt_text),
      type: item.type,
    });
  }
  return videos;
}
/** Unwraps a TweetWithVisibilityResults wrapper down to the inner tweet. */
function unwrapTweetResult(result: any): any {
  if (!result) return null;
  const isWrapped = result.__typename === "TweetWithVisibilityResults" && result.tweet;
  return isWrapped ? result.tweet : result;
}
/** Tweet id: legacy id_str, falling back to rest_id. */
function resolveTweetId(tweet: any): string | undefined {
  return tweet?.legacy?.id_str ?? tweet?.rest_id;
}
/** Canonical tweet URL; uses the /i/web/ form when the username is unknown. */
function buildTweetUrl(username: string | undefined, tweetId: string | undefined): string | null {
  if (!tweetId) return null;
  return username
    ? `https://x.com/${username}/status/${tweetId}`
    : `https://x.com/i/web/status/${tweetId}`;
}
/**
 * Render one tweet as markdown lines: a numbered heading, an optional
 * permalink line, then body text, quoted tweet (blockquote), photos, and
 * videos — each group separated by a blank line. When the tweet has neither
 * text nor media, a placeholder line is emitted instead.
 */
function formatTweetMarkdown(
  tweet: any,
  index: number,
  options: ThreadTweetsMarkdownOptions
): string[] {
  const headingLevel = options.headingLevel ?? 2;
  const includeTweetUrls = options.includeTweetUrls ?? true;
  // Clamp the heading depth to markdown's valid range (1-6 '#' characters).
  const headingPrefix = "#".repeat(Math.min(Math.max(headingLevel, 1), 6));
  const tweetId = resolveTweetId(tweet);
  const tweetUrl = includeTweetUrls ? buildTweetUrl(options.username, tweetId) : null;
  const lines: string[] = [];
  lines.push(`${headingPrefix} ${index}`);
  if (tweetUrl) {
    lines.push(tweetUrl);
  }
  lines.push("");
  const text = parseTweetText(tweet);
  const photos = parsePhotos(tweet);
  const videos = parseVideos(tweet);
  const quoted = unwrapTweetResult(tweet?.quoted_status_result?.result);
  const bodyLines: string[] = [];
  if (text) {
    // Preserve the tweet's own line breaks (CRLF or LF).
    bodyLines.push(...text.split(/\r?\n/));
  }
  const quotedLines = formatQuotedTweetMarkdown(quoted);
  if (quotedLines.length > 0) {
    if (bodyLines.length > 0) bodyLines.push("");
    bodyLines.push(...quotedLines);
  }
  const photoLines = photos.map((photo) => {
    const alt = photo.alt ? escapeMarkdownAlt(photo.alt) : "";
    return `![${alt}](${photo.src})`;
  });
  if (photoLines.length > 0) {
    if (bodyLines.length > 0) bodyLines.push("");
    bodyLines.push(...photoLines);
  }
  const videoLines: string[] = [];
  for (const video of videos) {
    // Videos get an image line for the poster frame (when available)
    // followed by a link to the playable media.
    if (video.poster) {
      const alt = video.alt ? escapeMarkdownAlt(video.alt) : "video";
      videoLines.push(`![${alt}](${video.poster})`);
    }
    videoLines.push(`[${video.type ?? "video"}](${video.url})`);
  }
  if (videoLines.length > 0) {
    if (bodyLines.length > 0) bodyLines.push("");
    bodyLines.push(...videoLines);
  }
  if (bodyLines.length === 0) {
    bodyLines.push("_No text or media._");
  }
  lines.push(...bodyLines);
  return lines;
}
/**
 * Render a quoted tweet as a markdown blockquote carrying its author, URL,
 * and text (or "(no content)" when the quoted tweet has no text).
 * Returns [] when there is no quoted tweet.
 *
 * Simplification over the original: `buildTweetUrl` already returns the
 * /i/web/ form whenever an id exists, so the extra
 * `quotedId ? 'https://x.com/i/web/...' : 'unavailable'` fallback was dead —
 * only the missing-id case remains, handled by `?? "unavailable"`.
 */
function formatQuotedTweetMarkdown(quoted: any): string[] {
  if (!quoted) return [];
  const quotedUser = quoted?.core?.user_results?.result?.legacy;
  const quotedUsername = quotedUser?.screen_name;
  const quotedName = quotedUser?.name;
  const quotedAuthor =
    quotedUsername && quotedName
      ? `${quotedName} (@${quotedUsername})`
      : quotedUsername
      ? `@${quotedUsername}`
      : quotedName ?? "Unknown";
  const quotedId = resolveTweetId(quoted);
  const quotedUrl = buildTweetUrl(quotedUsername, quotedId) ?? "unavailable";
  const quotedText = parseTweetText(quoted);
  const lines: string[] = [];
  lines.push(`Author: ${quotedAuthor}`);
  lines.push(`URL: ${quotedUrl}`);
  if (quotedText) {
    lines.push("", ...quotedText.split(/\r?\n/));
  } else {
    lines.push("", "(no content)");
  }
  // Prefix every line with "> "; trim so blank lines become a bare ">".
  return lines.map((line) => `> ${line}`.trimEnd());
}
/**
 * Render a list of tweets as markdown sections separated by blank lines.
 * Numbering starts at `options.startIndex` (default 1). Returns "" for a
 * missing or empty list.
 */
export function formatThreadTweetsMarkdown(
  tweets: unknown[],
  options: ThreadTweetsMarkdownOptions = {}
): string {
  if (!Array.isArray(tweets) || tweets.length === 0) {
    return "";
  }
  const startIndex = options.startIndex ?? 1;
  const sections = tweets.map((tweet, offset) =>
    formatTweetMarkdown(tweet, startIndex + offset, options).join("\n")
  );
  return sections.join("\n\n").trimEnd();
}
/**
 * Render a whole thread as markdown. When the input cannot be coerced into a
 * thread shape, the raw value is emitted as a fenced JSON block instead.
 * The optional header consists of a title line, a source URL, and the tweet
 * count, followed by the per-tweet sections.
 */
export function formatThreadMarkdown(
  thread: unknown,
  options: ThreadMarkdownOptions = {}
): string {
  const candidate = coerceThread(thread);
  if (!candidate) {
    // Fallback: dump whatever we received so nothing is silently lost.
    return `\`\`\`json\n${JSON.stringify(thread, null, 2)}\n\`\`\``;
  }
  const tweets = candidate.tweets ?? [];
  const firstTweet = tweets[0] as any;
  // Prefer the thread-level user; fall back to the first tweet's author.
  const user = candidate.user ?? firstTweet?.core?.user_results?.result?.legacy;
  const username = user?.screen_name;
  const name = user?.name;
  const includeHeader = options.includeHeader ?? true;
  const lines: string[] = [];
  if (includeHeader) {
    if (options.title) {
      lines.push(`# ${options.title}`);
    } else if (username) {
      lines.push(`# Thread by @${username}${name ? ` (${name})` : ""}`);
    } else {
      lines.push("# Thread");
    }
    const sourceUrl = options.sourceUrl ?? buildTweetUrl(username, candidate.rootId ?? candidate.requestedId);
    if (sourceUrl) {
      lines.push(`Source: ${sourceUrl}`);
    }
    if (typeof candidate.totalTweets === "number") {
      lines.push(`Tweets: ${candidate.totalTweets}`);
    }
  }
  const tweetMarkdown = formatThreadTweetsMarkdown(tweets, {
    ...options,
    username,
  });
  if (tweetMarkdown) {
    if (lines.length > 0) {
      lines.push("");
    }
    lines.push(tweetMarkdown);
  }
  return lines.join("\n").trimEnd();
}

View File

@ -0,0 +1,311 @@
import { fetchTweetDetail } from "./graphql.js";
// A tweet result paired with its author's legacy user object (when present).
type TweetEntry = {
  tweet: any;
  user?: any;
};
// Tweets extracted from one timeline response plus any pagination cursors
// found alongside them.
type ParsedEntries = {
  entries: TweetEntry[];
  moreCursor?: string; // "ShowMore"/"ShowMoreThreads" cursor
  topCursor?: string; // cursor toward earlier entries
  bottomCursor?: string; // cursor toward later entries
};
// The assembled thread returned by fetchTweetThread.
type ThreadResult = {
  requestedId: string; // the id the caller asked for
  rootId: string; // id of the thread's first tweet (may differ from requestedId)
  tweets: any[];
  totalTweets: number;
  user?: any;
  responses?: unknown[]; // raw API responses, only when requested
};
/**
 * Unwrap "TweetWithVisibilityResults" wrappers to the inner tweet; other
 * results pass through unchanged. Falsy input yields null.
 */
function unwrapTweetResult(result: any): any {
  if (!result) return null;
  const wrapped =
    result.__typename === "TweetWithVisibilityResults" ? result.tweet : undefined;
  return wrapped ?? result;
}
/**
 * Pull a TweetEntry out of a timeline item's `tweet_results`, unwrapping
 * visibility wrappers and attaching the author's legacy user object.
 * Returns null when no tweet result is present.
 */
function extractTweetEntry(itemContent: any): TweetEntry | null {
  const rawResult = itemContent?.tweet_results?.result;
  if (!rawResult) return null;
  const tweet = unwrapTweetResult(rawResult?.tweet ?? rawResult);
  if (!tweet) return null;
  return { tweet, user: tweet?.core?.user_results?.result?.legacy };
}
/**
 * Flatten one timeline instruction into tweet entries plus the pagination
 * cursors it carries. Handles both top-level entries and nested module items,
 * and skips "you_might_also_like" recommendation modules. For each cursor
 * kind the first value seen wins (`?? ` keeps earlier assignments), except
 * the module-item "ShowMore" path which assigns unconditionally.
 */
function parseInstruction(instruction?: any): ParsedEntries {
  const { entries: entities, moduleItems } = instruction || {};
  const entries: TweetEntry[] = [];
  let moreCursor: string | undefined;
  let topCursor: string | undefined;
  let bottomCursor: string | undefined;
  // Shared walker for nested item lists (module items and entry `items`).
  const parseItems = (items: any[]) => {
    items?.forEach((item) => {
      // Items may nest the content one level deeper under `item`.
      const itemContent = item?.item?.itemContent ?? item?.itemContent;
      if (!itemContent) {
        return;
      }
      if (
        itemContent.cursorType &&
        ["ShowMore", "ShowMoreThreads"].includes(itemContent.cursorType) &&
        itemContent.itemType === "TimelineTimelineCursor"
      ) {
        moreCursor = itemContent.value;
        return;
      }
      const entry = extractTweetEntry(itemContent);
      if (entry) {
        entries.push(entry);
      }
    });
  };
  if (moduleItems) {
    parseItems(moduleItems);
  }
  for (const entity of entities ?? []) {
    // Skip recommendation modules — they are not part of the conversation.
    if (entity?.content?.clientEventInfo?.component === "you_might_also_like") {
      continue;
    }
    const { itemContent, items, cursorType, entryType, value } = entity?.content ?? {};
    // Bottom cursor may appear at the entry level or inside itemContent.
    if (cursorType === "Bottom" && entryType === "TimelineTimelineCursor") {
      bottomCursor = value;
    }
    if (
      itemContent?.cursorType === "Bottom" &&
      itemContent?.itemType === "TimelineTimelineCursor"
    ) {
      bottomCursor = bottomCursor ?? itemContent?.value;
    }
    if (cursorType === "Top" && entryType === "TimelineTimelineCursor") {
      topCursor = topCursor ?? value;
    }
    if (itemContent) {
      const entry = extractTweetEntry(itemContent);
      if (entry) {
        entries.push(entry);
      }
      if (
        itemContent.cursorType &&
        ["ShowMore", "ShowMoreThreads"].includes(itemContent.cursorType) &&
        itemContent.itemType === "TimelineTimelineCursor"
      ) {
        moreCursor = moreCursor ?? itemContent.value;
      }
      if (itemContent.cursorType === "Top" && itemContent.itemType === "TimelineTimelineCursor") {
        topCursor = topCursor ?? itemContent.value;
      }
    }
    if (items) {
      parseItems(items);
    }
  }
  return { entries, moreCursor, topCursor, bottomCursor };
}
/**
 * Locate the "TimelineAddEntries"/"TimelineAddToModule" instruction in a
 * conversation response — trying the v2 payload first, then the legacy one —
 * and parse it into entries and cursors.
 */
function parseTweetsAndToken(response: any): ParsedEntries {
  const isTimelineAdd = (ins: any) =>
    ins?.type === "TimelineAddEntries" || ins?.type === "TimelineAddToModule";
  const v2Instructions =
    response?.data?.threaded_conversation_with_injections_v2?.instructions;
  const legacyInstructions =
    response?.data?.threaded_conversation_with_injections?.instructions;
  const instruction =
    v2Instructions?.find(isTimelineAdd) ?? legacyInstructions?.find(isTimelineAdd);
  return parseInstruction(instruction);
}
/** Parse a date string to epoch milliseconds; 0 for missing/unparseable input. */
function toTimestamp(value: string | undefined): number {
  const millis = value ? Date.parse(value) : Number.NaN;
  return Number.isNaN(millis) ? 0 : millis;
}
/**
 * Fetch the full thread containing `tweetId`: the focal tweet plus the
 * same-author tweets in the same conversation.
 *
 * Strategy: fetch the conversation detail, page upward through top cursors
 * and downward through "show more"/bottom cursors (all bounded by a shared
 * request budget), re-fetch from the last thread tweet to pick up later
 * continuations, dedupe, walk reply parents up to the true thread root, sort
 * by creation time, and keep only same-thread entries.
 *
 * @param tweetId focal tweet id (matched against legacy `id_str`)
 * @param cookieMap auth cookies forwarded to the GraphQL endpoint
 * @param includeResponses when true, raw API responses are attached to the result
 * @returns the assembled thread, or null when nothing usable was found
 * @throws when the API reports an error or the focal tweet is missing
 */
export async function fetchTweetThread(
  tweetId: string,
  cookieMap: Record<string, string>,
  includeResponses = false
): Promise<ThreadResult | null> {
  const responses: unknown[] = [];
  const res = await fetchTweetDetail(tweetId, cookieMap);
  if (includeResponses) {
    responses.push(res);
  }
  let { entries, moreCursor, topCursor, bottomCursor } = parseTweetsAndToken(res);
  if (!entries.length) {
    // Surface the API's own error message when the response carried one.
    const errorMessage = res?.errors?.[0]?.message;
    if (errorMessage) {
      throw new Error(errorMessage);
    }
    return null;
  }
  let allEntries = entries.slice();
  const root = allEntries.find((entry) => entry.tweet?.legacy?.id_str === tweetId);
  if (!root) {
    throw new Error("Can not fetch the root tweet");
  }
  let rootEntry = root.tweet.legacy;
  // A tweet belongs to the thread when it shares author and conversation with
  // the root and is either the root itself, a reply to the author, a reply to
  // the conversation root, or not a reply at all.
  const isSameThread = (entry: TweetEntry) => {
    const tweet = entry.tweet?.legacy;
    if (!tweet) return false;
    return (
      tweet.user_id_str === rootEntry.user_id_str &&
      tweet.conversation_id_str === rootEntry.conversation_id_str &&
      (tweet.id_str === rootEntry.id_str ||
        tweet.in_reply_to_user_id_str === rootEntry.user_id_str ||
        tweet.in_reply_to_status_id_str === rootEntry.conversation_id_str ||
        !tweet.in_reply_to_user_id_str)
    );
  };
  const inThread = (items: TweetEntry[]) => items.some(isSameThread);
  let hasThread = inThread(entries);
  // Hard cap on API requests shared by all pagination loops below.
  let maxRequestCount = 1000;
  let topHasThread = true;
  // Page upward (earlier entries) while responses still contain thread tweets.
  while (topCursor && topHasThread && maxRequestCount > 0) {
    const newRes = await fetchTweetDetail(tweetId, cookieMap, topCursor);
    if (includeResponses) {
      responses.push(newRes);
    }
    const parsed = parseTweetsAndToken(newRes);
    topHasThread = inThread(parsed.entries);
    topCursor = parsed.topCursor;
    // Prepend: these entries come before what we already have.
    allEntries = parsed.entries.concat(allEntries);
    maxRequestCount--;
  }
  // Page downward from `focalId` via "show more" cursors, then drain one
  // bottom cursor if present. Mutates the captured cursor/entry state.
  async function checkMoreTweets(focalId: string) {
    while (moreCursor && hasThread && maxRequestCount > 0) {
      const newRes = await fetchTweetDetail(focalId, cookieMap, moreCursor);
      if (includeResponses) {
        responses.push(newRes);
      }
      const parsed = parseTweetsAndToken(newRes);
      moreCursor = parsed.moreCursor;
      bottomCursor = bottomCursor ?? parsed.bottomCursor;
      hasThread = inThread(parsed.entries);
      allEntries = allEntries.concat(parsed.entries);
      maxRequestCount--;
    }
    if (bottomCursor) {
      const newRes = await fetchTweetDetail(focalId, cookieMap, bottomCursor);
      if (includeResponses) {
        responses.push(newRes);
      }
      const parsed = parseTweetsAndToken(newRes);
      allEntries = allEntries.concat(parsed.entries);
      bottomCursor = undefined;
    }
  }
  await checkMoreTweets(tweetId);
  // Re-fetch from the last known thread tweet to pick up continuations that
  // were not reachable from the focal tweet's conversation view.
  const allThreadEntries = allEntries.filter(
    (entry) => entry.tweet?.legacy?.id_str === tweetId || isSameThread(entry)
  );
  const lastEntity = allThreadEntries[allThreadEntries.length - 1];
  if (lastEntity?.tweet?.legacy?.id_str) {
    const lastRes = await fetchTweetDetail(lastEntity.tweet.legacy.id_str, cookieMap);
    if (includeResponses) {
      responses.push(lastRes);
    }
    const parsed = parseTweetsAndToken(lastRes);
    hasThread = inThread(parsed.entries);
    allEntries = allEntries.concat(parsed.entries);
    moreCursor = parsed.moreCursor;
    bottomCursor = parsed.bottomCursor;
    maxRequestCount--;
    await checkMoreTweets(lastEntity.tweet.legacy.id_str);
  }
  // Dedupe by tweet id, keeping the first occurrence (insertion order).
  const distinctEntries: TweetEntry[] = [];
  const entriesMap = allEntries.reduce((acc, entry) => {
    const id = entry.tweet?.legacy?.id_str ?? entry.tweet?.rest_id;
    if (id && !acc.has(id)) {
      distinctEntries.push(entry);
      acc.set(id, entry);
    }
    return acc;
  }, new Map<string, TweetEntry>());
  allEntries = distinctEntries;
  // Walk up the reply chain to find the true thread root: keep ascending
  // while the parent is by the same author in the same conversation.
  while (rootEntry.in_reply_to_status_id_str) {
    const parent = entriesMap.get(rootEntry.in_reply_to_status_id_str)?.tweet?.legacy;
    if (
      parent &&
      parent.user_id_str === rootEntry.user_id_str &&
      parent.conversation_id_str === rootEntry.conversation_id_str &&
      parent.id_str !== rootEntry.id_str
    ) {
      rootEntry = parent;
    } else {
      break;
    }
  }
  // Chronological order by created_at (unparseable dates sort first as 0).
  allEntries = allEntries.sort((a, b) => {
    const aTime = toTimestamp(a.tweet?.legacy?.created_at);
    const bTime = toTimestamp(b.tweet?.legacy?.created_at);
    return aTime - bTime;
  });
  // Drop anything that precedes the root tweet.
  const rootIndex = allEntries.findIndex(
    (entry) => entry.tweet?.legacy?.id_str === rootEntry.id_str
  );
  if (rootIndex > 0) {
    allEntries = allEntries.slice(rootIndex);
  }
  const threadEntries = allEntries.filter(
    (entry) => entry.tweet?.legacy?.id_str === tweetId || isSameThread(entry)
  );
  if (!threadEntries.length) {
    return null;
  }
  const tweets = threadEntries.map((entry) => entry.tweet);
  const user = threadEntries[0].user ?? threadEntries[0].tweet?.core?.user_results?.result?.legacy;
  const result: ThreadResult = {
    requestedId: tweetId,
    rootId: rootEntry.id_str ?? tweetId,
    tweets,
    totalTweets: tweets.length,
    user,
  };
  if (includeResponses) {
    result.responses = responses;
  }
  return result;
}

View File

@ -0,0 +1,96 @@
import { fetchXArticle } from "./graphql.js";
import type { ArticleEntity } from "./types.js";
/**
 * Narrow an unknown value to ArticleEntity when it looks like one: an object
 * with a string title/plain_text/preview_text or a truthy content_state.
 * Returns null otherwise.
 */
function coerceArticleEntity(value: unknown): ArticleEntity | null {
  if (!value || typeof value !== "object") return null;
  const candidate = value as ArticleEntity;
  const looksLikeArticle =
    typeof candidate.title === "string" ||
    typeof candidate.plain_text === "string" ||
    typeof candidate.preview_text === "string" ||
    Boolean(candidate.content_state);
  return looksLikeArticle ? candidate : null;
}
/**
 * True when the article carries renderable content: at least one content
 * block, or non-blank plain/preview text.
 */
function hasArticleContent(article: ArticleEntity): boolean {
  const blocks = article.content_state?.blocks;
  const hasBlocks = Array.isArray(blocks) && blocks.length > 0;
  const hasText = (field: unknown): boolean =>
    typeof field === "string" && field.trim().length > 0;
  return hasBlocks || hasText(article.plain_text) || hasText(article.preview_text);
}
/**
 * Extract the numeric article id from an x.com article URL
 * (".../article/<id>" or ".../i/article/<id>"). Returns null for missing,
 * unparseable, or non-article URLs.
 */
function parseArticleIdFromUrl(raw: string | undefined): string | null {
  if (!raw) return null;
  let pathname: string;
  try {
    pathname = new URL(raw).pathname;
  } catch {
    return null;
  }
  const match = /\/(?:i\/)?article\/(\d+)/.exec(pathname);
  return match?.[1] ?? null;
}
/**
 * Scan a list of URL entities for the first one that resolves to an article
 * id. Each entity may carry the URL as expanded_url, url, or display_url
 * (the last gets an https:// prefix).
 */
function extractArticleIdFromUrls(urls: any[] | undefined): string | null {
  if (!Array.isArray(urls)) return null;
  for (const entry of urls) {
    const candidateUrl =
      entry?.expanded_url ??
      entry?.url ??
      (entry?.display_url ? `https://${entry.display_url}` : undefined);
    const articleId = parseArticleIdFromUrl(candidateUrl);
    if (articleId) return articleId;
  }
  return null;
}
/**
 * Find an embedded article entity on a tweet, checking the known locations
 * in priority order. Returns null when none is present.
 */
export function extractArticleEntityFromTweet(tweet: any): unknown | null {
  const candidates = [
    tweet?.article?.article_results?.result,
    tweet?.article?.result,
    tweet?.legacy?.article?.article_results?.result,
    tweet?.legacy?.article?.result,
    tweet?.article_results?.result,
  ];
  return candidates.find((candidate) => candidate != null) ?? null;
}
/**
 * Resolve the article id referenced by a tweet: first from an embedded
 * article entity's rest_id, then from URL entities on the note tweet or the
 * legacy tweet.
 */
export function extractArticleIdFromTweet(tweet: any): string | null {
  const embedded = extractArticleEntityFromTweet(tweet) as { rest_id?: string } | null;
  if (embedded?.rest_id) {
    return embedded.rest_id;
  }
  const noteUrls = tweet?.note_tweet?.note_tweet_results?.result?.entity_set?.urls;
  const legacyUrls = tweet?.legacy?.entities?.urls;
  return extractArticleIdFromUrls(noteUrls) ?? extractArticleIdFromUrls(legacyUrls);
}
/**
 * Resolve the article entity for a tweet. An embedded entity that already
 * has content is returned directly; otherwise the article is fetched by id,
 * falling back to whatever embedded entity existed (or null).
 */
export async function resolveArticleEntityFromTweet(
  tweet: any,
  cookieMap: Record<string, string>
): Promise<unknown | null> {
  if (!tweet) return null;
  const embedded = extractArticleEntityFromTweet(tweet);
  const article = coerceArticleEntity(embedded);
  if (article && hasArticleContent(article)) {
    return embedded;
  }
  const articleId = extractArticleIdFromTweet(tweet);
  if (!articleId) {
    return embedded ?? null;
  }
  const fetched = await fetchXArticle(articleId, cookieMap, false);
  return fetched ?? embedded ?? null;
}

View File

@ -0,0 +1,190 @@
#!/usr/bin/env npx tsx
import * as path from "node:path";
import { fileURLToPath } from "node:url";
import { hasRequiredXCookies, loadXCookies } from "./cookies.js";
import { fetchTweetThread } from "./thread.js";
import { formatArticleMarkdown } from "./markdown.js";
import { formatThreadTweetsMarkdown } from "./thread-markdown.js";
import { resolveArticleEntityFromTweet } from "./tweet-article.js";
// Options for tweetToMarkdown; `log` receives progress messages
// (defaults to a no-op when omitted).
type TweetToMarkdownOptions = {
  log?: (message: string) => void;
};
/**
 * Read the CLI arguments: the first positional (non-flag) argument is taken
 * as the URL; flag-style arguments ("-...") are ignored.
 */
function parseArgs(): { url?: string } {
  const url = process.argv.slice(2).find((arg) => !arg.startsWith("-"));
  return { url };
}
/**
 * Trim the input and, when it parses as a URL, return its canonical string
 * form; otherwise return the trimmed input unchanged ("" for blank input).
 */
function normalizeInputUrl(input: string): string {
  const trimmed = input.trim();
  if (trimmed.length === 0) {
    return "";
  }
  try {
    const parsed = new URL(trimmed);
    return parsed.toString();
  } catch {
    return trimmed;
  }
}
/**
 * Extract a tweet id from the input: a bare numeric string is accepted
 * directly; otherwise the input must be a URL whose path contains
 * "/status/<id>" (or "/statuses/<id>"). Returns null when neither applies.
 */
function parseTweetId(input: string): string | null {
  const trimmed = input.trim();
  if (!trimmed) return null;
  if (/^\d+$/.test(trimmed)) return trimmed;
  let pathname: string;
  try {
    pathname = new URL(trimmed).pathname;
  } catch {
    return null;
  }
  const match = /\/status(?:es)?\/(\d+)/.exec(pathname);
  return match?.[1] ?? null;
}
/**
 * Build a canonical x.com status URL: user-scoped when the username is
 * known, the anonymous /i/web/ form otherwise; null without an id.
 */
function buildTweetUrl(username: string | undefined, tweetId: string | undefined): string | null {
  if (!tweetId) return null;
  const path = username ? `${username}/status/${tweetId}` : `i/web/status/${tweetId}`;
  return `https://x.com/${path}`;
}
/**
 * Render a YAML front-matter block from key/value metadata. Undefined, null,
 * and empty-string values are dropped; numbers are emitted bare and strings
 * JSON-quoted.
 */
function formatMetaMarkdown(meta: Record<string, string | number | null | undefined>): string {
  const body = Object.entries(meta)
    .filter(([, value]) => value !== undefined && value !== null && value !== "")
    .map(([key, value]) =>
      typeof value === "number" ? `${key}: ${value}` : `${key}: ${JSON.stringify(value)}`
    );
  return ["---", ...body, "---"].join("\n");
}
/**
 * Get a tweet's text, preferring long-form note-tweet text over the legacy
 * full_text/text fields; trimmed, "" when absent.
 */
function extractTweetText(tweet: any): string {
  const legacy = tweet?.legacy;
  const fallback = legacy?.full_text ?? legacy?.text ?? "";
  const text = tweet?.note_tweet?.note_tweet_results?.result?.text ?? fallback;
  return (text ?? "").trim();
}
/**
 * True when the text is nothing but a single http(s) URL. Empty/blank text
 * also counts, so callers can treat "no text" the same as "only a link".
 */
function isOnlyUrl(text: string): boolean {
  const trimmed = text.trim();
  return trimmed.length === 0 || /^https?:\/\/\S+$/.test(trimmed);
}
/**
 * Convert a tweet/thread URL (or bare numeric id) to markdown with YAML
 * front matter. When the first tweet resolves to an X Article, the article
 * body is rendered first and the remaining tweets follow under a
 * "## Thread" heading.
 *
 * @param inputUrl tweet URL or bare tweet id
 * @param options optional progress logger
 * @throws when the URL carries no tweet id, required auth cookies are
 *   missing, or the thread cannot be fetched / is empty
 */
export async function tweetToMarkdown(
  inputUrl: string,
  options: TweetToMarkdownOptions = {}
): Promise<string> {
  const normalizedUrl = normalizeInputUrl(inputUrl);
  const tweetId = parseTweetId(normalizedUrl);
  if (!tweetId) {
    throw new Error("Invalid tweet url. Example: https://x.com/<user>/status/<tweet_id>");
  }
  const log = options.log ?? (() => {});
  log("[tweet-to-markdown] Loading cookies...");
  const cookieMap = await loadXCookies(log);
  if (!hasRequiredXCookies(cookieMap)) {
    throw new Error("Missing auth cookies. Provide X_AUTH_TOKEN and X_CT0 or log in via Chrome.");
  }
  log(`[tweet-to-markdown] Fetching thread for ${tweetId}...`);
  const thread = await fetchTweetThread(tweetId, cookieMap);
  if (!thread) {
    throw new Error("Failed to fetch thread.");
  }
  const tweets = thread.tweets ?? [];
  if (tweets.length === 0) {
    throw new Error("No tweets found in thread.");
  }
  const firstTweet = tweets[0] as any;
  // Author info: prefer the thread-level user, fall back to the first tweet.
  const user = thread.user ?? firstTweet?.core?.user_results?.result?.legacy;
  const username = user?.screen_name;
  const name = user?.name;
  const author =
    username && name ? `${name} (@${username})` : username ? `@${username}` : name ?? null;
  const authorUrl = username ? `https://x.com/${username}` : undefined;
  const requestedUrl = normalizedUrl || buildTweetUrl(username, tweetId) || inputUrl.trim();
  // Canonical URL points at the thread root, which may differ from the
  // requested tweet when the user linked a mid-thread tweet.
  const rootUrl = buildTweetUrl(username, thread.rootId ?? tweetId) ?? requestedUrl;
  const meta = formatMetaMarkdown({
    url: rootUrl,
    requested_url: requestedUrl,
    author,
    author_name: name ?? null,
    author_username: username ?? null,
    author_url: authorUrl ?? null,
    tweet_count: thread.totalTweets ?? tweets.length,
  });
  const parts: string[] = [meta];
  const articleEntity = await resolveArticleEntityFromTweet(firstTweet, cookieMap);
  let remainingTweets = tweets;
  if (articleEntity) {
    const articleMarkdown = formatArticleMarkdown(articleEntity).trimEnd();
    if (articleMarkdown) {
      parts.push(articleMarkdown);
      // Drop the first tweet when it is only the article's link (or empty) —
      // the article content already covers it.
      const firstTweetText = extractTweetText(firstTweet);
      if (isOnlyUrl(firstTweetText)) {
        remainingTweets = tweets.slice(1);
      }
    }
  }
  if (remainingTweets.length > 0) {
    // parts > 1 means an article section was added above.
    const hasArticle = parts.length > 1;
    if (hasArticle) {
      parts.push("## Thread");
    }
    const tweetMarkdown = formatThreadTweetsMarkdown(remainingTweets, {
      username,
      // Demote tweet headings under the "## Thread" section when present.
      headingLevel: hasArticle ? 3 : 2,
      startIndex: 1,
      includeTweetUrls: true,
    });
    if (tweetMarkdown) {
      parts.push(tweetMarkdown);
    }
  }
  return parts.join("\n\n").trimEnd();
}
// CLI entry point: read the URL argument, convert it, and print the
// markdown. Exits with code 1 (after printing usage) when no URL was given.
async function main() {
  const { url } = parseArgs();
  if (!url) {
    console.error("Usage:");
    console.error(" npx -y bun skills/baoyu-danger-x-to-markdown/scripts/tweet-to-markdown.ts <tweet url>");
    process.exit(1);
  }
  const markdown = await tweetToMarkdown(url, { log: console.log });
  console.log(markdown);
}
// Run main() only when this file is executed directly (not when imported):
// compares the invoked script path against this module's own file path.
const isCliExecution =
  process.argv[1] && fileURLToPath(import.meta.url) === path.resolve(process.argv[1]);
if (isCliExecution) {
  // Print a concise message (not a full stack trace) and fail the process.
  main().catch((error) => {
    console.error(error instanceof Error ? error.message : error);
    process.exit(1);
  });
}

View File

@ -0,0 +1,79 @@
// Minimal cookie shape accepted from cookie sources.
export type CookieLike = {
  name?: string;
  value?: string;
  domain?: string;
  path?: string;
  url?: string;
};
// GraphQL query metadata scraped for the article endpoint, plus the raw HTML
// it was extracted from.
export type ArticleQueryInfo = {
  queryId: string;
  featureSwitches: string[];
  fieldToggles: string[];
  html: string;
};
// A span inside a content block that references an entityMap entry by key.
export type ArticleEntityRange = {
  key?: number;
  offset?: number;
  length?: number;
};
// One block of article content (e.g. a paragraph) with its entity spans.
export type ArticleBlock = {
  type?: string;
  text?: string;
  entityRanges?: ArticleEntityRange[];
};
// Media reference inside an entity-map entry; id field naming varies.
export type ArticleEntityMapMediaItem = {
  mediaId?: string;
  media_id?: string;
  localMediaId?: string;
};
// One entry of the article's entity map: typed data referenced from blocks.
export type ArticleEntityMapEntry = {
  key?: string;
  value?: {
    type?: string;
    mutability?: string;
    data?: {
      caption?: string;
      mediaItems?: ArticleEntityMapMediaItem[];
      url?: string;
    };
  };
};
// The article body: ordered blocks plus the entity map they reference.
export type ArticleContentState = {
  blocks?: ArticleBlock[];
  entityMap?: Record<string, ArticleEntityMapEntry>;
};
// Media payload: still-image URL(s) and/or video variants.
export type ArticleMediaInfo = {
  __typename?: string;
  original_img_url?: string;
  preview_image?: {
    original_img_url?: string;
  };
  variants?: Array<{
    content_type?: string;
    url?: string;
    bit_rate?: number;
  }>;
};
// A media entity attached to the article, keyed by media_id.
export type ArticleMediaEntity = {
  media_id?: string;
  media_info?: ArticleMediaInfo;
};
// The article itself: title, text representations, structured content,
// cover media, and attached media entities.
export type ArticleEntity = {
  title?: string;
  plain_text?: string;
  preview_text?: string;
  content_state?: ArticleContentState;
  cover_media?: {
    media_info?: ArticleMediaInfo;
  };
  media_entities?: ArticleMediaEntity[];
};