/**
 * md-to-html: converts a Markdown article into the HTML fragment, title,
 * cover image and image-placeholder list used for X Article publishing.
 *
 * Can be imported (`parseMarkdown`) or run as a CLI (see `printUsage`).
 */
import fs from 'node:fs';
import { mkdir, writeFile } from 'node:fs/promises';
import https from 'node:https';
import os from 'node:os';
import path from 'node:path';
import process from 'node:process';
import { createHash } from 'node:crypto';

import frontMatter from 'front-matter';
import hljs from 'highlight.js/lib/common';
import { Lexer, Marked, type RendererObject, type Tokens } from 'marked';
import { unified } from 'unified';
import remarkCjkFriendly from 'remark-cjk-friendly';
import remarkParse from 'remark-parse';
import remarkStringify from 'remark-stringify';

/** One image referenced from the Markdown body. */
interface ImageInfo {
  /** Token (`XIMGPH_<n>`) that stands in for the image inside `html`. */
  placeholder: string;
  /** Resolved local file path; empty string when the image was skipped. */
  localPath: string;
  /** The `src` exactly as written in the Markdown. */
  originalPath: string;
  /** Index of the first line of the rendered HTML that contains the placeholder, or -1. */
  blockIndex: number;
}

/** Result of `parseMarkdown`. */
interface ParsedMarkdown {
  title: string;
  coverImage: string | null;
  contentImages: ImageInfo[];
  html: string;
  totalBlocks: number;
}

type FrontmatterFields = Record<string, unknown>;

/** Opening/closing quote pairs removed by `stripWrappingQuotes`. */
const QUOTE_PAIRS: ReadonlyArray<readonly [string, string]> = [
  ['"', '"'],
  ["'", "'"],
  ['\u201c', '\u201d'],
  ['\u2018', '\u2019'],
];

/** Socket inactivity timeout applied to each download request. */
const DOWNLOAD_TIMEOUT_MS = 30000;

/** HTTP status codes that `downloadFile` follows when a `Location` header is present. */
const REDIRECT_STATUS_CODES: ReadonlySet<number> = new Set([301, 302, 303, 307, 308]);

/** Prefix of the tokens substituted for images in the rendered HTML. */
const IMAGE_PLACEHOLDER_PREFIX = 'XIMGPH_';

/**
 * Splits a document into its frontmatter attributes and Markdown body.
 * If the frontmatter cannot be parsed, the whole input is returned as the body.
 */
function parseFrontmatter(content: string): { frontmatter: FrontmatterFields; body: string } {
  try {
    const parsed = frontMatter<FrontmatterFields>(content);
    return {
      frontmatter: parsed.attributes ?? {},
      body: parsed.body,
    };
  } catch {
    return { frontmatter: {}, body: content };
  }
}

/**
 * Trims `value` and removes one pair of matching wrapping quotes
 * (ASCII or curly, single or double) if present.
 */
function stripWrappingQuotes(value: string): string {
  const trimmed = value.trim();
  // A single quote character is not a *pair*; leave it alone.
  if (trimmed.length < 2) return trimmed;

  const isWrapped = QUOTE_PAIRS.some(
    ([open, close]) => trimmed.startsWith(open) && trimmed.endsWith(close),
  );
  return isWrapped ? trimmed.slice(1, -1).trim() : trimmed;
}

/**
 * Converts a frontmatter value to a string.
 * Strings are unquoted, numbers/booleans are stringified, anything else yields `undefined`.
 */
function toFrontmatterString(value: unknown): string | undefined {
  if (typeof value === 'string') {
    return stripWrappingQuotes(value);
  }
  if (typeof value === 'number' || typeof value === 'boolean') {
    return String(value);
  }
  return undefined;
}

/** Returns the first non-empty string found under `keys`, in order. */
function pickFirstString(frontmatter: FrontmatterFields, keys: string[]): string | undefined {
  for (const key of keys) {
    const value = toFrontmatterString(frontmatter[key]);
    if (value) return value;
  }
  return undefined;
}

/**
 * Looks for a `cover.(png|jpg|jpeg|webp)` file (case-insensitive) in `baseDir`
 * and then in `baseDir/imgs`. Unreadable directories are skipped.
 */
function findCoverImageNearMarkdown(baseDir: string): string | null {
  const candidateDirs = [baseDir, path.join(baseDir, 'imgs')];
  const coverPattern = /^cover\.(png|jpe?g|webp)$/i;

  for (const dir of candidateDirs) {
    try {
      if (!fs.existsSync(dir) || !fs.statSync(dir).isDirectory()) {
        continue;
      }
      const match = fs.readdirSync(dir).find((entry) => coverPattern.test(entry));
      if (match) {
        return path.join(dir, match);
      }
    } catch {
      continue;
    }
  }
  return null;
}

/** Returns the text of the first level-1 heading, or `''` when there is none. */
function extractTitleFromMarkdown(markdown: string): string {
  const tokens = Lexer.lex(markdown, { gfm: true, breaks: true });
  for (const token of tokens) {
    if (token.type === 'heading' && token.depth === 1) {
      return stripWrappingQuotes(token.text);
    }
  }
  return '';
}

/**
 * Downloads `url` to `destPath` over HTTPS, following up to `maxRedirects` redirects.
 *
 * The destination file is only created once a 200 response arrives, and it is
 * removed again if the transfer fails, so a failed call never leaves a partial
 * file behind for a later existence check to mistake for a finished download.
 *
 * @param url - Absolute `https://` URL; anything else is rejected.
 * @param destPath - File to write the response body to.
 * @param maxRedirects - Remaining redirect budget.
 * @returns A promise that resolves once the body has been written.
 */
function downloadFile(url: string, destPath: string, maxRedirects = 5): Promise<void> {
  return new Promise<void>((resolve, reject) => {
    if (!url.startsWith('https://')) {
      reject(new Error(`Refusing non-HTTPS download: ${url}`));
      return;
    }
    if (maxRedirects <= 0) {
      reject(new Error('Too many redirects'));
      return;
    }

    let file: fs.WriteStream | undefined;
    let settled = false;

    // Single failure path: idempotent, and cleans up whatever was written so far.
    const fail = (err: Error): void => {
      if (settled) return;
      settled = true;
      if (file) {
        file.destroy();
        fs.unlink(destPath, () => reject(err));
      } else {
        reject(err);
      }
    };

    const request = https.get(url, { headers: { 'User-Agent': 'Mozilla/5.0' } }, (response) => {
      const status = response.statusCode ?? 0;
      const location = response.headers.location;

      if (REDIRECT_STATUS_CODES.has(status) && location) {
        // Drain the redirect body so the socket is released.
        response.resume();
        let nextUrl: string;
        try {
          // Location may be relative; resolve it against the current URL.
          nextUrl = new URL(location, url).toString();
        } catch {
          fail(new Error(`Invalid redirect location: ${location}`));
          return;
        }
        settled = true;
        downloadFile(nextUrl, destPath, maxRedirects - 1).then(resolve, reject);
        return;
      }

      if (status !== 200) {
        response.resume();
        fail(new Error(`Failed to download: ${response.statusCode}`));
        return;
      }

      const out = fs.createWriteStream(destPath);
      file = out;
      out.on('error', fail);
      response.on('error', fail);
      out.on('finish', () => {
        if (settled) return;
        settled = true;
        resolve();
      });
      response.pipe(out);
    });

    request.on('error', fail);
    request.setTimeout(DOWNLOAD_TIMEOUT_MS, () => {
      // Record the timeout first so the follow-up socket error does not replace it.
      fail(new Error('Download timeout'));
      request.destroy();
    });
  });
}

/**
 * Extracts a known image extension (lower-cased) from a URL or path.
 * A query string or fragment after the extension is ignored; defaults to `png`.
 */
function getImageExtension(urlOrPath: string): string {
  const match = urlOrPath.match(/\.(jpg|jpeg|png|gif|webp)(\?|#|$)/i);
  return match?.[1]?.toLowerCase() ?? 'png';
}

/**
 * Resolves an image reference to a local file path.
 *
 * - `http://` URLs are skipped (logged, returns `''`).
 * - `https://` URLs are downloaded into `tempDir` under a name derived from a
 *   hash of the URL, and reused if that file already exists.
 * - Absolute paths are returned unchanged; relative paths resolve against `baseDir`.
 */
async function resolveImagePath(imagePath: string, baseDir: string, tempDir: string): Promise<string> {
  if (imagePath.startsWith('http://')) {
    console.error(`[md-to-html] Skipping non-HTTPS image: ${imagePath}`);
    return '';
  }

  if (imagePath.startsWith('https://')) {
    // The hash is only a cache key for the file name, not a security measure.
    const hash = createHash('md5').update(imagePath).digest('hex').slice(0, 8);
    const ext = getImageExtension(imagePath);
    const localPath = path.join(tempDir, `remote_${hash}.${ext}`);

    if (!fs.existsSync(localPath)) {
      console.error(`[md-to-html] Downloading: ${imagePath}`);
      await downloadFile(imagePath, localPath);
    }
    return localPath;
  }

  if (path.isAbsolute(imagePath)) {
    return imagePath;
  }
  return path.resolve(baseDir, imagePath);
}

/** Escapes the five HTML-significant characters so `text` is safe in element and attribute context. */
function escapeHtml(text: string): string {
  return text
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');
}

/**
 * Syntax-highlights `code`. Uses `lang` when highlight.js knows it, otherwise
 * auto-detects; falls back to plain escaped text if highlighting throws.
 */
function highlightCode(code: string, lang: string): string {
  try {
    if (lang && hljs.getLanguage(lang)) {
      return hljs.highlight(code, { language: lang, ignoreIllegals: true }).value;
    }
    return hljs.highlightAuto(code).value;
  } catch {
    return escapeHtml(code);
  }
}

/**
 * Round-trips the Markdown through remark with the CJK-friendly plugin, then
 * decodes the hexadecimal character references that appear in the stringified
 * result. Returns the input unchanged if processing fails.
 */
function preprocessCjkMarkdown(markdown: string): string {
  try {
    const processor = unified()
      .use(remarkParse)
      .use(remarkCjkFriendly)
      .use(remarkStringify);

    const result = String(processor.processSync(markdown));
    return result.replace(/&#x([0-9A-Fa-f]+);/g, (_, hex: string) =>
      String.fromCodePoint(parseInt(hex, 16)),
    );
  } catch {
    return markdown;
  }
}

/**
 * Renders Markdown to HTML with a custom `marked` renderer.
 *
 * Level-1 headings are dropped from the output, fenced code is highlighted,
 * and every image is replaced by whatever `imageCallback` returns.
 *
 * NOTE(review): the element names used below (h2, p, blockquote, strong, br, a)
 * should be confirmed against what the publishing target accepts.
 *
 * @param markdown - Markdown body (frontmatter already removed).
 * @param imageCallback - Called once per image with its `src` and alt text.
 * @returns The rendered HTML and the number of top-level blocks, not counting
 *          blank-line tokens and level-1 headings.
 */
function convertMarkdownToHtml(
  markdown: string,
  imageCallback: (src: string, alt: string) => string,
): { html: string; totalBlocks: number } {
  const preprocessedMarkdown = preprocessCjkMarkdown(markdown);
  const blockTokens = Lexer.lex(preprocessedMarkdown, { gfm: true, breaks: true });

  const renderer: RendererObject = {
    heading({ depth, tokens }: Tokens.Heading): string {
      // The level-1 heading is not rendered into the body.
      if (depth === 1) {
        return '';
      }
      return `<h2>${this.parser.parseInline(tokens)}</h2>`;
    },

    paragraph({ tokens }: Tokens.Paragraph): string {
      const text = this.parser.parseInline(tokens).trim();
      if (!text) return '';
      return `<p>${text}</p>`;
    },

    blockquote({ tokens }: Tokens.Blockquote): string {
      return `<blockquote>${this.parser.parse(tokens)}</blockquote>`;
    },

    code({ text, lang = '' }: Tokens.Code): string {
      // Only the first word of the info string names the language.
      const language = (lang.split(/\s+/)[0] ?? '').toLowerCase();
      const source = text.replace(/\n$/, '');
      const highlighted = highlightCode(source, language).replace(/\n/g, '<br>');
      const label = language ? `<strong>[${escapeHtml(language)}]</strong><br>` : '';
      return `<blockquote>${label}${highlighted}</blockquote>`;
    },

    image({ href, text }: Tokens.Image): string {
      if (!href) return '';
      return imageCallback(href, text ?? '');
    },

    link({ href, title, tokens, text }: Tokens.Link): string {
      const label = tokens?.length
        ? this.parser.parseInline(tokens)
        : escapeHtml(text || href || '');
      if (!href) return label;

      const titleAttr = title ? ` title="${escapeHtml(title)}"` : '';
      return `<a href="${escapeHtml(href)}"${titleAttr}>${label}</a>`;
    },
  };

  const parser = new Marked({
    gfm: true,
    breaks: true,
  });
  parser.use({ renderer });

  const rendered = parser.parse(preprocessedMarkdown);
  if (typeof rendered !== 'string') {
    throw new Error('Unexpected async markdown parse result');
  }

  const totalBlocks = blockTokens.filter((token) => {
    if (token.type === 'space') return false;
    if (token.type === 'heading' && token.depth === 1) return false;
    return true;
  }).length;

  return {
    html: rendered,
    totalBlocks,
  };
}

/**
 * Parses a Markdown file into the data needed to publish it.
 *
 * Title precedence: `options.title`, frontmatter `title`, first level-1 heading,
 * file name without extension.
 * Cover precedence: `options.coverImage`, frontmatter cover fields, a `cover.*`
 * file next to the Markdown, the first content image.
 *
 * @param markdownPath - Path of the Markdown file to read.
 * @param options - Optional overrides; `tempDir` is where remote images are stored
 *                  (defaults to `<os tmpdir>/x-article-images`).
 * @returns Title, cover image path, content images and rendered HTML.
 */
export async function parseMarkdown(
  markdownPath: string,
  options?: { coverImage?: string; title?: string; tempDir?: string },
): Promise<ParsedMarkdown> {
  const content = fs.readFileSync(markdownPath, 'utf-8');
  const baseDir = path.dirname(markdownPath);
  const tempDir = options?.tempDir ?? path.join(os.tmpdir(), 'x-article-images');

  await mkdir(tempDir, { recursive: true });

  const { frontmatter, body } = parseFrontmatter(content);

  let title = stripWrappingQuotes(options?.title ?? '') || pickFirstString(frontmatter, ['title']) || '';
  if (!title) {
    title = extractTitleFromMarkdown(body);
  }
  if (!title) {
    title = path.basename(markdownPath, path.extname(markdownPath));
  }

  let coverImagePath: string | null =
    stripWrappingQuotes(options?.coverImage ?? '') ||
    pickFirstString(frontmatter, [
      'cover_image',
      'coverImage',
      'cover',
      'image',
      'featureImage',
      'feature_image',
    ]) ||
    null;
  if (!coverImagePath) {
    coverImagePath = findCoverImageNearMarkdown(baseDir);
  }

  const images: Array<{ src: string; alt: string; blockIndex: number }> = [];
  const placeholderFor = (n: number): string => `${IMAGE_PLACEHOLDER_PREFIX}${n}`;

  const { html, totalBlocks } = convertMarkdownToHtml(body, (src, alt) => {
    images.push({ src, alt, blockIndex: -1 });
    return placeholderFor(images.length);
  });

  // One pass over the rendered lines: record, for each placeholder, the first
  // line it appears on. Matching the whole digit run keeps XIMGPH_1 from being
  // found inside XIMGPH_10, and also finds placeholders that sit directly
  // next to each other or next to other text.
  const imageIndexByPlaceholder = new Map<string, number>(
    images.map((_, i) => [placeholderFor(i + 1), i] as const),
  );
  const placeholderPattern = /XIMGPH_\d+/g;
  html.split('\n').forEach((line, lineIndex) => {
    for (const match of line.matchAll(placeholderPattern)) {
      const imageIndex = imageIndexByPlaceholder.get(match[0]);
      const image = imageIndex === undefined ? undefined : images[imageIndex];
      if (image && image.blockIndex === -1) {
        image.blockIndex = lineIndex;
      }
    }
  });

  const contentImages: ImageInfo[] = [];
  let firstImageAsCover: string | null = null;

  // Resolved one at a time on purpose: two references to the same remote URL
  // share one cache file, and the second must see the first one's download.
  for (const [i, img] of images.entries()) {
    const localPath = await resolveImagePath(img.src, baseDir, tempDir);

    if (i === 0 && !coverImagePath) {
      firstImageAsCover = localPath;
    }

    contentImages.push({
      placeholder: placeholderFor(i + 1),
      localPath,
      originalPath: img.src,
      blockIndex: img.blockIndex,
    });
  }

  const finalHtml = html.replace(/\n{3,}/g, '\n\n').trim();

  let resolvedCoverImage: string | null = null;
  if (coverImagePath) {
    // A skipped (non-HTTPS) cover resolves to '', which is reported as "no cover".
    resolvedCoverImage = (await resolveImagePath(coverImagePath, baseDir, tempDir)) || null;
  } else if (firstImageAsCover) {
    resolvedCoverImage = firstImageAsCover;
  }

  return {
    title,
    coverImage: resolvedCoverImage,
    contentImages,
    html: finalHtml,
    totalBlocks,
  };
}

/** Prints CLI help to stdout and exits with status 0. */
function printUsage(): never {
  console.log(`Convert Markdown to HTML for X Article publishing

Usage:
  npx -y bun md-to-html.ts <markdown_file> [options]

Options:
  --title <title>       Override title from frontmatter
  --cover <image>       Override cover image from frontmatter
  --output <json|html>  Output format (default: json)
  --html-only           Output only the HTML content
  --save-html <path>    Save HTML to file

Frontmatter fields:
  title:        Article title (or use first H1)
  cover_image:  Cover image path or URL
  cover:        Alias for cover_image
  image:        Alias for cover_image

Example:
  npx -y bun md-to-html.ts article.md --output json
  npx -y bun md-to-html.ts article.md --html-only > /tmp/article.html
  npx -y bun md-to-html.ts article.md --save-html /tmp/article.html
`);
  process.exit(0);
}

/**
 * CLI entry point: parses arguments, converts the given Markdown file and
 * writes the result to stdout (and optionally to `--save-html`).
 * Progress and error messages go to stderr so stdout stays machine-readable.
 */
async function main(): Promise<void> {
  const args = process.argv.slice(2);
  if (args.length === 0 || args.includes('--help') || args.includes('-h')) {
    printUsage();
  }

  let markdownPath: string | undefined;
  let title: string | undefined;
  let coverImage: string | undefined;
  let outputFormat: 'json' | 'html' = 'json';
  let htmlOnly = false;
  let saveHtmlPath: string | undefined;

  for (let i = 0; i < args.length; i++) {
    const arg = args[i] ?? '';
    const hasValue = Boolean(args[i + 1]);

    if (arg === '--title' && hasValue) {
      title = args[++i];
    } else if (arg === '--cover' && hasValue) {
      coverImage = args[++i];
    } else if (arg === '--output' && hasValue) {
      const requested = args[++i];
      if (requested === 'json' || requested === 'html') {
        outputFormat = requested;
      } else {
        console.error(`[md-to-html] Unknown output format "${requested}", using ${outputFormat}`);
      }
    } else if (arg === '--html-only') {
      htmlOnly = true;
    } else if (arg === '--save-html' && hasValue) {
      saveHtmlPath = args[++i];
    } else if (!arg.startsWith('-')) {
      // Any bare argument is taken as the Markdown path; the last one wins.
      markdownPath = arg;
    }
  }

  if (!markdownPath) {
    console.error('Error: Markdown file path required');
    process.exit(1);
  }

  if (!fs.existsSync(markdownPath)) {
    console.error(`Error: File not found: ${markdownPath}`);
    process.exit(1);
  }

  const result = await parseMarkdown(markdownPath, { title, coverImage });

  if (saveHtmlPath) {
    await writeFile(saveHtmlPath, result.html, 'utf-8');
    console.error(`[md-to-html] HTML saved to: ${saveHtmlPath}`);
  }

  if (htmlOnly || outputFormat === 'html') {
    console.log(result.html);
  } else {
    console.log(JSON.stringify(result, null, 2));
  }
}

await main().catch((err) => {
  console.error(`Error: ${err instanceof Error ? err.message : String(err)}`);
  process.exit(1);
});