diff --git a/packages/baoyu-md/src/content.test.ts b/packages/baoyu-md/src/content.test.ts index 91dbd72..b076107 100644 --- a/packages/baoyu-md/src/content.test.ts +++ b/packages/baoyu-md/src/content.test.ts @@ -2,6 +2,7 @@ import assert from "node:assert/strict"; import test from "node:test"; import { + cleanSummaryText, extractSummaryFromBody, extractTitleFromMarkdown, parseFrontmatter, @@ -91,3 +92,19 @@ This is **the first paragraph** with [a link](https://example.com) and \`inline "This is the first paragraph with a link and inline code that should...", ); }); + +test("summary extraction normalizes raw HTML paragraphs to plain text", () => { + const summary = extractSummaryFromBody( + ` +# Heading +

2026年初,一只“龙虾”搅动了整个科技圈。腾讯楼下排起近千人长队,只为让工程师领取一份福利。

+`, + 120, + ); + + assert.equal( + summary, + "2026年初,一只“龙虾”搅动了整个科技圈。腾讯楼下排起近千人长队,只为让工程师领取一份福利。", + ); + assert.equal(cleanSummaryText("Good text!'"), "Good text!'"); +}); diff --git a/packages/baoyu-md/src/content.ts b/packages/baoyu-md/src/content.ts index 6be2b51..0bc73c5 100644 --- a/packages/baoyu-md/src/content.ts +++ b/packages/baoyu-md/src/content.ts @@ -46,6 +46,45 @@ export function stripWrappingQuotes(value: string): string { return value.trim(); } +const HTML_ENTITIES: Record = { + amp: "&", + apos: "'", + gt: ">", + lt: "<", + nbsp: " ", + quot: '"', +}; + +function decodeHtmlCodePoint(codePoint: number, fallback: string): string { + if (!Number.isFinite(codePoint) || codePoint < 0 || codePoint > 0x10ffff) { + return fallback; + } + return String.fromCodePoint(codePoint); +} + +function decodeHtmlEntities(value: string): string { + return value.replace(/&(#x?[0-9a-f]+|[a-z]+);/gi, (entity, body: string) => { + const normalized = body.toLowerCase(); + if (normalized.startsWith("#x")) { + return decodeHtmlCodePoint(Number.parseInt(normalized.slice(2), 16), entity); + } + if (normalized.startsWith("#")) { + return decodeHtmlCodePoint(Number.parseInt(normalized.slice(1), 10), entity); + } + return HTML_ENTITIES[normalized] ?? entity; + }); +} + +export function cleanSummaryText(value: string): string { + return decodeHtmlEntities(stripWrappingQuotes(value)) + .replace(//gi, " ") + .replace(//gi, " ") + .replace(//gi, " ") + .replace(/<\/?[a-z][a-z0-9:-]*(?:\s+[^>]*)?>/gi, " ") + .replace(/\s+/g, " ") + .trim(); +} + export function toFrontmatterString(value: unknown): string | undefined { if (typeof value === "string") { return stripWrappingQuotes(value); @@ -94,10 +133,11 @@ export function extractSummaryFromBody(body: string, maxLen: number): string { .replace(/\*(.+?)\*/g, "$1") .replace(/\[([^\]]+)\]\([^)]+\)/g, "$1") .replace(/`([^`]+)`/g, "$1"); + const summaryText = cleanSummaryText(cleanText); - if (cleanText.length > 20) { - if (cleanText.length <= maxLen) return cleanText; - return `${cleanText.slice(0, maxLen - 3)}...`; + if (summaryText.length > 20) { + if (summaryText.length <= maxLen) return summaryText; + return `${summaryText.slice(0, maxLen - 3)}...`; } } diff --git a/packages/baoyu-md/src/html-builder.test.ts b/packages/baoyu-md/src/html-builder.test.ts index b68de0d..f472215 100644 --- a/packages/baoyu-md/src/html-builder.test.ts +++ b/packages/baoyu-md/src/html-builder.test.ts @@ -39,6 +39,22 @@ test("buildHtmlDocument includes optional meta tags and code theme CSS", () => { assert.match(html, /
Hello<\/article>/); }); +test("buildHtmlDocument escapes head metadata attributes", () => { + const html = buildHtmlDocument( + { + title: `Doc `, + author: `Bao"yu`, + description: `

Summary & notes

`, + }, + "", + "", + ); + + assert.match(html, /Doc <draft><\/title>/); + assert.match(html, /meta name="author" content="Bao"yu"/); + assert.match(html, /meta name="description" content="<p style="color: red">Summary & notes<\/p>"/); +}); + test("normalizeCssText and normalizeInlineCss replace variables and strip declarations", () => { const rawCss = ` :root { --md-primary-color: #000; --md-font-size: 12px; --foreground: 0 0% 5%; } diff --git a/packages/baoyu-md/src/html-builder.ts b/packages/baoyu-md/src/html-builder.ts index d27e03a..6756a1e 100644 --- a/packages/baoyu-md/src/html-builder.ts +++ b/packages/baoyu-md/src/html-builder.ts @@ -45,19 +45,24 @@ export function loadCodeThemeCss(themeName: string): string { } export function buildHtmlDocument(meta: HtmlDocumentMeta, css: string, html: string, codeThemeCss?: string): string { + const escapeHtmlAttribute = (value: string) => value + .replace(/&/g, "&") + .replace(/"/g, """) + .replace(/</g, "<") + .replace(/>/g, ">"); const lines = [ "<!doctype html>", "<html>", "<head>", ' <meta charset="utf-8" />', ' <meta name="viewport" content="width=device-width, initial-scale=1" />', - ` <title>${meta.title}`, + ` ${escapeHtmlAttribute(meta.title)}`, ]; if (meta.author) { - lines.push(` `); + lines.push(` `); } if (meta.description) { - lines.push(` `); + lines.push(` `); } lines.push(` `); if (codeThemeCss) { diff --git a/skills/baoyu-post-to-wechat/scripts/md-to-wechat.ts b/skills/baoyu-post-to-wechat/scripts/md-to-wechat.ts index ffec0ae..3e8cba1 100644 --- a/skills/baoyu-post-to-wechat/scripts/md-to-wechat.ts +++ b/skills/baoyu-post-to-wechat/scripts/md-to-wechat.ts @@ -4,6 +4,7 @@ import path from "node:path"; import process from "node:process"; import { + cleanSummaryText, extractSummaryFromBody, extractTitleFromMarkdown, parseFrontmatter, @@ -47,8 +48,9 @@ export async function convertMarkdown( } const author = stripWrappingQuotes(frontmatter.author ?? ""); - let summary = stripWrappingQuotes(frontmatter.description ?? "") + const frontmatterSummary = stripWrappingQuotes(frontmatter.description ?? "") || stripWrappingQuotes(frontmatter.summary ?? ""); + let summary = cleanSummaryText(frontmatterSummary); if (!summary) { summary = extractSummaryFromBody(body, 120); } diff --git a/skills/baoyu-post-to-wechat/scripts/vendor/baoyu-md/src/content.ts b/skills/baoyu-post-to-wechat/scripts/vendor/baoyu-md/src/content.ts index 6be2b51..0bc73c5 100644 --- a/skills/baoyu-post-to-wechat/scripts/vendor/baoyu-md/src/content.ts +++ b/skills/baoyu-post-to-wechat/scripts/vendor/baoyu-md/src/content.ts @@ -46,6 +46,45 @@ export function stripWrappingQuotes(value: string): string { return value.trim(); } +const HTML_ENTITIES: Record = { + amp: "&", + apos: "'", + gt: ">", + lt: "<", + nbsp: " ", + quot: '"', +}; + +function decodeHtmlCodePoint(codePoint: number, fallback: string): string { + if (!Number.isFinite(codePoint) || codePoint < 0 || codePoint > 0x10ffff) { + return fallback; + } + return String.fromCodePoint(codePoint); +} + +function decodeHtmlEntities(value: string): string { + return value.replace(/&(#x?[0-9a-f]+|[a-z]+);/gi, (entity, body: string) => { + const normalized = body.toLowerCase(); + if (normalized.startsWith("#x")) { + return decodeHtmlCodePoint(Number.parseInt(normalized.slice(2), 16), entity); + } + if (normalized.startsWith("#")) { + return decodeHtmlCodePoint(Number.parseInt(normalized.slice(1), 10), entity); + } + return HTML_ENTITIES[normalized] ?? entity; + }); +} + +export function cleanSummaryText(value: string): string { + return decodeHtmlEntities(stripWrappingQuotes(value)) + .replace(//gi, " ") + .replace(//gi, " ") + .replace(//gi, " ") + .replace(/<\/?[a-z][a-z0-9:-]*(?:\s+[^>]*)?>/gi, " ") + .replace(/\s+/g, " ") + .trim(); +} + export function toFrontmatterString(value: unknown): string | undefined { if (typeof value === "string") { return stripWrappingQuotes(value); @@ -94,10 +133,11 @@ export function extractSummaryFromBody(body: string, maxLen: number): string { .replace(/\*(.+?)\*/g, "$1") .replace(/\[([^\]]+)\]\([^)]+\)/g, "$1") .replace(/`([^`]+)`/g, "$1"); + const summaryText = cleanSummaryText(cleanText); - if (cleanText.length > 20) { - if (cleanText.length <= maxLen) return cleanText; - return `${cleanText.slice(0, maxLen - 3)}...`; + if (summaryText.length > 20) { + if (summaryText.length <= maxLen) return summaryText; + return `${summaryText.slice(0, maxLen - 3)}...`; } } diff --git a/skills/baoyu-post-to-wechat/scripts/vendor/baoyu-md/src/html-builder.ts b/skills/baoyu-post-to-wechat/scripts/vendor/baoyu-md/src/html-builder.ts index d27e03a..6756a1e 100644 --- a/skills/baoyu-post-to-wechat/scripts/vendor/baoyu-md/src/html-builder.ts +++ b/skills/baoyu-post-to-wechat/scripts/vendor/baoyu-md/src/html-builder.ts @@ -45,19 +45,24 @@ export function loadCodeThemeCss(themeName: string): string { } export function buildHtmlDocument(meta: HtmlDocumentMeta, css: string, html: string, codeThemeCss?: string): string { + const escapeHtmlAttribute = (value: string) => value + .replace(/&/g, "&") + .replace(/"/g, """) + .replace(//g, ">"); const lines = [ "", "", "", ' ', ' ', - ` ${meta.title}`, + ` ${escapeHtmlAttribute(meta.title)}`, ]; if (meta.author) { - lines.push(` `); + lines.push(` `); } if (meta.description) { - lines.push(` `); + lines.push(` `); } lines.push(` `); if (codeThemeCss) {