fix: preserve inline code in cjk emphasis

This commit is contained in:
shixy 2026-03-14 17:12:03 +08:00
parent 38fc733b99
commit 2aa9790789
8 changed files with 252 additions and 1 deletions

View File

@ -0,0 +1,64 @@
import assert from "node:assert/strict";
import test from "node:test";
import { initRenderer, renderMarkdown } from "./renderer.ts";
/** Renders `md` with a freshly initialized renderer and returns the `html` field of the result. */
const render = (md: string) => renderMarkdown(md, initRenderer()).html;
// Rendering expectations, one entry per test. Every pattern in `present` must
// match the rendered HTML and every pattern in `absent` must not.
const inlineCodeCases: {
  name: string;
  md: string;
  present: RegExp[];
  absent?: RegExp[];
}[] = [
  {
    name: "bold with inline code (no underscore)",
    md: "**算出 `logits`,算出 `loss`。**",
    present: [/<code[^>]*>logits<\/code>/, /<code[^>]*>loss<\/code>/],
  },
  {
    name: "bold with inline code (contains underscore)",
    md: "**变成 `input_ids`。**",
    present: [/<code[^>]*>input_ids<\/code>/],
  },
  {
    name: "emphasis with inline code",
    md: "*查看 `hidden_states`*",
    present: [/<code[^>]*>hidden_states<\/code>/],
  },
  {
    name: "plain inline code (regression)",
    md: "`lm_head`",
    present: [/<code[^>]*>lm_head<\/code>/],
  },
  {
    name: "bold without code (regression)",
    md: "**纯粗体文本**",
    present: [/<strong[^>]*>纯粗体文本<\/strong>/],
    absent: [/<code/],
  },
  {
    name: "bold with inline code containing backticks",
    md: "**``a`b``**",
    present: [/<code[^>]*>a&#96;b<\/code>/],
  },
  {
    name: "emphasis with inline code containing backticks",
    md: "*``a`b``*",
    present: [/<em[^>]*><code[^>]*>a&#96;b<\/code><\/em>/],
  },
  {
    name: "bold with inline code containing consecutive backticks",
    md: "**```a``b```**",
    present: [/<code[^>]*>a&#96;&#96;b<\/code>/],
  },
  {
    name: "bold with inline code containing only backticks",
    md: "**```` `` ````**",
    present: [/<code[^>]*>&#96;&#96;<\/code>/],
  },
];

// Register the cases in table order so the test report keeps the same sequence.
for (const { name, md, present, absent = [] } of inlineCodeCases) {
  test(name, () => {
    const html = render(md);
    for (const pattern of present) assert.match(html, pattern);
    for (const pattern of absent) assert.doesNotMatch(html, pattern);
  });
}
// Code spans whose content is nothing but spaces must keep every space when
// they sit inside bold text.
test("bold with inline code containing only spaces", () => {
  const oneSpace = render("**`` ``**");
  assert.match(oneSpace, /<code[^>]*> <\/code>/);
  // Build the two-space span explicitly so the input really differs from the
  // one-space case above and cannot be collapsed by whitespace normalization.
  const twoSpaceSpan = "``" + " ".repeat(2) + "``";
  const twoSpaces = render(`**${twoSpaceSpan}**`);
  assert.match(twoSpaces, /<code[^>]*> {2}<\/code>/);
});

View File

@ -109,6 +109,13 @@ function parseFrontMatterAndContent(markdownText: string): ParseResult {
} }
} }
/**
 * Serializes the content of an inline code node back into a Markdown code span.
 *
 * The delimiter is one backtick longer than the longest backtick run found in
 * `value`, so no run inside the content can close the span early. Content that
 * is not made up solely of spaces is padded with one space on each side;
 * empty or all-space content is emitted without padding.
 *
 * @param value - Raw content of the code span, without delimiters.
 * @returns The content wrapped in a backtick fence.
 */
function wrapInlineCode(value: string): string {
  // Length of the longest run of consecutive backticks; stays 0 when there are none.
  let longestRun = 0;
  for (const run of value.match(/`+/g) ?? []) {
    if (run.length > longestRun) longestRun = run.length;
  }
  const fence = "`".repeat(longestRun + 1);

  // Empty or all-space content gets no padding.
  if (/^ *$/.test(value)) {
    return fence + value + fence;
  }
  // Presumably the parser strips one space from each side again, which also
  // keeps content that starts or ends with a backtick off the fence.
  return fence + " " + value + " " + fence;
}
export function initRenderer(opts: IOpts = {}): RendererAPI { export function initRenderer(opts: IOpts = {}): RendererAPI {
const footnotes: [number, string, string][] = []; const footnotes: [number, string, string][] = [];
let footnoteIndex = 0; let footnoteIndex = 0;
@ -369,6 +376,7 @@ function preprocessCjkEmphasis(markdown: string): string {
const tree = processor.parse(markdown); const tree = processor.parse(markdown);
const extractText = (node: any): string => { const extractText = (node: any): string => {
if (node.type === "text") return node.value; if (node.type === "text") return node.value;
if (node.type === "inlineCode") return wrapInlineCode(node.value);
if (node.children) return node.children.map(extractText).join(""); if (node.children) return node.children.map(extractText).join("");
return ""; return "";
}; };

View File

@ -34,3 +34,31 @@ test("bold without code (regression)", () => {
assert.match(html, /<strong[^>]*>纯粗体文本<\/strong>/); assert.match(html, /<strong[^>]*>纯粗体文本<\/strong>/);
assert.doesNotMatch(html, /<code/); assert.doesNotMatch(html, /<code/);
}); });
// Inline code whose content itself contains backticks, nested in emphasis.
// Each entry is one test: the rendered HTML must match `expected`.
const backtickFenceCases: { name: string; md: string; expected: RegExp }[] = [
  {
    name: "bold with inline code containing backticks",
    md: "**``a`b``**",
    expected: /<code[^>]*>a&#96;b<\/code>/,
  },
  {
    name: "emphasis with inline code containing backticks",
    md: "*``a`b``*",
    expected: /<em[^>]*><code[^>]*>a&#96;b<\/code><\/em>/,
  },
  {
    name: "bold with inline code containing consecutive backticks",
    md: "**```a``b```**",
    expected: /<code[^>]*>a&#96;&#96;b<\/code>/,
  },
  {
    name: "bold with inline code containing only backticks",
    md: "**```` `` ````**",
    expected: /<code[^>]*>&#96;&#96;<\/code>/,
  },
];

// Register the cases in table order so the test report keeps the same sequence.
for (const { name, md, expected } of backtickFenceCases) {
  test(name, () => {
    const html = render(md);
    assert.match(html, expected);
  });
}
// Code spans whose content is nothing but spaces must keep every space when
// they sit inside bold text.
test("bold with inline code containing only spaces", () => {
  const oneSpace = render("**`` ``**");
  assert.match(oneSpace, /<code[^>]*> <\/code>/);
  // Build the two-space span explicitly so the input really differs from the
  // one-space case above and cannot be collapsed by whitespace normalization.
  const twoSpaceSpan = "``" + " ".repeat(2) + "``";
  const twoSpaces = render(`**${twoSpaceSpan}**`);
  assert.match(twoSpaces, /<code[^>]*> {2}<\/code>/);
});

View File

@ -109,6 +109,13 @@ function parseFrontMatterAndContent(markdownText: string): ParseResult {
} }
} }
/**
 * Serializes the content of an inline code node back into a Markdown code span.
 *
 * The delimiter is one backtick longer than the longest backtick run found in
 * `value`, so no run inside the content can close the span early. Content that
 * is not made up solely of spaces is padded with one space on each side;
 * empty or all-space content is emitted without padding.
 *
 * @param value - Raw content of the code span, without delimiters.
 * @returns The content wrapped in a backtick fence.
 */
function wrapInlineCode(value: string): string {
  // Length of the longest run of consecutive backticks; stays 0 when there are none.
  let longestRun = 0;
  for (const run of value.match(/`+/g) ?? []) {
    if (run.length > longestRun) longestRun = run.length;
  }
  const fence = "`".repeat(longestRun + 1);

  // Empty or all-space content gets no padding.
  if (/^ *$/.test(value)) {
    return fence + value + fence;
  }
  // Presumably the parser strips one space from each side again, which also
  // keeps content that starts or ends with a backtick off the fence.
  return fence + " " + value + " " + fence;
}
export function initRenderer(opts: IOpts = {}): RendererAPI { export function initRenderer(opts: IOpts = {}): RendererAPI {
const footnotes: [number, string, string][] = []; const footnotes: [number, string, string][] = [];
let footnoteIndex = 0; let footnoteIndex = 0;
@ -369,7 +376,7 @@ function preprocessCjkEmphasis(markdown: string): string {
const tree = processor.parse(markdown); const tree = processor.parse(markdown);
const extractText = (node: any): string => { const extractText = (node: any): string => {
if (node.type === "text") return node.value; if (node.type === "text") return node.value;
if (node.type === "inlineCode") return `\`${node.value}\``; if (node.type === "inlineCode") return wrapInlineCode(node.value);
if (node.children) return node.children.map(extractText).join(""); if (node.children) return node.children.map(extractText).join("");
return ""; return "";
}; };

View File

@ -0,0 +1,64 @@
import assert from "node:assert/strict";
import test from "node:test";
import { initRenderer, renderMarkdown } from "./renderer.ts";
/** Renders `md` with a freshly initialized renderer and returns the `html` field of the result. */
const render = (md: string) => renderMarkdown(md, initRenderer()).html;
// Rendering expectations, one entry per test. Every pattern in `present` must
// match the rendered HTML and every pattern in `absent` must not.
const inlineCodeCases: {
  name: string;
  md: string;
  present: RegExp[];
  absent?: RegExp[];
}[] = [
  {
    name: "bold with inline code (no underscore)",
    md: "**算出 `logits`,算出 `loss`。**",
    present: [/<code[^>]*>logits<\/code>/, /<code[^>]*>loss<\/code>/],
  },
  {
    name: "bold with inline code (contains underscore)",
    md: "**变成 `input_ids`。**",
    present: [/<code[^>]*>input_ids<\/code>/],
  },
  {
    name: "emphasis with inline code",
    md: "*查看 `hidden_states`*",
    present: [/<code[^>]*>hidden_states<\/code>/],
  },
  {
    name: "plain inline code (regression)",
    md: "`lm_head`",
    present: [/<code[^>]*>lm_head<\/code>/],
  },
  {
    name: "bold without code (regression)",
    md: "**纯粗体文本**",
    present: [/<strong[^>]*>纯粗体文本<\/strong>/],
    absent: [/<code/],
  },
  {
    name: "bold with inline code containing backticks",
    md: "**``a`b``**",
    present: [/<code[^>]*>a&#96;b<\/code>/],
  },
  {
    name: "emphasis with inline code containing backticks",
    md: "*``a`b``*",
    present: [/<em[^>]*><code[^>]*>a&#96;b<\/code><\/em>/],
  },
  {
    name: "bold with inline code containing consecutive backticks",
    md: "**```a``b```**",
    present: [/<code[^>]*>a&#96;&#96;b<\/code>/],
  },
  {
    name: "bold with inline code containing only backticks",
    md: "**```` `` ````**",
    present: [/<code[^>]*>&#96;&#96;<\/code>/],
  },
];

// Register the cases in table order so the test report keeps the same sequence.
for (const { name, md, present, absent = [] } of inlineCodeCases) {
  test(name, () => {
    const html = render(md);
    for (const pattern of present) assert.match(html, pattern);
    for (const pattern of absent) assert.doesNotMatch(html, pattern);
  });
}
// Code spans whose content is nothing but spaces must keep every space when
// they sit inside bold text.
test("bold with inline code containing only spaces", () => {
  const oneSpace = render("**`` ``**");
  assert.match(oneSpace, /<code[^>]*> <\/code>/);
  // Build the two-space span explicitly so the input really differs from the
  // one-space case above and cannot be collapsed by whitespace normalization.
  const twoSpaceSpan = "``" + " ".repeat(2) + "``";
  const twoSpaces = render(`**${twoSpaceSpan}**`);
  assert.match(twoSpaces, /<code[^>]*> {2}<\/code>/);
});

View File

@ -109,6 +109,13 @@ function parseFrontMatterAndContent(markdownText: string): ParseResult {
} }
} }
/**
 * Serializes the content of an inline code node back into a Markdown code span.
 *
 * The delimiter is one backtick longer than the longest backtick run found in
 * `value`, so no run inside the content can close the span early. Content that
 * is not made up solely of spaces is padded with one space on each side;
 * empty or all-space content is emitted without padding.
 *
 * @param value - Raw content of the code span, without delimiters.
 * @returns The content wrapped in a backtick fence.
 */
function wrapInlineCode(value: string): string {
  // Length of the longest run of consecutive backticks; stays 0 when there are none.
  let longestRun = 0;
  for (const run of value.match(/`+/g) ?? []) {
    if (run.length > longestRun) longestRun = run.length;
  }
  const fence = "`".repeat(longestRun + 1);

  // Empty or all-space content gets no padding.
  if (/^ *$/.test(value)) {
    return fence + value + fence;
  }
  // Presumably the parser strips one space from each side again, which also
  // keeps content that starts or ends with a backtick off the fence.
  return fence + " " + value + " " + fence;
}
export function initRenderer(opts: IOpts = {}): RendererAPI { export function initRenderer(opts: IOpts = {}): RendererAPI {
const footnotes: [number, string, string][] = []; const footnotes: [number, string, string][] = [];
let footnoteIndex = 0; let footnoteIndex = 0;
@ -369,6 +376,7 @@ function preprocessCjkEmphasis(markdown: string): string {
const tree = processor.parse(markdown); const tree = processor.parse(markdown);
const extractText = (node: any): string => { const extractText = (node: any): string => {
if (node.type === "text") return node.value; if (node.type === "text") return node.value;
if (node.type === "inlineCode") return wrapInlineCode(node.value);
if (node.children) return node.children.map(extractText).join(""); if (node.children) return node.children.map(extractText).join("");
return ""; return "";
}; };

View File

@ -0,0 +1,64 @@
import assert from "node:assert/strict";
import test from "node:test";
import { initRenderer, renderMarkdown } from "./renderer.ts";
/** Renders `md` with a freshly initialized renderer and returns the `html` field of the result. */
const render = (md: string) => renderMarkdown(md, initRenderer()).html;
// Rendering expectations, one entry per test. Every pattern in `present` must
// match the rendered HTML and every pattern in `absent` must not.
const inlineCodeCases: {
  name: string;
  md: string;
  present: RegExp[];
  absent?: RegExp[];
}[] = [
  {
    name: "bold with inline code (no underscore)",
    md: "**算出 `logits`,算出 `loss`。**",
    present: [/<code[^>]*>logits<\/code>/, /<code[^>]*>loss<\/code>/],
  },
  {
    name: "bold with inline code (contains underscore)",
    md: "**变成 `input_ids`。**",
    present: [/<code[^>]*>input_ids<\/code>/],
  },
  {
    name: "emphasis with inline code",
    md: "*查看 `hidden_states`*",
    present: [/<code[^>]*>hidden_states<\/code>/],
  },
  {
    name: "plain inline code (regression)",
    md: "`lm_head`",
    present: [/<code[^>]*>lm_head<\/code>/],
  },
  {
    name: "bold without code (regression)",
    md: "**纯粗体文本**",
    present: [/<strong[^>]*>纯粗体文本<\/strong>/],
    absent: [/<code/],
  },
  {
    name: "bold with inline code containing backticks",
    md: "**``a`b``**",
    present: [/<code[^>]*>a&#96;b<\/code>/],
  },
  {
    name: "emphasis with inline code containing backticks",
    md: "*``a`b``*",
    present: [/<em[^>]*><code[^>]*>a&#96;b<\/code><\/em>/],
  },
  {
    name: "bold with inline code containing consecutive backticks",
    md: "**```a``b```**",
    present: [/<code[^>]*>a&#96;&#96;b<\/code>/],
  },
  {
    name: "bold with inline code containing only backticks",
    md: "**```` `` ````**",
    present: [/<code[^>]*>&#96;&#96;<\/code>/],
  },
];

// Register the cases in table order so the test report keeps the same sequence.
for (const { name, md, present, absent = [] } of inlineCodeCases) {
  test(name, () => {
    const html = render(md);
    for (const pattern of present) assert.match(html, pattern);
    for (const pattern of absent) assert.doesNotMatch(html, pattern);
  });
}
// Code spans whose content is nothing but spaces must keep every space when
// they sit inside bold text.
test("bold with inline code containing only spaces", () => {
  const oneSpace = render("**`` ``**");
  assert.match(oneSpace, /<code[^>]*> <\/code>/);
  // Build the two-space span explicitly so the input really differs from the
  // one-space case above and cannot be collapsed by whitespace normalization.
  const twoSpaceSpan = "``" + " ".repeat(2) + "``";
  const twoSpaces = render(`**${twoSpaceSpan}**`);
  assert.match(twoSpaces, /<code[^>]*> {2}<\/code>/);
});

View File

@ -109,6 +109,13 @@ function parseFrontMatterAndContent(markdownText: string): ParseResult {
} }
} }
/**
 * Serializes the content of an inline code node back into a Markdown code span.
 *
 * The delimiter is one backtick longer than the longest backtick run found in
 * `value`, so no run inside the content can close the span early. Content that
 * is not made up solely of spaces is padded with one space on each side;
 * empty or all-space content is emitted without padding.
 *
 * @param value - Raw content of the code span, without delimiters.
 * @returns The content wrapped in a backtick fence.
 */
function wrapInlineCode(value: string): string {
  // Length of the longest run of consecutive backticks; stays 0 when there are none.
  let longestRun = 0;
  for (const run of value.match(/`+/g) ?? []) {
    if (run.length > longestRun) longestRun = run.length;
  }
  const fence = "`".repeat(longestRun + 1);

  // Empty or all-space content gets no padding.
  if (/^ *$/.test(value)) {
    return fence + value + fence;
  }
  // Presumably the parser strips one space from each side again, which also
  // keeps content that starts or ends with a backtick off the fence.
  return fence + " " + value + " " + fence;
}
export function initRenderer(opts: IOpts = {}): RendererAPI { export function initRenderer(opts: IOpts = {}): RendererAPI {
const footnotes: [number, string, string][] = []; const footnotes: [number, string, string][] = [];
let footnoteIndex = 0; let footnoteIndex = 0;
@ -369,6 +376,7 @@ function preprocessCjkEmphasis(markdown: string): string {
const tree = processor.parse(markdown); const tree = processor.parse(markdown);
const extractText = (node: any): string => { const extractText = (node: any): string => {
if (node.type === "text") return node.value; if (node.type === "text") return node.value;
if (node.type === "inlineCode") return wrapInlineCode(node.value);
if (node.children) return node.children.map(extractText).join(""); if (node.children) return node.children.map(extractText).join("");
return ""; return "";
}; };