fix: preserve inline code in cjk emphasis
This commit is contained in:
parent
38fc733b99
commit
2aa9790789
|
|
@ -0,0 +1,64 @@
|
|||
import assert from "node:assert/strict";
|
||||
import test from "node:test";
|
||||
|
||||
import { initRenderer, renderMarkdown } from "./renderer.ts";
|
||||
|
||||
/**
 * Render a Markdown string to HTML for the assertions below.
 *
 * A new renderer is created on every call, so no renderer instance is
 * shared between test cases.
 *
 * @param md Markdown source to render.
 * @returns The `html` field of the `renderMarkdown` result.
 */
const render = (md: string) => {
  const r = initRenderer();
  return renderMarkdown(md, r).html;
};
|
||||
|
||||
// Inline code nested inside CJK bold text must come out as <code> elements.
test("bold with inline code (no underscore)", () => {
  const html = render("**算出 `logits`,算出 `loss`。**");
  assert.match(html, /<code[^>]*>logits<\/code>/);
  assert.match(html, /<code[^>]*>loss<\/code>/);
});

// The code text contains an underscore and must be emitted verbatim.
test("bold with inline code (contains underscore)", () => {
  const html = render("**变成 `input_ids`。**");
  assert.match(html, /<code[^>]*>input_ids<\/code>/);
});

// Same expectation for single-asterisk emphasis.
test("emphasis with inline code", () => {
  const html = render("*查看 `hidden_states`*");
  assert.match(html, /<code[^>]*>hidden_states<\/code>/);
});

// Inline code outside any emphasis keeps rendering as before.
test("plain inline code (regression)", () => {
  const html = render("`lm_head`");
  assert.match(html, /<code[^>]*>lm_head<\/code>/);
});

// Bold text without code must not gain a <code> element.
test("bold without code (regression)", () => {
  const html = render("**纯粗体文本**");
  assert.match(html, /<strong[^>]*>纯粗体文本<\/strong>/);
  assert.doesNotMatch(html, /<code/);
});

// A backtick inside the code text requires a longer fence around it.
test("bold with inline code containing backticks", () => {
  const html = render("**``a`b``**");
  assert.match(html, /<code[^>]*>a`b<\/code>/);
});

// Here the <code> element must also sit directly inside <em>.
test("emphasis with inline code containing backticks", () => {
  const html = render("*``a`b``*");
  assert.match(html, /<em[^>]*><code[^>]*>a`b<\/code><\/em>/);
});

// A run of two backticks inside the code text needs a three-backtick fence.
test("bold with inline code containing consecutive backticks", () => {
  const html = render("**```a``b```**");
  assert.match(html, /<code[^>]*>a``b<\/code>/);
});

// Code text that is nothing but backticks; the padding spaces are not content.
test("bold with inline code containing only backticks", () => {
  const html = render("**```` `` ````**");
  assert.match(html, /<code[^>]*>``<\/code>/);
});

// Code text made only of spaces must keep every space.
test("bold with inline code containing only spaces", () => {
  const oneSpace = render("**`` ``**");
  assert.match(oneSpace, /<code[^>]*> <\/code>/);

  // Two spaces between the fences; with one space this case would merely
  // repeat the assertion above.
  const twoSpaces = render("**``  ``**");
  assert.match(twoSpaces, /<code[^>]*>  <\/code>/);
});
|
||||
|
|
@ -109,6 +109,13 @@ function parseFrontMatterAndContent(markdownText: string): ParseResult {
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Serialize raw inline-code text back into Markdown code-span syntax.
 *
 * The fence is one backtick longer than the longest backtick run found in
 * `value`, so no run inside the content can close the span early. Content is
 * padded with one space on each side, which a CommonMark parser strips again
 * when the span is re-parsed. Content made only of spaces and line endings is
 * left unpadded, because CommonMark does not strip such content and padding
 * would therefore add characters to it.
 *
 * @param value Code text without its delimiters.
 * @returns Markdown source for a code span containing `value`.
 */
function wrapInlineCode(value: string): string {
  // Length of the longest run of consecutive backticks (0 when there is none).
  let longestRun = 0;
  for (const run of value.match(/`+/g) ?? []) {
    longestRun = Math.max(longestRun, run.length);
  }
  const fence = "`".repeat(longestRun + 1);

  // Line endings count as spaces inside a code span, so they are treated
  // like spaces when deciding whether padding is safe.
  const padding = /^[ \r\n]*$/.test(value) ? "" : " ";
  return `${fence}${padding}${value}${padding}${fence}`;
}
|
||||
|
||||
export function initRenderer(opts: IOpts = {}): RendererAPI {
|
||||
const footnotes: [number, string, string][] = [];
|
||||
let footnoteIndex = 0;
|
||||
|
|
@ -369,6 +376,7 @@ function preprocessCjkEmphasis(markdown: string): string {
|
|||
const tree = processor.parse(markdown);
|
||||
/**
 * Flatten a syntax-tree node into Markdown text.
 *
 * - `text` nodes contribute their value unchanged.
 * - `inlineCode` nodes are re-wrapped with `wrapInlineCode`, so the result
 *   still contains a code span instead of bare text.
 * - Any node with `children` contributes the concatenation of its children.
 * - Every other node contributes the empty string.
 *
 * The parameter stays `any` because the node type is not declared in the
 * part of the file visible here.
 */
const extractText = (node: any): string => {
  if (node.type === "text") return node.value;
  if (node.type === "inlineCode") return wrapInlineCode(node.value);
  if (node.children) return node.children.map(extractText).join("");
  return "";
};
|
||||
|
|
|
|||
|
|
@ -34,3 +34,31 @@ test("bold without code (regression)", () => {
|
|||
assert.match(html, /<strong[^>]*>纯粗体文本<\/strong>/);
|
||||
assert.doesNotMatch(html, /<code/);
|
||||
});
|
||||
|
||||
// A backtick inside the code text requires a longer fence around it.
test("bold with inline code containing backticks", () => {
  const html = render("**``a`b``**");
  assert.match(html, /<code[^>]*>a`b<\/code>/);
});

// Here the <code> element must also sit directly inside <em>.
test("emphasis with inline code containing backticks", () => {
  const html = render("*``a`b``*");
  assert.match(html, /<em[^>]*><code[^>]*>a`b<\/code><\/em>/);
});

// A run of two backticks inside the code text needs a three-backtick fence.
test("bold with inline code containing consecutive backticks", () => {
  const html = render("**```a``b```**");
  assert.match(html, /<code[^>]*>a``b<\/code>/);
});

// Code text that is nothing but backticks; the padding spaces are not content.
test("bold with inline code containing only backticks", () => {
  const html = render("**```` `` ````**");
  assert.match(html, /<code[^>]*>``<\/code>/);
});

// Code text made only of spaces must keep every space.
test("bold with inline code containing only spaces", () => {
  const oneSpace = render("**`` ``**");
  assert.match(oneSpace, /<code[^>]*> <\/code>/);

  // Two spaces between the fences; with one space this case would merely
  // repeat the assertion above.
  const twoSpaces = render("**``  ``**");
  assert.match(twoSpaces, /<code[^>]*>  <\/code>/);
});
|
||||
|
|
|
|||
|
|
@ -109,6 +109,13 @@ function parseFrontMatterAndContent(markdownText: string): ParseResult {
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Serialize raw inline-code text back into Markdown code-span syntax.
 *
 * The fence is one backtick longer than the longest backtick run found in
 * `value`, so no run inside the content can close the span early. Content is
 * padded with one space on each side, which a CommonMark parser strips again
 * when the span is re-parsed. Content made only of spaces and line endings is
 * left unpadded, because CommonMark does not strip such content and padding
 * would therefore add characters to it.
 *
 * @param value Code text without its delimiters.
 * @returns Markdown source for a code span containing `value`.
 */
function wrapInlineCode(value: string): string {
  // Length of the longest run of consecutive backticks (0 when there is none).
  let longestRun = 0;
  for (const run of value.match(/`+/g) ?? []) {
    longestRun = Math.max(longestRun, run.length);
  }
  const fence = "`".repeat(longestRun + 1);

  // Line endings count as spaces inside a code span, so they are treated
  // like spaces when deciding whether padding is safe.
  const padding = /^[ \r\n]*$/.test(value) ? "" : " ";
  return `${fence}${padding}${value}${padding}${fence}`;
}
|
||||
|
||||
export function initRenderer(opts: IOpts = {}): RendererAPI {
|
||||
const footnotes: [number, string, string][] = [];
|
||||
let footnoteIndex = 0;
|
||||
|
|
@ -369,7 +376,7 @@ function preprocessCjkEmphasis(markdown: string): string {
|
|||
const tree = processor.parse(markdown);
|
||||
/**
 * Flatten a syntax-tree node into Markdown text.
 *
 * - `text` nodes contribute their value unchanged.
 * - `inlineCode` nodes are re-wrapped with `wrapInlineCode`, which picks a
 *   fence long enough for code text that itself contains backticks. A fixed
 *   single-backtick wrapper would be closed early by such content.
 * - Any node with `children` contributes the concatenation of its children.
 * - Every other node contributes the empty string.
 *
 * The parameter stays `any` because the node type is not declared in the
 * part of the file visible here.
 */
const extractText = (node: any): string => {
  if (node.type === "text") return node.value;
  if (node.type === "inlineCode") return wrapInlineCode(node.value);
  if (node.children) return node.children.map(extractText).join("");
  return "";
};
|
||||
|
|
|
|||
|
|
@ -0,0 +1,64 @@
|
|||
import assert from "node:assert/strict";
|
||||
import test from "node:test";
|
||||
|
||||
import { initRenderer, renderMarkdown } from "./renderer.ts";
|
||||
|
||||
/**
 * Render a Markdown string to HTML for the assertions below.
 *
 * A new renderer is created on every call, so no renderer instance is
 * shared between test cases.
 *
 * @param md Markdown source to render.
 * @returns The `html` field of the `renderMarkdown` result.
 */
const render = (md: string) => {
  const r = initRenderer();
  return renderMarkdown(md, r).html;
};
|
||||
|
||||
// Inline code nested inside CJK bold text must come out as <code> elements.
test("bold with inline code (no underscore)", () => {
  const html = render("**算出 `logits`,算出 `loss`。**");
  assert.match(html, /<code[^>]*>logits<\/code>/);
  assert.match(html, /<code[^>]*>loss<\/code>/);
});

// The code text contains an underscore and must be emitted verbatim.
test("bold with inline code (contains underscore)", () => {
  const html = render("**变成 `input_ids`。**");
  assert.match(html, /<code[^>]*>input_ids<\/code>/);
});

// Same expectation for single-asterisk emphasis.
test("emphasis with inline code", () => {
  const html = render("*查看 `hidden_states`*");
  assert.match(html, /<code[^>]*>hidden_states<\/code>/);
});

// Inline code outside any emphasis keeps rendering as before.
test("plain inline code (regression)", () => {
  const html = render("`lm_head`");
  assert.match(html, /<code[^>]*>lm_head<\/code>/);
});

// Bold text without code must not gain a <code> element.
test("bold without code (regression)", () => {
  const html = render("**纯粗体文本**");
  assert.match(html, /<strong[^>]*>纯粗体文本<\/strong>/);
  assert.doesNotMatch(html, /<code/);
});

// A backtick inside the code text requires a longer fence around it.
test("bold with inline code containing backticks", () => {
  const html = render("**``a`b``**");
  assert.match(html, /<code[^>]*>a`b<\/code>/);
});

// Here the <code> element must also sit directly inside <em>.
test("emphasis with inline code containing backticks", () => {
  const html = render("*``a`b``*");
  assert.match(html, /<em[^>]*><code[^>]*>a`b<\/code><\/em>/);
});

// A run of two backticks inside the code text needs a three-backtick fence.
test("bold with inline code containing consecutive backticks", () => {
  const html = render("**```a``b```**");
  assert.match(html, /<code[^>]*>a``b<\/code>/);
});

// Code text that is nothing but backticks; the padding spaces are not content.
test("bold with inline code containing only backticks", () => {
  const html = render("**```` `` ````**");
  assert.match(html, /<code[^>]*>``<\/code>/);
});

// Code text made only of spaces must keep every space.
test("bold with inline code containing only spaces", () => {
  const oneSpace = render("**`` ``**");
  assert.match(oneSpace, /<code[^>]*> <\/code>/);

  // Two spaces between the fences; with one space this case would merely
  // repeat the assertion above.
  const twoSpaces = render("**``  ``**");
  assert.match(twoSpaces, /<code[^>]*>  <\/code>/);
});
|
||||
|
|
@ -109,6 +109,13 @@ function parseFrontMatterAndContent(markdownText: string): ParseResult {
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Serialize raw inline-code text back into Markdown code-span syntax.
 *
 * The fence is one backtick longer than the longest backtick run found in
 * `value`, so no run inside the content can close the span early. Content is
 * padded with one space on each side, which a CommonMark parser strips again
 * when the span is re-parsed. Content made only of spaces and line endings is
 * left unpadded, because CommonMark does not strip such content and padding
 * would therefore add characters to it.
 *
 * @param value Code text without its delimiters.
 * @returns Markdown source for a code span containing `value`.
 */
function wrapInlineCode(value: string): string {
  // Length of the longest run of consecutive backticks (0 when there is none).
  let longestRun = 0;
  for (const run of value.match(/`+/g) ?? []) {
    longestRun = Math.max(longestRun, run.length);
  }
  const fence = "`".repeat(longestRun + 1);

  // Line endings count as spaces inside a code span, so they are treated
  // like spaces when deciding whether padding is safe.
  const padding = /^[ \r\n]*$/.test(value) ? "" : " ";
  return `${fence}${padding}${value}${padding}${fence}`;
}
|
||||
|
||||
export function initRenderer(opts: IOpts = {}): RendererAPI {
|
||||
const footnotes: [number, string, string][] = [];
|
||||
let footnoteIndex = 0;
|
||||
|
|
@ -369,6 +376,7 @@ function preprocessCjkEmphasis(markdown: string): string {
|
|||
const tree = processor.parse(markdown);
|
||||
/**
 * Flatten a syntax-tree node into Markdown text.
 *
 * - `text` nodes contribute their value unchanged.
 * - `inlineCode` nodes are re-wrapped with `wrapInlineCode`, so the result
 *   still contains a code span instead of bare text.
 * - Any node with `children` contributes the concatenation of its children.
 * - Every other node contributes the empty string.
 *
 * The parameter stays `any` because the node type is not declared in the
 * part of the file visible here.
 */
const extractText = (node: any): string => {
  if (node.type === "text") return node.value;
  if (node.type === "inlineCode") return wrapInlineCode(node.value);
  if (node.children) return node.children.map(extractText).join("");
  return "";
};
|
||||
|
|
|
|||
|
|
@ -0,0 +1,64 @@
|
|||
import assert from "node:assert/strict";
|
||||
import test from "node:test";
|
||||
|
||||
import { initRenderer, renderMarkdown } from "./renderer.ts";
|
||||
|
||||
/**
 * Render a Markdown string to HTML for the assertions below.
 *
 * A new renderer is created on every call, so no renderer instance is
 * shared between test cases.
 *
 * @param md Markdown source to render.
 * @returns The `html` field of the `renderMarkdown` result.
 */
const render = (md: string) => {
  const r = initRenderer();
  return renderMarkdown(md, r).html;
};
|
||||
|
||||
// Inline code nested inside CJK bold text must come out as <code> elements.
test("bold with inline code (no underscore)", () => {
  const html = render("**算出 `logits`,算出 `loss`。**");
  assert.match(html, /<code[^>]*>logits<\/code>/);
  assert.match(html, /<code[^>]*>loss<\/code>/);
});

// The code text contains an underscore and must be emitted verbatim.
test("bold with inline code (contains underscore)", () => {
  const html = render("**变成 `input_ids`。**");
  assert.match(html, /<code[^>]*>input_ids<\/code>/);
});

// Same expectation for single-asterisk emphasis.
test("emphasis with inline code", () => {
  const html = render("*查看 `hidden_states`*");
  assert.match(html, /<code[^>]*>hidden_states<\/code>/);
});

// Inline code outside any emphasis keeps rendering as before.
test("plain inline code (regression)", () => {
  const html = render("`lm_head`");
  assert.match(html, /<code[^>]*>lm_head<\/code>/);
});

// Bold text without code must not gain a <code> element.
test("bold without code (regression)", () => {
  const html = render("**纯粗体文本**");
  assert.match(html, /<strong[^>]*>纯粗体文本<\/strong>/);
  assert.doesNotMatch(html, /<code/);
});

// A backtick inside the code text requires a longer fence around it.
test("bold with inline code containing backticks", () => {
  const html = render("**``a`b``**");
  assert.match(html, /<code[^>]*>a`b<\/code>/);
});

// Here the <code> element must also sit directly inside <em>.
test("emphasis with inline code containing backticks", () => {
  const html = render("*``a`b``*");
  assert.match(html, /<em[^>]*><code[^>]*>a`b<\/code><\/em>/);
});

// A run of two backticks inside the code text needs a three-backtick fence.
test("bold with inline code containing consecutive backticks", () => {
  const html = render("**```a``b```**");
  assert.match(html, /<code[^>]*>a``b<\/code>/);
});

// Code text that is nothing but backticks; the padding spaces are not content.
test("bold with inline code containing only backticks", () => {
  const html = render("**```` `` ````**");
  assert.match(html, /<code[^>]*>``<\/code>/);
});

// Code text made only of spaces must keep every space.
test("bold with inline code containing only spaces", () => {
  const oneSpace = render("**`` ``**");
  assert.match(oneSpace, /<code[^>]*> <\/code>/);

  // Two spaces between the fences; with one space this case would merely
  // repeat the assertion above.
  const twoSpaces = render("**``  ``**");
  assert.match(twoSpaces, /<code[^>]*>  <\/code>/);
});
|
||||
|
|
@ -109,6 +109,13 @@ function parseFrontMatterAndContent(markdownText: string): ParseResult {
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Serialize raw inline-code text back into Markdown code-span syntax.
 *
 * The fence is one backtick longer than the longest backtick run found in
 * `value`, so no run inside the content can close the span early. Content is
 * padded with one space on each side, which a CommonMark parser strips again
 * when the span is re-parsed. Content made only of spaces and line endings is
 * left unpadded, because CommonMark does not strip such content and padding
 * would therefore add characters to it.
 *
 * @param value Code text without its delimiters.
 * @returns Markdown source for a code span containing `value`.
 */
function wrapInlineCode(value: string): string {
  // Length of the longest run of consecutive backticks (0 when there is none).
  let longestRun = 0;
  for (const run of value.match(/`+/g) ?? []) {
    longestRun = Math.max(longestRun, run.length);
  }
  const fence = "`".repeat(longestRun + 1);

  // Line endings count as spaces inside a code span, so they are treated
  // like spaces when deciding whether padding is safe.
  const padding = /^[ \r\n]*$/.test(value) ? "" : " ";
  return `${fence}${padding}${value}${padding}${fence}`;
}
|
||||
|
||||
export function initRenderer(opts: IOpts = {}): RendererAPI {
|
||||
const footnotes: [number, string, string][] = [];
|
||||
let footnoteIndex = 0;
|
||||
|
|
@ -369,6 +376,7 @@ function preprocessCjkEmphasis(markdown: string): string {
|
|||
const tree = processor.parse(markdown);
|
||||
/**
 * Flatten a syntax-tree node into Markdown text.
 *
 * - `text` nodes contribute their value unchanged.
 * - `inlineCode` nodes are re-wrapped with `wrapInlineCode`, so the result
 *   still contains a code span instead of bare text.
 * - Any node with `children` contributes the concatenation of its children.
 * - Every other node contributes the empty string.
 *
 * The parameter stays `any` because the node type is not declared in the
 * part of the file visible here.
 */
const extractText = (node: any): string => {
  if (node.type === "text") return node.value;
  if (node.type === "inlineCode") return wrapInlineCode(node.value);
  if (node.children) return node.children.map(extractText).join("");
  return "";
};
|
||||
|
|
|
|||
Loading…
Reference in New Issue