fix: preserve inline code in cjk emphasis
This commit is contained in:
parent
38fc733b99
commit
2aa9790789
|
|
@ -0,0 +1,64 @@
|
||||||
|
import assert from "node:assert/strict";
|
||||||
|
import test from "node:test";
|
||||||
|
|
||||||
|
import { initRenderer, renderMarkdown } from "./renderer.ts";
|
||||||
|
|
||||||
|
/** Renders `md` with a freshly created renderer and returns the `html` field of the result. */
const render = (md: string) => renderMarkdown(md, initRenderer()).html;
|
||||||
|
|
||||||
|
// Inline code nested in bold/italic text must still be emitted as <code>.
test("bold with inline code (no underscore)", () => {
  const html = render("**算出 `logits`,算出 `loss`。**");
  assert.match(html, /<code[^>]*>logits<\/code>/);
  assert.match(html, /<code[^>]*>loss<\/code>/);
});

test("bold with inline code (contains underscore)", () => {
  const html = render("**变成 `input_ids`。**");
  assert.match(html, /<code[^>]*>input_ids<\/code>/);
});

test("emphasis with inline code", () => {
  const html = render("*查看 `hidden_states`*");
  assert.match(html, /<code[^>]*>hidden_states<\/code>/);
});

// Regression guards: code outside emphasis, and emphasis without code.
test("plain inline code (regression)", () => {
  const html = render("`lm_head`");
  assert.match(html, /<code[^>]*>lm_head<\/code>/);
});

test("bold without code (regression)", () => {
  const html = render("**纯粗体文本**");
  assert.match(html, /<strong[^>]*>纯粗体文本<\/strong>/);
  assert.doesNotMatch(html, /<code/);
});

// Code spans whose content itself contains backticks need a longer fence.
test("bold with inline code containing backticks", () => {
  const html = render("**``a`b``**");
  assert.match(html, /<code[^>]*>a`b<\/code>/);
});

test("emphasis with inline code containing backticks", () => {
  const html = render("*``a`b``*");
  assert.match(html, /<em[^>]*><code[^>]*>a`b<\/code><\/em>/);
});

test("bold with inline code containing consecutive backticks", () => {
  const html = render("**```a``b```**");
  assert.match(html, /<code[^>]*>a``b<\/code>/);
});

test("bold with inline code containing only backticks", () => {
  const html = render("**```` `` ````**");
  assert.match(html, /<code[^>]*>``<\/code>/);
});

test("bold with inline code containing only spaces", () => {
  // Per CommonMark, a code span made up solely of spaces is not trimmed,
  // so the one-space and two-space spans are distinct cases.
  const oneSpace = render("**`` ``**");
  assert.match(oneSpace, /<code[^>]*> <\/code>/);

  // NOTE(review): this case was previously byte-identical to `oneSpace`
  // (whitespace looked collapsed); restored to two spaces — confirm against
  // the repository copy.
  const twoSpaces = render("**``  ``**");
  assert.match(twoSpaces, /<code[^>]*> {2}<\/code>/);
});
|
||||||
|
|
@ -109,6 +109,13 @@ function parseFrontMatterAndContent(markdownText: string): ParseResult {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Serialises an inline-code value back into Markdown code-span source.
 *
 * The fence is one backtick longer than the longest backtick run found in
 * `value`, so no run inside the content can terminate the span early. Unless
 * `value` is empty or consists only of spaces, one space is added on each
 * side; CommonMark strips exactly one such leading/trailing space pair when
 * parsing, so content that starts or ends with a backtick or a space survives
 * the round trip. All-space content is left unpadded because CommonMark does
 * not strip spaces from a span that contains nothing else.
 *
 * @param value - Raw content of the code span (without delimiters).
 * @returns Markdown source for a code span wrapping `value`.
 */
function wrapInlineCode(value: string): string {
  // Track the maximum with a loop instead of `Math.max(...lengths)`: spreading
  // one argument per backtick run can exceed the engine's argument limit on
  // pathological input.
  let longestRun = 0;
  for (const run of value.match(/`+/g) ?? []) {
    longestRun = Math.max(longestRun, run.length);
  }
  const fence = "`".repeat(longestRun + 1);
  const padding = /^ *$/.test(value) ? "" : " ";
  return `${fence}${padding}${value}${padding}${fence}`;
}
|
||||||
|
|
||||||
export function initRenderer(opts: IOpts = {}): RendererAPI {
|
export function initRenderer(opts: IOpts = {}): RendererAPI {
|
||||||
const footnotes: [number, string, string][] = [];
|
const footnotes: [number, string, string][] = [];
|
||||||
let footnoteIndex = 0;
|
let footnoteIndex = 0;
|
||||||
|
|
@ -369,6 +376,7 @@ function preprocessCjkEmphasis(markdown: string): string {
|
||||||
const tree = processor.parse(markdown);
|
const tree = processor.parse(markdown);
|
||||||
const extractText = (node: any): string => {
|
const extractText = (node: any): string => {
|
||||||
if (node.type === "text") return node.value;
|
if (node.type === "text") return node.value;
|
||||||
|
if (node.type === "inlineCode") return wrapInlineCode(node.value);
|
||||||
if (node.children) return node.children.map(extractText).join("");
|
if (node.children) return node.children.map(extractText).join("");
|
||||||
return "";
|
return "";
|
||||||
};
|
};
|
||||||
|
|
|
||||||
|
|
@ -34,3 +34,31 @@ test("bold without code (regression)", () => {
|
||||||
assert.match(html, /<strong[^>]*>纯粗体文本<\/strong>/);
|
assert.match(html, /<strong[^>]*>纯粗体文本<\/strong>/);
|
||||||
assert.doesNotMatch(html, /<code/);
|
assert.doesNotMatch(html, /<code/);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Code spans whose content itself contains backticks need a longer fence.
test("bold with inline code containing backticks", () => {
  const html = render("**``a`b``**");
  assert.match(html, /<code[^>]*>a`b<\/code>/);
});

test("emphasis with inline code containing backticks", () => {
  const html = render("*``a`b``*");
  assert.match(html, /<em[^>]*><code[^>]*>a`b<\/code><\/em>/);
});

test("bold with inline code containing consecutive backticks", () => {
  const html = render("**```a``b```**");
  assert.match(html, /<code[^>]*>a``b<\/code>/);
});

test("bold with inline code containing only backticks", () => {
  const html = render("**```` `` ````**");
  assert.match(html, /<code[^>]*>``<\/code>/);
});

test("bold with inline code containing only spaces", () => {
  // Per CommonMark, a code span made up solely of spaces is not trimmed,
  // so the one-space and two-space spans are distinct cases.
  const oneSpace = render("**`` ``**");
  assert.match(oneSpace, /<code[^>]*> <\/code>/);

  // NOTE(review): this case was previously byte-identical to `oneSpace`
  // (whitespace looked collapsed); restored to two spaces — confirm against
  // the repository copy.
  const twoSpaces = render("**``  ``**");
  assert.match(twoSpaces, /<code[^>]*> {2}<\/code>/);
});
|
||||||
|
|
|
||||||
|
|
@ -109,6 +109,13 @@ function parseFrontMatterAndContent(markdownText: string): ParseResult {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Serialises an inline-code value back into Markdown code-span source.
 *
 * The fence is one backtick longer than the longest backtick run found in
 * `value`, so no run inside the content can terminate the span early. Unless
 * `value` is empty or consists only of spaces, one space is added on each
 * side; CommonMark strips exactly one such leading/trailing space pair when
 * parsing, so content that starts or ends with a backtick or a space survives
 * the round trip. All-space content is left unpadded because CommonMark does
 * not strip spaces from a span that contains nothing else.
 *
 * @param value - Raw content of the code span (without delimiters).
 * @returns Markdown source for a code span wrapping `value`.
 */
function wrapInlineCode(value: string): string {
  // Track the maximum with a loop instead of `Math.max(...lengths)`: spreading
  // one argument per backtick run can exceed the engine's argument limit on
  // pathological input.
  let longestRun = 0;
  for (const run of value.match(/`+/g) ?? []) {
    longestRun = Math.max(longestRun, run.length);
  }
  const fence = "`".repeat(longestRun + 1);
  const padding = /^ *$/.test(value) ? "" : " ";
  return `${fence}${padding}${value}${padding}${fence}`;
}
|
||||||
|
|
||||||
export function initRenderer(opts: IOpts = {}): RendererAPI {
|
export function initRenderer(opts: IOpts = {}): RendererAPI {
|
||||||
const footnotes: [number, string, string][] = [];
|
const footnotes: [number, string, string][] = [];
|
||||||
let footnoteIndex = 0;
|
let footnoteIndex = 0;
|
||||||
|
|
@ -369,7 +376,7 @@ function preprocessCjkEmphasis(markdown: string): string {
|
||||||
const tree = processor.parse(markdown);
|
const tree = processor.parse(markdown);
|
||||||
const extractText = (node: any): string => {
|
const extractText = (node: any): string => {
|
||||||
if (node.type === "text") return node.value;
|
if (node.type === "text") return node.value;
|
||||||
if (node.type === "inlineCode") return `\`${node.value}\``;
|
if (node.type === "inlineCode") return wrapInlineCode(node.value);
|
||||||
if (node.children) return node.children.map(extractText).join("");
|
if (node.children) return node.children.map(extractText).join("");
|
||||||
return "";
|
return "";
|
||||||
};
|
};
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,64 @@
|
||||||
|
import assert from "node:assert/strict";
|
||||||
|
import test from "node:test";
|
||||||
|
|
||||||
|
import { initRenderer, renderMarkdown } from "./renderer.ts";
|
||||||
|
|
||||||
|
/** Renders `md` with a freshly created renderer and returns the `html` field of the result. */
const render = (md: string) => renderMarkdown(md, initRenderer()).html;
|
||||||
|
|
||||||
|
// Inline code nested in bold/italic text must still be emitted as <code>.
test("bold with inline code (no underscore)", () => {
  const html = render("**算出 `logits`,算出 `loss`。**");
  assert.match(html, /<code[^>]*>logits<\/code>/);
  assert.match(html, /<code[^>]*>loss<\/code>/);
});

test("bold with inline code (contains underscore)", () => {
  const html = render("**变成 `input_ids`。**");
  assert.match(html, /<code[^>]*>input_ids<\/code>/);
});

test("emphasis with inline code", () => {
  const html = render("*查看 `hidden_states`*");
  assert.match(html, /<code[^>]*>hidden_states<\/code>/);
});

// Regression guards: code outside emphasis, and emphasis without code.
test("plain inline code (regression)", () => {
  const html = render("`lm_head`");
  assert.match(html, /<code[^>]*>lm_head<\/code>/);
});

test("bold without code (regression)", () => {
  const html = render("**纯粗体文本**");
  assert.match(html, /<strong[^>]*>纯粗体文本<\/strong>/);
  assert.doesNotMatch(html, /<code/);
});

// Code spans whose content itself contains backticks need a longer fence.
test("bold with inline code containing backticks", () => {
  const html = render("**``a`b``**");
  assert.match(html, /<code[^>]*>a`b<\/code>/);
});

test("emphasis with inline code containing backticks", () => {
  const html = render("*``a`b``*");
  assert.match(html, /<em[^>]*><code[^>]*>a`b<\/code><\/em>/);
});

test("bold with inline code containing consecutive backticks", () => {
  const html = render("**```a``b```**");
  assert.match(html, /<code[^>]*>a``b<\/code>/);
});

test("bold with inline code containing only backticks", () => {
  const html = render("**```` `` ````**");
  assert.match(html, /<code[^>]*>``<\/code>/);
});

test("bold with inline code containing only spaces", () => {
  // Per CommonMark, a code span made up solely of spaces is not trimmed,
  // so the one-space and two-space spans are distinct cases.
  const oneSpace = render("**`` ``**");
  assert.match(oneSpace, /<code[^>]*> <\/code>/);

  // NOTE(review): this case was previously byte-identical to `oneSpace`
  // (whitespace looked collapsed); restored to two spaces — confirm against
  // the repository copy.
  const twoSpaces = render("**``  ``**");
  assert.match(twoSpaces, /<code[^>]*> {2}<\/code>/);
});
|
||||||
|
|
@ -109,6 +109,13 @@ function parseFrontMatterAndContent(markdownText: string): ParseResult {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Serialises an inline-code value back into Markdown code-span source.
 *
 * The fence is one backtick longer than the longest backtick run found in
 * `value`, so no run inside the content can terminate the span early. Unless
 * `value` is empty or consists only of spaces, one space is added on each
 * side; CommonMark strips exactly one such leading/trailing space pair when
 * parsing, so content that starts or ends with a backtick or a space survives
 * the round trip. All-space content is left unpadded because CommonMark does
 * not strip spaces from a span that contains nothing else.
 *
 * @param value - Raw content of the code span (without delimiters).
 * @returns Markdown source for a code span wrapping `value`.
 */
function wrapInlineCode(value: string): string {
  // Track the maximum with a loop instead of `Math.max(...lengths)`: spreading
  // one argument per backtick run can exceed the engine's argument limit on
  // pathological input.
  let longestRun = 0;
  for (const run of value.match(/`+/g) ?? []) {
    longestRun = Math.max(longestRun, run.length);
  }
  const fence = "`".repeat(longestRun + 1);
  const padding = /^ *$/.test(value) ? "" : " ";
  return `${fence}${padding}${value}${padding}${fence}`;
}
|
||||||
|
|
||||||
export function initRenderer(opts: IOpts = {}): RendererAPI {
|
export function initRenderer(opts: IOpts = {}): RendererAPI {
|
||||||
const footnotes: [number, string, string][] = [];
|
const footnotes: [number, string, string][] = [];
|
||||||
let footnoteIndex = 0;
|
let footnoteIndex = 0;
|
||||||
|
|
@ -369,6 +376,7 @@ function preprocessCjkEmphasis(markdown: string): string {
|
||||||
const tree = processor.parse(markdown);
|
const tree = processor.parse(markdown);
|
||||||
const extractText = (node: any): string => {
|
const extractText = (node: any): string => {
|
||||||
if (node.type === "text") return node.value;
|
if (node.type === "text") return node.value;
|
||||||
|
if (node.type === "inlineCode") return wrapInlineCode(node.value);
|
||||||
if (node.children) return node.children.map(extractText).join("");
|
if (node.children) return node.children.map(extractText).join("");
|
||||||
return "";
|
return "";
|
||||||
};
|
};
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,64 @@
|
||||||
|
import assert from "node:assert/strict";
|
||||||
|
import test from "node:test";
|
||||||
|
|
||||||
|
import { initRenderer, renderMarkdown } from "./renderer.ts";
|
||||||
|
|
||||||
|
/** Renders `md` with a freshly created renderer and returns the `html` field of the result. */
const render = (md: string) => renderMarkdown(md, initRenderer()).html;
|
||||||
|
|
||||||
|
// Inline code nested in bold/italic text must still be emitted as <code>.
test("bold with inline code (no underscore)", () => {
  const html = render("**算出 `logits`,算出 `loss`。**");
  assert.match(html, /<code[^>]*>logits<\/code>/);
  assert.match(html, /<code[^>]*>loss<\/code>/);
});

test("bold with inline code (contains underscore)", () => {
  const html = render("**变成 `input_ids`。**");
  assert.match(html, /<code[^>]*>input_ids<\/code>/);
});

test("emphasis with inline code", () => {
  const html = render("*查看 `hidden_states`*");
  assert.match(html, /<code[^>]*>hidden_states<\/code>/);
});

// Regression guards: code outside emphasis, and emphasis without code.
test("plain inline code (regression)", () => {
  const html = render("`lm_head`");
  assert.match(html, /<code[^>]*>lm_head<\/code>/);
});

test("bold without code (regression)", () => {
  const html = render("**纯粗体文本**");
  assert.match(html, /<strong[^>]*>纯粗体文本<\/strong>/);
  assert.doesNotMatch(html, /<code/);
});

// Code spans whose content itself contains backticks need a longer fence.
test("bold with inline code containing backticks", () => {
  const html = render("**``a`b``**");
  assert.match(html, /<code[^>]*>a`b<\/code>/);
});

test("emphasis with inline code containing backticks", () => {
  const html = render("*``a`b``*");
  assert.match(html, /<em[^>]*><code[^>]*>a`b<\/code><\/em>/);
});

test("bold with inline code containing consecutive backticks", () => {
  const html = render("**```a``b```**");
  assert.match(html, /<code[^>]*>a``b<\/code>/);
});

test("bold with inline code containing only backticks", () => {
  const html = render("**```` `` ````**");
  assert.match(html, /<code[^>]*>``<\/code>/);
});

test("bold with inline code containing only spaces", () => {
  // Per CommonMark, a code span made up solely of spaces is not trimmed,
  // so the one-space and two-space spans are distinct cases.
  const oneSpace = render("**`` ``**");
  assert.match(oneSpace, /<code[^>]*> <\/code>/);

  // NOTE(review): this case was previously byte-identical to `oneSpace`
  // (whitespace looked collapsed); restored to two spaces — confirm against
  // the repository copy.
  const twoSpaces = render("**``  ``**");
  assert.match(twoSpaces, /<code[^>]*> {2}<\/code>/);
});
|
||||||
|
|
@ -109,6 +109,13 @@ function parseFrontMatterAndContent(markdownText: string): ParseResult {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Serialises an inline-code value back into Markdown code-span source.
 *
 * The fence is one backtick longer than the longest backtick run found in
 * `value`, so no run inside the content can terminate the span early. Unless
 * `value` is empty or consists only of spaces, one space is added on each
 * side; CommonMark strips exactly one such leading/trailing space pair when
 * parsing, so content that starts or ends with a backtick or a space survives
 * the round trip. All-space content is left unpadded because CommonMark does
 * not strip spaces from a span that contains nothing else.
 *
 * @param value - Raw content of the code span (without delimiters).
 * @returns Markdown source for a code span wrapping `value`.
 */
function wrapInlineCode(value: string): string {
  // Track the maximum with a loop instead of `Math.max(...lengths)`: spreading
  // one argument per backtick run can exceed the engine's argument limit on
  // pathological input.
  let longestRun = 0;
  for (const run of value.match(/`+/g) ?? []) {
    longestRun = Math.max(longestRun, run.length);
  }
  const fence = "`".repeat(longestRun + 1);
  const padding = /^ *$/.test(value) ? "" : " ";
  return `${fence}${padding}${value}${padding}${fence}`;
}
|
||||||
|
|
||||||
export function initRenderer(opts: IOpts = {}): RendererAPI {
|
export function initRenderer(opts: IOpts = {}): RendererAPI {
|
||||||
const footnotes: [number, string, string][] = [];
|
const footnotes: [number, string, string][] = [];
|
||||||
let footnoteIndex = 0;
|
let footnoteIndex = 0;
|
||||||
|
|
@ -369,6 +376,7 @@ function preprocessCjkEmphasis(markdown: string): string {
|
||||||
const tree = processor.parse(markdown);
|
const tree = processor.parse(markdown);
|
||||||
const extractText = (node: any): string => {
|
const extractText = (node: any): string => {
|
||||||
if (node.type === "text") return node.value;
|
if (node.type === "text") return node.value;
|
||||||
|
if (node.type === "inlineCode") return wrapInlineCode(node.value);
|
||||||
if (node.children) return node.children.map(extractText).join("");
|
if (node.children) return node.children.map(extractText).join("");
|
||||||
return "";
|
return "";
|
||||||
};
|
};
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue