JimLiu-baoyu-skills/packages/baoyu-md/src/content.ts

146 lines
4.4 KiB
TypeScript

import { Lexer } from "marked";
/** Flat frontmatter mapping in which every key holds a single string value. */
export type FrontmatterFields = Record<string, string>;
/**
 * Splits a document into its leading `---` delimited frontmatter and the
 * remaining body.
 *
 * Only flat `key: value` lines are understood: each line is split at its first
 * colon, both sides are trimmed, and wrapping quotes are removed from the
 * value. Lines with no colon, or whose first character is the colon, are
 * ignored.
 *
 * @param content - Full document text.
 * @returns The parsed fields and the text after the closing delimiter. When no
 * frontmatter block is found, `frontmatter` is empty and `body` is `content`
 * unchanged.
 */
export function parseFrontmatter(content: string): {
  frontmatter: FrontmatterFields;
  body: string;
} {
  const parsed = /^\s*---\r?\n([\s\S]*?)\r?\n---\r?\n?([\s\S]*)$/.exec(content);
  if (parsed === null) {
    return { frontmatter: {}, body: content };
  }

  // Both capture groups always participate in a successful match, so the
  // fallbacks below only exist to satisfy the type checker.
  const header = parsed[1] ?? "";
  const body = parsed[2] ?? "";

  const fields: FrontmatterFields = {};
  for (const rawLine of header.split("\n")) {
    const separator = rawLine.indexOf(":");
    // A usable line needs a colon with at least one character before it.
    if (separator > 0) {
      const name = rawLine.slice(0, separator).trim();
      const rawValue = rawLine.slice(separator + 1).trim();
      fields[name] = stripWrappingQuotes(rawValue);
    }
  }

  return { frontmatter: fields, body };
}
/**
 * Renders frontmatter fields as a `---` delimited block with one
 * `key: value` line per field, in the object's own key order.
 *
 * Values are written verbatim; no quoting or escaping is applied.
 *
 * @param frontmatter - Fields to serialize.
 * @returns The block terminated by a newline, or an empty string when there
 * are no fields.
 */
export function serializeFrontmatter(frontmatter: FrontmatterFields): string {
  const fieldLines: string[] = [];
  for (const key of Object.keys(frontmatter)) {
    fieldLines.push(`${key}: ${frontmatter[key]}`);
  }

  if (fieldLines.length === 0) {
    return "";
  }

  // The trailing empty element yields the final newline after the closing fence.
  return ["---", ...fieldLines, "---", ""].join("\n");
}
/**
 * Trims a value and removes one pair of wrapping quotes, if present.
 *
 * Recognized pairs are ASCII double quotes, ASCII single quotes, and the
 * curly double (U+201C/U+201D) and curly single (U+2018/U+2019) quotes. The
 * text inside the quotes is trimmed as well.
 *
 * Surrounding whitespace is removed before the quotes are looked for, so a
 * padded value such as `  "x"  ` is still unwrapped. A value needs at least
 * two characters to count as wrapped, so a lone quote character is returned
 * as-is rather than being reduced to an empty string.
 *
 * @param value - Raw value, possibly quoted and/or padded with whitespace.
 * @returns The unwrapped, trimmed value. Empty input is returned unchanged.
 */
export function stripWrappingQuotes(value: string): string {
  if (!value) return value;

  const trimmed = value.trim();
  // One character cannot be both the opening and the closing quote.
  if (trimmed.length < 2) return trimmed;

  const quotePairs: ReadonlyArray<readonly [string, string]> = [
    ['"', '"'],
    ["'", "'"],
    ["\u201c", "\u201d"],
    ["\u2018", "\u2019"],
  ];
  const isWrapped = quotePairs.some(
    ([open, close]) => trimmed.startsWith(open) && trimmed.endsWith(close),
  );

  return isWrapped ? trimmed.slice(1, -1).trim() : trimmed;
}
/**
 * Named HTML entities understood by `decodeHtmlEntities`, keyed by the
 * lowercase entity name without the leading `&` and trailing `;`.
 */
const HTML_ENTITIES: Record<string, string> = {
  amp: "&",
  apos: "'",
  gt: ">",
  lt: "<",
  nbsp: " ",
  quot: '"',
};

/**
 * Converts a numeric character reference's code point to its character.
 *
 * @param codePoint - Parsed code point; may be `NaN` when parsing failed.
 * @param fallback - Text to return when the code point cannot be decoded.
 * @returns The character, or `fallback` when the code point is not finite, is
 * outside 0..0x10FFFF, or is a surrogate (which would produce an ill-formed
 * string on its own).
 */
function decodeHtmlCodePoint(codePoint: number, fallback: string): string {
  if (!Number.isFinite(codePoint) || codePoint < 0 || codePoint > 0x10ffff) {
    return fallback;
  }
  if (codePoint >= 0xd800 && codePoint <= 0xdfff) {
    return fallback;
  }
  return String.fromCodePoint(codePoint);
}

/**
 * Decodes hexadecimal (`&#x41;`), decimal (`&#65;`) and the named entities
 * listed in `HTML_ENTITIES`. Matching is case-insensitive. Anything that
 * cannot be decoded is left in the text exactly as written.
 *
 * @param value - Text that may contain HTML entities.
 * @returns The text with every recognized entity replaced by its character.
 */
function decodeHtmlEntities(value: string): string {
  return value.replace(/&(#x?[0-9a-f]+|[a-z]+);/gi, (entity, body: string) => {
    const normalized = body.toLowerCase();

    if (normalized.startsWith("#x")) {
      return decodeHtmlCodePoint(Number.parseInt(normalized.slice(2), 16), entity);
    }

    if (normalized.startsWith("#")) {
      const digits = normalized.slice(1);
      // The pattern also admits hex letters after a bare "#"; parseInt would
      // silently drop them ("1f" -> 1), so require a purely decimal body.
      if (!/^[0-9]+$/.test(digits)) return entity;
      return decodeHtmlCodePoint(Number.parseInt(digits, 10), entity);
    }

    // Own-property check: a plain index lookup would also find inherited
    // members such as "constructor" and splice a function into the text.
    const named = Object.prototype.hasOwnProperty.call(HTML_ENTITIES, normalized)
      ? HTML_ENTITIES[normalized]
      : undefined;
    return named ?? entity;
  });
}
/**
 * Reduces a summary candidate to plain single-line text.
 *
 * Steps, in order: remove wrapping quotes, decode HTML entities, drop
 * `<script>` and `<style>` elements together with their content, replace
 * `<br>` and any remaining tags with a space, collapse whitespace runs to a
 * single space, and trim.
 *
 * Entities are decoded before tags are removed, so markup that arrives
 * entity-escaped (for example `&lt;b&gt;`) is stripped as well.
 *
 * @param value - Raw summary text, possibly containing HTML.
 * @returns The cleaned text.
 */
export function cleanSummaryText(value: string): string {
  const decoded = decodeHtmlEntities(stripWrappingQuotes(value));

  // Applied in sequence; every match is replaced by a single space.
  const spacedOut: readonly RegExp[] = [
    /<script\b[\s\S]*?<\/script>/gi,
    /<style\b[\s\S]*?<\/style>/gi,
    /<br\s*\/?>/gi,
    /<\/?[a-z][a-z0-9:-]*(?:\s+[^>]*)?>/gi,
    /\s+/g,
  ];

  let text = decoded;
  for (const pattern of spacedOut) {
    text = text.replace(pattern, " ");
  }
  return text.trim();
}
/**
 * Converts a loosely typed frontmatter value to a string.
 *
 * @param value - Value of unknown type.
 * @returns The string with wrapping quotes removed, the `String()` form of a
 * number or boolean, or `undefined` for every other type.
 */
export function toFrontmatterString(value: unknown): string | undefined {
  switch (typeof value) {
    case "string":
      return stripWrappingQuotes(value);
    case "number":
    case "boolean":
      return String(value);
    default:
      return undefined;
  }
}
/**
 * Returns the first non-empty string value found under the given keys.
 *
 * Keys are tried in order and each value is converted with
 * `toFrontmatterString`; values that convert to `undefined` or to an empty
 * string are skipped.
 *
 * @param frontmatter - Object to read from.
 * @param keys - Candidate keys, in priority order.
 * @returns The first usable value, or `undefined` when none of the keys has one.
 */
export function pickFirstString(
  frontmatter: Record<string, unknown>,
  keys: string[],
): string | undefined {
  for (const candidateKey of keys) {
    const candidate = toFrontmatterString(frontmatter[candidateKey]);
    if (candidate !== undefined && candidate !== "") {
      return candidate;
    }
  }
  return undefined;
}
/**
 * Finds the document title: the text of the first level-1 or level-2 heading
 * reported by the `marked` lexer, with wrapping quotes removed.
 *
 * @param markdown - Markdown source.
 * @returns The heading text, or an empty string when the lexer's token list
 * contains no such heading.
 */
export function extractTitleFromMarkdown(markdown: string): string {
  for (const token of Lexer.lex(markdown, { gfm: true, breaks: true })) {
    if (token.type === "heading" && (token.depth === 1 || token.depth === 2)) {
      return stripWrappingQuotes(token.text);
    }
  }
  return "";
}
/** Marker appended to a summary that had to be shortened. */
const SUMMARY_ELLIPSIS = "...";

/** Describes a code-fence line: its marker character, run length and trailing text. */
function matchSummaryFence(
  trimmed: string,
): { marker: string; size: number; rest: string } | undefined {
  const found = /^(`{3,}|~{3,})([\s\S]*)$/.exec(trimmed);
  if (found === null) return undefined;
  const run = found[1] ?? "";
  return { marker: run.charAt(0), size: run.length, rest: (found[2] ?? "").trim() };
}

/** Shortens text to at most `maxLen` UTF-16 units, ending in an ellipsis when cut. */
function truncateSummaryText(text: string, maxLen: number): string {
  if (text.length <= maxLen) return text;

  // Too small to hold any text plus the ellipsis: return as much of the
  // ellipsis as fits rather than letting a negative slice index overshoot.
  if (maxLen <= SUMMARY_ELLIPSIS.length) {
    return SUMMARY_ELLIPSIS.slice(0, Math.max(0, maxLen));
  }

  let cut = maxLen - SUMMARY_ELLIPSIS.length;
  // Never leave the first half of a surrogate pair dangling at the cut.
  const lastUnit = text.charCodeAt(cut - 1);
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) cut -= 1;

  return `${text.slice(0, cut)}${SUMMARY_ELLIPSIS}`;
}

/**
 * Picks the first paragraph-like line of a Markdown body to use as a summary.
 *
 * Blank lines, headings, images, blockquotes, lines starting with `-` or `*`
 * (list items, and also thematic breaks and lines that open with emphasis),
 * ordered-list items and everything inside fenced code blocks are skipped.
 * Bold, italic, link and inline-code markup is removed from a candidate line
 * before it is cleaned with `cleanSummaryText`; the first candidate longer
 * than 20 characters wins.
 *
 * @param body - Markdown body (frontmatter already removed).
 * @param maxLen - Maximum length of the returned summary in UTF-16 units. A
 * longer summary is cut and ends in `...`.
 * @returns The summary, or an empty string when no line qualifies.
 */
export function extractSummaryFromBody(body: string, maxLen: number): string {
  let openFence: { marker: string; size: number } | undefined;

  for (const line of body.split("\n")) {
    const trimmed = line.trim();
    const fence = matchSummaryFence(trimmed);

    if (openFence !== undefined) {
      // Inside a fenced block every line is code. Only a bare fence of the
      // same character, at least as long as the opener, closes it.
      if (
        fence !== undefined &&
        fence.marker === openFence.marker &&
        fence.size >= openFence.size &&
        fence.rest === ""
      ) {
        openFence = undefined;
      }
      continue;
    }

    // A backtick fence cannot have backticks in its info string; such a line
    // is inline code and must not swallow the rest of the document.
    if (fence !== undefined && !(fence.marker === "`" && fence.rest.includes("`"))) {
      openFence = { marker: fence.marker, size: fence.size };
      continue;
    }

    if (!trimmed) continue;
    if (trimmed.startsWith("#")) continue;
    if (trimmed.startsWith("![")) continue;
    if (trimmed.startsWith(">")) continue;
    if (trimmed.startsWith("-") || trimmed.startsWith("*")) continue;
    if (/^\d+\./.test(trimmed)) continue;
    if (trimmed.startsWith("```")) continue;

    const cleanText = trimmed
      .replace(/\*\*(.+?)\*\*/g, "$1")
      .replace(/\*(.+?)\*/g, "$1")
      .replace(/\[([^\]]+)\]\([^)]+\)/g, "$1")
      .replace(/`([^`]+)`/g, "$1");
    const summaryText = cleanSummaryText(cleanText);

    // Very short lines (captions, stray labels) make poor summaries.
    if (summaryText.length > 20) {
      return truncateSummaryText(summaryText, maxLen);
    }
  }

  return "";
}