Fix Node-compatible parser tests
This commit is contained in:
parent
a5761dc71a
commit
be601a6fd5
|
|
@ -1,4 +1,5 @@
|
|||
import { describe, expect, test } from "bun:test";
|
||||
import assert from "node:assert/strict";
|
||||
import test from "node:test";
|
||||
|
||||
import {
|
||||
createMarkdownDocument,
|
||||
|
|
@ -129,73 +130,77 @@ function parse(html: string, url: string) {
|
|||
return tryUrlRuleParsers(html, url, baseMetadata);
|
||||
}
|
||||
|
||||
describe("url rule parsers", () => {
|
||||
test("parses archive.ph pages from CONTENT and restores the original URL", () => {
|
||||
const result = parse(ARCHIVE_HTML, "https://archive.ph/SMcX5");
|
||||
test("parses archive.ph pages from CONTENT and restores the original URL", () => {
|
||||
const result = parse(ARCHIVE_HTML, "https://archive.ph/SMcX5");
|
||||
|
||||
expect(result).not.toBeNull();
|
||||
expect(result?.conversionMethod).toBe("parser:archive-ph");
|
||||
expect(result?.metadata.url).toBe(
|
||||
"https://www.newscientist.com/article/2520204-major-leap-towards-reanimation-after-death-as-mammals-brain-preserved/"
|
||||
);
|
||||
expect(result?.metadata.title).toBe(
|
||||
"Major leap towards reanimation after death as mammal brain preserved"
|
||||
);
|
||||
expect(result?.metadata.coverImage).toBe("https://cdn.example.com/brain.jpg");
|
||||
expect(result?.markdown).toContain("Researchers say the preserved structure");
|
||||
expect(result?.markdown).toContain("");
|
||||
expect(result?.markdown).not.toContain("Archive shell text that should be ignored");
|
||||
});
|
||||
|
||||
test("falls back to body when archive.ph CONTENT is missing", () => {
|
||||
const result = parse(ARCHIVE_FALLBACK_HTML, "https://archive.ph/fallback");
|
||||
|
||||
expect(result).not.toBeNull();
|
||||
expect(result?.conversionMethod).toBe("parser:archive-ph");
|
||||
expect(result?.metadata.url).toBe("https://example.com/fallback-story");
|
||||
expect(result?.metadata.title).toBe("Fallback body parsing still works");
|
||||
expect(result?.markdown).toContain("When CONTENT is absent");
|
||||
});
|
||||
|
||||
test("parses X article pages from HTML", () => {
|
||||
const result = parse(
|
||||
ARTICLE_HTML,
|
||||
"https://x.com/dotey/article/2035141635713941927"
|
||||
);
|
||||
|
||||
expect(result).not.toBeNull();
|
||||
expect(result?.conversionMethod).toBe("parser:x-article");
|
||||
expect(result?.metadata.title).toBe("Karpathy:\"写代码\"已经不是对的动词了");
|
||||
expect(result?.metadata.author).toBe("宝玉 (@dotey)");
|
||||
expect(result?.metadata.coverImage).toBe("https://pbs.twimg.com/media/article-cover.jpg");
|
||||
expect(result?.metadata.published).toBe("2026-03-20T23:49:11.000Z");
|
||||
expect(result?.metadata.language).toBe("zh");
|
||||
expect(result?.markdown).toContain("## 要点速览");
|
||||
expect(result?.markdown).toContain(
|
||||
"[](/dotey/article/2035141635713941927/media/2)"
|
||||
);
|
||||
expect(result?.markdown).toContain("写代码已经不是对的动词了。");
|
||||
|
||||
const document = createMarkdownDocument(result!);
|
||||
expect(document).toContain("# Karpathy:\"写代码\"已经不是对的动词了");
|
||||
});
|
||||
|
||||
test("parses X status pages from HTML without duplicating the title heading", () => {
|
||||
const result = parse(
|
||||
STATUS_HTML,
|
||||
"https://x.com/dotey/status/2035590649081196710"
|
||||
);
|
||||
|
||||
expect(result).not.toBeNull();
|
||||
expect(result?.conversionMethod).toBe("parser:x-status");
|
||||
expect(result?.metadata.author).toBe("宝玉 (@dotey)");
|
||||
expect(result?.metadata.coverImage).toBe("https://pbs.twimg.com/media/tweet-main.jpg");
|
||||
expect(result?.metadata.language).toBe("zh");
|
||||
expect(result?.markdown).toContain("转译:把下面这段加到你的 Codex 自定义指令里");
|
||||
expect(result?.markdown).toContain("> Quote from Matt Shumer (@mattshumer_)");
|
||||
expect(result?.markdown).toContain(""));
|
||||
assert.ok(!result.markdown.includes("Archive shell text that should be ignored"));
|
||||
});
|
||||
|
||||
test("falls back to body when archive.ph CONTENT is missing", () => {
|
||||
const result = parse(ARCHIVE_FALLBACK_HTML, "https://archive.ph/fallback");
|
||||
|
||||
assert.ok(result);
|
||||
assert.equal(result.conversionMethod, "parser:archive-ph");
|
||||
assert.equal(result.metadata.url, "https://example.com/fallback-story");
|
||||
assert.equal(result.metadata.title, "Fallback body parsing still works");
|
||||
assert.ok(result.markdown.includes("When CONTENT is absent"));
|
||||
});
|
||||
|
||||
test("parses X article pages from HTML", () => {
|
||||
const result = parse(
|
||||
ARTICLE_HTML,
|
||||
"https://x.com/dotey/article/2035141635713941927"
|
||||
);
|
||||
|
||||
assert.ok(result);
|
||||
assert.equal(result.conversionMethod, "parser:x-article");
|
||||
assert.equal(result.metadata.title, "Karpathy:\"写代码\"已经不是对的动词了");
|
||||
assert.equal(result.metadata.author, "宝玉 (@dotey)");
|
||||
assert.equal(result.metadata.coverImage, "https://pbs.twimg.com/media/article-cover.jpg");
|
||||
assert.equal(result.metadata.published, "2026-03-20T23:49:11.000Z");
|
||||
assert.equal(result.metadata.language, "zh");
|
||||
assert.ok(result.markdown.includes("## 要点速览"));
|
||||
assert.ok(
|
||||
result.markdown.includes(
|
||||
"[](/dotey/article/2035141635713941927/media/2)"
|
||||
)
|
||||
);
|
||||
assert.ok(result.markdown.includes("写代码已经不是对的动词了。"));
|
||||
|
||||
const document = createMarkdownDocument(result);
|
||||
assert.ok(document.includes("# Karpathy:\"写代码\"已经不是对的动词了"));
|
||||
});
|
||||
|
||||
test("parses X status pages from HTML without duplicating the title heading", () => {
|
||||
const result = parse(
|
||||
STATUS_HTML,
|
||||
"https://x.com/dotey/status/2035590649081196710"
|
||||
);
|
||||
|
||||
assert.ok(result);
|
||||
assert.equal(result.conversionMethod, "parser:x-status");
|
||||
assert.equal(result.metadata.author, "宝玉 (@dotey)");
|
||||
assert.equal(result.metadata.coverImage, "https://pbs.twimg.com/media/tweet-main.jpg");
|
||||
assert.equal(result.metadata.language, "zh");
|
||||
assert.ok(result.markdown.includes("转译:把下面这段加到你的 Codex 自定义指令里"));
|
||||
assert.ok(result.markdown.includes("> Quote from Matt Shumer (@mattshumer_)"));
|
||||
assert.ok(result.markdown.includes("!["));
|
||||
|
||||
const document = createMarkdownDocument(result);
|
||||
assert.ok(
|
||||
!document.includes("\n\n# 转译:把下面这段加到你的 Codex 自定义指令里,体验会好太多:\n\n")
|
||||
);
|
||||
});
|
||||
|
|
|
|||
Loading…
Reference in New Issue