Fix Node-compatible parser tests

2026-03-23 12:08:51 -05:00 · 2026-03-23 12:08:51 -05:00 · be601a6fd5
parent a5761dc71a
commit be601a6fd5
1 changed file with 74 additions and 69 deletions
--- a/skills/baoyu-url-to-markdown/scripts/parsers/index.test.ts
+++ b/skills/baoyu-url-to-markdown/scripts/parsers/index.test.ts
@@ -1,4 +1,5 @@
-import { describe, expect, test } from "bun:test";
+import assert from "node:assert/strict";
+import test from "node:test";

 import {
  createMarkdownDocument,
@@ -129,73 +130,77 @@ function parse(html: string, url: string) {
  return tryUrlRuleParsers(html, url, baseMetadata);
 }

-describe("url rule parsers", () => {
-  test("parses archive.ph pages from CONTENT and restores the original URL", () => {
-    const result = parse(ARCHIVE_HTML, "https://archive.ph/SMcX5");
+test("parses archive.ph pages from CONTENT and restores the original URL", () => {
+  const result = parse(ARCHIVE_HTML, "https://archive.ph/SMcX5");

-    expect(result).not.toBeNull();
-    expect(result?.conversionMethod).toBe("parser:archive-ph");
-    expect(result?.metadata.url).toBe(
-      "https://www.newscientist.com/article/2520204-major-leap-towards-reanimation-after-death-as-mammals-brain-preserved/"
-    );
-    expect(result?.metadata.title).toBe(
-      "Major leap towards reanimation after death as mammal brain preserved"
-    );
-    expect(result?.metadata.coverImage).toBe("https://cdn.example.com/brain.jpg");
-    expect(result?.markdown).toContain("Researchers say the preserved structure");
-    expect(result?.markdown).toContain("![Brain tissue](https://cdn.example.com/brain.jpg)");
-    expect(result?.markdown).not.toContain("Archive shell text that should be ignored");
-  });
-
-  test("falls back to body when archive.ph CONTENT is missing", () => {
-    const result = parse(ARCHIVE_FALLBACK_HTML, "https://archive.ph/fallback");
-
-    expect(result).not.toBeNull();
-    expect(result?.conversionMethod).toBe("parser:archive-ph");
-    expect(result?.metadata.url).toBe("https://example.com/fallback-story");
-    expect(result?.metadata.title).toBe("Fallback body parsing still works");
-    expect(result?.markdown).toContain("When CONTENT is absent");
-  });
-
-  test("parses X article pages from HTML", () => {
-    const result = parse(
-      ARTICLE_HTML,
-      "https://x.com/dotey/article/2035141635713941927"
-    );
-
-    expect(result).not.toBeNull();
-    expect(result?.conversionMethod).toBe("parser:x-article");
-    expect(result?.metadata.title).toBe("Karpathy：\"写代码\"已经不是对的动词了");
-    expect(result?.metadata.author).toBe("宝玉 (@dotey)");
-    expect(result?.metadata.coverImage).toBe("https://pbs.twimg.com/media/article-cover.jpg");
-    expect(result?.metadata.published).toBe("2026-03-20T23:49:11.000Z");
-    expect(result?.metadata.language).toBe("zh");
-    expect(result?.markdown).toContain("## 要点速览");
-    expect(result?.markdown).toContain(
-      "[![](https://pbs.twimg.com/media/article-inline.jpg)](/dotey/article/2035141635713941927/media/2)"
-    );
-    expect(result?.markdown).toContain("写代码已经不是对的动词了。");
-
-    const document = createMarkdownDocument(result!);
-    expect(document).toContain("# Karpathy：\"写代码\"已经不是对的动词了");
-  });
-
-  test("parses X status pages from HTML without duplicating the title heading", () => {
-    const result = parse(
-      STATUS_HTML,
-      "https://x.com/dotey/status/2035590649081196710"
-    );
-
-    expect(result).not.toBeNull();
-    expect(result?.conversionMethod).toBe("parser:x-status");
-    expect(result?.metadata.author).toBe("宝玉 (@dotey)");
-    expect(result?.metadata.coverImage).toBe("https://pbs.twimg.com/media/tweet-main.jpg");
-    expect(result?.metadata.language).toBe("zh");
-    expect(result?.markdown).toContain("转译：把下面这段加到你的 Codex 自定义指令里");
-    expect(result?.markdown).toContain("> Quote from Matt Shumer (@mattshumer_)");
-    expect(result?.markdown).toContain("![");
-
-    const document = createMarkdownDocument(result!);
-    expect(document).not.toContain("\n\n# 转译：把下面这段加到你的 Codex 自定义指令里，体验会好太多：\n\n");
-  });
+  assert.ok(result);
+  assert.equal(result.conversionMethod, "parser:archive-ph");
+  assert.equal(
+    result.metadata.url,
+    "https://www.newscientist.com/article/2520204-major-leap-towards-reanimation-after-death-as-mammals-brain-preserved/"
+  );
+  assert.equal(
+    result.metadata.title,
+    "Major leap towards reanimation after death as mammal brain preserved"
+  );
+  assert.equal(result.metadata.coverImage, "https://cdn.example.com/brain.jpg");
+  assert.ok(result.markdown.includes("Researchers say the preserved structure"));
+  assert.ok(result.markdown.includes("![Brain tissue](https://cdn.example.com/brain.jpg)"));
+  assert.ok(!result.markdown.includes("Archive shell text that should be ignored"));
+});
+
+test("falls back to body when archive.ph CONTENT is missing", () => {
+  const result = parse(ARCHIVE_FALLBACK_HTML, "https://archive.ph/fallback");
+
+  assert.ok(result);
+  assert.equal(result.conversionMethod, "parser:archive-ph");
+  assert.equal(result.metadata.url, "https://example.com/fallback-story");
+  assert.equal(result.metadata.title, "Fallback body parsing still works");
+  assert.ok(result.markdown.includes("When CONTENT is absent"));
+});
+
+test("parses X article pages from HTML", () => {
+  const result = parse(
+    ARTICLE_HTML,
+    "https://x.com/dotey/article/2035141635713941927"
+  );
+
+  assert.ok(result);
+  assert.equal(result.conversionMethod, "parser:x-article");
+  assert.equal(result.metadata.title, "Karpathy：\"写代码\"已经不是对的动词了");
+  assert.equal(result.metadata.author, "宝玉 (@dotey)");
+  assert.equal(result.metadata.coverImage, "https://pbs.twimg.com/media/article-cover.jpg");
+  assert.equal(result.metadata.published, "2026-03-20T23:49:11.000Z");
+  assert.equal(result.metadata.language, "zh");
+  assert.ok(result.markdown.includes("## 要点速览"));
+  assert.ok(
+    result.markdown.includes(
+      "[![](https://pbs.twimg.com/media/article-inline.jpg)](/dotey/article/2035141635713941927/media/2)"
+    )
+  );
+  assert.ok(result.markdown.includes("写代码已经不是对的动词了。"));
+
+  const document = createMarkdownDocument(result);
+  assert.ok(document.includes("# Karpathy：\"写代码\"已经不是对的动词了"));
+});
+
+test("parses X status pages from HTML without duplicating the title heading", () => {
+  const result = parse(
+    STATUS_HTML,
+    "https://x.com/dotey/status/2035590649081196710"
+  );
+
+  assert.ok(result);
+  assert.equal(result.conversionMethod, "parser:x-status");
+  assert.equal(result.metadata.author, "宝玉 (@dotey)");
+  assert.equal(result.metadata.coverImage, "https://pbs.twimg.com/media/tweet-main.jpg");
+  assert.equal(result.metadata.language, "zh");
+  assert.ok(result.markdown.includes("转译：把下面这段加到你的 Codex 自定义指令里"));
+  assert.ok(result.markdown.includes("> Quote from Matt Shumer (@mattshumer_)"));
+  assert.ok(result.markdown.includes("!["));
+
+  const document = createMarkdownDocument(result);
+  assert.ok(
+    !document.includes("\n\n# 转译：把下面这段加到你的 Codex 自定义指令里，体验会好太多：\n\n")
+  );
 });