Compare commits
120 Commits
| Author | SHA1 | Date |
|---|---|---|
|
|
9977ff520c | |
|
|
03bd68d3a4 | |
|
|
39792f4360 | |
|
|
25b8a7f73d | |
|
|
e13092ba2a | |
|
|
acbcf19ba2 | |
|
|
dcd0f81433 | |
|
|
c938396efc | |
|
|
1ba68c9a9c | |
|
|
b3f5c0a8aa | |
|
|
9c45ede0c7 | |
|
|
30d2ac98ce | |
|
|
bfdd64bd4e | |
|
|
434d4857da | |
|
|
990fea4f7b | |
|
|
517ff566a1 | |
|
|
4c9af7d92f | |
|
|
46c4859d48 | |
|
|
f3f886217b | |
|
|
4d465d55d0 | |
|
|
11d80eeaa9 | |
|
|
58ba4579ef | |
|
|
67a45a57a0 | |
|
|
0b8ac256f4 | |
|
|
eaa0f1aa11 | |
|
|
ec5f4ffcc9 | |
|
|
d206e1674d | |
|
|
b62ad26098 | |
|
|
31b2929d1c | |
|
|
7c2253dd3e | |
|
|
8be98c5afe | |
|
|
fe1a854bed | |
|
|
eeba585315 | |
|
|
1ca99ae5ae | |
|
|
c5df9d01bf | |
|
|
ee1fc3c823 | |
|
|
b7725a4ba8 | |
|
|
a596c653d3 | |
|
|
3017bfe423 | |
|
|
ae1d619ab2 | |
|
|
4821508c34 | |
|
|
f1042c8a6c | |
|
|
fa0fe441f5 | |
|
|
90b2205914 | |
|
|
8e111c17b3 | |
|
|
80d7675355 | |
|
|
538ede2b32 | |
|
|
038e67fd9b | |
|
|
049462d6dd | |
|
|
c8042cef0d | |
|
|
15508eae43 | |
|
|
94eab2de63 | |
|
|
126b72ed36 | |
|
|
38f4f253df | |
|
|
8bc01debac | |
|
|
c5c54e26da | |
|
|
2a0bba6161 | |
|
|
c44a524fa6 | |
|
|
826535abe4 | |
|
|
fc50f31694 | |
|
|
204765a137 | |
|
|
4874cd2dae | |
|
|
b791ee5dc7 | |
|
|
450c76d955 | |
|
|
db33da26e7 | |
|
|
c7c98ba034 | |
|
|
60ab574559 | |
|
|
8e2967d4a2 | |
|
|
3a8b0cc158 | |
|
|
9e3d72cf42 | |
|
|
5eeb1e6d8d | |
|
|
0ee6dd4305 | |
|
|
7891f3c3c0 | |
|
|
74f4a48ca7 | |
|
|
881c03262e | |
|
|
6afcfa80cc | |
|
|
9eb032a22f | |
|
|
c51ae47eac | |
|
|
2ff139112f | |
|
|
d0764c2739 | |
|
|
2c14872e88 | |
|
|
e6d54f7492 | |
|
|
02a4ca498a | |
|
|
31994be0e1 | |
|
|
9137c5ab8c | |
|
|
e5d8ad91bc | |
|
|
9c06b92a74 | |
|
|
41a75584b3 | |
|
|
88843b0276 | |
|
|
6909c016b2 | |
|
|
bec1f1e2a1 | |
|
|
39a97678bb | |
|
|
6cd709b9e7 | |
|
|
aaf0f188dd | |
|
|
b6bf8ecd06 | |
|
|
7a0ffd9533 | |
|
|
69355b4ee1 | |
|
|
23b7487321 | |
|
|
ad8781c1c5 | |
|
|
86a3d6521b | |
|
|
e99ce744cd | |
|
|
40f9f05c22 | |
|
|
09ce80357f | |
|
|
7c995fcc24 | |
|
|
151f1ec2a8 | |
|
|
12e207dc3f | |
|
|
00e74ab071 | |
|
|
1653b8544b | |
|
|
dad8f3a800 | |
|
|
35298d7c9d | |
|
|
f22374ab62 | |
|
|
d4e80b1bc3 | |
|
|
a5761dc71a | |
|
|
a5189dff37 | |
|
|
39fe872bf3 | |
|
|
52813504f8 | |
|
|
a4d4108cd1 | |
|
|
d7e763f1f5 | |
|
|
097c09c59b | |
|
|
e4cd8bfefc |
|
|
@ -6,48 +6,33 @@
|
|||
},
|
||||
"metadata": {
|
||||
"description": "Skills shared by Baoyu for improving daily work efficiency",
|
||||
"version": "1.78.0"
|
||||
"version": "1.107.0"
|
||||
},
|
||||
"plugins": [
|
||||
{
|
||||
"name": "content-skills",
|
||||
"description": "Content generation and publishing skills",
|
||||
"name": "baoyu-skills",
|
||||
"description": "Content generation, AI backends, and utility tools for daily work efficiency",
|
||||
"source": "./",
|
||||
"strict": true,
|
||||
"strict": false,
|
||||
"skills": [
|
||||
"./skills/baoyu-xhs-images",
|
||||
"./skills/baoyu-post-to-x",
|
||||
"./skills/baoyu-post-to-wechat",
|
||||
"./skills/baoyu-post-to-weibo",
|
||||
"./skills/baoyu-article-illustrator",
|
||||
"./skills/baoyu-cover-image",
|
||||
"./skills/baoyu-slide-deck",
|
||||
"./skills/baoyu-comic",
|
||||
"./skills/baoyu-infographic"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "ai-generation-skills",
|
||||
"description": "AI-powered generation backends",
|
||||
"source": "./",
|
||||
"strict": true,
|
||||
"skills": [
|
||||
"./skills/baoyu-danger-gemini-web",
|
||||
"./skills/baoyu-image-gen"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "utility-skills",
|
||||
"description": "Utility tools for content processing",
|
||||
"source": "./",
|
||||
"strict": true,
|
||||
"skills": [
|
||||
"./skills/baoyu-danger-x-to-markdown",
|
||||
"./skills/baoyu-compress-image",
|
||||
"./skills/baoyu-url-to-markdown",
|
||||
"./skills/baoyu-cover-image",
|
||||
"./skills/baoyu-danger-gemini-web",
|
||||
"./skills/baoyu-danger-x-to-markdown",
|
||||
"./skills/baoyu-diagram",
|
||||
"./skills/baoyu-format-markdown",
|
||||
"./skills/baoyu-imagine",
|
||||
"./skills/baoyu-infographic",
|
||||
"./skills/baoyu-markdown-to-html",
|
||||
"./skills/baoyu-post-to-weibo",
|
||||
"./skills/baoyu-post-to-wechat",
|
||||
"./skills/baoyu-post-to-x",
|
||||
"./skills/baoyu-slide-deck",
|
||||
"./skills/baoyu-translate",
|
||||
"./skills/baoyu-url-to-markdown",
|
||||
"./skills/baoyu-image-cards",
|
||||
"./skills/baoyu-youtube-transcript"
|
||||
]
|
||||
}
|
||||
|
|
|
|||
|
|
@ -154,6 +154,7 @@ illustrations/
|
|||
comic/
|
||||
translate/
|
||||
posts/
|
||||
diagram/
|
||||
### IntelliJ IDEA ###
|
||||
.idea
|
||||
*.iws
|
||||
|
|
@ -167,3 +168,4 @@ posts/
|
|||
.release-artifacts/
|
||||
.worktrees/
|
||||
youtube-transcript/
|
||||
.omx/
|
||||
|
|
|
|||
270
CHANGELOG.md
270
CHANGELOG.md
|
|
@ -2,6 +2,276 @@
|
|||
|
||||
English | [中文](./CHANGELOG.zh.md)
|
||||
|
||||
## 1.107.0 - 2026-04-15
|
||||
|
||||
### Features
|
||||
- `baoyu-diagram`: add SVG-to-PNG @2x conversion script — auto-converts generated SVG diagrams to @2x PNG using Sharp; consolidate reference files and add `{baseDir}` path resolution for portable skill loading
|
||||
|
||||
### Fixes
|
||||
- `claude-plugin`: allow inline marketplace manifest (#130)
|
||||
|
||||
## 1.106.0 - 2026-04-14
|
||||
|
||||
### Features
|
||||
- `baoyu-diagram`: add architecture enrichment rules — automatically expand architecture diagrams with multiple client types, per-service tech stacks, database tiers, message buses, and color-coded categories; add full structural layout patterns, architecture-specific pitfalls, network topology templates, and layout math for complex diagrams
|
||||
|
||||
## 1.105.0 - 2026-04-13
|
||||
|
||||
### Features
|
||||
- `baoyu-diagram`: unify to analyze→confirm→generate workflow — remove single/multi mode split; skill now analyzes any input material, recommends diagram types and splitting strategy, confirms once, then generates all diagrams
|
||||
|
||||
## 1.104.0 - 2026-04-13
|
||||
|
||||
### Features
|
||||
- `baoyu-diagram`: add Mermaid sketch step (6d-0) before SVG generation — write a Mermaid code block as structural intent; add Mermaid–SVG consistency check in step 6f
|
||||
|
||||
### Fixes
|
||||
- `baoyu-post-to-wechat`: verify editor focus before paste and type operations to prevent silent paste failures
|
||||
|
||||
## 1.103.1 - 2026-04-13
|
||||
|
||||
### Fixes
|
||||
- `baoyu-markdown-to-html`: decode HTML entities and strip tags from article summary
|
||||
- `baoyu-post-to-weibo`: decode HTML entities and strip tags from article summary
|
||||
|
||||
## 1.103.0 - 2026-04-12
|
||||
|
||||
### Features
|
||||
- `baoyu-diagram`: add multi-diagram mode — analyze article content and generate multiple diagrams at identified positions; new `--density` option (`minimal`, `balanced`, `per-section`, `rich`) and `--mode` option (`single`, `multi`, `auto`); auto-detects mode from input (file path → multi, short topic → single); inserts diagram image links into article; output structure `diagram/{article-slug}/NN-{type}-{slug}/`
|
||||
|
||||
### Fixes
|
||||
- `baoyu-article-illustrator`: prevent color names and hex codes from appearing as visible text in generated images — add semantic constraint to all palette references and prompt construction rules
|
||||
- `baoyu-cover-image`: prevent color names and hex codes from appearing as visible text in generated images — add constraint to all palette references and prompt template
|
||||
- `baoyu-image-cards`: prevent color names from appearing as visible text in generated images
|
||||
- `baoyu-post-to-wechat`: decode HTML entities and strip HTML tags from article summary before using as WeChat article digest
|
||||
|
||||
## 1.102.0 - 2026-04-12
|
||||
|
||||
### Features
|
||||
- `baoyu-imagine`: add OpenAI-compatible image API dialect — new `--imageApiDialect` flag, `OPENAI_IMAGE_API_DIALECT` env var, and `default_image_api_dialect` config for gateways that expect aspect-ratio `size` plus `metadata.resolution` instead of pixel `size`
|
||||
|
||||
## 1.101.0 - 2026-04-12
|
||||
|
||||
### Features
|
||||
- `baoyu-imagine`: improve Replicate provider compatibility — route models through family-specific input builders and validators (nano-banana, Seedream 4.5, Seedream 5 Lite, Wan 2.7 Image); update default model to `google/nano-banana-2`; fix Seedream 4.5 custom size encoding to use width/height schema; fix aspect-ratio default inheritance for unsupported Replicate models; block multi-output requests before they reach the API (by @justnode)
|
||||
|
||||
## 1.100.0 - 2026-04-12
|
||||
|
||||
### Features
|
||||
- `baoyu-imagine`: add Z.AI GLM-Image provider — supports `glm-image` and `cogview-4-250304` models via the Z.AI sync image API; configure with `ZAI_API_KEY` (or `BIGMODEL_API_KEY` for backward compatibility)
|
||||
|
||||
## 1.99.1 - 2026-04-11
|
||||
|
||||
### Fixes
|
||||
- `baoyu-article-illustrator`: omit `model` field from batch tasks when `--model` is not specified, letting `baoyu-imagine` resolve the default from env/config
|
||||
|
||||
## 1.99.0 - 2026-04-10
|
||||
|
||||
### Features
|
||||
- `baoyu-diagram`: add new skill for generating publication-ready SVG diagrams — flowcharts, structural/architecture diagrams, and illustrative intuition diagrams. Claude writes real SVG code directly following a cohesive design system; output is a single self-contained `.svg` file with embedded styles and auto dark-mode, ready to embed in articles, WeChat posts, slides, and docs
|
||||
|
||||
## 1.98.0 - 2026-04-10
|
||||
|
||||
### Features
|
||||
- `baoyu-xhs-images`: Restore as active skill (remove deprecated warning)
|
||||
- `baoyu-xhs-images`: Add `sketch-notes` style — hand-drawn educational infographic with macaron pastels, wobble lines, and warm cream background
|
||||
- `baoyu-xhs-images`: Add palette system (`macaron`, `warm`, `neon`) as optional `--palette` color override dimension
|
||||
- `baoyu-xhs-images`: Add 3 new presets: `hand-drawn-edu`, `sketch-card`, `sketch-summary`
|
||||
|
||||
## 1.97.1 - 2026-04-09
|
||||
|
||||
### Fixes
|
||||
- `baoyu-image-cards`: rename palette color roles from "Zone N" to "Block Color" to prevent AI rendering labels as visible text in images
|
||||
|
||||
## 1.97.0 - 2026-04-09
|
||||
|
||||
### Features
|
||||
- `baoyu-image-cards`: add `sketch-notes` style, palette system (`macaron`, `warm`, `neon`), and 3 new presets (`hand-drawn-edu`, `sketch-card`, `sketch-summary`)
|
||||
|
||||
### Fixes
|
||||
- `baoyu-xhs-images`: improve deprecated skill description for better routing
|
||||
|
||||
## 1.96.0 - 2026-04-09
|
||||
|
||||
### Features
|
||||
- `baoyu-image-cards`: add image card series skill migrated from `baoyu-xhs-images`, decoupled from Xiaohongshu platform
|
||||
- `baoyu-xhs-images`: deprecated, migrated to `baoyu-image-cards`
|
||||
|
||||
## 1.95.1 - 2026-04-09
|
||||
|
||||
### Fixes
|
||||
- `baoyu-slide-deck`: add `pptxgenjs` dependency and detect image format by magic bytes instead of file extension in PDF merge
|
||||
|
||||
## 1.95.0 - 2026-04-08
|
||||
|
||||
### Features
|
||||
- `baoyu-infographic`: add `hand-drawn-edu` style — macaron pastels, hand-drawn wobble, stick figures
|
||||
- `baoyu-slide-deck`: add `hand-drawn-edu` preset and `macaron` mood dimension with pastel color palette
|
||||
|
||||
## 1.94.0 - 2026-04-08
|
||||
|
||||
### Features
|
||||
- `baoyu-cover-image`: add macaron palette and hand-drawn-edu style preset
|
||||
|
||||
## 1.93.0 - 2026-04-08
|
||||
|
||||
### Features
|
||||
- `baoyu-article-illustrator`: add `hand-drawn-edu` preset — flowchart + sketch-notes + macaron combination for hand-drawn educational diagrams
|
||||
|
||||
### Refactor
|
||||
- `baoyu-article-illustrator`: extract palette as independent third dimension in Type × Style × Palette system
|
||||
|
||||
### Fixes
|
||||
- `baoyu-article-illustrator`: add explicit style file loading step in workflow
|
||||
|
||||
## 1.92.0 - 2026-04-08
|
||||
|
||||
### Features
|
||||
- `baoyu-article-illustrator`: add `macaron` style — soft macaron pastel color blocks (blue, mint, lavender, peach) on warm cream background with optional hand-drawn mode; add `edu-visual` preset
|
||||
|
||||
## 1.90.1 - 2026-04-05
|
||||
|
||||
### Fixes
|
||||
- `baoyu-post-to-wechat`: detect actual image format from buffer magic bytes to fix CDN content-type mismatches (e.g. WebP served for .png URLs); treat WebP as PNG-preferred for transparency handling
|
||||
|
||||
## 1.89.1 - 2026-04-01
|
||||
|
||||
### Features
|
||||
- `baoyu-chrome-cdp`: add `gracefulKillChrome` that waits for Chrome to exit and release its port; fix `killChrome` to use `exitCode`/`signalCode` instead of `.killed` for reliable process state detection
|
||||
- `baoyu-fetch`: auto-detect login state before extraction in interaction wait mode
|
||||
|
||||
### Maintenance
|
||||
- Sync vendor baoyu-chrome-cdp across CDP skills
|
||||
- `baoyu-url-to-markdown`: sync vendor baoyu-fetch with login auto-detect
|
||||
|
||||
## 1.89.0 - 2026-03-31
|
||||
|
||||
### Features
|
||||
- `baoyu-fetch`: add X session cookie sidecar to persist login across runs, graceful Chrome shutdown via Browser.close, and stale profile lock auto-recovery
|
||||
- `baoyu-article-illustrator`: add warm palette variant for vector-illustration style with new `warm-knowledge` preset
|
||||
- `baoyu-post-to-x`: add X session persistence after login, Chrome lock recovery, and graceful shutdown
|
||||
|
||||
### Documentation
|
||||
- `baoyu-post-to-weibo`: add post type auto-selection rules and safer CDP kill instructions
|
||||
|
||||
### Refactor
|
||||
- `baoyu-danger-gemini-web`: use graceful Chrome shutdown instead of hard kill
|
||||
- `baoyu-danger-x-to-markdown`: use graceful Chrome shutdown instead of hard kill
|
||||
|
||||
### Fixes
|
||||
- Sync npm lockfile and root node tests
|
||||
|
||||
### Maintenance
|
||||
- `baoyu-url-to-markdown`: sync vendor baoyu-fetch with session and lifecycle changes
|
||||
- Update bun.lock files
|
||||
|
||||
## 1.88.0 - 2026-03-27
|
||||
|
||||
### Features
|
||||
- `baoyu-fetch`: new URL reader CLI package with Chrome CDP and site-specific adapters (X/Twitter, YouTube, Hacker News, generic)
|
||||
|
||||
### Refactor
|
||||
- `baoyu-url-to-markdown`: replace custom CDP/converter pipeline with `baoyu-fetch` CLI
|
||||
- `shared-skill-packages`: add `package.json` `files` allowlist support and filter test files, changelogs, and `.changeset` dirs during vendor sync
|
||||
|
||||
### Fixes
|
||||
- `baoyu-md`: rename test image paths from `images/` to `imgs/`
|
||||
|
||||
## 1.87.2 - 2026-03-26
|
||||
|
||||
### Refactor
|
||||
- `baoyu-translate`: simplify translation prompts from 15+ verbose principles to 7 concise ones, consolidate analysis and review steps in workflow references
|
||||
|
||||
## 1.87.1 - 2026-03-26
|
||||
|
||||
### Maintenance
|
||||
- Add deprecation notice to `baoyu-image-gen` SKILL.md redirecting users to `baoyu-imagine`
|
||||
- Document deprecated skills policy in CLAUDE.md
|
||||
|
||||
## 1.87.0 - 2026-03-26
|
||||
|
||||
### Maintenance
|
||||
- Remove deprecated `baoyu-image-gen` redirect skill and plugin manifest entry — migration to `baoyu-imagine` is complete
|
||||
|
||||
## 1.86.0 - 2026-03-25
|
||||
|
||||
### Features
|
||||
- `baoyu-translate`: enrich translation prompt with full analysis context — source voice assessment, structured figurative language mapping, comprehension challenge reasoning, structural/creative challenges, and chunk position context for subagents
|
||||
|
||||
## 1.85.0 - 2026-03-25
|
||||
|
||||
### Features
|
||||
- `baoyu-imagine`: auto-migrate legacy `baoyu-image-gen` EXTEND.md config path at runtime
|
||||
- Add `baoyu-image-gen` deprecation redirect skill to guide users to install `baoyu-imagine` and remove the old skill
|
||||
|
||||
## 1.84.0 - 2026-03-25
|
||||
|
||||
### Features
|
||||
- Rename `baoyu-image-gen` skill to `baoyu-imagine` — shorter command name, all references updated across docs, configs, and dependent skills
|
||||
|
||||
## 1.83.0 - 2026-03-25
|
||||
|
||||
### Features
|
||||
- `baoyu-image-gen`: add MiniMax provider (`image-01` / `image-01-live`) with subject_reference for character/portrait consistency, custom sizes, and aspect ratio support
|
||||
|
||||
## 1.82.0 - 2026-03-24
|
||||
|
||||
### Features
|
||||
- `baoyu-url-to-markdown`: add browser fallback strategy — headless first, automatic retry in visible Chrome on technical failure; new `--browser auto|headless|headed` flag with `--headless`/`--headed` shortcuts
|
||||
- `baoyu-url-to-markdown`: add content cleaner module for HTML preprocessing before extraction (remove ads, base64 images, scripts, styles)
|
||||
- `baoyu-url-to-markdown`: support base64 data URI images in media localizer alongside remote URLs
|
||||
- `baoyu-url-to-markdown`: capture final URL from browser to track redirects for output path generation
|
||||
- `baoyu-url-to-markdown`: add agent quality gate documentation for post-capture content validation
|
||||
|
||||
### Dependencies
|
||||
- `baoyu-url-to-markdown`: upgrade defuddle ^0.12.0 → ^0.14.0
|
||||
|
||||
### Tests
|
||||
- `baoyu-url-to-markdown`: add unit tests for content-cleaner, html-to-markdown, legacy-converter, media-localizer
|
||||
|
||||
## 1.81.0 - 2026-03-24
|
||||
|
||||
### Features
|
||||
- `baoyu-youtube-transcript`: add yt-dlp fallback when YouTube blocks direct InnerTube API, with alternate client identity retry and cookie support via `YOUTUBE_TRANSCRIPT_COOKIES_FROM_BROWSER` env var
|
||||
|
||||
### Refactor
|
||||
- `baoyu-youtube-transcript`: split monolithic script into typed modules (youtube, transcript, storage, shared, types) and add unit tests
|
||||
|
||||
## 1.80.1 - 2026-03-24
|
||||
|
||||
### Fixes
|
||||
- `baoyu-image-gen`: use correct `prompt` field name for Jimeng API request
|
||||
|
||||
## 1.80.0 - 2026-03-24
|
||||
|
||||
### Features
|
||||
- `baoyu-image-gen`: add Azure OpenAI as independent image generation provider with flexible endpoint parsing, deployment-name resolution, quality mapping, and reference image validation
|
||||
|
||||
## 1.79.2 - 2026-03-23
|
||||
|
||||
### Fixes
|
||||
- `baoyu-cover-image`: simplify reference image handling — use `--ref` when model supports it, only create description files for models without reference image support
|
||||
- `baoyu-post-to-weibo`: add no-theme rule for article markdown-to-HTML conversion
|
||||
|
||||
### Tests
|
||||
- Fix Node-compatible parser tests and add parser test dependencies
|
||||
|
||||
## 1.79.1 - 2026-03-23
|
||||
|
||||
### Fixes
|
||||
- Consolidate to single plugin to prevent duplicate skill registration (by @TyrealQ)
|
||||
- `baoyu-article-illustrator`: remove opacity parameter from watermark prompt
|
||||
- `baoyu-comic`: fix Doraemon naming spacing and remove opacity from watermark prompt
|
||||
- `baoyu-xhs-images`: remove opacity from watermark prompt and fix CJK spacing
|
||||
|
||||
### Documentation
|
||||
- Update project documentation to reflect single-plugin architecture
|
||||
|
||||
## 1.79.0 - 2026-03-22
|
||||
|
||||
### Features
|
||||
- `baoyu-post-to-wechat`: improve credential loading with multi-source resolution, priority ordering, and diagnostics for skipped incomplete sources
|
||||
|
||||
## 1.78.0 - 2026-03-22
|
||||
|
||||
### Features
|
||||
|
|
|
|||
270
CHANGELOG.zh.md
270
CHANGELOG.zh.md
|
|
@ -2,6 +2,276 @@
|
|||
|
||||
[English](./CHANGELOG.md) | 中文
|
||||
|
||||
## 1.107.0 - 2026-04-15
|
||||
|
||||
### 新功能
|
||||
- `baoyu-diagram`:新增 SVG 转 @2x PNG 转换脚本 —— 使用 Sharp 自动将生成的 SVG 图表转为 @2x PNG;精简合并参考文件,新增 `{baseDir}` 路径解析以支持可移植的技能加载
|
||||
|
||||
### 修复
|
||||
- `claude-plugin`:支持内联 marketplace manifest (#130)
|
||||
|
||||
## 1.106.0 - 2026-04-14
|
||||
|
||||
### 新功能
|
||||
- `baoyu-diagram`:新增架构图丰富化规则 —— 自动扩展架构图,补充多客户端类型、各服务技术栈、数据库分层、消息总线和分色分类;新增完整结构布局模式、架构专用陷阱提示、网络拓扑模板和复杂图表布局计算
|
||||
|
||||
## 1.105.0 - 2026-04-13
|
||||
|
||||
### 新功能
|
||||
- `baoyu-diagram`:统一为分析→确认→生成工作流 —— 移除单图/多图模式区分;技能现在分析任意输入素材,推荐图表类型和拆分策略,一次确认后批量生成所有图表
|
||||
|
||||
## 1.104.0 - 2026-04-13
|
||||
|
||||
### 新功能
|
||||
- `baoyu-diagram`:新增 Mermaid 草图步骤(6d-0),在生成 SVG 前先写 Mermaid 代码块作为结构意图;在步骤 6f 新增 Mermaid–SVG 一致性检查
|
||||
|
||||
### 修复
|
||||
- `baoyu-post-to-wechat`:在粘贴和输入操作前校验编辑器焦点,避免粘贴静默失败
|
||||
|
||||
## 1.103.1 - 2026-04-13
|
||||
|
||||
### 修复
|
||||
- `baoyu-markdown-to-html`:修复文章摘要中 HTML 实体未解码及 HTML 标签未剥离的问题
|
||||
- `baoyu-post-to-weibo`:修复文章摘要中 HTML 实体未解码及 HTML 标签未剥离的问题
|
||||
|
||||
## 1.103.0 - 2026-04-12
|
||||
|
||||
### 新功能
|
||||
- `baoyu-diagram`:新增多图模式 —— 分析文章内容,在识别出的位置批量生成图表;新增 `--density` 参数(`minimal`、`balanced`、`per-section`、`rich`)和 `--mode` 参数(`single`、`multi`、`auto`);根据输入自动判断模式(文件路径→多图,短主题→单图);自动在文章中插入图表链接;输出目录结构 `diagram/{article-slug}/NN-{type}-{slug}/`
|
||||
|
||||
### 修复
|
||||
- `baoyu-article-illustrator`:修复生成图像中出现颜色名称和色值文字的问题 —— 在所有调色板参考文件和提示构建规则中添加语义约束
|
||||
- `baoyu-cover-image`:修复生成图像中出现颜色名称和色值文字的问题 —— 在所有调色板参考文件和提示模板中添加约束
|
||||
- `baoyu-image-cards`:修复生成图像中出现颜色名称文字的问题
|
||||
- `baoyu-post-to-wechat`:修复文章摘要中 HTML 实体未解码及 HTML 标签未剥离的问题,避免微信文章摘要显示乱码
|
||||
|
||||
## 1.102.0 - 2026-04-12
|
||||
|
||||
### 新功能
|
||||
- `baoyu-imagine`:新增 OpenAI 兼容图像 API 方言支持 —— 新增 `--imageApiDialect` 参数、`OPENAI_IMAGE_API_DIALECT` 环境变量及 `default_image_api_dialect` 配置项,用于对接期望宽高比格式 `size` 加 `metadata.resolution` 的兼容网关
|
||||
|
||||
## 1.101.0 - 2026-04-12
|
||||
|
||||
### 新功能
|
||||
- `baoyu-imagine`:改进 Replicate 服务商兼容性 —— 针对不同模型系列(nano-banana、Seedream 4.5、Seedream 5 Lite、Wan 2.7 Image)实现专属输入构建器和验证器;将默认模型更新为 `google/nano-banana-2`;修复 Seedream 4.5 自定义尺寸编码(改用 width/height schema);修复不支持的 Replicate 模型的宽高比默认值继承问题;在请求到达 API 前拦截多图请求 (by @justnode)
|
||||
|
||||
## 1.100.0 - 2026-04-12
|
||||
|
||||
### 新功能
|
||||
- `baoyu-imagine`:新增 Z.AI GLM-Image 服务商支持,支持 `glm-image` 和 `cogview-4-250304` 模型,通过 Z.AI 同步图像 API 调用;配置 `ZAI_API_KEY`(或 `BIGMODEL_API_KEY` 向后兼容)
|
||||
|
||||
## 1.99.1 - 2026-04-11
|
||||
|
||||
### 修复
|
||||
- `baoyu-article-illustrator`:未指定 `--model` 时,批处理任务中不再写入 `model` 字段,改由 `baoyu-imagine` 从环境变量或配置中解析默认值
|
||||
|
||||
## 1.99.0 - 2026-04-10
|
||||
|
||||
### 新功能
|
||||
- `baoyu-diagram`:新增技能,用于生成可直接发布的 SVG 图表 —— 包括流程图、架构/结构图、示意图(直觉图解)。Claude 直接输出符合统一设计规范的真实 SVG 代码,产物是单个自包含的 `.svg` 文件,内嵌样式并自动支持深色模式,可直接嵌入文章、微信公众号、幻灯片和文档中
|
||||
|
||||
## 1.98.0 - 2026-04-10
|
||||
|
||||
### 新功能
|
||||
- `baoyu-xhs-images`:恢复为正式技能(移除废弃警告)
|
||||
- `baoyu-xhs-images`:新增 `sketch-notes` 风格 —— 手绘教育信息图,马卡龙配色,波动线条,暖奶油背景
|
||||
- `baoyu-xhs-images`:新增配色系统(`macaron`、`warm`、`neon`),支持 `--palette` 参数覆盖风格默认颜色
|
||||
- `baoyu-xhs-images`:新增 3 个预设:`hand-drawn-edu`、`sketch-card`、`sketch-summary`
|
||||
|
||||
## 1.97.1 - 2026-04-09
|
||||
|
||||
### 修复
|
||||
- `baoyu-image-cards`:将配色方案中 "Zone N" 角色名改为 "Block Color",防止 AI 将标签文字渲染到图片中
|
||||
|
||||
## 1.97.0 - 2026-04-09
|
||||
|
||||
### 新功能
|
||||
- `baoyu-image-cards`:新增 `sketch-notes` 风格、配色系统(`macaron`、`warm`、`neon`)及 3 个新预设(`hand-drawn-edu`、`sketch-card`、`sketch-summary`)
|
||||
|
||||
### 修复
|
||||
- `baoyu-xhs-images`:优化已弃用技能描述以改善路由匹配
|
||||
|
||||
## 1.96.0 - 2026-04-09
|
||||
|
||||
### 新功能
|
||||
- `baoyu-image-cards`:新增图片卡片系列技能,从 `baoyu-xhs-images` 迁移,解除小红书平台绑定
|
||||
- `baoyu-xhs-images`:已弃用,迁移至 `baoyu-image-cards`
|
||||
|
||||
## 1.95.1 - 2026-04-09
|
||||
|
||||
### 修复
|
||||
- `baoyu-slide-deck`:添加 `pptxgenjs` 依赖,PDF 合并时通过魔数字节检测图片格式替代文件扩展名判断
|
||||
|
||||
## 1.95.0 - 2026-04-08
|
||||
|
||||
### 新功能
|
||||
- `baoyu-infographic`:新增 `hand-drawn-edu` 风格 — 马卡龙柔和色块、手绘线条、火柴人角色
|
||||
- `baoyu-slide-deck`:新增 `hand-drawn-edu` 预设和 `macaron` 色调维度,含柔和马卡龙色板
|
||||
|
||||
## 1.94.0 - 2026-04-08
|
||||
|
||||
### 新功能
|
||||
- `baoyu-cover-image`:新增马卡龙色板和 hand-drawn-edu 风格预设
|
||||
|
||||
## 1.93.0 - 2026-04-08
|
||||
|
||||
### 新功能
|
||||
- `baoyu-article-illustrator`:新增 `hand-drawn-edu` 预设 — flowchart + sketch-notes + macaron 组合,用于手绘教育图解
|
||||
|
||||
### 重构
|
||||
- `baoyu-article-illustrator`:将色板(Palette)提取为独立的第三维度,形成 Type × Style × Palette 三维系统
|
||||
|
||||
### 修复
|
||||
- `baoyu-article-illustrator`:在工作流中添加显式的风格文件加载步骤
|
||||
|
||||
## 1.92.0 - 2026-04-08
|
||||
|
||||
### 新功能
|
||||
- `baoyu-article-illustrator`:新增 `macaron` 风格 — 马卡龙柔和色块(浅蓝、浅绿、浅紫、浅橙)配暖白底色,可选手绘模式;新增 `edu-visual` 预设
|
||||
|
||||
## 1.90.1 - 2026-04-05
|
||||
|
||||
### 修复
|
||||
- `baoyu-post-to-wechat`:通过 magic bytes 检测实际图片格式,修复 CDN 返回与 URL 扩展名不一致的 content-type 问题(如 .png URL 实际返回 WebP);WebP 格式按 PNG 策略处理以保留透明度
|
||||
|
||||
## 1.89.1 - 2026-04-01
|
||||
|
||||
### 新功能
|
||||
- `baoyu-chrome-cdp`:新增 `gracefulKillChrome`,等待 Chrome 进程退出并释放端口;修复 `killChrome` 使用 `exitCode`/`signalCode` 替代 `.killed` 以更可靠地检测进程状态
|
||||
- `baoyu-fetch`:在交互等待模式下自动检测登录状态,未登录时提示用户先登录再提取内容
|
||||
|
||||
### 维护
|
||||
- 同步 vendor baoyu-chrome-cdp 至所有 CDP 技能
|
||||
- `baoyu-url-to-markdown`:同步 vendor baoyu-fetch 的登录自动检测功能
|
||||
|
||||
## 1.89.0 - 2026-03-31
|
||||
|
||||
### 新功能
|
||||
- `baoyu-fetch`:新增 X 会话 Cookie 旁路文件,跨运行持久化登录状态;通过 Browser.close 优雅关闭 Chrome;自动检测并清理过期的 Chrome 配置锁文件
|
||||
- `baoyu-article-illustrator`:新增暖色调矢量插画配色方案,含 `warm-knowledge` 预设
|
||||
- `baoyu-post-to-x`:新增登录后 X 会话持久化、Chrome 锁文件恢复和优雅关闭
|
||||
|
||||
### 文档
|
||||
- `baoyu-post-to-weibo`:新增发帖类型自动选择规则,优化 CDP Chrome 终止指令
|
||||
|
||||
### 重构
|
||||
- `baoyu-danger-gemini-web`:使用优雅 Chrome 关闭替代强制终止
|
||||
- `baoyu-danger-x-to-markdown`:使用优雅 Chrome 关闭替代强制终止
|
||||
|
||||
### 修复
|
||||
- 同步 npm lockfile 及修复根目录 Node 测试
|
||||
|
||||
### 维护
|
||||
- `baoyu-url-to-markdown`:同步 vendor baoyu-fetch 的会话和生命周期改进
|
||||
- 更新 bun.lock 文件
|
||||
|
||||
## 1.88.0 - 2026-03-27
|
||||
|
||||
### 新功能
|
||||
- `baoyu-fetch`:新增 URL 阅读器 CLI 包,支持 Chrome CDP 和站点适配器(X/Twitter、YouTube、Hacker News、通用页面)
|
||||
|
||||
### 重构
|
||||
- `baoyu-url-to-markdown`:用 `baoyu-fetch` CLI 替换自定义 CDP/转换管道
|
||||
- `shared-skill-packages`:支持 `package.json` 的 `files` 白名单,vendor 同步时过滤测试文件、CHANGELOG 和 `.changeset` 目录
|
||||
|
||||
### 修复
|
||||
- `baoyu-md`:修正测试中图片路径 `images/` 为 `imgs/`
|
||||
|
||||
## 1.87.2 - 2026-03-26
|
||||
|
||||
### 重构
|
||||
- `baoyu-translate`:精简翻译提示词,将 15+ 条冗长原则压缩为 7 条,合并分析和审校步骤
|
||||
|
||||
## 1.87.1 - 2026-03-26
|
||||
|
||||
### 维护
|
||||
- 在 `baoyu-image-gen` SKILL.md 中添加废弃提示,引导用户使用 `baoyu-imagine`
|
||||
- 在 CLAUDE.md 中记录废弃技能策略
|
||||
|
||||
## 1.87.0 - 2026-03-26
|
||||
|
||||
### 维护
|
||||
- 移除已废弃的 `baoyu-image-gen` 重定向技能及插件清单条目 — 向 `baoyu-imagine` 的迁移已完成
|
||||
|
||||
## 1.86.0 - 2026-03-25
|
||||
|
||||
### 新功能
|
||||
- `baoyu-translate`:丰富翻译提示词的分析上下文 — 加入原文语气评估、结构化比喻映射表、理解难点推理、结构性/创造性翻译挑战,以及分块翻译的位置上下文
|
||||
|
||||
## 1.85.0 - 2026-03-25
|
||||
|
||||
### 新功能
|
||||
- `baoyu-imagine`:运行时自动迁移旧版 `baoyu-image-gen` 的 EXTEND.md 配置路径
|
||||
- 新增 `baoyu-image-gen` 废弃重定向技能,引导用户安装 `baoyu-imagine` 并移除旧技能
|
||||
|
||||
## 1.84.0 - 2026-03-25
|
||||
|
||||
### 新功能
|
||||
- 将 `baoyu-image-gen` 技能重命名为 `baoyu-imagine` — 更简短的命令名,所有文档、配置和依赖技能中的引用已同步更新
|
||||
|
||||
## 1.83.0 - 2026-03-25
|
||||
|
||||
### 新功能
|
||||
- `baoyu-image-gen`:新增 MiniMax 服务商(`image-01` / `image-01-live`),支持 subject_reference 角色/肖像一致性、自定义尺寸和宽高比
|
||||
|
||||
## 1.82.0 - 2026-03-24
|
||||
|
||||
### 新功能
|
||||
- `baoyu-url-to-markdown`:新增浏览器回退策略 — 默认无头模式优先,技术故障时自动重试有头 Chrome;新增 `--browser auto|headless|headed` 参数及 `--headless`/`--headed` 快捷方式
|
||||
- `baoyu-url-to-markdown`:新增内容清理模块,提取前预处理 HTML(移除广告、base64 图片、脚本、样式)
|
||||
- `baoyu-url-to-markdown`:媒体本地化支持 base64 data URI 图片
|
||||
- `baoyu-url-to-markdown`:从浏览器捕获最终 URL 以跟踪重定向,用于输出路径生成
|
||||
- `baoyu-url-to-markdown`:新增 Agent 质量门控文档,规范捕获后的内容验证流程
|
||||
|
||||
### 依赖
|
||||
- `baoyu-url-to-markdown`:升级 defuddle ^0.12.0 → ^0.14.0
|
||||
|
||||
### 测试
|
||||
- `baoyu-url-to-markdown`:新增 content-cleaner、html-to-markdown、legacy-converter、media-localizer 单元测试
|
||||
|
||||
## 1.81.0 - 2026-03-24
|
||||
|
||||
### 新功能
|
||||
- `baoyu-youtube-transcript`:YouTube 封锁直连 InnerTube API 时自动回退到 yt-dlp,支持备用客户端身份重试及通过 `YOUTUBE_TRANSCRIPT_COOKIES_FROM_BROWSER` 环境变量传递浏览器 Cookie
|
||||
|
||||
### 重构
|
||||
- `baoyu-youtube-transcript`:将单体脚本拆分为类型化模块(youtube、transcript、storage、shared、types)并添加单元测试
|
||||
|
||||
## 1.80.1 - 2026-03-24
|
||||
|
||||
### 修复
|
||||
- `baoyu-image-gen`:修正即梦 API 请求中的 `prompt` 字段名
|
||||
|
||||
## 1.80.0 - 2026-03-24
|
||||
|
||||
### 新功能
|
||||
- `baoyu-image-gen`:新增 Azure OpenAI 作为独立图像生成服务商,支持灵活的端点解析、部署名称推断、质量映射及参考图片格式校验
|
||||
|
||||
## 1.79.2 - 2026-03-23
|
||||
|
||||
### 修复
|
||||
- `baoyu-cover-image`:简化参考图片处理流程 — 模型支持 `--ref` 时直接传递,仅在模型不支持参考图时创建描述文件
|
||||
- `baoyu-post-to-weibo`:文章 Markdown 转 HTML 时不传递 --theme 参数
|
||||
|
||||
### 测试
|
||||
- 修复 Node 兼容的解析器测试,添加解析器测试依赖
|
||||
|
||||
## 1.79.1 - 2026-03-23
|
||||
|
||||
### 修复
|
||||
- 合并为单一插件,防止 skill 重复注册 (by @TyrealQ)
|
||||
- `baoyu-article-illustrator`:移除水印提示词中的不透明度参数
|
||||
- `baoyu-comic`:修正哆啦 A 梦命名间距,移除水印不透明度参数
|
||||
- `baoyu-xhs-images`:移除水印不透明度参数,修正中英文间距
|
||||
|
||||
### 文档
|
||||
- 更新项目文档以反映单一插件架构
|
||||
|
||||
## 1.79.0 - 2026-03-22
|
||||
|
||||
### 新功能
|
||||
- `baoyu-post-to-wechat`:改进凭据加载机制,支持多来源优先级解析,并提供不完整凭据来源的诊断信息
|
||||
|
||||
## 1.78.0 - 2026-03-22
|
||||
|
||||
### 新功能
|
||||
|
|
|
|||
25
CLAUDE.md
25
CLAUDE.md
|
|
@ -1,16 +1,16 @@
|
|||
# CLAUDE.md
|
||||
|
||||
Claude Code marketplace plugin providing AI-powered content generation skills. Version: **1.78.0**.
|
||||
Claude Code marketplace plugin providing AI-powered content generation skills. Version: **1.107.0**.
|
||||
|
||||
## Architecture
|
||||
|
||||
Skills organized into three categories in `.claude-plugin/marketplace.json` (defines plugin metadata, version, and skill paths):
|
||||
Skills are exposed through the single `baoyu-skills` plugin in `.claude-plugin/marketplace.json` (which defines plugin metadata, version, and skill paths). The repo docs still group them into three logical areas:
|
||||
|
||||
| Category | Description |
|
||||
|----------|-------------|
|
||||
| `content-skills` | Generate or publish content (images, slides, comics, posts) |
|
||||
| `ai-generation-skills` | AI generation backends |
|
||||
| `utility-skills` | Content processing (conversion, compression, translation) |
|
||||
| Group | Description |
|
||||
|-------|-------------|
|
||||
| Content Skills | Generate or publish content (images, slides, comics, posts) |
|
||||
| AI Generation Skills | AI generation backends |
|
||||
| Utility Skills | Content processing (conversion, compression, translation) |
|
||||
|
||||
Each skill contains `SKILL.md` (YAML front matter + docs), optional `scripts/`, `references/`, `prompts/`.
|
||||
|
||||
|
|
@ -31,7 +31,7 @@ Execute: `${BUN_X} skills/<skill>/scripts/main.ts [options]`
|
|||
|
||||
- **Bun**: TypeScript runtime (`bun` preferred, fallback `npx -y bun`)
|
||||
- **Chrome**: Required for CDP-based skills (gemini-web, post-to-x/wechat/weibo, url-to-markdown). All CDP skills share a single profile, override via `BAOYU_CHROME_PROFILE_DIR` env var. Platform paths: [docs/chrome-profile.md](docs/chrome-profile.md)
|
||||
- **Image generation APIs**: `baoyu-image-gen` requires API key (OpenAI, Google, OpenRouter, DashScope, or Replicate) configured in EXTEND.md
|
||||
- **Image generation APIs**: `baoyu-imagine` requires API key (OpenAI, Azure OpenAI, Google, OpenRouter, DashScope, or Replicate) configured in EXTEND.md
|
||||
- **Gemini Web auth**: Browser cookies (first run opens Chrome for login, `--login` to refresh)
|
||||
|
||||
## Security
|
||||
|
|
@ -46,10 +46,17 @@ Execute: `${BUN_X} skills/<skill>/scripts/main.ts [options]`
|
|||
| Rule | Description |
|
||||
|------|-------------|
|
||||
| **Load project skills first** | Project skills override system/user-level skills with same name |
|
||||
| **Default image generation** | Use `skills/baoyu-image-gen/SKILL.md` unless user specifies otherwise |
|
||||
| **Default image generation** | Use `skills/baoyu-imagine/SKILL.md` unless user specifies otherwise |
|
||||
|
||||
Priority: project `skills/` → `$HOME/.baoyu-skills/` → system-level.
|
||||
|
||||
## Deprecated Skills
|
||||
|
||||
| Skill | Note |
|
||||
|-------|------|
|
||||
| `baoyu-image-gen` | Migrated to `baoyu-imagine`. Do NOT add to `.claude-plugin/marketplace.json`. Do NOT update README for this skill. |
|
||||
| `baoyu-xhs-images` | Migrated to `baoyu-image-cards`. Do NOT add to `.claude-plugin/marketplace.json`. Do NOT update README for this skill. |
|
||||
|
||||
## Release Process
|
||||
|
||||
Use `/release-skills` workflow. Never skip:
|
||||
|
|
|
|||
239
README.md
239
README.md
|
|
@ -32,7 +32,7 @@ This repository now supports publishing each `skills/baoyu-*` directory as an in
|
|||
ClawHub installs skills individually, not as one marketplace bundle. After publishing, users can install specific skills such as:
|
||||
|
||||
```bash
|
||||
clawhub install baoyu-image-gen
|
||||
clawhub install baoyu-imagine
|
||||
clawhub install baoyu-markdown-to-html
|
||||
```
|
||||
|
||||
|
|
@ -52,16 +52,14 @@ Run the following command in Claude Code:
|
|||
|
||||
1. Select **Browse and install plugins**
|
||||
2. Select **baoyu-skills**
|
||||
3. Select the plugin(s) you want to install
|
||||
3. Select the **baoyu-skills** plugin
|
||||
4. Select **Install now**
|
||||
|
||||
**Option 2: Direct Install**
|
||||
|
||||
```bash
|
||||
# Install specific plugin
|
||||
/plugin install content-skills@baoyu-skills
|
||||
/plugin install ai-generation-skills@baoyu-skills
|
||||
/plugin install utility-skills@baoyu-skills
|
||||
# Install the marketplace's single plugin
|
||||
/plugin install baoyu-skills@baoyu-skills
|
||||
```
|
||||
|
||||
**Option 3: Ask the Agent**
|
||||
|
|
@ -70,13 +68,13 @@ Simply tell Claude Code:
|
|||
|
||||
> Please install Skills from github.com/JimLiu/baoyu-skills
|
||||
|
||||
### Available Plugins
|
||||
### Available Plugin
|
||||
|
||||
| Plugin | Description | Skills |
|
||||
|--------|-------------|--------|
|
||||
| **content-skills** | Content generation and publishing | [xhs-images](#baoyu-xhs-images), [infographic](#baoyu-infographic), [cover-image](#baoyu-cover-image), [slide-deck](#baoyu-slide-deck), [comic](#baoyu-comic), [article-illustrator](#baoyu-article-illustrator), [post-to-x](#baoyu-post-to-x), [post-to-wechat](#baoyu-post-to-wechat), [post-to-weibo](#baoyu-post-to-weibo) |
|
||||
| **ai-generation-skills** | AI-powered generation backends | [image-gen](#baoyu-image-gen), [danger-gemini-web](#baoyu-danger-gemini-web) |
|
||||
| **utility-skills** | Utility tools for content processing | [youtube-transcript](#baoyu-youtube-transcript), [url-to-markdown](#baoyu-url-to-markdown), [danger-x-to-markdown](#baoyu-danger-x-to-markdown), [compress-image](#baoyu-compress-image), [format-markdown](#baoyu-format-markdown), [markdown-to-html](#baoyu-markdown-to-html), [translate](#baoyu-translate) |
|
||||
The marketplace now exposes a single plugin so each skill is registered exactly once.
|
||||
|
||||
| Plugin | Description | Includes |
|
||||
|--------|-------------|----------|
|
||||
| **baoyu-skills** | Content generation, AI backends, and utility tools for daily work efficiency | All skills in this repository, organized below as Content Skills, AI Generation Skills, and Utility Skills |
|
||||
|
||||
## Update Skills
|
||||
|
||||
|
|
@ -101,7 +99,7 @@ Content generation and publishing skills.
|
|||
|
||||
#### baoyu-xhs-images
|
||||
|
||||
Xiaohongshu (RedNote) infographic series generator. Breaks down content into 1-10 cartoon-style infographics with **Style × Layout** two-dimensional system.
|
||||
Xiaohongshu image card series generator. Breaks down content into 1-10 cartoon-style image cards with **Style × Layout** system and optional palette override.
|
||||
|
||||
```bash
|
||||
# Auto-select style and layout
|
||||
|
|
@ -114,13 +112,22 @@ Xiaohongshu (RedNote) infographic series generator. Breaks down content into 1-1
|
|||
/baoyu-xhs-images posts/ai-future/article.md --layout dense
|
||||
|
||||
# Combine style and layout
|
||||
/baoyu-xhs-images posts/ai-future/article.md --style tech --layout list
|
||||
/baoyu-xhs-images posts/ai-future/article.md --style notion --layout list
|
||||
|
||||
# Override palette
|
||||
/baoyu-xhs-images posts/ai-future/article.md --style notion --palette macaron
|
||||
|
||||
# Direct content input
|
||||
/baoyu-xhs-images 今日星座运势
|
||||
|
||||
# Non-interactive (skip all confirmations, for scheduled tasks)
|
||||
/baoyu-xhs-images posts/ai-future/article.md --yes
|
||||
/baoyu-xhs-images posts/ai-future/article.md --yes --preset knowledge-card
|
||||
```
|
||||
|
||||
**Styles** (visual aesthetics): `cute` (default), `fresh`, `warm`, `bold`, `minimal`, `retro`, `pop`, `notion`, `chalkboard`
|
||||
**Styles** (visual aesthetics): `cute` (default), `fresh`, `warm`, `bold`, `minimal`, `retro`, `pop`, `notion`, `chalkboard`, `study-notes`, `screen-print`, `sketch-notes`
|
||||
|
||||
**Palettes** (optional color override): `macaron`, `warm`, `neon`
|
||||
|
||||
**Style Previews**:
|
||||
|
||||
|
|
@ -154,7 +161,7 @@ Xiaohongshu (RedNote) infographic series generator. Breaks down content into 1-1
|
|||
|
||||
#### baoyu-infographic
|
||||
|
||||
Generate professional infographics with 20 layout types and 17 visual styles. Analyzes content, recommends layout×style combinations, and generates publication-ready infographics.
|
||||
Generate professional infographics with 21 layout types and 21 visual styles. Analyzes content, recommends layout×style combinations, and generates publication-ready infographics.
|
||||
|
||||
```bash
|
||||
# Auto-recommend combinations based on content
|
||||
|
|
@ -265,9 +272,46 @@ Generate professional infographics with 20 layout types and 17 visual styles. An
|
|||
|  |  | |
|
||||
| knolling | lego-brick | |
|
||||
|
||||
#### baoyu-diagram
|
||||
|
||||
Generate publication-ready SVG diagrams from source material — flowcharts, sequence/protocol diagrams, structural/architecture diagrams, and illustrative intuition diagrams. Analyzes input material to recommend diagram type(s) and splitting strategy, confirms the plan once, then generates all diagrams. Claude writes real SVG code directly following a cohesive design system. Output is self-contained `.svg` files with embedded styles and auto dark-mode.
|
||||
|
||||
```bash
|
||||
# Topic string — skill analyzes and proposes a plan
|
||||
/baoyu-diagram "how JWT authentication works"
|
||||
/baoyu-diagram "Kubernetes architecture" --type structural
|
||||
/baoyu-diagram "OAuth 2.0 flow" --type sequence
|
||||
|
||||
# File path — skill reads, analyzes, and proposes a plan
|
||||
/baoyu-diagram path/to/article.md
|
||||
|
||||
# Language and output path
|
||||
/baoyu-diagram "微服务架构" --lang zh
|
||||
/baoyu-diagram "build pipeline" --out docs/build-pipeline.svg
|
||||
```
|
||||
|
||||
**Options**:
|
||||
| Option | Description |
|
||||
|--------|-------------|
|
||||
| `--type <name>` | `flowchart`, `sequence`, `structural`, `illustrative`, `class`, `auto` (default). Skips type recommendation. |
|
||||
| `--lang <code>` | Output language (en, zh, ja, ...) |
|
||||
| `--out <path>` | Output file path. Generates exactly one diagram focused on the most important aspect. |
|
||||
|
||||
**Diagram types**:
|
||||
|
||||
| Type | Reader need | Verbs that trigger it |
|
||||
|------|-------------|------------------------|
|
||||
| `flowchart` | Walk me through the steps in order | walk through, steps, process, lifecycle, workflow, state machine |
|
||||
| `sequence` | Who talks to whom, in what order | protocol, handshake, auth flow, OAuth, TCP, request/response |
|
||||
| `structural` | Show me what's inside what, how it's organised | architecture, components, topology, layout, what's inside |
|
||||
| `illustrative` | Give me the intuition — draw the mechanism | how does X work, explain X, intuition for, why does X do Y |
|
||||
| `class` | What are the types and how are they related | class diagram, UML, inheritance, interface, schema |
|
||||
|
||||
Not an image-generation skill — no LLM image model is called. Claude writes the SVG by hand with hand-computed layout math, so every diagram honors the design system. Embedded `<style>` block with `@media (prefers-color-scheme: dark)` means the same file renders correctly in both light and dark mode anywhere it's embedded.
|
||||
|
||||
#### baoyu-cover-image
|
||||
|
||||
Generate cover images for articles with 5 dimensions: Type × Palette × Rendering × Text × Mood. Combines 9 color palettes with 6 rendering styles for 54 unique combinations.
|
||||
Generate cover images for articles with 5 dimensions: Type × Palette × Rendering × Text × Mood. Combines 11 color palettes with 7 rendering styles for 77 unique combinations.
|
||||
|
||||
```bash
|
||||
# Auto-select all dimensions based on content
|
||||
|
|
@ -292,8 +336,8 @@ Generate cover images for articles with 5 dimensions: Type × Palette × Renderi
|
|||
|
||||
**Five Dimensions**:
|
||||
- **Type**: `hero`, `conceptual`, `typography`, `metaphor`, `scene`, `minimal`
|
||||
- **Palette**: `warm`, `elegant`, `cool`, `dark`, `earth`, `vivid`, `pastel`, `mono`, `retro`
|
||||
- **Rendering**: `flat-vector`, `hand-drawn`, `painterly`, `digital`, `pixel`, `chalk`
|
||||
- **Palette**: `warm`, `elegant`, `cool`, `dark`, `earth`, `vivid`, `pastel`, `mono`, `retro`, `duotone`, `macaron`
|
||||
- **Rendering**: `flat-vector`, `hand-drawn`, `painterly`, `digital`, `pixel`, `chalk`, `screen-print`
|
||||
- **Text**: `none`, `title-only` (default), `title-subtitle`, `text-rich`
|
||||
- **Mood**: `subtle`, `balanced` (default), `bold`
|
||||
|
||||
|
|
@ -471,20 +515,17 @@ Knowledge comic creator with flexible art style × tone combinations. Creates or
|
|||
|
||||
#### baoyu-article-illustrator
|
||||
|
||||
Smart article illustration skill with Type × Style two-dimension approach. Analyzes article structure, identifies positions requiring visual aids, and generates illustrations.
|
||||
Smart article illustration skill with Type × Style × Palette three-dimension approach. Analyzes article structure, identifies positions requiring visual aids, and generates illustrations.
|
||||
|
||||
```bash
|
||||
# Auto-select type and style based on content
|
||||
/baoyu-article-illustrator path/to/article.md
|
||||
|
||||
# Specify type
|
||||
/baoyu-article-illustrator path/to/article.md --type infographic
|
||||
|
||||
# Specify style
|
||||
/baoyu-article-illustrator path/to/article.md --style blueprint
|
||||
|
||||
# Combine type and style
|
||||
# Specify type and style
|
||||
/baoyu-article-illustrator path/to/article.md --type flowchart --style notion
|
||||
|
||||
# With palette override
|
||||
/baoyu-article-illustrator path/to/article.md --style vector-illustration --palette macaron
|
||||
```
|
||||
|
||||
**Types** (information structure):
|
||||
|
|
@ -498,7 +539,7 @@ Smart article illustration skill with Type × Style two-dimension approach. Anal
|
|||
| `framework` | Concept maps, relationship diagrams | Methodologies, architecture |
|
||||
| `timeline` | Chronological progression | History, project progress |
|
||||
|
||||
**Styles** (visual aesthetics):
|
||||
**Styles** (rendering approach):
|
||||
|
||||
| Style | Description | Best For |
|
||||
|-------|-------------|----------|
|
||||
|
|
@ -511,6 +552,14 @@ Smart article illustration skill with Type × Style two-dimension approach. Anal
|
|||
| `editorial` | Magazine-style infographic | Tech explainers, journalism |
|
||||
| `scientific` | Academic precise diagrams | Biology, chemistry, technical |
|
||||
|
||||
**Palettes** (optional color override):
|
||||
|
||||
| Palette | Description | Best For |
|
||||
|---------|-------------|----------|
|
||||
| `macaron` | Soft pastel blocks (blue, mint, lavender, peach) on warm cream | Educational, knowledge, tutorials |
|
||||
| `warm` | Warm earth tones on soft peach, no cool colors | Brand, product, lifestyle |
|
||||
| `neon` | Vibrant neon on dark purple | Gaming, retro, pop culture |
|
||||
|
||||
**Style Previews**:
|
||||
|
||||
| | | |
|
||||
|
|
@ -663,40 +712,67 @@ Post content to Weibo (微博). Supports regular posts with text, images, and vi
|
|||
|
||||
AI-powered generation backends.
|
||||
|
||||
#### baoyu-image-gen
|
||||
#### baoyu-imagine
|
||||
|
||||
AI SDK-based image generation using OpenAI, Google, OpenRouter, DashScope (Aliyun Tongyi Wanxiang), Jimeng (即梦), Seedream (豆包), and Replicate APIs. Supports text-to-image, reference images, aspect ratios, and quality presets.
|
||||
AI SDK-based image generation using OpenAI, Azure OpenAI, Google, OpenRouter, DashScope (Aliyun Tongyi Wanxiang), MiniMax, Jimeng (即梦), Seedream (豆包), and Replicate APIs. Supports text-to-image, reference images, aspect ratios, custom sizes, batch generation, and quality presets.
|
||||
|
||||
```bash
|
||||
# Basic generation (auto-detect provider)
|
||||
/baoyu-image-gen --prompt "A cute cat" --image cat.png
|
||||
/baoyu-imagine --prompt "A cute cat" --image cat.png
|
||||
|
||||
# With aspect ratio
|
||||
/baoyu-image-gen --prompt "A landscape" --image landscape.png --ar 16:9
|
||||
/baoyu-imagine --prompt "A landscape" --image landscape.png --ar 16:9
|
||||
|
||||
# High quality (2k)
|
||||
/baoyu-image-gen --prompt "A banner" --image banner.png --quality 2k
|
||||
/baoyu-imagine --prompt "A banner" --image banner.png --quality 2k
|
||||
|
||||
# Specific provider
|
||||
/baoyu-image-gen --prompt "A cat" --image cat.png --provider openai
|
||||
/baoyu-imagine --prompt "A cat" --image cat.png --provider openai
|
||||
|
||||
# Azure OpenAI (model = deployment name)
|
||||
/baoyu-imagine --prompt "A cat" --image cat.png --provider azure --model gpt-image-1.5
|
||||
|
||||
# OpenRouter
|
||||
/baoyu-image-gen --prompt "A cat" --image cat.png --provider openrouter
|
||||
/baoyu-imagine --prompt "A cat" --image cat.png --provider openrouter
|
||||
|
||||
# OpenRouter with reference images
|
||||
/baoyu-imagine --prompt "Make it blue" --image out.png --provider openrouter --model google/gemini-3.1-flash-image-preview --ref source.png
|
||||
|
||||
# DashScope (Aliyun Tongyi Wanxiang)
|
||||
/baoyu-image-gen --prompt "一只可爱的猫" --image cat.png --provider dashscope
|
||||
/baoyu-imagine --prompt "一只可爱的猫" --image cat.png --provider dashscope
|
||||
|
||||
# Replicate
|
||||
/baoyu-image-gen --prompt "A cat" --image cat.png --provider replicate
|
||||
# DashScope with custom size
|
||||
/baoyu-imagine --prompt "为咖啡品牌设计一张 21:9 横幅海报,包含清晰中文标题" --image banner.png --provider dashscope --model qwen-image-2.0-pro --size 2048x872
|
||||
|
||||
# Z.AI GLM-Image
|
||||
/baoyu-imagine --prompt "一张带清晰中文标题的科技海报" --image out.png --provider zai
|
||||
|
||||
# MiniMax
|
||||
/baoyu-imagine --prompt "A fashion editorial portrait by a bright studio window" --image out.jpg --provider minimax
|
||||
|
||||
# MiniMax with subject reference
|
||||
/baoyu-imagine --prompt "A girl stands by the library window, cinematic lighting" --image out.jpg --provider minimax --model image-01 --ref portrait.png --ar 16:9
|
||||
|
||||
# Replicate (default: google/nano-banana-2)
|
||||
/baoyu-imagine --prompt "A cat" --image cat.png --provider replicate
|
||||
|
||||
# Replicate Seedream 4.5
|
||||
/baoyu-imagine --prompt "A studio portrait" --image portrait.png --provider replicate --model bytedance/seedream-4.5 --ar 3:2
|
||||
|
||||
# Replicate Wan 2.7 Image Pro
|
||||
/baoyu-imagine --prompt "A concept frame" --image frame.png --provider replicate --model wan-video/wan-2.7-image-pro --size 2048x1152
|
||||
|
||||
# Jimeng (即梦)
|
||||
/baoyu-image-gen --prompt "一只可爱的猫" --image cat.png --provider jimeng
|
||||
/baoyu-imagine --prompt "一只可爱的猫" --image cat.png --provider jimeng
|
||||
|
||||
# Seedream (豆包)
|
||||
/baoyu-image-gen --prompt "一只可爱的猫" --image cat.png --provider seedream
|
||||
/baoyu-imagine --prompt "一只可爱的猫" --image cat.png --provider seedream
|
||||
|
||||
# With reference images (Google, OpenAI, OpenRouter, Replicate, or Seedream 5.0/4.5/4.0)
|
||||
/baoyu-image-gen --prompt "Make it blue" --image out.png --ref source.png
|
||||
# With reference images (Google, OpenAI, Azure OpenAI, OpenRouter, Replicate, MiniMax, or Seedream 5.0/4.5/4.0)
|
||||
/baoyu-imagine --prompt "Make it blue" --image out.png --ref source.png
|
||||
|
||||
# Batch mode
|
||||
/baoyu-imagine --batchfile batch.json --jobs 4 --json
|
||||
```
|
||||
|
||||
**Options**:
|
||||
|
|
@ -705,44 +781,85 @@ AI SDK-based image generation using OpenAI, Google, OpenRouter, DashScope (Aliyu
|
|||
| `--prompt`, `-p` | Prompt text |
|
||||
| `--promptfiles` | Read prompt from files (concatenated) |
|
||||
| `--image` | Output image path (required) |
|
||||
| `--provider` | `google`, `openai`, `openrouter`, `dashscope`, `jimeng`, `seedream` or `replicate` (default: auto-detect; prefers google) |
|
||||
| `--model`, `-m` | Model ID |
|
||||
| `--batchfile` | JSON batch file for multi-image generation |
|
||||
| `--jobs` | Worker count for batch mode |
|
||||
| `--provider` | `google`, `openai`, `azure`, `openrouter`, `dashscope`, `zai`, `minimax`, `jimeng`, `seedream`, or `replicate` |
|
||||
| `--model`, `-m` | Model ID or deployment name. Azure uses deployment name; OpenRouter uses full model IDs; Z.AI uses `glm-image`; MiniMax uses `image-01` / `image-01-live` |
|
||||
| `--ar` | Aspect ratio (e.g., `16:9`, `1:1`, `4:3`) |
|
||||
| `--size` | Size (e.g., `1024x1024`) |
|
||||
| `--quality` | `normal` or `2k` (default: `2k`) |
|
||||
| `--ref` | Reference images (Google, OpenAI, OpenRouter, Replicate, or Seedream 5.0/4.5/4.0) |
|
||||
| `--imageSize` | `1K`, `2K`, or `4K` for Google/OpenRouter |
|
||||
| `--imageApiDialect` | `openai-native` or `ratio-metadata` for OpenAI-compatible gateways |
|
||||
| `--ref` | Reference images (Google, OpenAI, Azure OpenAI, OpenRouter, Replicate supported families, MiniMax, or Seedream 5.0/4.5/4.0) |
|
||||
| `--n` | Number of images per request (`replicate` currently requires `--n 1`) |
|
||||
| `--json` | JSON output |
|
||||
|
||||
**Environment Variables** (see [Environment Configuration](#environment-configuration) for setup):
|
||||
| Variable | Description | Default |
|
||||
|----------|-------------|---------|
|
||||
| `OPENAI_API_KEY` | OpenAI API key | - |
|
||||
| `AZURE_OPENAI_API_KEY` | Azure OpenAI API key | - |
|
||||
| `OPENROUTER_API_KEY` | OpenRouter API key | - |
|
||||
| `GOOGLE_API_KEY` | Google API key | - |
|
||||
| `GEMINI_API_KEY` | Alias for `GOOGLE_API_KEY` | - |
|
||||
| `DASHSCOPE_API_KEY` | DashScope API key (Aliyun) | - |
|
||||
| `ZAI_API_KEY` | Z.AI API key | - |
|
||||
| `BIGMODEL_API_KEY` | Backward-compatible alias for Z.AI API key | - |
|
||||
| `MINIMAX_API_KEY` | MiniMax API key | - |
|
||||
| `REPLICATE_API_TOKEN` | Replicate API token | - |
|
||||
| `JIMENG_ACCESS_KEY_ID` | Jimeng Volcengine access key | - |
|
||||
| `JIMENG_SECRET_ACCESS_KEY` | Jimeng Volcengine secret key | - |
|
||||
| `ARK_API_KEY` | Seedream Volcengine ARK API key | - |
|
||||
| `OPENAI_IMAGE_MODEL` | OpenAI model | `gpt-image-1.5` |
|
||||
| `AZURE_OPENAI_DEPLOYMENT` | Azure default deployment name | - |
|
||||
| `AZURE_OPENAI_IMAGE_MODEL` | Backward-compatible Azure deployment/model alias | `gpt-image-1.5` |
|
||||
| `OPENROUTER_IMAGE_MODEL` | OpenRouter model | `google/gemini-3.1-flash-image-preview` |
|
||||
| `GOOGLE_IMAGE_MODEL` | Google model | `gemini-3-pro-image-preview` |
|
||||
| `DASHSCOPE_IMAGE_MODEL` | DashScope model | `qwen-image-2.0-pro` |
|
||||
| `REPLICATE_IMAGE_MODEL` | Replicate model | `google/nano-banana-pro` |
|
||||
| `ZAI_IMAGE_MODEL` | Z.AI model | `glm-image` |
|
||||
| `BIGMODEL_IMAGE_MODEL` | Backward-compatible alias for Z.AI model | `glm-image` |
|
||||
| `MINIMAX_IMAGE_MODEL` | MiniMax model | `image-01` |
|
||||
| `REPLICATE_IMAGE_MODEL` | Replicate model | `google/nano-banana-2` |
|
||||
| `JIMENG_IMAGE_MODEL` | Jimeng model | `jimeng_t2i_v40` |
|
||||
| `SEEDREAM_IMAGE_MODEL` | Seedream model | `doubao-seedream-5-0-260128` |
|
||||
| `OPENAI_BASE_URL` | Custom OpenAI endpoint | - |
|
||||
| `OPENAI_IMAGE_API_DIALECT` | OpenAI-compatible image API dialect (`openai-native` or `ratio-metadata`) | `openai-native` |
|
||||
| `OPENAI_IMAGE_USE_CHAT` | Use `/chat/completions` for OpenAI image generation | `false` |
|
||||
| `AZURE_OPENAI_BASE_URL` | Azure resource or deployment endpoint | - |
|
||||
| `AZURE_API_VERSION` | Azure image API version | `2025-04-01-preview` |
|
||||
| `OPENROUTER_BASE_URL` | Custom OpenRouter endpoint | `https://openrouter.ai/api/v1` |
|
||||
| `OPENROUTER_HTTP_REFERER` | Optional app/site URL for OpenRouter attribution | - |
|
||||
| `OPENROUTER_TITLE` | Optional app name for OpenRouter attribution | - |
|
||||
| `GOOGLE_BASE_URL` | Custom Google endpoint | - |
|
||||
| `DASHSCOPE_BASE_URL` | Custom DashScope endpoint | - |
|
||||
| `ZAI_BASE_URL` | Custom Z.AI endpoint | `https://api.z.ai/api/paas/v4` |
|
||||
| `BIGMODEL_BASE_URL` | Backward-compatible alias for Z.AI endpoint | - |
|
||||
| `MINIMAX_BASE_URL` | Custom MiniMax endpoint | `https://api.minimax.io` |
|
||||
| `REPLICATE_BASE_URL` | Custom Replicate endpoint | - |
|
||||
| `JIMENG_BASE_URL` | Custom Jimeng endpoint | `https://visual.volcengineapi.com` |
|
||||
| `JIMENG_REGION` | Jimeng region | `cn-north-1` |
|
||||
| `SEEDREAM_BASE_URL` | Custom Seedream endpoint | `https://ark.cn-beijing.volces.com/api/v3` |
|
||||
| `BAOYU_IMAGE_GEN_MAX_WORKERS` | Override batch worker cap | `10` |
|
||||
| `BAOYU_IMAGE_GEN_<PROVIDER>_CONCURRENCY` | Override provider concurrency | provider-specific |
|
||||
| `BAOYU_IMAGE_GEN_<PROVIDER>_START_INTERVAL_MS` | Override provider request start gap | provider-specific |
|
||||
|
||||
**Provider Notes**:
|
||||
- Azure OpenAI: `--model` means Azure deployment name, not the underlying model family.
|
||||
- DashScope: `qwen-image-2.0-pro` is the recommended default for custom `--size`, `21:9`, and strong Chinese/English text rendering.
|
||||
- Z.AI: `glm-image` is recommended for posters, diagrams, and text-heavy Chinese/English images. Reference images are not supported.
|
||||
- MiniMax: `image-01` supports documented custom `width` / `height`; `image-01-live` is lower latency and works best with `--ar`.
|
||||
- MiniMax reference images are sent as `subject_reference`; the current API is specialized toward character / portrait consistency.
|
||||
- Jimeng does not support reference images.
|
||||
- Seedream reference images are supported by Seedream 5.0 / 4.5 / 4.0, not Seedream 3.0.
|
||||
- Replicate defaults to `google/nano-banana-2`. `baoyu-imagine` only enables Replicate advanced options for `google/nano-banana*`, `bytedance/seedream-4.5`, `bytedance/seedream-5-lite`, `wan-video/wan-2.7-image`, and `wan-video/wan-2.7-image-pro`.
|
||||
- Replicate currently saves exactly one output image per request. `--n > 1` is blocked locally instead of silently dropping extra results.
|
||||
- Replicate model behavior is family-specific: nano-banana uses `--quality` / `--ar`, Seedream uses validated `--size` / `--ar`, and Wan uses validated `--size` (with `--ar` converted locally to a concrete size).
|
||||
|
||||
**Provider Auto-Selection**:
|
||||
1. If `--provider` specified → use it
|
||||
2. If only one API key available → use that provider
|
||||
3. If multiple available → default to Google
|
||||
1. If `--provider` is specified → use it
|
||||
2. If `--ref` is provided and no provider is specified → try Google, then OpenAI, Azure, OpenRouter, Replicate, Seedream, and finally MiniMax
|
||||
3. If only one API key is available → use that provider
|
||||
4. If multiple providers are available → default to Google, then OpenAI, Azure, OpenRouter, DashScope, Z.AI, MiniMax, Replicate, Jimeng, Seedream
|
||||
|
||||
#### baoyu-danger-gemini-web
|
||||
|
||||
|
|
@ -1000,7 +1117,7 @@ Custom style descriptions are also accepted, e.g., `--style "poetic and lyrical"
|
|||
Some skills require API keys or custom configuration. Environment variables can be set in `.env` files:
|
||||
|
||||
**Load Priority** (higher priority overrides lower):
|
||||
1. CLI environment variables (e.g., `OPENAI_API_KEY=xxx /baoyu-image-gen ...`)
|
||||
1. CLI environment variables (e.g., `OPENAI_API_KEY=xxx /baoyu-imagine ...`)
|
||||
2. `process.env` (system environment)
|
||||
3. `<cwd>/.baoyu-skills/.env` (project-level)
|
||||
4. `~/.baoyu-skills/.env` (user-level)
|
||||
|
|
@ -1017,11 +1134,20 @@ cat > ~/.baoyu-skills/.env << 'EOF'
|
|||
OPENAI_API_KEY=sk-xxx
|
||||
OPENAI_IMAGE_MODEL=gpt-image-1.5
|
||||
# OPENAI_BASE_URL=https://api.openai.com/v1
|
||||
# OPENAI_IMAGE_USE_CHAT=false
|
||||
|
||||
# Azure OpenAI
|
||||
AZURE_OPENAI_API_KEY=xxx
|
||||
AZURE_OPENAI_BASE_URL=https://your-resource.openai.azure.com
|
||||
AZURE_OPENAI_DEPLOYMENT=gpt-image-1.5
|
||||
# AZURE_API_VERSION=2025-04-01-preview
|
||||
|
||||
# OpenRouter
|
||||
OPENROUTER_API_KEY=sk-or-xxx
|
||||
OPENROUTER_IMAGE_MODEL=google/gemini-3.1-flash-image-preview
|
||||
# OPENROUTER_BASE_URL=https://openrouter.ai/api/v1
|
||||
# OPENROUTER_HTTP_REFERER=https://your-app.example.com
|
||||
# OPENROUTER_TITLE=Your App Name
|
||||
|
||||
# Google
|
||||
GOOGLE_API_KEY=xxx
|
||||
|
|
@ -1033,9 +1159,19 @@ DASHSCOPE_API_KEY=sk-xxx
|
|||
DASHSCOPE_IMAGE_MODEL=qwen-image-2.0-pro
|
||||
# DASHSCOPE_BASE_URL=https://dashscope.aliyuncs.com/api/v1
|
||||
|
||||
# Z.AI
|
||||
ZAI_API_KEY=xxx
|
||||
ZAI_IMAGE_MODEL=glm-image
|
||||
# ZAI_BASE_URL=https://api.z.ai/api/paas/v4
|
||||
|
||||
# MiniMax
|
||||
MINIMAX_API_KEY=xxx
|
||||
MINIMAX_IMAGE_MODEL=image-01
|
||||
# MINIMAX_BASE_URL=https://api.minimax.io
|
||||
|
||||
# Replicate
|
||||
REPLICATE_API_TOKEN=r8_xxx
|
||||
REPLICATE_IMAGE_MODEL=google/nano-banana-pro
|
||||
REPLICATE_IMAGE_MODEL=google/nano-banana-2
|
||||
# REPLICATE_BASE_URL=https://api.replicate.com
|
||||
|
||||
# Jimeng (即梦)
|
||||
|
|
@ -1128,6 +1264,7 @@ This project was inspired by and builds upon the following open source projects:
|
|||
- [doocs/md](https://github.com/doocs/md) by [@doocs](https://github.com/doocs) — Core implementation logic for Markdown to HTML conversion
|
||||
- [High-density Infographic Prompt](https://waytoagi.feishu.cn/wiki/YG0zwalijihRREkgmPzcWRInnUg) by AJ@WaytoAGI — Inspiration for the infographic skill
|
||||
- [qiaomu-mondo-poster-design](https://github.com/joeseesun/qiaomu-mondo-poster-design) by [@joeseesun](https://github.com/joeseesun)(乔木) — Inspiration for the Mondo style
|
||||
- [architecture-diagram-generator](https://github.com/Cocoon-AI/architecture-diagram-generator) by [@Cocoon-AI](https://github.com/Cocoon-AI) — Inspiration for the diagram skill's design system
|
||||
|
||||
## License
|
||||
|
||||
|
|
|
|||
231
README.zh.md
231
README.zh.md
|
|
@ -32,7 +32,7 @@ npx skills add jimliu/baoyu-skills
|
|||
ClawHub 按“单个 skill”安装,不是把整个 marketplace 一次性装进去。发布后,用户可以按需安装:
|
||||
|
||||
```bash
|
||||
clawhub install baoyu-image-gen
|
||||
clawhub install baoyu-imagine
|
||||
clawhub install baoyu-markdown-to-html
|
||||
```
|
||||
|
||||
|
|
@ -52,16 +52,14 @@ clawhub install baoyu-markdown-to-html
|
|||
|
||||
1. 选择 **Browse and install plugins**
|
||||
2. 选择 **baoyu-skills**
|
||||
3. 选择要安装的插件
|
||||
3. 选择 **baoyu-skills** 插件
|
||||
4. 选择 **Install now**
|
||||
|
||||
**方式二:直接安装**
|
||||
|
||||
```bash
|
||||
# 安装指定插件
|
||||
/plugin install content-skills@baoyu-skills
|
||||
/plugin install ai-generation-skills@baoyu-skills
|
||||
/plugin install utility-skills@baoyu-skills
|
||||
# 安装 marketplace 中唯一的插件
|
||||
/plugin install baoyu-skills@baoyu-skills
|
||||
```
|
||||
|
||||
**方式三:告诉 Agent**
|
||||
|
|
@ -72,11 +70,11 @@ clawhub install baoyu-markdown-to-html
|
|||
|
||||
### 可用插件
|
||||
|
||||
| 插件 | 说明 | 包含技能 |
|
||||
现在 marketplace 只暴露一个插件,这样每个 skill 只会注册一次。
|
||||
|
||||
| 插件 | 说明 | 包含内容 |
|
||||
|------|------|----------|
|
||||
| **content-skills** | 内容生成和发布 | [xhs-images](#baoyu-xhs-images), [infographic](#baoyu-infographic), [cover-image](#baoyu-cover-image), [slide-deck](#baoyu-slide-deck), [comic](#baoyu-comic), [article-illustrator](#baoyu-article-illustrator), [post-to-x](#baoyu-post-to-x), [post-to-wechat](#baoyu-post-to-wechat), [post-to-weibo](#baoyu-post-to-weibo) |
|
||||
| **ai-generation-skills** | AI 生成后端 | [image-gen](#baoyu-image-gen), [danger-gemini-web](#baoyu-danger-gemini-web) |
|
||||
| **utility-skills** | 内容处理工具 | [youtube-transcript](#baoyu-youtube-transcript), [url-to-markdown](#baoyu-url-to-markdown), [danger-x-to-markdown](#baoyu-danger-x-to-markdown), [compress-image](#baoyu-compress-image), [format-markdown](#baoyu-format-markdown), [markdown-to-html](#baoyu-markdown-to-html), [translate](#baoyu-translate) |
|
||||
| **baoyu-skills** | 提供内容生成、AI 后端和日常效率工具技能 | 仓库中的全部 skills,仍按下方的内容技能、AI 生成技能、工具技能三个分类展示 |
|
||||
|
||||
## 更新技能
|
||||
|
||||
|
|
@ -101,7 +99,7 @@ clawhub install baoyu-markdown-to-html
|
|||
|
||||
#### baoyu-xhs-images
|
||||
|
||||
小红书信息图系列生成器。将内容拆解为 1-10 张卡通风格信息图,支持 **风格 × 布局** 二维系统。
|
||||
小红书图片卡片系列生成器。将内容拆解为 1-10 张卡通风格图片卡片,支持 **风格 × 布局** 系统和可选配色覆盖。
|
||||
|
||||
```bash
|
||||
# 自动选择风格和布局
|
||||
|
|
@ -114,13 +112,22 @@ clawhub install baoyu-markdown-to-html
|
|||
/baoyu-xhs-images posts/ai-future/article.md --layout dense
|
||||
|
||||
# 组合风格和布局
|
||||
/baoyu-xhs-images posts/ai-future/article.md --style tech --layout list
|
||||
/baoyu-xhs-images posts/ai-future/article.md --style notion --layout list
|
||||
|
||||
# 覆盖配色
|
||||
/baoyu-xhs-images posts/ai-future/article.md --style notion --palette macaron
|
||||
|
||||
# 直接输入内容
|
||||
/baoyu-xhs-images 今日星座运势
|
||||
|
||||
# 非交互模式(跳过所有确认,适用于定时任务)
|
||||
/baoyu-xhs-images posts/ai-future/article.md --yes
|
||||
/baoyu-xhs-images posts/ai-future/article.md --yes --preset knowledge-card
|
||||
```
|
||||
|
||||
**风格**(视觉美学):`cute`(默认)、`fresh`、`warm`、`bold`、`minimal`、`retro`、`pop`、`notion`、`chalkboard`
|
||||
**风格**(视觉美学):`cute`(默认)、`fresh`、`warm`、`bold`、`minimal`、`retro`、`pop`、`notion`、`chalkboard`、`study-notes`、`screen-print`、`sketch-notes`
|
||||
|
||||
**配色**(可选颜色覆盖):`macaron`、`warm`、`neon`
|
||||
|
||||
**风格预览**:
|
||||
|
||||
|
|
@ -154,7 +161,7 @@ clawhub install baoyu-markdown-to-html
|
|||
|
||||
#### baoyu-infographic
|
||||
|
||||
专业信息图生成器,支持 20 种布局和 17 种视觉风格。分析内容后推荐布局×风格组合,生成可发布的信息图。
|
||||
专业信息图生成器,支持 21 种布局和 21 种视觉风格。分析内容后推荐布局×风格组合,生成可发布的信息图。
|
||||
|
||||
```bash
|
||||
# 根据内容自动推荐组合
|
||||
|
|
@ -265,9 +272,46 @@ clawhub install baoyu-markdown-to-html
|
|||
|  |  | |
|
||||
| knolling | lego-brick | |
|
||||
|
||||
#### baoyu-diagram
|
||||
|
||||
从源素材生成可直接发布的 SVG 图表 —— 包括流程图、时序/协议图、架构/结构图、示意图(直觉图解)。分析输入素材,推荐图表类型和拆分策略,一次确认后批量生成。Claude 直接输出符合统一设计规范的真实 SVG 代码,产物是自包含的 `.svg` 文件,内嵌样式并自动支持深色模式。
|
||||
|
||||
```bash
|
||||
# 主题描述 —— 技能分析并提出方案
|
||||
/baoyu-diagram "JWT 认证流程是怎么工作的"
|
||||
/baoyu-diagram "Kubernetes 架构" --type structural
|
||||
/baoyu-diagram "OAuth 2.0 流程" --type sequence
|
||||
|
||||
# 文件路径 —— 技能读取、分析并提出方案
|
||||
/baoyu-diagram path/to/article.md
|
||||
|
||||
# 语言和输出路径
|
||||
/baoyu-diagram "微服务架构" --lang zh
|
||||
/baoyu-diagram "build pipeline" --out docs/build-pipeline.svg
|
||||
```
|
||||
|
||||
**参数**:
|
||||
| 参数 | 说明 |
|
||||
|------|------|
|
||||
| `--type <name>` | `flowchart`、`sequence`、`structural`、`illustrative`、`class`、`auto`(默认)。跳过类型推荐直接生成。 |
|
||||
| `--lang <code>` | 输出语言(en、zh、ja 等) |
|
||||
| `--out <path>` | 输出文件路径。生成聚焦于最重要内容的单张图表。 |
|
||||
|
||||
**五种图表类型**:
|
||||
|
||||
| 类型 | 适用场景 | 触发动词 |
|
||||
|------|----------|----------|
|
||||
| `flowchart` | 按顺序走一遍流程 | 流程、步骤、工作流、生命周期、状态机 |
|
||||
| `sequence` | 谁和谁通信、按什么顺序 | 协议、握手、认证流程、OAuth、TCP、请求/响应 |
|
||||
| `structural` | 展示什么包含什么、如何组织 | 架构、组件、拓扑、布局、什么在什么里面 |
|
||||
| `illustrative` | 建立直觉 —— 画出机制本身 | 怎么工作、原理、为什么、直观解释 |
|
||||
| `class` | 类型是什么、它们如何关联 | 类图、UML、继承、接口、数据模型 |
|
||||
|
||||
本技能不调用任何图像生成模型 —— Claude 通过手算坐标直接写 SVG 代码,确保每个图表都遵守设计规范。内嵌的 `<style>` 块包含 `@media (prefers-color-scheme: dark)`,同一个文件在浅色和深色模式下均正确渲染,可嵌入到任意支持 SVG 的宿主环境中。
|
||||
|
||||
#### baoyu-cover-image
|
||||
|
||||
为文章生成封面图,支持五维定制系统:类型 × 配色 × 渲染 × 文字 × 氛围。9 种配色方案与 6 种渲染风格组合,提供 54 种独特效果。
|
||||
为文章生成封面图,支持五维定制系统:类型 × 配色 × 渲染 × 文字 × 氛围。11 种配色方案与 7 种渲染风格组合,提供 77 种独特效果。
|
||||
|
||||
```bash
|
||||
# 根据内容自动选择所有维度
|
||||
|
|
@ -292,8 +336,8 @@ clawhub install baoyu-markdown-to-html
|
|||
|
||||
**五个维度**:
|
||||
- **类型 (Type)**:`hero`、`conceptual`、`typography`、`metaphor`、`scene`、`minimal`
|
||||
- **配色 (Palette)**:`warm`、`elegant`、`cool`、`dark`、`earth`、`vivid`、`pastel`、`mono`、`retro`
|
||||
- **渲染 (Rendering)**:`flat-vector`、`hand-drawn`、`painterly`、`digital`、`pixel`、`chalk`
|
||||
- **配色 (Palette)**:`warm`、`elegant`、`cool`、`dark`、`earth`、`vivid`、`pastel`、`mono`、`retro`、`duotone`、`macaron`
|
||||
- **渲染 (Rendering)**:`flat-vector`、`hand-drawn`、`painterly`、`digital`、`pixel`、`chalk`、`screen-print`
|
||||
- **文字 (Text)**:`none`、`title-only`(默认)、`title-subtitle`、`text-rich`
|
||||
- **氛围 (Mood)**:`subtle`、`balanced`(默认)、`bold`
|
||||
|
||||
|
|
@ -471,20 +515,17 @@ clawhub install baoyu-markdown-to-html
|
|||
|
||||
#### baoyu-article-illustrator
|
||||
|
||||
智能文章插图技能,采用类型 × 风格二维系统。分析文章结构,识别需要视觉辅助的位置,生成插图。
|
||||
智能文章插图技能,采用类型 × 风格 × 色板三维系统。分析文章结构,识别需要视觉辅助的位置,生成插图。
|
||||
|
||||
```bash
|
||||
# 根据内容自动选择类型和风格
|
||||
/baoyu-article-illustrator path/to/article.md
|
||||
|
||||
# 指定类型
|
||||
/baoyu-article-illustrator path/to/article.md --type infographic
|
||||
|
||||
# 指定风格
|
||||
/baoyu-article-illustrator path/to/article.md --style blueprint
|
||||
|
||||
# 组合类型和风格
|
||||
/baoyu-article-illustrator path/to/article.md --type flowchart --style notion
|
||||
|
||||
# 使用色板覆盖
|
||||
/baoyu-article-illustrator path/to/article.md --style vector-illustration --palette macaron
|
||||
```
|
||||
|
||||
**类型**(信息结构):
|
||||
|
|
@ -498,7 +539,7 @@ clawhub install baoyu-markdown-to-html
|
|||
| `framework` | 概念图、关系图 | 方法论、架构 |
|
||||
| `timeline` | 时间线进展 | 历史、项目进度 |
|
||||
|
||||
**风格**(视觉美学):
|
||||
**风格**(渲染手法):
|
||||
|
||||
| 风格 | 描述 | 适用场景 |
|
||||
|------|------|----------|
|
||||
|
|
@ -511,6 +552,14 @@ clawhub install baoyu-markdown-to-html
|
|||
| `editorial` | 杂志风格信息图 | 科技解说、新闻 |
|
||||
| `scientific` | 学术精确图表 | 生物、化学、技术 |
|
||||
|
||||
**色板**(可选配色覆盖):
|
||||
|
||||
| 色板 | 描述 | 适用场景 |
|
||||
|------|------|----------|
|
||||
| `macaron` | 马卡龙柔和色块(浅蓝、浅绿、浅紫、浅橙)暖白底 | 教育、知识分享、教程 |
|
||||
| `warm` | 暖色系(橙、赭石、金)无冷色 | 品牌、产品、生活方式 |
|
||||
| `neon` | 霓虹色(粉、青、黄)深色底 | 游戏、复古、潮流 |
|
||||
|
||||
**风格预览**:
|
||||
|
||||
| | | |
|
||||
|
|
@ -663,40 +712,67 @@ accounts:
|
|||
|
||||
AI 驱动的生成后端。
|
||||
|
||||
#### baoyu-image-gen
|
||||
#### baoyu-imagine
|
||||
|
||||
基于 AI SDK 的图像生成,支持 OpenAI、Google、OpenRouter、DashScope(阿里通义万相)、即梦(Jimeng)、豆包(Seedream)和 Replicate API。支持文生图、参考图、宽高比和质量预设。
|
||||
基于 AI SDK 的图像生成,支持 OpenAI、Azure OpenAI、Google、OpenRouter、DashScope(阿里通义万相)、MiniMax、即梦(Jimeng)、豆包(Seedream)和 Replicate API。支持文生图、参考图、宽高比、自定义尺寸、批量生成和质量预设。
|
||||
|
||||
```bash
|
||||
# 基础生成(自动检测服务商)
|
||||
/baoyu-image-gen --prompt "一只可爱的猫" --image cat.png
|
||||
/baoyu-imagine --prompt "一只可爱的猫" --image cat.png
|
||||
|
||||
# 指定宽高比
|
||||
/baoyu-image-gen --prompt "风景图" --image landscape.png --ar 16:9
|
||||
/baoyu-imagine --prompt "风景图" --image landscape.png --ar 16:9
|
||||
|
||||
# 高质量(2k 分辨率)
|
||||
/baoyu-image-gen --prompt "横幅图" --image banner.png --quality 2k
|
||||
/baoyu-imagine --prompt "横幅图" --image banner.png --quality 2k
|
||||
|
||||
# 指定服务商
|
||||
/baoyu-image-gen --prompt "一只猫" --image cat.png --provider openai
|
||||
/baoyu-imagine --prompt "一只猫" --image cat.png --provider openai
|
||||
|
||||
# Azure OpenAI(model 为部署名称)
|
||||
/baoyu-imagine --prompt "一只猫" --image cat.png --provider azure --model gpt-image-1.5
|
||||
|
||||
# OpenRouter
|
||||
/baoyu-image-gen --prompt "一只猫" --image cat.png --provider openrouter
|
||||
/baoyu-imagine --prompt "一只猫" --image cat.png --provider openrouter
|
||||
|
||||
# OpenRouter + 参考图
|
||||
/baoyu-imagine --prompt "把它变成蓝色" --image out.png --provider openrouter --model google/gemini-3.1-flash-image-preview --ref source.png
|
||||
|
||||
# DashScope(阿里通义万相)
|
||||
/baoyu-image-gen --prompt "一只可爱的猫" --image cat.png --provider dashscope
|
||||
/baoyu-imagine --prompt "一只可爱的猫" --image cat.png --provider dashscope
|
||||
|
||||
# Replicate
|
||||
/baoyu-image-gen --prompt "一只猫" --image cat.png --provider replicate
|
||||
# DashScope 自定义尺寸
|
||||
/baoyu-imagine --prompt "为咖啡品牌设计一张 21:9 横幅海报,包含清晰中文标题" --image banner.png --provider dashscope --model qwen-image-2.0-pro --size 2048x872
|
||||
|
||||
# Z.AI GLM-Image
|
||||
/baoyu-imagine --prompt "一张带清晰中文标题的科技海报" --image out.png --provider zai
|
||||
|
||||
# MiniMax
|
||||
/baoyu-imagine --prompt "A fashion editorial portrait by a bright studio window" --image out.jpg --provider minimax
|
||||
|
||||
# MiniMax + 角色参考图
|
||||
/baoyu-imagine --prompt "A girl stands by the library window, cinematic lighting" --image out.jpg --provider minimax --model image-01 --ref portrait.png --ar 16:9
|
||||
|
||||
# Replicate(默认:google/nano-banana-2)
|
||||
/baoyu-imagine --prompt "一只猫" --image cat.png --provider replicate
|
||||
|
||||
# Replicate Seedream 4.5
|
||||
/baoyu-imagine --prompt "一张影棚人像" --image portrait.png --provider replicate --model bytedance/seedream-4.5 --ar 3:2
|
||||
|
||||
# Replicate Wan 2.7 Image Pro
|
||||
/baoyu-imagine --prompt "一张概念分镜" --image frame.png --provider replicate --model wan-video/wan-2.7-image-pro --size 2048x1152
|
||||
|
||||
# 即梦(Jimeng)
|
||||
/baoyu-image-gen --prompt "一只可爱的猫" --image cat.png --provider jimeng
|
||||
/baoyu-imagine --prompt "一只可爱的猫" --image cat.png --provider jimeng
|
||||
|
||||
# 豆包(Seedream)
|
||||
/baoyu-image-gen --prompt "一只可爱的猫" --image cat.png --provider seedream
|
||||
/baoyu-imagine --prompt "一只可爱的猫" --image cat.png --provider seedream
|
||||
|
||||
# 带参考图(Google、OpenAI、OpenRouter、Replicate 或 Seedream 5.0/4.5/4.0)
|
||||
/baoyu-image-gen --prompt "把它变成蓝色" --image out.png --ref source.png
|
||||
# 带参考图(Google、OpenAI、Azure OpenAI、OpenRouter、Replicate、MiniMax 或 Seedream 5.0/4.5/4.0)
|
||||
/baoyu-imagine --prompt "把它变成蓝色" --image out.png --ref source.png
|
||||
|
||||
# 批量模式
|
||||
/baoyu-imagine --batchfile batch.json --jobs 4 --json
|
||||
```
|
||||
|
||||
**选项**:
|
||||
|
|
@ -705,44 +781,85 @@ AI 驱动的生成后端。
|
|||
| `--prompt`, `-p` | 提示词文本 |
|
||||
| `--promptfiles` | 从文件读取提示词(多文件拼接) |
|
||||
| `--image` | 输出图片路径(必需) |
|
||||
| `--provider` | `google`、`openai`、`openrouter`、`dashscope`、`jimeng`、`seedream` 或 `replicate`(默认:自动检测,优先 google) |
|
||||
| `--model`, `-m` | 模型 ID |
|
||||
| `--batchfile` | 多图批量生成的 JSON 文件 |
|
||||
| `--jobs` | 批量模式的并发 worker 数 |
|
||||
| `--provider` | `google`、`openai`、`azure`、`openrouter`、`dashscope`、`zai`、`minimax`、`jimeng`、`seedream` 或 `replicate` |
|
||||
| `--model`, `-m` | 模型 ID 或部署名。Azure 使用部署名;OpenRouter 使用完整模型 ID;Z.AI 使用 `glm-image`;MiniMax 使用 `image-01` / `image-01-live` |
|
||||
| `--ar` | 宽高比(如 `16:9`、`1:1`、`4:3`) |
|
||||
| `--size` | 尺寸(如 `1024x1024`) |
|
||||
| `--quality` | `normal` 或 `2k`(默认:`2k`) |
|
||||
| `--ref` | 参考图片(Google、OpenAI、OpenRouter、Replicate 或 Seedream 5.0/4.5/4.0) |
|
||||
| `--imageSize` | Google/OpenRouter 使用的 `1K`、`2K`、`4K` |
|
||||
| `--imageApiDialect` | OpenAI 兼容网关的图像 API 方言(`openai-native` 或 `ratio-metadata`) |
|
||||
| `--ref` | 参考图片(Google、OpenAI、Azure OpenAI、OpenRouter、Replicate 支持的模型家族、MiniMax 或 Seedream 5.0/4.5/4.0) |
|
||||
| `--n` | 单次请求生成图片数量(`replicate` 当前只支持 `--n 1`) |
|
||||
| `--json` | 输出 JSON 结果 |
|
||||
|
||||
**环境变量**(配置方法见[环境配置](#环境配置)):
|
||||
| 变量 | 说明 | 默认值 |
|
||||
|------|------|--------|
|
||||
| `OPENAI_API_KEY` | OpenAI API 密钥 | - |
|
||||
| `AZURE_OPENAI_API_KEY` | Azure OpenAI API 密钥 | - |
|
||||
| `OPENROUTER_API_KEY` | OpenRouter API 密钥 | - |
|
||||
| `GOOGLE_API_KEY` | Google API 密钥 | - |
|
||||
| `GEMINI_API_KEY` | `GOOGLE_API_KEY` 的别名 | - |
|
||||
| `DASHSCOPE_API_KEY` | DashScope API 密钥(阿里云) | - |
|
||||
| `ZAI_API_KEY` | Z.AI API 密钥 | - |
|
||||
| `BIGMODEL_API_KEY` | Z.AI API 密钥向后兼容别名 | - |
|
||||
| `MINIMAX_API_KEY` | MiniMax API 密钥 | - |
|
||||
| `REPLICATE_API_TOKEN` | Replicate API Token | - |
|
||||
| `JIMENG_ACCESS_KEY_ID` | 即梦火山引擎 Access Key | - |
|
||||
| `JIMENG_SECRET_ACCESS_KEY` | 即梦火山引擎 Secret Key | - |
|
||||
| `ARK_API_KEY` | 豆包火山引擎 ARK API 密钥 | - |
|
||||
| `OPENAI_IMAGE_MODEL` | OpenAI 模型 | `gpt-image-1.5` |
|
||||
| `AZURE_OPENAI_DEPLOYMENT` | Azure 默认部署名 | - |
|
||||
| `AZURE_OPENAI_IMAGE_MODEL` | 兼容旧配置的 Azure 部署/模型别名 | `gpt-image-1.5` |
|
||||
| `OPENROUTER_IMAGE_MODEL` | OpenRouter 模型 | `google/gemini-3.1-flash-image-preview` |
|
||||
| `GOOGLE_IMAGE_MODEL` | Google 模型 | `gemini-3-pro-image-preview` |
|
||||
| `DASHSCOPE_IMAGE_MODEL` | DashScope 模型 | `qwen-image-2.0-pro` |
|
||||
| `REPLICATE_IMAGE_MODEL` | Replicate 模型 | `google/nano-banana-pro` |
|
||||
| `ZAI_IMAGE_MODEL` | Z.AI 模型 | `glm-image` |
|
||||
| `BIGMODEL_IMAGE_MODEL` | Z.AI 模型向后兼容别名 | `glm-image` |
|
||||
| `MINIMAX_IMAGE_MODEL` | MiniMax 模型 | `image-01` |
|
||||
| `REPLICATE_IMAGE_MODEL` | Replicate 模型 | `google/nano-banana-2` |
|
||||
| `JIMENG_IMAGE_MODEL` | 即梦模型 | `jimeng_t2i_v40` |
|
||||
| `SEEDREAM_IMAGE_MODEL` | 豆包模型 | `doubao-seedream-5-0-260128` |
|
||||
| `OPENAI_BASE_URL` | 自定义 OpenAI 端点 | - |
|
||||
| `OPENAI_IMAGE_API_DIALECT` | OpenAI 兼容图像 API 方言(`openai-native` 或 `ratio-metadata`) | `openai-native` |
|
||||
| `OPENAI_IMAGE_USE_CHAT` | OpenAI 改走 `/chat/completions` | `false` |
|
||||
| `AZURE_OPENAI_BASE_URL` | Azure 资源或部署端点 | - |
|
||||
| `AZURE_API_VERSION` | Azure 图像 API 版本 | `2025-04-01-preview` |
|
||||
| `OPENROUTER_BASE_URL` | 自定义 OpenRouter 端点 | `https://openrouter.ai/api/v1` |
|
||||
| `OPENROUTER_HTTP_REFERER` | OpenRouter 归因用站点 URL | - |
|
||||
| `OPENROUTER_TITLE` | OpenRouter 归因用应用名 | - |
|
||||
| `GOOGLE_BASE_URL` | 自定义 Google 端点 | - |
|
||||
| `DASHSCOPE_BASE_URL` | 自定义 DashScope 端点 | - |
|
||||
| `ZAI_BASE_URL` | 自定义 Z.AI 端点 | `https://api.z.ai/api/paas/v4` |
|
||||
| `BIGMODEL_BASE_URL` | Z.AI 端点向后兼容别名 | - |
|
||||
| `MINIMAX_BASE_URL` | 自定义 MiniMax 端点 | `https://api.minimax.io` |
|
||||
| `REPLICATE_BASE_URL` | 自定义 Replicate 端点 | - |
|
||||
| `JIMENG_BASE_URL` | 自定义即梦端点 | `https://visual.volcengineapi.com` |
|
||||
| `JIMENG_REGION` | 即梦区域 | `cn-north-1` |
|
||||
| `SEEDREAM_BASE_URL` | 自定义豆包端点 | `https://ark.cn-beijing.volces.com/api/v3` |
|
||||
| `BAOYU_IMAGE_GEN_MAX_WORKERS` | 批量模式最大 worker 数 | `10` |
|
||||
| `BAOYU_IMAGE_GEN_<PROVIDER>_CONCURRENCY` | 覆盖 provider 并发数 | provider 默认值 |
|
||||
| `BAOYU_IMAGE_GEN_<PROVIDER>_START_INTERVAL_MS` | 覆盖 provider 请求启动间隔 | provider 默认值 |
|
||||
|
||||
**Provider 说明**:
|
||||
- Azure OpenAI:`--model` 表示 Azure deployment name,不是底层模型家族名。
|
||||
- DashScope:`qwen-image-2.0-pro` 是自定义 `--size`、`21:9` 和中英文排版的推荐默认模型。
|
||||
- Z.AI:`glm-image` 适合海报、图表和中英文排版密集的图片生成,暂不支持参考图。
|
||||
- MiniMax:`image-01` 支持官方文档里的自定义 `width` / `height`;`image-01-live` 更偏低延迟,适合配合 `--ar` 使用。
|
||||
- MiniMax 参考图会走 `subject_reference`,当前能力更偏角色 / 人像一致性。
|
||||
- 即梦不支持参考图。
|
||||
- 豆包参考图能力仅适用于 Seedream 5.0 / 4.5 / 4.0,不适用于 Seedream 3.0。
|
||||
- Replicate 默认模型改为 `google/nano-banana-2`。`baoyu-imagine` 目前只对 `google/nano-banana*`、`bytedance/seedream-4.5`、`bytedance/seedream-5-lite`、`wan-video/wan-2.7-image` 和 `wan-video/wan-2.7-image-pro` 开启本地能力识别与校验。
|
||||
- Replicate 当前只保存单张输出图,`--n > 1` 会在本地直接报错,避免多图结果被静默丢弃。
|
||||
- Replicate 的参数能力按模型家族区分:nano-banana 走 `--quality` / `--ar`,Seedream 走校验后的 `--size` / `--ar`,Wan 走校验后的 `--size`(`--ar` 会先在本地换算成具体尺寸)。
|
||||
|
||||
**服务商自动选择**:
|
||||
1. 如果指定了 `--provider` → 使用指定的
|
||||
2. 如果只有一个 API 密钥 → 使用对应服务商
|
||||
3. 如果多个可用 → 默认使用 Google
|
||||
2. 如果传了 `--ref` 且未指定 provider → 依次尝试 Google、OpenAI、Azure、OpenRouter、Replicate、Seedream,最后是 MiniMax
|
||||
3. 如果只有一个 API 密钥 → 使用对应服务商
|
||||
4. 如果多个可用 → 默认使用 Google,然后依次为 OpenAI、Azure、OpenRouter、DashScope、Z.AI、MiniMax、Replicate、即梦、豆包
|
||||
|
||||
#### baoyu-danger-gemini-web
|
||||
|
||||
|
|
@ -1000,7 +1117,7 @@ AI 驱动的生成后端。
|
|||
部分技能需要 API 密钥或自定义配置。环境变量可以在 `.env` 文件中设置:
|
||||
|
||||
**加载优先级**(高优先级覆盖低优先级):
|
||||
1. 命令行环境变量(如 `OPENAI_API_KEY=xxx /baoyu-image-gen ...`)
|
||||
1. 命令行环境变量(如 `OPENAI_API_KEY=xxx /baoyu-imagine ...`)
|
||||
2. `process.env`(系统环境变量)
|
||||
3. `<cwd>/.baoyu-skills/.env`(项目级)
|
||||
4. `~/.baoyu-skills/.env`(用户级)
|
||||
|
|
@ -1017,11 +1134,20 @@ cat > ~/.baoyu-skills/.env << 'EOF'
|
|||
OPENAI_API_KEY=sk-xxx
|
||||
OPENAI_IMAGE_MODEL=gpt-image-1.5
|
||||
# OPENAI_BASE_URL=https://api.openai.com/v1
|
||||
# OPENAI_IMAGE_USE_CHAT=false
|
||||
|
||||
# Azure OpenAI
|
||||
AZURE_OPENAI_API_KEY=xxx
|
||||
AZURE_OPENAI_BASE_URL=https://your-resource.openai.azure.com
|
||||
AZURE_OPENAI_DEPLOYMENT=gpt-image-1.5
|
||||
# AZURE_API_VERSION=2025-04-01-preview
|
||||
|
||||
# OpenRouter
|
||||
OPENROUTER_API_KEY=sk-or-xxx
|
||||
OPENROUTER_IMAGE_MODEL=google/gemini-3.1-flash-image-preview
|
||||
# OPENROUTER_BASE_URL=https://openrouter.ai/api/v1
|
||||
# OPENROUTER_HTTP_REFERER=https://your-app.example.com
|
||||
# OPENROUTER_TITLE=你的应用名
|
||||
|
||||
# Google
|
||||
GOOGLE_API_KEY=xxx
|
||||
|
|
@ -1033,9 +1159,19 @@ DASHSCOPE_API_KEY=sk-xxx
|
|||
DASHSCOPE_IMAGE_MODEL=qwen-image-2.0-pro
|
||||
# DASHSCOPE_BASE_URL=https://dashscope.aliyuncs.com/api/v1
|
||||
|
||||
# Z.AI
|
||||
ZAI_API_KEY=xxx
|
||||
ZAI_IMAGE_MODEL=glm-image
|
||||
# ZAI_BASE_URL=https://api.z.ai/api/paas/v4
|
||||
|
||||
# MiniMax
|
||||
MINIMAX_API_KEY=xxx
|
||||
MINIMAX_IMAGE_MODEL=image-01
|
||||
# MINIMAX_BASE_URL=https://api.minimax.io
|
||||
|
||||
# Replicate
|
||||
REPLICATE_API_TOKEN=r8_xxx
|
||||
REPLICATE_IMAGE_MODEL=google/nano-banana-pro
|
||||
REPLICATE_IMAGE_MODEL=google/nano-banana-2
|
||||
# REPLICATE_BASE_URL=https://api.replicate.com
|
||||
|
||||
# 即梦(Jimeng)
|
||||
|
|
@ -1128,6 +1264,7 @@ HTTP_PROXY=http://127.0.0.1:7890 HTTPS_PROXY=http://127.0.0.1:7890 /baoyu-danger
|
|||
- [doocs/md](https://github.com/doocs/md) by [@doocs](https://github.com/doocs) — Markdown 转 HTML 的核心实现逻辑
|
||||
- [高密度信息图 Prompt](https://waytoagi.feishu.cn/wiki/YG0zwalijihRREkgmPzcWRInnUg) by AJ@WaytoAGI — 信息图技能的灵感来源
|
||||
- [qiaomu-mondo-poster-design](https://github.com/joeseesun/qiaomu-mondo-poster-design) by [@joeseesun](https://github.com/joeseesun)(乔木) — Mondo 风格的灵感来源
|
||||
- [architecture-diagram-generator](https://github.com/Cocoon-AI/architecture-diagram-generator) by [@Cocoon-AI](https://github.com/Cocoon-AI) — 图表技能设计体系的灵感来源
|
||||
|
||||
## 许可证
|
||||
|
||||
|
|
|
|||
|
|
@ -34,20 +34,22 @@ metadata:
|
|||
1. Create `skills/baoyu-<name>/SKILL.md` with YAML front matter
|
||||
2. Add TypeScript in `skills/baoyu-<name>/scripts/` (if applicable)
|
||||
3. Add prompt templates in `skills/baoyu-<name>/prompts/` if needed
|
||||
4. Register in `marketplace.json` under appropriate category
|
||||
4. Register the skill in `.claude-plugin/marketplace.json` under the `baoyu-skills` plugin entry
|
||||
5. Add Script Directory section to SKILL.md if skill has scripts
|
||||
6. Add openclaw metadata to frontmatter
|
||||
|
||||
## Category Selection
|
||||
## Skill Grouping
|
||||
|
||||
| If your skill... | Use category |
|
||||
|------------------|--------------|
|
||||
| Generates visual content (images, slides, comics) | `content-skills` |
|
||||
| Publishes to platforms (X, WeChat, Weibo) | `content-skills` |
|
||||
| Provides AI generation backend | `ai-generation-skills` |
|
||||
| Converts or processes content | `utility-skills` |
|
||||
All skills are registered under the single `baoyu-skills` plugin. Use these logical groups when deciding where the skill should appear in the docs:
|
||||
|
||||
New category: add plugin object to `marketplace.json` with `name`, `description`, `skills[]`.
|
||||
| If your skill... | Use group |
|
||||
|------------------|-----------|
|
||||
| Generates visual content (images, slides, comics) | Content Skills |
|
||||
| Publishes to platforms (X, WeChat, Weibo) | Content Skills |
|
||||
| Provides AI generation backend | AI Generation Skills |
|
||||
| Converts or processes content | Utility Skills |
|
||||
|
||||
If you add a new logical group, update the docs that present grouped skills, but keep the skill registered under the single `baoyu-skills` plugin entry.
|
||||
|
||||
## Writing Descriptions
|
||||
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@ Skills that require image generation MUST delegate to available image generation
|
|||
|
||||
## Skill Selection
|
||||
|
||||
**Default**: `skills/baoyu-image-gen/SKILL.md` (unless user specifies otherwise).
|
||||
**Default**: `skills/baoyu-imagine/SKILL.md` (unless user specifies otherwise).
|
||||
|
||||
1. Read skill's SKILL.md for parameters and capabilities
|
||||
2. If user requests different skill, check `skills/` for alternatives
|
||||
|
|
@ -16,7 +16,7 @@ Skills that require image generation MUST delegate to available image generation
|
|||
### Step N: Generate Images
|
||||
|
||||
**Skill Selection**:
|
||||
1. Check available skills (`baoyu-image-gen` default, or `baoyu-danger-gemini-web`)
|
||||
1. Check available skills (`baoyu-imagine` default, or `baoyu-danger-gemini-web`)
|
||||
2. Read selected skill's SKILL.md for parameters
|
||||
3. If multiple skills available, ask user to choose
|
||||
|
||||
|
|
@ -27,7 +27,7 @@ Skills that require image generation MUST delegate to available image generation
|
|||
4. On failure, auto-retry once before reporting error
|
||||
```
|
||||
|
||||
**Batch Parallel** (`baoyu-image-gen` only): concurrent workers with per-provider throttling via `batch.max_workers` in EXTEND.md.
|
||||
**Batch Parallel** (`baoyu-imagine` only): concurrent workers with per-provider throttling via `batch.max_workers` in EXTEND.md.
|
||||
|
||||
## Output Path Convention
|
||||
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load Diff
13
package.json
13
package.json
|
|
@ -6,10 +6,19 @@
|
|||
"packages/*"
|
||||
],
|
||||
"scripts": {
|
||||
"test": "node --import tsx --test",
|
||||
"test:coverage": "node --import tsx --experimental-test-coverage --test"
|
||||
"test": "node ./scripts/run-node-tests.mjs",
|
||||
"test:coverage": "node ./scripts/run-node-tests.mjs --experimental-test-coverage"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@mozilla/readability": "^0.6.0",
|
||||
"linkedom": "^0.18.12",
|
||||
"turndown": "^7.2.2",
|
||||
"turndown-plugin-gfm": "^1.0.2",
|
||||
"tsx": "^4.20.5"
|
||||
},
|
||||
"dependencies": {
|
||||
"pdf-lib": "^1.17.1",
|
||||
"pptxgenjs": "^4.0.1",
|
||||
"sharp": "^0.34.5"
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3,6 +3,9 @@
|
|||
"private": true,
|
||||
"version": "0.1.0",
|
||||
"type": "module",
|
||||
"files": [
|
||||
"src"
|
||||
],
|
||||
"exports": {
|
||||
".": "./src/index.ts"
|
||||
}
|
||||
|
|
|
|||
|
|
@ -11,6 +11,7 @@ import {
|
|||
discoverRunningChromeDebugPort,
|
||||
findChromeExecutable,
|
||||
findExistingChromeDebugPort,
|
||||
gracefulKillChrome,
|
||||
getFreePort,
|
||||
openPageSession,
|
||||
resolveSharedChromeProfileDir,
|
||||
|
|
@ -110,6 +111,44 @@ async function stopProcess(child: ChildProcess | null): Promise<void> {
|
|||
await new Promise((resolve) => child.once("exit", resolve));
|
||||
}
|
||||
|
||||
async function startPortHoldingProcess(port: number): Promise<ChildProcess> {
|
||||
const child = spawn(
|
||||
process.execPath,
|
||||
[
|
||||
"-e",
|
||||
`
|
||||
const http = require("node:http");
|
||||
const port = Number(process.argv[1]);
|
||||
const server = http.createServer((_req, res) => res.end("ok"));
|
||||
server.listen(port, "127.0.0.1", () => process.stdout.write("ready\\n"));
|
||||
setInterval(() => {}, 1000);
|
||||
`,
|
||||
String(port),
|
||||
],
|
||||
{
|
||||
stdio: ["ignore", "pipe", "ignore"],
|
||||
},
|
||||
);
|
||||
|
||||
await new Promise<void>((resolve, reject) => {
|
||||
const timer = setTimeout(() => reject(new Error("Timed out waiting for child server to start.")), 3_000);
|
||||
child.once("error", (error) => {
|
||||
clearTimeout(timer);
|
||||
reject(error);
|
||||
});
|
||||
child.stdout?.once("data", () => {
|
||||
clearTimeout(timer);
|
||||
resolve();
|
||||
});
|
||||
child.once("exit", () => {
|
||||
clearTimeout(timer);
|
||||
reject(new Error("Child server exited before becoming ready."));
|
||||
});
|
||||
});
|
||||
|
||||
return child;
|
||||
}
|
||||
|
||||
test("getFreePort honors a fixed environment override and otherwise allocates a TCP port", async (t) => {
|
||||
useEnv(t, { TEST_FIXED_PORT: "45678" });
|
||||
assert.equal(await getFreePort("TEST_FIXED_PORT"), 45678);
|
||||
|
|
@ -305,3 +344,19 @@ test("waitForChromeDebugPort retries until the debug endpoint becomes available"
|
|||
|
||||
assert.equal(websocketUrl, `ws://127.0.0.1:${port}/devtools/browser/demo`);
|
||||
});
|
||||
|
||||
test("gracefulKillChrome waits for the Chrome process to exit and release its port", async (t) => {
|
||||
const port = await getFreePort();
|
||||
const child = await startPortHoldingProcess(port);
|
||||
t.after(async () => { await stopProcess(child); });
|
||||
|
||||
assert.equal(await waitForChromeDebugPort(port, 1_000).catch(() => null), null);
|
||||
|
||||
await gracefulKillChrome(child, port, 4_000);
|
||||
|
||||
assert.ok(child.exitCode !== null || child.signalCode !== null);
|
||||
assert.equal(
|
||||
await fetch(`http://127.0.0.1:${port}`).then(() => true).catch(() => false),
|
||||
false,
|
||||
);
|
||||
});
|
||||
|
|
|
|||
|
|
@ -478,7 +478,7 @@ export function killChrome(chrome: ChildProcess): void {
|
|||
chrome.kill("SIGTERM");
|
||||
} catch {}
|
||||
setTimeout(() => {
|
||||
if (!chrome.killed) {
|
||||
if (chrome.exitCode === null && chrome.signalCode === null) {
|
||||
try {
|
||||
chrome.kill("SIGKILL");
|
||||
} catch {}
|
||||
|
|
@ -486,6 +486,37 @@ export function killChrome(chrome: ChildProcess): void {
|
|||
}, 2_000).unref?.();
|
||||
}
|
||||
|
||||
export async function gracefulKillChrome(
|
||||
chrome: ChildProcess,
|
||||
port?: number,
|
||||
timeoutMs = 6_000,
|
||||
): Promise<void> {
|
||||
if (chrome.exitCode !== null || chrome.signalCode !== null) return;
|
||||
|
||||
const exitPromise = new Promise<void>((resolve) => {
|
||||
chrome.once("exit", () => resolve());
|
||||
});
|
||||
|
||||
killChrome(chrome);
|
||||
|
||||
const start = Date.now();
|
||||
while (Date.now() - start < timeoutMs) {
|
||||
if (chrome.exitCode !== null || chrome.signalCode !== null) return;
|
||||
if (port !== undefined && !await isPortListening(port, 250)) return;
|
||||
|
||||
const exited = await Promise.race([
|
||||
exitPromise.then(() => true),
|
||||
sleep(100).then(() => false),
|
||||
]);
|
||||
if (exited) return;
|
||||
}
|
||||
|
||||
await Promise.race([
|
||||
exitPromise,
|
||||
sleep(250),
|
||||
]);
|
||||
}
|
||||
|
||||
export async function openPageSession(options: OpenPageSessionOptions): Promise<PageSession> {
|
||||
let targetId: string;
|
||||
let createdTarget = false;
|
||||
|
|
|
|||
|
|
@ -0,0 +1,11 @@
|
|||
# Changesets
|
||||
|
||||
This folder stores release notes for version bumps managed by Changesets.
|
||||
|
||||
Create a new changeset before merging a user-facing change:
|
||||
|
||||
```bash
|
||||
bunx changeset
|
||||
```
|
||||
|
||||
After the changeset lands on `main`, GitHub Actions will open or update the release PR automatically. Merging that release PR publishes the next npm version.
|
||||
|
|
@ -0,0 +1,11 @@
|
|||
{
|
||||
"$schema": "https://unpkg.com/@changesets/config@3.1.3/schema.json",
|
||||
"changelog": "@changesets/cli/changelog",
|
||||
"commit": false,
|
||||
"fixed": [],
|
||||
"linked": [],
|
||||
"access": "public",
|
||||
"baseBranch": "main",
|
||||
"updateInternalDependencies": "patch",
|
||||
"ignore": []
|
||||
}
|
||||
|
|
@ -0,0 +1,5 @@
|
|||
---
|
||||
"baoyu-reader": patch
|
||||
---
|
||||
|
||||
Add a `defuddle.md` markdown fallback for generic extraction when the CLI is asked to produce markdown output.
|
||||
|
|
@ -0,0 +1,58 @@
|
|||
name: Release
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
workflow_dispatch:
|
||||
|
||||
concurrency: ${{ github.workflow }}-${{ github.ref }}
|
||||
|
||||
permissions:
|
||||
contents: write
|
||||
pull-requests: write
|
||||
id-token: write
|
||||
|
||||
jobs:
|
||||
release:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v5
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Setup Bun
|
||||
uses: oven-sh/setup-bun@v2
|
||||
with:
|
||||
bun-version: 1.2.23
|
||||
|
||||
- name: Setup Node.js
|
||||
uses: actions/setup-node@v6
|
||||
with:
|
||||
node-version: 24
|
||||
registry-url: https://registry.npmjs.org
|
||||
|
||||
- name: Install dependencies
|
||||
run: bun install --frozen-lockfile
|
||||
|
||||
- name: Type check
|
||||
run: bun run check
|
||||
|
||||
- name: Test
|
||||
run: bun test
|
||||
|
||||
- name: Verify npm package contents
|
||||
run: npm pack --dry-run
|
||||
|
||||
- name: Create release PR or publish to npm
|
||||
id: changesets
|
||||
uses: changesets/action@v1
|
||||
with:
|
||||
version: bun run version-packages
|
||||
publish: bun run release
|
||||
commit: "chore: release"
|
||||
title: "chore: release"
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
|
@ -0,0 +1,6 @@
|
|||
node_modules
|
||||
dist
|
||||
.DS_Store
|
||||
.debug
|
||||
coverage
|
||||
|
||||
|
|
@ -0,0 +1,50 @@
|
|||
# Changelog
|
||||
|
||||
English | [简体中文](./CHANGELOG.zh-CN.md)
|
||||
|
||||
The format is inspired by Keep a Changelog, and the project follows Semantic
|
||||
Versioning.
|
||||
|
||||
## [0.1.1] - 2026-03-27
|
||||
|
||||
#### Added
|
||||
|
||||
- Added the `hn` adapter for Hacker News stories and comment threads.
|
||||
- Added `--download-media` and `--media-dir` to download extracted media and
|
||||
rewrite Markdown links.
|
||||
- Added Defuddle as the first generic extraction pass, with Readability +
|
||||
HTML-to-Markdown as fallback.
|
||||
- Added interactive wait modes for login and verification flows, including
|
||||
manual verification handoff and force-wait resume behavior.
|
||||
- Added `--format markdown|json` while keeping `--json` as a compatibility
|
||||
alias.
|
||||
- Added Changesets-based release automation for npm publishing.
|
||||
|
||||
#### Changed
|
||||
|
||||
- Renamed the package and CLI from `baoyu-markdown` to `baoyu-fetch`.
|
||||
- Changed the published package to run `src/cli.ts` directly with Bun instead of
|
||||
shipping a prebuilt `dist`.
|
||||
- Improved X extraction for threads, articles, note tweets, embeds, image URLs,
|
||||
login state handling, and media metadata.
|
||||
- Improved YouTube transcript extraction and normalized Markdown image output.
|
||||
|
||||
#### Fixed
|
||||
|
||||
- Fixed X note tweet URL expansion.
|
||||
- Fixed media URL normalization before download, including Substack media links.
|
||||
- Fixed foreground behavior for interactive flows so manual steps are easier to
|
||||
complete.
|
||||
|
||||
## [0.1.0] - 2026-03-25
|
||||
|
||||
#### Added
|
||||
|
||||
- Initial public release as `baoyu-markdown`.
|
||||
- Added Chrome CDP session management, controlled tabs, and network journaling.
|
||||
- Added built-in adapters for `x`, `youtube`, and the generic fallback.
|
||||
- Added X article parsing, X single/tweet extraction, and YouTube transcript
|
||||
extraction.
|
||||
- Added Markdown rendering, document metadata output, and CLI support for file
|
||||
output, JSON output, debug exports, custom Chrome connection settings,
|
||||
headless mode, and timeout control.
|
||||
|
|
@ -0,0 +1,43 @@
|
|||
# 更新日志
|
||||
|
||||
[English](./CHANGELOG.md) | 简体中文
|
||||
|
||||
格式参考 Keep a Changelog,版本号遵循 Semantic Versioning。
|
||||
|
||||
## [0.1.1] - 2026-03-27
|
||||
|
||||
### 新增
|
||||
|
||||
- 新增 `hn` adapter,可提取 Hacker News 帖子与评论串。
|
||||
- 新增 `--download-media` 和 `--media-dir`,可下载提取出的媒体文件并重写
|
||||
Markdown 链接。
|
||||
- 通用提取链路新增 Defuddle 首选路径,并保留 Readability + HTML to Markdown
|
||||
作为回退方案。
|
||||
- 新增登录/验证场景的交互等待模式,支持手动验证接管和 force wait 自动恢复。
|
||||
- 新增 `--format markdown|json`,同时保留 `--json` 作为兼容别名。
|
||||
- 新增基于 Changesets 的 npm 发版自动化流程。
|
||||
|
||||
### 变更
|
||||
|
||||
- 将包名和 CLI 名称从 `baoyu-markdown` 更名为 `baoyu-fetch`。
|
||||
- npm 发布物改为直接以 Bun 执行 `src/cli.ts`,不再附带预构建的 `dist`。
|
||||
- 强化 X 提取链路,覆盖 thread、article、note tweet、embed、图片 URL、
|
||||
登录态判断与媒体元数据。
|
||||
- 增强 YouTube transcript 提取,并规范化 Markdown 图片输出。
|
||||
|
||||
### 修复
|
||||
|
||||
- 修复 X note tweet 的 URL 展开问题。
|
||||
- 修复媒体下载前的 URL 规范化问题,包括 Substack 媒体链接。
|
||||
- 修复交互模式的前台行为,使手动登录/验证流程更稳定。
|
||||
|
||||
## [0.1.0] - 2026-03-25
|
||||
|
||||
### 新增
|
||||
|
||||
- `baoyu-markdown` 的首个公开版本。
|
||||
- 新增 Chrome CDP 会话管理、受控 tab 与网络日志采集能力。
|
||||
- 新增内置 `x`、`youtube` 与通用 fallback adapters。
|
||||
- 新增 X article 解析、X 单条内容提取,以及 YouTube transcript 提取。
|
||||
- 新增 Markdown 渲染与文档元数据输出,并提供文件输出、JSON 输出、调试导出、
|
||||
自定义 Chrome 连接、headless 模式和超时控制等 CLI 能力。
|
||||
|
|
@ -0,0 +1,124 @@
|
|||
# baoyu-fetch
|
||||
|
||||
English | [简体中文](./README.zh-CN.md) | [Changelog](./CHANGELOG.md) | [中文更新日志](./CHANGELOG.zh-CN.md)
|
||||
|
||||
`baoyu-fetch` is a Bun CLI built on Chrome CDP. Give it a URL and it returns
|
||||
high-quality `markdown` or `json`. When a site adapter matches, it prefers API
|
||||
responses or structured page data; otherwise it falls back to generic HTML
|
||||
extraction.
|
||||
|
||||
## Features
|
||||
|
||||
- Capture rendered page content through Chrome CDP
|
||||
- Observe network requests and responses, and fetch bodies when needed
|
||||
- Adapter registry that auto-selects a handler from the URL
|
||||
- Built-in adapters for `x`, `youtube`, and `hn`
|
||||
- Generic fallback: Defuddle first, then Readability + HTML-to-Markdown; when `--format markdown` is requested, it can also fall back to `defuddle.md`
|
||||
- Print `markdown` / `json` to stdout or save with `--output`
|
||||
- Optionally download extracted images or videos and rewrite Markdown links
|
||||
- Optional wait modes for login and verification flows
|
||||
- Chrome profile defaults to `baoyu-skills/chrome-profile`
|
||||
|
||||
## Installation
|
||||
|
||||
```bash
|
||||
bun install
|
||||
```
|
||||
|
||||
For package usage, the quickest option is:
|
||||
|
||||
```bash
|
||||
bunx baoyu-fetch https://example.com
|
||||
```
|
||||
|
||||
You can also install it globally:
|
||||
|
||||
```bash
|
||||
npm install -g baoyu-fetch
|
||||
```
|
||||
|
||||
The npm package ships TypeScript source entrypoints instead of a prebuilt
|
||||
`dist`, so Bun is required at runtime.
|
||||
|
||||
## Usage
|
||||
|
||||
```bash
|
||||
bun run src/cli.ts https://example.com
|
||||
bunx baoyu-fetch https://example.com
|
||||
baoyu-fetch https://example.com
|
||||
baoyu-fetch https://example.com --format markdown --output article.md
|
||||
baoyu-fetch https://example.com --format markdown --output article.md --download-media
|
||||
baoyu-fetch https://x.com/jack/status/20 --format json --output article.json
|
||||
baoyu-fetch https://x.com/jack/status/20 --json
|
||||
baoyu-fetch https://x.com/jack/status/20 --wait-for interaction
|
||||
baoyu-fetch https://x.com/jack/status/20 --wait-for force
|
||||
baoyu-fetch https://x.com/jack/status/20 --chrome-profile-dir ~/Library/Application\\ Support/baoyu-skills/chrome-profile
|
||||
```
|
||||
|
||||
## Options
|
||||
|
||||
```bash
|
||||
baoyu-fetch <url> [options]
|
||||
|
||||
Options:
|
||||
--output <file> Save output to file
|
||||
--format <type> Output format: markdown | json
|
||||
--json Alias for --format json
|
||||
--adapter <name> Force an adapter (for example x / hn / generic)
|
||||
--download-media Download adapter-reported media into ./imgs and ./videos, then rewrite markdown links
|
||||
--media-dir <dir> Base directory for downloaded media. Defaults to the output directory
|
||||
--debug-dir <dir> Write debug artifacts (html, document.json, network.json)
|
||||
--cdp-url <url> Reuse an existing Chrome DevTools endpoint
|
||||
--browser-path <path> Explicit Chrome binary path
|
||||
--chrome-profile-dir <path>
|
||||
Chrome user data dir. Defaults to BAOYU_CHROME_PROFILE_DIR
|
||||
or baoyu-skills/chrome-profile
|
||||
--headless Launch a temporary headless Chrome if needed
|
||||
--wait-for <mode> Wait mode: interaction | force
|
||||
--wait-for-interaction
|
||||
Alias for --wait-for interaction
|
||||
--wait-for-login Alias for --wait-for interaction
|
||||
--interaction-timeout <ms>
|
||||
Manual interaction timeout. Default: 600000
|
||||
--interaction-poll-interval <ms>
|
||||
Poll interval while waiting. Default: 1500
|
||||
--login-timeout <ms> Alias for --interaction-timeout
|
||||
--login-poll-interval <ms>
|
||||
Alias for --interaction-poll-interval
|
||||
--timeout <ms> Page load timeout. Default: 30000
|
||||
--help Show help
|
||||
```
|
||||
|
||||
## How It Works
|
||||
|
||||
1. The CLI parses the target URL and options.
|
||||
2. It opens or connects to a Chrome CDP session and creates a controlled tab.
|
||||
3. `NetworkJournal` records requests and responses.
|
||||
4. The adapter registry resolves a site-specific adapter when possible.
|
||||
5. The adapter returns a structured `ExtractedDocument`.
|
||||
6. If nothing matches, generic HTML extraction runs instead.
|
||||
7. The result is rendered as Markdown, or returned as JSON with both
|
||||
`document` and `markdown`.
|
||||
|
||||
## Development
|
||||
|
||||
```bash
|
||||
bun run check
|
||||
bun run test
|
||||
bun run build
|
||||
```
|
||||
|
||||
## Release
|
||||
|
||||
When you make a user-visible change, add a changeset first:
|
||||
|
||||
```bash
|
||||
bunx changeset
|
||||
```
|
||||
|
||||
After the generated `.changeset/*.md` file lands on `main`, GitHub Actions will
|
||||
open or update the release PR. Merging that release PR publishes the package to
|
||||
npm.
|
||||
|
||||
The publish flow does not build `dist`; it publishes `src/*.ts` for Bun
|
||||
execution directly.
|
||||
|
|
@ -0,0 +1,122 @@
|
|||
# baoyu-fetch
|
||||
|
||||
[English](./README.md) | 简体中文 | [更新日志](./CHANGELOG.zh-CN.md) | [English Changelog](./CHANGELOG.md)
|
||||
|
||||
`baoyu-fetch` 是一个基于 Chrome CDP 的 Bun CLI。输入 URL,它会输出高质量
|
||||
`markdown` 或 `json`;命中站点 adapter 时优先消费 API 返回或页面内结构化
|
||||
数据,未命中时回退到通用 HTML 提取。
|
||||
|
||||
## 当前能力
|
||||
|
||||
- 通过 Chrome CDP 抓取渲染后的页面内容
|
||||
- 监听网络请求与响应,按需拉取响应体
|
||||
- adapter registry,支持按 URL 自动命中站点处理器
|
||||
- 内置 `x`、`youtube`、`hn` adapters
|
||||
- 通用 fallback:Defuddle 优先,Readability + HTML to Markdown 回退;`--format markdown` 时会再尝试 `defuddle.md` 兜底
|
||||
- `stdout` 或 `--output` 输出 `markdown` / `json`
|
||||
- 可选下载提取出的图片/视频并重写 Markdown 链接
|
||||
- 提供登录/验证场景下的交互等待模式
|
||||
- Chrome profile 默认对齐 `baoyu-skills/chrome-profile`
|
||||
|
||||
## 安装
|
||||
|
||||
```bash
|
||||
bun install
|
||||
```
|
||||
|
||||
作为包使用时,推荐直接这样运行:
|
||||
|
||||
```bash
|
||||
bunx baoyu-fetch https://example.com
|
||||
```
|
||||
|
||||
也可以全局安装:
|
||||
|
||||
```bash
|
||||
npm install -g baoyu-fetch
|
||||
```
|
||||
|
||||
npm 包发布的是 TypeScript 源码入口,不包含预编译的 `dist`,所以运行时需要
|
||||
Bun。
|
||||
|
||||
## 用法
|
||||
|
||||
```bash
|
||||
bun run src/cli.ts https://example.com
|
||||
bunx baoyu-fetch https://example.com
|
||||
baoyu-fetch https://example.com
|
||||
baoyu-fetch https://example.com --format markdown --output article.md
|
||||
baoyu-fetch https://example.com --format markdown --output article.md --download-media
|
||||
baoyu-fetch https://x.com/jack/status/20 --format json --output article.json
|
||||
baoyu-fetch https://x.com/jack/status/20 --json
|
||||
baoyu-fetch https://x.com/jack/status/20 --wait-for interaction
|
||||
baoyu-fetch https://x.com/jack/status/20 --wait-for force
|
||||
baoyu-fetch https://x.com/jack/status/20 --chrome-profile-dir ~/Library/Application\ Support/baoyu-skills/chrome-profile
|
||||
```
|
||||
|
||||
## 主要参数
|
||||
|
||||
```bash
|
||||
baoyu-fetch <url> [options]
|
||||
|
||||
Options:
|
||||
--output <file> 保存输出内容到文件
|
||||
--format <type> 输出格式:markdown | json
|
||||
--json `--format json` 的兼容别名
|
||||
--adapter <name> 强制使用指定 adapter(如 x / hn / generic)
|
||||
--download-media 下载 adapter 返回的媒体到 ./imgs 和 ./videos,并重写 markdown 链接
|
||||
--media-dir <dir> 指定媒体下载根目录;默认使用输出文件所在目录
|
||||
--debug-dir <dir> 导出调试信息(html、document.json、network.json)
|
||||
--cdp-url <url> 连接现有 Chrome 调试地址
|
||||
--browser-path <path> 指定 Chrome 可执行文件
|
||||
--chrome-profile-dir <path>
|
||||
指定 Chrome profile 目录。默认使用 BAOYU_CHROME_PROFILE_DIR,
|
||||
否则回退到 baoyu-skills/chrome-profile
|
||||
--headless 启动临时 headless Chrome(未连现有实例时)
|
||||
--wait-for <mode> 等待模式:interaction | force
|
||||
--wait-for-interaction
|
||||
`--wait-for interaction` 的别名
|
||||
--wait-for-login `--wait-for interaction` 的别名
|
||||
--interaction-timeout <ms>
|
||||
手动交互等待超时,默认 600000
|
||||
--interaction-poll-interval <ms>
|
||||
等待期间的轮询间隔,默认 1500
|
||||
--login-timeout <ms> `--interaction-timeout` 的别名
|
||||
--login-poll-interval <ms>
|
||||
`--interaction-poll-interval` 的别名
|
||||
--timeout <ms> 页面加载超时,默认 30000
|
||||
--help 显示帮助
|
||||
```
|
||||
|
||||
## 设计
|
||||
|
||||
核心链路:
|
||||
|
||||
1. CLI 解析 URL 和选项
|
||||
2. 建立 CDP 会话并创建受控 tab
|
||||
3. 启动 `NetworkJournal` 收集所有请求/响应
|
||||
4. 由 adapter registry 匹配站点 adapter
|
||||
5. adapter 返回结构化 `ExtractedDocument`
|
||||
6. 没命中则走通用 HTML 提取
|
||||
7. 按请求输出 Markdown,或输出包含 `document` 和 `markdown` 的 JSON
|
||||
|
||||
## 开发
|
||||
|
||||
```bash
|
||||
bun run check
|
||||
bun run test
|
||||
bun run build
|
||||
```
|
||||
|
||||
## 发版
|
||||
|
||||
新增用户可见改动后,先添加一个 changeset:
|
||||
|
||||
```bash
|
||||
bunx changeset
|
||||
```
|
||||
|
||||
把生成的 `.changeset/*.md` 一起合并到 `main` 后,GitHub Actions 会自动创建或
|
||||
更新 release PR;合并 release PR 之后,会自动发布到 npm。
|
||||
|
||||
发布流程不会编译 `dist`,而是直接把 `src/*.ts` 发布出去供 Bun 执行。
|
||||
|
|
@ -0,0 +1,514 @@
|
|||
{
|
||||
"lockfileVersion": 1,
|
||||
"workspaces": {
|
||||
"": {
|
||||
"name": "baoyu-fetch",
|
||||
"dependencies": {
|
||||
"@mozilla/readability": "^0.6.0",
|
||||
"chrome-launcher": "^1.2.1",
|
||||
"defuddle": "^0.14.0",
|
||||
"jsdom": "^26.0.0",
|
||||
"remark-gfm": "^4.0.1",
|
||||
"remark-parse": "^11.0.0",
|
||||
"turndown": "^7.2.0",
|
||||
"turndown-plugin-gfm": "^1.0.2",
|
||||
"unified": "^11.0.5",
|
||||
"ws": "^8.18.3",
|
||||
},
|
||||
"devDependencies": {
|
||||
"@changesets/cli": "^2.30.0",
|
||||
"@types/bun": "^1.2.23",
|
||||
"@types/jsdom": "^21.1.7",
|
||||
"@types/ws": "^8.18.1",
|
||||
"typescript": "^5.9.2",
|
||||
},
|
||||
},
|
||||
},
|
||||
"packages": {
|
||||
"@asamuzakjp/css-color": ["@asamuzakjp/css-color@3.2.0", "", { "dependencies": { "@csstools/css-calc": "^2.1.3", "@csstools/css-color-parser": "^3.0.9", "@csstools/css-parser-algorithms": "^3.0.4", "@csstools/css-tokenizer": "^3.0.3", "lru-cache": "^10.4.3" } }, "sha512-K1A6z8tS3XsmCMM86xoWdn7Fkdn9m6RSVtocUrJYIwZnFVkng/PvkEoWtOWmP+Scc6saYWHWZYbndEEXxl24jw=="],
|
||||
|
||||
"@babel/runtime": ["@babel/runtime@7.29.2", "", {}, "sha512-JiDShH45zKHWyGe4ZNVRrCjBz8Nh9TMmZG1kh4QTK8hCBTWBi8Da+i7s1fJw7/lYpM4ccepSNfqzZ/QvABBi5g=="],
|
||||
|
||||
"@changesets/apply-release-plan": ["@changesets/apply-release-plan@7.1.0", "", { "dependencies": { "@changesets/config": "^3.1.3", "@changesets/get-version-range-type": "^0.4.0", "@changesets/git": "^3.0.4", "@changesets/should-skip-package": "^0.1.2", "@changesets/types": "^6.1.0", "@manypkg/get-packages": "^1.1.3", "detect-indent": "^6.0.0", "fs-extra": "^7.0.1", "lodash.startcase": "^4.4.0", "outdent": "^0.5.0", "prettier": "^2.7.1", "resolve-from": "^5.0.0", "semver": "^7.5.3" } }, "sha512-yq8ML3YS7koKQ/9bk1PqO0HMzApIFNwjlwCnwFEXMzNe8NpzeeYYKCmnhWJGkN8g7E51MnWaSbqRcTcdIxUgnQ=="],
|
||||
|
||||
"@changesets/assemble-release-plan": ["@changesets/assemble-release-plan@6.0.9", "", { "dependencies": { "@changesets/errors": "^0.2.0", "@changesets/get-dependents-graph": "^2.1.3", "@changesets/should-skip-package": "^0.1.2", "@changesets/types": "^6.1.0", "@manypkg/get-packages": "^1.1.3", "semver": "^7.5.3" } }, "sha512-tPgeeqCHIwNo8sypKlS3gOPmsS3wP0zHt67JDuL20P4QcXiw/O4Hl7oXiuLnP9yg+rXLQ2sScdV1Kkzde61iSQ=="],
|
||||
|
||||
"@changesets/changelog-git": ["@changesets/changelog-git@0.2.1", "", { "dependencies": { "@changesets/types": "^6.1.0" } }, "sha512-x/xEleCFLH28c3bQeQIyeZf8lFXyDFVn1SgcBiR2Tw/r4IAWlk1fzxCEZ6NxQAjF2Nwtczoen3OA2qR+UawQ8Q=="],
|
||||
|
||||
"@changesets/cli": ["@changesets/cli@2.30.0", "", { "dependencies": { "@changesets/apply-release-plan": "^7.1.0", "@changesets/assemble-release-plan": "^6.0.9", "@changesets/changelog-git": "^0.2.1", "@changesets/config": "^3.1.3", "@changesets/errors": "^0.2.0", "@changesets/get-dependents-graph": "^2.1.3", "@changesets/get-release-plan": "^4.0.15", "@changesets/git": "^3.0.4", "@changesets/logger": "^0.1.1", "@changesets/pre": "^2.0.2", "@changesets/read": "^0.6.7", "@changesets/should-skip-package": "^0.1.2", "@changesets/types": "^6.1.0", "@changesets/write": "^0.4.0", "@inquirer/external-editor": "^1.0.2", "@manypkg/get-packages": "^1.1.3", "ansi-colors": "^4.1.3", "enquirer": "^2.4.1", "fs-extra": "^7.0.1", "mri": "^1.2.0", "package-manager-detector": "^0.2.0", "picocolors": "^1.1.0", "resolve-from": "^5.0.0", "semver": "^7.5.3", "spawndamnit": "^3.0.1", "term-size": "^2.1.0" }, "bin": { "changeset": "bin.js" } }, "sha512-5D3Nk2JPqMI1wK25pEymeWRSlSMdo5QOGlyfrKg0AOufrUcjEE3RQgaCpHoBiM31CSNrtSgdJ0U6zL1rLDDfBA=="],
|
||||
|
||||
"@changesets/config": ["@changesets/config@3.1.3", "", { "dependencies": { "@changesets/errors": "^0.2.0", "@changesets/get-dependents-graph": "^2.1.3", "@changesets/logger": "^0.1.1", "@changesets/should-skip-package": "^0.1.2", "@changesets/types": "^6.1.0", "@manypkg/get-packages": "^1.1.3", "fs-extra": "^7.0.1", "micromatch": "^4.0.8" } }, "sha512-vnXjcey8YgBn2L1OPWd3ORs0bGC4LoYcK/ubpgvzNVr53JXV5GiTVj7fWdMRsoKUH7hhhMAQnsJUqLr21EncNw=="],
|
||||
|
||||
"@changesets/errors": ["@changesets/errors@0.2.0", "", { "dependencies": { "extendable-error": "^0.1.5" } }, "sha512-6BLOQUscTpZeGljvyQXlWOItQyU71kCdGz7Pi8H8zdw6BI0g3m43iL4xKUVPWtG+qrrL9DTjpdn8eYuCQSRpow=="],
|
||||
|
||||
"@changesets/get-dependents-graph": ["@changesets/get-dependents-graph@2.1.3", "", { "dependencies": { "@changesets/types": "^6.1.0", "@manypkg/get-packages": "^1.1.3", "picocolors": "^1.1.0", "semver": "^7.5.3" } }, "sha512-gphr+v0mv2I3Oxt19VdWRRUxq3sseyUpX9DaHpTUmLj92Y10AGy+XOtV+kbM6L/fDcpx7/ISDFK6T8A/P3lOdQ=="],
|
||||
|
||||
"@changesets/get-release-plan": ["@changesets/get-release-plan@4.0.15", "", { "dependencies": { "@changesets/assemble-release-plan": "^6.0.9", "@changesets/config": "^3.1.3", "@changesets/pre": "^2.0.2", "@changesets/read": "^0.6.7", "@changesets/types": "^6.1.0", "@manypkg/get-packages": "^1.1.3" } }, "sha512-Q04ZaRPuEVZtA+auOYgFaVQQSA98dXiVe/yFaZfY7hoSmQICHGvP0TF4u3EDNHWmmCS4ekA/XSpKlSM2PyTS2g=="],
|
||||
|
||||
"@changesets/get-version-range-type": ["@changesets/get-version-range-type@0.4.0", "", {}, "sha512-hwawtob9DryoGTpixy1D3ZXbGgJu1Rhr+ySH2PvTLHvkZuQ7sRT4oQwMh0hbqZH1weAooedEjRsbrWcGLCeyVQ=="],
|
||||
|
||||
"@changesets/git": ["@changesets/git@3.0.4", "", { "dependencies": { "@changesets/errors": "^0.2.0", "@manypkg/get-packages": "^1.1.3", "is-subdir": "^1.1.1", "micromatch": "^4.0.8", "spawndamnit": "^3.0.1" } }, "sha512-BXANzRFkX+XcC1q/d27NKvlJ1yf7PSAgi8JG6dt8EfbHFHi4neau7mufcSca5zRhwOL8j9s6EqsxmT+s+/E6Sw=="],
|
||||
|
||||
"@changesets/logger": ["@changesets/logger@0.1.1", "", { "dependencies": { "picocolors": "^1.1.0" } }, "sha512-OQtR36ZlnuTxKqoW4Sv6x5YIhOmClRd5pWsjZsddYxpWs517R0HkyiefQPIytCVh4ZcC5x9XaG8KTdd5iRQUfg=="],
|
||||
|
||||
"@changesets/parse": ["@changesets/parse@0.4.3", "", { "dependencies": { "@changesets/types": "^6.1.0", "js-yaml": "^4.1.1" } }, "sha512-ZDmNc53+dXdWEv7fqIUSgRQOLYoUom5Z40gmLgmATmYR9NbL6FJJHwakcCpzaeCy+1D0m0n7mT4jj2B/MQPl7A=="],
|
||||
|
||||
"@changesets/pre": ["@changesets/pre@2.0.2", "", { "dependencies": { "@changesets/errors": "^0.2.0", "@changesets/types": "^6.1.0", "@manypkg/get-packages": "^1.1.3", "fs-extra": "^7.0.1" } }, "sha512-HaL/gEyFVvkf9KFg6484wR9s0qjAXlZ8qWPDkTyKF6+zqjBe/I2mygg3MbpZ++hdi0ToqNUF8cjj7fBy0dg8Ug=="],
|
||||
|
||||
"@changesets/read": ["@changesets/read@0.6.7", "", { "dependencies": { "@changesets/git": "^3.0.4", "@changesets/logger": "^0.1.1", "@changesets/parse": "^0.4.3", "@changesets/types": "^6.1.0", "fs-extra": "^7.0.1", "p-filter": "^2.1.0", "picocolors": "^1.1.0" } }, "sha512-D1G4AUYGrBEk8vj8MGwf75k9GpN6XL3wg8i42P2jZZwFLXnlr2Pn7r9yuQNbaMCarP7ZQWNJbV6XLeysAIMhTA=="],
|
||||
|
||||
"@changesets/should-skip-package": ["@changesets/should-skip-package@0.1.2", "", { "dependencies": { "@changesets/types": "^6.1.0", "@manypkg/get-packages": "^1.1.3" } }, "sha512-qAK/WrqWLNCP22UDdBTMPH5f41elVDlsNyat180A33dWxuUDyNpg6fPi/FyTZwRriVjg0L8gnjJn2F9XAoF0qw=="],
|
||||
|
||||
"@changesets/types": ["@changesets/types@6.1.0", "", {}, "sha512-rKQcJ+o1nKNgeoYRHKOS07tAMNd3YSN0uHaJOZYjBAgxfV7TUE7JE+z4BzZdQwb5hKaYbayKN5KrYV7ODb2rAA=="],
|
||||
|
||||
"@changesets/write": ["@changesets/write@0.4.0", "", { "dependencies": { "@changesets/types": "^6.1.0", "fs-extra": "^7.0.1", "human-id": "^4.1.1", "prettier": "^2.7.1" } }, "sha512-CdTLvIOPiCNuH71pyDu3rA+Q0n65cmAbXnwWH84rKGiFumFzkmHNT8KHTMEchcxN+Kl8I54xGUhJ7l3E7X396Q=="],
|
||||
|
||||
"@csstools/color-helpers": ["@csstools/color-helpers@5.1.0", "", {}, "sha512-S11EXWJyy0Mz5SYvRmY8nJYTFFd1LCNV+7cXyAgQtOOuzb4EsgfqDufL+9esx72/eLhsRdGZwaldu/h+E4t4BA=="],
|
||||
|
||||
"@csstools/css-calc": ["@csstools/css-calc@2.1.4", "", { "peerDependencies": { "@csstools/css-parser-algorithms": "^3.0.5", "@csstools/css-tokenizer": "^3.0.4" } }, "sha512-3N8oaj+0juUw/1H3YwmDDJXCgTB1gKU6Hc/bB502u9zR0q2vd786XJH9QfrKIEgFlZmhZiq6epXl4rHqhzsIgQ=="],
|
||||
|
||||
"@csstools/css-color-parser": ["@csstools/css-color-parser@3.1.0", "", { "dependencies": { "@csstools/color-helpers": "^5.1.0", "@csstools/css-calc": "^2.1.4" }, "peerDependencies": { "@csstools/css-parser-algorithms": "^3.0.5", "@csstools/css-tokenizer": "^3.0.4" } }, "sha512-nbtKwh3a6xNVIp/VRuXV64yTKnb1IjTAEEh3irzS+HkKjAOYLTGNb9pmVNntZ8iVBHcWDA2Dof0QtPgFI1BaTA=="],
|
||||
|
||||
"@csstools/css-parser-algorithms": ["@csstools/css-parser-algorithms@3.0.5", "", { "peerDependencies": { "@csstools/css-tokenizer": "^3.0.4" } }, "sha512-DaDeUkXZKjdGhgYaHNJTV9pV7Y9B3b644jCLs9Upc3VeNGg6LWARAT6O+Q+/COo+2gg/bM5rhpMAtf70WqfBdQ=="],
|
||||
|
||||
"@csstools/css-tokenizer": ["@csstools/css-tokenizer@3.0.4", "", {}, "sha512-Vd/9EVDiu6PPJt9yAh6roZP6El1xHrdvIVGjyBsHR0RYwNHgL7FJPyIIW4fANJNG6FtyZfvlRPpFI4ZM/lubvw=="],
|
||||
|
||||
"@inquirer/external-editor": ["@inquirer/external-editor@1.0.3", "", { "dependencies": { "chardet": "^2.1.1", "iconv-lite": "^0.7.0" }, "peerDependencies": { "@types/node": ">=18" }, "optionalPeers": ["@types/node"] }, "sha512-RWbSrDiYmO4LbejWY7ttpxczuwQyZLBUyygsA9Nsv95hpzUWwnNTVQmAq3xuh7vNwCp07UTmE5i11XAEExx4RA=="],
|
||||
|
||||
"@manypkg/find-root": ["@manypkg/find-root@1.1.0", "", { "dependencies": { "@babel/runtime": "^7.5.5", "@types/node": "^12.7.1", "find-up": "^4.1.0", "fs-extra": "^8.1.0" } }, "sha512-mki5uBvhHzO8kYYix/WRy2WX8S3B5wdVSc9D6KcU5lQNglP2yt58/VfLuAK49glRXChosY8ap2oJ1qgma3GUVA=="],
|
||||
|
||||
"@manypkg/get-packages": ["@manypkg/get-packages@1.1.3", "", { "dependencies": { "@babel/runtime": "^7.5.5", "@changesets/types": "^4.0.1", "@manypkg/find-root": "^1.1.0", "fs-extra": "^8.1.0", "globby": "^11.0.0", "read-yaml-file": "^1.1.0" } }, "sha512-fo+QhuU3qE/2TQMQmbVMqaQ6EWbMhi4ABWP+O4AM1NqPBuy0OrApV5LO6BrrgnhtAHS2NH6RrVk9OL181tTi8A=="],
|
||||
|
||||
"@mixmark-io/domino": ["@mixmark-io/domino@2.2.0", "", {}, "sha512-Y28PR25bHXUg88kCV7nivXrP2Nj2RueZ3/l/jdx6J9f8J4nsEGcgX0Qe6lt7Pa+J79+kPiJU3LguR6O/6zrLOw=="],
|
||||
|
||||
"@mozilla/readability": ["@mozilla/readability@0.6.0", "", {}, "sha512-juG5VWh4qAivzTAeMzvY9xs9HY5rAcr2E4I7tiSSCokRFi7XIZCAu92ZkSTsIj1OPceCifL3cpfteP3pDT9/QQ=="],
|
||||
|
||||
"@nodelib/fs.scandir": ["@nodelib/fs.scandir@2.1.5", "", { "dependencies": { "@nodelib/fs.stat": "2.0.5", "run-parallel": "^1.1.9" } }, "sha512-vq24Bq3ym5HEQm2NKCr3yXDwjc7vTsEThRDnkp2DK9p1uqLR+DHurm/NOTo0KG7HYHU7eppKZj3MyqYuMBf62g=="],
|
||||
|
||||
"@nodelib/fs.stat": ["@nodelib/fs.stat@2.0.5", "", {}, "sha512-RkhPPp2zrqDAQA/2jNhnztcPAlv64XdhIp7a7454A5ovI7Bukxgt7MX7udwAu3zg1DcpPU0rz3VV1SeaqvY4+A=="],
|
||||
|
||||
"@nodelib/fs.walk": ["@nodelib/fs.walk@1.2.8", "", { "dependencies": { "@nodelib/fs.scandir": "2.1.5", "fastq": "^1.6.0" } }, "sha512-oGB+UxlgWcgQkgwo8GcEGwemoTFt3FIO9ababBmaGwXIoBKZ+GTy0pP185beGg7Llih/NSHSV2XAs1lnznocSg=="],
|
||||
|
||||
"@types/bun": ["@types/bun@1.3.11", "", { "dependencies": { "bun-types": "1.3.11" } }, "sha512-5vPne5QvtpjGpsGYXiFyycfpDF2ECyPcTSsFBMa0fraoxiQyMJ3SmuQIGhzPg2WJuWxVBoxWJ2kClYTcw/4fAg=="],
|
||||
|
||||
"@types/debug": ["@types/debug@4.1.13", "", { "dependencies": { "@types/ms": "*" } }, "sha512-KSVgmQmzMwPlmtljOomayoR89W4FynCAi3E8PPs7vmDVPe84hT+vGPKkJfThkmXs0x0jAaa9U8uW8bbfyS2fWw=="],
|
||||
|
||||
"@types/jsdom": ["@types/jsdom@21.1.7", "", { "dependencies": { "@types/node": "*", "@types/tough-cookie": "*", "parse5": "^7.0.0" } }, "sha512-yOriVnggzrnQ3a9OKOCxaVuSug3w3/SbOj5i7VwXWZEyUNl3bLF9V3MfxGbZKuwqJOQyRfqXyROBB1CoZLFWzA=="],
|
||||
|
||||
"@types/mdast": ["@types/mdast@4.0.4", "", { "dependencies": { "@types/unist": "*" } }, "sha512-kGaNbPh1k7AFzgpud/gMdvIm5xuECykRR+JnWKQno9TAXVa6WIVCGTPvYGekIDL4uwCZQSYbUxNBSb1aUo79oA=="],
|
||||
|
||||
"@types/ms": ["@types/ms@2.1.0", "", {}, "sha512-GsCCIZDE/p3i96vtEqx+7dBUGXrc7zeSK3wwPHIaRThS+9OhWIXRqzs4d6k1SVU8g91DrNRWxWUGhp5KXQb2VA=="],
|
||||
|
||||
"@types/node": ["@types/node@25.5.0", "", { "dependencies": { "undici-types": "~7.18.0" } }, "sha512-jp2P3tQMSxWugkCUKLRPVUpGaL5MVFwF8RDuSRztfwgN1wmqJeMSbKlnEtQqU8UrhTmzEmZdu2I6v2dpp7XIxw=="],
|
||||
|
||||
"@types/tough-cookie": ["@types/tough-cookie@4.0.5", "", {}, "sha512-/Ad8+nIOV7Rl++6f1BdKxFSMgmoqEoYbHRpPcx3JEfv8VRsQe9Z4mCXeJBzxs7mbHY/XOZZuXlRNfhpVPbs6ZA=="],
|
||||
|
||||
"@types/unist": ["@types/unist@3.0.3", "", {}, "sha512-ko/gIFJRv177XgZsZcBwnqJN5x/Gien8qNOn0D5bQU/zAzVf9Zt3BlcUiLqhV9y4ARk0GbT3tnUiPNgnTXzc/Q=="],
|
||||
|
||||
"@types/ws": ["@types/ws@8.18.1", "", { "dependencies": { "@types/node": "*" } }, "sha512-ThVF6DCVhA8kUGy+aazFQ4kXQ7E1Ty7A3ypFOe0IcJV8O/M511G99AW24irKrW56Wt44yG9+ij8FaqoBGkuBXg=="],
|
||||
|
||||
"@xmldom/xmldom": ["@xmldom/xmldom@0.8.11", "", {}, "sha512-cQzWCtO6C8TQiYl1ruKNn2U6Ao4o4WBBcbL61yJl84x+j5sOWWFU9X7DpND8XZG3daDppSsigMdfAIl2upQBRw=="],
|
||||
|
||||
"agent-base": ["agent-base@7.1.4", "", {}, "sha512-MnA+YT8fwfJPgBx3m60MNqakm30XOkyIoH1y6huTQvC0PwZG7ki8NacLBcrPbNoo8vEZy7Jpuk7+jMO+CUovTQ=="],
|
||||
|
||||
"ansi-colors": ["ansi-colors@4.1.3", "", {}, "sha512-/6w/C21Pm1A7aZitlI5Ni/2J6FFQN8i1Cvz3kHABAAbw93v/NlvKdVOqz7CCWz/3iv/JplRSEEZ83XION15ovw=="],
|
||||
|
||||
"ansi-regex": ["ansi-regex@5.0.1", "", {}, "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ=="],
|
||||
|
||||
"argparse": ["argparse@2.0.1", "", {}, "sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q=="],
|
||||
|
||||
"array-union": ["array-union@2.1.0", "", {}, "sha512-HGyxoOTYUyCM6stUe6EJgnd4EoewAI7zMdfqO+kGjnlZmBDz/cR5pf8r/cR4Wq60sL/p0IkcjUEEPwS3GFrIyw=="],
|
||||
|
||||
"bail": ["bail@2.0.2", "", {}, "sha512-0xO6mYd7JB2YesxDKplafRpsiOzPt9V02ddPCLbY1xYGPOX24NTyN50qnUxgCPcSoYMhKpAuBTjQoRZCAkUDRw=="],
|
||||
|
||||
"better-path-resolve": ["better-path-resolve@1.0.0", "", { "dependencies": { "is-windows": "^1.0.0" } }, "sha512-pbnl5XzGBdrFU/wT4jqmJVPn2B6UHPBOhzMQkY/SPUPB6QtUXtmBHBIwCbXJol93mOpGMnQyP/+BB19q04xj7g=="],
|
||||
|
||||
"boolbase": ["boolbase@1.0.0", "", {}, "sha512-JZOSA7Mo9sNGB8+UjSgzdLtokWAky1zbztM3WRLCbZ70/3cTANmQmOdR7y2g+J0e2WXywy1yS468tY+IruqEww=="],
|
||||
|
||||
"braces": ["braces@3.0.3", "", { "dependencies": { "fill-range": "^7.1.1" } }, "sha512-yQbXgO/OSZVD2IsiLlro+7Hf6Q18EJrKSEsdoMzKePKXct3gvD8oLcOQdIzGupr5Fj+EDe8gO/lxc1BzfMpxvA=="],
|
||||
|
||||
"bun-types": ["bun-types@1.3.11", "", { "dependencies": { "@types/node": "*" } }, "sha512-1KGPpoxQWl9f6wcZh57LvrPIInQMn2TQ7jsgxqpRzg+l0QPOFvJVH7HmvHo/AiPgwXy+/Thf6Ov3EdVn1vOabg=="],
|
||||
|
||||
"ccount": ["ccount@2.0.1", "", {}, "sha512-eyrF0jiFpY+3drT6383f1qhkbGsLSifNAjA61IUjZjmLCWjItY6LB9ft9YhoDgwfmclB2zhu51Lc7+95b8NRAg=="],
|
||||
|
||||
"character-entities": ["character-entities@2.0.2", "", {}, "sha512-shx7oQ0Awen/BRIdkjkvz54PnEEI/EjwXDSIZp86/KKdbafHh1Df/RYGBhn4hbe2+uKC9FnT5UCEdyPz3ai9hQ=="],
|
||||
|
||||
"chardet": ["chardet@2.1.1", "", {}, "sha512-PsezH1rqdV9VvyNhxxOW32/d75r01NY7TQCmOqomRo15ZSOKbpTFVsfjghxo6JloQUCGnH4k1LGu0R4yCLlWQQ=="],
|
||||
|
||||
"chrome-launcher": ["chrome-launcher@1.2.1", "", { "dependencies": { "@types/node": "*", "escape-string-regexp": "^4.0.0", "is-wsl": "^2.2.0", "lighthouse-logger": "^2.0.1" }, "bin": { "print-chrome-path": "bin/print-chrome-path.cjs" } }, "sha512-qmFR5PLMzHyuNJHwOloHPAHhbaNglkfeV/xDtt5b7xiFFyU1I+AZZX0PYseMuhenJSSirgxELYIbswcoc+5H4A=="],
|
||||
|
||||
"commander": ["commander@12.1.0", "", {}, "sha512-Vw8qHK3bZM9y/P10u3Vib8o/DdkvA2OtPtZvD871QKjy74Wj1WSKFILMPRPSdUSx5RFK1arlJzEtA4PkFgnbuA=="],
|
||||
|
||||
"cross-spawn": ["cross-spawn@7.0.6", "", { "dependencies": { "path-key": "^3.1.0", "shebang-command": "^2.0.0", "which": "^2.0.1" } }, "sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA=="],
|
||||
|
||||
"css-select": ["css-select@5.2.2", "", { "dependencies": { "boolbase": "^1.0.0", "css-what": "^6.1.0", "domhandler": "^5.0.2", "domutils": "^3.0.1", "nth-check": "^2.0.1" } }, "sha512-TizTzUddG/xYLA3NXodFM0fSbNizXjOKhqiQQwvhlspadZokn1KDy0NZFS0wuEubIYAV5/c1/lAr0TaaFXEXzw=="],
|
||||
|
||||
"css-what": ["css-what@6.2.2", "", {}, "sha512-u/O3vwbptzhMs3L1fQE82ZSLHQQfto5gyZzwteVIEyeaY5Fc7R4dapF/BvRoSYFeqfBk4m0V1Vafq5Pjv25wvA=="],
|
||||
|
||||
"cssom": ["cssom@0.5.0", "", {}, "sha512-iKuQcq+NdHqlAcwUY0o/HL69XQrUaQdMjmStJ8JFmUaiiQErlhrmuigkg/CU4E2J0IyUKUrMAgl36TvN67MqTw=="],
|
||||
|
||||
"cssstyle": ["cssstyle@4.6.0", "", { "dependencies": { "@asamuzakjp/css-color": "^3.2.0", "rrweb-cssom": "^0.8.0" } }, "sha512-2z+rWdzbbSZv6/rhtvzvqeZQHrBaqgogqt85sqFNbabZOuFbCVFb8kPeEtZjiKkbrm395irpNKiYeFeLiQnFPg=="],
|
||||
|
||||
"data-urls": ["data-urls@5.0.0", "", { "dependencies": { "whatwg-mimetype": "^4.0.0", "whatwg-url": "^14.0.0" } }, "sha512-ZYP5VBHshaDAiVZxjbRVcFJpc+4xGgT0bK3vzy1HLN8jTO975HEbuYzZJcHoQEY5K1a0z8YayJkyVETa08eNTg=="],
|
||||
|
||||
"debug": ["debug@4.4.3", "", { "dependencies": { "ms": "^2.1.3" } }, "sha512-RGwwWnwQvkVfavKVt22FGLw+xYSdzARwm0ru6DhTVA3umU5hZc28V3kO4stgYryrTlLpuvgI9GiijltAjNbcqA=="],
|
||||
|
||||
"decimal.js": ["decimal.js@10.6.0", "", {}, "sha512-YpgQiITW3JXGntzdUmyUR1V812Hn8T1YVXhCu+wO3OpS4eU9l4YdD3qjyiKdV6mvV29zapkMeD390UVEf2lkUg=="],
|
||||
|
||||
"decode-named-character-reference": ["decode-named-character-reference@1.3.0", "", { "dependencies": { "character-entities": "^2.0.0" } }, "sha512-GtpQYB283KrPp6nRw50q3U9/VfOutZOe103qlN7BPP6Ad27xYnOIWv4lPzo8HCAL+mMZofJ9KEy30fq6MfaK6Q=="],
|
||||
|
||||
"defuddle": ["defuddle@0.14.0", "", { "dependencies": { "commander": "^12.1.0" }, "optionalDependencies": { "linkedom": "^0.18.12", "mathml-to-latex": "^1.5.0", "temml": "^0.13.1", "turndown": "^7.2.0" }, "bin": { "defuddle": "dist/cli.js" } }, "sha512-btavZGd1WgiVqrVM62WGRXMUi/aU7ckTZiq0xXWLZMHvzIqNZjwIFQEDRx8MarD7fIgsB90NXZ9xHJkKtapt2Q=="],
|
||||
|
||||
"dequal": ["dequal@2.0.3", "", {}, "sha512-0je+qPKHEMohvfRTCEo3CrPG6cAzAYgmzKyxRiYSSDkS6eGJdyVJm7WaYA5ECaAD9wLB2T4EEeymA5aFVcYXCA=="],
|
||||
|
||||
"detect-indent": ["detect-indent@6.1.0", "", {}, "sha512-reYkTUJAZb9gUuZ2RvVCNhVHdg62RHnJ7WJl8ftMi4diZ6NWlciOzQN88pUhSELEwflJht4oQDv0F0BMlwaYtA=="],
|
||||
|
||||
"devlop": ["devlop@1.1.0", "", { "dependencies": { "dequal": "^2.0.0" } }, "sha512-RWmIqhcFf1lRYBvNmr7qTNuyCt/7/ns2jbpp1+PalgE/rDQcBT0fioSMUpJ93irlUhC5hrg4cYqe6U+0ImW0rA=="],
|
||||
|
||||
"dir-glob": ["dir-glob@3.0.1", "", { "dependencies": { "path-type": "^4.0.0" } }, "sha512-WkrWp9GR4KXfKGYzOLmTuGVi1UWFfws377n9cc55/tb6DuqyF6pcQ5AbiHEshaDpY9v6oaSr2XCDidGmMwdzIA=="],
|
||||
|
||||
"dom-serializer": ["dom-serializer@2.0.0", "", { "dependencies": { "domelementtype": "^2.3.0", "domhandler": "^5.0.2", "entities": "^4.2.0" } }, "sha512-wIkAryiqt/nV5EQKqQpo3SToSOV9J0DnbJqwK7Wv/Trc92zIAYZ4FlMu+JPFW1DfGFt81ZTCGgDEabffXeLyJg=="],
|
||||
|
||||
"domelementtype": ["domelementtype@2.3.0", "", {}, "sha512-OLETBj6w0OsagBwdXnPdN0cnMfF9opN69co+7ZrbfPGrdpPVNBUj02spi6B1N7wChLQiPn4CSH/zJvXw56gmHw=="],
|
||||
|
||||
"domhandler": ["domhandler@5.0.3", "", { "dependencies": { "domelementtype": "^2.3.0" } }, "sha512-cgwlv/1iFQiFnU96XXgROh8xTeetsnJiDsTc7TYCLFd9+/WNkIqPTxiM/8pSd8VIrhXGTf1Ny1q1hquVqDJB5w=="],
|
||||
|
||||
"domutils": ["domutils@3.2.2", "", { "dependencies": { "dom-serializer": "^2.0.0", "domelementtype": "^2.3.0", "domhandler": "^5.0.3" } }, "sha512-6kZKyUajlDuqlHKVX1w7gyslj9MPIXzIFiz/rGu35uC1wMi+kMhQwGhl4lt9unC9Vb9INnY9Z3/ZA3+FhASLaw=="],
|
||||
|
||||
"enquirer": ["enquirer@2.4.1", "", { "dependencies": { "ansi-colors": "^4.1.1", "strip-ansi": "^6.0.1" } }, "sha512-rRqJg/6gd538VHvR3PSrdRBb/1Vy2YfzHqzvbhGIQpDRKIa4FgV/54b5Q1xYSxOOwKvjXweS26E0Q+nAMwp2pQ=="],
|
||||
|
||||
"entities": ["entities@6.0.1", "", {}, "sha512-aN97NXWF6AWBTahfVOIrB/NShkzi5H7F9r1s9mD3cDj4Ko5f2qhhVoYMibXF7GlLveb/D2ioWay8lxI97Ven3g=="],
|
||||
|
||||
"escape-string-regexp": ["escape-string-regexp@4.0.0", "", {}, "sha512-TtpcNJ3XAzx3Gq8sWRzJaVajRs0uVxA2YAkdb1jm2YkPz4G6egUFAyA3n5vtEIZefPk5Wa4UXbKuS5fKkJWdgA=="],
|
||||
|
||||
"esprima": ["esprima@4.0.1", "", { "bin": { "esparse": "./bin/esparse.js", "esvalidate": "./bin/esvalidate.js" } }, "sha512-eGuFFw7Upda+g4p+QHvnW0RyTX/SVeJBDM/gCtMARO0cLuT2HcEKnTPvhjV6aGeqrCB/sbNop0Kszm0jsaWU4A=="],
|
||||
|
||||
"extend": ["extend@3.0.2", "", {}, "sha512-fjquC59cD7CyW6urNXK0FBufkZcoiGG80wTuPujX590cB5Ttln20E2UB4S/WARVqhXffZl2LNgS+gQdPIIim/g=="],
|
||||
|
||||
"extendable-error": ["extendable-error@0.1.7", "", {}, "sha512-UOiS2in6/Q0FK0R0q6UY9vYpQ21mr/Qn1KOnte7vsACuNJf514WvCCUHSRCPcgjPT2bAhNIJdlE6bVap1GKmeg=="],
|
||||
|
||||
"fast-glob": ["fast-glob@3.3.3", "", { "dependencies": { "@nodelib/fs.stat": "^2.0.2", "@nodelib/fs.walk": "^1.2.3", "glob-parent": "^5.1.2", "merge2": "^1.3.0", "micromatch": "^4.0.8" } }, "sha512-7MptL8U0cqcFdzIzwOTHoilX9x5BrNqye7Z/LuC7kCMRio1EMSyqRK3BEAUD7sXRq4iT4AzTVuZdhgQ2TCvYLg=="],
|
||||
|
||||
"fastq": ["fastq@1.20.1", "", { "dependencies": { "reusify": "^1.0.4" } }, "sha512-GGToxJ/w1x32s/D2EKND7kTil4n8OVk/9mycTc4VDza13lOvpUZTGX3mFSCtV9ksdGBVzvsyAVLM6mHFThxXxw=="],
|
||||
|
||||
"fill-range": ["fill-range@7.1.1", "", { "dependencies": { "to-regex-range": "^5.0.1" } }, "sha512-YsGpe3WHLK8ZYi4tWDg2Jy3ebRz2rXowDxnld4bkQB00cc/1Zw9AWnC0i9ztDJitivtQvaI9KaLyKrc+hBW0yg=="],
|
||||
|
||||
"find-up": ["find-up@4.1.0", "", { "dependencies": { "locate-path": "^5.0.0", "path-exists": "^4.0.0" } }, "sha512-PpOwAdQ/YlXQ2vj8a3h8IipDuYRi3wceVQQGYWxNINccq40Anw7BlsEXCMbt1Zt+OLA6Fq9suIpIWD0OsnISlw=="],
|
||||
|
||||
"fs-extra": ["fs-extra@7.0.1", "", { "dependencies": { "graceful-fs": "^4.1.2", "jsonfile": "^4.0.0", "universalify": "^0.1.0" } }, "sha512-YJDaCJZEnBmcbw13fvdAM9AwNOJwOzrE4pqMqBq5nFiEqXUqHwlK4B+3pUw6JNvfSPtX05xFHtYy/1ni01eGCw=="],
|
||||
|
||||
"glob-parent": ["glob-parent@5.1.2", "", { "dependencies": { "is-glob": "^4.0.1" } }, "sha512-AOIgSQCepiJYwP3ARnGx+5VnTu2HBYdzbGP45eLw1vr3zB3vZLeyed1sC9hnbcOc9/SrMyM5RPQrkGz4aS9Zow=="],
|
||||
|
||||
"globby": ["globby@11.1.0", "", { "dependencies": { "array-union": "^2.1.0", "dir-glob": "^3.0.1", "fast-glob": "^3.2.9", "ignore": "^5.2.0", "merge2": "^1.4.1", "slash": "^3.0.0" } }, "sha512-jhIXaOzy1sb8IyocaruWSn1TjmnBVs8Ayhcy83rmxNJ8q2uWKCAj3CnJY+KpGSXCueAPc0i05kVvVKtP1t9S3g=="],
|
||||
|
||||
"graceful-fs": ["graceful-fs@4.2.11", "", {}, "sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ=="],
|
||||
|
||||
"html-encoding-sniffer": ["html-encoding-sniffer@4.0.0", "", { "dependencies": { "whatwg-encoding": "^3.1.1" } }, "sha512-Y22oTqIU4uuPgEemfz7NDJz6OeKf12Lsu+QC+s3BVpda64lTiMYCyGwg5ki4vFxkMwQdeZDl2adZoqUgdFuTgQ=="],
|
||||
|
||||
"html-escaper": ["html-escaper@3.0.3", "", {}, "sha512-RuMffC89BOWQoY0WKGpIhn5gX3iI54O6nRA0yC124NYVtzjmFWBIiFd8M0x+ZdX0P9R4lADg1mgP8C7PxGOWuQ=="],
|
||||
|
||||
"htmlparser2": ["htmlparser2@10.1.0", "", { "dependencies": { "domelementtype": "^2.3.0", "domhandler": "^5.0.3", "domutils": "^3.2.2", "entities": "^7.0.1" } }, "sha512-VTZkM9GWRAtEpveh7MSF6SjjrpNVNNVJfFup7xTY3UpFtm67foy9HDVXneLtFVt4pMz5kZtgNcvCniNFb1hlEQ=="],
|
||||
|
||||
"http-proxy-agent": ["http-proxy-agent@7.0.2", "", { "dependencies": { "agent-base": "^7.1.0", "debug": "^4.3.4" } }, "sha512-T1gkAiYYDWYx3V5Bmyu7HcfcvL7mUrTWiM6yOfa3PIphViJ/gFPbvidQ+veqSOHci/PxBcDabeUNCzpOODJZig=="],
|
||||
|
||||
"https-proxy-agent": ["https-proxy-agent@7.0.6", "", { "dependencies": { "agent-base": "^7.1.2", "debug": "4" } }, "sha512-vK9P5/iUfdl95AI+JVyUuIcVtd4ofvtrOr3HNtM2yxC9bnMbEdp3x01OhQNnjb8IJYi38VlTE3mBXwcfvywuSw=="],
|
||||
|
||||
"human-id": ["human-id@4.1.3", "", { "bin": { "human-id": "dist/cli.js" } }, "sha512-tsYlhAYpjCKa//8rXZ9DqKEawhPoSytweBC2eNvcaDK+57RZLHGqNs3PZTQO6yekLFSuvA6AlnAfrw1uBvtb+Q=="],
|
||||
|
||||
"iconv-lite": ["iconv-lite@0.7.2", "", { "dependencies": { "safer-buffer": ">= 2.1.2 < 3.0.0" } }, "sha512-im9DjEDQ55s9fL4EYzOAv0yMqmMBSZp6G0VvFyTMPKWxiSBHUj9NW/qqLmXUwXrrM7AvqSlTCfvqRb0cM8yYqw=="],
|
||||
|
||||
"ignore": ["ignore@5.3.2", "", {}, "sha512-hsBTNUqQTDwkWtcdYI2i06Y/nUBEsNEDJKjWdigLvegy8kDuJAS8uRlpkkcQpyEXL0Z/pjDy5HBmMjRCJ2gq+g=="],
|
||||
|
||||
"is-docker": ["is-docker@2.2.1", "", { "bin": { "is-docker": "cli.js" } }, "sha512-F+i2BKsFrH66iaUFc0woD8sLy8getkwTwtOBjvs56Cx4CgJDeKQeqfz8wAYiSb8JOprWhHH5p77PbmYCvvUuXQ=="],
|
||||
|
||||
"is-extglob": ["is-extglob@2.1.1", "", {}, "sha512-SbKbANkN603Vi4jEZv49LeVJMn4yGwsbzZworEoyEiutsN3nJYdbO36zfhGJ6QEDpOZIFkDtnq5JRxmvl3jsoQ=="],
|
||||
|
||||
"is-glob": ["is-glob@4.0.3", "", { "dependencies": { "is-extglob": "^2.1.1" } }, "sha512-xelSayHH36ZgE7ZWhli7pW34hNbNl8Ojv5KVmkJD4hBdD3th8Tfk9vYasLM+mXWOZhFkgZfxhLSnrwRr4elSSg=="],
|
||||
|
||||
"is-number": ["is-number@7.0.0", "", {}, "sha512-41Cifkg6e8TylSpdtTpeLVMqvSBEVzTttHvERD741+pnZ8ANv0004MRL43QKPDlK9cGvNp6NZWZUBlbGXYxxng=="],
|
||||
|
||||
"is-plain-obj": ["is-plain-obj@4.1.0", "", {}, "sha512-+Pgi+vMuUNkJyExiMBt5IlFoMyKnr5zhJ4Uspz58WOhBF5QoIZkFyNHIbBAtHwzVAgk5RtndVNsDRN61/mmDqg=="],
|
||||
|
||||
"is-potential-custom-element-name": ["is-potential-custom-element-name@1.0.1", "", {}, "sha512-bCYeRA2rVibKZd+s2625gGnGF/t7DSqDs4dP7CrLA1m7jKWz6pps0LpYLJN8Q64HtmPKJ1hrN3nzPNKFEKOUiQ=="],
|
||||
|
||||
"is-subdir": ["is-subdir@1.2.0", "", { "dependencies": { "better-path-resolve": "1.0.0" } }, "sha512-2AT6j+gXe/1ueqbW6fLZJiIw3F8iXGJtt0yDrZaBhAZEG1raiTxKWU+IPqMCzQAXOUCKdA4UDMgacKH25XG2Cw=="],
|
||||
|
||||
"is-windows": ["is-windows@1.0.2", "", {}, "sha512-eXK1UInq2bPmjyX6e3VHIzMLobc4J94i4AWn+Hpq3OU5KkrRC96OAcR3PRJ/pGu6m8TRnBHP9dkXQVsT/COVIA=="],
|
||||
|
||||
"is-wsl": ["is-wsl@2.2.0", "", { "dependencies": { "is-docker": "^2.0.0" } }, "sha512-fKzAra0rGJUUBwGBgNkHZuToZcn+TtXHpeCgmkMJMMYx1sQDYaCSyjJBSCa2nH1DGm7s3n1oBnohoVTBaN7Lww=="],
|
||||
|
||||
"isexe": ["isexe@2.0.0", "", {}, "sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw=="],
|
||||
|
||||
"js-yaml": ["js-yaml@4.1.1", "", { "dependencies": { "argparse": "^2.0.1" }, "bin": { "js-yaml": "bin/js-yaml.js" } }, "sha512-qQKT4zQxXl8lLwBtHMWwaTcGfFOZviOJet3Oy/xmGk2gZH677CJM9EvtfdSkgWcATZhj/55JZ0rmy3myCT5lsA=="],
|
||||
|
||||
"jsdom": ["jsdom@26.1.0", "", { "dependencies": { "cssstyle": "^4.2.1", "data-urls": "^5.0.0", "decimal.js": "^10.5.0", "html-encoding-sniffer": "^4.0.0", "http-proxy-agent": "^7.0.2", "https-proxy-agent": "^7.0.6", "is-potential-custom-element-name": "^1.0.1", "nwsapi": "^2.2.16", "parse5": "^7.2.1", "rrweb-cssom": "^0.8.0", "saxes": "^6.0.0", "symbol-tree": "^3.2.4", "tough-cookie": "^5.1.1", "w3c-xmlserializer": "^5.0.0", "webidl-conversions": "^7.0.0", "whatwg-encoding": "^3.1.1", "whatwg-mimetype": "^4.0.0", "whatwg-url": "^14.1.1", "ws": "^8.18.0", "xml-name-validator": "^5.0.0" }, "peerDependencies": { "canvas": "^3.0.0" }, "optionalPeers": ["canvas"] }, "sha512-Cvc9WUhxSMEo4McES3P7oK3QaXldCfNWp7pl2NNeiIFlCoLr3kfq9kb1fxftiwk1FLV7CvpvDfonxtzUDeSOPg=="],
|
||||
|
||||
"jsonfile": ["jsonfile@4.0.0", "", { "optionalDependencies": { "graceful-fs": "^4.1.6" } }, "sha512-m6F1R3z8jjlf2imQHS2Qez5sjKWQzbuuhuJ/FKYFRZvPE3PuHcSMVZzfsLhGVOkfd20obL5SWEBew5ShlquNxg=="],
|
||||
|
||||
"lighthouse-logger": ["lighthouse-logger@2.0.2", "", { "dependencies": { "debug": "^4.4.1", "marky": "^1.2.2" } }, "sha512-vWl2+u5jgOQuZR55Z1WM0XDdrJT6mzMP8zHUct7xTlWhuQs+eV0g+QL0RQdFjT54zVmbhLCP8vIVpy1wGn/gCg=="],
|
||||
|
||||
"linkedom": ["linkedom@0.18.12", "", { "dependencies": { "css-select": "^5.1.0", "cssom": "^0.5.0", "html-escaper": "^3.0.3", "htmlparser2": "^10.0.0", "uhyphen": "^0.2.0" }, "peerDependencies": { "canvas": ">= 2" }, "optionalPeers": ["canvas"] }, "sha512-jalJsOwIKuQJSeTvsgzPe9iJzyfVaEJiEXl+25EkKevsULHvMJzpNqwvj1jOESWdmgKDiXObyjOYwlUqG7wo1Q=="],
|
||||
|
||||
"locate-path": ["locate-path@5.0.0", "", { "dependencies": { "p-locate": "^4.1.0" } }, "sha512-t7hw9pI+WvuwNJXwk5zVHpyhIqzg2qTlklJOf0mVxGSbe3Fp2VieZcduNYjaLDoy6p9uGpQEGWG87WpMKlNq8g=="],
|
||||
|
||||
"lodash.startcase": ["lodash.startcase@4.4.0", "", {}, "sha512-+WKqsK294HMSc2jEbNgpHpd0JfIBhp7rEV4aqXWqFr6AlXov+SlcgB1Fv01y2kGe3Gc8nMW7VA0SrGuSkRfIEg=="],
|
||||
|
||||
"longest-streak": ["longest-streak@3.1.0", "", {}, "sha512-9Ri+o0JYgehTaVBBDoMqIl8GXtbWg711O3srftcHhZ0dqnETqLaoIK0x17fUw9rFSlK/0NlsKe0Ahhyl5pXE2g=="],
|
||||
|
||||
"lru-cache": ["lru-cache@10.4.3", "", {}, "sha512-JNAzZcXrCt42VGLuYz0zfAzDfAvJWW6AfYlDBQyDV5DClI2m5sAmK+OIO7s59XfsRsWHp02jAJrRadPRGTt6SQ=="],
|
||||
|
||||
"markdown-table": ["markdown-table@3.0.4", "", {}, "sha512-wiYz4+JrLyb/DqW2hkFJxP7Vd7JuTDm77fvbM8VfEQdmSMqcImWeeRbHwZjBjIFki/VaMK2BhFi7oUUZeM5bqw=="],
|
||||
|
||||
"marky": ["marky@1.3.0", "", {}, "sha512-ocnPZQLNpvbedwTy9kNrQEsknEfgvcLMvOtz3sFeWApDq1MXH1TqkCIx58xlpESsfwQOnuBO9beyQuNGzVvuhQ=="],
|
||||
|
||||
"mathml-to-latex": ["mathml-to-latex@1.5.0", "", { "dependencies": { "@xmldom/xmldom": "^0.8.10" } }, "sha512-rrWn0eEvcEcdMM4xfHcSGIy+i01DX9byOdXTLWg+w1iJ6O6ohP5UXY1dVzNUZLhzfl3EGcRekWLhY7JT5Omaew=="],
|
||||
|
||||
"mdast-util-find-and-replace": ["mdast-util-find-and-replace@3.0.2", "", { "dependencies": { "@types/mdast": "^4.0.0", "escape-string-regexp": "^5.0.0", "unist-util-is": "^6.0.0", "unist-util-visit-parents": "^6.0.0" } }, "sha512-Tmd1Vg/m3Xz43afeNxDIhWRtFZgM2VLyaf4vSTYwudTyeuTneoL3qtWMA5jeLyz/O1vDJmmV4QuScFCA2tBPwg=="],
|
||||
|
||||
"mdast-util-from-markdown": ["mdast-util-from-markdown@2.0.3", "", { "dependencies": { "@types/mdast": "^4.0.0", "@types/unist": "^3.0.0", "decode-named-character-reference": "^1.0.0", "devlop": "^1.0.0", "mdast-util-to-string": "^4.0.0", "micromark": "^4.0.0", "micromark-util-decode-numeric-character-reference": "^2.0.0", "micromark-util-decode-string": "^2.0.0", "micromark-util-normalize-identifier": "^2.0.0", "micromark-util-symbol": "^2.0.0", "micromark-util-types": "^2.0.0", "unist-util-stringify-position": "^4.0.0" } }, "sha512-W4mAWTvSlKvf8L6J+VN9yLSqQ9AOAAvHuoDAmPkz4dHf553m5gVj2ejadHJhoJmcmxEnOv6Pa8XJhpxE93kb8Q=="],
|
||||
|
||||
"mdast-util-gfm": ["mdast-util-gfm@3.1.0", "", { "dependencies": { "mdast-util-from-markdown": "^2.0.0", "mdast-util-gfm-autolink-literal": "^2.0.0", "mdast-util-gfm-footnote": "^2.0.0", "mdast-util-gfm-strikethrough": "^2.0.0", "mdast-util-gfm-table": "^2.0.0", "mdast-util-gfm-task-list-item": "^2.0.0", "mdast-util-to-markdown": "^2.0.0" } }, "sha512-0ulfdQOM3ysHhCJ1p06l0b0VKlhU0wuQs3thxZQagjcjPrlFRqY215uZGHHJan9GEAXd9MbfPjFJz+qMkVR6zQ=="],
|
||||
|
||||
"mdast-util-gfm-autolink-literal": ["mdast-util-gfm-autolink-literal@2.0.1", "", { "dependencies": { "@types/mdast": "^4.0.0", "ccount": "^2.0.0", "devlop": "^1.0.0", "mdast-util-find-and-replace": "^3.0.0", "micromark-util-character": "^2.0.0" } }, "sha512-5HVP2MKaP6L+G6YaxPNjuL0BPrq9orG3TsrZ9YXbA3vDw/ACI4MEsnoDpn6ZNm7GnZgtAcONJyPhOP8tNJQavQ=="],
|
||||
|
||||
"mdast-util-gfm-footnote": ["mdast-util-gfm-footnote@2.1.0", "", { "dependencies": { "@types/mdast": "^4.0.0", "devlop": "^1.1.0", "mdast-util-from-markdown": "^2.0.0", "mdast-util-to-markdown": "^2.0.0", "micromark-util-normalize-identifier": "^2.0.0" } }, "sha512-sqpDWlsHn7Ac9GNZQMeUzPQSMzR6Wv0WKRNvQRg0KqHh02fpTz69Qc1QSseNX29bhz1ROIyNyxExfawVKTm1GQ=="],
|
||||
|
||||
"mdast-util-gfm-strikethrough": ["mdast-util-gfm-strikethrough@2.0.0", "", { "dependencies": { "@types/mdast": "^4.0.0", "mdast-util-from-markdown": "^2.0.0", "mdast-util-to-markdown": "^2.0.0" } }, "sha512-mKKb915TF+OC5ptj5bJ7WFRPdYtuHv0yTRxK2tJvi+BDqbkiG7h7u/9SI89nRAYcmap2xHQL9D+QG/6wSrTtXg=="],
|
||||
|
||||
"mdast-util-gfm-table": ["mdast-util-gfm-table@2.0.0", "", { "dependencies": { "@types/mdast": "^4.0.0", "devlop": "^1.0.0", "markdown-table": "^3.0.0", "mdast-util-from-markdown": "^2.0.0", "mdast-util-to-markdown": "^2.0.0" } }, "sha512-78UEvebzz/rJIxLvE7ZtDd/vIQ0RHv+3Mh5DR96p7cS7HsBhYIICDBCu8csTNWNO6tBWfqXPWekRuj2FNOGOZg=="],
|
||||
|
||||
"mdast-util-gfm-task-list-item": ["mdast-util-gfm-task-list-item@2.0.0", "", { "dependencies": { "@types/mdast": "^4.0.0", "devlop": "^1.0.0", "mdast-util-from-markdown": "^2.0.0", "mdast-util-to-markdown": "^2.0.0" } }, "sha512-IrtvNvjxC1o06taBAVJznEnkiHxLFTzgonUdy8hzFVeDun0uTjxxrRGVaNFqkU1wJR3RBPEfsxmU6jDWPofrTQ=="],
|
||||
|
||||
"mdast-util-phrasing": ["mdast-util-phrasing@4.1.0", "", { "dependencies": { "@types/mdast": "^4.0.0", "unist-util-is": "^6.0.0" } }, "sha512-TqICwyvJJpBwvGAMZjj4J2n0X8QWp21b9l0o7eXyVJ25YNWYbJDVIyD1bZXE6WtV6RmKJVYmQAKWa0zWOABz2w=="],
|
||||
|
||||
"mdast-util-to-markdown": ["mdast-util-to-markdown@2.1.2", "", { "dependencies": { "@types/mdast": "^4.0.0", "@types/unist": "^3.0.0", "longest-streak": "^3.0.0", "mdast-util-phrasing": "^4.0.0", "mdast-util-to-string": "^4.0.0", "micromark-util-classify-character": "^2.0.0", "micromark-util-decode-string": "^2.0.0", "unist-util-visit": "^5.0.0", "zwitch": "^2.0.0" } }, "sha512-xj68wMTvGXVOKonmog6LwyJKrYXZPvlwabaryTjLh9LuvovB/KAH+kvi8Gjj+7rJjsFi23nkUxRQv1KqSroMqA=="],
|
||||
|
||||
"mdast-util-to-string": ["mdast-util-to-string@4.0.0", "", { "dependencies": { "@types/mdast": "^4.0.0" } }, "sha512-0H44vDimn51F0YwvxSJSm0eCDOJTRlmN0R1yBh4HLj9wiV1Dn0QoXGbvFAWj2hSItVTlCmBF1hqKlIyUBVFLPg=="],
|
||||
|
||||
"merge2": ["merge2@1.4.1", "", {}, "sha512-8q7VEgMJW4J8tcfVPy8g09NcQwZdbwFEqhe/WZkoIzjn/3TGDwtOCYtXGxA3O8tPzpczCCDgv+P2P5y00ZJOOg=="],
|
||||
|
||||
"micromark": ["micromark@4.0.2", "", { "dependencies": { "@types/debug": "^4.0.0", "debug": "^4.0.0", "decode-named-character-reference": "^1.0.0", "devlop": "^1.0.0", "micromark-core-commonmark": "^2.0.0", "micromark-factory-space": "^2.0.0", "micromark-util-character": "^2.0.0", "micromark-util-chunked": "^2.0.0", "micromark-util-combine-extensions": "^2.0.0", "micromark-util-decode-numeric-character-reference": "^2.0.0", "micromark-util-encode": "^2.0.0", "micromark-util-normalize-identifier": "^2.0.0", "micromark-util-resolve-all": "^2.0.0", "micromark-util-sanitize-uri": "^2.0.0", "micromark-util-subtokenize": "^2.0.0", "micromark-util-symbol": "^2.0.0", "micromark-util-types": "^2.0.0" } }, "sha512-zpe98Q6kvavpCr1NPVSCMebCKfD7CA2NqZ+rykeNhONIJBpc1tFKt9hucLGwha3jNTNI8lHpctWJWoimVF4PfA=="],
|
||||
|
||||
"micromark-core-commonmark": ["micromark-core-commonmark@2.0.3", "", { "dependencies": { "decode-named-character-reference": "^1.0.0", "devlop": "^1.0.0", "micromark-factory-destination": "^2.0.0", "micromark-factory-label": "^2.0.0", "micromark-factory-space": "^2.0.0", "micromark-factory-title": "^2.0.0", "micromark-factory-whitespace": "^2.0.0", "micromark-util-character": "^2.0.0", "micromark-util-chunked": "^2.0.0", "micromark-util-classify-character": "^2.0.0", "micromark-util-html-tag-name": "^2.0.0", "micromark-util-normalize-identifier": "^2.0.0", "micromark-util-resolve-all": "^2.0.0", "micromark-util-subtokenize": "^2.0.0", "micromark-util-symbol": "^2.0.0", "micromark-util-types": "^2.0.0" } }, "sha512-RDBrHEMSxVFLg6xvnXmb1Ayr2WzLAWjeSATAoxwKYJV94TeNavgoIdA0a9ytzDSVzBy2YKFK+emCPOEibLeCrg=="],
|
||||
|
||||
"micromark-extension-gfm": ["micromark-extension-gfm@3.0.0", "", { "dependencies": { "micromark-extension-gfm-autolink-literal": "^2.0.0", "micromark-extension-gfm-footnote": "^2.0.0", "micromark-extension-gfm-strikethrough": "^2.0.0", "micromark-extension-gfm-table": "^2.0.0", "micromark-extension-gfm-tagfilter": "^2.0.0", "micromark-extension-gfm-task-list-item": "^2.0.0", "micromark-util-combine-extensions": "^2.0.0", "micromark-util-types": "^2.0.0" } }, "sha512-vsKArQsicm7t0z2GugkCKtZehqUm31oeGBV/KVSorWSy8ZlNAv7ytjFhvaryUiCUJYqs+NoE6AFhpQvBTM6Q4w=="],
|
||||
|
||||
"micromark-extension-gfm-autolink-literal": ["micromark-extension-gfm-autolink-literal@2.1.0", "", { "dependencies": { "micromark-util-character": "^2.0.0", "micromark-util-sanitize-uri": "^2.0.0", "micromark-util-symbol": "^2.0.0", "micromark-util-types": "^2.0.0" } }, "sha512-oOg7knzhicgQ3t4QCjCWgTmfNhvQbDDnJeVu9v81r7NltNCVmhPy1fJRX27pISafdjL+SVc4d3l48Gb6pbRypw=="],
|
||||
|
||||
"micromark-extension-gfm-footnote": ["micromark-extension-gfm-footnote@2.1.0", "", { "dependencies": { "devlop": "^1.0.0", "micromark-core-commonmark": "^2.0.0", "micromark-factory-space": "^2.0.0", "micromark-util-character": "^2.0.0", "micromark-util-normalize-identifier": "^2.0.0", "micromark-util-sanitize-uri": "^2.0.0", "micromark-util-symbol": "^2.0.0", "micromark-util-types": "^2.0.0" } }, "sha512-/yPhxI1ntnDNsiHtzLKYnE3vf9JZ6cAisqVDauhp4CEHxlb4uoOTxOCJ+9s51bIB8U1N1FJ1RXOKTIlD5B/gqw=="],
|
||||
|
||||
"micromark-extension-gfm-strikethrough": ["micromark-extension-gfm-strikethrough@2.1.0", "", { "dependencies": { "devlop": "^1.0.0", "micromark-util-chunked": "^2.0.0", "micromark-util-classify-character": "^2.0.0", "micromark-util-resolve-all": "^2.0.0", "micromark-util-symbol": "^2.0.0", "micromark-util-types": "^2.0.0" } }, "sha512-ADVjpOOkjz1hhkZLlBiYA9cR2Anf8F4HqZUO6e5eDcPQd0Txw5fxLzzxnEkSkfnD0wziSGiv7sYhk/ktvbf1uw=="],
|
||||
|
||||
"micromark-extension-gfm-table": ["micromark-extension-gfm-table@2.1.1", "", { "dependencies": { "devlop": "^1.0.0", "micromark-factory-space": "^2.0.0", "micromark-util-character": "^2.0.0", "micromark-util-symbol": "^2.0.0", "micromark-util-types": "^2.0.0" } }, "sha512-t2OU/dXXioARrC6yWfJ4hqB7rct14e8f7m0cbI5hUmDyyIlwv5vEtooptH8INkbLzOatzKuVbQmAYcbWoyz6Dg=="],
|
||||
|
||||
"micromark-extension-gfm-tagfilter": ["micromark-extension-gfm-tagfilter@2.0.0", "", { "dependencies": { "micromark-util-types": "^2.0.0" } }, "sha512-xHlTOmuCSotIA8TW1mDIM6X2O1SiX5P9IuDtqGonFhEK0qgRI4yeC6vMxEV2dgyr2TiD+2PQ10o+cOhdVAcwfg=="],
|
||||
|
||||
"micromark-extension-gfm-task-list-item": ["micromark-extension-gfm-task-list-item@2.1.0", "", { "dependencies": { "devlop": "^1.0.0", "micromark-factory-space": "^2.0.0", "micromark-util-character": "^2.0.0", "micromark-util-symbol": "^2.0.0", "micromark-util-types": "^2.0.0" } }, "sha512-qIBZhqxqI6fjLDYFTBIa4eivDMnP+OZqsNwmQ3xNLE4Cxwc+zfQEfbs6tzAo2Hjq+bh6q5F+Z8/cksrLFYWQQw=="],
|
||||
|
||||
"micromark-factory-destination": ["micromark-factory-destination@2.0.1", "", { "dependencies": { "micromark-util-character": "^2.0.0", "micromark-util-symbol": "^2.0.0", "micromark-util-types": "^2.0.0" } }, "sha512-Xe6rDdJlkmbFRExpTOmRj9N3MaWmbAgdpSrBQvCFqhezUn4AHqJHbaEnfbVYYiexVSs//tqOdY/DxhjdCiJnIA=="],
|
||||
|
||||
"micromark-factory-label": ["micromark-factory-label@2.0.1", "", { "dependencies": { "devlop": "^1.0.0", "micromark-util-character": "^2.0.0", "micromark-util-symbol": "^2.0.0", "micromark-util-types": "^2.0.0" } }, "sha512-VFMekyQExqIW7xIChcXn4ok29YE3rnuyveW3wZQWWqF4Nv9Wk5rgJ99KzPvHjkmPXF93FXIbBp6YdW3t71/7Vg=="],
|
||||
|
||||
"micromark-factory-space": ["micromark-factory-space@2.0.1", "", { "dependencies": { "micromark-util-character": "^2.0.0", "micromark-util-types": "^2.0.0" } }, "sha512-zRkxjtBxxLd2Sc0d+fbnEunsTj46SWXgXciZmHq0kDYGnck/ZSGj9/wULTV95uoeYiK5hRXP2mJ98Uo4cq/LQg=="],
|
||||
|
||||
"micromark-factory-title": ["micromark-factory-title@2.0.1", "", { "dependencies": { "micromark-factory-space": "^2.0.0", "micromark-util-character": "^2.0.0", "micromark-util-symbol": "^2.0.0", "micromark-util-types": "^2.0.0" } }, "sha512-5bZ+3CjhAd9eChYTHsjy6TGxpOFSKgKKJPJxr293jTbfry2KDoWkhBb6TcPVB4NmzaPhMs1Frm9AZH7OD4Cjzw=="],
|
||||
|
||||
"micromark-factory-whitespace": ["micromark-factory-whitespace@2.0.1", "", { "dependencies": { "micromark-factory-space": "^2.0.0", "micromark-util-character": "^2.0.0", "micromark-util-symbol": "^2.0.0", "micromark-util-types": "^2.0.0" } }, "sha512-Ob0nuZ3PKt/n0hORHyvoD9uZhr+Za8sFoP+OnMcnWK5lngSzALgQYKMr9RJVOWLqQYuyn6ulqGWSXdwf6F80lQ=="],
|
||||
|
||||
"micromark-util-character": ["micromark-util-character@2.1.1", "", { "dependencies": { "micromark-util-symbol": "^2.0.0", "micromark-util-types": "^2.0.0" } }, "sha512-wv8tdUTJ3thSFFFJKtpYKOYiGP2+v96Hvk4Tu8KpCAsTMs6yi+nVmGh1syvSCsaxz45J6Jbw+9DD6g97+NV67Q=="],
|
||||
|
||||
"micromark-util-chunked": ["micromark-util-chunked@2.0.1", "", { "dependencies": { "micromark-util-symbol": "^2.0.0" } }, "sha512-QUNFEOPELfmvv+4xiNg2sRYeS/P84pTW0TCgP5zc9FpXetHY0ab7SxKyAQCNCc1eK0459uoLI1y5oO5Vc1dbhA=="],
|
||||
|
||||
"micromark-util-classify-character": ["micromark-util-classify-character@2.0.1", "", { "dependencies": { "micromark-util-character": "^2.0.0", "micromark-util-symbol": "^2.0.0", "micromark-util-types": "^2.0.0" } }, "sha512-K0kHzM6afW/MbeWYWLjoHQv1sgg2Q9EccHEDzSkxiP/EaagNzCm7T/WMKZ3rjMbvIpvBiZgwR3dKMygtA4mG1Q=="],
|
||||
|
||||
"micromark-util-combine-extensions": ["micromark-util-combine-extensions@2.0.1", "", { "dependencies": { "micromark-util-chunked": "^2.0.0", "micromark-util-types": "^2.0.0" } }, "sha512-OnAnH8Ujmy59JcyZw8JSbK9cGpdVY44NKgSM7E9Eh7DiLS2E9RNQf0dONaGDzEG9yjEl5hcqeIsj4hfRkLH/Bg=="],
|
||||
|
||||
"micromark-util-decode-numeric-character-reference": ["micromark-util-decode-numeric-character-reference@2.0.2", "", { "dependencies": { "micromark-util-symbol": "^2.0.0" } }, "sha512-ccUbYk6CwVdkmCQMyr64dXz42EfHGkPQlBj5p7YVGzq8I7CtjXZJrubAYezf7Rp+bjPseiROqe7G6foFd+lEuw=="],
|
||||
|
||||
"micromark-util-decode-string": ["micromark-util-decode-string@2.0.1", "", { "dependencies": { "decode-named-character-reference": "^1.0.0", "micromark-util-character": "^2.0.0", "micromark-util-decode-numeric-character-reference": "^2.0.0", "micromark-util-symbol": "^2.0.0" } }, "sha512-nDV/77Fj6eH1ynwscYTOsbK7rR//Uj0bZXBwJZRfaLEJ1iGBR6kIfNmlNqaqJf649EP0F3NWNdeJi03elllNUQ=="],
|
||||
|
||||
"micromark-util-encode": ["micromark-util-encode@2.0.1", "", {}, "sha512-c3cVx2y4KqUnwopcO9b/SCdo2O67LwJJ/UyqGfbigahfegL9myoEFoDYZgkT7f36T0bLrM9hZTAaAyH+PCAXjw=="],
|
||||
|
||||
"micromark-util-html-tag-name": ["micromark-util-html-tag-name@2.0.1", "", {}, "sha512-2cNEiYDhCWKI+Gs9T0Tiysk136SnR13hhO8yW6BGNyhOC4qYFnwF1nKfD3HFAIXA5c45RrIG1ub11GiXeYd1xA=="],
|
||||
|
||||
"micromark-util-normalize-identifier": ["micromark-util-normalize-identifier@2.0.1", "", { "dependencies": { "micromark-util-symbol": "^2.0.0" } }, "sha512-sxPqmo70LyARJs0w2UclACPUUEqltCkJ6PhKdMIDuJ3gSf/Q+/GIe3WKl0Ijb/GyH9lOpUkRAO2wp0GVkLvS9Q=="],
|
||||
|
||||
"micromark-util-resolve-all": ["micromark-util-resolve-all@2.0.1", "", { "dependencies": { "micromark-util-types": "^2.0.0" } }, "sha512-VdQyxFWFT2/FGJgwQnJYbe1jjQoNTS4RjglmSjTUlpUMa95Htx9NHeYW4rGDJzbjvCsl9eLjMQwGeElsqmzcHg=="],
|
||||
|
||||
"micromark-util-sanitize-uri": ["micromark-util-sanitize-uri@2.0.1", "", { "dependencies": { "micromark-util-character": "^2.0.0", "micromark-util-encode": "^2.0.0", "micromark-util-symbol": "^2.0.0" } }, "sha512-9N9IomZ/YuGGZZmQec1MbgxtlgougxTodVwDzzEouPKo3qFWvymFHWcnDi2vzV1ff6kas9ucW+o3yzJK9YB1AQ=="],
|
||||
|
||||
"micromark-util-subtokenize": ["micromark-util-subtokenize@2.1.0", "", { "dependencies": { "devlop": "^1.0.0", "micromark-util-chunked": "^2.0.0", "micromark-util-symbol": "^2.0.0", "micromark-util-types": "^2.0.0" } }, "sha512-XQLu552iSctvnEcgXw6+Sx75GflAPNED1qx7eBJ+wydBb2KCbRZe+NwvIEEMM83uml1+2WSXpBAcp9IUCgCYWA=="],
|
||||
|
||||
"micromark-util-symbol": ["micromark-util-symbol@2.0.1", "", {}, "sha512-vs5t8Apaud9N28kgCrRUdEed4UJ+wWNvicHLPxCa9ENlYuAY31M0ETy5y1vA33YoNPDFTghEbnh6efaE8h4x0Q=="],
|
||||
|
||||
"micromark-util-types": ["micromark-util-types@2.0.2", "", {}, "sha512-Yw0ECSpJoViF1qTU4DC6NwtC4aWGt1EkzaQB8KPPyCRR8z9TWeV0HbEFGTO+ZY1wB22zmxnJqhPyTpOVCpeHTA=="],
|
||||
|
||||
"micromatch": ["micromatch@4.0.8", "", { "dependencies": { "braces": "^3.0.3", "picomatch": "^2.3.1" } }, "sha512-PXwfBhYu0hBCPw8Dn0E+WDYb7af3dSLVWKi3HGv84IdF4TyFoC0ysxFd0Goxw7nSv4T/PzEJQxsYsEiFCKo2BA=="],
|
||||
|
||||
"mri": ["mri@1.2.0", "", {}, "sha512-tzzskb3bG8LvYGFF/mDTpq3jpI6Q9wc3LEmBaghu+DdCssd1FakN7Bc0hVNmEyGq1bq3RgfkCb3cmQLpNPOroA=="],
|
||||
|
||||
"ms": ["ms@2.1.3", "", {}, "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA=="],
|
||||
|
||||
"nth-check": ["nth-check@2.1.1", "", { "dependencies": { "boolbase": "^1.0.0" } }, "sha512-lqjrjmaOoAnWfMmBPL+XNnynZh2+swxiX3WUE0s4yEHI6m+AwrK2UZOimIRl3X/4QctVqS8AiZjFqyOGrMXb/w=="],
|
||||
|
||||
"nwsapi": ["nwsapi@2.2.23", "", {}, "sha512-7wfH4sLbt4M0gCDzGE6vzQBo0bfTKjU7Sfpqy/7gs1qBfYz2vEJH6vXcBKpO3+6Yu1telwd0t9HpyOoLEQQbIQ=="],
|
||||
|
||||
"outdent": ["outdent@0.5.0", "", {}, "sha512-/jHxFIzoMXdqPzTaCpFzAAWhpkSjZPF4Vsn6jAfNpmbH/ymsmd7Qc6VE9BGn0L6YMj6uwpQLxCECpus4ukKS9Q=="],
|
||||
|
||||
"p-filter": ["p-filter@2.1.0", "", { "dependencies": { "p-map": "^2.0.0" } }, "sha512-ZBxxZ5sL2HghephhpGAQdoskxplTwr7ICaehZwLIlfL6acuVgZPm8yBNuRAFBGEqtD/hmUeq9eqLg2ys9Xr/yw=="],
|
||||
|
||||
"p-limit": ["p-limit@2.3.0", "", { "dependencies": { "p-try": "^2.0.0" } }, "sha512-//88mFWSJx8lxCzwdAABTJL2MyWB12+eIY7MDL2SqLmAkeKU9qxRvWuSyTjm3FUmpBEMuFfckAIqEaVGUDxb6w=="],
|
||||
|
||||
"p-locate": ["p-locate@4.1.0", "", { "dependencies": { "p-limit": "^2.2.0" } }, "sha512-R79ZZ/0wAxKGu3oYMlz8jy/kbhsNrS7SKZ7PxEHBgJ5+F2mtFW2fK2cOtBh1cHYkQsbzFV7I+EoRKe6Yt0oK7A=="],
|
||||
|
||||
"p-map": ["p-map@2.1.0", "", {}, "sha512-y3b8Kpd8OAN444hxfBbFfj1FY/RjtTd8tzYwhUqNYXx0fXx2iX4maP4Qr6qhIKbQXI02wTLAda4fYUbDagTUFw=="],
|
||||
|
||||
"p-try": ["p-try@2.2.0", "", {}, "sha512-R4nPAVTAU0B9D35/Gk3uJf/7XYbQcyohSKdvAxIRSNghFl4e71hVoGnBNQz9cWaXxO2I10KTC+3jMdvvoKw6dQ=="],
|
||||
|
||||
"package-manager-detector": ["package-manager-detector@0.2.11", "", { "dependencies": { "quansync": "^0.2.7" } }, "sha512-BEnLolu+yuz22S56CU1SUKq3XC3PkwD5wv4ikR4MfGvnRVcmzXR9DwSlW2fEamyTPyXHomBJRzgapeuBvRNzJQ=="],
|
||||
|
||||
"parse5": ["parse5@7.3.0", "", { "dependencies": { "entities": "^6.0.0" } }, "sha512-IInvU7fabl34qmi9gY8XOVxhYyMyuH2xUNpb2q8/Y+7552KlejkRvqvD19nMoUW/uQGGbqNpA6Tufu5FL5BZgw=="],
|
||||
|
||||
"path-exists": ["path-exists@4.0.0", "", {}, "sha512-ak9Qy5Q7jYb2Wwcey5Fpvg2KoAc/ZIhLSLOSBmRmygPsGwkVVt0fZa0qrtMz+m6tJTAHfZQ8FnmB4MG4LWy7/w=="],
|
||||
|
||||
"path-key": ["path-key@3.1.1", "", {}, "sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q=="],
|
||||
|
||||
"path-type": ["path-type@4.0.0", "", {}, "sha512-gDKb8aZMDeD/tZWs9P6+q0J9Mwkdl6xMV8TjnGP3qJVJ06bdMgkbBlLU8IdfOsIsFz2BW1rNVT3XuNEl8zPAvw=="],
|
||||
|
||||
"picocolors": ["picocolors@1.1.1", "", {}, "sha512-xceH2snhtb5M9liqDsmEw56le376mTZkEX/jEb/RxNFyegNul7eNslCXP9FDj/Lcu0X8KEyMceP2ntpaHrDEVA=="],
|
||||
|
||||
"picomatch": ["picomatch@2.3.2", "", {}, "sha512-V7+vQEJ06Z+c5tSye8S+nHUfI51xoXIXjHQ99cQtKUkQqqO1kO/KCJUfZXuB47h/YBlDhah2H3hdUGXn8ie0oA=="],
|
||||
|
||||
"pify": ["pify@4.0.1", "", {}, "sha512-uB80kBFb/tfd68bVleG9T5GGsGPjJrLAUpR5PZIrhBnIaRTQRjqdJSsIKkOP6OAIFbj7GOrcudc5pNjZ+geV2g=="],
|
||||
|
||||
"prettier": ["prettier@2.8.8", "", { "bin": { "prettier": "bin-prettier.js" } }, "sha512-tdN8qQGvNjw4CHbY+XXk0JgCXn9QiF21a55rBe5LJAU+kDyC4WQn4+awm2Xfk2lQMk5fKup9XgzTZtGkjBdP9Q=="],
|
||||
|
||||
"punycode": ["punycode@2.3.1", "", {}, "sha512-vYt7UD1U9Wg6138shLtLOvdAu+8DsC/ilFtEVHcH+wydcSpNE20AfSOduf6MkRFahL5FY7X1oU7nKVZFtfq8Fg=="],
|
||||
|
||||
"quansync": ["quansync@0.2.11", "", {}, "sha512-AifT7QEbW9Nri4tAwR5M/uzpBuqfZf+zwaEM/QkzEjj7NBuFD2rBuy0K3dE+8wltbezDV7JMA0WfnCPYRSYbXA=="],
|
||||
|
||||
"queue-microtask": ["queue-microtask@1.2.3", "", {}, "sha512-NuaNSa6flKT5JaSYQzJok04JzTL1CA6aGhv5rfLW3PgqA+M2ChpZQnAC8h8i4ZFkBS8X5RqkDBHA7r4hej3K9A=="],
|
||||
|
||||
"read-yaml-file": ["read-yaml-file@1.1.0", "", { "dependencies": { "graceful-fs": "^4.1.5", "js-yaml": "^3.6.1", "pify": "^4.0.1", "strip-bom": "^3.0.0" } }, "sha512-VIMnQi/Z4HT2Fxuwg5KrY174U1VdUIASQVWXXyqtNRtxSr9IYkn1rsI6Tb6HsrHCmB7gVpNwX6JxPTHcH6IoTA=="],
|
||||
|
||||
"remark-gfm": ["remark-gfm@4.0.1", "", { "dependencies": { "@types/mdast": "^4.0.0", "mdast-util-gfm": "^3.0.0", "micromark-extension-gfm": "^3.0.0", "remark-parse": "^11.0.0", "remark-stringify": "^11.0.0", "unified": "^11.0.0" } }, "sha512-1quofZ2RQ9EWdeN34S79+KExV1764+wCUGop5CPL1WGdD0ocPpu91lzPGbwWMECpEpd42kJGQwzRfyov9j4yNg=="],
|
||||
|
||||
"remark-parse": ["remark-parse@11.0.0", "", { "dependencies": { "@types/mdast": "^4.0.0", "mdast-util-from-markdown": "^2.0.0", "micromark-util-types": "^2.0.0", "unified": "^11.0.0" } }, "sha512-FCxlKLNGknS5ba/1lmpYijMUzX2esxW5xQqjWxw2eHFfS2MSdaHVINFmhjo+qN1WhZhNimq0dZATN9pH0IDrpA=="],
|
||||
|
||||
"remark-stringify": ["remark-stringify@11.0.0", "", { "dependencies": { "@types/mdast": "^4.0.0", "mdast-util-to-markdown": "^2.0.0", "unified": "^11.0.0" } }, "sha512-1OSmLd3awB/t8qdoEOMazZkNsfVTeY4fTsgzcQFdXNq8ToTN4ZGwrMnlda4K6smTFKD+GRV6O48i6Z4iKgPPpw=="],
|
||||
|
||||
"resolve-from": ["resolve-from@5.0.0", "", {}, "sha512-qYg9KP24dD5qka9J47d0aVky0N+b4fTU89LN9iDnjB5waksiC49rvMB0PrUJQGoTmH50XPiqOvAjDfaijGxYZw=="],
|
||||
|
||||
"reusify": ["reusify@1.1.0", "", {}, "sha512-g6QUff04oZpHs0eG5p83rFLhHeV00ug/Yf9nZM6fLeUrPguBTkTQOdpAWWspMh55TZfVQDPaN3NQJfbVRAxdIw=="],
|
||||
|
||||
"rrweb-cssom": ["rrweb-cssom@0.8.0", "", {}, "sha512-guoltQEx+9aMf2gDZ0s62EcV8lsXR+0w8915TC3ITdn2YueuNjdAYh/levpU9nFaoChh9RUS5ZdQMrKfVEN9tw=="],
|
||||
|
||||
"run-parallel": ["run-parallel@1.2.0", "", { "dependencies": { "queue-microtask": "^1.2.2" } }, "sha512-5l4VyZR86LZ/lDxZTR6jqL8AFE2S0IFLMP26AbjsLVADxHdhB/c0GUsH+y39UfCi3dzz8OlQuPmnaJOMoDHQBA=="],
|
||||
|
||||
"safer-buffer": ["safer-buffer@2.1.2", "", {}, "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg=="],
|
||||
|
||||
"saxes": ["saxes@6.0.0", "", { "dependencies": { "xmlchars": "^2.2.0" } }, "sha512-xAg7SOnEhrm5zI3puOOKyy1OMcMlIJZYNJY7xLBwSze0UjhPLnWfj2GF2EpT0jmzaJKIWKHLsaSSajf35bcYnA=="],
|
||||
|
||||
"semver": ["semver@7.7.4", "", { "bin": { "semver": "bin/semver.js" } }, "sha512-vFKC2IEtQnVhpT78h1Yp8wzwrf8CM+MzKMHGJZfBtzhZNycRFnXsHk6E5TxIkkMsgNS7mdX3AGB7x2QM2di4lA=="],
|
||||
|
||||
"shebang-command": ["shebang-command@2.0.0", "", { "dependencies": { "shebang-regex": "^3.0.0" } }, "sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA=="],
|
||||
|
||||
"shebang-regex": ["shebang-regex@3.0.0", "", {}, "sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A=="],
|
||||
|
||||
"signal-exit": ["signal-exit@4.1.0", "", {}, "sha512-bzyZ1e88w9O1iNJbKnOlvYTrWPDl46O1bG0D3XInv+9tkPrxrN8jUUTiFlDkkmKWgn1M6CfIA13SuGqOa9Korw=="],
|
||||
|
||||
"slash": ["slash@3.0.0", "", {}, "sha512-g9Q1haeby36OSStwb4ntCGGGaKsaVSjQ68fBxoQcutl5fS1vuY18H3wSt3jFyFtrkx+Kz0V1G85A4MyAdDMi2Q=="],
|
||||
|
||||
"spawndamnit": ["spawndamnit@3.0.1", "", { "dependencies": { "cross-spawn": "^7.0.5", "signal-exit": "^4.0.1" } }, "sha512-MmnduQUuHCoFckZoWnXsTg7JaiLBJrKFj9UI2MbRPGaJeVpsLcVBu6P/IGZovziM/YBsellCmsprgNA+w0CzVg=="],
|
||||
|
||||
"sprintf-js": ["sprintf-js@1.0.3", "", {}, "sha512-D9cPgkvLlV3t3IzL0D0YLvGA9Ahk4PcvVwUbN0dSGr1aP0Nrt4AEnTUbuGvquEC0mA64Gqt1fzirlRs5ibXx8g=="],
|
||||
|
||||
"strip-ansi": ["strip-ansi@6.0.1", "", { "dependencies": { "ansi-regex": "^5.0.1" } }, "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A=="],
|
||||
|
||||
"strip-bom": ["strip-bom@3.0.0", "", {}, "sha512-vavAMRXOgBVNF6nyEEmL3DBK19iRpDcoIwW+swQ+CbGiu7lju6t+JklA1MHweoWtadgt4ISVUsXLyDq34ddcwA=="],
|
||||
|
||||
"symbol-tree": ["symbol-tree@3.2.4", "", {}, "sha512-9QNk5KwDF+Bvz+PyObkmSYjI5ksVUYtjW7AU22r2NKcfLJcXp96hkDWU3+XndOsUb+AQ9QhfzfCT2O+CNWT5Tw=="],
|
||||
|
||||
"temml": ["temml@0.13.2", "", {}, "sha512-n8fDRSsLscq9nh9j6z+FgkCvFMT0IJm6GCgwfzh+7AHT3Sfb4jFTQlsA6hVcF2dYYr3b66oDBVES95RfoukyrA=="],
|
||||
|
||||
"term-size": ["term-size@2.2.1", "", {}, "sha512-wK0Ri4fOGjv/XPy8SBHZChl8CM7uMc5VML7SqiQ0zG7+J5Vr+RMQDoHa2CNT6KHUnTGIXH34UDMkPzAUyapBZg=="],
|
||||
|
||||
"tldts": ["tldts@6.1.86", "", { "dependencies": { "tldts-core": "^6.1.86" }, "bin": { "tldts": "bin/cli.js" } }, "sha512-WMi/OQ2axVTf/ykqCQgXiIct+mSQDFdH2fkwhPwgEwvJ1kSzZRiinb0zF2Xb8u4+OqPChmyI6MEu4EezNJz+FQ=="],
|
||||
|
||||
"tldts-core": ["tldts-core@6.1.86", "", {}, "sha512-Je6p7pkk+KMzMv2XXKmAE3McmolOQFdxkKw0R8EYNr7sELW46JqnNeTX8ybPiQgvg1ymCoF8LXs5fzFaZvJPTA=="],
|
||||
|
||||
"to-regex-range": ["to-regex-range@5.0.1", "", { "dependencies": { "is-number": "^7.0.0" } }, "sha512-65P7iz6X5yEr1cwcgvQxbbIw7Uk3gOy5dIdtZ4rDveLqhrdJP+Li/Hx6tyK0NEb+2GCyneCMJiGqrADCSNk8sQ=="],
|
||||
|
||||
"tough-cookie": ["tough-cookie@5.1.2", "", { "dependencies": { "tldts": "^6.1.32" } }, "sha512-FVDYdxtnj0G6Qm/DhNPSb8Ju59ULcup3tuJxkFb5K8Bv2pUXILbf0xZWU8PX8Ov19OXljbUyveOFwRMwkXzO+A=="],
|
||||
|
||||
"tr46": ["tr46@5.1.1", "", { "dependencies": { "punycode": "^2.3.1" } }, "sha512-hdF5ZgjTqgAntKkklYw0R03MG2x/bSzTtkxmIRw/sTNV8YXsCJ1tfLAX23lhxhHJlEf3CRCOCGGWw3vI3GaSPw=="],
|
||||
|
||||
"trough": ["trough@2.2.0", "", {}, "sha512-tmMpK00BjZiUyVyvrBK7knerNgmgvcV/KLVyuma/SC+TQN167GrMRciANTz09+k3zW8L8t60jWO1GpfkZdjTaw=="],
|
||||
|
||||
"turndown": ["turndown@7.2.2", "", { "dependencies": { "@mixmark-io/domino": "^2.2.0" } }, "sha512-1F7db8BiExOKxjSMU2b7if62D/XOyQyZbPKq/nUwopfgnHlqXHqQ0lvfUTeUIr1lZJzOPFn43dODyMSIfvWRKQ=="],
|
||||
|
||||
"turndown-plugin-gfm": ["turndown-plugin-gfm@1.0.2", "", {}, "sha512-vwz9tfvF7XN/jE0dGoBei3FXWuvll78ohzCZQuOb+ZjWrs3a0XhQVomJEb2Qh4VHTPNRO4GPZh0V7VRbiWwkRg=="],
|
||||
|
||||
"typescript": ["typescript@5.9.3", "", { "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" } }, "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw=="],
|
||||
|
||||
"uhyphen": ["uhyphen@0.2.0", "", {}, "sha512-qz3o9CHXmJJPGBdqzab7qAYuW8kQGKNEuoHFYrBwV6hWIMcpAmxDLXojcHfFr9US1Pe6zUswEIJIbLI610fuqA=="],
|
||||
|
||||
"undici-types": ["undici-types@7.18.2", "", {}, "sha512-AsuCzffGHJybSaRrmr5eHr81mwJU3kjw6M+uprWvCXiNeN9SOGwQ3Jn8jb8m3Z6izVgknn1R0FTCEAP2QrLY/w=="],
|
||||
|
||||
"unified": ["unified@11.0.5", "", { "dependencies": { "@types/unist": "^3.0.0", "bail": "^2.0.0", "devlop": "^1.0.0", "extend": "^3.0.0", "is-plain-obj": "^4.0.0", "trough": "^2.0.0", "vfile": "^6.0.0" } }, "sha512-xKvGhPWw3k84Qjh8bI3ZeJjqnyadK+GEFtazSfZv/rKeTkTjOJho6mFqh2SM96iIcZokxiOpg78GazTSg8+KHA=="],
|
||||
|
||||
"unist-util-is": ["unist-util-is@6.0.1", "", { "dependencies": { "@types/unist": "^3.0.0" } }, "sha512-LsiILbtBETkDz8I9p1dQ0uyRUWuaQzd/cuEeS1hoRSyW5E5XGmTzlwY1OrNzzakGowI9Dr/I8HVaw4hTtnxy8g=="],
|
||||
|
||||
"unist-util-stringify-position": ["unist-util-stringify-position@4.0.0", "", { "dependencies": { "@types/unist": "^3.0.0" } }, "sha512-0ASV06AAoKCDkS2+xw5RXJywruurpbC4JZSm7nr7MOt1ojAzvyyaO+UxZf18j8FCF6kmzCZKcAgN/yu2gm2XgQ=="],
|
||||
|
||||
"unist-util-visit": ["unist-util-visit@5.1.0", "", { "dependencies": { "@types/unist": "^3.0.0", "unist-util-is": "^6.0.0", "unist-util-visit-parents": "^6.0.0" } }, "sha512-m+vIdyeCOpdr/QeQCu2EzxX/ohgS8KbnPDgFni4dQsfSCtpz8UqDyY5GjRru8PDKuYn7Fq19j1CQ+nJSsGKOzg=="],
|
||||
|
||||
"unist-util-visit-parents": ["unist-util-visit-parents@6.0.2", "", { "dependencies": { "@types/unist": "^3.0.0", "unist-util-is": "^6.0.0" } }, "sha512-goh1s1TBrqSqukSc8wrjwWhL0hiJxgA8m4kFxGlQ+8FYQ3C/m11FcTs4YYem7V664AhHVvgoQLk890Ssdsr2IQ=="],
|
||||
|
||||
"universalify": ["universalify@0.1.2", "", {}, "sha512-rBJeI5CXAlmy1pV+617WB9J63U6XcazHHF2f2dbJix4XzpUF0RS3Zbj0FGIOCAva5P/d/GBOYaACQ1w+0azUkg=="],
|
||||
|
||||
"vfile": ["vfile@6.0.3", "", { "dependencies": { "@types/unist": "^3.0.0", "vfile-message": "^4.0.0" } }, "sha512-KzIbH/9tXat2u30jf+smMwFCsno4wHVdNmzFyL+T/L3UGqqk6JKfVqOFOZEpZSHADH1k40ab6NUIXZq422ov3Q=="],
|
||||
|
||||
"vfile-message": ["vfile-message@4.0.3", "", { "dependencies": { "@types/unist": "^3.0.0", "unist-util-stringify-position": "^4.0.0" } }, "sha512-QTHzsGd1EhbZs4AsQ20JX1rC3cOlt/IWJruk893DfLRr57lcnOeMaWG4K0JrRta4mIJZKth2Au3mM3u03/JWKw=="],
|
||||
|
||||
"w3c-xmlserializer": ["w3c-xmlserializer@5.0.0", "", { "dependencies": { "xml-name-validator": "^5.0.0" } }, "sha512-o8qghlI8NZHU1lLPrpi2+Uq7abh4GGPpYANlalzWxyWteJOCsr/P+oPBA49TOLu5FTZO4d3F9MnWJfiMo4BkmA=="],
|
||||
|
||||
"webidl-conversions": ["webidl-conversions@7.0.0", "", {}, "sha512-VwddBukDzu71offAQR975unBIGqfKZpM+8ZX6ySk8nYhVoo5CYaZyzt3YBvYtRtO+aoGlqxPg/B87NGVZ/fu6g=="],
|
||||
|
||||
"whatwg-encoding": ["whatwg-encoding@3.1.1", "", { "dependencies": { "iconv-lite": "0.6.3" } }, "sha512-6qN4hJdMwfYBtE3YBTTHhoeuUrDBPZmbQaxWAqSALV/MeEnR5z1xd8UKud2RAkFoPkmB+hli1TZSnyi84xz1vQ=="],
|
||||
|
||||
"whatwg-mimetype": ["whatwg-mimetype@4.0.0", "", {}, "sha512-QaKxh0eNIi2mE9p2vEdzfagOKHCcj1pJ56EEHGQOVxp8r9/iszLUUV7v89x9O1p/T+NlTM5W7jW6+cz4Fq1YVg=="],
|
||||
|
||||
"whatwg-url": ["whatwg-url@14.2.0", "", { "dependencies": { "tr46": "^5.1.0", "webidl-conversions": "^7.0.0" } }, "sha512-De72GdQZzNTUBBChsXueQUnPKDkg/5A5zp7pFDuQAj5UFoENpiACU0wlCvzpAGnTkj++ihpKwKyYewn/XNUbKw=="],
|
||||
|
||||
"which": ["which@2.0.2", "", { "dependencies": { "isexe": "^2.0.0" }, "bin": { "node-which": "./bin/node-which" } }, "sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA=="],
|
||||
|
||||
"ws": ["ws@8.20.0", "", { "peerDependencies": { "bufferutil": "^4.0.1", "utf-8-validate": ">=5.0.2" }, "optionalPeers": ["bufferutil", "utf-8-validate"] }, "sha512-sAt8BhgNbzCtgGbt2OxmpuryO63ZoDk/sqaB/znQm94T4fCEsy/yV+7CdC1kJhOU9lboAEU7R3kquuycDoibVA=="],
|
||||
|
||||
"xml-name-validator": ["xml-name-validator@5.0.0", "", {}, "sha512-EvGK8EJ3DhaHfbRlETOWAS5pO9MZITeauHKJyb8wyajUfQUenkIg2MvLDTZ4T/TgIcm3HU0TFBgWWboAZ30UHg=="],
|
||||
|
||||
"xmlchars": ["xmlchars@2.2.0", "", {}, "sha512-JZnDKK8B0RCDw84FNdDAIpZK+JuJw+s7Lz8nksI7SIuU3UXJJslUthsi+uWBUYOwPFwW7W7PRLRfUKpxjtjFCw=="],
|
||||
|
||||
"zwitch": ["zwitch@2.0.4", "", {}, "sha512-bXE4cR/kVZhKZX/RjPEflHaKVhUVl85noU3v6b8apfQEc1x4A+zBxjZ4lN8LqGd6WZ3dl98pY4o717VFmoPp+A=="],
|
||||
|
||||
"@manypkg/find-root/@types/node": ["@types/node@12.20.55", "", {}, "sha512-J8xLz7q2OFulZ2cyGTLE1TbbZcjpno7FaN6zdJNrgAdrJ+DZzh/uFR6YrTb4C+nXakvud8Q4+rbhoIWlYQbUFQ=="],
|
||||
|
||||
"@manypkg/find-root/fs-extra": ["fs-extra@8.1.0", "", { "dependencies": { "graceful-fs": "^4.2.0", "jsonfile": "^4.0.0", "universalify": "^0.1.0" } }, "sha512-yhlQgA6mnOJUKOsRUFsgJdQCvkKhcz8tlZG5HBQfReYZy46OwLcY+Zia0mtdHsOo9y/hP+CxMN0TU9QxoOtG4g=="],
|
||||
|
||||
"@manypkg/get-packages/@changesets/types": ["@changesets/types@4.1.0", "", {}, "sha512-LDQvVDv5Kb50ny2s25Fhm3d9QSZimsoUGBsUioj6MC3qbMUCuC8GPIvk/M6IvXx3lYhAs0lwWUQLb+VIEUCECw=="],
|
||||
|
||||
"@manypkg/get-packages/fs-extra": ["fs-extra@8.1.0", "", { "dependencies": { "graceful-fs": "^4.2.0", "jsonfile": "^4.0.0", "universalify": "^0.1.0" } }, "sha512-yhlQgA6mnOJUKOsRUFsgJdQCvkKhcz8tlZG5HBQfReYZy46OwLcY+Zia0mtdHsOo9y/hP+CxMN0TU9QxoOtG4g=="],
|
||||
|
||||
"dom-serializer/entities": ["entities@4.5.0", "", {}, "sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw=="],
|
||||
|
||||
"htmlparser2/entities": ["entities@7.0.1", "", {}, "sha512-TWrgLOFUQTH994YUyl1yT4uyavY5nNB5muff+RtWaqNVCAK408b5ZnnbNAUEWLTCpum9w6arT70i1XdQ4UeOPA=="],
|
||||
|
||||
"mdast-util-find-and-replace/escape-string-regexp": ["escape-string-regexp@5.0.0", "", {}, "sha512-/veY75JbMK4j1yjvuUxuVsiS/hr/4iHs9FTT6cgTexxdE0Ly/glccBAkloH/DofkjRbZU3bnoj38mOmhkZ0lHw=="],
|
||||
|
||||
"read-yaml-file/js-yaml": ["js-yaml@3.14.2", "", { "dependencies": { "argparse": "^1.0.7", "esprima": "^4.0.0" }, "bin": { "js-yaml": "bin/js-yaml.js" } }, "sha512-PMSmkqxr106Xa156c2M265Z+FTrPl+oxd/rgOQy2tijQeK5TxQ43psO1ZCwhVOSdnn+RzkzlRz/eY4BgJBYVpg=="],
|
||||
|
||||
"whatwg-encoding/iconv-lite": ["iconv-lite@0.6.3", "", { "dependencies": { "safer-buffer": ">= 2.1.2 < 3.0.0" } }, "sha512-4fCk79wshMdzMp2rH06qWrJE4iolqLhCUH+OiuIgU++RB0+94NlDL81atO7GX55uUKueo0txHNtvEyI6D7WdMw=="],
|
||||
|
||||
"read-yaml-file/js-yaml/argparse": ["argparse@1.0.10", "", { "dependencies": { "sprintf-js": "~1.0.2" } }, "sha512-o5Roy6tNG4SL/FOkCAN6RzjiakZS25RLYFrcMttJqbdd8BWrnA+fGz57iN5Pb06pvBGvl5gQ0B48dJlslXvoTg=="],
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,63 @@
|
|||
{
|
||||
"name": "baoyu-fetch",
|
||||
"version": "0.1.1",
|
||||
"description": "Read URLs into high-quality Markdown or JSON with Chrome CDP and site adapters.",
|
||||
"type": "module",
|
||||
"bin": {
|
||||
"baoyu-fetch": "./src/cli.ts"
|
||||
},
|
||||
"files": [
|
||||
"README.zh-CN.md",
|
||||
"src/adapters",
|
||||
"src/browser",
|
||||
"src/cli.ts",
|
||||
"src/commands",
|
||||
"src/extract",
|
||||
"src/media",
|
||||
"src/types",
|
||||
"src/utils",
|
||||
"README.md"
|
||||
],
|
||||
"repository": {
|
||||
"type": "git",
|
||||
"url": "git+https://github.com/JimLiu/baoyu-skills.git",
|
||||
"directory": "packages/baoyu-fetch"
|
||||
},
|
||||
"bugs": {
|
||||
"url": "https://github.com/JimLiu/baoyu-skills/issues"
|
||||
},
|
||||
"homepage": "https://github.com/JimLiu/baoyu-skills/tree/main/packages/baoyu-fetch#readme",
|
||||
"publishConfig": {
|
||||
"access": "public"
|
||||
},
|
||||
"scripts": {
|
||||
"build": "rm -rf dist && bun build ./src/cli.ts --target bun --outfile ./dist/cli.js && chmod +x ./dist/cli.js",
|
||||
"check": "tsc --noEmit",
|
||||
"dev": "bun run ./src/cli.ts",
|
||||
"release": "changeset publish",
|
||||
"test": "bun test",
|
||||
"version-packages": "changeset version"
|
||||
},
|
||||
"engines": {
|
||||
"bun": ">=1.2.0"
|
||||
},
|
||||
"dependencies": {
|
||||
"@mozilla/readability": "^0.6.0",
|
||||
"chrome-launcher": "^1.2.1",
|
||||
"defuddle": "^0.14.0",
|
||||
"jsdom": "^26.0.0",
|
||||
"remark-gfm": "^4.0.1",
|
||||
"remark-parse": "^11.0.0",
|
||||
"turndown": "^7.2.0",
|
||||
"turndown-plugin-gfm": "^1.0.2",
|
||||
"unified": "^11.0.5",
|
||||
"ws": "^8.18.3"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@changesets/cli": "^2.30.0",
|
||||
"@types/bun": "^1.2.23",
|
||||
"@types/jsdom": "^21.1.7",
|
||||
"@types/ws": "^8.18.1",
|
||||
"typescript": "^5.9.2"
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,25 @@
|
|||
import { describe, expect, test } from "bun:test";
|
||||
import { resolveAdapter } from "../adapters";
|
||||
|
||||
describe("adapter registry", () => {
|
||||
test("matches x adapter for x.com status URLs", () => {
|
||||
const adapter = resolveAdapter({
|
||||
url: new URL("https://x.com/openai/status/1234567890"),
|
||||
});
|
||||
expect(adapter.name).toBe("x");
|
||||
});
|
||||
|
||||
test("matches hn adapter for item URLs", () => {
|
||||
const adapter = resolveAdapter({
|
||||
url: new URL("https://news.ycombinator.com/item?id=47534848"),
|
||||
});
|
||||
expect(adapter.name).toBe("hn");
|
||||
});
|
||||
|
||||
test("falls back to generic adapter", () => {
|
||||
const adapter = resolveAdapter({
|
||||
url: new URL("https://example.com/post"),
|
||||
});
|
||||
expect(adapter.name).toBe("generic");
|
||||
});
|
||||
});
|
||||
|
|
@ -0,0 +1,69 @@
|
|||
import { describe, expect, test } from "bun:test";
|
||||
import { HELP_TEXT, parseArgs } from "../cli";
|
||||
|
||||
describe("parseArgs", () => {
|
||||
test("defaults to markdown output", () => {
|
||||
const options = parseArgs(["bun", "src/cli.ts", "https://example.com"]);
|
||||
expect(options.format).toBe("markdown");
|
||||
});
|
||||
|
||||
test("parses explicit json output format", () => {
|
||||
const options = parseArgs(["bun", "src/cli.ts", "https://example.com", "--format", "json"]);
|
||||
expect(options.format).toBe("json");
|
||||
});
|
||||
|
||||
test("maps --json to json output format", () => {
|
||||
const options = parseArgs(["bun", "src/cli.ts", "https://example.com", "--json"]);
|
||||
expect(options.format).toBe("json");
|
||||
});
|
||||
|
||||
test("parses --wait-for interaction", () => {
|
||||
const options = parseArgs(["bun", "src/cli.ts", "https://example.com", "--wait-for", "interaction"]);
|
||||
expect(options.waitMode).toBe("interaction");
|
||||
});
|
||||
|
||||
test("parses --wait-for force", () => {
|
||||
const options = parseArgs(["bun", "src/cli.ts", "https://example.com", "--wait-for", "force"]);
|
||||
expect(options.waitMode).toBe("force");
|
||||
});
|
||||
|
||||
test("maps legacy wait flags to interaction mode", () => {
|
||||
const options = parseArgs(["bun", "src/cli.ts", "https://example.com", "--wait-for-interaction"]);
|
||||
expect(options.waitMode).toBe("interaction");
|
||||
});
|
||||
|
||||
test("parses media download options", () => {
|
||||
const options = parseArgs([
|
||||
"bun",
|
||||
"src/cli.ts",
|
||||
"https://example.com",
|
||||
"--download-media",
|
||||
"--media-dir",
|
||||
"./assets",
|
||||
]);
|
||||
|
||||
expect(options.downloadMedia).toBe(true);
|
||||
expect(options.mediaDir).toBe("./assets");
|
||||
});
|
||||
|
||||
test("rejects invalid wait modes", () => {
|
||||
expect(() =>
|
||||
parseArgs(["bun", "src/cli.ts", "https://example.com", "--wait-for", "unknown"]),
|
||||
).toThrow("Invalid wait mode");
|
||||
});
|
||||
|
||||
test("rejects invalid output formats", () => {
|
||||
expect(() =>
|
||||
parseArgs(["bun", "src/cli.ts", "https://example.com", "--format", "xml"]),
|
||||
).toThrow("Invalid output format");
|
||||
});
|
||||
|
||||
test("documents wait modes in help text", () => {
|
||||
expect(HELP_TEXT).toContain("baoyu-fetch");
|
||||
expect(HELP_TEXT).toContain("--format <type>");
|
||||
expect(HELP_TEXT).toContain("--wait-for <mode>");
|
||||
expect(HELP_TEXT).toContain("--download-media");
|
||||
expect(HELP_TEXT).toContain("force: start visible Chrome, then auto-continue");
|
||||
expect(HELP_TEXT).toContain("or continue immediately when you press Enter");
|
||||
});
|
||||
});
|
||||
|
|
@ -0,0 +1,55 @@
|
|||
import { describe, expect, test } from "bun:test";
|
||||
import { formatOutputContent } from "../commands/convert";
|
||||
|
||||
describe("formatOutputContent", () => {
|
||||
test("returns raw markdown for markdown output", () => {
|
||||
expect(
|
||||
formatOutputContent("markdown", {
|
||||
adapter: "generic",
|
||||
status: "ok",
|
||||
media: [],
|
||||
downloads: null,
|
||||
document: {
|
||||
url: "https://example.com",
|
||||
content: [],
|
||||
},
|
||||
markdown: "# Example",
|
||||
}),
|
||||
).toBe("# Example");
|
||||
});
|
||||
|
||||
test("returns structured json for json output", () => {
|
||||
const parsed = JSON.parse(
|
||||
formatOutputContent("json", {
|
||||
adapter: "generic",
|
||||
status: "ok",
|
||||
media: [],
|
||||
downloads: null,
|
||||
document: {
|
||||
url: "https://example.com",
|
||||
content: [],
|
||||
},
|
||||
markdown: "# Example",
|
||||
}),
|
||||
);
|
||||
|
||||
expect(parsed.status).toBe("ok");
|
||||
expect(parsed.markdown).toBe("# Example");
|
||||
expect(parsed.document.url).toBe("https://example.com");
|
||||
});
|
||||
|
||||
test("rejects markdown output for interaction-required payloads", () => {
|
||||
expect(() =>
|
||||
formatOutputContent("markdown", {
|
||||
adapter: "x",
|
||||
status: "needs_interaction",
|
||||
interaction: {
|
||||
type: "wait_for_interaction",
|
||||
kind: "login",
|
||||
provider: "x",
|
||||
prompt: "Login required",
|
||||
},
|
||||
}),
|
||||
).toThrow("Markdown output is only available");
|
||||
});
|
||||
});
|
||||
|
|
@ -0,0 +1,167 @@
|
|||
import { describe, expect, test } from "bun:test";
|
||||
import { renderMarkdown } from "../extract/markdown-renderer";
|
||||
import {
|
||||
buildHnDocument,
|
||||
buildHnThreadMarkdown,
|
||||
extractHnThreadFromHtml,
|
||||
parseHnItemId,
|
||||
type HnCommentNode,
|
||||
type HnItem,
|
||||
} from "../adapters/hn";
|
||||
|
||||
describe("hn adapter helpers", () => {
|
||||
test("parses item id from hn item url", () => {
|
||||
expect(parseHnItemId(new URL("https://news.ycombinator.com/item?id=47534848"))).toBe(47534848);
|
||||
expect(parseHnItemId(new URL("https://news.ycombinator.com/newest"))).toBeNull();
|
||||
});
|
||||
|
||||
test("renders threaded comments with author, time, and nested indentation", () => {
|
||||
const story: HnItem = {
|
||||
id: 47534848,
|
||||
type: "story",
|
||||
by: "mmcclure",
|
||||
time: 1774554485,
|
||||
title: "Example & Title",
|
||||
url: "https://example.com/post",
|
||||
score: 257,
|
||||
descendants: 2,
|
||||
};
|
||||
|
||||
const comments: HnCommentNode[] = [
|
||||
{
|
||||
item: {
|
||||
id: 47535377,
|
||||
type: "comment",
|
||||
by: "jackfruitpeel",
|
||||
time: 1774557334,
|
||||
text: "Root comment<p>With two paragraphs.",
|
||||
},
|
||||
children: [
|
||||
{
|
||||
item: {
|
||||
id: 47535469,
|
||||
type: "comment",
|
||||
by: "__MatrixMan__",
|
||||
time: 1774557848,
|
||||
text: "Nested reply with a <a href=\"item?id=1\">relative link</a>.",
|
||||
},
|
||||
children: [],
|
||||
},
|
||||
],
|
||||
},
|
||||
];
|
||||
|
||||
const body = buildHnThreadMarkdown(story, comments, "https://news.ycombinator.com/item?id=47534848");
|
||||
expect(body).toContain("Source: [https://example.com/post](https://example.com/post)");
|
||||
expect(body).toContain("Submitted by mmcclure at 2026-03-26 19:48:05 UTC");
|
||||
expect(body).toContain("- jackfruitpeel · [2026-03-26 20:35:34 UTC](https://news.ycombinator.com/item?id=47534848#47535377)");
|
||||
expect(body).toContain(" Root comment");
|
||||
expect(body).toContain(" With two paragraphs.");
|
||||
expect(body).toContain(" - __MatrixMan__ · [2026-03-26 20:44:08 UTC](https://news.ycombinator.com/item?id=47534848#47535469)");
|
||||
expect(body).toContain(" Nested reply with a [relative link](https://news.ycombinator.com/item?id=1).");
|
||||
});
|
||||
|
||||
test("extracts story metadata and nested comments from hn html", () => {
|
||||
const parsed = extractHnThreadFromHtml(
|
||||
`
|
||||
<html>
|
||||
<body>
|
||||
<table class="fatitem">
|
||||
<tr class="athing submission" id="47534848">
|
||||
<td class="title">
|
||||
<span class="titleline">
|
||||
<a href="https://example.com/post">Example story</a>
|
||||
</span>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="subtext">
|
||||
<span class="subline">
|
||||
<span class="score">257 points</span>
|
||||
by <a href="user?id=mmcclure" class="hnuser">mmcclure</a>
|
||||
<span class="age" title="2026-03-26T19:48:05 1774554485">
|
||||
<a href="item?id=47534848">1 hour ago</a>
|
||||
</span>
|
||||
<a href="item?id=47534848">152 comments</a>
|
||||
</span>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><div class="toptext">Story <p>body</p></div></td>
|
||||
</tr>
|
||||
</table>
|
||||
<table class="comment-tree">
|
||||
<tr class="athing comtr" id="47535377">
|
||||
<td class="ind" indent="0"></td>
|
||||
<td class="default">
|
||||
<span class="comhead">
|
||||
<a href="user?id=jackfruitpeel" class="hnuser">jackfruitpeel</a>
|
||||
<span class="age" title="2026-03-26T20:35:34 1774557334">
|
||||
<a href="item?id=47535377">36 minutes ago</a>
|
||||
</span>
|
||||
</span>
|
||||
<div class="comment"><div class="commtext c00">Root</div></div>
|
||||
</td>
|
||||
</tr>
|
||||
<tr class="athing comtr" id="47535469">
|
||||
<td class="ind" indent="1"></td>
|
||||
<td class="default">
|
||||
<span class="comhead">
|
||||
<a href="user?id=willio58" class="hnuser">willio58</a>
|
||||
<span class="age" title="2026-03-26T20:44:08 1774557848">
|
||||
<a href="item?id=47535469">27 minutes ago</a>
|
||||
</span>
|
||||
</span>
|
||||
<div class="comment"><div class="commtext c00">Child</div></div>
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
</body>
|
||||
</html>
|
||||
`,
|
||||
"https://news.ycombinator.com/item?id=47534848",
|
||||
);
|
||||
|
||||
expect(parsed).not.toBeNull();
|
||||
expect(parsed?.story.title).toBe("Example story");
|
||||
expect(parsed?.story.url).toBe("https://example.com/post");
|
||||
expect(parsed?.story.by).toBe("mmcclure");
|
||||
expect(parsed?.story.time).toBe(1774554485);
|
||||
expect(parsed?.story.score).toBe(257);
|
||||
expect(parsed?.story.descendants).toBe(152);
|
||||
expect(parsed?.story.text).toContain("Story");
|
||||
expect(parsed?.comments).toHaveLength(1);
|
||||
expect(parsed?.comments[0]?.item.by).toBe("jackfruitpeel");
|
||||
expect(parsed?.comments[0]?.children).toHaveLength(1);
|
||||
expect(parsed?.comments[0]?.children[0]?.item.by).toBe("willio58");
|
||||
});
|
||||
|
||||
test("builds hn document with metadata and markdown body", () => {
|
||||
const document = buildHnDocument(
|
||||
{
|
||||
id: 123,
|
||||
type: "story",
|
||||
by: "pg",
|
||||
time: 1175714200,
|
||||
title: "Ask HN: Example",
|
||||
text: "What are you working on?",
|
||||
score: 111,
|
||||
descendants: 0,
|
||||
},
|
||||
[],
|
||||
"https://news.ycombinator.com/item?id=123",
|
||||
);
|
||||
|
||||
const markdown = renderMarkdown(document);
|
||||
expect(document.adapter).toBe("hn");
|
||||
expect(document.siteName).toBe("Hacker News");
|
||||
expect(document.publishedAt).toBe("2007-04-04T19:16:40.000Z");
|
||||
expect(markdown).toContain('adapter: "hn"');
|
||||
expect(markdown).toContain('siteName: "Hacker News"');
|
||||
expect(markdown).toContain("# Ask HN: Example");
|
||||
expect(markdown).toContain("## Post");
|
||||
expect(markdown).toContain("What are you working on?");
|
||||
expect(markdown).toContain("## Comments");
|
||||
expect(markdown).toContain("No comments.");
|
||||
});
|
||||
});
|
||||
|
|
@ -0,0 +1,105 @@
|
|||
import { afterEach, describe, expect, test } from "bun:test";
|
||||
import {
|
||||
convertHtmlToMarkdown,
|
||||
extractTitleFromMarkdownDocument,
|
||||
} from "../extract/html-to-markdown";
|
||||
|
||||
const originalFetch = globalThis.fetch;
|
||||
|
||||
afterEach(() => {
|
||||
globalThis.fetch = originalFetch;
|
||||
});
|
||||
|
||||
describe("extractTitleFromMarkdownDocument", () => {
|
||||
test("prefers frontmatter title when present", () => {
|
||||
const title = extractTitleFromMarkdownDocument(`---
|
||||
title: "Frontmatter Title"
|
||||
---
|
||||
|
||||
# Heading Title
|
||||
`);
|
||||
|
||||
expect(title).toBe("Frontmatter Title");
|
||||
});
|
||||
|
||||
test("falls back to the first markdown heading", () => {
|
||||
const title = extractTitleFromMarkdownDocument(`
|
||||
Intro text
|
||||
|
||||
# Heading Title
|
||||
|
||||
Body text.
|
||||
`);
|
||||
|
||||
expect(title).toBe("Heading Title");
|
||||
});
|
||||
});
|
||||
|
||||
describe("convertHtmlToMarkdown remote fallback", () => {
|
||||
test("does not call defuddle.md when the remote fallback option is disabled", async () => {
|
||||
let fetchCalls = 0;
|
||||
globalThis.fetch = Object.assign(
|
||||
async () => {
|
||||
fetchCalls += 1;
|
||||
return new Response("# Remote Title\n\nRemote body.", {
|
||||
headers: {
|
||||
"content-type": "text/markdown",
|
||||
},
|
||||
});
|
||||
},
|
||||
{
|
||||
preconnect: originalFetch.preconnect,
|
||||
},
|
||||
) as typeof fetch;
|
||||
|
||||
const result = await convertHtmlToMarkdown(
|
||||
"<!doctype html><html><head><title>Local Title</title></head><body></body></html>",
|
||||
"https://example.com/post",
|
||||
);
|
||||
|
||||
expect(fetchCalls).toBe(0);
|
||||
expect(result.conversionMethod).not.toBe("defuddle-api");
|
||||
});
|
||||
|
||||
test("uses defuddle.md markdown when local extraction is empty", async () => {
|
||||
const fetchCalls: Array<{ input: RequestInfo | URL; init?: RequestInit }> = [];
|
||||
globalThis.fetch = Object.assign(
|
||||
async (input: RequestInfo | URL, init?: RequestInit) => {
|
||||
fetchCalls.push({ input, init });
|
||||
return new Response(`---
|
||||
title: "Remote Title"
|
||||
---
|
||||
|
||||
# Remote Title
|
||||
|
||||
Remote body.
|
||||
`, {
|
||||
headers: {
|
||||
"content-type": "text/markdown",
|
||||
},
|
||||
});
|
||||
},
|
||||
{
|
||||
preconnect: originalFetch.preconnect,
|
||||
},
|
||||
) as typeof fetch;
|
||||
|
||||
const result = await convertHtmlToMarkdown(
|
||||
"<!doctype html><html><head><title>Local Title</title></head><body></body></html>",
|
||||
"https://example.com/post",
|
||||
{ enableRemoteMarkdownFallback: true },
|
||||
);
|
||||
|
||||
expect(fetchCalls).toHaveLength(1);
|
||||
expect(String(fetchCalls[0]?.input)).toBe(
|
||||
"https://defuddle.md/https%3A%2F%2Fexample.com%2Fpost",
|
||||
);
|
||||
expect(fetchCalls[0]?.init?.headers).toEqual({
|
||||
accept: "text/markdown,text/plain;q=0.9,*/*;q=0.1",
|
||||
});
|
||||
expect(result.conversionMethod).toBe("defuddle-api");
|
||||
expect(result.metadata.title).toBe("Remote Title");
|
||||
expect(result.markdown).toBe("# Remote Title\n\nRemote body.");
|
||||
expect(result.fallbackReason).toContain("defuddle.md");
|
||||
});
|
||||
});
|
||||
|
|
@ -0,0 +1,54 @@
|
|||
import { describe, expect, test } from "bun:test";
|
||||
import { detectInteractionGateFromSnapshot } from "../browser/interaction-gates";
|
||||
|
||||
describe("detectInteractionGateFromSnapshot", () => {
|
||||
test("detects cloudflare challenge", () => {
|
||||
const gate = detectInteractionGateFromSnapshot({
|
||||
title: "Just a moment...",
|
||||
currentUrl: "https://example.com/cdn-cgi/challenge-platform/h/b",
|
||||
bodyText: "Checking your browser before accessing example.com",
|
||||
hasCloudflareTurnstile: true,
|
||||
hasCloudflareChallenge: true,
|
||||
hasRecaptcha: false,
|
||||
hasRecaptchaIframe: false,
|
||||
hasHcaptcha: false,
|
||||
hasHcaptchaIframe: false,
|
||||
});
|
||||
|
||||
expect(gate?.kind).toBe("cloudflare");
|
||||
expect(gate?.provider).toBe("cloudflare");
|
||||
});
|
||||
|
||||
test("detects google recaptcha", () => {
|
||||
const gate = detectInteractionGateFromSnapshot({
|
||||
title: "Protected page",
|
||||
currentUrl: "https://example.com/form",
|
||||
bodyText: "Please verify that you're not a robot via reCAPTCHA",
|
||||
hasCloudflareTurnstile: false,
|
||||
hasCloudflareChallenge: false,
|
||||
hasRecaptcha: true,
|
||||
hasRecaptchaIframe: true,
|
||||
hasHcaptcha: false,
|
||||
hasHcaptchaIframe: false,
|
||||
});
|
||||
|
||||
expect(gate?.kind).toBe("recaptcha");
|
||||
expect(gate?.provider).toBe("google_recaptcha");
|
||||
});
|
||||
|
||||
test("returns null when no challenge is present", () => {
|
||||
const gate = detectInteractionGateFromSnapshot({
|
||||
title: "Example",
|
||||
currentUrl: "https://example.com/article",
|
||||
bodyText: "Normal article body",
|
||||
hasCloudflareTurnstile: false,
|
||||
hasCloudflareChallenge: false,
|
||||
hasRecaptcha: false,
|
||||
hasRecaptchaIframe: false,
|
||||
hasHcaptcha: false,
|
||||
hasHcaptchaIframe: false,
|
||||
});
|
||||
|
||||
expect(gate).toBeNull();
|
||||
});
|
||||
});
|
||||
|
|
@ -0,0 +1,138 @@
|
|||
import { describe, expect, test } from "bun:test";
|
||||
import {
|
||||
collectMediaFromDocument,
|
||||
collectMediaFromMarkdown,
|
||||
normalizeMarkdownMediaLinks,
|
||||
rewriteMarkdownMediaLinks,
|
||||
} from "../media/markdown-media";
|
||||
|
||||
describe("markdown media helpers", () => {
|
||||
test("collects cover, image markdown, and plain media urls from a document", () => {
|
||||
const media = collectMediaFromDocument({
|
||||
url: "https://example.com/post",
|
||||
metadata: {
|
||||
coverImage: "https://cdn.example.com/cover.jpg",
|
||||
},
|
||||
content: [
|
||||
{ type: "paragraph", text: "Poster: https://cdn.example.com/poster.png" },
|
||||
{ type: "markdown", markdown: "\n\n[video](https://cdn.example.com/clip.mp4)" },
|
||||
],
|
||||
});
|
||||
|
||||
expect(media).toEqual([
|
||||
{ url: "https://cdn.example.com/cover.jpg", kind: "image", role: "cover" },
|
||||
{ url: "https://cdn.example.com/poster.png", kind: "image", role: "inline" },
|
||||
{ url: "https://cdn.example.com/body.webp", kind: "image", role: "inline" },
|
||||
{ url: "https://cdn.example.com/clip.mp4", kind: "video", role: "inline" },
|
||||
]);
|
||||
});
|
||||
|
||||
test("rewrites markdown links, frontmatter cover images, and plain url mentions", () => {
|
||||
const markdown = `---
|
||||
coverImage: "https://cdn.example.com/cover.jpg"
|
||||
---
|
||||
|
||||

|
||||
|
||||
Poster: https://cdn.example.com/poster.png
|
||||
`;
|
||||
|
||||
const rewritten = rewriteMarkdownMediaLinks(markdown, [
|
||||
{
|
||||
url: "https://cdn.example.com/cover.jpg",
|
||||
localPath: "imgs/img-001-cover.jpg",
|
||||
absolutePath: "/tmp/imgs/img-001-cover.jpg",
|
||||
kind: "image",
|
||||
},
|
||||
{
|
||||
url: "https://cdn.example.com/body.webp",
|
||||
localPath: "imgs/img-002-body.webp",
|
||||
absolutePath: "/tmp/imgs/img-002-body.webp",
|
||||
kind: "image",
|
||||
},
|
||||
{
|
||||
url: "https://cdn.example.com/poster.png",
|
||||
localPath: "imgs/img-003-poster.png",
|
||||
absolutePath: "/tmp/imgs/img-003-poster.png",
|
||||
kind: "image",
|
||||
},
|
||||
]);
|
||||
|
||||
expect(rewritten).toContain('coverImage: "imgs/img-001-cover.jpg"');
|
||||
expect(rewritten).toContain("");
|
||||
expect(rewritten).toContain("Poster: imgs/img-003-poster.png");
|
||||
});
|
||||
|
||||
test("normalizes and dedupes linked Substack CDN image variants", () => {
|
||||
const resizedUrl =
|
||||
"https://substackcdn.com/image/fetch/$s_!wORh!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb83f9d2f-711f-4edd-bc8a-303b8de422e5_1600x1300.png";
|
||||
const linkedUrl =
|
||||
"https://substackcdn.com/image/fetch/$s_!wORh!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb83f9d2f-711f-4edd-bc8a-303b8de422e5_1600x1300.png";
|
||||
const canonicalUrl =
|
||||
"https://substack-post-media.s3.amazonaws.com/public/images/b83f9d2f-711f-4edd-bc8a-303b8de422e5_1600x1300.png";
|
||||
const markdown = `[](${linkedUrl})`;
|
||||
|
||||
expect(normalizeMarkdownMediaLinks(markdown)).toBe(``);
|
||||
expect(collectMediaFromMarkdown(markdown)).toEqual([
|
||||
{
|
||||
url: canonicalUrl,
|
||||
kind: "image",
|
||||
role: "inline",
|
||||
},
|
||||
]);
|
||||
});
|
||||
|
||||
test("collapses linked images when href equals image url after normalization", () => {
|
||||
const resizedUrl =
|
||||
"https://substackcdn.com/image/fetch/$s_!wORh!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb83f9d2f-711f-4edd-bc8a-303b8de422e5_1600x1300.png";
|
||||
const linkedUrl =
|
||||
"https://substackcdn.com/image/fetch/$s_!wORh!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb83f9d2f-711f-4edd-bc8a-303b8de422e5_1600x1300.png";
|
||||
const canonicalUrl =
|
||||
"https://substack-post-media.s3.amazonaws.com/public/images/b83f9d2f-711f-4edd-bc8a-303b8de422e5_1600x1300.png";
|
||||
const markdown = `[
|
||||
|
||||

|
||||
|
||||
](${linkedUrl})`;
|
||||
|
||||
expect(normalizeMarkdownMediaLinks(markdown)).toBe(``);
|
||||
});
|
||||
|
||||
test("compacts linked images when href differs from the image url", () => {
|
||||
const markdown = `[
|
||||
|
||||

|
||||
|
||||
](https://example.com/source)`;
|
||||
|
||||
expect(normalizeMarkdownMediaLinks(markdown)).toBe(
|
||||
"[](https://example.com/source)",
|
||||
);
|
||||
});
|
||||
|
||||
test("keeps single-line linked images on one line after parser-based normalization", () => {
|
||||
const markdown = `[](https://example.com/source)`;
|
||||
|
||||
expect(normalizeMarkdownMediaLinks(markdown)).toBe(
|
||||
"[](https://example.com/source)",
|
||||
);
|
||||
});
|
||||
|
||||
test("repairs broken linked image blocks without disturbing surrounding paragraphs", () => {
|
||||
const markdown = `Before
|
||||
|
||||
[
|
||||
|
||||

|
||||
|
||||
](https://example.com/source)
|
||||
|
||||
After`;
|
||||
|
||||
expect(normalizeMarkdownMediaLinks(markdown)).toBe(`Before
|
||||
|
||||
[](https://example.com/source)
|
||||
|
||||
After`);
|
||||
});
|
||||
});
|
||||
|
|
@ -0,0 +1,99 @@
|
|||
import { describe, expect, test } from "bun:test";
|
||||
import { renderMarkdown } from "../extract/markdown-renderer";
|
||||
|
||||
describe("renderMarkdown", () => {
|
||||
test("renders frontmatter and content blocks", () => {
|
||||
const markdown = renderMarkdown({
|
||||
url: "https://example.com/post",
|
||||
requestedUrl: "https://example.com/post?ref=test",
|
||||
title: "Example Title",
|
||||
author: "Alice",
|
||||
siteName: "Example",
|
||||
publishedAt: "2026-03-25",
|
||||
adapter: "generic",
|
||||
metadata: {
|
||||
authorName: "Alice Example",
|
||||
authorUsername: "alice",
|
||||
authorUrl: "https://example.com/@alice",
|
||||
kind: "generic/article",
|
||||
},
|
||||
content: [
|
||||
{ type: "paragraph", text: "First paragraph." },
|
||||
{ type: "list", ordered: false, items: ["One", "Two"] },
|
||||
],
|
||||
});
|
||||
|
||||
expect(markdown).toContain("---");
|
||||
expect(markdown).toContain('title: "Example Title"');
|
||||
expect(markdown).toContain('url: "https://example.com/post"');
|
||||
expect(markdown).toContain('requestedUrl: "https://example.com/post?ref=test"');
|
||||
expect(markdown).toContain('author: "Alice"');
|
||||
expect(markdown).toContain('authorName: "Alice Example"');
|
||||
expect(markdown).toContain('authorUsername: "alice"');
|
||||
expect(markdown).toContain('authorUrl: "https://example.com/@alice"');
|
||||
expect(markdown).toContain("# Example Title");
|
||||
expect(markdown).toContain("First paragraph.");
|
||||
expect(markdown).toContain("- One");
|
||||
});
|
||||
|
||||
test("avoids duplicating the title when body already starts with it", () => {
|
||||
const markdown = renderMarkdown({
|
||||
url: "https://example.com/post",
|
||||
title: "Example Title",
|
||||
content: [{ type: "markdown", markdown: "# Example Title\n\nBody text." }],
|
||||
});
|
||||
|
||||
expect(markdown.match(/# Example Title/g)?.length).toBe(1);
|
||||
expect(markdown).toContain("Body text.");
|
||||
});
|
||||
|
||||
test("normalizes Substack CDN image links in rendered markdown", () => {
|
||||
const resizedUrl =
|
||||
"https://substackcdn.com/image/fetch/$s_!wORh!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb83f9d2f-711f-4edd-bc8a-303b8de422e5_1600x1300.png";
|
||||
const linkedUrl =
|
||||
"https://substackcdn.com/image/fetch/$s_!wORh!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb83f9d2f-711f-4edd-bc8a-303b8de422e5_1600x1300.png";
|
||||
const canonicalUrl =
|
||||
"https://substack-post-media.s3.amazonaws.com/public/images/b83f9d2f-711f-4edd-bc8a-303b8de422e5_1600x1300.png";
|
||||
|
||||
const markdown = renderMarkdown({
|
||||
url: "https://example.com/post",
|
||||
metadata: {
|
||||
coverImage: resizedUrl,
|
||||
},
|
||||
content: [
|
||||
{
|
||||
type: "markdown",
|
||||
markdown: `[
|
||||
|
||||

|
||||
|
||||
](${linkedUrl})`,
|
||||
},
|
||||
],
|
||||
});
|
||||
|
||||
expect(markdown).toContain(`coverImage: "${canonicalUrl}"`);
|
||||
expect(markdown).toContain(``);
|
||||
expect(markdown).not.toContain(`[](${canonicalUrl})`);
|
||||
expect(markdown).not.toContain("substackcdn.com/image/fetch");
|
||||
});
|
||||
|
||||
test("renders linked images on a single line when href differs from the image url", () => {
|
||||
const markdown = renderMarkdown({
|
||||
url: "https://example.com/post",
|
||||
content: [
|
||||
{
|
||||
type: "markdown",
|
||||
markdown: `[
|
||||
|
||||

|
||||
|
||||
](https://example.com/source)`,
|
||||
},
|
||||
],
|
||||
});
|
||||
|
||||
expect(markdown).toContain("[](https://example.com/source)");
|
||||
expect(markdown).not.toContain("](https://example.com/source)\n");
|
||||
});
|
||||
});
|
||||
|
|
@ -0,0 +1,68 @@
|
|||
import { afterEach, describe, expect, test } from "bun:test";
|
||||
import fs from "node:fs";
|
||||
import path from "node:path";
|
||||
import os from "node:os";
|
||||
import {
|
||||
ensureChromeProfileDir,
|
||||
hasChromeLockArtifacts,
|
||||
resolveChromeProfileDir,
|
||||
shouldRetryChromeLaunchRecovery,
|
||||
} from "../browser/profile";
|
||||
|
||||
const originalProfile = process.env.BAOYU_CHROME_PROFILE_DIR;
|
||||
|
||||
afterEach(() => {
|
||||
if (originalProfile === undefined) {
|
||||
delete process.env.BAOYU_CHROME_PROFILE_DIR;
|
||||
} else {
|
||||
process.env.BAOYU_CHROME_PROFILE_DIR = originalProfile;
|
||||
}
|
||||
});
|
||||
|
||||
describe("resolveChromeProfileDir", () => {
|
||||
test("uses BAOYU_CHROME_PROFILE_DIR when set", () => {
|
||||
process.env.BAOYU_CHROME_PROFILE_DIR = "/tmp/baoyu-profile";
|
||||
expect(resolveChromeProfileDir()).toBe("/tmp/baoyu-profile");
|
||||
});
|
||||
|
||||
test("falls back to shared baoyu-skills profile path", () => {
|
||||
delete process.env.BAOYU_CHROME_PROFILE_DIR;
|
||||
const resolved = resolveChromeProfileDir();
|
||||
if (process.platform === "darwin") {
|
||||
expect(resolved).toBe(path.join(os.homedir(), "Library", "Application Support", "baoyu-skills", "chrome-profile"));
|
||||
} else if (process.platform === "win32") {
|
||||
expect(resolved.endsWith(path.join("baoyu-skills", "chrome-profile"))).toBe(true);
|
||||
} else {
|
||||
expect(resolved.endsWith(path.join("baoyu-skills", "chrome-profile"))).toBe(true);
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
describe("ensureChromeProfileDir", () => {
|
||||
test("creates the profile directory when missing", () => {
|
||||
const tempRoot = fs.mkdtempSync(path.join(os.tmpdir(), "baoyu-fetch-profile-"));
|
||||
const profileDir = path.join(tempRoot, "nested", "chrome-profile");
|
||||
|
||||
try {
|
||||
expect(fs.existsSync(profileDir)).toBe(false);
|
||||
expect(ensureChromeProfileDir(profileDir)).toBe(profileDir);
|
||||
expect(fs.statSync(profileDir).isDirectory()).toBe(true);
|
||||
} finally {
|
||||
fs.rmSync(tempRoot, { force: true, recursive: true });
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
describe("stale lock recovery helpers", () => {
|
||||
test("detects Chrome singleton lock artifacts", () => {
|
||||
expect(hasChromeLockArtifacts(["Cookies", "SingletonLock"])).toBe(true);
|
||||
expect(hasChromeLockArtifacts(["chrome.pid"])).toBe(true);
|
||||
expect(hasChromeLockArtifacts(["Preferences", "Cookies"])).toBe(false);
|
||||
});
|
||||
|
||||
test("only retries stale-lock recovery when no live owner exists", () => {
|
||||
expect(shouldRetryChromeLaunchRecovery({ hasLockArtifacts: true, hasLiveOwner: false })).toBe(true);
|
||||
expect(shouldRetryChromeLaunchRecovery({ hasLockArtifacts: true, hasLiveOwner: true })).toBe(false);
|
||||
expect(shouldRetryChromeLaunchRecovery({ hasLockArtifacts: false, hasLiveOwner: false })).toBe(false);
|
||||
});
|
||||
});
|
||||
|
|
@ -0,0 +1,105 @@
|
|||
import { describe, expect, test } from "bun:test";
|
||||
import { shouldAutoContinueForceWait, shouldKeepBrowserOpenAfterInteraction } from "../commands/convert";
|
||||
|
||||
describe("shouldAutoContinueForceWait", () => {
|
||||
test("continues when a challenge disappears", () => {
|
||||
expect(
|
||||
shouldAutoContinueForceWait(
|
||||
{
|
||||
url: "https://example.com/challenge",
|
||||
hasGate: true,
|
||||
loginState: "unknown",
|
||||
sessionReady: true,
|
||||
},
|
||||
{
|
||||
url: "https://example.com/article",
|
||||
hasGate: false,
|
||||
loginState: "unknown",
|
||||
sessionReady: true,
|
||||
},
|
||||
),
|
||||
).toBe(true);
|
||||
});
|
||||
|
||||
test("waits for X session cookies before continuing after login", () => {
|
||||
expect(
|
||||
shouldAutoContinueForceWait(
|
||||
{
|
||||
url: "https://x.com/i/flow/login",
|
||||
hasGate: false,
|
||||
loginState: "logged_out",
|
||||
sessionReady: false,
|
||||
},
|
||||
{
|
||||
url: "https://x.com/home",
|
||||
hasGate: false,
|
||||
loginState: "logged_in",
|
||||
sessionReady: false,
|
||||
},
|
||||
),
|
||||
).toBe(false);
|
||||
|
||||
expect(
|
||||
shouldAutoContinueForceWait(
|
||||
{
|
||||
url: "https://x.com/i/flow/login",
|
||||
hasGate: false,
|
||||
loginState: "logged_out",
|
||||
sessionReady: false,
|
||||
},
|
||||
{
|
||||
url: "https://x.com/home",
|
||||
hasGate: false,
|
||||
loginState: "logged_in",
|
||||
sessionReady: true,
|
||||
},
|
||||
),
|
||||
).toBe(true);
|
||||
});
|
||||
|
||||
test("does not continue when nothing changed yet", () => {
|
||||
expect(
|
||||
shouldAutoContinueForceWait(
|
||||
{
|
||||
url: "https://x.com/lennysan/status/2036483059407810640",
|
||||
hasGate: false,
|
||||
loginState: "unknown",
|
||||
sessionReady: false,
|
||||
},
|
||||
{
|
||||
url: "https://x.com/lennysan/status/2036483059407810640",
|
||||
hasGate: false,
|
||||
loginState: "unknown",
|
||||
sessionReady: false,
|
||||
},
|
||||
),
|
||||
).toBe(false);
|
||||
});
|
||||
});
|
||||
|
||||
describe("shouldKeepBrowserOpenAfterInteraction", () => {
|
||||
test("keeps launched X login browsers open", () => {
|
||||
expect(
|
||||
shouldKeepBrowserOpenAfterInteraction({
|
||||
launched: true,
|
||||
interaction: { kind: "login", provider: "x" },
|
||||
}),
|
||||
).toBe(true);
|
||||
});
|
||||
|
||||
test("does not keep reused or non-login browsers open", () => {
|
||||
expect(
|
||||
shouldKeepBrowserOpenAfterInteraction({
|
||||
launched: false,
|
||||
interaction: { kind: "login", provider: "x" },
|
||||
}),
|
||||
).toBe(false);
|
||||
|
||||
expect(
|
||||
shouldKeepBrowserOpenAfterInteraction({
|
||||
launched: true,
|
||||
interaction: { kind: "cloudflare", provider: "cloudflare" },
|
||||
}),
|
||||
).toBe(false);
|
||||
});
|
||||
});
|
||||
|
|
@ -0,0 +1,342 @@
|
|||
import { describe, expect, test } from "bun:test";
|
||||
import { extractArticleDocumentFromPayload } from "../adapters/x/article";
|
||||
|
||||
describe("x article extraction", () => {
|
||||
test("renders markdown entities referenced by atomic blocks", () => {
|
||||
const payload = {
|
||||
data: {
|
||||
tweetResult: {
|
||||
result: {
|
||||
rest_id: "2036762680401223946",
|
||||
legacy: {
|
||||
full_text: "Fallback text",
|
||||
favorite_count: 12,
|
||||
retweet_count: 3,
|
||||
reply_count: 1,
|
||||
created_at: "Wed Mar 25 11:10:38 +0000 2026",
|
||||
},
|
||||
core: {
|
||||
user_results: {
|
||||
result: {
|
||||
legacy: {
|
||||
name: "Eric Zakariasson",
|
||||
screen_name: "ericzakariasson",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
article: {
|
||||
article_results: {
|
||||
result: {
|
||||
title: "Building CLIs for agents",
|
||||
content_state: {
|
||||
blocks: [
|
||||
{
|
||||
type: "unstyled",
|
||||
text: "Make it non-interactive.",
|
||||
data: {},
|
||||
entityRanges: [],
|
||||
inlineStyleRanges: [],
|
||||
},
|
||||
{
|
||||
type: "atomic",
|
||||
text: " ",
|
||||
data: {},
|
||||
entityRanges: [{ key: 0, length: 1, offset: 0 }],
|
||||
inlineStyleRanges: [],
|
||||
},
|
||||
{
|
||||
type: "unstyled",
|
||||
text: "Return data on success.",
|
||||
data: {},
|
||||
entityRanges: [],
|
||||
inlineStyleRanges: [],
|
||||
},
|
||||
],
|
||||
entityMap: [
|
||||
{
|
||||
key: "0",
|
||||
value: {
|
||||
type: "MARKDOWN",
|
||||
mutability: "Mutable",
|
||||
data: {
|
||||
markdown: "```bash\n$ mycli deploy --env production --dry-run\n```",
|
||||
},
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
const document = extractArticleDocumentFromPayload(
|
||||
payload,
|
||||
"2036762680401223946",
|
||||
"https://x.com/ericzakariasson/status/2036762680401223946",
|
||||
);
|
||||
|
||||
expect(document).not.toBeNull();
|
||||
expect(document?.metadata?.kind).toBe("x/article");
|
||||
|
||||
const content = document?.content[0];
|
||||
expect(content?.type).toBe("markdown");
|
||||
if (!content || content.type !== "markdown") {
|
||||
throw new Error("Expected markdown content");
|
||||
}
|
||||
|
||||
expect(content.markdown).toContain("```bash");
|
||||
expect(content.markdown).toContain("$ mycli deploy --env production --dry-run");
|
||||
expect(content.markdown).toContain("Make it non-interactive.");
|
||||
expect(content.markdown).toContain("Return data on success.");
|
||||
});
|
||||
|
||||
test("renders media, embedded tweets, and cover image from article entities", () => {
|
||||
const embeddedTweetPayload = {
|
||||
data: {
|
||||
tweetResult: {
|
||||
result: {
|
||||
rest_id: "999",
|
||||
legacy: {
|
||||
full_text: "Embedded tweet text",
|
||||
favorite_count: 4,
|
||||
retweet_count: 2,
|
||||
reply_count: 1,
|
||||
created_at: "Wed Mar 25 11:10:38 +0000 2026",
|
||||
extended_entities: {
|
||||
media: [
|
||||
{
|
||||
type: "photo",
|
||||
media_url_https: "https://pbs.twimg.com/media/embedded.jpg",
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
core: {
|
||||
user_results: {
|
||||
result: {
|
||||
core: {
|
||||
name: "Embedded Author",
|
||||
screen_name: "embedded_author",
|
||||
},
|
||||
legacy: {},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
const articlePayload = {
|
||||
data: {
|
||||
tweetResult: {
|
||||
result: {
|
||||
rest_id: "2036670816344064290",
|
||||
legacy: {
|
||||
full_text: "Fallback text",
|
||||
favorite_count: 12,
|
||||
retweet_count: 3,
|
||||
reply_count: 1,
|
||||
created_at: "Wed Mar 25 11:10:38 +0000 2026",
|
||||
},
|
||||
core: {
|
||||
user_results: {
|
||||
result: {
|
||||
legacy: {
|
||||
name: "Eric Zakariasson",
|
||||
screen_name: "ericzakariasson",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
article: {
|
||||
article_results: {
|
||||
result: {
|
||||
title: "Article with media",
|
||||
cover_media: {
|
||||
media_info: {
|
||||
original_img_url: "https://pbs.twimg.com/media/cover?format=jpeg&name=small",
|
||||
},
|
||||
},
|
||||
media_entities: [
|
||||
{
|
||||
media_id: "42",
|
||||
media_info: {
|
||||
original_img_url: "https://pbs.twimg.com/media/body.jpg",
|
||||
},
|
||||
},
|
||||
],
|
||||
content_state: {
|
||||
blocks: [
|
||||
{
|
||||
type: "unstyled",
|
||||
text: "Read more: https://t.co/example",
|
||||
data: {},
|
||||
entityRanges: [{ key: 2, length: 20, offset: 11 }],
|
||||
inlineStyleRanges: [],
|
||||
},
|
||||
{
|
||||
type: "atomic",
|
||||
text: " ",
|
||||
data: {},
|
||||
entityRanges: [{ key: 0, length: 1, offset: 0 }],
|
||||
inlineStyleRanges: [],
|
||||
},
|
||||
{
|
||||
type: "atomic",
|
||||
text: " ",
|
||||
data: {},
|
||||
entityRanges: [{ key: 1, length: 1, offset: 0 }],
|
||||
inlineStyleRanges: [],
|
||||
},
|
||||
],
|
||||
entityMap: [
|
||||
{
|
||||
key: "0",
|
||||
value: {
|
||||
type: "MEDIA",
|
||||
mutability: "Immutable",
|
||||
data: {
|
||||
mediaItems: [{ mediaId: "42" }],
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
key: "1",
|
||||
value: {
|
||||
type: "TWEET",
|
||||
mutability: "Immutable",
|
||||
data: {
|
||||
tweetId: "999",
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
key: "2",
|
||||
value: {
|
||||
type: "LINK",
|
||||
mutability: "Mutable",
|
||||
data: {
|
||||
url: "https://example.com/report",
|
||||
},
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
const document = extractArticleDocumentFromPayload(
|
||||
articlePayload,
|
||||
"2036670816344064290",
|
||||
"https://x.com/ericzakariasson/status/2036670816344064290",
|
||||
[articlePayload, embeddedTweetPayload],
|
||||
);
|
||||
|
||||
expect(document).not.toBeNull();
|
||||
expect(document?.metadata?.coverImage).toBe(
|
||||
"https://pbs.twimg.com/media/cover?format=jpg&name=4096x4096",
|
||||
);
|
||||
|
||||
const content = document?.content[0];
|
||||
expect(content?.type).toBe("markdown");
|
||||
if (!content || content.type !== "markdown") {
|
||||
throw new Error("Expected markdown content");
|
||||
}
|
||||
|
||||
expect(content.markdown).toContain("https://example.com/report");
|
||||
expect(content.markdown).toContain("");
|
||||
expect(content.markdown).toContain("> Embedded Author (@embedded_author)");
|
||||
expect(content.markdown).toContain("> Embedded tweet text");
|
||||
expect(content.markdown).toContain(
|
||||
"> ",
|
||||
);
|
||||
});
|
||||
|
||||
test("prefers expanded link entity urls in article blocks", () => {
|
||||
const payload = {
|
||||
data: {
|
||||
tweetResult: {
|
||||
result: {
|
||||
rest_id: "2036670816344064290",
|
||||
legacy: {
|
||||
full_text: "Fallback text",
|
||||
favorite_count: 12,
|
||||
retweet_count: 3,
|
||||
reply_count: 1,
|
||||
created_at: "Wed Mar 25 11:10:38 +0000 2026",
|
||||
},
|
||||
core: {
|
||||
user_results: {
|
||||
result: {
|
||||
legacy: {
|
||||
name: "Eric Zakariasson",
|
||||
screen_name: "ericzakariasson",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
article: {
|
||||
article_results: {
|
||||
result: {
|
||||
title: "Article with expanded links",
|
||||
content_state: {
|
||||
blocks: [
|
||||
{
|
||||
type: "unstyled",
|
||||
text: "Read more: https://t.co/example",
|
||||
data: {},
|
||||
entityRanges: [{ key: 0, length: 20, offset: 11 }],
|
||||
inlineStyleRanges: [],
|
||||
},
|
||||
],
|
||||
entityMap: [
|
||||
{
|
||||
key: "0",
|
||||
value: {
|
||||
type: "LINK",
|
||||
mutability: "Mutable",
|
||||
data: {
|
||||
expanded_url: "https://example.com/report",
|
||||
url: "https://t.co/example",
|
||||
},
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
const document = extractArticleDocumentFromPayload(
|
||||
payload,
|
||||
"2036670816344064290",
|
||||
"https://x.com/ericzakariasson/status/2036670816344064290",
|
||||
);
|
||||
|
||||
expect(document).not.toBeNull();
|
||||
|
||||
const content = document?.content[0];
|
||||
expect(content?.type).toBe("markdown");
|
||||
if (!content || content.type !== "markdown") {
|
||||
throw new Error("Expected markdown content");
|
||||
}
|
||||
|
||||
expect(content.markdown).toContain("https://example.com/report");
|
||||
expect(content.markdown).not.toContain("https://t.co/example");
|
||||
});
|
||||
});
|
||||
|
|
@ -0,0 +1,28 @@
|
|||
import { describe, expect, test } from "bun:test";
|
||||
|
||||
import { buildXSessionCookieMap, hasRequiredXSessionCookies } from "../adapters/x/session";
|
||||
|
||||
describe("X session helpers", () => {
|
||||
test("keeps non-empty X session cookies", () => {
|
||||
expect(
|
||||
buildXSessionCookieMap([
|
||||
{ name: "auth_token", value: "auth" },
|
||||
{ name: "ct0", value: "csrf" },
|
||||
{ name: "twid", value: "u=123" },
|
||||
{ name: "ct0", value: "" },
|
||||
{ name: "", value: "ignored" },
|
||||
{ name: "gt", value: undefined },
|
||||
]),
|
||||
).toEqual({
|
||||
auth_token: "auth",
|
||||
ct0: "csrf",
|
||||
twid: "u=123",
|
||||
});
|
||||
});
|
||||
|
||||
test("requires auth_token and ct0 for a ready X session", () => {
|
||||
expect(hasRequiredXSessionCookies({ auth_token: "auth" })).toBe(false);
|
||||
expect(hasRequiredXSessionCookies({ ct0: "csrf" })).toBe(false);
|
||||
expect(hasRequiredXSessionCookies({ auth_token: "auth", ct0: "csrf" })).toBe(true);
|
||||
});
|
||||
});
|
||||
|
|
@ -0,0 +1,187 @@
|
|||
import { describe, expect, test } from "bun:test";
|
||||
import { extractSingleTweetDocumentFromPayload } from "../adapters/x/single";
|
||||
|
||||
describe("x single tweet extraction", () => {
|
||||
test("replaces t.co links in note tweets with expanded urls", () => {
|
||||
const payload = {
|
||||
data: {
|
||||
tweetResult: {
|
||||
result: {
|
||||
rest_id: "2036483061635039711",
|
||||
legacy: {
|
||||
full_text:
|
||||
"First, some context:\n\n1. This analysis is based on data from @trueupio, one of my favorite collaborators and sources of data. They track job openings at tech companies and top startups around the world (over 9,000 companies) and make it easy to browse open gigs. Their data looks",
|
||||
favorite_count: 43,
|
||||
retweet_count: 1,
|
||||
reply_count: 1,
|
||||
created_at: "Tue Mar 24 16:39:32 +0000 2026",
|
||||
entities: {
|
||||
hashtags: [],
|
||||
symbols: [],
|
||||
timestamps: [],
|
||||
urls: [],
|
||||
user_mentions: [
|
||||
{
|
||||
id_str: "1407256023547613193",
|
||||
indices: [61, 70],
|
||||
name: "TrueUp",
|
||||
screen_name: "trueupio",
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
note_tweet: {
|
||||
note_tweet_results: {
|
||||
result: {
|
||||
text:
|
||||
"First, some context:\n\n1. This analysis is based on data from @trueupio, one of my favorite collaborators and sources of data. They track job openings at tech companies and top startups around the world (over 9,000 companies) and make it easy to browse open gigs. Their data looks at roles at tech companies—the most sought-after and lucrative jobs. (It doesn’t include roles at non-tech companies and consulting agencies.) Browse open roles here: https://t.co/x7ff2NjpP1\n\n2. Keep reading for highlights, or jump straight to the full report: https://t.co/AbqPp2TEde",
|
||||
entity_set: {
|
||||
hashtags: [],
|
||||
symbols: [],
|
||||
urls: [
|
||||
{
|
||||
display_url: "trueup.io/jobs",
|
||||
expanded_url: "https://trueup.io/jobs",
|
||||
indices: [447, 470],
|
||||
url: "https://t.co/x7ff2NjpP1",
|
||||
},
|
||||
{
|
||||
display_url: "lennysnewsletter.com/i/191595250/if…",
|
||||
expanded_url:
|
||||
"https://www.lennysnewsletter.com/i/191595250/if-youre-having-trouble-finding-a-job",
|
||||
indices: [541, 564],
|
||||
url: "https://t.co/AbqPp2TEde",
|
||||
},
|
||||
],
|
||||
user_mentions: [
|
||||
{
|
||||
id_str: "1407256023547613193",
|
||||
indices: [61, 70],
|
||||
name: "TrueUp",
|
||||
screen_name: "trueupio",
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
core: {
|
||||
user_results: {
|
||||
result: {
|
||||
legacy: {
|
||||
name: "Lenny Rachitsky",
|
||||
screen_name: "lennysan",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
const document = extractSingleTweetDocumentFromPayload(
|
||||
payload,
|
||||
"2036483061635039711",
|
||||
"https://x.com/lennysan/status/2036483061635039711",
|
||||
);
|
||||
|
||||
expect(document).not.toBeNull();
|
||||
|
||||
const paragraphBlock = document?.content.find((block) => block.type === "paragraph");
|
||||
expect(paragraphBlock).toEqual({
|
||||
type: "paragraph",
|
||||
text:
|
||||
"First, some context:\n\n1. This analysis is based on data from @trueupio, one of my favorite collaborators and sources of data. They track job openings at tech companies and top startups around the world (over 9,000 companies) and make it easy to browse open gigs. Their data looks at roles at tech companies—the most sought-after and lucrative jobs. (It doesn’t include roles at non-tech companies and consulting agencies.) Browse open roles here: https://trueup.io/jobs\n\n2. Keep reading for highlights, or jump straight to the full report: https://www.lennysnewsletter.com/i/191595250/if-youre-having-trouble-finding-a-job",
|
||||
});
|
||||
});
|
||||
|
||||
test("upgrades image urls to high resolution for tweet and quoted tweet media", () => {
|
||||
const payload = {
|
||||
data: {
|
||||
tweetResult: {
|
||||
result: {
|
||||
rest_id: "2036762680401223946",
|
||||
legacy: {
|
||||
full_text: "Main tweet text https://t.co/media",
|
||||
favorite_count: 12,
|
||||
retweet_count: 3,
|
||||
reply_count: 1,
|
||||
created_at: "Wed Mar 25 11:10:38 +0000 2026",
|
||||
extended_entities: {
|
||||
media: [
|
||||
{
|
||||
type: "photo",
|
||||
media_url_https: "https://pbs.twimg.com/media/main-image.png",
|
||||
url: "https://t.co/media",
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
core: {
|
||||
user_results: {
|
||||
result: {
|
||||
legacy: {
|
||||
name: "Eric Zakariasson",
|
||||
screen_name: "ericzakariasson",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
quoted_status_result: {
|
||||
result: {
|
||||
rest_id: "999",
|
||||
legacy: {
|
||||
full_text: "Quoted tweet text",
|
||||
favorite_count: 4,
|
||||
retweet_count: 2,
|
||||
reply_count: 1,
|
||||
created_at: "Wed Mar 25 10:10:38 +0000 2026",
|
||||
extended_entities: {
|
||||
media: [
|
||||
{
|
||||
type: "photo",
|
||||
media_url_https: "https://pbs.twimg.com/media/quoted?format=jpeg&name=small",
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
core: {
|
||||
user_results: {
|
||||
result: {
|
||||
legacy: {
|
||||
name: "Quoted Author",
|
||||
screen_name: "quoted_author",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
const document = extractSingleTweetDocumentFromPayload(
|
||||
payload,
|
||||
"2036762680401223946",
|
||||
"https://x.com/ericzakariasson/status/2036762680401223946",
|
||||
);
|
||||
|
||||
expect(document).not.toBeNull();
|
||||
|
||||
const imageBlock = document?.content.find((block) => block.type === "image");
|
||||
expect(imageBlock).toEqual({
|
||||
type: "image",
|
||||
url: "https://pbs.twimg.com/media/main-image?format=png&name=4096x4096",
|
||||
});
|
||||
|
||||
const quoteBlock = document?.content.find((block) => block.type === "quote");
|
||||
expect(quoteBlock).toEqual({
|
||||
type: "quote",
|
||||
text:
|
||||
"Quoted Author (@quoted_author)\n\nQuoted tweet text\n\nphoto: https://pbs.twimg.com/media/quoted?format=jpg&name=4096x4096",
|
||||
});
|
||||
});
|
||||
});
|
||||
|
|
@ -0,0 +1,305 @@
|
|||
import { describe, expect, test } from "bun:test";
|
||||
import { extractThreadDocumentFromPayloads, extractThreadTweetsFromPayloads } from "../adapters/x/thread";
|
||||
|
||||
function buildTweet(options: {
|
||||
id: string;
|
||||
text: string;
|
||||
createdAt: string;
|
||||
userId?: string;
|
||||
screenName?: string;
|
||||
name?: string;
|
||||
conversationId?: string;
|
||||
inReplyToStatusId?: string;
|
||||
inReplyToUserId?: string;
|
||||
quotedTweet?: unknown;
|
||||
}) {
|
||||
const userId = options.userId ?? "3178231";
|
||||
const screenName = options.screenName ?? "dotey";
|
||||
const name = options.name ?? "宝玉";
|
||||
|
||||
return {
|
||||
__typename: "Tweet",
|
||||
rest_id: options.id,
|
||||
legacy: {
|
||||
id_str: options.id,
|
||||
full_text: options.text,
|
||||
favorite_count: 0,
|
||||
retweet_count: 0,
|
||||
reply_count: 0,
|
||||
created_at: options.createdAt,
|
||||
user_id_str: userId,
|
||||
conversation_id_str: options.conversationId ?? options.id,
|
||||
in_reply_to_status_id_str: options.inReplyToStatusId,
|
||||
in_reply_to_user_id_str: options.inReplyToUserId,
|
||||
},
|
||||
core: {
|
||||
user_results: {
|
||||
result: {
|
||||
core: {
|
||||
name,
|
||||
screen_name: screenName,
|
||||
},
|
||||
legacy: {},
|
||||
},
|
||||
},
|
||||
},
|
||||
quoted_status_result: options.quotedTweet
|
||||
? {
|
||||
result: options.quotedTweet,
|
||||
}
|
||||
: undefined,
|
||||
};
|
||||
}
|
||||
|
||||
function tweetEntry(tweet: unknown) {
|
||||
return {
|
||||
content: {
|
||||
itemContent: {
|
||||
tweet_results: {
|
||||
result: tweet,
|
||||
},
|
||||
},
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
function moduleTweetItem(tweet: unknown) {
|
||||
return {
|
||||
item: {
|
||||
itemContent: {
|
||||
tweet_results: {
|
||||
result: tweet,
|
||||
},
|
||||
},
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
describe("x thread extraction", () => {
|
||||
test("keeps only the continuous same-author reply chain", () => {
|
||||
const rootId = "1996285439867556304";
|
||||
const reply1Id = "1996285442275340783";
|
||||
const reply2Id = "1996285444582146559";
|
||||
const quotedId = "1993729800922341810";
|
||||
const quotedInsideThreadId = "1993729800922341811";
|
||||
const otherAuthorReplyId = "2000000000000000001";
|
||||
const sameAuthorAfterOtherId = "2000000000000000002";
|
||||
|
||||
const root = buildTweet({
|
||||
id: rootId,
|
||||
text: "A thread for my nana banana pro prompts 🧵",
|
||||
createdAt: "Wed Dec 03 18:28:32 +0000 2025",
|
||||
conversationId: rootId,
|
||||
});
|
||||
const reply1 = buildTweet({
|
||||
id: reply1Id,
|
||||
text: "Prompt 1",
|
||||
createdAt: "Wed Dec 03 18:28:33 +0000 2025",
|
||||
conversationId: rootId,
|
||||
inReplyToStatusId: rootId,
|
||||
inReplyToUserId: "3178231",
|
||||
quotedTweet: buildTweet({
|
||||
id: quotedInsideThreadId,
|
||||
text: "Quoted inside the thread body",
|
||||
createdAt: "Tue Nov 25 18:28:35 +0000 2025",
|
||||
screenName: "quoted_author",
|
||||
name: "Quoted Author",
|
||||
}),
|
||||
});
|
||||
const reply2 = buildTweet({
|
||||
id: reply2Id,
|
||||
text: "Prompt 2",
|
||||
createdAt: "Wed Dec 03 18:28:34 +0000 2025",
|
||||
conversationId: rootId,
|
||||
inReplyToStatusId: reply1Id,
|
||||
inReplyToUserId: "3178231",
|
||||
});
|
||||
const quotedSameAuthor = buildTweet({
|
||||
id: quotedId,
|
||||
text: "Quoted standalone tweet",
|
||||
createdAt: "Tue Nov 25 18:28:34 +0000 2025",
|
||||
});
|
||||
const otherAuthorReply = buildTweet({
|
||||
id: otherAuthorReplyId,
|
||||
text: "Another author joined the conversation",
|
||||
createdAt: "Wed Dec 03 18:28:35 +0000 2025",
|
||||
userId: "42",
|
||||
screenName: "someone_else",
|
||||
name: "Someone Else",
|
||||
conversationId: rootId,
|
||||
inReplyToStatusId: reply2Id,
|
||||
inReplyToUserId: "3178231",
|
||||
});
|
||||
const sameAuthorAfterOther = buildTweet({
|
||||
id: sameAuthorAfterOtherId,
|
||||
text: "This should not be part of the continuous author chain",
|
||||
createdAt: "Wed Dec 03 18:28:36 +0000 2025",
|
||||
conversationId: rootId,
|
||||
inReplyToStatusId: otherAuthorReplyId,
|
||||
inReplyToUserId: "42",
|
||||
});
|
||||
|
||||
const payloads = [
|
||||
{
|
||||
data: {
|
||||
threaded_conversation_with_injections_v2: {
|
||||
instructions: [
|
||||
{
|
||||
type: "TimelineAddEntries",
|
||||
entries: [
|
||||
tweetEntry(root),
|
||||
tweetEntry(reply1),
|
||||
tweetEntry(quotedSameAuthor),
|
||||
{
|
||||
content: {
|
||||
items: [moduleTweetItem(reply2)],
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
type: "TimelineAddToModule",
|
||||
moduleItems: [moduleTweetItem(otherAuthorReply), moduleTweetItem(sameAuthorAfterOther)],
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
];
|
||||
|
||||
const tweets = extractThreadTweetsFromPayloads(
|
||||
payloads,
|
||||
rootId,
|
||||
"https://x.com/dotey/status/1996285439867556304",
|
||||
);
|
||||
|
||||
expect(tweets.map((tweet) => tweet.id)).toEqual([rootId, reply1Id, reply2Id]);
|
||||
|
||||
const document = extractThreadDocumentFromPayloads(
|
||||
payloads,
|
||||
rootId,
|
||||
"https://x.com/dotey/status/1996285439867556304",
|
||||
);
|
||||
|
||||
expect(document).not.toBeNull();
|
||||
expect(document?.metadata?.tweetCount).toBe(3);
|
||||
expect(document?.metadata?.lastTweetId).toBe(reply2Id);
|
||||
|
||||
const content = document?.content[0];
|
||||
expect(content?.type).toBe("markdown");
|
||||
if (!content || content.type !== "markdown") {
|
||||
throw new Error("Expected markdown content");
|
||||
}
|
||||
|
||||
expect(content.markdown).toContain("Prompt 1");
|
||||
expect(content.markdown).toContain("Prompt 2");
|
||||
expect(content.markdown).toContain("Quoted inside the thread body");
|
||||
expect(content.markdown).not.toContain("Quoted standalone tweet");
|
||||
expect(content.markdown).not.toContain("This should not be part of the continuous author chain");
|
||||
});
|
||||
|
||||
test("returns null when there is no same-author reply chain", () => {
|
||||
const rootId = "1996285439867556304";
|
||||
const root = buildTweet({
|
||||
id: rootId,
|
||||
text: "Root tweet",
|
||||
createdAt: "Wed Dec 03 18:28:32 +0000 2025",
|
||||
conversationId: rootId,
|
||||
});
|
||||
const quotedSameAuthor = buildTweet({
|
||||
id: "1993729800922341810",
|
||||
text: "Quoted standalone tweet",
|
||||
createdAt: "Tue Nov 25 18:28:34 +0000 2025",
|
||||
});
|
||||
|
||||
const payloads = [
|
||||
{
|
||||
data: {
|
||||
threaded_conversation_with_injections_v2: {
|
||||
instructions: [
|
||||
{
|
||||
type: "TimelineAddEntries",
|
||||
entries: [tweetEntry(root), tweetEntry(quotedSameAuthor)],
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
];
|
||||
|
||||
expect(
|
||||
extractThreadDocumentFromPayloads(
|
||||
payloads,
|
||||
rootId,
|
||||
"https://x.com/dotey/status/1996285439867556304",
|
||||
),
|
||||
).toBeNull();
|
||||
});
|
||||
|
||||
test("restores ancestors when the requested tweet is in the middle of a thread", () => {
|
||||
const rootId = "1996285439867556304";
|
||||
const reply1Id = "1996285442275340783";
|
||||
const reply2Id = "1996285444582146559";
|
||||
|
||||
const root = buildTweet({
|
||||
id: rootId,
|
||||
text: "Root tweet",
|
||||
createdAt: "Wed Dec 03 18:28:32 +0000 2025",
|
||||
conversationId: rootId,
|
||||
});
|
||||
const reply1 = buildTweet({
|
||||
id: reply1Id,
|
||||
text: "Middle tweet",
|
||||
createdAt: "Wed Dec 03 18:28:33 +0000 2025",
|
||||
conversationId: rootId,
|
||||
inReplyToStatusId: rootId,
|
||||
inReplyToUserId: "3178231",
|
||||
});
|
||||
const reply2 = buildTweet({
|
||||
id: reply2Id,
|
||||
text: "Last tweet",
|
||||
createdAt: "Wed Dec 03 18:28:34 +0000 2025",
|
||||
conversationId: rootId,
|
||||
inReplyToStatusId: reply1Id,
|
||||
inReplyToUserId: "3178231",
|
||||
});
|
||||
|
||||
const payloads = [
|
||||
{
|
||||
data: {
|
||||
threaded_conversation_with_injections_v2: {
|
||||
instructions: [
|
||||
{
|
||||
type: "TimelineAddEntries",
|
||||
entries: [
|
||||
tweetEntry(root),
|
||||
tweetEntry(reply1),
|
||||
tweetEntry(reply2),
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
];
|
||||
|
||||
const tweets = extractThreadTweetsFromPayloads(
|
||||
payloads,
|
||||
reply1Id,
|
||||
"https://x.com/dotey/status/1996285442275340783",
|
||||
);
|
||||
|
||||
expect(tweets.map((tweet) => tweet.id)).toEqual([rootId, reply1Id, reply2Id]);
|
||||
|
||||
const document = extractThreadDocumentFromPayloads(
|
||||
payloads,
|
||||
reply1Id,
|
||||
"https://x.com/dotey/status/1996285442275340783",
|
||||
);
|
||||
|
||||
expect(document).not.toBeNull();
|
||||
expect(document?.metadata?.tweetId).toBe(rootId);
|
||||
expect(document?.metadata?.lastTweetId).toBe(reply2Id);
|
||||
expect(document?.metadata?.tweetCount).toBe(3);
|
||||
});
|
||||
});
|
||||
|
|
@ -0,0 +1,97 @@
|
|||
import { describe, expect, test } from "bun:test";
|
||||
import {
|
||||
buildYouTubeThumbnailCandidates,
|
||||
formatTimestampRange,
|
||||
parseYouTubeDescriptionChapters,
|
||||
parseYouTubeVideoId,
|
||||
renderYouTubeTranscriptMarkdown,
|
||||
} from "../adapters/youtube/utils";
|
||||
|
||||
describe("parseYouTubeVideoId", () => {
|
||||
test("parses watch URLs", () => {
|
||||
expect(parseYouTubeVideoId(new URL("https://www.youtube.com/watch?v=abc123"))).toBe("abc123");
|
||||
});
|
||||
|
||||
test("parses youtu.be URLs", () => {
|
||||
expect(parseYouTubeVideoId(new URL("https://youtu.be/abc123"))).toBe("abc123");
|
||||
});
|
||||
|
||||
test("parses shorts URLs", () => {
|
||||
expect(parseYouTubeVideoId(new URL("https://www.youtube.com/shorts/abc123"))).toBe("abc123");
|
||||
});
|
||||
});
|
||||
|
||||
describe("parseYouTubeDescriptionChapters", () => {
|
||||
test("extracts chapter timestamps from description lines", () => {
|
||||
expect(
|
||||
parseYouTubeDescriptionChapters(`0:00 Intro
|
||||
2:15 What is a product engineer?
|
||||
10:05 Career paths`),
|
||||
).toEqual([
|
||||
{ title: "Intro", time: 0 },
|
||||
{ title: "What is a product engineer?", time: 135 },
|
||||
{ title: "Career paths", time: 605 },
|
||||
]);
|
||||
});
|
||||
|
||||
test("ignores isolated timestamps that do not look like chapters", () => {
|
||||
expect(parseYouTubeDescriptionChapters("Published on 2026-03-26\nSee you at 1:23")).toEqual([]);
|
||||
});
|
||||
});
|
||||
|
||||
describe("renderYouTubeTranscriptMarkdown", () => {
|
||||
test("renders description before chapters and keeps every segment on its own line", () => {
|
||||
const markdown = renderYouTubeTranscriptMarkdown({
|
||||
description: "Line one\nLine two",
|
||||
chapters: [
|
||||
{ title: "Intro", time: 0 },
|
||||
{ title: "Deep Dive", time: 4 },
|
||||
],
|
||||
segments: [
|
||||
{ start: 0, end: 2, text: "Hello everyone." },
|
||||
{ start: 2, end: 4, text: "Welcome back." },
|
||||
{ start: 4, end: 7, text: "Now the details." },
|
||||
],
|
||||
});
|
||||
|
||||
expect(markdown).toContain("## Description");
|
||||
expect(markdown).toContain("Line one \nLine two");
|
||||
expect(markdown).toContain("## Chapters");
|
||||
expect(markdown).toContain("### Intro [0:00 -> 0:04]");
|
||||
expect(markdown).toContain("[0:00 -> 0:02] Hello everyone.");
|
||||
expect(markdown).toContain("[0:02 -> 0:04] Welcome back.");
|
||||
expect(markdown).toContain("### Deep Dive [0:04 -> 0:07]");
|
||||
expect(markdown).toContain("[0:04 -> 0:07] Now the details.");
|
||||
});
|
||||
|
||||
test("falls back to a transcript section when chapters are unavailable", () => {
|
||||
const markdown = renderYouTubeTranscriptMarkdown({
|
||||
segments: [{ start: 65, end: 70, text: "Single line." }],
|
||||
chapters: [],
|
||||
});
|
||||
|
||||
expect(markdown).toContain("## Transcript");
|
||||
expect(markdown).toContain("[1:05 -> 1:10] Single line.");
|
||||
});
|
||||
});
|
||||
|
||||
describe("thumbnail helpers", () => {
|
||||
test("prefers max resolution thumbnail candidates before listed fallbacks", () => {
|
||||
expect(
|
||||
buildYouTubeThumbnailCandidates("abc123", [
|
||||
"https://i.ytimg.com/vi/abc123/hqdefault.jpg",
|
||||
"https://i.ytimg.com/vi/abc123/mqdefault.jpg?foo=bar",
|
||||
]),
|
||||
).toEqual([
|
||||
"https://i.ytimg.com/vi/abc123/maxresdefault.jpg",
|
||||
"https://i.ytimg.com/vi/abc123/sddefault.jpg",
|
||||
"https://i.ytimg.com/vi/abc123/hqdefault.jpg",
|
||||
"https://i.ytimg.com/vi/abc123/mqdefault.jpg",
|
||||
"https://i.ytimg.com/vi/abc123/default.jpg",
|
||||
]);
|
||||
});
|
||||
|
||||
test("renders timestamp ranges with start and end values", () => {
|
||||
expect(formatTimestampRange(3661, 3675)).toBe("[1:01:01 -> 1:01:15]");
|
||||
});
|
||||
});
|
||||
|
|
@ -0,0 +1,74 @@
|
|||
import type { Adapter } from "../types";
|
||||
import { detectInteractionGate } from "../../browser/interaction-gates";
|
||||
import { captureNormalizedPageSnapshot } from "../../browser/page-snapshot";
|
||||
import { convertHtmlToMarkdown } from "../../extract/html-to-markdown";
|
||||
|
||||
export const genericAdapter: Adapter = {
|
||||
name: "generic",
|
||||
match() {
|
||||
return true;
|
||||
},
|
||||
async process(context) {
|
||||
context.log.info(`Loading ${context.input.url.toString()} with generic adapter`);
|
||||
await context.browser.goto(context.input.url.toString(), context.timeoutMs);
|
||||
|
||||
try {
|
||||
await context.network.waitForIdle({
|
||||
idleMs: 1_200,
|
||||
timeoutMs: Math.min(context.timeoutMs, 15_000),
|
||||
});
|
||||
} catch {
|
||||
context.log.debug("Network idle timed out on initial load; continuing.");
|
||||
}
|
||||
|
||||
await context.browser.scrollToEnd({ maxSteps: 4, delayMs: 300 });
|
||||
|
||||
try {
|
||||
await context.network.waitForIdle({
|
||||
idleMs: 900,
|
||||
timeoutMs: Math.min(context.timeoutMs, 10_000),
|
||||
});
|
||||
} catch {
|
||||
context.log.debug("Network idle timed out after scrolling; continuing.");
|
||||
}
|
||||
|
||||
const interaction = await detectInteractionGate(context.browser);
|
||||
if (interaction) {
|
||||
return {
|
||||
status: "needs_interaction",
|
||||
interaction,
|
||||
};
|
||||
}
|
||||
|
||||
const snapshot = await captureNormalizedPageSnapshot(context.browser);
|
||||
const converted = await convertHtmlToMarkdown(snapshot.html, snapshot.finalUrl, {
|
||||
enableRemoteMarkdownFallback: context.outputFormat === "markdown",
|
||||
preserveBase64Images: context.downloadMedia,
|
||||
});
|
||||
const document = {
|
||||
url: snapshot.finalUrl,
|
||||
canonicalUrl: converted.metadata.canonicalUrl,
|
||||
title: converted.metadata.title,
|
||||
author: converted.metadata.author,
|
||||
siteName: converted.metadata.siteName,
|
||||
publishedAt: converted.metadata.publishedAt,
|
||||
summary: converted.metadata.summary,
|
||||
adapter: "generic",
|
||||
metadata: {
|
||||
coverImage: converted.metadata.coverImage,
|
||||
language: converted.metadata.language,
|
||||
capturedAt: converted.metadata.capturedAt,
|
||||
conversionMethod: converted.conversionMethod,
|
||||
fallbackReason: converted.fallbackReason,
|
||||
kind: "generic/article",
|
||||
},
|
||||
content: converted.markdown ? [{ type: "markdown" as const, markdown: converted.markdown }] : [],
|
||||
};
|
||||
|
||||
return {
|
||||
status: "ok",
|
||||
document,
|
||||
media: converted.media,
|
||||
};
|
||||
},
|
||||
};
|
||||
|
|
@ -0,0 +1,391 @@
|
|||
import { JSDOM } from "jsdom";
|
||||
import TurndownService from "turndown";
|
||||
import { gfm } from "turndown-plugin-gfm";
|
||||
import type { Adapter } from "../types";
|
||||
import type { ExtractedDocument } from "../../extract/document";
|
||||
import { collectMediaFromDocument } from "../../media/markdown-media";
|
||||
|
||||
const HN_BASE_URL = "https://news.ycombinator.com";
|
||||
|
||||
const turndown = new TurndownService({
|
||||
headingStyle: "atx",
|
||||
bulletListMarker: "-",
|
||||
codeBlockStyle: "fenced",
|
||||
});
|
||||
|
||||
turndown.use(gfm);
|
||||
|
||||
export interface HnItem {
|
||||
id: number;
|
||||
type: "story" | "comment" | "job" | "poll" | "pollopt" | string;
|
||||
by?: string;
|
||||
time?: number;
|
||||
text?: string;
|
||||
title?: string;
|
||||
url?: string;
|
||||
score?: number;
|
||||
descendants?: number;
|
||||
kids?: number[];
|
||||
parent?: number;
|
||||
deleted?: boolean;
|
||||
dead?: boolean;
|
||||
}
|
||||
|
||||
export interface HnCommentNode {
|
||||
item: HnItem;
|
||||
children: HnCommentNode[];
|
||||
}
|
||||
|
||||
interface ParsedHnThread {
|
||||
story: HnItem;
|
||||
comments: HnCommentNode[];
|
||||
}
|
||||
|
||||
/**
 * Decodes an HTML fragment to plain text (entities resolved, tags dropped).
 * Returns undefined for empty input or when decoding yields only whitespace.
 */
function decodeHtmlText(value: string | undefined): string | undefined {
  if (!value) {
    return undefined;
  }

  const dom = new JSDOM(`<!doctype html><html><body>${value}</body></html>`);
  return dom.window.document.body.textContent?.trim() || undefined;
}
|
||||
|
||||
function normalizeMarkdown(markdown: string): string {
|
||||
return markdown
|
||||
.replace(/\r\n/g, "\n")
|
||||
.replace(/[ \t]+\n/g, "\n")
|
||||
.replace(/\n{3,}/g, "\n\n")
|
||||
.trim();
|
||||
}
|
||||
|
||||
/**
 * Converts an HN HTML fragment to normalized Markdown.
 * Relative anchors are rewritten to absolute URLs against `baseUrl` so links
 * remain usable outside the original page. Returns "" for blank input or
 * when the wrapper element cannot be found.
 */
function convertHnHtmlToMarkdown(html: string | undefined, baseUrl: string): string {
  if (!html?.trim()) {
    return "";
  }

  const dom = new JSDOM(`<div id="__root">${html}</div>`, { url: baseUrl });
  const root = dom.window.document.querySelector("#__root");
  if (!root) {
    return "";
  }

  // Absolutize every anchor so links survive extraction from the page.
  root.querySelectorAll("a[href]").forEach((element) => {
    const href = element.getAttribute("href");
    if (!href) {
      return;
    }

    try {
      element.setAttribute("href", new URL(href, baseUrl).toString());
    } catch {
      // Ignore malformed URLs and keep the original href.
    }
  });

  return normalizeMarkdown(turndown.turndown(root.innerHTML));
}
|
||||
|
||||
function formatIsoTimestamp(unixSeconds: number | undefined): string | undefined {
|
||||
if (!unixSeconds || !Number.isFinite(unixSeconds)) {
|
||||
return undefined;
|
||||
}
|
||||
|
||||
return new Date(unixSeconds * 1_000).toISOString();
|
||||
}
|
||||
|
||||
function formatDisplayTimestamp(unixSeconds: number | undefined): string {
|
||||
const iso = formatIsoTimestamp(unixSeconds);
|
||||
if (!iso) {
|
||||
return "unknown time";
|
||||
}
|
||||
|
||||
return iso.replace("T", " ").replace(".000Z", " UTC");
|
||||
}
|
||||
|
||||
function indentMarkdown(markdown: string, spaces: number): string {
|
||||
const prefix = " ".repeat(spaces);
|
||||
return markdown
|
||||
.split("\n")
|
||||
.map((line) => (line ? `${prefix}${line}` : prefix))
|
||||
.join("\n");
|
||||
}
|
||||
|
||||
function renderCommentHeader(item: HnItem, pageUrl: string): string {
|
||||
const author = item.by ?? "[deleted]";
|
||||
const time = item.id
|
||||
? `[${formatDisplayTimestamp(item.time)}](${pageUrl}#${item.id})`
|
||||
: formatDisplayTimestamp(item.time);
|
||||
return `${author} · ${time}`;
|
||||
}
|
||||
|
||||
/**
 * Renders one comment and its replies as a nested Markdown list entry.
 * `depth` controls indentation (4 spaces per level). A comment with no body
 * that is deleted or dead renders "[comment unavailable]".
 */
function renderCommentNode(node: HnCommentNode, pageUrl: string, depth = 0): string {
  const baseIndent = " ".repeat(depth * 4);
  const lines = [`${baseIndent}- ${renderCommentHeader(node.item, pageUrl)}`];
  const body = convertHnHtmlToMarkdown(node.item.text, pageUrl);

  if (body) {
    lines.push("");
    // Body is indented one level past its bullet so it nests under it.
    lines.push(indentMarkdown(body, depth * 4 + 4));
  } else if (node.item.deleted || node.item.dead) {
    lines.push("");
    lines.push(`${baseIndent}  [comment unavailable]`);
  }

  // Recurse into replies at the next depth, blank-line separated.
  for (const child of node.children) {
    lines.push("");
    lines.push(renderCommentNode(child, pageUrl, depth + 1));
  }

  return lines.join("\n");
}
|
||||
|
||||
/**
 * Renders a full HN thread — story header, optional post body, and the
 * comment tree — as one normalized Markdown document.
 */
export function buildHnThreadMarkdown(
  story: HnItem,
  comments: HnCommentNode[],
  pageUrl: string,
): string {
  const lines: string[] = [];
  const storyUrl = story.url ? new URL(story.url, pageUrl).toString() : undefined;
  const storyText = convertHnHtmlToMarkdown(story.text, pageUrl);

  // Link the external article when the story points somewhere else.
  if (storyUrl && storyUrl !== pageUrl) {
    lines.push(`Source: [${storyUrl}](${storyUrl})`);
  }
  lines.push(`HN Item: [${story.id}](${pageUrl})`);

  const submittedBy = story.by ? ` by ${story.by}` : "";
  const submittedAt = formatDisplayTimestamp(story.time);
  lines.push(`Submitted${submittedBy} at ${submittedAt}`);

  // Points / comment-count summary line, only for fields that are present.
  const stats: string[] = [];
  if (typeof story.score === "number") {
    stats.push(`${story.score} points`);
  }
  if (typeof story.descendants === "number") {
    stats.push(`${story.descendants} comments`);
  }
  if (stats.length > 0) {
    lines.push(stats.join(" | "));
  }

  // Self-post body (Ask HN etc.), when the story carries text.
  if (storyText) {
    lines.push("");
    lines.push("## Post");
    lines.push("");
    lines.push(storyText);
  }

  lines.push("");
  lines.push("## Comments");
  lines.push("");

  if (comments.length === 0) {
    lines.push("No comments.");
  } else {
    lines.push(comments.map((comment) => renderCommentNode(comment, pageUrl)).join("\n\n"));
  }

  return normalizeMarkdown(lines.join("\n"));
}
|
||||
|
||||
/**
 * Builds the ExtractedDocument for an HN thread: story metadata plus the
 * rendered Markdown thread as the sole content block.
 */
export function buildHnDocument(
  story: HnItem,
  comments: HnCommentNode[],
  pageUrl: string,
): ExtractedDocument {
  // Titles are scraped as HTML; decode entities before use.
  const decodedTitle = decodeHtmlText(story.title) ?? `HN Item ${story.id}`;

  return {
    url: pageUrl,
    canonicalUrl: pageUrl,
    title: decodedTitle,
    author: story.by,
    siteName: "Hacker News",
    publishedAt: formatIsoTimestamp(story.time),
    adapter: "hn",
    metadata: {
      kind: "hn/story",
      storyId: story.id,
      // External link the story points at, absolutized against the page.
      storyUrl: story.url ? new URL(story.url, pageUrl).toString() : undefined,
      points: story.score,
      commentCount: story.descendants,
    },
    content: [
      {
        type: "markdown",
        markdown: buildHnThreadMarkdown(story, comments, pageUrl),
      },
    ],
  };
}
|
||||
|
||||
export function parseHnItemId(url: URL): number | null {
|
||||
if (url.hostname !== "news.ycombinator.com") {
|
||||
return null;
|
||||
}
|
||||
|
||||
if (url.pathname !== "/item") {
|
||||
return null;
|
||||
}
|
||||
|
||||
const value = url.searchParams.get("id");
|
||||
if (!value || !/^\d+$/.test(value)) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return Number(value);
|
||||
}
|
||||
|
||||
function extractUnixSecondsFromAge(element: Element | null): number | undefined {
|
||||
const title = element?.getAttribute("title")?.trim();
|
||||
if (!title) {
|
||||
return undefined;
|
||||
}
|
||||
|
||||
const match = title.match(/(\d{9,})$/);
|
||||
return match ? Number(match[1]) : undefined;
|
||||
}
|
||||
|
||||
function extractScore(text: string | null | undefined): number | undefined {
|
||||
if (!text) {
|
||||
return undefined;
|
||||
}
|
||||
|
||||
const match = text.match(/(\d+)/);
|
||||
return match ? Number(match[1]) : undefined;
|
||||
}
|
||||
|
||||
function extractCommentCount(container: ParentNode): number | undefined {
|
||||
const anchors = Array.from(container.querySelectorAll("a"));
|
||||
for (const anchor of anchors) {
|
||||
const match = anchor.textContent?.trim().match(/(\d+)\s+comments?/i);
|
||||
if (match) {
|
||||
return Number(match[1]);
|
||||
}
|
||||
}
|
||||
return undefined;
|
||||
}
|
||||
|
||||
function normalizeStoryUrl(storyId: number, href: string | null | undefined, pageUrl: string): string | undefined {
|
||||
if (!href) {
|
||||
return undefined;
|
||||
}
|
||||
|
||||
try {
|
||||
const resolved = new URL(href, pageUrl).toString();
|
||||
if (resolved === pageUrl || resolved === `${HN_BASE_URL}/item?id=${storyId}`) {
|
||||
return undefined;
|
||||
}
|
||||
return resolved;
|
||||
} catch {
|
||||
return undefined;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Parses an HN item page's HTML into the story and its comment tree.
 * Returns null when the page does not contain a recognizable submission row.
 *
 * Comment nesting is rebuilt from HN's flat comment table using each row's
 * `indent` attribute: the ancestor stack is unwound to the row's depth, the
 * row attaches to the stack top (or becomes a root), then is pushed itself.
 */
export function extractHnThreadFromHtml(html: string, pageUrl: string): ParsedHnThread | null {
  const dom = new JSDOM(html, { url: pageUrl });
  const { document } = dom.window;
  const storyRow = document.querySelector("table.fatitem tr.athing.submission");
  if (!storyRow) {
    return null;
  }

  const storyId = Number(storyRow.getAttribute("id"));
  if (!Number.isFinite(storyId)) {
    return null;
  }

  // Story fields are scraped from the "fatitem" table at the top of the page.
  const titleLink = storyRow.querySelector(".titleline > a");
  const subline = document.querySelector("table.fatitem .subline");
  const topText = document.querySelector("table.fatitem .toptext");

  const story: HnItem = {
    id: storyId,
    type: "story",
    by: subline?.querySelector(".hnuser")?.textContent?.trim() || undefined,
    time: extractUnixSecondsFromAge(subline?.querySelector(".age") ?? null),
    title: titleLink?.innerHTML?.trim() || undefined,
    url: normalizeStoryUrl(storyId, titleLink?.getAttribute("href"), pageUrl),
    text: topText?.innerHTML?.trim() || undefined,
    score: extractScore(subline?.querySelector(".score")?.textContent),
    descendants: extractCommentCount(subline ?? document),
  };

  const roots: HnCommentNode[] = [];
  // Ancestor stack: stack[depth - 1] is the parent for a row at `depth`.
  const stack: HnCommentNode[] = [];

  document.querySelectorAll("tr.athing.comtr").forEach((row) => {
    const commentId = Number(row.getAttribute("id"));
    if (!Number.isFinite(commentId)) {
      return;
    }

    // HN encodes nesting depth in the td.ind "indent" attribute.
    const indentRaw = row.querySelector("td.ind")?.getAttribute("indent");
    const depth = indentRaw && /^\d+$/.test(indentRaw) ? Number(indentRaw) : 0;
    const comhead = row.querySelector(".comhead");
    const item: HnItem = {
      id: commentId,
      type: "comment",
      by: comhead?.querySelector(".hnuser")?.textContent?.trim() || undefined,
      time: extractUnixSecondsFromAge(comhead?.querySelector(".age") ?? null),
      text: row.querySelector(".comment > .commtext")?.innerHTML?.trim() || undefined,
      // A missing .commtext element is treated as a deleted comment.
      deleted: row.querySelector(".comment > .commtext") === null,
    };

    const node: HnCommentNode = {
      item,
      children: [],
    };

    // Unwind to this row's parent level.
    while (stack.length > depth) {
      stack.pop();
    }

    const parent = stack[stack.length - 1];
    if (parent) {
      parent.children.push(node);
    } else {
      roots.push(node);
    }

    stack.push(node);
  });

  return {
    story,
    comments: roots,
  };
}
|
||||
|
||||
/**
 * Adapter for Hacker News item pages (news.ycombinator.com/item?id=...).
 * Loads the page in the browser, scrapes the story + comment tree from its
 * HTML, and renders the whole thread as a single Markdown document.
 */
export const hnAdapter: Adapter = {
  name: "hn",
  match(input) {
    return parseHnItemId(input.url) !== null;
  },
  async process(context) {
    const itemId = parseHnItemId(context.input.url);
    if (!itemId) {
      return {
        status: "no_document",
      };
    }

    const pageUrl = context.input.url.toString();
    context.log.info(`Loading ${pageUrl} with hn adapter`);
    await context.browser.goto(pageUrl, context.timeoutMs);
    const html = await context.browser.getHTML();
    const thread = extractHnThreadFromHtml(html, pageUrl);
    if (!thread) {
      return {
        status: "no_document",
      };
    }

    const document = buildHnDocument(thread.story, thread.comments, pageUrl);
    return {
      status: "ok",
      document,
      // Media referenced from the rendered Markdown (e.g. inline images).
      media: collectMediaFromDocument(document),
    };
  },
};
|
||||
|
|
@ -0,0 +1,29 @@
|
|||
import type { Adapter, AdapterInput } from "./types";
|
||||
import { genericAdapter } from "./generic";
|
||||
import { hnAdapter } from "./hn";
|
||||
import { xAdapter } from "./x";
|
||||
import { youtubeAdapter } from "./youtube";
|
||||
|
||||
// Registered adapters in matching priority order; the generic adapter is
// last so it only handles URLs no specialized adapter claimed.
const adapters: Adapter[] = [xAdapter, youtubeAdapter, hnAdapter, genericAdapter];

// Returns the adapter registry (in matching priority order).
export function listAdapters(): Adapter[] {
  return adapters;
}
|
||||
|
||||
export function resolveAdapter(input: AdapterInput, forcedName?: string): Adapter {
|
||||
if (forcedName) {
|
||||
const forced = adapters.find((adapter) => adapter.name === forcedName);
|
||||
if (!forced) {
|
||||
throw new Error(`Unknown adapter: ${forcedName}`);
|
||||
}
|
||||
return forced;
|
||||
}
|
||||
|
||||
const matched = adapters.find((adapter) => adapter.match(input));
|
||||
if (!matched) {
|
||||
throw new Error("No adapter matched the URL");
|
||||
}
|
||||
return matched;
|
||||
}
|
||||
|
||||
export { genericAdapter };
|
||||
|
|
@ -0,0 +1,73 @@
|
|||
import type { BrowserSession } from "../browser/session";
|
||||
import type { CdpClient } from "../browser/cdp-client";
|
||||
import type { NetworkJournal } from "../browser/network-journal";
|
||||
import type { ExtractedDocument } from "../extract/document";
|
||||
import type { MediaDownloadRequest, MediaDownloadResult, MediaAsset } from "../media/types";
|
||||
import type { Logger } from "../utils/logger";
|
||||
|
||||
// Input to adapter matching and processing: currently just the target URL.
export interface AdapterInput {
  url: URL;
}

// Login status an adapter can report for its provider.
export type LoginState = "logged_in" | "logged_out" | "unknown";
// Kinds of human-interaction gates an adapter may hit while loading a page.
export type InteractionKind = "login" | "cloudflare" | "recaptcha" | "hcaptcha" | "captcha" | "challenge";

// Login details for the current browser session, as reported by an adapter.
export interface AdapterLoginInfo {
  provider: string;   // provider identifier, e.g. "x"
  state: LoginState;
  required?: boolean; // true when the target content needs a login
  username?: string;  // account name, when known
  reason?: string;    // human-readable context for the state
}
|
||||
|
||||
// Request for the host application to pause and let a human complete an
// interaction (login form, captcha, challenge page) in the browser.
export interface WaitForInteractionRequest {
  type: "wait_for_interaction";
  kind: InteractionKind;
  provider: string;
  prompt: string;                   // user-facing prompt text
  reason?: string;
  timeoutMs?: number;               // overall wait budget
  pollIntervalMs?: number;          // how often to re-check the gate
  requiresVisibleBrowser?: boolean; // true when headless cannot complete it
}
|
||||
|
||||
// Outcome of Adapter.process, discriminated on `status`:
// - "ok": a document was extracted (optionally with media and login info);
// - "needs_interaction": a human must complete `interaction` first;
// - "no_document": nothing extractable at this URL.
export type AdapterProcessResult =
  | {
      status: "ok";
      document: ExtractedDocument;
      media?: MediaAsset[];
      login?: AdapterLoginInfo;
    }
  | {
      status: "needs_interaction";
      interaction: WaitForInteractionRequest;
      login?: AdapterLoginInfo;
    }
  | {
      status: "no_document";
      login?: AdapterLoginInfo;
    };
|
||||
|
||||
// Everything an adapter gets to do its work: the target URL plus browser,
// network, and CDP handles, logging, and run configuration.
export interface AdapterContext {
  input: AdapterInput;
  browser: BrowserSession;
  network: NetworkJournal; // captured request/response journal
  cdp: CdpClient;
  log: Logger;
  outputFormat: "markdown" | "json";
  timeoutMs: number;       // navigation/processing budget
  interactive: boolean;    // whether human interaction is allowed
  downloadMedia: boolean;  // whether media assets should be fetched
}
|
||||
|
||||
// A site-specific extractor. `match` decides whether this adapter handles a
// URL; `process` performs the extraction. Cookie/login hooks are optional.
export interface Adapter {
  name: string;
  match(input: AdapterInput): boolean;
  checkLogin?(context: AdapterContext): Promise<AdapterLoginInfo>;
  exportCookies?(context: AdapterContext, profileDir?: string): Promise<boolean>;
  restoreCookies?(context: AdapterContext, profileDir?: string): Promise<boolean>;
  downloadMedia?(request: MediaDownloadRequest): Promise<MediaDownloadResult>;
  process(context: AdapterContext): Promise<AdapterProcessResult>;
}
|
||||
|
||||
export type { MediaAsset };
|
||||
|
|
@ -0,0 +1,433 @@
|
|||
import type { ExtractedDocument } from "../../extract/document";
|
||||
import {
|
||||
findTweetNode,
|
||||
findTweetNodeById,
|
||||
formatMediaList,
|
||||
formatTweetAuthor,
|
||||
getTweetAuthorMetadata,
|
||||
getTweetText,
|
||||
getUser,
|
||||
isRecord,
|
||||
normalizeTitle,
|
||||
toHighResXImageUrl,
|
||||
toXTweet,
|
||||
} from "./shared";
|
||||
import type { JsonObject } from "./types";
|
||||
|
||||
function resolveArticleMediaUrl(mediaInfo: JsonObject): string {
|
||||
const rawUrl =
|
||||
(typeof mediaInfo.original_img_url === "string" && mediaInfo.original_img_url) ||
|
||||
(typeof mediaInfo.url === "string" && mediaInfo.url) ||
|
||||
"";
|
||||
|
||||
return rawUrl ? toHighResXImageUrl(rawUrl) : "";
|
||||
}
|
||||
|
||||
function normalizeEntityMap(entityMap: unknown): Map<string, JsonObject> {
|
||||
const normalized = new Map<string, JsonObject>();
|
||||
|
||||
if (Array.isArray(entityMap)) {
|
||||
for (const entry of entityMap) {
|
||||
if (!isRecord(entry)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const key =
|
||||
typeof entry.key === "string" || typeof entry.key === "number"
|
||||
? String(entry.key)
|
||||
: undefined;
|
||||
const value = isRecord(entry.value) ? entry.value : undefined;
|
||||
if (!key || !value) {
|
||||
continue;
|
||||
}
|
||||
normalized.set(key, value);
|
||||
}
|
||||
|
||||
return normalized;
|
||||
}
|
||||
|
||||
if (!isRecord(entityMap)) {
|
||||
return normalized;
|
||||
}
|
||||
|
||||
for (const [key, value] of Object.entries(entityMap)) {
|
||||
if (!isRecord(value)) {
|
||||
continue;
|
||||
}
|
||||
normalized.set(key, value);
|
||||
}
|
||||
|
||||
return normalized;
|
||||
}
|
||||
|
||||
function getEntityMarkdown(entityMap: Map<string, JsonObject>, entityKey: unknown): string | null {
|
||||
const key =
|
||||
typeof entityKey === "string" || typeof entityKey === "number"
|
||||
? String(entityKey)
|
||||
: undefined;
|
||||
if (!key) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const entity = entityMap.get(key);
|
||||
if (!entity || entity.type !== "MARKDOWN") {
|
||||
return null;
|
||||
}
|
||||
|
||||
const data = isRecord(entity.data) ? entity.data : {};
|
||||
if (typeof data.markdown !== "string") {
|
||||
return null;
|
||||
}
|
||||
|
||||
const markdown = data.markdown.trim();
|
||||
return markdown || null;
|
||||
}
|
||||
|
||||
function getLinkUrl(entityMap: Map<string, JsonObject>, entityKey: unknown): string | null {
|
||||
const key =
|
||||
typeof entityKey === "string" || typeof entityKey === "number"
|
||||
? String(entityKey)
|
||||
: undefined;
|
||||
if (!key) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const entity = entityMap.get(key);
|
||||
if (!entity || entity.type !== "LINK") {
|
||||
return null;
|
||||
}
|
||||
|
||||
const data = isRecord(entity.data) ? entity.data : {};
|
||||
const candidates = [
|
||||
data.expanded_url,
|
||||
data.expandedUrl,
|
||||
data.original_url,
|
||||
data.originalUrl,
|
||||
data.url,
|
||||
data.display_url,
|
||||
data.displayUrl,
|
||||
];
|
||||
|
||||
for (const candidate of candidates) {
|
||||
if (typeof candidate === "string" && candidate.trim()) {
|
||||
return candidate.trim();
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
function getTweetId(entityMap: Map<string, JsonObject>, entityKey: unknown): string | null {
|
||||
const key =
|
||||
typeof entityKey === "string" || typeof entityKey === "number"
|
||||
? String(entityKey)
|
||||
: undefined;
|
||||
if (!key) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const entity = entityMap.get(key);
|
||||
if (!entity || entity.type !== "TWEET") {
|
||||
return null;
|
||||
}
|
||||
|
||||
const data = isRecord(entity.data) ? entity.data : {};
|
||||
if (typeof data.tweetId !== "string") {
|
||||
return null;
|
||||
}
|
||||
|
||||
return data.tweetId;
|
||||
}
|
||||
|
||||
/**
 * Maps media_id -> resolved image URL for every media entity attached to an
 * X article, including the cover image. Entries without a usable URL are
 * skipped.
 */
function buildMediaUrlMap(articleResult: JsonObject): Map<string, string> {
  const mediaMap = new Map<string, string>();
  const mediaEntities = Array.isArray(articleResult.media_entities) ? articleResult.media_entities : [];

  for (const entity of mediaEntities) {
    if (!isRecord(entity) || typeof entity.media_id !== "string" || !isRecord(entity.media_info)) {
      continue;
    }

    const mediaInfo = entity.media_info;
    const url = resolveArticleMediaUrl(mediaInfo);
    if (url) {
      mediaMap.set(entity.media_id, url);
    }
  }

  // The cover image lives outside media_entities; include it as well.
  const coverMedia = isRecord(articleResult.cover_media) ? articleResult.cover_media : null;
  if (coverMedia && typeof coverMedia.media_id === "string" && isRecord(coverMedia.media_info)) {
    const url = resolveArticleMediaUrl(coverMedia.media_info);
    if (url) {
      mediaMap.set(coverMedia.media_id, url);
    }
  }

  return mediaMap;
}
|
||||
|
||||
function getMediaMarkdown(entityMap: Map<string, JsonObject>, entityKey: unknown, mediaMap: Map<string, string>): string[] {
|
||||
const key =
|
||||
typeof entityKey === "string" || typeof entityKey === "number"
|
||||
? String(entityKey)
|
||||
: undefined;
|
||||
if (!key) {
|
||||
return [];
|
||||
}
|
||||
|
||||
const entity = entityMap.get(key);
|
||||
if (!entity || entity.type !== "MEDIA") {
|
||||
return [];
|
||||
}
|
||||
|
||||
const data = isRecord(entity.data) ? entity.data : {};
|
||||
const mediaItems = Array.isArray(data.mediaItems) ? data.mediaItems : [];
|
||||
const urls: string[] = [];
|
||||
|
||||
for (const item of mediaItems) {
|
||||
if (!isRecord(item) || typeof item.mediaId !== "string") {
|
||||
continue;
|
||||
}
|
||||
const url = mediaMap.get(item.mediaId);
|
||||
if (url && !urls.includes(url)) {
|
||||
urls.push(url);
|
||||
}
|
||||
}
|
||||
|
||||
return urls.map((url) => ``);
|
||||
}
|
||||
|
||||
function resolveTweetMarkdown(payloads: unknown[], tweetId: string, pageUrl: string): string | null {
|
||||
for (const payload of payloads) {
|
||||
const tweet = findTweetNodeById(payload, tweetId);
|
||||
if (!tweet) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const xTweet = toXTweet(tweet, pageUrl);
|
||||
const author = formatTweetAuthor(xTweet) ?? xTweet.url;
|
||||
const lines = [`> ${author}`, ...xTweet.text.split("\n").map((line) => `> ${line}`)];
|
||||
|
||||
const media = formatMediaList(xTweet.media).map((line) =>
|
||||
line.startsWith("photo: ") ? `> })` : `> - ${line}`,
|
||||
);
|
||||
|
||||
const parts = [lines.join("\n")];
|
||||
if (media.length > 0) {
|
||||
parts.push([">", ...media].join("\n"));
|
||||
}
|
||||
parts.push(`> ${xTweet.url}`);
|
||||
|
||||
return parts.join("\n").trim();
|
||||
}
|
||||
|
||||
return `> Embedded tweet: https://x.com/i/status/${tweetId}`;
|
||||
}
|
||||
|
||||
/**
 * Replaces LINK entity ranges in a Draft.js block's text with their resolved
 * URLs. Replacements are applied right-to-left (sorted by descending offset)
 * so earlier offsets stay valid while the string length changes.
 * NOTE(review): Draft.js offsets are assumed to be UTF-16 code units matching
 * JS string indexing — confirm for text containing astral characters.
 */
function replaceLinkEntities(text: string, block: JsonObject, entityMap: Map<string, JsonObject>): string {
  const entityRanges = Array.isArray(block.entityRanges) ? block.entityRanges : [];
  const replacements = entityRanges
    .filter((range): range is JsonObject => isRecord(range))
    .map((range) => {
      const offset = typeof range.offset === "number" ? range.offset : -1;
      const length = typeof range.length === "number" ? range.length : -1;
      const url = getLinkUrl(entityMap, range.key);
      return { offset, length, url };
    })
    // Keep only well-formed ranges that resolved to a URL.
    .filter((range) => range.offset >= 0 && range.length > 0 && range.url)
    .sort((left, right) => right.offset - left.offset);

  let next = text;
  for (const replacement of replacements) {
    next =
      next.slice(0, replacement.offset) +
      replacement.url +
      next.slice(replacement.offset + replacement.length);
  }
  return next;
}
|
||||
|
||||
/**
 * Renders an atomic Draft.js block by resolving its entity references in
 * priority order: inline MARKDOWN payloads, then MEDIA images, then embedded
 * TWEET quotes. Returns null when nothing renders.
 */
function renderAtomicBlock(
  block: JsonObject,
  entityMap: Map<string, JsonObject>,
  mediaMap: Map<string, string>,
  payloads: unknown[],
  pageUrl: string,
): string | null {
  const entityRanges = Array.isArray(block.entityRanges) ? block.entityRanges : [];
  const parts: string[] = [];

  for (const range of entityRanges) {
    if (!isRecord(range)) {
      continue;
    }

    const markdown = getEntityMarkdown(entityMap, range.key);
    if (markdown) {
      parts.push(markdown);
      continue;
    }

    const mediaMarkdown = getMediaMarkdown(entityMap, range.key, mediaMap);
    if (mediaMarkdown.length > 0) {
      parts.push(mediaMarkdown.join("\n\n"));
      continue;
    }

    const tweetId = getTweetId(entityMap, range.key);
    if (tweetId) {
      const tweetMarkdown = resolveTweetMarkdown(payloads, tweetId, pageUrl);
      if (tweetMarkdown) {
        parts.push(tweetMarkdown);
      }
    }
  }

  if (parts.length === 0) {
    return null;
  }

  return parts.join("\n\n");
}
|
||||
|
||||
/**
 * Converts the article's Draft.js blocks to Markdown. Ordered-list numbering
 * restarts whenever a non-ordered block interrupts the list. Blocks without
 * text are skipped unless they are atomic (media/tweet/markdown embeds).
 */
function renderArticleBlocks(
  blocks: unknown[],
  entityMap: Map<string, JsonObject>,
  mediaMap: Map<string, string>,
  payloads: unknown[],
  pageUrl: string,
): string {
  const parts: string[] = [];
  let orderedCounter = 0;

  for (const block of blocks) {
    if (!isRecord(block)) {
      continue;
    }

    const blockType = typeof block.type === "string" ? block.type : "unstyled";
    const rawText = typeof block.text === "string" ? block.text : "";
    const text = replaceLinkEntities(rawText, block, entityMap).trim();
    if (!text && blockType !== "atomic") {
      continue;
    }

    // Any non-ordered block breaks the current numbered list.
    if (blockType !== "ordered-list-item") {
      orderedCounter = 0;
    }

    switch (blockType) {
      case "header-one":
        parts.push(`# ${text}`);
        break;
      case "header-two":
        parts.push(`## ${text}`);
        break;
      case "header-three":
        parts.push(`### ${text}`);
        break;
      case "blockquote":
        parts.push(`> ${text}`);
        break;
      case "unordered-list-item":
        parts.push(`- ${text}`);
        break;
      case "ordered-list-item":
        orderedCounter += 1;
        parts.push(`${orderedCounter}. ${text}`);
        break;
      case "code-block":
        parts.push(`\`\`\`\n${text}\n\`\`\``);
        break;
      case "atomic": {
        const markdown = renderAtomicBlock(block, entityMap, mediaMap, payloads, pageUrl);
        if (markdown) {
          parts.push(markdown);
        }
        break;
      }
      default:
        // "unstyled" and unknown block types render as plain paragraphs.
        parts.push(text);
        break;
    }
  }

  return parts.join("\n\n").trim();
}
|
||||
|
||||
function getArticleResult(tweet: JsonObject): JsonObject | null {
|
||||
if (
|
||||
isRecord(tweet.article) &&
|
||||
isRecord(tweet.article.article_results) &&
|
||||
isRecord(tweet.article.article_results.result)
|
||||
) {
|
||||
return tweet.article.article_results.result as JsonObject;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
function extractSummary(markdown: string): string | undefined {
|
||||
const segments = markdown
|
||||
.split(/\n\n+/)
|
||||
.map((segment) => segment.trim())
|
||||
.filter(Boolean);
|
||||
|
||||
const preferred = segments.find((segment) => !/^(#|>|- |\d+\. |\`\`\`)/.test(segment));
|
||||
return preferred?.slice(0, 220);
|
||||
}
|
||||
|
||||
/**
 * Extracts an X "article" (long-form post) as an ExtractedDocument.
 * Returns null when the status cannot be found, has no article payload, or
 * yields no text at all.
 *
 * @param payload  Payload expected to contain the article's anchor tweet.
 * @param statusId Status id of the article's anchor tweet.
 * @param pageUrl  URL the page was captured from.
 * @param payloads All captured payloads, used to resolve embedded tweets;
 *                 defaults to just `payload`.
 */
export function extractArticleDocumentFromPayload(
  payload: unknown,
  statusId: string,
  pageUrl: string,
  payloads: unknown[] = [payload],
): ExtractedDocument | null {
  const tweet = findTweetNode(payload, statusId);
  if (!tweet) {
    return null;
  }

  const articleResult = getArticleResult(tweet);
  if (!articleResult) {
    return null;
  }

  const title = typeof articleResult.title === "string" ? articleResult.title.trim() : undefined;
  const contentState = isRecord(articleResult.content_state) ? articleResult.content_state : {};
  const blocks = Array.isArray(contentState.blocks) ? contentState.blocks : [];
  const entityMap = normalizeEntityMap(contentState.entityMap);
  const mediaMap = buildMediaUrlMap(articleResult);
  // Prefer the rich Draft.js rendering; fall back to the article's plain
  // text, then to the tweet text itself.
  const richMarkdown = renderArticleBlocks(blocks, entityMap, mediaMap, payloads, pageUrl);
  const plainText = typeof articleResult.plain_text === "string" ? articleResult.plain_text.trim() : "";
  const markdown = richMarkdown || plainText || getTweetText(tweet);
  if (!markdown) {
    return null;
  }

  const xTweet = toXTweet(tweet, pageUrl);
  const user = getUser(tweet);
  const coverMedia = isRecord(articleResult.cover_media) ? articleResult.cover_media : null;
  const coverMediaInfo = coverMedia && isRecord(coverMedia.media_info) ? coverMedia.media_info : null;
  const coverImage = coverMediaInfo ? resolveArticleMediaUrl(coverMediaInfo) || undefined : undefined;

  return {
    url: pageUrl,
    canonicalUrl: xTweet.url,
    title: title || normalizeTitle(xTweet.text, "X Article"),
    author: formatTweetAuthor(xTweet),
    siteName: "X",
    publishedAt: xTweet.createdAt,
    summary: extractSummary(markdown) || xTweet.text.slice(0, 200) || undefined,
    adapter: "x",
    metadata: {
      kind: "x/article",
      tweetId: xTweet.id,
      coverImage,
      // Prefer fields from the normalized tweet, falling back to the raw user.
      authorName: xTweet.authorName ?? user.name,
      authorUsername: xTweet.author ?? user.screenName,
      authorUrl: (xTweet.author ?? user.screenName) ? `https://x.com/${xTweet.author ?? user.screenName}` : undefined,
      ...getTweetAuthorMetadata(xTweet),
    },
    content: [{ type: "markdown", markdown }],
  };
}
|
||||
|
|
@ -0,0 +1,134 @@
|
|||
import type { Adapter, AdapterLoginInfo } from "../types";
|
||||
import { exportCookies, restoreCookies, type CookieSidecarConfig } from "../../browser/cookie-sidecar";
|
||||
import { detectInteractionGate } from "../../browser/interaction-gates";
|
||||
import type { ExtractedDocument } from "../../extract/document";
|
||||
import { collectMediaFromDocument } from "../../media/markdown-media";
|
||||
import { extractArticleDocumentFromPayload } from "./article";
|
||||
import { buildNeedsLoginResult, detectXLogin } from "./login";
|
||||
import { extractStatusId, isXHost } from "./match";
|
||||
import { collectXJsonPayloads, waitForInitialXPayload } from "./payloads";
|
||||
import { extractSingleTweetDocumentFromPayload } from "./single";
|
||||
import { extractThreadDocumentFromPayloads } from "./thread";
|
||||
import { loadFullXThread } from "./thread-loader";
|
||||
|
||||
// Sidecar persistence for the X session: only cookies scoped to x.com /
// twitter.com are exported, and a saved session must contain both the auth
// token and the ct0 (CSRF) cookie to count as valid.
const cookieConfig: CookieSidecarConfig = {
  urls: ["https://x.com/", "https://twitter.com/"],
  filename: "x-session-cookies.json",
  requiredCookieNames: ["auth_token", "ct0"],
  // Keep only cookies belonging to X/Twitter domains.
  filterCookie: (c) => {
    const d = c.domain ?? "";
    return d.endsWith("x.com") || d.endsWith("twitter.com");
  },
};
|
||||
|
||||
/**
 * Extracts a document for `statusId` from the captured payloads, trying the
 * most specific extractor first: article, then thread, then single tweet.
 * Returns null when none of them produces a document.
 */
function extractDocumentFromPayloads(
  payloads: unknown[],
  statusId: string,
  pageUrl: string,
): ExtractedDocument | null {
  // Articles first: the article body may arrive in a different payload than
  // the status itself, so every payload is checked.
  for (const payload of payloads) {
    const articleDocument = extractArticleDocumentFromPayload(payload, statusId, pageUrl, payloads);
    if (articleDocument) {
      return articleDocument;
    }
  }

  const threadDocument = extractThreadDocumentFromPayloads(payloads, statusId, pageUrl);
  if (threadDocument) {
    return threadDocument;
  }

  for (const payload of payloads) {
    const singleDocument = extractSingleTweetDocumentFromPayload(payload, statusId, pageUrl);
    if (singleDocument) {
      return singleDocument;
    }
  }

  return null;
}
|
||||
|
||||
// Named wrapper around detectXLogin so the call sites in process() read as
// intent ("ensure login state") rather than mechanism.
async function ensureXLoginState(context: Parameters<Adapter["process"]>[0]): Promise<AdapterLoginInfo> {
  return detectXLogin(context);
}
|
||||
|
||||
/**
 * Adapter for X (Twitter) status URLs. Navigates to the status, handles
 * interaction gates and login detection, captures the JSON payloads the page
 * loads, and extracts an article/thread/single-tweet document from them.
 */
export const xAdapter: Adapter = {
  name: "x",
  match(input) {
    return isXHost(input.url.hostname);
  },
  async checkLogin(context) {
    return detectXLogin(context);
  },
  async exportCookies(context, profileDir) {
    return exportCookies(context.browser.targetSession, cookieConfig, profileDir);
  },
  async restoreCookies(context, profileDir) {
    return restoreCookies(context.browser.targetSession, cookieConfig, profileDir);
  },
  async process(context) {
    const statusId = extractStatusId(context.input.url);
    if (!statusId) {
      return {
        status: "no_document",
      };
    }

    context.log.info(`Loading ${context.input.url.toString()} with x adapter`);
    await context.browser.goto(context.input.url.toString(), context.timeoutMs);

    // Bail out early on captcha/challenge pages before doing any work.
    const interaction = await detectInteractionGate(context.browser);
    if (interaction) {
      return {
        status: "needs_interaction",
        interaction,
      };
    }

    let login = await ensureXLoginState(context);
    if (login.state === "logged_out") {
      return buildNeedsLoginResult(login);
    }

    // Wait for the first payload, then load the rest of the thread.
    await waitForInitialXPayload(context);
    await loadFullXThread(context, statusId);

    const pageUrl = await context.browser.getURL();
    // Thread loading can trigger a new gate; re-check before extraction.
    const postLoadInteraction = await detectInteractionGate(context.browser);
    if (postLoadInteraction) {
      return {
        status: "needs_interaction",
        interaction: postLoadInteraction,
        login,
      };
    }

    // Login may have expired mid-session; best-effort re-check that keeps
    // the previous state if detection itself fails.
    login = await ensureXLoginState(context).catch(() => login);
    if (login.state === "logged_out") {
      return buildNeedsLoginResult(login);
    }

    const payloads = await collectXJsonPayloads(context);
    if (payloads.length === 0) {
      return {
        status: "no_document",
        login,
      };
    }

    const document = extractDocumentFromPayloads(payloads, statusId, pageUrl);
    if (document) {
      return {
        status: "ok",
        document,
        media: collectMediaFromDocument(document),
        login,
      };
    }

    return {
      status: "no_document",
      login,
    };
  },
};
|
||||
|
|
@ -0,0 +1,80 @@
|
|||
import type { AdapterContext, AdapterLoginInfo, AdapterProcessResult } from "../types";
|
||||
|
||||
interface XLoginSnapshot {
|
||||
currentUrl: string;
|
||||
hasAccountMenu: boolean;
|
||||
hasLoginInputs: boolean;
|
||||
bodyText: string;
|
||||
}
|
||||
|
||||
export async function detectXLogin(context: AdapterContext): Promise<AdapterLoginInfo> {
|
||||
const snapshot = await context.browser.evaluate<XLoginSnapshot>(`
|
||||
(() => {
|
||||
const bodyText = (document.body?.innerText ?? "").slice(0, 2500);
|
||||
return {
|
||||
currentUrl: window.location.href,
|
||||
hasAccountMenu: Boolean(
|
||||
document.querySelector(
|
||||
'[data-testid="SideNav_AccountSwitcher_Button"], [data-testid="AppTabBar_Profile_Link"], [aria-label="Account menu"]'
|
||||
)
|
||||
),
|
||||
hasLoginInputs: Boolean(
|
||||
document.querySelector(
|
||||
'input[name="text"], input[name="password"], input[autocomplete="username"], input[autocomplete="current-password"]'
|
||||
)
|
||||
),
|
||||
bodyText,
|
||||
};
|
||||
})()
|
||||
`).catch(async () => ({
|
||||
currentUrl: await context.browser.getURL().catch(() => context.input.url.toString()),
|
||||
hasAccountMenu: false,
|
||||
hasLoginInputs: false,
|
||||
bodyText: "",
|
||||
}));
|
||||
|
||||
if (
|
||||
/\/i\/flow\/login|\/login/i.test(snapshot.currentUrl) ||
|
||||
snapshot.hasLoginInputs ||
|
||||
/sign in to x|join x today|登录 x|注册 x|登录到 x/i.test(snapshot.bodyText)
|
||||
) {
|
||||
return {
|
||||
provider: "x",
|
||||
state: "logged_out",
|
||||
required: true,
|
||||
reason: "X login page detected",
|
||||
};
|
||||
}
|
||||
|
||||
if (snapshot.hasAccountMenu) {
|
||||
return {
|
||||
provider: "x",
|
||||
state: "logged_in",
|
||||
};
|
||||
}
|
||||
|
||||
return {
|
||||
provider: "x",
|
||||
state: "unknown",
|
||||
};
|
||||
}
|
||||
|
||||
export function buildNeedsLoginResult(login: AdapterLoginInfo): AdapterProcessResult {
|
||||
return {
|
||||
status: "needs_interaction",
|
||||
login: {
|
||||
...login,
|
||||
provider: "x",
|
||||
state: login.state === "logged_in" ? "unknown" : login.state,
|
||||
required: true,
|
||||
},
|
||||
interaction: {
|
||||
type: "wait_for_interaction",
|
||||
kind: "login",
|
||||
provider: "x",
|
||||
reason: login.reason,
|
||||
prompt: "Please sign in to X in the opened Chrome window. Extraction will continue automatically once login is detected.",
|
||||
requiresVisibleBrowser: true,
|
||||
},
|
||||
};
|
||||
}
|
||||
|
|
@ -0,0 +1,9 @@
|
|||
export function isXHost(hostname: string): boolean {
|
||||
return ["x.com", "www.x.com", "twitter.com", "www.twitter.com"].includes(hostname);
|
||||
}
|
||||
|
||||
export function extractStatusId(url: URL): string | undefined {
|
||||
const match = url.pathname.match(/\/(?:status|article)\/(\d+)/);
|
||||
return match?.[1];
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,50 @@
|
|||
import type { AdapterContext } from "../types";
|
||||
import { filterXGraphQlEntries } from "./shared";
|
||||
|
||||
export function getRelevantXThreadEntries(context: AdapterContext) {
|
||||
return filterXGraphQlEntries(context.network.getEntries()).filter(
|
||||
(entry) =>
|
||||
entry.method === "GET" &&
|
||||
entry.finished &&
|
||||
(
|
||||
entry.url.includes("TweetDetail") ||
|
||||
entry.url.includes("TweetResultByRestId") ||
|
||||
entry.url.includes("TweetResultsByRestIds")
|
||||
),
|
||||
);
|
||||
}
|
||||
|
||||
export async function prefetchRelevantXThreadBodies(context: AdapterContext): Promise<void> {
|
||||
const entries = getRelevantXThreadEntries(context).filter((entry) => entry.body === undefined && !entry.bodyError);
|
||||
for (const entry of entries) {
|
||||
await context.network.ensureBody(entry);
|
||||
}
|
||||
}
|
||||
|
||||
export async function collectXJsonPayloads(context: AdapterContext): Promise<unknown[]> {
|
||||
await prefetchRelevantXThreadBodies(context);
|
||||
const entries = getRelevantXThreadEntries(context);
|
||||
|
||||
const payloads: unknown[] = [];
|
||||
for (const entry of entries) {
|
||||
const payload = await context.network.getJsonBody(entry);
|
||||
if (payload) {
|
||||
payloads.push(payload);
|
||||
}
|
||||
}
|
||||
return payloads;
|
||||
}
|
||||
|
||||
export async function waitForInitialXPayload(context: AdapterContext): Promise<void> {
|
||||
try {
|
||||
await context.network.waitForResponse(
|
||||
(entry) =>
|
||||
entry.url.includes("/graphql/") &&
|
||||
(entry.url.includes("TweetDetail") || entry.url.includes("TweetResultByRestId")),
|
||||
{ timeoutMs: Math.min(context.timeoutMs, 15_000) },
|
||||
);
|
||||
await prefetchRelevantXThreadBodies(context);
|
||||
} catch {
|
||||
context.log.debug("No tweet GraphQL response observed before timeout.");
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,47 @@
|
|||
import type { AdapterContext } from "../types";
|
||||
|
||||
const X_SESSION_URLS = ["https://x.com/", "https://twitter.com/"] as const;
|
||||
const REQUIRED_X_SESSION_COOKIES = ["auth_token", "ct0"] as const;
|
||||
|
||||
interface CookieLike {
|
||||
name?: string;
|
||||
value?: string | null;
|
||||
}
|
||||
|
||||
interface NetworkGetCookiesResult {
|
||||
cookies?: CookieLike[];
|
||||
}
|
||||
|
||||
export function buildXSessionCookieMap(cookies: readonly CookieLike[]): Record<string, string> {
|
||||
const cookieMap: Record<string, string> = {};
|
||||
for (const cookie of cookies) {
|
||||
const name = cookie.name?.trim();
|
||||
const value = cookie.value?.trim();
|
||||
if (!name || !value) {
|
||||
continue;
|
||||
}
|
||||
cookieMap[name] = value;
|
||||
}
|
||||
return cookieMap;
|
||||
}
|
||||
|
||||
export function hasRequiredXSessionCookies(cookieMap: Record<string, string>): boolean {
|
||||
return REQUIRED_X_SESSION_COOKIES.every((name) => Boolean(cookieMap[name]));
|
||||
}
|
||||
|
||||
export async function readXSessionCookieMap(
|
||||
context: Pick<AdapterContext, "browser">,
|
||||
): Promise<Record<string, string>> {
|
||||
const { cookies } = await context.browser.targetSession.send<NetworkGetCookiesResult>(
|
||||
"Network.getCookies",
|
||||
{ urls: [...X_SESSION_URLS] },
|
||||
);
|
||||
return buildXSessionCookieMap(cookies ?? []);
|
||||
}
|
||||
|
||||
export async function isXSessionReady(
|
||||
context: Pick<AdapterContext, "browser">,
|
||||
): Promise<boolean> {
|
||||
const cookieMap = await readXSessionCookieMap(context);
|
||||
return hasRequiredXSessionCookies(cookieMap);
|
||||
}
|
||||
|
|
@ -0,0 +1,386 @@
|
|||
import path from "node:path";
|
||||
import type { NetworkEntry } from "../../browser/network-journal";
|
||||
import type { XMedia, XQuotedTweet, XTweet, XUser, JsonObject } from "./types";
|
||||
|
||||
const X_IMAGE_EXTENSIONS = new Set(["jpg", "jpeg", "png", "webp", "gif", "bmp", "avif"]);
|
||||
|
||||
function emptyObject(): JsonObject {
|
||||
return {};
|
||||
}
|
||||
|
||||
export function isRecord(value: unknown): value is JsonObject {
|
||||
return Boolean(value) && typeof value === "object" && !Array.isArray(value);
|
||||
}
|
||||
|
||||
export function walk(value: unknown, visitor: (node: unknown) => boolean | void): boolean {
|
||||
if (visitor(value)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (Array.isArray(value)) {
|
||||
for (const item of value) {
|
||||
if (walk(item, visitor)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
if (isRecord(value)) {
|
||||
for (const child of Object.values(value)) {
|
||||
if (walk(child, visitor)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
function hasTweetText(node: JsonObject): boolean {
|
||||
const legacy = isRecord(node.legacy) ? node.legacy : emptyObject();
|
||||
return (
|
||||
typeof legacy.full_text === "string" ||
|
||||
typeof getNoteTweetText(node) === "string"
|
||||
);
|
||||
}
|
||||
|
||||
export function findTweetNodeById(payload: unknown, tweetId: string): JsonObject | null {
|
||||
let match: JsonObject | null = null;
|
||||
|
||||
walk(payload, (node) => {
|
||||
if (!isRecord(node) || typeof node.rest_id !== "string" || !isRecord(node.legacy)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!hasTweetText(node)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (node.rest_id === tweetId) {
|
||||
match = node;
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
});
|
||||
|
||||
return match;
|
||||
}
|
||||
|
||||
export function findTweetNode(payload: unknown, statusId: string): JsonObject | null {
|
||||
let firstMatch: JsonObject | null = null;
|
||||
const exactMatch = findTweetNodeById(payload, statusId);
|
||||
if (exactMatch) {
|
||||
return exactMatch;
|
||||
}
|
||||
|
||||
walk(payload, (node) => {
|
||||
if (!isRecord(node) || typeof node.rest_id !== "string" || !isRecord(node.legacy)) {
|
||||
return false;
|
||||
}
|
||||
if (!hasTweetText(node)) {
|
||||
return false;
|
||||
}
|
||||
if (!firstMatch) {
|
||||
firstMatch = node;
|
||||
}
|
||||
return false;
|
||||
});
|
||||
|
||||
return firstMatch;
|
||||
}
|
||||
|
||||
export function getLegacy(tweet: JsonObject): JsonObject {
|
||||
return isRecord(tweet.legacy) ? tweet.legacy : emptyObject();
|
||||
}
|
||||
|
||||
export function unwrapTweetResult(node: unknown): JsonObject | null {
|
||||
if (!isRecord(node)) {
|
||||
return null;
|
||||
}
|
||||
|
||||
if (node.__typename === "TweetWithVisibilityResults" && isRecord(node.tweet)) {
|
||||
return unwrapTweetResult(node.tweet);
|
||||
}
|
||||
|
||||
const tweet = isRecord(node.tweet) ? (node.tweet as JsonObject) : node;
|
||||
if (typeof tweet.rest_id !== "string" || !isRecord(tweet.legacy)) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return tweet;
|
||||
}
|
||||
|
||||
export function getUser(tweet: JsonObject): XUser {
|
||||
const result =
|
||||
isRecord(tweet.core) &&
|
||||
isRecord(tweet.core.user_results) &&
|
||||
isRecord(tweet.core.user_results.result)
|
||||
? (tweet.core.user_results.result as JsonObject)
|
||||
: emptyObject();
|
||||
const legacy = isRecord(result.legacy) ? result.legacy : emptyObject();
|
||||
const core = isRecord(result.core) ? result.core : emptyObject();
|
||||
return {
|
||||
name:
|
||||
(typeof legacy.name === "string" ? legacy.name : undefined) ??
|
||||
(typeof core.name === "string" ? core.name : undefined),
|
||||
screenName:
|
||||
(typeof legacy.screen_name === "string" ? legacy.screen_name : undefined) ??
|
||||
(typeof core.screen_name === "string" ? core.screen_name : undefined),
|
||||
};
|
||||
}
|
||||
|
||||
function getNoteTweetResult(tweet: JsonObject): JsonObject | null {
|
||||
if (
|
||||
!isRecord(tweet.note_tweet) ||
|
||||
!isRecord(tweet.note_tweet.note_tweet_results) ||
|
||||
!isRecord(tweet.note_tweet.note_tweet_results.result)
|
||||
) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return tweet.note_tweet.note_tweet_results.result as JsonObject;
|
||||
}
|
||||
|
||||
function getNoteTweetText(tweet: JsonObject): string | undefined {
|
||||
const noteTweet = getNoteTweetResult(tweet);
|
||||
return typeof noteTweet?.text === "string" ? noteTweet.text : undefined;
|
||||
}
|
||||
|
||||
interface TweetUrlEntity {
|
||||
url: string;
|
||||
expandedUrl?: string;
|
||||
displayUrl?: string;
|
||||
}
|
||||
|
||||
function collectTweetUrlEntities(values: unknown[]): TweetUrlEntity[] {
|
||||
return values.reduce<TweetUrlEntity[]>((entities, value) => {
|
||||
if (!isRecord(value) || typeof value.url !== "string" || !value.url) {
|
||||
return entities;
|
||||
}
|
||||
|
||||
entities.push({
|
||||
url: value.url,
|
||||
expandedUrl: typeof value.expanded_url === "string" ? value.expanded_url : undefined,
|
||||
displayUrl: typeof value.display_url === "string" ? value.display_url : undefined,
|
||||
});
|
||||
|
||||
return entities;
|
||||
}, []);
|
||||
}
|
||||
|
||||
function getTweetUrlEntities(tweet: JsonObject): TweetUrlEntity[] {
|
||||
const noteTweet = getNoteTweetResult(tweet);
|
||||
const noteTweetEntitySet = noteTweet && isRecord(noteTweet.entity_set) ? noteTweet.entity_set : emptyObject();
|
||||
const noteTweetUrls = collectTweetUrlEntities(Array.isArray(noteTweetEntitySet.urls) ? noteTweetEntitySet.urls : []);
|
||||
|
||||
const legacy = getLegacy(tweet);
|
||||
const legacyEntities = isRecord(legacy.entities) ? legacy.entities : emptyObject();
|
||||
const legacyUrls = collectTweetUrlEntities(Array.isArray(legacyEntities.urls) ? legacyEntities.urls : []);
|
||||
|
||||
const seen = new Set<string>();
|
||||
return [...noteTweetUrls, ...legacyUrls].filter((value) => {
|
||||
if (seen.has(value.url)) {
|
||||
return false;
|
||||
}
|
||||
seen.add(value.url);
|
||||
return true;
|
||||
});
|
||||
}
|
||||
|
||||
export function getTweetText(tweet: JsonObject): string {
|
||||
const legacy = getLegacy(tweet);
|
||||
let text =
|
||||
getNoteTweetText(tweet) ?? (typeof legacy.full_text === "string" ? legacy.full_text : "");
|
||||
|
||||
for (const value of getTweetUrlEntities(tweet)) {
|
||||
const replacement =
|
||||
(typeof value.expandedUrl === "string" && value.expandedUrl) ||
|
||||
(typeof value.displayUrl === "string" && value.displayUrl) ||
|
||||
value.url;
|
||||
text = text.replaceAll(value.url, replacement);
|
||||
}
|
||||
|
||||
const extendedEntities = isRecord(legacy.extended_entities) ? legacy.extended_entities : emptyObject();
|
||||
const media = Array.isArray(extendedEntities.media) ? extendedEntities.media : [];
|
||||
for (const value of media) {
|
||||
if (isRecord(value) && typeof value.url === "string") {
|
||||
text = text.replaceAll(value.url, "").trim();
|
||||
}
|
||||
}
|
||||
|
||||
return text.replace(/\n{3,}/g, "\n\n").trim();
|
||||
}
|
||||
|
||||
function normalizeXImageExtension(raw: string | undefined | null): string | undefined {
|
||||
if (!raw) {
|
||||
return undefined;
|
||||
}
|
||||
|
||||
const normalized = raw.replace(/^\./, "").trim().toLowerCase();
|
||||
if (!normalized) {
|
||||
return undefined;
|
||||
}
|
||||
|
||||
return normalized === "jpeg" ? "jpg" : normalized;
|
||||
}
|
||||
|
||||
export function toHighResXImageUrl(rawUrl: string): string {
|
||||
try {
|
||||
const parsed = new URL(rawUrl);
|
||||
if (parsed.hostname.toLowerCase() !== "pbs.twimg.com") {
|
||||
return rawUrl;
|
||||
}
|
||||
|
||||
const pathExtension = normalizeXImageExtension(path.posix.extname(parsed.pathname));
|
||||
const format = normalizeXImageExtension(parsed.searchParams.get("format")) ?? pathExtension;
|
||||
if (!format || !X_IMAGE_EXTENSIONS.has(format)) {
|
||||
return rawUrl;
|
||||
}
|
||||
|
||||
if (pathExtension) {
|
||||
parsed.pathname = parsed.pathname.replace(new RegExp(`\\.${pathExtension}$`, "i"), "");
|
||||
}
|
||||
|
||||
parsed.searchParams.set("format", format);
|
||||
parsed.searchParams.set("name", "4096x4096");
|
||||
return parsed.toString();
|
||||
} catch {
|
||||
return rawUrl;
|
||||
}
|
||||
}
|
||||
|
||||
export function getTweetMedia(tweet: JsonObject): XMedia[] {
|
||||
const legacy = getLegacy(tweet);
|
||||
const extendedEntities = isRecord(legacy.extended_entities) ? legacy.extended_entities : emptyObject();
|
||||
const media = Array.isArray(extendedEntities.media) ? extendedEntities.media : [];
|
||||
|
||||
return media
|
||||
.map((value) => {
|
||||
if (!isRecord(value) || typeof value.type !== "string") {
|
||||
return null;
|
||||
}
|
||||
if (value.type === "photo" && typeof value.media_url_https === "string") {
|
||||
return {
|
||||
type: value.type,
|
||||
url: toHighResXImageUrl(value.media_url_https),
|
||||
alt: typeof value.ext_alt_text === "string" ? value.ext_alt_text : undefined,
|
||||
};
|
||||
}
|
||||
if ((value.type === "video" || value.type === "animated_gif") && typeof value.media_url_https === "string") {
|
||||
return {
|
||||
type: value.type,
|
||||
url: value.media_url_https,
|
||||
};
|
||||
}
|
||||
return null;
|
||||
})
|
||||
.filter((value): value is XMedia => value !== null);
|
||||
}
|
||||
|
||||
export function getTweetUrl(tweet: JsonObject, fallbackUrl: string): string {
|
||||
const user = getUser(tweet);
|
||||
const fallbackScreenName = extractScreenNameFromUrl(fallbackUrl);
|
||||
const id = typeof tweet.rest_id === "string" ? tweet.rest_id : "";
|
||||
const screenName = user.screenName ?? fallbackScreenName;
|
||||
if (screenName && id) {
|
||||
return `https://x.com/${screenName}/status/${id}`;
|
||||
}
|
||||
return fallbackUrl;
|
||||
}
|
||||
|
||||
export function getQuotedTweet(tweet: JsonObject, fallbackUrl: string): XQuotedTweet | undefined {
|
||||
const quoted = unwrapTweetResult(
|
||||
isRecord(tweet.quoted_status_result) ? tweet.quoted_status_result.result : null,
|
||||
);
|
||||
if (!quoted) {
|
||||
return undefined;
|
||||
}
|
||||
|
||||
const user = getUser(quoted);
|
||||
return {
|
||||
id: typeof quoted.rest_id === "string" ? quoted.rest_id : "",
|
||||
author: user.screenName,
|
||||
authorName: user.name,
|
||||
text: getTweetText(quoted),
|
||||
url: getTweetUrl(quoted, fallbackUrl),
|
||||
media: getTweetMedia(quoted),
|
||||
};
|
||||
}
|
||||
|
||||
export function extractScreenNameFromUrl(url: string): string | undefined {
|
||||
try {
|
||||
const parsed = new URL(url);
|
||||
const match = parsed.pathname.match(/^\/([^/]+)\/(?:status|article)\//);
|
||||
if (!match) {
|
||||
return undefined;
|
||||
}
|
||||
if (match[1] === "i") {
|
||||
return undefined;
|
||||
}
|
||||
return match[1];
|
||||
} catch {
|
||||
return undefined;
|
||||
}
|
||||
}
|
||||
|
||||
export function toXTweet(tweet: JsonObject, fallbackUrl: string): XTweet {
|
||||
const legacy = getLegacy(tweet);
|
||||
const user = getUser(tweet);
|
||||
const fallbackScreenName = extractScreenNameFromUrl(fallbackUrl);
|
||||
const screenName = user.screenName ?? fallbackScreenName;
|
||||
return {
|
||||
id: typeof tweet.rest_id === "string" ? tweet.rest_id : "",
|
||||
author: screenName,
|
||||
authorName: user.name,
|
||||
text: getTweetText(tweet),
|
||||
likes: typeof legacy.favorite_count === "number" ? legacy.favorite_count : 0,
|
||||
retweets: typeof legacy.retweet_count === "number" ? legacy.retweet_count : 0,
|
||||
replies: typeof legacy.reply_count === "number" ? legacy.reply_count : 0,
|
||||
createdAt: typeof legacy.created_at === "string" ? legacy.created_at : undefined,
|
||||
inReplyTo: typeof legacy.in_reply_to_status_id_str === "string" ? legacy.in_reply_to_status_id_str : undefined,
|
||||
url: getTweetUrl(tweet, fallbackUrl),
|
||||
media: getTweetMedia(tweet),
|
||||
quotedTweet: getQuotedTweet(tweet, fallbackUrl),
|
||||
};
|
||||
}
|
||||
|
||||
export function normalizeTitle(text: string, fallback: string): string {
|
||||
const firstLine = text.split("\n")[0]?.trim();
|
||||
if (!firstLine) {
|
||||
return fallback;
|
||||
}
|
||||
return firstLine.slice(0, 120);
|
||||
}
|
||||
|
||||
export function formatTweetAuthor(tweet: XTweet): string | undefined {
|
||||
if (tweet.author && tweet.authorName) {
|
||||
return `${tweet.authorName} (@${tweet.author})`;
|
||||
}
|
||||
if (tweet.author) {
|
||||
return `@${tweet.author}`;
|
||||
}
|
||||
return tweet.authorName;
|
||||
}
|
||||
|
||||
export function getTweetAuthorMetadata(tweet: XTweet): Record<string, unknown> {
|
||||
return {
|
||||
authorName: tweet.authorName,
|
||||
authorUsername: tweet.author,
|
||||
authorUrl: tweet.author ? `https://x.com/${tweet.author}` : undefined,
|
||||
};
|
||||
}
|
||||
|
||||
export function formatMediaList(media: XMedia[]): string[] {
|
||||
return media.map((item) => {
|
||||
if (item.type === "photo") {
|
||||
return `photo: ${item.url}`;
|
||||
}
|
||||
return `${item.type}: ${item.url}`;
|
||||
});
|
||||
}
|
||||
|
||||
export function filterXGraphQlEntries(entries: NetworkEntry[]): NetworkEntry[] {
|
||||
return entries.filter((entry) => entry.url.includes("/graphql/"));
|
||||
}
|
||||
|
|
@ -0,0 +1,87 @@
|
|||
import type { ExtractedDocument, ContentBlock } from "../../extract/document";
|
||||
import { findTweetNode, formatMediaList, formatTweetAuthor, getTweetAuthorMetadata, normalizeTitle, toXTweet } from "./shared";
|
||||
|
||||
export function extractSingleTweetDocumentFromPayload(
|
||||
payload: unknown,
|
||||
statusId: string,
|
||||
pageUrl: string,
|
||||
): ExtractedDocument | null {
|
||||
const tweet = findTweetNode(payload, statusId);
|
||||
if (!tweet) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const xTweet = toXTweet(tweet, pageUrl);
|
||||
const content: ContentBlock[] = [];
|
||||
|
||||
if (xTweet.text) {
|
||||
content.push({ type: "paragraph", text: xTweet.text });
|
||||
}
|
||||
|
||||
for (const mediaLine of formatMediaList(xTweet.media)) {
|
||||
if (mediaLine.startsWith("photo: ")) {
|
||||
content.push({
|
||||
type: "image",
|
||||
url: mediaLine.slice("photo: ".length),
|
||||
});
|
||||
} else {
|
||||
content.push({
|
||||
type: "list",
|
||||
ordered: false,
|
||||
items: [mediaLine],
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
if (xTweet.quotedTweet) {
|
||||
const quotedLines: string[] = [];
|
||||
const quotedAuthor =
|
||||
xTweet.quotedTweet.author && xTweet.quotedTweet.authorName
|
||||
? `${xTweet.quotedTweet.authorName} (@${xTweet.quotedTweet.author})`
|
||||
: xTweet.quotedTweet.author
|
||||
? `@${xTweet.quotedTweet.author}`
|
||||
: xTweet.quotedTweet.authorName;
|
||||
|
||||
if (quotedAuthor) {
|
||||
quotedLines.push(quotedAuthor);
|
||||
}
|
||||
if (xTweet.quotedTweet.text) {
|
||||
quotedLines.push(xTweet.quotedTweet.text);
|
||||
}
|
||||
quotedLines.push(...formatMediaList(xTweet.quotedTweet.media));
|
||||
|
||||
if (quotedLines.length > 0) {
|
||||
content.push({ type: "heading", depth: 2, text: "Quoted Tweet" });
|
||||
content.push({ type: "quote", text: quotedLines.join("\n\n") });
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
url: pageUrl,
|
||||
canonicalUrl: xTweet.url,
|
||||
title: normalizeTitle(
|
||||
xTweet.author ? `@${xTweet.author}: ${xTweet.text}` : xTweet.text,
|
||||
"Tweet",
|
||||
),
|
||||
author: formatTweetAuthor(xTweet),
|
||||
siteName: "X",
|
||||
publishedAt: xTweet.createdAt,
|
||||
summary: xTweet.text.slice(0, 200) || undefined,
|
||||
adapter: "x",
|
||||
metadata: {
|
||||
kind: "x/post",
|
||||
tweetId: xTweet.id,
|
||||
...getTweetAuthorMetadata(xTweet),
|
||||
conversationId:
|
||||
typeof tweet.legacy === "object" &&
|
||||
tweet.legacy !== null &&
|
||||
typeof (tweet.legacy as Record<string, unknown>).conversation_id_str === "string"
|
||||
? (tweet.legacy as Record<string, unknown>).conversation_id_str
|
||||
: undefined,
|
||||
favoriteCount: xTweet.likes,
|
||||
replyCount: xTweet.replies,
|
||||
retweetCount: xTweet.retweets,
|
||||
},
|
||||
content,
|
||||
};
|
||||
}
|
||||
|
|
@ -0,0 +1,286 @@
|
|||
import type { AdapterContext } from "../types";
|
||||
import { extractThreadTweetsFromPayloads } from "./thread";
|
||||
import { collectXJsonPayloads, getRelevantXThreadEntries, prefetchRelevantXThreadBodies } from "./payloads";
|
||||
|
||||
interface ClickTextResult {
|
||||
clicked: boolean;
|
||||
text?: string;
|
||||
}
|
||||
|
||||
interface ScrollStepResult {
|
||||
moved: boolean;
|
||||
atTop: boolean;
|
||||
atBottom: boolean;
|
||||
}
|
||||
|
||||
interface ThreadProgress {
|
||||
tweetCount: number;
|
||||
firstTweetId?: string;
|
||||
lastTweetId?: string;
|
||||
requestCount: number;
|
||||
tweetDetailCount: number;
|
||||
}
|
||||
|
||||
interface TopProbeState {
|
||||
requestCount: number;
|
||||
tweetDetailCount: number;
|
||||
scrollHeight: number;
|
||||
}
|
||||
|
||||
function sleep(ms: number): Promise<void> {
|
||||
return new Promise((resolve) => setTimeout(resolve, ms));
|
||||
}
|
||||
|
||||
async function waitForXNetworkSettle(context: AdapterContext, reason: string): Promise<void> {
|
||||
try {
|
||||
await context.network.waitForIdle({
|
||||
idleMs: 650,
|
||||
timeoutMs: Math.min(context.timeoutMs, 5_000),
|
||||
});
|
||||
} catch {
|
||||
context.log.debug(`Network idle timed out after ${reason}.`);
|
||||
}
|
||||
}
|
||||
|
||||
async function captureTopProbeState(context: AdapterContext): Promise<TopProbeState> {
|
||||
const entries = getRelevantXThreadEntries(context);
|
||||
const scrollHeight = await context.browser.evaluate<number>(`
|
||||
(() => {
|
||||
const scrollRoot = document.scrollingElement ?? document.documentElement ?? document.body;
|
||||
return scrollRoot.scrollHeight;
|
||||
})()
|
||||
`);
|
||||
|
||||
return {
|
||||
requestCount: entries.length,
|
||||
tweetDetailCount: entries.filter((entry) => entry.url.includes("TweetDetail")).length,
|
||||
scrollHeight,
|
||||
};
|
||||
}
|
||||
|
||||
async function waitForTopProbe(context: AdapterContext): Promise<boolean> {
|
||||
const initial = await captureTopProbeState(context);
|
||||
const deadline = Date.now() + 1_200;
|
||||
|
||||
while (Date.now() < deadline) {
|
||||
try {
|
||||
await context.network.waitForIdle({
|
||||
idleMs: 250,
|
||||
timeoutMs: 350,
|
||||
});
|
||||
} catch {
|
||||
// Keep polling until the shorter top-probe budget expires.
|
||||
}
|
||||
|
||||
await prefetchRelevantXThreadBodies(context);
|
||||
const next = await captureTopProbeState(context);
|
||||
if (
|
||||
next.requestCount > initial.requestCount ||
|
||||
next.tweetDetailCount > initial.tweetDetailCount ||
|
||||
next.scrollHeight > initial.scrollHeight + 4
|
||||
) {
|
||||
context.log.debug("Observed additional X thread activity while probing the page top.");
|
||||
return true;
|
||||
}
|
||||
|
||||
await sleep(120);
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
async function scrollThreadToTop(context: AdapterContext): Promise<void> {
|
||||
let settledTopChecks = 0;
|
||||
|
||||
while (settledTopChecks < 2) {
|
||||
const scroll = await context.browser.evaluate<ScrollStepResult>(`
|
||||
(() => {
|
||||
const scrollRoot = document.scrollingElement ?? document.documentElement ?? document.body;
|
||||
const before = window.scrollY;
|
||||
window.scrollTo({ top: 0, left: 0, behavior: "instant" });
|
||||
const after = window.scrollY;
|
||||
return {
|
||||
moved: after !== before,
|
||||
atTop: after <= 4,
|
||||
atBottom: window.innerHeight + after >= scrollRoot.scrollHeight - 4,
|
||||
};
|
||||
})()
|
||||
`);
|
||||
await sleep(140);
|
||||
await waitForXNetworkSettle(context, "scrolling X thread to top");
|
||||
await prefetchRelevantXThreadBodies(context);
|
||||
|
||||
if (scroll.moved) {
|
||||
settledTopChecks = 0;
|
||||
continue;
|
||||
}
|
||||
|
||||
const observedTopActivity = await waitForTopProbe(context);
|
||||
if (observedTopActivity) {
|
||||
settledTopChecks = 0;
|
||||
continue;
|
||||
}
|
||||
|
||||
settledTopChecks += 1;
|
||||
}
|
||||
}
|
||||
|
||||
async function clickVisibleShowReplies(context: AdapterContext): Promise<ClickTextResult> {
|
||||
return context.browser.evaluate<ClickTextResult>(`
|
||||
(() => {
|
||||
const normalize = (value) => value.replace(/\\s+/g, " ").trim();
|
||||
const matches = [
|
||||
/^Show replies$/i,
|
||||
/^Show more replies$/i,
|
||||
/^Show additional replies$/i,
|
||||
/^显示回复$/,
|
||||
/^展开回复$/,
|
||||
];
|
||||
const isVisible = (element) => {
|
||||
if (!(element instanceof HTMLElement)) {
|
||||
return false;
|
||||
}
|
||||
const rect = element.getBoundingClientRect();
|
||||
const style = window.getComputedStyle(element);
|
||||
return (
|
||||
rect.width > 0 &&
|
||||
rect.height > 0 &&
|
||||
style.visibility !== "hidden" &&
|
||||
style.display !== "none"
|
||||
);
|
||||
};
|
||||
|
||||
const selectors = [
|
||||
"a",
|
||||
"button",
|
||||
'[role="button"]',
|
||||
'[role="link"]',
|
||||
];
|
||||
|
||||
for (const element of document.querySelectorAll(selectors.join(","))) {
|
||||
if (!isVisible(element)) {
|
||||
continue;
|
||||
}
|
||||
const text = normalize(element.textContent ?? "");
|
||||
if (!text || !matches.some((pattern) => pattern.test(text))) {
|
||||
continue;
|
||||
}
|
||||
element.scrollIntoView({ block: "center", inline: "nearest" });
|
||||
if (element instanceof HTMLElement) {
|
||||
element.click();
|
||||
return { clicked: true, text };
|
||||
}
|
||||
}
|
||||
|
||||
return { clicked: false };
|
||||
})()
|
||||
`);
|
||||
}
|
||||
|
||||
async function expandVisibleShowReplies(context: AdapterContext): Promise<number> {
|
||||
let clickCount = 0;
|
||||
|
||||
while (clickCount < 8) {
|
||||
const result = await clickVisibleShowReplies(context).catch<ClickTextResult>(() => ({ clicked: false }));
|
||||
if (!result.clicked) {
|
||||
break;
|
||||
}
|
||||
|
||||
clickCount += 1;
|
||||
context.log.debug(`Expanded X thread replies via "${result.text ?? "Show replies"}".`);
|
||||
await sleep(250);
|
||||
await waitForXNetworkSettle(context, "expanding Show replies");
|
||||
await prefetchRelevantXThreadBodies(context);
|
||||
}
|
||||
|
||||
return clickCount;
|
||||
}
|
||||
|
||||
async function scrollThreadBy(context: AdapterContext, stepPx: number): Promise<ScrollStepResult> {
|
||||
const result = await context.browser.evaluate<ScrollStepResult>(`
|
||||
(() => {
|
||||
const scrollRoot = document.scrollingElement ?? document.documentElement ?? document.body;
|
||||
const before = window.scrollY;
|
||||
window.scrollBy({ top: ${stepPx}, left: 0, behavior: "instant" });
|
||||
const after = window.scrollY;
|
||||
return {
|
||||
moved: after !== before,
|
||||
atTop: after <= 4,
|
||||
atBottom: window.innerHeight + after >= scrollRoot.scrollHeight - 4,
|
||||
};
|
||||
})()
|
||||
`);
|
||||
|
||||
await sleep(140);
|
||||
await waitForXNetworkSettle(context, "scrolling X thread");
|
||||
await prefetchRelevantXThreadBodies(context);
|
||||
return result;
|
||||
}
|
||||
|
||||
async function captureThreadProgress(context: AdapterContext, statusId: string): Promise<ThreadProgress> {
|
||||
const entries = getRelevantXThreadEntries(context);
|
||||
const payloads = await collectXJsonPayloads(context);
|
||||
const tweets = extractThreadTweetsFromPayloads(payloads, statusId, context.input.url.toString());
|
||||
return {
|
||||
tweetCount: tweets.length,
|
||||
firstTweetId: tweets[0]?.id,
|
||||
lastTweetId: tweets[tweets.length - 1]?.id,
|
||||
requestCount: entries.length,
|
||||
tweetDetailCount: entries.filter((entry) => entry.url.includes("TweetDetail")).length,
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Scrolls an X thread page until no further thread content appears to load.
 *
 * Starting from the top, each round expands visible "show replies" controls,
 * scrolls down one step, expands again, and samples progress. The loop stops
 * when one of three stall conditions holds (see the break conditions below).
 */
export async function loadFullXThread(context: AdapterContext, statusId: string): Promise<void> {
  await scrollThreadToTop(context);

  let progress = await captureThreadProgress(context, statusId);
  // Consecutive rounds with no data growth and no replies expanded.
  let stagnantRounds = 0;
  // Consecutive rounds where scrolling did not actually move the page.
  let roundsWithoutMovement = 0;
  // Pixels scrolled since the last round that produced any thread activity.
  let distanceWithoutThreadActivityPx = 0;

  for (let round = 0; ; round += 1) {
    // Smaller steps for the first dozen rounds, larger ones afterwards.
    const stepPx = round < 12 ? 1_200 : 1_600;
    let expandedCount = await expandVisibleShowReplies(context);
    const scroll = await scrollThreadBy(context, stepPx);
    expandedCount += await expandVisibleShowReplies(context);
    const nextProgress = await captureThreadProgress(context, statusId);
    // Any observable change counts as growth: more tweets, shifted thread
    // boundaries, or additional relevant network requests.
    const grew =
      nextProgress.tweetCount > progress.tweetCount ||
      nextProgress.firstTweetId !== progress.firstTweetId ||
      nextProgress.lastTweetId !== progress.lastTweetId ||
      nextProgress.requestCount > progress.requestCount ||
      nextProgress.tweetDetailCount > progress.tweetDetailCount;

    if (grew) {
      context.log.debug(
        `X thread progress: ${nextProgress.tweetCount} tweets (${nextProgress.firstTweetId ?? "unknown"} -> ${nextProgress.lastTweetId ?? "unknown"}), ${nextProgress.requestCount} requests, ${nextProgress.tweetDetailCount} TweetDetail.`,
      );
      stagnantRounds = 0;
      distanceWithoutThreadActivityPx = 0;
    } else if (expandedCount > 0) {
      // Expanding replies counts as activity even before new data arrives.
      stagnantRounds = 0;
      distanceWithoutThreadActivityPx = 0;
    } else {
      stagnantRounds += 1;
      distanceWithoutThreadActivityPx += stepPx;
    }

    roundsWithoutMovement = scroll.moved ? 0 : roundsWithoutMovement + 1;
    progress = nextProgress;

    // Stop condition 1: at the bottom and nothing new for several rounds.
    if (scroll.atBottom && stagnantRounds >= 6) {
      context.log.debug("Stopping X thread scroll after reaching page bottom with no further thread progress.");
      break;
    }

    // Stop condition 2: the page no longer moves at all.
    if (roundsWithoutMovement >= 2 && stagnantRounds >= 4) {
      context.log.debug("Stopping X thread scroll after repeated downward scrolls no longer move the page.");
      break;
    }

    // Stop condition 3: a long scrolled distance with zero thread activity.
    if (distanceWithoutThreadActivityPx >= 24_000 && stagnantRounds >= 12) {
      context.log.debug("Stopping X thread scroll after a long stretch with no thread-related progress.");
      break;
    }
  }
}
|
||||
|
|
@ -0,0 +1,316 @@
|
|||
import type { ExtractedDocument } from "../../extract/document";
|
||||
import {
|
||||
formatMediaList,
|
||||
formatTweetAuthor,
|
||||
getLegacy,
|
||||
getTweetAuthorMetadata,
|
||||
isRecord,
|
||||
normalizeTitle,
|
||||
toXTweet,
|
||||
unwrapTweetResult,
|
||||
} from "./shared";
|
||||
import type { JsonObject, XQuotedTweet, XTweet } from "./types";
|
||||
|
||||
/**
 * XTweet enriched with the legacy identifiers needed to stitch a thread
 * together, plus a numeric timestamp for stable ordering.
 */
interface ParsedThreadTweet extends XTweet {
  // Author id, taken from legacy.user_id_str.
  userId?: string;
  // Conversation id, taken from legacy.conversation_id_str.
  conversationId?: string;
  // Taken from legacy.in_reply_to_user_id_str.
  inReplyToUserId?: string;
  // Epoch millis parsed from createdAt; 0 when missing or unparseable.
  sortTimestamp: number;
}
|
||||
|
||||
function compareTweetIds(left: string, right: string): number {
|
||||
try {
|
||||
const leftId = BigInt(left);
|
||||
const rightId = BigInt(right);
|
||||
if (leftId === rightId) {
|
||||
return 0;
|
||||
}
|
||||
return leftId < rightId ? -1 : 1;
|
||||
} catch {
|
||||
return left.localeCompare(right);
|
||||
}
|
||||
}
|
||||
|
||||
function toTimestamp(value: string | undefined): number {
|
||||
if (!value) {
|
||||
return 0;
|
||||
}
|
||||
const parsed = Date.parse(value);
|
||||
return Number.isNaN(parsed) ? 0 : parsed;
|
||||
}
|
||||
|
||||
function scoreParsedTweet(tweet: ParsedThreadTweet): number {
|
||||
return (
|
||||
(tweet.text ? 4 : 0) +
|
||||
(tweet.author ? 2 : 0) +
|
||||
(tweet.authorName ? 2 : 0) +
|
||||
(tweet.media.length > 0 ? 1 : 0)
|
||||
);
|
||||
}
|
||||
|
||||
function toParsedThreadTweet(tweet: JsonObject, pageUrl: string): ParsedThreadTweet {
|
||||
const legacy = getLegacy(tweet);
|
||||
const xTweet = toXTweet(tweet, pageUrl);
|
||||
|
||||
return {
|
||||
...xTweet,
|
||||
userId: typeof legacy.user_id_str === "string" ? legacy.user_id_str : undefined,
|
||||
conversationId: typeof legacy.conversation_id_str === "string" ? legacy.conversation_id_str : undefined,
|
||||
inReplyToUserId: typeof legacy.in_reply_to_user_id_str === "string" ? legacy.in_reply_to_user_id_str : undefined,
|
||||
sortTimestamp: toTimestamp(xTweet.createdAt),
|
||||
};
|
||||
}
|
||||
|
||||
function collectTweetFromItemContent(
|
||||
itemContent: unknown,
|
||||
pageUrl: string,
|
||||
tweets: Map<string, ParsedThreadTweet>,
|
||||
): void {
|
||||
if (!isRecord(itemContent)) {
|
||||
return;
|
||||
}
|
||||
|
||||
const tweet = unwrapTweetResult(
|
||||
isRecord(itemContent.tweet_results) ? itemContent.tweet_results.result : null,
|
||||
);
|
||||
if (!tweet || typeof tweet.rest_id !== "string") {
|
||||
return;
|
||||
}
|
||||
|
||||
const parsed = toParsedThreadTweet(tweet, pageUrl);
|
||||
const existing = tweets.get(parsed.id);
|
||||
if (!existing || scoreParsedTweet(parsed) >= scoreParsedTweet(existing)) {
|
||||
tweets.set(parsed.id, parsed);
|
||||
}
|
||||
}
|
||||
|
||||
function collectTweetsFromItems(
|
||||
items: unknown,
|
||||
pageUrl: string,
|
||||
tweets: Map<string, ParsedThreadTweet>,
|
||||
): void {
|
||||
if (!Array.isArray(items)) {
|
||||
return;
|
||||
}
|
||||
|
||||
for (const item of items) {
|
||||
if (!isRecord(item)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (isRecord(item.item) && isRecord(item.item.itemContent)) {
|
||||
collectTweetFromItemContent(item.item.itemContent, pageUrl, tweets);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (isRecord(item.itemContent)) {
|
||||
collectTweetFromItemContent(item.itemContent, pageUrl, tweets);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
function getInstructions(payload: unknown): unknown[] {
|
||||
if (!isRecord(payload) || !isRecord(payload.data)) {
|
||||
return [];
|
||||
}
|
||||
|
||||
const { data } = payload;
|
||||
return (
|
||||
(isRecord(data.threaded_conversation_with_injections_v2) &&
|
||||
Array.isArray(data.threaded_conversation_with_injections_v2.instructions)
|
||||
? data.threaded_conversation_with_injections_v2.instructions
|
||||
: undefined) ??
|
||||
(isRecord(data.threaded_conversation_with_injections) &&
|
||||
Array.isArray(data.threaded_conversation_with_injections.instructions)
|
||||
? data.threaded_conversation_with_injections.instructions
|
||||
: undefined) ??
|
||||
(isRecord(data.tweetResult) &&
|
||||
isRecord(data.tweetResult.result) &&
|
||||
isRecord(data.tweetResult.result.timeline) &&
|
||||
Array.isArray(data.tweetResult.result.timeline.instructions)
|
||||
? data.tweetResult.result.timeline.instructions
|
||||
: [])
|
||||
);
|
||||
}
|
||||
|
||||
function parseTweetDetailPayload(payload: unknown, pageUrl: string): ParsedThreadTweet[] {
|
||||
const tweets = new Map<string, ParsedThreadTweet>();
|
||||
|
||||
const instructions = getInstructions(payload);
|
||||
for (const instruction of instructions) {
|
||||
if (!isRecord(instruction)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
collectTweetsFromItems(instruction.moduleItems, pageUrl, tweets);
|
||||
|
||||
if (!Array.isArray(instruction.entries)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
for (const entry of instruction.entries) {
|
||||
if (!isRecord(entry)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const content = isRecord(entry.content) ? entry.content : {};
|
||||
collectTweetFromItemContent(content.itemContent, pageUrl, tweets);
|
||||
collectTweetsFromItems(content.items, pageUrl, tweets);
|
||||
}
|
||||
}
|
||||
|
||||
return Array.from(tweets.values());
|
||||
}
|
||||
|
||||
/**
 * Reconstructs the author's continuous thread rooted at `statusId` from a
 * flat list of parsed tweets.
 *
 * Steps: dedupe by id (keeping the most complete copy), restrict to the root
 * author's tweets in the root's conversation, index self-replies by parent,
 * then walk upward to any known ancestors and downward along the earliest
 * unseen self-reply at each step. Returns [] when the root tweet or its
 * author/conversation ids are unavailable.
 */
function buildContinuousThread(tweets: ParsedThreadTweet[], statusId: string): ParsedThreadTweet[] {
  // Dedupe by tweet id, keeping the highest-scoring (most complete) copy.
  const byId = new Map<string, ParsedThreadTweet>();
  for (const tweet of tweets) {
    const existing = byId.get(tweet.id);
    if (!existing || scoreParsedTweet(tweet) >= scoreParsedTweet(existing)) {
      byId.set(tweet.id, tweet);
    }
  }

  // Without the root's author/conversation ids we cannot decide which tweets
  // belong to the thread.
  const rootTweet = byId.get(statusId);
  if (!rootTweet?.userId || !rootTweet.conversationId) {
    return [];
  }

  // Thread candidates: the root itself plus same-author tweets in the same
  // conversation.
  const candidates = Array.from(byId.values()).filter(
    (tweet) =>
      tweet.id === statusId ||
      (tweet.userId === rootTweet.userId && tweet.conversationId === rootTweet.conversationId),
  );

  // Index candidate replies by parent id, ordered by timestamp then by id so
  // the oldest self-reply is always chosen first.
  const repliesByParent = new Map<string, ParsedThreadTweet[]>();
  for (const tweet of candidates) {
    if (!tweet.inReplyTo || tweet.id === statusId) {
      continue;
    }
    const bucket = repliesByParent.get(tweet.inReplyTo) ?? [];
    bucket.push(tweet);
    bucket.sort((left, right) => {
      if (left.sortTimestamp !== right.sortTimestamp) {
        return left.sortTimestamp - right.sortTimestamp;
      }
      return compareTweetIds(left.id, right.id);
    });
    repliesByParent.set(tweet.inReplyTo, bucket);
  }

  // Walk upward from the root through any ancestors we happen to have (the
  // root may itself be a reply). ancestorSeen guards against cycles.
  const ancestorPath: ParsedThreadTweet[] = [rootTweet];
  const ancestorSeen = new Set<string>([rootTweet.id]);
  let currentAncestor = rootTweet;

  while (currentAncestor.inReplyTo) {
    const parent = byId.get(currentAncestor.inReplyTo);
    if (!parent || ancestorSeen.has(parent.id)) {
      break;
    }
    ancestorPath.unshift(parent);
    ancestorSeen.add(parent.id);
    currentAncestor = parent;
  }

  // Walk downward from the root, taking the earliest unseen self-reply at
  // each step, producing one continuous chain.
  const chain = ancestorPath.slice();
  const seen = new Set<string>(chain.map((tweet) => tweet.id));
  let currentId = rootTweet.id;

  while (true) {
    const next = (repliesByParent.get(currentId) ?? []).find((tweet) => !seen.has(tweet.id));
    if (!next) {
      break;
    }
    chain.push(next);
    seen.add(next.id);
    currentId = next.id;
  }

  return chain;
}
|
||||
|
||||
export function extractThreadTweetsFromPayloads(
|
||||
payloads: unknown[],
|
||||
statusId: string,
|
||||
pageUrl: string,
|
||||
): XTweet[] {
|
||||
const parsedTweets: ParsedThreadTweet[] = [];
|
||||
|
||||
for (const payload of payloads) {
|
||||
parsedTweets.push(...parseTweetDetailPayload(payload, pageUrl));
|
||||
}
|
||||
|
||||
return buildContinuousThread(parsedTweets, statusId).map(({ sortTimestamp: _sortTimestamp, ...tweet }) => tweet);
|
||||
}
|
||||
|
||||
function buildQuotedTweetMarkdown(quotedTweet: XQuotedTweet): string {
|
||||
const author = quotedTweet.author ? `@${quotedTweet.author}` : "Unknown";
|
||||
const name = quotedTweet.authorName ? `${quotedTweet.authorName} ` : "";
|
||||
const lines: string[] = [`Quoted Tweet${quotedTweet.author || quotedTweet.authorName ? `: ${name}${author}`.trim() : ""}`];
|
||||
|
||||
if (quotedTweet.text) {
|
||||
lines.push(...quotedTweet.text.split("\n"));
|
||||
}
|
||||
|
||||
for (const mediaLine of formatMediaList(quotedTweet.media)) {
|
||||
lines.push(mediaLine);
|
||||
}
|
||||
|
||||
return lines.map((line) => (line ? `> ${line}` : ">")).join("\n");
|
||||
}
|
||||
|
||||
function buildThreadMarkdown(tweets: XTweet[]): string {
|
||||
return tweets
|
||||
.map((tweet, index) => {
|
||||
const lines: string[] = [];
|
||||
const author = tweet.author ? `@${tweet.author}` : "Unknown";
|
||||
const name = tweet.authorName ? `${tweet.authorName} ` : "";
|
||||
lines.push(`## ${index + 1}. ${name}${author}`.trim());
|
||||
if (tweet.createdAt) {
|
||||
lines.push(`_Published: ${tweet.createdAt}_`);
|
||||
}
|
||||
lines.push(tweet.text || "(No text)");
|
||||
const mediaLines = formatMediaList(tweet.media);
|
||||
if (mediaLines.length > 0) {
|
||||
lines.push(mediaLines.map((line) => `- ${line}`).join("\n"));
|
||||
}
|
||||
if (tweet.quotedTweet) {
|
||||
lines.push(buildQuotedTweetMarkdown(tweet.quotedTweet));
|
||||
}
|
||||
return lines.join("\n\n");
|
||||
})
|
||||
.join("\n\n");
|
||||
}
|
||||
|
||||
export function extractThreadDocumentFromPayloads(
|
||||
payloads: unknown[],
|
||||
statusId: string,
|
||||
pageUrl: string,
|
||||
): ExtractedDocument | null {
|
||||
const tweets = extractThreadTweetsFromPayloads(payloads, statusId, pageUrl);
|
||||
if (tweets.length <= 1) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const rootTweet = tweets[0];
|
||||
const rootAuthor = formatTweetAuthor(rootTweet);
|
||||
|
||||
return {
|
||||
url: pageUrl,
|
||||
canonicalUrl: rootTweet.url,
|
||||
title: normalizeTitle(rootTweet.text, "X Thread"),
|
||||
author: rootAuthor,
|
||||
siteName: "X",
|
||||
publishedAt: rootTweet.createdAt,
|
||||
summary: rootTweet.text.slice(0, 200) || undefined,
|
||||
adapter: "x",
|
||||
metadata: {
|
||||
kind: "x/thread",
|
||||
tweetId: rootTweet.id,
|
||||
tweetCount: tweets.length,
|
||||
lastTweetId: tweets[tweets.length - 1]?.id,
|
||||
...getTweetAuthorMetadata(rootTweet),
|
||||
},
|
||||
content: [{ type: "markdown", markdown: buildThreadMarkdown(tweets) }],
|
||||
};
|
||||
}
|
||||
|
|
@ -0,0 +1,36 @@
|
|||
/** Loosely-typed JSON object, as used for raw X GraphQL payload fragments. */
export type JsonObject = Record<string, unknown>;

/** Minimal author information for a tweet. */
export interface XUser {
  // Display name.
  name?: string;
  // Handle without the leading "@".
  screenName?: string;
}

/** A single media attachment on a tweet. */
export interface XMedia {
  // Media kind string (free-form, as provided by the source payload).
  type: string;
  // Direct URL of the media asset.
  url: string;
  // Alt text, when available.
  alt?: string;
}

/** A tweet quoted inside another tweet. */
export interface XQuotedTweet {
  id: string;
  // Handle without the leading "@".
  author?: string;
  // Display name.
  authorName?: string;
  text: string;
  url: string;
  media: XMedia[];
}

/** A fully-parsed tweet. */
export interface XTweet {
  id: string;
  // Handle without the leading "@".
  author?: string;
  // Display name.
  authorName?: string;
  text: string;
  likes: number;
  retweets: number;
  replies: number;
  // Creation time as provided by the payload, when available.
  createdAt?: string;
  // Id of the tweet this one replies to, when it is a reply.
  inReplyTo?: string;
  url: string;
  media: XMedia[];
  quotedTweet?: XQuotedTweet;
}
|
||||
|
|
@ -0,0 +1,33 @@
|
|||
import type { Adapter } from "../types";
|
||||
import { collectMediaFromDocument } from "../../media/markdown-media";
|
||||
import { extractYouTubeTranscriptDocument } from "./transcript";
|
||||
import { isYouTubeHost, parseYouTubeVideoId } from "./utils";
|
||||
|
||||
export const youtubeAdapter: Adapter = {
|
||||
name: "youtube",
|
||||
match(input) {
|
||||
return isYouTubeHost(input.url.hostname);
|
||||
},
|
||||
async process(context) {
|
||||
const videoId = parseYouTubeVideoId(context.input.url);
|
||||
if (!videoId) {
|
||||
return {
|
||||
status: "no_document",
|
||||
};
|
||||
}
|
||||
|
||||
context.log.info(`Loading ${context.input.url.toString()} with youtube adapter`);
|
||||
const document = await extractYouTubeTranscriptDocument(context, videoId);
|
||||
if (!document) {
|
||||
return {
|
||||
status: "no_document",
|
||||
};
|
||||
}
|
||||
|
||||
return {
|
||||
status: "ok",
|
||||
document,
|
||||
media: collectMediaFromDocument(document),
|
||||
};
|
||||
},
|
||||
};
|
||||
|
|
@ -0,0 +1,392 @@
|
|||
import type { ExtractedDocument } from "../../extract/document";
|
||||
import { detectInteractionGate } from "../../browser/interaction-gates";
|
||||
import {
|
||||
buildYouTubeThumbnailCandidates,
|
||||
parseYouTubeDescriptionChapters,
|
||||
renderYouTubeTranscriptMarkdown,
|
||||
type YouTubeChapter,
|
||||
type YouTubeTranscriptSegment,
|
||||
} from "./utils";
|
||||
|
||||
/**
 * Video metadata plus the selected caption track, gathered in-page by the
 * first browser.evaluate call in extractYouTubeTranscriptDocument.
 */
interface CaptionInfo {
  // URL of the chosen caption track's XML payload.
  captionUrl: string;
  // Language code of the chosen track.
  language: string;
  // Track kind; "asr" for auto-generated tracks, otherwise "manual".
  kind: string;
  // Human-readable labels for every available caption track.
  available: string[];
  title?: string;
  author?: string;
  authorUrl?: string;
  channelId?: string;
  description?: string;
  publishedAt?: string;
  viewCount?: number;
  durationSeconds?: number;
  keywords: string[];
  category?: string;
  isLiveContent?: boolean;
  // Thumbnail URLs sorted by pixel area, largest first.
  coverImages: string[];
}
|
||||
|
||||
function normalizeUrl(url: string | undefined): string | undefined {
|
||||
if (!url) {
|
||||
return undefined;
|
||||
}
|
||||
|
||||
try {
|
||||
const parsed = new URL(url);
|
||||
if (parsed.protocol === "http:") {
|
||||
parsed.protocol = "https:";
|
||||
}
|
||||
return parsed.toString();
|
||||
} catch {
|
||||
return url;
|
||||
}
|
||||
}
|
||||
|
||||
function buildSummary(description: string | undefined, segments: YouTubeTranscriptSegment[]): string | undefined {
|
||||
const descriptionSummary = description
|
||||
?.replace(/\r\n/g, "\n")
|
||||
.split("\n")
|
||||
.map((line) => line.trim())
|
||||
.find((line) => line && !/^https?:\/\//i.test(line));
|
||||
|
||||
if (descriptionSummary) {
|
||||
return descriptionSummary.slice(0, 240);
|
||||
}
|
||||
|
||||
const transcriptSummary = segments
|
||||
.slice(0, 8)
|
||||
.map((segment) => segment.text)
|
||||
.join(" ")
|
||||
.slice(0, 240)
|
||||
.trim();
|
||||
|
||||
return transcriptSummary || undefined;
|
||||
}
|
||||
|
||||
async function canFetchThumbnail(url: string): Promise<boolean> {
|
||||
try {
|
||||
const response = await fetch(url, { method: "HEAD", redirect: "follow" });
|
||||
if (response.ok) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (response.status === 405) {
|
||||
const fallbackResponse = await fetch(url, {
|
||||
method: "GET",
|
||||
headers: { Range: "bytes=0-0" },
|
||||
redirect: "follow",
|
||||
});
|
||||
return fallbackResponse.ok;
|
||||
}
|
||||
} catch {
|
||||
return false;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
async function resolveBestCoverImage(videoId: string, coverImages: string[]): Promise<string | undefined> {
|
||||
const candidates = buildYouTubeThumbnailCandidates(videoId, coverImages);
|
||||
|
||||
for (const candidate of candidates) {
|
||||
if (await canFetchThumbnail(candidate)) {
|
||||
return candidate;
|
||||
}
|
||||
}
|
||||
|
||||
return candidates[0];
|
||||
}
|
||||
|
||||
export async function extractYouTubeTranscriptDocument(
|
||||
context: Parameters<import("../types").Adapter["process"]>[0],
|
||||
videoId: string,
|
||||
): Promise<ExtractedDocument | null> {
|
||||
const videoUrl = `https://www.youtube.com/watch?v=${videoId}`;
|
||||
await context.browser.goto(videoUrl, context.timeoutMs);
|
||||
|
||||
const interaction = await detectInteractionGate(context.browser);
|
||||
if (interaction) {
|
||||
context.log.debug(`Interaction gate detected on YouTube: ${interaction.provider}`);
|
||||
return null;
|
||||
}
|
||||
|
||||
try {
|
||||
await context.network.waitForIdle({
|
||||
idleMs: 1_000,
|
||||
timeoutMs: Math.min(context.timeoutMs, 8_000),
|
||||
});
|
||||
} catch {
|
||||
context.log.debug("Network idle timed out on YouTube load.");
|
||||
}
|
||||
|
||||
const captionInfo = await context.browser.evaluate<CaptionInfo | { error: string }>(`
|
||||
(async () => {
|
||||
function readText(value) {
|
||||
if (!value) return undefined;
|
||||
if (typeof value === 'string') {
|
||||
const text = value.trim();
|
||||
return text || undefined;
|
||||
}
|
||||
if (typeof value.simpleText === 'string') {
|
||||
const text = value.simpleText.trim();
|
||||
return text || undefined;
|
||||
}
|
||||
if (Array.isArray(value.runs)) {
|
||||
const text = value.runs
|
||||
.map((run) => typeof run?.text === 'string' ? run.text : '')
|
||||
.join('')
|
||||
.trim();
|
||||
return text || undefined;
|
||||
}
|
||||
return undefined;
|
||||
}
|
||||
|
||||
function parsePositiveInteger(value) {
|
||||
if (typeof value === 'number' && Number.isFinite(value) && value >= 0) {
|
||||
return Math.floor(value);
|
||||
}
|
||||
if (typeof value !== 'string') {
|
||||
return undefined;
|
||||
}
|
||||
const normalized = value.replace(/[^\\d]/g, '');
|
||||
if (!normalized) {
|
||||
return undefined;
|
||||
}
|
||||
const parsed = Number.parseInt(normalized, 10);
|
||||
return Number.isFinite(parsed) ? parsed : undefined;
|
||||
}
|
||||
|
||||
const apiKey = window.ytcfg?.data_?.INNERTUBE_API_KEY;
|
||||
const playerResponse = window.ytInitialPlayerResponse;
|
||||
const videoDetails = playerResponse?.videoDetails || {};
|
||||
const microformat = playerResponse?.microformat?.playerMicroformatRenderer || {};
|
||||
const title =
|
||||
videoDetails.title ||
|
||||
readText(microformat.title) ||
|
||||
document.title.replace(/ - YouTube$/, '').trim();
|
||||
const author =
|
||||
videoDetails.author ||
|
||||
microformat.ownerChannelName ||
|
||||
document.querySelector('link[itemprop="name"]')?.getAttribute('content') ||
|
||||
undefined;
|
||||
const authorUrl =
|
||||
microformat.ownerProfileUrl ||
|
||||
(typeof videoDetails.channelId === 'string' && videoDetails.channelId
|
||||
? 'https://www.youtube.com/channel/' + videoDetails.channelId
|
||||
: undefined);
|
||||
const description =
|
||||
readText(microformat.description) ||
|
||||
(typeof videoDetails.shortDescription === 'string' ? videoDetails.shortDescription.trim() : undefined);
|
||||
const keywords = Array.isArray(videoDetails.keywords)
|
||||
? videoDetails.keywords.filter((keyword) => typeof keyword === 'string' && keyword.trim())
|
||||
: [];
|
||||
const thumbnails = [
|
||||
...(Array.isArray(videoDetails.thumbnail?.thumbnails) ? videoDetails.thumbnail.thumbnails : []),
|
||||
...(Array.isArray(microformat.thumbnail?.thumbnails) ? microformat.thumbnail.thumbnails : []),
|
||||
]
|
||||
.filter((thumbnail) => typeof thumbnail?.url === 'string' && thumbnail.url)
|
||||
.sort((left, right) => ((right?.width || 0) * (right?.height || 0)) - ((left?.width || 0) * (left?.height || 0)))
|
||||
.map((thumbnail) => thumbnail.url);
|
||||
|
||||
if (!apiKey) {
|
||||
return { error: 'INNERTUBE_API_KEY not found on page' };
|
||||
}
|
||||
|
||||
const response = await fetch('/youtubei/v1/player?key=' + apiKey + '&prettyPrint=false', {
|
||||
method: 'POST',
|
||||
credentials: 'include',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({
|
||||
context: { client: { clientName: 'ANDROID', clientVersion: '20.10.38' } },
|
||||
videoId: ${JSON.stringify(videoId)}
|
||||
})
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
return { error: 'InnerTube player API returned HTTP ' + response.status };
|
||||
}
|
||||
|
||||
const data = await response.json();
|
||||
const renderer = data.captions?.playerCaptionsTracklistRenderer;
|
||||
if (!renderer?.captionTracks?.length) {
|
||||
return { error: 'No captions available for this video' };
|
||||
}
|
||||
|
||||
const tracks = renderer.captionTracks;
|
||||
const track = tracks.find((item) => item.kind !== 'asr') || tracks[0];
|
||||
|
||||
return {
|
||||
captionUrl: track.baseUrl,
|
||||
language: track.languageCode,
|
||||
kind: track.kind || 'manual',
|
||||
available: tracks.map((item) => {
|
||||
const languageLabel = readText(item.name) || item.languageCode;
|
||||
return item.kind === 'asr'
|
||||
? languageLabel + ' [' + item.languageCode + ', auto]'
|
||||
: languageLabel + ' [' + item.languageCode + ']';
|
||||
}),
|
||||
title,
|
||||
author,
|
||||
authorUrl,
|
||||
channelId: typeof videoDetails.channelId === 'string' ? videoDetails.channelId : undefined,
|
||||
description,
|
||||
publishedAt:
|
||||
(typeof microformat.publishDate === 'string' && microformat.publishDate) ||
|
||||
(typeof microformat.uploadDate === 'string' && microformat.uploadDate) ||
|
||||
document.querySelector('meta[itemprop="datePublished"]')?.getAttribute('content') ||
|
||||
undefined,
|
||||
viewCount: parsePositiveInteger(videoDetails.viewCount) ?? parsePositiveInteger(microformat.viewCount),
|
||||
durationSeconds: parsePositiveInteger(videoDetails.lengthSeconds),
|
||||
keywords,
|
||||
category: typeof microformat.category === 'string' ? microformat.category : undefined,
|
||||
isLiveContent: Boolean(videoDetails.isLiveContent || microformat.isLiveContent),
|
||||
coverImages: thumbnails,
|
||||
};
|
||||
})()
|
||||
`);
|
||||
|
||||
if ("error" in captionInfo) {
|
||||
context.log.debug(`YouTube transcript unavailable: ${captionInfo.error}`);
|
||||
return null;
|
||||
}
|
||||
|
||||
const segments = await context.browser.evaluate<YouTubeTranscriptSegment[] | { error: string }>(`
|
||||
(async () => {
|
||||
const response = await fetch(${JSON.stringify(captionInfo.captionUrl)});
|
||||
const xml = await response.text();
|
||||
if (!xml) {
|
||||
return { error: 'Caption XML is empty' };
|
||||
}
|
||||
|
||||
function getAttr(tag, name) {
|
||||
const needle = name + '="';
|
||||
const index = tag.indexOf(needle);
|
||||
if (index === -1) return '';
|
||||
const valueStart = index + needle.length;
|
||||
const valueEnd = tag.indexOf('"', valueStart);
|
||||
if (valueEnd === -1) return '';
|
||||
return tag.substring(valueStart, valueEnd);
|
||||
}
|
||||
|
||||
function decodeEntities(value) {
|
||||
return value
|
||||
.replaceAll('&', '&')
|
||||
.replaceAll('<', '<')
|
||||
.replaceAll('>', '>')
|
||||
.replaceAll('"', '"')
|
||||
.replaceAll(''', "'");
|
||||
}
|
||||
|
||||
const marker = xml.includes('<p t="') ? '<p ' : '<text ';
|
||||
const endMarker = marker === '<p ' ? '</p>' : '</text>';
|
||||
const results = [];
|
||||
let position = 0;
|
||||
|
||||
while (true) {
|
||||
const tagStart = xml.indexOf(marker, position);
|
||||
if (tagStart === -1) break;
|
||||
let contentStart = xml.indexOf('>', tagStart);
|
||||
if (contentStart === -1) break;
|
||||
contentStart += 1;
|
||||
const tagEnd = xml.indexOf(endMarker, contentStart);
|
||||
if (tagEnd === -1) break;
|
||||
|
||||
const attrString = xml.substring(tagStart + marker.length, contentStart - 1);
|
||||
const content = xml.substring(contentStart, tagEnd);
|
||||
const start = marker === '<p '
|
||||
? (parseFloat(getAttr(attrString, 't')) || 0) / 1000
|
||||
: (parseFloat(getAttr(attrString, 'start')) || 0);
|
||||
const duration = marker === '<p '
|
||||
? (parseFloat(getAttr(attrString, 'd')) || 0) / 1000
|
||||
: (parseFloat(getAttr(attrString, 'dur')) || 0);
|
||||
const text = decodeEntities(content.replace(/<[^>]+>/g, '')).split('\\n').join(' ').trim();
|
||||
if (text) {
|
||||
results.push({ start, end: start + duration, text });
|
||||
}
|
||||
|
||||
position = tagEnd + endMarker.length;
|
||||
}
|
||||
|
||||
if (results.length === 0) {
|
||||
return { error: 'Parsed 0 transcript segments' };
|
||||
}
|
||||
return results;
|
||||
})()
|
||||
`);
|
||||
|
||||
if (!Array.isArray(segments) || segments.length === 0) {
|
||||
context.log.debug("Parsed no YouTube transcript segments.");
|
||||
return null;
|
||||
}
|
||||
|
||||
const extractedChapters = await context.browser.evaluate<YouTubeChapter[]>(`
|
||||
(() => {
|
||||
const data = window.ytInitialData;
|
||||
const markers = data?.playerOverlays?.playerOverlayRenderer
|
||||
?.decoratedPlayerBarRenderer?.decoratedPlayerBarRenderer
|
||||
?.playerBar?.multiMarkersPlayerBarRenderer?.markersMap || [];
|
||||
const results = [];
|
||||
|
||||
for (const marker of markers) {
|
||||
const chapters = marker?.value?.chapters;
|
||||
if (!Array.isArray(chapters)) continue;
|
||||
for (const chapter of chapters) {
|
||||
const renderer = chapter?.chapterRenderer;
|
||||
const title = renderer?.title?.simpleText;
|
||||
const timeRangeStartMillis = renderer?.timeRangeStartMillis;
|
||||
if (title && typeof timeRangeStartMillis === 'number') {
|
||||
results.push({ title, time: Math.floor(timeRangeStartMillis / 1000) });
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return results;
|
||||
})()
|
||||
`).catch(() => []);
|
||||
|
||||
const descriptionChapters = parseYouTubeDescriptionChapters(captionInfo.description);
|
||||
const chapters = extractedChapters.length > 0 ? extractedChapters : descriptionChapters;
|
||||
const markdown = renderYouTubeTranscriptMarkdown({
|
||||
description: captionInfo.description,
|
||||
segments,
|
||||
chapters,
|
||||
});
|
||||
|
||||
if (!markdown) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const pageUrl = await context.browser.getURL();
|
||||
const coverImage = await resolveBestCoverImage(videoId, captionInfo.coverImages);
|
||||
const summary = buildSummary(captionInfo.description, segments);
|
||||
|
||||
return {
|
||||
url: pageUrl,
|
||||
canonicalUrl: pageUrl,
|
||||
title: captionInfo.title || "YouTube Transcript",
|
||||
author: captionInfo.author,
|
||||
publishedAt: captionInfo.publishedAt,
|
||||
siteName: "YouTube",
|
||||
summary,
|
||||
adapter: "youtube",
|
||||
metadata: {
|
||||
kind: "youtube/transcript",
|
||||
videoId,
|
||||
authorUrl: normalizeUrl(captionInfo.authorUrl),
|
||||
channelId: captionInfo.channelId,
|
||||
coverImage,
|
||||
description: captionInfo.description,
|
||||
durationSeconds: captionInfo.durationSeconds,
|
||||
language: captionInfo.language,
|
||||
captionKind: captionInfo.kind,
|
||||
availableLanguages: captionInfo.available,
|
||||
viewCount: captionInfo.viewCount,
|
||||
keywords: captionInfo.keywords,
|
||||
category: captionInfo.category,
|
||||
isLiveContent: captionInfo.isLiveContent,
|
||||
chapterCount: chapters.length,
|
||||
},
|
||||
content: [{ type: "markdown", markdown }],
|
||||
};
|
||||
}
|
||||
|
|
@ -0,0 +1,253 @@
|
|||
/** One timed caption segment; start/end are seconds from the video start. */
export interface YouTubeTranscriptSegment {
  start: number;
  end: number;
  text: string;
}

/** A chapter marker: title plus start offset in whole seconds. */
export interface YouTubeChapter {
  title: string;
  time: number;
}

/** Input bundle for renderYouTubeTranscriptMarkdown. */
interface RenderYouTubeTranscriptMarkdownInput {
  description?: string;
  segments: YouTubeTranscriptSegment[];
  chapters: YouTubeChapter[];
}

// Matches description lines such as "1:23 Title", "01:02:03 - Title", or
// "0:00 | Title"; group 1 is the timestamp, group 2 the chapter title.
const DESCRIPTION_CHAPTER_RE = /^((?:\d{1,2}:)?\d{1,2}:\d{2})(?:\s+[-|:]\s+|\s+)(.+)$/;
// Thumbnail filename variants, ordered from highest to lowest resolution.
const YOUTUBE_THUMBNAIL_VARIANTS = [
  "maxresdefault.jpg",
  "sddefault.jpg",
  "hqdefault.jpg",
  "mqdefault.jpg",
  "default.jpg",
];
|
||||
|
||||
export function isYouTubeHost(hostname: string): boolean {
|
||||
return [
|
||||
"youtube.com",
|
||||
"www.youtube.com",
|
||||
"m.youtube.com",
|
||||
"youtu.be",
|
||||
].includes(hostname);
|
||||
}
|
||||
|
||||
export function parseYouTubeVideoId(url: URL): string | null {
|
||||
if (url.hostname === "youtu.be") {
|
||||
return url.pathname.split("/").filter(Boolean)[0] ?? null;
|
||||
}
|
||||
|
||||
if (url.pathname === "/watch") {
|
||||
return url.searchParams.get("v");
|
||||
}
|
||||
|
||||
const shortsMatch = url.pathname.match(/^\/shorts\/([^/?#]+)/);
|
||||
if (shortsMatch) {
|
||||
return shortsMatch[1];
|
||||
}
|
||||
|
||||
const liveMatch = url.pathname.match(/^\/live\/([^/?#]+)/);
|
||||
if (liveMatch) {
|
||||
return liveMatch[1];
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
function parseTimestampValue(raw: string): number | null {
|
||||
const parts = raw
|
||||
.split(":")
|
||||
.map((part) => Number.parseInt(part, 10))
|
||||
.filter((part) => Number.isFinite(part));
|
||||
|
||||
if (parts.length < 2 || parts.length > 3) {
|
||||
return null;
|
||||
}
|
||||
|
||||
if (parts.some((part) => part < 0)) {
|
||||
return null;
|
||||
}
|
||||
|
||||
if (parts.length === 2) {
|
||||
const [minutes, seconds] = parts;
|
||||
return minutes * 60 + seconds;
|
||||
}
|
||||
|
||||
const [hours, minutes, seconds] = parts;
|
||||
return hours * 3600 + minutes * 60 + seconds;
|
||||
}
|
||||
|
||||
export function formatTimestamp(totalSeconds: number): string {
|
||||
const rounded = Math.max(0, Math.floor(totalSeconds));
|
||||
const hours = Math.floor(rounded / 3600);
|
||||
const minutes = Math.floor((rounded % 3600) / 60);
|
||||
const seconds = rounded % 60;
|
||||
|
||||
if (hours > 0) {
|
||||
return `${hours}:${String(minutes).padStart(2, "0")}:${String(seconds).padStart(2, "0")}`;
|
||||
}
|
||||
return `${minutes}:${String(seconds).padStart(2, "0")}`;
|
||||
}
|
||||
|
||||
export function formatTimestampRange(start: number, end: number): string {
|
||||
const safeStart = Math.max(0, start);
|
||||
const safeEnd = Math.max(safeStart, end);
|
||||
return `[${formatTimestamp(safeStart)} -> ${formatTimestamp(safeEnd)}]`;
|
||||
}
|
||||
|
||||
export function normalizeYouTubeChapters(chapters: YouTubeChapter[]): YouTubeChapter[] {
|
||||
const seenTimes = new Set<number>();
|
||||
|
||||
return chapters
|
||||
.map((chapter) => ({
|
||||
title: chapter.title.trim(),
|
||||
time: Math.max(0, Math.floor(chapter.time)),
|
||||
}))
|
||||
.filter((chapter) => chapter.title)
|
||||
.sort((left, right) => left.time - right.time)
|
||||
.filter((chapter) => {
|
||||
if (seenTimes.has(chapter.time)) {
|
||||
return false;
|
||||
}
|
||||
seenTimes.add(chapter.time);
|
||||
return true;
|
||||
});
|
||||
}
|
||||
|
||||
export function parseYouTubeDescriptionChapters(description?: string | null): YouTubeChapter[] {
|
||||
if (!description) {
|
||||
return [];
|
||||
}
|
||||
|
||||
const chapters: YouTubeChapter[] = [];
|
||||
const seen = new Set<string>();
|
||||
|
||||
for (const rawLine of description.replace(/\r\n/g, "\n").split("\n")) {
|
||||
const line = rawLine.trim();
|
||||
if (!line) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const match = line.match(DESCRIPTION_CHAPTER_RE);
|
||||
if (!match) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const time = parseTimestampValue(match[1]);
|
||||
const title = match[2]?.trim();
|
||||
if (time === null || !title) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const key = `${time}:${title.toLowerCase()}`;
|
||||
if (seen.has(key)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
seen.add(key);
|
||||
chapters.push({ title, time });
|
||||
}
|
||||
|
||||
const normalized = normalizeYouTubeChapters(chapters);
|
||||
if (normalized.length >= 2) {
|
||||
return normalized;
|
||||
}
|
||||
|
||||
if (normalized.length === 1 && normalized[0]?.time === 0) {
|
||||
return normalized;
|
||||
}
|
||||
|
||||
return [];
|
||||
}
|
||||
|
||||
function renderDescriptionMarkdown(description: string): string {
|
||||
return description
|
||||
.replace(/\r\n/g, "\n")
|
||||
.trim()
|
||||
.split(/\n{2,}/)
|
||||
.map((block) => block.split("\n").map((line) => line.trimEnd()).join(" \n"))
|
||||
.join("\n\n")
|
||||
.trim();
|
||||
}
|
||||
|
||||
function renderSegmentLine(segment: YouTubeTranscriptSegment): string {
|
||||
return `${formatTimestampRange(segment.start, segment.end)} ${segment.text}`;
|
||||
}
|
||||
|
||||
export function renderYouTubeTranscriptMarkdown({
|
||||
description,
|
||||
segments,
|
||||
chapters,
|
||||
}: RenderYouTubeTranscriptMarkdownInput): string {
|
||||
if (segments.length === 0) {
|
||||
return "";
|
||||
}
|
||||
|
||||
const parts: string[] = [];
|
||||
const normalizedDescription = description?.trim();
|
||||
const transcriptEnd = segments.reduce((maxEnd, segment) => Math.max(maxEnd, segment.end, segment.start), 0);
|
||||
const normalizedChapters = normalizeYouTubeChapters(chapters).filter(
|
||||
(chapter) => transcriptEnd <= 0 || chapter.time < transcriptEnd,
|
||||
);
|
||||
|
||||
if (normalizedDescription) {
|
||||
parts.push("## Description");
|
||||
parts.push(renderDescriptionMarkdown(normalizedDescription));
|
||||
}
|
||||
|
||||
if (normalizedChapters.length > 0) {
|
||||
parts.push("## Chapters");
|
||||
|
||||
for (let index = 0; index < normalizedChapters.length; index += 1) {
|
||||
const chapter = normalizedChapters[index];
|
||||
const nextChapter = normalizedChapters[index + 1];
|
||||
const chapterEnd = nextChapter ? nextChapter.time : transcriptEnd;
|
||||
const chapterSegments = segments.filter(
|
||||
(segment) => segment.start >= chapter.time && segment.start < chapterEnd,
|
||||
);
|
||||
|
||||
parts.push(`### ${chapter.title} ${formatTimestampRange(chapter.time, chapterEnd)}`);
|
||||
|
||||
if (chapterSegments.length > 0) {
|
||||
parts.push(chapterSegments.map(renderSegmentLine).join("\n"));
|
||||
}
|
||||
}
|
||||
} else {
|
||||
parts.push("## Transcript");
|
||||
parts.push(segments.map(renderSegmentLine).join("\n"));
|
||||
}
|
||||
|
||||
return parts.filter(Boolean).join("\n\n").trim();
|
||||
}
|
||||
|
||||
function normalizeThumbnailKey(url: string): string {
|
||||
try {
|
||||
const parsed = new URL(url);
|
||||
return `${parsed.origin}${parsed.pathname}`;
|
||||
} catch {
|
||||
return url;
|
||||
}
|
||||
}
|
||||
|
||||
export function buildYouTubeThumbnailCandidates(videoId: string, listedUrls: string[]): string[] {
|
||||
const candidates = [
|
||||
...YOUTUBE_THUMBNAIL_VARIANTS.map((variant) => `https://i.ytimg.com/vi/${videoId}/${variant}`),
|
||||
...listedUrls,
|
||||
];
|
||||
|
||||
const seen = new Set<string>();
|
||||
return candidates.filter((candidate) => {
|
||||
if (!candidate) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const key = normalizeThumbnailKey(candidate);
|
||||
if (seen.has(key)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
seen.add(key);
|
||||
return true;
|
||||
});
|
||||
}
|
||||
|
|
@ -0,0 +1,258 @@
|
|||
import { EventEmitter } from "node:events";
|
||||
import WebSocket from "ws";
|
||||
|
||||
// Loose shape for CDP message payloads (command params, results, events).
type JsonObject = Record<string, unknown>;

// Bookkeeping for an in-flight CDP command awaiting its response message.
interface CdpPendingCommand {
  // Settles the caller's promise with the command's result payload.
  resolve(value: unknown): void;
  // Settles the caller's promise with a CDP error.
  reject(error: unknown): void;
  // Command name (e.g. "Target.createTarget"); used in error messages.
  method: string;
}

// Error payload shape found in CDP response messages.
interface CdpErrorShape {
  message?: string;
}

// Generic envelope for command responses carrying a result or an error.
interface CdpCommandResult<T> {
  result?: T;
  error?: CdpErrorShape;
}

// Options for creating a new page target plus attached session.
interface CreatePageSessionOptions {
  // URL opened in the new target; defaults to "about:blank".
  initialUrl?: string;
  // When true, the target is opened/activated as a visible window.
  visible?: boolean;
}
|
||||
|
||||
export class TargetSession extends EventEmitter {
|
||||
constructor(
|
||||
private readonly client: CdpClient,
|
||||
public readonly targetId: string,
|
||||
public readonly sessionId: string,
|
||||
) {
|
||||
super();
|
||||
}
|
||||
|
||||
async send<T>(method: string, params: JsonObject = {}): Promise<T> {
|
||||
return this.client.sendSessionCommand<T>(this.sessionId, method, params);
|
||||
}
|
||||
|
||||
handleEvent(method: string, params: JsonObject): void {
|
||||
this.emit(method, params);
|
||||
this.emit("event", { method, params });
|
||||
}
|
||||
|
||||
async waitForEvent<T extends JsonObject>(
|
||||
method: string,
|
||||
predicate?: (params: T) => boolean,
|
||||
timeoutMs = 30_000,
|
||||
): Promise<T> {
|
||||
return new Promise<T>((resolve, reject) => {
|
||||
const timeout = setTimeout(() => {
|
||||
this.off(method, listener);
|
||||
reject(new Error(`Timed out waiting for ${method}`));
|
||||
}, timeoutMs);
|
||||
|
||||
const listener = (params: T): void => {
|
||||
if (predicate && !predicate(params)) {
|
||||
return;
|
||||
}
|
||||
clearTimeout(timeout);
|
||||
this.off(method, listener);
|
||||
resolve(params);
|
||||
};
|
||||
|
||||
this.on(method, listener);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Minimal Chrome DevTools Protocol client over a single browser-level
 * WebSocket, using flat session mode: session-scoped messages carry a
 * `sessionId` on the one shared socket.
 *
 * Responsibilities: correlate command ids with responses, route
 * session-scoped events to their TargetSession, and manage page targets.
 */
export class CdpClient {
  private readonly ws: WebSocket;
  // In-flight commands awaiting a response, keyed by message id.
  private readonly pending = new Map<number, CdpPendingCommand>();
  // Attached target sessions, keyed by CDP sessionId.
  private readonly sessions = new Map<string, TargetSession>();
  // Monotonically increasing id for command/response correlation.
  private nextId = 1;

  // Private: construct via CdpClient.connect() so the socket is open first.
  private constructor(ws: WebSocket) {
    this.ws = ws;
    this.ws.on("message", (raw) => {
      this.handleMessage(raw.toString());
    });
  }

  /** Opens the browser-level WebSocket and wraps it in a client. */
  static async connect(browserWsUrl: string): Promise<CdpClient> {
    const ws = await new Promise<WebSocket>((resolve, reject) => {
      const socket = new WebSocket(browserWsUrl);
      socket.once("open", () => resolve(socket));
      socket.once("error", (error) => reject(error));
    });

    return new CdpClient(ws);
  }

  // Routes one raw CDP message: messages with an `id` are command responses
  // and settle the matching pending promise; messages with a sessionId +
  // method are events forwarded to the owning session.
  private handleMessage(rawMessage: string): void {
    const message = JSON.parse(rawMessage) as {
      id?: number;
      sessionId?: string;
      method?: string;
      params?: JsonObject;
      result?: unknown;
      error?: CdpErrorShape;
    };

    if (typeof message.id === "number") {
      const pending = this.pending.get(message.id);
      if (!pending) {
        // Response for an unknown or already-settled command; ignore.
        return;
      }
      this.pending.delete(message.id);
      if (message.error) {
        pending.reject(new Error(`${pending.method}: ${message.error.message ?? "Unknown CDP error"}`));
        return;
      }
      pending.resolve(message.result);
      return;
    }

    if (typeof message.sessionId === "string" && typeof message.method === "string") {
      const session = this.sessions.get(message.sessionId);
      if (session) {
        session.handleEvent(message.method, (message.params ?? {}) as JsonObject);
      }
    }
  }

  // Sends one command (optionally session-scoped) and resolves with its
  // result. The pending record is registered before ws.send so a fast
  // response cannot be missed.
  private async sendCommand<T>(
    method: string,
    params: JsonObject = {},
    sessionId?: string,
  ): Promise<T> {
    const id = this.nextId;
    this.nextId += 1;

    const payload = sessionId ? { id, method, params, sessionId } : { id, method, params };

    const result = new Promise<T>((resolve, reject) => {
      this.pending.set(id, {
        resolve: (value) => resolve(value as T),
        reject,
        method,
      });
    });

    this.ws.send(JSON.stringify(payload));
    return result;
  }

  /** Sends a browser-level (non-session) CDP command. */
  async sendBrowserCommand<T>(method: string, params: JsonObject = {}): Promise<T> {
    return this.sendCommand<T>(method, params);
  }

  /** Sends a command scoped to an attached session. */
  async sendSessionCommand<T>(sessionId: string, method: string, params: JsonObject = {}): Promise<T> {
    return this.sendCommand<T>(method, params, sessionId);
  }

  // Creates a page target, falling back to progressively plainer parameter
  // sets when Chrome rejects the richer ones (hidden/newWindow/focus);
  // rethrows the last error if every attempt fails.
  private async createPageTarget(initialUrl: string, visible = false): Promise<{ targetId: string }> {
    const attempts: JsonObject[] = visible
      ? [
          {
            url: initialUrl,
            newWindow: true,
            focus: true,
          },
          {
            url: initialUrl,
            focus: true,
          },
          {
            url: initialUrl,
          },
        ]
      : [
          {
            url: initialUrl,
            hidden: true,
          },
          {
            url: initialUrl,
            background: true,
            focus: false,
          },
          {
            url: initialUrl,
          },
        ];

    let lastError: unknown;

    for (const params of attempts) {
      try {
        return await this.sendBrowserCommand<{ targetId: string }>("Target.createTarget", params);
      } catch (error) {
        lastError = error;
      }
    }

    throw lastError instanceof Error ? lastError : new Error("Target.createTarget failed");
  }

  /**
   * Creates a page target, attaches a flat session to it, and enables the
   * Page, Runtime, and DOM domains. When `visible` is set the target is
   * additionally activated and brought to front (best-effort).
   */
  async createPageSession(options: CreatePageSessionOptions = {}): Promise<TargetSession> {
    const initialUrl = options.initialUrl ?? "about:blank";
    const created = await this.createPageTarget(initialUrl, Boolean(options.visible));
    const attached = await this.sendBrowserCommand<{ sessionId: string }>("Target.attachToTarget", {
      targetId: created.targetId,
      flatten: true,
    });

    const session = new TargetSession(this, created.targetId, attached.sessionId);
    this.sessions.set(attached.sessionId, session);

    if (options.visible) {
      // Best-effort activation; failures are ignored.
      await this.sendBrowserCommand("Target.activateTarget", {
        targetId: created.targetId,
      }).catch(() => {});
    }

    await session.send("Page.enable");
    await session.send("Runtime.enable");
    await session.send("DOM.enable");

    if (options.visible) {
      // Best-effort; ignore failure.
      await session.send("Page.bringToFront").catch(() => {});
    }

    return session;
  }

  /** Closes a target; errors are swallowed since it may already be gone. */
  async closeTarget(targetId: string): Promise<void> {
    try {
      await this.sendBrowserCommand("Target.closeTarget", { targetId });
    } catch {
      // Target may already be gone.
    }
  }

  /** Closes the underlying WebSocket, resolving once it reports closed. */
  async close(): Promise<void> {
    await new Promise<void>((resolve) => {
      if (this.ws.readyState === WebSocket.CLOSED) {
        resolve();
        return;
      }
      this.ws.once("close", () => resolve());
      this.ws.close();
    });
  }
}
|
||||
|
||||
export async function evaluateRuntime<T>(session: TargetSession, expression: string): Promise<T> {
|
||||
const response = await session.send<CdpCommandResult<{ value?: T; description?: string }>>("Runtime.evaluate", {
|
||||
expression,
|
||||
awaitPromise: true,
|
||||
returnByValue: true,
|
||||
});
|
||||
|
||||
if (response.error) {
|
||||
throw new Error(response.error.message ?? "Runtime.evaluate failed");
|
||||
}
|
||||
|
||||
return (response.result?.value as T | undefined) ?? (undefined as T);
|
||||
}
|
||||
|
|
@ -0,0 +1,187 @@
|
|||
import { launch, type LaunchedChrome } from "chrome-launcher";
|
||||
import WebSocket from "ws";
|
||||
import type { Logger } from "../utils/logger";
|
||||
import {
|
||||
cleanChromeLockArtifacts,
|
||||
ensureChromeProfileDir,
|
||||
findChromeProcessUsingProfile,
|
||||
findExistingChromeDebugPort,
|
||||
hasChromeLockArtifacts,
|
||||
listChromeProfileEntries,
|
||||
resolveChromeProfileDir,
|
||||
shouldRetryChromeLaunchRecovery,
|
||||
} from "./profile";
|
||||
|
||||
// Subset of Chrome's /json/version payload that we consume.
interface ChromeVersionResponse {
  webSocketDebuggerUrl: string;
}

// How to reach or launch Chrome for a CDP connection.
export interface ChromeConnectOptions {
  // Existing debugger endpoint (ws://, wss://, or an http origin). When set,
  // no Chrome is launched or reused by profile.
  cdpUrl?: string;
  // Explicit Chrome binary path, forwarded to chrome-launcher.
  browserPath?: string;
  // Launch with --headless=new instead of --no-startup-window.
  headless?: boolean;
  logger?: Logger;
  // Profile directory override; otherwise resolveChromeProfileDir() decides.
  profileDir?: string;
}

// An established browser-level CDP endpoint plus its lifecycle handle.
export interface ChromeConnection {
  browserWsUrl: string;
  // HTTP origin of the debug endpoint (absent for direct ws:// connections).
  origin?: string;
  port?: number;
  profileDir?: string;
  // True when this process launched Chrome; close() then shuts it down.
  launched: boolean;
  close(): Promise<void>;
}
|
||||
|
||||
async function fetchJson<T>(url: string): Promise<T> {
|
||||
const response = await fetch(url);
|
||||
if (!response.ok) {
|
||||
throw new Error(`Failed to fetch ${url}: HTTP ${response.status}`);
|
||||
}
|
||||
return (await response.json()) as T;
|
||||
}
|
||||
|
||||
async function connectToHttpEndpoint(origin: string): Promise<ChromeConnection> {
|
||||
const normalizedOrigin = origin.replace(/\/$/, "");
|
||||
const version = await fetchJson<ChromeVersionResponse>(`${normalizedOrigin}/json/version`);
|
||||
return {
|
||||
browserWsUrl: version.webSocketDebuggerUrl,
|
||||
origin: normalizedOrigin,
|
||||
port: Number(new URL(normalizedOrigin).port || 80),
|
||||
launched: false,
|
||||
async close() {
|
||||
// Reused external Chrome, nothing to close here.
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
async function tryReuseChrome(profileDir: string, logger?: Logger): Promise<ChromeConnection | null> {
|
||||
const port = await findExistingChromeDebugPort({ profileDir });
|
||||
if (!port) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const origin = `http://127.0.0.1:${port}`;
|
||||
try {
|
||||
const connection = await connectToHttpEndpoint(origin);
|
||||
logger?.info(`Reusing Chrome debugger at ${origin} for profile ${profileDir}`);
|
||||
return {
|
||||
...connection,
|
||||
profileDir,
|
||||
};
|
||||
} catch {
|
||||
// Debugger disappeared between detection and connect.
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
async function launchFreshChrome(
|
||||
profileDir: string,
|
||||
options: Pick<ChromeConnectOptions, "browserPath" | "headless">,
|
||||
): Promise<ChromeConnection> {
|
||||
let launchedChrome: LaunchedChrome | null = null;
|
||||
try {
|
||||
launchedChrome = await launch({
|
||||
chromePath: options.browserPath,
|
||||
userDataDir: profileDir,
|
||||
chromeFlags: [
|
||||
"--disable-background-networking",
|
||||
"--disable-default-apps",
|
||||
"--disable-popup-blocking",
|
||||
"--disable-sync",
|
||||
"--no-first-run",
|
||||
"--no-default-browser-check",
|
||||
"--remote-allow-origins=*",
|
||||
...(!options.headless ? ["--no-startup-window"] : []),
|
||||
...(options.headless ? ["--headless=new"] : []),
|
||||
],
|
||||
});
|
||||
|
||||
const origin = `http://127.0.0.1:${launchedChrome.port}`;
|
||||
const version = await fetchJson<ChromeVersionResponse>(`${origin}/json/version`);
|
||||
|
||||
const chrome = launchedChrome;
|
||||
return {
|
||||
browserWsUrl: version.webSocketDebuggerUrl,
|
||||
origin,
|
||||
port: launchedChrome.port,
|
||||
profileDir,
|
||||
launched: true,
|
||||
async close() {
|
||||
if (!chrome) return;
|
||||
await gracefulCloseChrome(chrome, origin);
|
||||
},
|
||||
};
|
||||
} catch (error) {
|
||||
launchedChrome?.kill();
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Closes a launched Chrome as gently as possible, escalating in steps:
 *  1. Ask the browser to shut down via a CDP "Browser.close" command.
 *  2. Wait (bounded) for the WebSocket to close and the process to exit.
 *  3. Fall back to chrome.kill() when anything fails or times out.
 */
async function gracefulCloseChrome(chrome: LaunchedChrome, origin: string): Promise<void> {
  try {
    const resp = await fetch(`${origin}/json/version`);
    const { webSocketDebuggerUrl } = (await resp.json()) as ChromeVersionResponse;
    if (webSocketDebuggerUrl) {
      const ws = await new Promise<WebSocket>((resolve, reject) => {
        const socket = new WebSocket(webSocketDebuggerUrl);
        socket.once("open", () => resolve(socket));
        socket.once("error", reject);
      });
      // First and only command on this throwaway socket.
      const id = 1;
      ws.send(JSON.stringify({ id, method: "Browser.close" }));
      // Give Chrome up to 5s to close the socket on its own.
      await new Promise<void>((resolve) => {
        const timer = setTimeout(() => { ws.close(); resolve(); }, 5_000);
        ws.once("close", () => { clearTimeout(timer); resolve(); });
      });
      // Then up to 3s for the process itself to exit.
      const exited = await new Promise<boolean>((resolve) => {
        if (chrome.pid && !isProcessAlive(chrome.pid)) { resolve(true); return; }
        const timer = setTimeout(() => resolve(false), 3_000);
        chrome.process.once("exit", () => { clearTimeout(timer); resolve(true); });
      });
      if (exited) return;
    }
  } catch {}
  // Best-effort path failed or timed out: hard kill.
  chrome.kill();
}
|
||||
|
||||
function isProcessAlive(pid: number): boolean {
|
||||
try { process.kill(pid, 0); return true; } catch { return false; }
|
||||
}
|
||||
|
||||
export async function connectChrome(options: ChromeConnectOptions): Promise<ChromeConnection> {
|
||||
if (options.cdpUrl) {
|
||||
if (options.cdpUrl.startsWith("ws://") || options.cdpUrl.startsWith("wss://")) {
|
||||
return {
|
||||
browserWsUrl: options.cdpUrl,
|
||||
launched: false,
|
||||
async close() {},
|
||||
};
|
||||
}
|
||||
return connectToHttpEndpoint(options.cdpUrl);
|
||||
}
|
||||
|
||||
const profileDir = ensureChromeProfileDir(resolveChromeProfileDir(options.profileDir));
|
||||
const reused = await tryReuseChrome(profileDir, options.logger);
|
||||
if (reused) {
|
||||
return reused;
|
||||
}
|
||||
|
||||
options.logger?.warn(`No running Chrome debugger found for profile ${profileDir}. Launching Chrome with that profile.`);
|
||||
try {
|
||||
return await launchFreshChrome(profileDir, options);
|
||||
} catch (error) {
|
||||
const entries = await listChromeProfileEntries(profileDir);
|
||||
const shouldRetry = shouldRetryChromeLaunchRecovery({
|
||||
hasLockArtifacts: hasChromeLockArtifacts(entries),
|
||||
hasLiveOwner: findChromeProcessUsingProfile(profileDir),
|
||||
});
|
||||
if (!shouldRetry) {
|
||||
throw error;
|
||||
}
|
||||
|
||||
options.logger?.warn(`Chrome launch failed with stale profile locks. Cleaning ${profileDir} and retrying once.`);
|
||||
cleanChromeLockArtifacts(profileDir);
|
||||
return await launchFreshChrome(profileDir, options);
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,100 @@
|
|||
import { readFile, writeFile, mkdir } from "node:fs/promises";
|
||||
import { dirname, join } from "node:path";
|
||||
import { resolveChromeProfileDir } from "./profile";
|
||||
import type { TargetSession } from "./cdp-client";
|
||||
|
||||
// Cookie record as returned by CDP Network.getCookies.
export interface CdpCookie {
  name: string;
  value: string;
  domain: string;
  path: string;
  // Expiry as a Unix timestamp in seconds (compared against Date.now()/1000
  // during restore; not meaningful for session cookies).
  expires: number;
  size: number;
  httpOnly: boolean;
  secure: boolean;
  // True for session cookies, which are kept regardless of `expires`.
  session: boolean;
  sameSite?: string;
  priority?: string;
  sameParty?: boolean;
  sourceScheme?: string;
  sourcePort?: number;
  partitionKey?: string;
}

// On-disk format of the cookie sidecar file stored in the profile dir.
interface SidecarData {
  // ISO timestamp of when the cookies were exported.
  savedAt: string;
  cookies: CdpCookie[];
}

// Describes one site's cookie export/restore behavior.
export interface CookieSidecarConfig {
  // URLs whose cookies are read via Network.getCookies.
  urls: readonly string[];
  // Sidecar filename, resolved inside the Chrome profile directory.
  filename: string;
  // Cookie names that must be present with non-empty values for an
  // export/restore to be considered valid.
  requiredCookieNames: readonly string[];
  // Optional extra filter applied before exporting.
  filterCookie?: (cookie: CdpCookie) => boolean;
}
|
||||
|
||||
function sidecarPath(filename: string, profileDir?: string): string {
|
||||
return join(resolveChromeProfileDir(profileDir), filename);
|
||||
}
|
||||
|
||||
function hasRequired(cookies: CdpCookie[], names: readonly string[]): boolean {
|
||||
return names.every((name) =>
|
||||
cookies.some((c) => c.name === name && Boolean(c.value)),
|
||||
);
|
||||
}
|
||||
|
||||
async function getCookies(session: TargetSession, urls: readonly string[]): Promise<CdpCookie[]> {
|
||||
const { cookies } = await session.send<{ cookies: CdpCookie[] }>(
|
||||
"Network.getCookies",
|
||||
{ urls: [...urls] },
|
||||
);
|
||||
return cookies ?? [];
|
||||
}
|
||||
|
||||
export async function exportCookies(
|
||||
session: TargetSession,
|
||||
config: CookieSidecarConfig,
|
||||
profileDir?: string,
|
||||
): Promise<boolean> {
|
||||
const all = await getCookies(session, config.urls);
|
||||
const filtered = config.filterCookie ? all.filter(config.filterCookie) : all;
|
||||
if (!hasRequired(filtered, config.requiredCookieNames)) return false;
|
||||
|
||||
const filePath = sidecarPath(config.filename, profileDir);
|
||||
await mkdir(dirname(filePath), { recursive: true });
|
||||
const data: SidecarData = { savedAt: new Date().toISOString(), cookies: filtered };
|
||||
await writeFile(filePath, JSON.stringify(data, null, 2));
|
||||
return true;
|
||||
}
|
||||
|
||||
export async function restoreCookies(
|
||||
session: TargetSession,
|
||||
config: CookieSidecarConfig,
|
||||
profileDir?: string,
|
||||
): Promise<boolean> {
|
||||
const live = await getCookies(session, config.urls);
|
||||
if (hasRequired(live, config.requiredCookieNames)) return false;
|
||||
|
||||
const filePath = sidecarPath(config.filename, profileDir);
|
||||
const raw = await readFile(filePath, "utf8");
|
||||
const data = JSON.parse(raw) as SidecarData;
|
||||
if (!data.cookies?.length) return false;
|
||||
|
||||
const now = Date.now() / 1000;
|
||||
const valid = data.cookies.filter((c) => c.session || !c.expires || c.expires > now);
|
||||
if (!hasRequired(valid, config.requiredCookieNames)) return false;
|
||||
|
||||
await session.send("Network.setCookies", {
|
||||
cookies: valid.map((c) => ({
|
||||
name: c.name,
|
||||
value: c.value,
|
||||
domain: c.domain,
|
||||
path: c.path,
|
||||
httpOnly: c.httpOnly,
|
||||
secure: c.secure,
|
||||
sameSite: c.sameSite,
|
||||
expires: c.expires,
|
||||
})),
|
||||
});
|
||||
return true;
|
||||
}
|
||||
|
|
@ -0,0 +1,123 @@
|
|||
import type { WaitForInteractionRequest } from "../adapters/types";
|
||||
import type { BrowserSession } from "./session";
|
||||
|
||||
// Page state sampled in-page to decide whether a human-verification
// challenge is currently blocking extraction.
interface GateSnapshot {
  title: string;
  currentUrl: string;
  // First 4000 chars of document.body.innerText.
  bodyText: string;
  // DOM probes for the supported challenge providers.
  hasCloudflareTurnstile: boolean;
  hasCloudflareChallenge: boolean;
  hasRecaptcha: boolean;
  hasRecaptchaIframe: boolean;
  hasHcaptcha: boolean;
  hasHcaptchaIframe: boolean;
}
|
||||
|
||||
export function detectInteractionGateFromSnapshot(snapshot: GateSnapshot): WaitForInteractionRequest | null {
|
||||
const text = snapshot.bodyText.toLowerCase();
|
||||
const title = snapshot.title.toLowerCase();
|
||||
const url = snapshot.currentUrl.toLowerCase();
|
||||
|
||||
if (
|
||||
snapshot.hasCloudflareTurnstile ||
|
||||
snapshot.hasCloudflareChallenge ||
|
||||
title.includes("just a moment") ||
|
||||
text.includes("verify you are human") ||
|
||||
text.includes("checking your browser before accessing") ||
|
||||
text.includes("enable javascript and cookies to continue") ||
|
||||
url.includes("/cdn-cgi/challenge-platform/")
|
||||
) {
|
||||
return {
|
||||
type: "wait_for_interaction",
|
||||
kind: "cloudflare",
|
||||
provider: "cloudflare",
|
||||
reason: "Cloudflare human verification detected",
|
||||
prompt: "Please complete the Cloudflare verification in the opened Chrome window. Extraction will continue automatically once the challenge disappears.",
|
||||
requiresVisibleBrowser: true,
|
||||
};
|
||||
}
|
||||
|
||||
if (
|
||||
snapshot.hasRecaptcha ||
|
||||
snapshot.hasRecaptchaIframe ||
|
||||
text.includes("i'm not a robot") ||
|
||||
text.includes("recaptcha")
|
||||
) {
|
||||
return {
|
||||
type: "wait_for_interaction",
|
||||
kind: "recaptcha",
|
||||
provider: "google_recaptcha",
|
||||
reason: "Google reCAPTCHA detected",
|
||||
prompt: "Please complete the reCAPTCHA verification in the opened Chrome window. Extraction will continue automatically once the challenge disappears.",
|
||||
requiresVisibleBrowser: true,
|
||||
};
|
||||
}
|
||||
|
||||
if (
|
||||
snapshot.hasHcaptcha ||
|
||||
snapshot.hasHcaptchaIframe ||
|
||||
text.includes("hcaptcha")
|
||||
) {
|
||||
return {
|
||||
type: "wait_for_interaction",
|
||||
kind: "hcaptcha",
|
||||
provider: "hcaptcha",
|
||||
reason: "hCaptcha verification detected",
|
||||
prompt: "Please complete the hCaptcha verification in the opened Chrome window. Extraction will continue automatically once the challenge disappears.",
|
||||
requiresVisibleBrowser: true,
|
||||
};
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
 * Samples the live page over CDP and returns a pending interaction request
 * when a verification challenge is detected, else null.
 *
 * The DOM/title/body probes run in-page via browser.evaluate; if evaluation
 * itself fails (e.g. mid-navigation), an all-false snapshot is substituted
 * so the check degrades to "no gate detected" instead of throwing.
 */
export async function detectInteractionGate(browser: BrowserSession): Promise<WaitForInteractionRequest | null> {
  const snapshot = await browser.evaluate<GateSnapshot>(`
    (() => {
      const bodyText = (document.body?.innerText ?? "").slice(0, 4000);
      return {
        title: document.title ?? "",
        currentUrl: window.location.href,
        bodyText,
        hasCloudflareTurnstile: Boolean(
          document.querySelector(
            '.cf-turnstile, [name="cf-turnstile-response"], iframe[src*="challenges.cloudflare.com"]'
          )
        ),
        hasCloudflareChallenge: Boolean(
          document.querySelector(
            '#challenge-running, #cf-challenge-running, .challenge-platform, [data-ray], [data-translate="checking_browser"]'
          )
        ),
        hasRecaptcha: Boolean(
          document.querySelector(
            '.g-recaptcha, textarea[name="g-recaptcha-response"], iframe[title*="reCAPTCHA"]'
          )
        ),
        hasRecaptchaIframe: Boolean(
          document.querySelector('iframe[src*="google.com/recaptcha"], iframe[src*="recaptcha/api2"]')
        ),
        hasHcaptcha: Boolean(
          document.querySelector(
            '.h-captcha, textarea[name="h-captcha-response"], iframe[title*="hCaptcha"]'
          )
        ),
        hasHcaptchaIframe: Boolean(
          document.querySelector('iframe[src*="hcaptcha.com"]')
        ),
      };
    })()
  `).catch(() => ({
    // Fallback snapshot: treat an unevaluable page as challenge-free.
    title: "",
    currentUrl: "",
    bodyText: "",
    hasCloudflareTurnstile: false,
    hasCloudflareChallenge: false,
    hasRecaptcha: false,
    hasRecaptchaIframe: false,
    hasHcaptcha: false,
    hasHcaptchaIframe: false,
  }));

  return detectInteractionGateFromSnapshot(snapshot);
}
|
||||
|
|
@ -0,0 +1,235 @@
|
|||
import type { TargetSession } from "./cdp-client";
|
||||
import type { Logger } from "../utils/logger";
|
||||
|
||||
// Loose shape for CDP event payloads.
type JsonObject = Record<string, unknown>;

// One observed network request/response pair, accumulated across the
// Network.requestWillBeSent / responseReceived / loadingFinished|Failed
// event sequence.
export interface NetworkEntry {
  requestId: string;
  url: string;
  method: string;
  // CDP resource type (from the event's `type` field); "Other" when absent.
  resourceType: string;
  // Wall-clock ms (Date.now()) when the request was first seen.
  timestamp: number;
  requestHeaders?: Record<string, string>;
  // POST data, when CDP provided it inline as a string.
  requestBody?: string;
  status?: number;
  statusText?: string;
  responseHeaders?: Record<string, string>;
  mimeType?: string;
  // Response body, fetched lazily via NetworkJournal.ensureBody().
  body?: string;
  // True when CDP returned the body base64-encoded (it is decoded to utf8).
  bodyBase64?: boolean;
  // Error message from a failed Network.getResponseBody call; once set,
  // the body is not re-fetched.
  bodyError?: string;
  failed?: boolean;
  failureReason?: string;
  // True once loadingFinished or loadingFailed was observed.
  finished: boolean;
}
|
||||
|
||||
function normalizeHeaders(headers: unknown): Record<string, string> | undefined {
|
||||
if (!headers || typeof headers !== "object") {
|
||||
return undefined;
|
||||
}
|
||||
return Object.fromEntries(
|
||||
Object.entries(headers as Record<string, unknown>).map(([key, value]) => [key, String(value)]),
|
||||
);
|
||||
}
|
||||
|
||||
function sleep(ms: number): Promise<void> {
|
||||
return new Promise((resolve) => setTimeout(resolve, ms));
|
||||
}
|
||||
|
||||
export class NetworkJournal {
|
||||
// All entries seen on this session, keyed by CDP requestId.
private readonly entries = new Map<string, NetworkEntry>();
// Timestamp of the most recent network event; drives waitForIdle().
private lastActivityAt = Date.now();
// Guards against double listener registration across start()/stop().
private started = false;

// Records network traffic for one CDP target session.
constructor(
  private readonly session: TargetSession,
  private readonly log: Logger,
) {}
|
||||
|
||||
/**
 * Begins recording: registers all Network.* event listeners and then
 * enables the Network domain. Listeners are attached before the enable
 * call so early events are not missed. Safe to call more than once.
 */
async start(): Promise<void> {
  if (this.started) {
    return;
  }

  this.started = true;
  this.session.on("Network.requestWillBeSent", this.handleRequestWillBeSent);
  this.session.on("Network.responseReceived", this.handleResponseReceived);
  this.session.on("Network.loadingFinished", this.handleLoadingFinished);
  this.session.on("Network.loadingFailed", this.handleLoadingFailed);
  await this.session.send("Network.enable");
}
|
||||
|
||||
stop(): void {
|
||||
if (!this.started) {
|
||||
return;
|
||||
}
|
||||
this.session.off("Network.requestWillBeSent", this.handleRequestWillBeSent);
|
||||
this.session.off("Network.responseReceived", this.handleResponseReceived);
|
||||
this.session.off("Network.loadingFinished", this.handleLoadingFinished);
|
||||
this.session.off("Network.loadingFailed", this.handleLoadingFailed);
|
||||
this.started = false;
|
||||
}
|
||||
|
||||
// Marks "now" as the latest network activity for idle detection.
private touch(): void {
  this.lastActivityAt = Date.now();
}
|
||||
|
||||
/** Records a new entry when CDP announces an outgoing request. */
private readonly handleRequestWillBeSent = (params: JsonObject): void => {
  const requestId = typeof params.requestId === "string" ? params.requestId : undefined;
  const request = params.request as JsonObject | undefined;
  if (!requestId || !request) {
    return;
  }

  this.touch();
  const entry: NetworkEntry = {
    requestId,
    url: String(request.url ?? ""),
    method: String(request.method ?? "GET"),
    resourceType: String(params.type ?? "Other"),
    timestamp: Date.now(),
    requestHeaders: normalizeHeaders(request.headers),
    requestBody: typeof request.postData === "string" ? request.postData : undefined,
    finished: false,
  };
  this.entries.set(requestId, entry);
};
|
||||
|
||||
/** Merges response metadata into an already-recorded entry. */
private readonly handleResponseReceived = (params: JsonObject): void => {
  const requestId = typeof params.requestId === "string" ? params.requestId : undefined;
  const response = params.response as JsonObject | undefined;
  if (!requestId || !response) {
    return;
  }

  this.touch();
  const entry = this.entries.get(requestId);
  if (!entry) {
    // Response for a request we never recorded; ignore.
    return;
  }

  entry.status = typeof response.status === "number" ? response.status : undefined;
  entry.statusText = typeof response.statusText === "string" ? response.statusText : undefined;
  entry.responseHeaders = normalizeHeaders(response.headers);
  entry.mimeType = typeof response.mimeType === "string" ? response.mimeType : undefined;
  this.entries.set(requestId, entry);
};
|
||||
|
||||
/** Marks an entry as finished once its body has fully loaded. */
private readonly handleLoadingFinished = (params: JsonObject): void => {
  if (typeof params.requestId !== "string") {
    return;
  }

  this.touch();
  const entry = this.entries.get(params.requestId);
  if (entry) {
    entry.finished = true;
    this.entries.set(params.requestId, entry);
  }
};
|
||||
|
||||
/** Marks an entry as finished-but-failed, recording the error text. */
private readonly handleLoadingFailed = (params: JsonObject): void => {
  if (typeof params.requestId !== "string") {
    return;
  }

  this.touch();
  const entry = this.entries.get(params.requestId);
  if (!entry) {
    return;
  }
  entry.finished = true;
  entry.failed = true;
  entry.failureReason = typeof params.errorText === "string" ? params.errorText : "Unknown error";
  this.entries.set(params.requestId, entry);
};
|
||||
|
||||
/** Snapshot of all recorded entries, in insertion order. */
getEntries(): NetworkEntry[] {
  return [...this.entries.values()];
}
|
||||
|
||||
/** All recorded entries that satisfy `predicate`. */
findEntries(predicate: (entry: NetworkEntry) => boolean): NetworkEntry[] {
  const matches: NetworkEntry[] = [];
  for (const entry of this.getEntries()) {
    if (predicate(entry)) {
      matches.push(entry);
    }
  }
  return matches;
}
|
||||
|
||||
async waitForIdle(options: { idleMs?: number; timeoutMs?: number } = {}): Promise<void> {
|
||||
const idleMs = options.idleMs ?? 1_200;
|
||||
const timeoutMs = options.timeoutMs ?? 15_000;
|
||||
const startedAt = Date.now();
|
||||
|
||||
while (Date.now() - startedAt < timeoutMs) {
|
||||
if (Date.now() - this.lastActivityAt >= idleMs) {
|
||||
return;
|
||||
}
|
||||
await sleep(Math.min(150, idleMs));
|
||||
}
|
||||
|
||||
throw new Error("Timed out waiting for network idle");
|
||||
}
|
||||
|
||||
async waitForResponse(
|
||||
predicate: (entry: NetworkEntry) => boolean,
|
||||
options: { timeoutMs?: number } = {},
|
||||
): Promise<NetworkEntry> {
|
||||
const timeoutMs = options.timeoutMs ?? 10_000;
|
||||
const startedAt = Date.now();
|
||||
|
||||
while (Date.now() - startedAt < timeoutMs) {
|
||||
const matched = this.getEntries().find((entry) => entry.finished && predicate(entry));
|
||||
if (matched) {
|
||||
return matched;
|
||||
}
|
||||
await sleep(150);
|
||||
}
|
||||
|
||||
throw new Error("Timed out waiting for matching network response");
|
||||
}
|
||||
|
||||
/**
 * Lazily fetches and caches the response body for `entry` via
 * `Network.getResponseBody`.
 *
 * Returns the cached body when already present. Returns `undefined` without
 * a CDP round-trip when a previous fetch errored, the request failed, or it
 * has not finished yet. On success the body is cached on the entry itself;
 * base64 payloads are decoded as UTF-8 (NOTE(review): binary bodies would be
 * mangled by this decode — confirm callers only consume text bodies).
 */
async ensureBody(entry: NetworkEntry): Promise<string | undefined> {
  if (entry.body !== undefined) {
    return entry.body;
  }
  // Skip entries that cannot yield a body, and never retry a prior failure.
  if (entry.bodyError || entry.failed || !entry.finished) {
    return undefined;
  }

  try {
    const result = await this.session.send<{ body: string; base64Encoded: boolean }>("Network.getResponseBody", {
      requestId: entry.requestId,
    });
    entry.bodyBase64 = result.base64Encoded;
    entry.body = result.base64Encoded ? Buffer.from(result.body, "base64").toString("utf8") : result.body;
    return entry.body;
  } catch (error) {
    // Record the failure so later calls short-circuit instead of re-fetching.
    entry.bodyError = error instanceof Error ? error.message : String(error);
    this.log.debug(`Failed to fetch response body for ${entry.url}: ${entry.bodyError}`);
    return undefined;
  }
}
|
||||
|
||||
async getJsonBody(entry: NetworkEntry): Promise<unknown | null> {
|
||||
const body = await this.ensureBody(entry);
|
||||
if (!body) {
|
||||
return null;
|
||||
}
|
||||
|
||||
try {
|
||||
return JSON.parse(body);
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
async toJSON(options: { includeBodies?: boolean } = {}): Promise<NetworkEntry[]> {
|
||||
const entries = this.getEntries();
|
||||
if (!options.includeBodies) {
|
||||
return entries;
|
||||
}
|
||||
|
||||
await Promise.all(entries.map((entry) => this.ensureBody(entry)));
|
||||
return entries;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,105 @@
|
|||
import type { BrowserSession } from "./session";
|
||||
|
||||
export interface CapturedPageSnapshot {
|
||||
html: string;
|
||||
finalUrl: string;
|
||||
}
|
||||
|
||||
/**
 * Page-side script (evaluated via CDP) that serializes the live DOM into
 * portable HTML: clones the document, inlines open shadow-root contents
 * (wrapping custom elements in a marker <div data-shadow-host>), promotes
 * lazy-load `data-src` attributes to `src`, and rewrites href/src/poster/
 * srcset URLs to absolute form against `document.baseURI`.
 *
 * Kept as String.raw so backslashes in the embedded regexes and the literal
 * "\n" in the doctype line survive into the page-evaluated source.
 */
export const CAPTURE_NORMALIZED_PAGE_SCRIPT = String.raw`
(() => {
  const baseUrl = document.baseURI || location.href;
  const htmlClone = document.documentElement.cloneNode(true);

  function materializeShadowDom(sourceRoot, cloneRoot) {
    const sourceElements = Array.from(sourceRoot.querySelectorAll("*"));
    const cloneElements = Array.from(cloneRoot.querySelectorAll("*"));

    for (let index = sourceElements.length - 1; index >= 0; index -= 1) {
      const sourceElement = sourceElements[index];
      const cloneElement = cloneElements[index];
      const shadowRoot = sourceElement && sourceElement.shadowRoot;
      if (!shadowRoot || !cloneElement || !shadowRoot.innerHTML) {
        continue;
      }

      if (cloneElement.tagName && cloneElement.tagName.includes("-")) {
        const wrapper = document.createElement("div");
        wrapper.setAttribute("data-shadow-host", cloneElement.tagName.toLowerCase());
        wrapper.innerHTML = shadowRoot.innerHTML;
        cloneElement.replaceWith(wrapper);
      } else {
        cloneElement.innerHTML = shadowRoot.innerHTML;
      }
    }
  }

  function toAbsolute(url) {
    if (!url) return url;
    try {
      return new URL(url, baseUrl).href;
    } catch {
      return url;
    }
  }

  function absolutizeAttribute(root, selector, attribute) {
    root.querySelectorAll(selector).forEach((element) => {
      const value = element.getAttribute(attribute);
      if (!value) return;
      const absolute = toAbsolute(value);
      if (absolute) {
        element.setAttribute(attribute, absolute);
      }
    });
  }

  function absolutizeSrcset(root, selector) {
    root.querySelectorAll(selector).forEach((element) => {
      const srcset = element.getAttribute("srcset");
      if (!srcset) return;
      element.setAttribute(
        "srcset",
        srcset
          .split(",")
          .map((part) => {
            const trimmed = part.trim();
            if (!trimmed) return "";
            const [url, ...descriptor] = trimmed.split(/\s+/);
            const absolute = toAbsolute(url);
            return descriptor.length > 0 ? absolute + " " + descriptor.join(" ") : absolute;
          })
          .filter(Boolean)
          .join(", "),
      );
    });
  }

  materializeShadowDom(document.documentElement, htmlClone);

  htmlClone
    .querySelectorAll("img[data-src], video[data-src], audio[data-src], source[data-src]")
    .forEach((element) => {
      const dataSource = element.getAttribute("data-src");
      const current = element.getAttribute("src");
      if (dataSource && (!current || current === "" || current.startsWith("data:"))) {
        element.setAttribute("src", dataSource);
      }
    });

  absolutizeAttribute(htmlClone, "a[href]", "href");
  absolutizeAttribute(htmlClone, "img[src], video[src], audio[src], source[src], iframe[src]", "src");
  absolutizeAttribute(htmlClone, "video[poster]", "poster");
  absolutizeSrcset(htmlClone, "img[srcset], source[srcset]");

  return {
    html: "<!doctype html>\n" + htmlClone.outerHTML,
    finalUrl: location.href,
  };
})()
`;
|
||||
|
||||
/**
 * Runs CAPTURE_NORMALIZED_PAGE_SCRIPT in the given browser session and
 * returns the serialized HTML plus the page's final URL.
 */
export async function captureNormalizedPageSnapshot(
  browser: BrowserSession,
): Promise<CapturedPageSnapshot> {
  return browser.evaluate<CapturedPageSnapshot>(CAPTURE_NORMALIZED_PAGE_SCRIPT);
}
|
||||
|
|
@ -0,0 +1,201 @@
|
|||
import fs from "node:fs";
|
||||
import os from "node:os";
|
||||
import path from "node:path";
|
||||
import process from "node:process";
|
||||
import { spawnSync } from "node:child_process";
|
||||
|
||||
export interface ResolveSharedChromeProfileDirOptions {
|
||||
envNames?: string[];
|
||||
appDataDirName?: string;
|
||||
profileDirName?: string;
|
||||
}
|
||||
|
||||
export interface FindExistingChromeDebugPortOptions {
|
||||
profileDir: string;
|
||||
timeoutMs?: number;
|
||||
}
|
||||
|
||||
interface ChromeVersionResponse {
|
||||
webSocketDebuggerUrl?: string;
|
||||
}
|
||||
|
||||
const CHROME_LOCK_FILE_NAMES = ["SingletonLock", "SingletonSocket", "SingletonCookie", "chrome.pid"] as const;
|
||||
|
||||
function resolveDataBaseDir(): string {
|
||||
if (process.platform === "darwin") {
|
||||
return path.join(os.homedir(), "Library", "Application Support");
|
||||
}
|
||||
if (process.platform === "win32") {
|
||||
return process.env.APPDATA ?? path.join(os.homedir(), "AppData", "Roaming");
|
||||
}
|
||||
return process.env.XDG_DATA_HOME ?? path.join(os.homedir(), ".local", "share");
|
||||
}
|
||||
|
||||
export function resolveSharedChromeProfileDir(
|
||||
options: ResolveSharedChromeProfileDirOptions = {},
|
||||
): string {
|
||||
for (const envName of options.envNames ?? []) {
|
||||
const override = process.env[envName]?.trim();
|
||||
if (override) {
|
||||
return path.resolve(override);
|
||||
}
|
||||
}
|
||||
|
||||
const appDataDirName = options.appDataDirName ?? "baoyu-skills";
|
||||
const profileDirName = options.profileDirName ?? "chrome-profile";
|
||||
return path.join(resolveDataBaseDir(), appDataDirName, profileDirName);
|
||||
}
|
||||
|
||||
export function resolveChromeProfileDir(profileDir?: string): string {
|
||||
if (profileDir?.trim()) {
|
||||
return path.resolve(profileDir.trim());
|
||||
}
|
||||
|
||||
return resolveSharedChromeProfileDir({
|
||||
envNames: ["BAOYU_CHROME_PROFILE_DIR"],
|
||||
appDataDirName: "baoyu-skills",
|
||||
profileDirName: "chrome-profile",
|
||||
});
|
||||
}
|
||||
|
||||
export function ensureChromeProfileDir(profileDir: string): string {
|
||||
fs.mkdirSync(profileDir, { recursive: true });
|
||||
return profileDir;
|
||||
}
|
||||
|
||||
export function hasChromeLockArtifacts(entries: readonly string[]): boolean {
|
||||
return CHROME_LOCK_FILE_NAMES.some((name) => entries.includes(name));
|
||||
}
|
||||
|
||||
export function shouldRetryChromeLaunchRecovery(options: {
|
||||
hasLockArtifacts: boolean;
|
||||
hasLiveOwner: boolean;
|
||||
}): boolean {
|
||||
return options.hasLockArtifacts && !options.hasLiveOwner;
|
||||
}
|
||||
|
||||
export function findChromeProcessUsingProfile(profileDir: string): boolean {
|
||||
if (process.platform === "win32") {
|
||||
return false;
|
||||
}
|
||||
|
||||
try {
|
||||
const result = spawnSync("ps", ["aux"], {
|
||||
encoding: "utf8",
|
||||
timeout: 5_000,
|
||||
});
|
||||
if (result.status !== 0 || !result.stdout) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return result.stdout
|
||||
.split("\n")
|
||||
.some((line) => line.includes(`--user-data-dir=${profileDir}`));
|
||||
} catch {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
export function cleanChromeLockArtifacts(profileDir: string): void {
|
||||
for (const name of CHROME_LOCK_FILE_NAMES) {
|
||||
try {
|
||||
fs.unlinkSync(path.join(profileDir, name));
|
||||
} catch {
|
||||
// Ignore missing files and continue cleaning the remaining artifacts.
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
export async function listChromeProfileEntries(profileDir: string): Promise<string[]> {
|
||||
try {
|
||||
return await fs.promises.readdir(profileDir);
|
||||
} catch {
|
||||
return [];
|
||||
}
|
||||
}
|
||||
|
||||
async function fetchWithTimeout(url: string, timeoutMs = 3_000): Promise<Response> {
|
||||
const controller = new AbortController();
|
||||
const timer = setTimeout(() => controller.abort(), timeoutMs);
|
||||
try {
|
||||
return await fetch(url, {
|
||||
redirect: "follow",
|
||||
signal: controller.signal,
|
||||
});
|
||||
} finally {
|
||||
clearTimeout(timer);
|
||||
}
|
||||
}
|
||||
|
||||
async function fetchJson<T>(url: string, timeoutMs = 3_000): Promise<T> {
|
||||
const response = await fetchWithTimeout(url, timeoutMs);
|
||||
if (!response.ok) {
|
||||
throw new Error(`Request failed: ${response.status} ${response.statusText}`);
|
||||
}
|
||||
return (await response.json()) as T;
|
||||
}
|
||||
|
||||
async function isDebugPortReady(port: number, timeoutMs = 3_000): Promise<boolean> {
|
||||
try {
|
||||
const version = await fetchJson<ChromeVersionResponse>(`http://127.0.0.1:${port}/json/version`, timeoutMs);
|
||||
return Boolean(version.webSocketDebuggerUrl);
|
||||
} catch {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
function parseDevToolsActivePort(filePath: string): { port: number; wsPath: string } | null {
|
||||
try {
|
||||
const content = fs.readFileSync(filePath, "utf8");
|
||||
const lines = content.split(/\r?\n/);
|
||||
const port = Number.parseInt(lines[0]?.trim() ?? "", 10);
|
||||
const wsPath = lines[1]?.trim() ?? "";
|
||||
if (port > 0 && wsPath) {
|
||||
return { port, wsPath };
|
||||
}
|
||||
} catch {
|
||||
// Ignore and fall back to process inspection.
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
 * Finds an already-running Chrome debugger for `options.profileDir`.
 *
 * Strategy: first trust the profile's DevToolsActivePort file and probe that
 * port; otherwise (non-Windows only) scan `ps aux` for processes launched
 * with both this profile dir and --remote-debugging-port, probing each
 * candidate. Returns the first responsive port, or null when none is usable.
 */
export async function findExistingChromeDebugPort(
  options: FindExistingChromeDebugPortOptions,
): Promise<number | null> {
  const timeoutMs = options.timeoutMs ?? 3_000;
  const activePort = parseDevToolsActivePort(path.join(options.profileDir, "DevToolsActivePort"));
  if (activePort && await isDebugPortReady(activePort.port, timeoutMs)) {
    return activePort.port;
  }

  // No `ps` on Windows; the DevToolsActivePort probe above is the only option there.
  if (process.platform === "win32") {
    return null;
  }

  try {
    const result = spawnSync("ps", ["aux"], {
      encoding: "utf8",
      timeout: 5_000,
    });
    if (result.status !== 0 || !result.stdout) {
      return null;
    }

    // Candidate lines mention both this profile dir and an explicit debug port.
    const lines = result.stdout
      .split("\n")
      .filter((line) => line.includes(options.profileDir) && line.includes("--remote-debugging-port="));

    for (const line of lines) {
      const match = line.match(/--remote-debugging-port=(\d+)/);
      const port = Number.parseInt(match?.[1] ?? "", 10);
      if (port > 0 && await isDebugPortReady(port, timeoutMs)) {
        return port;
      }
    }
  } catch {
    // Ignore and report no reusable debugger.
  }

  return null;
}
|
||||
|
|
@ -0,0 +1,155 @@
|
|||
import { execFile } from "node:child_process";
|
||||
import { promisify } from "node:util";
|
||||
import { CdpClient, TargetSession, evaluateRuntime } from "./cdp-client";
|
||||
|
||||
interface NavigationResult {
|
||||
errorText?: string;
|
||||
}
|
||||
|
||||
const execFileAsync = promisify(execFile);
|
||||
const MACOS_BROWSER_APP_IDS = [
|
||||
"com.google.Chrome",
|
||||
"org.chromium.Chromium",
|
||||
"com.brave.Browser",
|
||||
"com.microsoft.edgemac",
|
||||
];
|
||||
|
||||
function sleep(ms: number): Promise<void> {
|
||||
return new Promise((resolve) => setTimeout(resolve, ms));
|
||||
}
|
||||
|
||||
async function activateBrowserApp(): Promise<void> {
|
||||
if (process.platform !== "darwin") {
|
||||
return;
|
||||
}
|
||||
|
||||
for (const appId of MACOS_BROWSER_APP_IDS) {
|
||||
try {
|
||||
await execFileAsync("osascript", ["-e", `tell application id "${appId}" to activate`]);
|
||||
return;
|
||||
} catch {
|
||||
// Try the next installed browser bundle id.
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Thin page-level wrapper over a CDP target session: navigation, script
 * evaluation, and a few DOM conveniences (click, scroll, bring-to-front).
 */
export class BrowserSession {
  private constructor(
    private readonly cdp: CdpClient,
    public readonly targetSession: TargetSession,
    public readonly interactive: boolean,
  ) {}

  /**
   * Creates a new page target on `cdp` and wraps it. In interactive mode the
   * page is created visible and (best-effort) brought to the foreground.
   */
  static async open(
    cdp: CdpClient,
    options: {
      initialUrl?: string;
      interactive?: boolean;
    } = {},
  ): Promise<BrowserSession> {
    const targetSession = await cdp.createPageSession({
      initialUrl: options.initialUrl,
      visible: options.interactive,
    });
    const browser = new BrowserSession(cdp, targetSession, Boolean(options.interactive));
    if (browser.interactive) {
      await browser.bringToFront().catch(() => {});
    }
    return browser;
  }

  /**
   * Navigates to `url`, then waits (up to `timeoutMs`) for the load event
   * and for document.readyState to reach interactive/complete.
   * @throws Error when CDP reports a navigation error.
   */
  async goto(url: string, timeoutMs = 30_000): Promise<void> {
    // Subscribe before navigating so the load event cannot be missed;
    // a timeout of the wait itself is tolerated (resolves to null).
    const loadPromise = this.targetSession.waitForEvent("Page.loadEventFired", undefined, timeoutMs).catch(() => null);
    const result = await this.targetSession.send<NavigationResult>("Page.navigate", { url });
    if (result.errorText) {
      throw new Error(`Navigation failed: ${result.errorText}`);
    }
    await loadPromise;
    await this.waitForReadyState(timeoutMs);
  }

  /**
   * Polls document.readyState every 150 ms until it is "interactive" or
   * "complete".
   * @throws Error after `timeoutMs` without reaching either state.
   */
  async waitForReadyState(timeoutMs = 30_000): Promise<void> {
    const startedAt = Date.now();
    while (Date.now() - startedAt < timeoutMs) {
      const state = await this.evaluate<string>("document.readyState");
      if (state === "interactive" || state === "complete") {
        return;
      }
      await sleep(150);
    }
    throw new Error("Timed out waiting for document.readyState");
  }

  /** Evaluates a JS expression in the page and returns its value. */
  async evaluate<T>(expression: string): Promise<T> {
    return evaluateRuntime<T>(this.targetSession, expression);
  }

  /** Returns the page's current outer HTML. */
  async getHTML(): Promise<string> {
    return this.evaluate<string>("document.documentElement.outerHTML");
  }

  /** Returns the page's current document.title. */
  async getTitle(): Promise<string> {
    return this.evaluate<string>("document.title");
  }

  /** Returns the page's current location href. */
  async getURL(): Promise<string> {
    return this.evaluate<string>("window.location.href");
  }

  /**
   * Raises the page: Page.bringToFront, falling back to
   * Target.activateTarget, plus OS-level app activation when interactive.
   */
  async bringToFront(): Promise<void> {
    await this.targetSession.send("Page.bringToFront").catch(async () => {
      await this.cdp.sendBrowserCommand("Target.activateTarget", {
        targetId: this.targetSession.targetId,
      });
    });
    if (this.interactive) {
      await activateBrowserApp().catch(() => {});
    }
  }

  /**
   * Scrolls the first element matching `selector` into view and clicks it
   * inside the page.
   * @throws Error when the element is missing or not an HTMLElement.
   */
  async click(selector: string): Promise<void> {
    const result = await this.evaluate<{ ok: boolean; error?: string }>(`
      (() => {
        const element = document.querySelector(${JSON.stringify(selector)});
        if (!element) {
          return { ok: false, error: "Element not found" };
        }
        element.scrollIntoView({ block: "center", inline: "center" });
        if (element instanceof HTMLElement) {
          element.click();
          return { ok: true };
        }
        return { ok: false, error: "Element is not clickable" };
      })()
    `);

    if (!result.ok) {
      throw new Error(result.error ?? `Failed to click ${selector}`);
    }
  }

  /**
   * Scrolls toward the bottom in `stepPx` increments (default 1400 px) with
   * `delayMs` pauses, stopping early once the bottom is reached or the
   * scroll position stops changing; at most `maxSteps` steps.
   */
  async scrollToEnd(options: { stepPx?: number; delayMs?: number; maxSteps?: number } = {}): Promise<void> {
    const stepPx = options.stepPx ?? 1_400;
    const delayMs = options.delayMs ?? 250;
    const maxSteps = options.maxSteps ?? 6;

    for (let step = 0; step < maxSteps; step += 1) {
      const done = await this.evaluate<boolean>(`
        (() => {
          const before = window.scrollY;
          window.scrollBy(0, ${stepPx});
          const atBottom = window.innerHeight + window.scrollY >= document.body.scrollHeight - 4;
          return atBottom || window.scrollY === before;
        })()
      `);
      if (done) {
        break;
      }
      await sleep(delayMs);
    }
  }

  /** Closes the underlying page target. */
  async close(): Promise<void> {
    await this.cdp.closeTarget(this.targetSession.targetId);
  }
}
|
||||
|
|
@ -0,0 +1,227 @@
|
|||
#!/usr/bin/env bun
|
||||
|
||||
import {
|
||||
runConvertCommand,
|
||||
type ConvertCommandOptions,
|
||||
type OutputFormat,
|
||||
type WaitMode,
|
||||
} from "./commands/convert";
|
||||
|
||||
export const HELP_TEXT = `
|
||||
baoyu-fetch - Read a URL into Markdown or JSON with Chrome CDP
|
||||
|
||||
Usage:
|
||||
baoyu-fetch <url> [options]
|
||||
|
||||
Options:
|
||||
--output <file> Save output to file
|
||||
--format <type> Output format: markdown | json
|
||||
--json Alias for --format json
|
||||
--adapter <name> Force an adapter (e.g. x, generic)
|
||||
--download-media Download adapter-reported media into ./imgs and ./videos, then rewrite markdown links
|
||||
--media-dir <dir> Base directory for downloaded media. Defaults to the output directory
|
||||
--debug-dir <dir> Write debug artifacts
|
||||
--cdp-url <url> Reuse an existing Chrome DevTools endpoint
|
||||
--browser-path <path> Explicit Chrome binary path
|
||||
--chrome-profile-dir <path>
|
||||
Chrome user data dir. Defaults to BAOYU_CHROME_PROFILE_DIR
|
||||
or baoyu-skills/chrome-profile.
|
||||
--headless Launch a temporary headless Chrome if needed
|
||||
--wait-for <mode> Wait mode: interaction | force
|
||||
interaction: start visible Chrome and auto-wait only when login or verification is required
|
||||
force: start visible Chrome, then auto-continue after it detects login/challenge progress
|
||||
or continue immediately when you press Enter
|
||||
--wait-for-interaction
|
||||
Alias for --wait-for interaction
|
||||
--wait-for-login Alias for --wait-for interaction
|
||||
--interaction-timeout <ms>
|
||||
How long to wait for manual interaction before failing (default: 600000)
|
||||
--interaction-poll-interval <ms>
|
||||
How often to poll interaction state while waiting (default: 1500)
|
||||
--login-timeout <ms> Alias for --interaction-timeout
|
||||
--login-poll-interval <ms>
|
||||
Alias for --interaction-poll-interval
|
||||
--timeout <ms> Page timeout in milliseconds (default: 30000)
|
||||
--help Show help
|
||||
|
||||
Examples:
|
||||
baoyu-fetch https://example.com
|
||||
baoyu-fetch https://example.com --format markdown --output article.md --download-media
|
||||
baoyu-fetch https://example.com --format json --output article.json
|
||||
baoyu-fetch https://x.com/lennysan/status/2036483059407810640 --wait-for interaction
|
||||
baoyu-fetch https://x.com/lennysan/status/2036483059407810640 --wait-for force
|
||||
`.trim();
|
||||
|
||||
interface CliOptions extends ConvertCommandOptions {
|
||||
url?: string;
|
||||
help: boolean;
|
||||
}
|
||||
|
||||
function normalizeWaitMode(raw: string): WaitMode {
|
||||
const value = raw.toLowerCase();
|
||||
if (value === "interaction" || value === "auto") {
|
||||
return "interaction";
|
||||
}
|
||||
if (value === "force" || value === "manual" || value === "always") {
|
||||
return "force";
|
||||
}
|
||||
throw new Error(`Invalid wait mode: ${raw}. Expected interaction or force.`);
|
||||
}
|
||||
|
||||
function normalizeOutputFormat(raw: string): OutputFormat {
|
||||
const value = raw.toLowerCase();
|
||||
if (value === "markdown" || value === "json") {
|
||||
return value;
|
||||
}
|
||||
|
||||
throw new Error(`Invalid output format: ${raw}. Expected markdown or json.`);
|
||||
}
|
||||
|
||||
export function parseArgs(argv: string[]): CliOptions {
|
||||
const options: CliOptions = {
|
||||
format: "markdown",
|
||||
headless: false,
|
||||
downloadMedia: false,
|
||||
waitMode: "none",
|
||||
interactionTimeoutMs: 600_000,
|
||||
interactionPollIntervalMs: 1_500,
|
||||
timeoutMs: 30_000,
|
||||
help: false,
|
||||
};
|
||||
|
||||
const args = argv.slice(2);
|
||||
for (let index = 0; index < args.length; index += 1) {
|
||||
const value = args[index];
|
||||
|
||||
if (value === "--help" || value === "-h") {
|
||||
options.help = true;
|
||||
continue;
|
||||
}
|
||||
if (value === "--format") {
|
||||
const format = args[index + 1];
|
||||
if (!format) {
|
||||
throw new Error("--format requires a value");
|
||||
}
|
||||
options.format = normalizeOutputFormat(format);
|
||||
index += 1;
|
||||
continue;
|
||||
}
|
||||
if (value === "--json") {
|
||||
options.format = "json";
|
||||
continue;
|
||||
}
|
||||
if (value === "--download-media") {
|
||||
options.downloadMedia = true;
|
||||
continue;
|
||||
}
|
||||
if (value === "--headless") {
|
||||
options.headless = true;
|
||||
continue;
|
||||
}
|
||||
if (value === "--wait-for") {
|
||||
const mode = args[index + 1];
|
||||
if (!mode) {
|
||||
throw new Error("--wait-for requires a mode");
|
||||
}
|
||||
options.waitMode = normalizeWaitMode(mode);
|
||||
index += 1;
|
||||
continue;
|
||||
}
|
||||
if (value === "--wait-for-interaction" || value === "--wait-for-login") {
|
||||
options.waitMode = "interaction";
|
||||
continue;
|
||||
}
|
||||
if (value === "--output") {
|
||||
options.output = args[index + 1];
|
||||
index += 1;
|
||||
continue;
|
||||
}
|
||||
if (value === "--adapter") {
|
||||
options.adapter = args[index + 1];
|
||||
index += 1;
|
||||
continue;
|
||||
}
|
||||
if (value === "--debug-dir") {
|
||||
options.debugDir = args[index + 1];
|
||||
index += 1;
|
||||
continue;
|
||||
}
|
||||
if (value === "--media-dir") {
|
||||
options.mediaDir = args[index + 1];
|
||||
index += 1;
|
||||
continue;
|
||||
}
|
||||
if (value === "--cdp-url") {
|
||||
options.cdpUrl = args[index + 1];
|
||||
index += 1;
|
||||
continue;
|
||||
}
|
||||
if (value === "--browser-path") {
|
||||
options.browserPath = args[index + 1];
|
||||
index += 1;
|
||||
continue;
|
||||
}
|
||||
if (value === "--chrome-profile-dir") {
|
||||
options.chromeProfileDir = args[index + 1];
|
||||
index += 1;
|
||||
continue;
|
||||
}
|
||||
if (value === "--timeout") {
|
||||
const parsed = Number(args[index + 1]);
|
||||
if (!Number.isFinite(parsed) || parsed <= 0) {
|
||||
throw new Error(`Invalid timeout: ${args[index + 1]}`);
|
||||
}
|
||||
options.timeoutMs = parsed;
|
||||
index += 1;
|
||||
continue;
|
||||
}
|
||||
if (value === "--interaction-timeout" || value === "--login-timeout") {
|
||||
const parsed = Number(args[index + 1]);
|
||||
if (!Number.isFinite(parsed) || parsed <= 0) {
|
||||
throw new Error(`Invalid interaction timeout: ${args[index + 1]}`);
|
||||
}
|
||||
options.interactionTimeoutMs = parsed;
|
||||
index += 1;
|
||||
continue;
|
||||
}
|
||||
if (value === "--interaction-poll-interval" || value === "--login-poll-interval") {
|
||||
const parsed = Number(args[index + 1]);
|
||||
if (!Number.isFinite(parsed) || parsed <= 0) {
|
||||
throw new Error(`Invalid interaction poll interval: ${args[index + 1]}`);
|
||||
}
|
||||
options.interactionPollIntervalMs = parsed;
|
||||
index += 1;
|
||||
continue;
|
||||
}
|
||||
if (value.startsWith("-")) {
|
||||
throw new Error(`Unknown option: ${value}`);
|
||||
}
|
||||
if (!options.url) {
|
||||
options.url = value;
|
||||
continue;
|
||||
}
|
||||
throw new Error(`Unexpected argument: ${value}`);
|
||||
}
|
||||
|
||||
return options;
|
||||
}
|
||||
|
||||
async function main(): Promise<void> {
|
||||
try {
|
||||
const options = parseArgs(process.argv);
|
||||
if (options.help || !options.url) {
|
||||
console.log(HELP_TEXT);
|
||||
return;
|
||||
}
|
||||
|
||||
await runConvertCommand(options);
|
||||
} catch (error) {
|
||||
const message = error instanceof Error ? error.message : String(error);
|
||||
console.error(message);
|
||||
process.exitCode = 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (import.meta.main) {
|
||||
void main();
|
||||
}
|
||||
|
|
@ -0,0 +1,580 @@
|
|||
import { mkdir, writeFile } from "node:fs/promises";
import { dirname, join } from "node:path";
import { createInterface } from "node:readline";
|
||||
import { connectChrome, type ChromeConnection } from "../browser/chrome-launcher";
|
||||
import { CdpClient } from "../browser/cdp-client";
|
||||
import { detectInteractionGate } from "../browser/interaction-gates";
|
||||
import { NetworkJournal } from "../browser/network-journal";
|
||||
import { BrowserSession } from "../browser/session";
|
||||
import { genericAdapter, resolveAdapter } from "../adapters";
|
||||
import { isXSessionReady } from "../adapters/x/session";
|
||||
import type { ExtractedDocument } from "../extract/document";
|
||||
import { renderMarkdown } from "../extract/markdown-renderer";
|
||||
import { downloadMediaAssets } from "../media/default-downloader";
|
||||
import { rewriteMarkdownMediaLinks } from "../media/markdown-media";
|
||||
import { createLogger } from "../utils/logger";
|
||||
import { normalizeUrl } from "../utils/url";
|
||||
import type {
|
||||
Adapter,
|
||||
AdapterContext,
|
||||
AdapterLoginInfo,
|
||||
LoginState,
|
||||
MediaAsset,
|
||||
WaitForInteractionRequest,
|
||||
} from "../adapters/types";
|
||||
|
||||
export type WaitMode = "none" | "interaction" | "force";
|
||||
export type OutputFormat = "markdown" | "json";
|
||||
|
||||
export interface ConvertCommandOptions {
|
||||
url?: string;
|
||||
output?: string;
|
||||
format: OutputFormat;
|
||||
adapter?: string;
|
||||
debugDir?: string;
|
||||
cdpUrl?: string;
|
||||
browserPath?: string;
|
||||
chromeProfileDir?: string;
|
||||
headless: boolean;
|
||||
downloadMedia: boolean;
|
||||
mediaDir?: string;
|
||||
waitMode: WaitMode;
|
||||
interactionTimeoutMs: number;
|
||||
interactionPollIntervalMs: number;
|
||||
timeoutMs: number;
|
||||
}
|
||||
|
||||
interface RuntimeResources {
|
||||
chrome: ChromeConnection;
|
||||
cdp: CdpClient;
|
||||
browser: BrowserSession;
|
||||
network: NetworkJournal;
|
||||
interactive: boolean;
|
||||
}
|
||||
|
||||
interface ForceWaitSnapshot {
|
||||
url: string;
|
||||
hasGate: boolean;
|
||||
loginState: LoginState | "unavailable";
|
||||
sessionReady: boolean;
|
||||
}
|
||||
|
||||
interface SuccessfulConvertOutput {
|
||||
adapter: string;
|
||||
status: "ok";
|
||||
login?: AdapterLoginInfo;
|
||||
media: MediaAsset[];
|
||||
downloads: Awaited<ReturnType<typeof downloadMediaAssets>> | null;
|
||||
document: ExtractedDocument;
|
||||
markdown: string;
|
||||
}
|
||||
|
||||
interface InteractionRequiredOutput {
|
||||
adapter: string;
|
||||
status: "needs_interaction";
|
||||
login?: AdapterLoginInfo;
|
||||
interaction: WaitForInteractionRequest;
|
||||
}
|
||||
|
||||
function sleep(ms: number): Promise<void> {
|
||||
return new Promise((resolve) => setTimeout(resolve, ms));
|
||||
}
|
||||
|
||||
function isForceWaitSessionReady(snapshot: ForceWaitSnapshot): boolean {
|
||||
return snapshot.sessionReady;
|
||||
}
|
||||
|
||||
export function shouldKeepBrowserOpenAfterInteraction(options: {
|
||||
launched: boolean;
|
||||
interaction: Pick<WaitForInteractionRequest, "kind" | "provider">;
|
||||
}): boolean {
|
||||
return options.launched && options.interaction.kind === "login" && options.interaction.provider === "x";
|
||||
}
|
||||
|
||||
export function shouldAutoContinueForceWait(
|
||||
initial: ForceWaitSnapshot,
|
||||
current: ForceWaitSnapshot,
|
||||
): boolean {
|
||||
if (initial.hasGate && !current.hasGate) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (initial.loginState === "logged_out" && current.loginState !== "logged_out" && isForceWaitSessionReady(current)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (initial.loginState !== "logged_in" && current.loginState === "logged_in" && isForceWaitSessionReady(current)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (
|
||||
current.url !== initial.url &&
|
||||
!current.hasGate &&
|
||||
current.loginState !== "logged_out" &&
|
||||
isForceWaitSessionReady(current)
|
||||
) {
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
async function writeOutput(path: string, content: string): Promise<void> {
|
||||
const directory = path.includes("/") ? path.slice(0, path.lastIndexOf("/")) : "";
|
||||
if (directory) {
|
||||
await mkdir(directory, { recursive: true });
|
||||
}
|
||||
await writeFile(path, content, "utf8");
|
||||
}
|
||||
|
||||
async function writeDebugArtifacts(
|
||||
debugDir: string,
|
||||
document: ExtractedDocument,
|
||||
markdown: string,
|
||||
browser: BrowserSession,
|
||||
network: NetworkJournal,
|
||||
): Promise<void> {
|
||||
await mkdir(debugDir, { recursive: true });
|
||||
|
||||
const html = await browser.getHTML().catch(() => "");
|
||||
const networkDump = await network.toJSON({ includeBodies: true });
|
||||
|
||||
await Promise.all([
|
||||
writeFile(join(debugDir, "document.json"), JSON.stringify(document, null, 2), "utf8"),
|
||||
writeFile(join(debugDir, "markdown.md"), markdown, "utf8"),
|
||||
writeFile(join(debugDir, "page.html"), html, "utf8"),
|
||||
writeFile(join(debugDir, "network.json"), JSON.stringify(networkDump, null, 2), "utf8"),
|
||||
]);
|
||||
}
|
||||
|
||||
/**
 * Connects to (or launches) Chrome, opens a page session, and starts a
 * network journal on it. In interactive mode Chrome is forced non-headless
 * and the window is brought to the foreground (best-effort).
 */
async function openRuntime(
  options: ConvertCommandOptions,
  interactive: boolean,
  debugEnabled: boolean,
): Promise<RuntimeResources> {
  const logger = createLogger(debugEnabled);
  if (interactive) {
    logger.info("Opening Chrome in interactive mode.");
  }
  const chrome = await connectChrome({
    cdpUrl: options.cdpUrl,
    browserPath: options.browserPath,
    profileDir: options.chromeProfileDir,
    // Interactive runs must be visible regardless of --headless.
    headless: interactive ? false : options.headless,
    logger,
  });

  const cdp = await CdpClient.connect(chrome.browserWsUrl);
  const browser = await BrowserSession.open(cdp, { interactive });
  if (interactive) {
    await browser.bringToFront().catch(() => {});
  }
  // Start journaling before any navigation so early requests are captured.
  const network = new NetworkJournal(browser.targetSession, logger);
  await network.start();

  return {
    chrome,
    cdp,
    browser,
    network,
    interactive,
  };
}
|
||||
|
||||
async function closeRuntime(runtime: RuntimeResources | null | undefined): Promise<void> {
|
||||
if (!runtime) {
|
||||
return;
|
||||
}
|
||||
runtime.network.stop();
|
||||
await runtime.browser.close().catch(() => {});
|
||||
await runtime.cdp.close().catch(() => {});
|
||||
await runtime.chrome.close().catch(() => {});
|
||||
}
|
||||
|
||||
async function isInteractionSessionReady(
|
||||
context: AdapterContext,
|
||||
interaction: WaitForInteractionRequest,
|
||||
): Promise<boolean> {
|
||||
if (interaction.provider !== "x") {
|
||||
return true;
|
||||
}
|
||||
return await isXSessionReady(context).catch(() => false);
|
||||
}
|
||||
|
||||
async function reopenInteractiveRuntime(
|
||||
runtime: RuntimeResources,
|
||||
options: ConvertCommandOptions,
|
||||
debugEnabled: boolean,
|
||||
): Promise<RuntimeResources> {
|
||||
if (runtime.interactive) {
|
||||
return runtime;
|
||||
}
|
||||
|
||||
await closeRuntime(runtime);
|
||||
return openRuntime(options, true, debugEnabled);
|
||||
}
|
||||
|
||||
async function captureForceWaitSnapshot(
|
||||
adapter: Adapter,
|
||||
context: AdapterContext,
|
||||
): Promise<ForceWaitSnapshot> {
|
||||
const [gate, url, login] = await Promise.all([
|
||||
detectInteractionGate(context.browser).catch(() => null),
|
||||
context.browser.getURL().catch(() => context.input.url.toString()),
|
||||
adapter.checkLogin?.(context).catch(() => ({
|
||||
provider: adapter.name,
|
||||
state: "unknown" as const,
|
||||
})),
|
||||
]);
|
||||
|
||||
return {
|
||||
url,
|
||||
hasGate: Boolean(gate),
|
||||
loginState: login?.state ?? "unavailable",
|
||||
sessionReady: adapter.name === "x" ? await isXSessionReady(context).catch(() => false) : true,
|
||||
};
|
||||
}
|
||||
|
||||
async function waitForForceResume(
|
||||
adapter: Adapter,
|
||||
context: AdapterContext,
|
||||
options: ConvertCommandOptions,
|
||||
): Promise<void> {
|
||||
if (context.interactive) {
|
||||
await context.browser.bringToFront().catch(() => {});
|
||||
}
|
||||
|
||||
const prompt =
|
||||
"Chrome is ready. Complete any manual login or verification. Extraction will continue automatically after it detects progress, or press Enter to continue immediately.";
|
||||
context.log.info(prompt);
|
||||
|
||||
const rl = createInterface({
|
||||
input: process.stdin,
|
||||
output: process.stderr,
|
||||
});
|
||||
|
||||
let manualContinue = false;
|
||||
let closed = false;
|
||||
const closeReadline = (): void => {
|
||||
if (!closed) {
|
||||
closed = true;
|
||||
rl.close();
|
||||
}
|
||||
};
|
||||
|
||||
rl.once("line", () => {
|
||||
manualContinue = true;
|
||||
closeReadline();
|
||||
});
|
||||
|
||||
const initial = await captureForceWaitSnapshot(adapter, context);
|
||||
const startedAt = Date.now();
|
||||
|
||||
try {
|
||||
while (Date.now() - startedAt < options.interactionTimeoutMs) {
|
||||
if (manualContinue) {
|
||||
return;
|
||||
}
|
||||
|
||||
const current = await captureForceWaitSnapshot(adapter, context);
|
||||
if (shouldAutoContinueForceWait(initial, current)) {
|
||||
return;
|
||||
}
|
||||
|
||||
await sleep(options.interactionPollIntervalMs);
|
||||
}
|
||||
} finally {
|
||||
closeReadline();
|
||||
}
|
||||
|
||||
throw new Error("Timed out waiting for force-mode interaction to complete");
|
||||
}
|
||||
|
||||
/**
 * Poll until a requested manual interaction (login or challenge) is resolved.
 *
 * Resolution rules per poll iteration:
 *  - "login" interactions resolve as soon as checkLogin reports logged_in
 *    AND the provider session is ready.
 *  - Non-login interactions resolve once no interaction gate is detected.
 *  - If the gate clears for a login interaction, a final checkLogin decides:
 *    any state other than logged_out (plus session readiness) resolves.
 *
 * @returns The last observed login info (or a synthetic "unknown" record when
 *          the adapter cannot report login state).
 * @throws  Error when the timeout elapses; includes the last login reason.
 */
async function waitForInteraction(
  adapter: Adapter,
  context: AdapterContext,
  interaction: WaitForInteractionRequest,
  options: ConvertCommandOptions,
): Promise<AdapterLoginInfo> {
  // Per-interaction overrides take precedence over the global CLI values.
  const timeoutMs = interaction.timeoutMs ?? options.interactionTimeoutMs;
  const pollIntervalMs = interaction.pollIntervalMs ?? options.interactionPollIntervalMs;
  if (context.interactive) {
    // Best-effort focus; ignore failures.
    await context.browser.bringToFront().catch(() => {});
  }
  context.log.info(interaction.prompt);

  const startedAt = Date.now();
  let lastLogin: AdapterLoginInfo | null = null;

  while (Date.now() - startedAt < timeoutMs) {
    // Fast path for logins: succeed without waiting for the gate to clear.
    if (interaction.kind === "login" && adapter.checkLogin) {
      lastLogin = await adapter.checkLogin(context);
      if (lastLogin.state === "logged_in" && await isInteractionSessionReady(context, interaction)) {
        return lastLogin;
      }
    }

    const gate = await detectInteractionGate(context.browser);
    if (!gate) {
      // Gate is gone. Non-login challenges are done at this point.
      if (interaction.kind !== "login") {
        return lastLogin ?? {
          provider: interaction.provider,
          state: "unknown",
          reason: `${interaction.provider} challenge cleared`,
        };
      }

      // Login interaction but the adapter cannot verify login state.
      if (!adapter.checkLogin) {
        return {
          provider: interaction.provider,
          state: "unknown",
        };
      }

      // Gate cleared: accept anything that is not an explicit logged_out.
      lastLogin = await adapter.checkLogin(context);
      if (lastLogin.state !== "logged_out" && await isInteractionSessionReady(context, interaction)) {
        return lastLogin;
      }
    }
    await sleep(pollIntervalMs);
  }

  const reason = lastLogin?.reason ? ` (${lastLogin.reason})` : "";
  throw new Error(`Timed out waiting for ${interaction.provider} interaction${reason}`);
}
|
||||
|
||||
export function formatOutputContent(
|
||||
format: OutputFormat,
|
||||
payload: SuccessfulConvertOutput | InteractionRequiredOutput,
|
||||
): string {
|
||||
if (format === "json") {
|
||||
return JSON.stringify(payload, null, 2);
|
||||
}
|
||||
|
||||
if (payload.status !== "ok") {
|
||||
throw new Error("Markdown output is only available for successful extraction results");
|
||||
}
|
||||
|
||||
return payload.markdown;
|
||||
}
|
||||
|
||||
function printOutput(content: string): void {
|
||||
process.stdout.write(content);
|
||||
if (!content.endsWith("\n")) {
|
||||
process.stdout.write("\n");
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Entry point of the convert command. High-level flow:
 *  1. Validate options and open a browser runtime (interactive if waiting).
 *  2. Resolve the adapter, restore cookies, and handle pre-navigation login
 *     ("interaction" wait mode) or a manual pause ("force" wait mode).
 *  3. Run the adapter; while it reports needs_interaction, reopen a visible
 *     browser, wait for the user, and retry.
 *  4. Fall back to generic extraction when the adapter yields no document.
 *  5. Render markdown, optionally download media, write output/debug
 *     artifacts, and print to stdout.
 * Cookies are exported and the runtime closed in the finally block.
 */
export async function runConvertCommand(options: ConvertCommandOptions): Promise<void> {
  if (!options.url) {
    throw new Error("URL is required");
  }
  // Media paths are rewritten relative to the output file, so one cannot be
  // requested without the other.
  if (options.downloadMedia && !options.output) {
    throw new Error("--download-media requires --output so media paths can be rewritten relative to the saved output file");
  }

  const url = normalizeUrl(options.url);
  // Any wait mode implies the user may need to see the browser.
  let runtime = await openRuntime(options, options.waitMode !== "none", Boolean(options.debugDir));
  const logger = createLogger(Boolean(options.debugDir));
  // NOTE(review): didLogin is assigned below but never read in this function
  // — confirm whether it is still needed.
  let didLogin = false;
  let adapter: Adapter | null = null;
  let context: AdapterContext | null = null;

  try {
    adapter = resolveAdapter({ url }, options.adapter);
    context = {
      input: { url },
      browser: runtime.browser,
      network: runtime.network,
      cdp: runtime.cdp,
      log: logger,
      outputFormat: options.format,
      timeoutMs: options.timeoutMs,
      interactive: runtime.interactive,
      downloadMedia: options.downloadMedia,
    };

    // Best-effort cookie restore from the adapter's sidecar store.
    if (adapter.restoreCookies) {
      const restored = await adapter.restoreCookies(context, runtime.chrome.profileDir).catch(() => false);
      if (restored) logger.info(`Restored ${adapter.name} session cookies from sidecar.`);
    }

    // Pre-flight login check: navigate first, then wait for the user to sign
    // in if the adapter reports anything other than logged_in.
    if (options.waitMode === "interaction" && adapter.checkLogin) {
      await context.browser.goto(url.toString(), options.timeoutMs).catch(() => {});
      const preLogin = await adapter.checkLogin(context);
      if (preLogin.state !== "logged_in") {
        didLogin = true;
        await waitForInteraction(adapter, context, {
          type: "wait_for_interaction",
          kind: "login",
          provider: preLogin.provider ?? adapter.name,
          prompt: `Please sign in to ${adapter.name === "x" ? "X" : adapter.name} in the opened Chrome window. Extraction will continue automatically once login is detected.`,
          reason: preLogin.reason ?? `Not logged in to ${adapter.name}`,
          requiresVisibleBrowser: true,
        }, options);
      }
    }

    // Force mode: unconditionally pause for manual intervention.
    if (options.waitMode === "force") {
      await context.browser.goto(url.toString(), options.timeoutMs).catch(() => {});
      await waitForForceResume(adapter, context, options);
    }

    let result = await adapter.process(context);

    // An empty result behind a detected gate is really a blocked interaction.
    if (result.status === "no_document") {
      const interaction = await detectInteractionGate(context.browser);
      if (interaction) {
        result = {
          status: "needs_interaction",
          interaction,
          login: result.login,
        };
      }
    }

    // Interaction loop: keep retrying extraction until it stops demanding
    // manual input (or we are not allowed to wait).
    while (result.status === "needs_interaction") {
      if (options.waitMode === "none") {
        // In JSON mode the pending interaction is itself a valid result.
        if (options.format === "json") {
          printOutput(
            formatOutputContent(options.format, {
              adapter: adapter.name,
              status: result.status,
              login: result.login,
              interaction: result.interaction,
            }),
          );
          return;
        }

        throw new Error(`${adapter.name} requires manual interaction. Re-run with --wait-for interaction to continue after completing it.`);
      }

    if (result.interaction.requiresVisibleBrowser !== false) {
      runtime = await reopenInteractiveRuntime(runtime, options, Boolean(options.debugDir));
    }

      // Rebuild the context: reopenInteractiveRuntime may have replaced every
      // resource inside runtime.
      context = {
        input: { url },
        browser: runtime.browser,
        network: runtime.network,
        cdp: runtime.cdp,
        log: logger,
        outputFormat: options.format,
        timeoutMs: options.timeoutMs,
        interactive: runtime.interactive,
        downloadMedia: options.downloadMedia,
      };

      await context.browser.goto(url.toString(), options.timeoutMs).catch(() => {});
      if (result.interaction.kind === "login") {
        didLogin = true;
      }
      await waitForInteraction(adapter, context, result.interaction, options);
      result = await adapter.process(context);

      // Same no_document → needs_interaction promotion as above, so the loop
      // can continue if another gate appeared.
      if (result.status === "no_document") {
        const interaction = await detectInteractionGate(context.browser);
        if (interaction) {
          result = {
            status: "needs_interaction",
            interaction,
            login: result.login,
          };
        }
      }
    }

    let document: ExtractedDocument | null = result.status === "ok" ? result.document : null;
    let media: MediaAsset[] = result.status === "ok" ? (result.media ?? []) : [];
    let login = result.login;
    // Track which adapter produced the media so its downloader is used.
    let mediaAdapter = adapter;

    // Last resort: generic readability-style extraction.
    if (!document && adapter.name !== genericAdapter.name && result.status === "no_document") {
      logger.info(`Adapter ${adapter.name} returned no structured document; falling back to generic extraction`);
      const fallback = await genericAdapter.process(context);
      if (fallback.status === "ok") {
        document = fallback.document;
        media = fallback.media ?? [];
        mediaAdapter = genericAdapter;
      }
    }

    if (!document) {
      throw new Error("Failed to extract a document from the target URL");
    }

    document.requestedUrl ??= url.toString();

    let markdown = renderMarkdown(document);
    let downloadResult:
      | Awaited<ReturnType<typeof downloadMediaAssets>>
      | null = null;

    if (options.downloadMedia && options.output) {
      // Prefer the adapter's own downloader; fall back to the generic one.
      downloadResult = mediaAdapter.downloadMedia
        ? await mediaAdapter.downloadMedia({
            media,
            outputPath: options.output,
            mediaDir: options.mediaDir,
            log: logger,
          })
        : await downloadMediaAssets({
            media,
            outputPath: options.output,
            mediaDir: options.mediaDir,
            log: logger,
          });

      // Point markdown links at the locally downloaded files.
      markdown = rewriteMarkdownMediaLinks(markdown, downloadResult.replacements);
      if (downloadResult.downloadedImages > 0 || downloadResult.downloadedVideos > 0) {
        logger.info(
          `Downloaded ${downloadResult.downloadedImages} images and ${downloadResult.downloadedVideos} videos`,
        );
      }
    }

    if (options.output) {
      await writeOutput(
        options.output,
        formatOutputContent(options.format, {
          adapter: document.adapter ?? adapter.name,
          status: "ok",
          login,
          media,
          downloads: downloadResult,
          document,
          markdown,
        }),
      );
      logger.info(`Saved ${options.format} to ${options.output}`);
    }

    if (options.debugDir) {
      await writeDebugArtifacts(options.debugDir, document, markdown, runtime.browser, runtime.network);
      logger.info(`Wrote debug artifacts to ${options.debugDir}`);
    }

    // JSON results go to stdout even when also written to a file.
    if (options.format === "json") {
      printOutput(
        formatOutputContent(options.format, {
          adapter: document.adapter ?? adapter.name,
          status: "ok",
          login,
          media,
          downloads: downloadResult,
          document,
          markdown,
        }),
      );
      return;
    }

    printOutput(markdown);
  } finally {
    // Persist session cookies for next run, then release all resources.
    if (adapter?.exportCookies && context) {
      await adapter.exportCookies(context, runtime.chrome.profileDir).catch(() => {});
    }
    await closeRuntime(runtime);
  }
}
|
||||
|
|
@ -0,0 +1,51 @@
|
|||
/**
 * One structural unit of an extracted document's body, discriminated by
 * `type`. Renderers (e.g. the markdown renderer) switch on this tag.
 */
export type ContentBlock =
  | {
      // Plain paragraph of text.
      type: "paragraph";
      text: string;
    }
  | {
      // Section heading; depth follows HTML convention (1 = top level).
      type: "heading";
      depth: number;
      text: string;
    }
  | {
      // Flat list; `ordered` selects numbered vs bulleted rendering.
      type: "list";
      ordered: boolean;
      items: string[];
    }
  | {
      // Block quotation.
      type: "quote";
      text: string;
    }
  | {
      // Fenced code block with optional language tag.
      type: "code";
      code: string;
      language?: string;
    }
  | {
      // Image reference with optional alt text.
      type: "image";
      url: string;
      alt?: string;
    }
  | {
      // Raw HTML passed through for later conversion.
      type: "html";
      html: string;
    }
  | {
      // Pre-rendered markdown passed through verbatim.
      type: "markdown";
      markdown: string;
    };
|
||||
|
||||
/**
 * Normalized result of extracting a web page, independent of which adapter
 * produced it. Only `url` and `content` are guaranteed; all metadata fields
 * are best-effort.
 */
export interface ExtractedDocument {
  // Final URL the content was extracted from.
  url: string;
  // URL originally requested by the user (may differ after redirects).
  requestedUrl?: string;
  // Canonical URL declared by the page, when present.
  canonicalUrl?: string;
  title?: string;
  author?: string;
  siteName?: string;
  // Publication timestamp as found in page metadata (format not normalized).
  publishedAt?: string;
  summary?: string;
  // Ordered body blocks; see ContentBlock.
  content: ContentBlock[];
  // Free-form extras (e.g. detected language).
  metadata?: Record<string, unknown>;
  // Name of the adapter that produced this document.
  adapter?: string;
}
|
||||
|
|
@ -0,0 +1,467 @@
|
|||
import { JSDOM } from "jsdom";
|
||||
|
||||
/** Options controlling how aggressively cleanHtml strips a page. */
export interface CleanHtmlOptions {
  // Strip ad/tracking elements (default true in cleanHtml).
  removeAds?: boolean;
  // Strip data:-URI images and inline base64 backgrounds (default true).
  removeBase64Images?: boolean;
  // Reduce the body to the detected main-content element (default true).
  onlyMainContent?: boolean;
  // When given, the body is replaced with only these selectors' matches.
  includeSelectors?: string[];
  // Additional selectors to remove before main-content detection.
  excludeSelectors?: string[];
}
|
||||
|
||||
// Elements that never contribute to readable content: scripts, styles,
// hidden nodes, form controls, embeds, etc. Always removed.
const ALWAYS_REMOVE_SELECTORS = [
  "script",
  "style",
  "noscript",
  "link[rel='stylesheet']",
  "[hidden]",
  "[aria-hidden='true']",
  "[style*='display: none']",
  "[style*='display:none']",
  "[style*='visibility: hidden']",
  "[style*='visibility:hidden']",
  "svg[aria-hidden='true']",
  "svg.icon",
  "svg[class*='icon']",
  "template",
  "meta",
  "iframe",
  "canvas",
  "object",
  "embed",
  "form",
  "input",
  "select",
  "textarea",
  "button",
];

// Modals, cookie/consent banners, and fixed/sticky chrome that overlays the
// content. Always removed.
const OVERLAY_SELECTORS = [
  "[class*='modal']",
  "[class*='popup']",
  "[class*='overlay']",
  "[class*='dialog']",
  "[role='dialog']",
  "[role='alertdialog']",
  "[class*='cookie']",
  "[class*='consent']",
  "[class*='gdpr']",
  "[class*='privacy-banner']",
  "[class*='notification-bar']",
  "[id*='cookie']",
  "[id*='consent']",
  "[id*='gdpr']",
  "[style*='position: fixed']",
  "[style*='position:fixed']",
  "[style*='position: sticky']",
  "[style*='position:sticky']",
];

// Navigation/boilerplate removed only in onlyMainContent mode, and only when
// not protected by FORCE_INCLUDE_SELECTORS (see removeWithProtection).
const NAVIGATION_SELECTORS = [
  "header",
  "footer",
  "nav",
  "aside",
  ".header",
  ".top",
  ".navbar",
  "#header",
  ".footer",
  ".bottom",
  "#footer",
  ".sidebar",
  ".side",
  ".aside",
  "#sidebar",
  ".modal",
  ".popup",
  "#modal",
  ".overlay",
  ".ad",
  ".ads",
  ".advert",
  "#ad",
  ".lang-selector",
  ".language",
  "#language-selector",
  ".social",
  ".social-media",
  ".social-links",
  "#social",
  ".menu",
  ".navigation",
  "#nav",
  ".breadcrumbs",
  "#breadcrumbs",
  ".share",
  "#share",
  ".widget",
  "#widget",
  ".cookie",
  "#cookie",
];

// Elements that must survive navigation stripping — likely main-content
// containers that generic selectors above could otherwise match or contain.
const FORCE_INCLUDE_SELECTORS = [
  "#main",
  "#content",
  "#main-content",
  "#article",
  "#post",
  "#page-content",
  "main",
  "article",
  "[role='main']",
  ".main-content",
  ".content",
  ".post-content",
  ".article-content",
  ".entry-content",
  ".page-content",
  ".article-body",
  ".post-body",
  ".story-content",
  ".blog-content",
];

// Ad units and 1x1 tracking pixels, removed when removeAds is enabled.
const AD_SELECTORS = [
  "ins.adsbygoogle",
  ".google-ad",
  ".adsense",
  "[data-ad]",
  "[data-ads]",
  "[data-ad-slot]",
  "[data-ad-client]",
  ".ad-container",
  ".ad-wrapper",
  ".advertisement",
  ".sponsored-content",
  "img[width='1'][height='1']",
  "img[src*='pixel']",
  "img[src*='tracking']",
  "img[src*='analytics']",
];
|
||||
|
||||
function getLinkDensity(element: Element): number {
|
||||
const text = element.textContent || "";
|
||||
const textLength = text.trim().length;
|
||||
if (textLength === 0) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
let linkLength = 0;
|
||||
element.querySelectorAll("a").forEach((link) => {
|
||||
linkLength += (link.textContent || "").trim().length;
|
||||
});
|
||||
|
||||
return linkLength / textLength;
|
||||
}
|
||||
|
||||
function getContentScore(element: Element): number {
|
||||
let score = 0;
|
||||
const text = element.textContent || "";
|
||||
const textLength = text.trim().length;
|
||||
|
||||
score += Math.min(textLength / 100, 50);
|
||||
score += element.querySelectorAll("p").length * 3;
|
||||
score += element.querySelectorAll("h1, h2, h3, h4, h5, h6").length * 2;
|
||||
score += element.querySelectorAll("img").length;
|
||||
|
||||
score -= element.querySelectorAll("a").length * 0.5;
|
||||
score -= element.querySelectorAll("li").length * 0.2;
|
||||
|
||||
const linkDensity = getLinkDensity(element);
|
||||
if (linkDensity > 0.5) {
|
||||
score -= 30;
|
||||
} else if (linkDensity > 0.3) {
|
||||
score -= 15;
|
||||
}
|
||||
|
||||
const className = typeof element.className === "string" ? element.className : "";
|
||||
const classAndId = `${className} ${element.id || ""}`;
|
||||
if (/article|content|post|body|main|entry/i.test(classAndId)) {
|
||||
score += 25;
|
||||
}
|
||||
if (/comment|sidebar|footer|nav|menu|header|widget|ad/i.test(classAndId)) {
|
||||
score -= 25;
|
||||
}
|
||||
|
||||
return score;
|
||||
}
|
||||
|
||||
function looksLikeNavigation(element: Element): boolean {
|
||||
const linkDensity = getLinkDensity(element);
|
||||
if (linkDensity > 0.5) {
|
||||
return true;
|
||||
}
|
||||
|
||||
const listItems = element.querySelectorAll("li");
|
||||
const links = element.querySelectorAll("a");
|
||||
return listItems.length > 5 && links.length > listItems.length * 0.8;
|
||||
}
|
||||
|
||||
function removeElements(document: Document, selectors: string[]): void {
|
||||
for (const selector of selectors) {
|
||||
try {
|
||||
document.querySelectorAll(selector).forEach((element) => element.remove());
|
||||
} catch {
|
||||
// Ignore unsupported selectors.
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
function removeWithProtection(
|
||||
document: Document,
|
||||
selectorsToRemove: string[],
|
||||
protectedSelectors: string[],
|
||||
): void {
|
||||
for (const selector of selectorsToRemove) {
|
||||
try {
|
||||
document.querySelectorAll(selector).forEach((element) => {
|
||||
const isProtected = protectedSelectors.some((protectedSelector) => {
|
||||
try {
|
||||
return element.matches(protectedSelector);
|
||||
} catch {
|
||||
return false;
|
||||
}
|
||||
});
|
||||
|
||||
if (isProtected) {
|
||||
return;
|
||||
}
|
||||
|
||||
const containsProtected = protectedSelectors.some((protectedSelector) => {
|
||||
try {
|
||||
return element.querySelector(protectedSelector) !== null;
|
||||
} catch {
|
||||
return false;
|
||||
}
|
||||
});
|
||||
|
||||
if (containsProtected) {
|
||||
return;
|
||||
}
|
||||
|
||||
element.remove();
|
||||
});
|
||||
} catch {
|
||||
// Ignore unsupported selectors.
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
function isValidContent(element: Element | null): element is Element {
|
||||
if (!element) {
|
||||
return false;
|
||||
}
|
||||
const text = element.textContent || "";
|
||||
if (text.trim().length < 100) {
|
||||
return false;
|
||||
}
|
||||
return !looksLikeNavigation(element);
|
||||
}
|
||||
|
||||
/**
 * Locate the page's main content element via a cascade of heuristics, tried
 * in order of reliability:
 *   1. <main> with low link density
 *   2. [role="main"] with low link density
 *   3. a single <article>
 *   4. common content id/class selectors
 *   5. highest-scoring div/section/article (getContentScore), if score > 20
 * Returns null when nothing qualifies; callers then keep the whole body.
 */
function findMainContent(document: Document): Element | null {
  const main = document.querySelector("main");
  if (isValidContent(main) && getLinkDensity(main) < 0.4) {
    return main;
  }

  const roleMain = document.querySelector('[role="main"]');
  if (isValidContent(roleMain) && getLinkDensity(roleMain) < 0.4) {
    return roleMain;
  }

  // A lone <article> is unambiguous; multiple articles (e.g. a listing page)
  // are not trusted.
  const articles = document.querySelectorAll("article");
  if (articles.length === 1 && isValidContent(articles[0] ?? null)) {
    return articles[0] ?? null;
  }

  const contentSelectors = [
    "#content",
    "#main-content",
    "#main",
    ".content",
    ".main-content",
    ".post-content",
    ".article-content",
    ".entry-content",
    ".page-content",
    ".article-body",
    ".post-body",
    ".story-content",
    ".blog-content",
  ];

  for (const selector of contentSelectors) {
    try {
      const element = document.querySelector(selector);
      if (isValidContent(element) && getLinkDensity(element) < 0.4) {
        return element;
      }
    } catch {
      // Ignore invalid selectors.
    }
  }

  // Last resort: score every sizeable container and take the best one.
  const candidates: Array<{ element: Element; score: number }> = [];
  document.querySelectorAll("div, section, article").forEach((element) => {
    const text = element.textContent || "";
    if (text.trim().length < 200) {
      return;
    }

    const score = getContentScore(element);
    if (score > 0) {
      candidates.push({ element, score });
    }
  });

  candidates.sort((left, right) => right.score - left.score);
  // Require a clear winner; a weak best candidate is worse than no answer.
  if ((candidates[0]?.score ?? 0) > 20) {
    return candidates[0]?.element ?? null;
  }

  return null;
}
|
||||
|
||||
function removeBase64ImagesFromDocument(document: Document): void {
|
||||
document.querySelectorAll("img[src^='data:']").forEach((element) => element.remove());
|
||||
|
||||
document.querySelectorAll("[style*='data:image']").forEach((element) => {
|
||||
const style = element.getAttribute("style");
|
||||
if (!style) {
|
||||
return;
|
||||
}
|
||||
|
||||
const cleanedStyle = style.replace(
|
||||
/background(-image)?:\s*url\([^)]*data:image[^)]*\)[^;]*;?/gi,
|
||||
"",
|
||||
);
|
||||
|
||||
if (cleanedStyle.trim()) {
|
||||
element.setAttribute("style", cleanedStyle);
|
||||
} else {
|
||||
element.removeAttribute("style");
|
||||
}
|
||||
});
|
||||
|
||||
document
|
||||
.querySelectorAll("source[src^='data:'], source[srcset*='data:']")
|
||||
.forEach((element) => element.remove());
|
||||
}
|
||||
|
||||
function makeAbsoluteUrl(value: string, baseUrl: string): string | null {
|
||||
try {
|
||||
return new URL(value, baseUrl).toString();
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
function convertRelativeUrls(document: Document, baseUrl: string): void {
|
||||
document.querySelectorAll("[src]").forEach((element) => {
|
||||
const src = element.getAttribute("src");
|
||||
if (!src || src.startsWith("http") || src.startsWith("//") || src.startsWith("data:")) {
|
||||
return;
|
||||
}
|
||||
|
||||
const absolute = makeAbsoluteUrl(src, baseUrl);
|
||||
if (absolute) {
|
||||
element.setAttribute("src", absolute);
|
||||
}
|
||||
});
|
||||
|
||||
document.querySelectorAll("[href]").forEach((element) => {
|
||||
const href = element.getAttribute("href");
|
||||
if (
|
||||
!href ||
|
||||
href.startsWith("http") ||
|
||||
href.startsWith("//") ||
|
||||
href.startsWith("#") ||
|
||||
href.startsWith("mailto:") ||
|
||||
href.startsWith("tel:") ||
|
||||
href.startsWith("javascript:")
|
||||
) {
|
||||
return;
|
||||
}
|
||||
|
||||
const absolute = makeAbsoluteUrl(href, baseUrl);
|
||||
if (absolute) {
|
||||
element.setAttribute("href", absolute);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
function removeComments(document: Document): void {
|
||||
const walker = document.createTreeWalker(document, document.defaultView?.NodeFilter.SHOW_COMMENT ?? 128);
|
||||
const comments: Comment[] = [];
|
||||
while (walker.nextNode()) {
|
||||
comments.push(walker.currentNode as Comment);
|
||||
}
|
||||
comments.forEach((comment) => comment.parentNode?.removeChild(comment));
|
||||
}
|
||||
|
||||
/**
 * Clean raw page HTML for markdown conversion. Pipeline (order matters —
 * each stage mutates the same JSDOM document):
 *   1. remove always-junk elements and overlays
 *   2. optionally remove ads and caller-excluded selectors
 *   3. optionally strip navigation (with protection) and isolate the main
 *      content element into <body>
 *   4. optionally keep only caller-included selectors
 *   5. optionally strip base64 images, then comments, then absolutize URLs
 *
 * @param html    Raw HTML string.
 * @param baseUrl Base for resolving relative src/href values.
 * @returns Serialized cleaned document, or the input html if serialization
 *          yields nothing.
 */
export function cleanHtml(
  html: string,
  baseUrl: string,
  options: CleanHtmlOptions = {},
): string {
  const {
    removeAds = true,
    removeBase64Images = true,
    onlyMainContent = true,
    includeSelectors,
    excludeSelectors,
  } = options;

  // Parsing with a url makes the DOM aware of the document's base.
  const dom = new JSDOM(html, { url: baseUrl });
  const { document } = dom.window;

  removeElements(document, ALWAYS_REMOVE_SELECTORS);
  removeElements(document, OVERLAY_SELECTORS);

  if (removeAds) {
    removeElements(document, AD_SELECTORS);
  }

  if (excludeSelectors?.length) {
    removeElements(document, excludeSelectors);
  }

  if (onlyMainContent) {
    // Strip nav chrome, but never elements that match/contain main content.
    removeWithProtection(document, NAVIGATION_SELECTORS, FORCE_INCLUDE_SELECTORS);

    // Collapse <body> down to just the detected main-content subtree.
    const mainContent = findMainContent(document);
    if (mainContent && document.body) {
      const clone = mainContent.cloneNode(true);
      document.body.innerHTML = "";
      document.body.appendChild(clone);
    }
  }

  if (includeSelectors?.length && document.body) {
    // Clone matches first — replacing body.innerHTML would invalidate live
    // references into the old tree.
    const matchedElements: Element[] = [];
    for (const selector of includeSelectors) {
      try {
        document.querySelectorAll(selector).forEach((element) => {
          matchedElements.push(element.cloneNode(true) as Element);
        });
      } catch {
        // Ignore invalid selectors.
      }
    }

    // Only replace the body when at least one selector matched.
    if (matchedElements.length > 0) {
      document.body.innerHTML = "";
      matchedElements.forEach((element) => document.body?.appendChild(element));
    }
  }

  if (removeBase64Images) {
    removeBase64ImagesFromDocument(document);
  }

  removeComments(document);
  convertRelativeUrls(document, baseUrl);

  return document.documentElement.outerHTML || html;
}
|
||||
|
|
@ -0,0 +1,83 @@
|
|||
import { Readability } from "@mozilla/readability";
|
||||
import { JSDOM } from "jsdom";
|
||||
import type { ExtractedDocument } from "./document";
|
||||
|
||||
function getMetaContent(document: Document, selectors: string[]): string | undefined {
|
||||
for (const selector of selectors) {
|
||||
const value = document.querySelector(selector)?.getAttribute("content")?.trim();
|
||||
if (value) {
|
||||
return value;
|
||||
}
|
||||
}
|
||||
return undefined;
|
||||
}
|
||||
|
||||
/**
 * Build an ExtractedDocument from raw HTML using Readability plus meta-tag
 * fallbacks. Statement order is significant: Readability mutates the
 * document it parses, so canonical URL / site name / author / date metadata
 * are read BEFORE the parse, while title/summary/content use the parse
 * result first and only fall back to the (post-parse) DOM.
 *
 * @param input.url     Page URL (also used as the JSDOM base).
 * @param input.html    Raw HTML to extract from.
 * @param input.adapter Adapter name to stamp on the result; defaults to "generic".
 */
export function extractDocumentFromHtml(input: {
  url: string;
  html: string;
  adapter?: string;
}): ExtractedDocument {
  const dom = new JSDOM(input.html, { url: input.url });
  const document = dom.window.document;

  const canonicalUrl =
    document.querySelector('link[rel="canonical"]')?.getAttribute("href")?.trim() ??
    getMetaContent(document, ['meta[property="og:url"]']);

  const siteName = getMetaContent(document, [
    'meta[property="og:site_name"]',
    'meta[name="application-name"]',
  ]);

  const metadataAuthor = getMetaContent(document, [
    'meta[name="author"]',
    'meta[property="article:author"]',
    'meta[name="twitter:creator"]',
  ]);

  const publishedAt = getMetaContent(document, [
    'meta[property="article:published_time"]',
    'meta[name="pubdate"]',
    'meta[name="date"]',
    'meta[itemprop="datePublished"]',
  ]);

  // NOTE: this mutates `document`; everything below sees the modified DOM.
  const article = new Readability(document).parse();
  const title =
    article?.title?.trim() ||
    getMetaContent(document, ['meta[property="og:title"]']) ||
    document.title.trim() ||
    undefined;

  const summary =
    article?.excerpt?.trim() ||
    getMetaContent(document, [
      'meta[name="description"]',
      'meta[property="og:description"]',
      'meta[name="twitter:description"]',
    ]);

  // Prefer Readability's extraction, then <main>, then the whole body.
  const contentHtml =
    article?.content?.trim() ||
    document.querySelector("main")?.innerHTML?.trim() ||
    document.body?.innerHTML?.trim() ||
    "";

  const author = article?.byline?.trim() || metadataAuthor;

  return {
    url: input.url,
    canonicalUrl,
    title,
    author,
    siteName,
    publishedAt,
    summary,
    adapter: input.adapter ?? "generic",
    metadata: {
      language: document.documentElement.lang || undefined,
    },
    // Content is carried as a single raw-HTML block for later conversion.
    content: contentHtml ? [{ type: "html", html: contentHtml }] : [],
  };
}
|
||||
|
||||
|
|
@ -0,0 +1,758 @@
|
|||
import { Readability } from "@mozilla/readability";
|
||||
import { Defuddle } from "defuddle/node";
|
||||
import { JSDOM, VirtualConsole } from "jsdom";
|
||||
import TurndownService from "turndown";
|
||||
import { gfm } from "turndown-plugin-gfm";
|
||||
import { collectMediaFromMarkdown } from "../media/markdown-media";
|
||||
import type { MediaAsset } from "../media/types";
|
||||
import { cleanHtml } from "./html-cleaner";
|
||||
|
||||
/** Page-level metadata captured alongside an HTML→markdown conversion. */
export interface HtmlConversionMetadata {
  // URL the page was fetched from.
  url: string;
  canonicalUrl?: string;
  siteName?: string;
  title?: string;
  summary?: string;
  author?: string;
  // Publication timestamp as found in page metadata.
  publishedAt?: string;
  coverImage?: string;
  language?: string;
  // Timestamp when this conversion ran.
  capturedAt: string;
}
|
||||
|
||||
/** Tuning knobs for the HTML→markdown conversion. */
export interface ConvertHtmlToMarkdownOptions {
  // Allow falling back to the remote defuddle.md service.
  enableRemoteMarkdownFallback?: boolean;
  // Keep data:-URI images instead of stripping them.
  preserveBase64Images?: boolean;
}
|
||||
|
||||
/** Full result of an HTML→markdown conversion, including provenance. */
export interface HtmlToMarkdownResult {
  metadata: HtmlConversionMetadata;
  markdown: string;
  // HTML exactly as received.
  rawHtml: string;
  // HTML after the cleaning pipeline.
  cleanedHtml: string;
  // Media assets referenced by the markdown.
  media: MediaAsset[];
  // Which strategy produced the markdown (e.g. local vs remote fallback).
  conversionMethod: string;
  // Why a fallback was used, when one was.
  fallbackReason?: string;
}
|
||||
|
||||
// Generic parsed-JSON shape used for JSON-LD and API payloads.
type JsonObject = Record<string, unknown>;

// Minimum character count for a conversion to be considered usable content.
const MIN_CONTENT_LENGTH = 120;
// Remote fallback conversion service.
const DEFUDDLE_API_ORIGIN = "https://defuddle.md";
// Score margins a fallback result must beat the primary result by.
const LOCAL_FALLBACK_SCORE_DELTA = 120;
const REMOTE_FALLBACK_SCORE_DELTA = 20;
// Phrases that indicate boilerplate (comment sections, paywall upsells)
// leaked into the extracted content.
const LOW_QUALITY_MARKERS = [
  /Join The Conversation/i,
  /One Community\. Many Voices/i,
  /Read our community guidelines/i,
  /Create a free account to share your thoughts/i,
  /Become a Forbes Member/i,
  /Subscribe to trusted journalism/i,
  /\bComments\b/i,
];

// schema.org @type values treated as article-like for JSON-LD extraction.
const ARTICLE_TYPES = new Set([
  "Article",
  "NewsArticle",
  "BlogPosting",
  "WebPage",
  "ReportageNewsArticle",
]);
|
||||
|
||||
// Shared Turndown instance; the cast exposes `remove`/`addRule` with the
// narrower signatures used below.
const turndown = new TurndownService({
  headingStyle: "atx",
  bulletListMarker: "-",
  codeBlockStyle: "fenced",
}) as TurndownService & {
  remove(selectors: string[]): void;
  addRule(
    key: string,
    rule: {
      filter: string | ((node: Node) => boolean);
      replacement: (content: string) => string;
    },
  ): void;
};

// Enable GitHub Flavored Markdown and drop non-content elements entirely.
turndown.use(gfm);
turndown.remove(["script", "style", "iframe", "noscript", "template", "svg", "path"]);
// Figures collapse to their inner content, isolated by blank lines.
turndown.addRule("collapseFigure", {
  filter: "figure",
  replacement(content: string) {
    return `\n\n${content.trim()}\n\n`;
  },
});
// Anchors with no visible text and no embedded media render as nothing.
turndown.addRule("dropInvisibleAnchors", {
  filter(node: Node) {
    return (
      node.nodeName === "A" &&
      !(node as Element).textContent?.trim() &&
      !(node as Element).querySelector("img, video, picture, source")
    );
  },
  replacement() {
    return "";
  },
});
|
||||
|
||||
function pickString(...values: unknown[]): string | undefined {
|
||||
for (const value of values) {
|
||||
if (typeof value !== "string") {
|
||||
continue;
|
||||
}
|
||||
const trimmed = value.trim();
|
||||
if (trimmed) {
|
||||
return trimmed;
|
||||
}
|
||||
}
|
||||
return undefined;
|
||||
}
|
||||
|
||||
function normalizeMarkdown(markdown: string): string {
|
||||
return markdown
|
||||
.replace(/\r\n/g, "\n")
|
||||
.replace(/[ \t]+\n/g, "\n")
|
||||
.replace(/\n{3,}/g, "\n\n")
|
||||
.trim();
|
||||
}
|
||||
|
||||
function stripWrappingQuotes(value: string): string {
|
||||
const trimmed = value.trim();
|
||||
if (
|
||||
(trimmed.startsWith('"') && trimmed.endsWith('"')) ||
|
||||
(trimmed.startsWith("'") && trimmed.endsWith("'"))
|
||||
) {
|
||||
return trimmed.slice(1, -1).trim();
|
||||
}
|
||||
return trimmed;
|
||||
}
|
||||
|
||||
function stripMarkdownFrontmatter(markdown: string): string {
|
||||
return markdown.replace(/^\uFEFF?---\n[\s\S]*?\n---(?:\n|$)/, "").trim();
|
||||
}
|
||||
|
||||
function cleanMarkdownTitle(value: string): string | undefined {
|
||||
const cleaned = stripWrappingQuotes(
|
||||
value
|
||||
.replace(/\s+#+\s*$/, "")
|
||||
.replace(/!\[[^\]]*\]\([^)]+\)/g, "")
|
||||
.replace(/\[([^\]]+)\]\([^)]+\)/g, "$1")
|
||||
.replace(/[*_`~]/g, "")
|
||||
.trim(),
|
||||
);
|
||||
|
||||
return cleaned || undefined;
|
||||
}
|
||||
|
||||
// Extract a document title from markdown: prefer a frontmatter `title:`
// entry, otherwise the first ATX heading in the body. Returns undefined
// when neither yields a usable title.
export function extractTitleFromMarkdownDocument(markdown: string): string | undefined {
  const normalized = markdown.replace(/\r\n/g, "\n").trim();
  if (!normalized) {
    return undefined;
  }

  // Scan the frontmatter block (if any) line-by-line for `title:`.
  const frontmatterMatch = normalized.match(/^\uFEFF?---\n([\s\S]*?)\n---(?:\n|$)/);
  if (frontmatterMatch) {
    for (const line of frontmatterMatch[1].split("\n")) {
      const match = line.match(/^title:\s*(.+?)\s*$/i);
      if (!match) {
        continue;
      }

      const title = cleanMarkdownTitle(match[1]);
      if (title) {
        return title;
      }
    }
  }

  // Fall back to the first heading of any level in the body.
  const body = stripMarkdownFrontmatter(normalized);
  const headingMatch = body.match(/^#{1,6}\s+(.+)$/m);
  if (!headingMatch) {
    return undefined;
  }

  return cleanMarkdownTitle(headingMatch[1]);
}
|
||||
|
||||
function trimKnownBoilerplate(markdown: string): string {
|
||||
const normalized = normalizeMarkdown(markdown);
|
||||
const lines = normalized.split("\n");
|
||||
|
||||
while (lines.length > 0) {
|
||||
const lastLine = lines[lines.length - 1]?.trim();
|
||||
if (!lastLine) {
|
||||
lines.pop();
|
||||
continue;
|
||||
}
|
||||
|
||||
if (/^继续滑动看下一个$/.test(lastLine) || /^轻触阅读原文$/.test(lastLine)) {
|
||||
lines.pop();
|
||||
continue;
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
return normalizeMarkdown(lines.join("\n"));
|
||||
}
|
||||
|
||||
function buildDefuddleApiUrl(targetUrl: string): string {
|
||||
return `${DEFUDDLE_API_ORIGIN}/${encodeURIComponent(targetUrl)}`;
|
||||
}
|
||||
|
||||
// Fetch pre-converted markdown for `targetUrl` from the defuddle.md
// service. Throws when the response is not OK or the markdown (before or
// after boilerplate trimming) is empty.
async function fetchDefuddleApiMarkdown(
  targetUrl: string,
): Promise<{ markdown: string; title?: string }> {
  const response = await fetch(buildDefuddleApiUrl(targetUrl), {
    headers: {
      accept: "text/markdown,text/plain;q=0.9,*/*;q=0.1",
    },
    redirect: "follow",
  });

  if (!response.ok) {
    throw new Error(`defuddle.md returned ${response.status} ${response.statusText}`);
  }

  const rawMarkdown = (await response.text()).replace(/\r\n/g, "\n").trim();
  if (!rawMarkdown) {
    throw new Error("defuddle.md returned empty markdown");
  }

  // The title is pulled from frontmatter (or the first heading) BEFORE the
  // frontmatter is stripped from the body.
  const title = extractTitleFromMarkdownDocument(rawMarkdown);
  const markdown = trimKnownBoilerplate(stripMarkdownFrontmatter(rawMarkdown));
  if (!markdown) {
    throw new Error("defuddle.md returned empty markdown");
  }

  return {
    markdown,
    title,
  };
}
|
||||
|
||||
function sanitizeHtmlFragment(html: string): string {
|
||||
const dom = new JSDOM(`<div id="__root">${html}</div>`);
|
||||
const root = dom.window.document.querySelector("#__root");
|
||||
if (!root) {
|
||||
return html;
|
||||
}
|
||||
|
||||
for (const selector of ["script", "style", "iframe", "noscript", "template", "svg", "path"]) {
|
||||
root.querySelectorAll(selector).forEach((element) => element.remove());
|
||||
}
|
||||
|
||||
return root.innerHTML;
|
||||
}
|
||||
|
||||
function extractTextFromHtml(html: string): string {
|
||||
const dom = new JSDOM(`<!doctype html><html><body>${html}</body></html>`);
|
||||
const { document } = dom.window;
|
||||
for (const selector of ["script", "style", "noscript", "template", "iframe", "svg", "path"]) {
|
||||
document.querySelectorAll(selector).forEach((element) => element.remove());
|
||||
}
|
||||
return document.body?.textContent?.replace(/\s+/g, " ").trim() ?? "";
|
||||
}
|
||||
|
||||
function getMetaContent(document: Document, names: string[]): string | undefined {
|
||||
for (const name of names) {
|
||||
const element =
|
||||
document.querySelector(`meta[name="${name}"]`) ??
|
||||
document.querySelector(`meta[property="${name}"]`);
|
||||
const content = element?.getAttribute("content")?.trim();
|
||||
if (content) {
|
||||
return content;
|
||||
}
|
||||
}
|
||||
return undefined;
|
||||
}
|
||||
|
||||
function normalizeLanguageTag(value: string | null | undefined): string | undefined {
|
||||
if (!value) {
|
||||
return undefined;
|
||||
}
|
||||
|
||||
const trimmed = value.trim();
|
||||
if (!trimmed) {
|
||||
return undefined;
|
||||
}
|
||||
|
||||
const primary = trimmed.split(/[,\s;]/, 1)[0]?.trim();
|
||||
if (!primary) {
|
||||
return undefined;
|
||||
}
|
||||
|
||||
return primary.replace(/_/g, "-");
|
||||
}
|
||||
|
||||
function flattenJsonLdItems(data: unknown): JsonObject[] {
|
||||
if (!data || typeof data !== "object") {
|
||||
return [];
|
||||
}
|
||||
|
||||
if (Array.isArray(data)) {
|
||||
return data.flatMap(flattenJsonLdItems);
|
||||
}
|
||||
|
||||
const item = data as JsonObject;
|
||||
if (Array.isArray(item["@graph"])) {
|
||||
return (item["@graph"] as unknown[]).flatMap(flattenJsonLdItems);
|
||||
}
|
||||
|
||||
return [item];
|
||||
}
|
||||
|
||||
function parseJsonLdScripts(document: Document): JsonObject[] {
|
||||
const results: JsonObject[] = [];
|
||||
document.querySelectorAll("script[type='application/ld+json']").forEach((script) => {
|
||||
try {
|
||||
const data = JSON.parse(script.textContent ?? "");
|
||||
results.push(...flattenJsonLdItems(data));
|
||||
} catch {
|
||||
// Ignore malformed json-ld blocks.
|
||||
}
|
||||
});
|
||||
return results;
|
||||
}
|
||||
|
||||
function extractAuthorFromJsonLd(authorData: unknown): string | undefined {
|
||||
if (typeof authorData === "string") {
|
||||
return authorData.trim() || undefined;
|
||||
}
|
||||
|
||||
if (!authorData || typeof authorData !== "object") {
|
||||
return undefined;
|
||||
}
|
||||
|
||||
if (Array.isArray(authorData)) {
|
||||
return authorData
|
||||
.map((author) => extractAuthorFromJsonLd(author))
|
||||
.filter((value): value is string => Boolean(value))
|
||||
.join(", ") || undefined;
|
||||
}
|
||||
|
||||
const author = authorData as JsonObject;
|
||||
return pickString(author.name);
|
||||
}
|
||||
|
||||
// Scan the page's JSON-LD blocks for the first article-like item and map
// its fields onto our metadata shape. Returns {} when none matches.
function extractPrimaryJsonLdMeta(document: Document): Partial<HtmlConversionMetadata> {
  for (const item of parseJsonLdScripts(document)) {
    // @type may be a string or an array; only the first entry is considered.
    const type = Array.isArray(item["@type"]) ? item["@type"][0] : item["@type"];
    if (typeof type !== "string" || !ARTICLE_TYPES.has(type)) {
      continue;
    }

    return {
      title: pickString(item.headline, item.name),
      summary: pickString(item.description),
      author: extractAuthorFromJsonLd(item.author),
      publishedAt: pickString(item.datePublished, item.dateCreated),
      // `image` may be a URL string, an ImageObject with a url, or an array;
      // pickString keeps only the first string-valued candidate.
      coverImage: pickString(
        item.image,
        (item.image as JsonObject | undefined)?.url,
        Array.isArray(item.image) ? item.image[0] : undefined,
      ),
    };
  }

  return {};
}
|
||||
|
||||
// Build baseline page metadata from <meta>/<link> tags, JSON-LD, and
// document structure. Every optional field degrades to undefined via
// pickString when no source supplies a non-empty value.
function extractPageMetadata(
  html: string,
  url: string,
  capturedAt: string,
): HtmlConversionMetadata {
  const dom = new JSDOM(html, { url });
  const { document } = dom.window;
  const jsonLd = extractPrimaryJsonLdMeta(document);

  return {
    url,
    canonicalUrl:
      document.querySelector('link[rel="canonical"]')?.getAttribute("href")?.trim() ??
      getMetaContent(document, ["og:url"]),
    siteName: pickString(
      getMetaContent(document, ["og:site_name"]),
      document.querySelector('meta[name="application-name"]')?.getAttribute("content"),
    ),
    // Precedence: social-card tags, then JSON-LD, then visible document parts.
    title: pickString(
      getMetaContent(document, ["og:title", "twitter:title"]),
      jsonLd.title,
      document.querySelector("h1")?.textContent,
      document.title,
    ),
    summary: pickString(
      getMetaContent(document, ["description", "og:description", "twitter:description"]),
      jsonLd.summary,
    ),
    author: pickString(
      getMetaContent(document, ["author", "article:author", "twitter:creator"]),
      jsonLd.author,
    ),
    // A <time datetime=...> element outranks meta tags and JSON-LD here.
    publishedAt: pickString(
      document.querySelector("time[datetime]")?.getAttribute("datetime"),
      getMetaContent(document, ["article:published_time", "datePublished", "publishdate", "date"]),
      jsonLd.publishedAt,
    ),
    coverImage: pickString(
      getMetaContent(document, ["og:image", "twitter:image", "twitter:image:src"]),
      jsonLd.coverImage,
    ),
    // <html lang> wins; otherwise fall back to language meta tags / og:locale.
    language: pickString(
      normalizeLanguageTag(document.documentElement.getAttribute("lang")),
      normalizeLanguageTag(
        pickString(
          getMetaContent(document, ["language", "content-language", "og:locale"]),
          document.querySelector("meta[http-equiv='content-language']")?.getAttribute("content"),
        ),
      ),
    ),
    capturedAt,
  };
}
|
||||
|
||||
// Decide whether converted markdown captured enough of the page's text to
// be considered usable.
function isMarkdownUsable(markdown: string, html: string): boolean {
  const normalized = normalizeMarkdown(markdown);
  if (!normalized) {
    return false;
  }

  // Pages with almost no visible text can't produce long markdown; accept
  // any non-empty result for them.
  const htmlTextLength = extractTextFromHtml(html).length;
  if (htmlTextLength < MIN_CONTENT_LENGTH) {
    return true;
  }

  // Absolute floor: 80+ characters of markdown is accepted outright.
  if (normalized.length >= 80) {
    return true;
  }

  // Otherwise require the markdown to cover a minimum share of the page
  // text (20% of it, capped at 200 characters).
  return normalized.length >= Math.min(200, Math.floor(htmlTextLength * 0.2));
}
|
||||
|
||||
function countMarkerHits(markdown: string, markers: RegExp[]): number {
|
||||
let hits = 0;
|
||||
for (const marker of markers) {
|
||||
if (marker.test(markdown)) {
|
||||
hits += 1;
|
||||
}
|
||||
}
|
||||
return hits;
|
||||
}
|
||||
|
||||
function countUsefulParagraphs(markdown: string): number {
|
||||
const paragraphs = normalizeMarkdown(markdown).split(/\n{2,}/);
|
||||
let count = 0;
|
||||
|
||||
for (const paragraph of paragraphs) {
|
||||
const trimmed = paragraph.trim();
|
||||
if (!trimmed) {
|
||||
continue;
|
||||
}
|
||||
if (/^!?\[[^\]]*\]\([^)]+\)$/.test(trimmed)) {
|
||||
continue;
|
||||
}
|
||||
if (/^#{1,6}\s+/.test(trimmed)) {
|
||||
continue;
|
||||
}
|
||||
if ((trimmed.match(/\b[\p{L}\p{N}']+\b/gu) || []).length < 8) {
|
||||
continue;
|
||||
}
|
||||
count += 1;
|
||||
}
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
function scoreMarkdownQuality(markdown: string): number {
|
||||
const normalized = normalizeMarkdown(markdown);
|
||||
const wordCount = (normalized.match(/\b[\p{L}\p{N}']+\b/gu) || []).length;
|
||||
const usefulParagraphs = countUsefulParagraphs(normalized);
|
||||
const headingCount = (normalized.match(/^#{1,6}\s+/gm) || []).length;
|
||||
const markerHits = countMarkerHits(normalized, LOW_QUALITY_MARKERS);
|
||||
return Math.min(wordCount, 4000) + usefulParagraphs * 40 + headingCount * 10 - markerHits * 180;
|
||||
}
|
||||
|
||||
function shouldCompareWithFallback(markdown: string): boolean {
|
||||
const normalized = normalizeMarkdown(markdown);
|
||||
return countMarkerHits(normalized, LOW_QUALITY_MARKERS) > 0 || countUsefulParagraphs(normalized) < 6;
|
||||
}
|
||||
|
||||
function hasMeaningfulMarkdownStructure(markdown: string): boolean {
|
||||
const normalized = normalizeMarkdown(markdown);
|
||||
if (!normalized) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return (
|
||||
countUsefulParagraphs(normalized) > 0 ||
|
||||
/^#{1,6}\s+/m.test(normalized) ||
|
||||
/^[-*]\s+/m.test(normalized) ||
|
||||
/^\d+\.\s+/m.test(normalized) ||
|
||||
/!\[[^\]]*\]\([^)]+\)/.test(normalized)
|
||||
);
|
||||
}
|
||||
|
||||
function shouldTryRemoteMarkdownFallback(
|
||||
markdown: string,
|
||||
html: string,
|
||||
options: ConvertHtmlToMarkdownOptions,
|
||||
): boolean {
|
||||
if (!options.enableRemoteMarkdownFallback) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return !isMarkdownUsable(markdown, html) || shouldCompareWithFallback(markdown);
|
||||
}
|
||||
|
||||
function shouldPreferRemoteMarkdown(
|
||||
current: HtmlToMarkdownResult,
|
||||
remote: HtmlToMarkdownResult,
|
||||
html: string,
|
||||
): boolean {
|
||||
if (!isMarkdownUsable(current.markdown, html)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (!hasMeaningfulMarkdownStructure(current.markdown) && hasMeaningfulMarkdownStructure(remote.markdown)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
return scoreMarkdownQuality(remote.markdown) > scoreMarkdownQuality(current.markdown) + REMOTE_FALLBACK_SCORE_DELTA;
|
||||
}
|
||||
|
||||
function buildRemoteFallbackReason(current: HtmlToMarkdownResult, html: string): string {
|
||||
if (!isMarkdownUsable(current.markdown, html)) {
|
||||
return current.fallbackReason
|
||||
? `Used defuddle.md markdown fallback after local extraction failed: ${current.fallbackReason}`
|
||||
: "Used defuddle.md markdown fallback after local extraction returned empty or incomplete markdown";
|
||||
}
|
||||
|
||||
return "defuddle.md produced higher-quality markdown than local extraction";
|
||||
}
|
||||
|
||||
// Primary conversion path: run Defuddle over a jsdom document and merge its
// metadata over the baseline. Never throws; failures become { ok: false }.
async function tryDefuddleConversion(
  html: string,
  url: string,
  baseMetadata: HtmlConversionMetadata,
): Promise<{ ok: true; result: HtmlToMarkdownResult } | { ok: false; reason: string }> {
  try {
    // Registering a jsdomError listener suppresses jsdom's default stderr
    // output. CSS parse errors are explicitly ignored; note the handler
    // also does nothing for other error types (they are silently dropped).
    const virtualConsole = new VirtualConsole();
    virtualConsole.on("jsdomError", (error: Error & { type?: string }) => {
      if (error.type === "css parsing" || /Could not parse CSS stylesheet/i.test(error.message)) {
        return;
      }
    });

    const dom = new JSDOM(html, { url, virtualConsole });
    const result = await Defuddle(dom, url, { markdown: true });
    const markdown = trimKnownBoilerplate(result.content || "");

    if (!isMarkdownUsable(markdown, html)) {
      return { ok: false, reason: "Defuddle returned empty or incomplete markdown" };
    }

    // Defuddle's metadata wins over the baseline when present.
    const metadata: HtmlConversionMetadata = {
      ...baseMetadata,
      title: pickString(result.title, baseMetadata.title),
      summary: pickString(result.description, baseMetadata.summary),
      author: pickString(result.author, baseMetadata.author),
      publishedAt: pickString(result.published, baseMetadata.publishedAt),
      coverImage: pickString(result.image, baseMetadata.coverImage),
      language: pickString(result.language, baseMetadata.language),
    };

    return {
      ok: true,
      result: {
        metadata,
        markdown,
        // Caller overwrites rawHtml/cleanedHtml with the true originals.
        rawHtml: html,
        cleanedHtml: html,
        media: collectMediaFromMarkdown(markdown).concat(
          metadata.coverImage
            ? [{ url: metadata.coverImage, kind: "image", role: "cover" as const }]
            : [],
        ),
        conversionMethod: "defuddle",
      },
    };
  } catch (error) {
    return {
      ok: false,
      reason: error instanceof Error ? error.message : String(error),
    };
  }
}
|
||||
|
||||
// Remote fallback: fetch pre-converted markdown from defuddle.md. The
// `html` argument is only used for the usability check — the service
// re-fetches the URL itself. Never throws; failures become { ok: false }.
async function tryDefuddleApiConversion(
  html: string,
  url: string,
  baseMetadata: HtmlConversionMetadata,
): Promise<{ ok: true; result: HtmlToMarkdownResult } | { ok: false; reason: string }> {
  try {
    const result = await fetchDefuddleApiMarkdown(url);
    const markdown = result.markdown;

    // A sufficiently high quality score can rescue markdown that fails the
    // plain length-based usability check.
    if (!isMarkdownUsable(markdown, html) && scoreMarkdownQuality(markdown) < 80) {
      return { ok: false, reason: "defuddle.md returned empty or incomplete markdown" };
    }

    // Only the title is taken from the remote result; the rest of the
    // metadata stays as extracted locally.
    const metadata: HtmlConversionMetadata = {
      ...baseMetadata,
      title: pickString(result.title, baseMetadata.title),
    };

    return {
      ok: true,
      result: {
        metadata,
        markdown,
        // Caller overwrites rawHtml/cleanedHtml with the true originals.
        rawHtml: html,
        cleanedHtml: html,
        media: collectMediaFromMarkdown(markdown).concat(
          metadata.coverImage
            ? [{ url: metadata.coverImage, kind: "image", role: "cover" as const }]
            : [],
        ),
        conversionMethod: "defuddle-api",
      },
    };
  } catch (error) {
    return {
      ok: false,
      reason: error instanceof Error ? error.message : String(error),
    };
  }
}
|
||||
|
||||
function convertHtmlFragmentToMarkdown(html: string): string {
|
||||
if (!html.trim()) {
|
||||
return "";
|
||||
}
|
||||
|
||||
try {
|
||||
return turndown.turndown(sanitizeHtmlFragment(html));
|
||||
} catch {
|
||||
return "";
|
||||
}
|
||||
}
|
||||
|
||||
function fallbackPlainText(html: string): string {
|
||||
return trimKnownBoilerplate(extractTextFromHtml(html));
|
||||
}
|
||||
|
||||
// Legacy pipeline: Readability article extraction followed by Turndown.
// Content falls back to <main>, then <body>, then plain text when earlier
// stages produce nothing.
function convertWithReadability(
  rawHtml: string,
  cleanedHtml: string,
  url: string,
  baseMetadata: HtmlConversionMetadata,
): HtmlToMarkdownResult {
  const dom = new JSDOM(cleanedHtml, { url });
  const document = dom.window.document;
  const article = new Readability(document).parse();

  const contentHtml =
    article?.content?.trim() ??
    document.querySelector("main")?.innerHTML?.trim() ??
    document.body?.innerHTML?.trim() ??
    "";

  let markdown = contentHtml ? convertHtmlFragmentToMarkdown(contentHtml) : "";
  if (!markdown) {
    markdown = fallbackPlainText(cleanedHtml);
  }

  // Readability's metadata wins over the baseline when present.
  const metadata: HtmlConversionMetadata = {
    ...baseMetadata,
    title: pickString(article?.title, baseMetadata.title),
    summary: pickString(article?.excerpt, baseMetadata.summary),
    author: pickString(article?.byline, baseMetadata.author),
  };

  // The cover image leads the media list when present.
  const media = collectMediaFromMarkdown(markdown);
  if (metadata.coverImage) {
    media.unshift({
      url: metadata.coverImage,
      kind: "image",
      role: "cover",
    });
  }

  return {
    metadata,
    markdown: trimKnownBoilerplate(markdown),
    rawHtml,
    cleanedHtml,
    media,
    conversionMethod: article?.content ? "legacy:readability" : "legacy:body",
  };
}
|
||||
|
||||
// Orchestrates the conversion pipeline:
//   1. extract baseline metadata from the raw HTML;
//   2. clean the HTML (best-effort — falls back to the raw input on error);
//   3. convert with Defuddle, racing it against Readability when the
//      Defuddle output looks low-quality;
//   4. optionally consult the remote defuddle.md service when enabled and
//      the local result is still weak.
export async function convertHtmlToMarkdown(
  html: string,
  url: string,
  options: ConvertHtmlToMarkdownOptions = {},
): Promise<HtmlToMarkdownResult> {
  const capturedAt = new Date().toISOString();
  const baseMetadata = extractPageMetadata(html, url, capturedAt);

  let cleanedHtml = html;
  try {
    cleanedHtml = cleanHtml(html, url, {
      removeBase64Images: !options.preserveBase64Images,
    });
  } catch {
    cleanedHtml = html;
  }

  let selectedResult: HtmlToMarkdownResult;
  const defuddleResult = await tryDefuddleConversion(cleanedHtml, url, baseMetadata);
  if (defuddleResult.ok) {
    if (shouldCompareWithFallback(defuddleResult.result.markdown)) {
      // Defuddle succeeded but looks suspect: compare with Readability and
      // keep the fallback only when it wins by a clear margin.
      const fallbackResult = convertWithReadability(html, cleanedHtml, url, baseMetadata);
      if (
        scoreMarkdownQuality(fallbackResult.markdown) >
        scoreMarkdownQuality(defuddleResult.result.markdown) + LOCAL_FALLBACK_SCORE_DELTA
      ) {
        selectedResult = {
          ...fallbackResult,
          fallbackReason: "Readability/Turndown produced higher-quality markdown than Defuddle",
        };
      } else {
        // Restore the true raw/cleaned HTML (tryDefuddleConversion only saw
        // the cleaned variant).
        selectedResult = {
          ...defuddleResult.result,
          rawHtml: html,
          cleanedHtml,
        };
      }
    } else {
      selectedResult = {
        ...defuddleResult.result,
        rawHtml: html,
        cleanedHtml,
      };
    }
  } else {
    selectedResult = {
      ...convertWithReadability(html, cleanedHtml, url, baseMetadata),
      fallbackReason: defuddleResult.reason,
    };
  }

  if (!shouldTryRemoteMarkdownFallback(selectedResult.markdown, cleanedHtml, options)) {
    return selectedResult;
  }

  const remoteDefuddleResult = await tryDefuddleApiConversion(cleanedHtml, url, baseMetadata);
  if (!remoteDefuddleResult.ok || !shouldPreferRemoteMarkdown(selectedResult, remoteDefuddleResult.result, cleanedHtml)) {
    return selectedResult;
  }

  return {
    ...remoteDefuddleResult.result,
    rawHtml: html,
    cleanedHtml,
    fallbackReason: buildRemoteFallbackReason(selectedResult, cleanedHtml),
  };
}
|
||||
|
|
@ -0,0 +1,169 @@
|
|||
import TurndownService from "turndown";
|
||||
import { gfm } from "turndown-plugin-gfm";
|
||||
import { normalizeMarkdownMediaLinks } from "../media/markdown-media";
|
||||
import type { ContentBlock, ExtractedDocument } from "./document";
|
||||
|
||||
// Shared Turndown instance for converting raw HTML content blocks, with
// GitHub Flavored Markdown (tables, strikethrough, task lists) enabled.
const turndownService = new TurndownService({
  codeBlockStyle: "fenced",
  headingStyle: "atx",
  bulletListMarker: "-",
});

turndownService.use(gfm);
|
||||
|
||||
// Render a single structured content block to its markdown form. The
// switch is exhaustive over ContentBlock's discriminated union.
function renderBlock(block: ContentBlock): string {
  switch (block.type) {
    case "paragraph":
      return block.text.trim();
    case "heading":
      // Clamp depth into the valid ATX range 1..6.
      return `${"#".repeat(Math.min(Math.max(block.depth, 1), 6))} ${block.text.trim()}`;
    case "list":
      return block.items
        .map((item, index) => (block.ordered ? `${index + 1}. ${item.trim()}` : `- ${item.trim()}`))
        .join("\n");
    case "quote":
      return block.text
        .split("\n")
        .map((line) => `> ${line}`)
        .join("\n");
    case "code":
      return `\`\`\`${block.language ?? ""}\n${block.code.trimEnd()}\n\`\`\``;
    case "image":
      // NOTE(review): this literal appears truncated in the source as viewed
      // (presumably it should return image markdown built from the block's
      // fields) — verify against the repository before relying on it.
      return ``;
    case "html":
      // Raw HTML blocks go through the shared Turndown instance.
      return turndownService.turndown(block.html).trim();
    case "markdown":
      return block.markdown.trim();
  }
}
|
||||
|
||||
function isDefinedValue(value: unknown): boolean {
|
||||
return value !== undefined && value !== null && value !== "";
|
||||
}
|
||||
|
||||
function renderFrontmatterValue(value: unknown): string {
|
||||
if (typeof value === "string") {
|
||||
if (value.includes("\n")) {
|
||||
return `|-\n${value
|
||||
.replace(/\r\n/g, "\n")
|
||||
.split("\n")
|
||||
.map((line) => ` ${line}`)
|
||||
.join("\n")}`;
|
||||
}
|
||||
return JSON.stringify(value);
|
||||
}
|
||||
if (typeof value === "number" || typeof value === "boolean") {
|
||||
return String(value);
|
||||
}
|
||||
return JSON.stringify(value);
|
||||
}
|
||||
|
||||
// Build a YAML frontmatter block: well-known fields first (in
// preferredOrder), then any remaining metadata keys sorted alphabetically.
// Returns "" when no field has a defined value.
function renderFrontmatter(document: ExtractedDocument): string {
  const fields = new Map<string, unknown>();
  const preferredOrder = [
    "title",
    "url",
    "requestedUrl",
    "author",
    "authorName",
    "authorUsername",
    "authorUrl",
    "coverImage",
    "siteName",
    "publishedAt",
    "summary",
    "adapter",
  ];

  fields.set("title", document.title);
  fields.set("url", document.canonicalUrl ?? document.url);
  fields.set("requestedUrl", document.requestedUrl ?? document.url);
  fields.set("author", document.author);
  fields.set("siteName", document.siteName);
  fields.set("publishedAt", document.publishedAt);
  fields.set("summary", document.summary);
  fields.set("adapter", document.adapter);

  // Adapter-specific metadata may add keys but never overrides core fields.
  for (const [key, value] of Object.entries(document.metadata ?? {})) {
    if (!fields.has(key)) {
      fields.set(key, value);
    }
  }

  const orderedKeys = [
    ...preferredOrder.filter((key) => fields.has(key)),
    ...Array.from(fields.keys()).filter((key) => !preferredOrder.includes(key)).sort(),
  ];

  // Empty/null values are omitted entirely.
  const lines = orderedKeys
    .map((key) => [key, fields.get(key)] as const)
    .filter(([, value]) => isDefinedValue(value))
    .map(([key, value]) => `${key}: ${renderFrontmatterValue(value)}`);

  if (lines.length === 0) {
    return "";
  }

  return `---\n${lines.join("\n")}\n---`;
}
|
||||
|
||||
function cleanMarkdown(markdown: string): string {
|
||||
return normalizeMarkdownMediaLinks(markdown.replace(/\n{3,}/g, "\n\n").trim());
|
||||
}
|
||||
|
||||
function normalizeComparableTitle(value: string): string {
|
||||
return value
|
||||
.trim()
|
||||
.toLowerCase()
|
||||
.replace(/^>\s*/, "")
|
||||
.replace(/^#+\s+/, "")
|
||||
.replace(/(?:\.{3}|…)\s*$/, "");
|
||||
}
|
||||
|
||||
function bodyStartsWithTitle(body: string, title: string): boolean {
|
||||
const firstMeaningfulLine = body
|
||||
.replace(/\r\n/g, "\n")
|
||||
.split("\n")
|
||||
.map((line) => line.trim())
|
||||
.find((line) => line && !/^!?\[[^\]]*\]\([^)]+\)$/.test(line));
|
||||
|
||||
if (!firstMeaningfulLine) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const comparableTitle = normalizeComparableTitle(title);
|
||||
const comparableFirstLine = normalizeComparableTitle(firstMeaningfulLine);
|
||||
if (!comparableTitle || !comparableFirstLine) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return (
|
||||
comparableFirstLine === comparableTitle ||
|
||||
comparableFirstLine.startsWith(comparableTitle) ||
|
||||
comparableTitle.startsWith(comparableFirstLine)
|
||||
);
|
||||
}
|
||||
|
||||
// Assemble the final markdown document: frontmatter, an H1 title (unless
// the body already begins with it), then the rendered content blocks.
export function renderMarkdown(document: ExtractedDocument): string {
  const sections: string[] = [];
  const frontmatter = renderFrontmatter(document);

  if (frontmatter) {
    sections.push(frontmatter);
  }

  const body = document.content
    .map((block) => renderBlock(block))
    .filter(Boolean)
    .join("\n\n");

  // Avoid duplicating the title when the first content block already is it.
  if (document.title && !bodyStartsWithTitle(body, document.title)) {
    sections.push(`# ${document.title}`);
  }

  if (body) {
    sections.push(body);
  }

  return cleanMarkdown(sections.join("\n\n"));
}
|
||||
|
|
@ -0,0 +1,161 @@
|
|||
import path from "node:path";
|
||||
import { mkdir, writeFile } from "node:fs/promises";
|
||||
import {
|
||||
buildFileName,
|
||||
isDataUri,
|
||||
normalizeContentType,
|
||||
normalizeMediaUrl,
|
||||
resolveExtensionFromContentType,
|
||||
resolveExtensionFromUrl,
|
||||
resolveMediaKind,
|
||||
resolveOutputExtension,
|
||||
toPosixPath,
|
||||
} from "./media-utils";
|
||||
import type { MediaAsset, MediaDownloadRequest, MediaDownloadResult, MediaKind } from "./types";
|
||||
|
||||
// Desktop Chrome user agent sent with media downloads; presumably some
// hosts reject the default fetch UA — confirm before changing.
const DOWNLOAD_USER_AGENT =
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36";
|
||||
|
||||
function parseBase64DataUri(rawUrl: string): { contentType: string; bytes: Buffer } | null {
|
||||
const match = rawUrl.match(/^data:([^;,]+);base64,([A-Za-z0-9+/=\s]+)$/i);
|
||||
if (!match?.[1] || !match[2]) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const contentType = normalizeContentType(match[1]);
|
||||
if (!contentType) {
|
||||
return null;
|
||||
}
|
||||
|
||||
try {
|
||||
const bytes = Buffer.from(match[2].replace(/\s+/g, ""), "base64");
|
||||
if (bytes.length === 0) {
|
||||
return null;
|
||||
}
|
||||
return { contentType, bytes };
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
function dedupeMedia(media: MediaAsset[]): MediaAsset[] {
|
||||
const deduped: MediaAsset[] = [];
|
||||
const seen = new Set<string>();
|
||||
for (const item of media) {
|
||||
const normalizedUrl = normalizeMediaUrl(item.url);
|
||||
if (!normalizedUrl || seen.has(normalizedUrl)) {
|
||||
continue;
|
||||
}
|
||||
seen.add(normalizedUrl);
|
||||
deduped.push({
|
||||
...item,
|
||||
url: normalizedUrl,
|
||||
});
|
||||
}
|
||||
return deduped;
|
||||
}
|
||||
|
||||
function toRelativePath(fromDir: string, absoluteTarget: string): string {
|
||||
const relative = path.relative(fromDir, absoluteTarget) || path.basename(absoluteTarget);
|
||||
return toPosixPath(relative);
|
||||
}
|
||||
|
||||
// Download (or decode, for data: URIs) each unique media asset into imgs/
// or videos/ under the media directory, returning url → local-path
// replacements plus per-kind counts. Individual failures are logged and
// skipped; the function itself never throws.
export async function downloadMediaAssets(
  request: MediaDownloadRequest,
): Promise<MediaDownloadResult> {
  const dedupedMedia = dedupeMedia(request.media);
  const absoluteOutputPath = path.resolve(request.outputPath);
  const markdownDir = path.dirname(absoluteOutputPath);
  // Media lands next to the markdown file unless an explicit mediaDir is given.
  const baseDir = request.mediaDir ? path.resolve(request.mediaDir) : markdownDir;
  const replacements: MediaDownloadResult["replacements"] = [];

  let downloadedImages = 0;
  let downloadedVideos = 0;

  for (const asset of dedupedMedia) {
    try {
      let sourceUrl = normalizeMediaUrl(asset.url);
      let contentType = "";
      let extension: string | undefined;
      let kind: MediaKind | undefined;
      let bytes: Buffer | null = null;

      if (isDataUri(asset.url)) {
        // Embedded media: decode in place, no network request.
        const parsed = parseBase64DataUri(asset.url);
        if (!parsed) {
          request.log.warn(`Skipping unsupported embedded media: ${asset.url.slice(0, 32)}...`);
          continue;
        }

        contentType = parsed.contentType;
        extension =
          resolveExtensionFromContentType(contentType) ??
          resolveExtensionFromUrl(asset.fileNameHint ?? "");
        kind = resolveMediaKind(sourceUrl, contentType, extension, asset.kind);
        bytes = parsed.bytes;
      } else {
        const response = await fetch(sourceUrl, {
          method: "GET",
          redirect: "follow",
          headers: {
            "user-agent": DOWNLOAD_USER_AGENT,
            ...(asset.headers ?? {}),
          },
        });

        if (!response.ok) {
          request.log.warn(`Skipping media (${response.status}): ${asset.url}`);
          continue;
        }

        // Prefer the post-redirect URL when resolving extension and kind.
        sourceUrl = normalizeMediaUrl(response.url || sourceUrl);
        contentType = normalizeContentType(response.headers.get("content-type"));
        extension =
          resolveExtensionFromUrl(sourceUrl) ??
          resolveExtensionFromUrl(asset.url) ??
          resolveExtensionFromUrl(asset.fileNameHint ?? "");
        kind = resolveMediaKind(sourceUrl, contentType, extension, asset.kind);
        bytes = Buffer.from(await response.arrayBuffer());
      }

      if (!kind || !bytes) {
        request.log.debug(`Skipping media with unresolved kind: ${asset.url}`);
        continue;
      }

      // Images and videos are numbered independently within their own folder.
      const outputExtension = resolveOutputExtension(contentType, extension, kind);
      const nextIndex = kind === "image" ? downloadedImages + 1 : downloadedVideos + 1;
      const dirName = kind === "image" ? "imgs" : "videos";
      const targetDir = path.join(baseDir, dirName);
      await mkdir(targetDir, { recursive: true });

      const fileName = buildFileName(kind, nextIndex, sourceUrl, outputExtension, asset.fileNameHint);
      const absolutePath = path.join(targetDir, fileName);
      await writeFile(absolutePath, bytes);

      // localPath is relative to the markdown file so links work in place.
      replacements.push({
        url: asset.url,
        localPath: toRelativePath(markdownDir, absolutePath),
        absolutePath,
        kind,
      });

      // Counters advance only after a successful write.
      if (kind === "image") {
        downloadedImages = nextIndex;
      } else {
        downloadedVideos = nextIndex;
      }
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      request.log.warn(`Failed to download media ${asset.url}: ${message}`);
    }
  }

  return {
    replacements,
    downloadedImages,
    downloadedVideos,
    imageDir: downloadedImages > 0 ? path.join(baseDir, "imgs") : null,
    videoDir: downloadedVideos > 0 ? path.join(baseDir, "videos") : null,
  };
}
|
||||
|
|
@ -0,0 +1,458 @@
|
|||
import remarkGfm from "remark-gfm";
|
||||
import remarkParse from "remark-parse";
|
||||
import { unified } from "unified";
|
||||
import type { ContentBlock, ExtractedDocument } from "../extract/document";
|
||||
import {
|
||||
isDataUri,
|
||||
normalizeContentType,
|
||||
normalizeMediaUrl,
|
||||
resolveExtensionFromContentType,
|
||||
resolveExtensionFromUrl,
|
||||
resolveKindFromExtension,
|
||||
} from "./media-utils";
|
||||
import type { MediaAsset, MediaReplacement } from "./types";
|
||||
|
||||
const MARKDOWN_LINK_RE =
|
||||
/(!?\[[^\]\n]*\])\((<)?((?:https?:\/\/[^)\s>]+)|(?:data:[^)>\s]+))(>)?\)/g;
|
||||
const FRONTMATTER_COVER_RE = /^(coverImage:\s*")((?:https?:\/\/[^"]+)|(?:data:[^"]+))(")/m;
|
||||
const RAW_URL_RE = /(?:https?:\/\/[^\s<>"')\]]+|data:[^\s<>"')\]]+)/g;
|
||||
|
||||
interface MarkdownAstNode {
|
||||
type: string;
|
||||
url?: string | null;
|
||||
alt?: string | null;
|
||||
title?: string | null;
|
||||
value?: string | null;
|
||||
children?: MarkdownAstNode[];
|
||||
position?: {
|
||||
start?: { offset?: number | null };
|
||||
end?: { offset?: number | null };
|
||||
};
|
||||
}
|
||||
|
||||
interface MarkdownReplacementRange {
|
||||
start: number;
|
||||
end: number;
|
||||
value: string;
|
||||
}
|
||||
|
||||
function inferMediaKindFromLabel(label: string, rawUrl: string): "image" | "video" | undefined {
|
||||
if (label.startsWith("![")) {
|
||||
return "image";
|
||||
}
|
||||
|
||||
const normalizedLabel = label.replace(/[!\[\]]/g, "").trim().toLowerCase();
|
||||
if (/\b(video|animated[_ -]?gif|gif)\b/.test(normalizedLabel)) {
|
||||
return "video";
|
||||
}
|
||||
|
||||
if (isDataUri(rawUrl)) {
|
||||
const contentType = normalizeContentType(rawUrl.slice(5, rawUrl.indexOf(";")));
|
||||
return contentType.startsWith("image/") ? "image" : contentType.startsWith("video/") ? "video" : undefined;
|
||||
}
|
||||
|
||||
return resolveKindFromExtension(resolveExtensionFromUrl(rawUrl));
|
||||
}
|
||||
|
||||
function inferMediaKindFromRawUrl(rawUrl: string): "image" | "video" | undefined {
|
||||
if (isDataUri(rawUrl)) {
|
||||
const contentType = normalizeContentType(rawUrl.slice(5, rawUrl.indexOf(";")));
|
||||
return contentType.startsWith("image/") ? "image" : contentType.startsWith("video/") ? "video" : undefined;
|
||||
}
|
||||
|
||||
return resolveKindFromExtension(resolveExtensionFromUrl(rawUrl));
|
||||
}
|
||||
|
||||
function pushMedia(assets: MediaAsset[], seen: Set<string>, media: MediaAsset): void {
|
||||
const normalizedUrl = normalizeMediaUrl(media.url);
|
||||
if (!normalizedUrl || seen.has(normalizedUrl)) {
|
||||
return;
|
||||
}
|
||||
seen.add(normalizedUrl);
|
||||
assets.push({
|
||||
...media,
|
||||
url: normalizedUrl,
|
||||
});
|
||||
}
|
||||
|
||||
function getNodeOffsets(node: MarkdownAstNode): { start: number; end: number } | null {
|
||||
const start = node.position?.start?.offset;
|
||||
const end = node.position?.end?.offset;
|
||||
if (typeof start !== "number" || typeof end !== "number" || start < 0 || end < start) {
|
||||
return null;
|
||||
}
|
||||
return { start, end };
|
||||
}
|
||||
|
||||
function escapeMarkdownLabel(value: string): string {
|
||||
return value.replace(/\\/g, "\\\\").replace(/\[/g, "\\[").replace(/\]/g, "\\]");
|
||||
}
|
||||
|
||||
function escapeMarkdownTitle(value: string): string {
|
||||
return value.replace(/\\/g, "\\\\").replace(/"/g, '\\"');
|
||||
}
|
||||
|
||||
function formatMarkdownDestination(url: string): string {
|
||||
return /[\s()<>]/.test(url) ? `<${url}>` : url;
|
||||
}
|
||||
|
||||
function serializeImageNode(node: MarkdownAstNode): string {
|
||||
const rawUrl = node.url ?? "";
|
||||
const normalizedUrl = normalizeMediaUrl(rawUrl);
|
||||
const alt = escapeMarkdownLabel(node.alt ?? "");
|
||||
const title = node.title ? ` "${escapeMarkdownTitle(node.title)}"` : "";
|
||||
return `}${title})`;
|
||||
}
|
||||
|
||||
function serializeLinkedImageNode(linkNode: MarkdownAstNode, imageNode: MarkdownAstNode): string {
|
||||
const imageMarkdown = serializeImageNode(imageNode);
|
||||
const imageUrl = normalizeMediaUrl(imageNode.url ?? "");
|
||||
const linkUrl = normalizeMediaUrl(linkNode.url ?? "");
|
||||
|
||||
if (!linkUrl || linkUrl === imageUrl) {
|
||||
return imageMarkdown;
|
||||
}
|
||||
|
||||
const title = linkNode.title ? ` "${escapeMarkdownTitle(linkNode.title)}"` : "";
|
||||
return `[${imageMarkdown}](${formatMarkdownDestination(linkUrl)}${title})`;
|
||||
}
|
||||
|
||||
function isParagraphWithSingleText(node: MarkdownAstNode | undefined, expectedValue: string): boolean {
|
||||
if (node?.type !== "paragraph" || node.children?.length !== 1) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const child = node.children[0];
|
||||
return child?.type === "text" && child.value?.trim() === expectedValue;
|
||||
}
|
||||
|
||||
function getSingleImageFromParagraph(node: MarkdownAstNode | undefined): MarkdownAstNode | null {
|
||||
if (node?.type !== "paragraph" || node.children?.length !== 1) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return node.children[0]?.type === "image" ? node.children[0] : null;
|
||||
}
|
||||
|
||||
function extractBrokenLinkedImageDestination(node: MarkdownAstNode | undefined): string | null {
|
||||
if (node?.type !== "paragraph") {
|
||||
return null;
|
||||
}
|
||||
|
||||
const children = node.children ?? [];
|
||||
if (children.length !== 3) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const [prefix, linkNode, suffix] = children;
|
||||
if (prefix?.type !== "text" || prefix.value?.trim() !== "](") {
|
||||
return null;
|
||||
}
|
||||
if (linkNode?.type !== "link" || !linkNode.url) {
|
||||
return null;
|
||||
}
|
||||
if (suffix?.type !== "text" || suffix.value?.trim() !== ")") {
|
||||
return null;
|
||||
}
|
||||
|
||||
return linkNode.url;
|
||||
}
|
||||
|
||||
function collectLinkedImageReplacements(
|
||||
node: MarkdownAstNode,
|
||||
replacements: MarkdownReplacementRange[],
|
||||
): void {
|
||||
const children = node.children ?? [];
|
||||
|
||||
if (node.type === "link" && children.length === 1 && children[0]?.type === "image") {
|
||||
const offsets = getNodeOffsets(node);
|
||||
if (offsets) {
|
||||
replacements.push({
|
||||
start: offsets.start,
|
||||
end: offsets.end,
|
||||
value: serializeLinkedImageNode(node, children[0]),
|
||||
});
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
for (const child of children) {
|
||||
collectLinkedImageReplacements(child, replacements);
|
||||
}
|
||||
}
|
||||
|
||||
function collectBrokenLinkedImageReplacements(
|
||||
node: MarkdownAstNode,
|
||||
replacements: MarkdownReplacementRange[],
|
||||
): void {
|
||||
const children = node.children ?? [];
|
||||
for (let index = 0; index <= children.length - 3; index += 1) {
|
||||
const openParagraph = children[index];
|
||||
const imageParagraph = children[index + 1];
|
||||
const closeParagraph = children[index + 2];
|
||||
|
||||
if (!isParagraphWithSingleText(openParagraph, "[")) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const imageNode = getSingleImageFromParagraph(imageParagraph);
|
||||
if (!imageNode) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const linkUrl = extractBrokenLinkedImageDestination(closeParagraph);
|
||||
if (!linkUrl) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const start = openParagraph.position?.start?.offset;
|
||||
const end = closeParagraph.position?.end?.offset;
|
||||
if (typeof start !== "number" || typeof end !== "number" || end < start) {
|
||||
continue;
|
||||
}
|
||||
|
||||
replacements.push({
|
||||
start,
|
||||
end,
|
||||
value: serializeLinkedImageNode({ type: "link", url: linkUrl }, imageNode),
|
||||
});
|
||||
|
||||
index += 2;
|
||||
}
|
||||
|
||||
for (const child of children) {
|
||||
collectBrokenLinkedImageReplacements(child, replacements);
|
||||
}
|
||||
}
|
||||
|
||||
function applyReplacements(source: string, replacements: MarkdownReplacementRange[]): string {
|
||||
if (replacements.length === 0) {
|
||||
return source;
|
||||
}
|
||||
|
||||
let result = source;
|
||||
const sorted = [...replacements].sort((left, right) => right.start - left.start);
|
||||
for (const replacement of sorted) {
|
||||
result = `${result.slice(0, replacement.start)}${replacement.value}${result.slice(replacement.end)}`;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
function normalizeLinkedImageMarkdown(markdown: string): string {
|
||||
let tree: MarkdownAstNode;
|
||||
try {
|
||||
tree = unified().use(remarkParse).use(remarkGfm).parse(markdown) as MarkdownAstNode;
|
||||
} catch {
|
||||
return markdown;
|
||||
}
|
||||
|
||||
const replacements: MarkdownReplacementRange[] = [];
|
||||
collectLinkedImageReplacements(tree, replacements);
|
||||
collectBrokenLinkedImageReplacements(tree, replacements);
|
||||
return applyReplacements(markdown, replacements);
|
||||
}
|
||||
|
||||
export function normalizeMarkdownMediaLinks(markdown: string): string {
|
||||
MARKDOWN_LINK_RE.lastIndex = 0;
|
||||
let result = markdown.replace(MARKDOWN_LINK_RE, (full, label, openAngle, rawUrl, closeAngle) => {
|
||||
const normalizedUrl = normalizeMediaUrl(rawUrl);
|
||||
if (normalizedUrl === rawUrl) {
|
||||
return full;
|
||||
}
|
||||
return `${label}(${openAngle ?? ""}${normalizedUrl}${closeAngle ?? ""})`;
|
||||
});
|
||||
|
||||
result = result.replace(FRONTMATTER_COVER_RE, (full, prefix, rawUrl, suffix) => {
|
||||
const normalizedUrl = normalizeMediaUrl(rawUrl);
|
||||
if (normalizedUrl === rawUrl) {
|
||||
return full;
|
||||
}
|
||||
return `${prefix}${normalizedUrl}${suffix}`;
|
||||
});
|
||||
|
||||
RAW_URL_RE.lastIndex = 0;
|
||||
result = result.replace(RAW_URL_RE, (rawUrl) => normalizeMediaUrl(rawUrl));
|
||||
return normalizeLinkedImageMarkdown(result);
|
||||
}
|
||||
|
||||
export function collectMediaFromText(
|
||||
text: string,
|
||||
options: {
|
||||
role?: MediaAsset["role"];
|
||||
defaultKind?: MediaAsset["kind"];
|
||||
seen?: Set<string>;
|
||||
into?: MediaAsset[];
|
||||
} = {},
|
||||
): MediaAsset[] {
|
||||
const assets = options.into ?? [];
|
||||
const seen = options.seen ?? new Set<string>();
|
||||
|
||||
MARKDOWN_LINK_RE.lastIndex = 0;
|
||||
let linkMatch: RegExpExecArray | null;
|
||||
while ((linkMatch = MARKDOWN_LINK_RE.exec(text))) {
|
||||
const label = linkMatch[1] ?? "";
|
||||
const rawUrl = linkMatch[3] ?? "";
|
||||
const kind = inferMediaKindFromLabel(label, rawUrl) ?? options.defaultKind;
|
||||
if (!kind) {
|
||||
continue;
|
||||
}
|
||||
pushMedia(assets, seen, {
|
||||
url: rawUrl,
|
||||
kind,
|
||||
role: options.role ?? "inline",
|
||||
});
|
||||
}
|
||||
|
||||
RAW_URL_RE.lastIndex = 0;
|
||||
let rawMatch: RegExpExecArray | null;
|
||||
while ((rawMatch = RAW_URL_RE.exec(text))) {
|
||||
const rawUrl = rawMatch[0] ?? "";
|
||||
const kind = inferMediaKindFromRawUrl(rawUrl) ?? options.defaultKind;
|
||||
if (!kind) {
|
||||
continue;
|
||||
}
|
||||
pushMedia(assets, seen, {
|
||||
url: rawUrl,
|
||||
kind,
|
||||
role: options.role ?? "inline",
|
||||
});
|
||||
}
|
||||
|
||||
return assets;
|
||||
}
|
||||
|
||||
function collectMediaFromBlock(
|
||||
block: ContentBlock,
|
||||
assets: MediaAsset[],
|
||||
seen: Set<string>,
|
||||
): void {
|
||||
switch (block.type) {
|
||||
case "image":
|
||||
pushMedia(assets, seen, {
|
||||
url: block.url,
|
||||
kind: "image",
|
||||
role: "inline",
|
||||
alt: block.alt,
|
||||
});
|
||||
return;
|
||||
case "html":
|
||||
case "markdown":
|
||||
collectMediaFromText(block.type === "html" ? block.html : block.markdown, {
|
||||
role: "inline",
|
||||
seen,
|
||||
into: assets,
|
||||
});
|
||||
return;
|
||||
case "paragraph":
|
||||
case "quote":
|
||||
collectMediaFromText(block.text, {
|
||||
role: "inline",
|
||||
seen,
|
||||
into: assets,
|
||||
});
|
||||
return;
|
||||
case "list":
|
||||
for (const item of block.items) {
|
||||
collectMediaFromText(item, {
|
||||
role: "attachment",
|
||||
seen,
|
||||
into: assets,
|
||||
});
|
||||
}
|
||||
return;
|
||||
case "heading":
|
||||
case "code":
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
export function collectMediaFromDocument(document: ExtractedDocument): MediaAsset[] {
|
||||
const assets: MediaAsset[] = [];
|
||||
const seen = new Set<string>();
|
||||
const coverImage =
|
||||
typeof document.metadata?.coverImage === "string" ? document.metadata.coverImage : undefined;
|
||||
|
||||
if (coverImage) {
|
||||
pushMedia(assets, seen, {
|
||||
url: coverImage,
|
||||
kind: "image",
|
||||
role: "cover",
|
||||
});
|
||||
}
|
||||
|
||||
for (const block of document.content) {
|
||||
collectMediaFromBlock(block, assets, seen);
|
||||
}
|
||||
|
||||
return assets;
|
||||
}
|
||||
|
||||
export function collectMediaFromMarkdown(markdown: string): MediaAsset[] {
|
||||
const assets: MediaAsset[] = [];
|
||||
const seen = new Set<string>();
|
||||
const fmMatch = markdown.match(/^---\n([\s\S]*?)\n---/);
|
||||
if (fmMatch) {
|
||||
const coverMatch = fmMatch[1]?.match(FRONTMATTER_COVER_RE);
|
||||
if (coverMatch?.[2]) {
|
||||
pushMedia(assets, seen, {
|
||||
url: coverMatch[2],
|
||||
kind: "image",
|
||||
role: "cover",
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
collectMediaFromText(markdown, { seen, into: assets });
|
||||
return assets;
|
||||
}
|
||||
|
||||
export function rewriteMarkdownMediaLinks(
|
||||
markdown: string,
|
||||
replacements: MediaReplacement[],
|
||||
): string {
|
||||
if (replacements.length === 0) {
|
||||
return markdown;
|
||||
}
|
||||
|
||||
const replacementMap = new Map<string, string>();
|
||||
for (const item of replacements) {
|
||||
replacementMap.set(item.url, item.localPath);
|
||||
replacementMap.set(normalizeMediaUrl(item.url), item.localPath);
|
||||
}
|
||||
|
||||
MARKDOWN_LINK_RE.lastIndex = 0;
|
||||
let result = markdown.replace(MARKDOWN_LINK_RE, (full, label, _openAngle, rawUrl) => {
|
||||
const replacement = replacementMap.get(rawUrl) ?? replacementMap.get(normalizeMediaUrl(rawUrl));
|
||||
if (!replacement) {
|
||||
return full;
|
||||
}
|
||||
return `${label}(${replacement})`;
|
||||
});
|
||||
|
||||
result = result.replace(FRONTMATTER_COVER_RE, (full, prefix, rawUrl, suffix) => {
|
||||
const replacement = replacementMap.get(rawUrl) ?? replacementMap.get(normalizeMediaUrl(rawUrl));
|
||||
if (!replacement) {
|
||||
return full;
|
||||
}
|
||||
return `${prefix}${replacement}${suffix}`;
|
||||
});
|
||||
|
||||
for (const { url, localPath } of replacements) {
|
||||
result = result.split(url).join(localPath);
|
||||
const normalizedUrl = normalizeMediaUrl(url);
|
||||
if (normalizedUrl !== url) {
|
||||
result = result.split(normalizedUrl).join(localPath);
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
export function resolveDataUriExtension(rawUrl: string): string | undefined {
|
||||
if (!isDataUri(rawUrl)) {
|
||||
return undefined;
|
||||
}
|
||||
const separatorIndex = rawUrl.indexOf(";");
|
||||
const contentType = normalizeContentType(rawUrl.slice(5, separatorIndex === -1 ? undefined : separatorIndex));
|
||||
return resolveExtensionFromContentType(contentType);
|
||||
}
|
||||
|
|
@ -0,0 +1,261 @@
|
|||
import path from "node:path";
|
||||
import type { MediaKind } from "./types";
|
||||
|
||||
const IMAGE_EXTENSIONS = new Set([
|
||||
"jpg",
|
||||
"jpeg",
|
||||
"png",
|
||||
"webp",
|
||||
"gif",
|
||||
"bmp",
|
||||
"avif",
|
||||
"heic",
|
||||
"heif",
|
||||
"svg",
|
||||
]);
|
||||
|
||||
const VIDEO_EXTENSIONS = new Set(["mp4", "m4v", "mov", "webm", "mkv"]);
|
||||
|
||||
const MIME_EXTENSION_MAP: Record<string, string> = {
|
||||
"image/jpeg": "jpg",
|
||||
"image/jpg": "jpg",
|
||||
"image/png": "png",
|
||||
"image/webp": "webp",
|
||||
"image/gif": "gif",
|
||||
"image/bmp": "bmp",
|
||||
"image/avif": "avif",
|
||||
"image/heic": "heic",
|
||||
"image/heif": "heif",
|
||||
"image/svg+xml": "svg",
|
||||
"video/mp4": "mp4",
|
||||
"video/webm": "webm",
|
||||
"video/quicktime": "mov",
|
||||
"video/x-m4v": "m4v",
|
||||
};
|
||||
|
||||
export function normalizeContentType(raw: string | null): string {
|
||||
return raw?.split(";")[0]?.trim().toLowerCase() ?? "";
|
||||
}
|
||||
|
||||
export function normalizeExtension(raw: string | undefined | null): string | undefined {
|
||||
if (!raw) {
|
||||
return undefined;
|
||||
}
|
||||
const trimmed = raw.replace(/^\./, "").trim().toLowerCase();
|
||||
if (!trimmed) {
|
||||
return undefined;
|
||||
}
|
||||
if (trimmed === "jpeg" || trimmed === "jpg") {
|
||||
return "jpg";
|
||||
}
|
||||
return trimmed;
|
||||
}
|
||||
|
||||
export function resolveExtensionFromUrl(rawUrl: string): string | undefined {
|
||||
try {
|
||||
const parsed = new URL(rawUrl);
|
||||
const extFromPath = normalizeExtension(path.posix.extname(parsed.pathname));
|
||||
if (extFromPath) {
|
||||
return extFromPath;
|
||||
}
|
||||
const extFromFormat = normalizeExtension(parsed.searchParams.get("format"));
|
||||
if (extFromFormat) {
|
||||
return extFromFormat;
|
||||
}
|
||||
} catch {
|
||||
return undefined;
|
||||
}
|
||||
return undefined;
|
||||
}
|
||||
|
||||
export function resolveExtensionFromContentType(contentType: string): string | undefined {
|
||||
return normalizeExtension(MIME_EXTENSION_MAP[contentType]);
|
||||
}
|
||||
|
||||
export function resolveKindFromContentType(contentType: string): MediaKind | undefined {
|
||||
if (!contentType) {
|
||||
return undefined;
|
||||
}
|
||||
if (contentType.startsWith("image/")) {
|
||||
return "image";
|
||||
}
|
||||
if (contentType.startsWith("video/")) {
|
||||
return "video";
|
||||
}
|
||||
return undefined;
|
||||
}
|
||||
|
||||
export function resolveKindFromExtension(extension: string | undefined): MediaKind | undefined {
|
||||
if (!extension) {
|
||||
return undefined;
|
||||
}
|
||||
if (IMAGE_EXTENSIONS.has(extension)) {
|
||||
return "image";
|
||||
}
|
||||
if (VIDEO_EXTENSIONS.has(extension)) {
|
||||
return "video";
|
||||
}
|
||||
return undefined;
|
||||
}
|
||||
|
||||
export function resolveMediaKind(
|
||||
rawUrl: string,
|
||||
contentType: string,
|
||||
extension: string | undefined,
|
||||
hint?: MediaKind,
|
||||
): MediaKind | undefined {
|
||||
const kindFromType = resolveKindFromContentType(contentType);
|
||||
if (kindFromType) {
|
||||
return kindFromType;
|
||||
}
|
||||
|
||||
const kindFromExtension = resolveKindFromExtension(extension);
|
||||
if (kindFromExtension) {
|
||||
return kindFromExtension;
|
||||
}
|
||||
|
||||
if (contentType && contentType !== "application/octet-stream") {
|
||||
return undefined;
|
||||
}
|
||||
|
||||
if (hint) {
|
||||
return hint;
|
||||
}
|
||||
|
||||
if (rawUrl.startsWith("data:image/")) {
|
||||
return "image";
|
||||
}
|
||||
|
||||
if (rawUrl.startsWith("data:video/")) {
|
||||
return "video";
|
||||
}
|
||||
|
||||
return undefined;
|
||||
}
|
||||
|
||||
export function resolveOutputExtension(
|
||||
contentType: string,
|
||||
extension: string | undefined,
|
||||
kind: MediaKind,
|
||||
): string {
|
||||
const fromMime = resolveExtensionFromContentType(contentType);
|
||||
if (fromMime) {
|
||||
return fromMime;
|
||||
}
|
||||
const normalized = normalizeExtension(extension);
|
||||
if (normalized) {
|
||||
return normalized;
|
||||
}
|
||||
return kind === "video" ? "mp4" : "jpg";
|
||||
}
|
||||
|
||||
export function isDataUri(value: string): boolean {
|
||||
return value.startsWith("data:");
|
||||
}
|
||||
|
||||
export function safeDecodeURIComponent(value: string): string {
|
||||
try {
|
||||
return decodeURIComponent(value);
|
||||
} catch {
|
||||
return value;
|
||||
}
|
||||
}
|
||||
|
||||
function extractEmbeddedUrl(value: string): string | undefined {
|
||||
const encodedMatch = value.match(/https?%3A%2F%2F.+$/i)?.[0];
|
||||
if (encodedMatch) {
|
||||
const decoded = safeDecodeURIComponent(encodedMatch);
|
||||
try {
|
||||
return new URL(decoded).href;
|
||||
} catch {
|
||||
return undefined;
|
||||
}
|
||||
}
|
||||
|
||||
const literalMatch = value.match(/https?:\/\/.+$/i)?.[0];
|
||||
if (!literalMatch) {
|
||||
return undefined;
|
||||
}
|
||||
|
||||
try {
|
||||
return new URL(literalMatch).href;
|
||||
} catch {
|
||||
return undefined;
|
||||
}
|
||||
}
|
||||
|
||||
export function normalizeMediaUrl(rawUrl: string): string {
|
||||
if (isDataUri(rawUrl)) {
|
||||
return rawUrl;
|
||||
}
|
||||
|
||||
try {
|
||||
const parsed = new URL(rawUrl);
|
||||
const hostname = parsed.hostname.toLowerCase();
|
||||
|
||||
if (hostname === "substackcdn.com" || hostname.endsWith(".substackcdn.com")) {
|
||||
const embeddedUrl = extractEmbeddedUrl(`${parsed.pathname}${parsed.search}`);
|
||||
if (embeddedUrl) {
|
||||
return embeddedUrl;
|
||||
}
|
||||
}
|
||||
|
||||
return parsed.href;
|
||||
} catch {
|
||||
return rawUrl;
|
||||
}
|
||||
}
|
||||
|
||||
export function sanitizeFileSegment(input: string): string {
|
||||
return input
|
||||
.replace(/[^a-zA-Z0-9_-]+/g, "-")
|
||||
.replace(/-+/g, "-")
|
||||
.replace(/^[-_]+|[-_]+$/g, "")
|
||||
.slice(0, 48);
|
||||
}
|
||||
|
||||
export function resolveFileStem(rawUrl: string, extension: string, fileNameHint?: string): string {
|
||||
const hintBase = fileNameHint?.trim();
|
||||
if (hintBase) {
|
||||
const parsed = path.posix.parse(hintBase);
|
||||
const stem = parsed.name || parsed.base;
|
||||
return sanitizeFileSegment(stem);
|
||||
}
|
||||
|
||||
if (isDataUri(rawUrl)) {
|
||||
return "";
|
||||
}
|
||||
|
||||
try {
|
||||
const parsed = new URL(rawUrl);
|
||||
const base = path.posix.basename(parsed.pathname);
|
||||
if (!base) {
|
||||
return "";
|
||||
}
|
||||
const decodedBase = safeDecodeURIComponent(base);
|
||||
const normalizedExtension = normalizeExtension(extension);
|
||||
const stripExtension = normalizedExtension ? new RegExp(`\\.${normalizedExtension}$`, "i") : null;
|
||||
const rawStem = stripExtension ? decodedBase.replace(stripExtension, "") : decodedBase;
|
||||
return sanitizeFileSegment(rawStem);
|
||||
} catch {
|
||||
return "";
|
||||
}
|
||||
}
|
||||
|
||||
export function buildFileName(
|
||||
kind: MediaKind,
|
||||
index: number,
|
||||
sourceUrl: string,
|
||||
extension: string,
|
||||
fileNameHint?: string,
|
||||
): string {
|
||||
const stem = resolveFileStem(sourceUrl, extension, fileNameHint);
|
||||
const prefix = kind === "image" ? "img" : "video";
|
||||
const serial = String(index).padStart(3, "0");
|
||||
const suffix = stem ? `-${stem}` : "";
|
||||
return `${prefix}-${serial}${suffix}.${extension}`;
|
||||
}
|
||||
|
||||
export function toPosixPath(value: string): string {
|
||||
return value.split(path.sep).join(path.posix.sep);
|
||||
}
|
||||
|
|
@ -0,0 +1,34 @@
|
|||
import type { Logger } from "../utils/logger";
|
||||
|
||||
export type MediaKind = "image" | "video";
|
||||
|
||||
export interface MediaAsset {
|
||||
url: string;
|
||||
kind?: MediaKind;
|
||||
role?: "cover" | "inline" | "attachment";
|
||||
alt?: string;
|
||||
fileNameHint?: string;
|
||||
headers?: Record<string, string>;
|
||||
}
|
||||
|
||||
export interface MediaReplacement {
|
||||
url: string;
|
||||
localPath: string;
|
||||
absolutePath: string;
|
||||
kind: MediaKind;
|
||||
}
|
||||
|
||||
export interface MediaDownloadRequest {
|
||||
media: MediaAsset[];
|
||||
outputPath: string;
|
||||
mediaDir?: string;
|
||||
log: Logger;
|
||||
}
|
||||
|
||||
export interface MediaDownloadResult {
|
||||
replacements: MediaReplacement[];
|
||||
downloadedImages: number;
|
||||
downloadedVideos: number;
|
||||
imageDir: string | null;
|
||||
videoDir: string | null;
|
||||
}
|
||||
|
|
@ -0,0 +1,31 @@
|
|||
declare module "defuddle/node" {
|
||||
export interface DefuddleResponse {
|
||||
content?: string;
|
||||
title?: string;
|
||||
description?: string;
|
||||
author?: string;
|
||||
published?: string;
|
||||
image?: string;
|
||||
language?: string;
|
||||
}
|
||||
|
||||
export interface DefuddleOptions {
|
||||
markdown?: boolean;
|
||||
}
|
||||
|
||||
export function Defuddle(
|
||||
input:
|
||||
| Document
|
||||
| string
|
||||
| {
|
||||
window: {
|
||||
document: Document;
|
||||
location: {
|
||||
href: string;
|
||||
};
|
||||
};
|
||||
},
|
||||
url?: string,
|
||||
options?: DefuddleOptions,
|
||||
): Promise<DefuddleResponse>;
|
||||
}
|
||||
|
|
@ -0,0 +1,17 @@
|
|||
declare module "turndown" {
|
||||
export interface TurndownOptions {
|
||||
codeBlockStyle?: "indented" | "fenced";
|
||||
headingStyle?: "setext" | "atx";
|
||||
bulletListMarker?: "-" | "*" | "+";
|
||||
}
|
||||
|
||||
export default class TurndownService {
|
||||
constructor(options?: TurndownOptions);
|
||||
use(plugin: unknown): void;
|
||||
turndown(input: string): string;
|
||||
}
|
||||
}
|
||||
|
||||
declare module "turndown-plugin-gfm" {
|
||||
export const gfm: unknown;
|
||||
}
|
||||
|
|
@ -0,0 +1,30 @@
|
|||
export interface Logger {
|
||||
info(message: string): void;
|
||||
warn(message: string): void;
|
||||
error(message: string): void;
|
||||
debug(message: string): void;
|
||||
}
|
||||
|
||||
export function createLogger(debugEnabled = false): Logger {
|
||||
const print = (level: string, message: string): void => {
|
||||
console.error(`[${level}] ${message}`);
|
||||
};
|
||||
|
||||
return {
|
||||
info(message: string) {
|
||||
print("info", message);
|
||||
},
|
||||
warn(message: string) {
|
||||
print("warn", message);
|
||||
},
|
||||
error(message: string) {
|
||||
print("error", message);
|
||||
},
|
||||
debug(message: string) {
|
||||
if (debugEnabled) {
|
||||
print("debug", message);
|
||||
}
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,12 @@
|
|||
export function normalizeUrl(input: string): URL {
|
||||
try {
|
||||
return new URL(input);
|
||||
} catch {
|
||||
throw new Error(`Invalid URL: ${input}`);
|
||||
}
|
||||
}
|
||||
|
||||
export function sanitizeFilename(input: string): string {
|
||||
return input.replace(/[^a-zA-Z0-9._-]+/g, "-").replace(/^-+|-+$/g, "") || "document";
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,17 @@
|
|||
{
|
||||
"compilerOptions": {
|
||||
"target": "ES2022",
|
||||
"module": "ESNext",
|
||||
"moduleResolution": "Bundler",
|
||||
"lib": ["ES2022", "DOM"],
|
||||
"types": ["bun"],
|
||||
"strict": true,
|
||||
"noEmit": true,
|
||||
"skipLibCheck": true,
|
||||
"allowSyntheticDefaultImports": true,
|
||||
"forceConsistentCasingInFileNames": true,
|
||||
"verbatimModuleSyntax": true,
|
||||
"resolveJsonModule": true
|
||||
},
|
||||
"include": ["src/**/*.ts", "src/**/*.d.ts"]
|
||||
}
|
||||
|
|
@ -3,6 +3,9 @@
|
|||
"private": true,
|
||||
"version": "0.1.0",
|
||||
"type": "module",
|
||||
"files": [
|
||||
"src"
|
||||
],
|
||||
"exports": {
|
||||
".": "./src/index.ts"
|
||||
},
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ import assert from "node:assert/strict";
|
|||
import test from "node:test";
|
||||
|
||||
import {
|
||||
cleanSummaryText,
|
||||
extractSummaryFromBody,
|
||||
extractTitleFromMarkdown,
|
||||
parseFrontmatter,
|
||||
|
|
@ -91,3 +92,19 @@ This is **the first paragraph** with [a link](https://example.com) and \`inline
|
|||
"This is the first paragraph with a link and inline code that should...",
|
||||
);
|
||||
});
|
||||
|
||||
test("summary extraction normalizes raw HTML paragraphs to plain text", () => {
|
||||
const summary = extractSummaryFromBody(
|
||||
`
|
||||
# Heading
|
||||
<p style="font-size: 16px; color: #666; margin-bottom: 20px;">2026年初,一只“龙虾”搅动了整个科技圈。腾讯楼下排起近千人长队,只为让工程师领取一份福利。</p>
|
||||
`,
|
||||
120,
|
||||
);
|
||||
|
||||
assert.equal(
|
||||
summary,
|
||||
"2026年初,一只“龙虾”搅动了整个科技圈。腾讯楼下排起近千人长队,只为让工程师领取一份福利。",
|
||||
);
|
||||
assert.equal(cleanSummaryText("<strong>Good text!'</strong>"), "Good text!'");
|
||||
});
|
||||
|
|
|
|||
|
|
@ -46,6 +46,45 @@ export function stripWrappingQuotes(value: string): string {
|
|||
return value.trim();
|
||||
}
|
||||
|
||||
const HTML_ENTITIES: Record<string, string> = {
|
||||
amp: "&",
|
||||
apos: "'",
|
||||
gt: ">",
|
||||
lt: "<",
|
||||
nbsp: " ",
|
||||
quot: '"',
|
||||
};
|
||||
|
||||
function decodeHtmlCodePoint(codePoint: number, fallback: string): string {
|
||||
if (!Number.isFinite(codePoint) || codePoint < 0 || codePoint > 0x10ffff) {
|
||||
return fallback;
|
||||
}
|
||||
return String.fromCodePoint(codePoint);
|
||||
}
|
||||
|
||||
function decodeHtmlEntities(value: string): string {
|
||||
return value.replace(/&(#x?[0-9a-f]+|[a-z]+);/gi, (entity, body: string) => {
|
||||
const normalized = body.toLowerCase();
|
||||
if (normalized.startsWith("#x")) {
|
||||
return decodeHtmlCodePoint(Number.parseInt(normalized.slice(2), 16), entity);
|
||||
}
|
||||
if (normalized.startsWith("#")) {
|
||||
return decodeHtmlCodePoint(Number.parseInt(normalized.slice(1), 10), entity);
|
||||
}
|
||||
return HTML_ENTITIES[normalized] ?? entity;
|
||||
});
|
||||
}
|
||||
|
||||
export function cleanSummaryText(value: string): string {
|
||||
return decodeHtmlEntities(stripWrappingQuotes(value))
|
||||
.replace(/<script\b[\s\S]*?<\/script>/gi, " ")
|
||||
.replace(/<style\b[\s\S]*?<\/style>/gi, " ")
|
||||
.replace(/<br\s*\/?>/gi, " ")
|
||||
.replace(/<\/?[a-z][a-z0-9:-]*(?:\s+[^>]*)?>/gi, " ")
|
||||
.replace(/\s+/g, " ")
|
||||
.trim();
|
||||
}
|
||||
|
||||
export function toFrontmatterString(value: unknown): string | undefined {
|
||||
if (typeof value === "string") {
|
||||
return stripWrappingQuotes(value);
|
||||
|
|
@ -94,10 +133,11 @@ export function extractSummaryFromBody(body: string, maxLen: number): string {
|
|||
.replace(/\*(.+?)\*/g, "$1")
|
||||
.replace(/\[([^\]]+)\]\([^)]+\)/g, "$1")
|
||||
.replace(/`([^`]+)`/g, "$1");
|
||||
const summaryText = cleanSummaryText(cleanText);
|
||||
|
||||
if (cleanText.length > 20) {
|
||||
if (cleanText.length <= maxLen) return cleanText;
|
||||
return `${cleanText.slice(0, maxLen - 3)}...`;
|
||||
if (summaryText.length > 20) {
|
||||
if (summaryText.length <= maxLen) return summaryText;
|
||||
return `${summaryText.slice(0, maxLen - 3)}...`;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -39,6 +39,22 @@ test("buildHtmlDocument includes optional meta tags and code theme CSS", () => {
|
|||
assert.match(html, /<article>Hello<\/article>/);
|
||||
});
|
||||
|
||||
test("buildHtmlDocument escapes head metadata attributes", () => {
|
||||
const html = buildHtmlDocument(
|
||||
{
|
||||
title: `Doc <draft>`,
|
||||
author: `Bao"yu`,
|
||||
description: `<p style="color: red">Summary & notes</p>`,
|
||||
},
|
||||
"",
|
||||
"",
|
||||
);
|
||||
|
||||
assert.match(html, /<title>Doc <draft><\/title>/);
|
||||
assert.match(html, /meta name="author" content="Bao"yu"/);
|
||||
assert.match(html, /meta name="description" content="<p style="color: red">Summary & notes<\/p>"/);
|
||||
});
|
||||
|
||||
test("normalizeCssText and normalizeInlineCss replace variables and strip declarations", () => {
|
||||
const rawCss = `
|
||||
:root { --md-primary-color: #000; --md-font-size: 12px; --foreground: 0 0% 5%; }
|
||||
|
|
|
|||
|
|
@ -45,19 +45,24 @@ export function loadCodeThemeCss(themeName: string): string {
|
|||
}
|
||||
|
||||
export function buildHtmlDocument(meta: HtmlDocumentMeta, css: string, html: string, codeThemeCss?: string): string {
|
||||
const escapeHtmlAttribute = (value: string) => value
|
||||
.replace(/&/g, "&")
|
||||
.replace(/"/g, """)
|
||||
.replace(/</g, "<")
|
||||
.replace(/>/g, ">");
|
||||
const lines = [
|
||||
"<!doctype html>",
|
||||
"<html>",
|
||||
"<head>",
|
||||
' <meta charset="utf-8" />',
|
||||
' <meta name="viewport" content="width=device-width, initial-scale=1" />',
|
||||
` <title>${meta.title}</title>`,
|
||||
` <title>${escapeHtmlAttribute(meta.title)}</title>`,
|
||||
];
|
||||
if (meta.author) {
|
||||
lines.push(` <meta name="author" content="${meta.author}" />`);
|
||||
lines.push(` <meta name="author" content="${escapeHtmlAttribute(meta.author)}" />`);
|
||||
}
|
||||
if (meta.description) {
|
||||
lines.push(` <meta name="description" content="${meta.description}" />`);
|
||||
lines.push(` <meta name="description" content="${escapeHtmlAttribute(meta.description)}" />`);
|
||||
}
|
||||
lines.push(` <style>${css}</style>`);
|
||||
if (codeThemeCss) {
|
||||
|
|
|
|||
|
|
@ -17,14 +17,14 @@ async function makeTempDir(prefix: string): Promise<string> {
|
|||
|
||||
test("replaceMarkdownImagesWithPlaceholders rewrites markdown and tracks image metadata", () => {
|
||||
const result = replaceMarkdownImagesWithPlaceholders(
|
||||
`\n\nText\n\n`,
|
||||
`\n\nText\n\n`,
|
||||
"IMG_",
|
||||
);
|
||||
|
||||
assert.equal(result.markdown, `IMG_1\n\nText\n\nIMG_2`);
|
||||
assert.deepEqual(result.images, [
|
||||
{ alt: "cover", originalPath: "images/cover.png", placeholder: "IMG_1" },
|
||||
{ alt: "diagram", originalPath: "images/diagram.webp", placeholder: "IMG_2" },
|
||||
{ alt: "cover", originalPath: "imgs/cover.png", placeholder: "IMG_1" },
|
||||
{ alt: "diagram", originalPath: "imgs/diagram.webp", placeholder: "IMG_2" },
|
||||
]);
|
||||
});
|
||||
|
||||
|
|
|
|||
|
|
@ -10,8 +10,12 @@ const PACKAGE_DEPENDENCY_SECTIONS = [
|
|||
"devDependencies",
|
||||
];
|
||||
|
||||
const SKIPPED_DIRS = new Set([".git", ".clawhub", ".clawdhub", "node_modules"]);
|
||||
const SKIPPED_DIRS = new Set([".git", ".changeset", ".clawhub", ".clawdhub", "node_modules"]);
|
||||
const SKIPPED_FILES = new Set([".DS_Store"]);
|
||||
const TEST_DIR_NAMES = new Set(["__tests__", "test", "tests"]);
|
||||
const TEST_FILE_PATTERN = /\.(test|spec)\.[^.]+$/;
|
||||
const CHANGELOG_FILE_PATTERN = /^CHANGELOG(?:\..+)?\.md$/i;
|
||||
const UNSUPPORTED_FILES_GLOB_PATTERN = /[*?[\]{}!]/;
|
||||
|
||||
export async function syncSharedSkillPackages(repoRoot, options = {}) {
|
||||
const root = path.resolve(repoRoot);
|
||||
|
|
@ -131,23 +135,7 @@ async function syncPackageTree({ sourceDir, targetDir, workspacePackages }) {
|
|||
const sourcePackageJsonPath = path.join(sourceDir, "package.json");
|
||||
const packageJson = JSON.parse(await fs.readFile(sourcePackageJsonPath, "utf8"));
|
||||
const localDeps = collectLocalDependencies(packageJson, workspacePackages);
|
||||
|
||||
const entries = await fs.readdir(sourceDir, { withFileTypes: true });
|
||||
for (const entry of entries) {
|
||||
if (SKIPPED_DIRS.has(entry.name) || SKIPPED_FILES.has(entry.name)) continue;
|
||||
|
||||
const sourcePath = path.join(sourceDir, entry.name);
|
||||
const targetPath = path.join(targetDir, entry.name);
|
||||
|
||||
if (entry.isDirectory()) {
|
||||
await copyDirectory(sourcePath, targetPath);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!entry.isFile() || entry.name === "package.json") continue;
|
||||
await fs.mkdir(path.dirname(targetPath), { recursive: true });
|
||||
await fs.copyFile(sourcePath, targetPath);
|
||||
}
|
||||
await copyPackageContents({ sourceDir, targetDir, packageJson });
|
||||
|
||||
for (const name of localDeps) {
|
||||
const nestedSourceDir = workspacePackages.get(name);
|
||||
|
|
@ -167,7 +155,7 @@ async function copyDirectory(sourceDir, targetDir) {
|
|||
await fs.mkdir(targetDir, { recursive: true });
|
||||
const entries = await fs.readdir(sourceDir, { withFileTypes: true });
|
||||
for (const entry of entries) {
|
||||
if (SKIPPED_DIRS.has(entry.name) || SKIPPED_FILES.has(entry.name)) continue;
|
||||
if (shouldSkipEntry(entry)) continue;
|
||||
|
||||
const sourcePath = path.join(sourceDir, entry.name);
|
||||
const targetPath = path.join(targetDir, entry.name);
|
||||
|
|
@ -183,6 +171,102 @@ async function copyDirectory(sourceDir, targetDir) {
|
|||
}
|
||||
}
|
||||
|
||||
async function copyPackageContents({ sourceDir, targetDir, packageJson }) {
|
||||
const includedPaths = resolveIncludedPackagePaths(packageJson);
|
||||
if (!includedPaths) {
|
||||
const entries = await fs.readdir(sourceDir, { withFileTypes: true });
|
||||
for (const entry of entries) {
|
||||
if (shouldSkipEntry(entry)) continue;
|
||||
|
||||
const sourcePath = path.join(sourceDir, entry.name);
|
||||
const targetPath = path.join(targetDir, entry.name);
|
||||
|
||||
if (entry.isDirectory()) {
|
||||
await copyDirectory(sourcePath, targetPath);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!entry.isFile() || entry.name === "package.json") continue;
|
||||
await fs.mkdir(path.dirname(targetPath), { recursive: true });
|
||||
await fs.copyFile(sourcePath, targetPath);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
for (const relativePath of includedPaths) {
|
||||
const sourcePath = path.join(sourceDir, relativePath);
|
||||
const targetPath = path.join(targetDir, relativePath);
|
||||
await copyPath(sourcePath, targetPath);
|
||||
}
|
||||
}
|
||||
|
||||
async function copyPath(sourcePath, targetPath) {
|
||||
let stat;
|
||||
try {
|
||||
stat = await fs.lstat(sourcePath);
|
||||
} catch (error) {
|
||||
if (error && typeof error === "object" && "code" in error && error.code === "ENOENT") return;
|
||||
throw error;
|
||||
}
|
||||
|
||||
const name = path.basename(sourcePath);
|
||||
if (stat.isDirectory()) {
|
||||
if (shouldSkipName(name, { isDirectory: true })) return;
|
||||
await copyDirectory(sourcePath, targetPath);
|
||||
return;
|
||||
}
|
||||
|
||||
if (!stat.isFile()) return;
|
||||
if (name === "package.json" || shouldSkipName(name, { isFile: true })) return;
|
||||
await fs.mkdir(path.dirname(targetPath), { recursive: true });
|
||||
await fs.copyFile(sourcePath, targetPath);
|
||||
}
|
||||
|
||||
function resolveIncludedPackagePaths(packageJson) {
|
||||
if (!Array.isArray(packageJson.files)) return null;
|
||||
|
||||
const includedPaths = [];
|
||||
for (const entry of packageJson.files) {
|
||||
if (typeof entry !== "string") continue;
|
||||
|
||||
const normalized = normalizeIncludedPath(entry);
|
||||
if (!normalized || normalized === "package.json") continue;
|
||||
includedPaths.push(normalized);
|
||||
}
|
||||
|
||||
return [...new Set(includedPaths)];
|
||||
}
|
||||
|
||||
function normalizeIncludedPath(entry) {
|
||||
const trimmed = entry.trim();
|
||||
if (!trimmed) return null;
|
||||
if (UNSUPPORTED_FILES_GLOB_PATTERN.test(trimmed)) {
|
||||
throw new Error(`Unsupported package.json files entry: ${entry}`);
|
||||
}
|
||||
|
||||
const normalized = path.posix.normalize(trimmed.replace(/\\/g, "/")).replace(/^(\.\/)+/, "");
|
||||
if (!normalized || normalized === ".") return null;
|
||||
if (path.posix.isAbsolute(normalized) || normalized.startsWith("../")) {
|
||||
throw new Error(`Package file entry must stay within the package root: ${entry}`);
|
||||
}
|
||||
|
||||
return normalized;
|
||||
}
|
||||
|
||||
function shouldSkipEntry(entry) {
|
||||
return shouldSkipName(entry.name, {
|
||||
isDirectory: entry.isDirectory(),
|
||||
isFile: entry.isFile(),
|
||||
});
|
||||
}
|
||||
|
||||
function shouldSkipName(name, { isDirectory = false, isFile = false } = {}) {
|
||||
if (SKIPPED_DIRS.has(name) || SKIPPED_FILES.has(name)) return true;
|
||||
if (isDirectory && TEST_DIR_NAMES.has(name)) return true;
|
||||
if (isFile && (TEST_FILE_PATTERN.test(name) || CHANGELOG_FILE_PATTERN.test(name))) return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
async function discoverWorkspacePackages(repoRoot) {
|
||||
const packagesRoot = path.join(repoRoot, "packages");
|
||||
const map = new Map();
|
||||
|
|
|
|||
|
|
@ -31,6 +31,26 @@ test("syncSharedSkillPackages vendors workspace packages into skill scripts", as
|
|||
path.join(root, "packages", "baoyu-md", "src", "index.ts"),
|
||||
"export const markdown = true;\n",
|
||||
);
|
||||
await writeFile(
|
||||
path.join(root, "packages", "baoyu-md", "src", "index.test.ts"),
|
||||
"test('ignored', () => {});\n",
|
||||
);
|
||||
await writeFile(
|
||||
path.join(root, "packages", "baoyu-md", "src", "__tests__", "helper.ts"),
|
||||
"export const helper = true;\n",
|
||||
);
|
||||
await writeFile(
|
||||
path.join(root, "packages", "baoyu-md", "tests", "setup.ts"),
|
||||
"export const setup = true;\n",
|
||||
);
|
||||
await writeFile(
|
||||
path.join(root, "packages", "baoyu-md", ".changeset", "demo.md"),
|
||||
"---\n",
|
||||
);
|
||||
await writeFile(
|
||||
path.join(root, "packages", "baoyu-md", "CHANGELOG.md"),
|
||||
"# changelog\n",
|
||||
);
|
||||
|
||||
const consumerDir = path.join(root, "skills", "demo-skill", "scripts");
|
||||
await writeJson(path.join(consumerDir, "package.json"), {
|
||||
|
|
@ -67,4 +87,97 @@ test("syncSharedSkillPackages vendors workspace packages into skill scripts", as
|
|||
"utf8",
|
||||
);
|
||||
assert.match(vendoredFile, /markdown = true/);
|
||||
|
||||
await assert.rejects(
|
||||
fs.readFile(path.join(consumerDir, "vendor", "baoyu-md", "src", "index.test.ts"), "utf8"),
|
||||
{ code: "ENOENT" },
|
||||
);
|
||||
await assert.rejects(
|
||||
fs.readFile(path.join(consumerDir, "vendor", "baoyu-md", "src", "__tests__", "helper.ts"), "utf8"),
|
||||
{ code: "ENOENT" },
|
||||
);
|
||||
await assert.rejects(
|
||||
fs.readFile(path.join(consumerDir, "vendor", "baoyu-md", "tests", "setup.ts"), "utf8"),
|
||||
{ code: "ENOENT" },
|
||||
);
|
||||
await assert.rejects(
|
||||
fs.readFile(path.join(consumerDir, "vendor", "baoyu-md", ".changeset", "demo.md"), "utf8"),
|
||||
{ code: "ENOENT" },
|
||||
);
|
||||
await assert.rejects(
|
||||
fs.readFile(path.join(consumerDir, "vendor", "baoyu-md", "CHANGELOG.md"), "utf8"),
|
||||
{ code: "ENOENT" },
|
||||
);
|
||||
});
|
||||
|
||||
test("syncSharedSkillPackages respects package.json files allowlists", async (t) => {
|
||||
const root = await makeTempDir("baoyu-sync-files-");
|
||||
t.after(() => fs.rm(root, { recursive: true, force: true }));
|
||||
|
||||
await writeJson(path.join(root, "packages", "demo-pkg", "package.json"), {
|
||||
name: "demo-pkg",
|
||||
version: "1.0.0",
|
||||
files: ["README.md", "src", "CHANGELOG.md", ".changeset"],
|
||||
});
|
||||
await writeFile(
|
||||
path.join(root, "packages", "demo-pkg", "README.md"),
|
||||
"# Demo\n",
|
||||
);
|
||||
await writeFile(
|
||||
path.join(root, "packages", "demo-pkg", "src", "index.ts"),
|
||||
"export const demo = true;\n",
|
||||
);
|
||||
await writeFile(
|
||||
path.join(root, "packages", "demo-pkg", "src", "index.test.ts"),
|
||||
"test('ignored', () => {});\n",
|
||||
);
|
||||
await writeFile(
|
||||
path.join(root, "packages", "demo-pkg", "docs", "private.md"),
|
||||
"private\n",
|
||||
);
|
||||
await writeFile(
|
||||
path.join(root, "packages", "demo-pkg", "CHANGELOG.md"),
|
||||
"# changelog\n",
|
||||
);
|
||||
await writeFile(
|
||||
path.join(root, "packages", "demo-pkg", ".changeset", "demo.md"),
|
||||
"---\n",
|
||||
);
|
||||
|
||||
const consumerDir = path.join(root, "skills", "demo-skill", "scripts");
|
||||
await writeJson(path.join(consumerDir, "package.json"), {
|
||||
name: "demo-skill-scripts",
|
||||
version: "1.0.0",
|
||||
dependencies: {
|
||||
"demo-pkg": "^1.0.0",
|
||||
},
|
||||
});
|
||||
|
||||
await syncSharedSkillPackages(root, { install: false });
|
||||
|
||||
const readme = await fs.readFile(path.join(consumerDir, "vendor", "demo-pkg", "README.md"), "utf8");
|
||||
assert.match(readme, /Demo/);
|
||||
|
||||
const vendoredSource = await fs.readFile(
|
||||
path.join(consumerDir, "vendor", "demo-pkg", "src", "index.ts"),
|
||||
"utf8",
|
||||
);
|
||||
assert.match(vendoredSource, /demo = true/);
|
||||
|
||||
await assert.rejects(
|
||||
fs.readFile(path.join(consumerDir, "vendor", "demo-pkg", "docs", "private.md"), "utf8"),
|
||||
{ code: "ENOENT" },
|
||||
);
|
||||
await assert.rejects(
|
||||
fs.readFile(path.join(consumerDir, "vendor", "demo-pkg", "CHANGELOG.md"), "utf8"),
|
||||
{ code: "ENOENT" },
|
||||
);
|
||||
await assert.rejects(
|
||||
fs.readFile(path.join(consumerDir, "vendor", "demo-pkg", ".changeset", "demo.md"), "utf8"),
|
||||
{ code: "ENOENT" },
|
||||
);
|
||||
await assert.rejects(
|
||||
fs.readFile(path.join(consumerDir, "vendor", "demo-pkg", "src", "index.test.ts"), "utf8"),
|
||||
{ code: "ENOENT" },
|
||||
);
|
||||
});
|
||||
|
|
|
|||
|
|
@ -0,0 +1,65 @@
|
|||
import { spawn } from "node:child_process";
|
||||
import { readdir, readFile } from "node:fs/promises";
|
||||
import path from "node:path";
|
||||
import process from "node:process";
|
||||
|
||||
const ROOT_DIR = process.cwd();
|
||||
const TEST_FILE_PATTERN = /\.test\.(?:[cm]?[jt]s|tsx)$/;
|
||||
const SKIP_DIRECTORIES = new Set([".git", "node_modules"]);
|
||||
const BUN_TEST_IMPORT_PATTERN = /from\s+["']bun:test["']/;
|
||||
|
||||
async function collectTestFiles(directory) {
|
||||
const entries = await readdir(directory, { withFileTypes: true });
|
||||
const files = [];
|
||||
|
||||
for (const entry of entries) {
|
||||
const entryPath = path.join(directory, entry.name);
|
||||
|
||||
if (entry.isDirectory()) {
|
||||
if (SKIP_DIRECTORIES.has(entry.name)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
files.push(...(await collectTestFiles(entryPath)));
|
||||
continue;
|
||||
}
|
||||
|
||||
if (entry.isFile() && TEST_FILE_PATTERN.test(entry.name)) {
|
||||
files.push(entryPath);
|
||||
}
|
||||
}
|
||||
|
||||
return files;
|
||||
}
|
||||
|
||||
async function isNodeCompatibleTest(filePath) {
|
||||
const source = await readFile(filePath, "utf8");
|
||||
return !BUN_TEST_IMPORT_PATTERN.test(source);
|
||||
}
|
||||
|
||||
const allTestFiles = await collectTestFiles(ROOT_DIR);
|
||||
const runnableTestFiles = [];
|
||||
|
||||
for (const filePath of allTestFiles.sort()) {
|
||||
if (await isNodeCompatibleTest(filePath)) {
|
||||
runnableTestFiles.push(filePath);
|
||||
}
|
||||
}
|
||||
|
||||
if (runnableTestFiles.length === 0) {
|
||||
console.error("No Node-compatible test files found.");
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
const forwardedArgs = process.argv.slice(2);
|
||||
const nodeArgs = ["--import", "tsx", ...forwardedArgs, "--test", ...runnableTestFiles];
|
||||
const child = spawn(process.execPath, nodeArgs, { stdio: "inherit" });
|
||||
|
||||
child.on("exit", (code, signal) => {
|
||||
if (signal) {
|
||||
process.kill(process.pid, signal);
|
||||
return;
|
||||
}
|
||||
|
||||
process.exit(code ?? 1);
|
||||
});
|
||||
|
|
@ -1,7 +1,7 @@
|
|||
---
|
||||
name: baoyu-article-illustrator
|
||||
description: Analyzes article structure, identifies positions requiring visual aids, generates illustrations with Type × Style two-dimension approach. Use when user asks to "illustrate article", "add images", "generate images for article", or "为文章配图".
|
||||
version: 1.56.1
|
||||
description: Analyzes article structure, identifies positions requiring visual aids, generates illustrations with Type × Style × Palette three-dimension approach. Use when user asks to "illustrate article", "add images", "generate images for article", or "为文章配图".
|
||||
version: 1.57.0
|
||||
metadata:
|
||||
openclaw:
|
||||
homepage: https://github.com/JimLiu/baoyu-skills#baoyu-article-illustrator
|
||||
|
|
@ -9,18 +9,19 @@ metadata:
|
|||
|
||||
# Article Illustrator
|
||||
|
||||
Analyze articles, identify illustration positions, generate images with Type × Style consistency.
|
||||
Analyze articles, identify illustration positions, generate images with Type × Style × Palette consistency.
|
||||
|
||||
## Two Dimensions
|
||||
## Three Dimensions
|
||||
|
||||
| Dimension | Controls | Examples |
|
||||
|-----------|----------|----------|
|
||||
| **Type** | Information structure | infographic, scene, flowchart, comparison, framework, timeline |
|
||||
| **Style** | Visual aesthetics | notion, warm, minimal, blueprint, watercolor, elegant |
|
||||
| **Style** | Rendering approach | notion, warm, minimal, blueprint, watercolor, elegant |
|
||||
| **Palette** | Color scheme (optional) | macaron, warm, neon — overrides style's default colors |
|
||||
|
||||
Combine freely: `--type infographic --style blueprint`
|
||||
Combine freely: `--type infographic --style vector-illustration --palette macaron`
|
||||
|
||||
Or use presets: `--preset tech-explainer` → type + style in one flag. See [Style Presets](references/style-presets.md).
|
||||
Or use presets: `--preset edu-visual` → type + style + palette in one flag. See [Style Presets](references/style-presets.md).
|
||||
|
||||
## Types
|
||||
|
||||
|
|
@ -96,13 +97,14 @@ Full procedures: [references/workflow.md](references/workflow.md#step-2-setup--a
|
|||
| **Q1: Preset or Type** | [Recommended preset], [alt preset], or manual: infographic, scene, flowchart, comparison, framework, timeline, mixed |
|
||||
| **Q2: Density** | minimal (1-2), balanced (3-5), per-section (Recommended), rich (6+) |
|
||||
| **Q3: Style** | [Recommended], minimal-flat, sci-fi, hand-drawn, editorial, scene, poster, Other — **skip if preset chosen** |
|
||||
| Q4: Language | When article language ≠ EXTEND.md setting |
|
||||
| Q4: Palette | Default (style colors), macaron, warm, neon — **skip if preset includes palette or preferred_palette set** |
|
||||
| Q5: Language | When article language ≠ EXTEND.md setting |
|
||||
|
||||
Full procedures: [references/workflow.md](references/workflow.md#step-3-confirm-settings-)
|
||||
|
||||
### Step 4: Generate Outline
|
||||
|
||||
Save `outline.md` with frontmatter (type, density, style, image_count) and entries:
|
||||
Save `outline.md` with frontmatter (type, density, style, palette, image_count) and entries:
|
||||
|
||||
```yaml
|
||||
## Illustration 1
|
||||
|
|
@ -118,7 +120,7 @@ Full template: [references/workflow.md](references/workflow.md#step-4-generate-o
|
|||
|
||||
⛔ **BLOCKING: Prompt files MUST be saved before ANY image generation.**
|
||||
|
||||
**Execution strategy**: When multiple illustrations have saved prompt files and the task is now plain generation, prefer `baoyu-image-gen` batch mode (`build-batch.ts` → `--batchfile`) over spawning subagents. Use subagents only when each image still needs separate prompt iteration or creative exploration.
|
||||
**Execution strategy**: When multiple illustrations have saved prompt files and the task is now plain generation, prefer `baoyu-imagine` batch mode (`build-batch.ts` → `--batchfile`) over spawning subagents. Use subagents only when each image still needs separate prompt iteration or creative exploration.
|
||||
|
||||
1. For each illustration, create a prompt file per [references/prompt-construction.md](references/prompt-construction.md)
|
||||
2. Save to `prompts/NN-{type}-{slug}.md` with YAML frontmatter
|
||||
|
|
@ -137,7 +139,7 @@ Insert `` after paragraphs.
|
|||
|
||||
```
|
||||
Article Illustration Complete!
|
||||
Article: [path] | Type: [type] | Density: [level] | Style: [style]
|
||||
Article: [path] | Type: [type] | Density: [level] | Style: [style] | Palette: [palette or default]
|
||||
Images: X/N generated
|
||||
```
|
||||
|
||||
|
|
@ -180,7 +182,7 @@ When input is **pasted content** (no file path), always uses `illustrations/{top
|
|||
|------|---------|
|
||||
| [references/workflow.md](references/workflow.md) | Detailed procedures |
|
||||
| [references/usage.md](references/usage.md) | Command syntax |
|
||||
| [references/styles.md](references/styles.md) | Style gallery |
|
||||
| [references/style-presets.md](references/style-presets.md) | Preset shortcuts (type + style) |
|
||||
| [references/styles.md](references/styles.md) | Style gallery + Palette gallery |
|
||||
| [references/style-presets.md](references/style-presets.md) | Preset shortcuts (type + style + palette) |
|
||||
| [references/prompt-construction.md](references/prompt-construction.md) | Prompt templates |
|
||||
| [references/config/first-time-setup.md](references/config/first-time-setup.md) | First-time setup |
|
||||
|
|
|
|||
|
|
@ -20,6 +20,8 @@ preferred_style:
|
|||
name: null # Built-in or custom style name
|
||||
description: "" # Override/notes
|
||||
|
||||
preferred_palette: null # Built-in palette name (macaron|warm|neon) or null
|
||||
|
||||
language: null # zh|en|ja|ko|auto
|
||||
|
||||
default_output_dir: null # same-dir|illustrations-subdir|independent
|
||||
|
|
@ -47,6 +49,7 @@ custom_styles:
|
|||
| `watermark.position` | enum | bottom-right | Position on image |
|
||||
| `preferred_style.name` | string | null | Style name or null |
|
||||
| `preferred_style.description` | string | "" | Custom notes/override |
|
||||
| `preferred_palette` | string | null | Palette override (macaron, warm, neon, or null) |
|
||||
| `language` | string | null | Output language (null = auto-detect) |
|
||||
| `default_output_dir` | enum | null | Output directory preference (null = ask each time) |
|
||||
| `custom_styles` | array | [] | User-defined styles |
|
||||
|
|
|
|||
|
|
@ -0,0 +1,33 @@
|
|||
# macaron
|
||||
|
||||
Soft macaron pastel color blocks on warm cream
|
||||
|
||||
## Background
|
||||
|
||||
- Color: Warm Cream (#F5F0E8)
|
||||
- Texture: Subtle warm paper grain
|
||||
|
||||
## Colors
|
||||
|
||||
| Role | Color | Hex | Usage |
|
||||
|------|-------|-----|-------|
|
||||
| Background | Warm Cream | #F5F0E8 | Primary background |
|
||||
| Primary Text | Deep Charcoal | #2D2D2D | Headlines, main text, outlines |
|
||||
| Macaron Blue | Sky Blue | #A8D8EA | Info block fill, cool-toned zones |
|
||||
| Macaron Mint | Mint Green | #B5E5CF | Info block fill, growth/positive zones |
|
||||
| Macaron Lavender | Lavender | #D5C6E0 | Info block fill, abstract/concept zones |
|
||||
| Macaron Peach | Peach | #FFD5C2 | Info block fill, warm-toned zones |
|
||||
| Accent | Coral Red | #E8655A | Key data, warnings, emphasis |
|
||||
| Muted Text | Warm Gray | #6B6B6B | Secondary annotations, small labels |
|
||||
|
||||
## Accent
|
||||
|
||||
Coral Red (#E8655A) for key data, warnings, and emphasis highlights. Use sparingly — one or two elements per illustration.
|
||||
|
||||
## Semantic Constraint
|
||||
|
||||
Soft pastel macaron color palette. Use block colors as rounded card backgrounds for distinct information sections. Accent coral red sparingly for emphasis on key terms only. Do NOT render color names, hex codes, or role labels as visible text in the image.
|
||||
|
||||
## Best For
|
||||
|
||||
Educational content, knowledge sharing, concept explainers, tutorials, tech summaries, onboarding materials
|
||||
|
|
@ -0,0 +1,42 @@
|
|||
# mono-ink
|
||||
|
||||
Black ink on pure white with sparse semantic accent colors
|
||||
|
||||
## Background
|
||||
|
||||
- Color: Pure White (#FFFFFF)
|
||||
- Texture: Clean, no grain, no tint
|
||||
|
||||
## Colors
|
||||
|
||||
| Role | Color | Hex | Usage |
|
||||
|------|-------|-----|-------|
|
||||
| Background | Pure White | #FFFFFF | Canvas |
|
||||
| Primary | Near Black | #1A1A1A | All lines, text, figures, arrows |
|
||||
| Accent (risk/emphasis) | Coral Red | #E8655A | Risk, problem, gap, key emphasis |
|
||||
| Accent (positive) | Muted Teal | #5FA8A8 | Positive, solution, "after" state |
|
||||
| Accent (neutral tag) | Dusty Lavender | #9B8AB5 | Neutral tags, category labels |
|
||||
| Soft Fill | Pale Gray | #F0F0F0 | Subtle zone backgrounds (optional) |
|
||||
|
||||
## Accent
|
||||
|
||||
Use black ink for all structural elements — lines, text, figures. Accent colors appear only for semantic highlighting: coral red for risks/gaps/problems, muted teal for positive/solution/after-states, dusty lavender for neutral category tags. Total colored pixels must remain under 10% of canvas. Pale gray may back a subtle zone but must never dominate.
|
||||
|
||||
## Semantic Constraint
|
||||
|
||||
Black ink on white canvas. Accent colors for semantic highlighting only — total colored pixels under 10% of canvas. Do NOT render color names, hex codes, or role labels as visible text in the image.
|
||||
|
||||
## Compatible With
|
||||
|
||||
- `ink-notes` (primary, default pairing)
|
||||
- `minimal` (strict monochrome variation, drops the style's built-in accent)
|
||||
- `sketch` (pencil + ink hybrid look)
|
||||
|
||||
## Not Recommended With
|
||||
|
||||
- `sketch-notes` — its "no pure white backgrounds" rule conflicts
|
||||
- `warm`, `elegant`, `watercolor`, `fantasy-animation` — color-heavy by design, mono-ink strips their identity
|
||||
|
||||
## Best For
|
||||
|
||||
Professional visual notes, Before/After essays, tech manifestos, framework analogies, whiteboard-presentation explainers
|
||||
|
|
@ -0,0 +1,33 @@
|
|||
# neon
|
||||
|
||||
Vibrant neon colors on dark backgrounds
|
||||
|
||||
## Background
|
||||
|
||||
- Color: Deep Purple (#2D1B4E)
|
||||
- Texture: Subtle grid pattern or solid dark
|
||||
|
||||
## Colors
|
||||
|
||||
| Role | Color | Hex | Usage |
|
||||
|------|-------|-----|-------|
|
||||
| Background | Deep Purple | #2D1B4E | Primary background |
|
||||
| Alt Background | Dark Teal | #0F4C5C | Alternative sections |
|
||||
| Primary | Hot Pink | #FF1493 | Main accent |
|
||||
| Secondary | Electric Cyan | #00FFFF | Supporting elements |
|
||||
| Tertiary | Neon Yellow | #FFFF00 | Highlights |
|
||||
| Accent 1 | Lime Green | #32CD32 | Energy, success |
|
||||
| Accent 2 | Orange | #FF6B35 | Warmth |
|
||||
| Text | White | #FFFFFF | Text elements |
|
||||
|
||||
## Accent
|
||||
|
||||
Hot Pink (#FF1493) for primary emphasis. High contrast neon-on-dark creates immediate visual impact.
|
||||
|
||||
## Semantic Constraint
|
||||
|
||||
Vibrant neon-on-dark palette. High contrast, immediate visual impact. Do NOT render color names, hex codes, or role labels as visible text in the image.
|
||||
|
||||
## Best For
|
||||
|
||||
Gaming, retro tech, 80s/90s nostalgic content, bold editorial, trend and pop culture
|
||||
|
|
@ -0,0 +1,32 @@
|
|||
# warm
|
||||
|
||||
Warm earth tones on soft peach, no cool colors
|
||||
|
||||
## Background
|
||||
|
||||
- Color: Soft Peach (#FFECD2)
|
||||
- Texture: Warm paper texture
|
||||
|
||||
## Colors
|
||||
|
||||
| Role | Color | Hex | Usage |
|
||||
|------|-------|-----|-------|
|
||||
| Background | Soft Peach | #FFECD2 | Primary background |
|
||||
| Outlines | Deep Charcoal | #2D2D2D | All element outlines |
|
||||
| Primary | Warm Orange | #ED8936 | Main accent color |
|
||||
| Secondary | Terracotta | #C05621 | Warm depth |
|
||||
| Tertiary | Golden Yellow | #F6AD55 | Highlights, energy |
|
||||
| Accent | Deep Brown | #744210 | Grounding, anchoring |
|
||||
| Text | Warm Charcoal | #4A4A4A | Text elements |
|
||||
|
||||
## Accent
|
||||
|
||||
Warm Orange (#ED8936) for primary emphasis. Warm-only palette — no cool colors (no green, blue, purple). Modern-retro feel.
|
||||
|
||||
## Semantic Constraint
|
||||
|
||||
Warm earth tone palette. Warm-only — no cool colors (no green, blue, purple). Do NOT render color names, hex codes, or role labels as visible text in the image.
|
||||
|
||||
## Best For
|
||||
|
||||
Product showcases, team introductions, feature grids, brand content, personal growth, lifestyle
|
||||
|
|
@ -67,6 +67,17 @@ STYLE (from reference):
|
|||
|
||||
---
|
||||
|
||||
## Color Specification Rules
|
||||
|
||||
Colors in prompts use hex codes for **rendering guidance only** — they tell the model which colors to use, NOT what text to display.
|
||||
|
||||
**⚠️ CRITICAL**: Image generation models sometimes render color names and hex values as visible text labels in the image (e.g., painting "Macaron Blue #A8D8EA" as a label). This must be prevented.
|
||||
|
||||
**Add to ALL prompts that contain a COLORS section**:
|
||||
> Color values (#hex) and color names are rendering guidance only — do NOT display color names, hex codes, or palette labels as visible text in the image.
|
||||
|
||||
---
|
||||
|
||||
## Character Rendering
|
||||
|
||||
When depicting people:
|
||||
|
|
@ -135,6 +146,16 @@ COLORS: Cream background (#F5F0E6), Coral Red (#E07A5F), Mint Green (#81B29A), M
|
|||
ELEMENTS: Geometric simplified icons, no gradients, playful decorative elements (dots, stars)
|
||||
```
|
||||
|
||||
**Infographic + vector-illustration + warm palette**:
|
||||
```
|
||||
Flat vector illustration infographic. Clean black outlines on all elements.
|
||||
PALETTE OVERRIDE (warm): Warm-only color palette, no cool colors.
|
||||
COLORS: Soft Peach background (#FFECD2), Warm Orange (#ED8936),
|
||||
Terracotta (#C05621), Golden Yellow (#F6AD55), Deep Brown (#744210)
|
||||
ELEMENTS: Geometric simplified icons, no gradients, rounded corners,
|
||||
modular card layout, consistent icon style
|
||||
```
|
||||
|
||||
### Scene
|
||||
|
||||
```
|
||||
|
|
@ -172,6 +193,33 @@ COLORS: Cream background (#F5F0E6), steps in Coral/Mint/Mustard, black outlines
|
|||
ELEMENTS: Rounded rectangles, thick arrows, simple icons per step
|
||||
```
|
||||
|
||||
**Flowchart + sketch-notes + macaron palette**:
|
||||
```
|
||||
Hand-drawn educational flowchart on warm cream paper. Slight wobble on all lines.
|
||||
PALETTE: macaron — soft pastel color blocks
|
||||
COLORS: Warm Cream background (#F5F0E8), zone fills in Macaron Blue (#A8D8EA),
|
||||
Lavender (#D5C6E0), Mint (#B5E5CF), Coral Red (#E8655A) for emphasis
|
||||
ELEMENTS: Rounded cards with dashed/solid borders, wavy hand-drawn arrows with labels,
|
||||
simple stick-figure characters, doodle decorations (stars, underlines)
|
||||
STYLE: Color fills don't completely fill outlines, hand-drawn lettering, generous white space
|
||||
```
|
||||
|
||||
**Flowchart + ink-notes + mono-ink palette**:
|
||||
```
|
||||
Professional hand-drawn visual-note flowchart on pure white. Black ink line work
|
||||
with slight wobble, à la Mike Rohde sketchnoting.
|
||||
PALETTE: mono-ink — black ink dominant, sparse semantic accents
|
||||
COLORS: Pure White background (#FFFFFF), Near Black (#1A1A1A) for all lines,
|
||||
text, and figures; Coral Red (#E8655A) only for risk/emphasis,
|
||||
Muted Teal (#5FA8A8) only for positive/solution states
|
||||
ELEMENTS: Left-to-right stage boxes with rounded-rect frames, wavy hand-drawn
|
||||
arrows between stages, simple stick-figure characters with role
|
||||
labels above (e.g., "ML Engineer", "Team Lead"), dashed-border box
|
||||
for future/empty stage, small doodle icons per stage
|
||||
STYLE: Hand-lettered titles (bold, oversized), handwritten stage labels and
|
||||
annotations, generous white space, bottom tagline summarizing takeaway
|
||||
```
|
||||
|
||||
### Comparison
|
||||
|
||||
```
|
||||
|
|
@@ -197,6 +245,37 @@ COLORS: Left side Coral (#E07A5F), Right side Mint (#81B29A), cream background
|
|||
ELEMENTS: Bold icons, black outlines, centered divider line
|
||||
```
|
||||
|
||||
**Comparison + vector-illustration + warm palette**:
|
||||
```
|
||||
Flat vector comparison with split layout. Clear visual separation.
|
||||
PALETTE OVERRIDE (warm): Warm-only color palette, no cool colors.
|
||||
COLORS: Left side Warm Orange (#ED8936), Right side Terracotta (#C05621),
|
||||
Soft Peach background (#FFECD2), Deep Brown (#744210) accents
|
||||
ELEMENTS: Bold icons, black outlines, centered divider line
|
||||
```
|
||||
|
||||
**Comparison + ink-notes + mono-ink palette** (Before/After, Traditional vs New):
|
||||
```
|
||||
Professional hand-drawn sketchnote comparison on pure white. Black ink line work
|
||||
with slight wobble, à la Mike Rohde sketchnoting.
|
||||
PALETTE: mono-ink — black ink dominant, sparse semantic accents
|
||||
COLORS: Pure White background (#FFFFFF), Near Black (#1A1A1A) for all outlines,
|
||||
text, figures, arrows; Coral Red (#E8655A) reserved for risks/gaps
|
||||
(left/Before side); Muted Teal (#5FA8A8) reserved for positives
|
||||
(right/After side). Color accents under 10% of canvas.
|
||||
LAYOUT: Left | Right split with vertical hand-drawn divider. Hand-lettered
|
||||
"Before" label (top-left) and "After" label (top-right).
|
||||
LEFT SIDE: Stick figure(s) with role label above, speech bubble showing the
|
||||
pain point, bulleted pain-point list in handwritten text.
|
||||
RIGHT SIDE: Stick figure(s) showing the new state, bulleted improvement list,
|
||||
small positive-action icons.
|
||||
BRIDGE: Curved hand-drawn "mindset shift" arrow bridging left → right with
|
||||
small inline label describing the shift.
|
||||
BOTTOM: Single-line hand-lettered tagline summarizing the takeaway.
|
||||
STYLE: Hand-lettered headings (bold, oversized), handwritten body annotations,
|
||||
generous white space, no computer fonts, no gradients, no shadows.
|
||||
```
|
||||
|
||||
### Framework
|
||||
|
||||
```
|
||||
|
|
@@ -220,6 +299,36 @@ COLORS: Cream background (#F5F0E6), nodes in Coral/Mint/Mustard/Blue, black outl
|
|||
ELEMENTS: Rounded rectangles or circles for nodes, thick connecting lines
|
||||
```
|
||||
|
||||
**Framework + vector-illustration + warm palette**:
|
||||
```
|
||||
Flat vector framework diagram with geometric nodes and bold connectors.
|
||||
PALETTE OVERRIDE (warm): Warm-only color palette, no cool colors.
|
||||
COLORS: Soft Peach background (#FFECD2), nodes in Warm Orange (#ED8936),
|
||||
Terracotta (#C05621), Golden Yellow (#F6AD55), black outlines
|
||||
ELEMENTS: Rounded rectangles or circles for nodes, thick connecting lines
|
||||
```
|
||||
|
||||
**Framework + ink-notes + mono-ink palette** (command center, OS analogy):
|
||||
```
|
||||
Professional hand-drawn sketchnote framework on pure white. Black ink line work
|
||||
with slight wobble, à la Mike Rohde sketchnoting.
|
||||
PALETTE: mono-ink — black ink dominant, sparse semantic accents
|
||||
COLORS: Pure White background (#FFFFFF), Near Black (#1A1A1A) for all lines,
|
||||
text, figures; Dusty Lavender (#9B8AB5) for neutral category tags only;
|
||||
Coral Red (#E8655A) for emphasis sparingly. Color accents under 10%.
|
||||
STRUCTURE: Central rounded-rectangle frame as "the system" with hand-lettered
|
||||
title inside. Inner layer of labeled sub-components (node labels
|
||||
above each). Outer layer of feeder arrows from stick-figure
|
||||
operators/users with role labels.
|
||||
ELEMENTS: Stick figures at the edges with role tags ("Team Lead", "Operator"),
|
||||
wavy hand-drawn connector arrows with small inline labels, small
|
||||
doodle icons per component, dashed-border placeholder(s) for
|
||||
future/empty capabilities.
|
||||
BOTTOM: Single-line hand-lettered tagline.
|
||||
STYLE: Hand-lettered headings, handwritten annotations, generous white space,
|
||||
no computer fonts, no gradients.
|
||||
```
|
||||
|
||||
### Timeline
|
||||
|
||||
```
|
||||
|
|
@@ -268,6 +377,39 @@ TEXTURE: Halftone transitions between sides
|
|||
|
||||
---
|
||||
|
||||
## Palette Override
|
||||
|
||||
When a palette is specified (via `--palette` or preset), it overrides the style's default colors:
|
||||
|
||||
1. Read style file → get rendering rules (Visual Elements, Style Rules, line treatment)
|
||||
2. Read palette file (`palettes/<palette>.md`) → get Colors + Background
|
||||
3. Palette Colors **replace** style's default Color Palette in prompt
|
||||
4. Palette Background **replaces** style's Background color (keep style's texture description)
|
||||
5. Build prompt: style rendering instructions + palette colors
|
||||
|
||||
**Prompt frontmatter** includes palette when specified:
|
||||
```yaml
|
||||
---
|
||||
illustration_id: 01
|
||||
type: infographic
|
||||
style: vector-illustration
|
||||
palette: macaron
|
||||
---
|
||||
```
|
||||
|
||||
**Example**: `vector-illustration` + `macaron` palette:
|
||||
```
|
||||
Flat vector illustration infographic. Clean black outlines on all elements.
|
||||
PALETTE: macaron — soft pastel color blocks
|
||||
COLORS: Warm Cream background (#F5F0E8), Macaron Blue (#A8D8EA), Mint (#B5E5CF),
|
||||
Lavender (#D5C6E0), Peach (#FFD5C2), Coral Red (#E8655A) for emphasis
|
||||
ELEMENTS: Geometric simplified icons, no gradients, playful decorative elements
|
||||
```
|
||||
|
||||
When no palette is specified, use the style's built-in Color Palette as before.
|
||||
|
||||
---
|
||||
|
||||
## What to Avoid
|
||||
|
||||
- Vague descriptions ("a nice image")
|
||||
|
|
@@ -280,5 +422,5 @@ TEXTURE: Halftone transitions between sides
|
|||
If watermark enabled in preferences, append:
|
||||
|
||||
```
|
||||
Include a subtle watermark "[content]" positioned at [position] with approximately [opacity*100]% visibility.
|
||||
Include a subtle watermark "[content]" positioned at [position].
|
||||
```
|
||||
|
|
|
|||
|
|
@@ -1,51 +1,57 @@
|
|||
# Style Presets
|
||||
|
||||
`--preset X` expands to a type + style combination. Users can override either dimension.
|
||||
`--preset X` expands to a type + style + optional palette combination. Users can override any dimension.
|
||||
|
||||
## By Category
|
||||
|
||||
### Technical & Engineering
|
||||
|
||||
| --preset | Type | Style | Best For |
|
||||
|----------|------|-------|----------|
|
||||
| `tech-explainer` | `infographic` | `blueprint` | API docs, system metrics, technical deep-dives |
|
||||
| `system-design` | `framework` | `blueprint` | Architecture diagrams, system design |
|
||||
| `architecture` | `framework` | `vector-illustration` | Component relationships, module structure |
|
||||
| `science-paper` | `infographic` | `scientific` | Research findings, lab results, academic |
|
||||
| --preset | Type | Style | Palette | Best For |
|
||||
|----------|------|-------|---------|----------|
|
||||
| `tech-explainer` | `infographic` | `blueprint` | — | API docs, system metrics, technical deep-dives |
|
||||
| `system-design` | `framework` | `blueprint` | — | Architecture diagrams, system design |
|
||||
| `architecture` | `framework` | `vector-illustration` | — | Component relationships, module structure |
|
||||
| `science-paper` | `infographic` | `scientific` | — | Research findings, lab results, academic |
|
||||
|
||||
### Knowledge & Education
|
||||
|
||||
| --preset | Type | Style | Best For |
|
||||
|----------|------|-------|----------|
|
||||
| `knowledge-base` | `infographic` | `vector-illustration` | Concept explainers, tutorials, how-to |
|
||||
| `saas-guide` | `infographic` | `notion` | Product guides, SaaS docs, tool walkthroughs |
|
||||
| `tutorial` | `flowchart` | `vector-illustration` | Step-by-step tutorials, setup guides |
|
||||
| `process-flow` | `flowchart` | `notion` | Workflow documentation, onboarding flows |
|
||||
| --preset | Type | Style | Palette | Best For |
|
||||
|----------|------|-------|---------|----------|
|
||||
| `knowledge-base` | `infographic` | `vector-illustration` | — | Concept explainers, tutorials, how-to |
|
||||
| `saas-guide` | `infographic` | `notion` | — | Product guides, SaaS docs, tool walkthroughs |
|
||||
| `tutorial` | `flowchart` | `vector-illustration` | — | Step-by-step tutorials, setup guides |
|
||||
| `process-flow` | `flowchart` | `notion` | — | Workflow documentation, onboarding flows |
|
||||
| `warm-knowledge` | `infographic` | `vector-illustration` | `warm` | Product showcases, team intros, feature cards, brand content |
|
||||
| `edu-visual` | `infographic` | `vector-illustration` | `macaron` | Knowledge summaries, concept explainers, educational articles |
|
||||
| `hand-drawn-edu` | `flowchart` | `sketch-notes` | `macaron` | Hand-drawn educational diagrams, process explainers, onboarding visuals |
|
||||
| `ink-notes-compare` | `comparison` | `ink-notes` | `mono-ink` | Before/After essays, Traditional vs New, OS-style comparisons, mindset-shift narratives |
|
||||
| `ink-notes-flow` | `flowchart` | `ink-notes` | `mono-ink` | Professional process explainers, workforce pipelines, hand-drawn technical walkthroughs |
|
||||
| `ink-notes-framework` | `framework` | `ink-notes` | `mono-ink` | System analogies, command-center diagrams, architecture-as-metaphor, tech manifestos |
|
||||
|
||||
### Data & Analysis
|
||||
|
||||
| --preset | Type | Style | Best For |
|
||||
|----------|------|-------|----------|
|
||||
| `data-report` | `infographic` | `editorial` | Data journalism, metrics reports, dashboards |
|
||||
| `versus` | `comparison` | `vector-illustration` | Tech comparisons, framework shootouts |
|
||||
| `business-compare` | `comparison` | `elegant` | Product evaluations, strategy options |
|
||||
| --preset | Type | Style | Palette | Best For |
|
||||
|----------|------|-------|---------|----------|
|
||||
| `data-report` | `infographic` | `editorial` | — | Data journalism, metrics reports, dashboards |
|
||||
| `versus` | `comparison` | `vector-illustration` | — | Tech comparisons, framework shootouts |
|
||||
| `business-compare` | `comparison` | `elegant` | — | Product evaluations, strategy options |
|
||||
|
||||
### Narrative & Creative
|
||||
|
||||
| --preset | Type | Style | Best For |
|
||||
|----------|------|-------|----------|
|
||||
| `storytelling` | `scene` | `warm` | Personal essays, reflections, growth stories |
|
||||
| `lifestyle` | `scene` | `watercolor` | Travel, wellness, lifestyle, creative |
|
||||
| `history` | `timeline` | `elegant` | Historical overviews, milestones |
|
||||
| `evolution` | `timeline` | `warm` | Progress narratives, growth journeys |
|
||||
| --preset | Type | Style | Palette | Best For |
|
||||
|----------|------|-------|---------|----------|
|
||||
| `storytelling` | `scene` | `warm` | — | Personal essays, reflections, growth stories |
|
||||
| `lifestyle` | `scene` | `watercolor` | — | Travel, wellness, lifestyle, creative |
|
||||
| `history` | `timeline` | `elegant` | — | Historical overviews, milestones |
|
||||
| `evolution` | `timeline` | `warm` | — | Progress narratives, growth journeys |
|
||||
|
||||
### Editorial & Opinion
|
||||
|
||||
| --preset | Type | Style | Best For |
|
||||
|----------|------|-------|----------|
|
||||
| `opinion-piece` | `scene` | `screen-print` | Op-eds, commentary, critical essays |
|
||||
| `editorial-poster` | `comparison` | `screen-print` | Debate, contrasting viewpoints |
|
||||
| `cinematic` | `scene` | `screen-print` | Dramatic narratives, cultural essays |
|
||||
| --preset | Type | Style | Palette | Best For |
|
||||
|----------|------|-------|---------|----------|
|
||||
| `opinion-piece` | `scene` | `screen-print` | — | Op-eds, commentary, critical essays |
|
||||
| `editorial-poster` | `comparison` | `screen-print` | — | Debate, contrasting viewpoints |
|
||||
| `cinematic` | `scene` | `screen-print` | — | Dramatic narratives, cultural essays |
|
||||
|
||||
## Content Type → Preset Recommendations
|
||||
|
||||
|
|
@@ -54,15 +60,17 @@ Use this table during Step 3 to recommend presets based on Step 2 content analysis
|
|||
| Content Type (Step 2) | Primary Preset | Alternatives |
|
||||
|------------------------|----------------|--------------|
|
||||
| Technical | `tech-explainer` | `system-design`, `architecture` |
|
||||
| Tutorial | `tutorial` | `process-flow`, `knowledge-base` |
|
||||
| Tutorial | `tutorial` | `process-flow`, `knowledge-base`, `edu-visual` |
|
||||
| Methodology / Framework | `system-design` | `architecture`, `process-flow` |
|
||||
| Data / Metrics | `data-report` | `versus`, `tech-explainer` |
|
||||
| Comparison / Review | `versus` | `business-compare`, `editorial-poster` |
|
||||
| Comparison / Review | `versus` | `business-compare`, `editorial-poster`, `ink-notes-compare` |
|
||||
| Manifesto / Mindset shift / Professional visual note | `ink-notes-compare` | `ink-notes-framework`, `ink-notes-flow` |
|
||||
| Narrative / Personal | `storytelling` | `lifestyle`, `evolution` |
|
||||
| Opinion / Editorial | `opinion-piece` | `cinematic`, `editorial-poster` |
|
||||
| Historical / Timeline | `history` | `evolution` |
|
||||
| Academic / Research | `science-paper` | `tech-explainer`, `data-report` |
|
||||
| SaaS / Product | `saas-guide` | `knowledge-base`, `process-flow` |
|
||||
| SaaS / Product | `saas-guide` | `knowledge-base`, `process-flow`, `warm-knowledge` |
|
||||
| Education / Knowledge | `edu-visual` | `knowledge-base`, `tutorial`, `hand-drawn-edu` |
|
||||
|
||||
## Override Examples
|
||||
|
||||
|
|
|
|||
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue