feat(baoyu-danger-x-to-markdown): add video media support for X articles

This commit is contained in:
Jim Liu 宝玉 2026-03-18 21:54:21 -05:00
parent ea84f21439
commit ebc74a10ad
3 changed files with 254 additions and 42 deletions

View File

@ -53,3 +53,126 @@ print('hello from x article')
After the snippet.`);
});
test("formatArticleMarkdown renders article video media as poster plus video link", () => {
const posterUrl = "https://pbs.twimg.com/amplify_video_thumb/123/img/poster.jpg";
const videoUrl = "https://video.twimg.com/amplify_video/123/vid/avc1/720x720/demo.mp4?tag=21";
const article = {
title: "Video Example",
content_state: {
blocks: [
{
type: "unstyled",
text: "Intro text.",
entityRanges: [],
},
{
type: "atomic",
text: " ",
entityRanges: [{ key: 0, offset: 0, length: 1 }],
},
],
entityMap: {
"0": {
key: "0",
value: {
type: "MEDIA",
mutability: "Immutable",
data: {
caption: "Demo reel",
mediaItems: [{ mediaId: "vid-1" }],
},
},
},
},
},
media_entities: [
{
media_id: "vid-1",
media_info: {
__typename: "ApiVideo",
preview_image: {
original_img_url: posterUrl,
},
variants: [
{
content_type: "video/mp4",
bit_rate: 256000,
url: videoUrl,
},
],
},
},
],
};
const { markdown } = formatArticleMarkdown(article);
expect(markdown).toContain("Intro text.");
expect(markdown).toContain(`![Demo reel](${posterUrl})`);
expect(markdown).toContain(`[video](${videoUrl})`);
expect(markdown).not.toContain(`![Demo reel](${videoUrl})`);
expect(markdown).not.toContain("## Media");
});
test("formatArticleMarkdown renders unused article videos in trailing media section", () => {
const posterUrl = "https://pbs.twimg.com/amplify_video_thumb/456/img/poster.jpg";
const videoUrl = "https://video.twimg.com/amplify_video/456/vid/avc1/1080x1080/demo.mp4?tag=21";
const article = {
title: "Trailing Media Example",
plain_text: "Body text.",
media_entities: [
{
media_id: "vid-2",
media_info: {
__typename: "ApiVideo",
preview_image: {
original_img_url: posterUrl,
},
variants: [
{
content_type: "video/mp4",
bit_rate: 832000,
url: videoUrl,
},
],
},
},
],
};
const { markdown, coverUrl } = formatArticleMarkdown(article);
expect(coverUrl).toBeNull();
expect(markdown).toContain("## Media");
expect(markdown).toContain(`![video](${posterUrl})`);
expect(markdown).toContain(`[video](${videoUrl})`);
});
test("formatArticleMarkdown keeps coverUrl as preview image for video cover media", () => {
const posterUrl = "https://pbs.twimg.com/amplify_video_thumb/789/img/poster.jpg";
const videoUrl = "https://video.twimg.com/amplify_video/789/vid/avc1/720x720/demo.mp4?tag=21";
const article = {
title: "Video Cover Example",
plain_text: "Body text.",
cover_media: {
media_info: {
__typename: "ApiVideo",
preview_image: {
original_img_url: posterUrl,
},
variants: [
{
content_type: "video/mp4",
bit_rate: 1280000,
url: videoUrl,
},
],
},
},
};
const { coverUrl } = formatArticleMarkdown(article);
expect(coverUrl).toBe(posterUrl);
});

View File

@ -18,6 +18,17 @@ export type FormatArticleOptions = {
referencedTweets?: Map<string, ReferencedTweetInfo>;
};
type ResolvedMediaAsset =
| {
kind: "image";
url: string;
}
| {
kind: "video";
url: string;
posterUrl?: string;
};
function coerceArticleEntity(value: unknown): ArticleEntity | null {
if (!value || typeof value !== "object") return null;
const candidate = value as ArticleEntity;
@ -109,58 +120,127 @@ function resolveEntityEntry(
return entityMap[String(entityKey)];
}
function resolveMediaUrl(info?: ArticleMediaInfo): string | undefined {
function resolveVideoUrl(info?: ArticleMediaInfo): string | undefined {
if (!info) return undefined;
if (info.original_img_url) return info.original_img_url;
if (info.preview_image?.original_img_url) return info.preview_image.original_img_url;
const variants = info.variants ?? [];
const mp4 = variants
.filter((variant) => variant?.content_type?.includes("video"))
.sort((a, b) => (b.bit_rate ?? 0) - (a.bit_rate ?? 0))[0];
return mp4?.url ?? variants[0]?.url;
return mp4?.url ?? variants.find((variant) => typeof variant?.url === "string")?.url;
}
function buildMediaById(article: ArticleEntity): Map<string, string> {
const map = new Map<string, string>();
function resolveMediaAsset(info?: ArticleMediaInfo): ResolvedMediaAsset | undefined {
if (!info) return undefined;
const posterUrl = info.preview_image?.original_img_url ?? info.original_img_url;
const videoUrl = resolveVideoUrl(info);
if (videoUrl) {
return {
kind: "video",
url: videoUrl,
posterUrl,
};
}
const imageUrl = info.original_img_url ?? info.preview_image?.original_img_url;
if (imageUrl) {
return {
kind: "image",
url: imageUrl,
};
}
return undefined;
}
function resolveFallbackMediaAsset(rawUrl?: string): ResolvedMediaAsset | undefined {
if (!rawUrl) return undefined;
if (/^https:\/\/video\.twimg\.com\//i.test(rawUrl) || /\.(mp4|m4v|mov|webm)(?:$|[?#])/i.test(rawUrl)) {
return {
kind: "video",
url: rawUrl,
};
}
return {
kind: "image",
url: rawUrl,
};
}
function resolveCoverUrl(info?: ArticleMediaInfo): string | undefined {
if (!info) return undefined;
return info.original_img_url ?? info.preview_image?.original_img_url;
}
function buildMediaIdentity(asset: ResolvedMediaAsset): string {
return asset.kind === "video"
? `video:${asset.url}:${asset.posterUrl ?? ""}`
: `image:${asset.url}`;
}
function renderMediaLines(
asset: ResolvedMediaAsset,
altText: string,
usedUrls: Set<string>
): string[] {
if (asset.kind === "video") {
const lines: string[] = [];
if (asset.posterUrl && !usedUrls.has(asset.posterUrl)) {
usedUrls.add(asset.posterUrl);
lines.push(`![${altText || "video"}](${asset.posterUrl})`);
}
if (!usedUrls.has(asset.url)) {
usedUrls.add(asset.url);
lines.push(`[video](${asset.url})`);
}
return lines;
}
if (usedUrls.has(asset.url)) {
return [];
}
usedUrls.add(asset.url);
return [`![${altText}](${asset.url})`];
}
function buildMediaById(article: ArticleEntity): Map<string, ResolvedMediaAsset> {
const map = new Map<string, ResolvedMediaAsset>();
for (const entity of article.media_entities ?? []) {
if (!entity?.media_id) continue;
const url = resolveMediaUrl(entity.media_info);
if (url) {
map.set(entity.media_id, url);
const asset = resolveMediaAsset(entity.media_info);
if (asset) {
map.set(entity.media_id, asset);
}
}
return map;
}
function collectMediaUrls(
article: ArticleEntity,
usedUrls: Set<string>,
excludeUrl?: string
): string[] {
const urls: string[] = [];
const addUrl = (url?: string) => {
if (!url) return;
if (excludeUrl && url === excludeUrl) {
usedUrls.add(url);
return;
}
if (usedUrls.has(url)) return;
usedUrls.add(url);
urls.push(url);
function collectMediaAssets(article: ArticleEntity): ResolvedMediaAsset[] {
const assets: ResolvedMediaAsset[] = [];
const seen = new Set<string>();
const addAsset = (asset?: ResolvedMediaAsset) => {
if (!asset) return;
const identity = buildMediaIdentity(asset);
if (seen.has(identity)) return;
seen.add(identity);
assets.push(asset);
};
for (const entity of article.media_entities ?? []) {
addUrl(resolveMediaUrl(entity?.media_info));
addAsset(resolveMediaAsset(entity?.media_info));
}
return urls;
return assets;
}
function resolveEntityMediaLines(
entityKey: number | undefined,
entityMap: ArticleContentState["entityMap"] | undefined,
entityLookup: EntityLookup,
mediaById: Map<string, string>,
mediaById: Map<string, ResolvedMediaAsset>,
usedUrls: Set<string>
): string[] {
if (entityKey === undefined) return [];
@ -182,17 +262,16 @@ function resolveEntityMediaLines(
: typeof item?.media_id === "string"
? item.media_id
: undefined;
const url = mediaId ? mediaById.get(mediaId) : undefined;
if (url && !usedUrls.has(url)) {
usedUrls.add(url);
lines.push(`![${altText}](${url})`);
const asset = mediaId ? mediaById.get(mediaId) : undefined;
if (asset) {
lines.push(...renderMediaLines(asset, altText, usedUrls));
}
}
const fallbackUrl = typeof value.data?.url === "string" ? value.data.url : undefined;
if (fallbackUrl && !usedUrls.has(fallbackUrl)) {
usedUrls.add(fallbackUrl);
lines.push(`![${altText}](${fallbackUrl})`);
const fallbackAsset = resolveFallbackMediaAsset(fallbackUrl);
if (fallbackAsset) {
lines.push(...renderMediaLines(fallbackAsset, altText, usedUrls));
}
return lines;
@ -346,7 +425,7 @@ function renderContentBlocks(
blocks: ArticleBlock[],
entityMap: ArticleContentState["entityMap"] | undefined,
entityLookup: EntityLookup,
mediaById: Map<string, string>,
mediaById: Map<string, ResolvedMediaAsset>,
usedUrls: Set<string>,
mediaLinkMap: Map<number, string>,
referencedTweets?: Map<string, ReferencedTweetInfo>
@ -602,7 +681,7 @@ export function formatArticleMarkdown(
lines.push(`# ${title}`);
}
const coverUrl = resolveMediaUrl(candidate.cover_media?.media_info) ?? null;
const coverUrl = resolveCoverUrl(candidate.cover_media?.media_info) ?? null;
if (coverUrl) {
usedUrls.add(coverUrl);
}
@ -633,12 +712,13 @@ export function formatArticleMarkdown(
lines.push(candidate.preview_text.trim());
}
const mediaUrls = collectMediaUrls(candidate, usedUrls, coverUrl ?? undefined);
if (mediaUrls.length > 0) {
const trailingMediaLines: string[] = [];
for (const asset of collectMediaAssets(candidate)) {
trailingMediaLines.push(...renderMediaLines(asset, "", usedUrls));
}
if (trailingMediaLines.length > 0) {
lines.push("", "## Media", "");
for (const url of mediaUrls) {
lines.push(`![](${url})`);
}
lines.push(...trailingMediaLines);
}
return { markdown: lines.join("\n").trimEnd(), coverUrl };

View File

@ -202,6 +202,13 @@ function toHighResUrl(rawUrl: string): string {
}
}
function isPlausibleMediaUrl(rawUrl: string): boolean {
const ext = resolveExtensionFromUrl(rawUrl);
if (ext && (IMAGE_EXTENSIONS.has(ext) || VIDEO_EXTENSIONS.has(ext))) return true;
if (resolveKindFromHostname(rawUrl) !== undefined) return true;
return false;
}
function collectMarkdownLinkCandidates(markdown: string): MarkdownLinkCandidate[] {
const candidates: MarkdownLinkCandidate[] = [];
const seen = new Set<string>();
@ -221,10 +228,12 @@ function collectMarkdownLinkCandidates(markdown: string): MarkdownLinkCandidate[
const label = match[1] ?? "";
const rawUrl = match[3] ?? "";
if (!rawUrl || seen.has(rawUrl)) continue;
const isImage = label.startsWith("![");
if (!isImage && !isPlausibleMediaUrl(rawUrl)) continue;
seen.add(rawUrl);
candidates.push({
url: rawUrl,
hint: label.startsWith("![") ? "image" : "unknown",
hint: isImage ? "image" : "unknown",
});
}