feat(baoyu-danger-x-to-markdown): add video media support for X articles
This commit is contained in:
parent
ea84f21439
commit
ebc74a10ad
|
|
@ -53,3 +53,126 @@ print('hello from x article')
|
|||
|
||||
After the snippet.`);
|
||||
});
|
||||
|
||||
// Inline video media referenced from an atomic block must render as a poster
// image followed by a plain `[video](...)` link, and must not be repeated in a
// trailing "## Media" section.
test("formatArticleMarkdown renders article video media as poster plus video link", () => {
  const posterUrl = "https://pbs.twimg.com/amplify_video_thumb/123/img/poster.jpg";
  const videoUrl = "https://video.twimg.com/amplify_video/123/vid/avc1/720x720/demo.mp4?tag=21";
  const article = {
    title: "Video Example",
    content_state: {
      blocks: [
        {
          type: "unstyled",
          text: "Intro text.",
          entityRanges: [],
        },
        {
          type: "atomic",
          text: " ",
          entityRanges: [{ key: 0, offset: 0, length: 1 }],
        },
      ],
      entityMap: {
        "0": {
          key: "0",
          value: {
            type: "MEDIA",
            mutability: "Immutable",
            data: {
              caption: "Demo reel",
              mediaItems: [{ mediaId: "vid-1" }],
            },
          },
        },
      },
    },
    media_entities: [
      {
        media_id: "vid-1",
        media_info: {
          __typename: "ApiVideo",
          preview_image: {
            original_img_url: posterUrl,
          },
          variants: [
            {
              content_type: "video/mp4",
              bit_rate: 256000,
              url: videoUrl,
            },
          ],
        },
      },
    ],
  };

  const { markdown } = formatArticleMarkdown(article);

  expect(markdown).toContain("Intro text.");
  // Match on the link target only: how the alt text is derived is not pinned by this test.
  expect(markdown).toContain(`](${posterUrl})`);
  expect(markdown).toContain(`[video](${videoUrl})`);
  // The playable video URL must be a plain link, never a markdown image.
  expect(markdown).not.toMatch(/!\[[^\]]*\]\(https:\/\/video\.twimg\.com\//);
  expect(markdown).not.toContain("## Media");
});
|
||||
|
||||
// A video that no content block references must be listed in the trailing
// "## Media" section as a poster image (empty alt text) plus a video link.
test("formatArticleMarkdown renders unused article videos in trailing media section", () => {
  const posterUrl = "https://pbs.twimg.com/amplify_video_thumb/456/img/poster.jpg";
  const videoUrl = "https://video.twimg.com/amplify_video/456/vid/avc1/1080x1080/demo.mp4?tag=21";
  const article = {
    title: "Trailing Media Example",
    plain_text: "Body text.",
    media_entities: [
      {
        media_id: "vid-2",
        media_info: {
          __typename: "ApiVideo",
          preview_image: {
            original_img_url: posterUrl,
          },
          variants: [
            {
              content_type: "video/mp4",
              bit_rate: 832000,
              url: videoUrl,
            },
          ],
        },
      },
    ],
  };

  const { markdown, coverUrl } = formatArticleMarkdown(article);

  // No cover_media was supplied, so no cover may be reported.
  expect(coverUrl).toBeNull();
  expect(markdown).toContain("## Media");
  // Trailing media is rendered with an empty alt text.
  expect(markdown).toContain(`![](${posterUrl})`);
  expect(markdown).toContain(`[video](${videoUrl})`);
});
|
||||
|
||||
// When the cover media is a video, the reported cover must be its preview
// image rather than the playable video variant.
test("formatArticleMarkdown keeps coverUrl as preview image for video cover media", () => {
  const posterUrl = "https://pbs.twimg.com/amplify_video_thumb/789/img/poster.jpg";
  const videoUrl = "https://video.twimg.com/amplify_video/789/vid/avc1/720x720/demo.mp4?tag=21";

  const coverMediaInfo = {
    __typename: "ApiVideo",
    preview_image: { original_img_url: posterUrl },
    variants: [{ content_type: "video/mp4", bit_rate: 1280000, url: videoUrl }],
  };
  const article = {
    title: "Video Cover Example",
    plain_text: "Body text.",
    cover_media: { media_info: coverMediaInfo },
  };

  const result = formatArticleMarkdown(article);

  expect(result.coverUrl).toBe(posterUrl);
});
|
||||
|
|
|
|||
|
|
@ -18,6 +18,17 @@ export type FormatArticleOptions = {
|
|||
referencedTweets?: Map<string, ReferencedTweetInfo>;
|
||||
};
|
||||
|
||||
/**
 * A media item resolved to renderable URLs, discriminated by `kind`.
 *
 * - `image`: `url` is the image itself.
 * - `video`: `url` is the playable video; `posterUrl`, when present, is a
 *   still image that represents the video.
 */
type ResolvedMediaAsset =
  | { kind: "image"; url: string }
  | { kind: "video"; url: string; posterUrl?: string };
|
||||
|
||||
function coerceArticleEntity(value: unknown): ArticleEntity | null {
|
||||
if (!value || typeof value !== "object") return null;
|
||||
const candidate = value as ArticleEntity;
|
||||
|
|
@ -109,58 +120,127 @@ function resolveEntityEntry(
|
|||
return entityMap[String(entityKey)];
|
||||
}
|
||||
|
||||
/**
 * Picks the playable video URL from a media info record.
 *
 * Only `variants` are consulted; image fields (`original_img_url`,
 * `preview_image`) are deliberately ignored so that a still image is never
 * reported as a video.
 *
 * @param info Media info record, possibly absent.
 * @returns The URL of the highest-bit-rate variant whose content type contains
 *   "video"; otherwise the URL of the first variant that has a string `url`;
 *   `undefined` when there is no info or no usable variant.
 */
function resolveVideoUrl(info?: ArticleMediaInfo): string | undefined {
  if (!info) return undefined;

  const variants = info.variants ?? [];
  // `filter` returns a fresh array, so the in-place sort does not reorder `info.variants`.
  const bestVideo = variants
    .filter((variant) => variant?.content_type?.includes("video"))
    .sort((a, b) => (b.bit_rate ?? 0) - (a.bit_rate ?? 0))[0];

  return bestVideo?.url ?? variants.find((variant) => typeof variant?.url === "string")?.url;
}
|
||||
/**
 * Classifies a media info record as a video or an image asset.
 *
 * A record with a resolvable video URL becomes a `video` asset whose poster is
 * the preview image (falling back to the original image URL). Otherwise the
 * original image URL (falling back to the preview image) becomes an `image`
 * asset.
 *
 * @param info Media info record, possibly absent.
 * @returns The resolved asset, or `undefined` when no URL can be found.
 */
function resolveMediaAsset(info?: ArticleMediaInfo): ResolvedMediaAsset | undefined {
  if (!info) return undefined;

  const previewUrl = info.preview_image?.original_img_url;
  const originalUrl = info.original_img_url;

  const videoUrl = resolveVideoUrl(info);
  if (videoUrl) {
    // For videos the preview image is the preferred poster.
    return { kind: "video", url: videoUrl, posterUrl: previewUrl ?? originalUrl };
  }

  // For still images the original takes precedence over the preview.
  const imageUrl = originalUrl ?? previewUrl;
  return imageUrl ? { kind: "image", url: imageUrl } : undefined;
}
|
||||
|
||||
/**
 * Builds an asset from a bare URL when no media info record is available.
 *
 * The URL is treated as a video when it is served from
 * `https://video.twimg.com/` or when it contains a `.mp4`, `.m4v`, `.mov` or
 * `.webm` extension followed by the end of the string, `?` or `#`
 * (case-insensitive). Every other non-empty URL is treated as an image.
 *
 * @param rawUrl Candidate URL; empty or missing yields `undefined`.
 * @returns A `video` asset (without poster) or an `image` asset.
 */
function resolveFallbackMediaAsset(rawUrl?: string): ResolvedMediaAsset | undefined {
  if (!rawUrl) return undefined;

  const isTwitterVideoHost = /^https:\/\/video\.twimg\.com\//i.test(rawUrl);
  const hasVideoExtension = /\.(mp4|m4v|mov|webm)(?:$|[?#])/i.test(rawUrl);

  return isTwitterVideoHost || hasVideoExtension
    ? { kind: "video", url: rawUrl }
    : { kind: "image", url: rawUrl };
}
|
||||
|
||||
/**
 * Resolves the still image that represents an article's cover media.
 *
 * Only image fields are consulted, so a video cover yields its preview image
 * rather than a video variant URL.
 *
 * @param info Cover media info record, possibly absent.
 * @returns `original_img_url` when set, otherwise the preview image URL.
 */
function resolveCoverUrl(info?: ArticleMediaInfo): string | undefined {
  if (!info) return undefined;

  const originalUrl = info.original_img_url;
  if (originalUrl !== undefined && originalUrl !== null) {
    return originalUrl;
  }
  return info.preview_image?.original_img_url;
}
|
||||
|
||||
/**
 * Produces a string key that identifies an asset for de-duplication.
 *
 * Images are keyed by URL alone; videos are keyed by URL plus poster URL
 * (an absent poster contributes an empty string).
 */
function buildMediaIdentity(asset: ResolvedMediaAsset): string {
  if (asset.kind !== "video") {
    return `image:${asset.url}`;
  }
  const poster = asset.posterUrl ?? "";
  return `video:${asset.url}:${poster}`;
}
|
||||
|
||||
/**
 * Renders one media asset as markdown lines, skipping URLs already emitted.
 *
 * - Video: a poster image line (`![alt](poster)`) when a poster exists and has
 *   not been used yet, followed by a `[video](url)` link when the video URL
 *   has not been used yet.
 * - Image: a single `![alt](url)` line, or nothing when the URL was already used.
 *
 * Every URL that is rendered is added to `usedUrls`, so the set is mutated.
 *
 * @param asset Resolved image or video asset.
 * @param altText Alt text for image lines; inserted verbatim.
 * @param usedUrls URLs already present in the output; updated in place.
 * @returns Zero, one or two markdown lines.
 */
function renderMediaLines(
  asset: ResolvedMediaAsset,
  altText: string,
  usedUrls: Set<string>
): string[] {
  if (asset.kind === "video") {
    const lines: string[] = [];
    // Poster and video are tracked independently: a poster already shown
    // elsewhere (e.g. as the cover) must not suppress the video link.
    if (asset.posterUrl && !usedUrls.has(asset.posterUrl)) {
      usedUrls.add(asset.posterUrl);
      lines.push(`![${altText}](${asset.posterUrl})`);
    }
    if (!usedUrls.has(asset.url)) {
      usedUrls.add(asset.url);
      lines.push(`[video](${asset.url})`);
    }
    return lines;
  }

  if (usedUrls.has(asset.url)) {
    return [];
  }

  usedUrls.add(asset.url);
  return [`![${altText}](${asset.url})`];
}
|
||||
|
||||
/**
 * Indexes the article's media entities by `media_id`.
 *
 * Entities without a `media_id`, or whose media info resolves to no asset,
 * are skipped. A later entity with the same id overwrites an earlier one.
 *
 * @param article Article whose `media_entities` are indexed.
 * @returns Map from media id to its resolved image or video asset.
 */
function buildMediaById(article: ArticleEntity): Map<string, ResolvedMediaAsset> {
  const map = new Map<string, ResolvedMediaAsset>();
  for (const entity of article.media_entities ?? []) {
    if (!entity?.media_id) continue;
    const asset = resolveMediaAsset(entity.media_info);
    if (asset) {
      map.set(entity.media_id, asset);
    }
  }
  return map;
}
|
||||
|
||||
/**
 * Collects every resolvable asset from the article's media entities, in
 * entity order, with duplicates removed.
 *
 * Two assets are duplicates when `buildMediaIdentity` yields the same key for
 * both. Filtering against URLs already rendered is left to the caller, which
 * passes each asset through `renderMediaLines`.
 *
 * @param article Article whose `media_entities` are scanned.
 * @returns De-duplicated assets; empty when the article has no media entities.
 */
function collectMediaAssets(article: ArticleEntity): ResolvedMediaAsset[] {
  const assets: ResolvedMediaAsset[] = [];
  const seen = new Set<string>();

  const addAsset = (asset?: ResolvedMediaAsset) => {
    if (!asset) return;
    const identity = buildMediaIdentity(asset);
    if (seen.has(identity)) return;
    seen.add(identity);
    assets.push(asset);
  };

  for (const entity of article.media_entities ?? []) {
    addAsset(resolveMediaAsset(entity?.media_info));
  }

  return assets;
}
|
||||
|
||||
function resolveEntityMediaLines(
|
||||
entityKey: number | undefined,
|
||||
entityMap: ArticleContentState["entityMap"] | undefined,
|
||||
entityLookup: EntityLookup,
|
||||
mediaById: Map<string, string>,
|
||||
mediaById: Map<string, ResolvedMediaAsset>,
|
||||
usedUrls: Set<string>
|
||||
): string[] {
|
||||
if (entityKey === undefined) return [];
|
||||
|
|
@ -182,17 +262,16 @@ function resolveEntityMediaLines(
|
|||
: typeof item?.media_id === "string"
|
||||
? item.media_id
|
||||
: undefined;
|
||||
const url = mediaId ? mediaById.get(mediaId) : undefined;
|
||||
if (url && !usedUrls.has(url)) {
|
||||
usedUrls.add(url);
|
||||
lines.push(``);
|
||||
const asset = mediaId ? mediaById.get(mediaId) : undefined;
|
||||
if (asset) {
|
||||
lines.push(...renderMediaLines(asset, altText, usedUrls));
|
||||
}
|
||||
}
|
||||
|
||||
const fallbackUrl = typeof value.data?.url === "string" ? value.data.url : undefined;
|
||||
if (fallbackUrl && !usedUrls.has(fallbackUrl)) {
|
||||
usedUrls.add(fallbackUrl);
|
||||
lines.push(``);
|
||||
const fallbackAsset = resolveFallbackMediaAsset(fallbackUrl);
|
||||
if (fallbackAsset) {
|
||||
lines.push(...renderMediaLines(fallbackAsset, altText, usedUrls));
|
||||
}
|
||||
|
||||
return lines;
|
||||
|
|
@ -346,7 +425,7 @@ function renderContentBlocks(
|
|||
blocks: ArticleBlock[],
|
||||
entityMap: ArticleContentState["entityMap"] | undefined,
|
||||
entityLookup: EntityLookup,
|
||||
mediaById: Map<string, string>,
|
||||
mediaById: Map<string, ResolvedMediaAsset>,
|
||||
usedUrls: Set<string>,
|
||||
mediaLinkMap: Map<number, string>,
|
||||
referencedTweets?: Map<string, ReferencedTweetInfo>
|
||||
|
|
@ -602,7 +681,7 @@ export function formatArticleMarkdown(
|
|||
lines.push(`# ${title}`);
|
||||
}
|
||||
|
||||
const coverUrl = resolveMediaUrl(candidate.cover_media?.media_info) ?? null;
|
||||
const coverUrl = resolveCoverUrl(candidate.cover_media?.media_info) ?? null;
|
||||
if (coverUrl) {
|
||||
usedUrls.add(coverUrl);
|
||||
}
|
||||
|
|
@ -633,12 +712,13 @@ export function formatArticleMarkdown(
|
|||
lines.push(candidate.preview_text.trim());
|
||||
}
|
||||
|
||||
const mediaUrls = collectMediaUrls(candidate, usedUrls, coverUrl ?? undefined);
|
||||
if (mediaUrls.length > 0) {
|
||||
lines.push("", "## Media", "");
|
||||
for (const url of mediaUrls) {
|
||||
lines.push(``);
|
||||
const trailingMediaLines: string[] = [];
|
||||
for (const asset of collectMediaAssets(candidate)) {
|
||||
trailingMediaLines.push(...renderMediaLines(asset, "", usedUrls));
|
||||
}
|
||||
if (trailingMediaLines.length > 0) {
|
||||
lines.push("", "## Media", "");
|
||||
lines.push(...trailingMediaLines);
|
||||
}
|
||||
|
||||
return { markdown: lines.join("\n").trimEnd(), coverUrl };
|
||||
|
|
|
|||
|
|
@ -202,6 +202,13 @@ function toHighResUrl(rawUrl: string): string {
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Decides whether a URL plausibly points at downloadable media.
 *
 * A URL qualifies when its extension is a known image or video extension, or,
 * failing that, when `resolveKindFromHostname` recognises it.
 *
 * @param rawUrl URL taken from a markdown link.
 * @returns `true` when the URL looks like an image or video resource.
 */
function isPlausibleMediaUrl(rawUrl: string): boolean {
  const extension = resolveExtensionFromUrl(rawUrl);
  if (extension) {
    const isKnownExtension = IMAGE_EXTENSIONS.has(extension) || VIDEO_EXTENSIONS.has(extension);
    if (isKnownExtension) return true;
  }
  // The hostname is only consulted when the extension check was inconclusive.
  return resolveKindFromHostname(rawUrl) !== undefined;
}
|
||||
|
||||
function collectMarkdownLinkCandidates(markdown: string): MarkdownLinkCandidate[] {
|
||||
const candidates: MarkdownLinkCandidate[] = [];
|
||||
const seen = new Set<string>();
|
||||
|
|
@ -221,10 +228,12 @@ function collectMarkdownLinkCandidates(markdown: string): MarkdownLinkCandidate[
|
|||
const label = match[1] ?? "";
|
||||
const rawUrl = match[3] ?? "";
|
||||
if (!rawUrl || seen.has(rawUrl)) continue;
|
||||
const isImage = label.startsWith("![");
|
||||
if (!isImage && !isPlausibleMediaUrl(rawUrl)) continue;
|
||||
seen.add(rawUrl);
|
||||
candidates.push({
|
||||
url: rawUrl,
|
||||
hint: label.startsWith("![") ? "image" : "unknown",
|
||||
hint: isImage ? "image" : "unknown",
|
||||
});
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue