From 204765a137d97cfbc31c9255923573409bf36a21 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jim=20Liu=20=E5=AE=9D=E7=8E=89?= Date: Fri, 3 Apr 2026 23:35:42 -0500 Subject: [PATCH] feat(baoyu-youtube-transcript): auto-retry with yt-dlp on empty InnerTube transcript --- .../scripts/main.test.ts | 60 +++++++++++++++- .../baoyu-youtube-transcript/scripts/main.ts | 30 ++++---- .../scripts/youtube.ts | 70 +++++++++++++++++-- 3 files changed, 140 insertions(+), 20 deletions(-) diff --git a/skills/baoyu-youtube-transcript/scripts/main.test.ts b/skills/baoyu-youtube-transcript/scripts/main.test.ts index e3b3df7..256ce4d 100644 --- a/skills/baoyu-youtube-transcript/scripts/main.test.ts +++ b/skills/baoyu-youtube-transcript/scripts/main.test.ts @@ -2,7 +2,7 @@ import test from "node:test"; import assert from "node:assert/strict"; import { findTranscript, parseTranscriptJson3, parseWebVtt } from "./transcript.ts"; -import { buildTranscriptListFromYtDlp, resolveVideoSource, selectYtDlpTrack } from "./youtube.ts"; +import { buildTranscriptListFromYtDlp, fetchTranscriptWithFallback, resolveVideoSource, selectYtDlpTrack } from "./youtube.ts"; test("selectYtDlpTrack prefers json3 over xml and vtt", () => { const track = selectYtDlpTrack([ @@ -123,3 +123,61 @@ test("resolveVideoSource falls back to yt-dlp only after fallback-eligible error assert.equal(fallbackCalled, true); assert.equal(source.transcripts[0].languageCode, "en"); }); + +test("fetchTranscriptWithFallback retries with yt-dlp when InnerTube transcript payload is empty", async () => { + const warnings: string[] = []; + let fallbackCalled = false; + const result = await fetchTranscriptWithFallback( + "video12345ab", + { + kind: "innertube", + data: { videoDetails: { title: "Primary" } }, + transcripts: [{ + language: "English", + languageCode: "en", + isGenerated: false, + isTranslatable: false, + baseUrl: "https://www.youtube.com/api/timedtext?v=video12345ab&lang=en&fmt=json3", + translationLanguages: [], + }], + }, + { + 
languages: ["en"], + translate: "", + excludeGenerated: false, + excludeManual: false, + }, + async (info) => { + if (info.baseUrl.includes("youtube.com/api/timedtext")) { + return { snippets: [], language: info.language, languageCode: info.languageCode }; + } + return { + snippets: [{ text: "Recovered subtitle", start: 0, duration: 2 }], + language: info.language, + languageCode: info.languageCode, + }; + }, + async () => { + fallbackCalled = true; + return { + kind: "yt-dlp", + info: { title: "Fallback" }, + transcripts: [{ + language: "English", + languageCode: "en", + isGenerated: false, + isTranslatable: false, + baseUrl: "https://example.com/subtitles.en.json3", + translationLanguages: [], + }], + }; + }, + (message) => warnings.push(message) + ); + + assert.equal(fallbackCalled, true); + assert.equal(result.source.kind, "yt-dlp"); + assert.equal(result.snippets.length, 1); + assert.equal(result.snippets[0].text, "Recovered subtitle"); + assert.match(warnings[0] || "", /Retrying with yt-dlp fallback/); +}); diff --git a/skills/baoyu-youtube-transcript/scripts/main.ts b/skills/baoyu-youtube-transcript/scripts/main.ts index 22aea19..ac95fa3 100644 --- a/skills/baoyu-youtube-transcript/scripts/main.ts +++ b/skills/baoyu-youtube-transcript/scripts/main.ts @@ -13,13 +13,13 @@ import { registerVideoDir, resolveBaseDir, } from "./storage.ts"; -import { findTranscript, formatListOutput, formatMarkdown, formatSrt, segmentIntoSentences } from "./transcript.ts"; +import { formatListOutput, formatMarkdown, formatSrt, segmentIntoSentences } from "./transcript.ts"; import type { Options, Sentence, Snippet, VideoMeta, VideoResult } from "./types.ts"; import { buildVideoMeta, buildVideoMetaFromYtDlp, downloadCoverImage, - fetchTranscriptSnippets, + fetchTranscriptWithFallback, fetchVideoSource, getThumbnailUrls, getYtDlpThumbnailUrls, @@ -31,10 +31,12 @@ async function fetchAndCache( baseDir: string, opts: Options ): Promise<{ meta: VideoMeta; snippets: Snippet[]; sentences: 
Sentence[]; videoDir: string }> { - const source = await fetchVideoSource(videoId); - const requestedLanguages = source.kind === "yt-dlp" && opts.translate ? [opts.translate] : opts.languages; - const transcript = findTranscript(source.transcripts, requestedLanguages, opts.excludeGenerated, opts.excludeManual); - const result = await fetchTranscriptSnippets(transcript, source.kind === "yt-dlp" ? undefined : opts.translate || undefined); + const initialSource = await fetchVideoSource(videoId); + const { source, transcript, snippets, language, languageCode } = await fetchTranscriptWithFallback( + videoId, + initialSource, + opts + ); const description = source.kind === "yt-dlp" ? source.info.description || "" : source.data?.videoDetails?.shortDescription || ""; @@ -42,21 +44,21 @@ async function fetchAndCache( ? Number(source.info.duration || 0) : parseInt(source.data?.videoDetails?.lengthSeconds || "0"); const chapters = parseChapters(description, duration); - const language = { - code: result.languageCode, - name: result.language, + const languageMeta = { + code: languageCode, + name: language, isGenerated: transcript.isGenerated, }; const meta = source.kind === "yt-dlp" - ? buildVideoMetaFromYtDlp(source.info, videoId, language, chapters) - : buildVideoMeta(source.data, videoId, language, chapters); + ? 
buildVideoMetaFromYtDlp(source.info, videoId, languageMeta, chapters) + : buildVideoMeta(source.data, videoId, languageMeta, chapters); const videoDir = registerVideoDir(videoId, slugify(meta.channel), slugify(meta.title), baseDir); ensureDir(join(videoDir, "meta.json")); - writeFileSync(join(videoDir, "transcript-raw.json"), JSON.stringify(result.snippets, null, 2)); + writeFileSync(join(videoDir, "transcript-raw.json"), JSON.stringify(snippets, null, 2)); - const sentences = segmentIntoSentences(result.snippets); + const sentences = segmentIntoSentences(snippets); writeFileSync(join(videoDir, "transcript-sentences.json"), JSON.stringify(sentences, null, 2)); const imagePath = join(videoDir, "imgs", "cover.jpg"); @@ -69,7 +71,7 @@ async function fetchAndCache( writeFileSync(join(videoDir, "meta.json"), JSON.stringify(meta, null, 2)); - return { meta, snippets: result.snippets, sentences, videoDir }; + return { meta, snippets, sentences, videoDir }; } async function processVideo(videoId: string, opts: Options): Promise { diff --git a/skills/baoyu-youtube-transcript/scripts/youtube.ts b/skills/baoyu-youtube-transcript/scripts/youtube.ts index 5a2f682..8f8ba9b 100644 --- a/skills/baoyu-youtube-transcript/scripts/youtube.ts +++ b/skills/baoyu-youtube-transcript/scripts/youtube.ts @@ -2,12 +2,13 @@ import { spawnSync } from "child_process"; import { writeFileSync } from "fs"; import { makeError, normalizeError, normalizePublishDate, shouldTryAlternateClient, shouldTryYtDlpFallback } from "./shared.ts"; -import { parseTranscriptPayload } from "./transcript.ts"; +import { findTranscript, parseTranscriptPayload } from "./transcript.ts"; import type { Chapter, InnerTubeClient, InnerTubeSession, LanguageMeta, + Options, Snippet, TranscriptInfo, VideoMeta, @@ -219,6 +220,68 @@ export async function fetchTranscriptSnippets( }; } +function buildYtDlpVideoSource(videoId: string, info: YtDlpInfo): VideoSource { + const transcripts = buildTranscriptListFromYtDlp(info); + if 
(!transcripts.length) throw makeError(`Transcripts disabled for ${videoId}`, "TRANSCRIPTS_DISABLED"); + return { kind: "yt-dlp", info, transcripts }; +} + +function getRequestedLanguages( + source: VideoSource, + opts: Pick<Options, "languages" | "translate"> +): string[] { + return source.kind === "yt-dlp" && opts.translate ? [opts.translate] : opts.languages; +} + +export async function fetchTranscriptWithFallback( + videoId: string, + source: VideoSource, + opts: Pick<Options, "languages" | "translate" | "excludeGenerated" | "excludeManual">, + fetchSnippets: ( + info: TranscriptInfo, + translateTo?: string + ) => Promise<{ snippets: Snippet[]; language: string; languageCode: string }> = fetchTranscriptSnippets, + fetchFallbackSource: (videoId: string) => Promise<VideoSource> | VideoSource = (requestedVideoId) => + buildYtDlpVideoSource(requestedVideoId, fetchYtDlpInfo(requestedVideoId)), + logWarning: (message: string) => void = (message) => console.error(message) +): Promise<{ + source: VideoSource; + transcript: TranscriptInfo; + snippets: Snippet[]; + language: string; + languageCode: string; +}> { + const transcript = findTranscript( + source.transcripts, + getRequestedLanguages(source, opts), + opts.excludeGenerated, + opts.excludeManual + ); + const result = await fetchSnippets(transcript, source.kind === "yt-dlp" ? undefined : opts.translate || undefined); + if (result.snippets.length > 0) return { source, transcript, ...result }; + + if (source.kind === "yt-dlp") { + throw makeError(`Transcript fetch returned empty snippets for ${videoId}`, "EMPTY_TRANSCRIPT"); + } + + logWarning(`Warning (${videoId}): Transcript fetch returned empty snippets. Retrying with yt-dlp fallback.`); + const fallbackSource = await fetchFallbackSource(videoId); + const fallbackTranscript = findTranscript( + fallbackSource.transcripts, + getRequestedLanguages(fallbackSource, opts), + opts.excludeGenerated, + opts.excludeManual + ); + const fallbackResult = await fetchSnippets( + fallbackTranscript, + fallbackSource.kind === "yt-dlp" ? 
undefined : opts.translate || undefined + ); + if (!fallbackResult.snippets.length) { + throw makeError(`Transcript fetch returned empty snippets for ${videoId} after yt-dlp fallback`, "EMPTY_TRANSCRIPT"); + } + return { source: fallbackSource, transcript: fallbackTranscript, ...fallbackResult }; +} + export function detectYtDlpCommand(): { command: string; args: string[]; label: string } | null { if (cachedYtDlpCommand !== undefined) return cachedYtDlpCommand; const candidates = [ @@ -366,10 +429,7 @@ export async function resolveVideoSource( const normalized = normalizeError(error); if (!shouldTryYtDlpFallback(normalized)) throw normalized; logWarning(`Warning (${videoId}): ${normalized.message}. Retrying with yt-dlp fallback.`); - const info = fetchFallback(videoId); - const transcripts = buildTranscriptListFromYtDlp(info); - if (!transcripts.length) throw makeError(`Transcripts disabled for ${videoId}`, "TRANSCRIPTS_DISABLED"); - return { kind: "yt-dlp", info, transcripts }; + return buildYtDlpVideoSource(videoId, fetchFallback(videoId)); } }