feat(baoyu-youtube-transcript): auto-retry with yt-dlp on empty InnerTube transcript

2026-04-03 23:35:42 -05:00 · 2026-04-03 23:35:42 -05:00 · 204765a137
parent 4874cd2dae
commit 204765a137
3 changed files with 140 additions and 20 deletions
--- a/skills/baoyu-youtube-transcript/scripts/main.test.ts
+++ b/skills/baoyu-youtube-transcript/scripts/main.test.ts
@ -2,7 +2,7 @@ import test from "node:test";
 import assert from "node:assert/strict";
 import { findTranscript, parseTranscriptJson3, parseWebVtt } from "./transcript.ts";
-import { buildTranscriptListFromYtDlp, resolveVideoSource, selectYtDlpTrack } from "./youtube.ts";
+import { buildTranscriptListFromYtDlp, fetchTranscriptWithFallback, resolveVideoSource, selectYtDlpTrack } from "./youtube.ts";
 test("selectYtDlpTrack prefers json3 over xml and vtt", () => {
  const track = selectYtDlpTrack([
@ -123,3 +123,61 @@ test("resolveVideoSource falls back to yt-dlp only after fallback-eligible error
  assert.equal(fallbackCalled, true);
  assert.equal(source.transcripts[0].languageCode, "en");
 });
 // Regression test: an InnerTube source whose transcript fetch yields zero snippets
 // must trigger exactly the yt-dlp retry path and surface the recovered snippets.
 test("fetchTranscriptWithFallback retries with yt-dlp when InnerTube transcript payload is empty", async () => {
  // Both sources expose the same English manual track; only the subtitle URL differs.
  const englishTrack = (baseUrl: string) => ({
    language: "English",
    languageCode: "en",
    isGenerated: false,
    isTranslatable: false,
    baseUrl,
    translationLanguages: [],
  });
  const loggedWarnings: string[] = [];
  let usedFallback = false;

  const outcome = await fetchTranscriptWithFallback(
    "video12345ab",
    {
      kind: "innertube",
      data: { videoDetails: { title: "Primary" } },
      transcripts: [englishTrack("https://www.youtube.com/api/timedtext?v=video12345ab&lang=en&fmt=json3")],
    },
    {
      languages: ["en"],
      translate: "",
      excludeGenerated: false,
      excludeManual: false,
    },
    // Stub fetcher: the InnerTube timedtext URL returns nothing, any other URL returns one snippet.
    async (info) => {
      const isInnerTubeUrl = info.baseUrl.includes("youtube.com/api/timedtext");
      const snippets = isInnerTubeUrl ? [] : [{ text: "Recovered subtitle", start: 0, duration: 2 }];
      return { snippets, language: info.language, languageCode: info.languageCode };
    },
    // Stub fallback source: records that it ran and hands back a yt-dlp source.
    async () => {
      usedFallback = true;
      return {
        kind: "yt-dlp",
        info: { title: "Fallback" },
        transcripts: [englishTrack("https://example.com/subtitles.en.json3")],
      };
    },
    (message) => loggedWarnings.push(message)
  );

  assert.equal(usedFallback, true);
  assert.equal(outcome.source.kind, "yt-dlp");
  assert.equal(outcome.snippets.length, 1);
  assert.equal(outcome.snippets[0].text, "Recovered subtitle");
  assert.match(loggedWarnings[0] || "", /Retrying with yt-dlp fallback/);
 });
--- a/skills/baoyu-youtube-transcript/scripts/main.ts
+++ b/skills/baoyu-youtube-transcript/scripts/main.ts
@ -13,13 +13,13 @@ import {
  registerVideoDir,
  resolveBaseDir,
 } from "./storage.ts";
-import { findTranscript, formatListOutput, formatMarkdown, formatSrt, segmentIntoSentences } from "./transcript.ts";
+import { formatListOutput, formatMarkdown, formatSrt, segmentIntoSentences } from "./transcript.ts";
 import type { Options, Sentence, Snippet, VideoMeta, VideoResult } from "./types.ts";
 import {
  buildVideoMeta,
  buildVideoMetaFromYtDlp,
  downloadCoverImage,
-  fetchTranscriptSnippets,
+  fetchTranscriptWithFallback,
  fetchVideoSource,
  getThumbnailUrls,
  getYtDlpThumbnailUrls,
@ -31,10 +31,12 @@ async function fetchAndCache(
  baseDir: string,
  opts: Options
 ): Promise<{ meta: VideoMeta; snippets: Snippet[]; sentences: Sentence[]; videoDir: string }> {
-  const source = await fetchVideoSource(videoId);
+  const initialSource = await fetchVideoSource(videoId);
-  const requestedLanguages = source.kind === "yt-dlp" && opts.translate ? [opts.translate] : opts.languages;
+  const { source, transcript, snippets, language, languageCode } = await fetchTranscriptWithFallback(
-  const transcript = findTranscript(source.transcripts, requestedLanguages, opts.excludeGenerated, opts.excludeManual);
+    videoId,
-  const result = await fetchTranscriptSnippets(transcript, source.kind === "yt-dlp" ? undefined : opts.translate || undefined);
+    initialSource,
    opts
  );
  const description = source.kind === "yt-dlp"
    ? source.info.description || ""
    : source.data?.videoDetails?.shortDescription || "";
@ -42,21 +44,21 @@ async function fetchAndCache(
    ? Number(source.info.duration || 0)
    : parseInt(source.data?.videoDetails?.lengthSeconds || "0");
  const chapters = parseChapters(description, duration);
-  const language = {
+  const languageMeta = {
-    code: result.languageCode,
+    code: languageCode,
-    name: result.language,
+    name: language,
    isGenerated: transcript.isGenerated,
  };
  const meta = source.kind === "yt-dlp"
-    ? buildVideoMetaFromYtDlp(source.info, videoId, language, chapters)
+    ? buildVideoMetaFromYtDlp(source.info, videoId, languageMeta, chapters)
-    : buildVideoMeta(source.data, videoId, language, chapters);
+    : buildVideoMeta(source.data, videoId, languageMeta, chapters);
  const videoDir = registerVideoDir(videoId, slugify(meta.channel), slugify(meta.title), baseDir);
  ensureDir(join(videoDir, "meta.json"));
-  writeFileSync(join(videoDir, "transcript-raw.json"), JSON.stringify(result.snippets, null, 2));
+  writeFileSync(join(videoDir, "transcript-raw.json"), JSON.stringify(snippets, null, 2));
-  const sentences = segmentIntoSentences(result.snippets);
+  const sentences = segmentIntoSentences(snippets);
  writeFileSync(join(videoDir, "transcript-sentences.json"), JSON.stringify(sentences, null, 2));
  const imagePath = join(videoDir, "imgs", "cover.jpg");
@ -69,7 +71,7 @@ async function fetchAndCache(
  writeFileSync(join(videoDir, "meta.json"), JSON.stringify(meta, null, 2));
-  return { meta, snippets: result.snippets, sentences, videoDir };
+  return { meta, snippets, sentences, videoDir };
 }
 async function processVideo(videoId: string, opts: Options): Promise<VideoResult> {
--- a/skills/baoyu-youtube-transcript/scripts/youtube.ts
+++ b/skills/baoyu-youtube-transcript/scripts/youtube.ts
@ -2,12 +2,13 @@ import { spawnSync } from "child_process";
 import { writeFileSync } from "fs";
 import { makeError, normalizeError, normalizePublishDate, shouldTryAlternateClient, shouldTryYtDlpFallback } from "./shared.ts";
-import { parseTranscriptPayload } from "./transcript.ts";
+import { findTranscript, parseTranscriptPayload } from "./transcript.ts";
 import type {
  Chapter,
  InnerTubeClient,
  InnerTubeSession,
  LanguageMeta,
  Options,
  Snippet,
  TranscriptInfo,
  VideoMeta,
@ -219,6 +220,68 @@ export async function fetchTranscriptSnippets(
  };
 }
 /**
  * Wraps yt-dlp metadata into a `VideoSource` tagged `kind: "yt-dlp"`.
  *
  * @param videoId - Video id, used only in the error message.
  * @param info - yt-dlp info object from which the transcript list is built.
  * @returns A source carrying the original `info` and the derived transcript list.
  * @throws The error produced by `makeError(..., "TRANSCRIPTS_DISABLED")` when
  *   `buildTranscriptListFromYtDlp` yields no transcripts.
  */
 function buildYtDlpVideoSource(videoId: string, info: YtDlpInfo): VideoSource {
  const availableTracks = buildTranscriptListFromYtDlp(info);
  if (availableTracks.length === 0) {
    throw makeError(`Transcripts disabled for ${videoId}`, "TRANSCRIPTS_DISABLED");
  }
  return { kind: "yt-dlp", info, transcripts: availableTracks };
 }
 /**
  * Picks the language codes to look up for a given source.
  *
  * For a yt-dlp source with a non-empty `opts.translate`, only that translate
  * target is requested; in every other case the caller's `opts.languages` list
  * is returned as-is (same array reference).
  *
  * NOTE(review): presumably yt-dlp exposes translated tracks under the target
  * language code, which is why the target is used for selection — confirm.
  *
  * @param source - Source whose `kind` decides the lookup strategy.
  * @param opts - Preferred languages and optional translate target.
  * @returns Language codes to pass to transcript selection.
  */
 function getRequestedLanguages(
  source: VideoSource,
  opts: Pick<Options, "languages" | "translate">
 ): string[] {
  if (source.kind !== "yt-dlp") return opts.languages;
  if (!opts.translate) return opts.languages;
  return [opts.translate];
 }
 /**
  * Selects a transcript from `source` and fetches its snippets, retrying once
  * through a fallback source when a non-yt-dlp source yields zero snippets.
  *
  * Errors thrown by `findTranscript`, `fetchSnippets` or `fetchFallbackSource`
  * are not caught here and propagate to the caller.
  *
  * @param videoId - Video id used in warning/error messages and handed to `fetchFallbackSource`.
  * @param source - Already-resolved source that is tried first.
  * @param opts - Language preferences, translate target and generated/manual exclusions.
  * @param fetchSnippets - Snippet fetcher; defaults to `fetchTranscriptSnippets`.
  * @param fetchFallbackSource - Supplies the retry source; by default builds one
  *   from `fetchYtDlpInfo` via `buildYtDlpVideoSource`.
  * @param logWarning - Receives the retry warning; defaults to `console.error`.
  * @returns The source and transcript that produced snippets, merged with the fetcher's result.
  * @throws The error produced by `makeError(..., "EMPTY_TRANSCRIPT")` when a
  *   yt-dlp source returns no snippets, or when the fallback attempt is also empty.
  */
 export async function fetchTranscriptWithFallback(
  videoId: string,
  source: VideoSource,
  opts: Pick<Options, "languages" | "translate" | "excludeGenerated" | "excludeManual">,
  fetchSnippets: (
    info: TranscriptInfo,
    translateTo?: string
  ) => Promise<{ snippets: Snippet[]; language: string; languageCode: string }> = fetchTranscriptSnippets,
  fetchFallbackSource: (videoId: string) => Promise<VideoSource> | VideoSource = (requestedVideoId) =>
    buildYtDlpVideoSource(requestedVideoId, fetchYtDlpInfo(requestedVideoId)),
  logWarning: (message: string) => void = (message) => console.error(message)
 ): Promise<{
  source: VideoSource;
  transcript: TranscriptInfo;
  snippets: Snippet[];
  language: string;
  languageCode: string;
 }> {
  // One select-then-download pass against a single source; used for both the
  // initial attempt and the retry so the two cannot drift apart.
  const attempt = async (candidate: VideoSource) => {
    const picked = findTranscript(
      candidate.transcripts,
      getRequestedLanguages(candidate, opts),
      opts.excludeGenerated,
      opts.excludeManual
    );
    // yt-dlp sources never get a translate target; an empty `opts.translate` also maps to undefined.
    const translateTo = candidate.kind === "yt-dlp" ? undefined : opts.translate || undefined;
    const fetched = await fetchSnippets(picked, translateTo);
    return { source: candidate, transcript: picked, ...fetched };
  };

  const primary = await attempt(source);
  if (primary.snippets.length > 0) return primary;

  // An empty result from yt-dlp is final: there is nothing further to fall back to.
  if (source.kind === "yt-dlp") {
    throw makeError(`Transcript fetch returned empty snippets for ${videoId}`, "EMPTY_TRANSCRIPT");
  }

  logWarning(`Warning (${videoId}): Transcript fetch returned empty snippets. Retrying with yt-dlp fallback.`);
  const retrySource = await fetchFallbackSource(videoId);
  const secondary = await attempt(retrySource);
  if (secondary.snippets.length === 0) {
    throw makeError(`Transcript fetch returned empty snippets for ${videoId} after yt-dlp fallback`, "EMPTY_TRANSCRIPT");
  }
  return secondary;
 }
 export function detectYtDlpCommand(): { command: string; args: string[]; label: string } | null {
  if (cachedYtDlpCommand !== undefined) return cachedYtDlpCommand;
  const candidates = [
@ -366,10 +429,7 @@ export async function resolveVideoSource(
    const normalized = normalizeError(error);
    if (!shouldTryYtDlpFallback(normalized)) throw normalized;
    logWarning(`Warning (${videoId}): ${normalized.message}. Retrying with yt-dlp fallback.`);
-    const info = fetchFallback(videoId);
+    return buildYtDlpVideoSource(videoId, fetchFallback(videoId));
    const transcripts = buildTranscriptListFromYtDlp(info);
    if (!transcripts.length) throw makeError(`Transcripts disabled for ${videoId}`, "TRANSCRIPTS_DISABLED");
    return { kind: "yt-dlp", info, transcripts };
  }
 }