From bb78aab09551b6c9bcbbad23ce40c476a43b2c84 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jim=20Liu=20=E5=AE=9D=E7=8E=89?= Date: Sat, 21 Mar 2026 23:07:44 -0500 Subject: [PATCH] feat(baoyu-youtube-transcript): add title heading, description summary, and cover image to markdown output --- skills/baoyu-youtube-transcript/SKILL.md | 4 ++-- .../prompts/speaker-transcript.md | 16 +++++++++++++--- .../baoyu-youtube-transcript/scripts/main.ts | 18 +++++++++++++++++- 3 files changed, 32 insertions(+), 6 deletions(-) diff --git a/skills/baoyu-youtube-transcript/SKILL.md b/skills/baoyu-youtube-transcript/SKILL.md index c169da8..e76e618 100644 --- a/skills/baoyu-youtube-transcript/SKILL.md +++ b/skills/baoyu-youtube-transcript/SKILL.md @@ -88,7 +88,7 @@ Accepts any of these as video input: | Format | Extension | Description | |--------|-----------|-------------| -| `text` | `.md` | Markdown with frontmatter, natural paragraphs, optional timestamps/chapters/speakers | +| `text` | `.md` | Markdown with frontmatter (incl. `description`), title heading, summary, optional TOC/cover/timestamps/chapters/speakers | | `srt` | `.srt` | SubRip subtitle format for video players | ## Output Directory @@ -147,7 +147,7 @@ If no chapter timestamps exist in the description, the transcript is output as g ### Speaker Identification (`--speakers`) Speaker identification requires AI processing. The script outputs a raw `.md` file containing: -- YAML frontmatter with video metadata (title, channel, date, cover, language) +- YAML frontmatter with video metadata (title, channel, date, cover, description, language) - Video description (for speaker name extraction) - Chapter list from description (if available) - Raw transcript in SRT format (pre-computed start/end timestamps, token-efficient) diff --git a/skills/baoyu-youtube-transcript/prompts/speaker-transcript.md b/skills/baoyu-youtube-transcript/prompts/speaker-transcript.md index 16e3159..c925750 100644 --- a/skills/baoyu-youtube-transcript/prompts/speaker-transcript.md +++ b/skills/baoyu-youtube-transcript/prompts/speaker-transcript.md @@ -5,9 +5,12 @@ You are an expert transcript specialist. Process the raw transcript file (with Y ## Output Structure Produce a single cohesive markdown file containing: -1. YAML frontmatter (keep the original frontmatter from the raw file) -2. Table of Contents -3. Full chapter-segmented transcript with speaker labels +1. YAML frontmatter (keep the original frontmatter from the raw file, which includes `description`) +2. `# Title` heading (from frontmatter title) +3. Description/summary paragraph (from frontmatter `description`) +4. Table of Contents +5. Cover image (if `cover` exists in frontmatter): `![cover](imgs/cover.jpg)` — right after the ToC +6. Full chapter-segmented transcript with speaker labels Use the same language as the transcription for the title and ToC. @@ -79,13 +82,20 @@ channel: "The Show" date: 2024-04-15 url: "https://www.youtube.com/watch?v=xxx" cover: imgs/cover.jpg +description: "Jane Doe discusses her groundbreaking five-year study on the long-term effects of dietary changes." language: en --- +# Example Interview + +Jane Doe discusses her groundbreaking five-year study on the long-term effects of dietary changes. + ## Table of Contents * [00:00:00] Introduction and Welcome * [00:00:12] Overview of the New Research +![cover](imgs/cover.jpg) + ## Introduction and Welcome [00:00:00] diff --git a/skills/baoyu-youtube-transcript/scripts/main.ts b/skills/baoyu-youtube-transcript/scripts/main.ts index a7a9ce6..d96a804 100644 --- a/skills/baoyu-youtube-transcript/scripts/main.ts +++ b/skills/baoyu-youtube-transcript/scripts/main.ts @@ -505,17 +505,28 @@ function yamlEscape(s: string): string { return s; } +function extractSummary(description: string): string { + if (!description) return ""; + const firstPara = description.split(/\n\s*\n/)[0].trim(); + const lines = firstPara.split("\n").filter(l => !/^\s*(https?:\/\/|#|@|\d+:\d+)/.test(l) && l.trim()); + return lines.join(" ").slice(0, 300).trim(); +} + function formatMarkdown(sentences: Sentence[], meta: VideoMeta, opts: { timestamps: boolean; chapters: boolean; speakers: boolean }, snippets?: Snippet[]): string { + const summary = extractSummary(meta.description); let md = "---\n"; md += `title: ${yamlEscape(meta.title)}\n`; md += `channel: ${yamlEscape(meta.channel)}\n`; if (meta.publishDate) md += `date: ${meta.publishDate}\n`; md += `url: ${yamlEscape(meta.url)}\n`; if (meta.coverImage) md += `cover: ${meta.coverImage}\n`; + if (summary) md += `description: ${yamlEscape(summary)}\n`; if (meta.language) md += `language: ${meta.language.code}\n`; md += "---\n\n"; if (opts.speakers) { + md += `# ${meta.title}\n\n`; + if (summary) md += `${summary}\n\n`; if (meta.description) md += "# Description\n\n" + meta.description.trim() + "\n\n"; if (meta.chapters.length) { md += "# Chapters\n\n"; @@ -527,12 +538,17 @@ function formatMarkdown(sentences: Sentence[], meta: VideoMeta, opts: { timestam return md; } + md += `# ${meta.title}\n\n`; + if (summary) md += `${summary}\n\n`; + const chapters = opts.chapters ? meta.chapters : []; if (chapters.length) { md += "## Table of Contents\n\n"; for (const ch of chapters) md += opts.timestamps ? `* [${ts(ch.start)}] ${ch.title}\n` : `* ${ch.title}\n`; - md += "\n\n"; + md += "\n"; + if (meta.coverImage) md += `\n![cover](${meta.coverImage})\n`; + md += "\n"; for (let i = 0; i < chapters.length; i++) { const nextStart = i < chapters.length - 1 ? chapters[i + 1].start : Infinity; const chSentences = sentences.filter(s => parseTs(s.start) >= chapters[i].start && parseTs(s.start) < nextStart);