feat: add aggregated RSS feed, CSV and OPML export

This commit is contained in:
tmwgsicp 2026-03-20 01:25:38 +08:00
parent 869c5c0c92
commit 4825edc355
3 changed files with 287 additions and 1 deletions

View File

@ -9,11 +9,14 @@ RSS 订阅路由
订阅管理 + RSS XML 输出 订阅管理 + RSS XML 输出
""" """
import csv
import io
import time import time
import logging import logging
from datetime import datetime, timezone from datetime import datetime, timezone
from html import escape as html_escape from html import escape as html_escape
from typing import Optional from typing import Optional
import xml.etree.ElementTree as ET
from fastapi import APIRouter, HTTPException, Query, Request from fastapi import APIRouter, HTTPException, Query, Request
from fastapi.responses import Response from fastapi.responses import Response
@ -173,6 +176,112 @@ async def poller_status():
) )
# ── 聚合 RSS ─────────────────────────────────────────────
@router.get("/rss/all", summary="聚合 RSS 订阅源",
            response_class=Response)
async def get_aggregated_rss_feed(
    request: Request,
    limit: int = Query(50, ge=1, le=200, description="文章数量上限"),
):
    """Aggregated RSS 2.0 feed covering every subscribed account.

    Add this URL to an RSS reader to follow all subscriptions through a
    single feed. Subscription additions/removals take effect
    automatically, so the link never has to be replaced.
    """
    subscriptions = rss_store.list_subscriptions()
    display_names = {
        sub["fakeid"]: sub.get("nickname") or sub["fakeid"]
        for sub in subscriptions
    }
    # Skip the article query entirely when nothing is subscribed.
    latest_articles = (
        rss_store.get_all_articles(limit=limit) if subscriptions else []
    )
    root_url = str(request.base_url).rstrip("/")
    feed_xml = _build_aggregated_rss_xml(latest_articles, display_names, root_url)
    return Response(
        content=feed_xml,
        media_type="application/rss+xml; charset=utf-8",
        headers={"Cache-Control": "public, max-age=600"},
    )
# ── 导出 ─────────────────────────────────────────────────
@router.get("/rss/export", summary="导出订阅列表")
async def export_subscriptions(
    request: Request,
    format: str = Query("csv", regex="^(csv|opml)$", description="导出格式: csv 或 opml"),
):
    """Export the current subscription list.

    - **csv**: account name, FakeID, RSS URL, article count, subscribe date.
    - **opml**: standard OPML, importable directly into RSS readers.
    """
    subscriptions = rss_store.list_subscriptions()
    root_url = str(request.base_url).rstrip("/")
    # `format` is constrained to csv|opml by the Query regex above.
    builder = _build_opml_response if format == "opml" else _build_csv_response
    return builder(subscriptions, root_url)
def _build_csv_response(subs: list, base_url: str) -> Response:
    """Render the subscription list as a downloadable UTF-8 CSV.

    A BOM is prepended so spreadsheet apps (Excel) detect the encoding.
    """
    rows = [["Name", "FakeID", "RSS URL", "Articles", "Subscribed At"]]
    for sub in subs:
        subscribed_on = datetime.fromtimestamp(
            sub.get("created_at", 0), tz=timezone.utc
        ).strftime("%Y-%m-%d")
        rows.append([
            sub.get("nickname") or sub["fakeid"],
            sub["fakeid"],
            f"{base_url}/api/rss/{sub['fakeid']}",
            sub.get("article_count", 0),
            subscribed_on,
        ])
    out = io.StringIO()
    out.write('\ufeff')  # UTF-8 BOM for Excel compatibility
    csv.writer(out).writerows(rows)
    return Response(
        content=out.getvalue(),
        media_type="text/csv; charset=utf-8",
        headers={"Content-Disposition": 'attachment; filename="wechat_rss_subscriptions.csv"'},
    )
def _build_opml_response(subs: list, base_url: str) -> Response:
    """Render the subscription list as a downloadable OPML 2.0 document."""
    root = ET.Element("opml", version="2.0")
    head = ET.SubElement(root, "head")
    ET.SubElement(head, "title").text = "WeChat RSS Subscriptions"
    ET.SubElement(head, "dateCreated").text = datetime.now(timezone.utc).strftime(
        "%a, %d %b %Y %H:%M:%S +0000"
    )
    body = ET.SubElement(root, "body")
    # Group every feed under one folder so readers import them together.
    folder = ET.SubElement(body, "outline", text="WeChat RSS", title="WeChat RSS")
    for sub in subs:
        label = sub.get("nickname") or sub["fakeid"]
        ET.SubElement(folder, "outline", {
            "type": "rss",
            "text": label,
            "title": label,
            "xmlUrl": f"{base_url}/api/rss/{sub['fakeid']}",
            "htmlUrl": "https://mp.weixin.qq.com",
            "description": f"{label} - WeChat RSS",
        })
    serialized = ET.tostring(root, encoding="unicode", xml_declaration=False)
    return Response(
        content='<?xml version="1.0" encoding="UTF-8"?>\n' + serialized,
        media_type="application/xml; charset=utf-8",
        headers={"Content-Disposition": 'attachment; filename="wechat_rss_subscriptions.opml"'},
    )
# ── RSS XML 输出 ────────────────────────────────────────── # ── RSS XML 输出 ──────────────────────────────────────────
def _rfc822(ts: int) -> str: def _rfc822(ts: int) -> str:
@ -347,3 +456,115 @@ async def get_rss_feed(fakeid: str, request: Request,
media_type="application/rss+xml; charset=utf-8", media_type="application/rss+xml; charset=utf-8",
headers={"Cache-Control": "public, max-age=600"}, headers={"Cache-Control": "public, max-age=600"},
) )
# ── 聚合 RSS XML 构建 ────────────────────────────────────
def _build_aggregated_rss_xml(articles: list, nickname_map: dict,
                              base_url: str) -> str:
    """Build aggregated RSS XML across all subscriptions.

    Args:
        articles: Article dicts; keys read here are ``fakeid``, ``title``,
            ``link``, ``publish_time``, ``author``, ``cover``, ``digest``
            and ``content``.
        nickname_map: fakeid -> display name, used to prefix item titles.
        base_url: Public base URL of this service, without trailing slash.

    Returns:
        An RSS 2.0 document as a string, starting with an XML declaration.
    """
    # minidom is only used here, so the import stays function-local.
    from xml.dom import minidom
    doc = minidom.Document()
    rss = doc.createElement("rss")
    rss.setAttribute("version", "2.0")
    rss.setAttribute("xmlns:atom", "http://www.w3.org/2005/Atom")
    doc.appendChild(rss)
    channel = doc.createElement("channel")
    rss.appendChild(channel)

    def add_text(parent, tag, text):
        # Append <tag>text</tag> to parent; minidom escapes text nodes.
        elem = doc.createElement(tag)
        elem.appendChild(doc.createTextNode(str(text)))
        parent.appendChild(elem)
        return elem

    # Channel metadata.
    add_text(channel, "title", "WeChat RSS - All Subscriptions")
    add_text(channel, "link", base_url)
    add_text(channel, "description", "Aggregated feed of all subscribed WeChat accounts")
    add_text(channel, "language", "zh-CN")
    add_text(channel, "lastBuildDate", _rfc822(int(time.time())))
    add_text(channel, "generator", "WeChat Download API")
    # atom:link rel="self" points readers back at this aggregated feed.
    atom_link = doc.createElement("atom:link")
    atom_link.setAttribute("href", f"{base_url}/api/rss/all")
    atom_link.setAttribute("rel", "self")
    atom_link.setAttribute("type", "application/rss+xml")
    channel.appendChild(atom_link)
    for a in articles:
        item = doc.createElement("item")
        # Prefix titles with the source account name so items from
        # different accounts stay distinguishable in one merged feed.
        source_name = nickname_map.get(a.get("fakeid", ""), "")
        title_text = a.get("title", "")
        if source_name:
            title_text = f"[{source_name}] {title_text}"
        add_text(item, "title", title_text)
        link = a.get("link", "")
        add_text(item, "link", link)
        guid = doc.createElement("guid")
        guid.setAttribute("isPermaLink", "true")
        guid.appendChild(doc.createTextNode(link))
        item.appendChild(guid)
        if a.get("publish_time"):
            add_text(item, "pubDate", _rfc822(a["publish_time"]))
        if a.get("author"):
            add_text(item, "author", a["author"])
        # Values interpolated into the description HTML are escaped by
        # hand because they end up inside a CDATA section, which minidom
        # does not escape.
        cover = proxy_image_url(a.get("cover", ""), base_url)
        digest = html_escape(a.get("digest", "")) if a.get("digest") else ""
        author = html_escape(a.get("author", "")) if a.get("author") else ""
        title_escaped = html_escape(a.get("title", ""))
        content_html = a.get("content", "")
        html_parts = []
        if content_html:
            # Full article content available: embed it directly.
            # NOTE(review): content_html is inserted unescaped — assumed
            # to be sanitized HTML from the fetcher; confirm upstream.
            html_parts.append(
                f'<div style="font-size:16px;line-height:1.8;color:#333">'
                f'{content_html}</div>'
            )
            if author:
                html_parts.append(
                    f'<hr style="margin:24px 0;border:none;border-top:1px solid #eee" />'
                    f'<p style="color:#888;font-size:13px;margin:0">author: {author}</p>'
                )
        else:
            # No full content: fall back to cover image + digest + author
            # + a "read original" link.
            if cover:
                html_parts.append(
                    f'<div style="margin-bottom:12px">'
                    f'<a href="{html_escape(link)}">'
                    f'<img src="{html_escape(cover)}" alt="{title_escaped}" '
                    f'style="max-width:100%;height:auto;border-radius:8px" /></a></div>'
                )
            if digest:
                html_parts.append(
                    f'<p style="color:#333;font-size:15px;line-height:1.8;'
                    f'margin:0 0 16px">{digest}</p>'
                )
            if author:
                html_parts.append(
                    f'<p style="color:#888;font-size:13px;margin:0 0 12px">'
                    f'author: {author}</p>'
                )
            html_parts.append(
                f'<p style="margin:0"><a href="{html_escape(link)}" '
                f'style="color:#1890ff;text-decoration:none;font-size:14px">'
                f'Read &rarr;</a></p>'
            )
        # The HTML goes into a CDATA section so readers render it as markup.
        # NOTE(review): a literal "]]>" inside the content would terminate
        # the CDATA section early — verify upstream sanitizing covers this.
        description = doc.createElement("description")
        cdata = doc.createCDATASection("\n".join(html_parts))
        description.appendChild(cdata)
        item.appendChild(description)
        channel.appendChild(item)
    # Pretty-print, drop blank lines and minidom's own XML declaration
    # (the first line), then prepend an explicit UTF-8 declaration.
    xml_str = doc.toprettyxml(indent="  ", encoding=None)
    lines = [line for line in xml_str.split('\n') if line.strip()]
    xml_str = '\n'.join(lines[1:])
    return '<?xml version="1.0" encoding="UTF-8"?>\n' + xml_str

View File

@ -405,7 +405,28 @@
</div> </div>
<div class="sub-section"> <div class="sub-section">
<h2>我的订阅</h2> <div style="display:flex;align-items:center;justify-content:space-between;margin-bottom:var(--space-md);">
<h2 style="margin-bottom:0;">我的订阅</h2>
<div style="display:flex;gap:var(--space-sm);position:relative;">
<div id="exportDropdown" style="position:relative;">
<button class="btn-sm" onclick="toggleExportMenu()">导出</button>
<div id="exportMenu" style="display:none;position:absolute;right:0;top:100%;margin-top:4px;background:var(--bg-primary);border:1px solid var(--border-base);border-radius:var(--radius-base);box-shadow:var(--shadow-base);z-index:10;min-width:120px;">
<div style="padding:8px 16px;cursor:pointer;font-size:var(--font-sm);transition:background var(--duration-fast);" onmouseover="this.style.background='var(--bg-secondary)'" onmouseout="this.style.background=''" onclick="doExport('csv')">CSV</div>
<div style="padding:8px 16px;cursor:pointer;font-size:var(--font-sm);transition:background var(--duration-fast);" onmouseover="this.style.background='var(--bg-secondary)'" onmouseout="this.style.background=''" onclick="doExport('opml')">OPML</div>
</div>
</div>
</div>
</div>
<div id="aggRssBar" style="display:none;background:var(--bg-secondary);border:1px solid var(--border-light);border-radius:var(--radius-base);padding:12px var(--space-md);margin-bottom:var(--space-md);">
<div style="display:flex;align-items:center;justify-content:space-between;gap:12px;">
<div style="flex:1;min-width:0;">
<div style="font-size:var(--font-xs);font-weight:600;color:var(--text-secondary);margin-bottom:4px;">聚合 RSS 链接</div>
<div id="aggRssUrl" style="font-size:var(--font-xs);color:var(--text-muted);font-family:monospace;overflow:hidden;text-overflow:ellipsis;white-space:nowrap;"></div>
</div>
<button class="btn-copy" onclick="copyRss(document.getElementById('aggRssUrl').textContent)">复制</button>
</div>
<div style="font-size:11px;color:var(--text-muted);margin-top:6px;">包含所有订阅的公众号文章,订阅变动自动生效,无需更换链接</div>
</div>
<div id="subList"><div class="sub-empty">加载中...</div></div> <div id="subList"><div class="sub-empty">加载中...</div></div>
</div> </div>
@ -454,8 +475,10 @@
var d = await res.json(); var d = await res.json();
if (!d.success || !d.data || d.data.length === 0) { if (!d.success || !d.data || d.data.length === 0) {
el.innerHTML = '<div class="sub-empty">暂无订阅,搜索公众号后添加</div>'; el.innerHTML = '<div class="sub-empty">暂无订阅,搜索公众号后添加</div>';
updateAggRssBar(false);
return; return;
} }
updateAggRssBar(true);
var html = '<ul class="sub-list">'; var html = '<ul class="sub-list">';
d.data.forEach(function(s) { d.data.forEach(function(s) {
var lastPoll = s.last_poll ? new Date(s.last_poll * 1000).toLocaleString('zh-CN') : '从未'; var lastPoll = s.last_poll ? new Date(s.last_poll * 1000).toLocaleString('zh-CN') : '从未';
@ -476,6 +499,7 @@
el.innerHTML = html; el.innerHTML = html;
} catch (e) { } catch (e) {
el.innerHTML = '<div class="sub-empty">加载失败: ' + e.message + '</div>'; el.innerHTML = '<div class="sub-empty">加载失败: ' + e.message + '</div>';
updateAggRssBar(false);
} }
} }
@ -603,6 +627,34 @@
return s.replace(/\\/g, '\\\\').replace(/'/g, "\\'").replace(/"/g, '&quot;'); return s.replace(/\\/g, '\\\\').replace(/'/g, "\\'").replace(/"/g, '&quot;');
} }
function updateAggRssBar(hasSubs) {
    // Show the aggregated-RSS banner only while subscriptions exist.
    var bar = document.getElementById('aggRssBar');
    if (!hasSubs) {
        bar.style.display = 'none';
        return;
    }
    document.getElementById('aggRssUrl').textContent =
        window.location.origin + '/api/rss/all';
    bar.style.display = 'block';
}
function toggleExportMenu() {
    // Flip the export dropdown between hidden and visible.
    var menu = document.getElementById('exportMenu');
    if (menu.style.display === 'none') {
        menu.style.display = 'block';
    } else {
        menu.style.display = 'none';
    }
}
function doExport(format) {
    // Close the menu, then trigger the download by navigating to the
    // export endpoint (browser handles the attachment response).
    document.getElementById('exportMenu').style.display = 'none';
    window.location.href = '/api/rss/export?format=' + format;
}
// Close the export dropdown when clicking anywhere outside of it.
document.addEventListener('click', function (event) {
    var dropdown = document.getElementById('exportDropdown');
    if (!dropdown || dropdown.contains(event.target)) {
        return;
    }
    document.getElementById('exportMenu').style.display = 'none';
});
document.getElementById('searchInput').addEventListener('keydown', function(e) { document.getElementById('searchInput').addEventListener('keydown', function(e) {
if (e.key === 'Enter') doSearch(); if (e.key === 'Enter') doSearch();
}); });

View File

@ -190,3 +190,16 @@ def get_all_fakeids() -> List[str]:
conn.close() conn.close()
def get_all_articles(limit: int = 50) -> List[Dict]:
    """Return the newest articles across every subscription.

    Rows are ordered by ``publish_time`` descending and capped at
    ``limit``; each row is returned as a plain dict.
    """
    conn = _get_conn()
    try:
        query = "SELECT * FROM articles ORDER BY publish_time DESC LIMIT ?"
        return list(map(dict, conn.execute(query, (limit,)).fetchall()))
    finally:
        # Always release the connection, even if the query fails.
        conn.close()