tmwgsicp-wechat-download-api/utils/rss_poller.py

170 lines
5.3 KiB
Python

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Copyright (C) 2026 tmwgsicp
# Licensed under the GNU Affero General Public License v3.0
# See LICENSE file in the project root for full license text.
# SPDX-License-Identifier: AGPL-3.0-only
"""
RSS 后台轮询器
定时通过公众号后台 API 拉取订阅号的最新文章列表并缓存到 SQLite。
仅获取标题、摘要、封面等元数据,不访问文章页面,零风控风险。
"""
import asyncio
import json
import logging
import os
from typing import List, Dict, Optional
import httpx
from utils.auth_manager import auth_manager
from utils import rss_store
logger = logging.getLogger(__name__)
POLL_INTERVAL = int(os.getenv("RSS_POLL_INTERVAL", "3600"))
ARTICLES_PER_POLL = 10
class RSSPoller:
"""后台轮询单例"""
_instance = None
_task: Optional[asyncio.Task] = None
_running = False
def __new__(cls):
if cls._instance is None:
cls._instance = super().__new__(cls)
return cls._instance
async def start(self):
if self._running:
return
self._running = True
self._task = asyncio.create_task(self._loop())
logger.info("RSS poller started (interval=%ds)", POLL_INTERVAL)
async def stop(self):
self._running = False
if self._task:
self._task.cancel()
try:
await self._task
except asyncio.CancelledError:
pass
logger.info("RSS poller stopped")
@property
def is_running(self) -> bool:
return self._running
async def _loop(self):
while self._running:
try:
await self._poll_all()
except Exception as e:
logger.error("RSS poll cycle error: %s", e, exc_info=True)
await asyncio.sleep(POLL_INTERVAL)
async def _poll_all(self):
fakeids = rss_store.get_all_fakeids()
if not fakeids:
return
creds = auth_manager.get_credentials()
if not creds or not creds.get("token") or not creds.get("cookie"):
logger.warning("RSS poll skipped: not logged in")
return
logger.info("RSS poll: checking %d subscriptions", len(fakeids))
for fakeid in fakeids:
try:
articles = await self._fetch_article_list(fakeid, creds)
if articles:
new_count = rss_store.save_articles(fakeid, articles)
if new_count > 0:
logger.info("RSS: %d new articles for %s", new_count, fakeid[:8])
rss_store.update_last_poll(fakeid)
except Exception as e:
logger.error("RSS poll error for %s: %s", fakeid[:8], e)
await asyncio.sleep(3)
async def _fetch_article_list(self, fakeid: str, creds: Dict) -> List[Dict]:
params = {
"sub": "list",
"search_field": "null",
"begin": 0,
"count": ARTICLES_PER_POLL,
"query": "",
"fakeid": fakeid,
"type": "101_1",
"free_publish_type": 1,
"sub_action": "list_ex",
"token": creds["token"],
"lang": "zh_CN",
"f": "json",
"ajax": 1,
}
headers = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
"Referer": "https://mp.weixin.qq.com/",
"Cookie": creds["cookie"],
}
async with httpx.AsyncClient(timeout=30.0) as client:
resp = await client.get(
"https://mp.weixin.qq.com/cgi-bin/appmsgpublish",
params=params,
headers=headers,
)
resp.raise_for_status()
result = resp.json()
base_resp = result.get("base_resp", {})
if base_resp.get("ret") != 0:
logger.warning("WeChat API error for %s: ret=%s",
fakeid[:8], base_resp.get("ret"))
return []
publish_page = result.get("publish_page", {})
if isinstance(publish_page, str):
try:
publish_page = json.loads(publish_page)
except (json.JSONDecodeError, ValueError):
return []
if not isinstance(publish_page, dict):
return []
articles = []
for item in publish_page.get("publish_list", []):
publish_info = item.get("publish_info", {})
if isinstance(publish_info, str):
try:
publish_info = json.loads(publish_info)
except (json.JSONDecodeError, ValueError):
continue
if not isinstance(publish_info, dict):
continue
for a in publish_info.get("appmsgex", []):
articles.append({
"aid": a.get("aid", ""),
"title": a.get("title", ""),
"link": a.get("link", ""),
"digest": a.get("digest", ""),
"cover": a.get("cover", ""),
"author": a.get("author", ""),
"publish_time": a.get("update_time", 0),
})
return articles
async def poll_now(self):
"""手动触发一次轮询"""
await self._poll_all()
rss_poller = RSSPoller()