tmwgsicp-wechat-download-api/routes/articles.py

238 lines
8.1 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Copyright (C) 2026 tmwgsicp
# Licensed under the GNU Affero General Public License v3.0
# See LICENSE file in the project root for full license text.
# SPDX-License-Identifier: AGPL-3.0-only
"""
文章列表API
获取公众号的文章列表
"""
from fastapi import APIRouter, Query, HTTPException
from pydantic import BaseModel
from typing import Optional, List, Dict
import json
import httpx
from utils.auth_manager import auth_manager
router = APIRouter()
class ArticleItem(BaseModel):
"""文章列表项"""
aid: str
title: str
link: str
update_time: int
create_time: int
digest: Optional[str] = None
cover: Optional[str] = None
author: Optional[str] = None
class ArticlesResponse(BaseModel):
"""文章列表响应"""
success: bool
data: Optional[Dict] = None
error: Optional[str] = None
@router.get("/articles", response_model=ArticlesResponse, summary="获取文章列表")
async def get_articles(
fakeid: str = Query(..., description="目标公众号的 FakeID通过搜索接口获取"),
begin: int = Query(0, description="偏移量,从第几条开始", ge=0, alias="begin"),
count: int = Query(10, description="获取数量,最大 100", ge=1, le=100),
keyword: Optional[str] = Query(None, description="在该公众号内搜索关键词(可选)")
):
"""
获取指定公众号的文章列表,支持分页。
**使用流程:**
1. 先调用 `GET /api/public/searchbiz` 搜索目标公众号
2. 从搜索结果中获取目标公众号的 `fakeid`
3. 使用 `fakeid` 调用本接口获取文章列表
**查询参数:**
- **fakeid** (必填): 目标公众号的 FakeID
- **begin** (可选): 偏移量,默认 0
- **count** (可选): 获取数量,默认 10最大 100
- **keyword** (可选): 在该公众号内搜索关键词
"""
try:
print(f"📋 获取文章列表: fakeid={fakeid[:8]}...")
# 获取认证信息用于请求微信API
creds = auth_manager.get_credentials()
if not creds or not isinstance(creds, dict):
raise HTTPException(
status_code=401,
detail="未登录或认证信息格式错误"
)
token = creds.get("token", "")
cookie = creds.get("cookie", "")
if not token or not cookie:
raise HTTPException(
status_code=401,
detail="登录信息不完整,请重新登录"
)
# 构建请求参数
is_searching = bool(keyword)
params = {
"sub": "search" if is_searching else "list",
"search_field": "7" if is_searching else "null",
"begin": begin,
"count": count,
"query": keyword or "",
"fakeid": fakeid,
"type": "101_1",
"free_publish_type": 1,
"sub_action": "list_ex",
"token": token,
"lang": "zh_CN",
"f": "json",
"ajax": 1,
}
# 请求微信API
url = "https://mp.weixin.qq.com/cgi-bin/appmsgpublish"
headers = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
"Referer": "https://mp.weixin.qq.com/",
"Cookie": cookie
}
async with httpx.AsyncClient(timeout=30.0) as client:
response = await client.get(url, params=params, headers=headers)
response.raise_for_status()
result = response.json()
# 检查返回结果
base_resp = result.get("base_resp", {})
if base_resp.get("ret") != 0:
error_msg = base_resp.get("err_msg", "未知错误")
ret_code = base_resp.get("ret")
print(f"❌ 微信API返回错误: ret={ret_code}, msg={error_msg}")
# 检查是否需要重新登录
if "login" in error_msg.lower() or ret_code == 200003:
return ArticlesResponse(
success=False,
error="登录已过期,请重新登录"
)
return ArticlesResponse(
success=False,
error=f"获取文章列表失败: ret={ret_code}, msg={error_msg}"
)
# 解析文章列表
publish_page = result.get("publish_page", {})
if isinstance(publish_page, str):
try:
publish_page = json.loads(publish_page)
except (json.JSONDecodeError, ValueError):
return ArticlesResponse(
success=False,
error="数据格式错误: publish_page 无法解析"
)
if not isinstance(publish_page, dict):
return ArticlesResponse(
success=False,
error=f"数据格式错误: publish_page 类型为 {type(publish_page).__name__}"
)
publish_list = publish_page.get("publish_list", [])
articles = []
for item in publish_list:
publish_info = item.get("publish_info", {})
# publish_info可能是字符串JSON需要解析
if isinstance(publish_info, str):
try:
publish_info = json.loads(publish_info)
except (json.JSONDecodeError, ValueError):
continue
if not isinstance(publish_info, dict):
continue # 跳过非字典类型
appmsgex = publish_info.get("appmsgex", [])
# 处理每篇文章
for article in appmsgex:
articles.append({
"aid": article.get("aid", ""),
"title": article.get("title", ""),
"link": article.get("link", ""),
"update_time": article.get("update_time", 0),
"create_time": article.get("create_time", 0),
"digest": article.get("digest", ""),
"cover": article.get("cover", ""),
"author": article.get("author", "")
})
return ArticlesResponse(
success=True,
data={
"articles": articles,
"total": publish_page.get("total_count", 0),
"begin": begin,
"count": len(articles),
"keyword": keyword
}
)
except httpx.HTTPStatusError as e:
print(f"❌ HTTP错误: {e.response.status_code}")
return ArticlesResponse(
success=False,
error=f"请求失败: HTTP {e.response.status_code}"
)
except httpx.RequestError as e:
print(f"❌ 请求错误: {e}")
return ArticlesResponse(
success=False,
error=f"网络请求失败: {str(e)}"
)
except Exception as e:
import traceback
print(f"❌ 未知错误: {e}")
traceback.print_exc()
return ArticlesResponse(
success=False,
error=f"服务器内部错误,请稍后重试"
)
@router.get("/articles/search", response_model=ArticlesResponse, summary="搜索公众号文章")
async def search_articles(
fakeid: str = Query(..., description="目标公众号的 FakeID"),
query: str = Query(..., description="搜索关键词", alias="query"),
begin: int = Query(0, description="偏移量,默认 0", ge=0, alias="begin"),
count: int = Query(10, description="获取数量,默认 10最大 100", ge=1, le=100)
):
"""
在指定公众号内按关键词搜索文章。
**查询参数:**
- **fakeid** (必填): 目标公众号的 FakeID
- **query** (必填): 搜索关键词
- **begin** (可选): 偏移量,默认 0
- **count** (可选): 获取数量,默认 10最大 100
"""
return await get_articles(
fakeid=fakeid,
keyword=query,
begin=begin,
count=count
)