commit 3c14b15c8770a1cafba705b8a486f1c3316a28c9 Author: tmwgsicp <2589462900@qq.com> Date: Tue Feb 24 10:22:59 2026 +0800 init: wechat-download-api v1.0.0 Co-authored-by: Cursor diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..241480b --- /dev/null +++ b/.gitattributes @@ -0,0 +1,14 @@ +# Default: auto-detect +* text=auto + +# Force LF for shell scripts +*.sh text eol=lf +*.py text eol=lf + +# Force CRLF for Windows scripts +*.bat text eol=crlf + +# Binary files +*.jpg binary +*.png binary +*.ico binary diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ee527b8 --- /dev/null +++ b/.gitignore @@ -0,0 +1,62 @@ +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg + +# Virtual Environment +venv/ +env/ +ENV/ +env.bak/ +venv.bak/ + +# Environment Variables +.env +.env.local + +# IDE +.vscode/ +.idea/ +.cursor/ +*.swp +*.swo +*~ + +# OS +.DS_Store +Thumbs.db + +# Test files +test_qr.png +*.log + +# QR codes (generated dynamically) +static/qrcodes/ + +# Temporary files +*.tmp +*.bak + +# Example output files +articles*.json +fetch_progress.json + +# Logs +logs/ diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..be3f7b2 --- /dev/null +++ b/LICENSE @@ -0,0 +1,661 @@ + GNU AFFERO GENERAL PUBLIC LICENSE + Version 3, 19 November 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/> + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU Affero General Public License is a free, copyleft license for +software and other kinds of works, specifically designed to ensure +cooperation with the community in the case of network server software. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. 
By contrast, +our General Public Licenses are intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + Developers that use our General Public Licenses protect your rights +with two steps: (1) assert copyright on the software, and (2) offer +you this License which gives you legal permission to copy, distribute +and/or modify the software. + + A secondary benefit of defending all users' freedom is that +improvements made in alternate versions of the program, if they +receive widespread use, become available for other developers to +incorporate. Many developers of free software are heartened and +encouraged by the resulting cooperation. However, in the case of +software used on network servers, this result may fail to come about. +The GNU General Public License permits making a modified version and +letting the public access it on a server without ever releasing its +source code to the public. + + The GNU Affero General Public License is designed specifically to +ensure that, in such cases, the modified source code becomes available +to the community. It requires the operator of a network server to +provide the source code of the modified version running there to the +users of that server. Therefore, public use of a modified version, on +a publicly accessible server, gives the public access to the source +code of the modified version. + + An older license, called the Affero General Public License and +published by Affero, was designed to accomplish similar goals. 
This is +a different license, not a version of the Affero GPL, but Affero has +released a new version of the Affero GPL which permits relicensing under +this license. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU Affero General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. + + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. 
+ + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. + + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. 
However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. 
+ + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. + + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. 
+ + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. + + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. 
+ + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. 
+ + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. 
+ + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. 
If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. + + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. 
If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. + + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. 
+ + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. 
For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. + + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. 
+ + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. 
You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Remote Network Interaction; Use with the GNU General Public License. 
+ + Notwithstanding any other provision of this License, if you modify the +Program, your modified version must prominently offer all users +interacting with it remotely through a computer network (if your version +supports such interaction) an opportunity to receive the Corresponding +Source of your version by providing access to the Corresponding Source +from a network server at no charge, through some standard or customary +means of facilitating copying of software. This Corresponding Source +shall include the Corresponding Source for any work covered by version 3 +of the GNU General Public License that is incorporated pursuant to the +following paragraph. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU General Public License into a single +combined work, and to convey the resulting work. The terms of this +License will continue to apply to the part which is the covered work, +but the work with which it is combined will remain governed by version +3 of the GNU General Public License. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU Affero General Public License from time to time. Such new versions +will be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU Affero General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU Affero General Public License, you may choose any version ever published +by the Free Software Foundation. 
+ + If the Program specifies that a proxy can decide which future +versions of the GNU Affero General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. 
+ + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + +Also add information on how to contact you by electronic and paper mail. + + If your software can interact with users remotely through a computer +network, you should also make sure that it provides a way for users to +get its source. For example, if your program is a web application, its +interface could display a "Source" link that leads users to an archive +of the code. 
There are many ways you could offer source, and different +solutions will be better for different programs; see section 13 for the +specific requirements. + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU AGPL, see +<https://www.gnu.org/licenses/>. diff --git a/README.md b/README.md new file mode 100644 index 0000000..02b8d13 --- /dev/null +++ b/README.md @@ -0,0 +1,352 @@ +
+ +# WeChat Download API + +### 微信公众号文章获取 API 服务 + +**扫码登录 | 文章抓取 | 公众号搜索 | 一键部署** + +[![GitHub stars](https://img.shields.io/github/stars/tmwgsicp/wechat-download-api?style=for-the-badge&logo=github)](https://github.com/tmwgsicp/wechat-download-api/stargazers) +[![License](https://img.shields.io/badge/License-AGPL%203.0-blue?style=for-the-badge)](LICENSE) +[![Python](https://img.shields.io/badge/Python-3.8+-3776AB?style=for-the-badge&logo=python&logoColor=white)](https://www.python.org/) +[![FastAPI](https://img.shields.io/badge/FastAPI-0.104+-009688?style=for-the-badge&logo=fastapi&logoColor=white)](https://fastapi.tiangolo.com/) + +
+ +--- + +## 功能特性 + +- **文章内容获取** — 通过 URL 获取文章完整内容(标题、作者、正文、图片) +- **文章列表** — 获取任意公众号历史文章列表,支持分页 +- **文章搜索** — 在指定公众号文章中按关键词搜索 +- **公众号搜索** — 搜索公众号并获取 FakeID +- **图片代理** — 代理微信 CDN 图片,解决防盗链问题 +- **扫码登录** — 微信公众平台扫码登录,凭证自动保存 +- **自动限频** — 内置三层限频机制(全局/IP/文章间隔),防止触发微信风控 +- **Webhook 通知** — 登录过期、触发验证等事件自动推送 +- **API 文档** — 自动生成 Swagger UI,在线调试所有接口 + +
+ 管理面板 +

管理面板 — 登录状态、接口文档、在线测试一站式管理

+
+ +--- + +## 使用前提 + +> 本工具需要通过微信公众平台后台的登录凭证来调用接口,因此使用前需要: + +1. **拥有一个微信公众号**(订阅号、服务号均可) +2. 部署并启动服务后,访问登录页面用**公众号管理员微信**扫码登录 +3. 登录成功后凭证自动保存到 `.env` 文件,有效期约 **4 天**,过期后需重新扫码 + +登录后即可通过 API 获取**任意公众号**的公开文章(不限于自己的公众号)。 + +--- + +## 快速开始 + +### 方式一:一键启动(推荐) + +**Windows:** +```bash +start.bat +``` + +**Linux / macOS:** +```bash +chmod +x start.sh +./start.sh +``` + +脚本会自动完成环境检查、虚拟环境创建、依赖安装和服务启动。 + +> Linux 生产环境可使用 `sudo bash start.sh` 自动配置 systemd 服务和开机自启。 + +### 方式二:手动安装 + +```bash +# 创建虚拟环境 +python -m venv venv +source venv/bin/activate # Linux/macOS +# venv\Scripts\activate # Windows + +# 安装依赖 +pip install -r requirements.txt + +# 启动 +python app.py +``` + +### 访问服务 + +| 地址 | 说明 | +|------|------| +| http://localhost:5000 | 管理面板 | +| http://localhost:5000/login.html | 扫码登录 | +| http://localhost:5000/api/docs | Swagger API 文档 | +| http://localhost:5000/api/health | 健康检查 | + +--- + +## API 接口 + +### 获取文章内容 + +`POST /api/article` — 解析微信公众号文章,返回标题、正文、图片等结构化数据 + +| 参数 | 类型 | 必填 | 说明 | +|------|------|------|------| +| `url` | string | 是 | 微信文章链接(`https://mp.weixin.qq.com/s/...`) | + +请求示例: + +```bash +curl -X POST http://localhost:5000/api/article \ + -H "Content-Type: application/json" \ + -d '{"url": "https://mp.weixin.qq.com/s/xxxxx"}' +``` + +返回字段:`title` 标题、`content` HTML 正文、`plain_content` 纯文本正文、`author` 作者、`publish_time` 发布时间戳、`images` 图片列表 + +### 搜索公众号 + +`GET /api/public/searchbiz` — 按关键词搜索微信公众号,获取 FakeID + +| 参数 | 类型 | 必填 | 说明 | +|------|------|------|------| +| `query` | string | 是 | 搜索关键词(公众号名称) | + +请求示例: + +```bash +curl "http://localhost:5000/api/public/searchbiz?query=公众号名称" +``` + +返回字段:`list[]` 公众号列表,每项包含 `fakeid`、`nickname`、`alias`、`round_head_img` + +### 获取文章列表 + +`GET /api/public/articles` — 获取指定公众号的文章列表,支持分页 + +| 参数 | 类型 | 必填 | 说明 | +|------|------|------|------| +| `fakeid` | string | 是 | 目标公众号的 FakeID(从搜索接口获取) | +| `begin` | int | 否 | 偏移量,默认 `0` | +| `count` | int | 否 | 获取数量,默认 `10`,最大 `100` | +| `keyword` | string | 否 | 在该公众号内搜索关键词 | + +请求示例: + 
+```bash +# 获取前 50 篇 +curl "http://localhost:5000/api/public/articles?fakeid=YOUR_FAKEID&begin=0&count=50" + +# 获取第 51-100 篇 +curl "http://localhost:5000/api/public/articles?fakeid=YOUR_FAKEID&begin=50&count=50" +``` + +### 搜索公众号文章 + +`GET /api/public/articles/search` — 在指定公众号内按关键词搜索文章 + +| 参数 | 类型 | 必填 | 说明 | +|------|------|------|------| +| `fakeid` | string | 是 | 目标公众号的 FakeID | +| `query` | string | 是 | 搜索关键词 | +| `begin` | int | 否 | 偏移量,默认 `0` | +| `count` | int | 否 | 获取数量,默认 `10`,最大 `100` | + +请求示例: + +```bash +curl "http://localhost:5000/api/public/articles/search?fakeid=YOUR_FAKEID&query=关键词" +``` + +### 其他接口 + +| 方法 | 路径 | 说明 | +|------|------|------| +| `GET` | `/api/image?url=IMG_URL` | 图片代理(仅限微信 CDN 域名) | +| `GET` | `/api/health` | 健康检查 | +| `GET` | `/api/stats` | 限频统计 | +| `POST` | `/api/login/session/{id}` | 初始化登录会话 | +| `GET` | `/api/login/getqrcode` | 获取登录二维码 | +| `GET` | `/api/login/scan` | 检查扫码状态 | +| `POST` | `/api/login/bizlogin` | 完成登录 | +| `GET` | `/api/login/info` | 获取登录信息 | +| `GET` | `/api/admin/status` | 查询登录状态 | +| `POST` | `/api/admin/logout` | 退出登录 | + +完整的接口文档请访问 http://localhost:5000/api/docs + +--- + +## 配置说明 + +复制 `env.example` 为 `.env`,登录后凭证会自动保存: + +```bash +cp env.example .env +``` + +| 配置项 | 说明 | 默认值 | +|--------|------|--------| +| `WECHAT_TOKEN` | 微信 Token(登录后自动填充) | - | +| `WECHAT_COOKIE` | 微信 Cookie(登录后自动填充) | - | +| `WECHAT_FAKEID` | 公众号 FakeID(登录后自动填充) | - | +| `WEBHOOK_URL` | Webhook 通知地址(可选) | 空 | +| `RATE_LIMIT_GLOBAL` | 全局每分钟请求上限 | 10 | +| `RATE_LIMIT_PER_IP` | 单 IP 每分钟请求上限 | 5 | +| `RATE_LIMIT_ARTICLE_INTERVAL` | 文章请求最小间隔(秒) | 3 | +| `PORT` | 服务端口 | 5000 | + +--- + +## 项目结构 + +``` +├── app.py # FastAPI 主应用 +├── requirements.txt # Python 依赖 +├── env.example # 环境变量示例 +├── routes/ # API 路由 +│ ├── article.py # 文章内容获取 +│ ├── articles.py # 文章列表 +│ ├── search.py # 公众号搜索 +│ ├── login.py # 扫码登录 +│ ├── admin.py # 管理接口 +│ ├── image.py # 图片代理 +│ ├── health.py # 健康检查 +│ └── stats.py # 统计信息 +├── utils/ # 工具模块 +│ ├── 
auth_manager.py # 认证管理 +│ ├── helpers.py # HTML 解析 +│ ├── rate_limiter.py # 限频器 +│ └── webhook.py # Webhook 通知 +└── static/ # 前端页面 +``` + +--- + +## 常见问题 + +
+提示"服务器未登录" + +访问 http://localhost:5000/login.html 扫码登录,凭证会自动保存到 `.env`。 +
+ +
+触发微信风控 / 需要验证 + +1. 在浏览器中打开提示的文章 URL 完成验证 +2. 等待 30 分钟后重试 +3. 降低请求频率(系统已内置自动限频) +
+ +
+如何获取公众号的 FakeID + +调用搜索接口:`GET /api/public/searchbiz?query=公众号名称`,从返回结果的 `fakeid` 字段获取。 +
+ +
+Token 多久过期 + +Cookie 登录有效期约 4 天,过期后需重新扫码登录。配置 `WEBHOOK_URL` 可以在过期时收到通知。 +
+ +
+可以同时登录多个公众号吗 + +当前版本不支持多账号。建议部署多个实例,每个登录不同公众号。 +
+ +--- + +## 技术栈 + +| 层级 | 技术 | +|------|------| +| **Web 框架** | FastAPI | +| **ASGI 服务器** | Uvicorn | +| **HTTP 客户端** | HTTPX | +| **配置管理** | python-dotenv | +| **运行环境** | Python 3.8+ | + +--- + +## 开源协议 + +本项目采用 **AGPL 3.0** 协议开源。 + +| 使用场景 | 是否允许 | +|---------|---------| +| 个人学习和研究 | 允许,免费使用 | +| 企业内部使用 | 允许,免费使用 | +| 修改代码内部使用 | 允许,免费使用 | +| 修改后对外提供网络服务 | 需开源修改后的代码 | +| 集成到产品中销售 | 需开源或取得商业授权 | + +> **AGPL 3.0 核心要求**:修改代码并通过网络提供服务时,必须公开源代码。 + +详见 [LICENSE](LICENSE) 文件。 + +### 免责声明 + +- 本软件按"原样"提供,不提供任何形式的担保 +- 本项目仅供学习和研究目的,请遵守微信公众平台相关服务条款 +- 使用者对自己的操作承担全部责任 +- 因使用本软件导致的任何损失,开发者不承担责任 + +--- + +## 参与贡献 + +由于个人精力有限,目前**暂不接受 PR**,但非常欢迎: + +- **提交 Issue** — 报告 Bug、提出功能建议 +- **Fork 项目** — 自由修改和定制 +- **Star 支持** — 给项目点 Star,让更多人看到 + +--- + +## 联系方式 + + + + + + +
+
+ 个人微信
+ 技术交流 · 商务合作 +
+
+ 赞赏支持
+ 开源不易,感谢支持 +
+ +- **GitHub Issues**: [提交问题](https://github.com/tmwgsicp/wechat-download-api/issues) + +--- + +## 致谢 + +- [FastAPI](https://fastapi.tiangolo.com/) — 高性能 Python Web 框架 +- [HTTPX](https://www.python-httpx.org/) — 现代化 HTTP 客户端 + +--- + +
+ +**如果觉得项目有用,请给个 Star 支持一下!** + +[![Star History Chart](https://api.star-history.com/svg?repos=tmwgsicp/wechat-download-api&type=Date)](https://star-history.com/#tmwgsicp/wechat-download-api&Date) + +Made with ❤️ by [tmwgsicp](https://github.com/tmwgsicp) + +
diff --git a/app.py b/app.py new file mode 100644 index 0000000..7e706e8 --- /dev/null +++ b/app.py @@ -0,0 +1,145 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# Copyright (C) 2026 tmwgsicp +# Licensed under the GNU Affero General Public License v3.0 +# See LICENSE file in the project root for full license text. +# SPDX-License-Identifier: AGPL-3.0-only +""" +微信公众号文章API服务 - FastAPI版本 +主应用文件 +""" + +from fastapi import FastAPI +from fastapi.staticfiles import StaticFiles +from fastapi.responses import FileResponse, JSONResponse, HTMLResponse +from fastapi.middleware.cors import CORSMiddleware +import os +from pathlib import Path + +# 导入路由 +from routes import article, articles, search, admin, login, image, health, stats + +API_DESCRIPTION = """ +微信公众号文章下载 API,支持文章解析、公众号搜索、文章列表获取等功能。 + +## 快速开始 + +1. 访问 `/login.html` 扫码登录微信公众号后台 +2. 调用 `GET /api/public/searchbiz?query=公众号名称` 搜索目标公众号 +3. 从返回结果中取 `fakeid`,调用 `GET /api/public/articles?fakeid=xxx` 获取文章列表 +4. 对每篇文章调用 `POST /api/article` 获取完整内容 + +## 认证说明 + +所有核心接口都需要先登录。登录后凭证自动保存到 `.env` 文件,服务重启后无需重新登录(有效期约 4 天)。 +""" + +app = FastAPI( + title="WeChat Download API", + description=API_DESCRIPTION, + version="1.0.0", + docs_url="/api/docs", + redoc_url=None, + openapi_url="/api/openapi.json", + license_info={ + "name": "AGPL-3.0", + "url": "https://www.gnu.org/licenses/agpl-3.0.html", + }, +) + +# CORS配置 +app.add_middleware( + CORSMiddleware, + allow_origins=["*"], + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) + +# 注册路由(注意:articles.router 必须在 search.router 之前注册,避免路由冲突) +app.include_router(health.router, prefix="/api", tags=["健康检查"]) +app.include_router(stats.router, prefix="/api", tags=["统计信息"]) +app.include_router(article.router, prefix="/api", tags=["文章内容"]) +app.include_router(articles.router, prefix="/api/public", tags=["文章列表"]) # 必须先注册 +app.include_router(search.router, prefix="/api/public", tags=["公众号搜索"]) # 后注册 +app.include_router(admin.router, prefix="/api/admin", tags=["管理"]) 
+app.include_router(login.router, prefix="/api/login", tags=["登录"]) +app.include_router(image.router, prefix="/api", tags=["图片代理"]) + +# 静态文件 +static_dir = Path(__file__).parent / "static" +if static_dir.exists(): + app.mount("/static", StaticFiles(directory=str(static_dir)), name="static") + +@app.get("/api/redoc", include_in_schema=False) +async def redoc_html(): + """ReDoc 文档(使用 cdnjs 加速)""" + return HTMLResponse(""" + +WeChat Download API - ReDoc + + + + + + + +""") + +# 静态页面路由 +@app.get("/", include_in_schema=False) +async def root(): + """首页 - 重定向到管理页面""" + return FileResponse(static_dir / "admin.html") + +@app.get("/admin.html", include_in_schema=False) +async def admin_page(): + """管理页面""" + return FileResponse(static_dir / "admin.html") + +@app.get("/login.html", include_in_schema=False) +async def login_page(): + """登录页面""" + return FileResponse(static_dir / "login.html") + +@app.get("/verify.html", include_in_schema=False) +async def verify_page(): + """验证页面""" + return FileResponse(static_dir / "verify.html") + +# 启动事件 +@app.on_event("startup") +async def startup_event(): + """启动时检查配置""" + env_file = Path(__file__).parent / ".env" + if not env_file.exists(): + print("\n" + "=" * 60) + print("[WARNING] .env file not found") + print("=" * 60) + print("Please configure .env file or login via admin page") + print("Visit: http://localhost:5000/admin.html") + print("=" * 60 + "\n") + else: + print("\n" + "=" * 60) + print("[OK] .env file loaded") + print("=" * 60 + "\n") + +if __name__ == "__main__": + import uvicorn + + print("=" * 60) + print("Wechat Article API Service - FastAPI Version") + print("=" * 60) + print("Admin Page: http://localhost:5000/admin.html") + print("API Docs: http://localhost:5000/api/docs") + print("ReDoc Docs: http://localhost:5000/api/redoc") + print("First time? 
Please login via admin page") + print("=" * 60) + + uvicorn.run( + "app:app", + host="0.0.0.0", + port=5000, + reload=True, + log_level="info" + ) diff --git a/assets/dashboard.jpg b/assets/dashboard.jpg new file mode 100644 index 0000000..daea658 Binary files /dev/null and b/assets/dashboard.jpg differ diff --git a/assets/qrcode/.gitkeep b/assets/qrcode/.gitkeep new file mode 100644 index 0000000..0184b71 --- /dev/null +++ b/assets/qrcode/.gitkeep @@ -0,0 +1,3 @@ +# 联系方式二维码存放目录 +# 请上传微信、QQ 群二维码等 + diff --git a/assets/qrcode/sponsor.jpg b/assets/qrcode/sponsor.jpg new file mode 100644 index 0000000..9a03304 Binary files /dev/null and b/assets/qrcode/sponsor.jpg differ diff --git a/assets/qrcode/wechat.jpg b/assets/qrcode/wechat.jpg new file mode 100644 index 0000000..cb2c5d7 Binary files /dev/null and b/assets/qrcode/wechat.jpg differ diff --git a/env.example b/env.example new file mode 100644 index 0000000..d2269ae --- /dev/null +++ b/env.example @@ -0,0 +1,24 @@ +# WeChat Download API 配置文件 +# 复制为 .env 后使用: cp env.example .env + +# 认证信息 (登录后自动填充,无需手动配置) +WECHAT_TOKEN= +WECHAT_COOKIE= +WECHAT_FAKEID= +WECHAT_NICKNAME= +WECHAT_EXPIRE_TIME= + +# 限频配置 (单位: 次/分钟 或 秒) +RATE_LIMIT_GLOBAL=10 +RATE_LIMIT_PER_IP=5 +RATE_LIMIT_ARTICLE_INTERVAL=3 + +# Webhook 通知 (留空则禁用,支持企业微信群机器人) +WEBHOOK_URL= +# 同一事件通知最小间隔(秒),防止重复轰炸 +WEBHOOK_NOTIFICATION_INTERVAL=300 + +# 服务配置 +PORT=5000 +HOST=0.0.0.0 +DEBUG=false diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..7b408a7 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,5 @@ +fastapi==0.104.1 +uvicorn[standard]==0.24.0 +pydantic==2.5.0 +httpx==0.25.2 +python-dotenv==1.0.0 diff --git a/routes/__init__.py b/routes/__init__.py new file mode 100644 index 0000000..b8a92b8 --- /dev/null +++ b/routes/__init__.py @@ -0,0 +1,14 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# Copyright (C) 2026 tmwgsicp +# Licensed under the GNU Affero General Public License v3.0 +# See LICENSE file in the project root for full 
license text. +# SPDX-License-Identifier: AGPL-3.0-only +""" +路由模块初始化 +""" + +# 导出所有路由模块 +from . import article, articles, search, admin, login, image, health, stats + +__all__ = ['article', 'articles', 'search', 'admin', 'login', 'image', 'health', 'stats'] diff --git a/routes/admin.py b/routes/admin.py new file mode 100644 index 0000000..6adc497 --- /dev/null +++ b/routes/admin.py @@ -0,0 +1,51 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# Copyright (C) 2026 tmwgsicp +# Licensed under the GNU Affero General Public License v3.0 +# See LICENSE file in the project root for full license text. +# SPDX-License-Identifier: AGPL-3.0-only +""" +管理路由 - FastAPI版本 +""" + +from fastapi import APIRouter +from pydantic import BaseModel +from typing import Optional +from utils.auth_manager import auth_manager + +router = APIRouter() + +class StatusResponse(BaseModel): + """状态响应模型""" + authenticated: bool + loggedIn: bool + account: str + nickname: Optional[str] = "" + fakeid: Optional[str] = "" + expireTime: Optional[int] = 0 + isExpired: Optional[bool] = False + status: str + +@router.get("/status", response_model=StatusResponse, summary="获取登录状态") +async def get_status(): + """ + 获取当前登录状态 + + Returns: + 登录状态信息 + """ + return auth_manager.get_status() + +@router.post("/logout", summary="退出登录") +async def logout(): + """ + 退出登录,清除凭证 + + Returns: + 操作结果 + """ + success = auth_manager.clear_credentials() + if success: + return {"success": True, "message": "已退出登录"} + else: + return {"success": False, "message": "退出登录失败"} diff --git a/routes/article.py b/routes/article.py new file mode 100644 index 0000000..e758fcc --- /dev/null +++ b/routes/article.py @@ -0,0 +1,172 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# Copyright (C) 2026 tmwgsicp +# Licensed under the GNU Affero General Public License v3.0 +# See LICENSE file in the project root for full license text. 
# SPDX-License-Identifier: AGPL-3.0-only
"""
Article route (FastAPI version).

Exposes ``POST /article``: downloads one WeChat Official Account article using
the logged-in account's cookie and returns its parsed content.
"""

from fastapi import APIRouter, HTTPException, Request
from pydantic import BaseModel, Field
from typing import Optional, List
from urllib.parse import urlparse
import re
import httpx
from utils.auth_manager import auth_manager
from utils.helpers import extract_article_info, parse_article_url
from utils.rate_limiter import rate_limiter
from utils.webhook import webhook

router = APIRouter()

# The request below carries the account's WeChat Cookie header, so the target
# host must be pinned: otherwise any caller could make the server send those
# credentials (or an arbitrary internal request) to a host of their choosing.
ALLOWED_ARTICLE_HOSTS = frozenset({"mp.weixin.qq.com"})

# Patterns used to recover the article identifiers from the page source when
# the request URL itself (e.g. a short /s/xxxx link) does not carry them.
_MSG_LINK_RE = re.compile(r'var\s+msg_link\s*=\s*"([^"]+)"')
_LOCATION_HREF_RE = re.compile(r'window\.location\.href\s*=\s*"([^"]+)"')
_PAGE_VAR_RES = {
    "__biz": re.compile(r'var\s+__biz\s*=\s*"([^"]+)"'),
    "mid": re.compile(r'var\s+mid\s*=\s*"([^"]+)"'),
    "idx": re.compile(r'var\s+idx\s*=\s*"([^"]+)"'),
    "sn": re.compile(r'var\s+sn\s*=\s*"([^"]+)"'),
}


# NOTE: the class and endpoint docstrings below are published verbatim in the
# generated OpenAPI schema / Swagger UI, so their text is left untouched.

# Request body of POST /article.
class ArticleRequest(BaseModel):
    """文章请求"""
    url: str = Field(..., description="微信文章链接,如 https://mp.weixin.qq.com/s/xxxxx")


# Parsed article payload returned on success.
class ArticleData(BaseModel):
    """文章数据"""
    title: str = Field(..., description="文章标题")
    content: str = Field(..., description="文章 HTML 正文(保留原始排版)")
    plain_content: str = Field("", description="纯文本正文(去除所有 HTML 标签,适合直接阅读或 AI 处理)")
    images: List[str] = Field(default_factory=list, description="文章内图片 URL 列表")
    author: str = Field("", description="作者")
    publish_time: int = Field(0, description="发布时间戳(秒)")
    publish_time_str: Optional[str] = Field(None, description="可读发布时间,如 2026-02-24 09:00:00")


# Response envelope: exactly one of `data` / `error` is populated.
class ArticleResponse(BaseModel):
    """文章响应"""
    success: bool = Field(..., description="是否成功")
    data: Optional[ArticleData] = Field(None, description="文章数据,失败时为 null")
    error: Optional[str] = Field(None, description="错误信息,成功时为 null")


def _is_wechat_article_url(url: str) -> bool:
    """Return True when *url* is an http(s) URL on an allowed WeChat host."""
    try:
        parsed = urlparse(url)
        hostname = parsed.hostname
    except ValueError:
        # urlparse rejects some malformed inputs (e.g. a broken IPv6 literal).
        return False
    return parsed.scheme in ("http", "https") and hostname in ALLOWED_ARTICLE_HOSTS


def _has_biz(params) -> bool:
    """Return True when *params* is a mapping with a non-empty ``__biz``."""
    return bool(params and params.get("__biz"))


def _resolve_article_params(url: str, html: str):
    """Find the article identifiers (__biz, mid, idx, sn).

    Tries, in order: the request URL, the ``msg_link`` variable in the page,
    a ``window.location.href`` assignment, and finally the individual
    ``var __biz/mid/idx/sn`` declarations. Returns None when nothing usable
    is found; the caller passes that straight to ``extract_article_info``.
    """
    params = parse_article_url(url)
    if _has_biz(params):
        return params

    for pattern in (_MSG_LINK_RE, _LOCATION_HREF_RE):
        match = pattern.search(html)
        if match:
            # Links embedded in the page are HTML-escaped; decode "&amp;" so
            # the query string splits into its real parameters.
            params = parse_article_url(match.group(1).replace("&amp;", "&"))
            if _has_biz(params):
                return params

    matches = {key: pattern.search(html) for key, pattern in _PAGE_VAR_RES.items()}
    if all(matches.values()):
        return {key: match.group(1) for key, match in matches.items()}
    return None


@router.post("/article", response_model=ArticleResponse, summary="获取文章内容")
async def get_article(article_request: ArticleRequest, request: Request):
    """
    解析微信公众号文章,返回标题、正文、图片等结构化数据。

    **请求体参数:**
    - **url** (必填): 微信文章链接,支持 `https://mp.weixin.qq.com/s/xxxxx` 格式

    **返回字段:**
    - `title`: 文章标题
    - `content`: HTML 正文(保留原始排版)
    - `plain_content`: 纯文本正文(去除所有 HTML 标签,适合直接阅读或 AI 处理)
    - `author`: 作者
    - `publish_time`: 发布时间戳
    - `images`: 文章内的图片列表
    """
    # Every failure is reported as HTTP 200 with success=False and a message,
    # which is the envelope existing callers of this endpoint rely on.

    # Rate limiting comes first so rejected requests never reach WeChat.
    client_ip = request.client.host if request.client else "unknown"
    allowed, error_msg = rate_limiter.check_rate_limit(client_ip, "/api/article")
    if not allowed:
        return {
            "success": False,
            "error": f"⏱️ {error_msg}"
        }

    credentials = auth_manager.get_credentials()
    if not credentials:
        return {
            "success": False,
            "error": "服务器未登录,请先访问管理页面扫码登录"
        }

    # Refuse any URL outside the WeChat article host before the cookie is
    # attached to an outgoing request.
    if not _is_wechat_article_url(article_request.url):
        return {
            "success": False,
            "error": "无效的文章链接:仅支持 https://mp.weixin.qq.com/ 下的文章"
        }

    headers = {
        "Cookie": credentials["cookie"],
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36 MicroMessenger/7.0.20.1781(0x6700143B) NetType/WIFI MiniProgramEnv/Windows WindowsWechat/WMPF XWEB/8391",
        "Referer": "https://mp.weixin.qq.com/"
    }

    try:
        async with httpx.AsyncClient(timeout=30.0) as client:
            response = await client.get(article_request.url, headers=headers, follow_redirects=True)
            response.raise_for_status()
            html = response.text

        # A real article page always contains the "js_content" container;
        # without it, work out which failure page WeChat served instead.
        if "js_content" not in html:
            if "verify" in html or "验证" in html or "环境异常" in html:
                await webhook.notify('verification_required', {
                    'url': article_request.url,
                    'ip': client_ip
                })
                return {
                    "success": False,
                    "error": "触发微信安全验证。解决方法:1) 在浏览器中打开文章URL完成验证 2) 等待30分钟后重试 3) 降低请求频率"
                }
            if "请登录" in html:
                await webhook.notify('login_expired', {
                    'account': auth_manager.get_nickname(),
                    'url': article_request.url
                })
                return {
                    "success": False,
                    "error": "登录已失效,请重新扫码登录"
                }
            return {
                "success": False,
                "error": "无法获取文章内容。可能原因:文章被删除、访问受限或需要验证。"
            }

        # params may legitimately be None; extract_article_info accepts that.
        params = _resolve_article_params(article_request.url, html)
        article_data = extract_article_info(html, params)

        return {
            "success": True,
            "data": article_data
        }

    except httpx.HTTPStatusError as e:
        return {
            "success": False,
            "error": f"HTTP错误: {e.response.status_code}"
        }
    except httpx.TimeoutException:
        return {
            "success": False,
            "error": "请求超时,请稍后重试"
        }
    except Exception as e:
        # Top-level boundary of the endpoint: report instead of raising so
        # the client still receives the success/error envelope.
        return {
            "success": False,
            "error": f"处理请求时发生错误: {str(e)}"
        }


# ---------------------------------------------------------------------------
# Patch framing of the next file, preserved from the original dump:
# diff --git a/routes/articles.py b/routes/articles.py
# new file mode 100644
# index 0000000..22da246
# --- /dev/null
# +++ b/routes/articles.py
# @@ -0,0 +1,237 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Copyright (C) 2026 tmwgsicp
# Licensed under the GNU Affero General Public License v3.0
# See LICENSE file in the project root for full license text.
# SPDX-License-Identifier: AGPL-3.0-only
"""
Article list API.

Lists (and keyword-searches) the published articles of a WeChat Official
Account through the mp.weixin.qq.com back-office endpoint.
"""

from fastapi import APIRouter, Query, HTTPException
from pydantic import BaseModel
from typing import Optional, List, Dict
import json
import traceback
import httpx
from utils.auth_manager import auth_manager

router = APIRouter()


# NOTE: the class and endpoint docstrings below are published verbatim in the
# generated OpenAPI schema / Swagger UI, so their text is left untouched.

# One entry of the article list.
class ArticleItem(BaseModel):
    """文章列表项"""
    aid: str
    title: str
    link: str
    update_time: int
    create_time: int
    digest: Optional[str] = None
    cover: Optional[str] = None
    author: Optional[str] = None


# Response envelope: exactly one of `data` / `error` is populated.
class ArticlesResponse(BaseModel):
    """文章列表响应"""
    success: bool
    data: Optional[Dict] = None
    error: Optional[str] = None


def _collect_articles(publish_list) -> list:
    """Flatten WeChat's ``publish_list`` into a list of plain article dicts.

    Each entry carries a ``publish_info`` that is either a dict or a JSON
    string holding one; entries that cannot be decoded, or that are not
    objects, are skipped rather than failing the whole request.
    """
    articles = []
    if not isinstance(publish_list, list):
        return articles

    for item in publish_list:
        if not isinstance(item, dict):
            continue

        publish_info = item.get("publish_info", {})
        if isinstance(publish_info, str):
            try:
                publish_info = json.loads(publish_info)
            except (json.JSONDecodeError, ValueError):
                continue
        if not isinstance(publish_info, dict):
            continue

        # "appmsgex" may be absent or null; treat both as "no articles".
        for article in publish_info.get("appmsgex") or []:
            if not isinstance(article, dict):
                continue
            articles.append({
                "aid": article.get("aid", ""),
                "title": article.get("title", ""),
                "link": article.get("link", ""),
                "update_time": article.get("update_time", 0),
                "create_time": article.get("create_time", 0),
                "digest": article.get("digest", ""),
                "cover": article.get("cover", ""),
                "author": article.get("author", "")
            })
    return articles


@router.get("/articles", response_model=ArticlesResponse, summary="获取文章列表")
async def get_articles(
    fakeid: str = Query(..., description="目标公众号的 FakeID(通过搜索接口获取)"),
    begin: int = Query(0, description="偏移量,从第几条开始", ge=0, alias="begin"),
    count: int = Query(10, description="获取数量,最大 100", ge=1, le=100),
    keyword: Optional[str] = Query(None, description="在该公众号内搜索关键词(可选)")
):
    """
    获取指定公众号的文章列表,支持分页。

    **使用流程:**
    1. 先调用 `GET /api/public/searchbiz` 搜索目标公众号
    2. 从搜索结果中获取目标公众号的 `fakeid`
    3. 使用 `fakeid` 调用本接口获取文章列表

    **查询参数:**
    - **fakeid** (必填): 目标公众号的 FakeID
    - **begin** (可选): 偏移量,默认 0
    - **count** (可选): 获取数量,默认 10,最大 100
    - **keyword** (可选): 在该公众号内搜索关键词
    """
    try:
        print(f"📋 获取文章列表: fakeid={fakeid[:8]}...")

        # Credentials of the logged-in account, used to call the WeChat API.
        creds = auth_manager.get_credentials()

        # These checks used to raise HTTPException(401) from inside this try
        # block; the blanket `except Exception` below caught it and reported
        # a misleading "internal error". Returning the regular failure
        # envelope keeps the response shape and tells the caller to log in.
        if not creds or not isinstance(creds, dict):
            return ArticlesResponse(
                success=False,
                error="未登录或认证信息格式错误"
            )

        token = creds.get("token", "")
        cookie = creds.get("cookie", "")

        if not token or not cookie:
            return ArticlesResponse(
                success=False,
                error="登录信息不完整,请重新登录"
            )

        # A keyword switches the upstream call from "list" to "search" mode.
        is_searching = bool(keyword)
        params = {
            "sub": "search" if is_searching else "list",
            "search_field": "7" if is_searching else "null",
            "begin": begin,
            "count": count,
            "query": keyword or "",
            "fakeid": fakeid,
            "type": "101_1",
            "free_publish_type": 1,
            "sub_action": "list_ex",
            "token": token,
            "lang": "zh_CN",
            "f": "json",
            "ajax": 1,
        }

        url = "https://mp.weixin.qq.com/cgi-bin/appmsgpublish"
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
            "Referer": "https://mp.weixin.qq.com/",
            "Cookie": cookie
        }

        async with httpx.AsyncClient(timeout=30.0) as client:
            response = await client.get(url, params=params, headers=headers)
            response.raise_for_status()
            result = response.json()

        # base_resp.ret == 0 means success; anything else is an API error.
        base_resp = result.get("base_resp", {})
        if base_resp.get("ret") != 0:
            # `or` also covers an explicit null err_msg from upstream.
            error_msg = base_resp.get("err_msg") or "未知错误"
            ret_code = base_resp.get("ret")

            print(f"❌ 微信API返回错误: ret={ret_code}, msg={error_msg}")

            # The code treats ret 200003, or "login" in the message, as an
            # expired session.
            if "login" in error_msg.lower() or ret_code == 200003:
                return ArticlesResponse(
                    success=False,
                    error="登录已过期,请重新登录"
                )

            return ArticlesResponse(
                success=False,
                error=f"获取文章列表失败: ret={ret_code}, msg={error_msg}"
            )

        # publish_page arrives either as a dict or as a JSON-encoded string.
        publish_page = result.get("publish_page", {})

        if isinstance(publish_page, str):
            try:
                publish_page = json.loads(publish_page)
            except (json.JSONDecodeError, ValueError):
                return ArticlesResponse(
                    success=False,
                    error="数据格式错误: publish_page 无法解析"
                )
        if not isinstance(publish_page, dict):
            return ArticlesResponse(
                success=False,
                error=f"数据格式错误: publish_page 类型为 {type(publish_page).__name__}"
            )

        articles = _collect_articles(publish_page.get("publish_list", []))

        return ArticlesResponse(
            success=True,
            data={
                "articles": articles,
                "total": publish_page.get("total_count", 0),
                "begin": begin,
                "count": len(articles),
                "keyword": keyword
            }
        )

    except httpx.HTTPStatusError as e:
        print(f"❌ HTTP错误: {e.response.status_code}")
        return ArticlesResponse(
            success=False,
            error=f"请求失败: HTTP {e.response.status_code}"
        )
    except httpx.RequestError as e:
        print(f"❌ 请求错误: {e}")
        return ArticlesResponse(
            success=False,
            error=f"网络请求失败: {str(e)}"
        )
    except Exception as e:
        # Top-level boundary of the endpoint: log the details server-side and
        # give the client a generic message.
        print(f"❌ 未知错误: {e}")
        traceback.print_exc()
        return ArticlesResponse(
            success=False,
            error="服务器内部错误,请稍后重试"
        )


@router.get("/articles/search", response_model=ArticlesResponse, summary="搜索公众号文章")
async def search_articles(
    fakeid: str = Query(..., description="目标公众号的 FakeID"),
    query: str = Query(..., description="搜索关键词", alias="query"),
    begin: int = Query(0, description="偏移量,默认 0", ge=0, alias="begin"),
    count: int = Query(10, description="获取数量,默认 10,最大 100", ge=1, le=100)
):
    """
    在指定公众号内按关键词搜索文章。

    **查询参数:**
    - **fakeid** (必填): 目标公众号的 FakeID
    - **query** (必填): 搜索关键词
    - **begin** (可选): 偏移量,默认 0
    - **count** (可选): 获取数量,默认 10,最大 100
    """
    # Thin wrapper: every argument is passed explicitly because calling the
    # endpoint function directly bypasses FastAPI's Query() default handling.
    return await get_articles(
        fakeid=fakeid,
        keyword=query,
        begin=begin,
        count=count
    )


# ---------------------------------------------------------------------------
# Patch framing of the next file, preserved from the original dump:
# diff --git a/routes/health.py b/routes/health.py
# new file mode 100644
# index 0000000..55efe1d
# --- /dev/null
# +++ b/routes/health.py
# @@ -0,0 +1,35 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Copyright (C) 2026 tmwgsicp
# Licensed under the GNU Affero General Public License v3.0
# See LICENSE file in the project root for full license text.
# SPDX-License-Identifier: AGPL-3.0-only
#
# Health-check route.

from fastapi import APIRouter
from pydantic import BaseModel

router = APIRouter()


# Static payload returned by GET /health.
class HealthResponse(BaseModel):
    """健康检查响应"""
    status: str
    version: str
    framework: str


@router.get("/health", response_model=HealthResponse, summary="健康检查")
async def health_check():
    """
    检查服务健康状态

    Returns:
        服务状态信息
    """
    # Liveness only: the values are constants, no dependency is probed.
    return {
        "status": "healthy",
        "version": "1.0.0",
        "framework": "FastAPI"
    }


# ---------------------------------------------------------------------------
# Patch framing of the next file, preserved from the original dump:
# diff --git a/routes/image.py b/routes/image.py
# new file mode 100644
# index 0000000..c5f2a73
# --- /dev/null
# +++ b/routes/image.py
# @@ -0,0 +1,71 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Copyright (C) 2026 tmwgsicp
# Licensed under the GNU Affero General Public License v3.0
# See LICENSE file in the project root for full license text.
# SPDX-License-Identifier: AGPL-3.0-only
"""
Image proxy route (FastAPI version).

Downloads an image from an allow-listed WeChat CDN host on the caller's
behalf, which sidesteps the CDN's hotlink protection.
"""

from fastapi import APIRouter, Query, HTTPException
from fastapi.responses import Response
from urllib.parse import urljoin, urlparse
import re
import httpx

router = APIRouter()

ALLOWED_IMAGE_HOSTS = {
    "mmbiz.qpic.cn",
    "mmbiz.qlogo.cn",
    "wx.qlogo.cn",
    "res.wx.qq.com",
}

# Redirects are followed by hand so that every hop is re-checked against the
# allow-list; this bounds how many hops are tolerated.
MAX_REDIRECTS = 3
_REDIRECT_STATUS_CODES = frozenset({301, 302, 303, 307, 308})

# Anything outside this set is replaced before the name is placed in a header.
_UNSAFE_FILENAME_CHARS = re.compile(r"[^A-Za-z0-9._-]")


def _validate_image_url(url: str):
    """Parse *url* and enforce scheme and host restrictions.

    Returns the parsed URL. Raises HTTPException 400 for a malformed URL or
    a non-http(s) scheme, and 403 for a host outside ALLOWED_IMAGE_HOSTS.
    """
    try:
        parsed = urlparse(url)
        hostname = parsed.hostname
    except ValueError:
        # urlparse rejects some malformed inputs (e.g. a broken IPv6 literal).
        raise HTTPException(status_code=400, detail="URL格式无效")
    if parsed.scheme not in ("http", "https"):
        raise HTTPException(status_code=400, detail="仅支持HTTP/HTTPS协议")
    if hostname not in ALLOWED_IMAGE_HOSTS:
        raise HTTPException(status_code=403, detail="仅允许代理微信CDN图片")
    return parsed


def _safe_filename(path: str) -> str:
    """Derive a header-safe file name from the last segment of a URL path."""
    last_segment = path.rstrip("/").rsplit("/", 1)[-1]
    return _UNSAFE_FILENAME_CHARS.sub("_", last_segment) or "image"


# NOTE: the endpoint docstring is published verbatim in the generated OpenAPI
# schema / Swagger UI, so its text is left untouched.
@router.get("/image", summary="图片代理下载")
async def proxy_image(url: str = Query(..., description="图片URL")):
    """
    代理下载微信图片,避免防盗链

    Args:
        url: 图片URL(仅允许微信CDN域名)

    Returns:
        图片数据流

    Raises:
        HTTPException: 当下载失败时
    """
    if not url:
        raise HTTPException(status_code=400, detail="URL参数不能为空")

    parsed = _validate_image_url(url)

    try:
        async with httpx.AsyncClient(timeout=30.0) as client:
            current_url = url
            for _ in range(MAX_REDIRECTS + 1):
                # Automatic redirects stay off: otherwise an allowed host
                # could bounce the request to an arbitrary (internal) address.
                response = await client.get(current_url, follow_redirects=False)
                location = response.headers.get("location")
                if response.status_code not in _REDIRECT_STATUS_CODES or not location:
                    break
                current_url = urljoin(str(response.url), location)
                _validate_image_url(current_url)
            else:
                raise HTTPException(status_code=502, detail="图片重定向次数过多")

            response.raise_for_status()

            # Only relay real images: echoing e.g. text/html from this origin
            # would let upstream content run in the API's own context.
            content_type = response.headers.get("content-type", "image/jpeg")
            if not content_type.lower().startswith("image/"):
                raise HTTPException(status_code=502, detail="上游返回的内容不是图片")

            # The body is already fully buffered, so a plain Response (which
            # also sets Content-Length) replaces the one-chunk stream. The
            # file name comes from the URL path only, sanitised and quoted,
            # so query strings, quotes or non-latin-1 characters cannot break
            # or inject into the header.
            return Response(
                content=response.content,
                media_type=content_type,
                headers={
                    "Content-Disposition": f'inline; filename="{_safe_filename(parsed.path)}"'
                }
            )

    except HTTPException:
        # Deliberate HTTP errors raised above must not be rewritten to 500.
        raise
    except httpx.HTTPStatusError as e:
        raise HTTPException(
            status_code=e.response.status_code,
            detail=f"下载图片失败: HTTP {e.response.status_code}"
        )
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"下载图片失败: {str(e)}")


# ---------------------------------------------------------------------------
# Patch framing of the next file, preserved from the original dump:
# diff --git a/routes/login.py b/routes/login.py
# new file mode 100644
# index 0000000..3c23818
# --- /dev/null
# +++ b/routes/login.py
# @@ -0,0 +1,599 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Copyright (C) 2026 tmwgsicp
# Licensed under the GNU Affero General Public License v3.0
# See LICENSE file in the project root for full license text.
+# SPDX-License-Identifier: AGPL-3.0-only +""" +登录路由 - FastAPI版本 +实现真实的微信公众号登录流程 +""" + +from fastapi import APIRouter, HTTPException, Response, Request +from fastapi.responses import StreamingResponse, JSONResponse +from pydantic import BaseModel +from typing import Optional, Dict +import httpx +import time +from utils.auth_manager import auth_manager +from utils.webhook import webhook + +router = APIRouter() + +# 微信登录API端点 +MP_BASE_URL = "https://mp.weixin.qq.com" +QR_ENDPOINT = f"{MP_BASE_URL}/cgi-bin/scanloginqrcode" +BIZ_LOGIN_ENDPOINT = f"{MP_BASE_URL}/cgi-bin/bizlogin" + +# 全局session状态存储 +_sessions = {} + +async def proxy_wx_request(request: Request, url: str, params: dict = None, method: str = "GET", data: dict = None): + """ + 代理微信请求,转发浏览器cookies + + 这个函数类似Node.js版本的proxyMpRequest: + 1. 从浏览器请求中提取cookies + 2. 转发给微信API + 3. 把微信的Set-Cookie响应转发回浏览器 + """ + # 从浏览器请求中提取cookies + cookie_header = request.headers.get("cookie", "") + + headers = { + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36", + "Referer": "https://mp.weixin.qq.com/", + "Origin": "https://mp.weixin.qq.com", + "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8", + "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8", + "Cookie": cookie_header # 转发浏览器的cookies + } + + async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client: + if method == "GET": + response = await client.get(url, params=params, headers=headers) + else: + response = await client.post(url, params=params, data=data, headers=headers) + + return response + +class LoginRequest(BaseModel): + """手动登录请求模型""" + token: str + cookie: str + fakeid: str + nickname: str + expire_time: int + +class LoginResponse(BaseModel): + """登录响应模型""" + success: bool + message: str + +@router.post("/session/{sessionid}", summary="初始化登录会话", include_in_schema=True) +async def create_session(sessionid: str, request: Request): + """ + 
# Start a WeChat login session by calling bizlogin?action=startlogin and relay
# the Set-Cookie headers WeChat returns. Must be called before /getqrcode.
# Failures are reported to the client as {"base_resp": {"ret": 0}} on purpose
# (the original author's choice, to keep the front end from erroring); the
# real error is only printed.
@router.post("/session/{sessionid}", summary="初始化登录会话", include_in_schema=True)
async def create_session(sessionid: str, request: Request):
    """
    初始化登录会话,必须在获取二维码之前调用。

    **路径参数:**
    - **sessionid**: 会话标识,由前端生成
    """
    import re
    import traceback

    # Entries older than this are dropped so the client-chosen session ids
    # cannot grow the in-memory store without bound.
    session_ttl_seconds = 3600

    try:
        # Log cookie names only; the values are login secrets.
        cookie_header = request.headers.get("cookie", "")
        cookie_names = [
            part.split("=", 1)[0].strip()
            for part in cookie_header.split(";")
            if "=" in part
        ]
        print(f"🔍 [DEBUG] 创建Session - Cookie names: {cookie_names}")

        # The session is opened via bizlogin, not scanloginqrcode.
        body = {
            "userlang": "zh_CN",
            "redirect_url": "",
            "login_type": 3,
            "sessionid": sessionid,
            "token": "",
            "lang": "zh_CN",
            "f": "json",
            "ajax": 1
        }

        response = await proxy_wx_request(
            request,
            BIZ_LOGIN_ENDPOINT,
            params={"action": "startlogin"},
            method="POST",
            data=body
        )

        # Prune stale sessions, then record this one.
        now = time.time()
        stale_ids = [
            sid for sid, meta in _sessions.items()
            if now - meta.get("created_at", now) > session_ttl_seconds
        ]
        for sid in stale_ids:
            _sessions.pop(sid, None)
        _sessions[sessionid] = {
            "created_at": now,
            "status": "created"
        }

        # Non-JSON replies are treated as success, as before.
        if response.headers.get("content-type", "").startswith("application/json"):
            data = response.json()
        else:
            data = {"base_resp": {"ret": 0}}

        upstream_cookies = response.headers.get_list("set-cookie")
        print(f"🔍 [DEBUG] Session响应状态码: {response.status_code}")
        print(f"🔍 [DEBUG] Session响应数据: {data}")
        print(f"🔍 [DEBUG] Session响应 Set-Cookie 数量: {len(upstream_cookies)}")
        for i, cookie in enumerate(upstream_cookies):
            # Name only: the value is a credential.
            print(f"🔍 [DEBUG] Cookie [{i}]: {cookie.split('=', 1)[0]}")

        response_obj = JSONResponse(content=data)

        # HTTPS is detected directly or through reverse-proxy headers.
        is_https = (
            request.url.scheme == "https" or
            request.headers.get("x-forwarded-proto") == "https" or
            request.headers.get("x-forwarded-ssl") == "on"
        )

        if is_https:
            print(f"🔒 检测到HTTPS环境,Cookie将保留Secure标志(安全传输)")
        else:
            print(f"⚠️ 检测到HTTP环境,Cookie将移除Secure标志(兼容模式,生产环境建议使用HTTPS)")

        for cookie_str in upstream_cookies:
            if not is_https:
                # Over plain HTTP the browser would drop Secure cookies, so the
                # attribute is removed. Attribute names are case-insensitive,
                # and the lookahead keeps longer names starting with "Secure".
                cookie_str = re.sub(
                    r";\s*secure\s*(?=;|$)", "", cookie_str, flags=re.IGNORECASE
                )
            response_obj.headers.append("Set-Cookie", cookie_str)

        print(f"✅ 创建session: {sessionid}, 响应: {data}")
        return response_obj

    except Exception as e:
        print(f"❌ 创建session失败: {str(e)}")
        traceback.print_exc()
        # Deliberately report success so the front end does not error out.
        return JSONResponse(content={"base_resp": {"ret": 0}})
# Fetch the login QR code image from WeChat and return it to the browser,
# relaying WeChat's Set-Cookie headers. A non-image reply (normally a JSON
# error because no session was created first) yields a 400 JSON response.
# A copy of the image is also written under static/qrcodes for debugging.
@router.get("/getqrcode", summary="获取登录二维码", include_in_schema=True)
async def get_qrcode(request: Request):
    """
    获取微信公众号登录二维码图片,用微信扫码登录。

    **返回:** 二维码图片(PNG/JPEG 格式)
    """
    import os
    import re
    import traceback

    try:
        # Log cookie names only; the values are login secrets.
        cookie_header = request.headers.get("cookie", "")
        cookie_names = [
            part.split("=", 1)[0].strip()
            for part in cookie_header.split(";")
            if "=" in part
        ]
        print(f"🔍 [DEBUG] 二维码请求 Cookie names: {cookie_names}")

        response = await proxy_wx_request(
            request,
            QR_ENDPOINT,
            params={
                "action": "getqrcode",
                "random": int(time.time() * 1000)
            }
        )

        upstream_cookies = response.headers.get_list("set-cookie")
        print(f"🔍 [DEBUG] 微信响应状态码: {response.status_code}")
        print(f"🔍 [DEBUG] 微信响应 Content-Type: {response.headers.get('content-type', 'N/A')}")
        print(f"🔍 [DEBUG] 微信响应内容长度: {len(response.content)} 字节")
        print(f"🔍 [DEBUG] 微信响应 Set-Cookie 数量: {len(upstream_cookies)}")

        content_type = response.headers.get("content-type", "")
        content = response.content

        # Sniff the payload: the declared content type is not always reliable.
        is_png = content.startswith(b'\x89PNG')
        is_jpeg = content.startswith(b'\xff\xd8\xff') or b'JFIF' in content[:20]
        is_image = "image" in content_type or is_png or is_jpeg

        if not is_image:
            try:
                error_data = response.json()
            except ValueError:
                # Neither an image nor JSON (JSON/Unicode decode errors are
                # ValueError subclasses).
                print(f"⚠️ 二维码接口返回非图片内容: {content_type}")
                print(f"响应内容前20字节: {content[:20]}")
                return JSONResponse(
                    status_code=400,
                    content={"error": "获取二维码失败,响应格式异常"}
                )
            print(f"⚠️ 二维码接口返回JSON: {error_data}")
            return JSONResponse(
                status_code=400,
                content={"error": "需要先调用 /session/{sessionid} 创建会话", "detail": error_data}
            )

        if is_png:
            media_type = "image/png"
            print(f"✅ 获取到PNG格式二维码")
        elif is_jpeg:
            media_type = "image/jpeg"
            print(f"✅ 获取到JPEG格式二维码")
        else:
            # Trust the header; default to PNG if it is not an image type.
            media_type = content_type if "image" in content_type else "image/png"
            print(f"✅ 获取到二维码,类型: {media_type}")

        # Debug copy on disk. Best effort: a read-only or missing directory
        # must not fail a request whose QR code was fetched successfully.
        qrcode_dir = "static/qrcodes"
        ext = "png" if is_png else "jpg"
        qrcode_path = f"{qrcode_dir}/login_qrcode.{ext}"
        try:
            os.makedirs(qrcode_dir, exist_ok=True)
            with open(qrcode_path, "wb") as f:
                f.write(content)
            print(f"💾 二维码已保存到: {qrcode_path}")
        except OSError as save_error:
            print(f"⚠️ 二维码保存失败: {save_error}")

        response_obj = Response(
            content=content,
            media_type=media_type,
            headers={
                "Cache-Control": "no-cache, no-store, must-revalidate",
                "Pragma": "no-cache",
                "Expires": "0"
            }
        )

        # HTTPS is detected directly or through reverse-proxy headers.
        is_https = (
            request.url.scheme == "https" or
            request.headers.get("x-forwarded-proto") == "https" or
            request.headers.get("x-forwarded-ssl") == "on"
        )

        for cookie_str in upstream_cookies:
            if not is_https:
                # Drop the Secure attribute (case-insensitive) so the cookie
                # survives plain-HTTP transport.
                cookie_str = re.sub(
                    r";\s*secure\s*(?=;|$)", "", cookie_str, flags=re.IGNORECASE
                )
            response_obj.headers.append("Set-Cookie", cookie_str)

        return response_obj

    except httpx.HTTPStatusError as e:
        # Only reachable if a status check is added to the request path;
        # kept so such a change keeps its error mapping.
        print(f"❌ HTTP错误: {e.response.status_code}, 内容: {e.response.text[:200]}")
        raise HTTPException(
            status_code=e.response.status_code,
            detail=f"获取二维码失败: {e.response.status_code}"
        )
    except Exception as e:
        print(f"❌ 获取二维码异常: {str(e)}")
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=f"获取二维码失败: {str(e)}")
# Poll WeChat for the QR-code scan state and return WeChat's JSON unchanged,
# relaying its Set-Cookie headers. Upstream HTTP errors keep their status
# code; anything else becomes a 500.
@router.get("/scan", summary="检查扫码状态", include_in_schema=True)
async def check_scan_status(request: Request):
    """
    轮询检查二维码扫描状态。

    **返回状态:** 等待扫码 / 已扫码待确认 / 确认成功 / 二维码过期
    """
    import re

    try:
        response = await proxy_wx_request(
            request,
            QR_ENDPOINT,
            params={
                "action": "ask",
                "token": "",
                "lang": "zh_CN",
                "f": "json",
                "ajax": 1
            }
        )
        response.raise_for_status()

        data = response.json()

        # Diagnostics only; the payload is passed through untouched.
        ret = data.get("base_resp", {}).get("ret")
        if ret != 0:
            print(f"⚠️ 扫码状态检查失败: ret={ret}")
        else:
            status = data.get("status", 0)
            if status == 1:  # login confirmed
                print(f"🎉 用户已确认登录! status=1")
            elif status in [4, 6]:  # scanned, awaiting confirmation
                acct_size = data.get("acct_size", 0)
                print(f"✅ 用户已扫码, status={status}, acct_size={acct_size}")

        response_obj = JSONResponse(content=data)

        # HTTPS is detected directly or through reverse-proxy headers.
        is_https = (
            request.url.scheme == "https" or
            request.headers.get("x-forwarded-proto") == "https" or
            request.headers.get("x-forwarded-ssl") == "on"
        )

        for cookie_str in response.headers.get_list("set-cookie"):
            if not is_https:
                # Drop the Secure attribute so the cookie survives plain HTTP.
                # Attribute names are case-insensitive; the lookahead avoids
                # clipping longer names that merely start with "Secure".
                cookie_str = re.sub(
                    r";\s*secure\s*(?=;|$)", "", cookie_str, flags=re.IGNORECASE
                )
            response_obj.headers.append("Set-Cookie", cookie_str)

        return response_obj

    except httpx.HTTPStatusError as e:
        raise HTTPException(
            status_code=e.response.status_code,
            detail=f"检查扫码状态失败: {e.response.status_code}"
        )
    except Exception as e:
        print(f"❌ 检查扫码状态异常: {str(e)}")
        raise HTTPException(status_code=500, detail=f"检查扫码状态失败: {str(e)}")
def _merge_cookie_header(browser_cookie: str, response_cookies) -> str:
    """Merge the browser's cookies with those set by the bizlogin response.

    Response cookies win on name clashes because they are the newest.
    Returns a single ``Cookie`` header value.
    """
    merged = {}
    for part in browser_cookie.split(";"):
        part = part.strip()
        if "=" in part:
            key, _, value = part.partition("=")
            merged[key.strip()] = value.strip()
    for cookie in response_cookies.jar:
        merged[cookie.name] = cookie.value
    return "; ".join(f"{k}={v}" for k, v in merged.items())


async def _lookup_fakeid(client, token: str, nickname: str, headers: dict) -> str:
    """Search WeChat for *nickname* and return the matching FakeID, or "".

    An exact nickname match is preferred; otherwise the first search hit is
    used. Any failure is logged and results in an empty string.
    """
    import traceback

    print(f"🔍 开始获取FakeID,昵称: {nickname}")
    try:
        search_response = await client.get(
            f"{MP_BASE_URL}/cgi-bin/searchbiz",
            params={
                "action": "search_biz",
                "token": token,
                "lang": "zh_CN",
                "f": "json",
                "ajax": 1,
                "random": time.time(),
                "query": nickname,
                "begin": 0,
                "count": 5
            },
            headers=headers
        )

        print(f"📡 搜索API响应状态: {search_response.status_code}")
        search_result = search_response.json()
        print(f"📡 搜索结果: {search_result}")

        base_resp = search_result.get("base_resp", {})
        if base_resp.get("ret") != 0:
            err_msg = base_resp.get("err_msg", "未知错误")
            print(f"❌ 搜索API返回错误: ret={base_resp.get('ret')}, err_msg={err_msg}")
            return ""

        accounts = search_result.get("list", [])
        print(f"📋 找到 {len(accounts)} 个公众号")

        for account in accounts:
            acc_nickname = account.get("nickname", "")
            acc_fakeid = account.get("fakeid", "")
            print(f"  - {acc_nickname} (fakeid: {acc_fakeid})")
            if acc_nickname == nickname:
                print(f"✅ 匹配成功,FakeID: {acc_fakeid}")
                return acc_fakeid

        print(f"⚠️ 未找到完全匹配的公众号,尝试使用第一个结果")
        if accounts:
            fakeid = accounts[0].get("fakeid", "")
            print(f"📝 使用第一个公众号的FakeID: {fakeid}")
            return fakeid
        return ""

    except Exception as e:
        print(f"❌ 获取FakeID失败: {str(e)}")
        traceback.print_exc()
        return ""


# Complete the login after the user confirmed the QR scan: call
# bizlogin?action=login, extract the token from the returned redirect_url,
# collect cookies, look up nickname and FakeID, persist the credentials via
# auth_manager and fire the login_success webhook.
@router.post("/bizlogin", summary="完成登录", include_in_schema=True)
async def biz_login(request: Request):
    """
    扫码确认后调用此接口完成登录,成功后凭证自动保存到 `.env`。

    **返回:** Token、Cookie、FakeID、昵称等登录凭证
    """
    import re
    import traceback
    from urllib.parse import urlparse, parse_qs

    try:
        login_data = {
            "userlang": "zh_CN",
            "redirect_url": "",
            "cookie_forbidden": 0,
            "cookie_cleaned": 0,
            "plugin_used": 0,
            "login_type": 3,
            "token": "",
            "lang": "zh_CN",
            "f": "json",
            "ajax": 1
        }

        response = await proxy_wx_request(
            request,
            BIZ_LOGIN_ENDPOINT,
            params={"action": "login"},
            method="POST",
            data=login_data
        )
        response.raise_for_status()

        result = response.json()
        base_resp = result.get("base_resp", {})

        print(f"📍 Bizlogin响应: base_resp.ret={base_resp.get('ret')}")

        if base_resp.get("ret") != 0:
            error_msg = base_resp.get("err_msg", "登录失败")
            print(f"❌ 微信返回错误: {error_msg}")
            return JSONResponse(
                status_code=400,
                content={"success": False, "error": error_msg}
            )

        redirect_url = result.get("redirect_url", "")
        if not redirect_url:
            print(f"❌ 未获取到redirect_url,完整响应: {result}")
            return JSONResponse(
                status_code=400,
                content={"success": False, "error": "未获取到登录凭证"}
            )

        # urlparse handles both relative and absolute redirect URLs.
        token = parse_qs(urlparse(redirect_url).query).get("token", [""])[0]
        if not token:
            print(f"❌ 未获取到Token,redirect_url: {redirect_url}")
            return JSONResponse(
                status_code=400,
                content={"success": False, "error": "未获取到Token"}
            )

        # Cookies accumulated by the browser during the flow, overridden by
        # the ones this bizlogin response just set.
        cookie_str = _merge_cookie_header(
            request.headers.get("cookie", ""), response.cookies
        )

        common_headers = {
            "Cookie": cookie_str,
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
        }

        nickname = "公众号"  # placeholder when the home page cannot be parsed
        fakeid = ""

        async with httpx.AsyncClient(timeout=30.0) as client:
            # Step 1: read the account nickname from the home page.
            info_response = await client.get(
                f"{MP_BASE_URL}/cgi-bin/home",
                params={"t": "home/index", "token": token, "lang": "zh_CN"},
                headers=common_headers
            )
            nick_match = re.search(
                r'nick_name\s*[:=]\s*["\']([^"\']+)["\']', info_response.text
            )

            # Step 2: resolve the FakeID by searching for the nickname.
            # Searching for the placeholder would return unrelated accounts
            # and store one of their FakeIDs, so it is skipped in that case.
            if nick_match:
                nickname = nick_match.group(1)
                fakeid = await _lookup_fakeid(client, token, nickname, common_headers)
            else:
                print(f"⚠️ 未能解析公众号昵称,跳过FakeID搜索")

        # Expiry: 30 days from now, in milliseconds.
        expire_time = int((time.time() + 30 * 24 * 3600) * 1000)

        saved = auth_manager.save_credentials(
            token=token,
            cookie=cookie_str,
            fakeid=fakeid,
            nickname=nickname,
            expire_time=expire_time
        )
        # manual_login treats a falsy result as failure; only an explicit
        # False is rejected here in case a None return is also in use.
        if saved is False:
            print(f"❌ 保存登录凭证失败")
            return JSONResponse(
                status_code=500,
                content={"success": False, "error": "保存登录凭证失败"}
            )

        print(f"✅ 登录成功: {nickname} (fakeid: {fakeid})")
        # The token is a credential: log only a short prefix.
        print(f"   Token: {token[:4]}***")
        print(f"   Cookie已保存到.env")

        # The webhook is a notification only; its failure must not turn an
        # already-persisted login into an error response.
        try:
            await webhook.notify('login_success', {
                'nickname': nickname,
                'fakeid': fakeid,
            })
        except Exception as notify_error:
            print(f"⚠️ Webhook通知失败: {notify_error}")

        return {
            "success": True,
            "message": "登录成功",
            "data": {
                "nickname": nickname,
                "fakeid": fakeid,
                "token": token,
                "expire_time": expire_time
            }
        }

    except httpx.HTTPStatusError as e:
        return JSONResponse(
            status_code=e.response.status_code,
            content={"success": False, "error": f"登录请求失败: {e.response.status_code}"}
        )
    except Exception as e:
        print(f"❌ 登录异常: {str(e)}")
        traceback.print_exc()
        return JSONResponse(
            status_code=500,
            content={"success": False, "error": f"登录失败: {str(e)}"}
        )
# Store credentials obtained outside the QR-code flow. Returns
# success=False (HTTP 200) when auth_manager reports that saving failed, and
# HTTP 500 when saving raises.
@router.post("/manual", response_model=LoginResponse, summary="手动配置登录凭证")
async def manual_login(request: LoginRequest):
    """
    手动提交登录凭证(适用于已通过其他方式获取凭证的场景)。

    **请求体参数:**
    - **token** (必填): 微信 Token
    - **cookie** (必填): 微信 Cookie
    - **fakeid** (可选): 公众号 FakeID
    - **nickname** (可选): 公众号昵称
    - **expire_time** (可选): 过期时间戳
    """
    try:
        success = auth_manager.save_credentials(
            token=request.token,
            cookie=request.cookie,
            fakeid=request.fakeid,
            nickname=request.nickname,
            expire_time=request.expire_time
        )
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"保存失败: {str(e)}")

    if not success:
        return {
            "success": False,
            "message": "保存登录凭证失败"
        }

    # The webhook is a notification only: once the credentials are stored, a
    # delivery failure must not be reported to the client as a save failure.
    try:
        await webhook.notify('login_success', {
            'nickname': request.nickname or '',
            'fakeid': request.fakeid or '',
        })
    except Exception as notify_error:
        print(f"⚠️ Webhook通知失败: {notify_error}")

    return {
        "success": True,
        "message": "登录凭证已保存"
    }


# Report nickname, FakeID and expiry of the stored credentials, or
# success=False when auth_manager has none.
@router.get("/info", summary="获取登录信息")
async def get_login_info():
    """
    获取当前登录用户的昵称、FakeID、过期时间等信息。
    """
    credentials = auth_manager.get_credentials()
    if not credentials:
        return {
            "success": False,
            "error": "未登录"
        }
    return {
        "success": True,
        "data": {
            "nickname": credentials.get("nickname"),
            "fakeid": credentials.get("fakeid"),
            "expire_time": credentials.get("expire_time")
        }
    }
# SPDX-License-Identifier: AGPL-3.0-only
"""
Search routes (FastAPI).
"""

from fastapi import APIRouter, Query
from pydantic import BaseModel
from typing import Optional, List
import time
import httpx
from utils.auth_manager import auth_manager

router = APIRouter()


class Account(BaseModel):
    """公众号模型"""
    id: str
    name: str
    round_head_img: str


class SearchResponse(BaseModel):
    """搜索响应模型"""
    success: bool
    data: Optional[dict] = None
    error: Optional[str] = None


# Search WeChat Official Accounts by keyword using the stored server
# credentials. Every outcome, including failures, is returned as a
# SearchResponse; nothing is raised to the client.
@router.get("/searchbiz", response_model=SearchResponse, summary="搜索公众号")
async def search_accounts(query: str = Query(..., description="公众号名称或关键词", alias="query")):
    """
    按关键词搜索微信公众号,获取 FakeID。

    **查询参数:**
    - **query** (必填): 搜索关键词(公众号名称)

    **返回字段:**
    - `list[]`: 匹配的公众号列表,每项包含 `fakeid`、`nickname`、`alias`、`round_head_img`
    - `total`: 匹配数量
    """
    stored = auth_manager.get_credentials()
    if not stored:
        return SearchResponse(
            success=False,
            error="服务器未登录,请先访问管理页面扫码登录"
        )

    token = stored.get("token")
    cookie = stored.get("cookie")

    try:
        search_params = {
            "action": "search_biz",
            "token": token,
            "lang": "zh_CN",
            "f": "json",
            "ajax": 1,
            "random": time.time(),
            "query": query,
            "begin": 0,
            "count": 5
        }
        request_headers = {
            "Cookie": cookie,
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
        }

        # Call WeChat's account search API.
        async with httpx.AsyncClient(timeout=10.0) as client:
            upstream = await client.get(
                "https://mp.weixin.qq.com/cgi-bin/searchbiz",
                params=search_params,
                headers=request_headers
            )

        payload = upstream.json()

        if payload.get("base_resp", {}).get("ret") != 0:
            return SearchResponse(
                success=False,
                error=f"搜索失败: {payload.get('base_resp', {}).get('err_msg', '未知错误')}"
            )

        # Keep only the fields exposed by this API.
        formatted_accounts = [
            {
                "fakeid": item.get("fakeid", ""),
                "nickname": item.get("nickname", ""),
                "alias": item.get("alias", ""),
                "round_head_img": item.get("round_head_img", ""),
                "service_type": item.get("service_type", 0)
            }
            for item in payload.get("list", [])
        ]

        return SearchResponse(
            success=True,
            data={
                "list": formatted_accounts,
                "total": len(formatted_accounts)
            }
        )

    except Exception as e:
        print(f"❌ 搜索公众号失败: {str(e)}")
        return SearchResponse(
            success=False,
            error=f"搜索请求失败: {str(e)}"
        )
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Copyright (C) 2026 tmwgsicp
# Licensed under the GNU Affero General Public License v3.0
# See LICENSE file in the project root for full license text.
# SPDX-License-Identifier: AGPL-3.0-only
"""
Statistics routes.
"""

from fastapi import APIRouter
from pydantic import BaseModel
from utils.rate_limiter import rate_limiter

router = APIRouter()


class StatsResponse(BaseModel):
    """统计响应"""
    # Rate-limiter counters exactly as returned by rate_limiter.get_stats().
    rate_limit: dict

    class Config:
        # Example payload shown in the generated API schema.
        json_schema_extra = {
            "example": {
                "rate_limit": {
                    "global_requests": 5,
                    "global_limit": 10,
                    "active_ips": 2,
                    "article_requests": 3
                }
            }
        }


# Expose the rate limiter's current counters under the "rate_limit" key.
@router.get("/stats", response_model=StatsResponse, summary="获取API统计信息")
async def get_stats():
    """
    获取API统计信息

    包括:
    - 限频统计
    - 请求统计
    """
    limiter_snapshot = rate_limiter.get_stats()
    return {"rate_limit": limiter_snapshot}
+ +:: Configuration variables +set PROJECT_NAME=wechat-article-api +set SERVICE_PORT=5000 +set PYTHON_VERSION=3.8 +set VENV_NAME=venv + +:: Step 1: Check Python environment +echo [94m[1/6] Checking Python environment...[0m +python --version >nul 2>&1 +if errorlevel 1 ( + echo [91mX Python is not installed or not in PATH[0m + echo Please install Python %PYTHON_VERSION%+ and add to PATH + echo Download from: https://www.python.org/downloads/ + pause + exit /b 1 +) + +for /f "tokens=2" %%i in ('python --version') do set INSTALLED_PYTHON=%%i +echo [92m+ Python version: %INSTALLED_PYTHON%[0m + +:: Check pip +pip --version >nul 2>&1 +if errorlevel 1 ( + echo [91mX pip is not installed[0m + echo Please install pip + pause + exit /b 1 +) +echo [92m+ pip is installed[0m + +:: Step 2: Create virtual environment +echo. +echo [94m[2/6] Creating Python virtual environment...[0m +if exist "%VENV_NAME%" ( + echo [93m! Virtual environment already exists, skipping creation[0m +) else ( + python -m venv %VENV_NAME% + if errorlevel 1 ( + echo [91mX Virtual environment creation failed[0m + pause + exit /b 1 + ) + echo [92m+ Virtual environment created successfully[0m +) + +:: Activate virtual environment +call %VENV_NAME%\Scripts\activate.bat +if errorlevel 1 ( + echo [91mX Virtual environment activation failed[0m + pause + exit /b 1 +) +echo [92m+ Virtual environment activated[0m + +:: Step 3: Install dependencies +echo. +echo [94m[3/6] Installing Python dependencies...[0m +if exist "requirements.txt" ( + pip install -r requirements.txt + if errorlevel 1 ( + echo [91mX Dependencies installation failed[0m + pause + exit /b 1 + ) + echo [92m+ Dependencies installed successfully[0m +) else ( + echo [93m! 
requirements.txt not found, installing core dependencies manually[0m + pip install fastapi uvicorn httpx python-dotenv + if errorlevel 1 ( + echo [91mX Core dependencies installation failed[0m + pause + exit /b 1 + ) + echo [92m+ Core dependencies installed successfully[0m +) + +:: Step 4: Check .env configuration +echo. +echo [94m[4/6] Checking configuration file...[0m +if not exist ".env" ( + echo [93m! .env file not found, creating from template...[0m + + if exist "env.example" ( + copy env.example .env >nul + echo [92m+ .env file created from env.example[0m + ) else ( + echo [93m! env.example not found, creating basic .env file...[0m + ( + echo # WeChat Article API Configuration + echo # Auto-generated by start.bat + echo. + echo # Authentication Info ^(Auto-filled after login^) + echo WECHAT_TOKEN= + echo WECHAT_COOKIE= + echo WECHAT_FAKEID= + echo WECHAT_NICKNAME= + echo WECHAT_EXPIRE_TIME= + echo. + echo # Service Configuration + echo PORT=5000 + echo HOST=0.0.0.0 + echo DEBUG=false + echo. + echo # Rate Limiting + echo RATE_LIMIT_GLOBAL=10 + echo RATE_LIMIT_PER_IP=5 + echo RATE_LIMIT_ARTICLE_INTERVAL=3 + ) > .env + echo [92m+ Basic .env file created[0m + ) + + echo. + echo [93m========================================[0m + echo [93m First-time Setup[0m + echo [93m========================================[0m + echo. + echo [92mNext Steps:[0m + echo 1. Service will start in a moment + echo 2. Visit: http://localhost:5000/login.html + echo 3. Scan QR code with WeChat + echo 4. Login credentials will be saved automatically + echo. + echo [93m========================================[0m + echo. +) else ( + echo [92m+ .env configuration file found[0m + + :: Check required configuration items + findstr /C:"WECHAT_TOKEN=" .env | findstr /V "WECHAT_TOKEN=$" | findstr /V "WECHAT_TOKEN= *$" >nul 2>nul + if errorlevel 1 ( + echo [93m! 
WeChat credentials not configured yet[0m + echo [93m Please visit http://localhost:5000/login.html to login[0m + ) else ( + echo [92m+ WeChat login credentials configured[0m + ) +) +echo. + +:: Step 5: Detect system configuration +echo [94m[5/6] Detecting system configuration...[0m + +:: Get CPU cores +for /f "tokens=2 delims==" %%a in ('wmic cpu get NumberOfLogicalProcessors /value ^| find "="') do set CPU_CORES=%%a +if not defined CPU_CORES set CPU_CORES=4 + +:: Get memory size (GB) +for /f "tokens=2 delims==" %%a in ('wmic computersystem get TotalPhysicalMemory /value ^| find "="') do set MEMORY_BYTES=%%a +if defined MEMORY_BYTES ( + set /a MEMORY_GB=MEMORY_BYTES/1073741824 +) else ( + set MEMORY_GB=8 +) + +echo [92m+ System configuration:[0m +echo CPU cores: %CPU_CORES% +echo Memory: %MEMORY_GB%GB +echo. + +:: Step 6: Start service +echo [94m[6/6] Starting FastAPI service...[0m +echo. +echo ======================================== +echo [92mService Startup Information[0m +echo ======================================== +echo. +echo [94mAccess URLs:[0m +echo - Admin Panel: http://localhost:%SERVICE_PORT%/admin.html +echo - Login Page: http://localhost:%SERVICE_PORT%/login.html +echo - API Docs: http://localhost:%SERVICE_PORT%/api/docs +echo - ReDoc: http://localhost:%SERVICE_PORT%/api/redoc +echo - Health: http://localhost:%SERVICE_PORT%/api/health +echo. +echo [94mCore Features:[0m +echo + Article Retrieval - POST /api/article +echo + Article List - GET /api/public/articles +echo + Article Search - GET /api/public/articles/search +echo + Account Search - GET /api/public/searchbiz +echo + Image Proxy - GET /api/image +echo + Auto Rate Limiting +echo + Webhook Notifications +echo. +echo [94mSystem Info:[0m +echo CPU: %CPU_CORES% cores +echo Memory: %MEMORY_GB%GB +echo. +echo ======================================== +echo. +echo [93mFirst time? Please visit login page to scan QR code:[0m +echo =^> http://localhost:%SERVICE_PORT%/login.html +echo. 
+echo [93mTip: Press Ctrl+C to stop service[0m +echo. + +:: Start service +python app.py + +echo. +echo [92mService stopped[0m +pause diff --git a/start.sh b/start.sh new file mode 100644 index 0000000..638fc4a --- /dev/null +++ b/start.sh @@ -0,0 +1,545 @@ +#!/bin/bash + +# =============================================== +# WeChat Article API Service - Linux Deployment Script v2.0 +# =============================================== + +# Error handling +set -e # Exit on error +set -o pipefail # Catch errors in pipes + +# Trap errors +trap 'echo -e "\n${RED}Error: Deployment failed at line $LINENO${NC}" >&2; exit 1' ERR + +# Color definitions +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +NC='\033[0m' # No Color + +# Configuration variables +PROJECT_NAME="wechat-article-api" +SERVICE_PORT=5000 +PYTHON_VERSION="3.8" +VENV_NAME="venv" +DEPLOY_USER="wechat-api" # Dedicated service user + +# Get current directory (compatible with different shells) +if [ -n "${BASH_SOURCE[0]}" ]; then + SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +else + SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +fi +INSTALL_DIR="$SCRIPT_DIR" +LOG_DIR="$INSTALL_DIR/logs" + +# Get the actual user who ran sudo (if applicable) +if [ -n "$SUDO_USER" ]; then + REAL_USER="$SUDO_USER" +else + REAL_USER="$USER" +fi + +# =============================================== +# Show welcome message +# =============================================== +show_welcome() { + echo + echo "========================================" + echo " WeChat Article API Deployment Tool v1.0.0" + echo "========================================" + echo + echo -e "${BLUE}Installation directory: $INSTALL_DIR${NC}" + echo -e "${BLUE}Service port: $SERVICE_PORT${NC}" + echo -e "${BLUE}Service user: $DEPLOY_USER${NC}" + echo +} + +# =============================================== +# Check permissions +# =============================================== +check_permission() { + echo -e "${BLUE}Checking 
system and permissions...${NC}" + + # Detect OS + if [ -f /etc/os-release ]; then + . /etc/os-release 2>/dev/null || true + echo -e "${GREEN}+ OS: ${NAME:-Linux} ${VERSION_ID:-unknown}${NC}" + fi + + # Check if running in container + if [ -f /.dockerenv ] || grep -qa container /proc/1/environ 2>/dev/null; then + echo -e "${YELLOW}! Container environment detected${NC}" + fi + + if [ "$EUID" -ne 0 ]; then + echo -e "${YELLOW}! Running without root privileges${NC}" + echo -e "${YELLOW} - Dedicated service user will NOT be created${NC}" + echo -e "${YELLOW} - Systemd service will NOT be configured${NC}" + echo -e "${YELLOW} - For full deployment, run: sudo bash start.sh${NC}" + echo + echo -e "${YELLOW}Press Enter to continue or Ctrl+C to exit${NC}" + read -p "" + else + echo -e "${GREEN}+ Running with root privileges${NC}" + fi + echo +} + +# =============================================== +# Step 1: Check Python +# =============================================== +check_python() { + echo -e "${BLUE}[1/7] Checking Python environment...${NC}" + + if ! command -v python3 &> /dev/null; then + echo -e "${RED}X Python3 not found${NC}" + echo "Please install Python $PYTHON_VERSION or higher" + exit 1 + fi + + PYTHON_VER=$(python3 --version | cut -d' ' -f2) + echo -e "${GREEN}+ Python version: $PYTHON_VER${NC}" + + # Check venv module + if ! python3 -m venv --help &> /dev/null; then + echo -e "${YELLOW}! 
python3-venv not found${NC}" + + if [ "$EUID" -eq 0 ]; then + echo -e "${BLUE} Installing python3-venv...${NC}" + + # Detect package manager and install + if command -v apt &> /dev/null; then + # Debian/Ubuntu + apt update && apt install -y python3-venv python3-pip || { + echo -e "${RED}X Failed to install python3-venv${NC}" + echo "Please run: apt install python3-venv python3-pip" + exit 1 + } + elif command -v yum &> /dev/null; then + # RHEL/CentOS + yum install -y python3-venv python3-pip || { + echo -e "${RED}X Failed to install python3-venv${NC}" + echo "Please run: yum install python3-venv python3-pip" + exit 1 + } + elif command -v dnf &> /dev/null; then + # Fedora + dnf install -y python3-venv python3-pip || { + echo -e "${RED}X Failed to install python3-venv${NC}" + echo "Please run: dnf install python3-venv python3-pip" + exit 1 + } + else + echo -e "${RED}X Cannot determine package manager${NC}" + echo "Please install python3-venv manually" + exit 1 + fi + echo -e "${GREEN}+ python3-venv installed${NC}" + else + echo -e "${RED}X python3-venv is required but not installed${NC}" + echo + echo "Please run one of the following commands with sudo:" + echo " Debian/Ubuntu: sudo apt install python3-venv python3-pip" + echo " RHEL/CentOS: sudo yum install python3-venv python3-pip" + echo " Fedora: sudo dnf install python3-venv python3-pip" + echo + echo "Then run this script again" + exit 1 + fi + fi + + # Check pip + if ! command -v pip3 &> /dev/null && ! python3 -m pip --version &> /dev/null; then + echo -e "${YELLOW}! 
pip not found, attempting to install...${NC}" + python3 -m ensurepip --upgrade 2>/dev/null || { + if [ "$EUID" -eq 0 ]; then + if command -v apt &> /dev/null; then + apt install -y python3-pip + elif command -v yum &> /dev/null; then + yum install -y python3-pip + elif command -v dnf &> /dev/null; then + dnf install -y python3-pip + fi + fi + } + fi + echo -e "${GREEN}+ pip available${NC}" + echo +} + +# =============================================== +# Step 2: Create virtual environment +# =============================================== +create_venv() { + echo -e "${BLUE}[2/7] Creating Python virtual environment...${NC}" + + if [[ ! -d "$VENV_NAME" ]]; then + python3 -m venv "$VENV_NAME" + echo -e "${GREEN}+ Virtual environment created${NC}" + else + echo -e "${YELLOW}! Virtual environment already exists, skipping${NC}" + fi + + # Activate virtual environment + source "$VENV_NAME/bin/activate" + echo -e "${GREEN}+ Virtual environment activated${NC}" + echo +} + +# =============================================== +# Step 3: Install dependencies +# =============================================== +install_dependencies() { + echo -e "${BLUE}[3/7] Installing Python dependencies...${NC}" + + # Upgrade pip + python -m pip install --upgrade pip + + # Install requirements.txt + if [[ -f "requirements.txt" ]]; then + pip install -r requirements.txt + echo + # Verify installation + if ! python -c "import fastapi" 2>/dev/null; then + echo -e "${RED}X Dependencies installation failed${NC}" + exit 1 + fi + echo -e "${GREEN}+ Dependencies installed successfully${NC}" + else + echo -e "${YELLOW}! requirements.txt not found, installing core dependencies${NC}" + pip install fastapi uvicorn httpx python-dotenv + echo + # Verify installation + if ! 
python -c "import fastapi" 2>/dev/null; then + echo -e "${RED}X Core dependencies installation failed${NC}" + exit 1 + fi + echo -e "${GREEN}+ Core dependencies installed successfully${NC}" + fi + echo +} + +# =============================================== +# Step 4: Initialize project +# =============================================== +initialize_project() { + echo -e "${BLUE}[4/7] Initializing project...${NC}" + + # Create necessary directories + mkdir -p static logs + echo -e "${GREEN}+ Directory structure created${NC}" + + # Create .env file if not exists + if [[ ! -f ".env" ]]; then + echo -e "${YELLOW}! .env file not found, creating from template...${NC}" + + if [[ -f "env.example" ]]; then + cp env.example .env + echo -e "${GREEN}+ .env file created from env.example${NC}" + else + echo -e "${YELLOW}! env.example not found, creating basic .env file...${NC}" + cat > .env << 'EOF' +# WeChat Article API Configuration +# Auto-generated by start.sh + +# Authentication Info (Auto-filled after login) +WECHAT_TOKEN= +WECHAT_COOKIE= +WECHAT_FAKEID= +WECHAT_NICKNAME= +WECHAT_EXPIRE_TIME= + +# Webhook Configuration +WEBHOOK_URL= +WEBHOOK_NOTIFICATION_INTERVAL=300 + +# Rate Limiting +RATE_LIMIT_GLOBAL=10 +RATE_LIMIT_PER_IP=5 +RATE_LIMIT_ARTICLE_INTERVAL=3 +EOF + echo -e "${GREEN}+ Basic .env file created${NC}" + fi + + echo + echo -e "${YELLOW}========================================${NC}" + echo -e "${YELLOW} First-time Setup${NC}" + echo -e "${YELLOW}========================================${NC}" + echo + echo -e "${GREEN}Next Steps:${NC}" + echo " 1. Service will start after deployment" + echo " 2. Visit: http://localhost:$SERVICE_PORT/login.html" + echo " 3. Scan QR code with WeChat" + echo " 4. 
Login credentials will be saved automatically" + echo + echo -e "${YELLOW}========================================${NC}" + echo + else + echo -e "${GREEN}+ .env configuration file found${NC}" + + # Check if credentials are actually configured (not empty) + if grep -q "WECHAT_TOKEN=.\+" .env 2>/dev/null; then + echo -e "${GREEN}+ WeChat login credentials configured${NC}" + else + echo -e "${YELLOW}! WeChat credentials not configured yet${NC}" + echo -e "${YELLOW} Please visit http://localhost:$SERVICE_PORT/login.html to login${NC}" + fi + fi + echo +} + +# =============================================== +# Step 5: Start service (non-root mode) +# =============================================== +start_service() { + echo -e "${BLUE}[5/7] Starting service...${NC}" + + # If running with root, service will be started via systemd + if [ "$EUID" -eq 0 ]; then + echo -e "${YELLOW}! Service will be started via systemd (see step 7)${NC}" + echo + return + fi + + # For non-root users, start service directly in foreground + echo + echo "========================================" + echo -e "${GREEN}Service Startup Information${NC}" + echo "========================================" + echo + echo -e "${BLUE}Access URLs:${NC}" + echo " - Admin Panel: http://localhost:$SERVICE_PORT/admin.html" + echo " - Login Page: http://localhost:$SERVICE_PORT/login.html" + echo " - API Docs: http://localhost:$SERVICE_PORT/api/docs" + echo " - ReDoc: http://localhost:$SERVICE_PORT/api/redoc" + echo " - Health: http://localhost:$SERVICE_PORT/api/health" + echo + echo -e "${BLUE}Core Features:${NC}" + echo " + Article Retrieval - POST /api/article" + echo " + Article List - GET /api/public/articles" + echo " + Article Search - GET /api/public/articles/search" + echo " + Account Search - GET /api/public/searchbiz" + echo " + Image Proxy - GET /api/image" + echo " + Auto Rate Limiting" + echo " + Webhook Notifications" + echo + echo -e "${YELLOW}First time? 
Please visit login page to scan QR code:${NC}" + echo " => http://localhost:$SERVICE_PORT/login.html" + echo + echo -e "${YELLOW}Tip: Press Ctrl+C to stop service${NC}" + echo "========================================" + echo + + # Start the service + python app.py +} + +# =============================================== +# Step 6: Create dedicated service user (optional) +# =============================================== +create_service_user() { + echo -e "${BLUE}[6/7] Creating dedicated service user...${NC}" + + if [ "$EUID" -ne 0 ]; then + echo -e "${YELLOW}! Not running as root, skipping user creation${NC}" + echo -e "${YELLOW}! Service will run as: $REAL_USER${NC}" + DEPLOY_USER="$REAL_USER" + echo + return + fi + + # Check if user already exists + if id "$DEPLOY_USER" &>/dev/null; then + echo -e "${GREEN}+ Service user already exists: $DEPLOY_USER${NC}" + else + # Try to create system user + echo -e "${BLUE} Creating user $DEPLOY_USER...${NC}" + + # Try different methods depending on the system + if command -v useradd &>/dev/null; then + # Most Linux distributions + if useradd -r -s /usr/sbin/nologin -c "WeChat Article API Service" "$DEPLOY_USER" 2>/dev/null; then + echo -e "${GREEN}+ Created service user: $DEPLOY_USER${NC}" + elif useradd -r -s /bin/false -c "WeChat Article API Service" "$DEPLOY_USER" 2>/dev/null; then + echo -e "${GREEN}+ Created service user: $DEPLOY_USER${NC}" + else + echo -e "${YELLOW}! User creation failed, trying with adduser...${NC}" + if command -v adduser &>/dev/null; then + adduser --system --no-create-home --group "$DEPLOY_USER" 2>/dev/null || { + echo -e "${YELLOW}! All user creation methods failed, using current user: $REAL_USER${NC}" + DEPLOY_USER="$REAL_USER" + } + else + echo -e "${YELLOW}! Using current user: $REAL_USER${NC}" + DEPLOY_USER="$REAL_USER" + fi + fi + else + echo -e "${YELLOW}! 
useradd not found, using current user: $REAL_USER${NC}" + DEPLOY_USER="$REAL_USER" + fi + fi + + # Set proper ownership + if [ "$DEPLOY_USER" != "$REAL_USER" ]; then + echo -e "${BLUE} Setting file ownership...${NC}" + if chown -R "$DEPLOY_USER:$DEPLOY_USER" "$INSTALL_DIR" 2>/dev/null; then + echo -e "${GREEN}+ Ownership set to: $DEPLOY_USER${NC}" + else + echo -e "${YELLOW}! Warning: Could not set ownership, trying with group only...${NC}" + if getent group "$DEPLOY_USER" &>/dev/null; then + chown -R "$DEPLOY_USER:$DEPLOY_USER" "$INSTALL_DIR" 2>/dev/null || { + echo -e "${YELLOW}! Warning: File ownership not changed${NC}" + } + fi + fi + fi + + echo +} + +# =============================================== +# Step 7: Configure systemd service (optional) +# =============================================== +configure_systemd() { + echo -e "${BLUE}[7/7] Configuring systemd service...${NC}" + + if [ "$EUID" -ne 0 ]; then + echo -e "${YELLOW}! Not running as root, skipping systemd configuration${NC}" + echo -e "${YELLOW}! 
To configure systemd, run: sudo bash start.sh${NC}" + echo + return + fi + + # Create systemd service file + cat > /etc/systemd/system/wechat-article-api.service << EOF +[Unit] +Description=WeChat Article API Service +After=network.target + +[Service] +Type=simple +User=$DEPLOY_USER +Group=$DEPLOY_USER +WorkingDirectory=$INSTALL_DIR +Environment="PATH=$INSTALL_DIR/$VENV_NAME/bin" +ExecStart=$INSTALL_DIR/$VENV_NAME/bin/python $INSTALL_DIR/app.py +Restart=always +RestartSec=10 +StandardOutput=journal +StandardError=journal +SyslogIdentifier=wechat-article-api + +# Security hardening +NoNewPrivileges=true +PrivateTmp=true +ProtectSystem=strict +ProtectHome=true +ReadWritePaths=$INSTALL_DIR/logs +ReadWritePaths=$INSTALL_DIR/.env +ReadWritePaths=$INSTALL_DIR/static/qrcodes + +[Install] +WantedBy=multi-user.target +EOF + + echo -e "${GREEN}+ systemd service file created${NC}" + + # Reload systemd configuration + systemctl daemon-reload + + # Ask if user wants to start service now + read -p "Enable and start systemd service now? (y/N): " START_SERVICE + if [[ "$START_SERVICE" =~ ^[Yy]$ ]]; then + systemctl enable wechat-article-api.service + systemctl start wechat-article-api.service + echo -e "${GREEN}+ Service started${NC}" + + # Show service status + echo + echo -e "${BLUE}Service status:${NC}" + systemctl status wechat-article-api --no-pager || true + else + echo -e "${YELLOW}! 
Service start skipped, you can start it manually later${NC}" + fi + echo +} + +# =============================================== +# Show deployment summary +# =============================================== +show_summary() { + echo + echo "========================================" + echo -e "${GREEN}Deployment completed!${NC}" + echo "========================================" + echo + echo -e "${GREEN}Deployment Information:${NC}" + echo " - Installation directory: $INSTALL_DIR" + echo " - Service port: $SERVICE_PORT" + echo " - Service user: $DEPLOY_USER" + echo " - Virtual environment: $VENV_NAME" + echo " - Log directory: $LOG_DIR" + echo + echo -e "${GREEN}Usage:${NC}" + if [ "$EUID" -ne 0 ]; then + echo " - Restart: ./start.sh" + echo " - Stop: Press Ctrl+C or use ./stop.sh" + echo " - Status: ./status.sh" + echo " - Activate venv: source venv/bin/activate" + fi + echo + + if [ "$EUID" -eq 0 ]; then + echo -e "${GREEN}systemd Commands:${NC}" + echo " - Start service: systemctl start wechat-article-api" + echo " - Stop service: systemctl stop wechat-article-api" + echo " - Restart service: systemctl restart wechat-article-api" + echo " - View status: systemctl status wechat-article-api" + echo " - View logs: journalctl -u wechat-article-api -f" + echo + fi + + echo -e "${GREEN}Access URLs:${NC}" + echo " - Admin Panel: http://localhost:$SERVICE_PORT/admin.html" + echo " - Login Page: http://localhost:$SERVICE_PORT/login.html" + echo " - API Documentation: http://localhost:$SERVICE_PORT/api/docs" + echo " - ReDoc: http://localhost:$SERVICE_PORT/api/redoc" + echo " - Health Check: http://localhost:$SERVICE_PORT/api/health" + echo + echo -e "${GREEN}Core Features:${NC}" + echo " + Article Retrieval - POST /api/article" + echo " + Article List - GET /api/public/articles" + echo " + Article Search - GET /api/public/articles/search" + echo " + Account Search - GET /api/public/searchbiz" + echo " + Image Proxy - GET /api/image" + echo " + Auto Rate Limiting" + echo " 
+ Webhook Notifications" + echo + echo -e "${YELLOW}Notes:${NC}" + echo " - First-time login required via web interface" + echo " - Credentials saved in .env file" + echo " - Check port usage: netstat -tulpn | grep :$SERVICE_PORT" + echo +} + +# =============================================== +# Main function +# =============================================== +main() { + show_welcome + check_permission + check_python + create_venv + install_dependencies + initialize_project + start_service # Non-root: start service directly; Root: skip (use systemd) + create_service_user # Root only: create dedicated user + configure_systemd # Root only: configure systemd service + show_summary +} + +# Run main function +main diff --git a/static/admin.html b/static/admin.html new file mode 100644 index 0000000..4033791 --- /dev/null +++ b/static/admin.html @@ -0,0 +1,770 @@ + + + + + + WeChat Download API + + + +
+ +
+
+

WeChat Download API

+

微信公众号文章获取服务

+
+ v1.0.0 +
+ + +
+
+ + + + + +
+
+
登录状态
+
检查中...
+
+
+
+ +
+
+ + +
+ +
+
+
+ + + + + +
+
+
快捷操作
+
登录与验证
+
+
+ +
+ + +
+
+
+ + + + + + + +
+
+
接口文档
+
在线调试 API
+
+
+ +
+ + +
+
+
+ + + + + +
+
+
系统
+
监控与调试
+
+
+ +
+ + +
+
+
+ + + + +
+
+
接口测试
+
输入文章 URL 测试获取功能
+
+
+
+ + +
+
+
+ + +
+ +
+
+
+
+ + + +
+ + + + diff --git a/static/login.html b/static/login.html new file mode 100644 index 0000000..c293666 --- /dev/null +++ b/static/login.html @@ -0,0 +1,282 @@ + + + + + + 登录 - WeChat Download API + + + +
+

扫码登录

+

使用微信扫描二维码登录公众平台

+ +
+
+
+ +
正在加载二维码...
+ + + 返回管理面板 +
+ + + + diff --git a/static/verify.html b/static/verify.html new file mode 100644 index 0000000..cdf1151 --- /dev/null +++ b/static/verify.html @@ -0,0 +1,245 @@ + + + + + + 验证处理 - WeChat Download API + + + +
+

验证处理

+

在浏览器中完成微信安全验证

+ + + + + +
+
操作步骤
+
+ 1 + 粘贴链接 — 输入触发验证的文章 URL +
+
+ 2 + 打开页面 — 点击按钮在新窗口中打开 +
+
+ 3 + 完成验证 — 完成滑块或点击验证码 +
+
+ 4 + 等待恢复 — 验证通过后等待 5-10 分钟再使用 API +
+
+ +
+ 验证通过后,Cookie 会自动更新。建议等待几分钟再继续调用接口,让微信系统恢复稳定。 +
+ + 返回管理面板 +
+ + + + diff --git a/status.sh b/status.sh new file mode 100644 index 0000000..96f7ce7 --- /dev/null +++ b/status.sh @@ -0,0 +1,118 @@ +#!/bin/bash + +# =============================================== +# WeChat Article API Service - Status Check Script +# =============================================== + +# Color definitions +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +NC='\033[0m' # No Color + +SERVICE_PORT=5000 + +echo +echo "========================================" +echo " WeChat Article API Service - Status" +echo "========================================" +echo + +# Check if running as systemd service +if [ "$EUID" -eq 0 ] && systemctl list-unit-files | grep -q wechat-article-api; then + echo -e "${BLUE}systemd Service Status:${NC}" + systemctl status wechat-article-api --no-pager -l + echo +fi + +# Check process +echo -e "${BLUE}Process Status:${NC}" +PIDS=$(pgrep -f "python.*app.py") +if [ -z "$PIDS" ]; then + PIDS=$(pgrep -f "uvicorn.*app:app") +fi + +if [ -n "$PIDS" ]; then + echo -e "${GREEN}+ Service is running${NC}" + for PID in $PIDS; do + echo -e " PID: $PID" + ps -p "$PID" -o pid,ppid,user,%cpu,%mem,etime,cmd --no-headers + done +else + echo -e "${RED}X Service is not running${NC}" +fi +echo + +# Check port +echo -e "${BLUE}Port Status:${NC}" +if command -v netstat &> /dev/null; then + PORT_CHECK=$(netstat -tulpn 2>/dev/null | grep ":$SERVICE_PORT") +elif command -v ss &> /dev/null; then + PORT_CHECK=$(ss -tulpn 2>/dev/null | grep ":$SERVICE_PORT") +fi + +if [ -n "$PORT_CHECK" ]; then + echo -e "${GREEN}+ Port $SERVICE_PORT is listening${NC}" + echo "$PORT_CHECK" +else + echo -e "${YELLOW}! 
Port $SERVICE_PORT is not in use${NC}" +fi +echo + +# Check API health +echo -e "${BLUE}API Health Check:${NC}" +if command -v curl &> /dev/null; then + HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:$SERVICE_PORT/api/health 2>/dev/null || echo "000") + if [ "$HTTP_CODE" = "200" ]; then + RESPONSE=$(curl -s http://localhost:$SERVICE_PORT/api/health 2>/dev/null) + echo -e "${GREEN}+ API is healthy${NC}" + echo " Response: $RESPONSE" + else + echo -e "${RED}X API is not responding (HTTP $HTTP_CODE)${NC}" + fi +elif command -v wget &> /dev/null; then + if wget -q --spider http://localhost:$SERVICE_PORT/api/health 2>/dev/null; then + echo -e "${GREEN}+ API is healthy${NC}" + else + echo -e "${RED}X API is not responding${NC}" + fi +else + echo -e "${YELLOW}! curl/wget not available, skipping health check${NC}" +fi +echo + +# Check login status +echo -e "${BLUE}Login Status:${NC}" +if [ -f ".env" ]; then + if grep -q "WECHAT_TOKEN=.\+" .env 2>/dev/null; then + TOKEN_VALUE=$(grep "WECHAT_TOKEN=" .env | cut -d'=' -f2 | head -c 20) + echo -e "${GREEN}+ WeChat credentials configured${NC}" + echo " Token: ${TOKEN_VALUE}..." + + EXPIRE_TIME=$(grep "WECHAT_EXPIRE_TIME=" .env | cut -d'=' -f2) + if [ -n "$EXPIRE_TIME" ] && [ "$EXPIRE_TIME" != "0" ]; then + CURRENT_TIME=$(date +%s)000 + if [ "$EXPIRE_TIME" -gt "$CURRENT_TIME" ]; then + echo -e "${GREEN}+ Credentials are valid${NC}" + else + echo -e "${YELLOW}! Credentials may be expired${NC}" + fi + fi + else + echo -e "${YELLOW}! WeChat credentials not configured${NC}" + echo " Please visit http://localhost:$SERVICE_PORT/login.html to login" + fi +else + echo -e "${YELLOW}! 
.env file not found${NC}" +fi +echo + +# Show access URLs +echo -e "${BLUE}Access URLs:${NC}" +echo " - Admin Panel: http://localhost:$SERVICE_PORT/admin.html" +echo " - Login Page: http://localhost:$SERVICE_PORT/login.html" +echo " - API Docs: http://localhost:$SERVICE_PORT/api/docs" +echo " - Health: http://localhost:$SERVICE_PORT/api/health" +echo + diff --git a/stop.sh b/stop.sh new file mode 100644 index 0000000..24b61e1 --- /dev/null +++ b/stop.sh @@ -0,0 +1,68 @@ +#!/bin/bash + +# =============================================== +# WeChat Article API Service - Stop Script +# =============================================== + +# Color definitions +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +NC='\033[0m' # No Color + +echo +echo "========================================" +echo " WeChat Article API Service - Stop" +echo "========================================" +echo + +# Check if running as systemd service +if [ "$EUID" -eq 0 ] && systemctl is-active --quiet wechat-article-api; then + echo -e "${BLUE}Detected systemd service, stopping...${NC}" + systemctl stop wechat-article-api + echo -e "${GREEN}+ systemd service stopped${NC}" + echo + exit 0 +fi + +# Find and kill process +echo -e "${BLUE}Searching for running API service...${NC}" + +# Try to find by app.py +PIDS=$(pgrep -f "python.*app.py") + +if [ -z "$PIDS" ]; then + # Try to find by uvicorn + PIDS=$(pgrep -f "uvicorn.*app:app") +fi + +if [ -z "$PIDS" ]; then + echo -e "${YELLOW}! No running service found${NC}" + echo + exit 0 +fi + +echo -e "${BLUE}Found process(es): $PIDS${NC}" + +# Kill processes +for PID in $PIDS; do + echo -e "${BLUE}Stopping process $PID...${NC}" + kill "$PID" 2>/dev/null + + # Wait for graceful shutdown + sleep 2 + + # Force kill if still running + if ps -p "$PID" > /dev/null 2>&1; then + echo -e "${YELLOW}! 
Process still running, force killing...${NC}" + kill -9 "$PID" 2>/dev/null + fi + + echo -e "${GREEN}+ Process $PID stopped${NC}" +done + +echo +echo -e "${GREEN}Service stopped successfully${NC}" +echo + diff --git a/utils/__init__.py b/utils/__init__.py new file mode 100644 index 0000000..7c432dd --- /dev/null +++ b/utils/__init__.py @@ -0,0 +1,30 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# Copyright (C) 2026 tmwgsicp +# Licensed under the GNU Affero General Public License v3.0 +# See LICENSE file in the project root for full license text. +# SPDX-License-Identifier: AGPL-3.0-only +""" +工具模块初始化 +""" + +from .auth_manager import auth_manager +from .helpers import ( + parse_article_url, + extract_article_info, + is_article_deleted, + is_need_verification, + is_login_required, + time_str_to_microseconds, +) + +__all__ = [ + 'auth_manager', + 'parse_article_url', + 'extract_article_info', + 'is_article_deleted', + 'is_need_verification', + 'is_login_required', + 'time_str_to_microseconds', +] + diff --git a/utils/auth_manager.py b/utils/auth_manager.py new file mode 100644 index 0000000..235df2a --- /dev/null +++ b/utils/auth_manager.py @@ -0,0 +1,194 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# Copyright (C) 2026 tmwgsicp +# Licensed under the GNU Affero General Public License v3.0 +# See LICENSE file in the project root for full license text. 
+# SPDX-License-Identifier: AGPL-3.0-only +""" +认证管理器 - FastAPI版本 +管理微信登录凭证(Token、Cookie等) +""" + +import os +import time +from pathlib import Path +from typing import Optional, Dict +from dotenv import load_dotenv, set_key, find_dotenv + +class AuthManager: + """认证管理单例类""" + + _instance = None + + def __new__(cls): + if cls._instance is None: + cls._instance = super(AuthManager, cls).__new__(cls) + cls._instance._initialized = False + return cls._instance + + def __init__(self): + if self._initialized: + return + + # 设置.env文件路径(python-api目录下) + self.base_dir = Path(__file__).parent.parent + self.env_path = self.base_dir / ".env" + + # 加载环境变量 + self._load_credentials() + self._initialized = True + + def _load_credentials(self): + """从.env文件加载凭证""" + if self.env_path.exists(): + load_dotenv(self.env_path, override=True) + + self.credentials = { + "token": os.getenv("WECHAT_TOKEN", ""), + "cookie": os.getenv("WECHAT_COOKIE", ""), + "fakeid": os.getenv("WECHAT_FAKEID", ""), + "nickname": os.getenv("WECHAT_NICKNAME", ""), + "expire_time": int(os.getenv("WECHAT_EXPIRE_TIME") or 0) + } + + def save_credentials(self, token: str, cookie: str, fakeid: str, + nickname: str, expire_time: int) -> bool: + """ + 保存凭证到.env文件 + + Args: + token: 微信Token + cookie: 微信Cookie + fakeid: 公众号ID + nickname: 公众号名称 + expire_time: 过期时间(毫秒时间戳) + + Returns: + 保存是否成功 + """ + try: + # 更新内存中的凭证 + self.credentials.update({ + "token": token, + "cookie": cookie, + "fakeid": fakeid, + "nickname": nickname, + "expire_time": expire_time + }) + + # 确保.env文件存在 + if not self.env_path.exists(): + self.env_path.touch() + + # 保存到.env文件 + env_file = str(self.env_path) + set_key(env_file, "WECHAT_TOKEN", token) + set_key(env_file, "WECHAT_COOKIE", cookie) + set_key(env_file, "WECHAT_FAKEID", fakeid) + set_key(env_file, "WECHAT_NICKNAME", nickname) + set_key(env_file, "WECHAT_EXPIRE_TIME", str(expire_time)) + + print(f"✅ 凭证已保存到: {self.env_path}") + return True + except Exception as e: + print(f"❌ 保存凭证失败: {e}") + 
return False + + def get_credentials(self) -> Optional[Dict[str, any]]: + """ + 获取有效的凭证 + + Returns: + 凭证字典,如果未登录则返回None + """ + # 重新加载以获取最新的凭证 + self._load_credentials() + + if not self.credentials.get("token") or not self.credentials.get("cookie"): + return None + + return self.credentials + + def get_token(self) -> Optional[str]: + """获取Token""" + creds = self.get_credentials() + return creds["token"] if creds else None + + def get_cookie(self) -> Optional[str]: + """获取Cookie""" + creds = self.get_credentials() + return creds["cookie"] if creds else None + + def get_status(self) -> Dict: + """ + 获取登录状态 + + Returns: + 状态字典 + """ + # 重新加载凭证 + self._load_credentials() + + if not self.credentials.get("token") or not self.credentials.get("cookie"): + return { + "authenticated": False, + "loggedIn": False, + "account": "", + "status": "未登录,请先扫码登录" + } + + # 检查是否过期 + expire_time = self.credentials.get("expire_time", 0) + current_time = int(time.time() * 1000) # 转换为毫秒 + is_expired = expire_time > 0 and current_time > expire_time + + return { + "authenticated": True, + "loggedIn": True, + "account": self.credentials.get("nickname", ""), + "nickname": self.credentials.get("nickname", ""), + "fakeid": self.credentials.get("fakeid", ""), + "expireTime": expire_time, + "isExpired": is_expired, + "status": "登录可能已过期,建议重新登录" if is_expired else "登录正常" + } + + def clear_credentials(self) -> bool: + """ + 清除凭证 + + Returns: + 清除是否成功 + """ + try: + # 清除内存中的凭证 + self.credentials = { + "token": "", + "cookie": "", + "fakeid": "", + "nickname": "", + "expire_time": 0 + } + + # 清除进程环境变量中残留的凭证 + env_keys = [ + "WECHAT_TOKEN", "WECHAT_COOKIE", "WECHAT_FAKEID", + "WECHAT_NICKNAME", "WECHAT_EXPIRE_TIME" + ] + for key in env_keys: + os.environ.pop(key, None) + + # 清空 .env 文件中的凭证字段(保留其他配置) + if self.env_path.exists(): + env_file = str(self.env_path) + for key in env_keys: + set_key(env_file, key, "") + print(f"✅ 凭证已清除: {self.env_path}") + + return True + except Exception as e: + print(f"❌ 
清除凭证失败: {e}") + return False + +# 创建全局单例 +auth_manager = AuthManager() diff --git a/utils/helpers.py b/utils/helpers.py new file mode 100644 index 0000000..e17ae37 --- /dev/null +++ b/utils/helpers.py @@ -0,0 +1,229 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# Copyright (C) 2026 tmwgsicp +# Licensed under the GNU Affero General Public License v3.0 +# See LICENSE file in the project root for full license text. +# SPDX-License-Identifier: AGPL-3.0-only +""" +辅助函数模块 +提供各种工具函数 +""" + +import re +from typing import Dict, Optional +from urllib.parse import urlparse, parse_qs + +def html_to_text(html: str) -> str: + """将 HTML 转为可读纯文本""" + import html as html_module + text = re.sub(r'', '\n', html, flags=re.IGNORECASE) + text = re.sub(r'', '\n', text, flags=re.IGNORECASE) + text = re.sub(r']*>', '\n---\n', text, flags=re.IGNORECASE) + text = re.sub(r'<[^>]+>', '', text) + text = html_module.unescape(text) + text = re.sub(r'[ \t]+', ' ', text) + text = re.sub(r'\n{3,}', '\n\n', text) + return text.strip() + + +def parse_article_url(url: str) -> Optional[Dict[str, str]]: + """ + 解析微信文章URL,提取参数 + + Args: + url: 微信文章URL + + Returns: + 包含__biz, mid, idx, sn的字典,如果解析失败返回None + """ + try: + # 确保是微信文章URL + if not url or 'mp.weixin.qq.com/s' not in url: + return None + + parsed = urlparse(str(url)) # 确保url是字符串 + params = parse_qs(parsed.query) + + __biz = params.get('__biz', [''])[0] + mid = params.get('mid', [''])[0] + idx = params.get('idx', [''])[0] + sn = params.get('sn', [''])[0] + + # 必须有这4个参数才返回 + if not all([__biz, mid, idx, sn]): + return None + + return { + '__biz': __biz, + 'mid': mid, + 'idx': idx, + 'sn': sn + } + except Exception: + return None + +def extract_article_info(html: str, params: Optional[Dict] = None) -> Dict: + """ + 从HTML中提取文章信息 + + Args: + html: 文章HTML内容 + params: URL参数(可选,用于返回__biz等信息) + + Returns: + 文章信息字典 + """ + + title = '' + title_match = ( + re.search(r']*class=[^>]*rich_media_title[^>]*>([\s\S]*?)', html, re.IGNORECASE) or + 
re.search(r']*class=[^>]*rich_media_title[^>]*>([\s\S]*?)', html, re.IGNORECASE) or + re.search(r"var\s+msg_title\s*=\s*'([^']+)'\.html\(false\)", html) or + re.search(r']+>', '', title) + title = title.replace('"', '"').replace('&', '&').strip() + + author = '' + author_match = ( + re.search(r']*id="js_name"[^>]*>([\s\S]*?)', html, re.IGNORECASE) or + re.search(r'var\s+nickname\s*=\s*"([^"]+)"', html) or + re.search(r']*class=[^>]*rich_media_meta_nickname[^>]*>([^<]+)', html, re.IGNORECASE) + ) + + if author_match: + author = author_match.group(1) + author = re.sub(r'<[^>]+>', '', author).strip() + + publish_time = 0 + time_match = ( + re.search(r'var\s+publish_time\s*=\s*"(\d+)"', html) or + re.search(r'var\s+ct\s*=\s*"(\d+)"', html) or + re.search(r']*id="publish_time"[^>]*>([^<]+)', html) + ) + + if time_match: + try: + publish_time = int(time_match.group(1)) + except (ValueError, TypeError): + pass + + content = '' + images = [] + + # 方法1: 匹配 id="js_content" + content_match = re.search(r']*id="js_content"[^>]*>([\s\S]*?)]*>[\s\S]*?', html, re.IGNORECASE) + + if not content_match: + # 方法2: 匹配 class包含rich_media_content + content_match = re.search(r']*class="[^"]*rich_media_content[^"]*"[^>]*>([\s\S]*?)', html, re.IGNORECASE) + + if content_match and content_match.group(1): + content = content_match.group(1).strip() + else: + # 方法3: 手动截取 + js_content_pos = html.find('id="js_content"') + if js_content_pos > 0: + start = html.find('>', js_content_pos) + 1 + script_pos = html.find(' start: + content = html[start:script_pos].strip() + if content: + # 提取data-src属性 + img_regex = re.compile(r']+data-src="([^"]+)"') + for img_match in img_regex.finditer(content): + img_url = img_match.group(1) + if img_url not in images: + images.append(img_url) + + # 提取src属性 + img_regex2 = re.compile(r']+src="([^"]+)"') + for img_match in img_regex2.finditer(content): + img_url = img_match.group(1) + if not img_url.startswith('data:') and img_url not in images: + images.append(img_url) 
+ + content = re.sub(r']*>[\s\S]*?', '', content, flags=re.IGNORECASE) + + __biz = params.get('__biz', 'unknown') if params else 'unknown' + publish_time_str = '' + if publish_time > 0: + from datetime import datetime + dt = datetime.fromtimestamp(publish_time) + publish_time_str = dt.strftime('%Y-%m-%d %H:%M:%S') + + return { + 'title': title, + 'content': content, + 'plain_content': html_to_text(content) if content else '', + 'images': images, + 'author': author, + 'publish_time': publish_time, + 'publish_time_str': publish_time_str, + '__biz': __biz + } + +def is_article_deleted(html: str) -> bool: + """检查文章是否被删除""" + return '已删除' in html or 'deleted' in html.lower() + +def is_need_verification(html: str) -> bool: + """检查是否需要验证""" + return ('verify' in html.lower() or + '验证' in html or + '环境异常' in html) + +def is_login_required(html: str) -> bool: + """检查是否需要登录""" + return '请登录' in html or 'login' in html.lower() + +def time_str_to_microseconds(time_str: str) -> int: + """ + 将时间字符串转换为微秒 + + 支持格式: + - "5s" -> 5秒 + - "1m30s" -> 1分30秒 + - "1h30m" -> 1小时30分 + - "00:01:30" -> 1分30秒 + - 直接数字 -> 微秒 + """ + if isinstance(time_str, int): + return time_str + + # 尝试解析为整数(已经是微秒) + try: + return int(time_str) + except ValueError: + pass + + # 解析时间字符串 + total_seconds = 0 + + # 格式:HH:MM:SS 或 MM:SS + if ':' in time_str: + parts = time_str.split(':') + if len(parts) == 3: + total_seconds = int(parts[0]) * 3600 + int(parts[1]) * 60 + int(parts[2]) + elif len(parts) == 2: + total_seconds = int(parts[0]) * 60 + int(parts[1]) + else: + # 格式:1h30m45s + hours = re.search(r'(\d+)h', time_str) + minutes = re.search(r'(\d+)m', time_str) + seconds = re.search(r'(\d+)s', time_str) + + if hours: + total_seconds += int(hours.group(1)) * 3600 + if minutes: + total_seconds += int(minutes.group(1)) * 60 + if seconds: + total_seconds += int(seconds.group(1)) + + return total_seconds * 1000000 # 转换为微秒 + + diff --git a/utils/rate_limiter.py b/utils/rate_limiter.py new file mode 100644 index 
0000000..1f44bc1 --- /dev/null +++ b/utils/rate_limiter.py @@ -0,0 +1,124 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# Copyright (C) 2026 tmwgsicp +# Licensed under the GNU Affero General Public License v3.0 +# See LICENSE file in the project root for full license text. +# SPDX-License-Identifier: AGPL-3.0-only +""" +API限频模块 +防止触发微信风控 +""" + +import time +from typing import Dict, Optional +from collections import deque +import threading + +class RateLimiter: + """ + 智能限频器 + + 策略: + 1. 全局限制: 每分钟最多10个请求 + 2. 单IP限制: 每分钟最多5个请求 + 3. 文章获取: 每个文章间隔至少3秒 + """ + + def __init__(self): + self._global_requests = deque() # 全局请求记录 + self._ip_requests: Dict[str, deque] = {} # IP请求记录 + self._article_requests = deque() # 文章请求记录 + self._lock = threading.Lock() + + # 限制配置 + self.GLOBAL_WINDOW = 60 # 全局窗口60秒 + self.GLOBAL_LIMIT = 10 # 全局限制10个请求/分钟 + + self.IP_WINDOW = 60 # IP窗口60秒 + self.IP_LIMIT = 5 # 单IP限制5个请求/分钟 + + self.ARTICLE_INTERVAL = 3 # 文章获取间隔3秒 + + def check_rate_limit(self, ip: str, endpoint: str) -> tuple[bool, Optional[str]]: + """ + 检查是否超过限频 + + Args: + ip: 客户端IP + endpoint: 请求端点 + + Returns: + (是否允许, 错误消息) + """ + with self._lock: + current_time = time.time() + + # 清理过期记录 + self._cleanup_old_requests(current_time) + + # 检查全局限制 + if len(self._global_requests) >= self.GLOBAL_LIMIT: + oldest = self._global_requests[0] + wait_time = int(self.GLOBAL_WINDOW - (current_time - oldest) + 1) + return False, f"全局请求过多,请{wait_time}秒后重试" + + # 检查IP限制 + if ip not in self._ip_requests: + self._ip_requests[ip] = deque() + + if len(self._ip_requests[ip]) >= self.IP_LIMIT: + oldest = self._ip_requests[ip][0] + wait_time = int(self.IP_WINDOW - (current_time - oldest) + 1) + return False, f"请求过于频繁,请{wait_time}秒后重试" + + # 检查文章获取间隔 + if endpoint == "/api/article" and self._article_requests: + last_article = self._article_requests[-1] + if current_time - last_article < self.ARTICLE_INTERVAL: + wait_time = int(self.ARTICLE_INTERVAL - (current_time - last_article) + 1) + return False, 
f"文章获取过快,请{wait_time}秒后重试(防风控)" + + # 记录请求 + self._global_requests.append(current_time) + self._ip_requests[ip].append(current_time) + + if endpoint == "/api/article": + self._article_requests.append(current_time) + + return True, None + + def _cleanup_old_requests(self, current_time: float): + """清理过期的请求记录""" + # 清理全局请求 + while self._global_requests and current_time - self._global_requests[0] > self.GLOBAL_WINDOW: + self._global_requests.popleft() + + # 清理IP请求 + for ip in list(self._ip_requests.keys()): + while self._ip_requests[ip] and current_time - self._ip_requests[ip][0] > self.IP_WINDOW: + self._ip_requests[ip].popleft() + + # 删除空记录 + if not self._ip_requests[ip]: + del self._ip_requests[ip] + + # 清理文章请求(保留最近10条) + while len(self._article_requests) > 10: + self._article_requests.popleft() + + def get_stats(self) -> Dict: + """获取限频统计""" + with self._lock: + current_time = time.time() + self._cleanup_old_requests(current_time) + + return { + "global_requests": len(self._global_requests), + "global_limit": self.GLOBAL_LIMIT, + "active_ips": len(self._ip_requests), + "article_requests": len(self._article_requests) + } + +# 全局限频器实例 +rate_limiter = RateLimiter() + diff --git a/utils/webhook.py b/utils/webhook.py new file mode 100644 index 0000000..4b7685b --- /dev/null +++ b/utils/webhook.py @@ -0,0 +1,114 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# Copyright (C) 2026 tmwgsicp +# Licensed under the GNU Affero General Public License v3.0 +# See LICENSE file in the project root for full license text. 
+# SPDX-License-Identifier: AGPL-3.0-only +""" +Webhook 通知模块 +支持企业微信群机器人和通用 Webhook +""" + +import httpx +import time +import os +import logging +from typing import Optional, Dict +from datetime import datetime + +logger = logging.getLogger("webhook") + +EVENT_LABELS = { + "login_success": "登录成功", + "login_expired": "登录过期", + "verification_required": "触发验证", +} + + +class WebhookNotifier: + + def __init__(self): + self._last_notification: Dict[str, float] = {} + self._notification_interval = int( + os.getenv("WEBHOOK_NOTIFICATION_INTERVAL", "300") + ) + + @property + def webhook_url(self) -> str: + """每次读取时从 .env 刷新,确保运行中修改配置也能生效""" + from pathlib import Path + env_path = Path(__file__).resolve().parent.parent / ".env" + if env_path.exists(): + from dotenv import dotenv_values + vals = dotenv_values(env_path) + url = vals.get("WEBHOOK_URL", "") + else: + url = os.getenv("WEBHOOK_URL", "") + return (url or "").strip() + + @property + def enabled(self) -> bool: + return bool(self.webhook_url) + + def _is_wecom(self, url: str) -> bool: + return "qyapi.weixin.qq.com" in url + + def _build_payload(self, url: str, event: str, data: Dict) -> dict: + """根据 webhook 类型构造消息体""" + label = EVENT_LABELS.get(event, event) + ts = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + + lines = [f"**{label}**", f"> {ts}"] + for k, v in (data or {}).items(): + if v: + lines.append(f"> {k}: {v}") + + if self._is_wecom(url): + return { + "msgtype": "markdown", + "markdown": {"content": "\n".join(lines)}, + } + + return { + "event": event, + "timestamp": int(time.time()), + "timestamp_str": ts, + "message": "\n".join(lines), + "data": data or {}, + } + + async def notify(self, event: str, data: Optional[Dict] = None) -> bool: + url = self.webhook_url + if not url: + return False + + now = time.time() + last = self._last_notification.get(event, 0) + if now - last < self._notification_interval: + logger.debug("Skip duplicate webhook: %s (%ds since last)", event, int(now - last)) + return False + 
+ payload = self._build_payload(url, event, data or {}) + + try: + async with httpx.AsyncClient(timeout=10.0) as client: + resp = await client.post(url, json=payload) + resp.raise_for_status() + + ct = resp.headers.get("content-type", "") + body = resp.json() if "json" in ct else {} + errcode = body.get("errcode", 0) + if errcode != 0: + errmsg = body.get("errmsg", "unknown") + logger.error("Webhook errcode=%s: %s", errcode, errmsg) + return False + + self._last_notification[event] = now + logger.info("Webhook sent: %s", event) + return True + except Exception as e: + logger.error("Webhook failed: %s - %s", event, e) + return False + + +webhook = WebhookNotifier()