From a84452ba1c8ff4eb7b2264c6565c3d83cc2c059d Mon Sep 17 00:00:00 2001 From: hejianjun Date: Sun, 30 Jan 2022 03:37:08 +0800 Subject: [PATCH] =?UTF-8?q?=E6=94=AF=E6=8C=8191=E5=88=B6=E7=89=87=E5=AE=A4?= =?UTF-8?q?=E5=92=8C=E9=BA=BB=E8=B1=86=EF=BC=8C=E4=BC=98=E5=8C=96=E5=9B=BE?= =?UTF-8?q?=E7=89=87=E8=A3=81=E5=89=AA=E5=8A=9F=E8=83=BD=E6=B7=BB=E5=8A=A0?= =?UTF-8?q?=E4=BA=86=E4=BA=BA=E8=84=B8=E8=AF=86=E5=88=AB=E6=A8=A1=E5=9D=97?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- WebCrawler/__init__.py | 6 +- WebCrawler/javdb.py | 10 +-- WebCrawler/madou.py | 164 +++++++++++++++++++++++++++++++++++++++++ WebCrawler/mv91.py | 158 +++++++++++++++++++++++++++++++++++++++ config.ini | 6 +- config.py | 16 ++++ core.py | 133 +++++++++++++++++++++++---------- requirements.txt | 1 + 8 files changed, 449 insertions(+), 45 deletions(-) create mode 100644 WebCrawler/madou.py create mode 100644 WebCrawler/mv91.py diff --git a/WebCrawler/__init__.py b/WebCrawler/__init__.py index c661700..f690e0c 100644 --- a/WebCrawler/__init__.py +++ b/WebCrawler/__init__.py @@ -22,6 +22,8 @@ from . import xcity from . import dlsite from . import carib from . import fc2club +from . import mv91 +from . import madou def get_data_state(data: dict) -> bool: # 元数据获取失败检测 @@ -57,7 +59,9 @@ def get_data_from_json(file_number, oCC): # 从JSON返回元数据 # "javlib": javlib.main, "dlsite": dlsite.main, "carib": carib.main, - "fc2club": fc2club.main + "fc2club": fc2club.main, + "mv91": mv91.main, + "madou": madou.main } conf = config.getInstance() diff --git a/WebCrawler/javdb.py b/WebCrawler/javdb.py index 8a5de7a..e46cd7f 100755 --- a/WebCrawler/javdb.py +++ b/WebCrawler/javdb.py @@ -1,3 +1,4 @@ +import logging import sys sys.path.append('../') import re @@ -139,7 +140,8 @@ def getCover_small(html, index=0): def getTrailer(htmlcode): # 获取预告片 video_pather = re.compile(r'