From e226aa255e20088589096294b750d4ca3e34adfb Mon Sep 17 00:00:00 2001 From: godvmxi Date: Sat, 29 Jan 2022 09:42:28 +0000 Subject: [PATCH 01/22] describe foloder_path var in a global area to avoid no assign error in linux --- Movie_Data_Capture.py | 1 + 1 file changed, 1 insertion(+) diff --git a/Movie_Data_Capture.py b/Movie_Data_Capture.py index a4f91b8..b4f9854 100644 --- a/Movie_Data_Capture.py +++ b/Movie_Data_Capture.py @@ -455,6 +455,7 @@ def main(): main_mode = conf.main_mode() + folder_path = "" if not main_mode in (1, 2, 3): print(f"[-]Main mode must be 1 or 2 or 3! You can run '{os.path.basename(sys.argv[0])} --help' for more help.") sys.exit(4) From a84452ba1c8ff4eb7b2264c6565c3d83cc2c059d Mon Sep 17 00:00:00 2001 From: hejianjun Date: Sun, 30 Jan 2022 03:37:08 +0800 Subject: [PATCH 02/22] =?UTF-8?q?=E6=94=AF=E6=8C=8191=E5=88=B6=E7=89=87?= =?UTF-8?q?=E5=AE=A4=E5=92=8C=E9=BA=BB=E8=B1=86=EF=BC=8C=E4=BC=98=E5=8C=96?= =?UTF-8?q?=E5=9B=BE=E7=89=87=E8=A3=81=E5=89=AA=E5=8A=9F=E8=83=BD=E6=B7=BB?= =?UTF-8?q?=E5=8A=A0=E4=BA=86=E4=BA=BA=E8=84=B8=E8=AF=86=E5=88=AB=E6=A8=A1?= =?UTF-8?q?=E5=9D=97?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- WebCrawler/__init__.py | 6 +- WebCrawler/javdb.py | 10 +-- WebCrawler/madou.py | 164 +++++++++++++++++++++++++++++++++++++++++ WebCrawler/mv91.py | 158 +++++++++++++++++++++++++++++++++++++++ config.ini | 6 +- config.py | 16 ++++ core.py | 133 +++++++++++++++++++++++---------- requirements.txt | 1 + 8 files changed, 449 insertions(+), 45 deletions(-) create mode 100644 WebCrawler/madou.py create mode 100644 WebCrawler/mv91.py diff --git a/WebCrawler/__init__.py b/WebCrawler/__init__.py index c661700..f690e0c 100644 --- a/WebCrawler/__init__.py +++ b/WebCrawler/__init__.py @@ -22,6 +22,8 @@ from . import xcity from . import dlsite from . import carib from . import fc2club +from . import mv91 +from . import madou def get_data_state(data: dict) -> bool: # 元数据获取失败检测 @@ -57,7 +59,9 @@ def get_data_from_json(file_number, oCC): # 从JSON返回元数据 # "javlib": javlib.main, "dlsite": dlsite.main, "carib": carib.main, - "fc2club": fc2club.main + "fc2club": fc2club.main, + "mv91": mv91.main, + "madou": madou.main } conf = config.getInstance() diff --git a/WebCrawler/javdb.py b/WebCrawler/javdb.py index 8a5de7a..e46cd7f 100755 --- a/WebCrawler/javdb.py +++ b/WebCrawler/javdb.py @@ -1,3 +1,4 @@ +import logging import sys sys.path.append('../') import re @@ -139,7 +140,8 @@ def getCover_small(html, index=0): def getTrailer(htmlcode): # 获取预告片 video_pather = re.compile(r'