1. Dynamically load scrapers

2. Fix PyInstaller's subpackage path-lookup issue
3. Move madou's movie-ID (番号) handling into the scraper itself
4. Filter redundant tags in javday
hejianjun
2023-03-27 15:37:00 +08:00
parent 24e8b75dab
commit 1d46a70eed
9 changed files with 89 additions and 125 deletions
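Item 1 replaces the hard-coded import list and the func_mapping dicts with runtime lookups. A minimal sketch of the pattern this diff adopts, assuming each scraper lives in a scrapinglib.<source> module and exposes a class named like the capitalized source string (load_parser is an invented helper name):

import importlib

from scrapinglib.parser import Parser  # base class, per the diff below

def load_parser(source: str) -> Parser:
    # Resolve scrapinglib.<source> relative to the scrapinglib package,
    # then fetch the class named after the module, e.g. 'javbus' -> Javbus.
    module = importlib.import_module('.' + source, 'scrapinglib')
    parser_type = getattr(module, source.capitalize())
    return parser_type()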


@@ -2,30 +2,9 @@
 import re
 import json
+from .parser import Parser
 import config
-from .airav import Airav
-from .carib import Carib
-from .dlsite import Dlsite
-from .fanza import Fanza
-from .gcolle import Gcolle
-from .getchu import Getchu
-from .jav321 import Jav321
-from .javdb import Javdb
-from .fc2 import Fc2
-from .madou import Madou
-from .mgstage import Mgstage
-from .javbus import Javbus
-from .xcity import Xcity
-from .avsox import Avsox
-from .javlibrary import Javlibrary
-from .javday import Javday
-from .pissplay import Pissplay
-from .javmenu import Javmenu
-from .tmdb import Tmdb
-from .imdb import Imdb
+import importlib
 def search(number, sources: str = None, **kwargs):
     """ Search for info by `movie ID / movie title`
@@ -53,35 +32,11 @@ class Scraping:
     """
     """
     adult_full_sources = ['javlibrary', 'javdb', 'javbus', 'airav', 'fanza', 'xcity', 'jav321',
-                          'mgstage', 'fc2', 'avsox', 'dlsite', 'carib', 'madou',
-                          'getchu', 'gcolle','javday','pissplay','javmenu'
+                          'mgstage', 'fc2', 'avsox', 'dlsite', 'carib', 'madou',
+                          'getchu', 'gcolle', 'javday', 'pissplay', 'javmenu'
                           ]
-    adult_func_mapping = {
-        'avsox': Avsox().scrape,
-        'javbus': Javbus().scrape,
-        'xcity': Xcity().scrape,
-        'mgstage': Mgstage().scrape,
-        'madou': Madou().scrape,
-        'fc2': Fc2().scrape,
-        'dlsite': Dlsite().scrape,
-        'jav321': Jav321().scrape,
-        'fanza': Fanza().scrape,
-        'airav': Airav().scrape,
-        'carib': Carib().scrape,
-        'gcolle': Gcolle().scrape,
-        'javdb': Javdb().scrape,
-        'getchu': Getchu().scrape,
-        'javlibrary': Javlibrary().scrape,
-        'javday': Javday().scrape,
-        'pissplay': Pissplay().scrape,
-        'javmenu': Javmenu().scrape
-    }
     general_full_sources = ['tmdb', 'imdb']
-    general_func_mapping = {
-        'tmdb': Tmdb().scrape,
-        'imdb': Imdb().scrape,
-    }
     debug = False
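With both func_mapping dicts gone, the runtime contract is only that a source name appears in one of the lists above and that scrapinglib/<source>.py defines a class named source.capitalize(). A hypothetical new scraper under that convention (module and class names invented for illustration):

# scrapinglib/newsite.py -- hypothetical; 'newsite' would also be appended
# to adult_full_sources in this file.
from .parser import Parser

class Newsite(Parser):
    def scrape(self, number, core):
        # Signature assumed from the call sites in this diff: return a
        # JSON string on success, or 404 when the number is not found.
        return 404

One caveat of this convention: str.capitalize() lowercases everything after the first character, so a module named fc2 must define Fc2, not FC2.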
@@ -126,13 +81,16 @@ class Scraping:
             if self.debug:
                 print('[+]select', source)
             try:
-                data = self.general_func_mapping[source](name, self)
+                module = importlib.import_module('.' + source, 'scrapinglib')
+                parser_type = getattr(module, source.capitalize())
+                parser: Parser = parser_type()
+                data = parser.scrape(name, self)
                 if data == 404:
                     continue
                 json_data = json.loads(data)
             except Exception as e:
-                # print('[!] Something went wrong')
-                # print(e)
+                print('[!] Something went wrong')
+                print(e)
                 pass
             # if any service returns a valid result, break
             if self.get_data_state(json_data):
@@ -162,13 +120,16 @@ class Scraping:
             if self.debug:
                 print('[+]select', source)
             try:
-                data = self.adult_func_mapping[source](number, self)
+                module = importlib.import_module('.' + source, 'scrapinglib')
+                parser_type = getattr(module, source.capitalize())
+                parser: Parser = parser_type()
+                data = parser.scrape(number, self)
                 if data == 404:
                     continue
                 json_data = json.loads(data)
             except Exception as e:
-                # print('[!] Something went wrong')
-                # print(e)
+                print('[!] Something went wrong')
+                print(e)
                 pass
             # json_data = self.func_mapping[source](number, self)
             # if any service returns a valid result, break
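After these two hunks, the general and adult branches differ only in whether a title or a movie ID is passed to scrape(); the duplicated load-and-scrape step could be hoisted into a shared helper. A sketch, not part of this commit (_scrape_with is an invented name):

import importlib

from .parser import Parser

def _scrape_with(source: str, key: str, core) -> str:
    # Shared body for both branches: resolve the parser class dynamically,
    # instantiate it, and run it against the search key (title or movie ID).
    module = importlib.import_module('.' + source, 'scrapinglib')
    parser: Parser = getattr(module, source.capitalize())()
    return parser.scrape(key, core)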
@@ -216,7 +177,7 @@ class Scraping:
         # check sources in func_mapping
         todel = []
         for s in sources:
-            if not s in self.general_func_mapping:
+            if not s in self.general_full_sources:
                 print('[!] Source Not Exist : ' + s)
                 todel.append(s)
         for d in todel:
@@ -235,7 +196,7 @@ class Scraping:
             sources.insert(0, sources.pop(sources.index(source)))
             return sources
-        if len(sources) <= len(self.adult_func_mapping):
+        if len(sources) <= len(self.adult_full_sources):
             # if the input file name matches certain rules,
             # move some web service to the beginning of the list
             lo_file_number = file_number.lower()
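The sources.insert(0, sources.pop(sources.index(source))) line above is a move-to-front idiom that reorders the list in place; a worked example with invented source names:

sources = ['javdb', 'javbus', 'avsox']
sources.insert(0, sources.pop(sources.index('avsox')))
print(sources)  # ['avsox', 'javdb', 'javbus']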
@@ -271,7 +232,7 @@ class Scraping:
         # check sources in func_mapping
         todel = []
         for s in sources:
-            if not s in self.adult_func_mapping and config.getInstance().debug():
+            if not s in self.adult_full_sources and config.getInstance().debug():
                 print('[!] Source Not Exist : ' + s)
                 todel.append(s)
         for d in todel:
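Both validation hunks collect offenders into todel first because deleting from a list while iterating over it skips elements. An equivalent single-pass rewrite, sketched for comparison only (it drops the per-source warning print):

# Keep only known sources instead of deleting unknown ones afterwards.
sources = [s for s in sources if s in self.adult_full_sources]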