1. Dynamically load scrapers

2. Fix PyInstaller's subpackage path-lookup issue
3. Move madou's movie-ID (番号) handling into the scraper itself
4. Filter redundant tags in javday
hejianjun
2023-03-27 15:37:00 +08:00
parent 24e8b75dab
commit 1d46a70eed
9 changed files with 89 additions and 125 deletions
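Item 1 replaces the hard-coded import list and the func_mapping dicts with runtime lookups. A minimal sketch of the pattern this diff adopts, assuming each scraper lives in a scrapinglib.<source> module and exposes a class named like the capitalized source string (load_parser is an invented helper name):

import importlib

from scrapinglib.parser import Parser  # base class, per the diff below

def load_parser(source: str) -> Parser:
    # Resolve scrapinglib.<source> relative to the scrapinglib package,
    # then fetch the class named after the module, e.g. 'javbus' -> Javbus.
    module = importlib.import_module('.' + source, 'scrapinglib')
    parser_type = getattr(module, source.capitalize())
    return parser_type()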


@@ -2,30 +2,9 @@
 import re
 import json
+from .parser import Parser
 import config
-from .airav import Airav
-from .carib import Carib
-from .dlsite import Dlsite
-from .fanza import Fanza
-from .gcolle import Gcolle
-from .getchu import Getchu
-from .jav321 import Jav321
-from .javdb import Javdb
-from .fc2 import Fc2
-from .madou import Madou
-from .mgstage import Mgstage
-from .javbus import Javbus
-from .xcity import Xcity
-from .avsox import Avsox
-from .javlibrary import Javlibrary
-from .javday import Javday
-from .pissplay import Pissplay
-from .javmenu import Javmenu
-from .tmdb import Tmdb
-from .imdb import Imdb
+import importlib
 def search(number, sources: str = None, **kwargs):
     """ Search for info by `movie ID / movie title`
@@ -53,35 +32,11 @@ class Scraping:
     """
     """
     adult_full_sources = ['javlibrary', 'javdb', 'javbus', 'airav', 'fanza', 'xcity', 'jav321',
-                          'mgstage', 'fc2', 'avsox', 'dlsite', 'carib', 'madou',
-                          'getchu', 'gcolle','javday','pissplay','javmenu'
+                          'mgstage', 'fc2', 'avsox', 'dlsite', 'carib', 'madou',
+                          'getchu', 'gcolle', 'javday', 'pissplay', 'javmenu'
                           ]
-    adult_func_mapping = {
-        'avsox': Avsox().scrape,
-        'javbus': Javbus().scrape,
-        'xcity': Xcity().scrape,
-        'mgstage': Mgstage().scrape,
-        'madou': Madou().scrape,
-        'fc2': Fc2().scrape,
-        'dlsite': Dlsite().scrape,
-        'jav321': Jav321().scrape,
-        'fanza': Fanza().scrape,
-        'airav': Airav().scrape,
-        'carib': Carib().scrape,
-        'gcolle': Gcolle().scrape,
-        'javdb': Javdb().scrape,
-        'getchu': Getchu().scrape,
-        'javlibrary': Javlibrary().scrape,
-        'javday': Javday().scrape,
-        'pissplay': Pissplay().scrape,
-        'javmenu': Javmenu().scrape
-    }
     general_full_sources = ['tmdb', 'imdb']
-    general_func_mapping = {
-        'tmdb': Tmdb().scrape,
-        'imdb': Imdb().scrape,
-    }
     debug = False
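With both func_mapping dicts gone, the runtime contract is only that a source name appears in one of the lists above and that scrapinglib/<source>.py defines a class named source.capitalize(). A hypothetical new scraper under that convention (module and class names invented for illustration):

# scrapinglib/newsite.py -- hypothetical; 'newsite' would also be appended
# to adult_full_sources in this file.
from .parser import Parser

class Newsite(Parser):
    def scrape(self, number, core):
        # Signature assumed from the call sites in this diff: return a
        # JSON string on success, or 404 when the number is not found.
        return 404

One caveat of this convention: str.capitalize() lowercases everything after the first character, so a module named fc2 must define Fc2, not FC2.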
@@ -126,13 +81,16 @@ class Scraping:
             if self.debug:
                 print('[+]select', source)
             try:
-                data = self.general_func_mapping[source](name, self)
+                module = importlib.import_module('.' + source, 'scrapinglib')
+                parser_type = getattr(module, source.capitalize())
+                parser: Parser = parser_type()
+                data = parser.scrape(name, self)
                 if data == 404:
                     continue
                 json_data = json.loads(data)
             except Exception as e:
-                # print('[!] Something went wrong')
-                # print(e)
+                print('[!] Something went wrong')
+                print(e)
                 pass
             # if any service returns a valid result, break
             if self.get_data_state(json_data):
@@ -162,13 +120,16 @@ class Scraping:
             if self.debug:
                 print('[+]select', source)
             try:
-                data = self.adult_func_mapping[source](number, self)
+                module = importlib.import_module('.' + source, 'scrapinglib')
+                parser_type = getattr(module, source.capitalize())
+                parser: Parser = parser_type()
+                data = parser.scrape(number, self)
                 if data == 404:
                     continue
                 json_data = json.loads(data)
             except Exception as e:
-                # print('[!] Something went wrong')
-                # print(e)
+                print('[!] Something went wrong')
+                print(e)
                 pass
             # json_data = self.func_mapping[source](number, self)
             # if any service returns a valid result, break
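After these two hunks, the general and adult branches differ only in whether a title or a movie ID is passed to scrape(); the duplicated load-and-scrape step could be hoisted into a shared helper. A sketch, not part of this commit (_scrape_with is an invented name):

import importlib

from .parser import Parser

def _scrape_with(source: str, key: str, core) -> str:
    # Shared body for both branches: resolve the parser class dynamically,
    # instantiate it, and run it against the search key (title or movie ID).
    module = importlib.import_module('.' + source, 'scrapinglib')
    parser: Parser = getattr(module, source.capitalize())()
    return parser.scrape(key, core)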
@@ -216,7 +177,7 @@ class Scraping:
         # check sources in func_mapping
         todel = []
         for s in sources:
-            if not s in self.general_func_mapping:
+            if not s in self.general_full_sources:
                 print('[!] Source Not Exist : ' + s)
                 todel.append(s)
         for d in todel:
@@ -235,7 +196,7 @@ class Scraping:
             sources.insert(0, sources.pop(sources.index(source)))
             return sources
-        if len(sources) <= len(self.adult_func_mapping):
+        if len(sources) <= len(self.adult_full_sources):
             # if the input file name matches certain rules,
             # move some web service to the beginning of the list
             lo_file_number = file_number.lower()
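The sources.insert(0, sources.pop(sources.index(source))) line above is a move-to-front idiom that reorders the list in place; a worked example with invented source names:

sources = ['javdb', 'javbus', 'avsox']
sources.insert(0, sources.pop(sources.index('avsox')))
print(sources)  # ['avsox', 'javdb', 'javbus']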
@@ -271,7 +232,7 @@ class Scraping:
         # check sources in func_mapping
         todel = []
         for s in sources:
-            if not s in self.adult_func_mapping and config.getInstance().debug():
+            if not s in self.adult_full_sources and config.getInstance().debug():
                 print('[!] Source Not Exist : ' + s)
                 todel.append(s)
         for d in todel:
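Both validation hunks collect offenders into todel first because deleting from a list while iterating over it skips elements. An equivalent single-pass rewrite, sketched for comparison only (it drops the per-source warning print):

# Keep only known sources instead of deleting unknown ones afterwards.
sources = [s for s in sources if s in self.adult_full_sources]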