Merge pull request #875 from Suwmlee/master

update scrapinglib
This commit is contained in:
Yoshiko2
2022-09-16 16:36:11 +08:00
committed by GitHub
4 changed files with 16 additions and 15 deletions

View File

@@ -55,7 +55,8 @@ def get_data_from_json(file_number, oCC, specified_source, specified_url):
json_data = search(file_number, sources, proxies=proxies, verify=cacert,
dbsite=javdb_site, dbcookies=javdb_cookies,
morestoryline=conf.is_storyline(),
specifiedSource=specified_source, specifiedUrl=specified_url)
specifiedSource=specified_source, specifiedUrl=specified_url,
debug = conf.debug())
# Return if data not found in all sources
if not json_data:
print('[-]Movie Number not found!')

View File

@@ -3,7 +3,6 @@
import re
import json
import config
from .airav import Airav
from .carib import Carib
from .dlsite import Dlsite
@@ -25,9 +24,7 @@ from .tmdb import Tmdb
from .imdb import Imdb
def search(number, sources: str=None, proxies=None, verify=None, type='adult',
specifiedSource=None, specifiedUrl=None,
dbcookies=None, dbsite=None, morestoryline=False):
def search(number, sources: str=None, **kwargs):
""" 根据`番号/电影`名搜索信息
:param number: number/name depends on type
@@ -35,9 +32,7 @@ def search(number, sources: str=None, proxies=None, verify=None, type='adult',
:param type: `adult`, `general`
"""
sc = Scraping()
return sc.search(number, sources, proxies=proxies, verify=verify, type=type,
specifiedSource=specifiedSource, specifiedUrl=specifiedUrl,
dbcookies=dbcookies, dbsite=dbsite, morestoryline=morestoryline)
return sc.search(number, sources, **kwargs)
def getSupportedSources(tag='adult'):
@@ -83,6 +78,8 @@ class Scraping():
'imdb': Imdb().scrape,
}
debug = False
proxies = None
verify = None
specifiedSource = None
@@ -95,7 +92,9 @@ class Scraping():
def search(self, number, sources=None, proxies=None, verify=None, type='adult',
specifiedSource=None, specifiedUrl=None,
dbcookies=None, dbsite=None, morestoryline=False):
dbcookies=None, dbsite=None, morestoryline=False,
debug=False):
self.debug = debug
self.proxies = proxies
self.verify = verify
self.specifiedSource = specifiedSource
@@ -119,7 +118,7 @@ class Scraping():
json_data = {}
for source in sources:
try:
if config.getInstance().debug():
if self.debug:
print('[+]select', source)
try:
data = self.general_func_mapping[source](name, self)
@@ -152,7 +151,7 @@ class Scraping():
json_data = {}
for source in sources:
try:
if config.getInstance().debug():
if self.debug:
print('[+]select', source)
try:
data = self.adult_func_mapping[source](number, self)

View File

@@ -52,8 +52,7 @@ class wwwGetchu(Parser):
idn = re.findall('\d+',number)[0]
return "http://www.getchu.com/soft.phtml?id=" + idn
else:
self.number = quote(number, encoding="euc_jp")
queryUrl = self.GETCHU_WWW_SEARCH_URL.replace("_WORD_", self.number)
queryUrl = self.GETCHU_WWW_SEARCH_URL.replace("_WORD_", quote(number, encoding="euc_jp"))
# NOTE: not sure why this needs to be tried 2 times
retry = 2
for i in range(retry):

View File

@@ -37,7 +37,9 @@ class Parser:
expr_userrating = ''
expr_uservotes = ''
def __init__(self):
def init(self):
""" 初始化参数
"""
# 推荐剪切poster封面:
# `0` 复制cover
# `1` 裁剪cover
@@ -63,7 +65,7 @@ class Parser:
""" 刮削番号
"""
# 每次调用,初始化参数
self.__init__()
self.init()
self.updateCore(core)
result = self.search(number)
return result