更新“无演员自动填充佚名”功能中对目标语言的判断条件,添加符合DeepLX的目标语言“ZH”,修正原代码在使用DeepLX翻译到中文时将无演员的情况填充为Anonymous的问题
267 lines
10 KiB
Python
267 lines
10 KiB
Python
# -*- coding: utf-8 -*-
|
||
|
||
import re
|
||
import json
|
||
from .parser import Parser
|
||
import config
|
||
import importlib
|
||
|
||
|
||
def search(number, sources: str = None, **kwargs):
|
||
""" 根据`番号/电影`名搜索信息
|
||
|
||
:param number: number/name depends on type
|
||
:param sources: sources string with `,` Eg: `avsox,javbus`
|
||
:param type: `adult`, `general`
|
||
"""
|
||
sc = Scraping()
|
||
return sc.search(number, sources, **kwargs)
|
||
|
||
|
||
def getSupportedSources(tag='adult'):
|
||
"""
|
||
:param tag: `adult`, `general`
|
||
"""
|
||
sc = Scraping()
|
||
if tag == 'adult':
|
||
return ','.join(sc.adult_full_sources)
|
||
else:
|
||
return ','.join(sc.general_full_sources)
|
||
|
||
|
||
class Scraping:
|
||
"""
|
||
"""
|
||
adult_full_sources = ['javlibrary', 'javdb', 'javbus', 'airav', 'fanza', 'xcity', 'jav321',
|
||
'mgstage', 'fc2', 'avsox', 'dlsite', 'carib', 'madou', 'msin',
|
||
'getchu', 'gcolle', 'javday', 'pissplay', 'javmenu', 'pcolle', 'caribpr'
|
||
]
|
||
|
||
general_full_sources = ['tmdb', 'imdb']
|
||
|
||
debug = False
|
||
|
||
proxies = None
|
||
verify = None
|
||
specifiedSource = None
|
||
specifiedUrl = None
|
||
|
||
dbcookies = None
|
||
dbsite = None
|
||
# 使用storyline方法进一步获取故事情节
|
||
morestoryline = False
|
||
|
||
def search(self, number, sources=None, proxies=None, verify=None, type='adult',
|
||
specifiedSource=None, specifiedUrl=None,
|
||
dbcookies=None, dbsite=None, morestoryline=False,
|
||
debug=False):
|
||
self.debug = debug
|
||
self.proxies = proxies
|
||
self.verify = verify
|
||
self.specifiedSource = specifiedSource
|
||
self.specifiedUrl = specifiedUrl
|
||
self.dbcookies = dbcookies
|
||
self.dbsite = dbsite
|
||
self.morestoryline = morestoryline
|
||
if type == 'adult':
|
||
return self.searchAdult(number, sources)
|
||
else:
|
||
return self.searchGeneral(number, sources)
|
||
|
||
def searchGeneral(self, name, sources):
|
||
""" 查询电影电视剧
|
||
imdb,tmdb
|
||
"""
|
||
if self.specifiedSource:
|
||
sources = [self.specifiedSource]
|
||
else:
|
||
sources = self.checkGeneralSources(sources, name)
|
||
json_data = {}
|
||
for source in sources:
|
||
try:
|
||
if self.debug:
|
||
print('[+]select', source)
|
||
try:
|
||
module = importlib.import_module('.' + source, 'scrapinglib')
|
||
parser_type = getattr(module, source.capitalize())
|
||
parser: Parser = parser_type()
|
||
data = parser.scrape(name, self)
|
||
if data == 404:
|
||
continue
|
||
json_data = json.loads(data)
|
||
except Exception as e:
|
||
if config.getInstance().debug():
|
||
print(e)
|
||
# if any service return a valid return, break
|
||
if self.get_data_state(json_data):
|
||
if self.debug:
|
||
print(f"[+]Find movie [{name}] metadata on website '{source}'")
|
||
break
|
||
except:
|
||
continue
|
||
|
||
# Return if data not found in all sources
|
||
if not json_data or json_data['title'] == "":
|
||
return None
|
||
|
||
# If actor is anonymous, Fill in Anonymous
|
||
if len(json_data['actor']) == 0:
|
||
if config.getInstance().anonymous_fill() == True:
|
||
if "zh_" in config.getInstance().get_target_language() or "ZH" in config.getInstance().get_target_language():
|
||
json_data['actor'] = "佚名"
|
||
else:
|
||
json_data['actor'] = "Anonymous"
|
||
|
||
return json_data
|
||
|
||
def searchAdult(self, number, sources):
|
||
if self.specifiedSource:
|
||
sources = [self.specifiedSource]
|
||
elif type(sources) is list:
|
||
pass
|
||
else:
|
||
sources = self.checkAdultSources(sources, number)
|
||
json_data = {}
|
||
for source in sources:
|
||
try:
|
||
if self.debug:
|
||
print('[+]select', source)
|
||
try:
|
||
module = importlib.import_module('.' + source, 'scrapinglib')
|
||
parser_type = getattr(module, source.capitalize())
|
||
parser: Parser = parser_type()
|
||
data = parser.scrape(number, self)
|
||
if data == 404:
|
||
continue
|
||
json_data = json.loads(data)
|
||
except Exception as e:
|
||
if config.getInstance().debug():
|
||
print(e)
|
||
# json_data = self.func_mapping[source](number, self)
|
||
# if any service return a valid return, break
|
||
if self.get_data_state(json_data):
|
||
if self.debug:
|
||
print(f"[+]Find movie [{number}] metadata on website '{source}'")
|
||
break
|
||
except:
|
||
continue
|
||
|
||
# javdb的封面有水印,如果可以用其他源的封面来替换javdb的封面
|
||
if 'source' in json_data and json_data['source'] == 'javdb':
|
||
# search other sources
|
||
# If cover not found in other source, then skip using other sources using javdb cover instead
|
||
try:
|
||
other_sources = sources[sources.index('javdb') + 1:]
|
||
other_json_data = self.searchAdult(number, other_sources)
|
||
if other_json_data is not None and 'cover' in other_json_data and other_json_data['cover'] != '':
|
||
json_data['cover'] = other_json_data['cover']
|
||
if self.debug:
|
||
print(f"[+]Find movie [{number}] cover on website '{other_json_data['cover']}'")
|
||
except:
|
||
pass
|
||
|
||
# Return if data not found in all sources
|
||
if not json_data or json_data['title'] == "":
|
||
return None
|
||
|
||
# If actor is anonymous, Fill in Anonymous
|
||
if len(json_data['actor']) == 0:
|
||
if config.getInstance().anonymous_fill() == True:
|
||
if "zh_" in config.getInstance().get_target_language() or "ZH" in config.getInstance().get_target_language():
|
||
json_data['actor'] = "佚名"
|
||
else:
|
||
json_data['actor'] = "Anonymous"
|
||
|
||
return json_data
|
||
|
||
def checkGeneralSources(self, c_sources, name):
|
||
if not c_sources:
|
||
sources = self.general_full_sources
|
||
else:
|
||
sources = c_sources.split(',')
|
||
|
||
# check sources in func_mapping
|
||
todel = []
|
||
for s in sources:
|
||
if not s in self.general_full_sources:
|
||
print('[!] Source Not Exist : ' + s)
|
||
todel.append(s)
|
||
for d in todel:
|
||
print('[!] Remove Source : ' + s)
|
||
sources.remove(d)
|
||
return sources
|
||
|
||
def checkAdultSources(self, c_sources, file_number):
|
||
if not c_sources:
|
||
sources = self.adult_full_sources
|
||
else:
|
||
sources = c_sources.split(',')
|
||
|
||
def insert(sources, source):
|
||
if source in sources:
|
||
sources.insert(0, sources.pop(sources.index(source)))
|
||
return sources
|
||
|
||
if len(sources) <= len(self.adult_full_sources):
|
||
# if the input file name matches certain rules,
|
||
# move some web service to the beginning of the list
|
||
lo_file_number = file_number.lower()
|
||
if "carib" in sources and (re.search(r"^\d{6}-\d{3}", file_number)
|
||
):
|
||
sources = insert(sources, "carib")
|
||
elif "caribpr" in sources and (re.search(r"^\d{6}-\d{3}", file_number)
|
||
):
|
||
sources = insert(sources, "caribpr")
|
||
elif "item" in file_number or "GETCHU" in file_number.upper():
|
||
sources = ["getchu"]
|
||
elif "rj" in lo_file_number or "vj" in lo_file_number or re.search(r"[\u3040-\u309F\u30A0-\u30FF]+",
|
||
file_number):
|
||
sources = ["dlsite", "getchu"]
|
||
elif "pcolle" in sources and "pcolle" in lo_file_number:
|
||
sources = ["pcolle"]
|
||
elif "fc2" in lo_file_number:
|
||
if "fc2" in sources:
|
||
sources = insert(sources, "msin")
|
||
sources = insert(sources, "fc2")
|
||
elif "mgstage" in sources and \
|
||
(re.search(r"\d+\D+", file_number) or "siro" in lo_file_number):
|
||
sources = insert(sources, "mgstage")
|
||
elif "gcolle" in sources and (re.search("\d{6}", file_number)):
|
||
sources = insert(sources, "gcolle")
|
||
elif "madou" in sources and (re.search(r"^[a-z0-9]{3,}-[0-9]{1,}$", lo_file_number)):
|
||
sources = insert(sources, "madou")
|
||
|
||
elif re.search(r"^\d{5,}", file_number) or "heyzo" in lo_file_number:
|
||
if "avsox" in sources:
|
||
sources = insert(sources, "avsox")
|
||
elif re.search(r"^[a-z0-9]{3,}$", lo_file_number):
|
||
if "xcity" in sources:
|
||
sources = insert(sources, "xcity")
|
||
if "madou" in sources:
|
||
sources = insert(sources, "madou")
|
||
|
||
# check sources in func_mapping
|
||
todel = []
|
||
for s in sources:
|
||
if not s in self.adult_full_sources and config.getInstance().debug():
|
||
print('[!] Source Not Exist : ' + s)
|
||
todel.append(s)
|
||
for d in todel:
|
||
if config.getInstance().debug():
|
||
print('[!] Remove Source : ' + s)
|
||
sources.remove(d)
|
||
return sources
|
||
|
||
def get_data_state(self, data: dict) -> bool: # 元数据获取失败检测
|
||
if "title" not in data or "number" not in data:
|
||
return False
|
||
if data["title"] is None or data["title"] == "" or data["title"] == "null":
|
||
return False
|
||
if data["number"] is None or data["number"] == "" or data["number"] == "null":
|
||
return False
|
||
if (data["cover"] is None or data["cover"] == "" or data["cover"] == "null") \
|
||
and (data["cover_small"] is None or data["cover_small"] == "" or
|
||
data["cover_small"] == "null"):
|
||
return False
|
||
return True
|