add type hinting

PEP8 formatting
Author: naughtyGitCat
Date: 2022-09-16 18:23:20 +08:00
Parent: daedd3071c
Commit: f56400a56b
4 changed files with 58 additions and 43 deletions


@@ -350,7 +350,7 @@ def translate(
     return trans_result


-def load_cookies(cookie_json_filename: str):
+def load_cookies(cookie_json_filename: str) -> typing.Tuple[typing.Optional[dict], typing.Optional[str]]:
     """
     加载cookie,用于以会员方式访问非游客内容
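
For reference, the new return annotation says the loader yields both the parsed cookie dict and the path it was read from, either of which may be None. A minimal sketch of a function matching that signature (the file-lookup logic here is assumed, not taken from the project):

    import json
    import typing
    from pathlib import Path

    def load_cookies(cookie_json_filename: str) -> typing.Tuple[typing.Optional[dict], typing.Optional[str]]:
        """Return (cookie dict, source path), or (None, None) if no usable file is found."""
        path = Path.home() / cookie_json_filename   # assumed search location
        if not path.is_file():
            return None, None
        try:
            return json.loads(path.read_text(encoding='utf-8')), str(path)
        except json.JSONDecodeError:
            return None, None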


@@ -563,8 +563,10 @@ class IniProxy():
         self.proxytype = proxytype

     def proxies(self):
-        ''' 获得代理参数默认http代理
-        '''
+        """
+        获得代理参数默认http代理
+        get proxy params, use http proxy for default
+        """
         if self.address:
             if self.proxytype in self.SUPPORT_PROXY_TYPE:
                 proxies = {"http": self.proxytype + "://" + self.address,


@@ -1,10 +1,19 @@
+# build-in lib
 import json
 import secrets
-import config
-from lxml import etree
 from pathlib import Path
-from ADC_function import delete_all_elements_in_list, delete_all_elements_in_str, file_modification_days, load_cookies, translate
+# third party lib
+from lxml import etree
+# project wide definitions
+import config
+from ADC_function import (translate,
+                          load_cookies,
+                          file_modification_days,
+                          delete_all_elements_in_str,
+                          delete_all_elements_in_list
+                          )

 from scrapinglib.api import search
@@ -22,11 +31,11 @@ def get_data_from_json(file_number, oCC, specified_source, specified_url):
     # TODO 准备参数
     # - 清理 ADC_function, webcrawler
-    proxies = None
-    configProxy = conf.proxy()
-    if configProxy.enable:
-        proxies = configProxy.proxies()
+    proxies: dict = None
+    config_proxy = conf.proxy()
+    if config_proxy.enable:
+        proxies = config_proxy.proxies()
     javdb_sites = conf.javdb_sites().split(',')
     for i in javdb_sites:
         javdb_sites[javdb_sites.index(i)] = "javdb" + i
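
One aside on the annotation introduced here: under PEP 484 a default of None is more precisely written with Optional, since None is not itself a dict. A small sketch of the stricter spelling (illustrative, not part of this commit):

    from typing import Optional

    proxies: Optional[dict] = None  # later replaced by config_proxy.proxies() when the proxy is enabled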
@@ -44,19 +53,21 @@ def get_data_from_json(file_number, oCC, specified_source, specified_url):
                 has_json = True
                 break
             elif cdays != 9999:
-                print(f'[!]Cookies file {cookies_filepath} was updated {cdays} days ago, it will not be used for HTTP requests.')
+                print(
+                    f'[!]Cookies file {cookies_filepath} was updated {cdays} days ago, it will not be used for HTTP requests.')

     if not has_json:
+        # get real random site from javdb_sites, because random is not really random when the seed value is known
         javdb_site = secrets.choice(javdb_sites)
         javdb_cookies = None
-    cacert =None
+    ca_cert = None
     if conf.cacert_file():
-        cacert = conf.cacert_file()
-    json_data = search(file_number, sources, proxies=proxies, verify=cacert,
+        ca_cert = conf.cacert_file()
+    json_data = search(file_number, sources, proxies=proxies, verify=ca_cert,
                        dbsite=javdb_site, dbcookies=javdb_cookies,
                        morestoryline=conf.is_storyline(),
                        specifiedSource=specified_source, specifiedUrl=specified_url)

     # Return if data not found in all sources
     if not json_data:
         print('[-]Movie Number not found!')
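
The new comment is about why secrets.choice is used instead of random.choice: the random module is a seedable PRNG, so its picks are reproducible once the seed is known, while secrets draws from the OS CSPRNG and has no seed to guess. A quick comparison (site names are made up):

    import random
    import secrets

    sites = ["javdb1", "javdb2", "javdb3"]

    random.seed(42)
    print(random.choice(sites))   # deterministic for anyone who knows the seed
    print(secrets.choice(sites))  # backed by the OS CSPRNG, not seedable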
@@ -181,8 +192,8 @@ def get_data_from_json(file_number, oCC, specified_source, specified_url):
     if oCC:
         cc_vars = conf.cc_convert_vars().split(",")
         ccm = conf.cc_convert_mode()

-        def convert_list(mapping_data,language,vars):
+        def convert_list(mapping_data, language, vars):
             total = []
             for i in vars:
                 if len(mapping_data.xpath('a[contains(@keyword, $name)]/@' + language, name=f",{i},")) != 0:
@@ -190,11 +201,12 @@ def get_data_from_json(file_number, oCC, specified_source, specified_url):
                     total.append(i)
             return total

-        def convert(mapping_data,language,vars):
+        def convert(mapping_data, language, vars):
             if len(mapping_data.xpath('a[contains(@keyword, $name)]/@' + language, name=vars)) != 0:
                 return mapping_data.xpath('a[contains(@keyword, $name)]/@' + language, name=vars)[0]
             else:
                 raise IndexError('keyword not found')

         for cc in cc_vars:
             if json_data[cc] == "" or len(json_data[cc]) == 0:
                 continue
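
Both helpers above lean on lxml's XPath variable binding: keyword arguments passed to xpath() become XPath variables such as $name, which avoids string-concatenating user data into the expression. A self-contained example with an invented mapping document:

    from lxml import etree

    mapping_data = etree.XML('<map><a keyword=",actress," zh_cn="女优" zh_tw="女優"/></map>')
    # name= binds the XPath variable $name used inside the expression
    print(mapping_data.xpath('a[contains(@keyword, $name)]/@zh_cn', name=",actress,"))  # ['女优']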
@@ -241,7 +253,7 @@ def get_data_from_json(file_number, oCC, specified_source, specified_url):
     except:
         pass

-    naming_rule=""
+    naming_rule = ""
     for i in conf.naming_rule().split("+"):
         if i not in json_data:
             naming_rule += i.strip("'").strip('"')
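
For context on the loop that follows the renamed variable: the naming rule from the config is split on '+', and each piece is either a key looked up in json_data or a quoted literal appended as-is. A toy walk-through (the data, the rule string, and the else-branch are assumptions, since the hunk cuts off before it):

    json_data = {"number": "ABC-123", "title": "Example Title"}
    naming_rule = ""
    for i in "number+'-'+title".split("+"):
        if i not in json_data:
            naming_rule += i.strip("'").strip('"')   # literal fragment, e.g. '-'
        else:
            naming_rule += str(json_data[i])         # field value from the scraped data
    print(naming_rule)  # ABC-123-Example Title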
@@ -256,17 +268,17 @@ def get_data_from_json(file_number, oCC, specified_source, specified_url):
 def special_characters_replacement(text) -> str:
     if not isinstance(text, str):
         return text
     return (text.replace('\\', '∖').  # U+2216 SET MINUS @ Basic Multilingual Plane
             replace('/', '∕').  # U+2215 DIVISION SLASH @ Basic Multilingual Plane
             replace(':', '꞉').  # U+A789 MODIFIER LETTER COLON @ Latin Extended-D
             replace('*', '∗').  # U+2217 ASTERISK OPERATOR @ Basic Multilingual Plane
             replace('?', '？').  # U+FF1F FULLWIDTH QUESTION MARK @ Basic Multilingual Plane
             replace('"', '＂').  # U+FF02 FULLWIDTH QUOTATION MARK @ Basic Multilingual Plane
             replace('<', 'ᐸ').  # U+1438 CANADIAN SYLLABICS PA @ Basic Multilingual Plane
             replace('>', 'ᐳ').  # U+1433 CANADIAN SYLLABICS PO @ Basic Multilingual Plane
             replace('|', 'ǀ').  # U+01C0 LATIN LETTER DENTAL CLICK @ Basic Multilingual Plane
             replace('&lsquo;', '‘').  # U+02018 LEFT SINGLE QUOTATION MARK
             replace('&rsquo;', '’').  # U+02019 RIGHT SINGLE QUOTATION MARK
-            replace('&hellip;','').
+            replace('&hellip;', '').
             replace('&amp;', '')
             )
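
The substitutions above swap characters that are illegal in Windows file names for visually similar Unicode look-alikes, so titles stay readable instead of losing characters. A condensed standalone variant for quick experimentation (only a few of the mappings, and not the project's actual function):

    def make_filename_safe(text: str) -> str:
        return (text.replace(':', '꞉')    # U+A789 MODIFIER LETTER COLON
                    .replace('?', '？')   # U+FF1F FULLWIDTH QUESTION MARK
                    .replace('"', '＂'))  # U+FF02 FULLWIDTH QUOTATION MARK

    print(make_filename_safe('Title: "What?"'))  # Title꞉ ＂What？＂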


@@ -25,9 +25,9 @@ from .tmdb import Tmdb
 from .imdb import Imdb


-def search(number, sources: str=None, proxies=None, verify=None, type='adult',
+def search(number, sources: str = None, proxies=None, verify=None, type='adult',
            specifiedSource=None, specifiedUrl=None,
            dbcookies=None, dbsite=None, morestoryline=False):
     """ 根据`番号/电影`名搜索信息

     :param number: number/name depends on type
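
The only change in this hunk is spacing: PEP 8 asks for spaces around '=' when the parameter carries a type annotation (sources: str = None) but not for a plain keyword default (proxies=None). A minimal illustration:

    def fetch(url: str = "https://example.com", retries=3, timeout: float = 5.0) -> str:
        # annotated parameters take spaces around "=", unannotated defaults do not
        return f"{url} retries={retries} timeout={timeout}"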
@@ -51,11 +51,11 @@ def getSupportedSources(tag='adult'):
         return ','.join(sc.general_full_sources)


-class Scraping():
+class Scraping:
     """
     """
     adult_full_sources = ['javlibrary', 'javdb', 'javbus', 'airav', 'fanza', 'xcity', 'jav321',
                           'mgstage', 'fc2', 'avsox', 'dlsite', 'carib', 'madou', 'mv91',
                           'getchu', 'gcolle'
                           ]
     adult_func_mapping = {
@@ -77,7 +77,7 @@ class Scraping():
         'javlibrary': Javlibrary().scrape,
     }

-    general_full_sources = ['tmdb','imdb']
+    general_full_sources = ['tmdb', 'imdb']
     general_func_mapping = {
         'tmdb': Tmdb().scrape,
         'imdb': Imdb().scrape,
@@ -200,7 +200,8 @@ class Scraping():
             sources = self.adult_full_sources
         else:
             sources = c_sources.split(',')
-        def insert(sources,source):
+
+        def insert(sources, source):
             if source in sources:
                 sources.insert(0, sources.pop(sources.index(source)))
             return sources
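
The insert helper moves a preferred scraper to the front of the source list while keeping the rest of the order intact. A standalone check with made-up source names:

    def insert(sources, source):
        if source in sources:
            sources.insert(0, sources.pop(sources.index(source)))
        return sources

    print(insert(['javbus', 'javdb', 'avsox'], 'javdb'))   # ['javdb', 'javbus', 'avsox']
    print(insert(['javbus', 'javdb', 'avsox'], 'tmdb'))    # unchanged when the source is absent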