From 669b11b313a2e5504379a4d14b20781fe8dc2e1e Mon Sep 17 00:00:00 2001
From: Mathhew
Date: Thu, 28 Jul 2022 18:47:41 +0800
Subject: [PATCH] support specifiedUrl when scraping single movie

---
 Movie_Data_Capture.py | 16 ++++++++++------
 core.py               |  4 ++--
 scraper.py            |  6 ++++--
 3 files changed, 16 insertions(+), 10 deletions(-)

diff --git a/Movie_Data_Capture.py b/Movie_Data_Capture.py
index 10888a1..85b72d8 100644
--- a/Movie_Data_Capture.py
+++ b/Movie_Data_Capture.py
@@ -83,6 +83,8 @@ def argparse_function(ver: str) -> typing.Tuple[str, str, str, str, bool, bool]:
                         help="""Only show job list of files and numbers, and **NO** actual operation
 is performed. It may help you correct wrong numbers before real job.""")
     parser.add_argument("-v", "--version", action="version", version=ver)
+    parser.add_argument("-ss", "--specified-source", default='', nargs='?', help="specified Source.")
+    parser.add_argument("-su", "--specified-url", default='', nargs='?', help="specified Url.")
 
     args = parser.parse_args()
 
@@ -120,7 +122,7 @@ is performed. It may help you correct wrong numbers before real job.""")
     if no_net_op:
         conf.set_override("common:stop_counter=0;rerun_delay=0s;face:aways_imagecut=1")
 
-    return args.file, args.number, args.logdir, args.regexstr, args.zero_op, no_net_op
+    return args.file, args.number, args.logdir, args.regexstr, args.zero_op, no_net_op, args.specified_source, args.specified_url
 
 
 class OutLogger(object):
@@ -487,13 +489,13 @@ def create_data_and_move(movie_path: str, zero_op: bool, no_net_op: bool, oCC):
             print('[!]', err)
 
 
-def create_data_and_move_with_custom_number(file_path: str, custom_number, oCC):
+def create_data_and_move_with_custom_number(file_path: str, custom_number, oCC, specified_source, specified_url):
     conf = config.getInstance()
     file_name = os.path.basename(file_path)
     try:
         print("[!] [{1}] As Number Processing for '{0}'".format(file_path, custom_number))
         if custom_number:
-            core_main(file_path, custom_number, oCC)
+            core_main(file_path, custom_number, oCC, specified_source, specified_url)
         else:
             print("[-] number empty ERROR")
         print("[*]======================================================")
@@ -513,7 +515,7 @@ def create_data_and_move_with_custom_number(file_path: str, custom_number, oCC):
 
 
 def main(args: tuple) -> Path:
-    (single_file_path, custom_number, logdir, regexstr, zero_op, no_net_op) = args
+    (single_file_path, custom_number, logdir, regexstr, zero_op, no_net_op, specified_source, specified_url) = args
     conf = config.getInstance()
     main_mode = conf.main_mode()
     folder_path = ""
@@ -609,9 +611,11 @@ def main(args: tuple) -> Path:
         print('[+]==================== Single File =====================')
         if custom_number == '':
             create_data_and_move_with_custom_number(single_file_path,
-                                                    get_number(conf.debug(), os.path.basename(single_file_path)), oCC)
+                                                    get_number(conf.debug(), os.path.basename(single_file_path)), oCC,
+                                                    specified_source, specified_url)
         else:
-            create_data_and_move_with_custom_number(single_file_path, custom_number, oCC)
+            create_data_and_move_with_custom_number(single_file_path, custom_number, oCC,
+                                                    specified_source, specified_url)
     else:
         folder_path = conf.source_folder()
         if not isinstance(folder_path, str) or folder_path == '':
diff --git a/core.py b/core.py
index dcfd173..5eb2ef8 100644
--- a/core.py
+++ b/core.py
@@ -760,7 +760,7 @@ def core_main_no_net_op(movie_path, number):
         linkImage(path, number, part, leak_word, c_word, hack_word, ext)
 
 
-def core_main(movie_path, number_th, oCC):
+def core_main(movie_path, number_th, oCC, specified_source=None, specified_url=None):
     conf = config.getInstance()
     # =======================================================================初始化所需变量
     multi_part = 0
@@ -775,7 +775,7 @@ def core_main(movie_path, number_th, oCC):
     # 下面被注释的变量不需要
     #rootpath= os.getcwd
     number = number_th
-    json_data = get_data_from_json(number, oCC)  # 定义番号
+    json_data = get_data_from_json(number, oCC, specified_source, specified_url)  # 定义番号
 
     # Return if blank dict returned (data not found)
     if not json_data:
diff --git a/scraper.py b/scraper.py
index ccfb041..f8bbc06 100644
--- a/scraper.py
+++ b/scraper.py
@@ -7,7 +7,7 @@ from pathlib import Path
 from ADC_function import delete_all_elements_in_list, delete_all_elements_in_str, file_modification_days, load_cookies, translate
 from scrapinglib.api import search
 
-def get_data_from_json(file_number, oCC):
+def get_data_from_json(file_number, oCC, specified_source, specified_url):
     """
     iterate through all services and fetch the data 从JSON返回元数据
     """
@@ -51,9 +51,11 @@ def get_data_from_json(file_number, oCC):
     cacert =None
     if conf.cacert_file():
         cacert = conf.cacert_file()
+
     json_data = search(file_number, sources, proxies=proxies, verify=cacert,
                         dbsite=javdb_site, dbcookies=javdb_cookies,
-                        morestoryline=conf.is_storyline())
+                        morestoryline=conf.is_storyline(),
+                        specifiedSource=specified_source, specifiedUrl=specified_url)
     # Return if data not found in all sources
     if not json_data:
         print('[-]Movie Number not found!')