From aa0f72edd8582454a8bece187b41a6be51d9c7d2 Mon Sep 17 00:00:00 2001 From: Yoshiko2 Date: Fri, 11 Feb 2022 03:23:32 +0800 Subject: [PATCH 1/9] Update 6.0.2 --- Movie_Data_Capture.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Movie_Data_Capture.py b/Movie_Data_Capture.py index b4f9854..b592c6b 100644 --- a/Movie_Data_Capture.py +++ b/Movie_Data_Capture.py @@ -443,7 +443,7 @@ def create_data_and_move_with_custom_number(file_path: str, custom_number, oCC): def main(): - version = '6.0.1' + version = '6.0.2' urllib3.disable_warnings() #Ignore http proxy warning # Read config.ini first, in argparse_function() need conf.failed_folder() From 123a2a0c73b239e5277363df8efb09bd0946d135 Mon Sep 17 00:00:00 2001 From: FatalFurY Date: Wed, 16 Feb 2022 23:37:31 +0800 Subject: [PATCH 2/9] typo transalte to translate,and some blank lines --- Movie_Data_Capture.py | 13 +++-- WebCrawler/__init__.py | 10 ++-- config.ini | 4 +- config.py | 106 +++++++++++++++++++++++++---------------- 4 files changed, 81 insertions(+), 52 deletions(-) diff --git a/Movie_Data_Capture.py b/Movie_Data_Capture.py index b4f9854..0263ba2 100644 --- a/Movie_Data_Capture.py +++ b/Movie_Data_Capture.py @@ -3,18 +3,21 @@ import json import os import re import sys +import time import shutil import typing import urllib3 import signal import platform +import multiprocessing +from datetime import datetime, timedelta +from pathlib import Path + + from opencc import OpenCC -import ADC_function + import config -from datetime import datetime, timedelta -import time -from pathlib import Path from ADC_function import file_modification_days, get_html, parallel_download_files from number_parser import get_number from core import core_main, moveFailedFolder @@ -581,7 +584,7 @@ def main(): sys.exit(0) -import multiprocessing + if __name__ == '__main__': multiprocessing.freeze_support() main() diff --git a/WebCrawler/__init__.py b/WebCrawler/__init__.py index f690e0c..e431f9f 100644 --- 
a/WebCrawler/__init__.py +++ b/WebCrawler/__init__.py @@ -235,8 +235,8 @@ def get_data_from_json(file_number, oCC): # 从JSON返回元数据 json_data['studio'] = studio json_data['director'] = director - if conf.is_transalte(): - translate_values = conf.transalte_values().split(",") + if conf.is_translate(): + translate_values = conf.translate_values().split(",") for translate_value in translate_values: if json_data[translate_value] == "": continue @@ -248,12 +248,12 @@ def get_data_from_json(file_number, oCC): # 从JSON返回元数据 continue except: pass - if conf.get_transalte_engine() == "azure": + if conf.get_translate_engine() == "azure": t = translate( json_data[translate_value], target_language="zh-Hans", - engine=conf.get_transalte_engine(), - key=conf.get_transalte_key(), + engine=conf.get_translate_engine(), + key=conf.get_translate_key(), ) else: t = translate(json_data[translate_value]) diff --git a/config.ini b/config.ini index 252797f..e611762 100755 --- a/config.ini +++ b/config.ini @@ -8,7 +8,7 @@ success_output_folder=JAV_output soft_link=0 failed_move=1 auto_exit=0 -transalte_to_sc=0 +translate_to_sc=0 multi_threading=0 ;actor_gender value: female(♀) or male(♂) or both(♀ ♂) or all(♂ ♀ ⚧) actor_gender=female @@ -51,7 +51,7 @@ folders=failed,JAV_output switch=0 ; 机器翻译 -[transalte] +[translate] switch=0 ;可选项 google-free,azure engine=google-free diff --git a/config.py b/config.py index edb2443..0f5654e 100644 --- a/config.py +++ b/config.py @@ -5,18 +5,17 @@ import configparser import time from pathlib import Path - G_conf_override = { # index 0 save Config() first instance for quick access by using getInstance() - 0 : None, + 0: None, # register override config items - "common:main_mode" : None, - "common:source_folder" : None, - "common:auto_exit" : None, - "common:nfo_skip_days" : None, - "common:stop_counter" : None, - "common:ignore_failed_list" : None, - "debug_mode:switch" : None + "common:main_mode": None, + "common:source_folder": None, + "common:auto_exit": 
None, + "common:nfo_skip_days": None, + "common:stop_counter": None, + "common:ignore_failed_list": None, + "debug_mode:switch": None } @@ -81,7 +80,7 @@ class Config: sys.exit(2) # 用户目录才确定具有写权限,因此选择 ~/mdc.ini 作为配置文件生成路径,而不是有可能并没有写权限的 # 当前目录。目前版本也不再鼓励使用当前路径放置配置文件了,只是作为多配置文件的切换技巧保留。 - write_path = path_search_order[2] # Path.home() / "mdc.ini" + write_path = path_search_order[2] # Path.home() / "mdc.ini" write_path.write_text(res_path.read_text(encoding='utf-8'), encoding='utf-8') print("Config file '{}' created.".format(write_path.resolve())) input("Press Enter key exit...") @@ -98,14 +97,18 @@ class Config: # print("[-]",e) # sys.exit(3) # #self.conf = self._default_config() + def getboolean_override(self, section, item) -> bool: - return self.conf.getboolean(section, item) if G_conf_override[f"{section}:{item}"] is None else bool(G_conf_override[f"{section}:{item}"]) + return self.conf.getboolean(section, item) if G_conf_override[f"{section}:{item}"] is None else bool( + G_conf_override[f"{section}:{item}"]) def getint_override(self, section, item) -> int: - return self.conf.getint(section, item) if G_conf_override[f"{section}:{item}"] is None else int(G_conf_override[f"{section}:{item}"]) + return self.conf.getint(section, item) if G_conf_override[f"{section}:{item}"] is None else int( + G_conf_override[f"{section}:{item}"]) def get_override(self, section, item) -> str: - return self.conf.get(section, item) if G_conf_override[f"{section}:{item}"] is None else str(G_conf_override[f"{section}:{item}"]) + return self.conf.get(section, item) if G_conf_override[f"{section}:{item}"] is None else str( + G_conf_override[f"{section}:{item}"]) def main_mode(self) -> int: try: @@ -127,34 +130,46 @@ class Config: def soft_link(self) -> bool: return self.conf.getboolean("common", "soft_link") + def failed_move(self) -> bool: return self.conf.getboolean("common", "failed_move") + def auto_exit(self) -> bool: return self.getboolean_override("common", "auto_exit") - def 
transalte_to_sc(self) -> bool: - return self.conf.getboolean("common", "transalte_to_sc") + + def translate_to_sc(self) -> bool: + return self.conf.getboolean("common", "translate_to_sc") + def multi_threading(self) -> bool: return self.conf.getboolean("common", "multi_threading") + def del_empty_folder(self) -> bool: return self.conf.getboolean("common", "del_empty_folder") + def nfo_skip_days(self) -> int: try: return self.getint_override("common", "nfo_skip_days") except: return 30 + def stop_counter(self) -> int: try: return self.getint_override("common", "stop_counter") except: return 0 + def ignore_failed_list(self) -> bool: return self.getboolean_override("common", "ignore_failed_list") + def download_only_missing_images(self) -> bool: return self.conf.getboolean("common", "download_only_missing_images") + def mapping_table_validity(self) -> int: return self.conf.getint("common", "mapping_table_validity") - def is_transalte(self) -> bool: - return self.conf.getboolean("transalte", "switch") + + def is_translate(self) -> bool: + return self.conf.getboolean("translate", "switch") + def is_trailer(self) -> bool: return self.conf.getboolean("trailer", "switch") @@ -190,18 +205,25 @@ class Config: return extrafanart_download except ValueError: self._exit("extrafanart_folder") - def get_transalte_engine(self) -> str: - return self.conf.get("transalte","engine") - # def get_transalte_appId(self) ->str: - # return self.conf.get("transalte","appid") - def get_transalte_key(self) -> str: - return self.conf.get("transalte","key") - def get_transalte_delay(self) -> int: - return self.conf.getint("transalte","delay") - def transalte_values(self) -> str: - return self.conf.get("transalte", "values") + + def get_translate_engine(self) -> str: + return self.conf.get("translate", "engine") + + # def get_translate_appId(self) ->str: + # return self.conf.get("translate","appid") + + def get_translate_key(self) -> str: + return self.conf.get("translate", "key") + + def 
get_translate_delay(self) -> int: + return self.conf.getint("translate", "delay") + + def translate_values(self) -> str: + return self.conf.get("translate", "values") + def get_translate_service_site(self) -> str: - return self.conf.get("transalte", "service_site") + return self.conf.get("translate", "service_site") + def proxy(self): try: sec = "proxy" @@ -284,21 +306,21 @@ class Config: def storyline_show(self) -> int: try: v = self.conf.getint("storyline", "show_result") - return v if v in (0,1,2) else 2 if v > 2 else 0 + return v if v in (0, 1, 2) else 2 if v > 2 else 0 except: return 0 def storyline_mode(self) -> int: try: v = self.conf.getint("storyline", "run_mode") - return v if v in (0,1,2) else 2 if v > 2 else 0 + return v if v in (0, 1, 2) else 2 if v > 2 else 0 except: return 1 def cc_convert_mode(self) -> int: try: v = self.conf.getint("cc_convert", "mode") - return v if v in (0,1,2) else 2 if v > 2 else 0 + return v if v in (0, 1, 2) else 2 if v > 2 else 0 except: return 1 @@ -320,7 +342,6 @@ class Config: except: return "hog" - @staticmethod def _exit(sec: str) -> None: print("[-] Read config error! 
Please check the {} section in config.ini", sec) @@ -340,7 +361,7 @@ class Config: conf.set(sec1, "soft_link", "0") conf.set(sec1, "failed_move", "1") conf.set(sec1, "auto_exit", "0") - conf.set(sec1, "transalte_to_sc", "1") + conf.set(sec1, "translate_to_sc", "1") # actor_gender value: female or male or both or all(含人妖) conf.set(sec1, "actor_gender", "female") conf.set(sec1, "del_empty_folder", "1") @@ -358,7 +379,6 @@ class Config: conf.set(sec2, "type", "socks5") conf.set(sec2, "cacert_file", "") - sec3 = "Name_Rule" conf.add_section(sec3) conf.set(sec3, "location_rule", "actor + '/' + number") @@ -382,7 +402,7 @@ class Config: conf.add_section(sec7) conf.set(sec7, "switch", "0") - sec8 = "transalte" + sec8 = "translate" conf.add_section(sec8) conf.set(sec8, "switch", "0") conf.set(sec8, "engine", "google-free") @@ -402,8 +422,10 @@ class Config: sec11 = "media" conf.add_section(sec11) - conf.set(sec11, "media_type", ".mp4,.avi,.rmvb,.wmv,.mov,.mkv,.flv,.ts,.webm,.MP4,.AVI,.RMVB,.WMV,.MOV,.MKV,.FLV,.TS,.WEBM,iso,ISO") - conf.set(sec11, "sub_type", ".smi,.srt,.idx,.sub,.sup,.psb,.ssa,.ass,.txt,.usf,.xss,.ssf,.rt,.lrc,.sbv,.vtt,.ttml") + conf.set(sec11, "media_type", + ".mp4,.avi,.rmvb,.wmv,.mov,.mkv,.flv,.ts,.webm,.MP4,.AVI,.RMVB,.WMV,.MOV,.MKV,.FLV,.TS,.WEBM,iso,ISO") + conf.set(sec11, "sub_type", + ".smi,.srt,.idx,.sub,.sup,.psb,.ssa,.ass,.txt,.usf,.xss,.ssf,.rt,.lrc,.sbv,.vtt,.ttml") sec12 = "watermark" conf.add_section(sec12) @@ -464,7 +486,8 @@ class IniProxy(): ''' if self.address: if self.proxytype in self.SUPPORT_PROXY_TYPE: - proxies = {"http": self.proxytype + "://" + self.address, "https": self.proxytype + "://" + self.address} + proxies = {"http": self.proxytype + "://" + self.address, + "https": self.proxytype + "://" + self.address} else: proxies = {"http": "http://" + self.address, "https": "https://" + self.address} else: @@ -477,13 +500,16 @@ if __name__ == "__main__": def evprint(evstr): code = compile(evstr, "", "eval") print('{}: 
"{}"'.format(evstr, eval(code))) + + config = Config() - mfilter = {'conf', 'proxy', '_exit', '_default_config', 'getboolean_override', 'getint_override', 'get_override', 'ini_path'} + mfilter = {'conf', 'proxy', '_exit', '_default_config', 'getboolean_override', 'getint_override', 'get_override', + 'ini_path'} for _m in [m for m in dir(config) if not m.startswith('__') and m not in mfilter]: evprint(f'config.{_m}()') pfilter = {'proxies', 'SUPPORT_PROXY_TYPE'} # test getInstance() - assert(getInstance() == config) + assert (getInstance() == config) for _p in [p for p in dir(getInstance().proxy()) if not p.startswith('__') and p not in pfilter]: evprint(f'getInstance().proxy().{_p}') From ec28814449f199df9af2c60d453523a7b7b53163 Mon Sep 17 00:00:00 2001 From: lededev Date: Thu, 17 Feb 2022 17:18:41 +0800 Subject: [PATCH 3/9] update User-Agent --- ADC_function.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ADC_function.py b/ADC_function.py index 1a64477..de36c73 100644 --- a/ADC_function.py +++ b/ADC_function.py @@ -25,7 +25,7 @@ def getXpathSingle(htmlcode, xpath): return result1 -G_USER_AGENT = r'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.54 Safari/537.36' +G_USER_AGENT = r'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36' # 网页请求核心 def get_html(url, cookies: dict = None, ua: str = None, return_type: str = None, encoding: str = None): From 592005be01ebb599cf0af52f45379a756e079ee3 Mon Sep 17 00:00:00 2001 From: lededev Date: Thu, 17 Feb 2022 17:19:47 +0800 Subject: [PATCH 4/9] =?UTF-8?q?=E4=B8=8D=E5=86=8D=E4=BB=A5=E5=AD=97?= =?UTF-8?q?=E7=AC=A6=E4=B8=B2=E9=95=BF=E5=BA=A6=E4=B8=BA=E9=99=90=E5=88=B6?= =?UTF-8?q?=E6=9D=A1=E4=BB=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- WebCrawler/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/WebCrawler/__init__.py b/WebCrawler/__init__.py index f690e0c..4ee92d6 100644 --- a/WebCrawler/__init__.py +++ b/WebCrawler/__init__.py @@ -67,7 +67,7 @@ def get_data_from_json(file_number, oCC): # 从JSON返回元数据 conf = config.getInstance() # default fetch order list, from the beginning to the end sources = conf.sources().split(',') - if not len(conf.sources()) > 80: + if len(sources) <= len(func_mapping): # if the input file name matches certain rules, # move some web service to the beginning of the list lo_file_number = file_number.lower() From c1568cd64ac21da0ac55d411df4afcedbce72831 Mon Sep 17 00:00:00 2001 From: FatalFurY Date: Fri, 18 Feb 2022 00:01:21 +0800 Subject: [PATCH 5/9] PEP8 PREFIX, AND SOME TYPING ANNOTATION --- Movie_Data_Capture.py | 153 ++++++++++++++++++++++++------------------ number_parser.py | 121 ++++++++++++++++++--------------- 2 files changed, 156 insertions(+), 118 deletions(-) diff --git a/Movie_Data_Capture.py b/Movie_Data_Capture.py index 0263ba2..63718b4 100644 --- a/Movie_Data_Capture.py +++ b/Movie_Data_Capture.py @@ -13,12 +13,10 @@ import multiprocessing from datetime import datetime, timedelta from pathlib import Path - from opencc import OpenCC - import config -from ADC_function import file_modification_days, get_html, parallel_download_files +from ADC_function import file_modification_days, get_html, parallel_download_files from number_parser import get_number from core import core_main, moveFailedFolder @@ -33,7 +31,7 @@ def check_update(local_version): time.sleep(60) os._exit(-1) data = json.loads(htmlcode) - remote = int(data["tag_name"].replace(".","")) + remote = int(data["tag_name"].replace(".", "")) local_version = int(local_version.replace(".", "")) if local_version < remote: print("[*]" + ("* New update " + str(data["tag_name"]) + " *").center(54)) @@ -46,36 +44,44 @@ def argparse_function(ver: str) -> typing.Tuple[str, str, str, str, bool]: conf = config.getInstance() parser = argparse.ArgumentParser(epilog=f"Load 
Config file '{conf.ini_path}'.") parser.add_argument("file", default='', nargs='?', help="Single Movie file path.") - parser.add_argument("-p","--path",default='',nargs='?',help="Analysis folder path.") - parser.add_argument("-m","--main-mode",default='',nargs='?',help="Main mode. 1:Scraping 2:Organizing 3:Scraping in analysis folder") + parser.add_argument("-p", "--path", default='', nargs='?', help="Analysis folder path.") + parser.add_argument("-m", "--main-mode", default='', nargs='?', + help="Main mode. 1:Scraping 2:Organizing 3:Scraping in analysis folder") parser.add_argument("-n", "--number", default='', nargs='?', help="Custom file number of single movie file.") # parser.add_argument("-C", "--config", default='config.ini', nargs='?', help="The config file Path.") default_logdir = str(Path.home() / '.mlogs') - parser.add_argument("-o","--log-dir",dest='logdir',default=default_logdir,nargs='?', - help=f"""Duplicate stdout and stderr to logfiles in logging folder, default on. + parser.add_argument("-o", "--log-dir", dest='logdir', default=default_logdir, nargs='?', + help=f"""Duplicate stdout and stderr to logfiles in logging folder, default on. default folder for current user: '{default_logdir}'. 
Change default folder to an empty file, or use --log-dir= to turn log off.""") - parser.add_argument("-q","--regex-query",dest='regexstr',default='',nargs='?',help="python re module regex filepath filtering.") - parser.add_argument("-d","--nfo-skip-days",dest='days',default='',nargs='?', help="Override nfo_skip_days value in config.") - parser.add_argument("-c","--stop-counter",dest='cnt',default='',nargs='?', help="Override stop_counter value in config.") + parser.add_argument("-q", "--regex-query", dest='regexstr', default='', nargs='?', + help="python re module regex filepath filtering.") + parser.add_argument("-d", "--nfo-skip-days", dest='days', default='', nargs='?', + help="Override nfo_skip_days value in config.") + parser.add_argument("-c", "--stop-counter", dest='cnt', default='', nargs='?', + help="Override stop_counter value in config.") parser.add_argument("-i", "--ignore-failed-list", action="store_true", help="Ignore failed list '{}'".format( - os.path.join(os.path.abspath(conf.failed_folder()), 'failed_list.txt'))) + os.path.join(os.path.abspath(conf.failed_folder()), 'failed_list.txt'))) parser.add_argument("-a", "--auto-exit", action="store_true", help="Auto exit after program complete") - parser.add_argument("-g","--debug", action="store_true", + parser.add_argument("-g", "--debug", action="store_true", help="Turn on debug mode to generate diagnostic log for issue report.") - parser.add_argument("-z","--zero-operation",dest='zero_op', action="store_true", + parser.add_argument("-z", "--zero-operation", dest='zero_op', action="store_true", help="""Only show job list of files and numbers, and **NO** actual operation is performed. 
It may help you correct wrong numbers before real job.""") parser.add_argument("-v", "--version", action="version", version=ver) args = parser.parse_args() + def get_natural_number_or_none(value): - return int(value) if isinstance(value, str) and value.isnumeric() and int(value)>=0 else None + return int(value) if isinstance(value, str) and value.isnumeric() and int(value) >= 0 else None + def get_str_or_none(value): return value if isinstance(value, str) and len(value) else None + def get_bool_or_none(value): return True if isinstance(value, bool) and value else None + config.G_conf_override["common:main_mode"] = get_natural_number_or_none(args.main_mode) config.G_conf_override["common:source_folder"] = get_str_or_none(args.path) config.G_conf_override["common:auto_exit"] = get_bool_or_none(args.auto_exit) @@ -86,43 +92,53 @@ is performed. It may help you correct wrong numbers before real job.""") return args.file, args.number, args.logdir, args.regexstr, args.zero_op + class OutLogger(object): def __init__(self, logfile) -> None: self.term = sys.stdout - self.log = open(logfile,"w",encoding='utf-8',buffering=1) + self.log = open(logfile, "w", encoding='utf-8', buffering=1) self.filepath = logfile + def __del__(self): self.close() + def __enter__(self): pass + def __exit__(self, *args): self.close() - def write(self,msg): + + def write(self, msg): self.term.write(msg) self.log.write(msg) + def flush(self): self.term.flush() self.log.flush() os.fsync(self.log.fileno()) + def close(self): - if self.term != None: + if self.term is not None: sys.stdout = self.term self.term = None - if self.log != None: + if self.log is not None: self.log.close() self.log = None class ErrLogger(OutLogger): + def __init__(self, logfile) -> None: self.term = sys.stderr - self.log = open(logfile,"w",encoding='utf-8',buffering=1) + self.log = open(logfile, "w", encoding='utf-8', buffering=1) self.filepath = logfile + def close(self): - if self.term != None: + if self.term is not None: 
sys.stderr = self.term self.term = None - if self.log != None: + + if self.log is not None: self.log.close() self.log = None @@ -133,7 +149,7 @@ def dupe_stdout_to_logfile(logdir: str): log_dir = Path(logdir) if not log_dir.exists(): try: - log_dir.mkdir(parents=True,exist_ok=True) + log_dir.mkdir(parents=True, exist_ok=True) except: pass if not log_dir.is_dir(): @@ -150,7 +166,7 @@ def dupe_stdout_to_logfile(logdir: str): def close_logfile(logdir: str): if not isinstance(logdir, str) or len(logdir) == 0 or not os.path.isdir(logdir): return - #日志关闭前保存日志路径 + # 日志关闭前保存日志路径 filepath = None try: filepath = sys.stdout.filepath @@ -161,7 +177,7 @@ def close_logfile(logdir: str): log_dir = Path(logdir).resolve() if isinstance(filepath, Path): print(f"Log file '{filepath}' saved.") - assert(filepath.parent.samefile(log_dir)) + assert (filepath.parent.samefile(log_dir)) # 清理空文件 for f in log_dir.glob(r'*_err.txt'): if f.stat().st_size == 0: @@ -201,7 +217,7 @@ def close_logfile(logdir: str): cutday = len('T235959.txt') # cut length mdc_20201201|T235959.txt for f in day_merge: try: - day_file_name = str(f)[:-cutday] + '.txt' # mdc_20201201.txt + day_file_name = str(f)[:-cutday] + '.txt' # mdc_20201201.txt with open(day_file_name, 'a', encoding='utf-8') as m: m.write(f.read_text(encoding='utf-8')) f.unlink(missing_ok=True) @@ -213,7 +229,7 @@ def close_logfile(logdir: str): if not txts or not len(txts): break txts.sort() - tmstr_3_month_ago = (today.replace(day=1) - timedelta(days=3*30)).strftime("%Y%m32") + tmstr_3_month_ago = (today.replace(day=1) - timedelta(days=3 * 30)).strftime("%Y%m32") deadline_month = f'mdc_{tmstr_3_month_ago}' month_merge = [f for f in txts if f.stem < deadline_month] if not month_merge or not len(month_merge): @@ -221,7 +237,7 @@ def close_logfile(logdir: str): tomonth = len('01.txt') # cut length mdc_202012|01.txt for f in month_merge: try: - month_file_name = str(f)[:-tomonth] + '.txt' # mdc_202012.txt + month_file_name = str(f)[:-tomonth] + 
'.txt' # mdc_202012.txt with open(month_file_name, 'a', encoding='utf-8') as m: m.write(f.read_text(encoding='utf-8')) f.unlink(missing_ok=True) @@ -234,14 +250,14 @@ def close_logfile(logdir: str): if not mons or not len(mons): return mons.sort() - deadline_year = f'mdc_{today.year-1}13' + deadline_year = f'mdc_{today.year - 1}13' year_merge = [f for f in mons if f.stem < deadline_year] if not year_merge or not len(year_merge): return - toyear = len('12.txt') # cut length mdc_2020|12.txt + toyear = len('12.txt') # cut length mdc_2020|12.txt for f in year_merge: try: - year_file_name = str(f)[:-toyear] + '.txt' # mdc_2020.txt + year_file_name = str(f)[:-toyear] + '.txt' # mdc_2020.txt with open(year_file_name, 'a', encoding='utf-8') as y: y.write(f.read_text(encoding='utf-8')) f.unlink(missing_ok=True) @@ -257,13 +273,14 @@ def signal_handler(*args): print('[!]Ctrl+C detected, Exit.') sys.exit(9) + def sigdebug_handler(*args): config.G_conf_override["debug_mode:switch"] = not config.G_conf_override["debug_mode:switch"] print('[!]Debug {}'.format('On' if config.getInstance().debug() else 'oFF')) # 新增失败文件列表跳过处理,及.nfo修改天数跳过处理,提示跳过视频总数,调试模式(-g)下详细被跳过文件,跳过小广告 -def movie_lists(source_folder, regexstr): +def movie_lists(source_folder, regexstr: str) -> list[str]: conf = config.getInstance() main_mode = conf.main_mode() debug = conf.debug() @@ -283,9 +300,9 @@ def movie_lists(source_folder, regexstr): try: flist = failed_list_txt_path.read_text(encoding='utf-8').splitlines() failed_set = set(flist) - if len(flist) != len(failed_set): # 检查去重并写回,但是不改变failed_list.txt内条目的先后次序,重复的只保留最后的 + if len(flist) != len(failed_set): # 检查去重并写回,但是不改变failed_list.txt内条目的先后次序,重复的只保留最后的 fset = failed_set.copy() - for i in range(len(flist)-1, -1, -1): + for i in range(len(flist) - 1, -1, -1): fset.remove(flist[i]) if flist[i] in fset else flist.pop(i) failed_list_txt_path.write_text('\n'.join(flist) + '\n', encoding='utf-8') assert len(fset) == 0 and len(flist) == len(failed_set) @@ -311,14 
+328,15 @@ def movie_lists(source_folder, regexstr): continue is_sym = full_name.is_symlink() if main_mode != 3 and (is_sym or full_name.stat().st_nlink > 1): # 短路布尔 符号链接不取stat(),因为符号链接可能指向不存在目标 - continue # file is symlink or hardlink(Linux/NTFS/Darwin) + continue # file is symlink or hardlink(Linux/NTFS/Darwin) # 调试用0字节样本允许通过,去除小于120MB的广告'苍老师强力推荐.mp4'(102.2MB)'黑道总裁.mp4'(98.4MB)'有趣的妹子激情表演.MP4'(95MB)'有趣的臺灣妹妹直播.mp4'(15.1MB) movie_size = 0 if is_sym else full_name.stat().st_size # 同上 符号链接不取stat()及st_size,直接赋0跳过小视频检测 - if movie_size > 0 and movie_size < 125829120: # 1024*1024*120=125829120 + if 0 < movie_size < 125829120: # 1024*1024*120=125829120 continue if cliRE and not cliRE.search(absf) or trailerRE.search(full_name.name): continue - if main_mode == 3 and nfo_skip_days > 0 and file_modification_days(full_name.with_suffix('.nfo')) <= nfo_skip_days: + if main_mode == 3 and nfo_skip_days > 0 and file_modification_days( + full_name.with_suffix('.nfo')) <= nfo_skip_days: skip_nfo_days_cnt += 1 if debug: print(f"[!]Skip movie by it's .nfo which modified within {nfo_skip_days} days: '{absf}'") @@ -328,7 +346,8 @@ def movie_lists(source_folder, regexstr): if skip_failed_cnt: print(f"[!]Skip {skip_failed_cnt} movies in failed list '{failed_list_txt_path}'.") if skip_nfo_days_cnt: - print(f"[!]Skip {skip_nfo_days_cnt} movies in source folder '{source}' who's .nfo modified within {nfo_skip_days} days.") + print( + f"[!]Skip {skip_nfo_days_cnt} movies in source folder '{source}' who's .nfo modified within {nfo_skip_days} days.") if nfo_skip_days <= 0 or not soft_link or main_mode == 3: return total # 软连接方式,已经成功削刮的也需要从成功目录中检查.nfo更新天数,跳过N天内更新过的 @@ -354,13 +373,17 @@ def movie_lists(source_folder, regexstr): if debug: print(f"[!]Skip file successfully processed within {nfo_skip_days} days: '{f}'") if len(rm_list): - print(f"[!]Skip {len(rm_list)} movies in success folder '{success_folder}' who's .nfo modified within {nfo_skip_days} days.") + print( + f"[!]Skip {len(rm_list)} 
movies in success folder '{success_folder}' who's .nfo modified within {nfo_skip_days} days.") return total -def create_failed_folder(failed_folder): - if not os.path.exists(failed_folder): # 新建failed文件夹 +def create_failed_folder(failed_folder: str): + """ + 新建failed文件夹 + """ + if not os.path.exists(failed_folder): try: os.makedirs(failed_folder) except: @@ -373,9 +396,7 @@ def rm_empty_folder(path): deleted = set() for current_dir, subdirs, files in os.walk(abspath, topdown=False): try: - still_has_subdirs = any( - _ for subdir in subdirs if os.path.join(current_dir, subdir) not in deleted - ) + still_has_subdirs = any(_ for subdir in subdirs if os.path.join(current_dir, subdir) not in deleted) if not any(files) and not still_has_subdirs and not os.path.samefile(path, current_dir): os.rmdir(current_dir) deleted.add(current_dir) @@ -390,7 +411,7 @@ def create_data_and_move(file_path: str, zero_op, oCC): n_number = get_number(debug, os.path.basename(file_path)) file_path = os.path.abspath(file_path) - if debug == True: + if debug is True: print(f"[!] [{n_number}] As Number making data for '{file_path}'") if zero_op: return @@ -447,7 +468,7 @@ def create_data_and_move_with_custom_number(file_path: str, custom_number, oCC): def main(): version = '6.0.1' - urllib3.disable_warnings() #Ignore http proxy warning + urllib3.disable_warnings() # Ignore http proxy warning # Read config.ini first, in argparse_function() need conf.failed_folder() conf = config.Config("config.ini") @@ -455,11 +476,9 @@ def main(): # Parse command line args single_file_path, custom_number, logdir, regexstr, zero_op = argparse_function(version) - - main_mode = conf.main_mode() folder_path = "" - if not main_mode in (1, 2, 3): + if main_mode not in (1, 2, 3): print(f"[-]Main mode must be 1 or 2 or 3! 
You can run '{os.path.basename(sys.argv[0])} --help' for more help.") sys.exit(4) @@ -470,7 +489,8 @@ def main(): signal.signal(signal.SIGWINCH, sigdebug_handler) dupe_stdout_to_logfile(logdir) - platform_total = str(' - ' + platform.platform() + ' \n[*] - ' + platform.machine() + ' - Python-' + platform.python_version()) + platform_total = str( + ' - ' + platform.platform() + ' \n[*] - ' + platform.machine() + ' - Python-' + platform.python_version()) print('[*]================= Movie Data Capture =================') print('[*]' + version.center(54)) @@ -488,15 +508,15 @@ def main(): print('[+]Enable debug') if conf.soft_link(): print('[!]Enable soft link') - if len(sys.argv)>1: - print('[!]CmdLine:'," ".join(sys.argv[1:])) + if len(sys.argv) > 1: + print('[!]CmdLine:', " ".join(sys.argv[1:])) print('[+]Main Working mode ## {}: {} ## {}{}{}' - .format(*(main_mode, ['Scraping', 'Organizing', 'Scraping in analysis folder'][main_mode-1], - "" if not conf.multi_threading() else ", multi_threading on", - "" if conf.nfo_skip_days() == 0 else f", nfo_skip_days={conf.nfo_skip_days()}", - "" if conf.stop_counter() == 0 else f", stop_counter={conf.stop_counter()}" - ) if not single_file_path else ('-','Single File', '','','')) - ) + .format(*(main_mode, ['Scraping', 'Organizing', 'Scraping in analysis folder'][main_mode - 1], + "" if not conf.multi_threading() else ", multi_threading on", + "" if conf.nfo_skip_days() == 0 else f", nfo_skip_days={conf.nfo_skip_days()}", + "" if conf.stop_counter() == 0 else f", stop_counter={conf.stop_counter()}" + ) if not single_file_path else ('-', 'Single File', '', '', '')) + ) if conf.update_check(): check_update(version) @@ -507,8 +527,9 @@ def main(): def fmd(f): return ('https://raw.githubusercontent.com/yoshiko2/Movie_Data_Capture/master/MappingTable/' + f, Path.home() / '.local' / 'share' / 'mdc' / f) + map_tab = (fmd('mapping_actor.xml'), fmd('mapping_info.xml'), fmd('c_number.json')) - for k,v in map_tab: + for k, v in map_tab: 
if v.exists(): if file_modification_days(str(v)) >= conf.mapping_table_validity(): print("[+]Mapping Table Out of date! Remove", str(v)) @@ -528,14 +549,15 @@ def main(): try: oCC = None if ccm == 0 else OpenCC('t2s.json' if ccm == 1 else 's2t.json') except: - # some OS no OpennCC cpython, try opencc-python-reimplemented. + # some OS no OpenCC cpython, try opencc-python-reimplemented. # pip uninstall opencc && pip install opencc-python-reimplemented oCC = None if ccm == 0 else OpenCC('t2s' if ccm == 1 else 's2t') - if not single_file_path == '': #Single File + if not single_file_path == '': # Single File print('[+]==================== Single File =====================') if custom_number == '': - create_data_and_move_with_custom_number(single_file_path, get_number(conf.debug(), os.path.basename(single_file_path)), oCC) + create_data_and_move_with_custom_number(single_file_path, + get_number(conf.debug(), os.path.basename(single_file_path)), oCC) else: create_data_and_move_with_custom_number(single_file_path, custom_number, oCC) else: @@ -550,7 +572,7 @@ def main(): print('[+]Find', count_all, 'movies.') print('[*]======================================================') stop_count = conf.stop_counter() - if stop_count<1: + if stop_count < 1: stop_count = 999999 else: count_all = str(min(len(movie_list), stop_count)) @@ -558,7 +580,8 @@ def main(): for movie_path in movie_list: # 遍历电影列表 交给core处理 count = count + 1 percentage = str(count / int(count_all) * 100)[:4] + '%' - print('[!] {:>30}{:>21}'.format('- ' + percentage + ' [' + str(count) + '/' + count_all + '] -', time.strftime("%H:%M:%S"))) + print('[!] 
{:>30}{:>21}'.format('- ' + percentage + ' [' + str(count) + '/' + count_all + '] -', + time.strftime("%H:%M:%S"))) create_data_and_move(movie_path, zero_op, oCC) if count >= stop_count: print("[!]Stop counter triggered!") @@ -573,7 +596,7 @@ def main(): end_time = time.time() total_time = str(timedelta(seconds=end_time - start_time)) print("[+]Running time", total_time[:len(total_time) if total_time.rfind('.') < 0 else -3], - " End at", time.strftime("%Y-%m-%d %H:%M:%S")) + " End at", time.strftime("%Y-%m-%d %H:%M:%S")) print("[+]All finished!!!") diff --git a/number_parser.py b/number_parser.py index 1afe5f3..e74ddcb 100755 --- a/number_parser.py +++ b/number_parser.py @@ -2,36 +2,37 @@ import os import re import sys import config +import typing G_spat = re.compile( "^22-sht\.me|-fhd|_fhd|^fhd_|^fhd-|-hd|_hd|^hd_|^hd-|-sd|_sd|-1080p|_1080p|-720p|_720p|^hhd800\.com@|-uncensored|_uncensored|-leak|_leak", re.IGNORECASE) -def get_number(debug,file_path: str) -> str: - # """ - # >>> from number_parser import get_number - # >>> get_number("/Users/Guest/AV_Data_Capture/snis-829.mp4") - # 'snis-829' - # >>> get_number("/Users/Guest/AV_Data_Capture/snis-829-C.mp4") - # 'snis-829' - # >>> get_number("C:¥Users¥Guest¥snis-829.mp4") - # 'snis-829' - # >>> get_number("C:¥Users¥Guest¥snis-829-C.mp4") - # 'snis-829' - # >>> get_number("./snis-829.mp4") - # 'snis-829' - # >>> get_number("./snis-829-C.mp4") - # 'snis-829' - # >>> get_number(".¥snis-829.mp4") - # 'snis-829' - # >>> get_number(".¥snis-829-C.mp4") - # 'snis-829' - # >>> get_number("snis-829.mp4") - # 'snis-829' - # >>> get_number("snis-829-C.mp4") - # 'snis-829' - # """ +def get_number(debug: bool, file_path: str) -> str: + """ + 从文件路径中提取番号 from number_parser import get_number + >>> get_number(False, "/Users/Guest/AV_Data_Capture/snis-829.mp4") + 'snis-829' + >>> get_number(False, "/Users/Guest/AV_Data_Capture/snis-829-C.mp4") + 'snis-829' + >>> get_number(False, "C:¥Users¥Guest¥snis-829.mp4") + 'snis-829' + >>> 
get_number(False, "C:¥Users¥Guest¥snis-829-C.mp4") + 'snis-829' + >>> get_number(False, "./snis-829.mp4") + 'snis-829' + >>> get_number(False, "./snis-829-C.mp4") + 'snis-829' + >>> get_number(False, ".¥snis-829.mp4") + 'snis-829' + >>> get_number(False, ".¥snis-829-C.mp4") + 'snis-829' + >>> get_number(False, "snis-829.mp4") + 'snis-829' + >>> get_number(False, "snis-829-C.mp4") + 'snis-829' + """ filepath = os.path.basename(file_path) # debug True 和 False 两块代码块合并,原因是此模块及函数只涉及字符串计算,没有IO操作,debug on时输出导致异常信息即可 try: @@ -56,7 +57,7 @@ def get_number(debug,file_path: str) -> str: try: return str( re.findall(r'(.+?)\.', - str(re.search('([^<>/\\\\|:""\\*\\?]+)\\.\\w+$', filepath).group()))).strip( + str(re.search('([^<>/\\\\|:""\\*\\?]+)\\.\\w+$', filepath).group()))).strip( "['']").replace('_', '-') except: return str(re.search(r'(.+?)\.', filepath)[0]) @@ -68,29 +69,33 @@ def get_number(debug,file_path: str) -> str: # 按javdb数据源的命名规范提取number G_TAKE_NUM_RULES = { - 'tokyo.*hot' : lambda x:str(re.search(r'(cz|gedo|k|n|red-|se)\d{2,4}', x, re.I).group()), - 'carib' : lambda x:str(re.search(r'\d{6}(-|_)\d{3}', x, re.I).group()).replace('_', '-'), - '1pon|mura|paco' : lambda x:str(re.search(r'\d{6}(-|_)\d{3}', x, re.I).group()).replace('-', '_'), - '10mu' : lambda x:str(re.search(r'\d{6}(-|_)\d{2}', x, re.I).group()).replace('-', '_'), - 'x-art' : lambda x:str(re.search(r'x-art\.\d{2}\.\d{2}\.\d{2}', x, re.I).group()), - 'xxx-av': lambda x:''.join(['xxx-av-', re.findall(r'xxx-av[^\d]*(\d{3,5})[^\d]*', x, re.I)[0]]), - 'heydouga': lambda x:'heydouga-' + '-'.join(re.findall(r'(\d{4})[\-_](\d{3,4})[^\d]*', x, re.I)[0]), - 'heyzo' : lambda x: 'HEYZO-' + re.findall(r'heyzo[^\d]*(\d{4})', x, re.I)[0] + 'tokyo.*hot': lambda x: str(re.search(r'(cz|gedo|k|n|red-|se)\d{2,4}', x, re.I).group()), + 'carib': lambda x: str(re.search(r'\d{6}(-|_)\d{3}', x, re.I).group()).replace('_', '-'), + '1pon|mura|paco': lambda x: str(re.search(r'\d{6}(-|_)\d{3}', x, re.I).group()).replace('-', '_'), 
+ '10mu': lambda x: str(re.search(r'\d{6}(-|_)\d{2}', x, re.I).group()).replace('-', '_'), + 'x-art': lambda x: str(re.search(r'x-art\.\d{2}\.\d{2}\.\d{2}', x, re.I).group()), + 'xxx-av': lambda x: ''.join(['xxx-av-', re.findall(r'xxx-av[^\d]*(\d{3,5})[^\d]*', x, re.I)[0]]), + 'heydouga': lambda x: 'heydouga-' + '-'.join(re.findall(r'(\d{4})[\-_](\d{3,4})[^\d]*', x, re.I)[0]), + 'heyzo': lambda x: 'HEYZO-' + re.findall(r'heyzo[^\d]*(\d{4})', x, re.I)[0] } -def get_number_by_dict(filename: str) -> str: + +def get_number_by_dict(filename: str) -> typing.Optional[str]: try: - for k,v in G_TAKE_NUM_RULES.items(): + for k, v in G_TAKE_NUM_RULES.items(): if re.search(k, filename, re.I): return v(filename) except: pass return None + class Cache_uncensored_conf: prefix = None + def is_empty(self): return bool(self.prefix is None) + def set(self, v: list): if not v or not len(v) or not len(v[0]): raise ValueError('input prefix list empty or None') @@ -99,28 +104,32 @@ class Cache_uncensored_conf: for i in v[1:]: s += f"|{i}.+" self.prefix = re.compile(s, re.I) + def check(self, number): if self.prefix is None: raise ValueError('No init re compile') return self.prefix.match(number) + G_cache_uncensored_conf = Cache_uncensored_conf() + # ========================================================================是否为无码 def is_uncensored(number): if re.match( -r'[\d-]{4,}|\d{6}_\d{2,3}|(cz|gedo|k|n|red-|se)\d{2,4}|heyzo.+|xxx-av-.+|heydouga-.+|x-art\.\d{2}\.\d{2}\.\d{2}', - number, - re.I + r'[\d-]{4,}|\d{6}_\d{2,3}|(cz|gedo|k|n|red-|se)\d{2,4}|heyzo.+|xxx-av-.+|heydouga-.+|x-art\.\d{2}\.\d{2}\.\d{2}', + number, + re.I ): return True if G_cache_uncensored_conf.is_empty(): G_cache_uncensored_conf.set(config.getInstance().get_uncensored().split(',')) return G_cache_uncensored_conf.check(number) + if __name__ == "__main__": -# import doctest -# doctest.testmod(raise_on_error=True) + # import doctest + # doctest.testmod(raise_on_error=True) test_use_cases = ( "MEYD-594-C.mp4", 
"SSIS-001_C.mp4", @@ -131,26 +140,30 @@ if __name__ == "__main__": "SDDE-625_uncensored_C.mp4", "SDDE-625_uncensored_leak_C.mp4", "SDDE-625_uncensored_leak_C_cd1.mp4", - "Tokyo Hot n9001 FHD.mp4", # 无-号,以前无法正确提取 + "Tokyo Hot n9001 FHD.mp4", # 无-号,以前无法正确提取 "TokyoHot-n1287-HD SP2006 .mp4", - "caribean-020317_001.nfo", # -号误命名为_号的 + "caribean-020317_001.nfo", # -号误命名为_号的 "257138_3xplanet_1Pondo_080521_001.mp4", - "ADV-R0624-CD3.wmv", # 多碟影片 - "XXX-AV 22061-CD5.iso", # 新支持片商格式 xxx-av-22061 命名规则来自javdb数据源 + "ADV-R0624-CD3.wmv", # 多碟影片 + "XXX-AV 22061-CD5.iso", # 新支持片商格式 xxx-av-22061 命名规则来自javdb数据源 "xxx-av 20589.mp4", "Muramura-102114_145-HD.wmv", # 新支持片商格式 102114_145 命名规则来自javdb数据源 - "heydouga-4102-023-CD2.iso", # 新支持片商格式 heydouga-4102-023 命名规则来自javdb数据源 - "HeyDOuGa4236-1048 Ai Qiu - .mp4", # heydouga-4236-1048 命名规则来自javdb数据源 - "pacopacomama-093021_539-FHD.mkv", # 新支持片商格式 093021_539 命名规则来自javdb数据源 + "heydouga-4102-023-CD2.iso", # 新支持片商格式 heydouga-4102-023 命名规则来自javdb数据源 + "HeyDOuGa4236-1048 Ai Qiu - .mp4", # heydouga-4236-1048 命名规则来自javdb数据源 + "pacopacomama-093021_539-FHD.mkv", # 新支持片商格式 093021_539 命名规则来自javdb数据源 "sbw99.cc@heyzo_hd_2636_full.mp4" ) + + def evprint(evstr): code = compile(evstr, "", "eval") print("{1:>20} # '{0}'".format(evstr[18:-2], eval(code))) + + for t in test_use_cases: evprint(f'get_number(True, "{t}")') - if len(sys.argv)<=1 or not re.search('^[A-Z]:?', sys.argv[1], re.IGNORECASE): + if len(sys.argv) <= 1 or not re.search('^[A-Z]:?', sys.argv[1], re.IGNORECASE): sys.exit(0) # 使用Everything的ES命令行工具搜集全盘视频文件名作为用例测试number数据,参数为盘符 A .. 
Z 或带盘符路径 @@ -169,6 +182,7 @@ if __name__ == "__main__": # 示例: # python3 ./number_parser.py ALL import subprocess + ES_search_path = "ALL disks" if sys.argv[1] == "ALL": if sys.platform == "win32": @@ -176,18 +190,19 @@ if __name__ == "__main__": ES_prog_path = 'es.exe' # es.exe需要放在PATH环境变量的路径之内 ES_cmdline = f'{ES_prog_path} -name size:gigantic ext:mp4;avi;rmvb;wmv;mov;mkv;flv;ts;webm;iso;mpg;m4v' out_bytes = subprocess.check_output(ES_cmdline.split(' ')) - out_text = out_bytes.decode('gb18030') # 中文版windows 10 x64默认输出GB18030,此编码为UNICODE方言与UTF-8系全射关系无转码损失 + out_text = out_bytes.decode('gb18030') # 中文版windows 10 x64默认输出GB18030,此编码为UNICODE方言与UTF-8系全射关系无转码损失 out_list = out_text.splitlines() elif sys.platform in ("linux", "darwin"): ES_prog_path = 'locate' if sys.platform == 'linux' else 'glocate' - ES_cmdline = r"{} -b -i --regex '\.mp4$|\.avi$|\.rmvb$|\.wmv$|\.mov$|\.mkv$|\.webm$|\.iso$|\.mpg$|\.m4v$'".format(ES_prog_path) + ES_cmdline = r"{} -b -i --regex '\.mp4$|\.avi$|\.rmvb$|\.wmv$|\.mov$|\.mkv$|\.webm$|\.iso$|\.mpg$|\.m4v$'".format( + ES_prog_path) out_bytes = subprocess.check_output(ES_cmdline.split(' ')) out_text = out_bytes.decode('utf-8') - out_list = [ os.path.basename(line) for line in out_text.splitlines()] + out_list = [os.path.basename(line) for line in out_text.splitlines()] else: print('[-]Unsupported platform! Please run on OS Windows/Linux/MacOSX. 
Exit.') sys.exit(1) - else: # Windows single disk + else: # Windows single disk if sys.platform != "win32": print('[!]Usage: python3 ./number_parser.py ALL') sys.exit(0) @@ -202,7 +217,7 @@ if __name__ == "__main__": ES_search_path = os.path.normcase(ES_search_path) ES_cmdline = f'{ES_prog_path} -path {ES_search_path} -name size:gigantic ext:mp4;avi;rmvb;wmv;mov;mkv;webm;iso;mpg;m4v' out_bytes = subprocess.check_output(ES_cmdline.split(' ')) - out_text = out_bytes.decode('gb18030') # 中文版windows 10 x64默认输出GB18030,此编码为UNICODE方言与UTF-8系全射关系无转码损失 + out_text = out_bytes.decode('gb18030') # 中文版windows 10 x64默认输出GB18030,此编码为UNICODE方言与UTF-8系全射关系无转码损失 out_list = out_text.splitlines() print(f'\n[!]{ES_prog_path} is searching {ES_search_path} for movies as number parser test cases...') print(f'[+]Find {len(out_list)} Movies.') From 2c6c79ac4f6ba5015630eb4db89bd29269915c59 Mon Sep 17 00:00:00 2001 From: sastar Date: Wed, 23 Feb 2022 00:01:36 +0800 Subject: [PATCH 6/9] fix #685 --- WebCrawler/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/WebCrawler/__init__.py b/WebCrawler/__init__.py index f690e0c..edcd76d 100644 --- a/WebCrawler/__init__.py +++ b/WebCrawler/__init__.py @@ -326,7 +326,8 @@ def get_data_from_json(file_number, oCC): # 从JSON返回元数据 if i not in json_data: naming_rule += i.strip("'").strip('"') else: - naming_rule += json_data.get(i) + item = json_data.get(i) + naming_rule += item if type(item) is not list else "&".join(item) json_data['naming_rule'] = naming_rule return json_data From 377a9f308bf6dc68e6e9272cc41a5937f40342c5 Mon Sep 17 00:00:00 2001 From: FatalFurY Date: Wed, 23 Feb 2022 22:11:45 +0800 Subject: [PATCH 7/9] PEP8 PREFIX, AND SOME TYPING ANNOTATION, FUNCTION COMMENT --- ADC_function.py | 182 ++++++++++++++++++++++++++--------------- Movie_Data_Capture.py | 7 +- WebCrawler/__init__.py | 6 +- core.py | 26 ++++-- 4 files changed, 144 insertions(+), 77 deletions(-) diff --git a/ADC_function.py b/ADC_function.py index 
1a64477..6cf90e4 100644 --- a/ADC_function.py +++ b/ADC_function.py @@ -1,6 +1,6 @@ from os import replace import requests -#import hashlib +# import hashlib from pathlib import Path import secrets import os.path @@ -11,6 +11,7 @@ import time from lxml import etree import re import config +import typing from urllib.parse import urljoin import mechanicalsoup from requests.adapters import HTTPAdapter @@ -27,8 +28,11 @@ def getXpathSingle(htmlcode, xpath): G_USER_AGENT = r'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.54 Safari/537.36' -# 网页请求核心 + def get_html(url, cookies: dict = None, ua: str = None, return_type: str = None, encoding: str = None): + """ + 网页请求核心函数 + """ verify = config.getInstance().cacert_file() configProxy = config.getInstance().proxy() errors = "" @@ -39,7 +43,8 @@ def get_html(url, cookies: dict = None, ua: str = None, return_type: str = None, try: if configProxy.enable: proxies = configProxy.proxies() - result = requests.get(str(url), headers=headers, timeout=configProxy.timeout, proxies=proxies, verify=verify, + result = requests.get(str(url), headers=headers, timeout=configProxy.timeout, proxies=proxies, + verify=verify, cookies=cookies) else: result = requests.get(str(url), headers=headers, timeout=configProxy.timeout, cookies=cookies) @@ -89,7 +94,8 @@ def post_html(url: str, query: dict, headers: dict = None) -> requests.Response: print("[-]" + errors) -G_DEFAULT_TIMEOUT = 10 # seconds +G_DEFAULT_TIMEOUT = 10 # seconds + class TimeoutHTTPAdapter(HTTPAdapter): def __init__(self, *args, **kwargs): @@ -98,6 +104,7 @@ class TimeoutHTTPAdapter(HTTPAdapter): self.timeout = kwargs["timeout"] del kwargs["timeout"] super().__init__(*args, **kwargs) + def send(self, request, **kwargs): timeout = kwargs.get("timeout") if timeout is None: @@ -106,12 +113,14 @@ class TimeoutHTTPAdapter(HTTPAdapter): # with keep-alive feature -def get_html_session(url:str = None, cookies: dict = None, ua: str = None, 
return_type: str = None, encoding: str = None): +def get_html_session(url: str = None, cookies: dict = None, ua: str = None, return_type: str = None, + encoding: str = None): configProxy = config.getInstance().proxy() session = requests.Session() if isinstance(cookies, dict) and len(cookies): requests.utils.add_dict_to_cookiejar(session.cookies, cookies) - retries = Retry(total=configProxy.retry, connect=configProxy.retry, backoff_factor=1, status_forcelist=[429, 500, 502, 503, 504]) + retries = Retry(total=configProxy.retry, connect=configProxy.retry, backoff_factor=1, + status_forcelist=[429, 500, 502, 503, 504]) session.mount("https://", TimeoutHTTPAdapter(max_retries=retries, timeout=configProxy.timeout)) session.mount("http://", TimeoutHTTPAdapter(max_retries=retries, timeout=configProxy.timeout)) if configProxy.enable: @@ -122,7 +131,7 @@ def get_html_session(url:str = None, cookies: dict = None, ua: str = None, retur try: if isinstance(url, str) and len(url): result = session.get(str(url)) - else: # 空url参数直接返回可重用session对象,无需设置return_type + else: # 空url参数直接返回可重用session对象,无需设置return_type return session if not result.ok: return None @@ -142,12 +151,14 @@ def get_html_session(url:str = None, cookies: dict = None, ua: str = None, retur return None -def get_html_by_browser(url:str = None, cookies: dict = None, ua: str = None, return_type: str = None, encoding: str = None, use_scraper: bool = False): +def get_html_by_browser(url: str = None, cookies: dict = None, ua: str = None, return_type: str = None, + encoding: str = None, use_scraper: bool = False): configProxy = config.getInstance().proxy() - s = create_scraper(browser={'custom': ua or G_USER_AGENT,}) if use_scraper else requests.Session() + s = create_scraper(browser={'custom': ua or G_USER_AGENT, }) if use_scraper else requests.Session() if isinstance(cookies, dict) and len(cookies): requests.utils.add_dict_to_cookiejar(s.cookies, cookies) - retries = Retry(total=configProxy.retry, 
connect=configProxy.retry, backoff_factor=1, status_forcelist=[429, 500, 502, 503, 504]) + retries = Retry(total=configProxy.retry, connect=configProxy.retry, backoff_factor=1, + status_forcelist=[429, 500, 502, 503, 504]) s.mount("https://", TimeoutHTTPAdapter(max_retries=retries, timeout=configProxy.timeout)) s.mount("http://", TimeoutHTTPAdapter(max_retries=retries, timeout=configProxy.timeout)) if configProxy.enable: @@ -178,12 +189,14 @@ def get_html_by_browser(url:str = None, cookies: dict = None, ua: str = None, re return None -def get_html_by_form(url, form_select: str = None, fields: dict = None, cookies: dict = None, ua: str = None, return_type: str = None, encoding: str = None): +def get_html_by_form(url, form_select: str = None, fields: dict = None, cookies: dict = None, ua: str = None, + return_type: str = None, encoding: str = None): configProxy = config.getInstance().proxy() s = requests.Session() if isinstance(cookies, dict) and len(cookies): requests.utils.add_dict_to_cookiejar(s.cookies, cookies) - retries = Retry(total=configProxy.retry, connect=configProxy.retry, backoff_factor=1, status_forcelist=[429, 500, 502, 503, 504]) + retries = Retry(total=configProxy.retry, connect=configProxy.retry, backoff_factor=1, + status_forcelist=[429, 500, 502, 503, 504]) s.mount("https://", TimeoutHTTPAdapter(max_retries=retries, timeout=configProxy.timeout)) s.mount("http://", TimeoutHTTPAdapter(max_retries=retries, timeout=configProxy.timeout)) if configProxy.enable: @@ -216,12 +229,14 @@ def get_html_by_form(url, form_select: str = None, fields: dict = None, cookies: return None -def get_html_by_scraper(url:str = None, cookies: dict = None, ua: str = None, return_type: str = None, encoding: str = None): +def get_html_by_scraper(url: str = None, cookies: dict = None, ua: str = None, return_type: str = None, + encoding: str = None): configProxy = config.getInstance().proxy() - session = create_scraper(browser={'custom': ua or G_USER_AGENT,}) + session = 
create_scraper(browser={'custom': ua or G_USER_AGENT, }) if isinstance(cookies, dict) and len(cookies): requests.utils.add_dict_to_cookiejar(session.cookies, cookies) - retries = Retry(total=configProxy.retry, connect=configProxy.retry, backoff_factor=1, status_forcelist=[429, 500, 502, 503, 504]) + retries = Retry(total=configProxy.retry, connect=configProxy.retry, backoff_factor=1, + status_forcelist=[429, 500, 502, 503, 504]) session.mount("https://", TimeoutHTTPAdapter(max_retries=retries, timeout=configProxy.timeout)) session.mount("http://", TimeoutHTTPAdapter(max_retries=retries, timeout=configProxy.timeout)) if configProxy.enable: @@ -230,7 +245,7 @@ def get_html_by_scraper(url:str = None, cookies: dict = None, ua: str = None, re try: if isinstance(url, str) and len(url): result = session.get(str(url)) - else: # 空url参数直接返回可重用scraper对象,无需设置return_type + else: # 空url参数直接返回可重用scraper对象,无需设置return_type return session if not result.ok: return None @@ -285,7 +300,12 @@ def translate( app_id: str = "", key: str = "", delay: int = 0, -): +) -> str: + """ + translate japanese kana to simplified chinese + 翻译日语假名到简体中文 + :raises ValueError: Non-existent translation engine + """ trans_result = "" # 中文句子如果包含&等符号会被谷歌翻译截断损失内容,而且中文翻译到中文也没有意义,故而忽略,只翻译带有日语假名的 if not is_japanese(src): @@ -295,7 +315,7 @@ def translate( if not re.match('^translate\.google\.(com|com\.\w{2}|\w{2})$', gsite): gsite = 'translate.google.cn' url = ( -f"https://{gsite}/translate_a/single?client=gtx&dt=t&dj=1&ie=UTF-8&sl=auto&tl={target_language}&q={src}" + f"https://{gsite}/translate_a/single?client=gtx&dt=t&dj=1&ie=UTF-8&sl=auto&tl={target_language}&q={src}" ) result = get_html(url=url, return_type="object") if not result.ok: @@ -324,26 +344,27 @@ f"https://{gsite}/translate_a/single?client=gtx&dt=t&dj=1&ie=UTF-8&sl=auto&tl={t return trans_result -# 从浏览器中导出网站登录验证信息的cookies,能够以会员方式打开游客无法访问到的页面 -# 示例: FC2-755670 url https://javdb9.com/v/vO8Mn -# json 文件格式 -# 文件名: 站点名.json,示例 javdb9.json -# 
内容(文件编码:UTF-8): -''' -{ - "over18":"1", - "redirect_to":"%2Fv%2FvO8Mn", - "remember_me_token":"cbJdeaFpbHMiOnsibWVzc2FnZSI6IklrNVJjbTAzZFVSRVlVaEtPWEpUVFhOVU0yNXhJZz09IiwiZXhwIjoiMjAyMS0wNS0xNVQxMzoyODoxNy4wMDBaIiwicHVyIjoiY29va2llLnJlbWVtYmVyX21lX3Rva2VuIn19--a7131611e844cf75f9db4cd411b635889bff3fe3", - "_jdb_session":"asddefqfwfwwrfdsdaAmqKj1%2FvOrDQP4b7h%2BvGp7brvIShi2Y%2FHBUr%2BklApk06TfhBOK3g5gRImZzoi49GINH%2FK49o3W%2FX64ugBiUAcudN9b27Mg6Ohu%2Bx9Z7A4bbqmqCt7XR%2Bao8PRuOjMcdDG5czoYHJCPIPZQFU28Gd7Awc2jc5FM5CoIgSRyaYDy9ulTO7DlavxoNL%2F6OFEL%2FyaA6XUYTB2Gs1kpPiUDqwi854mo5%2FrNxMhTeBK%2BjXciazMtN5KlE5JIOfiWAjNrnx7SV3Hj%2FqPNxRxXFQyEwHr5TZa0Vk1%2FjbwWQ0wcIFfh%2FMLwwqKydAh%2FLndc%2Bmdv3e%2FJ%2BiL2--xhqYnMyVRlxJajdN--u7nl0M7Oe7tZtPd4kIaEbg%3D%3D", - "locale":"zh", - "__cfduid":"dee27116d98c432a5cabc1fe0e7c2f3c91620479752", - "theme":"auto" -} -''' -# 从网站登录后,通过浏览器插件(CookieBro或EdittThisCookie)或者直接在地址栏网站链接信息处都可以复制或者导出cookie内容, -# 并填写到以上json文件的相应字段中 -def load_cookies(filename): - filename = os.path.basename(filename) +def load_cookies(cookie_json_filename: str): + """ + 加载cookie,用于以会员方式访问非游客内容 + + :filename: cookie文件名。获取cookie方式:从网站登录后,通过浏览器插件(CookieBro或EdittThisCookie)或者直接在地址栏网站链接信息处都可以复制或者导出cookie内容,以JSON方式保存 + + # 示例: FC2-755670 url https://javdb9.com/v/vO8Mn + # json 文件格式 + # 文件名: 站点名.json,示例 javdb9.json + # 内容(文件编码:UTF-8): + { + "over18":"1", + "redirect_to":"%2Fv%2FvO8Mn", + "remember_me_token":"***********", + "_jdb_session":"************", + "locale":"zh", + "__cfduid":"*********", + "theme":"auto" + } + """ + filename = os.path.basename(cookie_json_filename) if not len(filename): return None, None path_search_order = ( @@ -364,8 +385,11 @@ def load_cookies(filename): except: return None, None -# 文件修改时间距此时的天数 -def file_modification_days(filename) -> int: + +def file_modification_days(filename: str) -> int: + """ + 文件修改时间距此时的天数 + """ mfile = Path(filename) if not mfile.is_file(): return 9999 @@ -376,48 +400,61 @@ def file_modification_days(filename) -> int: return 
9999 return days + def file_not_exist_or_empty(filepath) -> bool: return not os.path.isfile(filepath) or os.path.getsize(filepath) == 0 -# 日语简单检测 -def is_japanese(s) -> bool: - return bool(re.search(r'[\u3040-\u309F\u30A0-\u30FF\uFF66-\uFF9F]', s, re.UNICODE)) + +def is_japanese(raw: str) -> bool: + """ + 日语简单检测 + """ + return bool(re.search(r'[\u3040-\u309F\u30A0-\u30FF\uFF66-\uFF9F]', raw, re.UNICODE)) # Usage: python ./ADC_function.py https://cn.bing.com/ if __name__ == "__main__": import sys, timeit from http.client import HTTPConnection + + def benchmark(t, url): print(f"HTTP GET Benchmark times:{t} url:{url}") tm = timeit.timeit(f"_ = session1.get('{url}')", - "from __main__ import get_html_session;session1=get_html_session()", - number=t) + "from __main__ import get_html_session;session1=get_html_session()", + number=t) print(f' *{tm:>10.5f}s get_html_session() Keep-Alive enable') tm = timeit.timeit(f"_ = scraper1.get('{url}')", - "from __main__ import get_html_by_scraper;scraper1=get_html_by_scraper()", - number=t) + "from __main__ import get_html_by_scraper;scraper1=get_html_by_scraper()", + number=t) print(f' *{tm:>10.5f}s get_html_by_scraper() Keep-Alive enable') tm = timeit.timeit(f"_ = browser1.open('{url}')", - "from __main__ import get_html_by_browser;browser1=get_html_by_browser()", - number=t) + "from __main__ import get_html_by_browser;browser1=get_html_by_browser()", + number=t) print(f' *{tm:>10.5f}s get_html_by_browser() Keep-Alive enable') tm = timeit.timeit(f"_ = get_html('{url}')", - "from __main__ import get_html", - number=t) + "from __main__ import get_html", + number=t) print(f' *{tm:>10.5f}s get_html()') + + t = 100 - #url = "https://www.189.cn/" + + # url = "https://www.189.cn/" url = "http://www.chinaunicom.com" HTTPConnection.debuglevel = 1 s = get_html_session() _ = s.get(url) HTTPConnection.debuglevel = 0 - if len(sys.argv)>1: + if len(sys.argv) > 1: url = sys.argv[1] benchmark(t, url) -def download_file_with_filename(url, 
filename, path): + +def download_file_with_filename(url: str, filename: str, path: str) -> None: + """ + download file save to give path with given name from given url + """ conf = config.getInstance() configProxy = conf.proxy() @@ -475,38 +512,55 @@ def download_file_with_filename(url, filename, path): raise ValueError('[-]Connect Failed! Please check your Proxy or Network!') return -def download_one_file(args): + +def download_one_file(args) -> str: + """ + download file save to given path from given url + wrapped for map function + """ + def _inner(url: str, save_path: Path): filebytes = get_html(url, return_type='content') if isinstance(filebytes, bytes) and len(filebytes): if len(filebytes) == save_path.open('wb').write(filebytes): return str(save_path) + return _inner(*args) -'''用法示例: 2线程同时下载两个不同文件,并保存到不同路径,路径目录可未创建,但需要具备对目标目录和文件的写权限 -parallel_download_files([ + +def parallel_download_files(dn_list: typing.Iterable[typing.Sequence], parallel: int = 0): + """ + download files in parallel 多线程下载文件 + + 用法示例: 2线程同时下载两个不同文件,并保存到不同路径,路径目录可未创建,但需要具备对目标目录和文件的写权限 + parallel_download_files([ ('https://site1/img/p1.jpg', 'C:/temp/img/p1.jpg'), ('https://site2/cover/n1.xml', 'C:/tmp/cover/n1.xml') ]) -''' -# dn_list 可以是 tuple或者list: ((url1, save_fullpath1),(url2, save_fullpath2),) -# parallel: 并行下载的线程池线程数,为0则由函数自己决定 -def parallel_download_files(dn_list, parallel: int = 0): + + :dn_list: 可以是 tuple或者list: ((url1, save_fullpath1),(url2, save_fullpath2),) fullpath可以是str或Path + :parallel: 并行下载的线程池线程数,为0则由函数自己决定 + """ mp_args = [] for url, fullpath in dn_list: - if url and isinstance(url, str) and url.startswith('http') and fullpath and isinstance(fullpath, (str, Path)) and len(str(fullpath)): + if url and isinstance(url, str) and url.startswith('http') \ + and fullpath and isinstance(fullpath, (str, Path)) and len(str(fullpath)): fullpath = Path(fullpath) fullpath.parent.mkdir(parents=True, exist_ok=True) mp_args.append((url, fullpath)) if not len(mp_args): return [] - if not 
isinstance(parallel, int) or parallel not in range(1,200): + if not isinstance(parallel, int) or parallel not in range(1, 200): parallel = min(5, len(mp_args)) with ThreadPoolExecutor(parallel) as pool: results = list(pool.map(download_one_file, mp_args)) return results -def delete_all_elements_in_list(string,lists): + +def delete_all_elements_in_list(string: str, lists: typing.Iterable[str]): + """ + delete same string in given list + """ new_lists = [] for i in lists: if i != string: diff --git a/Movie_Data_Capture.py b/Movie_Data_Capture.py index 237eec9..141a241 100644 --- a/Movie_Data_Capture.py +++ b/Movie_Data_Capture.py @@ -280,7 +280,7 @@ def sigdebug_handler(*args): # 新增失败文件列表跳过处理,及.nfo修改天数跳过处理,提示跳过视频总数,调试模式(-g)下详细被跳过文件,跳过小广告 -def movie_lists(source_folder, regexstr: str) -> list[str]: +def movie_lists(source_folder, regexstr: str) -> typing.List[str]: conf = config.getInstance() main_mode = conf.main_mode() debug = conf.debug() @@ -526,7 +526,10 @@ def main(): create_failed_folder(conf.failed_folder()) # Download Mapping Table, parallel version - def fmd(f): + def fmd(f) -> typing.Tuple[str, Path]: + """ + + """ return ('https://raw.githubusercontent.com/yoshiko2/Movie_Data_Capture/master/MappingTable/' + f, Path.home() / '.local' / 'share' / 'mdc' / f) diff --git a/WebCrawler/__init__.py b/WebCrawler/__init__.py index e431f9f..38d96d5 100644 --- a/WebCrawler/__init__.py +++ b/WebCrawler/__init__.py @@ -38,9 +38,10 @@ def get_data_state(data: dict) -> bool: # 元数据获取失败检测 return True -def get_data_from_json(file_number, oCC): # 从JSON返回元数据 + +def get_data_from_json(file_number, oCC): """ - iterate through all services and fetch the data + iterate through all services and fetch the data 从JSON返回元数据 """ actor_mapping_data = etree.parse(str(Path.home() / '.local' / 'share' / 'mdc' / 'mapping_actor.xml')) @@ -331,6 +332,7 @@ def get_data_from_json(file_number, oCC): # 从JSON返回元数据 json_data['naming_rule'] = naming_rule return json_data + def 
special_characters_replacement(text) -> str: if not isinstance(text, str): return text diff --git a/core.py b/core.py index 7290b21..15eadb4 100644 --- a/core.py +++ b/core.py @@ -371,15 +371,19 @@ def print_files(path, leak_word, c_word, naming_rule, part, cn_sub, json_data, f moveFailedFolder(filepath) return -# 此函数从gui版copy过来用用 -# 参数说明 -# poster_path -# thumb_path -# cn_sub 中文字幕 参数值为 1 0 -# leak 流出 参数值为 1 0 -# uncensored 无码 参数值为 1 0 -# ========================================================================加水印 -def add_mark(poster_path, thumb_path, cn_sub, leak, uncensored, hack): + +def add_mark(poster_path, thumb_path, cn_sub, leak, uncensored, hack) -> None: + """ + add watermark on poster or thumb for describe extra properties 给海报和缩略图加属性水印 + + 此函数从gui版copy过来用用 + + :poster_path 海报位置 + :thumb_path 缩略图位置 + :cn_sub: 中文字幕 可选值:1,"1" 或其他值 + :uncensored 无码 可选值:1,"1" 或其他值 + :hack 破解 可选值:1,"1" 或其他值 + """ mark_type = '' if cn_sub: mark_type += ',字幕' @@ -396,6 +400,7 @@ def add_mark(poster_path, thumb_path, cn_sub, leak, uncensored, hack): add_mark_thread(poster_path, cn_sub, leak, uncensored, hack) print('[+]Poster Add Mark: ' + mark_type.strip(',')) + def add_mark_thread(pic_path, cn_sub, leak, uncensored, hack): size = 9 img_pic = Image.open(pic_path) @@ -414,6 +419,7 @@ def add_mark_thread(pic_path, cn_sub, leak, uncensored, hack): add_to_pic(pic_path, img_pic, size, count, 4) img_pic.close() + def add_to_pic(pic_path, img_pic, size, count, mode): mark_pic_path = '' pngpath = '' @@ -455,6 +461,7 @@ def add_to_pic(pic_path, img_pic, size, count, mode): img_pic.save(pic_path, quality=95) # ========================结束================================= + def paste_file_to_folder(filepath, path, number, leak_word, c_word, hack_word): # 文件路径,番号,后缀,要移动至的位置 filepath_obj = pathlib.Path(filepath) houzhui = filepath_obj.suffix @@ -546,6 +553,7 @@ def paste_file_to_folder_mode2(filepath, path, multi_part, number, part, leak_wo print(f'[-]OS Error errno {oserr.errno}') return + 
def get_part(filepath): try: if re.search('-CD\d+', filepath): From a4758a36699733f487b3ef08a9d9d25b579292d1 Mon Sep 17 00:00:00 2001 From: Yoshiko2 Date: Sat, 26 Feb 2022 04:21:22 +0800 Subject: [PATCH 8/9] Update main.yml --- .github/workflows/main.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 0988bdc..e99d99a 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -39,8 +39,6 @@ jobs: run: | pyinstaller \ --onefile Movie_Data_Capture.py \ - --hidden-import ADC_function.py \ - --hidden-import core.py \ --add-data "$(python -c 'import cloudscraper as _; print(_.__path__[0])' | tail -n 1):cloudscraper" \ --add-data "$(python -c 'import opencc as _; print(_.__path__[0])' | tail -n 1):opencc" \ --add-data "Img:Img" \ @@ -51,8 +49,6 @@ jobs: run: | pyinstaller ` --onefile Movie_Data_Capture.py ` - --hidden-import ADC_function.py ` - --hidden-import core.py ` --add-data "$(python -c 'import cloudscraper as _; print(_.__path__[0])' | tail -n 1);cloudscraper" ` --add-data "$(python -c 'import opencc as _; print(_.__path__[0])' | tail -n 1);opencc" ` --add-data "Img;Img" ` From d4197a9d1699064342f371c7eb68f663b66520b3 Mon Sep 17 00:00:00 2001 From: Yoshiko2 Date: Sat, 26 Feb 2022 04:24:00 +0800 Subject: [PATCH 9/9] Update main.yml --- .github/workflows/main.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index e99d99a..b05c5fa 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -42,6 +42,7 @@ jobs: --add-data "$(python -c 'import cloudscraper as _; print(_.__path__[0])' | tail -n 1):cloudscraper" \ --add-data "$(python -c 'import opencc as _; print(_.__path__[0])' | tail -n 1):opencc" \ --add-data "Img:Img" \ + --add-data "ImageProcessing:ImageProcessing" \ --add-data "config.ini:." 
\ - name: Build with PyInstaller for windows @@ -52,6 +53,7 @@ jobs: --add-data "$(python -c 'import cloudscraper as _; print(_.__path__[0])' | tail -n 1);cloudscraper" ` --add-data "$(python -c 'import opencc as _; print(_.__path__[0])' | tail -n 1);opencc" ` --add-data "Img;Img" ` + --add-data "ImageProcessing;ImageProcessing" ` + --add-data "config.ini;." ` - name: Copy config.ini