diff --git a/Movie_Data_Capture.py b/Movie_Data_Capture.py
index 4a24948..c3c891e 100644
--- a/Movie_Data_Capture.py
+++ b/Movie_Data_Capture.py
@@ -62,6 +62,8 @@ def argparse_function(ver: str) -> typing.Tuple[str, str, str, str, bool]:
                         help="Override nfo_skip_days value in config.")
     parser.add_argument("-c", "--stop-counter", dest='cnt', default='', nargs='?',
                         help="Override stop_counter value in config.")
+    parser.add_argument("-R", "--rerun-delay", dest='delaytm', default='', nargs='?',
+                        help="Delay (e.g. 1h10m30s, or 60 for seconds) and rerun until all movies are processed. Note: stop_counter in config or -c must be non-zero.")
     parser.add_argument("-i", "--ignore-failed-list", action="store_true", help="Ignore failed list '{}'".format(
         os.path.join(os.path.abspath(conf.failed_folder()), 'failed_list.txt')))
     parser.add_argument("-a", "--auto-exit", action="store_true",
@@ -92,6 +94,7 @@ is performed. It may help you correct wrong numbers before real job.""")
     config.G_conf_override["common:stop_counter"] = get_natural_number_or_none(args.cnt)
     config.G_conf_override["common:ignore_failed_list"] = get_bool_or_none(args.ignore_failed_list)
     config.G_conf_override["debug_mode:switch"] = get_bool_or_none(args.debug)
+    config.G_conf_override["common:rerun_delay"] = get_str_or_none(args.delaytm)
 
     return args.file, args.number, args.logdir, args.regexstr, args.zero_op
 
@@ -250,29 +253,31 @@ def close_logfile(logdir: str):
         except:
             pass
     # 第三步,月合并到年
-    if today.month < 4:
-        return
-    mons = [f for f in log_dir.glob(r'*.txt') if re.match(r'^mdc_\d{6}$', f.stem, re.A)]
-    if not mons or not len(mons):
-        return
-    mons.sort()
-    deadline_year = f'mdc_{today.year - 1}13'
-    year_merge = [f for f in mons if f.stem < deadline_year]
-    if not year_merge or not len(year_merge):
-        return
-    toyear = len('12.txt')  # cut length mdc_2020|12.txt
-    for f in year_merge:
-        try:
-            year_file_name = str(f)[:-toyear] + '.txt'  # mdc_2020.txt
-            with open(year_file_name, 'a', encoding='utf-8') as y:
-                y.write(f.read_text(encoding='utf-8'))
-            f.unlink(missing_ok=True)
-        except:
-            pass
+    for i in range(1):
+        if today.month < 4:
+            break
+        mons = [f for f in log_dir.glob(r'*.txt') if re.match(r'^mdc_\d{6}$', f.stem, re.A)]
+        if not mons or not len(mons):
+            break
+        mons.sort()
+        deadline_year = f'mdc_{today.year - 1}13'
+        year_merge = [f for f in mons if f.stem < deadline_year]
+        if not year_merge or not len(year_merge):
+            break
+        toyear = len('12.txt')  # cut length mdc_2020|12.txt
+        for f in year_merge:
+            try:
+                year_file_name = str(f)[:-toyear] + '.txt'  # mdc_2020.txt
+                with open(year_file_name, 'a', encoding='utf-8') as y:
+                    y.write(f.read_text(encoding='utf-8'))
+                f.unlink(missing_ok=True)
+            except:
+                pass
     # 第四步,压缩年志 如果有压缩需求,请自行手工压缩,或者使用外部脚本来定时完成。推荐nongnu的lzip,对于
     # 这种粒度的文本日志,压缩比是目前最好的。lzip -9的运行参数下,日志压缩比要高于xz -9,而且内存占用更少,
     # 多核利用率更高(plzip多线程版本),解压速度更快。压缩后的大小差不多是未压缩时的2.4%到3.7%左右,
     # 100MB的日志文件能缩小到3.7MB。
+    return filepath
 
 
 def signal_handler(*args):
@@ -472,18 +477,9 @@ def create_data_and_move_with_custom_number(file_path: str, custom_number, oCC):
         print('[!]', err)
 
 
-def main():
-    version = '6.0.3'
-    urllib3.disable_warnings()  # Ignore http proxy warning
-
-    # Read config.ini first, in argparse_function() need conf.failed_folder()
-    conf = config.Config("config.ini")
-
-    # Parse command line args
-    single_file_path, custom_number, logdir, regexstr, zero_op = argparse_function(version)
-
-
-
+def main(args: tuple) -> Path:
+    (single_file_path, custom_number, logdir, regexstr, zero_op) = args
+    conf = config.getInstance()
     main_mode = conf.main_mode()
     folder_path = ""
     if main_mode not in (1, 2, 3):
@@ -614,14 +610,55 @@ def main():
     print("[+]All finished!!!")
 
-    close_logfile(logdir)
+    return close_logfile(logdir)
+
+
+def 分析日志文件(logfile):
+    try:
+        if not (isinstance(logfile, Path) and logfile.is_file()):
+            raise FileNotFoundError('log file not found')
+        logtxt = logfile.read_text(encoding='utf-8')
+        扫描电影数 = int(re.findall(r'\[\+]Find (.*) movies\.', logtxt)[0])
+        已处理 = int(re.findall(r'\[1/(.*?)] -', logtxt)[0])
+        完成数 = logtxt.count(r'[+]Wrote!')
+        return 扫描电影数, 已处理, 完成数
+    except:
+        return None, None, None
+
+
+if __name__ == '__main__':
+    version = '6.0.3'
+    multiprocessing.freeze_support()
+    urllib3.disable_warnings()  # Ignore http proxy warning
+
+    # Read config.ini first, in argparse_function() need conf.failed_folder()
+    conf = config.Config("config.ini")
+
+    # Parse command line args
+    args = tuple(argparse_function(version))
+
+    再运行延迟 = conf.rerun_delay()
+    if 再运行延迟 > 0 and conf.stop_counter() > 0:
+        while True:
+            try:
+                logfile = main(args)
+                (扫描电影数, 已处理, 完成数) = 分析结果元组 = tuple(分析日志文件(logfile))
+                if all(isinstance(v, int) for v in 分析结果元组):
+                    剩余个数 = 扫描电影数 - 已处理
+                    print(f'All movies:{扫描电影数} processed:{已处理} successes:{完成数} remain:{剩余个数}')
+                    if 剩余个数 == 0:
+                        break
+                    下次运行 = datetime.now() + timedelta(seconds=再运行延迟)
+                    print(f'Next run time: {下次运行.strftime("%H:%M:%S")}, rerun_delay={再运行延迟}, press Ctrl+C to stop.')
+                    time.sleep(再运行延迟)
+                else:
+                    break
+            except:
+                break
+    else:
+        main(args)
 
     if not conf.auto_exit():
         input("Press enter key exit, you can check the error message before you exit...")
     sys.exit(0)
-
-
-if __name__ == '__main__':
-    multiprocessing.freeze_support()
-    main()
diff --git a/config.ini b/config.ini
index f0e4456..011b8e5 100755
--- a/config.ini
+++ b/config.ini
@@ -20,7 +20,10 @@ del_empty_folder=1
 nfo_skip_days=30
 ; 处理完多少个视频文件后停止,0为处理所有视频文件
 stop_counter=0
-; 以上两个参数配合使用可以以多次少量的方式刮削或整理数千个文件而不触发翻译或元数据站封禁
+; 再运行延迟时间,单位:h时m分s秒 举例: 1h30m45s(1小时30分45秒) 45(45秒)
+; stop_counter不为零的条件下才有效,每处理stop_counter部影片后延迟rerun_delay秒再次运行
+rerun_delay=0
+; 以上三个参数配合使用可以以多次少量的方式刮削或整理数千个文件而不触发翻译或元数据站封禁
 ignore_failed_list=0
 download_only_missing_images=1
 mapping_table_validity=7
diff --git a/config.py b/config.py
index 63a12a8..1132a3e 100644
--- a/config.py
+++ b/config.py
@@ -16,6 +16,7 @@ G_conf_override = {
     "common:nfo_skip_days": None,
     "common:stop_counter": None,
     "common:ignore_failed_list": None,
+    "common:rerun_delay": None,
     "debug_mode:switch": None
 }
 
@@ -103,9 +104,12 @@ class Config:
         return self.conf.getboolean(section, item) if G_conf_override[f"{section}:{item}"] is None else bool(
             G_conf_override[f"{section}:{item}"])
 
-    def getint_override(self, section, item) -> int:
-        return self.conf.getint(section, item) if G_conf_override[f"{section}:{item}"] is None else int(
-            G_conf_override[f"{section}:{item}"])
+    def getint_override(self, section, item, fallback=None) -> int:
+        if G_conf_override[f"{section}:{item}"] is not None:
+            return int(G_conf_override[f"{section}:{item}"])
+        if fallback is not None:
+            return self.conf.getint(section, item, fallback=fallback)
+        return self.conf.getint(section, item)
 
     def get_override(self, section, item) -> str:
         return self.conf.get(section, item) if G_conf_override[f"{section}:{item}"] is None else str(
             G_conf_override[f"{section}:{item}"])
@@ -151,16 +155,10 @@ class Config:
         return self.conf.getboolean("common", "del_empty_folder")
 
     def nfo_skip_days(self) -> int:
-        try:
-            return self.getint_override("common", "nfo_skip_days")
-        except:
-            return 30
+        return self.getint_override("common", "nfo_skip_days", fallback=30)
 
     def stop_counter(self) -> int:
-        try:
-            return self.getint_override("common", "stop_counter")
-        except:
-            return 0
+        return self.getint_override("common", "stop_counter", fallback=0)
 
     def ignore_failed_list(self) -> bool:
         return self.getboolean_override("common", "ignore_failed_list")
@@ -171,6 +169,24 @@ class Config:
     def mapping_table_validity(self) -> int:
         return self.conf.getint("common", "mapping_table_validity")
 
+    def rerun_delay(self) -> int:
+        value = self.get_override("common", "rerun_delay")
+        if not (isinstance(value, str) and re.match(r'^[\dsmh]+$', value, re.I)):
+            return 0  # not match '1h30m45s' or '30' or '1s2m1h4s5m'
+        if value.isnumeric() and int(value) >= 0:
+            return int(value)
+        sec = 0
+        sv = re.findall(r'(\d+)s', value, re.I)
+        mv = re.findall(r'(\d+)m', value, re.I)
+        hv = re.findall(r'(\d+)h', value, re.I)
+        for v in sv:
+            sec += int(v)
+        for v in mv:
+            sec += int(v) * 60
+        for v in hv:
+            sec += int(v) * 3600
+        return sec
+
     def is_translate(self) -> bool:
         return self.conf.getboolean("translate", "switch")
 
@@ -375,6 +391,7 @@ class Config:
         conf.set(sec1, "ignore_failed_list", 0)
         conf.set(sec1, "download_only_missing_images", 1)
         conf.set(sec1, "mapping_table_validity", 7)
+        conf.set(sec1, "rerun_delay", 0)
 
         sec2 = "proxy"
         conf.add_section(sec2)
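
For reference, the rerun_delay value accepted by Config.rerun_delay() above is either a plain number of seconds or any mix of h/m/s tokens in any order. A minimal standalone sketch of the same parsing rule (the helper name below is chosen only for illustration and is not part of the patch):

    import re

    def parse_rerun_delay(value: str) -> int:
        # Reject anything that is not digits plus h/m/s tokens.
        if not re.match(r'^[\dsmh]+$', value, re.I):
            return 0
        # Plain digits mean seconds.
        if value.isnumeric():
            return int(value)
        # Otherwise sum every token, e.g. '1h30m45s' -> 3600 + 1800 + 45 = 5445.
        return (sum(int(v) for v in re.findall(r'(\d+)s', value, re.I))
                + sum(int(v) * 60 for v in re.findall(r'(\d+)m', value, re.I))
                + sum(int(v) * 3600 for v in re.findall(r'(\d+)h', value, re.I)))

    assert parse_rerun_delay('1h30m45s') == 5445
    assert parse_rerun_delay('60') == 60
    assert parse_rerun_delay('bad value') == 0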