diff --git a/ADC_function.py b/ADC_function.py index a11ef3b..e755fb5 100755 --- a/ADC_function.py +++ b/ADC_function.py @@ -622,11 +622,3 @@ def file_modification_days(filename) -> int: if days < 0: return 9999 return days - -# 检查文件是否是链接 -def is_link(filename: str): - if os.path.islink(filename): - return True # symlink - elif os.stat(filename).st_nlink > 1: - return True # hard link Linux MAC OSX Windows NTFS - return False diff --git a/AV_Data_Capture.py b/AV_Data_Capture.py index 6ab00ad..9b75f50 100755 --- a/AV_Data_Capture.py +++ b/AV_Data_Capture.py @@ -11,7 +11,7 @@ import config from datetime import datetime, timedelta import time from pathlib import Path -from ADC_function import file_modification_days, get_html, is_link +from ADC_function import file_modification_days, get_html from number_parser import get_number from core import core_main, moveFailedFolder @@ -35,25 +35,48 @@ def check_update(local_version): def argparse_function(ver: str) -> typing.Tuple[str, str, bool]: - parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter) + conf = config.getInstance() + parser = argparse.ArgumentParser(epilog=f"Load Config file '{conf.ini_path}'.") parser.add_argument("file", default='', nargs='?', help="Single Movie file path.") parser.add_argument("-p","--path",default='',nargs='?',help="Analysis folder path.") - # parser.add_argument("-c", "--config", default='config.ini', nargs='?', help="The config file Path.") - default_logdir = os.path.join(Path.home(),'.avlogs') + parser.add_argument("-m","--main-mode",default='',nargs='?',help="Main mode. 1:Scraping 2:Organizing 3:Scraping in analysis folder") + parser.add_argument("-n", "--number", default='', nargs='?', help="Custom file number of single movie file.") + # parser.add_argument("-C", "--config", default='config.ini', nargs='?', help="The config file Path.") + default_logdir = Path.home() / '.avlogs' parser.add_argument("-o","--log-dir",dest='logdir',default=default_logdir,nargs='?', - help=f"""Duplicate stdout and stderr to logfiles -in logging folder, default on. -default for current user: {default_logdir} -Use --log-dir= to turn off logging feature.""") - parser.add_argument("-n", "--number", default='', nargs='?', help="Custom file number") - parser.add_argument("-a", "--auto-exit", dest='autoexit', action="store_true", - help="Auto exit after program complete") + help=f"""Duplicate stdout and stderr to logfiles in logging folder, default on. + default folder for current user: '{default_logdir}'. Change default folder to an empty file, + or use --log-dir= to turn log off.""") parser.add_argument("-q","--regex-query",dest='regexstr',default='',nargs='?',help="python re module regex filepath filtering.") + parser.add_argument("-d","--nfo-skip-days",dest='days',default='',nargs='?', help="Override nfo_skip_days value in config.") + parser.add_argument("-c","--stop-counter",dest='cnt',default='',nargs='?', help="Override stop_counter value in config.") + parser.add_argument("-i", "--ignore-failed-list", action="store_true", help="Ignore failed list '{}'".format( + os.path.join(os.path.abspath(conf.failed_folder()), 'failed_list.txt'))) + parser.add_argument("-a", "--auto-exit", action="store_true", + help="Auto exit after program complete") + parser.add_argument("-g","--debug", action="store_true", + help="Turn on debug mode to generate diagnostic log for issue report.") + parser.add_argument("-z","--zero-operation",dest='zero_op', action="store_true", + help="""Only show job list of files and numbers, and **NO** actual operation +is performed. It may help you correct wrong numbers before real job.""") parser.add_argument("-v", "--version", action="version", version=ver) + #ini_path args = parser.parse_args() + def get_natural_number_or_none(value): + return int(value) if isinstance(value, str) and value.isnumeric() and int(value)>=0 else None + def get_str_or_none(value): + return value if isinstance(value, str) and len(value) else None + def get_bool_or_none(value): + return True if isinstance(value, bool) and value else None + config.G_conf_override["common:main_mode"] = get_natural_number_or_none(args.main_mode) + config.G_conf_override["common:source_folder"] = get_str_or_none(args.path) + config.G_conf_override["common:auto_exit"] = get_bool_or_none(args.auto_exit) + config.G_conf_override["common:nfo_skip_days"] = get_natural_number_or_none(args.days) + config.G_conf_override["common:stop_counter"] = get_natural_number_or_none(args.cnt) + config.G_conf_override["common:ignore_failed_list"] = get_bool_or_none(args.ignore_failed_list) + config.G_conf_override["debug_mode:switch"] = get_bool_or_none(args.debug) - return args.file, args.path, args.number, args.autoexit, args.logdir, args.regexstr - + return args.file, args.number, args.logdir, args.regexstr, args.zero_op class OutLogger(object): def __init__(self, logfile) -> None: @@ -200,15 +223,14 @@ def close_logfile(logdir: str): # 100MB的日志文件能缩小到3.7MB。 -# 重写视频文件扫描,消除递归,取消全局变量,新增失败文件列表跳过处理 -def movie_lists(root, conf, regexstr): - escape_folder = re.split("[,,]", conf.escape_folder()) +# 新增失败文件列表跳过处理,及.nfo修改天数跳过处理,提示跳过视频总数,调试模式(-g)下详细被跳过文件,跳过小广告 +def movie_lists(source_folder, regexstr): + conf = config.getInstance() main_mode = conf.main_mode() debug = conf.debug() nfo_skip_days = conf.nfo_skip_days() soft_link = conf.soft_link() - total = [] - file_type = conf.media_type().upper().split(",") + file_type = conf.media_type().lower().split(",") trailerRE = re.compile(r'-trailer\.', re.IGNORECASE) cliRE = None if isinstance(regexstr, str) and len(regexstr): @@ -216,61 +238,85 @@ def movie_lists(root, conf, regexstr): cliRE = re.compile(regexstr, re.IGNORECASE) except: pass + failed_list_txt_path = Path(conf.failed_folder()).resolve() / 'failed_list.txt' failed_set = set() - if main_mode == 3 or soft_link: + if (main_mode == 3 or soft_link) and not conf.ignore_failed_list(): try: - with open(os.path.join(conf.failed_folder(), 'failed_list.txt'), 'r', encoding='utf-8') as flt: + with open(failed_list_txt_path, 'r', encoding='utf-8') as flt: flist = flt.read().splitlines() failed_set = set(flist) - flt.close() if len(flist) != len(failed_set): - with open(os.path.join(conf.failed_folder(), 'failed_list.txt'), 'w', encoding='utf-8') as flt: - flt.writelines([line + '\n' for line in failed_set]) - flt.close() + with open(failed_list_txt_path, 'w', encoding='utf-8') as flt: + wtlines = [line + '\n' for line in failed_set] + wtlines.sort() + flt.writelines(wtlines) except: pass - for current_dir, subdirs, files in os.walk(root, topdown=False): - if len(set(current_dir.replace("\\","/").split("/")) & set(escape_folder)) > 0: + if not Path(source_folder).is_dir(): + print('[-]Source folder not found!') + return [] + total = [] + source = Path(source_folder).resolve() + skip_failed_cnt, skip_nfo_days_cnt = 0, 0 + escape_folder_set = set(re.split("[,,]", conf.escape_folder())) + for full_name in source.glob(r'**/*'): + if main_mode != 3 and set(full_name.parent.parts) & escape_folder_set: continue - for f in files: - full_name = os.path.join(current_dir, f) - if not os.path.splitext(full_name)[1].upper() in file_type: - continue - absf = os.path.abspath(full_name) - if absf in failed_set: - if debug: - print('[!]Skip failed file:', absf) - continue - if cliRE and not cliRE.search(absf): - continue - if main_mode == 3 and nfo_skip_days > 0: - nfo = Path(absf).with_suffix('.nfo') - if file_modification_days(nfo) <= nfo_skip_days: - continue - if (main_mode == 3 or not is_link(absf)) and not trailerRE.search(f): - total.append(absf) + if not full_name.suffix.lower() in file_type: + continue + absf = str(full_name) + if absf in failed_set: + skip_failed_cnt += 1 + if debug: + print('[!]Skip failed movie:', absf) + continue + is_sym = full_name.is_symlink() + if main_mode != 3 and (is_sym or full_name.stat().st_nlink > 1): # 短路布尔 符号链接不取stat(),因为符号链接可能指向不存在目标 + continue # file is symlink or hardlink(Linux/NTFS/Darwin) + # 调试用0字节样本允许通过,去除小于120MB的广告'苍老师强力推荐.mp4'(102.2MB)'黑道总裁.mp4'(98.4MB)'有趣的妹子激情表演.MP4'(95MB)'有趣的臺灣妹妹直播.mp4'(15.1MB) + movie_size = 0 if is_sym else full_name.stat().st_size # 同上 符号链接不取stat()及st_size,直接赋0跳过小视频检测 + if movie_size > 0 and movie_size < 125829120: # 1024*1024*120=125829120 + continue + if cliRE and not cliRE.search(absf) or trailerRE.search(full_name.name): + continue + if main_mode == 3 and nfo_skip_days > 0 and file_modification_days(full_name.with_suffix('.nfo')) <= nfo_skip_days: + skip_nfo_days_cnt += 1 + if debug: + print(f"[!]Skip movie by it's .nfo which modified within {nfo_skip_days} days: '{absf}'") + continue + total.append(absf) + + if skip_failed_cnt: + print(f"[!]Skip {skip_failed_cnt} movies in failed list '{failed_list_txt_path}'.") + if skip_nfo_days_cnt: + print(f"[!]Skip {skip_nfo_days_cnt} movies in source folder '{source}' who's .nfo modified within {nfo_skip_days} days.") if nfo_skip_days <= 0 or not soft_link or main_mode == 3: return total # 软连接方式,已经成功削刮的也需要从成功目录中检查.nfo更新天数,跳过N天内更新过的 skip_numbers = set() - success_folder = conf.success_folder() - for current_dir, subdirs, files in os.walk(success_folder, topdown=False): - for f in files: - f_obj = Path(f) - if f_obj.suffix.lower() != '.nfo': - continue - if file_modification_days(Path(current_dir) / f_obj) > nfo_skip_days: - continue - number = get_number(False, f_obj.stem) - if number: - skip_numbers.add(number.upper()) + success_folder = Path(conf.success_folder()).resolve() + for f in success_folder.glob(r'**/*'): + if not re.match(r'\.nfo', f.suffix, re.IGNORECASE): + continue + if file_modification_days(f) > nfo_skip_days: + continue + number = get_number(False, f.stem) + if not number: + continue + skip_numbers.add(number.lower()) + rm_list = [] for f in total: n_number = get_number(False, os.path.basename(f)) - if n_number and n_number.upper() in skip_numbers: + if n_number and n_number.lower() in skip_numbers: rm_list.append(f) for f in rm_list: total.remove(f) + if debug: + print(f"[!]Skip file successfully processed within {nfo_skip_days} days: '{f}'") + if len(rm_list): + print(f"[!]Skip {len(rm_list)} movies in success folder '{success_folder}' who's .nfo modified within {nfo_skip_days} days.") + return total @@ -299,14 +345,18 @@ def rm_empty_folder(path): pass -def create_data_and_move(file_path: str, c: config.Config, debug): +def create_data_and_move(file_path: str, zero_op): # Normalized number, eg: 111xxx-222.mp4 -> xxx-222.mp4 + c = config.getInstance() + debug = c.debug() file_name = os.path.basename(file_path) n_number = get_number(debug, file_name) file_path = os.path.abspath(file_path) if debug == True: print(f"[!] [{n_number}] As Number making data for '{file_path}'") + if zero_op: + return if n_number: core_main(file_path, n_number, c) else: @@ -315,6 +365,8 @@ def create_data_and_move(file_path: str, c: config.Config, debug): else: try: print(f"[!] [{n_number}] As Number making data for '{file_path}'") + if zero_op: + return if n_number: core_main(file_path, n_number, c) else: @@ -357,8 +409,17 @@ def create_data_and_move_with_custom_number(file_path: str, c: config.Config, cu if __name__ == '__main__': version = '5.0.1' urllib3.disable_warnings() #Ignore http proxy warning + + # Read config.ini first, in argparse_function() need conf.failed_folder() + conf = config.Config("config.ini") + # Parse command line args - single_file_path, folder_path, custom_number, auto_exit, logdir, regexstr = argparse_function(version) + single_file_path, custom_number, logdir, regexstr, zero_op = argparse_function(version) + + main_mode = conf.main_mode() + if not main_mode in (1, 2, 3): + print(f"[-]Main mode must be 1 or 2 or 3! You can run '{os.path.basename(sys.argv[0])} --help' for more help.") + sys.exit(4) dupe_stdout_to_logfile(logdir) @@ -368,9 +429,8 @@ if __name__ == '__main__': print('[*]======================================================') print('[*]严禁在墙内宣传本项目') - # Read config.ini - conf = config.Config("config.ini") - + start_time = time.time() + print('[+]Start at', time.strftime("%Y-%m-%d %H:%M:%S")) if conf.update_check(): check_update(version) @@ -382,9 +442,15 @@ if __name__ == '__main__': print('[!]Enable soft link') if len(sys.argv)>1: print('[!]CmdLine:'," ".join(sys.argv[1:])) + print('[+]Main Working mode ## {}: {} ## {}{}{}' + .format(*(main_mode, ['Scraping', 'Organizing', 'Scraping in analysis folder'][main_mode-1], + "" if not conf.multi_threading() else ", multi_threading on", + "" if conf.nfo_skip_days() == 0 else f", nfo_skip_days={conf.nfo_skip_days()}", + "" if conf.stop_counter() == 0 else f", stop_counter={conf.stop_counter()}" + ) if not single_file_path else ('-','Single File', '','','')) + ) create_failed_folder(conf.failed_folder()) - start_time = time.time() if not single_file_path == '': #Single File print('[+]==================== Single File =====================') @@ -393,32 +459,31 @@ if __name__ == '__main__': else: create_data_and_move_with_custom_number(single_file_path, conf, custom_number) else: - if folder_path == '': + folder_path = conf.source_folder() + if not isinstance(folder_path, str) or folder_path == '': folder_path = os.path.abspath(".") - movie_list = movie_lists(folder_path, conf, regexstr) + movie_list = movie_lists(folder_path, regexstr) count = 0 count_all = str(len(movie_list)) - print('[+]Find', count_all, 'movies. Start at', time.strftime("%Y-%m-%d %H:%M:%S")) - main_mode = conf.main_mode() + print('[+]Find', count_all, 'movies.') stop_count = conf.stop_counter() if stop_count<1: stop_count = 999999 else: count_all = str(min(len(movie_list), stop_count)) - if main_mode == 3: - print(f'[!]运行模式:**维护模式**,本程序将在处理{count_all}个视频文件后停止,如需后台执行自动退出请结合 -a 参数。') + for movie_path in movie_list: # 遍历电影列表 交给core处理 count = count + 1 percentage = str(count / int(count_all) * 100)[:4] + '%' print('[!] {:>30}{:>21}'.format('- ' + percentage + ' [' + str(count) + '/' + count_all + '] -', time.strftime("%H:%M:%S"))) - create_data_and_move(movie_path, conf, conf.debug()) + create_data_and_move(movie_path, zero_op) if count >= stop_count: print("[!]Stop counter triggered!") break - if conf.del_empty_folder(): + if conf.del_empty_folder() and not zero_op: rm_empty_folder(conf.success_folder()) rm_empty_folder(conf.failed_folder()) if len(folder_path): @@ -433,7 +498,7 @@ if __name__ == '__main__': close_logfile(logdir) - if not (conf.auto_exit() or auto_exit): + if not conf.auto_exit(): input("Press enter key exit, you can check the error message before you exit...") sys.exit(0)