AV_Data_Capture.py: add new command-line parameters -m -d -c -i -g -z

This commit is contained in:
lededev
2021-10-08 13:02:52 +08:00
parent b87206870b
commit 8ab736e4fa
2 changed files with 135 additions and 78 deletions

View File

@@ -11,7 +11,7 @@ import config
from datetime import datetime, timedelta
import time
from pathlib import Path
from ADC_function import file_modification_days, get_html, is_link
from ADC_function import file_modification_days, get_html
from number_parser import get_number
from core import core_main, moveFailedFolder
@@ -35,25 +35,48 @@ def check_update(local_version):
def argparse_function(ver: str) -> typing.Tuple[str, str, bool]:
parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter)
conf = config.getInstance()
parser = argparse.ArgumentParser(epilog=f"Load Config file '{conf.ini_path}'.")
parser.add_argument("file", default='', nargs='?', help="Single Movie file path.")
parser.add_argument("-p","--path",default='',nargs='?',help="Analysis folder path.")
# parser.add_argument("-c", "--config", default='config.ini', nargs='?', help="The config file Path.")
default_logdir = os.path.join(Path.home(),'.avlogs')
parser.add_argument("-m","--main-mode",default='',nargs='?',help="Main mode. 1:Scraping 2:Organizing 3:Scraping in analysis folder")
parser.add_argument("-n", "--number", default='', nargs='?', help="Custom file number of single movie file.")
# parser.add_argument("-C", "--config", default='config.ini', nargs='?', help="The config file Path.")
default_logdir = Path.home() / '.avlogs'
parser.add_argument("-o","--log-dir",dest='logdir',default=default_logdir,nargs='?',
help=f"""Duplicate stdout and stderr to logfiles
in logging folder, default on.
default for current user: {default_logdir}
Use --log-dir= to turn off logging feature.""")
parser.add_argument("-n", "--number", default='', nargs='?', help="Custom file number")
parser.add_argument("-a", "--auto-exit", dest='autoexit', action="store_true",
help="Auto exit after program complete")
help=f"""Duplicate stdout and stderr to logfiles in logging folder, default on.
default folder for current user: '{default_logdir}'. Change default folder to an empty file,
or use --log-dir= to turn log off.""")
parser.add_argument("-q","--regex-query",dest='regexstr',default='',nargs='?',help="python re module regex filepath filtering.")
parser.add_argument("-d","--nfo-skip-days",dest='days',default='',nargs='?', help="Override nfo_skip_days value in config.")
parser.add_argument("-c","--stop-counter",dest='cnt',default='',nargs='?', help="Override stop_counter value in config.")
parser.add_argument("-i", "--ignore-failed-list", action="store_true", help="Ignore failed list '{}'".format(
os.path.join(os.path.abspath(conf.failed_folder()), 'failed_list.txt')))
parser.add_argument("-a", "--auto-exit", action="store_true",
help="Auto exit after program complete")
parser.add_argument("-g","--debug", action="store_true",
help="Turn on debug mode to generate diagnostic log for issue report.")
parser.add_argument("-z","--zero-operation",dest='zero_op', action="store_true",
help="""Only show job list of files and numbers, and **NO** actual operation
is performed. It may help you correct wrong numbers before real job.""")
parser.add_argument("-v", "--version", action="version", version=ver)
#ini_path
args = parser.parse_args()
def get_natural_number_or_none(value):
return int(value) if isinstance(value, str) and value.isnumeric() and int(value)>=0 else None
def get_str_or_none(value):
return value if isinstance(value, str) and len(value) else None
def get_bool_or_none(value):
return True if isinstance(value, bool) and value else None
config.G_conf_override["common:main_mode"] = get_natural_number_or_none(args.main_mode)
config.G_conf_override["common:source_folder"] = get_str_or_none(args.path)
config.G_conf_override["common:auto_exit"] = get_bool_or_none(args.auto_exit)
config.G_conf_override["common:nfo_skip_days"] = get_natural_number_or_none(args.days)
config.G_conf_override["common:stop_counter"] = get_natural_number_or_none(args.cnt)
config.G_conf_override["common:ignore_failed_list"] = get_bool_or_none(args.ignore_failed_list)
config.G_conf_override["debug_mode:switch"] = get_bool_or_none(args.debug)
return args.file, args.path, args.number, args.autoexit, args.logdir, args.regexstr
return args.file, args.number, args.logdir, args.regexstr, args.zero_op
class OutLogger(object):
def __init__(self, logfile) -> None:
@@ -200,15 +223,14 @@ def close_logfile(logdir: str):
# 100MB的日志文件能缩小到3.7MB。
# 重写视频文件扫描,消除递归,取消全局变量,新增失败文件列表跳过处理
def movie_lists(root, conf, regexstr):
escape_folder = re.split("[,]", conf.escape_folder())
# 新增失败文件列表跳过处理,及.nfo修改天数跳过处理提示跳过视频总数调试模式(-g)下详细被跳过文件,跳过小广告
def movie_lists(source_folder, regexstr):
conf = config.getInstance()
main_mode = conf.main_mode()
debug = conf.debug()
nfo_skip_days = conf.nfo_skip_days()
soft_link = conf.soft_link()
total = []
file_type = conf.media_type().upper().split(",")
file_type = conf.media_type().lower().split(",")
trailerRE = re.compile(r'-trailer\.', re.IGNORECASE)
cliRE = None
if isinstance(regexstr, str) and len(regexstr):
@@ -216,61 +238,85 @@ def movie_lists(root, conf, regexstr):
cliRE = re.compile(regexstr, re.IGNORECASE)
except:
pass
failed_list_txt_path = Path(conf.failed_folder()).resolve() / 'failed_list.txt'
failed_set = set()
if main_mode == 3 or soft_link:
if (main_mode == 3 or soft_link) and not conf.ignore_failed_list():
try:
with open(os.path.join(conf.failed_folder(), 'failed_list.txt'), 'r', encoding='utf-8') as flt:
with open(failed_list_txt_path, 'r', encoding='utf-8') as flt:
flist = flt.read().splitlines()
failed_set = set(flist)
flt.close()
if len(flist) != len(failed_set):
with open(os.path.join(conf.failed_folder(), 'failed_list.txt'), 'w', encoding='utf-8') as flt:
flt.writelines([line + '\n' for line in failed_set])
flt.close()
with open(failed_list_txt_path, 'w', encoding='utf-8') as flt:
wtlines = [line + '\n' for line in failed_set]
wtlines.sort()
flt.writelines(wtlines)
except:
pass
for current_dir, subdirs, files in os.walk(root, topdown=False):
if len(set(current_dir.replace("\\","/").split("/")) & set(escape_folder)) > 0:
if not Path(source_folder).is_dir():
print('[-]Source folder not found!')
return []
total = []
source = Path(source_folder).resolve()
skip_failed_cnt, skip_nfo_days_cnt = 0, 0
escape_folder_set = set(re.split("[,]", conf.escape_folder()))
for full_name in source.glob(r'**/*'):
if main_mode != 3 and set(full_name.parent.parts) & escape_folder_set:
continue
for f in files:
full_name = os.path.join(current_dir, f)
if not os.path.splitext(full_name)[1].upper() in file_type:
continue
absf = os.path.abspath(full_name)
if absf in failed_set:
if debug:
print('[!]Skip failed file:', absf)
continue
if cliRE and not cliRE.search(absf):
continue
if main_mode == 3 and nfo_skip_days > 0:
nfo = Path(absf).with_suffix('.nfo')
if file_modification_days(nfo) <= nfo_skip_days:
continue
if (main_mode == 3 or not is_link(absf)) and not trailerRE.search(f):
total.append(absf)
if not full_name.suffix.lower() in file_type:
continue
absf = str(full_name)
if absf in failed_set:
skip_failed_cnt += 1
if debug:
print('[!]Skip failed movie:', absf)
continue
is_sym = full_name.is_symlink()
if main_mode != 3 and (is_sym or full_name.stat().st_nlink > 1): # 短路布尔 符号链接不取stat(),因为符号链接可能指向不存在目标
continue # file is symlink or hardlink(Linux/NTFS/Darwin)
# 调试用0字节样本允许通过去除小于120MB的广告'苍老师强力推荐.mp4'(102.2MB)'黑道总裁.mp4'(98.4MB)'有趣的妹子激情表演.MP4'(95MB)'有趣的臺灣妹妹直播.mp4'(15.1MB)
movie_size = 0 if is_sym else full_name.stat().st_size # 同上 符号链接不取stat()及st_size直接赋0跳过小视频检测
if movie_size > 0 and movie_size < 125829120: # 1024*1024*120=125829120
continue
if cliRE and not cliRE.search(absf) or trailerRE.search(full_name.name):
continue
if main_mode == 3 and nfo_skip_days > 0 and file_modification_days(full_name.with_suffix('.nfo')) <= nfo_skip_days:
skip_nfo_days_cnt += 1
if debug:
print(f"[!]Skip movie by it's .nfo which modified within {nfo_skip_days} days: '{absf}'")
continue
total.append(absf)
if skip_failed_cnt:
print(f"[!]Skip {skip_failed_cnt} movies in failed list '{failed_list_txt_path}'.")
if skip_nfo_days_cnt:
print(f"[!]Skip {skip_nfo_days_cnt} movies in source folder '{source}' who's .nfo modified within {nfo_skip_days} days.")
if nfo_skip_days <= 0 or not soft_link or main_mode == 3:
return total
# 软连接方式,已经成功削刮的也需要从成功目录中检查.nfo更新天数跳过N天内更新过的
skip_numbers = set()
success_folder = conf.success_folder()
for current_dir, subdirs, files in os.walk(success_folder, topdown=False):
for f in files:
f_obj = Path(f)
if f_obj.suffix.lower() != '.nfo':
continue
if file_modification_days(Path(current_dir) / f_obj) > nfo_skip_days:
continue
number = get_number(False, f_obj.stem)
if number:
skip_numbers.add(number.upper())
success_folder = Path(conf.success_folder()).resolve()
for f in success_folder.glob(r'**/*'):
if not re.match(r'\.nfo', f.suffix, re.IGNORECASE):
continue
if file_modification_days(f) > nfo_skip_days:
continue
number = get_number(False, f.stem)
if not number:
continue
skip_numbers.add(number.lower())
rm_list = []
for f in total:
n_number = get_number(False, os.path.basename(f))
if n_number and n_number.upper() in skip_numbers:
if n_number and n_number.lower() in skip_numbers:
rm_list.append(f)
for f in rm_list:
total.remove(f)
if debug:
print(f"[!]Skip file successfully processed within {nfo_skip_days} days: '{f}'")
if len(rm_list):
print(f"[!]Skip {len(rm_list)} movies in success folder '{success_folder}' who's .nfo modified within {nfo_skip_days} days.")
return total
@@ -299,14 +345,18 @@ def rm_empty_folder(path):
pass
def create_data_and_move(file_path: str, c: config.Config, debug):
def create_data_and_move(file_path: str, zero_op):
# Normalized number, eg: 111xxx-222.mp4 -> xxx-222.mp4
c = config.getInstance()
debug = c.debug()
file_name = os.path.basename(file_path)
n_number = get_number(debug, file_name)
file_path = os.path.abspath(file_path)
if debug == True:
print(f"[!] [{n_number}] As Number making data for '{file_path}'")
if zero_op:
return
if n_number:
core_main(file_path, n_number, c)
else:
@@ -315,6 +365,8 @@ def create_data_and_move(file_path: str, c: config.Config, debug):
else:
try:
print(f"[!] [{n_number}] As Number making data for '{file_path}'")
if zero_op:
return
if n_number:
core_main(file_path, n_number, c)
else:
@@ -357,8 +409,17 @@ def create_data_and_move_with_custom_number(file_path: str, c: config.Config, cu
if __name__ == '__main__':
version = '5.0.1'
urllib3.disable_warnings() #Ignore http proxy warning
# Read config.ini first, in argparse_function() need conf.failed_folder()
conf = config.Config("config.ini")
# Parse command line args
single_file_path, folder_path, custom_number, auto_exit, logdir, regexstr = argparse_function(version)
single_file_path, custom_number, logdir, regexstr, zero_op = argparse_function(version)
main_mode = conf.main_mode()
if not main_mode in (1, 2, 3):
print(f"[-]Main mode must be 1 or 2 or 3! You can run '{os.path.basename(sys.argv[0])} --help' for more help.")
sys.exit(4)
dupe_stdout_to_logfile(logdir)
@@ -368,9 +429,8 @@ if __name__ == '__main__':
print('[*]======================================================')
print('[*]严禁在墙内宣传本项目')
# Read config.ini
conf = config.Config("config.ini")
start_time = time.time()
print('[+]Start at', time.strftime("%Y-%m-%d %H:%M:%S"))
if conf.update_check():
check_update(version)
@@ -382,9 +442,15 @@ if __name__ == '__main__':
print('[!]Enable soft link')
if len(sys.argv)>1:
print('[!]CmdLine:'," ".join(sys.argv[1:]))
print('[+]Main Working mode ## {}: {} ## {}{}{}'
.format(*(main_mode, ['Scraping', 'Organizing', 'Scraping in analysis folder'][main_mode-1],
"" if not conf.multi_threading() else ", multi_threading on",
"" if conf.nfo_skip_days() == 0 else f", nfo_skip_days={conf.nfo_skip_days()}",
"" if conf.stop_counter() == 0 else f", stop_counter={conf.stop_counter()}"
) if not single_file_path else ('-','Single File', '','',''))
)
create_failed_folder(conf.failed_folder())
start_time = time.time()
if not single_file_path == '': #Single File
print('[+]==================== Single File =====================')
@@ -393,32 +459,31 @@ if __name__ == '__main__':
else:
create_data_and_move_with_custom_number(single_file_path, conf, custom_number)
else:
if folder_path == '':
folder_path = conf.source_folder()
if not isinstance(folder_path, str) or folder_path == '':
folder_path = os.path.abspath(".")
movie_list = movie_lists(folder_path, conf, regexstr)
movie_list = movie_lists(folder_path, regexstr)
count = 0
count_all = str(len(movie_list))
print('[+]Find', count_all, 'movies. Start at', time.strftime("%Y-%m-%d %H:%M:%S"))
main_mode = conf.main_mode()
print('[+]Find', count_all, 'movies.')
stop_count = conf.stop_counter()
if stop_count<1:
stop_count = 999999
else:
count_all = str(min(len(movie_list), stop_count))
if main_mode == 3:
print(f'[!]运行模式:**维护模式**,本程序将在处理{count_all}个视频文件后停止,如需后台执行自动退出请结合 -a 参数。')
for movie_path in movie_list: # 遍历电影列表 交给core处理
count = count + 1
percentage = str(count / int(count_all) * 100)[:4] + '%'
print('[!] {:>30}{:>21}'.format('- ' + percentage + ' [' + str(count) + '/' + count_all + '] -', time.strftime("%H:%M:%S")))
create_data_and_move(movie_path, conf, conf.debug())
create_data_and_move(movie_path, zero_op)
if count >= stop_count:
print("[!]Stop counter triggered!")
break
if conf.del_empty_folder():
if conf.del_empty_folder() and not zero_op:
rm_empty_folder(conf.success_folder())
rm_empty_folder(conf.failed_folder())
if len(folder_path):
@@ -433,7 +498,7 @@ if __name__ == '__main__':
close_logfile(logdir)
if not (conf.auto_exit() or auto_exit):
if not conf.auto_exit():
input("Press enter key exit, you can check the error message before you exit...")
sys.exit(0)