AV_Data_Capture.py:command params new add -m -d -c -i -g -z
This commit is contained in:
@@ -11,7 +11,7 @@ import config
|
||||
from datetime import datetime, timedelta
|
||||
import time
|
||||
from pathlib import Path
|
||||
from ADC_function import file_modification_days, get_html, is_link
|
||||
from ADC_function import file_modification_days, get_html
|
||||
from number_parser import get_number
|
||||
from core import core_main, moveFailedFolder
|
||||
|
||||
@@ -35,25 +35,48 @@ def check_update(local_version):
|
||||
|
||||
|
||||
def argparse_function(ver: str) -> typing.Tuple[str, str, bool]:
|
||||
parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter)
|
||||
conf = config.getInstance()
|
||||
parser = argparse.ArgumentParser(epilog=f"Load Config file '{conf.ini_path}'.")
|
||||
parser.add_argument("file", default='', nargs='?', help="Single Movie file path.")
|
||||
parser.add_argument("-p","--path",default='',nargs='?',help="Analysis folder path.")
|
||||
# parser.add_argument("-c", "--config", default='config.ini', nargs='?', help="The config file Path.")
|
||||
default_logdir = os.path.join(Path.home(),'.avlogs')
|
||||
parser.add_argument("-m","--main-mode",default='',nargs='?',help="Main mode. 1:Scraping 2:Organizing 3:Scraping in analysis folder")
|
||||
parser.add_argument("-n", "--number", default='', nargs='?', help="Custom file number of single movie file.")
|
||||
# parser.add_argument("-C", "--config", default='config.ini', nargs='?', help="The config file Path.")
|
||||
default_logdir = Path.home() / '.avlogs'
|
||||
parser.add_argument("-o","--log-dir",dest='logdir',default=default_logdir,nargs='?',
|
||||
help=f"""Duplicate stdout and stderr to logfiles
|
||||
in logging folder, default on.
|
||||
default for current user: {default_logdir}
|
||||
Use --log-dir= to turn off logging feature.""")
|
||||
parser.add_argument("-n", "--number", default='', nargs='?', help="Custom file number")
|
||||
parser.add_argument("-a", "--auto-exit", dest='autoexit', action="store_true",
|
||||
help="Auto exit after program complete")
|
||||
help=f"""Duplicate stdout and stderr to logfiles in logging folder, default on.
|
||||
default folder for current user: '{default_logdir}'. Change default folder to an empty file,
|
||||
or use --log-dir= to turn log off.""")
|
||||
parser.add_argument("-q","--regex-query",dest='regexstr',default='',nargs='?',help="python re module regex filepath filtering.")
|
||||
parser.add_argument("-d","--nfo-skip-days",dest='days',default='',nargs='?', help="Override nfo_skip_days value in config.")
|
||||
parser.add_argument("-c","--stop-counter",dest='cnt',default='',nargs='?', help="Override stop_counter value in config.")
|
||||
parser.add_argument("-i", "--ignore-failed-list", action="store_true", help="Ignore failed list '{}'".format(
|
||||
os.path.join(os.path.abspath(conf.failed_folder()), 'failed_list.txt')))
|
||||
parser.add_argument("-a", "--auto-exit", action="store_true",
|
||||
help="Auto exit after program complete")
|
||||
parser.add_argument("-g","--debug", action="store_true",
|
||||
help="Turn on debug mode to generate diagnostic log for issue report.")
|
||||
parser.add_argument("-z","--zero-operation",dest='zero_op', action="store_true",
|
||||
help="""Only show job list of files and numbers, and **NO** actual operation
|
||||
is performed. It may help you correct wrong numbers before real job.""")
|
||||
parser.add_argument("-v", "--version", action="version", version=ver)
|
||||
#ini_path
|
||||
args = parser.parse_args()
|
||||
def get_natural_number_or_none(value):
|
||||
return int(value) if isinstance(value, str) and value.isnumeric() and int(value)>=0 else None
|
||||
def get_str_or_none(value):
|
||||
return value if isinstance(value, str) and len(value) else None
|
||||
def get_bool_or_none(value):
|
||||
return True if isinstance(value, bool) and value else None
|
||||
config.G_conf_override["common:main_mode"] = get_natural_number_or_none(args.main_mode)
|
||||
config.G_conf_override["common:source_folder"] = get_str_or_none(args.path)
|
||||
config.G_conf_override["common:auto_exit"] = get_bool_or_none(args.auto_exit)
|
||||
config.G_conf_override["common:nfo_skip_days"] = get_natural_number_or_none(args.days)
|
||||
config.G_conf_override["common:stop_counter"] = get_natural_number_or_none(args.cnt)
|
||||
config.G_conf_override["common:ignore_failed_list"] = get_bool_or_none(args.ignore_failed_list)
|
||||
config.G_conf_override["debug_mode:switch"] = get_bool_or_none(args.debug)
|
||||
|
||||
return args.file, args.path, args.number, args.autoexit, args.logdir, args.regexstr
|
||||
|
||||
return args.file, args.number, args.logdir, args.regexstr, args.zero_op
|
||||
|
||||
class OutLogger(object):
|
||||
def __init__(self, logfile) -> None:
|
||||
@@ -200,15 +223,14 @@ def close_logfile(logdir: str):
|
||||
# 100MB的日志文件能缩小到3.7MB。
|
||||
|
||||
|
||||
# 重写视频文件扫描,消除递归,取消全局变量,新增失败文件列表跳过处理
|
||||
def movie_lists(root, conf, regexstr):
|
||||
escape_folder = re.split("[,,]", conf.escape_folder())
|
||||
# 新增失败文件列表跳过处理,及.nfo修改天数跳过处理,提示跳过视频总数,调试模式(-g)下详细被跳过文件,跳过小广告
|
||||
def movie_lists(source_folder, regexstr):
|
||||
conf = config.getInstance()
|
||||
main_mode = conf.main_mode()
|
||||
debug = conf.debug()
|
||||
nfo_skip_days = conf.nfo_skip_days()
|
||||
soft_link = conf.soft_link()
|
||||
total = []
|
||||
file_type = conf.media_type().upper().split(",")
|
||||
file_type = conf.media_type().lower().split(",")
|
||||
trailerRE = re.compile(r'-trailer\.', re.IGNORECASE)
|
||||
cliRE = None
|
||||
if isinstance(regexstr, str) and len(regexstr):
|
||||
@@ -216,61 +238,85 @@ def movie_lists(root, conf, regexstr):
|
||||
cliRE = re.compile(regexstr, re.IGNORECASE)
|
||||
except:
|
||||
pass
|
||||
failed_list_txt_path = Path(conf.failed_folder()).resolve() / 'failed_list.txt'
|
||||
failed_set = set()
|
||||
if main_mode == 3 or soft_link:
|
||||
if (main_mode == 3 or soft_link) and not conf.ignore_failed_list():
|
||||
try:
|
||||
with open(os.path.join(conf.failed_folder(), 'failed_list.txt'), 'r', encoding='utf-8') as flt:
|
||||
with open(failed_list_txt_path, 'r', encoding='utf-8') as flt:
|
||||
flist = flt.read().splitlines()
|
||||
failed_set = set(flist)
|
||||
flt.close()
|
||||
if len(flist) != len(failed_set):
|
||||
with open(os.path.join(conf.failed_folder(), 'failed_list.txt'), 'w', encoding='utf-8') as flt:
|
||||
flt.writelines([line + '\n' for line in failed_set])
|
||||
flt.close()
|
||||
with open(failed_list_txt_path, 'w', encoding='utf-8') as flt:
|
||||
wtlines = [line + '\n' for line in failed_set]
|
||||
wtlines.sort()
|
||||
flt.writelines(wtlines)
|
||||
except:
|
||||
pass
|
||||
for current_dir, subdirs, files in os.walk(root, topdown=False):
|
||||
if len(set(current_dir.replace("\\","/").split("/")) & set(escape_folder)) > 0:
|
||||
if not Path(source_folder).is_dir():
|
||||
print('[-]Source folder not found!')
|
||||
return []
|
||||
total = []
|
||||
source = Path(source_folder).resolve()
|
||||
skip_failed_cnt, skip_nfo_days_cnt = 0, 0
|
||||
escape_folder_set = set(re.split("[,,]", conf.escape_folder()))
|
||||
for full_name in source.glob(r'**/*'):
|
||||
if main_mode != 3 and set(full_name.parent.parts) & escape_folder_set:
|
||||
continue
|
||||
for f in files:
|
||||
full_name = os.path.join(current_dir, f)
|
||||
if not os.path.splitext(full_name)[1].upper() in file_type:
|
||||
continue
|
||||
absf = os.path.abspath(full_name)
|
||||
if absf in failed_set:
|
||||
if debug:
|
||||
print('[!]Skip failed file:', absf)
|
||||
continue
|
||||
if cliRE and not cliRE.search(absf):
|
||||
continue
|
||||
if main_mode == 3 and nfo_skip_days > 0:
|
||||
nfo = Path(absf).with_suffix('.nfo')
|
||||
if file_modification_days(nfo) <= nfo_skip_days:
|
||||
continue
|
||||
if (main_mode == 3 or not is_link(absf)) and not trailerRE.search(f):
|
||||
total.append(absf)
|
||||
if not full_name.suffix.lower() in file_type:
|
||||
continue
|
||||
absf = str(full_name)
|
||||
if absf in failed_set:
|
||||
skip_failed_cnt += 1
|
||||
if debug:
|
||||
print('[!]Skip failed movie:', absf)
|
||||
continue
|
||||
is_sym = full_name.is_symlink()
|
||||
if main_mode != 3 and (is_sym or full_name.stat().st_nlink > 1): # 短路布尔 符号链接不取stat(),因为符号链接可能指向不存在目标
|
||||
continue # file is symlink or hardlink(Linux/NTFS/Darwin)
|
||||
# 调试用0字节样本允许通过,去除小于120MB的广告'苍老师强力推荐.mp4'(102.2MB)'黑道总裁.mp4'(98.4MB)'有趣的妹子激情表演.MP4'(95MB)'有趣的臺灣妹妹直播.mp4'(15.1MB)
|
||||
movie_size = 0 if is_sym else full_name.stat().st_size # 同上 符号链接不取stat()及st_size,直接赋0跳过小视频检测
|
||||
if movie_size > 0 and movie_size < 125829120: # 1024*1024*120=125829120
|
||||
continue
|
||||
if cliRE and not cliRE.search(absf) or trailerRE.search(full_name.name):
|
||||
continue
|
||||
if main_mode == 3 and nfo_skip_days > 0 and file_modification_days(full_name.with_suffix('.nfo')) <= nfo_skip_days:
|
||||
skip_nfo_days_cnt += 1
|
||||
if debug:
|
||||
print(f"[!]Skip movie by it's .nfo which modified within {nfo_skip_days} days: '{absf}'")
|
||||
continue
|
||||
total.append(absf)
|
||||
|
||||
if skip_failed_cnt:
|
||||
print(f"[!]Skip {skip_failed_cnt} movies in failed list '{failed_list_txt_path}'.")
|
||||
if skip_nfo_days_cnt:
|
||||
print(f"[!]Skip {skip_nfo_days_cnt} movies in source folder '{source}' who's .nfo modified within {nfo_skip_days} days.")
|
||||
if nfo_skip_days <= 0 or not soft_link or main_mode == 3:
|
||||
return total
|
||||
# 软连接方式,已经成功削刮的也需要从成功目录中检查.nfo更新天数,跳过N天内更新过的
|
||||
skip_numbers = set()
|
||||
success_folder = conf.success_folder()
|
||||
for current_dir, subdirs, files in os.walk(success_folder, topdown=False):
|
||||
for f in files:
|
||||
f_obj = Path(f)
|
||||
if f_obj.suffix.lower() != '.nfo':
|
||||
continue
|
||||
if file_modification_days(Path(current_dir) / f_obj) > nfo_skip_days:
|
||||
continue
|
||||
number = get_number(False, f_obj.stem)
|
||||
if number:
|
||||
skip_numbers.add(number.upper())
|
||||
success_folder = Path(conf.success_folder()).resolve()
|
||||
for f in success_folder.glob(r'**/*'):
|
||||
if not re.match(r'\.nfo', f.suffix, re.IGNORECASE):
|
||||
continue
|
||||
if file_modification_days(f) > nfo_skip_days:
|
||||
continue
|
||||
number = get_number(False, f.stem)
|
||||
if not number:
|
||||
continue
|
||||
skip_numbers.add(number.lower())
|
||||
|
||||
rm_list = []
|
||||
for f in total:
|
||||
n_number = get_number(False, os.path.basename(f))
|
||||
if n_number and n_number.upper() in skip_numbers:
|
||||
if n_number and n_number.lower() in skip_numbers:
|
||||
rm_list.append(f)
|
||||
for f in rm_list:
|
||||
total.remove(f)
|
||||
if debug:
|
||||
print(f"[!]Skip file successfully processed within {nfo_skip_days} days: '{f}'")
|
||||
if len(rm_list):
|
||||
print(f"[!]Skip {len(rm_list)} movies in success folder '{success_folder}' who's .nfo modified within {nfo_skip_days} days.")
|
||||
|
||||
return total
|
||||
|
||||
|
||||
@@ -299,14 +345,18 @@ def rm_empty_folder(path):
|
||||
pass
|
||||
|
||||
|
||||
def create_data_and_move(file_path: str, c: config.Config, debug):
|
||||
def create_data_and_move(file_path: str, zero_op):
|
||||
# Normalized number, eg: 111xxx-222.mp4 -> xxx-222.mp4
|
||||
c = config.getInstance()
|
||||
debug = c.debug()
|
||||
file_name = os.path.basename(file_path)
|
||||
n_number = get_number(debug, file_name)
|
||||
file_path = os.path.abspath(file_path)
|
||||
|
||||
if debug == True:
|
||||
print(f"[!] [{n_number}] As Number making data for '{file_path}'")
|
||||
if zero_op:
|
||||
return
|
||||
if n_number:
|
||||
core_main(file_path, n_number, c)
|
||||
else:
|
||||
@@ -315,6 +365,8 @@ def create_data_and_move(file_path: str, c: config.Config, debug):
|
||||
else:
|
||||
try:
|
||||
print(f"[!] [{n_number}] As Number making data for '{file_path}'")
|
||||
if zero_op:
|
||||
return
|
||||
if n_number:
|
||||
core_main(file_path, n_number, c)
|
||||
else:
|
||||
@@ -357,8 +409,17 @@ def create_data_and_move_with_custom_number(file_path: str, c: config.Config, cu
|
||||
if __name__ == '__main__':
|
||||
version = '5.0.1'
|
||||
urllib3.disable_warnings() #Ignore http proxy warning
|
||||
|
||||
# Read config.ini first, in argparse_function() need conf.failed_folder()
|
||||
conf = config.Config("config.ini")
|
||||
|
||||
# Parse command line args
|
||||
single_file_path, folder_path, custom_number, auto_exit, logdir, regexstr = argparse_function(version)
|
||||
single_file_path, custom_number, logdir, regexstr, zero_op = argparse_function(version)
|
||||
|
||||
main_mode = conf.main_mode()
|
||||
if not main_mode in (1, 2, 3):
|
||||
print(f"[-]Main mode must be 1 or 2 or 3! You can run '{os.path.basename(sys.argv[0])} --help' for more help.")
|
||||
sys.exit(4)
|
||||
|
||||
dupe_stdout_to_logfile(logdir)
|
||||
|
||||
@@ -368,9 +429,8 @@ if __name__ == '__main__':
|
||||
print('[*]======================================================')
|
||||
print('[*]严禁在墙内宣传本项目')
|
||||
|
||||
# Read config.ini
|
||||
conf = config.Config("config.ini")
|
||||
|
||||
start_time = time.time()
|
||||
print('[+]Start at', time.strftime("%Y-%m-%d %H:%M:%S"))
|
||||
|
||||
if conf.update_check():
|
||||
check_update(version)
|
||||
@@ -382,9 +442,15 @@ if __name__ == '__main__':
|
||||
print('[!]Enable soft link')
|
||||
if len(sys.argv)>1:
|
||||
print('[!]CmdLine:'," ".join(sys.argv[1:]))
|
||||
print('[+]Main Working mode ## {}: {} ## {}{}{}'
|
||||
.format(*(main_mode, ['Scraping', 'Organizing', 'Scraping in analysis folder'][main_mode-1],
|
||||
"" if not conf.multi_threading() else ", multi_threading on",
|
||||
"" if conf.nfo_skip_days() == 0 else f", nfo_skip_days={conf.nfo_skip_days()}",
|
||||
"" if conf.stop_counter() == 0 else f", stop_counter={conf.stop_counter()}"
|
||||
) if not single_file_path else ('-','Single File', '','',''))
|
||||
)
|
||||
|
||||
create_failed_folder(conf.failed_folder())
|
||||
start_time = time.time()
|
||||
|
||||
if not single_file_path == '': #Single File
|
||||
print('[+]==================== Single File =====================')
|
||||
@@ -393,32 +459,31 @@ if __name__ == '__main__':
|
||||
else:
|
||||
create_data_and_move_with_custom_number(single_file_path, conf, custom_number)
|
||||
else:
|
||||
if folder_path == '':
|
||||
folder_path = conf.source_folder()
|
||||
if not isinstance(folder_path, str) or folder_path == '':
|
||||
folder_path = os.path.abspath(".")
|
||||
|
||||
movie_list = movie_lists(folder_path, conf, regexstr)
|
||||
movie_list = movie_lists(folder_path, regexstr)
|
||||
|
||||
count = 0
|
||||
count_all = str(len(movie_list))
|
||||
print('[+]Find', count_all, 'movies. Start at', time.strftime("%Y-%m-%d %H:%M:%S"))
|
||||
main_mode = conf.main_mode()
|
||||
print('[+]Find', count_all, 'movies.')
|
||||
stop_count = conf.stop_counter()
|
||||
if stop_count<1:
|
||||
stop_count = 999999
|
||||
else:
|
||||
count_all = str(min(len(movie_list), stop_count))
|
||||
if main_mode == 3:
|
||||
print(f'[!]运行模式:**维护模式**,本程序将在处理{count_all}个视频文件后停止,如需后台执行自动退出请结合 -a 参数。')
|
||||
|
||||
for movie_path in movie_list: # 遍历电影列表 交给core处理
|
||||
count = count + 1
|
||||
percentage = str(count / int(count_all) * 100)[:4] + '%'
|
||||
print('[!] {:>30}{:>21}'.format('- ' + percentage + ' [' + str(count) + '/' + count_all + '] -', time.strftime("%H:%M:%S")))
|
||||
create_data_and_move(movie_path, conf, conf.debug())
|
||||
create_data_and_move(movie_path, zero_op)
|
||||
if count >= stop_count:
|
||||
print("[!]Stop counter triggered!")
|
||||
break
|
||||
|
||||
if conf.del_empty_folder():
|
||||
if conf.del_empty_folder() and not zero_op:
|
||||
rm_empty_folder(conf.success_folder())
|
||||
rm_empty_folder(conf.failed_folder())
|
||||
if len(folder_path):
|
||||
@@ -433,7 +498,7 @@ if __name__ == '__main__':
|
||||
|
||||
close_logfile(logdir)
|
||||
|
||||
if not (conf.auto_exit() or auto_exit):
|
||||
if not conf.auto_exit():
|
||||
input("Press enter key exit, you can check the error message before you exit...")
|
||||
|
||||
sys.exit(0)
|
||||
|
||||
Reference in New Issue
Block a user