PEP8 PREFIX, AND SOME TYPING ANNOTATION

This commit is contained in:
FatalFurY
2022-02-18 00:01:21 +08:00
parent 123a2a0c73
commit c1568cd64a
2 changed files with 156 additions and 118 deletions

View File

@@ -13,12 +13,10 @@ import multiprocessing
from datetime import datetime, timedelta from datetime import datetime, timedelta
from pathlib import Path from pathlib import Path
from opencc import OpenCC from opencc import OpenCC
import config import config
from ADC_function import file_modification_days, get_html, parallel_download_files from ADC_function import file_modification_days, get_html, parallel_download_files
from number_parser import get_number from number_parser import get_number
from core import core_main, moveFailedFolder from core import core_main, moveFailedFolder
@@ -33,7 +31,7 @@ def check_update(local_version):
time.sleep(60) time.sleep(60)
os._exit(-1) os._exit(-1)
data = json.loads(htmlcode) data = json.loads(htmlcode)
remote = int(data["tag_name"].replace(".","")) remote = int(data["tag_name"].replace(".", ""))
local_version = int(local_version.replace(".", "")) local_version = int(local_version.replace(".", ""))
if local_version < remote: if local_version < remote:
print("[*]" + ("* New update " + str(data["tag_name"]) + " *").center(54)) print("[*]" + ("* New update " + str(data["tag_name"]) + " *").center(54))
@@ -46,36 +44,44 @@ def argparse_function(ver: str) -> typing.Tuple[str, str, str, str, bool]:
conf = config.getInstance() conf = config.getInstance()
parser = argparse.ArgumentParser(epilog=f"Load Config file '{conf.ini_path}'.") parser = argparse.ArgumentParser(epilog=f"Load Config file '{conf.ini_path}'.")
parser.add_argument("file", default='', nargs='?', help="Single Movie file path.") parser.add_argument("file", default='', nargs='?', help="Single Movie file path.")
parser.add_argument("-p","--path",default='',nargs='?',help="Analysis folder path.") parser.add_argument("-p", "--path", default='', nargs='?', help="Analysis folder path.")
parser.add_argument("-m","--main-mode",default='',nargs='?',help="Main mode. 1:Scraping 2:Organizing 3:Scraping in analysis folder") parser.add_argument("-m", "--main-mode", default='', nargs='?',
help="Main mode. 1:Scraping 2:Organizing 3:Scraping in analysis folder")
parser.add_argument("-n", "--number", default='', nargs='?', help="Custom file number of single movie file.") parser.add_argument("-n", "--number", default='', nargs='?', help="Custom file number of single movie file.")
# parser.add_argument("-C", "--config", default='config.ini', nargs='?', help="The config file Path.") # parser.add_argument("-C", "--config", default='config.ini', nargs='?', help="The config file Path.")
default_logdir = str(Path.home() / '.mlogs') default_logdir = str(Path.home() / '.mlogs')
parser.add_argument("-o","--log-dir",dest='logdir',default=default_logdir,nargs='?', parser.add_argument("-o", "--log-dir", dest='logdir', default=default_logdir, nargs='?',
help=f"""Duplicate stdout and stderr to logfiles in logging folder, default on. help=f"""Duplicate stdout and stderr to logfiles in logging folder, default on.
default folder for current user: '{default_logdir}'. Change default folder to an empty file, default folder for current user: '{default_logdir}'. Change default folder to an empty file,
or use --log-dir= to turn log off.""") or use --log-dir= to turn log off.""")
parser.add_argument("-q","--regex-query",dest='regexstr',default='',nargs='?',help="python re module regex filepath filtering.") parser.add_argument("-q", "--regex-query", dest='regexstr', default='', nargs='?',
parser.add_argument("-d","--nfo-skip-days",dest='days',default='',nargs='?', help="Override nfo_skip_days value in config.") help="python re module regex filepath filtering.")
parser.add_argument("-c","--stop-counter",dest='cnt',default='',nargs='?', help="Override stop_counter value in config.") parser.add_argument("-d", "--nfo-skip-days", dest='days', default='', nargs='?',
help="Override nfo_skip_days value in config.")
parser.add_argument("-c", "--stop-counter", dest='cnt', default='', nargs='?',
help="Override stop_counter value in config.")
parser.add_argument("-i", "--ignore-failed-list", action="store_true", help="Ignore failed list '{}'".format( parser.add_argument("-i", "--ignore-failed-list", action="store_true", help="Ignore failed list '{}'".format(
os.path.join(os.path.abspath(conf.failed_folder()), 'failed_list.txt'))) os.path.join(os.path.abspath(conf.failed_folder()), 'failed_list.txt')))
parser.add_argument("-a", "--auto-exit", action="store_true", parser.add_argument("-a", "--auto-exit", action="store_true",
help="Auto exit after program complete") help="Auto exit after program complete")
parser.add_argument("-g","--debug", action="store_true", parser.add_argument("-g", "--debug", action="store_true",
help="Turn on debug mode to generate diagnostic log for issue report.") help="Turn on debug mode to generate diagnostic log for issue report.")
parser.add_argument("-z","--zero-operation",dest='zero_op', action="store_true", parser.add_argument("-z", "--zero-operation", dest='zero_op', action="store_true",
help="""Only show job list of files and numbers, and **NO** actual operation help="""Only show job list of files and numbers, and **NO** actual operation
is performed. It may help you correct wrong numbers before real job.""") is performed. It may help you correct wrong numbers before real job.""")
parser.add_argument("-v", "--version", action="version", version=ver) parser.add_argument("-v", "--version", action="version", version=ver)
args = parser.parse_args() args = parser.parse_args()
def get_natural_number_or_none(value): def get_natural_number_or_none(value):
return int(value) if isinstance(value, str) and value.isnumeric() and int(value)>=0 else None return int(value) if isinstance(value, str) and value.isnumeric() and int(value) >= 0 else None
def get_str_or_none(value): def get_str_or_none(value):
return value if isinstance(value, str) and len(value) else None return value if isinstance(value, str) and len(value) else None
def get_bool_or_none(value): def get_bool_or_none(value):
return True if isinstance(value, bool) and value else None return True if isinstance(value, bool) and value else None
config.G_conf_override["common:main_mode"] = get_natural_number_or_none(args.main_mode) config.G_conf_override["common:main_mode"] = get_natural_number_or_none(args.main_mode)
config.G_conf_override["common:source_folder"] = get_str_or_none(args.path) config.G_conf_override["common:source_folder"] = get_str_or_none(args.path)
config.G_conf_override["common:auto_exit"] = get_bool_or_none(args.auto_exit) config.G_conf_override["common:auto_exit"] = get_bool_or_none(args.auto_exit)
@@ -86,43 +92,53 @@ is performed. It may help you correct wrong numbers before real job.""")
return args.file, args.number, args.logdir, args.regexstr, args.zero_op return args.file, args.number, args.logdir, args.regexstr, args.zero_op
class OutLogger(object):
    """File-like tee: every write goes to the terminal stream and to a log file.

    The ``sys.stdout`` object that is current at construction time is kept in
    ``term``; ``close()`` re-installs it as ``sys.stdout`` and closes the log.
    ``filepath`` keeps the path that was passed in.
    """

    def __init__(self, logfile) -> None:
        self.term = sys.stdout
        # buffering=1 selects line buffering for the text-mode log file.
        self.log = open(logfile, "w", encoding='utf-8', buffering=1)
        self.filepath = logfile

    def __del__(self):
        self.close()

    def __enter__(self):
        # Return the logger itself so ``with OutLogger(p) as log:`` binds a
        # usable object instead of None.
        return self

    def __exit__(self, *args):
        self.close()

    def write(self, msg):
        """Write ``msg`` to both the terminal stream and the log file."""
        self.term.write(msg)
        self.log.write(msg)

    def flush(self):
        """Flush both targets and force the log file contents to disk."""
        self.term.flush()
        self.log.flush()
        os.fsync(self.log.fileno())

    def close(self):
        """Restore ``sys.stdout`` and close the log file; safe to call repeatedly."""
        # getattr() instead of plain attribute access: __del__ also runs on
        # instances whose __init__ raised before ``log`` was assigned.
        term = getattr(self, 'term', None)
        if term is not None:
            sys.stdout = term
            self.term = None
        log = getattr(self, 'log', None)
        if log is not None:
            log.close()
            self.log = None
class ErrLogger(OutLogger):
    """``sys.stderr`` variant of OutLogger: same tee behaviour, different stream."""

    def __init__(self, logfile) -> None:
        # The base __init__ is deliberately not called: it would record
        # sys.stdout in ``term``, and close() below re-installs ``term`` as
        # sys.stderr.
        self.term = sys.stderr
        # buffering=1 selects line buffering for the text-mode log file.
        self.log = open(logfile, "w", encoding='utf-8', buffering=1)
        self.filepath = logfile

    def close(self):
        """Restore ``sys.stderr`` and close the log file; safe to call repeatedly."""
        # getattr() instead of plain attribute access: the inherited __del__
        # also runs on instances whose __init__ raised before ``log`` was set.
        term = getattr(self, 'term', None)
        if term is not None:
            sys.stderr = term
            self.term = None
        log = getattr(self, 'log', None)
        if log is not None:
            log.close()
            self.log = None
@@ -133,7 +149,7 @@ def dupe_stdout_to_logfile(logdir: str):
log_dir = Path(logdir) log_dir = Path(logdir)
if not log_dir.exists(): if not log_dir.exists():
try: try:
log_dir.mkdir(parents=True,exist_ok=True) log_dir.mkdir(parents=True, exist_ok=True)
except: except:
pass pass
if not log_dir.is_dir(): if not log_dir.is_dir():
@@ -150,7 +166,7 @@ def dupe_stdout_to_logfile(logdir: str):
def close_logfile(logdir: str): def close_logfile(logdir: str):
if not isinstance(logdir, str) or len(logdir) == 0 or not os.path.isdir(logdir): if not isinstance(logdir, str) or len(logdir) == 0 or not os.path.isdir(logdir):
return return
#日志关闭前保存日志路径 # 日志关闭前保存日志路径
filepath = None filepath = None
try: try:
filepath = sys.stdout.filepath filepath = sys.stdout.filepath
@@ -161,7 +177,7 @@ def close_logfile(logdir: str):
log_dir = Path(logdir).resolve() log_dir = Path(logdir).resolve()
if isinstance(filepath, Path): if isinstance(filepath, Path):
print(f"Log file '{filepath}' saved.") print(f"Log file '{filepath}' saved.")
assert(filepath.parent.samefile(log_dir)) assert (filepath.parent.samefile(log_dir))
# 清理空文件 # 清理空文件
for f in log_dir.glob(r'*_err.txt'): for f in log_dir.glob(r'*_err.txt'):
if f.stat().st_size == 0: if f.stat().st_size == 0:
@@ -201,7 +217,7 @@ def close_logfile(logdir: str):
cutday = len('T235959.txt') # cut length mdc_20201201|T235959.txt cutday = len('T235959.txt') # cut length mdc_20201201|T235959.txt
for f in day_merge: for f in day_merge:
try: try:
day_file_name = str(f)[:-cutday] + '.txt' # mdc_20201201.txt day_file_name = str(f)[:-cutday] + '.txt' # mdc_20201201.txt
with open(day_file_name, 'a', encoding='utf-8') as m: with open(day_file_name, 'a', encoding='utf-8') as m:
m.write(f.read_text(encoding='utf-8')) m.write(f.read_text(encoding='utf-8'))
f.unlink(missing_ok=True) f.unlink(missing_ok=True)
@@ -213,7 +229,7 @@ def close_logfile(logdir: str):
if not txts or not len(txts): if not txts or not len(txts):
break break
txts.sort() txts.sort()
tmstr_3_month_ago = (today.replace(day=1) - timedelta(days=3*30)).strftime("%Y%m32") tmstr_3_month_ago = (today.replace(day=1) - timedelta(days=3 * 30)).strftime("%Y%m32")
deadline_month = f'mdc_{tmstr_3_month_ago}' deadline_month = f'mdc_{tmstr_3_month_ago}'
month_merge = [f for f in txts if f.stem < deadline_month] month_merge = [f for f in txts if f.stem < deadline_month]
if not month_merge or not len(month_merge): if not month_merge or not len(month_merge):
@@ -221,7 +237,7 @@ def close_logfile(logdir: str):
tomonth = len('01.txt') # cut length mdc_202012|01.txt tomonth = len('01.txt') # cut length mdc_202012|01.txt
for f in month_merge: for f in month_merge:
try: try:
month_file_name = str(f)[:-tomonth] + '.txt' # mdc_202012.txt month_file_name = str(f)[:-tomonth] + '.txt' # mdc_202012.txt
with open(month_file_name, 'a', encoding='utf-8') as m: with open(month_file_name, 'a', encoding='utf-8') as m:
m.write(f.read_text(encoding='utf-8')) m.write(f.read_text(encoding='utf-8'))
f.unlink(missing_ok=True) f.unlink(missing_ok=True)
@@ -234,14 +250,14 @@ def close_logfile(logdir: str):
if not mons or not len(mons): if not mons or not len(mons):
return return
mons.sort() mons.sort()
deadline_year = f'mdc_{today.year-1}13' deadline_year = f'mdc_{today.year - 1}13'
year_merge = [f for f in mons if f.stem < deadline_year] year_merge = [f for f in mons if f.stem < deadline_year]
if not year_merge or not len(year_merge): if not year_merge or not len(year_merge):
return return
toyear = len('12.txt') # cut length mdc_2020|12.txt toyear = len('12.txt') # cut length mdc_2020|12.txt
for f in year_merge: for f in year_merge:
try: try:
year_file_name = str(f)[:-toyear] + '.txt' # mdc_2020.txt year_file_name = str(f)[:-toyear] + '.txt' # mdc_2020.txt
with open(year_file_name, 'a', encoding='utf-8') as y: with open(year_file_name, 'a', encoding='utf-8') as y:
y.write(f.read_text(encoding='utf-8')) y.write(f.read_text(encoding='utf-8'))
f.unlink(missing_ok=True) f.unlink(missing_ok=True)
@@ -257,13 +273,14 @@ def signal_handler(*args):
print('[!]Ctrl+C detected, Exit.') print('[!]Ctrl+C detected, Exit.')
sys.exit(9) sys.exit(9)
def sigdebug_handler(*args):
    """Signal handler that flips the debug override and reports the new state.

    Any positional arguments passed by the signal machinery are ignored.
    """
    switch_key = "debug_mode:switch"
    overrides = config.G_conf_override
    overrides[switch_key] = not overrides[switch_key]
    # The printed state is read back through the config instance, not from
    # the override dict directly.
    state = 'On' if config.getInstance().debug() else 'oFF'
    print('[!]Debug {}'.format(state))
# 新增失败文件列表跳过处理,及.nfo修改天数跳过处理提示跳过视频总数调试模式(-g)下详细被跳过文件,跳过小广告 # 新增失败文件列表跳过处理,及.nfo修改天数跳过处理提示跳过视频总数调试模式(-g)下详细被跳过文件,跳过小广告
def movie_lists(source_folder, regexstr): def movie_lists(source_folder, regexstr: str) -> list[str]:
conf = config.getInstance() conf = config.getInstance()
main_mode = conf.main_mode() main_mode = conf.main_mode()
debug = conf.debug() debug = conf.debug()
@@ -283,9 +300,9 @@ def movie_lists(source_folder, regexstr):
try: try:
flist = failed_list_txt_path.read_text(encoding='utf-8').splitlines() flist = failed_list_txt_path.read_text(encoding='utf-8').splitlines()
failed_set = set(flist) failed_set = set(flist)
if len(flist) != len(failed_set): # 检查去重并写回但是不改变failed_list.txt内条目的先后次序重复的只保留最后的 if len(flist) != len(failed_set): # 检查去重并写回但是不改变failed_list.txt内条目的先后次序重复的只保留最后的
fset = failed_set.copy() fset = failed_set.copy()
for i in range(len(flist)-1, -1, -1): for i in range(len(flist) - 1, -1, -1):
fset.remove(flist[i]) if flist[i] in fset else flist.pop(i) fset.remove(flist[i]) if flist[i] in fset else flist.pop(i)
failed_list_txt_path.write_text('\n'.join(flist) + '\n', encoding='utf-8') failed_list_txt_path.write_text('\n'.join(flist) + '\n', encoding='utf-8')
assert len(fset) == 0 and len(flist) == len(failed_set) assert len(fset) == 0 and len(flist) == len(failed_set)
@@ -311,14 +328,15 @@ def movie_lists(source_folder, regexstr):
continue continue
is_sym = full_name.is_symlink() is_sym = full_name.is_symlink()
if main_mode != 3 and (is_sym or full_name.stat().st_nlink > 1): # 短路布尔 符号链接不取stat(),因为符号链接可能指向不存在目标 if main_mode != 3 and (is_sym or full_name.stat().st_nlink > 1): # 短路布尔 符号链接不取stat(),因为符号链接可能指向不存在目标
continue # file is symlink or hardlink(Linux/NTFS/Darwin) continue # file is symlink or hardlink(Linux/NTFS/Darwin)
# 调试用0字节样本允许通过去除小于120MB的广告'苍老师强力推荐.mp4'(102.2MB)'黑道总裁.mp4'(98.4MB)'有趣的妹子激情表演.MP4'(95MB)'有趣的臺灣妹妹直播.mp4'(15.1MB) # 调试用0字节样本允许通过去除小于120MB的广告'苍老师强力推荐.mp4'(102.2MB)'黑道总裁.mp4'(98.4MB)'有趣的妹子激情表演.MP4'(95MB)'有趣的臺灣妹妹直播.mp4'(15.1MB)
movie_size = 0 if is_sym else full_name.stat().st_size # 同上 符号链接不取stat()及st_size直接赋0跳过小视频检测 movie_size = 0 if is_sym else full_name.stat().st_size # 同上 符号链接不取stat()及st_size直接赋0跳过小视频检测
if movie_size > 0 and movie_size < 125829120: # 1024*1024*120=125829120 if 0 < movie_size < 125829120: # 1024*1024*120=125829120
continue continue
if cliRE and not cliRE.search(absf) or trailerRE.search(full_name.name): if cliRE and not cliRE.search(absf) or trailerRE.search(full_name.name):
continue continue
if main_mode == 3 and nfo_skip_days > 0 and file_modification_days(full_name.with_suffix('.nfo')) <= nfo_skip_days: if main_mode == 3 and nfo_skip_days > 0 and file_modification_days(
full_name.with_suffix('.nfo')) <= nfo_skip_days:
skip_nfo_days_cnt += 1 skip_nfo_days_cnt += 1
if debug: if debug:
print(f"[!]Skip movie by it's .nfo which modified within {nfo_skip_days} days: '{absf}'") print(f"[!]Skip movie by it's .nfo which modified within {nfo_skip_days} days: '{absf}'")
@@ -328,7 +346,8 @@ def movie_lists(source_folder, regexstr):
if skip_failed_cnt: if skip_failed_cnt:
print(f"[!]Skip {skip_failed_cnt} movies in failed list '{failed_list_txt_path}'.") print(f"[!]Skip {skip_failed_cnt} movies in failed list '{failed_list_txt_path}'.")
if skip_nfo_days_cnt: if skip_nfo_days_cnt:
print(f"[!]Skip {skip_nfo_days_cnt} movies in source folder '{source}' who's .nfo modified within {nfo_skip_days} days.") print(
f"[!]Skip {skip_nfo_days_cnt} movies in source folder '{source}' who's .nfo modified within {nfo_skip_days} days.")
if nfo_skip_days <= 0 or not soft_link or main_mode == 3: if nfo_skip_days <= 0 or not soft_link or main_mode == 3:
return total return total
# 软连接方式,已经成功削刮的也需要从成功目录中检查.nfo更新天数跳过N天内更新过的 # 软连接方式,已经成功削刮的也需要从成功目录中检查.nfo更新天数跳过N天内更新过的
@@ -354,13 +373,17 @@ def movie_lists(source_folder, regexstr):
if debug: if debug:
print(f"[!]Skip file successfully processed within {nfo_skip_days} days: '{f}'") print(f"[!]Skip file successfully processed within {nfo_skip_days} days: '{f}'")
if len(rm_list): if len(rm_list):
print(f"[!]Skip {len(rm_list)} movies in success folder '{success_folder}' who's .nfo modified within {nfo_skip_days} days.") print(
f"[!]Skip {len(rm_list)} movies in success folder '{success_folder}' who's .nfo modified within {nfo_skip_days} days.")
return total return total
def create_failed_folder(failed_folder): def create_failed_folder(failed_folder: str):
if not os.path.exists(failed_folder): # 新建failed文件夹 """
新建failed文件夹
"""
if not os.path.exists(failed_folder):
try: try:
os.makedirs(failed_folder) os.makedirs(failed_folder)
except: except:
@@ -373,9 +396,7 @@ def rm_empty_folder(path):
deleted = set() deleted = set()
for current_dir, subdirs, files in os.walk(abspath, topdown=False): for current_dir, subdirs, files in os.walk(abspath, topdown=False):
try: try:
still_has_subdirs = any( still_has_subdirs = any(_ for subdir in subdirs if os.path.join(current_dir, subdir) not in deleted)
_ for subdir in subdirs if os.path.join(current_dir, subdir) not in deleted
)
if not any(files) and not still_has_subdirs and not os.path.samefile(path, current_dir): if not any(files) and not still_has_subdirs and not os.path.samefile(path, current_dir):
os.rmdir(current_dir) os.rmdir(current_dir)
deleted.add(current_dir) deleted.add(current_dir)
@@ -390,7 +411,7 @@ def create_data_and_move(file_path: str, zero_op, oCC):
n_number = get_number(debug, os.path.basename(file_path)) n_number = get_number(debug, os.path.basename(file_path))
file_path = os.path.abspath(file_path) file_path = os.path.abspath(file_path)
if debug == True: if debug is True:
print(f"[!] [{n_number}] As Number making data for '{file_path}'") print(f"[!] [{n_number}] As Number making data for '{file_path}'")
if zero_op: if zero_op:
return return
@@ -447,7 +468,7 @@ def create_data_and_move_with_custom_number(file_path: str, custom_number, oCC):
def main(): def main():
version = '6.0.1' version = '6.0.1'
urllib3.disable_warnings() #Ignore http proxy warning urllib3.disable_warnings() # Ignore http proxy warning
# Read config.ini first, in argparse_function() need conf.failed_folder() # Read config.ini first, in argparse_function() need conf.failed_folder()
conf = config.Config("config.ini") conf = config.Config("config.ini")
@@ -455,11 +476,9 @@ def main():
# Parse command line args # Parse command line args
single_file_path, custom_number, logdir, regexstr, zero_op = argparse_function(version) single_file_path, custom_number, logdir, regexstr, zero_op = argparse_function(version)
main_mode = conf.main_mode() main_mode = conf.main_mode()
folder_path = "" folder_path = ""
if not main_mode in (1, 2, 3): if main_mode not in (1, 2, 3):
print(f"[-]Main mode must be 1 or 2 or 3! You can run '{os.path.basename(sys.argv[0])} --help' for more help.") print(f"[-]Main mode must be 1 or 2 or 3! You can run '{os.path.basename(sys.argv[0])} --help' for more help.")
sys.exit(4) sys.exit(4)
@@ -470,7 +489,8 @@ def main():
signal.signal(signal.SIGWINCH, sigdebug_handler) signal.signal(signal.SIGWINCH, sigdebug_handler)
dupe_stdout_to_logfile(logdir) dupe_stdout_to_logfile(logdir)
platform_total = str(' - ' + platform.platform() + ' \n[*] - ' + platform.machine() + ' - Python-' + platform.python_version()) platform_total = str(
' - ' + platform.platform() + ' \n[*] - ' + platform.machine() + ' - Python-' + platform.python_version())
print('[*]================= Movie Data Capture =================') print('[*]================= Movie Data Capture =================')
print('[*]' + version.center(54)) print('[*]' + version.center(54))
@@ -488,15 +508,15 @@ def main():
print('[+]Enable debug') print('[+]Enable debug')
if conf.soft_link(): if conf.soft_link():
print('[!]Enable soft link') print('[!]Enable soft link')
if len(sys.argv)>1: if len(sys.argv) > 1:
print('[!]CmdLine:'," ".join(sys.argv[1:])) print('[!]CmdLine:', " ".join(sys.argv[1:]))
print('[+]Main Working mode ## {}: {} ## {}{}{}' print('[+]Main Working mode ## {}: {} ## {}{}{}'
.format(*(main_mode, ['Scraping', 'Organizing', 'Scraping in analysis folder'][main_mode-1], .format(*(main_mode, ['Scraping', 'Organizing', 'Scraping in analysis folder'][main_mode - 1],
"" if not conf.multi_threading() else ", multi_threading on", "" if not conf.multi_threading() else ", multi_threading on",
"" if conf.nfo_skip_days() == 0 else f", nfo_skip_days={conf.nfo_skip_days()}", "" if conf.nfo_skip_days() == 0 else f", nfo_skip_days={conf.nfo_skip_days()}",
"" if conf.stop_counter() == 0 else f", stop_counter={conf.stop_counter()}" "" if conf.stop_counter() == 0 else f", stop_counter={conf.stop_counter()}"
) if not single_file_path else ('-','Single File', '','','')) ) if not single_file_path else ('-', 'Single File', '', '', ''))
) )
if conf.update_check(): if conf.update_check():
check_update(version) check_update(version)
@@ -507,8 +527,9 @@ def main():
def fmd(f): def fmd(f):
return ('https://raw.githubusercontent.com/yoshiko2/Movie_Data_Capture/master/MappingTable/' + f, return ('https://raw.githubusercontent.com/yoshiko2/Movie_Data_Capture/master/MappingTable/' + f,
Path.home() / '.local' / 'share' / 'mdc' / f) Path.home() / '.local' / 'share' / 'mdc' / f)
map_tab = (fmd('mapping_actor.xml'), fmd('mapping_info.xml'), fmd('c_number.json')) map_tab = (fmd('mapping_actor.xml'), fmd('mapping_info.xml'), fmd('c_number.json'))
for k,v in map_tab: for k, v in map_tab:
if v.exists(): if v.exists():
if file_modification_days(str(v)) >= conf.mapping_table_validity(): if file_modification_days(str(v)) >= conf.mapping_table_validity():
print("[+]Mapping Table Out of date! Remove", str(v)) print("[+]Mapping Table Out of date! Remove", str(v))
@@ -528,14 +549,15 @@ def main():
try: try:
oCC = None if ccm == 0 else OpenCC('t2s.json' if ccm == 1 else 's2t.json') oCC = None if ccm == 0 else OpenCC('t2s.json' if ccm == 1 else 's2t.json')
except: except:
# some OS no OpennCC cpython, try opencc-python-reimplemented. # some OS no OpenCC cpython, try opencc-python-reimplemented.
# pip uninstall opencc && pip install opencc-python-reimplemented # pip uninstall opencc && pip install opencc-python-reimplemented
oCC = None if ccm == 0 else OpenCC('t2s' if ccm == 1 else 's2t') oCC = None if ccm == 0 else OpenCC('t2s' if ccm == 1 else 's2t')
if not single_file_path == '': #Single File if not single_file_path == '': # Single File
print('[+]==================== Single File =====================') print('[+]==================== Single File =====================')
if custom_number == '': if custom_number == '':
create_data_and_move_with_custom_number(single_file_path, get_number(conf.debug(), os.path.basename(single_file_path)), oCC) create_data_and_move_with_custom_number(single_file_path,
get_number(conf.debug(), os.path.basename(single_file_path)), oCC)
else: else:
create_data_and_move_with_custom_number(single_file_path, custom_number, oCC) create_data_and_move_with_custom_number(single_file_path, custom_number, oCC)
else: else:
@@ -550,7 +572,7 @@ def main():
print('[+]Find', count_all, 'movies.') print('[+]Find', count_all, 'movies.')
print('[*]======================================================') print('[*]======================================================')
stop_count = conf.stop_counter() stop_count = conf.stop_counter()
if stop_count<1: if stop_count < 1:
stop_count = 999999 stop_count = 999999
else: else:
count_all = str(min(len(movie_list), stop_count)) count_all = str(min(len(movie_list), stop_count))
@@ -558,7 +580,8 @@ def main():
for movie_path in movie_list: # 遍历电影列表 交给core处理 for movie_path in movie_list: # 遍历电影列表 交给core处理
count = count + 1 count = count + 1
percentage = str(count / int(count_all) * 100)[:4] + '%' percentage = str(count / int(count_all) * 100)[:4] + '%'
print('[!] {:>30}{:>21}'.format('- ' + percentage + ' [' + str(count) + '/' + count_all + '] -', time.strftime("%H:%M:%S"))) print('[!] {:>30}{:>21}'.format('- ' + percentage + ' [' + str(count) + '/' + count_all + '] -',
time.strftime("%H:%M:%S")))
create_data_and_move(movie_path, zero_op, oCC) create_data_and_move(movie_path, zero_op, oCC)
if count >= stop_count: if count >= stop_count:
print("[!]Stop counter triggered!") print("[!]Stop counter triggered!")
@@ -573,7 +596,7 @@ def main():
end_time = time.time() end_time = time.time()
total_time = str(timedelta(seconds=end_time - start_time)) total_time = str(timedelta(seconds=end_time - start_time))
print("[+]Running time", total_time[:len(total_time) if total_time.rfind('.') < 0 else -3], print("[+]Running time", total_time[:len(total_time) if total_time.rfind('.') < 0 else -3],
" End at", time.strftime("%Y-%m-%d %H:%M:%S")) " End at", time.strftime("%Y-%m-%d %H:%M:%S"))
print("[+]All finished!!!") print("[+]All finished!!!")

View File

@@ -2,36 +2,37 @@ import os
import re import re
import sys import sys
import config import config
import typing
# Case-insensitive pattern matching site-name prefixes, resolution tags
# (fhd/hd/sd/1080p/720p) and "uncensored"/"leak" tags.
# NOTE(review): presumably used by get_number to strip these tokens from a
# file name - confirm against the caller.
# Raw string: "\." in a normal string literal is an invalid escape sequence.
G_spat = re.compile(
    r"^22-sht\.me|-fhd|_fhd|^fhd_|^fhd-|-hd|_hd|^hd_|^hd-|-sd|_sd|-1080p|_1080p|-720p|_720p|^hhd800\.com@|-uncensored|_uncensored|-leak|_leak",
    re.IGNORECASE)
def get_number(debug,file_path: str) -> str: def get_number(debug: bool, file_path: str) -> str:
# """ """
# >>> from number_parser import get_number 从文件路径中提取番号 from number_parser import get_number
# >>> get_number("/Users/Guest/AV_Data_Capture/snis-829.mp4") >>> get_number(False, "/Users/Guest/AV_Data_Capture/snis-829.mp4")
# 'snis-829' 'snis-829'
# >>> get_number("/Users/Guest/AV_Data_Capture/snis-829-C.mp4") >>> get_number(False, "/Users/Guest/AV_Data_Capture/snis-829-C.mp4")
# 'snis-829' 'snis-829'
# >>> get_number("C:¥Users¥Guest¥snis-829.mp4") >>> get_number(False, "C:¥Users¥Guest¥snis-829.mp4")
# 'snis-829' 'snis-829'
# >>> get_number("C:¥Users¥Guest¥snis-829-C.mp4") >>> get_number(False, "C:¥Users¥Guest¥snis-829-C.mp4")
# 'snis-829' 'snis-829'
# >>> get_number("./snis-829.mp4") >>> get_number(False, "./snis-829.mp4")
# 'snis-829' 'snis-829'
# >>> get_number("./snis-829-C.mp4") >>> get_number(False, "./snis-829-C.mp4")
# 'snis-829' 'snis-829'
# >>> get_number(".¥snis-829.mp4") >>> get_number(False, ".¥snis-829.mp4")
# 'snis-829' 'snis-829'
# >>> get_number(".¥snis-829-C.mp4") >>> get_number(False, ".¥snis-829-C.mp4")
# 'snis-829' 'snis-829'
# >>> get_number("snis-829.mp4") >>> get_number(False, "snis-829.mp4")
# 'snis-829' 'snis-829'
# >>> get_number("snis-829-C.mp4") >>> get_number(False, "snis-829-C.mp4")
# 'snis-829' 'snis-829'
# """ """
filepath = os.path.basename(file_path) filepath = os.path.basename(file_path)
# debug True 和 False 两块代码块合并原因是此模块及函数只涉及字符串计算没有IO操作debug on时输出导致异常信息即可 # debug True 和 False 两块代码块合并原因是此模块及函数只涉及字符串计算没有IO操作debug on时输出导致异常信息即可
try: try:
@@ -56,7 +57,7 @@ def get_number(debug,file_path: str) -> str:
try: try:
return str( return str(
re.findall(r'(.+?)\.', re.findall(r'(.+?)\.',
str(re.search('([^<>/\\\\|:""\\*\\?]+)\\.\\w+$', filepath).group()))).strip( str(re.search('([^<>/\\\\|:""\\*\\?]+)\\.\\w+$', filepath).group()))).strip(
"['']").replace('_', '-') "['']").replace('_', '-')
except: except:
return str(re.search(r'(.+?)\.', filepath)[0]) return str(re.search(r'(.+?)\.', filepath)[0])
@@ -68,29 +69,33 @@ def get_number(debug,file_path: str) -> str:
# Number extraction rules following the naming convention of the javdb data source.
# Key:   regex searched case-insensitively in the file name to select a rule.
# Value: callable taking the file name and returning the extracted number; it
#        raises AttributeError / IndexError when the expected digits are absent.
G_TAKE_NUM_RULES = {
    'tokyo.*hot': lambda x: str(re.search(r'(cz|gedo|k|n|red-|se)\d{2,4}', x, re.I).group()),
    'carib': lambda x: str(re.search(r'\d{6}(-|_)\d{3}', x, re.I).group()).replace('_', '-'),
    '1pon|mura|paco': lambda x: str(re.search(r'\d{6}(-|_)\d{3}', x, re.I).group()).replace('-', '_'),
    '10mu': lambda x: str(re.search(r'\d{6}(-|_)\d{2}', x, re.I).group()).replace('-', '_'),
    'x-art': lambda x: str(re.search(r'x-art\.\d{2}\.\d{2}\.\d{2}', x, re.I).group()),
    'xxx-av': lambda x: ''.join(['xxx-av-', re.findall(r'xxx-av[^\d]*(\d{3,5})[^\d]*', x, re.I)[0]]),
    'heydouga': lambda x: 'heydouga-' + '-'.join(re.findall(r'(\d{4})[\-_](\d{3,4})[^\d]*', x, re.I)[0]),
    'heyzo': lambda x: 'HEYZO-' + re.findall(r'heyzo[^\d]*(\d{4})', x, re.I)[0]
}


def get_number_by_dict(filename: str) -> typing.Optional[str]:
    """Extract a number from ``filename`` using the first matching rule.

    Rules are tried in the insertion order of ``G_TAKE_NUM_RULES``; only the
    first rule whose key matches is applied.

    Returns:
        The extracted number, or None when no rule matches, when the matching
        rule cannot find its digits, or when ``filename`` is not a string.
    """
    try:
        for pattern, extract in G_TAKE_NUM_RULES.items():
            if re.search(pattern, filename, re.I):
                return extract(filename)
    except (AttributeError, IndexError, TypeError):
        # AttributeError: re.search() returned None inside an extractor.
        # IndexError:     re.findall() returned an empty list.
        # TypeError:      filename is not a str.
        # Anything else (e.g. KeyboardInterrupt) is no longer swallowed.
        pass
    return None
class Cache_uncensored_conf: class Cache_uncensored_conf:
prefix = None prefix = None
def is_empty(self): def is_empty(self):
return bool(self.prefix is None) return bool(self.prefix is None)
def set(self, v: list): def set(self, v: list):
if not v or not len(v) or not len(v[0]): if not v or not len(v) or not len(v[0]):
raise ValueError('input prefix list empty or None') raise ValueError('input prefix list empty or None')
@@ -99,28 +104,32 @@ class Cache_uncensored_conf:
for i in v[1:]: for i in v[1:]:
s += f"|{i}.+" s += f"|{i}.+"
self.prefix = re.compile(s, re.I) self.prefix = re.compile(s, re.I)
def check(self, number): def check(self, number):
if self.prefix is None: if self.prefix is None:
raise ValueError('No init re compile') raise ValueError('No init re compile')
return self.prefix.match(number) return self.prefix.match(number)
G_cache_uncensored_conf = Cache_uncensored_conf() G_cache_uncensored_conf = Cache_uncensored_conf()
# ========================================================================是否为无码 # ========================================================================是否为无码
def is_uncensored(number): def is_uncensored(number):
if re.match( if re.match(
r'[\d-]{4,}|\d{6}_\d{2,3}|(cz|gedo|k|n|red-|se)\d{2,4}|heyzo.+|xxx-av-.+|heydouga-.+|x-art\.\d{2}\.\d{2}\.\d{2}', r'[\d-]{4,}|\d{6}_\d{2,3}|(cz|gedo|k|n|red-|se)\d{2,4}|heyzo.+|xxx-av-.+|heydouga-.+|x-art\.\d{2}\.\d{2}\.\d{2}',
number, number,
re.I re.I
): ):
return True return True
if G_cache_uncensored_conf.is_empty(): if G_cache_uncensored_conf.is_empty():
G_cache_uncensored_conf.set(config.getInstance().get_uncensored().split(',')) G_cache_uncensored_conf.set(config.getInstance().get_uncensored().split(','))
return G_cache_uncensored_conf.check(number) return G_cache_uncensored_conf.check(number)
if __name__ == "__main__": if __name__ == "__main__":
# import doctest # import doctest
# doctest.testmod(raise_on_error=True) # doctest.testmod(raise_on_error=True)
test_use_cases = ( test_use_cases = (
"MEYD-594-C.mp4", "MEYD-594-C.mp4",
"SSIS-001_C.mp4", "SSIS-001_C.mp4",
@@ -131,26 +140,30 @@ if __name__ == "__main__":
"SDDE-625_uncensored_C.mp4", "SDDE-625_uncensored_C.mp4",
"SDDE-625_uncensored_leak_C.mp4", "SDDE-625_uncensored_leak_C.mp4",
"SDDE-625_uncensored_leak_C_cd1.mp4", "SDDE-625_uncensored_leak_C_cd1.mp4",
"Tokyo Hot n9001 FHD.mp4", # 无-号,以前无法正确提取 "Tokyo Hot n9001 FHD.mp4", # 无-号,以前无法正确提取
"TokyoHot-n1287-HD SP2006 .mp4", "TokyoHot-n1287-HD SP2006 .mp4",
"caribean-020317_001.nfo", # -号误命名为_号的 "caribean-020317_001.nfo", # -号误命名为_号的
"257138_3xplanet_1Pondo_080521_001.mp4", "257138_3xplanet_1Pondo_080521_001.mp4",
"ADV-R0624-CD3.wmv", # 多碟影片 "ADV-R0624-CD3.wmv", # 多碟影片
"XXX-AV 22061-CD5.iso", # 新支持片商格式 xxx-av-22061 命名规则来自javdb数据源 "XXX-AV 22061-CD5.iso", # 新支持片商格式 xxx-av-22061 命名规则来自javdb数据源
"xxx-av 20589.mp4", "xxx-av 20589.mp4",
"Muramura-102114_145-HD.wmv", # 新支持片商格式 102114_145 命名规则来自javdb数据源 "Muramura-102114_145-HD.wmv", # 新支持片商格式 102114_145 命名规则来自javdb数据源
"heydouga-4102-023-CD2.iso", # 新支持片商格式 heydouga-4102-023 命名规则来自javdb数据源 "heydouga-4102-023-CD2.iso", # 新支持片商格式 heydouga-4102-023 命名规则来自javdb数据源
"HeyDOuGa4236-1048 Ai Qiu - .mp4", # heydouga-4236-1048 命名规则来自javdb数据源 "HeyDOuGa4236-1048 Ai Qiu - .mp4", # heydouga-4236-1048 命名规则来自javdb数据源
"pacopacomama-093021_539-FHD.mkv", # 新支持片商格式 093021_539 命名规则来自javdb数据源 "pacopacomama-093021_539-FHD.mkv", # 新支持片商格式 093021_539 命名规则来自javdb数据源
"sbw99.cc@heyzo_hd_2636_full.mp4" "sbw99.cc@heyzo_hd_2636_full.mp4"
) )
def evprint(evstr): def evprint(evstr):
code = compile(evstr, "<string>", "eval") code = compile(evstr, "<string>", "eval")
print("{1:>20} # '{0}'".format(evstr[18:-2], eval(code))) print("{1:>20} # '{0}'".format(evstr[18:-2], eval(code)))
for t in test_use_cases: for t in test_use_cases:
evprint(f'get_number(True, "{t}")') evprint(f'get_number(True, "{t}")')
if len(sys.argv)<=1 or not re.search('^[A-Z]:?', sys.argv[1], re.IGNORECASE): if len(sys.argv) <= 1 or not re.search('^[A-Z]:?', sys.argv[1], re.IGNORECASE):
sys.exit(0) sys.exit(0)
# 使用Everything的ES命令行工具搜集全盘视频文件名作为用例测试number数据参数为盘符 A .. Z 或带盘符路径 # 使用Everything的ES命令行工具搜集全盘视频文件名作为用例测试number数据参数为盘符 A .. Z 或带盘符路径
@@ -169,6 +182,7 @@ if __name__ == "__main__":
# 示例: # 示例:
# python3 ./number_parser.py ALL # python3 ./number_parser.py ALL
import subprocess import subprocess
ES_search_path = "ALL disks" ES_search_path = "ALL disks"
if sys.argv[1] == "ALL": if sys.argv[1] == "ALL":
if sys.platform == "win32": if sys.platform == "win32":
@@ -176,18 +190,19 @@ if __name__ == "__main__":
ES_prog_path = 'es.exe' # es.exe需要放在PATH环境变量的路径之内 ES_prog_path = 'es.exe' # es.exe需要放在PATH环境变量的路径之内
ES_cmdline = f'{ES_prog_path} -name size:gigantic ext:mp4;avi;rmvb;wmv;mov;mkv;flv;ts;webm;iso;mpg;m4v' ES_cmdline = f'{ES_prog_path} -name size:gigantic ext:mp4;avi;rmvb;wmv;mov;mkv;flv;ts;webm;iso;mpg;m4v'
out_bytes = subprocess.check_output(ES_cmdline.split(' ')) out_bytes = subprocess.check_output(ES_cmdline.split(' '))
out_text = out_bytes.decode('gb18030') # 中文版windows 10 x64默认输出GB18030此编码为UNICODE方言与UTF-8系全射关系无转码损失 out_text = out_bytes.decode('gb18030') # 中文版windows 10 x64默认输出GB18030此编码为UNICODE方言与UTF-8系全射关系无转码损失
out_list = out_text.splitlines() out_list = out_text.splitlines()
elif sys.platform in ("linux", "darwin"): elif sys.platform in ("linux", "darwin"):
ES_prog_path = 'locate' if sys.platform == 'linux' else 'glocate' ES_prog_path = 'locate' if sys.platform == 'linux' else 'glocate'
ES_cmdline = r"{} -b -i --regex '\.mp4$|\.avi$|\.rmvb$|\.wmv$|\.mov$|\.mkv$|\.webm$|\.iso$|\.mpg$|\.m4v$'".format(ES_prog_path) ES_cmdline = r"{} -b -i --regex '\.mp4$|\.avi$|\.rmvb$|\.wmv$|\.mov$|\.mkv$|\.webm$|\.iso$|\.mpg$|\.m4v$'".format(
ES_prog_path)
out_bytes = subprocess.check_output(ES_cmdline.split(' ')) out_bytes = subprocess.check_output(ES_cmdline.split(' '))
out_text = out_bytes.decode('utf-8') out_text = out_bytes.decode('utf-8')
out_list = [ os.path.basename(line) for line in out_text.splitlines()] out_list = [os.path.basename(line) for line in out_text.splitlines()]
else: else:
print('[-]Unsupported platform! Please run on OS Windows/Linux/MacOSX. Exit.') print('[-]Unsupported platform! Please run on OS Windows/Linux/MacOSX. Exit.')
sys.exit(1) sys.exit(1)
else: # Windows single disk else: # Windows single disk
if sys.platform != "win32": if sys.platform != "win32":
print('[!]Usage: python3 ./number_parser.py ALL') print('[!]Usage: python3 ./number_parser.py ALL')
sys.exit(0) sys.exit(0)
@@ -202,7 +217,7 @@ if __name__ == "__main__":
ES_search_path = os.path.normcase(ES_search_path) ES_search_path = os.path.normcase(ES_search_path)
ES_cmdline = f'{ES_prog_path} -path {ES_search_path} -name size:gigantic ext:mp4;avi;rmvb;wmv;mov;mkv;webm;iso;mpg;m4v' ES_cmdline = f'{ES_prog_path} -path {ES_search_path} -name size:gigantic ext:mp4;avi;rmvb;wmv;mov;mkv;webm;iso;mpg;m4v'
out_bytes = subprocess.check_output(ES_cmdline.split(' ')) out_bytes = subprocess.check_output(ES_cmdline.split(' '))
out_text = out_bytes.decode('gb18030') # 中文版windows 10 x64默认输出GB18030此编码为UNICODE方言与UTF-8系全射关系无转码损失 out_text = out_bytes.decode('gb18030') # 中文版windows 10 x64默认输出GB18030此编码为UNICODE方言与UTF-8系全射关系无转码损失
out_list = out_text.splitlines() out_list = out_text.splitlines()
print(f'\n[!]{ES_prog_path} is searching {ES_search_path} for movies as number parser test cases...') print(f'\n[!]{ES_prog_path} is searching {ES_search_path} for movies as number parser test cases...')
print(f'[+]Find {len(out_list)} Movies.') print(f'[+]Find {len(out_list)} Movies.')