Merge pull request #698 from naughtyGitCat/master

Fix typo 'transalte' -> 'translate', and add some blank lines
This commit is contained in:
Yoshiko2
2022-02-23 05:28:59 +08:00
committed by GitHub
5 changed files with 235 additions and 166 deletions

View File

@@ -3,19 +3,20 @@ import json
import os
import re
import sys
import time
import shutil
import typing
import urllib3
import signal
import platform
import multiprocessing
from datetime import datetime, timedelta
from pathlib import Path
from opencc import OpenCC
import ADC_function
import config
from datetime import datetime, timedelta
import time
from pathlib import Path
from ADC_function import file_modification_days, get_html, parallel_download_files
from ADC_function import file_modification_days, get_html, parallel_download_files
from number_parser import get_number
from core import core_main, moveFailedFolder
@@ -30,7 +31,7 @@ def check_update(local_version):
time.sleep(60)
os._exit(-1)
data = json.loads(htmlcode)
remote = int(data["tag_name"].replace(".",""))
remote = int(data["tag_name"].replace(".", ""))
local_version = int(local_version.replace(".", ""))
if local_version < remote:
print("[*]" + ("* New update " + str(data["tag_name"]) + " *").center(54))
@@ -43,36 +44,44 @@ def argparse_function(ver: str) -> typing.Tuple[str, str, str, str, bool]:
conf = config.getInstance()
parser = argparse.ArgumentParser(epilog=f"Load Config file '{conf.ini_path}'.")
parser.add_argument("file", default='', nargs='?', help="Single Movie file path.")
parser.add_argument("-p","--path",default='',nargs='?',help="Analysis folder path.")
parser.add_argument("-m","--main-mode",default='',nargs='?',help="Main mode. 1:Scraping 2:Organizing 3:Scraping in analysis folder")
parser.add_argument("-p", "--path", default='', nargs='?', help="Analysis folder path.")
parser.add_argument("-m", "--main-mode", default='', nargs='?',
help="Main mode. 1:Scraping 2:Organizing 3:Scraping in analysis folder")
parser.add_argument("-n", "--number", default='', nargs='?', help="Custom file number of single movie file.")
# parser.add_argument("-C", "--config", default='config.ini', nargs='?', help="The config file Path.")
default_logdir = str(Path.home() / '.mlogs')
parser.add_argument("-o","--log-dir",dest='logdir',default=default_logdir,nargs='?',
help=f"""Duplicate stdout and stderr to logfiles in logging folder, default on.
parser.add_argument("-o", "--log-dir", dest='logdir', default=default_logdir, nargs='?',
help=f"""Duplicate stdout and stderr to logfiles in logging folder, default on.
default folder for current user: '{default_logdir}'. Change default folder to an empty file,
or use --log-dir= to turn log off.""")
parser.add_argument("-q","--regex-query",dest='regexstr',default='',nargs='?',help="python re module regex filepath filtering.")
parser.add_argument("-d","--nfo-skip-days",dest='days',default='',nargs='?', help="Override nfo_skip_days value in config.")
parser.add_argument("-c","--stop-counter",dest='cnt',default='',nargs='?', help="Override stop_counter value in config.")
parser.add_argument("-q", "--regex-query", dest='regexstr', default='', nargs='?',
help="python re module regex filepath filtering.")
parser.add_argument("-d", "--nfo-skip-days", dest='days', default='', nargs='?',
help="Override nfo_skip_days value in config.")
parser.add_argument("-c", "--stop-counter", dest='cnt', default='', nargs='?',
help="Override stop_counter value in config.")
parser.add_argument("-i", "--ignore-failed-list", action="store_true", help="Ignore failed list '{}'".format(
os.path.join(os.path.abspath(conf.failed_folder()), 'failed_list.txt')))
os.path.join(os.path.abspath(conf.failed_folder()), 'failed_list.txt')))
parser.add_argument("-a", "--auto-exit", action="store_true",
help="Auto exit after program complete")
parser.add_argument("-g","--debug", action="store_true",
parser.add_argument("-g", "--debug", action="store_true",
help="Turn on debug mode to generate diagnostic log for issue report.")
parser.add_argument("-z","--zero-operation",dest='zero_op', action="store_true",
parser.add_argument("-z", "--zero-operation", dest='zero_op', action="store_true",
help="""Only show job list of files and numbers, and **NO** actual operation
is performed. It may help you correct wrong numbers before real job.""")
parser.add_argument("-v", "--version", action="version", version=ver)
args = parser.parse_args()
def get_natural_number_or_none(value):
    """Convert a command-line string to a non-negative int, or return None.

    Returns None when ``value`` is not a string or does not consist solely
    of decimal digits (this covers the empty string and signed values).
    """
    # str.isnumeric() also accepts characters such as '½' or '一' that int()
    # rejects with ValueError; str.isdecimal() admits only what int() parses.
    if isinstance(value, str) and value.isdecimal():
        return int(value)
    return None
def get_str_or_none(value):
    """Return ``value`` unchanged if it is a non-empty string, else None."""
    if not isinstance(value, str):
        return None
    if len(value) == 0:
        return None
    return value
def get_bool_or_none(value):
    """Return True only for the bool ``True``; every other input gives None."""
    # bool cannot be subclassed, so "is a bool and truthy" means exactly True.
    if value is True:
        return True
    return None
config.G_conf_override["common:main_mode"] = get_natural_number_or_none(args.main_mode)
config.G_conf_override["common:source_folder"] = get_str_or_none(args.path)
config.G_conf_override["common:auto_exit"] = get_bool_or_none(args.auto_exit)
@@ -83,43 +92,53 @@ is performed. It may help you correct wrong numbers before real job.""")
return args.file, args.number, args.logdir, args.regexstr, args.zero_op
class OutLogger(object):
    """File-like object that duplicates every write to stdout and a log file.

    The constructor remembers the current ``sys.stdout`` and opens ``logfile``
    for writing. It does not install itself as ``sys.stdout``; ``close()``
    assigns the remembered stream back to ``sys.stdout`` and closes the log.
    """

    def __init__(self, logfile) -> None:
        self.term = sys.stdout
        # Line-buffered so log lines reach the file promptly.
        self.log = open(logfile, "w", encoding='utf-8', buffering=1)
        self.filepath = logfile

    def __del__(self):
        self.close()

    def __enter__(self):
        # Return the logger so ``with OutLogger(path) as log:`` binds a usable
        # object instead of None.
        return self

    def __exit__(self, *args):
        self.close()

    def write(self, msg):
        self.term.write(msg)
        self.log.write(msg)

    def flush(self):
        self.term.flush()
        self.log.flush()
        os.fsync(self.log.fileno())

    def close(self):
        """Restore ``sys.stdout`` and close the log file; safe to call twice."""
        if self.term is not None:
            sys.stdout = self.term
            self.term = None
        # getattr: __del__ also runs when open() failed in __init__, in which
        # case the ``log`` attribute was never assigned.
        if getattr(self, "log", None) is not None:
            self.log.close()
            self.log = None
class ErrLogger(OutLogger):
    """Variant of OutLogger that remembers and restores ``sys.stderr``."""

    def __init__(self, logfile) -> None:
        # OutLogger.__init__ is not called here: this class captures
        # sys.stderr rather than sys.stdout.
        self.term = sys.stderr
        self.log = open(logfile, mode="w", encoding='utf-8', buffering=1)
        self.filepath = logfile

    def close(self):
        """Assign the remembered stream back to ``sys.stderr`` and close the log."""
        saved_stream = self.term
        if saved_stream is not None:
            sys.stderr = saved_stream
            self.term = None
        log_handle = self.log
        if log_handle is not None:
            log_handle.close()
            self.log = None
@@ -130,7 +149,7 @@ def dupe_stdout_to_logfile(logdir: str):
log_dir = Path(logdir)
if not log_dir.exists():
try:
log_dir.mkdir(parents=True,exist_ok=True)
log_dir.mkdir(parents=True, exist_ok=True)
except:
pass
if not log_dir.is_dir():
@@ -147,7 +166,7 @@ def dupe_stdout_to_logfile(logdir: str):
def close_logfile(logdir: str):
if not isinstance(logdir, str) or len(logdir) == 0 or not os.path.isdir(logdir):
return
#日志关闭前保存日志路径
# 日志关闭前保存日志路径
filepath = None
try:
filepath = sys.stdout.filepath
@@ -158,7 +177,7 @@ def close_logfile(logdir: str):
log_dir = Path(logdir).resolve()
if isinstance(filepath, Path):
print(f"Log file '{filepath}' saved.")
assert(filepath.parent.samefile(log_dir))
assert (filepath.parent.samefile(log_dir))
# 清理空文件
for f in log_dir.glob(r'*_err.txt'):
if f.stat().st_size == 0:
@@ -198,7 +217,7 @@ def close_logfile(logdir: str):
cutday = len('T235959.txt') # cut length mdc_20201201|T235959.txt
for f in day_merge:
try:
day_file_name = str(f)[:-cutday] + '.txt' # mdc_20201201.txt
day_file_name = str(f)[:-cutday] + '.txt' # mdc_20201201.txt
with open(day_file_name, 'a', encoding='utf-8') as m:
m.write(f.read_text(encoding='utf-8'))
f.unlink(missing_ok=True)
@@ -210,7 +229,7 @@ def close_logfile(logdir: str):
if not txts or not len(txts):
break
txts.sort()
tmstr_3_month_ago = (today.replace(day=1) - timedelta(days=3*30)).strftime("%Y%m32")
tmstr_3_month_ago = (today.replace(day=1) - timedelta(days=3 * 30)).strftime("%Y%m32")
deadline_month = f'mdc_{tmstr_3_month_ago}'
month_merge = [f for f in txts if f.stem < deadline_month]
if not month_merge or not len(month_merge):
@@ -218,7 +237,7 @@ def close_logfile(logdir: str):
tomonth = len('01.txt') # cut length mdc_202012|01.txt
for f in month_merge:
try:
month_file_name = str(f)[:-tomonth] + '.txt' # mdc_202012.txt
month_file_name = str(f)[:-tomonth] + '.txt' # mdc_202012.txt
with open(month_file_name, 'a', encoding='utf-8') as m:
m.write(f.read_text(encoding='utf-8'))
f.unlink(missing_ok=True)
@@ -231,14 +250,14 @@ def close_logfile(logdir: str):
if not mons or not len(mons):
return
mons.sort()
deadline_year = f'mdc_{today.year-1}13'
deadline_year = f'mdc_{today.year - 1}13'
year_merge = [f for f in mons if f.stem < deadline_year]
if not year_merge or not len(year_merge):
return
toyear = len('12.txt') # cut length mdc_2020|12.txt
toyear = len('12.txt') # cut length mdc_2020|12.txt
for f in year_merge:
try:
year_file_name = str(f)[:-toyear] + '.txt' # mdc_2020.txt
year_file_name = str(f)[:-toyear] + '.txt' # mdc_2020.txt
with open(year_file_name, 'a', encoding='utf-8') as y:
y.write(f.read_text(encoding='utf-8'))
f.unlink(missing_ok=True)
@@ -254,13 +273,14 @@ def signal_handler(*args):
print('[!]Ctrl+C detected, Exit.')
sys.exit(9)
def sigdebug_handler(*args):
    """Signal handler: flip the ``debug_mode:switch`` override and report it.

    An override that is still None becomes True on the first toggle. The
    printed state is whatever ``config.getInstance().debug()`` reports.
    """
    override_key = "debug_mode:switch"
    overrides = config.G_conf_override
    overrides[override_key] = not overrides[override_key]
    state_label = 'On' if config.getInstance().debug() else 'oFF'
    print('[!]Debug {}'.format(state_label))
# 新增失败文件列表跳过处理,及.nfo修改天数跳过处理提示跳过视频总数调试模式(-g)下详细被跳过文件,跳过小广告
def movie_lists(source_folder, regexstr):
def movie_lists(source_folder, regexstr: str) -> list[str]:
conf = config.getInstance()
main_mode = conf.main_mode()
debug = conf.debug()
@@ -280,9 +300,9 @@ def movie_lists(source_folder, regexstr):
try:
flist = failed_list_txt_path.read_text(encoding='utf-8').splitlines()
failed_set = set(flist)
if len(flist) != len(failed_set): # 检查去重并写回但是不改变failed_list.txt内条目的先后次序重复的只保留最后的
if len(flist) != len(failed_set): # 检查去重并写回但是不改变failed_list.txt内条目的先后次序重复的只保留最后的
fset = failed_set.copy()
for i in range(len(flist)-1, -1, -1):
for i in range(len(flist) - 1, -1, -1):
fset.remove(flist[i]) if flist[i] in fset else flist.pop(i)
failed_list_txt_path.write_text('\n'.join(flist) + '\n', encoding='utf-8')
assert len(fset) == 0 and len(flist) == len(failed_set)
@@ -308,14 +328,15 @@ def movie_lists(source_folder, regexstr):
continue
is_sym = full_name.is_symlink()
if main_mode != 3 and (is_sym or full_name.stat().st_nlink > 1): # 短路布尔 符号链接不取stat(),因为符号链接可能指向不存在目标
continue # file is symlink or hardlink(Linux/NTFS/Darwin)
continue # file is symlink or hardlink(Linux/NTFS/Darwin)
# 调试用0字节样本允许通过去除小于120MB的广告'苍老师强力推荐.mp4'(102.2MB)'黑道总裁.mp4'(98.4MB)'有趣的妹子激情表演.MP4'(95MB)'有趣的臺灣妹妹直播.mp4'(15.1MB)
movie_size = 0 if is_sym else full_name.stat().st_size # 同上 符号链接不取stat()及st_size直接赋0跳过小视频检测
if movie_size > 0 and movie_size < 125829120: # 1024*1024*120=125829120
if 0 < movie_size < 125829120: # 1024*1024*120=125829120
continue
if cliRE and not cliRE.search(absf) or trailerRE.search(full_name.name):
continue
if main_mode == 3 and nfo_skip_days > 0 and file_modification_days(full_name.with_suffix('.nfo')) <= nfo_skip_days:
if main_mode == 3 and nfo_skip_days > 0 and file_modification_days(
full_name.with_suffix('.nfo')) <= nfo_skip_days:
skip_nfo_days_cnt += 1
if debug:
print(f"[!]Skip movie by it's .nfo which modified within {nfo_skip_days} days: '{absf}'")
@@ -325,7 +346,8 @@ def movie_lists(source_folder, regexstr):
if skip_failed_cnt:
print(f"[!]Skip {skip_failed_cnt} movies in failed list '{failed_list_txt_path}'.")
if skip_nfo_days_cnt:
print(f"[!]Skip {skip_nfo_days_cnt} movies in source folder '{source}' who's .nfo modified within {nfo_skip_days} days.")
print(
f"[!]Skip {skip_nfo_days_cnt} movies in source folder '{source}' who's .nfo modified within {nfo_skip_days} days.")
if nfo_skip_days <= 0 or not soft_link or main_mode == 3:
return total
# 软连接方式,已经成功削刮的也需要从成功目录中检查.nfo更新天数跳过N天内更新过的
@@ -351,13 +373,17 @@ def movie_lists(source_folder, regexstr):
if debug:
print(f"[!]Skip file successfully processed within {nfo_skip_days} days: '{f}'")
if len(rm_list):
print(f"[!]Skip {len(rm_list)} movies in success folder '{success_folder}' who's .nfo modified within {nfo_skip_days} days.")
print(
f"[!]Skip {len(rm_list)} movies in success folder '{success_folder}' who's .nfo modified within {nfo_skip_days} days.")
return total
def create_failed_folder(failed_folder):
if not os.path.exists(failed_folder): # 新建failed文件夹
def create_failed_folder(failed_folder: str):
"""
新建failed文件夹
"""
if not os.path.exists(failed_folder):
try:
os.makedirs(failed_folder)
except:
@@ -370,9 +396,7 @@ def rm_empty_folder(path):
deleted = set()
for current_dir, subdirs, files in os.walk(abspath, topdown=False):
try:
still_has_subdirs = any(
_ for subdir in subdirs if os.path.join(current_dir, subdir) not in deleted
)
still_has_subdirs = any(_ for subdir in subdirs if os.path.join(current_dir, subdir) not in deleted)
if not any(files) and not still_has_subdirs and not os.path.samefile(path, current_dir):
os.rmdir(current_dir)
deleted.add(current_dir)
@@ -387,7 +411,7 @@ def create_data_and_move(file_path: str, zero_op, oCC):
n_number = get_number(debug, os.path.basename(file_path))
file_path = os.path.abspath(file_path)
if debug == True:
if debug is True:
print(f"[!] [{n_number}] As Number making data for '{file_path}'")
if zero_op:
return
@@ -444,7 +468,7 @@ def create_data_and_move_with_custom_number(file_path: str, custom_number, oCC):
def main():
version = '6.0.2'
urllib3.disable_warnings() #Ignore http proxy warning
urllib3.disable_warnings() # Ignore http proxy warning
# Read config.ini first, in argparse_function() need conf.failed_folder()
conf = config.Config("config.ini")
@@ -456,7 +480,7 @@ def main():
main_mode = conf.main_mode()
folder_path = ""
if not main_mode in (1, 2, 3):
if main_mode not in (1, 2, 3):
print(f"[-]Main mode must be 1 or 2 or 3! You can run '{os.path.basename(sys.argv[0])} --help' for more help.")
sys.exit(4)
@@ -467,7 +491,8 @@ def main():
signal.signal(signal.SIGWINCH, sigdebug_handler)
dupe_stdout_to_logfile(logdir)
platform_total = str(' - ' + platform.platform() + ' \n[*] - ' + platform.machine() + ' - Python-' + platform.python_version())
platform_total = str(
' - ' + platform.platform() + ' \n[*] - ' + platform.machine() + ' - Python-' + platform.python_version())
print('[*]================= Movie Data Capture =================')
print('[*]' + version.center(54))
@@ -485,15 +510,15 @@ def main():
print('[+]Enable debug')
if conf.soft_link():
print('[!]Enable soft link')
if len(sys.argv)>1:
print('[!]CmdLine:'," ".join(sys.argv[1:]))
if len(sys.argv) > 1:
print('[!]CmdLine:', " ".join(sys.argv[1:]))
print('[+]Main Working mode ## {}: {} ## {}{}{}'
.format(*(main_mode, ['Scraping', 'Organizing', 'Scraping in analysis folder'][main_mode-1],
"" if not conf.multi_threading() else ", multi_threading on",
"" if conf.nfo_skip_days() == 0 else f", nfo_skip_days={conf.nfo_skip_days()}",
"" if conf.stop_counter() == 0 else f", stop_counter={conf.stop_counter()}"
) if not single_file_path else ('-','Single File', '','',''))
)
.format(*(main_mode, ['Scraping', 'Organizing', 'Scraping in analysis folder'][main_mode - 1],
"" if not conf.multi_threading() else ", multi_threading on",
"" if conf.nfo_skip_days() == 0 else f", nfo_skip_days={conf.nfo_skip_days()}",
"" if conf.stop_counter() == 0 else f", stop_counter={conf.stop_counter()}"
) if not single_file_path else ('-', 'Single File', '', '', ''))
)
if conf.update_check():
check_update(version)
@@ -504,8 +529,9 @@ def main():
def fmd(f):
    """Pair a mapping-table file name with its download URL and local path.

    Returns a ``(url, path)`` tuple: the raw GitHub URL of ``f`` under
    MappingTable/, and ``~/.local/share/mdc/<f>`` as a ``Path``.
    """
    remote_url = 'https://raw.githubusercontent.com/yoshiko2/Movie_Data_Capture/master/MappingTable/' + f
    local_copy = Path.home().joinpath('.local', 'share', 'mdc', f)
    return remote_url, local_copy
map_tab = (fmd('mapping_actor.xml'), fmd('mapping_info.xml'), fmd('c_number.json'))
for k,v in map_tab:
for k, v in map_tab:
if v.exists():
if file_modification_days(str(v)) >= conf.mapping_table_validity():
print("[+]Mapping Table Out of date! Remove", str(v))
@@ -525,14 +551,15 @@ def main():
try:
oCC = None if ccm == 0 else OpenCC('t2s.json' if ccm == 1 else 's2t.json')
except:
# some OS no OpennCC cpython, try opencc-python-reimplemented.
# some OS no OpenCC cpython, try opencc-python-reimplemented.
# pip uninstall opencc && pip install opencc-python-reimplemented
oCC = None if ccm == 0 else OpenCC('t2s' if ccm == 1 else 's2t')
if not single_file_path == '': #Single File
if not single_file_path == '': # Single File
print('[+]==================== Single File =====================')
if custom_number == '':
create_data_and_move_with_custom_number(single_file_path, get_number(conf.debug(), os.path.basename(single_file_path)), oCC)
create_data_and_move_with_custom_number(single_file_path,
get_number(conf.debug(), os.path.basename(single_file_path)), oCC)
else:
create_data_and_move_with_custom_number(single_file_path, custom_number, oCC)
else:
@@ -547,7 +574,7 @@ def main():
print('[+]Find', count_all, 'movies.')
print('[*]======================================================')
stop_count = conf.stop_counter()
if stop_count<1:
if stop_count < 1:
stop_count = 999999
else:
count_all = str(min(len(movie_list), stop_count))
@@ -555,7 +582,8 @@ def main():
for movie_path in movie_list: # 遍历电影列表 交给core处理
count = count + 1
percentage = str(count / int(count_all) * 100)[:4] + '%'
print('[!] {:>30}{:>21}'.format('- ' + percentage + ' [' + str(count) + '/' + count_all + '] -', time.strftime("%H:%M:%S")))
print('[!] {:>30}{:>21}'.format('- ' + percentage + ' [' + str(count) + '/' + count_all + '] -',
time.strftime("%H:%M:%S")))
create_data_and_move(movie_path, zero_op, oCC)
if count >= stop_count:
print("[!]Stop counter triggered!")
@@ -570,7 +598,7 @@ def main():
end_time = time.time()
total_time = str(timedelta(seconds=end_time - start_time))
print("[+]Running time", total_time[:len(total_time) if total_time.rfind('.') < 0 else -3],
" End at", time.strftime("%Y-%m-%d %H:%M:%S"))
" End at", time.strftime("%Y-%m-%d %H:%M:%S"))
print("[+]All finished!!!")
@@ -581,7 +609,7 @@ def main():
sys.exit(0)
import multiprocessing
if __name__ == '__main__':
multiprocessing.freeze_support()
main()

View File

@@ -235,8 +235,8 @@ def get_data_from_json(file_number, oCC): # 从JSON返回元数据
json_data['studio'] = studio
json_data['director'] = director
if conf.is_transalte():
translate_values = conf.transalte_values().split(",")
if conf.is_translate():
translate_values = conf.translate_values().split(",")
for translate_value in translate_values:
if json_data[translate_value] == "":
continue
@@ -248,12 +248,12 @@ def get_data_from_json(file_number, oCC): # 从JSON返回元数据
continue
except:
pass
if conf.get_transalte_engine() == "azure":
if conf.get_translate_engine() == "azure":
t = translate(
json_data[translate_value],
target_language="zh-Hans",
engine=conf.get_transalte_engine(),
key=conf.get_transalte_key(),
engine=conf.get_translate_engine(),
key=conf.get_translate_key(),
)
else:
t = translate(json_data[translate_value])

View File

@@ -8,7 +8,7 @@ success_output_folder=JAV_output
soft_link=0
failed_move=1
auto_exit=0
transalte_to_sc=0
translate_to_sc=0
multi_threading=0
;actor_gender value: female(♀) or male(♂) or both(♀ ♂) or all(♂ ♀ ⚧)
actor_gender=female
@@ -51,7 +51,7 @@ folders=failed,JAV_output
switch=0
; 机器翻译
[transalte]
[translate]
switch=0
;可选项 google-free,azure
engine=google-free

106
config.py
View File

@@ -5,18 +5,17 @@ import configparser
import time
from pathlib import Path
G_conf_override = {
# index 0 save Config() first instance for quick access by using getInstance()
0 : None,
0: None,
# register override config items
"common:main_mode" : None,
"common:source_folder" : None,
"common:auto_exit" : None,
"common:nfo_skip_days" : None,
"common:stop_counter" : None,
"common:ignore_failed_list" : None,
"debug_mode:switch" : None
"common:main_mode": None,
"common:source_folder": None,
"common:auto_exit": None,
"common:nfo_skip_days": None,
"common:stop_counter": None,
"common:ignore_failed_list": None,
"debug_mode:switch": None
}
@@ -81,7 +80,7 @@ class Config:
sys.exit(2)
# 用户目录才确定具有写权限,因此选择 ~/mdc.ini 作为配置文件生成路径,而不是有可能并没有写权限的
# 当前目录。目前版本也不再鼓励使用当前路径放置配置文件了,只是作为多配置文件的切换技巧保留。
write_path = path_search_order[2] # Path.home() / "mdc.ini"
write_path = path_search_order[2] # Path.home() / "mdc.ini"
write_path.write_text(res_path.read_text(encoding='utf-8'), encoding='utf-8')
print("Config file '{}' created.".format(write_path.resolve()))
input("Press Enter key exit...")
@@ -98,14 +97,18 @@ class Config:
# print("[-]",e)
# sys.exit(3)
# #self.conf = self._default_config()
def getboolean_override(self, section, item) -> bool:
    """Return the boolean option, preferring a registered override.

    Looks up ``"<section>:<item>"`` in ``G_conf_override``; a None entry
    means "no override" and the value is read from the parsed config.
    """
    override = G_conf_override[f"{section}:{item}"]
    if override is None:
        return self.conf.getboolean(section, item)
    return bool(override)
def getint_override(self, section, item) -> int:
    """Return the integer option, preferring a registered override.

    Looks up ``"<section>:<item>"`` in ``G_conf_override``; a None entry
    means "no override" and the value is read from the parsed config.
    """
    override = G_conf_override[f"{section}:{item}"]
    if override is None:
        return self.conf.getint(section, item)
    return int(override)
def get_override(self, section, item) -> str:
    """Return the string option, preferring a registered override.

    Looks up ``"<section>:<item>"`` in ``G_conf_override``; a None entry
    means "no override" and the value is read from the parsed config.
    """
    override = G_conf_override[f"{section}:{item}"]
    if override is None:
        return self.conf.get(section, item)
    return str(override)
def main_mode(self) -> int:
try:
@@ -127,34 +130,46 @@ class Config:
def soft_link(self) -> bool:
return self.conf.getboolean("common", "soft_link")
def failed_move(self) -> bool:
return self.conf.getboolean("common", "failed_move")
def auto_exit(self) -> bool:
return self.getboolean_override("common", "auto_exit")
def transalte_to_sc(self) -> bool:
return self.conf.getboolean("common", "transalte_to_sc")
def translate_to_sc(self) -> bool:
return self.conf.getboolean("common", "translate_to_sc")
def multi_threading(self) -> bool:
return self.conf.getboolean("common", "multi_threading")
def del_empty_folder(self) -> bool:
return self.conf.getboolean("common", "del_empty_folder")
def nfo_skip_days(self) -> int:
    """Return ``common:nfo_skip_days`` (override-aware), defaulting to 30.

    The default is used when the value cannot be read or converted.
    """
    try:
        return self.getint_override("common", "nfo_skip_days")
    except Exception:
        # Best-effort default; unlike a bare ``except`` this lets
        # KeyboardInterrupt and SystemExit propagate.
        return 30
def stop_counter(self) -> int:
    """Return ``common:stop_counter`` (override-aware), defaulting to 0.

    The default is used when the value cannot be read or converted.
    """
    try:
        return self.getint_override("common", "stop_counter")
    except Exception:
        # Best-effort default; unlike a bare ``except`` this lets
        # KeyboardInterrupt and SystemExit propagate.
        return 0
def ignore_failed_list(self) -> bool:
return self.getboolean_override("common", "ignore_failed_list")
def download_only_missing_images(self) -> bool:
return self.conf.getboolean("common", "download_only_missing_images")
def mapping_table_validity(self) -> int:
return self.conf.getint("common", "mapping_table_validity")
def is_transalte(self) -> bool:
return self.conf.getboolean("transalte", "switch")
def is_translate(self) -> bool:
return self.conf.getboolean("translate", "switch")
def is_trailer(self) -> bool:
return self.conf.getboolean("trailer", "switch")
@@ -190,18 +205,25 @@ class Config:
return extrafanart_download
except ValueError:
self._exit("extrafanart_folder")
def get_transalte_engine(self) -> str:
return self.conf.get("transalte","engine")
# def get_transalte_appId(self) ->str:
# return self.conf.get("transalte","appid")
def get_transalte_key(self) -> str:
return self.conf.get("transalte","key")
def get_transalte_delay(self) -> int:
return self.conf.getint("transalte","delay")
def transalte_values(self) -> str:
return self.conf.get("transalte", "values")
def get_translate_engine(self) -> str:
return self.conf.get("translate", "engine")
# def get_translate_appId(self) ->str:
# return self.conf.get("translate","appid")
def get_translate_key(self) -> str:
return self.conf.get("translate", "key")
def get_translate_delay(self) -> int:
return self.conf.getint("translate", "delay")
def translate_values(self) -> str:
return self.conf.get("translate", "values")
def get_translate_service_site(self) -> str:
return self.conf.get("transalte", "service_site")
return self.conf.get("translate", "service_site")
def proxy(self):
try:
sec = "proxy"
@@ -284,21 +306,21 @@ class Config:
def storyline_show(self) -> int:
    """Return ``[storyline] show_result`` clamped to the range 0..2.

    Falls back to 0 when the option cannot be read as an integer.
    """
    try:
        v = self.conf.getint("storyline", "show_result")
    except Exception:
        # Best-effort default; KeyboardInterrupt/SystemExit still propagate.
        return 0
    # Values below 0 become 0, values above 2 become 2.
    return max(0, min(2, v))
def storyline_mode(self) -> int:
    """Return ``[storyline] run_mode`` clamped to the range 0..2.

    Falls back to 1 when the option cannot be read as an integer.
    """
    try:
        v = self.conf.getint("storyline", "run_mode")
    except Exception:
        # Best-effort default; KeyboardInterrupt/SystemExit still propagate.
        return 1
    # Values below 0 become 0, values above 2 become 2.
    return max(0, min(2, v))
def cc_convert_mode(self) -> int:
    """Return ``[cc_convert] mode`` clamped to the range 0..2.

    Falls back to 1 when the option cannot be read as an integer.
    """
    try:
        v = self.conf.getint("cc_convert", "mode")
    except Exception:
        # Best-effort default; KeyboardInterrupt/SystemExit still propagate.
        return 1
    # Values below 0 become 0, values above 2 become 2.
    return max(0, min(2, v))
@@ -320,7 +342,6 @@ class Config:
except:
return "hog"
@staticmethod
def _exit(sec: str) -> None:
print("[-] Read config error! Please check the {} section in config.ini", sec)
@@ -340,7 +361,7 @@ class Config:
conf.set(sec1, "soft_link", "0")
conf.set(sec1, "failed_move", "1")
conf.set(sec1, "auto_exit", "0")
conf.set(sec1, "transalte_to_sc", "1")
conf.set(sec1, "translate_to_sc", "1")
# actor_gender value: female or male or both or all(含人妖)
conf.set(sec1, "actor_gender", "female")
conf.set(sec1, "del_empty_folder", "1")
@@ -358,7 +379,6 @@ class Config:
conf.set(sec2, "type", "socks5")
conf.set(sec2, "cacert_file", "")
sec3 = "Name_Rule"
conf.add_section(sec3)
conf.set(sec3, "location_rule", "actor + '/' + number")
@@ -382,7 +402,7 @@ class Config:
conf.add_section(sec7)
conf.set(sec7, "switch", "0")
sec8 = "transalte"
sec8 = "translate"
conf.add_section(sec8)
conf.set(sec8, "switch", "0")
conf.set(sec8, "engine", "google-free")
@@ -402,8 +422,10 @@ class Config:
sec11 = "media"
conf.add_section(sec11)
conf.set(sec11, "media_type", ".mp4,.avi,.rmvb,.wmv,.mov,.mkv,.flv,.ts,.webm,.MP4,.AVI,.RMVB,.WMV,.MOV,.MKV,.FLV,.TS,.WEBM,iso,ISO")
conf.set(sec11, "sub_type", ".smi,.srt,.idx,.sub,.sup,.psb,.ssa,.ass,.txt,.usf,.xss,.ssf,.rt,.lrc,.sbv,.vtt,.ttml")
conf.set(sec11, "media_type",
".mp4,.avi,.rmvb,.wmv,.mov,.mkv,.flv,.ts,.webm,.MP4,.AVI,.RMVB,.WMV,.MOV,.MKV,.FLV,.TS,.WEBM,iso,ISO")
conf.set(sec11, "sub_type",
".smi,.srt,.idx,.sub,.sup,.psb,.ssa,.ass,.txt,.usf,.xss,.ssf,.rt,.lrc,.sbv,.vtt,.ttml")
sec12 = "watermark"
conf.add_section(sec12)
@@ -464,7 +486,8 @@ class IniProxy():
'''
if self.address:
if self.proxytype in self.SUPPORT_PROXY_TYPE:
proxies = {"http": self.proxytype + "://" + self.address, "https": self.proxytype + "://" + self.address}
proxies = {"http": self.proxytype + "://" + self.address,
"https": self.proxytype + "://" + self.address}
else:
proxies = {"http": "http://" + self.address, "https": "https://" + self.address}
else:
@@ -477,13 +500,16 @@ if __name__ == "__main__":
def evprint(evstr):
code = compile(evstr, "<string>", "eval")
print('{}: "{}"'.format(evstr, eval(code)))
config = Config()
mfilter = {'conf', 'proxy', '_exit', '_default_config', 'getboolean_override', 'getint_override', 'get_override', 'ini_path'}
mfilter = {'conf', 'proxy', '_exit', '_default_config', 'getboolean_override', 'getint_override', 'get_override',
'ini_path'}
for _m in [m for m in dir(config) if not m.startswith('__') and m not in mfilter]:
evprint(f'config.{_m}()')
pfilter = {'proxies', 'SUPPORT_PROXY_TYPE'}
# test getInstance()
assert(getInstance() == config)
assert (getInstance() == config)
for _p in [p for p in dir(getInstance().proxy()) if not p.startswith('__') and p not in pfilter]:
evprint(f'getInstance().proxy().{_p}')

View File

@@ -2,6 +2,7 @@ import os
import re
import sys
import config
import typing
G_spat = re.compile(
"^22-sht\.me|-fhd|_fhd|^fhd_|^fhd-|-hd|_hd|^hd_|^hd-|-sd|_sd|-1080p|_1080p|-720p|_720p|"
@@ -9,30 +10,30 @@ G_spat = re.compile(
re.IGNORECASE)
def get_number(debug,file_path: str) -> str:
# """
# >>> from number_parser import get_number
# >>> get_number("/Users/Guest/AV_Data_Capture/snis-829.mp4")
# 'snis-829'
# >>> get_number("/Users/Guest/AV_Data_Capture/snis-829-C.mp4")
# 'snis-829'
# >>> get_number("C:¥Users¥Guest¥snis-829.mp4")
# 'snis-829'
# >>> get_number("C:¥Users¥Guest¥snis-829-C.mp4")
# 'snis-829'
# >>> get_number("./snis-829.mp4")
# 'snis-829'
# >>> get_number("./snis-829-C.mp4")
# 'snis-829'
# >>> get_number(".¥snis-829.mp4")
# 'snis-829'
# >>> get_number(".¥snis-829-C.mp4")
# 'snis-829'
# >>> get_number("snis-829.mp4")
# 'snis-829'
# >>> get_number("snis-829-C.mp4")
# 'snis-829'
# """
def get_number(debug: bool, file_path: str) -> str:
"""
从文件路径中提取番号 from number_parser import get_number
>>> get_number(False, "/Users/Guest/AV_Data_Capture/snis-829.mp4")
'snis-829'
>>> get_number(False, "/Users/Guest/AV_Data_Capture/snis-829-C.mp4")
'snis-829'
>>> get_number(False, "C:¥Users¥Guest¥snis-829.mp4")
'snis-829'
>>> get_number(False, "C:¥Users¥Guest¥snis-829-C.mp4")
'snis-829'
>>> get_number(False, "./snis-829.mp4")
'snis-829'
>>> get_number(False, "./snis-829-C.mp4")
'snis-829'
>>> get_number(False, ".¥snis-829.mp4")
'snis-829'
>>> get_number(False, ".¥snis-829-C.mp4")
'snis-829'
>>> get_number(False, "snis-829.mp4")
'snis-829'
>>> get_number(False, "snis-829-C.mp4")
'snis-829'
"""
filepath = os.path.basename(file_path)
# debug True 和 False 两块代码块合并原因是此模块及函数只涉及字符串计算没有IO操作debug on时输出导致异常信息即可
try:
@@ -57,7 +58,7 @@ def get_number(debug,file_path: str) -> str:
try:
return str(
re.findall(r'(.+?)\.',
str(re.search('([^<>/\\\\|:""\\*\\?]+)\\.\\w+$', filepath).group()))).strip(
str(re.search('([^<>/\\\\|:""\\*\\?]+)\\.\\w+$', filepath).group()))).strip(
"['']").replace('_', '-')
except:
return str(re.search(r'(.+?)\.', filepath)[0])
@@ -69,29 +70,33 @@ def get_number(debug,file_path: str) -> str:
# Extract the number following the naming conventions of the javdb data source.
# Each key is a regex searched case-insensitively in the file name; the value
# builds the number from the same file name. The first matching key wins, so
# insertion order matters.
G_TAKE_NUM_RULES = {
    'tokyo.*hot': lambda x: str(re.search(r'(cz|gedo|k|n|red-|se)\d{2,4}', x, re.I).group()),
    'carib': lambda x: str(re.search(r'\d{6}(-|_)\d{3}', x, re.I).group()).replace('_', '-'),
    '1pon|mura|paco': lambda x: str(re.search(r'\d{6}(-|_)\d{3}', x, re.I).group()).replace('-', '_'),
    '10mu': lambda x: str(re.search(r'\d{6}(-|_)\d{2}', x, re.I).group()).replace('-', '_'),
    'x-art': lambda x: str(re.search(r'x-art\.\d{2}\.\d{2}\.\d{2}', x, re.I).group()),
    'xxx-av': lambda x: ''.join(['xxx-av-', re.findall(r'xxx-av[^\d]*(\d{3,5})[^\d]*', x, re.I)[0]]),
    'heydouga': lambda x: 'heydouga-' + '-'.join(re.findall(r'(\d{4})[\-_](\d{3,4})[^\d]*', x, re.I)[0]),
    'heyzo': lambda x: 'HEYZO-' + re.findall(r'heyzo[^\d]*(\d{4})', x, re.I)[0]
}


def get_number_by_dict(filename: str) -> typing.Optional[str]:
    """Extract a number from ``filename`` using ``G_TAKE_NUM_RULES``.

    Returns the result of the first rule whose key matches the file name.
    Returns None when no key matches, or when the selected extractor fails
    (for example the studio name is present but the expected digits are not).
    """
    try:
        for k, v in G_TAKE_NUM_RULES.items():
            if re.search(k, filename, re.I):
                return v(filename)
    except Exception:
        # Extraction is best-effort; unlike a bare ``except`` this no longer
        # swallows KeyboardInterrupt or SystemExit.
        pass
    return None
class Cache_uncensored_conf:
prefix = None
def is_empty(self):
return bool(self.prefix is None)
def set(self, v: list):
if not v or not len(v) or not len(v[0]):
raise ValueError('input prefix list empty or None')
@@ -100,28 +105,32 @@ class Cache_uncensored_conf:
for i in v[1:]:
s += f"|{i}.+"
self.prefix = re.compile(s, re.I)
def check(self, number):
if self.prefix is None:
raise ValueError('No init re compile')
return self.prefix.match(number)
G_cache_uncensored_conf = Cache_uncensored_conf()
# ========================================================================是否为无码
def is_uncensored(number):
    """Tell whether *number* looks like an uncensored release number.

    A built-in pattern is tried first. If it does not match, the prefixes
    configured via ``config.getInstance().get_uncensored()`` (a comma-separated
    string) are compiled once into ``G_cache_uncensored_conf`` and checked.

    Args:
        number: Movie number string to classify.

    Returns:
        ``True`` when the built-in pattern matches; otherwise the result of
        ``G_cache_uncensored_conf.check(number)``, i.e. a truthy match object
        or ``None``.
    """
    if re.match(
        r'[\d-]{4,}|\d{6}_\d{2,3}|(cz|gedo|k|n|red-|se)\d{2,4}|heyzo.+|xxx-av-.+|heydouga-.+|x-art\.\d{2}\.\d{2}\.\d{2}',
        number,
        re.I
    ):
        return True
    # Compile the configured prefixes lazily, only the first time they are needed.
    if G_cache_uncensored_conf.is_empty():
        G_cache_uncensored_conf.set(config.getInstance().get_uncensored().split(','))
    return G_cache_uncensored_conf.check(number)
if __name__ == "__main__":
# import doctest
# doctest.testmod(raise_on_error=True)
# import doctest
# doctest.testmod(raise_on_error=True)
test_use_cases = (
"MEYD-594-C.mp4",
"SSIS-001_C.mp4",
@@ -132,26 +141,30 @@ if __name__ == "__main__":
"SDDE-625_uncensored_C.mp4",
"SDDE-625_uncensored_leak_C.mp4",
"SDDE-625_uncensored_leak_C_cd1.mp4",
"Tokyo Hot n9001 FHD.mp4", # 无-号,以前无法正确提取
"Tokyo Hot n9001 FHD.mp4", # 无-号,以前无法正确提取
"TokyoHot-n1287-HD SP2006 .mp4",
"caribean-020317_001.nfo", # -号误命名为_号的
"caribean-020317_001.nfo", # -号误命名为_号的
"257138_3xplanet_1Pondo_080521_001.mp4",
"ADV-R0624-CD3.wmv", # 多碟影片
"XXX-AV 22061-CD5.iso", # 新支持片商格式 xxx-av-22061 命名规则来自javdb数据源
"ADV-R0624-CD3.wmv", # 多碟影片
"XXX-AV 22061-CD5.iso", # 新支持片商格式 xxx-av-22061 命名规则来自javdb数据源
"xxx-av 20589.mp4",
"Muramura-102114_145-HD.wmv", # 新支持片商格式 102114_145 命名规则来自javdb数据源
"heydouga-4102-023-CD2.iso", # 新支持片商格式 heydouga-4102-023 命名规则来自javdb数据源
"HeyDOuGa4236-1048 Ai Qiu - .mp4", # heydouga-4236-1048 命名规则来自javdb数据源
"pacopacomama-093021_539-FHD.mkv", # 新支持片商格式 093021_539 命名规则来自javdb数据源
"heydouga-4102-023-CD2.iso", # 新支持片商格式 heydouga-4102-023 命名规则来自javdb数据源
"HeyDOuGa4236-1048 Ai Qiu - .mp4", # heydouga-4236-1048 命名规则来自javdb数据源
"pacopacomama-093021_539-FHD.mkv", # 新支持片商格式 093021_539 命名规则来自javdb数据源
"sbw99.cc@heyzo_hd_2636_full.mp4"
)
def evprint(evstr):
    """Evaluate the expression *evstr* and print its result beside its argument.

    *evstr* is expected to look like ``get_number(True, "<name>")``: the slice
    ``evstr[18:-2]`` drops that 18-character call prefix and the trailing
    ``")`` so that only ``<name>`` is echoed after the result.

    The result is converted with ``!s`` before being right-aligned, so a
    ``None`` result prints as ``None`` instead of raising ``TypeError``.
    """
    # NOTE(review): eval() executes whatever expression it is given; a name
    # containing a double quote would break or alter the expression.
    code = compile(evstr, "<string>", "eval")
    print("{1!s:>20} # '{0}'".format(evstr[18:-2], eval(code)))
for t in test_use_cases:
evprint(f'get_number(True, "{t}")')
if len(sys.argv)<=1 or not re.search('^[A-Z]:?', sys.argv[1], re.IGNORECASE):
if len(sys.argv) <= 1 or not re.search('^[A-Z]:?', sys.argv[1], re.IGNORECASE):
sys.exit(0)
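# Use the ES command-line tool of Everything to collect video file names from the whole disk as test cases for number parsing; the argument is a drive letter A .. Z or a path that includes a drive letter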
@@ -170,6 +183,7 @@ if __name__ == "__main__":
# Example:
# python3 ./number_parser.py ALL
import subprocess
ES_search_path = "ALL disks"
if sys.argv[1] == "ALL":
if sys.platform == "win32":
@@ -177,18 +191,19 @@ if __name__ == "__main__":
ES_prog_path = 'es.exe' # es.exe需要放在PATH环境变量的路径之内
ES_cmdline = f'{ES_prog_path} -name size:gigantic ext:mp4;avi;rmvb;wmv;mov;mkv;flv;ts;webm;iso;mpg;m4v'
out_bytes = subprocess.check_output(ES_cmdline.split(' '))
out_text = out_bytes.decode('gb18030') # 中文版windows 10 x64默认输出GB18030此编码为UNICODE方言与UTF-8系全射关系无转码损失
out_text = out_bytes.decode('gb18030') # 中文版windows 10 x64默认输出GB18030此编码为UNICODE方言与UTF-8系全射关系无转码损失
out_list = out_text.splitlines()
elif sys.platform in ("linux", "darwin"):
ES_prog_path = 'locate' if sys.platform == 'linux' else 'glocate'
ES_cmdline = r"{} -b -i --regex '\.mp4$|\.avi$|\.rmvb$|\.wmv$|\.mov$|\.mkv$|\.webm$|\.iso$|\.mpg$|\.m4v$'".format(ES_prog_path)
ES_cmdline = r"{} -b -i --regex '\.mp4$|\.avi$|\.rmvb$|\.wmv$|\.mov$|\.mkv$|\.webm$|\.iso$|\.mpg$|\.m4v$'".format(
ES_prog_path)
out_bytes = subprocess.check_output(ES_cmdline.split(' '))
out_text = out_bytes.decode('utf-8')
out_list = [ os.path.basename(line) for line in out_text.splitlines()]
out_list = [os.path.basename(line) for line in out_text.splitlines()]
else:
print('[-]Unsupported platform! Please run on OS Windows/Linux/MacOSX. Exit.')
sys.exit(1)
else: # Windows single disk
else: # Windows single disk
if sys.platform != "win32":
print('[!]Usage: python3 ./number_parser.py ALL')
sys.exit(0)
@@ -203,7 +218,7 @@ if __name__ == "__main__":
ES_search_path = os.path.normcase(ES_search_path)
ES_cmdline = f'{ES_prog_path} -path {ES_search_path} -name size:gigantic ext:mp4;avi;rmvb;wmv;mov;mkv;webm;iso;mpg;m4v'
out_bytes = subprocess.check_output(ES_cmdline.split(' '))
out_text = out_bytes.decode('gb18030') # 中文版windows 10 x64默认输出GB18030此编码为UNICODE方言与UTF-8系全射关系无转码损失
out_text = out_bytes.decode('gb18030') # 中文版windows 10 x64默认输出GB18030此编码为UNICODE方言与UTF-8系全射关系无转码损失
out_list = out_text.splitlines()
print(f'\n[!]{ES_prog_path} is searching {ES_search_path} for movies as number parser test cases...')
print(f'[+]Find {len(out_list)} Movies.')