@@ -18,6 +18,7 @@ from requests.adapters import HTTPAdapter
 from urllib3.util.retry import Retry
 from cloudscraper import create_scraper
 from concurrent.futures import ThreadPoolExecutor
+from unicodedata import category


 def getXpathSingle(htmlcode, xpath):
@@ -26,7 +27,7 @@ def getXpathSingle(htmlcode, xpath):
     return result1


-G_USER_AGENT = r'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36'
+G_USER_AGENT = r'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.75 Safari/537.36'


 def get_html(url, cookies: dict = None, ua: str = None, return_type: str = None, encoding: str = None):
@@ -381,7 +382,7 @@ def load_cookies(cookie_json_filename: str):
                 break
         if not cookies_filename:
             return None, None
-        return json.load(open(cookies_filename)), cookies_filename
+        return json.loads(Path(cookies_filename).read_text(encoding='utf-8')), cookies_filename
     except:
         return None, None

@@ -519,14 +520,13 @@ def download_one_file(args) -> str:
     wrapped for map function
     """
-
-    def _inner(url: str, save_path: Path):
-        filebytes = get_html(url, return_type='content')
-        if isinstance(filebytes, bytes) and len(filebytes):
-            if len(filebytes) == save_path.open('wb').write(filebytes):
-                return str(save_path)
-
-    return _inner(*args)
+    (url, save_path) = args
+    filebytes = get_html(url, return_type='content')
+    if isinstance(filebytes, bytes) and len(filebytes):
+        with save_path.open('wb') as fpbyte:
+            if len(filebytes) == fpbyte.write(filebytes):
+                return str(save_path)


 def parallel_download_files(dn_list: typing.Iterable[typing.Sequence], parallel: int = 0):
     """
@@ -567,6 +567,7 @@ def delete_all_elements_in_list(string: str, lists: typing.Iterable[str]):
             new_lists.append(i)
     return new_lists

+
 def delete_all_elements_in_str(string_delete: str, string: str):
     """
     delete same string in given list
@@ -574,4 +575,14 @@ def delete_all_elements_in_str(string_delete: str, string: str):
     for i in string:
         if i == string_delete:
             string = string.replace(i,"")
     return string
+
+
+def cnspace(v: str, n: int) -> int:
+    """
+    print format空格填充对齐内容包含中文时的空格计算
+    """
+    cw = 0
+    for c in v:
+        cw += 1 if category(c) in ('Lo',) else 0
+    return n - cw
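The cnspace() helper added above subtracts one pad cell for every character whose Unicode category is 'Lo' (CJK and similar full-width letters), so str.format column widths still line up when a field mixes Chinese and ASCII. A minimal usage sketch; the helper is restated so the snippet runs standalone, and the 30-cell column plus sample strings are made up:

```python
from unicodedata import category

def cnspace(v: str, n: int) -> int:
    # full-width letters ('Lo') occupy two terminal cells, so shrink the pad budget
    return n - sum(1 for c in v if category(c) in ('Lo',))

for title in ['ABC-123 sample', '中文标题 sample']:
    # both rows line up in a terminal despite the CJK characters
    print(f"{title:<{cnspace(title, 30)}}| next column")
```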
@@ -2,11 +2,14 @@ import logging
 import os
 import config
 import importlib
+from pathlib import Path
 from PIL import Image
 import shutil
 from ADC_function import file_not_exist_or_empty

+
 def face_crop_width(filename, width, height):
+    aspect_ratio = config.getInstance().face_aspect_ratio()
     # 新宽度是高度的2/3
     cropWidthHalf = int(height/3)
     try:
@@ -21,15 +24,15 @@ def face_crop_width(filename, width, height):
         # 越界处理
         if cropLeft < 0:
             cropLeft = 0
-            cropRight = cropWidthHalf*2
+            cropRight = cropWidthHalf * aspect_ratio
         elif cropRight > width:
-            cropLeft = width-cropWidthHalf*2
+            cropLeft = width - cropWidthHalf * aspect_ratio
             cropRight = width
         return (cropLeft, 0, cropRight, height)
     except:
         print('[-]Not found face! ' + filename)
         # 默认靠右切
-        return (width-cropWidthHalf*2, 0, width, height)
+        return (width-cropWidthHalf * aspect_ratio, 0, width, height)


 def face_crop_height(filename, width, height):
@@ -54,31 +57,39 @@ def face_crop_height(filename, width, height):
         return (0, 0, width, cropHeight)


-def cutImage(imagecut, path, fanart_path, poster_path):
+def cutImage(imagecut, path, fanart_path, poster_path, skip_facerec=False):
+    conf = config.getInstance()
     fullpath_fanart = os.path.join(path, fanart_path)
     fullpath_poster = os.path.join(path, poster_path)
-    if config.getInstance().download_only_missing_images() and not file_not_exist_or_empty(fullpath_poster):
+    aspect_ratio = conf.face_aspect_ratio()
+    if conf.face_aways_imagecut():
+        imagecut = 1
+    elif conf.download_only_missing_images() and not file_not_exist_or_empty(fullpath_poster):
         return
     if imagecut == 1: # 剪裁大封面
         try:
             img = Image.open(fullpath_fanart)
             width, height = img.size
             if width/height > 2/3: # 如果宽度大于2
-                # 以人像为中心切取
-                img2 = img.crop(face_crop_width(fullpath_fanart, width, height))
+                if skip_facerec:
+                    # 有码封面默认靠右切
+                    img2 = img.crop((width - int(height/3) * aspect_ratio, 0, width, height))
+                else:
+                    # 以人像为中心切取
+                    img2 = img.crop(face_crop_width(fullpath_fanart, width, height))
             elif width/height < 2/3: # 如果高度大于3
                 # 从底部向上切割
                 img2 = img.crop(face_crop_height(fullpath_fanart, width, height))
             else: # 如果等于2/3
                 img2 = img
             img2.save(fullpath_poster)
-            print('[+]Image Cutted! ' + fullpath_poster)
+            print(f"[+]Image Cutted! {Path(fullpath_poster).name}")
         except Exception as e:
             print(e)
             print('[-]Cover cut failed!')
     elif imagecut == 0: # 复制封面
         shutil.copyfile(fullpath_fanart, fullpath_poster)
-        print('[+]Image Copyed! ' + fullpath_poster)
+        print(f"[+]Image Copyed! {Path(fullpath_poster).name}")


 def face_center(filename, model):
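cutImage() now derives the poster box from face_aspect_ratio(): the crop width is int(height/3) * aspect_ratio, so aspect_ratio=2 reproduces the old 2:3 poster and the 2.12 default is slightly wider. A standalone sketch of the skip_facerec branch (cut from the right edge), assuming Pillow is installed; the file names are hypothetical:

```python
from PIL import Image

def right_edge_poster(fanart_file: str, poster_file: str, aspect_ratio: float = 2.12) -> None:
    img = Image.open(fanart_file)
    width, height = img.size
    if width / height > 2 / 3:                       # fanart wider than a poster
        crop_width = int(height / 3) * aspect_ratio  # same formula as cutImage()
        img = img.crop((width - crop_width, 0, width, height))
    img.save(poster_file)

# illustrative paths only
right_edge_poster('ABC-123-fanart.jpg', 'ABC-123-poster.jpg')
```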
@@ -18,7 +18,7 @@ from opencc import OpenCC
 import config
 from ADC_function import file_modification_days, get_html, parallel_download_files
 from number_parser import get_number
-from core import core_main, moveFailedFolder
+from core import core_main, core_main_no_net_op, moveFailedFolder


 def check_update(local_version):
@@ -40,7 +40,7 @@ def check_update(local_version):
     print("[*]======================================================")


-def argparse_function(ver: str) -> typing.Tuple[str, str, str, str, bool]:
+def argparse_function(ver: str) -> typing.Tuple[str, str, str, str, bool, bool]:
     conf = config.getInstance()
     parser = argparse.ArgumentParser(epilog=f"Load Config file '{conf.ini_path}'.")
     parser.add_argument("file", default='', nargs='?', help="Single Movie file path.")
@@ -62,12 +62,16 @@ def argparse_function(ver: str) -> typing.Tuple[str, str, str, str, bool]:
                         help="Override nfo_skip_days value in config.")
     parser.add_argument("-c", "--stop-counter", dest='cnt', default='', nargs='?',
                         help="Override stop_counter value in config.")
+    parser.add_argument("-R", "--rerun-delay", dest='delaytm', default='', nargs='?',
+                        help="Delay (eg. 1h10m30s or 60 (second)) time and rerun, until all movies proceed. Note: stop_counter value in config or -c must none zero.")
     parser.add_argument("-i", "--ignore-failed-list", action="store_true", help="Ignore failed list '{}'".format(
         os.path.join(os.path.abspath(conf.failed_folder()), 'failed_list.txt')))
     parser.add_argument("-a", "--auto-exit", action="store_true",
                         help="Auto exit after program complete")
     parser.add_argument("-g", "--debug", action="store_true",
                         help="Turn on debug mode to generate diagnostic log for issue report.")
+    parser.add_argument("-N", "--no-network-operation", action="store_true",
+                        help="No network query, do not get metadata, for cover cropping purposes, only takes effect when main mode is 3.")
     parser.add_argument("-z", "--zero-operation", dest='zero_op', action="store_true",
                         help="""Only show job list of files and numbers, and **NO** actual operation
 is performed. It may help you correct wrong numbers before real job.""")
@@ -92,8 +96,17 @@ is performed. It may help you correct wrong numbers before real job.""")
     config.G_conf_override["common:stop_counter"] = get_natural_number_or_none(args.cnt)
     config.G_conf_override["common:ignore_failed_list"] = get_bool_or_none(args.ignore_failed_list)
     config.G_conf_override["debug_mode:switch"] = get_bool_or_none(args.debug)
+    config.G_conf_override["common:rerun_delay"] = get_str_or_none(args.delaytm)

-    return args.file, args.number, args.logdir, args.regexstr, args.zero_op
+    no_net_op = False
+    if conf.main_mode() == 3:
+        no_net_op = args.no_network_operation
+        if no_net_op:
+            config.G_conf_override["common:stop_counter"] = 0
+            config.G_conf_override["common:rerun_delay"] = '0s'
+            config.G_conf_override["face:aways_imagecut"] = True
+
+    return args.file, args.number, args.logdir, args.regexstr, args.zero_op, no_net_op


 class OutLogger(object):
@@ -250,29 +263,31 @@ def close_logfile(logdir: str):
         except:
             pass
     # 第三步,月合并到年
-    if today.month < 4:
-        return
-    mons = [f for f in log_dir.glob(r'*.txt') if re.match(r'^mdc_\d{6}$', f.stem, re.A)]
-    if not mons or not len(mons):
-        return
-    mons.sort()
-    deadline_year = f'mdc_{today.year - 1}13'
-    year_merge = [f for f in mons if f.stem < deadline_year]
-    if not year_merge or not len(year_merge):
-        return
-    toyear = len('12.txt') # cut length mdc_2020|12.txt
-    for f in year_merge:
-        try:
-            year_file_name = str(f)[:-toyear] + '.txt' # mdc_2020.txt
-            with open(year_file_name, 'a', encoding='utf-8') as y:
-                y.write(f.read_text(encoding='utf-8'))
-            f.unlink(missing_ok=True)
-        except:
-            pass
+    for i in range(1):
+        if today.month < 4:
+            break
+        mons = [f for f in log_dir.glob(r'*.txt') if re.match(r'^mdc_\d{6}$', f.stem, re.A)]
+        if not mons or not len(mons):
+            break
+        mons.sort()
+        deadline_year = f'mdc_{today.year - 1}13'
+        year_merge = [f for f in mons if f.stem < deadline_year]
+        if not year_merge or not len(year_merge):
+            break
+        toyear = len('12.txt') # cut length mdc_2020|12.txt
+        for f in year_merge:
+            try:
+                year_file_name = str(f)[:-toyear] + '.txt' # mdc_2020.txt
+                with open(year_file_name, 'a', encoding='utf-8') as y:
+                    y.write(f.read_text(encoding='utf-8'))
+                f.unlink(missing_ok=True)
+            except:
+                pass
     # 第四步,压缩年志 如果有压缩需求,请自行手工压缩,或者使用外部脚本来定时完成。推荐nongnu的lzip,对于
     # 这种粒度的文本日志,压缩比是目前最好的。lzip -9的运行参数下,日志压缩比要高于xz -9,而且内存占用更少,
     # 多核利用率更高(plzip多线程版本),解压速度更快。压缩后的大小差不多是未压缩时的2.4%到3.7%左右,
     # 100MB的日志文件能缩小到3.7MB。
+    return filepath


 def signal_handler(*args):
@@ -360,7 +375,7 @@ def movie_lists(source_folder, regexstr: str) -> typing.List[str]:
     skip_numbers = set()
     success_folder = Path(conf.success_folder()).resolve()
     for f in success_folder.glob(r'**/*'):
-        if not re.match(r'\.nfo', f.suffix, re.IGNORECASE):
+        if not re.match(r'\.nfo$', f.suffix, re.IGNORECASE):
             continue
         if file_modification_days(f) > nfo_skip_days:
             continue
@@ -411,38 +426,44 @@ def rm_empty_folder(path):
         pass


-def create_data_and_move(file_path: str, zero_op, oCC):
+def create_data_and_move(movie_path: str, zero_op: bool, no_net_op: bool, oCC):
     # Normalized number, eg: 111xxx-222.mp4 -> xxx-222.mp4
     debug = config.getInstance().debug()
-    n_number = get_number(debug, os.path.basename(file_path))
-    file_path = os.path.abspath(file_path)
+    n_number = get_number(debug, os.path.basename(movie_path))
+    movie_path = os.path.abspath(movie_path)

     if debug is True:
-        print(f"[!] [{n_number}] As Number making data for '{file_path}'")
+        print(f"[!] [{n_number}] As Number making data for '{movie_path}'")
         if zero_op:
             return
         if n_number:
-            core_main(file_path, n_number, oCC)
+            if no_net_op:
+                core_main_no_net_op(movie_path, n_number)
+            else:
+                core_main(movie_path, n_number, oCC)
         else:
             print("[-] number empty ERROR")
-            moveFailedFolder(file_path)
+            moveFailedFolder(movie_path)
         print("[*]======================================================")
     else:
         try:
-            print(f"[!] [{n_number}] As Number making data for '{file_path}'")
+            print(f"[!] [{n_number}] As Number making data for '{movie_path}'")
             if zero_op:
                 return
             if n_number:
-                core_main(file_path, n_number, oCC)
+                if no_net_op:
+                    core_main_no_net_op(movie_path, n_number)
+                else:
+                    core_main(movie_path, n_number, oCC)
             else:
                 raise ValueError("number empty")
             print("[*]======================================================")
         except Exception as err:
-            print(f"[-] [{file_path}] ERROR:")
+            print(f"[-] [{movie_path}] ERROR:")
             print('[-]', err)

             try:
-                moveFailedFolder(file_path)
+                moveFailedFolder(movie_path)
             except Exception as err:
                 print('[!]', err)

@@ -472,18 +493,9 @@ def create_data_and_move_with_custom_number(file_path: str, custom_number, oCC):
         print('[!]', err)


-def main():
-    version = '6.0.3'
-    urllib3.disable_warnings() # Ignore http proxy warning
-
-    # Read config.ini first, in argparse_function() need conf.failed_folder()
-    conf = config.Config("config.ini")
-
-    # Parse command line args
-    single_file_path, custom_number, logdir, regexstr, zero_op = argparse_function(version)
-
-
+def main(args: tuple) -> Path:
+    (single_file_path, custom_number, logdir, regexstr, zero_op, no_net_op) = args
+    conf = config.getInstance()
     main_mode = conf.main_mode()
     folder_path = ""
     if main_mode not in (1, 2, 3):
@@ -596,7 +608,7 @@ def main():
         percentage = str(count / int(count_all) * 100)[:4] + '%'
         print('[!] {:>30}{:>21}'.format('- ' + percentage + ' [' + str(count) + '/' + count_all + '] -',
                                         time.strftime("%H:%M:%S")))
-        create_data_and_move(movie_path, zero_op, oCC)
+        create_data_and_move(movie_path, zero_op, no_net_op, oCC)
         if count >= stop_count:
             print("[!]Stop counter triggered!")
             break
@@ -614,14 +626,68 @@ def main():

     print("[+]All finished!!!")

-    close_logfile(logdir)
+    return close_logfile(logdir)


+def 分析日志文件(logfile):
+    try:
+        if not (isinstance(logfile, Path) and logfile.is_file()):
+            raise FileNotFoundError('log file not found')
+        logtxt = logfile.read_text(encoding='utf-8')
+        扫描电影数 = int(re.findall(r'\[\+]Find (.*) movies\.', logtxt)[0])
+        已处理 = int(re.findall(r'\[1/(.*?)] -', logtxt)[0])
+        完成数 = logtxt.count(r'[+]Wrote!')
+        return 扫描电影数, 已处理, 完成数
+    except:
+        return None, None, None
+
+
+def period(delta, pattern):
+    d = {'d': delta.days}
+    d['h'], rem = divmod(delta.seconds, 3600)
+    d['m'], d['s'] = divmod(rem, 60)
+    return pattern.format(**d)
+
+
+if __name__ == '__main__':
+    version = '6.0.3'
+    multiprocessing.freeze_support()
+    urllib3.disable_warnings() # Ignore http proxy warning
+    app_start = time.time()
+
+    # Read config.ini first, in argparse_function() need conf.failed_folder()
+    conf = config.Config("config.ini")
+
+    # Parse command line args
+    args = tuple(argparse_function(version))
+
+    再运行延迟 = conf.rerun_delay()
+    if 再运行延迟 > 0 and conf.stop_counter() > 0:
+        while True:
+            try:
+                logfile = main(args)
+                (扫描电影数, 已处理, 完成数) = 分析结果元组 = tuple(分析日志文件(logfile))
+                if all(isinstance(v, int) for v in 分析结果元组):
+                    剩余个数 = 扫描电影数 - 已处理
+                    总用时 = timedelta(seconds = time.time() - app_start)
+                    print(f'All movies:{扫描电影数} processed:{已处理} successes:{完成数} remain:{剩余个数}' +
+                          ' Elapsed time {}'.format(
+                              period(总用时, "{d} day {h}:{m:02}:{s:02}") if 总用时.days == 1
+                              else period(总用时, "{d} days {h}:{m:02}:{s:02}") if 总用时.days > 1
+                              else period(总用时, "{h}:{m:02}:{s:02}")))
+                    if 剩余个数 == 0:
+                        break
+                    下次运行 = datetime.now() + timedelta(seconds=再运行延迟)
+                    print(f'Next run time: {下次运行.strftime("%H:%M:%S")}, rerun_delay={再运行延迟}, press Ctrl+C stop run.')
+                    time.sleep(再运行延迟)
+                else:
+                    break
+            except:
+                break
+    else:
+        main(args)

     if not conf.auto_exit():
         input("Press enter key exit, you can check the error message before you exit...")

     sys.exit(0)
-
-
-if __name__ == '__main__':
-    multiprocessing.freeze_support()
-    main()
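The rerun loop above prints elapsed time through period(), which unpacks a timedelta into day/hour/minute/second fields for str.format. A self-contained check of that formatting; the sample durations are arbitrary:

```python
from datetime import timedelta

def period(delta, pattern):
    d = {'d': delta.days}
    d['h'], rem = divmod(delta.seconds, 3600)
    d['m'], d['s'] = divmod(rem, 60)
    return pattern.format(**d)

print(period(timedelta(seconds=93784), "{d} day {h}:{m:02}:{s:02}"))  # -> 1 day 2:03:04
print(period(timedelta(seconds=754), "{h}:{m:02}:{s:02}"))            # -> 0:12:34
```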
@@ -248,8 +248,8 @@ def get_data_from_json(file_number, oCC):
             if json_data[translate_value] == "":
                 continue
             if translate_value == "title":
-                title_dict = json.load(
-                    open(str(Path.home() / '.local' / 'share' / 'mdc' / 'c_number.json'), 'r', encoding="utf-8"))
+                title_dict = json.loads(
+                    (Path.home() / '.local' / 'share' / 'mdc' / 'c_number.json').read_text(encoding="utf-8"))
                 try:
                     json_data[translate_value] = title_dict[number]
                     continue
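The json.loads(Path(...).read_text(...)) form reads, decodes, and closes the file in one call, whereas json.load(open(...)) leaves the handle open until garbage collection. A tiny self-contained comparison using a throwaway file instead of the real c_number.json:

```python
import json
from pathlib import Path

mapping_file = Path('c_number_demo.json')   # stand-in for the real mapping file
mapping_file.write_text(json.dumps({'pred00251': 'demo title'}), encoding='utf-8')

# read + decode + close in one call, no dangling file handle
title_dict = json.loads(mapping_file.read_text(encoding='utf-8'))
print(title_dict['pred00251'])
mapping_file.unlink()
```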
@@ -40,6 +40,7 @@ def main(number: str) -> json:
         'website': f'{G_SITE}/moviepages/{number}/index.html',
         'source': 'carib.py',
         'series': get_series(lx),
+        '无码': True
     }
     js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), )
     return js
@@ -250,6 +250,13 @@ def main(number):
     # but the hinban on the page is test00012
     # so get the hinban first, and then pass it to following functions
     fanza_hinban = getNum(htmlcode)
+    out_num = fanza_hinban
+    number_lo = number.lower()
+    if (re.sub('-|_', '', number_lo) == fanza_hinban or
+            number_lo.replace('-', '00') == fanza_hinban or
+            number_lo.replace('-', '') + 'so' == fanza_hinban
+    ):
+        out_num = number
     data = {
         "title": getTitle(htmlcode).strip(),
         "studio": getStudio(htmlcode),
@@ -258,7 +265,7 @@ def main(number):
         "director": getDirector(htmlcode) if "anime" not in chosen_url else "",
         "actor": getActor(htmlcode) if "anime" not in chosen_url else "",
         "release": getRelease(htmlcode),
-        "number": fanza_hinban,
+        "number": out_num,
         "cover": getCover(htmlcode, fanza_hinban),
         "imagecut": 1,
         "tag": getTag(htmlcode),
@@ -315,3 +322,5 @@ if __name__ == "__main__":
     # print(main("DV-1562"))
     # print(main("96fad1217"))
     print(main("pred00251"))
+    print(main("MIAA-391"))
+    print(main("OBA-326"))
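The new fanza block only reports the caller's number when it normalizes onto the hinban scraped from the page (separators stripped, dash padded with "00", or an "so" suffix); otherwise it falls back to the page hinban. A sketch of that comparison in isolation; the hinban strings below are illustrative pairings, not values taken from the site:

```python
import re

def pick_output_number(number: str, fanza_hinban: str) -> str:
    number_lo = number.lower()
    if (re.sub('-|_', '', number_lo) == fanza_hinban or
            number_lo.replace('-', '00') == fanza_hinban or
            number_lo.replace('-', '') + 'so' == fanza_hinban):
        return number          # the user's spelling maps onto the page hinban
    return fanza_hinban        # otherwise trust the page

print(pick_output_number('PRED-251', 'pred00251'))   # PRED-251 (dash padded with 00)
print(pick_output_number('ABP-888', 'abp888so'))     # ABP-888  ('so' suffix variant)
print(pick_output_number('MIAA-391', 'miaa00391x'))  # miaa00391x (no match, keep hinban)
```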
@@ -83,9 +83,13 @@ def getExtrafanart(htmlcode): # 获取剧照
     if extrafanart_imgs:
         return [urljoin('https://www.javbus.com',img) for img in extrafanart_imgs]
     return ''
+def getUncensored(html):
+    x = html.xpath('//*[@id="navbar"]/ul[1]/li[@class="active"]/a[contains(@href,"uncensored")]')
+    return bool(x)

 def main_uncensored(number):
-    htmlcode = get_html('https://www.javbus.com/ja/' + number)
+    w_number = number.replace('.', '-')
+    htmlcode = get_html('https://www.javbus.red/' + w_number)
     if "<title>404 Page Not Found" in htmlcode:
         raise Exception('404 page not found')
     lx = etree.fromstring(htmlcode, etree.HTMLParser())
@@ -94,7 +98,7 @@ def main_uncensored(number):
         'title': title,
         'studio': getStudioJa(lx),
         'year': getYear(lx),
-        'outline': getOutline(number, title),
+        'outline': getOutline(w_number, title),
         'runtime': getRuntime(lx),
         'director': getDirectorJa(lx),
         'actor': getActor(lx),
@@ -106,9 +110,10 @@ def main_uncensored(number):
         'label': getSeriseJa(lx),
         'imagecut': 0,
         # 'actor_photo': '',
-        'website': 'https://www.javbus.com/ja/' + number,
+        'website': 'https://www.javbus.red/' + w_number,
         'source': 'javbus.py',
         'series': getSeriseJa(lx),
+        '无码': True
     }
     js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8')
     return js
@@ -151,6 +156,7 @@ def main(number):
         'website': 'https://www.javbus.com/' + number,
         'source': 'javbus.py',
         'series': getSerise(lx),
+        '无码': getUncensored(lx)
     }
     js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4,separators=(',', ':'), ) # .encode('UTF-8')
     return js
@@ -169,12 +175,13 @@ def main(number):

 if __name__ == "__main__" :
     config.G_conf_override['debug_mode:switch'] = True
-    print(main('ABP-888'))
-    print(main('ABP-960'))
-    print(main('ADV-R0624')) # 404
-    print(main('MMNT-010'))
-    print(main('ipx-292'))
-    print(main('CEMD-011'))
-    print(main('CJOD-278'))
+    # print(main('ABP-888'))
+    # print(main('ABP-960'))
+    # print(main('ADV-R0624')) # 404
+    # print(main('MMNT-010'))
+    # print(main('ipx-292'))
+    # print(main('CEMD-011'))
+    # print(main('CJOD-278'))
+    print(main('BrazzersExxtra.21.02.01'))
     print(main('100221_001'))
     print(main('AVSW-061'))
@@ -179,6 +179,10 @@ def getUserRating(html):
         return float(v[0][0]), int(v[0][1])
     except:
         return
+def getUncensored(html):
+    x = html.xpath('//strong[contains(text(),"類別")]/../span/a[contains(@href,"/tags/uncensored?")'
+                   ' or contains(@href,"/tags/western?")]')
+    return bool(x)

 def main(number):
     # javdb更新后同一时间只能登录一个数字站,最新登录站会踢出旧的登录,因此按找到的第一个javdb*.json文件选择站点,
@@ -300,7 +304,7 @@ def main(number):
         'website': urljoin('https://javdb.com', correct_url),
         'source': 'javdb.py',
         'series': getSeries(lx),
+        '无码': getUncensored(lx)
     }
     userrating = getUserRating(lx)
     if isinstance(userrating, tuple) and len(userrating) == 2:
@@ -328,7 +332,7 @@ if __name__ == "__main__":
     # print(main('blacked.20.05.30'))
     # print(main('AGAV-042'))
     # print(main('BANK-022'))
-    # print(main('070116-197'))
+    print(main('070116-197'))
     # print(main('093021_539')) # 没有剧照 片商pacopacomama
     #print(main('FC2-2278260'))
     # print(main('FC2-735670'))
@@ -339,3 +343,4 @@ if __name__ == "__main__":
     # print(main('EBOD-646'))
     # print(main('LOVE-262'))
     print(main('ABP-890'))
+    print(main('blacked.14.12.08'))
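Both javbus.py and javdb.py now expose getUncensored(), which turns an XPath hit on the tag links into the new '无码' flag. A minimal demonstration of the javdb variant against a hand-written HTML fragment; the markup below only approximates the real tag block:

```python
from lxml import etree

html = etree.HTML("""
<div>
  <strong>類別:</strong>
  <span><a href="/tags/uncensored?locale=zh">Uncensored</a></span>
</div>
""")

x = html.xpath('//strong[contains(text(),"類別")]/../span/a[contains(@href,"/tags/uncensored?")'
               ' or contains(@href,"/tags/western?")]')
print(bool(x))  # True -> the scraper would set '无码': True
```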
@@ -146,7 +146,8 @@ def main(number):
         'website': url,
         'source': 'madou.py',
         # 使用
-        'series': getSerise(html)
+        'series': getSerise(html),
+        '无码': True
     }
     js = json.dumps(dic, ensure_ascii=False, sort_keys=True,
                     indent=4, separators=(',', ':'), ) # .encode('UTF-8')
@@ -80,34 +80,34 @@ def getStoryline(number, title, sites: list=None):


 def getStoryline_mp(args):
-    def _inner(site, number, title, debug):
-        start_time = time.time()
-        storyline = None
-        if not isinstance(site, str):
-            return storyline
-        elif site == "airavwiki":
-            storyline = getStoryline_airavwiki(number, debug)
-        elif site == "airav":
-            storyline = getStoryline_airav(number, debug)
-        elif site == "avno1":
-            storyline = getStoryline_avno1(number, debug)
-        elif site == "xcity":
-            storyline = getStoryline_xcity(number, debug)
-        elif site == "amazon":
-            storyline = getStoryline_amazon(title, number, debug)
-        elif site == "58avgo":
-            storyline = getStoryline_58avgo(number, debug)
-        if not debug:
-            return storyline
-        # 进程池模式的子进程getStoryline_*()的print()不会写入日志中,线程池和顺序执行不受影响
-        print("[!]MP 进程[{}]运行{:.3f}秒,结束于{}返回结果: {}".format(
-            site,
-            time.time() - start_time,
-            time.strftime("%H:%M:%S"),
-            storyline if isinstance(storyline, str) and len(storyline) else '[空]')
-        )
-        return storyline
-    return _inner(*args)
+    (site, number, title, debug) = args
+    start_time = time.time()
+    storyline = None
+    if not isinstance(site, str):
+        return storyline
+    elif site == "airavwiki":
+        storyline = getStoryline_airavwiki(number, debug)
+        #storyline = getStoryline_airavwiki_super(number, debug)
+    elif site == "airav":
+        storyline = getStoryline_airav(number, debug)
+    elif site == "avno1":
+        storyline = getStoryline_avno1(number, debug)
+    elif site == "xcity":
+        storyline = getStoryline_xcity(number, debug)
+    elif site == "amazon":
+        storyline = getStoryline_amazon(title, number, debug)
+    elif site == "58avgo":
+        storyline = getStoryline_58avgo(number, debug)
+    if not debug:
+        return storyline
+    # 进程池模式的子进程getStoryline_*()的print()不会写入日志中,线程池和顺序执行不受影响
+    print("[!]MP 进程[{}]运行{:.3f}秒,结束于{}返回结果: {}".format(
+        site,
+        time.time() - start_time,
+        time.strftime("%H:%M:%S"),
+        storyline if isinstance(storyline, str) and len(storyline) else '[空]')
+    )
+    return storyline


 def getStoryline_airav(number, debug):
@@ -308,8 +308,8 @@ def getStoryline_amazon(q_title, number, debug):
         res = session.get(urljoin(res.url, lks[0]))
         cookie = None
         lx = fromstring(res.text)
-        titles = lx.xpath("//span[contains(@class,'a-color-base a-text-normal')]/text()")
-        urls = lx.xpath("//span[contains(@class,'a-color-base a-text-normal')]/../@href")
+        titles = lx.xpath("//span[contains(@class,'a-size-base-plus a-color-base a-text-normal')]/text()")
+        urls = lx.xpath("//span[contains(@class,'a-size-base-plus a-color-base a-text-normal')]/../@href")
         if not len(urls) or len(urls) != len(titles):
             raise ValueError("titles not found")
         idx = amazon_select_one(titles, q_title, number, debug)
@@ -325,8 +325,9 @@ def getStoryline_amazon(q_title, number, debug):
         res = session.get(urljoin(res.url, lks[0]))
         cookie = None
         lx = fromstring(res.text)
-        div = lx.xpath('//*[@id="productDescription"]')[0]
-        ama_t = ' '.join([e.text.strip() for e in div if not re.search('Comment|h3', str(e.tag), re.I) and isinstance(e.text, str)])
+        p1 = lx.xpath('//*[@id="productDescription"]/p[1]/span/text()')
+        p2 = lx.xpath('//*[@id="productDescription"]/p[2]/span/text()')
+        ama_t = ' '.join(p1) + ' '.join(p2)
         ama_t = re.sub(r'審査番号:\d+', '', ama_t).strip()

         if cookie is None:
@@ -406,10 +407,10 @@ def amazon_select_one(a_titles, q_title, number, debug):
         # debug 模式下记录识别准确率日志
         if ratio < 0.9:
             # 相似度[0.5, 0.9)的淘汰结果单独记录日志
-            (Path.home() / '.avlogs/ratio0.5.txt').open('a', encoding='utf-8').write(
-                f' [{number}] Ratio:{ratio}\n{a_titles[sel]}\n{q_title}\n{save_t_}\n{que_t}\n')
+            with (Path.home() / '.mlogs/ratio0.5.txt').open('a', encoding='utf-8') as hrt:
+                hrt.write(f' [{number}] Ratio:{ratio}\n{a_titles[sel]}\n{q_title}\n{save_t_}\n{que_t}\n')
             return -1
         # 被采信的结果日志
-        (Path.home() / '.avlogs/ratio.txt').open('a', encoding='utf-8').write(
-            f' [{number}] Ratio:{ratio}\n{a_titles[sel]}\n{q_title}\n{save_t_}\n{que_t}\n')
+        with (Path.home() / '.mlogs/ratio.txt').open('a', encoding='utf-8') as hrt:
+            hrt.write(f' [{number}] Ratio:{ratio}\n{a_titles[sel]}\n{q_title}\n{save_t_}\n{que_t}\n')
         return sel
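getStoryline_mp() now takes one args tuple instead of wrapping an inner closure, which is the shape executor.map()/Pool.map() expect: each task receives exactly one argument. A small illustration of the pattern with a dummy per-site worker; the worker body is a placeholder, only the site list mirrors the dispatch above:

```python
from concurrent.futures import ThreadPoolExecutor

def fetch_storyline(args):
    (site, number, title, debug) = args       # single-argument signature for map()
    return f"{site}: storyline for {number}"  # placeholder for the real scraper call

sites = ["airavwiki", "airav", "avno1", "xcity", "amazon", "58avgo"]
tasks = [(site, "ABC-123", "sample title", False) for site in sites]
with ThreadPoolExecutor(max_workers=len(tasks)) as pool:
    for line in pool.map(fetch_storyline, tasks):
        print(line)
```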
config.ini (17)

@@ -20,7 +20,10 @@ del_empty_folder=1
 nfo_skip_days=30
 ; 处理完多少个视频文件后停止,0为处理所有视频文件
 stop_counter=0
-; 以上两个参数配合使用可以以多次少量的方式刮削或整理数千个文件而不触发翻译或元数据站封禁
+; 再运行延迟时间,单位:h时m分s秒 举例: 1h30m45s(1小时30分45秒) 45(45秒)
+; stop_counter不为零的条件下才有效,每处理stop_counter部影片后延迟rerun_delay秒再次运行
+rerun_delay=0
+; 以上三个参数配合使用可以以多次少量的方式刮削或整理数千个文件而不触发翻译或元数据站封禁
 ignore_failed_list=0
 download_only_missing_images=1
 mapping_table_validity=7
@@ -76,7 +79,7 @@ uncensored_prefix=S2M,BT,LAF,SMD,SMBD,SM3D2DBD,SKY-,SKYHD,CWP,CWDV,CWBD,CW3D2DBD
 ; 影片后缀
 media_type=.mp4,.avi,.rmvb,.wmv,.mov,.mkv,.flv,.ts,.webm,.iso,.mpg,.m4v
 ; 字幕后缀
-sub_type=.smi,.srt,.idx,.sub,.sup,.psb,.ssa,.ass,.txt,.usf,.xss,.ssf,.rt,.lrc,.sbv,.vtt,.ttml
+sub_type=.smi,.srt,.idx,.sub,.sup,.psb,.ssa,.ass,.usf,.xss,.ssf,.rt,.lrc,.sbv,.vtt,.ttml

 ; 水印
 [watermark]
@@ -114,8 +117,14 @@ mode=1
 vars=outline,series,studio,tag,title

 [javdb]
-sites=37,38
+sites=38,39

-; 人脸识别 hog:方向梯度直方图(不太准确,速度快) cnn:深度学习模型(准确,需要GPU/CUDA,速度慢)
+; 人脸识别 locations_model=hog:方向梯度直方图(不太准确,速度快) cnn:深度学习模型(准确,需要GPU/CUDA,速度慢)
+; uncensored_only=0:对全部封面进行人脸识别 1:只识别无码封面,有码封面直接切右半部分
+; aways_imagecut=0:按各网站默认行为 1:总是裁剪封面,开启此项将无视[common]download_only_missing_images=1总是覆盖封面
+; 封面裁剪的宽高比可配置,公式为aspect_ratio/3。默认aspect_ratio=2.12: 适配大部分有码影片封面,前一版本默认为2/3即aspect_ratio=2
 [face]
 locations_model=hog
+uncensored_only=1
+aways_imagecut=0
+aspect_ratio=2.12
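The new [common] and [face] keys read back through configparser like any other option; rerun_delay stays a raw string at this layer and is only converted to seconds by Config.rerun_delay(). A quick round-trip sketch; the stop_counter and rerun_delay values are examples, while the [face] values match the defaults added above:

```python
import configparser

ini_text = """
[common]
stop_counter=20
rerun_delay=1h30m45s

[face]
locations_model=hog
uncensored_only=1
aways_imagecut=0
aspect_ratio=2.12
"""

conf = configparser.ConfigParser()
conf.read_string(ini_text)
print(conf.getint('common', 'stop_counter'))       # 20
print(conf.get('common', 'rerun_delay'))           # '1h30m45s' (parsed later into seconds)
print(conf.getboolean('face', 'uncensored_only'))  # True
print(conf.getfloat('face', 'aspect_ratio'))       # 2.12
```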
config.py (85)

@@ -3,6 +3,7 @@ import re
 import sys
 import configparser
 import time
+import typing
 from pathlib import Path

 G_conf_override = {
@@ -16,7 +17,9 @@ G_conf_override = {
     "common:nfo_skip_days": None,
     "common:stop_counter": None,
     "common:ignore_failed_list": None,
-    "debug_mode:switch": None
+    "common:rerun_delay": None,
+    "debug_mode:switch": None,
+    "face:aways_imagecut": None
 }


@@ -99,13 +102,19 @@ class Config:
             # sys.exit(3)
             # #self.conf = self._default_config()

-    def getboolean_override(self, section, item) -> bool:
-        return self.conf.getboolean(section, item) if G_conf_override[f"{section}:{item}"] is None else bool(
-            G_conf_override[f"{section}:{item}"])
+    def getboolean_override(self, section, item, fallback=None) -> bool:
+        if G_conf_override[f"{section}:{item}"] is not None:
+            return bool(G_conf_override[f"{section}:{item}"])
+        if fallback is not None:
+            return self.conf.getboolean(section, item, fallback=fallback)
+        return self.conf.getboolean(section, item)

-    def getint_override(self, section, item) -> int:
-        return self.conf.getint(section, item) if G_conf_override[f"{section}:{item}"] is None else int(
-            G_conf_override[f"{section}:{item}"])
+    def getint_override(self, section, item, fallback=None) -> int:
+        if G_conf_override[f"{section}:{item}"] is not None:
+            return int(G_conf_override[f"{section}:{item}"])
+        if fallback is not None:
+            return self.conf.getint(section, item, fallback=fallback)
+        return self.conf.getint(section, item)

     def get_override(self, section, item) -> str:
         return self.conf.get(section, item) if G_conf_override[f"{section}:{item}"] is None else str(
@@ -151,16 +160,10 @@ class Config:
         return self.conf.getboolean("common", "del_empty_folder")

     def nfo_skip_days(self) -> int:
-        try:
-            return self.getint_override("common", "nfo_skip_days")
-        except:
-            return 30
+        return self.getint_override("common", "nfo_skip_days", fallback=30)

     def stop_counter(self) -> int:
-        try:
-            return self.getint_override("common", "stop_counter")
-        except:
-            return 0
+        return self.getint_override("common", "stop_counter", fallback=0)

     def ignore_failed_list(self) -> bool:
         return self.getboolean_override("common", "ignore_failed_list")
@@ -171,6 +174,24 @@ class Config:
     def mapping_table_validity(self) -> int:
         return self.conf.getint("common", "mapping_table_validity")

+    def rerun_delay(self) -> int:
+        value = self.get_override("common", "rerun_delay")
+        if not (isinstance(value, str) and re.match(r'^[\dsmh]+$', value, re.I)):
+            return 0 # not match '1h30m45s' or '30' or '1s2m1h4s5m'
+        if value.isnumeric() and int(value) >= 0:
+            return int(value)
+        sec = 0
+        sv = re.findall(r'(\d+)s', value, re.I)
+        mv = re.findall(r'(\d+)m', value, re.I)
+        hv = re.findall(r'(\d+)h', value, re.I)
+        for v in sv:
+            sec += int(v)
+        for v in mv:
+            sec += int(v) * 60
+        for v in hv:
+            sec += int(v) * 3600
+        return sec
+
     def is_translate(self) -> bool:
         return self.conf.getboolean("translate", "switch")

@@ -247,8 +268,8 @@ class Config:
     def media_type(self) -> str:
         return self.conf.get('media', 'media_type')

-    def sub_rule(self):
-        return self.conf.get('media', 'sub_type').split(',')
+    def sub_rule(self) -> typing.Set[str]:
+        return set(self.conf.get('media', 'sub_type').lower().split(','))

     def naming_rule(self) -> str:
         return self.conf.get("Name_Rule", "naming_rule")
@@ -329,22 +350,23 @@ class Config:
             return 1

     def cc_convert_vars(self) -> str:
-        try:
-            return self.conf.get("cc_convert", "vars")
-        except:
-            return "actor,director,label,outline,series,studio,tag,title"
+        return self.conf.get("cc_convert", "vars",
+                             fallback="actor,director,label,outline,series,studio,tag,title")

     def javdb_sites(self) -> str:
-        try:
-            return self.conf.get("javdb", "sites")
-        except:
-            return "33,34"
+        return self.conf.get("javdb", "sites", fallback="38,39")

     def face_locations_model(self) -> str:
-        try:
-            return self.conf.get("face", "locations_model")
-        except:
-            return "hog"
+        return self.conf.get("face", "locations_model", fallback="hog")
+
+    def face_uncensored_only(self) -> bool:
+        return self.conf.getboolean("face", "uncensored_only", fallback=True)
+
+    def face_aways_imagecut(self) -> bool:
+        return self.getboolean_override("face", "aways_imagecut", fallback=False)
+
+    def face_aspect_ratio(self) -> float:
+        return self.conf.getfloat("face", "aspect_ratio", fallback=2.12)

     @staticmethod
     def _exit(sec: str) -> None:
@@ -375,6 +397,7 @@ class Config:
         conf.set(sec1, "ignore_failed_list", 0)
         conf.set(sec1, "download_only_missing_images", 1)
         conf.set(sec1, "mapping_table_validity", 7)
+        conf.set(sec1, "rerun_delay", 0)

         sec2 = "proxy"
         conf.add_section(sec2)
@@ -428,9 +451,9 @@ class Config:
         sec11 = "media"
         conf.add_section(sec11)
         conf.set(sec11, "media_type",
-                 ".mp4,.avi,.rmvb,.wmv,.mov,.mkv,.flv,.ts,.webm,.MP4,.AVI,.RMVB,.WMV,.MOV,.MKV,.FLV,.TS,.WEBM,iso,ISO")
+                 ".mp4,.avi,.rmvb,.wmv,.mov,.mkv,.flv,.ts,.webm,iso")
         conf.set(sec11, "sub_type",
-                 ".smi,.srt,.idx,.sub,.sup,.psb,.ssa,.ass,.txt,.usf,.xss,.ssf,.rt,.lrc,.sbv,.vtt,.ttml")
+                 ".smi,.srt,.idx,.sub,.sup,.psb,.ssa,.ass,.usf,.xss,.ssf,.rt,.lrc,.sbv,.vtt,.ttml")

         sec12 = "watermark"
         conf.add_section(sec12)
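Config.rerun_delay() accepts either a bare number of seconds or any mix of NhNmNs tokens and sums them; anything else falls back to 0. A compact mirror of that parsing for illustration, with a few checks:

```python
import re

def parse_rerun_delay(value: str) -> int:
    if not (isinstance(value, str) and re.match(r'^[\dsmh]+$', value, re.I)):
        return 0                                   # unrecognised form
    if value.isnumeric():
        return int(value)                          # plain seconds
    sec = sum(int(v) for v in re.findall(r'(\d+)s', value, re.I))
    sec += sum(int(v) * 60 for v in re.findall(r'(\d+)m', value, re.I))
    sec += sum(int(v) * 3600 for v in re.findall(r'(\d+)h', value, re.I))
    return sec

print(parse_rerun_delay('90'))        # 90
print(parse_rerun_delay('1h30m45s'))  # 5445
print(parse_rerun_delay('2h'))        # 7200
print(parse_rerun_delay('soon'))      # 0
```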
220
core.py
220
core.py
@@ -10,6 +10,7 @@ from PIL import Image
|
|||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
from lxml import etree
|
||||||
|
|
||||||
from ADC_function import *
|
from ADC_function import *
|
||||||
from WebCrawler import get_data_from_json
|
from WebCrawler import get_data_from_json
|
||||||
@@ -69,10 +70,12 @@ def get_info(json_data): # 返回json里的数据
|
|||||||
return title, studio, year, outline, runtime, director, actor_photo, release, number, cover, trailer, website, series, label
|
return title, studio, year, outline, runtime, director, actor_photo, release, number, cover, trailer, website, series, label
|
||||||
|
|
||||||
|
|
||||||
def small_cover_check(path, number, cover_small, leak_word, c_word, hack_word, filepath):
|
def small_cover_check(path, filename, cover_small, movie_path):
|
||||||
filename = f"{number}{leak_word}{c_word}{hack_word}-poster.jpg"
|
full_filepath = Path(path) / filename
|
||||||
download_file_with_filename(cover_small, filename, path, filepath)
|
if config.getInstance().download_only_missing_images() and not file_not_exist_or_empty(str(full_filepath)):
|
||||||
print('[+]Image Downloaded! ' + os.path.join(path, filename))
|
return
|
||||||
|
download_file_with_filename(cover_small, filename, path, movie_path)
|
||||||
|
print('[+]Image Downloaded! ' + full_filepath.name)
|
||||||
|
|
||||||
|
|
||||||
def create_folder(json_data): # 创建文件夹
|
def create_folder(json_data): # 创建文件夹
|
||||||
@@ -213,7 +216,7 @@ def extrafanart_download_one_by_one(data, path, filepath):
|
|||||||
break
|
break
|
||||||
if file_not_exist_or_empty(jpg_fullpath):
|
if file_not_exist_or_empty(jpg_fullpath):
|
||||||
return
|
return
|
||||||
print('[+]Image Downloaded!', jpg_fullpath)
|
print('[+]Image Downloaded!', Path(jpg_fullpath).name)
|
||||||
j += 1
|
j += 1
|
||||||
if conf.debug():
|
if conf.debug():
|
||||||
print(f'[!]Extrafanart download one by one mode runtime {time.perf_counter() - tm_start:.3f}s')
|
print(f'[!]Extrafanart download one by one mode runtime {time.perf_counter() - tm_start:.3f}s')
|
||||||
@@ -244,7 +247,7 @@ def extrafanart_download_threadpool(url_list, save_dir, number):
|
|||||||
if failed: # 非致命错误,电影不移入失败文件夹,将来可以用模式3补齐
|
if failed: # 非致命错误,电影不移入失败文件夹,将来可以用模式3补齐
|
||||||
print(f"[-]Failed downloaded {failed}/{len(result)} extrafanart images for [{number}] to '{extrafanart_dir}', you may retry run mode 3 later.")
|
print(f"[-]Failed downloaded {failed}/{len(result)} extrafanart images for [{number}] to '{extrafanart_dir}', you may retry run mode 3 later.")
|
||||||
else:
|
else:
|
||||||
print(f"[+]Successfully downloaded {len(result)} extrafanart to '{extrafanart_dir}'")
|
print(f"[+]Successfully downloaded {len(result)} extrafanarts.")
|
||||||
if conf.debug():
|
if conf.debug():
|
||||||
print(f'[!]Extrafanart download ThreadPool mode runtime {time.perf_counter() - tm_start:.3f}s')
|
print(f'[!]Extrafanart download ThreadPool mode runtime {time.perf_counter() - tm_start:.3f}s')
|
||||||
|
|
||||||
@@ -255,7 +258,7 @@ def image_ext(url):
|
|||||||
return ".jpg"
|
return ".jpg"
|
||||||
|
|
||||||
# 封面是否下载成功,否则移动到failed
|
# 封面是否下载成功,否则移动到failed
|
||||||
def image_download(cover, fanart_path,thumb_path, path, filepath):
|
def image_download(cover, fanart_path, thumb_path, path, filepath):
|
||||||
full_filepath = os.path.join(path, fanart_path)
|
full_filepath = os.path.join(path, fanart_path)
|
||||||
if config.getInstance().download_only_missing_images() and not file_not_exist_or_empty(full_filepath):
|
if config.getInstance().download_only_missing_images() and not file_not_exist_or_empty(full_filepath):
|
||||||
return
|
return
|
||||||
@@ -273,7 +276,7 @@ def image_download(cover, fanart_path,thumb_path, path, filepath):
|
|||||||
break
|
break
|
||||||
if file_not_exist_or_empty(full_filepath):
|
if file_not_exist_or_empty(full_filepath):
|
||||||
return
|
return
|
||||||
print('[+]Image Downloaded!', full_filepath)
|
print('[+]Image Downloaded!', Path(full_filepath).name)
|
||||||
shutil.copyfile(full_filepath, os.path.join(path, thumb_path))
|
shutil.copyfile(full_filepath, os.path.join(path, thumb_path))
|
||||||
|
|
||||||
|
|
||||||
@@ -291,6 +294,12 @@ def print_files(path, leak_word, c_word, naming_rule, part, cn_sub, json_data, f
|
|||||||
print(f"[-]Fatal error! can not make folder '{path}'")
|
print(f"[-]Fatal error! can not make folder '{path}'")
|
||||||
sys.exit(0)
|
sys.exit(0)
|
||||||
|
|
||||||
|
old_nfo = None
|
||||||
|
try:
|
||||||
|
if os.path.isfile(nfo_path):
|
||||||
|
old_nfo = etree.parse(nfo_path)
|
||||||
|
except:
|
||||||
|
pass
|
||||||
# KODI内查看影片信息时找不到number,配置naming_rule=number+'#'+title虽可解决
|
# KODI内查看影片信息时找不到number,配置naming_rule=number+'#'+title虽可解决
|
||||||
# 但使得标题太长,放入时常为空的outline内会更适合,软件给outline留出的显示版面也较大
|
# 但使得标题太长,放入时常为空的outline内会更适合,软件给outline留出的显示版面也较大
|
||||||
outline = f"{number}#{outline}"
|
outline = f"{number}#{outline}"
|
||||||
@@ -354,11 +363,17 @@ def print_files(path, leak_word, c_word, naming_rule, part, cn_sub, json_data, f
|
|||||||
print(" <premiered>" + release + "</premiered>", file=code)
|
print(" <premiered>" + release + "</premiered>", file=code)
|
||||||
print(" <releasedate>" + release + "</releasedate>", file=code)
|
print(" <releasedate>" + release + "</releasedate>", file=code)
|
||||||
print(" <release>" + release + "</release>", file=code)
|
print(" <release>" + release + "</release>", file=code)
|
||||||
|
if old_nfo:
|
||||||
|
try:
|
||||||
|
xur = old_nfo.xpath('//userrating/text()')[0]
|
||||||
|
if isinstance(xur, str) and re.match('\d+\.\d+|\d+', xur.strip()):
|
||||||
|
print(f" <userrating>{xur.strip()}</userrating>", file=code)
|
||||||
|
except:
|
||||||
|
pass
|
||||||
try:
|
try:
|
||||||
f_rating = json_data['用户评分']
|
f_rating = json_data['用户评分']
|
||||||
uc = json_data['评分人数']
|
uc = json_data['评分人数']
|
||||||
print(f""" <userrating>{round(f_rating * 2.0)}</userrating>
|
print(f""" <rating>{round(f_rating * 2.0, 1)}</rating>
|
||||||
<rating>{round(f_rating * 2.0, 1)}</rating>
|
|
||||||
<criticrating>{round(f_rating * 20.0, 1)}</criticrating>
|
<criticrating>{round(f_rating * 20.0, 1)}</criticrating>
|
||||||
<ratings>
|
<ratings>
|
||||||
<rating name="javdb" max="5" default="true">
|
<rating name="javdb" max="5" default="true">
|
||||||
@@ -476,10 +491,9 @@ def add_to_pic(pic_path, img_pic, size, count, mode):
|
|||||||
# ========================结束=================================
|
# ========================结束=================================
|
||||||
|
|
||||||
|
|
||||||
def paste_file_to_folder(filepath, path, number, leak_word, c_word, hack_word): # 文件路径,番号,后缀,要移动至的位置
|
def paste_file_to_folder(filepath, path, multi_part, number, part, leak_word, c_word, hack_word): # 文件路径,番号,后缀,要移动至的位置
|
||||||
filepath_obj = pathlib.Path(filepath)
|
filepath_obj = pathlib.Path(filepath)
|
||||||
houzhui = filepath_obj.suffix
|
houzhui = filepath_obj.suffix
|
||||||
file_parent_origin_path = str(filepath_obj.parent)
|
|
||||||
try:
|
try:
|
||||||
targetpath = os.path.join(path, f"{number}{leak_word}{c_word}{hack_word}{houzhui}")
|
targetpath = os.path.join(path, f"{number}{leak_word}{c_word}{hack_word}{houzhui}")
|
||||||
# 任何情况下都不要覆盖,以免遭遇数据源或者引擎错误导致所有文件得到同一个number,逐一
|
# 任何情况下都不要覆盖,以免遭遇数据源或者引擎错误导致所有文件得到同一个number,逐一
|
||||||
@@ -505,25 +519,21 @@ def paste_file_to_folder(filepath, path, number, leak_word, c_word, hack_word):
                 filerelpath = os.path.relpath(filepath, path)
                 os.symlink(filerelpath, targetpath)
             except:
-                os.symlink(filepath_obj.resolve(), targetpath)
-        sub_res = config.getInstance().sub_rule()
+                os.symlink(str(filepath_obj.resolve()), targetpath)

-        for subname in sub_res:
-            sub_filepath = str(filepath_obj.with_suffix(subname))
-            if os.path.isfile(sub_filepath.replace(subname,".chs" + subname)):
-                sub_filepath = sub_filepath.replace(subname,".chs" + subname)
-                subname = ".chs" + subname
-            elif os.path.isfile(sub_filepath.replace(subname,".cht" + subname)):
-                sub_filepath = sub_filepath.replace(subname, ".cht" + subname)
-                subname = ".cht" + subname
-            if os.path.isfile(sub_filepath):
+        sub_res = config.getInstance().sub_rule()
+        for subfile in filepath_obj.parent.glob('**/*'):
+            if subfile.is_file() and subfile.suffix.lower() in sub_res:
+                if multi_part and part.lower() not in subfile.name.lower():
+                    continue
+                sub_targetpath = Path(path) / f"{number}{leak_word}{c_word}{hack_word}{''.join(subfile.suffixes)}"
                 if link_mode not in (1, 2):
-                    shutil.move(sub_filepath, os.path.join(path, f"{number}{leak_word}{c_word}{hack_word}{subname}"))
-                    print('[+]Sub moved!')
+                    shutil.move(str(subfile), str(sub_targetpath))
+                    print(f"[+]Sub Moved! {sub_targetpath.name}")
                 else:
-                    shutil.copyfile(sub_filepath, os.path.join(path, f"{number}{leak_word}{c_word}{hack_word}{subname}"))
-                    print('[+]Sub Copied!')
-                    return True
+                    shutil.copyfile(str(subfile), str(sub_targetpath))
+                    print(f"[+]Sub Copied! {sub_targetpath.name}")
+        return

     except FileExistsError as fee:
         print(f'[-]FileExistsError: {fee}')
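The subtitle handling now scans the movie's folder for any file whose extension is listed by sub_rule(), instead of probing fixed .chs/.cht name variants. A minimal sketch of that matching rule, not part of the diff; sub_rule() is assumed here to return a set of extensions such as {'.srt', '.ass'}:

    from pathlib import Path

    def matching_subs(movie: Path, sub_res: set, multi_part: bool, part: str):
        # scan the movie's folder tree for subtitle files by extension
        for subfile in movie.parent.glob('**/*'):
            if subfile.is_file() and subfile.suffix.lower() in sub_res:
                # for multi-part movies keep only subtitles carrying the same -CDn token
                if multi_part and part.lower() not in subfile.name.lower():
                    continue
                yield subfile

    # e.g. list(matching_subs(Path('/tmp/ABC-123-CD2.mkv'), {'.srt', '.ass'}, True, '-CD2'))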
@@ -542,24 +552,39 @@ def paste_file_to_folder_mode2(filepath, path, multi_part, number, part, leak_wo
         number += part # 这时number会被附加上CD1后缀
     filepath_obj = pathlib.Path(filepath)
     houzhui = filepath_obj.suffix
-    file_parent_origin_path = str(filepath_obj.parent)
     targetpath = os.path.join(path, f"{number}{part}{leak_word}{c_word}{hack_word}{houzhui}")
     if os.path.exists(targetpath):
         raise FileExistsError('File Exists on destination path, we will never overwriting.')
     try:
-        if config.getInstance().link_mode():
-            os.symlink(filepath, targetpath)
-        else:
+        link_mode = config.getInstance().link_mode()
+        create_softlink = False
+        if link_mode not in (1, 2):
             shutil.move(filepath, targetpath)
+        elif link_mode == 2:
+            try:
+                os.link(filepath, targetpath, follow_symlinks=False)
+            except:
+                create_softlink = True
+        if link_mode == 1 or create_softlink:
+            try:
+                filerelpath = os.path.relpath(filepath, path)
+                os.symlink(filerelpath, targetpath)
+            except:
+                os.symlink(str(filepath_obj.resolve()), targetpath)

         sub_res = config.getInstance().sub_rule()
-        for subname in sub_res:
-            sub_filepath = str(filepath_obj.with_suffix(subname))
-            if os.path.isfile(sub_filepath): # 字幕移动
-                shutil.move(sub_filepath, os.path.join(path, f"{number}{part}{leak_word}{c_word}{hack_word}{subname}"))
-                print('[+]Sub moved!')
-                print('[!]Success')
-                return True
+        for subfile in filepath_obj.parent.glob('**/*'):
+            if subfile.is_file() and subfile.suffix.lower() in sub_res:
+                if multi_part and part.lower() not in subfile.name.lower():
+                    continue
+                sub_targetpath = Path(path) / f"{number}{leak_word}{c_word}{hack_word}{''.join(subfile.suffixes)}"
+                if link_mode not in (1, 2):
+                    shutil.move(str(subfile), str(sub_targetpath))
+                    print(f"[+]Sub Moved! {sub_targetpath.name}")
+                else:
+                    shutil.copyfile(str(subfile), str(sub_targetpath))
+                    print(f"[+]Sub Copied! {sub_targetpath.name}")
+        return
     except FileExistsError as fee:
         print(f'[-]FileExistsError: {fee}')
         return
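link_mode now distinguishes three behaviours: anything other than 1 or 2 moves the file, 2 tries a hard link and falls back to a symlink, and 1 creates a relative symlink with an absolute-path fallback. A condensed sketch of just that branch, not part of the diff (function name and the OSError handling are illustrative only; the diff uses bare except):

    import os
    import shutil
    from pathlib import Path

    def place_file(filepath: str, targetpath: str, path: str, link_mode: int):
        create_softlink = False
        if link_mode not in (1, 2):
            shutil.move(filepath, targetpath)                        # default: plain move
        elif link_mode == 2:
            try:
                os.link(filepath, targetpath, follow_symlinks=False)  # hard link
            except OSError:
                create_softlink = True                               # e.g. cross-device -> fall back
        if link_mode == 1 or create_softlink:
            try:
                os.symlink(os.path.relpath(filepath, path), targetpath)
            except OSError:
                os.symlink(str(Path(filepath).resolve()), targetpath)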
@@ -571,18 +596,6 @@ def paste_file_to_folder_mode2(filepath, path, multi_part, number, part, leak_wo
         return


-def get_part(filepath):
-    try:
-        if re.search('-CD\d+', filepath):
-            return re.findall('-CD\d+', filepath)[0]
-        if re.search('-cd\d+', filepath):
-            return re.findall('-cd\d+', filepath)[0]
-    except:
-        print("[-]failed!Please rename the filename again!")
-        moveFailedFolder(filepath)
-        return
-
-
 def debug_print(data: json):
     try:
         print("[+] ------- DEBUG INFO -------")
@@ -595,14 +608,65 @@ def debug_print(data: json):
             if i == 'extrafanart':
                 print('[+] -', "%-14s" % i, ':', len(v), 'links')
                 continue
-            print('[+] -', "%-14s" % i, ':', v)
+            print(f'[+] - {i:<{cnspace(i,14)}} : {v}')

         print("[+] ------- DEBUG INFO -------")
     except:
         pass


-def core_main(file_path, number_th, oCC):
+def core_main_no_net_op(movie_path, number):
+    conf = config.getInstance()
+    part = ''
+    leak_word = ''
+    leak = 0
+    c_word = ''
+    cn_sub = ''
+    hack = ''
+    hack_word = ''
+    ext = '.jpg'
+    imagecut = 1
+    path = str(Path(movie_path).parent)
+
+    if re.search('-CD\d+', movie_path, re.IGNORECASE):
+        part = re.findall('-CD\d+', movie_path, re.IGNORECASE)[0].upper()
+    if re.search(r'-C(\.\w+$|-\w+)|\d+ch(\.\w+$|-\w+)', movie_path,
+                 re.I) or '中文' in movie_path or '字幕' in movie_path:
+        cn_sub = '1'
+        c_word = '-C' # 中文字幕影片后缀
+    uncensored = 1 if is_uncensored(number) else 0
+    if '流出' in movie_path or 'uncensored' in movie_path:
+        leak_word = '-流出' # 流出影片后缀
+        leak = 1
+
+    if 'hack'.upper() in str(movie_path).upper() or '破解' in movie_path:
+        hack = 1
+        hack_word = "-hack"
+
+    prestr = f"{number}{leak_word}{c_word}{hack_word}"
+    fanart_path = f"{prestr}-fanart{ext}"
+    poster_path = f"{prestr}-poster{ext}"
+    thumb_path = f"{prestr}-thumb{ext}"
+    full_fanart_path = os.path.join(path, fanart_path)
+    full_poster_path = os.path.join(path, poster_path)
+    full_thumb_path = os.path.join(path, thumb_path)
+    full_nfo = Path(path) / f"{prestr}{part}.nfo"
+
+    if full_nfo.is_file():
+        if full_nfo.read_text(encoding='utf-8').find(r'<tag>无码</tag>') >= 0:
+            uncensored = 1
+    else:
+        return
+
+    if not all(os.path.isfile(f) for f in (full_fanart_path, full_thumb_path)):
+        return
+
+    cutImage(imagecut, path, fanart_path, poster_path, bool(conf.face_uncensored_only() and not uncensored))
+    if conf.is_watermark():
+        add_mark(full_poster_path, full_thumb_path, cn_sub, leak, uncensored, hack)
+
+
+def core_main(movie_path, number_th, oCC):
     conf = config.getInstance()
     # =======================================================================初始化所需变量
     multi_part = 0
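debug_print switches from a fixed "%-14s" pad to cnspace() so that keys containing CJK characters still line up: each such character takes two terminal columns, so the format width has to shrink accordingly. A rough sketch of the idea, not part of the diff; display_width() only mirrors what cnspace(i, 14) is used for here, it is not the project's helper:

    from unicodedata import category

    def display_width(s: str) -> int:
        # treat 'Lo' letters (CJK and similar) as two terminal columns, the rest as one
        return sum(2 if category(c) == 'Lo' else 1 for c in s)

    for key in ('number', 'actor', '用户评分'):
        pad = 14 - (display_width(key) - len(key))   # same intent as cnspace(key, 14)
        print(f'[+] - {key:<{pad}} : ...')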
@@ -614,8 +678,6 @@ def core_main(file_path, number_th, oCC):
     hack = ''
     hack_word = ''

-
-    filepath = file_path # 影片的路径 绝对路径
     # 下面被注释的变量不需要
     #rootpath= os.getcwd
     number = number_th
@@ -623,7 +685,7 @@ def core_main(file_path, number_th, oCC):

     # Return if blank dict returned (data not found)
     if not json_data:
-        moveFailedFolder(filepath)
+        moveFailedFolder(movie_path)
         return

     if json_data["number"] != number:
@@ -636,25 +698,28 @@ def core_main(file_path, number_th, oCC):
     imagecut = json_data.get('imagecut')
     tag = json_data.get('tag')
     # =======================================================================判断-C,-CD后缀
-    if '-CD' in filepath or '-cd' in filepath:
+    if re.search('-CD\d+', movie_path, re.IGNORECASE):
         multi_part = 1
-        part = get_part(filepath)
-    if '-c.' in filepath or '-C.' in filepath or '中文' in filepath or '字幕' in filepath:
+        part = re.findall('-CD\d+', movie_path, re.IGNORECASE)[0].upper()
+    if re.search(r'-C(\.\w+$|-\w+)|\d+ch(\.\w+$|-\w+)', movie_path,
+                 re.I) or '中文' in movie_path or '字幕' in movie_path:
         cn_sub = '1'
         c_word = '-C' # 中文字幕影片后缀

     # 判断是否无码
     uncensored = 1 if is_uncensored(number) else 0
+    unce = json_data.get('无码')
+    if type(unce) is bool:
+        uncensored = 1 if unce else 0

-    if '流出' in filepath or 'uncensored' in filepath:
+    if '流出' in movie_path or 'uncensored' in movie_path:
         liuchu = '流出'
         leak = 1
         leak_word = '-流出' # 流出影片后缀
     else:
         leak = 0

-    if 'hack'.upper() in str(filepath).upper() or '破解' in filepath:
+    if 'hack'.upper() in str(movie_path).upper() or '破解' in movie_path:
         hack = 1
         hack_word = "-hack"

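The Chinese-subtitle check is now a regex: '-C' only counts when it sits right before the extension or another '-token', and a trailing '<digits>ch' (hard-coded subs) is accepted as well, which the old substring test could not express. A small illustration, not part of the diff; the sample names are hypothetical:

    import re

    pattern = re.compile(r'-C(\.\w+$|-\w+)|\d+ch(\.\w+$|-\w+)', re.I)
    for name in ('ABP-358-C.mkv', 'rctd-460ch.mp4', 'ABC-123-C-CD2.mkv', 'MIRD-CAT.mp4'):
        print(name, bool(pattern.search(name)))
    # expected: True, True, True, False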
@@ -683,77 +748,76 @@ def core_main(file_path, number_th, oCC):

         # 检查小封面, 如果image cut为3,则下载小封面
         if imagecut == 3:
-            small_cover_check(path, number, json_data.get('cover_small'), leak_word, c_word, hack_word, filepath)
+            small_cover_check(path, poster_path, json_data.get('cover_small'), movie_path)


         # creatFolder会返回番号路径
-        image_download( cover, fanart_path,thumb_path, path, filepath)
+        image_download( cover, fanart_path,thumb_path, path, movie_path)

         if not multi_part or part.lower() == '-cd1':
             try:
                 # 下载预告片
                 if conf.is_trailer() and json_data.get('trailer'):
-                    trailer_download(json_data.get('trailer'), leak_word, c_word, hack_word, number, path, filepath)
+                    trailer_download(json_data.get('trailer'), leak_word, c_word, hack_word, number, path, movie_path)
             except:
                 pass
             try:
                 # 下载剧照 data, path, filepath
                 if conf.is_extrafanart() and json_data.get('extrafanart'):
-                    extrafanart_download(json_data.get('extrafanart'), path, number, filepath)
+                    extrafanart_download(json_data.get('extrafanart'), path, number, movie_path)
             except:
                 pass


         # 裁剪图
-        cutImage(imagecut, path , fanart_path, poster_path)
+        cutImage(imagecut, path, fanart_path, poster_path, bool(conf.face_uncensored_only() and not uncensored))

         # 添加水印
         if conf.is_watermark():
             add_mark(os.path.join(path,poster_path), os.path.join(path,thumb_path), cn_sub, leak, uncensored, hack)

         # 移动电影
-        paste_file_to_folder(filepath, path, number, leak_word, c_word, hack_word)
+        paste_file_to_folder(movie_path, path, multi_part, number, part, leak_word, c_word, hack_word)

         # 最后输出.nfo元数据文件,以完成.nfo文件创建作为任务成功标志
-        print_files(path, leak_word, c_word, json_data.get('naming_rule'), part, cn_sub, json_data, filepath, tag, json_data.get('actor_list'), liuchu, uncensored, hack_word
+        print_files(path, leak_word, c_word, json_data.get('naming_rule'), part, cn_sub, json_data, movie_path, tag, json_data.get('actor_list'), liuchu, uncensored, hack_word
                     ,fanart_path,poster_path,thumb_path)

     elif conf.main_mode() == 2:
         # 创建文件夹
         path = create_folder(json_data)
         # 移动文件
-        paste_file_to_folder_mode2(filepath, path, multi_part, number, part, leak_word, c_word, hack_word)
+        paste_file_to_folder_mode2(movie_path, path, multi_part, number, part, leak_word, c_word, hack_word)
         if conf.is_watermark():
             add_mark(os.path.join(path,poster_path), os.path.join(path,thumb_path), cn_sub, leak, uncensored, hack)

     elif conf.main_mode() == 3:
-        path = str(Path(file_path).parent)
+        path = str(Path(movie_path).parent)
         if multi_part == 1:
             number += part # 这时number会被附加上CD1后缀

         # 检查小封面, 如果image cut为3,则下载小封面
         if imagecut == 3:
-            small_cover_check(path, number, json_data.get('cover_small'), leak_word, c_word, hack_word, filepath)
+            small_cover_check(path, poster_path, json_data.get('cover_small'), movie_path)

         # creatFolder会返回番号路径
-        image_download( cover, fanart_path,thumb_path, path, filepath)
+        image_download( cover, fanart_path, thumb_path, path, movie_path)

         if not multi_part or part.lower() == '-cd1':
             # 下载预告片
             if conf.is_trailer() and json_data.get('trailer'):
-                trailer_download(json_data.get('trailer'), leak_word, c_word, hack_word, number, path, filepath)
+                trailer_download(json_data.get('trailer'), leak_word, c_word, hack_word, number, path, movie_path)

             # 下载剧照 data, path, filepath
             if conf.is_extrafanart() and json_data.get('extrafanart'):
-                extrafanart_download(json_data.get('extrafanart'), path, number, filepath)
+                extrafanart_download(json_data.get('extrafanart'), path, number, movie_path)

         # 裁剪图
-        cutImage(imagecut, path , fanart_path, poster_path)
+        cutImage(imagecut, path, fanart_path, poster_path, bool(conf.face_uncensored_only() and not uncensored))

         # 添加水印
         if conf.is_watermark():
             add_mark(os.path.join(path,poster_path), os.path.join(path,thumb_path), cn_sub, leak, uncensored, hack)

         # 最后输出.nfo元数据文件,以完成.nfo文件创建作为任务成功标志
-        print_files(path, leak_word, c_word, json_data.get('naming_rule'), part, cn_sub, json_data, filepath,
+        print_files(path, leak_word, c_word, json_data.get('naming_rule'), part, cn_sub, json_data, movie_path,
                     tag, json_data.get('actor_list'), liuchu, uncensored, hack_word,fanart_path,poster_path,thumb_path)
@@ -5,8 +5,9 @@ import config
 import typing

 G_spat = re.compile(
-    "^22-sht\.me|-fhd|_fhd|^fhd_|^fhd-|-hd|_hd|^hd_|^hd-|-sd|_sd|-1080p|_1080p|-720p|_720p|"
-    "^\w+\.(cc|com)@|-uncensored|_uncensored|-leak|_leak|-4K|_4K",
+    "^\w+\.(cc|com|net|me|club|jp|tv|xyz|biz|wiki|info|tw|us|de)@|^22-sht\.me|"
+    "^(fhd|hd|sd|1080p|720p|4K)(-|_)|"
+    "(-|_)(fhd|hd|sd|1080p|720p|4K|uncensored|leak)",
     re.IGNORECASE)


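The rebuilt G_spat folds the site-prefix and quality/leak tokens into three alternations and covers many more TLDs. Assuming it is still applied to the filename with .sub('') before number extraction, as the surrounding parser suggests, a sketch (not part of the diff; raw-string prefixes added here only to silence escape warnings):

    import re

    G_spat = re.compile(
        r"^\w+\.(cc|com|net|me|club|jp|tv|xyz|biz|wiki|info|tw|us|de)@|^22-sht\.me|"
        r"^(fhd|hd|sd|1080p|720p|4K)(-|_)|"
        r"(-|_)(fhd|hd|sd|1080p|720p|4K|uncensored|leak)",
        re.IGNORECASE)

    for name in ('hhd800.com@STARS-566-HD.mp4', 'jav20s8.com@GIGL-677_4K.mp4', '4K-ABP-358_C.mkv'):
        print(G_spat.sub('', name))
    # expected: STARS-566.mp4, GIGL-677.mp4, ABP-358_C.mkv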
@@ -46,9 +47,13 @@ def get_number(debug: bool, file_path: str) -> str:
         lower_check = filename.lower()
         if 'fc2' in lower_check:
             filename = lower_check.replace('ppv', '').replace('--', '-').replace('_', '-').upper()
-        filename = re.sub("(-|_)cd\d{1,2}", "", filename, flags=re.IGNORECASE)
+        filename = re.sub("-cd\d{1,2}", "", filename, flags=re.IGNORECASE)
+        if not re.search("-|_", filename): # 去掉-CD1之后再无-的情况,例如n1012-CD1.wmv
+            return str(re.search(r'\w+', filename[:filename.find('.')], re.A).group())
         file_number = str(re.search(r'\w+(-|_)\w+', filename, re.A).group())
         file_number = re.sub("(-|_)c$", "", file_number, flags=re.IGNORECASE)
+        if re.search("\d+ch$", file_number, flags=re.I):
+            file_number = file_number[:-2]
         return file_number.upper()
     else: # 提取不含减号-的番号,FANZA CID
         # 欧美番号匹配规则
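The two added branches cover names that lose their last '-' once '-CDn' is stripped (e.g. n1012-CD1.wmv) and numbers ending in '<digits>ch' for hard-coded subs. A standalone sketch of just those steps, not part of the diff; the helper name is hypothetical and the real get_number() runs more cleanup first:

    import re

    def strip_cd_and_ch(filename: str) -> str:          # hypothetical helper for illustration
        filename = re.sub(r"-cd\d{1,2}", "", filename, flags=re.IGNORECASE)
        if not re.search(r"-|_", filename):              # e.g. n1012-CD1.wmv -> n1012.wmv
            return re.search(r'\w+', filename[:filename.find('.')], re.A).group().upper()
        file_number = re.search(r'\w+(-|_)\w+', filename, re.A).group()
        file_number = re.sub(r"(-|_)c$", "", file_number, flags=re.IGNORECASE)
        if re.search(r"\d+ch$", file_number, flags=re.I):  # hard-coded-sub marker, e.g. rctd-460ch
            file_number = file_number[:-2]
        return file_number.upper()

    print(strip_cd_and_ch('n1012-CD1.wmv'))    # N1012
    print(strip_cd_and_ch('rctd-460ch.mp4'))   # RCTD-460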
@@ -146,16 +151,23 @@ if __name__ == "__main__":
         "caribean-020317_001.nfo", # -号误命名为_号的
         "257138_3xplanet_1Pondo_080521_001.mp4",
         "ADV-R0624-CD3.wmv", # 多碟影片
-        "XXX-AV 22061-CD5.iso", # 新支持片商格式 xxx-av-22061 命名规则来自javdb数据源
+        "XXX-AV 22061-CD5.iso", # 支持片商格式 xxx-av-22061 命名规则来自javdb数据源
         "xxx-av 20589.mp4",
-        "Muramura-102114_145-HD.wmv", # 新支持片商格式 102114_145 命名规则来自javdb数据源
-        "heydouga-4102-023-CD2.iso", # 新支持片商格式 heydouga-4102-023 命名规则来自javdb数据源
+        "Muramura-102114_145-HD.wmv", # 支持片商格式 102114_145 命名规则来自javdb数据源
+        "heydouga-4102-023-CD2.iso", # 支持片商格式 heydouga-4102-023 命名规则来自javdb数据源
         "HeyDOuGa4236-1048 Ai Qiu - .mp4", # heydouga-4236-1048 命名规则来自javdb数据源
-        "pacopacomama-093021_539-FHD.mkv", # 新支持片商格式 093021_539 命名规则来自javdb数据源
+        "pacopacomama-093021_539-FHD.mkv", # 支持片商格式 093021_539 命名规则来自javdb数据源
         "sbw99.cc@heyzo_hd_2636_full.mp4",
-        "hhd800.com@STARS-566.mp4",
-        "jav20s8.com@GIGL-677.mp4",
-        "sbw99.cc@iesp-653.mp4"
+        "hhd800.com@STARS-566-HD.mp4",
+        "jav20s8.com@GIGL-677_4K.mp4",
+        "sbw99.cc@iesp-653-4K.mp4",
+        "4K-ABP-358_C.mkv",
+        "n1012-CD1.wmv",
+        "[]n1012-CD2.wmv",
+        "rctd-460ch.mp4", # 除支持-C硬字幕外,新支持ch硬字幕
+        "rctd-461CH-CD2.mp4", # ch后可加CDn
+        "rctd-461-Cd3-C.mp4", # CDn后可加-C
+        "rctd-461-C-cD4.mp4", # cD1 Cd1 cd1 CD1 最终生成.nfo时统一为大写CD1
     )

