Merge branch 'upstream'

# Conflicts:
#	WebCrawler/fanza.py
Author: Deng Zhou
Date: 2022-04-29 23:53:21 +08:00
34 changed files with 25968 additions and 18336 deletions


@@ -39,7 +39,7 @@ jobs:
run: |
pyinstaller \
--onefile Movie_Data_Capture.py \
--hidden-import "ImageProcessing.hog" \
--hidden-import "ImageProcessing.cnn" \
--add-data "$(python -c 'import cloudscraper as _; print(_.__path__[0])' | tail -n 1):cloudscraper" \
--add-data "$(python -c 'import opencc as _; print(_.__path__[0])' | tail -n 1):opencc" \
--add-data "$(python -c 'import face_recognition_models as _; print(_.__path__[0])' | tail -n 1):face_recognition_models" \
@@ -51,7 +51,7 @@ jobs:
run: |
pyinstaller `
--onefile Movie_Data_Capture.py `
--hidden-import "ImageProcessing.hog" `
--hidden-import "ImageProcessing.cnn" `
--add-data "$(python -c 'import cloudscraper as _; print(_.__path__[0])' | tail -n 1);cloudscraper" `
--add-data "$(python -c 'import opencc as _; print(_.__path__[0])' | tail -n 1);opencc" `
--add-data "$(python -c 'import face_recognition_models as _; print(_.__path__[0])' | tail -n 1);face_recognition_models" `


@@ -18,6 +18,7 @@ from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
from cloudscraper import create_scraper
from concurrent.futures import ThreadPoolExecutor
from unicodedata import category
def getXpathSingle(htmlcode, xpath):
@@ -26,7 +27,7 @@ def getXpathSingle(htmlcode, xpath):
return result1
G_USER_AGENT = r'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36'
G_USER_AGENT = r'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.75 Safari/537.36'
def get_html(url, cookies: dict = None, ua: str = None, return_type: str = None, encoding: str = None):
@@ -69,7 +70,6 @@ def get_html(url, cookies: dict = None, ua: str = None, return_type: str = None,
print('[-]Connect Failed! Please check your Proxy or Network!')
raise Exception('Connect Failed')
def post_html(url: str, query: dict, headers: dict = None) -> requests.Response:
configProxy = config.getInstance().proxy()
errors = ""
@@ -381,7 +381,7 @@ def load_cookies(cookie_json_filename: str):
break
if not cookies_filename:
return None, None
return json.load(open(cookies_filename)), cookies_filename
return json.loads(Path(cookies_filename).read_text(encoding='utf-8')), cookies_filename
except:
return None, None
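The load_cookies change above swaps json.load(open(...)) for Path.read_text(encoding='utf-8'), which closes the file deterministically and pins the encoding (a bare open() uses the locale default, which can fail on non-UTF-8 Windows). A minimal before/after sketch with a hypothetical path:

import json
from pathlib import Path

cookies_filename = 'cookies.json'  # hypothetical path for illustration
# before: handle never closed, encoding follows the locale
# cookies = json.load(open(cookies_filename))
# after: deterministic close, explicit UTF-8
cookies = json.loads(Path(cookies_filename).read_text(encoding='utf-8'))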
@@ -466,7 +466,7 @@ def download_file_with_filename(url: str, filename: str, path: str) -> None:
os.makedirs(path)
except:
print(f"[-]Fatal error! Can not make folder '{path}'")
sys.exit(0)
os._exit(0)
proxies = configProxy.proxies()
headers = {
'User-Agent': G_USER_AGENT}
@@ -483,7 +483,7 @@ def download_file_with_filename(url: str, filename: str, path: str) -> None:
os.makedirs(path)
except:
print(f"[-]Fatal error! Can not make folder '{path}'")
sys.exit(0)
os._exit(0)
headers = {
'User-Agent': G_USER_AGENT}
r = requests.get(url, timeout=configProxy.timeout, headers=headers)
@@ -519,14 +519,13 @@ def download_one_file(args) -> str:
wrapped for map function
"""
def _inner(url: str, save_path: Path):
filebytes = get_html(url, return_type='content')
if isinstance(filebytes, bytes) and len(filebytes):
if len(filebytes) == save_path.open('wb').write(filebytes):
(url, save_path) = args
filebytes = get_html(url, return_type='content')
if isinstance(filebytes, bytes) and len(filebytes):
with save_path.open('wb') as fpbyte:
if len(filebytes) == fpbyte.write(filebytes):
return str(save_path)
return _inner(*args)
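download_one_file takes its two arguments packed in a single tuple precisely so it can be fed to map()-style APIs, and the rewrite above moves the write into a context manager instead of a bare open(...).write(...). An illustrative usage with hypothetical URLs:

from concurrent.futures import ThreadPoolExecutor
from pathlib import Path
from ADC_function import download_one_file

tasks = [('https://example.com/a.jpg', Path('a.jpg')),   # hypothetical URLs
         ('https://example.com/b.jpg', Path('b.jpg'))]
with ThreadPoolExecutor(max_workers=2) as pool:
    # each tuple arrives packed, matching download_one_file(args)
    done = [p for p in pool.map(download_one_file, tasks) if p]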
def parallel_download_files(dn_list: typing.Iterable[typing.Sequence], parallel: int = 0):
"""
@@ -567,6 +566,7 @@ def delete_all_elements_in_list(string: str, lists: typing.Iterable[str]):
new_lists.append(i)
return new_lists
def delete_all_elements_in_str(string_delete: str, string: str):
"""
delete same string in given list
@@ -574,4 +574,9 @@ def delete_all_elements_in_str(string_delete: str, string: str):
for i in string:
if i == string_delete:
string = string.replace(i,"")
return string
return string
# width calculation for print-format alignment when the content contains CJK characters
def cnspace(v: str, n: int) -> int:
return n - [category(c) for c in v].count('Lo')
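cnspace shrinks a format width by one for every character whose Unicode category is 'Lo' (other letter, which covers CJK), since those glyphs occupy two terminal columns. A quick check of the alignment it produces, assuming cnspace as defined above:

for name in ('abc', '中文ab'):
    # both rows align the trailing '|' at the same terminal column
    print('{0:{1}}|'.format(name, cnspace(name, 10)))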


@@ -1,12 +1,18 @@
import sys
sys.path.append('../')
import logging
import os
import config
import importlib
from pathlib import Path
from PIL import Image
import shutil
from ADC_function import file_not_exist_or_empty
def face_crop_width(filename, width, height):
aspect_ratio = config.getInstance().face_aspect_ratio()
# the new width is 2/3 of the height
cropWidthHalf = int(height/3)
try:
@@ -21,15 +27,15 @@ def face_crop_width(filename, width, height):
# clamp to the image bounds
if cropLeft < 0:
cropLeft = 0
cropRight = cropWidthHalf*2
cropRight = cropWidthHalf * aspect_ratio
elif cropRight > width:
cropLeft = width-cropWidthHalf*2
cropLeft = width - cropWidthHalf * aspect_ratio
cropRight = width
return (cropLeft, 0, cropRight, height)
except:
print('[-]Not found face! ' + filename)
# default: crop from the right side
return (width-cropWidthHalf*2, 0, width, height)
return (width-cropWidthHalf * aspect_ratio, 0, width, height)
def face_crop_height(filename, width, height):
@@ -54,29 +60,43 @@ def face_crop_height(filename, width, height):
return (0, 0, width, cropHeight)
def cutImage(imagecut, path, fanart_path, poster_path):
def cutImage(imagecut, path, fanart_path, poster_path, skip_facerec=False):
conf = config.getInstance()
fullpath_fanart = os.path.join(path, fanart_path)
fullpath_poster = os.path.join(path, poster_path)
if imagecut == 1: # crop the large cover
aspect_ratio = conf.face_aspect_ratio()
if conf.face_aways_imagecut():
imagecut = 1
elif conf.download_only_missing_images() and not file_not_exist_or_empty(fullpath_poster):
return
# imagecut == 4 also marks a censored movie; crop its cover with face recognition too
if imagecut == 1 or imagecut == 4: # crop the large cover
try:
img = Image.open(fullpath_fanart)
width, height = img.size
if width/height > 2/3: # wider than 2:3
# crop centered on the face
img2 = img.crop(face_crop_width(fullpath_fanart, width, height))
if imagecut == 4:
# crop centered on the face
img2 = img.crop(face_crop_width(fullpath_fanart, width, height))
elif skip_facerec:
# censored covers default to a right-side crop
img2 = img.crop((width - int(height / 3) * aspect_ratio, 0, width, height))
else:
# crop centered on the face
img2 = img.crop(face_crop_width(fullpath_fanart, width, height))
elif width/height < 2/3: # taller than 2:3
# crop upward from the bottom
img2 = img.crop(face_crop_height(fullpath_fanart, width, height))
else: # exactly 2:3
img2 = img
img2.save(fullpath_poster)
print('[+]Image Cutted! ' + fullpath_poster)
print(f"[+]Image Cutted! {Path(fullpath_poster).name}")
except Exception as e:
print(e)
print('[-]Cover cut failed!')
elif imagecut == 0: # copy the cover unchanged
shutil.copyfile(fullpath_fanart, fullpath_poster)
print('[+]Image Copyed! ' + fullpath_poster)
print(f"[+]Image Copyed! {Path(fullpath_poster).name}")
def face_center(filename, model):
@@ -91,5 +111,5 @@ def face_center(filename, model):
return (0, 0)
if __name__ == '__main__':
cutImage(1,'H:\\test\\','12.jpg','test.jpg')
cutImage(1,'z:/t/','p.jpg','o.jpg')
#cutImage(1,'H:\\test\\','12.jpg','test.jpg')
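With face_aspect_ratio in play, the fallback right-side crop used above reduces to a small piece of arithmetic; a standalone sketch (hypothetical helper, not project code) of the same box:

def right_crop_box(width: int, height: int, aspect_ratio: int = 2):
    # poster width = height/3 * aspect_ratio; the default 2 gives the usual 2:3 poster
    crop_w = int(height / 3) * aspect_ratio
    return (max(width - crop_w, 0), 0, width, height)

print(right_crop_box(1800, 1200))  # -> (1000, 0, 1800, 1200)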


@@ -1,4 +1,8 @@
import hog
import sys
sys.path.append('../')
from ImageProcessing.hog import face_center as hog_face_center
def face_center(filename, model):
return hog.face_center(filename, model)
return hog_face_center(filename, model)


@@ -7,18 +7,20 @@ SHELL = /bin/bash
.DEFAULT: make
make:
#@echo "[+]make prepare-dev"
#sudo apt-get -y install python3.7 python3-pip
#pip3 install -r requirements.txt
#pip3 install pyinstaller
@echo "[+]make prepare-dev"
#sudo apt-get -y install python3 python3-pip
pip3 install -r requirements.txt
pip3 install pyinstaller
#@echo "[+]Set CLOUDSCRAPER_PATH variable"
#export cloudscraper_path=$(python3 -c 'import cloudscraper as _; print(_.__path__[0])' | tail -n 1)
@echo "[+]Pyinstaller make"
pyinstaller --onefile AV_Data_Capture.py --hidden-import ADC_function.py --hidden-import core.py \
pyinstaller --onefile Movie_Data_Capture.py --hidden-import ADC_function.py --hidden-import core.py \
--hidden-import "ImageProcessing.cnn" \
--add-data "`python3 -c 'import cloudscraper as _; print(_.__path__[0])' | tail -n 1`:cloudscraper" \
--add-data "`python3 -c 'import opencc as _; print(_.__path__[0])' | tail -n 1`:opencc" \
--add-data "`python3 -c 'import face_recognition_models as _; print(_.__path__[0])' | tail -n 1`:face_recognition_models" \
--add-data "Img:Img" \
--add-data "config.ini:." \

File diff suppressed because it is too large.

File diff suppressed because it is too large.


@@ -1,4 +1,4 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<!-- 说明:可使用文本编辑器打开本文件后自行编辑。
keyword用于匹配标签/导演/系列/制作/发行的关键词,每个名字前后都需要用逗号隔开。当其中包含刮削得到的关键词时,可以输出对应语言的词。
zh_cn/zh_tw/jp指对应语言输出的词按设置的对应语言输出。当输出词为“删除”时表示遇到该关键词时在对应内容中删除该关键词-->
@@ -575,7 +575,7 @@ zh_cn/zh_tw/jp指对应语言输出的词按设置的对应语言输出。
<a zh_cn="一本道" zh_tw="一本道" jp="一本道" keyword=",一本道,"/>
<a zh_cn="加勒比" zh_tw="加勒比" jp="加勒比" keyword=",加勒比,カリビアンコム,"/>
<a zh_cn="东京热" zh_tw="東京熱" jp="TOKYO-HOT" keyword=",东京热,東京熱,東熱,TOKYO-HOT,"/>
<a zh_cn="SOD" zh_tw="SOD" jp="SOD" keyword=",SOD,SODクリエイト,サディスティックヴィレッジ,"/>
<a zh_cn="SOD" zh_tw="SOD" jp="SOD" keyword=",SOD,SODクリエイト,"/>
<a zh_cn="PRESTIGE" zh_tw="PRESTIGE" jp="PRESTIGE" keyword=",PRESTIGE,プレステージ,"/>
<a zh_cn="MOODYZ" zh_tw="MOODYZ" jp="MOODYZ" keyword=",MOODYZ,ムーディーズ,"/>
<a zh_cn="ROCKET" zh_tw="ROCKET" jp="ROCKET" keyword=",ROCKET,"/>
@@ -600,28 +600,5 @@ zh_cn/zh_tw/jp指对应语言输出的词按设置的对应语言输出。
<a zh_cn="WANZ" zh_tw="WANZ" jp="WANZ" keyword=",WANZ,ワンズファクトリー,"/>
<a zh_cn="BeFree" zh_tw="BeFree" jp="BeFree" keyword=",BeFree,"/>
<a zh_cn="MAX-A" zh_tw="MAX-A" jp="MAX-A" keyword=",MAX-A,マックスエー,"/>
<!-- 2021-11-8 Update -->
<a zh_cn="Energy" zh_tw="Energy" jp="アイエナジー" keyword=",アイエナジー,"/>
<a zh_cn="Idea Pocket" zh_tw="Idea Pocket" jp="アイデアポケット" keyword=",アイデアポケット,"/>
<a zh_cn="AKNR" zh_tw="AKNR" jp="アキノリ" keyword=",アキノリ,"/>
<a zh_cn="Attackers" zh_tw="Attackers" jp="アタッカーズ" keyword=",アタッカーズ,"/>
<a zh_cn="Alice Japan" zh_tw="Alice Japan" jp="アリスJAPAN" keyword=",アリスJAPAN,"/>
<a zh_cn="Aurora Project Annex" zh_tw="Aurora Project Annex" jp="オーロラプロジェクト・アネックス" keyword=",オーロラプロジェクト・アネックス,"/>
<a zh_cn="Crystal 映像" zh_tw="Crystal 映像" jp="クリスタル映像" keyword=",クリスタル映像,"/>
<a zh_cn="Glory Quest" zh_tw="Glory Quest" jp="グローリークエスト" keyword=",グローリークエスト,"/>
<a zh_cn="DAS" zh_tw="DAS" jp="ダスッ!" keyword=",ダスッ!,"/>
<a zh_cn="DEEPs" zh_tw="DEEPs" jp="ディープス" keyword=",ディープス,"/>
<a zh_cn="Dogma" zh_tw="Dogma" jp="ドグマ" keyword=",ドグマ,"/>
<a zh_cn="宇宙企画" zh_tw="宇宙企画" jp="メディアステーション" keyword=",メディアステーション,"/>
<a zh_cn="WANZ FACTORY" zh_tw="WANZ FACTORY" jp="ワンズファクトリー" keyword=",ワンズファクトリー,"/>
<a zh_cn="VR PRODUCE" zh_tw="VR PRODUCE" jp="VRプロダクツ" keyword=",VRプロダクツ,VRPRODUCE,"/>
<a zh_cn="Real Works" zh_tw="Real Works" jp="レアルワークス" keyword=",レアルワークス,"/>
<a zh_cn="MAX-A" zh_tw="MAX-A" jp="マックスエー" keyword=",マックスエー,"/>
<a zh_cn="PETERS MAX" zh_tw="PETERS MAX" jp="ピーターズMAX" keyword=",ピーターズMAX,"/>
<a zh_cn="NATURAL HIGH" zh_tw="NATURAL HIGH" jp="ナチュラルハイ" keyword=",ナチュラルハイ,"/>
<a zh_cn="MAXING" zh_tw="MAXING" jp="マキシング" keyword=",マキシング,"/>
<a zh_cn="Ms Video Group" zh_tw="Ms Video Group" jp="エムズビデオグループ" keyword=",エムズビデオグループ,"/>
<a zh_cn="Minimum" zh_tw="Minimum" jp="ミニマム" keyword=",ミニマム,"/>
<a zh_cn="WAAP Entertainment" zh_tw="WAAP Entertainment" jp="ワープエンタテインメント" keyword=",ワープエンタテインメント,"/>
<a zh_cn="pacopacomama" zh_tw="pacopacomama" jp="パコパコママ" keyword=",pacopacomama,パコパコママ,"/>
</info>


@@ -18,7 +18,7 @@ from opencc import OpenCC
import config
from ADC_function import file_modification_days, get_html, parallel_download_files
from number_parser import get_number
from core import core_main, moveFailedFolder
from core import core_main, core_main_no_net_op, moveFailedFolder
def check_update(local_version):
@@ -40,7 +40,7 @@ def check_update(local_version):
print("[*]======================================================")
def argparse_function(ver: str) -> typing.Tuple[str, str, str, str, bool]:
def argparse_function(ver: str) -> typing.Tuple[str, str, str, str, bool, bool]:
conf = config.getInstance()
parser = argparse.ArgumentParser(epilog=f"Load Config file '{conf.ini_path}'.")
parser.add_argument("file", default='', nargs='?', help="Single Movie file path.")
@@ -49,6 +49,8 @@ def argparse_function(ver: str) -> typing.Tuple[str, str, str, str, bool]:
help="Main mode. 1:Scraping 2:Organizing 3:Scraping in analysis folder")
parser.add_argument("-n", "--number", default='', nargs='?', help="Custom file number of single movie file.")
# parser.add_argument("-C", "--config", default='config.ini', nargs='?', help="The config file Path.")
parser.add_argument("-L", "--link-mode", default='', nargs='?',
help="Create movie file link. 0:moving movie file, do not create link 1:soft link 2:try hard link first")
default_logdir = str(Path.home() / '.mlogs')
parser.add_argument("-o", "--log-dir", dest='logdir', default=default_logdir, nargs='?',
help=f"""Duplicate stdout and stderr to logfiles in logging folder, default on.
@@ -60,12 +62,22 @@ def argparse_function(ver: str) -> typing.Tuple[str, str, str, str, bool]:
help="Override nfo_skip_days value in config.")
parser.add_argument("-c", "--stop-counter", dest='cnt', default='', nargs='?',
help="Override stop_counter value in config.")
parser.add_argument("-R", "--rerun-delay", dest='delaytm', default='', nargs='?',
help="Delay (eg. 1h10m30s or 60 (second)) time and rerun, until all movies proceed. Note: stop_counter value in config or -c must none zero.")
parser.add_argument("-i", "--ignore-failed-list", action="store_true", help="Ignore failed list '{}'".format(
os.path.join(os.path.abspath(conf.failed_folder()), 'failed_list.txt')))
parser.add_argument("-a", "--auto-exit", action="store_true",
help="Auto exit after program complete")
parser.add_argument("-g", "--debug", action="store_true",
help="Turn on debug mode to generate diagnostic log for issue report.")
parser.add_argument("-N", "--no-network-operation", action="store_true",
help="No network query, do not get metadata, for cover cropping purposes, only takes effect when main mode is 3.")
parser.add_argument("-w", "--website", dest='site', default='', nargs='?',
help="Override [priority]website= in config.")
parser.add_argument("-D", "--download-images", dest='dnimg', action="store_true",
help="Override [common]download_only_missing_images=0 force invoke image downloading.")
parser.add_argument("-C", "--config-override", dest='cfgcmd', default='', nargs='?',
help="Common use config override. grammar: section:key=value[;[section:]key=value] eg. 'de:s=1' or 'debug_mode:switch=1' override[debug_mode]switch=1")
parser.add_argument("-z", "--zero-operation", dest='zero_op', action="store_true",
help="""Only show job list of files and numbers, and **NO** actual operation
is performed. It may help you correct wrong numbers before real job.""")
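Per the -C help text above, overrides chain with ';' and the section prefix may be abbreviated or omitted after the first entry; hedged examples, inferred from that help text, of strings conf.set_override accepts:

conf.set_override('debug_mode:switch=1')                  # full form
conf.set_override('de:s=1')                               # abbreviated, same override (the help's own example)
conf.set_override('common:main_mode=3;nfo_skip_days=30')  # ';' chains overrides; later section is optional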
@@ -73,24 +85,40 @@ is performed. It may help you correct wrong numbers before real job.""")
args = parser.parse_args()
def get_natural_number_or_none(value):
return int(value) if isinstance(value, str) and value.isnumeric() and int(value) >= 0 else None
def set_natural_number_or_none(sk, value):
if isinstance(value, str) and value.isnumeric() and int(value) >= 0:
conf.set_override(f'{sk}={value}')
def get_str_or_none(value):
return value if isinstance(value, str) and len(value) else None
def set_str_or_none(sk, value):
if isinstance(value, str) and len(value):
conf.set_override(f'{sk}={value}')
def get_bool_or_none(value):
return True if isinstance(value, bool) and value else None
def set_bool_or_none(sk, value):
if isinstance(value, bool) and value:
conf.set_override(f'{sk}=1')
config.G_conf_override["common:main_mode"] = get_natural_number_or_none(args.main_mode)
config.G_conf_override["common:source_folder"] = get_str_or_none(args.path)
config.G_conf_override["common:auto_exit"] = get_bool_or_none(args.auto_exit)
config.G_conf_override["common:nfo_skip_days"] = get_natural_number_or_none(args.days)
config.G_conf_override["common:stop_counter"] = get_natural_number_or_none(args.cnt)
config.G_conf_override["common:ignore_failed_list"] = get_bool_or_none(args.ignore_failed_list)
config.G_conf_override["debug_mode:switch"] = get_bool_or_none(args.debug)
set_natural_number_or_none("common:main_mode", args.main_mode)
set_natural_number_or_none("common:link_mode", args.link_mode)
set_str_or_none("common:source_folder", args.path)
set_bool_or_none("common:auto_exit", args.auto_exit)
set_natural_number_or_none("common:nfo_skip_days", args.days)
set_natural_number_or_none("common:stop_counter", args.cnt)
set_bool_or_none("common:ignore_failed_list", args.ignore_failed_list)
set_str_or_none("common:rerun_delay", args.delaytm)
set_str_or_none("priority:website", args.site)
if isinstance(args.dnimg, bool) and args.dnimg:
conf.set_override("common:download_only_missing_images=0")
set_bool_or_none("debug_mode:switch", args.debug)
if isinstance(args.cfgcmd, str) and len(args.cfgcmd.strip()):
conf.set_override(args.cfgcmd.strip())
return args.file, args.number, args.logdir, args.regexstr, args.zero_op
no_net_op = False
if conf.main_mode() == 3:
no_net_op = args.no_network_operation
if no_net_op:
conf.set_override("common:stop_counter=0;rerun_delay=0s;face:aways_imagecut=1")
return args.file, args.number, args.logdir, args.regexstr, args.zero_op, no_net_op
class OutLogger(object):
@@ -113,9 +141,12 @@ class OutLogger(object):
self.log.write(msg)
def flush(self):
self.term.flush()
self.log.flush()
os.fsync(self.log.fileno())
if 'flush' in dir(self.term):
self.term.flush()
if 'flush' in dir(self.log):
self.log.flush()
if 'fileno' in dir(self.log):
os.fsync(self.log.fileno())
def close(self):
if self.term is not None:
@@ -244,39 +275,42 @@ def close_logfile(logdir: str):
except:
pass
# step 3: merge monthly logs into the yearly log
if today.month < 4:
return
mons = [f for f in log_dir.glob(r'*.txt') if re.match(r'^mdc_\d{6}$', f.stem, re.A)]
if not mons or not len(mons):
return
mons.sort()
deadline_year = f'mdc_{today.year - 1}13'
year_merge = [f for f in mons if f.stem < deadline_year]
if not year_merge or not len(year_merge):
return
toyear = len('12.txt') # cut length mdc_2020|12.txt
for f in year_merge:
try:
year_file_name = str(f)[:-toyear] + '.txt' # mdc_2020.txt
with open(year_file_name, 'a', encoding='utf-8') as y:
y.write(f.read_text(encoding='utf-8'))
f.unlink(missing_ok=True)
except:
pass
for i in range(1):
if today.month < 4:
break
mons = [f for f in log_dir.glob(r'*.txt') if re.match(r'^mdc_\d{6}$', f.stem, re.A)]
if not mons or not len(mons):
break
mons.sort()
deadline_year = f'mdc_{today.year - 1}13'
year_merge = [f for f in mons if f.stem < deadline_year]
if not year_merge or not len(year_merge):
break
toyear = len('12.txt') # cut length mdc_2020|12.txt
for f in year_merge:
try:
year_file_name = str(f)[:-toyear] + '.txt' # mdc_2020.txt
with open(year_file_name, 'a', encoding='utf-8') as y:
y.write(f.read_text(encoding='utf-8'))
f.unlink(missing_ok=True)
except:
pass
# step 4: compressing the yearly logs. If you need compression, do it by hand or with an external
# scheduled script. GNU lzip is recommended: for text logs of this granularity it currently has the
# best compression ratio; with lzip -9 the ratio beats xz -9 while using less memory, utilizing
# multiple cores better (plzip, the multithreaded build) and decompressing faster. Compressed size
# is roughly 2.4%-3.7% of the original; a 100MB log shrinks to about 3.7MB.
return filepath
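The month-to-year merge above now sits inside `for i in range(1):` so each early return becomes a break and control still reaches the final `return filepath`. The pattern in miniature (illustrative only, not project code):

def one_pass_block(ready: bool) -> str:
    for _ in range(1):      # single-pass block: 'break' jumps past it
        if not ready:
            break           # formerly an early 'return'
        print('merge logs')
    return 'filepath'       # now reached on every path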
def signal_handler(*args):
print('[!]Ctrl+C detected, Exit.')
sys.exit(9)
os._exit(9)
def sigdebug_handler(*args):
config.G_conf_override["debug_mode:switch"] = not config.G_conf_override["debug_mode:switch"]
print('[!]Debug {}'.format('On' if config.getInstance().debug() else 'oFF'))
conf = config.getInstance()
conf.set_override(f"debug_mode:switch={int(not conf.debug())}")
print(f"[!]Debug {('oFF', 'On')[int(conf.debug())]}")
# Added: skip files on the failed list; skip by .nfo modification age with a count of skipped videos; in debug mode (-g) list each skipped file; skip small ad clips
@@ -285,7 +319,7 @@ def movie_lists(source_folder, regexstr: str) -> typing.List[str]:
main_mode = conf.main_mode()
debug = conf.debug()
nfo_skip_days = conf.nfo_skip_days()
soft_link = conf.soft_link()
link_mode = conf.link_mode()
file_type = conf.media_type().lower().split(",")
trailerRE = re.compile(r'-trailer\.', re.IGNORECASE)
cliRE = None
@@ -296,7 +330,7 @@ def movie_lists(source_folder, regexstr: str) -> typing.List[str]:
pass
failed_list_txt_path = Path(conf.failed_folder()).resolve() / 'failed_list.txt'
failed_set = set()
if (main_mode == 3 or soft_link) and not conf.ignore_failed_list():
if (main_mode == 3 or link_mode) and not conf.ignore_failed_list():
try:
flist = failed_list_txt_path.read_text(encoding='utf-8').splitlines()
failed_set = set(flist)
@@ -327,20 +361,24 @@ def movie_lists(source_folder, regexstr: str) -> typing.List[str]:
print('[!]Skip failed movie:', absf)
continue
is_sym = full_name.is_symlink()
if main_mode != 3 and (is_sym or full_name.stat().st_nlink > 1): # short-circuit boolean: don't stat() symlinks, the target may not exist
continue # file is symlink or hardlink (Linux/NTFS/Darwin)
if main_mode != 3 and (is_sym or (full_name.stat().st_nlink > 1 and not conf.scan_hardlink())): # short-circuit boolean: don't stat() symlinks, the target may not exist
continue # outside mode 3, skip symlinks, and skip hardlinks unless hardlink scanning is configured
# let 0-byte debug samples through; drop ads smaller than 120MB, e.g. '苍老师强力推荐.mp4'(102.2MB)'黑道总裁.mp4'(98.4MB)'有趣的妹子激情表演.MP4'(95MB)'有趣的臺灣妹妹直播.mp4'(15.1MB)
movie_size = 0 if is_sym else full_name.stat().st_size # as above, don't stat() symlinks; st_size of 0 skips the small-video check
if 0 < movie_size < 125829120: # 1024*1024*120=125829120
continue
if cliRE and not cliRE.search(absf) or trailerRE.search(full_name.name):
continue
if main_mode == 3 and nfo_skip_days > 0 and file_modification_days(
full_name.with_suffix('.nfo')) <= nfo_skip_days:
skip_nfo_days_cnt += 1
if debug:
print(f"[!]Skip movie by it's .nfo which modified within {nfo_skip_days} days: '{absf}'")
continue
if main_mode == 3:
nfo = full_name.with_suffix('.nfo')
if not nfo.is_file():
if debug:
print(f"[!]Metadata {nfo.name} not found for '{absf}'")
elif nfo_skip_days > 0 and file_modification_days(nfo) <= nfo_skip_days:
skip_nfo_days_cnt += 1
if debug:
print(f"[!]Skip movie by it's .nfo which modified within {nfo_skip_days} days: '{absf}'")
continue
total.append(absf)
if skip_failed_cnt:
@@ -348,13 +386,13 @@ def movie_lists(source_folder, regexstr: str) -> typing.List[str]:
if skip_nfo_days_cnt:
print(
f"[!]Skip {skip_nfo_days_cnt} movies in source folder '{source}' who's .nfo modified within {nfo_skip_days} days.")
if nfo_skip_days <= 0 or not soft_link or main_mode == 3:
if nfo_skip_days <= 0 or not link_mode or main_mode == 3:
return total
# in link mode, already-scraped movies in the success folder also need their .nfo age checked; skip those updated within N days
skip_numbers = set()
success_folder = Path(conf.success_folder()).resolve()
for f in success_folder.glob(r'**/*'):
if not re.match(r'\.nfo', f.suffix, re.IGNORECASE):
if not re.match(r'\.nfo$', f.suffix, re.IGNORECASE):
continue
if file_modification_days(f) > nfo_skip_days:
continue
@@ -388,7 +426,7 @@ def create_failed_folder(failed_folder: str):
os.makedirs(failed_folder)
except:
print(f"[-]Fatal error! Can not make folder '{failed_folder}'")
sys.exit(0)
os._exit(0)
def rm_empty_folder(path):
@@ -405,38 +443,44 @@ def rm_empty_folder(path):
pass
def create_data_and_move(file_path: str, zero_op, oCC):
def create_data_and_move(movie_path: str, zero_op: bool, no_net_op: bool, oCC):
# Normalized number, eg: 111xxx-222.mp4 -> xxx-222.mp4
debug = config.getInstance().debug()
n_number = get_number(debug, os.path.basename(file_path))
file_path = os.path.abspath(file_path)
n_number = get_number(debug, os.path.basename(movie_path))
movie_path = os.path.abspath(movie_path)
if debug is True:
print(f"[!] [{n_number}] As Number making data for '{file_path}'")
print(f"[!] [{n_number}] As Number making data for '{movie_path}'")
if zero_op:
return
if n_number:
core_main(file_path, n_number, oCC)
if no_net_op:
core_main_no_net_op(movie_path, n_number)
else:
core_main(movie_path, n_number, oCC)
else:
print("[-] number empty ERROR")
moveFailedFolder(file_path)
moveFailedFolder(movie_path)
print("[*]======================================================")
else:
try:
print(f"[!] [{n_number}] As Number making data for '{file_path}'")
print(f"[!] [{n_number}] As Number making data for '{movie_path}'")
if zero_op:
return
if n_number:
core_main(file_path, n_number, oCC)
if no_net_op:
core_main_no_net_op(movie_path, n_number)
else:
core_main(movie_path, n_number, oCC)
else:
raise ValueError("number empty")
print("[*]======================================================")
except Exception as err:
print(f"[-] [{file_path}] ERROR:")
print(f"[-] [{movie_path}] ERROR:")
print('[-]', err)
try:
moveFailedFolder(file_path)
moveFailedFolder(movie_path)
except Exception as err:
print('[!]', err)
@@ -455,7 +499,7 @@ def create_data_and_move_with_custom_number(file_path: str, custom_number, oCC):
print("[-] [{}] ERROR:".format(file_path))
print('[-]', err)
if conf.soft_link():
if conf.link_mode():
print("[-]Link {} to failed folder".format(file_path))
os.symlink(file_path, os.path.join(conf.failed_folder(), file_name))
else:
@@ -466,23 +510,14 @@ def create_data_and_move_with_custom_number(file_path: str, custom_number, oCC):
print('[!]', err)
def main():
version = '6.0.2'
urllib3.disable_warnings() # Ignore http proxy warning
# Read config.ini first, in argparse_function() need conf.failed_folder()
conf = config.Config("config.ini")
# Parse command line args
single_file_path, custom_number, logdir, regexstr, zero_op = argparse_function(version)
def main(args: tuple) -> Path:
(single_file_path, custom_number, logdir, regexstr, zero_op, no_net_op) = args
conf = config.getInstance()
main_mode = conf.main_mode()
folder_path = ""
if main_mode not in (1, 2, 3):
print(f"[-]Main mode must be 1 or 2 or 3! You can run '{os.path.basename(sys.argv[0])} --help' for more help.")
sys.exit(4)
os._exit(4)
signal.signal(signal.SIGINT, signal_handler)
if sys.platform == 'win32':
@@ -508,8 +543,8 @@ def main():
print(f"[+]Load Config file '{conf.ini_path}'.")
if conf.debug():
print('[+]Enable debug')
if conf.soft_link():
print('[!]Enable soft link')
if conf.link_mode() in (1, 2):
print('[!]Enable {} link'.format(('soft', 'hard')[conf.link_mode() - 1]))
if len(sys.argv) > 1:
print('[!]CmdLine:', " ".join(sys.argv[1:]))
print('[+]Main Working mode ## {}: {} ## {}{}{}'
@@ -521,7 +556,10 @@ def main():
)
if conf.update_check():
check_update(version)
try:
check_update(version)
except Exception as e:
print('[-]Update check failed!',e)
create_failed_folder(conf.failed_folder())
@@ -539,15 +577,21 @@ def main():
if file_modification_days(str(v)) >= conf.mapping_table_validity():
print("[+]Mapping Table Out of date! Remove", str(v))
os.remove(str(v))
res = parallel_download_files(((k, v) for k, v in map_tab if not v.exists()))
for i, fp in enumerate(res, start=1):
if fp and len(fp):
print(f"[+] [{i}/{len(res)}] Mapping Table Downloaded to {fp}")
else:
print(f"[-] [{i}/{len(res)}] Mapping Table Download failed")
print("[-] --- AUTO EXIT AFTER 30s !!! --- ")
time.sleep(30)
os._exit(-1)
try:
res = parallel_download_files(((k, v) for k, v in map_tab if not v.exists()))
for i, fp in enumerate(res, start=1):
if fp and len(fp):
print(f"[+] [{i}/{len(res)}] Mapping Table Downloaded to {fp}")
else:
print(f"[-] [{i}/{len(res)}] Mapping Table Download failed")
except Exception as e:
print("[!] ==================== ERROR ====================")
print("[!] " + "Mapping Table Download FAILED".center(47))
print("[!] " + "无法连接github".center(47))
print("[!] " + "请过几小时再试试".center(47))
print("[-] " + "------ AUTO EXIT AFTER 30s !!! ------ ".center(47))
time.sleep(30)
os._exit(-1)
# create OpenCC converter
ccm = conf.cc_convert_mode()
@@ -587,7 +631,7 @@ def main():
percentage = str(count / int(count_all) * 100)[:4] + '%'
print('[!] {:>30}{:>21}'.format('- ' + percentage + ' [' + str(count) + '/' + count_all + '] -',
time.strftime("%H:%M:%S")))
create_data_and_move(movie_path, zero_op, oCC)
create_data_and_move(movie_path, zero_op, no_net_op, oCC)
if count >= stop_count:
print("[!]Stop counter triggered!")
break
@@ -605,14 +649,68 @@ def main():
print("[+]All finished!!!")
close_logfile(logdir)
return close_logfile(logdir)
if not conf.auto_exit():
input("Press enter key exit, you can check the error message before you exit...")
sys.exit(0)
def 分析日志文件(logfile):
try:
if not (isinstance(logfile, Path) and logfile.is_file()):
raise FileNotFoundError('log file not found')
logtxt = logfile.read_text(encoding='utf-8')
扫描电影数 = int(re.findall(r'\[\+]Find (.*) movies\.', logtxt)[0])
已处理 = int(re.findall(r'\[1/(.*?)] -', logtxt)[0])
完成数 = logtxt.count(r'[+]Wrote!')
return 扫描电影数, 已处理, 完成数
except:
return None, None, None
def period(delta, pattern):
d = {'d': delta.days}
d['h'], rem = divmod(delta.seconds, 3600)
d['m'], d['s'] = divmod(rem, 60)
return pattern.format(**d)
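period() just splits a timedelta into day/hour/minute/second fields for str.format, as the rerun loop below uses it; for instance:

from datetime import timedelta

# assuming period() as defined above
print(period(timedelta(seconds=3725), '{h}:{m:02}:{s:02}'))                  # -> 1:02:05
print(period(timedelta(days=2, seconds=90), '{d} days {h}:{m:02}:{s:02}'))  # -> 2 days 0:01:30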
if __name__ == '__main__':
multiprocessing.freeze_support()
main()
version = '6.1.1'
urllib3.disable_warnings() # Ignore http proxy warning
app_start = time.time()
# Read config.ini first, in argparse_function() need conf.failed_folder()
conf = config.Config("config.ini")
# Parse command line args
args = tuple(argparse_function(version))
再运行延迟 = conf.rerun_delay()
if 再运行延迟 > 0 and conf.stop_counter() > 0:
while True:
try:
logfile = main(args)
(扫描电影数, 已处理, 完成数) = 分析结果元组 = tuple(分析日志文件(logfile))
if all(isinstance(v, int) for v in 分析结果元组):
剩余个数 = 扫描电影数 - 已处理
总用时 = timedelta(seconds = time.time() - app_start)
print(f'All movies:{扫描电影数} processed:{已处理} successes:{完成数} remain:{剩余个数}' +
' Elapsed time {}'.format(
period(总用时, "{d} day {h}:{m:02}:{s:02}") if 总用时.days == 1
else period(总用时, "{d} days {h}:{m:02}:{s:02}") if 总用时.days > 1
else period(总用时, "{h}:{m:02}:{s:02}")))
if 剩余个数 == 0:
break
下次运行 = datetime.now() + timedelta(seconds=再运行延迟)
print(f'Next run time: {下次运行.strftime("%H:%M:%S")}, rerun_delay={再运行延迟}, press Ctrl+C stop run.')
time.sleep(再运行延迟)
else:
break
except:
break
else:
main(args)
if not conf.auto_exit():
if sys.platform == 'win32':
input("Press enter key exit, you can check the error message before you exit...")
sys.exit(0)


@@ -25,7 +25,7 @@ CLI 版本
# 文档
* [官方教程WIKI](https://github.com/yoshiko2/Movie_Data_Capture/wiki)
* [VergilGao's Docker部署](https://github.com/VergilGao/docker-avdc)
* [VergilGao's Docker部署](https://github.com/VergilGao/docker-mdc)
# 下载
* [Releases](https://github.com/yoshiko2/Movie_Data_Capture/releases/latest)
@@ -36,43 +36,40 @@ CLI 版本
# 申明
当你查阅、下载了本项目源代码或二进制程序,即代表你接受了以下条款
*软件仅供技术交流,学术交流使用
*项目和项目成果仅供技术,学术交流和Python3性能测试使用
* **请勿在墙内的社交平台上宣传此项目**
*软件作者编写出该软件旨在学习 Python ,提高编程水平
*软件不提供任何影片下载的线索
* 用户在使用本软件前,请用户了解并遵守当地法律法规,如果本软件使用过程中存在违反当地法律法规的行为,请勿使用该软件
* 用户在使用本软件时,若用户在当地产生一切违法行为由用户承担
* 严禁用户将本软件使用于商业和个人其他意图
*项目贡献者编写该项目旨在学习Python3 ,提高编程水平
*项目不提供任何影片下载的线索
* 用户在使用本项目和项目成果前,请用户了解并遵守当地法律法规,如果本项目及项目成果使用过程中存在违反当地法律法规的行为,请勿使用该项目及项目成果
* 用户在使用本项目和项目成果时,若用户在当地产生一切违法行为由用户承担
* 严禁用户将本项目和项目成果使用于商业和个人其他意图
* 源代码和二进制程序请在下载后24小时内删除
* 出售源码者的母亲会升天
* 本项目发起者yoshiko2保留最终决定权和最终解释权
* 若用户不同意上述条款任意一条,请勿使用本软件
* 用户使用本项目及项目成果所造成的一切后果由用户自行承担,贡献者概不负责
* 若用户不同意上述条款任意一条,请勿使用本项目和项目成果
---
When you view and download the source code or binary program of this project, it means that you have accepted the following terms
* This software is only for technical exchange and academic exchange
* This project is only for technical exchange, academic exchange and Python3 performance test
* **Please do not promote this project on popular social platforms**
* The software author wrote this software to learn Python and improve programming
* This software does not provide any clues for video download
* Before using this software, please understand and abide by local laws and regulations. If there is any violation of local laws and regulations during the use of this software, * please do not use this software
* When the user uses this software, if the user has any illegal acts in the local area, the user shall bear
* It is strictly forbidden for users to use this software for commercial and personal intentions
* The project contributors wrote this project to learn Python and improve their programming
* This project does not provide any clues for video download
* Before using this project and its results, please understand and abide by your local laws and regulations; if using them would violate those laws and regulations, do not use them
* Users bear sole responsibility for any illegal acts committed locally while using this project and its results
* It is strictly forbidden to use this project and its results for commercial or other personal intentions
* Please delete the source code and binary program within 24 hours after downloading
* The mother of the source seller will die
* The author of this software yoshiko2 reserves the right of final decision and final interpretation
* If the user does not agree with any of the above terms, please do not use this software
* All consequences of the user's use of this project and its results shall be borne by the user; the contributors accept no responsibility
* If the user does not agree with any of the above terms, please do not use this project or its results
---
本プロジェクトのソースコード、バイナリファイルをダウンロード、または表示するしたうえで、あなたは本規約に同意したものと見なします。
* このソフトウェアは、開発技術学習することのみに使用できます。
* このプロジェクトは、開発技術学習、Python3性能テストすることのみに使用できます。
* **ソーシャルメディアで本プロジェクトの宣伝をご遠慮ください**
* 者はPythonの勉強と技術力の向上のために、このソフトウェアを作成しました
*ソフトウェアは、あらゆる動画ダウンロード機能一切提供しません
*ソフトウェアを使用する前に、現地の法律規範をよく理解する必要があります。あなたは、適用される現地の法令を順守する責任を負います
*ソフトウェアを使用した結果生じた損害や法的責任につきまして作者は一切責任を負いません
*ソフトウェアを商用、業務、その他の営利目的のために使用することは一切禁止します。
* 貢献者はPythonの勉強と技術力の向上のために、このソフトウェアを作成しました
*プロジェクトは、あらゆる動画ダウンロード機能一切提供しません
*プロジェクトとプロジェクトの成果を使用する前に、現地の法律規範をよく理解する必要があります。あなたは、適用される現地の法令を順守する責任を負います
*プロジェクトとプロジェクトの成果を使用した結果生じた損害や法的責任につきまして作者は一切責任を負いません
*プロジェクトとプロジェクトの成果を商用、業務、その他の営利目的のために使用することは一切禁止します。
* 本プロジェクトのソースコード、バイナリファイルをダウンロードした場合、24時間以内に削除してください
* 元売り手の母親が天に召される
* 最終解釈権は作者yoshiko2に属します
* ユーザーによるこのプロジェクトの使用およびプロジェクトの結果によって引き起こされるすべての結果は、ユーザーが負担するものとし、寄稿者は責任を負わないものとします。
* 本規約およびすべての適用法、規約および規則を遵守する場合にのみ本ソフトウェアを使用することができます


@@ -24,6 +24,7 @@ from . import carib
from . import fc2club
from . import mv91
from . import madou
from . import gcolle
def get_data_state(data: dict) -> bool: # 元数据获取失败检测
@@ -62,7 +63,8 @@ def get_data_from_json(file_number, oCC):
"carib": carib.main,
"fc2club": fc2club.main,
"mv91": mv91.main,
"madou": madou.main
"madou": madou.main,
"gcolle": gcolle.main,
}
conf = config.getInstance()
@@ -91,6 +93,8 @@ def get_data_from_json(file_number, oCC):
sources.insert(0, sources.pop(sources.index("fc2")))
if "fc2club" in sources:
sources.insert(0, sources.pop(sources.index("fc2club")))
elif "gcolle" in sources and (re.search("\d{6}", file_number)):
sources.insert(0, sources.pop(sources.index("gcolle")))
elif "dlsite" in sources and (
"rj" in lo_file_number or "vj" in lo_file_number
):
@@ -100,6 +104,12 @@ def get_data_from_json(file_number, oCC):
sources.insert(0, sources.pop(sources.index("javdb")))
if "xcity" in sources:
sources.insert(0, sources.pop(sources.index("xcity")))
if "madou" in sources:
sources.insert(0, sources.pop(sources.index("madou")))
elif "madou" in sources and (
re.match(r"^[a-z0-9]{3,}-[0-9]{1,}$", lo_file_number)
):
sources.insert(0, sources.pop(sources.index("madou")))
# check sources in func_mapping
todel = []
@@ -124,7 +134,10 @@ def get_data_from_json(file_number, oCC):
for source in sources:
if conf.debug() == True:
print('[+]select', source)
json_data = json.loads(pool.apply_async(func_mapping[source], (file_number,)).get())
try:
json_data = json.loads(pool.apply_async(func_mapping[source], (file_number,)).get())
except:
json_data = pool.apply_async(func_mapping[source], (file_number,)).get()
# if any service return a valid return, break
if get_data_state(json_data):
print(f"[+]Find movie [{file_number}] metadata on website '{source}'")
@@ -136,7 +149,10 @@ def get_data_from_json(file_number, oCC):
try:
if conf.debug() == True:
print('[+]select', source)
json_data = json.loads(func_mapping[source](file_number))
try:
json_data = json.loads(func_mapping[source](file_number))
except:
json_data = func_mapping[source](file_number)
# if any service return a valid return, break
if get_data_state(json_data):
print(f"[+]Find movie [{file_number}] metadata on website '{source}'")
@@ -242,8 +258,8 @@ def get_data_from_json(file_number, oCC):
if json_data[translate_value] == "":
continue
if translate_value == "title":
title_dict = json.load(
open(str(Path.home() / '.local' / 'share' / 'mdc' / 'c_number.json'), 'r', encoding="utf-8"))
title_dict = json.loads(
(Path.home() / '.local' / 'share' / 'mdc' / 'c_number.json').read_text(encoding="utf-8"))
try:
json_data[translate_value] = title_dict[number]
continue


@@ -5,6 +5,7 @@ from lxml import etree
import json
from ADC_function import *
from WebCrawler.storyline import getStoryline
from WebCrawler.crawler import *
# import io
# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)
@@ -17,95 +18,64 @@ def getActorPhoto(html):
p2 = {t: l}
d.update(p2)
return d
def getTitle(html):
try:
result = str(html.xpath('/html/body/div[2]/h3/text()')).strip(" ['']") #[0]
return result.replace('/', '')
except:
return ''
def getActor(html):
a = html.xpath('//a[@class="avatar-box"]')
d = []
for i in a:
d.append(i.find('span').text)
return d
def getStudio(html):
result1 = str(html.xpath('//p[contains(text(),"制作商: ")]/following-sibling::p[1]/a/text()')).strip(" ['']").replace("', '",' ')
return result1
def getRuntime(html):
result1 = str(html.xpath('//span[contains(text(),"长度:")]/../text()')).strip(" ['分钟']")
return result1
def getLabel(html):
result1 = str(html.xpath('//p[contains(text(),"系列:")]/following-sibling::p[1]/a/text()')).strip(" ['']")
return result1
def getNum(html):
result1 = str(html.xpath('//span[contains(text(),"识别码:")]/../span[2]/text()')).strip(" ['']")
return result1
def getYear(release):
try:
result = str(re.search('\d{4}',release).group())
return result
except:
return release
def getRelease(html):
result1 = str(html.xpath('//span[contains(text(),"发行时间:")]/../text()')).strip(" ['']")
return result1
def getCover(html):
result = str(html.xpath('/html/body/div[2]/div[1]/div[1]/a/img/@src')).strip(" ['']")
return result
def getCover_small(html):
result = str(html.xpath('//*[@id="waterfall"]/div/a/div[1]/img/@src')).strip(" ['']")
return result
def getTag(html):
x = html.xpath('/html/head/meta[@name="keywords"]/@content')[0].split(',')
return [i.strip() for i in x[2:]] if len(x) > 2 else []
def getSeries(html):
try:
result1 = str(html.xpath('//span[contains(text(),"系列:")]/../span[2]/text()')).strip(" ['']")
return result1
except:
return ''
def main(number):
html = get_html('https://tellme.pw/avsox')
site = etree.HTML(html).xpath('//div[@class="container"]/div/a/@href')[0]
site = Crawler(html).getString('//div[@class="container"]/div/a/@href')
a = get_html(site + '/cn/search/' + number)
html = etree.fromstring(a, etree.HTMLParser())
result1 = str(html.xpath('//*[@id="waterfall"]/div/a/@href')).strip(" ['']")
html = Crawler(a)
result1 = html.getString('//*[@id="waterfall"]/div/a/@href')
if result1 == '' or result1 == 'null' or result1 == 'None':
a = get_html(site + '/cn/search/' + number.replace('-', '_'))
html = etree.fromstring(a, etree.HTMLParser())
result1 = str(html.xpath('//*[@id="waterfall"]/div/a/@href')).strip(" ['']")
html = Crawler(a)
result1 = html.getString('//*[@id="waterfall"]/div/a/@href')
if result1 == '' or result1 == 'null' or result1 == 'None':
a = get_html(site + '/cn/search/' + number.replace('_', ''))
html = etree.fromstring(a, etree.HTMLParser())
result1 = str(html.xpath('//*[@id="waterfall"]/div/a/@href')).strip(" ['']")
html = Crawler(a)
result1 = html.getString('//*[@id="waterfall"]/div/a/@href')
detail = get_html("https:" + result1)
lx = etree.fromstring(detail, etree.HTMLParser())
avsox_crawler2 = Crawler(a)
avsox_crawler = Crawler(detail)
try:
new_number = getNum(lx)
new_number = avsox_crawler.getString('//span[contains(text(),"识别码:")]/../span[2]/text()')
if new_number.upper() != number.upper():
raise ValueError('number not found')
title = getTitle(lx).strip(new_number)
title = avsox_crawler.getString('/html/body/div[2]/h3/text()').replace('/','').strip(new_number)
dic = {
'actor': getActor(lx),
'title': title,
'studio': getStudio(lx),
'studio': avsox_crawler.getString('//p[contains(text(),"制作商: ")]/following-sibling::p[1]/a/text()').replace("', '",' '),
'outline': getStoryline(number, title),
'runtime': getRuntime(lx),
'runtime': avsox_crawler.getString('//span[contains(text(),"长度:")]/../text()').replace('分钟',''),
'director': '', #
'release': getRelease(lx),
'release': avsox_crawler.getString('//span[contains(text(),"发行时间:")]/../text()'),
'number': new_number,
'cover': getCover(lx),
'cover_small': getCover_small(html),
'cover': avsox_crawler.getString('/html/body/div[2]/div[1]/div[1]/a/img/@src'),
#'cover_small' : getCover_small(html),
'cover_small': avsox_crawler2.getString('//*[@id="waterfall"]/div/a/div[1]/img/@src'),
'imagecut': 3,
'tag': getTag(lx),
'label': getLabel(lx),
'year': getYear(getRelease(lx)),
'label': avsox_crawler.getString('//p[contains(text(),"系列:")]/following-sibling::p[1]/a/text()'),
'year': re.findall('\d{4}',avsox_crawler.getString('//span[contains(text(),"发行时间:")]/../text()'))[0],
'actor_photo': getActorPhoto(lx),
'website': "https:" + result1,
'source': 'avsox.py',
'series': getSeries(lx),
'series': avsox_crawler.getString('//span[contains(text(),"系列:")]/../span[2]/text()'),
}
except Exception as e:
if config.getInstance().debug():


@@ -40,6 +40,7 @@ def main(number: str) -> json:
'website': f'{G_SITE}/moviepages/{number}/index.html',
'source': 'carib.py',
'series': get_series(lx),
'无码': True
}
js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), )
return js
@@ -59,7 +60,7 @@ def get_year(lx: html.HtmlElement) -> str:
def get_outline(lx: html.HtmlElement, number: str, title: str) -> str:
o = lx.xpath("//div[@class='movie-info section']/p[@itemprop='description']/text()")[0].strip()
g = getStoryline(number, title)
g = getStoryline(number, title, 无码=True)
if len(g):
return g
return o

WebCrawler/crawler.py (new file, 28 lines)

@@ -0,0 +1,28 @@
from lxml import etree
class Crawler:
def __init__(self,htmlcode):
self.html = etree.HTML(htmlcode)
def getString(self,_xpath):
if _xpath == "":
return ""
result = self.html.xpath(_xpath)
try:
return result[0]
except:
return ""
def getStrings(self,_xpath):
result = self.html.xpath(_xpath)
try:
return result
except:
return ""
def getOutline(self,_xpath):
result = self.html.xpath(_xpath)
try:
return "\n".join(result)
except:
return ""


@@ -1,15 +1,14 @@
import re
from lxml import etree
import json
from bs4 import BeautifulSoup
import sys
sys.path.append('../')
from ADC_function import *
# import sys
# import io
# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)
#print(get_html('https://www.dlsite.com/pro/work/=/product_id/VJ013152.html'))
#title //*[@id="work_name"]/a/text()
#print(get_html('https://www.dlsite.com/maniax/work/=/product_id/VJ013152.html'))
#title /html/head/title/text()
#studio //th[contains(text(),"ブランド名")]/../td/span[1]/a/text()
#release //th[contains(text(),"販売日")]/../td/a/text()
#story //th[contains(text(),"シナリオ")]/../td/a/text()
@@ -18,14 +17,14 @@ from ADC_function import *
#jianjie //*[@id="main_inner"]/div[3]/text()
#photo //*[@id="work_left"]/div/div/div[2]/div/div[1]/div[1]/ul/li/img/@src
#https://www.dlsite.com/pro/work/=/product_id/VJ013152.html
#https://www.dlsite.com/maniax/work/=/product_id/VJ013152.html
def getTitle(a):
html = etree.fromstring(a, etree.HTMLParser())
result = html.xpath('//*[@id="work_name"]/a/text()')[0]
def getTitle(html):
result = str(html.xpath('/html/head/title/text()')[0])
result = result[:result.rfind(' | DLsite')]
result = result[:result.rfind(' [')]
return result
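The rewritten getTitle derives the work name from the <title> tag by chopping the trailing ' | DLsite' suffix and then the bracketed brand; on a hypothetical page title:

t = '作品タイトル [ブランド名] | DLsite'  # hypothetical <title> text
t = t[:t.rfind(' | DLsite')]
t = t[:t.rfind(' [')]
print(t)  # -> '作品タイトル'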
def getActor(a): # //*[@id="center_column"]/div[2]/div[1]/div/table/tbody/tr[1]/td/text()
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
def getActor(html): # //*[@id="center_column"]/div[2]/div[1]/div/table/tbody/tr[1]/td/text()
try:
result1 = html.xpath('//th[contains(text(),"声优")]/../td/a/text()')
except:
@@ -38,8 +37,7 @@ def getActorPhoto(actor): #//*[@id="star_qdt"]/li/a/img
p={i:''}
d.update(p)
return d
def getStudio(a):
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
def getStudio(html):
try:
try:
result = html.xpath('//th[contains(text(),"系列名")]/../td/span[1]/a/text()')[0]
@@ -53,8 +51,7 @@ def getRuntime(a):
result1 = str(html.xpath('//strong[contains(text(),"時長")]/../span/text()')).strip(" ['']")
result2 = str(html.xpath('//strong[contains(text(),"時長")]/../span/a/text()')).strip(" ['']")
return str(result1 + result2).strip('+').rstrip('mi')
def getLabel(a):
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
def getLabel(html):
try:
try:
result = html.xpath('//th[contains(text(),"系列名")]/../td/span[1]/a/text()')[0]
@@ -69,12 +66,10 @@ def getYear(getRelease):
return result
except:
return getRelease
def getRelease(a):
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
def getRelease(html):
result1 = html.xpath('//th[contains(text(),"贩卖日")]/../td/a/text()')[0]
return result1.replace('年','-').replace('月','-').replace('日','')
def getTag(a):
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
def getTag(html):
try:
result = html.xpath('//th[contains(text(),"分类")]/../td/div/a/text()')
return result
@@ -96,26 +91,22 @@ def getCover_small(a, index=0):
if not 'https' in result:
result = 'https:' + result
return result
def getCover(htmlcode):
html = etree.fromstring(htmlcode, etree.HTMLParser())
result = html.xpath('//*[@id="work_left"]/div/div/div[2]/div/div[1]/div[1]/ul/li/img/@src')[0]
return result
def getDirector(a):
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
def getCover(html):
result = html.xpath('//*[@id="work_left"]/div/div/div[2]/div/div[1]/div[1]/ul/li[1]/picture/source/@srcset')[0]
return result.replace('.webp', '.jpg')
def getDirector(html):
try:
result = html.xpath('//th[contains(text(),"剧情")]/../td/a/text()')[0]
except:
result = ''
return result
def getOutline(htmlcode):
html = etree.fromstring(htmlcode, etree.HTMLParser())
def getOutline(html):
total = []
result = html.xpath('//*[@id="main_inner"]/div[3]/text()')
result = html.xpath('//*[@class="work_parts_area"]/p/text()')
for i in result:
total.append(i.strip('\r\n'))
return str(total).strip(" ['']").replace("', '', '",r'\n').replace("', '",r'\n').strip(", '', '")
def getSeries(a):
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
def getSeries(html):
try:
try:
result = html.xpath('//th[contains(text(),"系列名")]/../td/span[1]/a/text()')[0]
@@ -127,28 +118,28 @@ def getSeries(a):
def main(number):
try:
number = number.upper()
htmlcode = get_html('https://www.dlsite.com/pro/work/=/product_id/' + number + '.html',
htmlcode = get_html('https://www.dlsite.com/maniax/work/=/product_id/' + number + '.html/?locale=zh_CN',
cookies={'locale': 'zh-cn'})
html = etree.fromstring(htmlcode, etree.HTMLParser())
dic = {
'actor': getActor(htmlcode),
'title': getTitle(htmlcode),
'studio': getStudio(htmlcode),
'outline': getOutline(htmlcode),
'actor': getActor(html),
'title': getTitle(html),
'studio': getStudio(html),
'outline': getOutline(html),
'runtime': '',
'director': getDirector(htmlcode),
'release': getRelease(htmlcode),
'director': getDirector(html),
'release': getRelease(html),
'number': number,
'cover': 'https:' + getCover(htmlcode),
'cover': 'https:' + getCover(html),
'cover_small': '',
'imagecut': 0,
'tag': getTag(htmlcode),
'label': getLabel(htmlcode),
'year': getYear(getRelease(htmlcode)), # str(re.search('\d{4}',getRelease(a)).group()),
'tag': getTag(html),
'label': getLabel(html),
'year': getYear(getRelease(html)), # str(re.search('\d{4}',getRelease(a)).group()),
'actor_photo': '',
'website': 'https://www.dlsite.com/pro/work/=/product_id/' + number + '.html',
'website': 'https://www.dlsite.com/maniax/work/=/product_id/' + number + '.html',
'source': 'dlsite.py',
'series': getSeries(htmlcode),
'series': getSeries(html),
}
js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8')
return js
@@ -166,4 +157,6 @@ def main(number):
# main('DV-1562')
# input("[+][+]Press enter key exit, you can check the error messge before you exit.\n[+][+]按回车键结束,你可以在结束之前查看和错误信息。")
if __name__ == "__main__":
config.getInstance().set_override("debug_mode:switch=1")
print(main('VJ013178'))
print(main('RJ329607'))


@@ -9,130 +9,33 @@ from urllib.parse import urlencode
from lxml import etree
from ADC_function import *
from WebCrawler.crawler import *
# import sys
# import io
# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)
class fanzaCrawler(Crawler):
def getFanzaString(self,string):
result1 = str(self.html.xpath("//td[contains(text(),'"+string+"')]/following-sibling::td/a/text()")).strip(" ['']")
result2 = str(self.html.xpath("//td[contains(text(),'"+string+"')]/following-sibling::td/text()")).strip(" ['']")
return result1+result2
def getTitle(text):
html = etree.fromstring(text, etree.HTMLParser())
result = html.xpath('//*[starts-with(@id, "title")]/text()')[0]
return result
def getFanzaStrings(self, string):
result1 = self.html.xpath("//td[contains(text(),'" + string + "')]/following-sibling::td/a/text()")
if len(result1) > 0:
return result1
result2 = self.html.xpath("//td[contains(text(),'" + string + "')]/following-sibling::td/text()")
return result2
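getFanzaString tries the label cell's sibling link text first, then its plain text; the str(...).strip(" ['']") idiom collapses an empty match list to ''. Against a minimal hypothetical table, assuming the fanzaCrawler class above:

snippet = "<table><tr><td>品番:</td><td>pred00251</td></tr></table>"
print(fanzaCrawler(snippet).getFanzaString('品番:'))  # -> 'pred00251'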
def getActor(text):
# //*[@id="center_column"]/div[2]/div[1]/div/table/tbody/tr[1]/td/text()
html = etree.fromstring(text, etree.HTMLParser())
result = (
str(
html.xpath(
"//td[contains(text(),'出演者')]/following-sibling::td/span/a/text()"
)
)
.strip(" ['']")
.replace("', '", ",")
)
return result
def getRelease(fanza_Crawler):
result = fanza_Crawler.getFanzaString('発売日:')
if result == '----':
result = fanza_Crawler.getFanzaString('配信開始日:')
return result.replace("/", "-").strip('\\n')
def getStudio(text):
html = etree.fromstring(text, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
try:
result = html.xpath(
"//td[contains(text(),'メーカー')]/following-sibling::td/a/text()"
)[0]
except:
result = html.xpath(
"//td[contains(text(),'メーカー')]/following-sibling::td/text()"
)[0]
return result
def getRuntime(text):
html = etree.fromstring(text, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
result = html.xpath("//td[contains(text(),'収録時間')]/following-sibling::td/text()")[0]
return re.search(r"\d+", str(result)).group()
def getLabel(text):
html = etree.fromstring(text, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
try:
result = html.xpath(
"//td[contains(text(),'レーベル:')]/following-sibling::td/a/text()"
)[0]
except:
result = html.xpath(
"//td[contains(text(),'レーベル:')]/following-sibling::td/text()"
)[0]
return result
def getNum(text):
html = etree.fromstring(text, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
try:
result = html.xpath(
"//td[contains(text(),'品番:')]/following-sibling::td/a/text()"
)[0]
except:
result = html.xpath(
"//td[contains(text(),'品番:')]/following-sibling::td/text()"
)[0]
return result
def getYear(getRelease):
try:
result = str(re.search(r"\d{4}", getRelease).group())
return result
except:
return getRelease
def getRelease(text):
html = etree.fromstring(text, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
try:
result = html.xpath(
"//td[contains(text(),'発売日:')]/following-sibling::td/a/text()"
)[0].lstrip("\n")
except:
try:
result = html.xpath(
"//td[contains(text(),'発売日:')]/following-sibling::td/text()"
)[0].lstrip("\n")
except:
result = "----"
if result == "----":
try:
result = html.xpath(
"//td[contains(text(),'配信開始日:')]/following-sibling::td/a/text()"
)[0].lstrip("\n")
except:
try:
result = html.xpath(
"//td[contains(text(),'配信開始日:')]/following-sibling::td/text()"
)[0].lstrip("\n")
except:
pass
return result.replace("/", "-")
def getTag(text):
html = etree.fromstring(text, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
try:
result = html.xpath(
"//td[contains(text(),'ジャンル:')]/following-sibling::td/a/text()"
)
return result
except:
result = html.xpath(
"//td[contains(text(),'ジャンル:')]/following-sibling::td/text()"
)
return result
def getCover(text, number):
html = etree.fromstring(text, etree.HTMLParser())
def getCover(html, number):
cover_number = number
try:
result = html.xpath('//*[@id="' + cover_number + '"]/@href')[0]
@@ -151,29 +54,11 @@ def getCover(text, number):
return result
def getDirector(text):
html = etree.fromstring(text, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
def getOutline(html):
try:
result = html.xpath(
"//td[contains(text(),'監督:')]/following-sibling::td/a/text()"
)[0]
except:
result = html.xpath(
"//td[contains(text(),'監督:')]/following-sibling::td/text()"
)[0]
return result
def getOutline(text):
html = etree.fromstring(text, etree.HTMLParser())
try:
result = str(html.xpath("//div[@class='mg-b20 lh4']/text()")[0]).replace(
"\n", ""
)
result = str(html.xpath("//div[@class='mg-b20 lh4']/text()")[0]).replace("\n", "")
if result == "":
result = str(html.xpath("//div[@class='mg-b20 lh4']//p/text()")[0]).replace(
"\n", ""
)
result = str(html.xpath("//div[@class='mg-b20 lh4']//p/text()")[0]).replace("\n", "")
except:
# (TODO) handle more edge case
# print(html)
@@ -181,23 +66,8 @@ def getOutline(text):
return result
def getSeries(text):
try:
html = etree.fromstring(text, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
try:
result = html.xpath(
"//td[contains(text(),'シリーズ:')]/following-sibling::td/a/text()"
)[0]
except:
result = html.xpath(
"//td[contains(text(),'シリーズ:')]/following-sibling::td/text()"
)[0]
return result
except:
return ""
def getExtrafanart(htmlcode): # 获取剧照
html_pather = re.compile(r'<div id=\"sample-image-block\"[\s\S]*?<br></div>\n</div>')
html_pather = re.compile(r'<div id=\"sample-image-block\"[\s\S]*?<br></div></div>')
html = html_pather.search(htmlcode)
if html:
html = html.group()
@@ -232,6 +102,7 @@ def main(number):
"https://www.dmm.co.jp/rental/-/detail/=/cid=",
]
chosen_url = ""
fanza_Crawler = ''
for url in fanza_urls:
chosen_url = url + fanza_search_number
@@ -240,6 +111,7 @@ def main(number):
urlencode({"rurl": chosen_url})
)
)
fanza_Crawler = fanzaCrawler(htmlcode)
if "404 Not Found" not in htmlcode:
break
if "404 Not Found" in htmlcode:
@@ -249,28 +121,34 @@ def main(number):
# for example, the url will be cid=test012
# but the hinban on the page is test00012
# so get the hinban first, and then pass it to following functions
fanza_hinban = getNum(htmlcode)
fanza_hinban = fanza_Crawler.getFanzaString('品番:')
out_num = fanza_hinban
number_lo = number.lower()
html = etree.fromstring(htmlcode, etree.HTMLParser())
if (re.sub('-|_', '', number_lo) == fanza_hinban or
number_lo.replace('-', '00') == fanza_hinban or
number_lo.replace('-', '') + 'so' == fanza_hinban
):
out_num = number
data = {
"title": getTitle(htmlcode).strip(),
"studio": getStudio(htmlcode),
"outline": getOutline(htmlcode),
"runtime": getRuntime(htmlcode),
"director": getDirector(htmlcode) if "anime" not in chosen_url else "",
"actor": getActor(htmlcode) if "anime" not in chosen_url else "",
"release": getRelease(htmlcode),
"number": fanza_hinban,
"cover": getCover(htmlcode, fanza_hinban),
"title": fanza_Crawler.getString('//*[starts-with(@id, "title")]/text()').strip(),
"studio": fanza_Crawler.getFanzaString('メーカー'),
"outline": getOutline(html),
"runtime": str(re.search(r'\d+',fanza_Crawler.getString("//td[contains(text(),'収録時間')]/following-sibling::td/text()")).group()).strip(" ['']"),
"director": fanza_Crawler.getFanzaString('監督:') if "anime" not in chosen_url else "",
"actor": fanza_Crawler.getString("//td[contains(text(),'出演者')]/following-sibling::td/span/a/text()").replace("', '", ",") if "anime" not in chosen_url else "",
"release": getRelease(fanza_Crawler),
"number": out_num,
"cover": getCover(html, fanza_hinban),
"imagecut": 1,
"tag": getTag(htmlcode),
"tag": fanza_Crawler.getFanzaStrings('ジャンル:'),
"extrafanart": getExtrafanart(htmlcode),
"label": getLabel(htmlcode),
"year": getYear(
getRelease(htmlcode)
), # str(re.search('\d{4}',getRelease(a)).group()),
"label": fanza_Crawler.getFanzaString('レーベル'),
"year": re.findall('\d{4}',getRelease(fanza_Crawler))[0], # str(re.search('\d{4}',getRelease(a)).group()),
"actor_photo": "",
"website": chosen_url,
"source": "fanza.py",
"series": getSeries(htmlcode),
"series": fanza_Crawler.getFanzaString('シリーズ:'),
}
except:
data = {
@@ -314,4 +192,6 @@ def main_htmlcode(number):
if __name__ == "__main__":
# print(main("DV-1562"))
# print(main("96fad1217"))
print(main("h_173ghmt68"))
print(main("pred00251"))
print(main("MIAA-391"))
print(main("OBA-326"))


@@ -4,58 +4,11 @@ import re
from lxml import etree#need install
import json
import ADC_function
from WebCrawler.crawler import *
# import sys
# import io
# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)
def getTitle_fc2com(htmlcode): # get title
html = etree.fromstring(htmlcode,etree.HTMLParser())
result = html.xpath('//*[@id="top"]/div[1]/section[1]/div/section/div[2]/h3/text()')[0]
return result
def getActor_fc2com(htmlcode):
try:
html = etree.fromstring(htmlcode, etree.HTMLParser())
result = html.xpath('//*[@id="top"]/div[1]/section[1]/div/section/div[2]/ul/li[3]/a/text()')[0]
return result
except:
return ''
def getStudio_fc2com(htmlcode): # get studio
try:
html = etree.fromstring(htmlcode, etree.HTMLParser())
result = str(html.xpath('//*[@id="top"]/div[1]/section[1]/div/section/div[2]/ul/li[3]/a/text()')).strip(" ['']")
return result
except:
return ''
def getNum_fc2com(htmlcode): # get product number
html = etree.fromstring(htmlcode, etree.HTMLParser())
result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[1]/span[2]/text()')).strip(" ['']")
return result
def getRelease_fc2com(htmlcode2): # get release date
html=etree.fromstring(htmlcode2,etree.HTMLParser())
result = str(html.xpath('//*[@id="top"]/div[1]/section[1]/div/section/div[2]/div[2]/p/text()')).strip(" ['販売日 : ']").replace('/','-')
return result
def getCover_fc2com(htmlcode2): # get cover
html = etree.fromstring(htmlcode2, etree.HTMLParser())
result = str(html.xpath('//*[@id="top"]/div[1]/section[1]/div/section/div[1]/span/img/@src')).strip(" ['']")
return 'http:' + result
# def getOutline_fc2com(htmlcode2): # get outline
# xpath_html = etree.fromstring(htmlcode2, etree.HTMLParser())
# path = str(xpath_html.xpath('//*[@id="top"]/div[1]/section[4]/iframe/@src')).strip(" ['']")
# html = etree.fromstring(ADC_function.get_html('https://adult.contents.fc2.com/'+path), etree.HTMLParser())
# print('https://adult.contents.fc2.com'+path)
# print(ADC_function.get_html('https://adult.contents.fc2.com'+path,cookies={'wei6H':'1'}))
# result = str(html.xpath('/html/body/div/text()')).strip(" ['']").replace("\\n",'',10000).replace("'",'',10000).replace(', ,','').strip(' ').replace('。,',',')
# return result
def getTag_fc2com(lx):
result = lx.xpath("//a[@class='tag tagTag']/text()")
return result
def getYear_fc2com(release):
try:
result = re.search('\d{4}',release).group()
return result
except:
return ''
def getExtrafanart(htmlcode): # fetch extrafanart stills
html_pather = re.compile(r'<ul class=\"items_article_SampleImagesArea\"[\s\S]*?</ul>')
html = html_pather.search(htmlcode)
@@ -79,27 +32,30 @@ def getTrailer(htmlcode, number):
except:
return ''
else:
video_url = ''
return ''
def main(number):
try:
number = number.replace('FC2-', '').replace('fc2-', '')
htmlcode2 = ADC_function.get_html('https://adult.contents.fc2.com/article/' + number + '/')
actor = getActor_fc2com(htmlcode2)
if not actor:
htmlcode2 = ADC_function.get_html('https://adult.contents.fc2.com/article/' + number + '/', encoding='utf-8')
fc2_crawler = Crawler(htmlcode2)
actor = fc2_crawler.getString('//*[@id="top"]/div[1]/section[1]/div/section/div[2]/ul/li[3]/a/text()')
if actor == "":
actor = '素人'
lx = etree.fromstring(htmlcode2, etree.HTMLParser())
cover = str(lx.xpath("//div[@class='items_article_MainitemThumb']/span/img/@src")).strip(" ['']")
cover = fc2_crawler.getString("//div[@class='items_article_MainitemThumb']/span/img/@src")
cover = ADC_function.urljoin('https://adult.contents.fc2.com', cover)
release = fc2_crawler.getString('//*[@id="top"]/div[1]/section[1]/div/section/div[2]/div[2]/p/text()').\
strip(" ['販売日 : ']").replace('/','-')
dic = {
'title': lx.xpath('/html/head/title/text()')[0],
'studio': getStudio_fc2com(htmlcode2),
'year': getYear_fc2com(getRelease_fc2com(htmlcode2)),
'title': fc2_crawler.getString('/html/head/title/text()'),
'studio': fc2_crawler.getString('//*[@id="top"]/div[1]/section[1]/div/section/div[2]/ul/li[3]/a/text()'),
'year': re.findall('\d{4}',release)[0],
'outline': '', # getOutline_fc2com(htmlcode2),
'runtime': str(lx.xpath("//p[@class='items_article_info']/text()")[0]),
'director': getStudio_fc2com(htmlcode2),
'director': fc2_crawler.getString('//*[@id="top"]/div[1]/section[1]/div/section/div[2]/ul/li[3]/a/text()'),
'actor': actor,
'release': getRelease_fc2com(htmlcode2),
'release': release,
'number': 'FC2-' + number,
'label': '',
'cover': cover,
@@ -107,7 +63,7 @@ def main(number):
'extrafanart': getExtrafanart(htmlcode2),
"trailer": getTrailer(htmlcode2, number),
'imagecut': 0,
'tag': getTag_fc2com(lx),
'tag': fc2_crawler.getStrings("//a[@class='tag tagTag']/text()"),
'actor_photo': '',
'website': 'https://adult.contents.fc2.com/article/' + number + '/',
'source': 'https://adult.contents.fc2.com/article/' + number + '/',
@@ -121,6 +77,4 @@ def main(number):
return js
if __name__ == '__main__':
print(main('FC2-1787685'))
print(main('FC2-2086710'))
print(main('FC2-2182382'))

WebCrawler/gcolle.py (new file)

@@ -0,0 +1,88 @@
import sys
sys.path.append('../')
from WebCrawler.crawler import *
from ADC_function import *
from lxml import etree
def main(number):
save_cookies = False
cookie_filename = 'gcolle.json'
try:
gcolle_cookies, cookies_filepath = load_cookies(cookie_filename)
session = get_html_session(cookies=gcolle_cookies)
number = number.upper().replace('GCOLLE-','')
htmlcode = session.get('https://gcolle.net/product_info.php/products_id/' + number).text
gcolle_crawler = Crawler(htmlcode)
r18_continue = gcolle_crawler.getString('//*[@id="main_content"]/table[1]/tbody/tr/td[2]/table/tbody/tr/td/h4/a[2]/@href')
if r18_continue and r18_continue.startswith('http'):
htmlcode = session.get(r18_continue).text
gcolle_crawler = Crawler(htmlcode)
save_cookies = True
cookies_filepath and len(cookies_filepath) and Path(cookies_filepath).is_file() and Path(cookies_filepath).unlink(missing_ok=True)
number_html = gcolle_crawler.getString('//td[contains(text(),"商品番号")]/../td[2]/text()')
if number != number_html:
raise Exception('[-]gcolle.py: number not match')
if save_cookies:
cookies_save = Path.home() / f".local/share/mdc/{cookie_filename}"
cookies_save.parent.mkdir(parents=True, exist_ok=True)
cookies_save.write_text(json.dumps(session.cookies.get_dict(), sort_keys=True, indent=4), encoding='utf-8')
# get extrafanart url
if len(gcolle_crawler.getStrings('//*[@id="cart_quantity"]/table/tr[3]/td/div/img/@src')) == 0:
extrafanart = gcolle_crawler.getStrings('//*[@id="cart_quantity"]/table/tr[3]/td/div/a/img/@src')
else:
extrafanart = gcolle_crawler.getStrings('//*[@id="cart_quantity"]/table/tr[3]/td/div/img/@src')
# Add "https:" in each extrafanart url
for i in range(len(extrafanart)):
extrafanart[i] = 'https:' + extrafanart[i]
dic = {
"title": gcolle_crawler.getString('//*[@id="cart_quantity"]/table/tr[1]/td/h1/text()').strip(),
"studio": gcolle_crawler.getString('//td[contains(text(),"アップロード会員名")]/b/text()'),
"year": re.findall('\d{4}',gcolle_crawler.getString('//td[contains(text(),"商品登録日")]/../td[2]/time/@datetime'))[0],
"outline": gcolle_crawler.getOutline('//*[@id="cart_quantity"]/table/tr[3]/td/p/text()'),
"runtime": '',
"director": gcolle_crawler.getString('//td[contains(text(),"アップロード会員名")]/b/text()'),
"actor": gcolle_crawler.getString('//td[contains(text(),"アップロード会員名")]/b/text()'),
"release": re.findall('\d{4}-\d{2}-\d{2}',gcolle_crawler.getString('//td[contains(text(),"商品登録日")]/../td[2]/time/@datetime'))[0],
"number": "GCOLLE-" + str(number_html),
"cover": "https:" + gcolle_crawler.getString('//*[@id="cart_quantity"]/table/tr[3]/td/table/tr/td/a/@href'),
"thumb": "https:" + gcolle_crawler.getString('//*[@id="cart_quantity"]/table/tr[3]/td/table/tr/td/a/@href'),
"trailer": '',
"actor_photo":'',
"imagecut": 4, # 该值为4时同时也是有码影片 也用人脸识别裁剪封面
"tag": gcolle_crawler.getStrings('//*[@id="cart_quantity"]/table/tr[4]/td/a/text()'),
"extrafanart":extrafanart,
"label": gcolle_crawler.getString('//td[contains(text(),"アップロード会員名")]/b/text()'),
"website": 'https://gcolle.net/product_info.php/products_id/' + number,
"source": 'gcolle.py',
"series": gcolle_crawler.getString('//td[contains(text(),"アップロード会員名")]/b/text()'),
'无码': False,
}
# for k,v in dic.items():
# if k == 'outline':
# print(k,len(v))
# else:
# print(k,v)
# print('===============================================================')
except Exception as e:
dic = {'title':''}
if config.getInstance().debug():
print(e)
return dic
if __name__ == '__main__':
from pprint import pprint
config.getInstance().set_override("debug_mode:switch=1")
pprint(main('840724'))
pprint(main('840386'))
pprint(main('838671'))
pprint(main('814179'))
pprint(main('834255'))
pprint(main('814179'))
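
gcolle.py persists its session cookies so the R18 age gate only has to be passed once. A condensed sketch of the save half of that round-trip, mirroring the code above (the load half is the load_cookies() helper from ADC_function shown earlier in this diff):

import json
from pathlib import Path

def save_session_cookies(session, cookie_filename: str) -> None:
    # same save path as above: ~/.local/share/mdc/gcolle.json
    cookies_save = Path.home() / f".local/share/mdc/{cookie_filename}"
    cookies_save.parent.mkdir(parents=True, exist_ok=True)
    cookies_save.write_text(
        json.dumps(session.cookies.get_dict(), sort_keys=True, indent=4),
        encoding='utf-8')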


@@ -56,9 +56,9 @@ def parse_info(soup: BeautifulSoup) -> dict:
"label": get_label(data_dic),
"studio": get_studio(data_dic),
"tag": get_tag(data_dic),
"number": get_number(data_dic),
"number": get_number(data_dic).upper(),
"release": get_release(data_dic),
"runtime": get_runtime(data_dic),
"runtime": get_runtime(data_dic).replace(" minutes", ""),
"series": get_series(data_dic),
}
else:


@@ -60,10 +60,10 @@ def getCID(html):
string = html.xpath("//a[contains(@class,'sample-box')][1]/@href")[0].replace('https://pics.dmm.co.jp/digital/video/','')
result = re.sub('/.*?.jpg','',string)
return result
def getOutline(number, title): # fetch storyline; sites queried concurrently
def getOutline(number, title, uncensored): # fetch storyline; sites queried concurrently
if any(caller for caller in inspect.stack() if os.path.basename(caller.filename) == 'airav.py'):
return '' # calls from airav.py skip the outline and return immediately, avoiding duplicate scraping that slows processing
return getStoryline(number,title)
return getStoryline(number,title, 无码=uncensored)
def getSeriseJa(html):
x = html.xpath('//span[contains(text(),"シリーズ:")]/../a/text()')
return str(x[0]) if len(x) else ''
@@ -83,9 +83,13 @@ def getExtrafanart(htmlcode): # fetch extrafanart stills
if extrafanart_imgs:
return [urljoin('https://www.javbus.com',img) for img in extrafanart_imgs]
return ''
def getUncensored(html):
x = html.xpath('//*[@id="navbar"]/ul[1]/li[@class="active"]/a[contains(@href,"uncensored")]')
return bool(x)
def main_uncensored(number):
htmlcode = get_html('https://www.javbus.com/ja/' + number)
w_number = number.replace('.', '-')
htmlcode = get_html('https://www.javbus.red/' + w_number)
if "<title>404 Page Not Found" in htmlcode:
raise Exception('404 page not found')
lx = etree.fromstring(htmlcode, etree.HTMLParser())
@@ -94,7 +98,7 @@ def main_uncensored(number):
'title': title,
'studio': getStudioJa(lx),
'year': getYear(lx),
'outline': getOutline(number, title),
'outline': getOutline(w_number, title, True),
'runtime': getRuntime(lx),
'director': getDirectorJa(lx),
'actor': getActor(lx),
@@ -106,9 +110,10 @@ def main_uncensored(number):
'label': getSeriseJa(lx),
'imagecut': 0,
# 'actor_photo': '',
'website': 'https://www.javbus.com/ja/' + number,
'website': 'https://www.javbus.red/' + w_number,
'source': 'javbus.py',
'series': getSeriseJa(lx),
'无码': True
}
js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8')
return js
@@ -136,7 +141,7 @@ def main(number):
'title': title,
'studio': getStudio(lx),
'year': getYear(lx),
'outline': getOutline(number, title),
'outline': getOutline(number, title, getUncensored(lx)),
'runtime': getRuntime(lx),
'director': getDirector(lx),
'actor': getActor(lx),
@@ -151,6 +156,7 @@ def main(number):
'website': 'https://www.javbus.com/' + number,
'source': 'javbus.py',
'series': getSerise(lx),
'无码': getUncensored(lx)
}
js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4,separators=(',', ':'), ) # .encode('UTF-8')
return js
@@ -168,13 +174,14 @@ def main(number):
return js
if __name__ == "__main__" :
config.G_conf_override['debug_mode:switch'] = True
print(main('ABP-888'))
print(main('ABP-960'))
print(main('ADV-R0624')) # 404
print(main('MMNT-010'))
print(main('ipx-292'))
print(main('CEMD-011'))
print(main('CJOD-278'))
config.getInstance().set_override("debug_mode:switch=1")
# print(main('ABP-888'))
# print(main('ABP-960'))
# print(main('ADV-R0624')) # 404
# print(main('MMNT-010'))
# print(main('ipx-292'))
# print(main('CEMD-011'))
# print(main('CJOD-278'))
print(main('BrazzersExxtra.21.02.01'))
print(main('100221_001'))
print(main('AVSW-061'))


@@ -166,12 +166,23 @@ def getDirector(html):
result1 = str(html.xpath('//strong[contains(text(),"導演")]/../span/text()')).strip(" ['']")
result2 = str(html.xpath('//strong[contains(text(),"導演")]/../span/a/text()')).strip(" ['']")
return str(result1 + result2).strip('+').replace("', '", '').replace('"', '')
def getOutline(number, title): # fetch storyline; sites queried concurrently
return getStoryline(number,title)
def getOutline(number, title, uncensored): # fetch storyline; sites queried concurrently
return getStoryline(number, title, 无码=uncensored)
def getSeries(html):
result1 = str(html.xpath('//strong[contains(text(),"系列")]/../span/text()')).strip(" ['']")
result2 = str(html.xpath('//strong[contains(text(),"系列")]/../span/a/text()')).strip(" ['']")
return str(result1 + result2).strip('+').replace("', '", '').replace('"', '')
def getUserRating(html):
try:
result = str(html.xpath('//span[@class="score-stars"]/../text()')[0])
v = re.findall(r'(\d+|\d+\.\d+)分, 由(\d+)人評價', result)
return float(v[0][0]), int(v[0][1])
except:
return
def getUncensored(html):
x = html.xpath('//strong[contains(text(),"類別")]/../span/a[contains(@href,"/tags/uncensored?")'
' or contains(@href,"/tags/western?")]')
return bool(x)
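
getUserRating() parses javdb's rating text, e.g. "4.5分, 由123人評價", into a (score, votes) tuple and returns None on failure, so callers must guard before unpacking, as main() does below. A tiny self-check with an illustrative sample:

import re

sample = '4.5分, 由123人評價'  # illustrative rating text only
v = re.findall(r'(\d+|\d+\.\d+)分, 由(\d+)人評價', sample)
assert (float(v[0][0]), int(v[0][1])) == (4.5, 123)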
def main(number):
# After javdb's update, only one numbered mirror can be logged in at a time; the newest login kicks out the old one, so the site is picked from the first javdb*.json file found
@@ -276,7 +287,7 @@ def main(number):
'actor': getActor(lx),
'title': title,
'studio': getStudio(detail_page, lx),
'outline': getOutline(number, title),
'outline': getOutline(number, title, getUncensored(lx)),
'runtime': getRuntime(lx),
'director': getDirector(lx),
'release': getRelease(detail_page),
@@ -293,8 +304,12 @@ def main(number):
'website': urljoin('https://javdb.com', correct_url),
'source': 'javdb.py',
'series': getSeries(lx),
'无码': getUncensored(lx)
}
userrating = getUserRating(lx)
if isinstance(userrating, tuple) and len(userrating) == 2:
dic['用户评分'] = userrating[0]
dic['评分人数'] = userrating[1]
if not dic['actor'] and re.match(r'FC2-[\d]+', number, re.A):
dic['actor'].append('素人')
if not dic['series']:
@@ -313,18 +328,19 @@ def main(number):
# main('DV-1562')
# input("[+][+]Press enter key exit, you can check the error messge before you exit.\n[+][+]按回车键结束,你可以在结束之前查看和错误信息。")
if __name__ == "__main__":
config.G_conf_override['debug_mode:switch'] = True
config.getInstance().set_override("debug_mode:switch=1")
# print(main('blacked.20.05.30'))
# print(main('AGAV-042'))
# print(main('BANK-022'))
# print(main('070116-197'))
print(main('070116-197'))
# print(main('093021_539')) # 没有剧照 片商pacopacomama
#print(main('FC2-2278260'))
# print(main('FC2-735670'))
# print(main('FC2-1174949')) # not found
#print(main('MVSD-439'))
# print(main('EHM0001')) # not found
print(main('FC2-2314275'))
#print(main('FC2-2314275'))
# print(main('EBOD-646'))
# print(main('LOVE-262'))
#print(main('ABP-890'))
print(main('ABP-890'))
print(main('blacked.14.12.08'))


@@ -1,3 +1,5 @@
import sys
sys.path.append('../')
from bs4 import BeautifulSoup # need install
from lxml import etree # need install
from pyquery import PyQuery as pq # need install
@@ -5,24 +7,22 @@ from ADC_function import *
import json
import re
from lib2to3.pgen2 import parse
import sys
from urllib.parse import urlparse, unquote
sys.path.append('../')
def getActorPhoto(html):
return ''
def getTitle(html, number): # get title
title = str(html.xpath('//h1[@class="article-title"]/text()')[0])
try:
result = str(re.split(r'[/||-]', title)[1])
return result.strip()
except:
return title.replace(number.upper(), '').strip()
def getTitle(html): # get title
# <title>MD0140-2 / 家有性事EP2 爱在身边-麻豆社</title>
# <title>MAD039 机灵可爱小叫花 强诱僧人迫犯色戒-麻豆社</title>
# <title>MD0094贫嘴贱舌中出大嫂坏嫂嫂和小叔偷腥内射受孕-麻豆社</title>
# <title>TM0002-我的痴女女友-麻豆社</title>
browser_title = str(html.xpath("/html/head/title/text()")[0])
title = str(re.findall(r'^[A-Z0-9 /\-]*(.*)-麻豆社$', browser_title)[0]).strip()
return title
def getStudio(html): # get studio (revised)
try:
@@ -61,7 +61,6 @@ def getNum(url, number): # get number
filename = unquote(urlparse(url).path)
# trim the filename
result = filename[1:-5].upper().strip()
print(result)
# strip the Chinese (non-ASCII) part
if result.upper() != number.upper():
result = re.split(r'[^\x00-\x7F]+', result, 1)[0]
@@ -83,13 +82,15 @@ def getSerise(html): # get series (revised)
return ''
def getTag(html): # get tags
return html.xpath('//div[@class="article-tags"]/a/text()')
def getTag(html, studio): # get tags
x = html.xpath('/html/head/meta[@name="keywords"]/@content')[0].split(',')
return [i.strip() for i in x if len(i.strip()) and studio not in i and '麻豆' not in i]
def getExtrafanart(html): # fetch extrafanart stills
return ''
def cutTags(tags):
actors = []
tags = []
@@ -109,13 +110,15 @@ def main(number):
html = etree.fromstring(htmlcode, etree.HTMLParser())
url = getUrl(html)
tags = getTag(html)
actor,tags = cutTags(tags);
studio = getStudio(html)
tags = getTag(html, studio)
#actor,tags = cutTags(tags) # the actors' position inside tags is not fixed; gave up extracting them
actor = ''
dic = {
# title
'title': getTitle(html, number),
'title': getTitle(html),
# studio
'studio': getStudio(html),
'studio': studio,
# year
'year': getYear(html),
# outline
@@ -143,7 +146,8 @@ def main(number):
'website': url,
'source': 'madou.py',
# series
'series': getSerise(html)
'series': getSerise(html),
'无码': True
}
js = json.dumps(dic, ensure_ascii=False, sort_keys=True,
indent=4, separators=(',', ':'), ) # .encode('UTF-8')
@@ -161,4 +165,11 @@ def main(number):
if __name__ == '__main__':
print(main('MD0094'))
config.getInstance().set_override("debug_mode:switch=1")
print(main('MD0129'))
# print(main('TM0002'))
# print(main('MD0222'))
# print(main('MD0140-2'))
# print(main('MAD039'))
# print(main('JDMY027'))


@@ -5,95 +5,28 @@ from lxml import etree
import json
from bs4 import BeautifulSoup
from ADC_function import *
from WebCrawler.crawler import *
# import sys
# import io
# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)
def getTitle(a):
try:
html = etree.fromstring(a, etree.HTMLParser())
result = str(html.xpath('//*[@id="center_column"]/div[1]/h1/text()')).strip(" ['']")
return result.replace('/', ',')
except:
return ''
def getActor(a): #//*[@id="center_column"]/div[2]/div[1]/div/table/tbody/tr[1]/td/text()
html = etree.fromstring(a, etree.HTMLParser()) #//table/tr[1]/td[1]/text()
result1=str(html.xpath('//th[contains(text(),"出演:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip('\\n')
result2=str(html.xpath('//th[contains(text(),"出演:")]/../td/text()')).strip(" ['']").strip('\\n ').strip('\\n')
return str(result1+result2).strip('+').replace("', '",'').replace('"','').replace('/',',')
def getStudio(a):
html = etree.fromstring(a, etree.HTMLParser()) #//table/tr[1]/td[1]/text()
result1=str(html.xpath('//th[contains(text(),"メーカー:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip('\\n')
result2=str(html.xpath('//th[contains(text(),"メーカー:")]/../td/text()')).strip(" ['']").strip('\\n ').strip('\\n')
return str(result1+result2).strip('+').replace("', '",'').replace('"','')
def getRuntime(a):
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
result1 = str(html.xpath('//th[contains(text(),"収録時間:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip('\\n')
result2 = str(html.xpath('//th[contains(text(),"収録時間:")]/../td/text()')).strip(" ['']").strip('\\n ').strip('\\n')
return str(result1 + result2).strip('+').rstrip('mi')
def getLabel(a):
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
result1 = str(html.xpath('//th[contains(text(),"シリーズ:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip(
'\\n')
result2 = str(html.xpath('//th[contains(text(),"シリーズ:")]/../td/text()')).strip(" ['']").strip('\\n ').strip(
'\\n')
return str(result1 + result2).strip('+').replace("', '",'').replace('"','')
def getNum(a):
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
result1 = str(html.xpath('//th[contains(text(),"品番:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip(
'\\n')
result2 = str(html.xpath('//th[contains(text(),"品番:")]/../td/text()')).strip(" ['']").strip('\\n ').strip(
'\\n')
return str(result1 + result2).strip('+')
def getYear(getRelease):
try:
result = str(re.search('\d{4}',getRelease).group())
return result
except:
return getRelease
def getRelease(a):
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
result1 = str(html.xpath('//th[contains(text(),"配信開始日:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip(
'\\n')
result2 = str(html.xpath('//th[contains(text(),"配信開始日:")]/../td/text()')).strip(" ['']").strip('\\n ').strip(
'\\n')
return str(result1 + result2).strip('+').replace('/','-')
class MgsCrawler(Crawler):
def getMgsString(self, _xpath):
html = self.html
result1 = str(html.xpath(_xpath)).strip(" ['']").strip('\\n ').strip('\\n').strip(" ['']").replace(u'\\n', '').replace("', '', '", '')
result2 = str(html.xpath(_xpath.replace('td/a/','td/'))).strip(" ['']").strip('\\n ').strip('\\n')
return str(result1 + result2).strip('+').replace("', '",'').replace('"','')
def getTag(a):
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
result1 = str(html.xpath('//th[contains(text(),"ジャンル:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip(
'\\n')
result2 = str(html.xpath('//th[contains(text(),"ジャンル:")]/../td/text()')).strip(" ['']").strip('\\n ').strip(
'\\n')
result1 = str(html.xpath('//th[contains(text(),"ジャンル:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip('\\n')
result2 = str(html.xpath('//th[contains(text(),"ジャンル:")]/../td/text()')).strip(" ['']").strip('\\n ').strip('\\n')
result = str(result1 + result2).strip('+').replace("', '\\n",",").replace("', '","").replace('"','').replace(',,','').split(',')
return result
def getCover(htmlcode):
html = etree.fromstring(htmlcode, etree.HTMLParser())
result = str(html.xpath('//*[@id="EnlargeImage"]/@href')).strip(" ['']")
# result = str(html.xpath('//*[@id="center_column"]/div[1]/div[1]/div/div/h2/img/@src')).strip(" ['']")
# /html/body/div[2]/article[2]/div[1]/div[1]/div/div/h2/img/@src
return result
def getDirector(a):
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
result1 = str(html.xpath('//th[contains(text(),"シリーズ")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip(
'\\n')
result2 = str(html.xpath('//th[contains(text(),"シリーズ")]/../td/text()')).strip(" ['']").strip('\\n ').strip(
'\\n')
return str(result1 + result2).strip('+').replace("', '",'').replace('"','')
def getOutline(htmlcode):
html = etree.fromstring(htmlcode, etree.HTMLParser())
result = str(html.xpath('//p/text()')).strip(" ['']").replace(u'\\n', '').replace("', '', '", '')
return result
def getSeries(a):
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
result1 = str(html.xpath('//th[contains(text(),"シリーズ")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip(
'\\n')
result2 = str(html.xpath('//th[contains(text(),"シリーズ")]/../td/text()')).strip(" ['']").strip('\\n ').strip(
'\\n')
return str(result1 + result2).strip('+').replace("', '", '').replace('"', '')
def getExtrafanart(htmlcode): # fetch extrafanart stills
def getExtrafanart(htmlcode2): # fetch extrafanart stills
html_pather = re.compile(r'<dd>\s*?<ul>[\s\S]*?</ul>\s*?</dd>')
html = html_pather.search(htmlcode)
html = html_pather.search(htmlcode2)
if html:
html = html.group()
extrafanart_pather = re.compile(r'<a class=\"sample_image\" href=\"(.*?)\"')
@@ -104,36 +37,35 @@ def getExtrafanart(htmlcode): # fetch extrafanart stills
def main(number2):
number=number2.upper()
htmlcode=str(get_html('https://www.mgstage.com/product/product_detail/'+str(number)+'/',cookies={'adc':'1'}))
soup = BeautifulSoup(htmlcode, 'lxml')
a = str(soup.find(attrs={'class': 'detail_data'})).replace('\n ','').replace(' ','').replace('\n ','').replace('\n ','')
b = str(soup.find(attrs={'id': 'introduction'})).replace('\n ','').replace(' ','').replace('\n ','').replace('\n ','')
htmlcode2=str(get_html('https://www.mgstage.com/product/product_detail/'+str(number)+'/',cookies={'adc':'1'}))
soup = BeautifulSoup(htmlcode2, 'lxml')
a2 = str(soup.find(attrs={'class': 'detail_data'})).replace('\n ','').replace(' ','').replace('\n ','').replace('\n ','')
b2 = str(soup.find(attrs={'id': 'introduction'})).replace('\n ','').replace(' ','').replace('\n ','').replace('\n ','')
htmlcode = MgsCrawler(htmlcode2)
a = MgsCrawler(a2)
b = MgsCrawler(b2)
#print(b)
try:
dic = {
'title': getTitle(htmlcode).replace("\\n", '').replace(' ', ''),
'studio': getStudio(a),
'outline': getOutline(b),
'runtime': getRuntime(a),
'director': getDirector(a),
'actor': getActor(a),
'release': getRelease(a),
'number': getNum(a),
'cover': getCover(htmlcode),
'imagecut': 1,
'tag': getTag(a),
'label': getLabel(a),
'extrafanart': getExtrafanart(htmlcode),
'year': getYear(getRelease(a)), # str(re.search('\d{4}',getRelease(a)).group()),
'actor_photo': '',
'website': 'https://www.mgstage.com/product/product_detail/' + str(number) + '/',
'source': 'mgstage.py',
'series': getSeries(a),
}
except Exception as e:
if config.getInstance().debug():
print(e)
dic = {"title": ""}
dic = {
'title': htmlcode.getString('//*[@id="center_column"]/div[1]/h1/text()').replace('/', ',').replace("\\n",'').replace(' ', '').strip(),
'studio': a.getMgsString('//th[contains(text(),"メーカー:")]/../td/a/text()'),
'outline': b.getString('//p/text()').strip(" ['']").replace(u'\\n', '').replace("', '', '", ''),
'runtime': a.getMgsString('//th[contains(text(),"収録時間:")]/../td/a/text()').rstrip('mi'),
'director': a.getMgsString('//th[contains(text(),"シリーズ")]/../td/a/text()'),
'actor': a.getMgsString('//th[contains(text(),"出演:")]/../td/a/text()'),
'release': a.getMgsString('//th[contains(text(),"配信開始日:")]/../td/a/text()').replace('/','-'),
'number': a.getMgsString('//th[contains(text(),"品番:")]/../td/a/text()'),
'cover': htmlcode.getString('//*[@id="EnlargeImage"]/@href'),
'imagecut': 1,
'tag': getTag(a2),
'label': a.getMgsString('//th[contains(text(),"シリーズ:")]/../td/a/text()'),
'extrafanart': getExtrafanart(htmlcode2),
'year': str(re.findall('\d{4}',a.getMgsString('//th[contains(text(),"配信開始日:")]/../td/a/text()'))).strip(" ['']"),
# str(re.search('\d{4}',getRelease(a)).group()),
'actor_photo': '',
'website': 'https://www.mgstage.com/product/product_detail/' + str(number) + '/',
'source': 'mgstage.py',
'series': a.getMgsString('//th[contains(text(),"シリーズ")]/../td/a/text()'),
}
js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8')
return js


@@ -5,7 +5,6 @@ import json
import builtins
from ADC_function import *
from lxml.html import fromstring
from multiprocessing import Pool
from multiprocessing.dummy import Pool as ThreadPool
from difflib import SequenceMatcher
from unicodedata import category
@@ -13,7 +12,7 @@ from number_parser import is_uncensored
G_registered_storyline_site = {"airavwiki", "airav", "avno1", "xcity", "amazon", "58avgo"}
G_mode_txt = ('顺序执行','线程池','进程池')
G_mode_txt = ('顺序执行','线程池')
class noThread(object):
def map(self, fn, param):
@@ -25,14 +24,15 @@ class noThread(object):
# Fetch the storyline, querying the listed sites concurrently; priority runs front to back
def getStoryline(number, title, sites: list=None):
def getStoryline(number, title, sites: list=None, 无码=None):
start_time = time.time()
conf = config.getInstance()
if not conf.is_storyline():
return ''
debug = conf.debug() or conf.storyline_show() == 2
storyine_sites = conf.storyline_site().split(',') if sites is None else sites
if is_uncensored(number):
unc = 无码 if isinstance(无码, bool) else is_uncensored(number)
if unc:
storyine_sites += conf.storyline_uncensored_site().split(',')
else:
storyine_sites += conf.storyline_censored_site().split(',')
@@ -49,9 +49,8 @@ def getStoryline(number, title, sites: list=None):
cores = min(len(apply_sites), os.cpu_count())
if cores == 0:
return ''
run_mode = conf.storyline_mode()
assert run_mode in (0,1,2)
with ThreadPool(cores) if run_mode == 1 else Pool(cores) if run_mode == 2 else noThread() as pool:
run_mode = 1 if conf.storyline_mode() > 0 else 0
with ThreadPool(cores) if run_mode > 0 else noThread() as pool:
results = pool.map(getStoryline_mp, mp_args)
sel = ''
if not debug and conf.storyline_show() == 0:
@@ -62,7 +61,7 @@ def getStoryline(number, title, sites: list=None):
if not len(sel):
sel = value
return sel
# The debug output below is written to the log; output from the process pool is not and shows only on stdout
# The debug output below is written to the log
s = f'[!]Storyline{G_mode_txt[run_mode]}模式运行{len(apply_sites)}个任务共耗时(含启动开销){time.time() - start_time:.3f}秒,结束于{time.strftime("%H:%M:%S")}'
sel_site = ''
for site, desc in zip(apply_sites, results):
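
The refactor drops the process pool and treats any run_mode above 0 as the thread pool. A condensed sketch of the remaining dispatch pattern, assuming noThread also implements the context-manager protocol (the hunk elides those methods):

from multiprocessing.dummy import Pool as ThreadPool

class noThread:
    # sequential fallback; __enter__/__exit__ assumed, as in the class above
    def map(self, fn, param):
        return list(map(fn, param))
    def __enter__(self):
        return self
    def __exit__(self, *exc):
        return False

def run_tasks(fn, args_list, run_mode: int, workers: int = 4):
    # run_mode > 0: thread pool; otherwise plain sequential execution
    with ThreadPool(workers) if run_mode > 0 else noThread() as pool:
        return pool.map(fn, args_list)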
@@ -80,34 +79,33 @@ def getStoryline(number, title, sites: list=None):
def getStoryline_mp(args):
def _inner(site, number, title, debug):
start_time = time.time()
storyline = None
if not isinstance(site, str):
return storyline
elif site == "airavwiki":
storyline = getStoryline_airavwiki(number, debug)
elif site == "airav":
storyline = getStoryline_airav(number, debug)
elif site == "avno1":
storyline = getStoryline_avno1(number, debug)
elif site == "xcity":
storyline = getStoryline_xcity(number, debug)
elif site == "amazon":
storyline = getStoryline_amazon(title, number, debug)
elif site == "58avgo":
storyline = getStoryline_58avgo(number, debug)
if not debug:
return storyline
# In process-pool mode, print() from the getStoryline_*() child processes is not written to the log; thread-pool and sequential modes are unaffected
print("[!]MP 进程[{}]运行{:.3f}秒,结束于{}返回结果: {}".format(
site,
time.time() - start_time,
time.strftime("%H:%M:%S"),
storyline if isinstance(storyline, str) and len(storyline) else '[空]')
)
(site, number, title, debug) = args
start_time = time.time()
storyline = None
if not isinstance(site, str):
return storyline
return _inner(*args)
elif site == "airavwiki":
storyline = getStoryline_airavwiki(number, debug)
#storyline = getStoryline_airavwiki_super(number, debug)
elif site == "airav":
storyline = getStoryline_airav(number, debug)
elif site == "avno1":
storyline = getStoryline_avno1(number, debug)
elif site == "xcity":
storyline = getStoryline_xcity(number, debug)
elif site == "amazon":
storyline = getStoryline_amazon(title, number, debug)
elif site == "58avgo":
storyline = getStoryline_58avgo(number, debug)
if not debug:
return storyline
print("[!]MP 线程[{}]运行{:.3f}秒,结束于{}返回结果: {}".format(
site,
time.time() - start_time,
time.strftime("%H:%M:%S"),
storyline if isinstance(storyline, str) and len(storyline) else '[空]')
)
return storyline
def getStoryline_airav(number, debug):
@@ -308,8 +306,8 @@ def getStoryline_amazon(q_title, number, debug):
res = session.get(urljoin(res.url, lks[0]))
cookie = None
lx = fromstring(res.text)
titles = lx.xpath("//span[contains(@class,'a-color-base a-text-normal')]/text()")
urls = lx.xpath("//span[contains(@class,'a-color-base a-text-normal')]/../@href")
titles = lx.xpath("//span[contains(@class,'a-size-base-plus a-color-base a-text-normal')]/text()")
urls = lx.xpath("//span[contains(@class,'a-size-base-plus a-color-base a-text-normal')]/../@href")
if not len(urls) or len(urls) != len(titles):
raise ValueError("titles not found")
idx = amazon_select_one(titles, q_title, number, debug)
@@ -325,8 +323,9 @@ def getStoryline_amazon(q_title, number, debug):
res = session.get(urljoin(res.url, lks[0]))
cookie = None
lx = fromstring(res.text)
div = lx.xpath('//*[@id="productDescription"]')[0]
ama_t = ' '.join([e.text.strip() for e in div if not re.search('Comment|h3', str(e.tag), re.I) and isinstance(e.text, str)])
p1 = lx.xpath('//*[@id="productDescription"]/p[1]/span/text()')
p2 = lx.xpath('//*[@id="productDescription"]/p[2]/span/text()')
ama_t = ' '.join(p1) + ' '.join(p2)
ama_t = re.sub(r'審査番号:\d+', '', ama_t).strip()
if cookie is None:
@@ -406,10 +405,10 @@ def amazon_select_one(a_titles, q_title, number, debug):
# in debug mode, log recognition-accuracy statistics
if ratio < 0.9:
# rejected results with similarity in [0.5, 0.9) are logged separately
(Path.home() / '.avlogs/ratio0.5.txt').open('a', encoding='utf-8').write(
f' [{number}] Ratio:{ratio}\n{a_titles[sel]}\n{q_title}\n{save_t_}\n{que_t}\n')
with (Path.home() / '.mlogs/ratio0.5.txt').open('a', encoding='utf-8') as hrt:
hrt.write(f' [{number}] Ratio:{ratio}\n{a_titles[sel]}\n{q_title}\n{save_t_}\n{que_t}\n')
return -1
# log the accepted result
(Path.home() / '.avlogs/ratio.txt').open('a', encoding='utf-8').write(
f' [{number}] Ratio:{ratio}\n{a_titles[sel]}\n{q_title}\n{save_t_}\n{que_t}\n')
with (Path.home() / '.mlogs/ratio.txt').open('a', encoding='utf-8') as hrt:
hrt.write(f' [{number}] Ratio:{ratio}\n{a_titles[sel]}\n{q_title}\n{save_t_}\n{que_t}\n')
return sel


@@ -128,7 +128,7 @@ def getOutline(html, number, title):
a = set(storyline_site) & {'airav', 'avno1'} # 只要中文的简介文字
if len(a):
site = [n for n in storyline_site if n in a]
g = getStoryline(number, title, site)
g = getStoryline(number, title, site, 无码=False)
if len(g):
return g
try:


@@ -1,119 +1,130 @@
# For a detailed configuration guide, see
# - https://github.com/yoshiko2/Movie_Data_Capture/wiki#%E9%85%8D%E7%BD%AEconfigini
[common]
main_mode=1
source_folder=./
failed_output_folder=failed
success_output_folder=JAV_output
soft_link=0
failed_move=1
auto_exit=0
translate_to_sc=0
multi_threading=0
;actor_gender value: female(♀) or male(♂) or both(♀ ♂) or all(♂ ♀ ⚧)
actor_gender=female
del_empty_folder=1
; Skip .NFO files modified within the last N days (default 30); in organize mode (main_mode=3)
; or soft-link mode this avoids re-scraping the same leading video files repeatedly. 0 processes all video files
nfo_skip_days=30
; Stop after processing this many video files; 0 processes all video files
stop_counter=0
; Used together, the two settings above let you scrape or organize thousands of files in small batches without triggering bans from translation or metadata sites
ignore_failed_list=0
download_only_missing_images=1
mapping_table_validity=7
[proxy]
;proxytype: http or socks5 or socks5h switch: 0 1
switch=0
type=socks5
proxy=127.0.0.1:1080
timeout=10
retry=3
cacert_file=
[Name_Rule]
location_rule=actor+'/'+number
naming_rule=number+'-'+title
max_title_len=50
[update]
update_check=1
[priority]
website=javbus,airav,fanza,xcity,javdb,mgstage,fc2,avsox,dlsite,carib,fc2club
[escape]
literals=\()/
folders=failed,JAV_output
[debug_mode]
switch=0
; machine translation
[translate]
switch=0
; options: google-free, azure
engine=google-free
; Azure translation key
key=
; translation delay
delay=1
values=title,outline
service_site=translate.google.cn
; trailers
[trailer]
switch=0
; prefixes used to decide whether a film is uncensored
[uncensored]
uncensored_prefix=S2M,BT,LAF,SMD,SMBD,SM3D2DBD,SKY-,SKYHD,CWP,CWDV,CWBD,CW3D2DBD,MKD,MKBD,MXBD,MK3D2DBD,MCB3DBD,MCBD,RHJ,MMDV
[media]
; video file extensions
media_type=.mp4,.avi,.rmvb,.wmv,.mov,.mkv,.flv,.ts,.webm,.iso,.mpg,.m4v
; subtitle file extensions
sub_type=.smi,.srt,.idx,.sub,.sup,.psb,.ssa,.ass,.txt,.usf,.xss,.ssf,.rt,.lrc,.sbv,.vtt,.ttml
; watermark
[watermark]
switch=1
water=2
; top-left 0, top-right 1, bottom-right 2, bottom-left 3
; extrafanart stills
[extrafanart]
switch=1
parallel_download=5
extrafanart_folder=extrafanart
; storyline
[storyline]
switch=1
; When website is javbus, javdb, avsox, xcity or carib, the site / censored_site / uncensored_site
; lists name the candidate sources for storyline text. Sites in a list are queried concurrently;
; priority follows the number before the colon, ascending, and a higher-numbered site's result is
; only used when the lower-numbered sites returned nothing.
; airavwiki, airav, avno1 and 58avgo provide Chinese storylines: airav covers censored films only,
; avno1 and airavwiki cover both censored and uncensored, and 58avgo covers uncensored or leaked
; decensored films (this capability is unused).
; xcity and amazon are Japanese; because the Amazon store has no product numbers, the matching DVD
; is selected with only 99.6% accuracy. If all three lists are empty nothing is queried, and
; disabling the lookup this way greatly speeds up scraping.
; site=
site=1:avno1,4:airavwiki
censored_site=2:airav,5:xcity,6:amazon
uncensored_site=3:58avgo
; run mode 0: sequential (slowest)  1: thread pool (default)  2: process pool (higher startup cost than the thread pool; faster as more sites are queried concurrently)
run_mode=1
; show_result: storyline debug info. 0 off, 1 brief, 2 verbose (verbose details are not logged); enable 2 to diagnose why storylines stop working
show_result=0
; Traditional/Simplified Chinese conversion. mode=0: no conversion  1: Traditional to Simplified  2: Simplified to Traditional
[cc_convert]
mode=1
vars=outline,series,studio,tag,title
[javdb]
sites=33,34
; face recognition  hog: histogram of oriented gradients (less accurate, fast)  cnn: deep-learning model (accurate, needs GPU/CUDA, slow)
[face]
locations_model=hog
# For a detailed configuration guide, see
# - https://github.com/yoshiko2/Movie_Data_Capture/wiki#%E9%85%8D%E7%BD%AEconfigini
[common]
main_mode=1
source_folder=./
failed_output_folder=failed
success_output_folder=JAV_output
link_mode=0
; 0: do not scrape hard-linked files  1: scrape hard-linked files
scan_hardlink=0
failed_move=0
auto_exit=0
translate_to_sc=0
multi_threading=0
;actor_gender value: female(♀) or male(♂) or both(♀ ♂) or all(♂ ♀ ⚧)
actor_gender=female
del_empty_folder=1
; Skip .NFO files modified within the last N days (default 30); in organize mode (main_mode=3)
; or soft-link mode this avoids re-scraping the same leading video files repeatedly. 0 processes all video files
nfo_skip_days=30
; Stop after processing this many video files; 0 processes all video files
stop_counter=0
; Re-run delay; units: h=hours, m=minutes, s=seconds. Examples: 1h30m45s (1 hour 30 minutes 45 seconds), 45 (45 seconds)
; Only effective when stop_counter is non-zero: after every stop_counter films, wait rerun_delay and run again
rerun_delay=0
; Used together, the three settings above let you scrape or organize thousands of files in small batches without triggering bans from translation or metadata sites
ignore_failed_list=0
download_only_missing_images=1
mapping_table_validity=7
[proxy]
;proxytype: http or socks5 or socks5h switch: 0 1
switch=0
type=socks5
proxy=127.0.0.1:1080
timeout=10
retry=3
cacert_file=
[Name_Rule]
location_rule=actor+'/'+number
naming_rule=number+'-'+title
max_title_len=50
[update]
update_check=1
[priority]
website=javbus,airav,jav321,fanza,xcity,mgstage,fc2,avsox,dlsite,carib,fc2club,madou,mv91,javdb,gcolle
[escape]
literals=\()/
folders=failed,JAV_output
[debug_mode]
switch=0
; machine translation
[translate]
switch=0
; options: google-free, azure
engine=google-free
; Azure translation key
key=
; translation delay
delay=1
values=title,outline
service_site=translate.google.cn
; trailers
[trailer]
switch=0
; prefixes used to decide whether a film is uncensored
[uncensored]
uncensored_prefix=S2M,BT,LAF,SMD,SMBD,SM3D2DBD,SKY-,SKYHD,CWP,CWDV,CWBD,CW3D2DBD,MKD,MKBD,MXBD,MK3D2DBD,MCB3DBD,MCBD,RHJ,MMDV
[media]
; video file extensions
media_type=.mp4,.avi,.rmvb,.wmv,.mov,.mkv,.flv,.ts,.webm,.iso,.mpg,.m4v
; subtitle file extensions
sub_type=.smi,.srt,.idx,.sub,.sup,.psb,.ssa,.ass,.usf,.xss,.ssf,.rt,.lrc,.sbv,.vtt,.ttml
; watermark
[watermark]
switch=1
water=2
; top-left 0, top-right 1, bottom-right 2, bottom-left 3
; extrafanart stills
[extrafanart]
switch=1
parallel_download=5
extrafanart_folder=extrafanart
; storyline
[storyline]
switch=1
; When website is javbus, javdb, avsox, xcity or carib, the site / censored_site / uncensored_site
; lists name the candidate sources for storyline text. Sites in a list are queried concurrently;
; priority follows the number before the colon, ascending, and a higher-numbered site's result is
; only used when the lower-numbered sites returned nothing.
; airavwiki, airav, avno1 and 58avgo provide Chinese storylines: airav covers censored films only,
; avno1 and airavwiki cover both censored and uncensored, and 58avgo covers uncensored or leaked
; decensored films (this capability is unused).
; xcity and amazon are Japanese; because the Amazon store has no product numbers, the matching DVD
; is selected with only 99.6% accuracy. If all three lists are empty nothing is queried, and
; disabling the lookup this way greatly speeds up scraping.
; site=
site=1:avno1,4:airavwiki
censored_site=2:airav,5:xcity,6:amazon
uncensored_site=3:58avgo
; run mode 0: sequential (slowest)  1: thread pool (default)  2: process pool (higher startup cost than the thread pool; faster as more sites are queried concurrently)
run_mode=1
; show_result: storyline debug info. 0 off, 1 brief, 2 verbose (verbose details are not logged); enable 2 to diagnose why storylines stop working
show_result=0
; Traditional/Simplified Chinese conversion. mode=0: no conversion  1: Traditional to Simplified  2: Simplified to Traditional
[cc_convert]
mode=1
vars=outline,series,studio,tag,title
[javdb]
sites=38,39
; face recognition  locations_model=hog: histogram of oriented gradients (less accurate, fast)  cnn: deep-learning model (accurate, needs GPU/CUDA, slow)
; uncensored_only=0: run face recognition on every cover  1: only on uncensored covers; censored covers are simply cut to their right half
; aways_imagecut=0: follow each site's default behavior  1: always crop the cover; enabling this ignores [common]download_only_missing_images=1 and always overwrites the cover
; The poster crop's width-to-height ratio is configurable as aspect_ratio/3. Default aspect_ratio=2.12 fits most censored-film covers; the previous default was 2/3, i.e. aspect_ratio=2
[face]
locations_model=hog
uncensored_only=1
aways_imagecut=0
aspect_ratio=2.12
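
A worked example of the [face] crop comment above: the poster's width-to-height ratio is aspect_ratio/3, so the legacy default of 2 reproduces the classic 2:3 poster while 2.12 crops slightly wider. Values below are illustrative only.

def poster_width(height: int, aspect_ratio: float = 2.12) -> int:
    # crop width implied by [face]aspect_ratio: width = height * aspect_ratio / 3
    return int(height * aspect_ratio / 3)

assert poster_width(600, 2.0) == 400   # legacy 2:3 crop
assert poster_width(600, 2.12) == 424  # current default, slightly wider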

config.py

@@ -3,19 +3,14 @@ import re
import sys
import configparser
import time
import typing
from pathlib import Path
G_conf_override = {
# index 0 save Config() first instance for quick access by using getInstance()
0: None,
# register override config items
"common:main_mode": None,
"common:source_folder": None,
"common:auto_exit": None,
"common:nfo_skip_days": None,
"common:stop_counter": None,
"common:ignore_failed_list": None,
"debug_mode:switch": None
# no need anymore
}
@@ -74,17 +69,17 @@ class Config:
elif (Path(__file__).resolve().parent / 'config.ini').is_file():
res_path = Path(__file__).resolve().parent / 'config.ini'
if res_path is None:
sys.exit(2)
os._exit(2)
ins = input("Or, Do you want me create a config file for you? (Yes/No)[Y]:")
if re.search('n', ins, re.I):
sys.exit(2)
os._exit(2)
# Only the user's home directory is guaranteed writable, so ~/mdc.ini is chosen as the path for
# the generated config file rather than the current directory, which may lack write permission.
# Keeping a config in the current directory is no longer encouraged and remains only as a trick
# for switching between multiple config files.
write_path = path_search_order[2] # Path.home() / "mdc.ini"
write_path.write_text(res_path.read_text(encoding='utf-8'), encoding='utf-8')
print("Config file '{}' created.".format(write_path.resolve()))
input("Press Enter key exit...")
sys.exit(0)
os._exit(0)
# self.conf = self._default_config()
# try:
# self.conf = configparser.ConfigParser()
@@ -95,29 +90,86 @@ class Config:
# except Exception as e:
# print("[-]Config file not found! Use the default settings")
# print("[-]",e)
# sys.exit(3)
# os._exit(3)
# #self.conf = self._default_config()
def getboolean_override(self, section, item) -> bool:
return self.conf.getboolean(section, item) if G_conf_override[f"{section}:{item}"] is None else bool(
G_conf_override[f"{section}:{item}"])
def set_override(self, option_cmd: str):
"""
Generic config override option: -C '<override string>'
Syntax: section:key=value[;[section:]key=value][;[section:]key+=value]  (items separated by ';')
    or  section:key+=value[...]  which appends to the existing value; '=' and '+=' may be mixed
Example: face:aspect_ratio=2;aways_imagecut=1;priority:website=javdb
A section name must appear at least once at the start; subsequent items may omit it when the
following keys all belong to the same section. Names may be shortened to any unambiguous prefix:
with sections [proxy] and [priority], 'pro' denotes proxy and 'pri' denotes priority.
[face] has four keys: locations_model=, uncensored_only=, aways_imagecut=, aspect_ratio=
    'l', 'lo', 'loc', ... up to the full name all denote locations_model=
    'u', 'un', 'unc', ... denote uncensored_only=
    'aw', 'awa', ... denote aways_imagecut=
    'as', 'asp', ... denote aspect_ratio=
    'a' alone is ambiguous and therefore not a valid abbreviation
"""
def err_exit(str):
print(str)
os._exit(2)
def getint_override(self, section, item) -> int:
return self.conf.getint(section, item) if G_conf_override[f"{section}:{item}"] is None else int(
G_conf_override[f"{section}:{item}"])
def get_override(self, section, item) -> str:
return self.conf.get(section, item) if G_conf_override[f"{section}:{item}"] is None else str(
G_conf_override[f"{section}:{item}"])
sections = self.conf.sections()
sec_name = None
for cmd in option_cmd.split(';'):
syntax_err = True
rex = re.findall(r'^(.*?):(.*?)(=|\+=)(.*)$', cmd, re.U)
if len(rex) and len(rex[0]) == 4:
(sec, key, assign, val) = rex[0]
sec_lo = sec.lower().strip()
key_lo = key.lower().strip()
syntax_err = False
elif sec_name: # a section name has already appeared; later keys in the same section may omit it
rex = re.findall(r'^(.*?)(=|\+=)(.*)$', cmd, re.U)
if len(rex) and len(rex[0]) == 3:
(key, assign, val) = rex[0]
sec_lo = sec_name.lower()
key_lo = key.lower().strip()
syntax_err = False
if syntax_err:
err_exit(f"[-]Config override syntax incorrect. example: 'd:s=1' or 'debug_mode:switch=1'. cmd='{cmd}' all='{option_cmd}'")
if not len(sec_lo):
err_exit(f"[-]Config override Section name '{sec}' is empty! cmd='{cmd}'")
if not len(key_lo):
err_exit(f"[-]Config override Key name '{key}' is empty! cmd='{cmd}'")
if not len(val.strip()):
print(f"[!]Conig overide value '{val}' is empty! cmd='{cmd}'")
sec_name = None
for s in sections:
if not s.lower().startswith(sec_lo):
continue
if sec_name:
err_exit(f"[-]Conig overide Section short name '{sec_lo}' is not unique! dup1='{sec_name}' dup2='{s}' cmd='{cmd}'")
sec_name = s
if sec_name is None:
err_exit(f"[-]Conig overide Section name '{sec}' not found! cmd='{cmd}'")
key_name = None
keys = self.conf[sec_name]
for k in keys:
if not k.lower().startswith(key_lo):
continue
if key_name:
err_exit(f"[-]Conig overide Key short name '{key_lo}' is not unique! dup1='{key_name}' dup2='{k}' cmd='{cmd}'")
key_name = k
if key_name is None:
err_exit(f"[-]Conig overide Key name '{key}' not found! cmd='{cmd}'")
if assign == "+=":
val = keys[key_name] + val
if self.debug():
print(f"[!]Set config override [{sec_name}]{key_name}={val} by cmd='{cmd}'")
self.conf.set(sec_name, key_name, val)
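
Usage sketch of the -C override string this method parses: section and key names may be shortened to any unambiguous prefix, ';' separates items, and '+=' appends. This mirrors the self-test at the bottom of the file.

conf = Config()
# debug_mode:switch=1, face:aspect_ratio=2, face:aways_imagecut=0, priority:website=javdb
conf.set_override("d:s=1;face:asp=2;f:aw=0;pri:w=javdb")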
def main_mode(self) -> int:
try:
return self.getint_override("common", "main_mode")
return self.conf.getint("common", "main_mode")
except ValueError:
self._exit("common:main_mode")
def source_folder(self) -> str:
return self.get_override("common", "source_folder")
return self.conf.get("common", "source_folder")
def failed_folder(self) -> str:
return self.conf.get("common", "failed_output_folder")
@@ -128,14 +180,17 @@ class Config:
def actor_gender(self) -> str:
return self.conf.get("common", "actor_gender")
def soft_link(self) -> bool:
return self.conf.getboolean("common", "soft_link")
def link_mode(self) -> int:
return self.conf.getint("common", "link_mode")
def scan_hardlink(self) -> bool:
return self.conf.getboolean("common", "scan_hardlink", fallback=False)#未找到配置选项,默认不刮削
def failed_move(self) -> bool:
return self.conf.getboolean("common", "failed_move")
def auto_exit(self) -> bool:
return self.getboolean_override("common", "auto_exit")
return self.conf.getboolean("common", "auto_exit")
def translate_to_sc(self) -> bool:
return self.conf.getboolean("common", "translate_to_sc")
@@ -147,19 +202,13 @@ class Config:
return self.conf.getboolean("common", "del_empty_folder")
def nfo_skip_days(self) -> int:
try:
return self.getint_override("common", "nfo_skip_days")
except:
return 30
return self.conf.getint("common", "nfo_skip_days", fallback=30)
def stop_counter(self) -> int:
try:
return self.getint_override("common", "stop_counter")
except:
return 0
return self.conf.getint("common", "stop_counter", fallback=0)
def ignore_failed_list(self) -> bool:
return self.getboolean_override("common", "ignore_failed_list")
return self.conf.getboolean("common", "ignore_failed_list")
def download_only_missing_images(self) -> bool:
return self.conf.getboolean("common", "download_only_missing_images")
@@ -167,6 +216,18 @@ class Config:
def mapping_table_validity(self) -> int:
return self.conf.getint("common", "mapping_table_validity")
def rerun_delay(self) -> int:
value = self.conf.get("common", "rerun_delay")
if not (isinstance(value, str) and re.match(r'^[\dsmh]+$', value, re.I)):
return 0 # not match '1h30m45s' or '30' or '1s2m1h4s5m'
if value.isnumeric() and int(value) >= 0:
return int(value)
sec = 0
sec += sum(int(v) for v in re.findall(r'(\d+)s', value, re.I))
sec += sum(int(v) for v in re.findall(r'(\d+)m', value, re.I)) * 60
sec += sum(int(v) for v in re.findall(r'(\d+)h', value, re.I)) * 3600
return sec
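
rerun_delay() accepts either a bare number of seconds or repeatable h/m/s groups in any order. A standalone restatement for illustration, with a couple of checks:

import re

def parse_delay(value: str) -> int:
    # standalone restatement of Config.rerun_delay() above
    if not re.match(r'^[\dsmh]+$', value, re.I):
        return 0  # neither '1h30m45s' style nor a plain number
    if value.isnumeric():
        return int(value)
    sec = sum(int(v) for v in re.findall(r'(\d+)s', value, re.I))
    sec += sum(int(v) for v in re.findall(r'(\d+)m', value, re.I)) * 60
    sec += sum(int(v) for v in re.findall(r'(\d+)h', value, re.I)) * 3600
    return sec

assert parse_delay('45') == 45
assert parse_delay('1h30m45s') == 5445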
def is_translate(self) -> bool:
return self.conf.getboolean("translate", "switch")
@@ -243,8 +304,8 @@ class Config:
def media_type(self) -> str:
return self.conf.get('media', 'media_type')
def sub_rule(self):
return self.conf.get('media', 'sub_type').split(',')
def sub_rule(self) -> typing.Set[str]:
return set(self.conf.get('media', 'sub_type').lower().split(','))
def naming_rule(self) -> str:
return self.conf.get("Name_Rule", "naming_rule")
@@ -277,7 +338,7 @@ class Config:
return self.conf.get("escape", "folders")
def debug(self) -> bool:
return self.getboolean_override("debug_mode", "switch")
return self.conf.getboolean("debug_mode", "switch")
def is_storyline(self) -> bool:
try:
@@ -304,43 +365,34 @@ class Config:
return "3:58avgo"
def storyline_show(self) -> int:
try:
v = self.conf.getint("storyline", "show_result")
return v if v in (0, 1, 2) else 2 if v > 2 else 0
except:
return 0
v = self.conf.getint("storyline", "show_result", fallback=0)
return v if v in (0, 1, 2) else 2 if v > 2 else 0
def storyline_mode(self) -> int:
try:
v = self.conf.getint("storyline", "run_mode")
return v if v in (0, 1, 2) else 2 if v > 2 else 0
except:
return 1
return 1 if self.conf.getint("storyline", "run_mode", fallback=1) > 0 else 0
def cc_convert_mode(self) -> int:
try:
v = self.conf.getint("cc_convert", "mode")
return v if v in (0, 1, 2) else 2 if v > 2 else 0
except:
return 1
v = self.conf.getint("cc_convert", "mode", fallback=1)
return v if v in (0, 1, 2) else 2 if v > 2 else 0
def cc_convert_vars(self) -> str:
try:
return self.conf.get("cc_convert", "vars")
except:
return "actor,director,label,outline,series,studio,tag,title"
return self.conf.get("cc_convert", "vars",
fallback="actor,director,label,outline,series,studio,tag,title")
def javdb_sites(self) -> str:
try:
return self.conf.get("javdb", "sites")
except:
return "33,34"
return self.conf.get("javdb", "sites", fallback="38,39")
def face_locations_model(self) -> str:
try:
return self.conf.get("face", "locations_model")
except:
return "hog"
return self.conf.get("face", "locations_model", fallback="hog")
def face_uncensored_only(self) -> bool:
return self.conf.getboolean("face", "uncensored_only", fallback=True)
def face_aways_imagecut(self) -> bool:
return self.conf.getboolean("face", "aways_imagecut", fallback=False)
def face_aspect_ratio(self) -> float:
return self.conf.getfloat("face", "aspect_ratio", fallback=2.12)
@staticmethod
def _exit(sec: str) -> None:
@@ -358,7 +410,8 @@ class Config:
conf.set(sec1, "source_folder", "./")
conf.set(sec1, "failed_output_folder", "failed")
conf.set(sec1, "success_output_folder", "JAV_output")
conf.set(sec1, "soft_link", "0")
conf.set(sec1, "link_mode", "0")
conf.set(sec1, "scan_hardlink", "0")
conf.set(sec1, "failed_move", "1")
conf.set(sec1, "auto_exit", "0")
conf.set(sec1, "translate_to_sc", "1")
@@ -370,6 +423,7 @@ class Config:
conf.set(sec1, "ignore_failed_list", 0)
conf.set(sec1, "download_only_missing_images", 1)
conf.set(sec1, "mapping_table_validity", 7)
conf.set(sec1, "rerun_delay", 0)
sec2 = "proxy"
conf.add_section(sec2)
@@ -423,9 +477,9 @@ class Config:
sec11 = "media"
conf.add_section(sec11)
conf.set(sec11, "media_type",
".mp4,.avi,.rmvb,.wmv,.mov,.mkv,.flv,.ts,.webm,.MP4,.AVI,.RMVB,.WMV,.MOV,.MKV,.FLV,.TS,.WEBM,iso,ISO")
".mp4,.avi,.rmvb,.wmv,.mov,.mkv,.flv,.ts,.webm,iso")
conf.set(sec11, "sub_type",
".smi,.srt,.idx,.sub,.sup,.psb,.ssa,.ass,.txt,.usf,.xss,.ssf,.rt,.lrc,.sbv,.vtt,.ttml")
".smi,.srt,.idx,.sub,.sup,.psb,.ssa,.ass,.usf,.xss,.ssf,.rt,.lrc,.sbv,.vtt,.ttml")
sec12 = "watermark"
conf.add_section(sec12)
@@ -503,8 +557,7 @@ if __name__ == "__main__":
config = Config()
mfilter = {'conf', 'proxy', '_exit', '_default_config', 'getboolean_override', 'getint_override', 'get_override',
'ini_path'}
mfilter = {'conf', 'proxy', '_exit', '_default_config', 'ini_path', 'set_override'}
for _m in [m for m in dir(config) if not m.startswith('__') and m not in mfilter]:
evprint(f'config.{_m}()')
pfilter = {'proxies', 'SUPPORT_PROXY_TYPE'}
@@ -513,36 +566,13 @@ if __name__ == "__main__":
for _p in [p for p in dir(getInstance().proxy()) if not p.startswith('__') and p not in pfilter]:
evprint(f'getInstance().proxy().{_p}')
# Override Test
G_conf_override["common:nfo_skip_days"] = 4321
G_conf_override["common:stop_counter"] = 1234
assert config.nfo_skip_days() == 4321
assert getInstance().stop_counter() == 1234
# remove override
G_conf_override["common:stop_counter"] = None
G_conf_override["common:nfo_skip_days"] = None
assert config.nfo_skip_days() != 4321
assert config.stop_counter() != 1234
# Create new instance
conf2 = Config()
assert getInstance() != conf2
assert getInstance() == config
G_conf_override["common:main_mode"] = 9
G_conf_override["common:source_folder"] = "A:/b/c"
# Overrides take effect across all instances
assert config.main_mode() == 9
assert conf2.main_mode() == 9
assert getInstance().main_mode() == 9
assert conf2.source_folder() == "A:/b/c"
print("### Override Test ###".center(36))
evprint('getInstance().main_mode()')
evprint('config.source_folder()')
G_conf_override["common:main_mode"] = None
evprint('conf2.main_mode()')
evprint('config.main_mode()')
# accessing an unregistered key raises KeyError
try:
print(G_conf_override["common:actor_gender"])
except KeyError as ke:
print(f'Caught KeyError: {ke} is not a registered key of G_conf_override dict.', file=sys.stderr)
conf2.set_override("d:s=1;face:asp=2;f:aw=0;pri:w=javdb;f:l=")
assert conf2.face_aspect_ratio() == 2
assert conf2.face_aways_imagecut() == False
assert conf2.sources() == "javdb"
print(f"Load Config file '{conf2.ini_path}'.")

core.py

@@ -1,5 +1,6 @@
import json
import os.path
import os
import pathlib
import re
import shutil
@@ -10,6 +11,7 @@ from PIL import Image
from io import BytesIO
from pathlib import Path
from datetime import datetime
from lxml import etree
from ADC_function import *
from WebCrawler import get_data_from_json
@@ -27,15 +29,15 @@ def escape_path(path, escape_literals: str): # Remove escape literals
def moveFailedFolder(filepath):
conf = config.getInstance()
failed_folder = conf.failed_folder()
soft_link = conf.soft_link()
link_mode = conf.link_mode()
# Mode 3 and link modes now maintain a failed list instead; it is loaded when the scan starts so
# those paths are excluded from repeated processing.
# The old approach of soft-linking into the failed folder was not intuitive and made failed files
# hard to locate; recording the file path directly is simpler.
if conf.main_mode() == 3 or soft_link:
if conf.main_mode() == 3 or link_mode:
ftxt = os.path.abspath(os.path.join(failed_folder, 'failed_list.txt'))
print("[-]Add to Failed List file, see '%s'" % ftxt)
with open(ftxt, 'a', encoding='utf-8') as flt:
flt.write(f'{filepath}\n')
elif conf.failed_move() and not soft_link:
elif conf.failed_move() and not link_mode:
failed_name = os.path.join(failed_folder, os.path.basename(filepath))
mtxt = os.path.abspath(os.path.join(failed_folder, 'where_was_i_before_being_moved.txt'))
print("'[-]Move to Failed output folder, see '%s'" % mtxt)
@@ -69,10 +71,12 @@ def get_info(json_data): # 返回json里的数据
return title, studio, year, outline, runtime, director, actor_photo, release, number, cover, trailer, website, series, label
def small_cover_check(path, number, cover_small, leak_word, c_word, hack_word, filepath):
filename = f"{number}{leak_word}{c_word}{hack_word}-poster.jpg"
download_file_with_filename(cover_small, filename, path, filepath)
print('[+]Image Downloaded! ' + os.path.join(path, filename))
def small_cover_check(path, filename, cover_small, movie_path):
full_filepath = Path(path) / filename
if config.getInstance().download_only_missing_images() and not file_not_exist_or_empty(str(full_filepath)):
return
download_file_with_filename(cover_small, filename, path, movie_path)
print('[+]Image Downloaded! ' + full_filepath.name)
def create_folder(json_data): # create the output folder
@@ -101,7 +105,7 @@ def create_folder(json_data): # 创建文件夹
os.makedirs(path)
except:
print(f"[-]Fatal error! Can not make folder '{path}'")
sys.exit(0)
os._exit(0)
return os.path.normpath(path)
@@ -121,7 +125,7 @@ def download_file_with_filename(url, filename, path, filepath):
os.makedirs(path)
except:
print(f"[-]Fatal error! Can not make folder '{path}'")
sys.exit(0)
os._exit(0)
proxies = configProxy.proxies()
headers = {
'User-Agent': G_USER_AGENT}
@@ -138,7 +142,7 @@ def download_file_with_filename(url, filename, path, filepath):
os.makedirs(path)
except:
print(f"[-]Fatal error! Can not make folder '{path}'")
sys.exit(0)
os._exit(0)
headers = {
'User-Agent': G_USER_AGENT}
r = requests.get(url, timeout=configProxy.timeout, headers=headers)
@@ -213,7 +217,7 @@ def extrafanart_download_one_by_one(data, path, filepath):
break
if file_not_exist_or_empty(jpg_fullpath):
return
print('[+]Image Downloaded!', jpg_fullpath)
print('[+]Image Downloaded!', Path(jpg_fullpath).name)
j += 1
if conf.debug():
print(f'[!]Extrafanart download one by one mode runtime {time.perf_counter() - tm_start:.3f}s')
@@ -244,7 +248,7 @@ def extrafanart_download_threadpool(url_list, save_dir, number):
if failed: # non-fatal error; the movie is not moved to the failed folder, and mode 3 can fetch the missing images later
print(f"[-]Failed downloaded {failed}/{len(result)} extrafanart images for [{number}] to '{extrafanart_dir}', you may retry run mode 3 later.")
else:
print(f"[+]Successfully downloaded {len(result)} extrafanart to '{extrafanart_dir}'")
print(f"[+]Successfully downloaded {len(result)} extrafanarts.")
if conf.debug():
print(f'[!]Extrafanart download ThreadPool mode runtime {time.perf_counter() - tm_start:.3f}s')
@@ -255,7 +259,7 @@ def image_ext(url):
return ".jpg"
# Was the cover downloaded successfully? Otherwise the movie is moved to failed
def image_download(cover, fanart_path,thumb_path, path, filepath):
def image_download(cover, fanart_path, thumb_path, path, filepath):
full_filepath = os.path.join(path, fanart_path)
if config.getInstance().download_only_missing_images() and not file_not_exist_or_empty(full_filepath):
return
@@ -273,7 +277,7 @@ def image_download(cover, fanart_path,thumb_path, path, filepath):
break
if file_not_exist_or_empty(full_filepath):
return
print('[+]Image Downloaded!', full_filepath)
print('[+]Image Downloaded!', Path(full_filepath).name)
shutil.copyfile(full_filepath, os.path.join(path, thumb_path))
@@ -289,8 +293,14 @@ def print_files(path, leak_word, c_word, naming_rule, part, cn_sub, json_data, f
os.makedirs(path)
except:
print(f"[-]Fatal error! can not make folder '{path}'")
sys.exit(0)
os._exit(0)
old_nfo = None
try:
if os.path.isfile(nfo_path):
old_nfo = etree.parse(nfo_path)
except:
pass
# KODI's movie-info view has nowhere to show the number; naming_rule=number+'#'+title would fix
# that but makes titles too long. The usually-empty outline is a better fit, and the UI reserves
# a larger display area for the outline anyway.
outline = f"{number}#{outline}"
@@ -354,6 +364,41 @@ def print_files(path, leak_word, c_word, naming_rule, part, cn_sub, json_data, f
print(" <premiered>" + release + "</premiered>", file=code)
print(" <releasedate>" + release + "</releasedate>", file=code)
print(" <release>" + release + "</release>", file=code)
if old_nfo:
try:
xur = old_nfo.xpath('//userrating/text()')[0]
if isinstance(xur, str) and re.match('\d+\.\d+|\d+', xur.strip()):
print(f" <userrating>{xur.strip()}</userrating>", file=code)
except:
pass
try:
f_rating = json_data['用户评分']
uc = json_data['评分人数']
print(f""" <rating>{round(f_rating * 2.0, 1)}</rating>
<criticrating>{round(f_rating * 20.0, 1)}</criticrating>
<ratings>
<rating name="javdb" max="5" default="true">
<value>{f_rating}</value>
<votes>{uc}</votes>
</rating>
</ratings>""", file=code)
except:
if old_nfo:
try:
for rtag in ('rating', 'criticrating'):
xur = old_nfo.xpath(f'//{rtag}/text()')[0]
if isinstance(xur, str) and re.match('\d+\.\d+|\d+', xur.strip()):
print(f" <{rtag}>{xur.strip()}</{rtag}>", file=code)
f_rating = old_nfo.xpath(f"//ratings/rating[@name='javdb']/value/text()")[0]
uc = old_nfo.xpath(f"//ratings/rating[@name='javdb']/votes/text()")[0]
print(f""" <ratings>
<rating name="javdb" max="5" default="true">
<value>{f_rating}</value>
<votes>{uc}</votes>
</rating>
</ratings>""", file=code)
except:
pass
print(" <cover>" + cover + "</cover>", file=code)
if config.getInstance().is_trailer():
print(" <trailer>" + trailer + "</trailer>", file=code)
@@ -462,51 +507,51 @@ def add_to_pic(pic_path, img_pic, size, count, mode):
# ======================== End =================================
def paste_file_to_folder(filepath, path, number, leak_word, c_word, hack_word): # file path, number, suffix, destination folder
def paste_file_to_folder(filepath, path, multi_part, number, part, leak_word, c_word, hack_word): # file path, number, suffix, destination folder
filepath_obj = pathlib.Path(filepath)
houzhui = filepath_obj.suffix
file_parent_origin_path = str(filepath_obj.parent)
try:
targetpath = os.path.join(path, f"{number}{leak_word}{c_word}{hack_word}{houzhui}")
# Never overwrite under any circumstances, lest a data-source or engine error assign every file
# the same number and successive same-name overwrites destroy all files irrecoverably.
if os.path.exists(targetpath):
raise FileExistsError('File Exists on destination path, we will never overwrite.')
soft_link = config.getInstance().soft_link()
# if soft_link=1, use a symlink
if soft_link == 0:
link_mode = config.getInstance().link_mode()
# link_mode 1: create a symlink; 2: prefer a hard link, falling back to a symlink if it cannot be created
# the old soft_link=2 behavior was removed, since the default log already makes file origins traceable
create_softlink = False
if link_mode not in (1, 2):
shutil.move(filepath, targetpath)
elif soft_link == 1:
elif link_mode == 2:
# a hard link across volumes or drive letters raises an exception; fall back to a symlink
try:
os.link(filepath, targetpath, follow_symlinks=False)
except:
create_softlink = True
if link_mode == 1 or create_softlink:
# Try a relative path first so the video still opens when accessed over the network; if that fails
# (e.g. across drive letters, where relative paths are unsupported), retry the symlink with an absolute path
try:
filerelpath = os.path.relpath(filepath, path)
os.symlink(filerelpath, targetpath)
except:
os.symlink(filepath_obj.resolve(), targetpath)
elif soft_link == 2:
shutil.move(filepath, targetpath)
# After moving the file, leave a traceable symlink at the original location pointing to the new one,
# so you can trace where a file went and avoid losing it after a rename/move under a wrong number;
# this makes manual recovery easy. Since symlinks are no longer scraped, the suffix needs no change.
targetabspath = os.path.abspath(targetpath)
if targetabspath != os.path.abspath(filepath):
targetrelpath = os.path.relpath(targetabspath, file_parent_origin_path)
os.symlink(targetrelpath, filepath)
sub_res = config.getInstance().sub_rule()
os.symlink(str(filepath_obj.resolve()), targetpath)
for subname in sub_res:
sub_filepath = str(filepath_obj.with_suffix(subname))
if os.path.isfile(sub_filepath.replace(subname,".chs" + subname)):
sub_filepath = sub_filepath.replace(subname,".chs" + subname)
subname = ".chs" + subname
elif os.path.isfile(sub_filepath.replace(subname,".cht" + subname)):
sub_filepath = sub_filepath.replace(subname, ".cht" + subname)
subname = ".cht" + subname
if os.path.isfile(sub_filepath):
shutil.move(sub_filepath, os.path.join(path, f"{number}{leak_word}{c_word}{hack_word}{subname}"))
print('[+]Sub moved!')
return True
sub_res = config.getInstance().sub_rule()
for subfile in filepath_obj.parent.glob('**/*'):
if subfile.is_file() and subfile.suffix.lower() in sub_res:
if multi_part and part.lower() not in subfile.name.lower():
continue
if filepath_obj.stem.split('.')[0].lower() != subfile.stem.split('.')[0].lower():
continue
sub_targetpath = Path(path) / f"{number}{leak_word}{c_word}{hack_word}{''.join(subfile.suffixes)}"
if link_mode not in (1, 2):
shutil.move(str(subfile), str(sub_targetpath))
print(f"[+]Sub Moved! {sub_targetpath.name}")
else:
shutil.copyfile(str(subfile), str(sub_targetpath))
print(f"[+]Sub Copied! {sub_targetpath.name}")
return
except FileExistsError as fee:
print(f'[-]FileExistsError: {fee}')
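Summarizing the new link_mode semantics above (0/other: move; 1: symlink; 2: hard link preferred, symlink fallback), here is a condensed self-contained sketch of the same decision chain. The place_file name and its arguments are illustrative, not the project's API:

import os, shutil

def place_file(src, dst_dir, dst_name, link_mode):
    dst = os.path.join(dst_dir, dst_name)
    if os.path.exists(dst):
        # never overwrite, per the comment above
        raise FileExistsError('File Exists on destination path, we will never overwrite.')
    if link_mode not in (1, 2):
        shutil.move(src, dst)  # default: plain move
        return
    if link_mode == 2:
        try:
            os.link(src, dst, follow_symlinks=False)  # hard link works only on the same volume
            return
        except OSError:
            pass  # cross-volume/drive: fall through to a symlink
    try:
        # relative target first, so the link still resolves over network shares
        os.symlink(os.path.relpath(src, dst_dir), dst)
    except OSError:
        os.symlink(os.path.abspath(src), dst)  # absolute target as the last resort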
@@ -525,24 +570,39 @@ def paste_file_to_folder_mode2(filepath, path, multi_part, number, part, leak_wo
number += part # number gets the CD1-style suffix appended here
filepath_obj = pathlib.Path(filepath)
houzhui = filepath_obj.suffix
file_parent_origin_path = str(filepath_obj.parent)
targetpath = os.path.join(path, f"{number}{part}{leak_word}{c_word}{hack_word}{houzhui}")
if os.path.exists(targetpath):
raise FileExistsError('File Exists on destination path, we will never overwrite.')
try:
if config.getInstance().soft_link():
os.symlink(filepath, targetpath)
else:
link_mode = config.getInstance().link_mode()
create_softlink = False
if link_mode not in (1, 2):
shutil.move(filepath, targetpath)
elif link_mode == 2:
try:
os.link(filepath, targetpath, follow_symlinks=False)
except:
create_softlink = True
if link_mode == 1 or create_softlink:
try:
filerelpath = os.path.relpath(filepath, path)
os.symlink(filerelpath, targetpath)
except:
os.symlink(str(filepath_obj.resolve()), targetpath)
sub_res = config.getInstance().sub_rule()
for subname in sub_res:
sub_filepath = str(filepath_obj.with_suffix(subname))
if os.path.isfile(sub_filepath): # move subtitles
shutil.move(sub_filepath, os.path.join(path, f"{number}{part}{leak_word}{c_word}{hack_word}{subname}"))
print('[+]Sub moved!')
print('[!]Success')
return True
for subfile in filepath_obj.parent.glob('**/*'):
if subfile.is_file() and subfile.suffix.lower() in sub_res:
if multi_part and part.lower() not in subfile.name.lower():
continue
sub_targetpath = Path(path) / f"{number}{leak_word}{c_word}{hack_word}{''.join(subfile.suffixes)}"
if link_mode not in (1, 2):
shutil.move(str(subfile), str(sub_targetpath))
print(f"[+]Sub Moved! {sub_targetpath.name}")
else:
shutil.copyfile(str(subfile), str(sub_targetpath))
print(f"[+]Sub Copied! {sub_targetpath.name}")
return
except FileExistsError as fee:
print(f'[-]FileExistsError: {fee}')
return
@@ -554,18 +614,6 @@ def paste_file_to_folder_mode2(filepath, path, multi_part, number, part, leak_wo
return
def get_part(filepath):
try:
if re.search('-CD\d+', filepath):
return re.findall('-CD\d+', filepath)[0]
if re.search('-cd\d+', filepath):
return re.findall('-cd\d+', filepath)[0]
except:
print("[-]failed!Please rename the filename again!")
moveFailedFolder(filepath)
return
def debug_print(data: json):
try:
print("[+] ------- DEBUG INFO -------")
@@ -578,14 +626,65 @@ def debug_print(data: json):
if i == 'extrafanart':
print('[+] -', "%-14s" % i, ':', len(v), 'links')
continue
print('[+] -', "%-14s" % i, ':', v)
print(f'[+] - {i:<{cnspace(i,14)}} : {v}')
print("[+] ------- DEBUG INFO -------")
except:
pass
def core_main(file_path, number_th, oCC):
def core_main_no_net_op(movie_path, number):
conf = config.getInstance()
part = ''
leak_word = ''
leak = 0
c_word = ''
cn_sub = ''
hack = ''
hack_word = ''
ext = '.jpg'
imagecut = 1
path = str(Path(movie_path).parent)
if re.search(r'[-_]CD\d+', movie_path, re.IGNORECASE):
part = re.findall(r'[-_]CD\d+', movie_path, re.IGNORECASE)[0].upper()
if re.search(r'[-_]C(\.\w+$|-\w+)|\d+ch(\.\w+$|-\w+)', movie_path,
re.I) or '中文' in movie_path or '字幕' in movie_path:
cn_sub = '1'
c_word = '-C' # suffix for films with Chinese subtitles
uncensored = 1 if is_uncensored(number) else 0
if '流出' in movie_path or 'uncensored' in movie_path.lower():
leak_word = '-流出' # suffix for leaked films
leak = 1
if 'hack'.upper() in str(movie_path).upper() or '破解' in movie_path:
hack = 1
hack_word = "-hack"
prestr = f"{number}{leak_word}{c_word}{hack_word}"
fanart_path = f"{prestr}-fanart{ext}"
poster_path = f"{prestr}-poster{ext}"
thumb_path = f"{prestr}-thumb{ext}"
full_fanart_path = os.path.join(path, fanart_path)
full_poster_path = os.path.join(path, poster_path)
full_thumb_path = os.path.join(path, thumb_path)
full_nfo = Path(path) / f"{prestr}{part}.nfo"
if full_nfo.is_file():
if full_nfo.read_text(encoding='utf-8').find(r'<tag>无码</tag>') >= 0:
uncensored = 1
else:
return
if not all(os.path.isfile(f) for f in (full_fanart_path, full_thumb_path)):
return
cutImage(imagecut, path, fanart_path, poster_path, bool(conf.face_uncensored_only() and not uncensored))
if conf.is_watermark():
add_mark(full_poster_path, full_thumb_path, cn_sub, leak, uncensored, hack)
def core_main(movie_path, number_th, oCC):
conf = config.getInstance()
# =======================================================================initialize required variables
multi_part = 0
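A side note on the debug_print change above: the new f-string pads with cnspace(i, 14) instead of a fixed %-14s because full-width CJK keys occupy two terminal cells each, which breaks fixed-width alignment. The body of cnspace is not part of this diff, so the following is only a plausible sketch of such a helper under that assumption:

from unicodedata import east_asian_width

def cnspace(s: str, width: int) -> int:
    # full-width ('F') and wide ('W') characters render as two cells,
    # so shrink the pad width by one per wide character
    wide = sum(1 for ch in s if east_asian_width(ch) in 'FW')
    return max(width - wide, 0)

# usage, as in the print above: f'[+] - {i:<{cnspace(i, 14)}} : {v}'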
@@ -597,8 +696,6 @@ def core_main(file_path, number_th, oCC):
hack = ''
hack_word = ''
filepath = file_path # absolute path of the movie file
# the commented-out variable below is not needed
#rootpath= os.getcwd
number = number_th
@@ -606,7 +703,7 @@ def core_main(file_path, number_th, oCC):
# Return if blank dict returned (data not found)
if not json_data:
moveFailedFolder(filepath)
moveFailedFolder(movie_path)
return
if json_data["number"] != number:
@@ -619,25 +716,26 @@ def core_main(file_path, number_th, oCC):
imagecut = json_data.get('imagecut')
tag = json_data.get('tag')
# =======================================================================detect -C / -CD suffixes
if '-CD' in filepath or '-cd' in filepath:
if re.search(r'[-_]CD\d+', movie_path, re.IGNORECASE):
multi_part = 1
part = get_part(filepath)
if '-c.' in filepath or '-C.' in filepath or '中文' in filepath or '字幕' in filepath:
part = re.findall(r'[-_]CD\d+', movie_path, re.IGNORECASE)[0].upper()
if re.search(r'[-_]C(\.\w+$|-\w+)|\d+ch(\.\w+$|-\w+)', movie_path,
re.I) or '中文' in movie_path or '字幕' in movie_path:
cn_sub = '1'
c_word = '-C' # suffix for films with Chinese subtitles
# determine whether the film is uncensored
uncensored = 1 if is_uncensored(number) else 0
unce = json_data.get('无码')
uncensored = int(unce) if isinstance(unce, bool) else int(is_uncensored(number))
if '流出' in filepath or 'uncensored' in filepath:
if '流出' in movie_path or 'uncensored' in movie_path.lower():
liuchu = '流出'
leak = 1
leak_word = '-流出' # suffix for leaked films
else:
leak = 0
if 'hack'.upper() in str(filepath).upper() or '破解' in filepath:
if 'hack'.upper() in str(movie_path).upper() or '破解' in movie_path:
hack = 1
hack_word = "-hack"
@@ -666,78 +764,76 @@ def core_main(file_path, number_th, oCC):
# check the small cover; if imagecut is 3, download the small cover
if imagecut == 3:
small_cover_check(path, number, json_data.get('cover_small'), leak_word, c_word, hack_word, filepath)
small_cover_check(path, poster_path, json_data.get('cover_small'), movie_path)
# creatFolder returns the path for this number
image_download( cover, fanart_path,thumb_path, path, filepath)
image_download( cover, fanart_path,thumb_path, path, movie_path)
if not multi_part or part.lower() == '-cd1':
try:
# download the trailer
if conf.is_trailer() and json_data.get('trailer'):
trailer_download(json_data.get('trailer'), leak_word, c_word, hack_word, number, path, filepath)
trailer_download(json_data.get('trailer'), leak_word, c_word, hack_word, number, path, movie_path)
except:
pass
try:
# download stills (extrafanart): data, path, filepath
if conf.is_extrafanart() and json_data.get('extrafanart'):
extrafanart_download(json_data.get('extrafanart'), path, number, filepath)
extrafanart_download(json_data.get('extrafanart'), path, number, movie_path)
except:
pass
# crop the cover image
cutImage(imagecut, path , fanart_path, poster_path)
cutImage(imagecut, path, fanart_path, poster_path, bool(conf.face_uncensored_only() and not uncensored))
# add watermarks
if conf.is_watermark():
add_mark(os.path.join(path,poster_path), os.path.join(path,thumb_path), cn_sub, leak, uncensored, hack)
# move the movie file
paste_file_to_folder(filepath, path, number, leak_word, c_word, hack_word)
paste_file_to_folder(movie_path, path, multi_part, number, part, leak_word, c_word, hack_word)
# finally write the .nfo metadata file; a completed .nfo marks the task as successful
print_files(path, leak_word, c_word, json_data.get('naming_rule'), part, cn_sub, json_data, filepath, tag, json_data.get('actor_list'), liuchu, uncensored, hack_word
print_files(path, leak_word, c_word, json_data.get('naming_rule'), part, cn_sub, json_data, movie_path, tag, json_data.get('actor_list'), liuchu, uncensored, hack_word
,fanart_path,poster_path,thumb_path)
elif conf.main_mode() == 2:
# create the folder
path = create_folder(json_data)
# move the file
paste_file_to_folder_mode2(filepath, path, multi_part, number, part, leak_word, c_word, hack_word)
paste_file_to_folder_mode2(movie_path, path, multi_part, number, part, leak_word, c_word, hack_word)
if conf.is_watermark():
add_mark(os.path.join(path,poster_path), os.path.join(path,thumb_path), cn_sub, leak, uncensored, hack)
elif conf.main_mode() == 3:
path = str(Path(file_path).parent)
path = str(Path(movie_path).parent)
if multi_part == 1:
number += part # number gets the CD1-style suffix appended here
# check the small cover; if imagecut is 3, download the small cover
if imagecut == 3:
small_cover_check(path, number, json_data.get('cover_small'), leak_word, c_word, hack_word, filepath)
small_cover_check(path, poster_path, json_data.get('cover_small'), movie_path)
# creatFolder returns the path for this number
image_download( cover, fanart_path,thumb_path, path, filepath)
image_download( cover, fanart_path, thumb_path, path, movie_path)
if not multi_part or part.lower() == '-cd1':
# download the trailer
if conf.is_trailer() and json_data.get('trailer'):
trailer_download(json_data.get('trailer'), leak_word, c_word, hack_word, number, path, filepath)
trailer_download(json_data.get('trailer'), leak_word, c_word, hack_word, number, path, movie_path)
# download stills (extrafanart): data, path, filepath
if conf.is_extrafanart() and json_data.get('extrafanart'):
extrafanart_download(json_data.get('extrafanart'), path, number, filepath)
extrafanart_download(json_data.get('extrafanart'), path, number, movie_path)
# crop the cover image
cutImage(imagecut, path , fanart_path, poster_path)
cutImage(imagecut, path, fanart_path, poster_path, bool(conf.face_uncensored_only() and not uncensored))
# add watermarks
if conf.is_watermark():
add_mark(os.path.join(path,poster_path), os.path.join(path,thumb_path), cn_sub, leak, uncensored, hack)
# finally write the .nfo metadata file; a completed .nfo marks the task as successful
print_files(path, leak_word, c_word, json_data.get('naming_rule'), part, cn_sub, json_data, filepath,
print_files(path, leak_word, c_word, json_data.get('naming_rule'), part, cn_sub, json_data, movie_path,
tag, json_data.get('actor_list'), liuchu, uncensored, hack_word,fanart_path,poster_path,thumb_path)

View File

@@ -2,7 +2,7 @@
main_mode=1
failed_output_folder=data/failure_output
success_output_folder=data/organized
soft_link=0
link_mode=0
[proxy]
proxy=

View File

@@ -5,8 +5,9 @@ import config
import typing
G_spat = re.compile(
"^22-sht\.me|-fhd|_fhd|^fhd_|^fhd-|-hd|_hd|^hd_|^hd-|-sd|_sd|-1080p|_1080p|-720p|_720p|"
"^hhd800\.com@|-uncensored|_uncensored|-leak|_leak|-4K|_4K",
"^\w+\.(cc|com|net|me|club|jp|tv|xyz|biz|wiki|info|tw|us|de)@|^22-sht\.me|"
"^(fhd|hd|sd|1080p|720p|4K)(-|_)|"
"(-|_)(fhd|hd|sd|1080p|720p|4K|x264|x265|uncensored|leak)",
re.IGNORECASE)
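The widened G_spat pattern now strips arbitrary site prefixes (anything shaped like word.tld@) as well as leading or trailing quality/codec tags. Demonstrating the cleanup with the pattern exactly as defined above (only the two sample inputs are hypothetical):

import re

G_spat = re.compile(
    r"^\w+\.(cc|com|net|me|club|jp|tv|xyz|biz|wiki|info|tw|us|de)@|^22-sht\.me|"
    r"^(fhd|hd|sd|1080p|720p|4K)(-|_)|"
    r"(-|_)(fhd|hd|sd|1080p|720p|4K|x264|x265|uncensored|leak)",
    re.IGNORECASE)
print(G_spat.sub('', 'hhd800.com@STARS-566-HD'))  # -> STARS-566
print(G_spat.sub('', 'jav20s8.com@GIGL-677_4K'))  # -> GIGL-677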
@@ -46,9 +47,13 @@ def get_number(debug: bool, file_path: str) -> str:
lower_check = filename.lower()
if 'fc2' in lower_check:
filename = lower_check.replace('ppv', '').replace('--', '-').replace('_', '-').upper()
filename = re.sub("(-|_)cd\d{1,2}", "", filename, flags=re.IGNORECASE)
filename = re.sub("[-_]cd\d{1,2}", "", filename, flags=re.IGNORECASE)
if not re.search("-|_", filename): # 去掉-CD1之后再无-的情况例如n1012-CD1.wmv
return str(re.search(r'\w+', filename[:filename.find('.')], re.A).group())
file_number = str(re.search(r'\w+(-|_)\w+', filename, re.A).group())
file_number = re.sub("(-|_)c$", "", file_number, flags=re.IGNORECASE)
if re.search("\d+ch$", file_number, flags=re.I):
file_number = file_number[:-2]
return file_number.upper()
else: # extract numbers without a hyphen (FANZA CID)
# matching rules for Western releases
@@ -124,7 +129,8 @@ def is_uncensored(number):
):
return True
if G_cache_uncensored_conf.is_empty():
G_cache_uncensored_conf.set(config.getInstance().get_uncensored().split(','))
if G_cache_uncensored_conf.set(config.getInstance().get_uncensored().split(',')) is None:
return False
return G_cache_uncensored_conf.check(number)
@@ -146,13 +152,23 @@ if __name__ == "__main__":
"caribean-020317_001.nfo", # -号误命名为_号的
"257138_3xplanet_1Pondo_080521_001.mp4",
"ADV-R0624-CD3.wmv", # 多碟影片
"XXX-AV 22061-CD5.iso", # 支持片商格式 xxx-av-22061 命名规则来自javdb数据源
"XXX-AV 22061-CD5.iso", # 支持片商格式 xxx-av-22061 命名规则来自javdb数据源
"xxx-av 20589.mp4",
"Muramura-102114_145-HD.wmv", # 支持片商格式 102114_145 命名规则来自javdb数据源
"heydouga-4102-023-CD2.iso", # 支持片商格式 heydouga-4102-023 命名规则来自javdb数据源
"Muramura-102114_145-HD.wmv", # 支持片商格式 102114_145 命名规则来自javdb数据源
"heydouga-4102-023-CD2.iso", # 支持片商格式 heydouga-4102-023 命名规则来自javdb数据源
"HeyDOuGa4236-1048 Ai Qiu - .mp4", # heydouga-4236-1048 命名规则来自javdb数据源
"pacopacomama-093021_539-FHD.mkv", # 支持片商格式 093021_539 命名规则来自javdb数据源
"sbw99.cc@heyzo_hd_2636_full.mp4"
"pacopacomama-093021_539-FHD.mkv", # 支持片商格式 093021_539 命名规则来自javdb数据源
"sbw99.cc@heyzo_hd_2636_full.mp4",
"hhd800.com@STARS-566-HD.mp4",
"jav20s8.com@GIGL-677_4K.mp4",
"sbw99.cc@iesp-653-4K.mp4",
"4K-ABP-358_C.mkv",
"n1012-CD1.wmv",
"[]n1012-CD2.wmv",
"rctd-460ch.mp4", # 除支持-C硬字幕外新支持ch硬字幕
"rctd-461CH-CD2.mp4", # ch后可加CDn
"rctd-461-Cd3-C.mp4", # CDn后可加-C
"rctd-461-C-cD4.mp4", # cD1 Cd1 cd1 CD1 最终生成.nfo时统一为大写CD1
)
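For a few of the new fixtures, the expected normalization under the rules added above (derived by tracing the code paths, not quoted from the repo):

# 'hhd800.com@STARS-566-HD.mp4'  -> STARS-566  (site prefix and -HD tag stripped by G_spat)
# '4K-ABP-358_C.mkv'             -> ABP-358    (leading 4K- tag stripped, then the _C sub marker)
# 'rctd-460ch.mp4'               -> RCTD-460   (trailing ch hard-sub marker stripped)
# 'rctd-461-Cd3-C.mp4'           -> RCTD-461   (-Cd3 disc part removed first, -C falls outside the number match)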

View File

@@ -9,7 +9,7 @@ mkdir build
mkdir __pycache__
pyinstaller --onefile Movie_Data_Capture.py `
--hidden-import "ImageProcessing.hog" `
--hidden-import "ImageProcessing.cnn" `
--add-data "$FACE_RECOGNITION_MODELS;face_recognition_models" `
--add-data "$CLOUDSCRAPER_PATH;cloudscraper" `
--add-data "$OPENCC_PATH;opencc" `

View File

@@ -9,4 +9,4 @@ urllib3==1.24.3
certifi==2020.12.5
MechanicalSoup==1.1.0
opencc-python-reimplemented
face_recognition
face_recognition

View File

@@ -1,8 +1,10 @@
pkg install python38 py38-requests py38-pip py38-lxml py38-pillow py38-cloudscraper py38-pysocks git zip py38-beautifulsoup448 py38-mechanicalsoup
pip install pyquery pyinstaller
pyinstaller --onefile Movie_Data_Capture.py --hidden-import ADC_function.py --hidden-import core.py \
--hidden-import "ImageProcessing.cnn" \
--add-data "$(python3.8 -c 'import cloudscraper as _; print(_.__path__[0])' | tail -n 1):cloudscraper" \
--add-data "$(python3.8 -c 'import opencc as _; print(_.__path__[0])' | tail -n 1):opencc" \
--add-data "$(python3.8 -c 'import face_recognition_models as _; print(_.__path__[0])' | tail -n 1):face_recognition_models" \
--add-data "Img:Img" \
--add-data "config.ini:." \

View File

@@ -13,8 +13,10 @@
pip3 install -r requirements.txt
pip3 install cloudscraper==1.2.52
pyinstaller --onefile Movie_Data_Capture.py --hidden-import ADC_function.py --hidden-import core.py \
--hidden-import "ImageProcessing.cnn" \
--add-data "$(python3 -c 'import cloudscraper as _; print(_.__path__[0])' | tail -n 1):cloudscraper" \
--add-data "$(python3 -c 'import opencc as _; print(_.__path__[0])' | tail -n 1):opencc" \
--add-data "$(python3 -c 'import face_recognition_models as _; print(_.__path__[0])' | tail -n 1):face_recognition_models" \
--add-data "Img:Img" \
--add-data "config.ini:." \