Merge pull request #607 from lededev/log-3

Continue refining the new features submitted last month
Yoshiko2 committed (via GitHub) on 2021-10-22 00:30:38 +08:00
23 changed files with 1669 additions and 847 deletions

View File

@@ -42,6 +42,8 @@ jobs:
           --hidden-import ADC_function.py \
           --hidden-import core.py \
           --add-data "$(python -c 'import cloudscraper as _; print(_.__path__[0])' | tail -n 1):cloudscraper" \
+          --add-data "Img:Img" \
+          --add-data "config.ini:." \
       - name: Build with PyInstaller for windows
         if: matrix.os == 'windows-latest'
@@ -51,6 +53,8 @@ jobs:
           --hidden-import ADC_function.py `
           --hidden-import core.py `
           --add-data "$(python -c 'import cloudscraper as _; print(_.__path__[0])' | tail -n 1);cloudscraper" `
+          --add-data "Img;Img" `
+          --add-data "config.ini;." `
       - name: Copy config.ini
         run: |

View File

@@ -1,8 +1,8 @@
 from os import replace
 import requests
-import hashlib
+#import hashlib
 from pathlib import Path
-import random
+import secrets
 import os.path
 import uuid
 import json
@@ -20,12 +20,12 @@ def getXpathSingle(htmlcode, xpath):
     return result1
-G_USER_AGENT = r'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.82 Safari/537.36'
+G_USER_AGENT = r'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.81 Safari/537.36'
 # Core web-request helper
 def get_html(url, cookies: dict = None, ua: str = None, return_type: str = None):
-    verify = config.Config().cacert_file()
-    configProxy = config.Config().proxy()
+    verify = config.getInstance().cacert_file()
+    configProxy = config.getInstance().proxy()
     errors = ""
     if ua is None:
@@ -61,7 +61,7 @@ def get_html(url, cookies: dict = None, ua: str = None, return_type: str = None)
 def post_html(url: str, query: dict, headers: dict = None) -> requests.Response:
-    configProxy = config.Config().proxy()
+    configProxy = config.getInstance().proxy()
     errors = ""
     headers_ua = {"User-Agent": G_USER_AGENT}
     if headers is None:
@@ -85,8 +85,12 @@ def post_html(url: str, query: dict, headers: dict = None) -> requests.Response:
 def get_html_by_browser(url, cookies: dict = None, ua: str = None, return_type: str = None):
-    browser = mechanicalsoup.StatefulBrowser(user_agent=G_USER_AGENT if ua is None else ua)
-    configProxy = config.Config().proxy()
+    s = None
+    if isinstance(cookies, dict) and len(cookies):
+        s = requests.Session()
+        requests.utils.add_dict_to_cookiejar(s.cookies, cookies)
+    browser = mechanicalsoup.StatefulBrowser(user_agent=G_USER_AGENT if ua is None else ua, session=s)
+    configProxy = config.getInstance().proxy()
     if configProxy.enable:
         browser.session.proxies = configProxy.proxies()
     result = browser.open(url)
@@ -103,17 +107,19 @@ def get_html_by_browser(url, cookies: dict = None, ua: str = None, return_type:
         return result.text
-def get_html_by_form(url, form_name: str = None, fields: dict = None, cookies: dict = None, ua: str = None, return_type: str = None):
-    browser = mechanicalsoup.StatefulBrowser(user_agent=G_USER_AGENT if ua is None else ua)
-    if isinstance(cookies, dict):
-        requests.utils.add_dict_to_cookiejar(browser.session.cookies, cookies)
-    configProxy = config.Config().proxy()
+def get_html_by_form(url, form_select: str = None, fields: dict = None, cookies: dict = None, ua: str = None, return_type: str = None):
+    s = None
+    if isinstance(cookies, dict) and len(cookies):
+        s = requests.Session()
+        requests.utils.add_dict_to_cookiejar(s.cookies, cookies)
+    browser = mechanicalsoup.StatefulBrowser(user_agent=G_USER_AGENT if ua is None else ua, session=s)
+    configProxy = config.getInstance().proxy()
     if configProxy.enable:
         browser.session.proxies = configProxy.proxies()
     result = browser.open(url)
     if not result.ok:
         return ''
-    form = browser.select_form() if form_name is None else browser.select_form(form_name)
+    form = browser.select_form() if form_select is None else browser.select_form(form_select)
     if isinstance(fields, dict):
         for k, v in fields.items():
             browser[k] = v
@@ -131,7 +137,7 @@ def get_html_by_form(url, form_select: str = None, fields: dict = None, cookies:
 # def get_javlib_cookie() -> [dict, str]:
 #     import cloudscraper
-#     switch, proxy, timeout, retry_count, proxytype = config.Config().proxy()
+#     switch, proxy, timeout, retry_count, proxytype = config.getInstance().proxy()
 #     proxies = get_proxy(proxy, proxytype)
 #
 #     raw_cookie = {}
@@ -158,7 +164,7 @@ def get_html_by_form(url, form_select: str = None, fields: dict = None, cookies:
 def translateTag_to_sc(tag):
-    tranlate_to_sc = config.Config().transalte_to_sc()
+    tranlate_to_sc = config.getInstance().transalte_to_sc()
     if tranlate_to_sc:
         dict_gen = {'中文字幕': '中文字幕',
             '高清': 'XXXX', '字幕': 'XXXX', '推薦作品': '推荐作品', '通姦': '通奸', '淋浴': '淋浴', '舌頭': '舌头',
@@ -505,8 +511,11 @@ def translate(
     delay: int = 0,
 ):
     trans_result = ""
+    # Chinese text containing symbols such as '&' gets truncated by Google Translate and loses content,
+    # and translating Chinese into Chinese is pointless anyway, so only translate text containing Japanese kana.
+    if not is_japanese(src):
+        return src
     if engine == "google-free":
-        gsite = config.Config().get_translate_service_site()
+        gsite = config.getInstance().get_translate_service_site()
         if not re.match('^translate\.google\.(com|com\.\w{2}|\w{2})$', gsite):
             gsite = 'translate.google.cn'
         url = (
@@ -521,7 +530,7 @@ f"https://{gsite}/translate_a/single?client=gtx&dt=t&dj=1&ie=UTF-8&sl=auto&tl={t
         trans_result = trans_result.join(translate_list)
     # elif engine == "baidu":
     #     url = "https://fanyi-api.baidu.com/api/trans/vip/translate"
-    #     salt = random.randint(1, 1435660288)
+    #     salt = secrets.randbelow(1435660287) + 1  # random.randint(1, 1435660288)
     #     sign = app_id + src + str(salt) + key
     #     sign = hashlib.md5(sign.encode()).hexdigest()
     #     url += (
@@ -560,17 +569,6 @@ f"https://{gsite}/translate_a/single?client=gtx&dt=t&dj=1&ie=UTF-8&sl=auto&tl={t
     return trans_result
-# ======================================================================== uncensored-number check
-def is_uncensored(number):
-    if re.match('^\d{4,}', number) or re.match('n\d{4}', number) or 'HEYZO' in number.upper():
-        return True
-    configs = config.Config().get_uncensored()
-    prefix_list = str(configs).split(',')
-    for pre in prefix_list:
-        if pre.upper() in number.upper():
-            return True
-    return False
 # Cookies exported from a logged-in browser session let the scraper open member-only pages that guests cannot reach
 # Example: FC2-755670  url https://javdb9.com/v/vO8Mn
 # JSON file format:
@@ -593,20 +591,20 @@ def load_cookies(filename):
     filename = os.path.basename(filename)
     if not len(filename):
         return None, None
-    path_search_order = [
-        f"./{filename}",
-        os.path.join(Path.home(), filename),
-        os.path.join(Path.home(), f".avdc/{filename}"),
-        os.path.join(Path.home(), f".local/share/avdc/{filename}")
-    ]
+    path_search_order = (
+        Path.cwd() / filename,
+        Path.home() / filename,
+        Path.home() / f".avdc/{filename}",
+        Path.home() / f".local/share/avdc/{filename}"
+    )
     cookies_filename = None
-    try:
-        for p in path_search_order:
-            if os.path.exists(p):
-                cookies_filename = os.path.abspath(p)
-                break
-        if not cookies_filename:
-            return None, None
+    for p in path_search_order:
+        if p.is_file():
+            cookies_filename = str(p.resolve())
+            break
+    if not cookies_filename:
+        return None, None
+    try:
         return json.load(open(cookies_filename)), cookies_filename
     except:
         return None, None
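A minimal usage sketch of the new search order (the cookies filename below is hypothetical):

# Hypothetical usage: search CWD, then ~, ~/.avdc and ~/.local/share/avdc for the file
cookies, cookies_path = load_cookies('javdb9.json')
if cookies is not None:
    print(f"Loaded cookies from '{cookies_path}'")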
@@ -623,10 +621,9 @@ def file_modification_days(filename) -> int:
         return 9999
     return days
-# Check whether the file is a link
-def is_link(filename: str):
-    if os.path.islink(filename):
-        return True  # symlink
-    elif os.stat(filename).st_nlink > 1:
-        return True  # hard link: Linux, macOS, Windows NTFS
-    return False
+def file_not_exist_or_empty(filepath) -> bool:
+    return not os.path.isfile(filepath) or os.path.getsize(filepath) == 0
+# Simple Japanese detection
+def is_japanese(s) -> bool:
+    return bool(re.search(r'[\u3040-\u309F\u30A0-\u30FF\uFF66-\uFF9F]', s, re.UNICODE))
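The character classes cover hiragana (U+3040-309F), katakana (U+30A0-30FF) and half-width katakana (U+FF66-FF9F), so pure-Chinese text never reaches the translation engine. A minimal sketch of the short-circuit:

import re

def is_japanese(s) -> bool:
    return bool(re.search(r'[\u3040-\u309F\u30A0-\u30FF\uFF66-\uFF9F]', s, re.UNICODE))

print(is_japanese('素人をナンパ'))  # True: contains kana, translate() proceeds
print(is_japanese('中文字幕'))      # False: CJK ideographs only, translate() returns src unchanged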

View File

@@ -6,12 +6,13 @@ import sys
 import shutil
 import typing
 import urllib3
+import signal
 import config
 from datetime import datetime, timedelta
 import time
 from pathlib import Path
-from ADC_function import file_modification_days, get_html, is_link
+from ADC_function import file_modification_days, get_html
 from number_parser import get_number
 from core import core_main, moveFailedFolder
@@ -35,30 +36,54 @@ def check_update(local_version):
 def argparse_function(ver: str) -> typing.Tuple[str, str, bool]:
-    parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter)
+    conf = config.getInstance()
+    parser = argparse.ArgumentParser(epilog=f"Load Config file '{conf.ini_path}'.")
     parser.add_argument("file", default='', nargs='?', help="Single Movie file path.")
     parser.add_argument("-p","--path",default='',nargs='?',help="Analysis folder path.")
-    # parser.add_argument("-c", "--config", default='config.ini', nargs='?', help="The config file Path.")
-    default_logdir = os.path.join(Path.home(),'.avlogs')
+    parser.add_argument("-m","--main-mode",default='',nargs='?',help="Main mode. 1:Scraping 2:Organizing 3:Scraping in analysis folder")
+    parser.add_argument("-n", "--number", default='', nargs='?', help="Custom file number of single movie file.")
+    # parser.add_argument("-C", "--config", default='config.ini', nargs='?', help="The config file Path.")
+    default_logdir = str(Path.home() / '.avlogs')
     parser.add_argument("-o","--log-dir",dest='logdir',default=default_logdir,nargs='?',
-        help=f"""Duplicate stdout and stderr to logfiles
-        in logging folder, default on.
-        default for current user: {default_logdir}
-        Use --log-dir= to turn off logging feature.""")
-    parser.add_argument("-n", "--number", default='', nargs='?', help="Custom file number")
-    parser.add_argument("-a", "--auto-exit", dest='autoexit', action="store_true",
-        help="Auto exit after program complete")
+        help=f"""Duplicate stdout and stderr to logfiles in logging folder, default on.
+        default folder for current user: '{default_logdir}'. Change default folder to an empty file,
+        or use --log-dir= to turn log off.""")
     parser.add_argument("-q","--regex-query",dest='regexstr',default='',nargs='?',help="python re module regex filepath filtering.")
+    parser.add_argument("-d","--nfo-skip-days",dest='days',default='',nargs='?', help="Override nfo_skip_days value in config.")
+    parser.add_argument("-c","--stop-counter",dest='cnt',default='',nargs='?', help="Override stop_counter value in config.")
+    parser.add_argument("-i", "--ignore-failed-list", action="store_true", help="Ignore failed list '{}'".format(
+        os.path.join(os.path.abspath(conf.failed_folder()), 'failed_list.txt')))
+    parser.add_argument("-a", "--auto-exit", action="store_true",
+        help="Auto exit after program complete")
+    parser.add_argument("-g","--debug", action="store_true",
+        help="Turn on debug mode to generate diagnostic log for issue report.")
+    parser.add_argument("-z","--zero-operation",dest='zero_op', action="store_true",
+        help="""Only show job list of files and numbers, and **NO** actual operation
+        is performed. It may help you correct wrong numbers before real job.""")
     parser.add_argument("-v", "--version", action="version", version=ver)
+    #ini_path
     args = parser.parse_args()
+    def get_natural_number_or_none(value):
+        return int(value) if isinstance(value, str) and value.isnumeric() and int(value)>=0 else None
+    def get_str_or_none(value):
+        return value if isinstance(value, str) and len(value) else None
+    def get_bool_or_none(value):
+        return True if isinstance(value, bool) and value else None
+    config.G_conf_override["common:main_mode"] = get_natural_number_or_none(args.main_mode)
+    config.G_conf_override["common:source_folder"] = get_str_or_none(args.path)
+    config.G_conf_override["common:auto_exit"] = get_bool_or_none(args.auto_exit)
+    config.G_conf_override["common:nfo_skip_days"] = get_natural_number_or_none(args.days)
+    config.G_conf_override["common:stop_counter"] = get_natural_number_or_none(args.cnt)
+    config.G_conf_override["common:ignore_failed_list"] = get_bool_or_none(args.ignore_failed_list)
+    config.G_conf_override["debug_mode:switch"] = get_bool_or_none(args.debug)
-    return args.file, args.path, args.number, args.autoexit, args.logdir, args.regexstr
+    return args.file, args.number, args.logdir, args.regexstr, args.zero_op
 class OutLogger(object):
     def __init__(self, logfile) -> None:
         self.term = sys.stdout
         self.log = open(logfile,"w",encoding='utf-8',buffering=1)
+        self.filepath = logfile
     def __del__(self):
         self.close()
     def __enter__(self):
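The CLI values land in config.G_conf_override and win over config.ini whenever config.getInstance() is queried. The config module itself is outside this diff, so the following is only a minimal sketch of the assumed pattern:

# Minimal sketch of the override pattern (assumed internals; the real config module is not in this diff)
G_conf_override = {"common:main_mode": None, "debug_mode:switch": None}

class Config:
    def __init__(self, ini_main_mode: int = 1):
        self._ini_main_mode = ini_main_mode  # value parsed from config.ini

    def main_mode(self) -> int:
        # a non-None CLI override beats the config.ini value
        override = G_conf_override.get("common:main_mode")
        return override if override is not None else self._ini_main_mode

_instance = Config()

def getInstance() -> Config:
    # process-wide singleton, so every module sees the same overrides
    return _instance

G_conf_override["common:main_mode"] = 3   # e.g. set by `-m 3` on the command line
print(getInstance().main_mode())          # 3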
@@ -85,6 +110,7 @@ class ErrLogger(OutLogger):
     def __init__(self, logfile) -> None:
         self.term = sys.stderr
         self.log = open(logfile,"w",encoding='utf-8',buffering=1)
+        self.filepath = logfile
     def close(self):
         if self.term != None:
             sys.stderr = self.term
@@ -97,14 +123,18 @@ class ErrLogger(OutLogger):
 def dupe_stdout_to_logfile(logdir: str):
     if not isinstance(logdir, str) or len(logdir) == 0:
         return
-    if not os.path.isdir(logdir):
-        os.makedirs(logdir)
-        if not os.path.isdir(logdir):
-            return
+    log_dir = Path(logdir)
+    if not log_dir.exists():
+        try:
+            log_dir.mkdir(parents=True,exist_ok=True)
+        except:
+            pass
+    if not log_dir.is_dir():
+        return  # Tip: disable logging by replacing the log directory with an empty regular file of the same name
+    abslog_dir = log_dir.resolve()
     log_tmstr = datetime.now().strftime("%Y%m%dT%H%M%S")
-    logfile = os.path.join(logdir, f'avdc_{log_tmstr}.txt')
-    errlog = os.path.join(logdir, f'avdc_{log_tmstr}_err.txt')
+    logfile = abslog_dir / f'avdc_{log_tmstr}.txt'
+    errlog = abslog_dir / f'avdc_{log_tmstr}_err.txt'
     sys.stdout = OutLogger(logfile)
     sys.stderr = ErrLogger(errlog)
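OutLogger/ErrLogger replace sys.stdout and sys.stderr with tee-style writers. Their write() methods fall outside this hunk, so this is a minimal sketch of the assumed behaviour, with an illustrative class name and filename:

import sys

class Tee:
    # illustrative stand-in for OutLogger/ErrLogger; the real write() is outside this hunk
    def __init__(self, term, logfile):
        self.term = term
        self.log = open(logfile, "w", encoding="utf-8", buffering=1)  # line-buffered, as above
        self.filepath = logfile
    def write(self, msg):
        self.term.write(msg)  # still reach the console...
        self.log.write(msg)   # ...while duplicating into the log file
    def flush(self):
        self.term.flush()
        self.log.flush()

# hypothetical use, mirroring dupe_stdout_to_logfile():
# sys.stdout = Tee(sys.stdout, 'avdc_20211021T120000.txt')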
@@ -113,28 +143,126 @@ def dupe_stdout_to_logfile(logdir: str):
 def close_logfile(logdir: str):
     if not isinstance(logdir, str) or len(logdir) == 0 or not os.path.isdir(logdir):
         return
+    # save the log file path before the log is closed
+    filepath = None
+    try:
+        filepath = sys.stdout.filepath
+    except:
+        pass
     sys.stdout.close()
     sys.stderr.close()
-    # clean up empty files
-    for current_dir, subdirs, files in os.walk(logdir, topdown=False):
-        try:
-            for f in files:
-                full_name = os.path.join(current_dir, f)
-                if os.path.getsize(full_name) == 0:
-                    os.remove(full_name)
-        except:
-            pass
+    log_dir = Path(logdir).resolve()
+    if isinstance(filepath, Path):
+        print(f"Log file '{filepath}' saved.")
+        assert(filepath.parent.samefile(log_dir))
+    # clean up empty files
+    for f in log_dir.glob(r'*_err.txt'):
+        if f.stat().st_size == 0:
+            try:
+                f.unlink(missing_ok=True)
+            except:
+                pass
+    # Merge logs. Only text logs directly inside the log directory are scanned; subdirectories are ignored.
+    # Logs older than three days are merged into one log per day; logs older than three months are merged
+    # into one log per month; monthly logs from last year and earlier are merged into yearly logs from April
+    # of the current year onward.
+    # Test steps:
+    """
+    LOGDIR=/tmp/avlog
+    mkdir -p $LOGDIR
+    for f in {2016..2020}{01..12}{01..28};do;echo $f>$LOGDIR/avdc_${f}T235959.txt;done
+    for f in {01..09}{01..28};do;echo 2021$f>$LOGDIR/avdc_2021${f}T235959.txt;done
+    for f in {00..23};do;echo 20211001T$f>$LOGDIR/avdc_20211001T${f}5959.txt;done
+    echo "$(ls -1 $LOGDIR|wc -l) files in $LOGDIR"
+    # 1932 files in /tmp/avlog
+    avdc -zgic1 -d0 -m3 -o $LOGDIR
+    # python3 ./AV_Data_Capture.py -zgic1 -o $LOGDIR
+    ls $LOGDIR
+    # rm -rf $LOGDIR
+    """
+    today = datetime.today()
+    # Step 1: merge into daily logs. Files older than 3 days whose names fall on the same day become one log.
+    for i in range(1):
+        txts = [f for f in log_dir.glob(r'*.txt') if re.match(r'^avdc_\d{8}T\d{6}$', f.stem, re.A)]
+        if not txts or not len(txts):
+            break
+        e = [f for f in txts if '_err' in f.stem]
+        txts.sort()
+        tmstr_3_days_ago = (today.replace(hour=0) - timedelta(days=3)).strftime("%Y%m%dT99")
+        deadline_day = f'avdc_{tmstr_3_days_ago}'
+        day_merge = [f for f in txts if f.stem < deadline_day]
+        if not day_merge or not len(day_merge):
+            break
+        cutday = len('T235959.txt')  # cut length avdc_20201201|T235959.txt
+        for f in day_merge:
+            try:
+                day_file_name = str(f)[:-cutday] + '.txt'  # avdc_20201201.txt
+                with open(day_file_name, 'a', encoding='utf-8') as m:
+                    m.write(f.read_text(encoding='utf-8'))
+                f.unlink(missing_ok=True)
+            except:
+                pass
+    # Step 2: merge into monthly logs
+    for i in range(1):  # one-pass loop so `break` can skip to the next step, avoiding a deeply indented if block (Python has no goto)
+        txts = [f for f in log_dir.glob(r'*.txt') if re.match(r'^avdc_\d{8}$', f.stem, re.A)]
+        if not txts or not len(txts):
+            break
+        txts.sort()
+        tmstr_3_month_ago = (today.replace(day=1) - timedelta(days=3*30)).strftime("%Y%m32")
+        deadline_month = f'avdc_{tmstr_3_month_ago}'
+        month_merge = [f for f in txts if f.stem < deadline_month]
+        if not month_merge or not len(month_merge):
+            break
+        tomonth = len('01.txt')  # cut length avdc_202012|01.txt
+        for f in month_merge:
+            try:
+                month_file_name = str(f)[:-tomonth] + '.txt'  # avdc_202012.txt
+                with open(month_file_name, 'a', encoding='utf-8') as m:
+                    m.write(f.read_text(encoding='utf-8'))
+                f.unlink(missing_ok=True)
+            except:
+                pass
+    # Step 3: merge monthly logs into yearly logs
+    if today.month < 4:
+        return
+    mons = [f for f in log_dir.glob(r'*.txt') if re.match(r'^avdc_\d{6}$', f.stem, re.A)]
+    if not mons or not len(mons):
+        return
+    mons.sort()
+    deadline_year = f'avdc_{today.year-1}13'
+    year_merge = [f for f in mons if f.stem < deadline_year]
+    if not year_merge or not len(year_merge):
+        return
+    toyear = len('12.txt')  # cut length avdc_2020|12.txt
+    for f in year_merge:
+        try:
+            year_file_name = str(f)[:-toyear] + '.txt'  # avdc_2020.txt
+            with open(year_file_name, 'a', encoding='utf-8') as y:
+                y.write(f.read_text(encoding='utf-8'))
+            f.unlink(missing_ok=True)
+        except:
+            pass
+    # Step 4: compressing yearly logs is left to the user: compress by hand or schedule an external script.
+    # nongnu's lzip is recommended; for text logs at this granularity it currently has the best compression
+    # ratio. With lzip -9 the ratio beats xz -9 while using less memory, with better multi-core utilisation
+    # (plzip, the multi-threaded build) and faster decompression. Compressed size is roughly 2.4% to 3.7% of
+    # the original; a 100MB log file shrinks to about 3.7MB.
-# Rewritten video file scan: recursion removed, global variables removed, failed-file-list skipping added
-def movie_lists(root, conf, regexstr):
-    escape_folder = re.split("[,]", conf.escape_folder())
+def signal_handler(*args):
+    print('[!]Ctrl+C detected, Exit.')
+    sys.exit(9)
+def sigdebug_handler(*args):
+    config.G_conf_override["debug_mode:switch"] = not config.G_conf_override["debug_mode:switch"]
+    print('[!]Debug {}'.format('On' if config.getInstance().debug() else 'oFF'))
+# Added: skip files on the failed list, skip by .nfo modification days, report the total of skipped videos,
+# list every skipped file in debug mode (-g), and skip small ad clips
+def movie_lists(source_folder, regexstr):
+    conf = config.getInstance()
     main_mode = conf.main_mode()
     debug = conf.debug()
     nfo_skip_days = conf.nfo_skip_days()
     soft_link = conf.soft_link()
-    total = []
-    file_type = conf.media_type().upper().split(",")
+    file_type = conf.media_type().lower().split(",")
     trailerRE = re.compile(r'-trailer\.', re.IGNORECASE)
     cliRE = None
     if isinstance(regexstr, str) and len(regexstr):
@@ -142,72 +270,94 @@ def movie_lists(root, conf, regexstr):
             cliRE = re.compile(regexstr, re.IGNORECASE)
         except:
             pass
+    failed_list_txt_path = Path(conf.failed_folder()).resolve() / 'failed_list.txt'
     failed_set = set()
-    if main_mode == 3 or soft_link:
+    if (main_mode == 3 or soft_link) and not conf.ignore_failed_list():
         try:
-            with open(os.path.join(conf.failed_folder(), 'failed_list.txt'), 'r', encoding='utf-8') as flt:
-                flist = flt.read().splitlines()
-                failed_set = set(flist)
-                flt.close()
-            if len(flist) != len(failed_set):
-                with open(os.path.join(conf.failed_folder(), 'failed_list.txt'), 'w', encoding='utf-8') as flt:
-                    flt.writelines([line + '\n' for line in failed_set])
-                    flt.close()
+            flist = failed_list_txt_path.read_text(encoding='utf-8').splitlines()
+            failed_set = set(flist)
+            if len(flist) != len(failed_set):  # de-duplicate and write back without reordering failed_list.txt entries; keep only the last occurrence of duplicates
+                fset = failed_set.copy()
+                for i in range(len(flist)-1, -1, -1):
+                    fset.remove(flist[i]) if flist[i] in fset else flist.pop(i)
+                failed_list_txt_path.write_text('\n'.join(flist) + '\n', encoding='utf-8')
+                assert len(fset) == 0 and len(flist) == len(failed_set)
         except:
             pass
-    for current_dir, subdirs, files in os.walk(root, topdown=False):
-        if len(set(current_dir.replace("\\","/").split("/")) & set(escape_folder)) > 0:
+    if not Path(source_folder).is_dir():
+        print('[-]Source folder not found!')
+        return []
+    total = []
+    source = Path(source_folder).resolve()
+    skip_failed_cnt, skip_nfo_days_cnt = 0, 0
+    escape_folder_set = set(re.split("[,]", conf.escape_folder()))
+    for full_name in source.glob(r'**/*'):
+        if main_mode != 3 and set(full_name.parent.parts) & escape_folder_set:
             continue
-        for f in files:
-            full_name = os.path.join(current_dir, f)
-            if not os.path.splitext(full_name)[1].upper() in file_type:
-                continue
-            absf = os.path.abspath(full_name)
-            if absf in failed_set:
-                if debug:
-                    print('[!]Skip failed file:', absf)
-                continue
-            if cliRE and not cliRE.search(absf):
-                continue
-            if main_mode == 3 and nfo_skip_days > 0:
-                nfo = Path(absf).with_suffix('.nfo')
-                if file_modification_days(nfo) <= nfo_skip_days:
-                    continue
-            if (main_mode == 3 or not is_link(absf)) and not trailerRE.search(f):
-                total.append(absf)
+        if not full_name.suffix.lower() in file_type:
+            continue
+        absf = str(full_name)
+        if absf in failed_set:
+            skip_failed_cnt += 1
+            if debug:
+                print('[!]Skip failed movie:', absf)
+            continue
+        is_sym = full_name.is_symlink()
+        if main_mode != 3 and (is_sym or full_name.stat().st_nlink > 1):  # short-circuit evaluation; don't stat() a symlink, its target may not exist
+            continue  # file is symlink or hardlink (Linux/NTFS/Darwin)
+        # 0-byte samples are allowed through for debugging; anything else under 120MB is dropped to filter small ads
+        # such as '苍老师强力推荐.mp4' (102.2MB), '黑道总裁.mp4' (98.4MB), '有趣的妹子激情表演.MP4' (95MB), '有趣的臺灣妹妹直播.mp4' (15.1MB)
+        movie_size = 0 if is_sym else full_name.stat().st_size  # as above, don't stat() symlinks for st_size; 0 also skips the small-video check
+        if movie_size > 0 and movie_size < 125829120:  # 1024*1024*120 = 125829120
+            continue
+        if cliRE and not cliRE.search(absf) or trailerRE.search(full_name.name):
+            continue
+        if main_mode == 3 and nfo_skip_days > 0 and file_modification_days(full_name.with_suffix('.nfo')) <= nfo_skip_days:
+            skip_nfo_days_cnt += 1
+            if debug:
+                print(f"[!]Skip movie by it's .nfo which modified within {nfo_skip_days} days: '{absf}'")
+            continue
+        total.append(absf)
+    if skip_failed_cnt:
+        print(f"[!]Skip {skip_failed_cnt} movies in failed list '{failed_list_txt_path}'.")
+    if skip_nfo_days_cnt:
+        print(f"[!]Skip {skip_nfo_days_cnt} movies in source folder '{source}' who's .nfo modified within {nfo_skip_days} days.")
     if nfo_skip_days <= 0 or not soft_link or main_mode == 3:
         return total
     # In soft-link mode, titles already scraped also need their .nfo age checked in the success folder; skip those updated within N days
     skip_numbers = set()
-    success_folder = conf.success_folder()
-    for current_dir, subdirs, files in os.walk(success_folder, topdown=False):
-        for f in files:
-            f_obj = Path(f)
-            if f_obj.suffix.lower() != '.nfo':
-                continue
-            if file_modification_days(Path(current_dir) / f_obj) > nfo_skip_days:
-                continue
-            number = get_number(False, f_obj.stem)
-            if number:
-                skip_numbers.add(number.upper())
+    success_folder = Path(conf.success_folder()).resolve()
+    for f in success_folder.glob(r'**/*'):
+        if not re.match(r'\.nfo', f.suffix, re.IGNORECASE):
+            continue
+        if file_modification_days(f) > nfo_skip_days:
+            continue
+        number = get_number(False, f.stem)
+        if not number:
+            continue
+        skip_numbers.add(number.lower())
     rm_list = []
     for f in total:
         n_number = get_number(False, os.path.basename(f))
-        if n_number and n_number.upper() in skip_numbers:
+        if n_number and n_number.lower() in skip_numbers:
             rm_list.append(f)
     for f in rm_list:
         total.remove(f)
+        if debug:
+            print(f"[!]Skip file successfully processed within {nfo_skip_days} days: '{f}'")
+    if len(rm_list):
+        print(f"[!]Skip {len(rm_list)} movies in success folder '{success_folder}' who's .nfo modified within {nfo_skip_days} days.")
     return total
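The failed_list.txt de-duplication above keeps the last occurrence of each entry without reordering the rest. A standalone sketch of the same loop:

# Minimal sketch of the keep-last, order-preserving de-dup used on failed_list.txt above
flist = ['a.mp4', 'b.mp4', 'a.mp4', 'c.mp4', 'b.mp4']
fset = set(flist)
for i in range(len(flist)-1, -1, -1):  # walk backwards so the last occurrence wins
    fset.remove(flist[i]) if flist[i] in fset else flist.pop(i)
print(flist)  # ['a.mp4', 'c.mp4', 'b.mp4']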
 def create_failed_folder(failed_folder):
-    if not os.path.isdir(failed_folder):  # create the 'failed' folder
+    if not os.path.exists(failed_folder):  # create the 'failed' folder
         try:
             os.makedirs(failed_folder)
+            if not os.path.isdir(failed_folder):
+                raise
         except:
-            print("[-]failed! Can not make folder 'failed'\n[-](Please run as Administrator)")
+            print(f"[-]Fatal error! Can not make folder '{failed_folder}'")
             sys.exit(0)
@@ -227,24 +377,29 @@ def rm_empty_folder(path):
         pass
-def create_data_and_move(file_path: str, c: config.Config, debug):
+def create_data_and_move(file_path: str, zero_op):
     # Normalized number, eg: 111xxx-222.mp4 -> xxx-222.mp4
-    file_name = os.path.basename(file_path)
-    n_number = get_number(debug, file_name)
+    debug = config.getInstance().debug()
+    n_number = get_number(debug, os.path.basename(file_path))
     file_path = os.path.abspath(file_path)
     if debug == True:
-        print(f"[!]Making Data for [{file_path}], the number is [{n_number}]")
+        print(f"[!] [{n_number}] As Number making data for '{file_path}'")
+        if zero_op:
+            return
         if n_number:
-            core_main(file_path, n_number, c)
+            core_main(file_path, n_number)
         else:
             print("[-] number empty ERROR")
+            moveFailedFolder(file_path)
         print("[*]======================================================")
     else:
         try:
-            print(f"[!]Making Data for [{file_path}], the number is [{n_number}]")
+            print(f"[!] [{n_number}] As Number making data for '{file_path}'")
+            if zero_op:
+                return
             if n_number:
-                core_main(file_path, n_number, c)
+                core_main(file_path, n_number)
             else:
                 raise ValueError("number empty")
             print("[*]======================================================")
@@ -253,22 +408,26 @@ def create_data_and_move(file_path: str, c: config.Config, debug):
             print('[-]', err)
             try:
-                moveFailedFolder(file_path, conf)
+                moveFailedFolder(file_path)
             except Exception as err:
                 print('[!]', err)
-def create_data_and_move_with_custom_number(file_path: str, c: config.Config, custom_number):
+def create_data_and_move_with_custom_number(file_path: str, custom_number):
+    conf = config.getInstance()
     file_name = os.path.basename(file_path)
     try:
-        print("[!]Making Data for [{}], the number is [{}]".format(file_path, custom_number))
-        core_main(file_path, custom_number, c)
+        print("[!] [{1}] As Number making data for '{0}'".format(file_path, custom_number))
+        if custom_number:
+            core_main(file_path, custom_number)
+        else:
+            print("[-] number empty ERROR")
         print("[*]======================================================")
     except Exception as err:
         print("[-] [{}] ERROR:".format(file_path))
         print('[-]', err)
-        if c.soft_link():
+        if conf.soft_link():
             print("[-]Link {} to failed folder".format(file_path))
             os.symlink(file_path, os.path.join(conf.failed_folder(), file_name))
         else:
@@ -279,12 +438,26 @@ def create_data_and_move_with_custom_number(file_path: str, c: config.Config, cu
         print('[!]', err)
-if __name__ == '__main__':
+def main():
     version = '5.0.1'
     urllib3.disable_warnings()  # Ignore http proxy warning
-    # Parse command line args
-    single_file_path, folder_path, custom_number, auto_exit, logdir, regexstr = argparse_function(version)
+    # Read config.ini first; argparse_function() needs conf.failed_folder()
+    conf = config.Config("config.ini")
+    # Parse command line args
+    single_file_path, custom_number, logdir, regexstr, zero_op = argparse_function(version)
+    main_mode = conf.main_mode()
+    if not main_mode in (1, 2, 3):
+        print(f"[-]Main mode must be 1 or 2 or 3! You can run '{os.path.basename(sys.argv[0])} --help' for more help.")
+        sys.exit(4)
+    signal.signal(signal.SIGINT, signal_handler)
+    if sys.platform == 'win32':
+        signal.signal(signal.SIGBREAK, sigdebug_handler)
+    else:
+        signal.signal(signal.SIGWINCH, sigdebug_handler)
     dupe_stdout_to_logfile(logdir)
     print('[*]================== AV Data Capture ===================')
@@ -293,55 +466,62 @@ if __name__ == '__main__':
print('[*]======================================================') print('[*]======================================================')
print('[*]严禁在墙内宣传本项目') print('[*]严禁在墙内宣传本项目')
# Read config.ini start_time = time.time()
conf = config.Config("config.ini") print('[+]Start at', time.strftime("%Y-%m-%d %H:%M:%S"))
if conf.update_check(): if conf.update_check():
check_update(version) check_update(version)
print(f"[+]Load Config file '{conf.ini_path}'.")
if conf.debug(): if conf.debug():
print('[+]Enable debug') print('[+]Enable debug')
if conf.soft_link(): if conf.soft_link():
print('[!]Enable soft link') print('[!]Enable soft link')
#print('[!]CmdLine:'," ".join(sys.argv[1:])) if len(sys.argv)>1:
print('[!]CmdLine:'," ".join(sys.argv[1:]))
print('[+]Main Working mode ## {}: {} ## {}{}{}'
.format(*(main_mode, ['Scraping', 'Organizing', 'Scraping in analysis folder'][main_mode-1],
"" if not conf.multi_threading() else ", multi_threading on",
"" if conf.nfo_skip_days() == 0 else f", nfo_skip_days={conf.nfo_skip_days()}",
"" if conf.stop_counter() == 0 else f", stop_counter={conf.stop_counter()}"
) if not single_file_path else ('-','Single File', '','',''))
)
create_failed_folder(conf.failed_folder()) create_failed_folder(conf.failed_folder())
start_time = time.time()
if not single_file_path == '': #Single File if not single_file_path == '': #Single File
print('[+]==================== Single File =====================') print('[+]==================== Single File =====================')
if custom_number == '': if custom_number == '':
create_data_and_move_with_custom_number(single_file_path, conf, get_number(conf.debug(), os.path.basename(single_file_path))) create_data_and_move_with_custom_number(single_file_path, get_number(conf.debug(), os.path.basename(single_file_path)))
else: else:
create_data_and_move_with_custom_number(single_file_path, conf, custom_number) create_data_and_move_with_custom_number(single_file_path, custom_number)
else: else:
if folder_path == '': folder_path = conf.source_folder()
if not isinstance(folder_path, str) or folder_path == '':
folder_path = os.path.abspath(".") folder_path = os.path.abspath(".")
movie_list = movie_lists(folder_path, conf, regexstr) movie_list = movie_lists(folder_path, regexstr)
count = 0 count = 0
count_all = str(len(movie_list)) count_all = str(len(movie_list))
print('[+]Find', count_all, 'movies. Start at', time.strftime("%Y-%m-%d %H:%M:%S")) print('[+]Find', count_all, 'movies.')
main_mode = conf.main_mode() print('[*]======================================================')
stop_count = conf.stop_counter() stop_count = conf.stop_counter()
if stop_count<1: if stop_count<1:
stop_count = 999999 stop_count = 999999
else: else:
count_all = str(min(len(movie_list), stop_count)) count_all = str(min(len(movie_list), stop_count))
if main_mode == 3:
print(f'[!]运行模式:**维护模式**,本程序将在处理{count_all}个视频文件后停止,如需后台执行自动退出请结合 -a 参数。')
for movie_path in movie_list: # 遍历电影列表 交给core处理 for movie_path in movie_list: # 遍历电影列表 交给core处理
count = count + 1 count = count + 1
percentage = str(count / int(count_all) * 100)[:4] + '%' percentage = str(count / int(count_all) * 100)[:4] + '%'
print('[!] - ' + percentage + ' [' + str(count) + '/' + count_all + '] -') print('[!] {:>30}{:>21}'.format('- ' + percentage + ' [' + str(count) + '/' + count_all + '] -', time.strftime("%H:%M:%S")))
create_data_and_move(movie_path, conf, conf.debug()) create_data_and_move(movie_path, zero_op)
if count >= stop_count: if count >= stop_count:
print("[!]Stop counter triggered!") print("[!]Stop counter triggered!")
break break
if conf.del_empty_folder(): if conf.del_empty_folder() and not zero_op:
rm_empty_folder(conf.success_folder()) rm_empty_folder(conf.success_folder())
rm_empty_folder(conf.failed_folder()) rm_empty_folder(conf.failed_folder())
if len(folder_path): if len(folder_path):
@@ -353,9 +533,15 @@ if __name__ == '__main__':
               " End at", time.strftime("%Y-%m-%d %H:%M:%S"))
     print("[+]All finished!!!")
-    if not (conf.auto_exit() or auto_exit):
-        input("Press enter key exit, you can check the error message before you exit...")
     close_logfile(logdir)
+    if not conf.auto_exit():
+        input("Press enter key exit, you can check the error message before you exit...")
     sys.exit(0)
+import multiprocessing
+if __name__ == '__main__':
+    multiprocessing.freeze_support()
+    main()

View File

@@ -16,7 +16,9 @@ make:
 	#export cloudscraper_path=$(python3 -c 'import cloudscraper as _; print(_.__path__[0])' | tail -n 1)
 	@echo "[+]Pyinstaller make"
-	pyinstaller --onefile AV_Data_Capture.py --hidden-import ADC_function.py --hidden-import core.py --add-data "Img:Img"
+	pyinstaller --onefile AV_Data_Capture.py --hidden-import ADC_function.py --hidden-import core.py \
+		--add-data "Img:Img" \
+		--add-data "config.ini:." \
 	@echo "[+]Move to bin"
 	if [ ! -d "./bin" ];then mkdir bin; fi

View File

@@ -32,7 +32,7 @@ def get_data_state(data: dict) -> bool:  # metadata fetch failure detection
     return True
-def get_data_from_json(file_number, conf: config.Config):  # return metadata from JSON
+def get_data_from_json(file_number):  # return metadata from JSON
     """
     iterate through all services and fetch the data
     """
@@ -53,6 +53,7 @@ def get_data_from_json(file_number, conf: config.Config):  # return metadata from JSON
         "fc2club": fc2club.main
     }
+    conf = config.getInstance()
     # default fetch order list, from the beginning to the end
     sources = conf.sources().split(',')
     if not len(conf.sources()) > 80:
@@ -114,6 +115,7 @@ def get_data_from_json(file_number, conf: config.Config):  # return metadata from JSON
             json_data = json.loads(pool.apply_async(func_mapping[source], (file_number,)).get())
             # if any service returns a valid result, break
             if get_data_state(json_data):
+                print(f"[+]Find movie [{file_number}] metadata on website '{source}'")
                 break
         pool.close()
         pool.terminate()
@@ -125,6 +127,7 @@ def get_data_from_json(file_number, conf: config.Config):  # return metadata from JSON
                 json_data = json.loads(func_mapping[source](file_number))
                 # if any service returns a valid result, break
                 if get_data_state(json_data):
+                    print(f"[+]Find movie [{file_number}] metadata on website '{source}'")
                     break
             except:
                 break
@@ -134,6 +137,14 @@ def get_data_from_json(file_number, conf: config.Config):  # return metadata from JSON
         print('[-]Movie Number not found!')
         return None
+    # Strict number check: guards against faulty sources that return data for a different number no matter
+    # what was queried (e.g. always returning "本橋実来 ADZ335"). The current number naming rules follow
+    # javdb.com (Domain Creation Date: 2013-06-19T18:34:27Z); other rule sets are worth watching too, e.g.
+    # airav.wiki (Domain Creation Date: 2019-08-28T07:18:42.0Z). If javdb.com's rules ever produce same-name
+    # collisions between studios, consider switching rules and updating the number parsing and scraping code.
+    if str(json_data.get('number')).upper() != file_number.upper():
+        print('[-]Movie number has changed! [{}]->[{}]'.format(file_number, str(json_data.get('number'))))
+        return None
     # ================================================ end of website rules ================================================
     title = json_data.get('title')
@@ -167,6 +178,10 @@ def get_data_from_json(file_number, conf: config.Config):  # return metadata from JSON
     imagecut = json_data.get('imagecut')
     tag = str(json_data.get('tag')).strip("[ ]").replace("'", '').replace(" ", '').split(',')  # string to list @
+    while 'XXXX' in tag:
+        tag.remove('XXXX')
+    while 'xxx' in tag:
+        tag.remove('xxx')
     actor = str(actor_list).strip("[ ]").replace("'", '').replace(" ", '')
     if title == '' or number == '':
@@ -225,6 +240,8 @@ def get_data_from_json(file_number, conf: config.Config):  # return metadata from JSON
     studio = studio.replace('エムズビデオグループ','Ms Video Group')
     studio = studio.replace('ミニマム','Minimum')
     studio = studio.replace('ワープエンタテインメント','WAAP Entertainment')
+    studio = studio.replace('pacopacomama,パコパコママ','pacopacomama')
+    studio = studio.replace('パコパコママ','pacopacomama')
     studio = re.sub('.*/妄想族','妄想族',studio)
     studio = studio.replace('/',' ')
     # === end of studio katakana replacement ===
@@ -293,4 +310,7 @@ def special_characters_replacement(text) -> str:
         replace('"', '＂').  # U+FF02 FULLWIDTH QUOTATION MARK @ Basic Multilingual Plane
         replace('<', 'ᐸ').  # U+1438 CANADIAN SYLLABICS PA @ Basic Multilingual Plane
         replace('>', 'ᐳ').  # U+1433 CANADIAN SYLLABICS PO @ Basic Multilingual Plane
-        replace('|', 'ǀ'))  # U+01C0 LATIN LETTER DENTAL CLICK @ Basic Multilingual Plane
+        replace('|', 'ǀ').  # U+01C0 LATIN LETTER DENTAL CLICK @ Basic Multilingual Plane
+        replace('&lsquo;', '‘').  # U+2018 LEFT SINGLE QUOTATION MARK
+        replace('&rsquo;', '’').  # U+2019 RIGHT SINGLE QUOTATION MARK
+        replace('&amp;', ''))

View File

@@ -6,6 +6,7 @@ from lxml import etree  # need install
 from bs4 import BeautifulSoup  # need install
 import json
 from ADC_function import *
+from WebCrawler import javbus
 '''
 API
@@ -17,95 +18,94 @@ API
 host = 'https://www.airav.wiki'
 # airav has no actor photos, so use the javbus images directly
-def getActorPhoto(htmlcode):  #//*[@id="star_qdt"]/li/a/img
-    soup = BeautifulSoup(htmlcode, 'lxml')
-    a = soup.find_all(attrs={'class': 'star-name'})
-    d={}
-    for i in a:
-        l=i.a['href']
-        t=i.get_text()
-        html = etree.fromstring(get_html(l), etree.HTMLParser())
-        p=urljoin("https://www.javbus.com",
-            str(html.xpath('//*[@id="waterfall"]/div[1]/div/div[1]/img/@src')).strip(" ['']"))
-        p2={t:p}
-        d.update(p2)
-    return d
+def getActorPhoto(javbus_json):
+    result = javbus_json.get('actor_photo')
+    if isinstance(result, dict) and len(result):
+        return result
+    return ''
 def getTitle(htmlcode):  # get the title
-    doc = pq(htmlcode)
-    # h5:first-child selects the first h5 tag; took forever to find this syntax
-    title = str(doc('div.d-flex.videoDataBlock h5.d-none.d-md-block:nth-child(2)').text()).replace(' ', '-')
-    try:
-        title2 = re.sub('n\d+-','',title)
-        return title2
-    except:
-        return title
+    html = etree.fromstring(htmlcode, etree.HTMLParser())
+    title = str(html.xpath('/html/head/title/text()')[0])
+    result = str(re.findall('](.*?)- AIRAV-WIKI', title)[0]).strip()
+    return result
-def getStudio(htmlcode):  # get the studio (modified)
-    html = etree.fromstring(htmlcode,etree.HTMLParser())
-    # if the record has no director, the studio sits in 4th position
-    if '製作商:' == str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[4]/span/text()')).strip(" ['']"):
-        result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[4]/a/text()')).strip(" ['']")
-    # if the record has a director, the studio sits in 5th position
-    elif '製作商:' == str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[5]/span/text()')).strip(" ['']"):
-        result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[5]/a/text()')).strip(" ['']")
-    else:
-        result = ''
-    return result
+def getStudio(htmlcode, javbus_json):  # get the studio (modified)
+    # prefer javbus data when it exists
+    result = javbus_json.get('studio')
+    if isinstance(result, str) and len(result):
+        return result
+    html = etree.fromstring(htmlcode,etree.HTMLParser())
+    return str(html.xpath('//a[contains(@href,"?video_factory=")]/text()')).strip(" ['']")
-def getYear(htmlcode):  # get the year
-    html = etree.fromstring(htmlcode,etree.HTMLParser())
-    result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[2]/text()')).strip(" ['']")
-    return result
+def getYear(htmlcode, javbus_json):  # get the year
+    result = javbus_json.get('year')
+    if isinstance(result, str) and len(result):
+        return result
+    release = getRelease(htmlcode, javbus_json)
+    if len(release) != len('2000-01-01'):
+        return ''
+    return release[:4]
-def getCover(htmlcode):  # get the cover link
-    doc = pq(htmlcode)
-    image = doc('a.bigImage')
-    return urljoin("https://www.javbus.com", image.attr('href'))
+def getCover(htmlcode, javbus_json):  # get the cover image
+    result = javbus_json.get('cover')
+    if isinstance(result, str) and len(result):
+        return result
+    html = etree.fromstring(htmlcode, etree.HTMLParser())
+    return html.xpath('//img[contains(@src,"/storage/big_pic/")]/@src')[0]
-def getRelease(htmlcode):  # get the release date
-    html = etree.fromstring(htmlcode, etree.HTMLParser())
-    result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[2]/text()')).strip(" ['']")
-    return result
+def getRelease(htmlcode, javbus_json):  # get the release date
+    result = javbus_json.get('release')
+    if isinstance(result, str) and len(result):
+        return result
+    html = etree.fromstring(htmlcode, etree.HTMLParser())
+    try:
+        result = re.search(r'\d{4}-\d{2}-\d{2}', str(html.xpath('//li[contains(text(),"發片日期")]/text()'))).group()
+    except:
+        return ''
+    return result
-def getRuntime(htmlcode):  # get the runtime in minutes (modified)
-    html = etree.fromstring(htmlcode, etree.HTMLParser())
-    result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[3]/text()')).strip(" ['']分鐘")
-    return result
+def getRuntime(javbus_json):  # get the runtime
+    result = javbus_json.get('runtime')
+    if isinstance(result, str) and len(result):
+        return result
+    return ''
+# airav's actress database mostly has kanji names while javbus mostly has kana, so airav takes priority
-def getActor(htmlcode):  # get the actresses
-    b=[]
-    soup=BeautifulSoup(htmlcode,'lxml')
-    a=soup.find_all(attrs={'class':'star-name'})
-    for i in a:
-        b.append(i.get_text())
-    return b
+def getActor(htmlcode, javbus_json):  # get the actresses
+    b=[]
+    html = etree.fromstring(htmlcode, etree.HTMLParser())
+    a = html.xpath('//ul[@class="videoAvstarList"]/li/a[starts-with(@href,"/idol/")]/text()')
+    for v in a:
+        v = v.strip()
+        if len(v):
+            b.append(v)
+    if len(b):
+        return b
+    result = javbus_json.get('actor')
+    if isinstance(result, list) and len(result):
+        return result
+    return []
-def getNum(htmlcode):  # get the number
-    html = etree.fromstring(htmlcode, etree.HTMLParser())
-    result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[1]/span[2]/text()')).strip(" ['']")
-    return result
+def getNum(htmlcode, javbus_json):  # get the number
+    result = javbus_json.get('number')
+    if isinstance(result, str) and len(result):
+        return result
+    html = etree.fromstring(htmlcode, etree.HTMLParser())
+    title = str(html.xpath('/html/head/title/text()')[0])
+    result = str(re.findall('^\[(.*?)]', title)[0])
+    return result
-def getDirector(htmlcode):  # get the director (modified)
-    html = etree.fromstring(htmlcode, etree.HTMLParser())
-    if '導演:' == str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[4]/span/text()')).strip(" ['']"):
-        result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[4]/a/text()')).strip(" ['']")
-    else:
-        result = ''  # the record may have no director data
-    return result
+def getDirector(javbus_json):  # get the director (modified)
+    result = javbus_json.get('director')
+    if isinstance(result, str) and len(result):
+        return result
+    return ''
-def getOutline(htmlcode):  # get the actors
+def getOutline(htmlcode):  # get the synopsis
     html = etree.fromstring(htmlcode, etree.HTMLParser())
     try:
-        result = html.xpath("string(//div[@class='d-flex videoDataBlock']/div[@class='synopsis']/p)").replace('\n','')
+        result = html.xpath("string(//div[@class='d-flex videoDataBlock']/div[@class='synopsis']/p)").replace('\n','').strip()
         return result
     except:
         return ''
-def getSerise(htmlcode):  # get the series (modified)
-    html = etree.fromstring(htmlcode, etree.HTMLParser())
-    # if the record has no director, the series sits in 6th position
-    if '系列:' == str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[6]/span/text()')).strip(" ['']"):
-        result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[6]/a/text()')).strip(" ['']")
-    # if the record has a director, the series sits in 7th position
-    elif '系列:' == str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[7]/span/text()')).strip(" ['']"):
-        result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[7]/a/text()')).strip(" ['']")
-    else:
-        result = ''
-    return result
+def getSerise(javbus_json):  # get the series (modified)
+    result = javbus_json.get('series')
+    if isinstance(result, str) and len(result):
+        return result
+    return ''
 def getTag(htmlcode):  # get the tags
     tag = []
     soup = BeautifulSoup(htmlcode, 'lxml')
@@ -169,52 +169,50 @@ def main(number):
     try:
         try:
             htmlcode = get_html('https://cn.airav.wiki/video/' + number)
-            javbus_htmlcode = get_html('https://www.javbus.com/ja/' + number)
+            javbus_json = json.loads(javbus.main(number))
         except:
             print(number)
         dic = {
             # the title can come from airav
-            'title': str(re.sub('\w+-\d+-', '', getTitle(htmlcode))),
+            'title': getTitle(htmlcode),
-            # studio uses javbus
+            # studio: try javbus first, fall back to this site
-            'studio': getStudio(javbus_htmlcode),
+            'studio': getStudio(htmlcode, javbus_json),
-            # year also uses javbus
+            # year: try javbus first, fall back to this site
-            'year': str(re.search('\d{4}', getYear(javbus_htmlcode)).group()),
+            'year': getYear(htmlcode, javbus_json),
             # synopsis uses airav
             'outline': getOutline(htmlcode),
             # uses javbus
-            'runtime': getRuntime(javbus_htmlcode),
+            'runtime': getRuntime(javbus_json),
             # director uses javbus
-            'director': getDirector(javbus_htmlcode),
+            'director': getDirector(javbus_json),
-            # actresses use airav
+            # actresses: try airav first
-            'actor': getActor(javbus_htmlcode),
+            'actor': getActor(htmlcode, javbus_json),
-            # release date uses javbus
+            # release date: try javbus first
-            'release': getRelease(javbus_htmlcode),
+            'release': getRelease(htmlcode, javbus_json),
             # number uses javbus
-            'number': getNum(javbus_htmlcode),
+            'number': getNum(htmlcode, javbus_json),
             # cover link uses javbus
-            'cover': getCover(javbus_htmlcode),
+            'cover': getCover(htmlcode, javbus_json),
             # stills
             'extrafanart': getExtrafanart(htmlcode),
             'imagecut': 1,
             # uses airav
             'tag': getTag(htmlcode),
             # uses javbus
-            'label': getSerise(javbus_htmlcode),
+            'label': getSerise(javbus_json),
             # damn, airav provides no actor photos
-            'actor_photo': getActorPhoto(javbus_htmlcode),
+            # 'actor_photo': getActorPhoto(javbus_json),
             'website': 'https://www.airav.wiki/video/' + number,
             'source': 'airav.py',
             # uses javbus
-            'series': getSerise(javbus_htmlcode),
+            'series': getSerise(javbus_json)
         }
         js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4,separators=(',', ':'), )  # .encode('UTF-8')
         return js
     except Exception as e:
-        if config.Config().debug():
+        if config.getInstance().debug():
             print(e)
         data = {
             "title": "",
@@ -226,6 +224,6 @@ def main(number):
 if __name__ == '__main__':
-    #print(main('ADN-188'))
-    print(main('ADN-188'))
-    print(main('CJOD-278'))
+    print(main('ADV-R0624'))  # the javbus page returns 404, airav has data
+    print(main('ADN-188'))    # one actress
+    print(main('CJOD-278'))   # multiple actresses; javbus uses kana for actress names, airav uses kanji
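All of the rewritten getters share the same javbus-first shape. A minimal standalone sketch of the pattern (the field name and fallback value are illustrative):

# Minimal sketch of the javbus-first fallback used by the getters above
def get_field(javbus_json: dict, key: str, fallback_from_html):
    result = javbus_json.get(key)
    if isinstance(result, str) and len(result):
        return result            # trust javbus when it has data
    return fallback_from_html()  # otherwise parse the airav page

print(get_field({'studio': 'S1'}, 'studio', lambda: 'parsed-from-airav'))  # S1
print(get_field({}, 'studio', lambda: 'parsed-from-airav'))                # parsed-from-airav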

View File

@@ -3,50 +3,42 @@ sys.path.append('..')
 import re
 from lxml import etree
 import json
-from bs4 import BeautifulSoup
 from ADC_function import *
-# import sys
+from WebCrawler.storyline import getStoryline
 # import io
 # sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)
-def getActorPhoto(htmlcode):  #//*[@id="star_qdt"]/li/a/img
-    soup = BeautifulSoup(htmlcode, 'lxml')
-    a = soup.find_all(attrs={'class': 'avatar-box'})
+def getActorPhoto(html):
+    a = html.xpath('//a[@class="avatar-box"]')
     d = {}
     for i in a:
-        l = i.img['src']
-        t = i.span.get_text()
+        l = i.find('.//img').attrib['src']
+        t = i.find('span').text
         p2 = {t: l}
         d.update(p2)
     return d
-def getTitle(a):
+def getTitle(html):
     try:
-        html = etree.fromstring(a, etree.HTMLParser())
         result = str(html.xpath('/html/body/div[2]/h3/text()')).strip(" ['']")  #[0]
         return result.replace('/', '')
     except:
         return ''
-def getActor(a):  #//*[@id="center_column"]/div[2]/div[1]/div/table/tbody/tr[1]/td/text()
-    soup = BeautifulSoup(a, 'lxml')
-    a = soup.find_all(attrs={'class': 'avatar-box'})
+def getActor(html):
+    a = html.xpath('//a[@class="avatar-box"]')
     d = []
     for i in a:
-        d.append(i.span.get_text())
+        d.append(i.find('span').text)
     return d
-def getStudio(a):
-    html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
+def getStudio(html):
     result1 = str(html.xpath('//p[contains(text(),"制作商: ")]/following-sibling::p[1]/a/text()')).strip(" ['']").replace("', '",' ')
     return result1
-def getRuntime(a):
-    html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
+def getRuntime(html):
     result1 = str(html.xpath('//span[contains(text(),"长度:")]/../text()')).strip(" ['分钟']")
     return result1
-def getLabel(a):
-    html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
+def getLabel(html):
     result1 = str(html.xpath('//p[contains(text(),"系列:")]/following-sibling::p[1]/a/text()')).strip(" ['']")
     return result1
-def getNum(a):
-    html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
+def getNum(html):
     result1 = str(html.xpath('//span[contains(text(),"识别码:")]/../span[2]/text()')).strip(" ['']")
     return result1
 def getYear(release):
@@ -55,28 +47,20 @@ def getYear(release):
         return result
     except:
         return release
-def getRelease(a):
-    html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
+def getRelease(html):
     result1 = str(html.xpath('//span[contains(text(),"发行时间:")]/../text()')).strip(" ['']")
     return result1
-def getCover(htmlcode):
-    html = etree.fromstring(htmlcode, etree.HTMLParser())
+def getCover(html):
     result = str(html.xpath('/html/body/div[2]/div[1]/div[1]/a/img/@src')).strip(" ['']")
     return result
-def getCover_small(htmlcode):
-    html = etree.fromstring(htmlcode, etree.HTMLParser())
+def getCover_small(html):
     result = str(html.xpath('//*[@id="waterfall"]/div/a/div[1]/img/@src')).strip(" ['']")
     return result
-def getTag(a):  # get the actors
-    soup = BeautifulSoup(a, 'lxml')
-    a = soup.find_all(attrs={'class': 'genre'})
-    d = []
-    for i in a:
-        d.append(i.get_text())
-    return d
-def getSeries(htmlcode):
+def getTag(html):
+    x = html.xpath('/html/head/meta[@name="keywords"]/@content')[0].split(',')
+    return x[2:] if len(x) > 2 else []
+def getSeries(html):
     try:
-        html = etree.fromstring(htmlcode, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
         result1 = str(html.xpath('//span[contains(text(),"系列:")]/../span[2]/text()')).strip(" ['']")
         return result1
     except:
@@ -86,42 +70,45 @@ def main(number):
html = get_html('https://tellme.pw/avsox') html = get_html('https://tellme.pw/avsox')
site = etree.HTML(html).xpath('//div[@class="container"]/div/a/@href')[0] site = etree.HTML(html).xpath('//div[@class="container"]/div/a/@href')[0]
a = get_html(site + '/cn/search/' + number) a = get_html(site + '/cn/search/' + number)
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() html = etree.fromstring(a, etree.HTMLParser())
result1 = str(html.xpath('//*[@id="waterfall"]/div/a/@href')).strip(" ['']") result1 = str(html.xpath('//*[@id="waterfall"]/div/a/@href')).strip(" ['']")
if result1 == '' or result1 == 'null' or result1 == 'None': if result1 == '' or result1 == 'null' or result1 == 'None':
a = get_html(site + '/cn/search/' + number.replace('-', '_')) a = get_html(site + '/cn/search/' + number.replace('-', '_'))
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() html = etree.fromstring(a, etree.HTMLParser())
result1 = str(html.xpath('//*[@id="waterfall"]/div/a/@href')).strip(" ['']") result1 = str(html.xpath('//*[@id="waterfall"]/div/a/@href')).strip(" ['']")
if result1 == '' or result1 == 'null' or result1 == 'None': if result1 == '' or result1 == 'null' or result1 == 'None':
a = get_html(site + '/cn/search/' + number.replace('_', '')) a = get_html(site + '/cn/search/' + number.replace('_', ''))
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() html = etree.fromstring(a, etree.HTMLParser())
result1 = str(html.xpath('//*[@id="waterfall"]/div/a/@href')).strip(" ['']") result1 = str(html.xpath('//*[@id="waterfall"]/div/a/@href')).strip(" ['']")
web = get_html("https:" + result1) detail = get_html("https:" + result1)
soup = BeautifulSoup(web, 'lxml') lx = etree.fromstring(detail, etree.HTMLParser())
info = str(soup.find(attrs={'class': 'row movie'}))
try: try:
new_number = getNum(lx)
if new_number.upper() != number.upper():
raise ValueError('number not found')
title = getTitle(lx).replace(new_number, '').strip()
dic = { dic = {
'actor': getActor(web), 'actor': getActor(lx),
'title': getTitle(web).strip(getNum(web)), 'title': title,
'studio': getStudio(info), 'studio': getStudio(lx),
'outline': '', # 'outline': getStoryline(number, title),
'runtime': getRuntime(info), 'runtime': getRuntime(lx),
'director': '', # 'director': '', #
'release': getRelease(info), 'release': getRelease(lx),
'number': getNum(info), 'number': new_number,
'cover': getCover(web), 'cover': getCover(lx),
'cover_small': getCover_small(a), 'cover_small': getCover_small(html),
'imagecut': 3, 'imagecut': 3,
'tag': getTag(web), 'tag': getTag(lx),
'label': getLabel(info), 'label': getLabel(lx),
'year': getYear(getRelease(info)), # str(re.search('\d{4}',getRelease(a)).group()), 'year': getYear(getRelease(lx)),
'actor_photo': getActorPhoto(web), 'actor_photo': getActorPhoto(lx),
'website': "https:" + result1, 'website': "https:" + result1,
'source': 'avsox.py', 'source': 'avsox.py',
'series': getSeries(info), 'series': getSeries(lx),
} }
except Exception as e: except Exception as e:
if config.Config().debug(): if config.getInstance().debug():
print(e) print(e)
dic = {"title": ""} dic = {"title": ""}
js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8') js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8')
@@ -129,3 +116,4 @@ def main(number):
if __name__ == "__main__": if __name__ == "__main__":
print(main('012717_472')) print(main('012717_472'))
print(main('1')) # used to return a fake result; now raises 'number not found'
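A note on the idiom above: the rewritten getTag takes everything from the page's keywords meta tag in a single xpath call and slices off the leading entries (apparently the number and title). A minimal offline sketch, with invented sample HTML:

from lxml import etree

sample = '<html><head><meta name="keywords" content="012717_472,Some Title,TagA,TagB"></head></html>'
lx = etree.fromstring(sample, etree.HTMLParser())
kwds = lx.xpath('/html/head/meta[@name="keywords"]/@content')[0].split(',')
tags = kwds[2:] if len(kwds) > 2 else []  # the first two slots are not tags
print(tags)  # ['TagA', 'TagB']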

@@ -1,34 +1,32 @@
import sys import sys
sys.path.append('../') sys.path.append('../')
import json import json
from bs4 import BeautifulSoup
from lxml import html from lxml import html
import re import re
from ADC_function import * from ADC_function import *
from WebCrawler.storyline import getStoryline
def main(number: str) -> json: def main(number: str) -> json:
try: try:
caribbytes, browser = get_html_by_browser( # The actor photo feature is not used yet; to speed things up, the browser version is temporarily commented out in favor of get_html()
'https://www.caribbeancom.com/moviepages/'+number+'/index.html', #r, browser = get_html_by_browser('https://www.caribbeancom.com/moviepages/'+number+'/index.html',
return_type="browser") # return_type='browser')
#if not r.ok:
if not caribbytes or not caribbytes.ok: # raise ValueError("page not found")
#htmlcode = str(browser.page)
htmlbyte = get_html('https://www.caribbeancom.com/moviepages/'+number+'/index.html', return_type='content')
htmlcode = htmlbyte.decode('euc-jp')
if not htmlcode or '<title>404' in htmlcode or 'class="movie-info section"' not in htmlcode:
raise ValueError("page not found") raise ValueError("page not found")
lx = html.fromstring(str(browser.page)) lx = html.fromstring(htmlcode)
title = get_title(lx)
if not browser.page.select_one("#moviepages > div > div:nth-child(1) > div.movie-info.section"):
raise ValueError("page info not found")
except Exception as e:
if config.Config().debug():
print(e)
dic = {"title": ""}
return json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'))
dic = { dic = {
'title': get_title(lx), 'title': title,
'studio': '加勒比', 'studio': '加勒比',
'year': get_year(lx), 'year': get_year(lx),
'outline': get_outline(lx), 'outline': get_outline(lx, number, title),
'runtime': get_runtime(lx), 'runtime': get_runtime(lx),
'director': '', 'director': '',
'actor': get_actor(lx), 'actor': get_actor(lx),
@@ -47,14 +45,25 @@ def main(number: str) -> json:
js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), )
return js return js
except Exception as e:
if config.getInstance().debug():
print(e)
dic = {"title": ""}
return json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'))
def get_title(lx: html.HtmlElement) -> str: def get_title(lx: html.HtmlElement) -> str:
return str(lx.xpath("//div[@class='movie-info section']/div[@class='heading']/h1[@itemprop='name']/text()")[0]).strip() return str(lx.xpath("//div[@class='movie-info section']/div[@class='heading']/h1[@itemprop='name']/text()")[0]).strip()
def get_year(lx: html.HtmlElement) -> str: def get_year(lx: html.HtmlElement) -> str:
return lx.xpath("//li[2]/span[@class='spec-content']/text()")[0][:4] return lx.xpath("//li[2]/span[@class='spec-content']/text()")[0][:4]
def get_outline(lx: html.HtmlElement) -> str: def get_outline(lx: html.HtmlElement, number: str, title: str) -> str:
return lx.xpath("//div[@class='movie-info section']/p[@itemprop='description']/text()")[0].strip() o = lx.xpath("//div[@class='movie-info section']/p[@itemprop='description']/text()")[0].strip()
g = getStoryline(number, title)
if len(g):
return g
return o
def get_release(lx: html.HtmlElement) -> str: def get_release(lx: html.HtmlElement) -> str:
return lx.xpath("//li[2]/span[@class='spec-content']/text()")[0].replace('/','-') return lx.xpath("//li[2]/span[@class='spec-content']/text()")[0].replace('/','-')
@@ -114,11 +123,10 @@ def get_actor_photo(browser):
if pos<0: if pos<0:
continue continue
css = html[pos:pos+100] css = html[pos:pos+100]
p0 = css.find('background: url(') cssBGjpgs = re.findall(r'background: url\((.+\.jpg)', css, re.I)
p1 = css.find('.jpg)') if not cssBGjpgs or not len(cssBGjpgs[0]):
if p0<0 or p1<0:
continue continue
p = {k: urljoin(browser.url, css[p0+len('background: url('):p1+len('.jpg')])} p = {k: urljoin(browser.url, cssBGjpgs[0])}
o.update(p) o.update(p)
return o return o
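The rewrite above replaces the paired find() calls with one regex over the inline CSS. A standalone sketch of the same extraction; the CSS snippet and base URL are invented:

import re
from urllib.parse import urljoin

css = '.itemimage { background: url(/moviepages/010121-001/images/n.jpg) no-repeat; }'
cssBGjpgs = re.findall(r'background: url\((.+\.jpg)', css, re.I)
if cssBGjpgs and len(cssBGjpgs[0]):
    print(urljoin('https://www.caribbeancom.com/moviepages/010121-001/', cssBGjpgs[0]))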

@@ -153,7 +153,7 @@ def main(number):
js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8') js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8')
return js return js
except Exception as e: except Exception as e:
if config.Config().debug(): if config.getInstance().debug():
print(e) print(e)
data = { data = {
"title": "", "title": "",

@@ -93,6 +93,7 @@ def main(number):
actor = '素人' actor = '素人'
lx = etree.fromstring(htmlcode2, etree.HTMLParser()) lx = etree.fromstring(htmlcode2, etree.HTMLParser())
cover = str(lx.xpath("//div[@class='items_article_MainitemThumb']/span/img/@src")).strip(" ['']") cover = str(lx.xpath("//div[@class='items_article_MainitemThumb']/span/img/@src")).strip(" ['']")
cover = ADC_function.urljoin('https://adult.contents.fc2.com', cover)
dic = { dic = {
'title': lx.xpath('/html/head/title/text()')[0], 'title': lx.xpath('/html/head/title/text()')[0],
'studio': getStudio_fc2com(htmlcode2), 'studio': getStudio_fc2com(htmlcode2),
@@ -116,7 +117,7 @@ def main(number):
'series': '', 'series': '',
} }
except Exception as e: except Exception as e:
if ADC_function.config.Config().debug(): if ADC_function.config.getInstance().debug():
print(e) print(e)
dic = {"title": ""} dic = {"title": ""}
js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8') js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8')
@@ -124,4 +125,5 @@ def main(number):
if __name__ == '__main__': if __name__ == '__main__':
print(main('FC2-1787685')) print(main('FC2-1787685'))
print(main('FC2-2086710'))
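The new urljoin call only normalizes the cover path; assuming ADC_function re-exports urllib.parse.urljoin (it is used unqualified elsewhere in this diff), the behaviour is the standard one:

from urllib.parse import urljoin

# a relative path gets the host prefixed (both paths here are invented examples)
print(urljoin('https://adult.contents.fc2.com', '/storage/cover.jpg'))
# an already-absolute URL passes through unchanged
print(urljoin('https://adult.contents.fc2.com', 'https://cdn.example.com/cover.jpg'))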

@@ -103,7 +103,7 @@ def main(number):
'series': '', 'series': '',
} }
except Exception as e: except Exception as e:
if ADC_function.config.Config().debug(): if ADC_function.config.getInstance().debug():
print(e) print(e)
dic = {"title": ""} dic = {"title": ""}
js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8') js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8')

@@ -1,114 +1,76 @@
import sys import sys
sys.path.append('../') sys.path.append('../')
import re import re
from pyquery import PyQuery as pq#need install
from lxml import etree#need install from lxml import etree#need install
from bs4 import BeautifulSoup#need install
import json import json
from ADC_function import * from ADC_function import *
from WebCrawler import fanza from WebCrawler.storyline import getStoryline
from WebCrawler import airav
def getActorPhoto(htmlcode): #//*[@id="star_qdt"]/li/a/img def getActorPhoto(html):
soup = BeautifulSoup(htmlcode, 'lxml') actors = html.xpath('//div[@class="star-name"]/a')
a = soup.find_all(attrs={'class': 'star-name'})
d={} d={}
for i in a: for i in actors:
l=i.a['href'] url=i.attrib['href']
t=i.get_text() t=i.attrib['title']
html = etree.fromstring(get_html(l), etree.HTMLParser()) html = etree.fromstring(get_html(url), etree.HTMLParser())
p=urljoin("https://www.javbus.com", p=urljoin("https://www.javbus.com",
str(html.xpath('//*[@id="waterfall"]/div[1]/div/div[1]/img/@src')).strip(" ['']")) str(html.xpath('//*[@id="waterfall"]/div[1]/div/div[1]/img/@src')).strip(" ['']"))
p2={t:p} p2={t:p}
d.update(p2) d.update(p2)
return d return d
def getTitle(htmlcode): # get the title def getTitle(html): # get the title
doc = pq(htmlcode) title = str(html.xpath('/html/head/title/text()')[0])
title=str(doc('div.container h3').text()).replace(' ','-') title = str(re.findall('^.+?\s+(.*) - JavBus$', title)[0]).strip()
try:
title2 = re.sub('n\d+-','',title)
return title2
except:
return title return title
def getStudio(htmlcode): # get the studio (modified) def getStudioJa(html):
html = etree.fromstring(htmlcode,etree.HTMLParser()) x = html.xpath('//span[contains(text(),"メーカー:")]/../a/text()')
# if the record has no director, the studio sits in the 4th field return str(x[0]) if len(x) else ''
if '製作商:' == str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[4]/span/text()')).strip(" ['']"): def getStudio(html): # get the studio
result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[4]/a/text()')).strip(" ['']") x = html.xpath('//span[contains(text(),"製作商:")]/../a/text()')
# if the record has a director, the studio sits in the 5th field return str(x[0]) if len(x) else ''
elif '製作商:' == str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[5]/span/text()')).strip(" ['']"): def getYear(html): # get the year
result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[5]/a/text()')).strip(" ['']") result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[2]/text()')).strip(" ['']").strip()
else: return result[:4] if len(result)>=len('2000-01-01') else ''
result = '' def getCover(html): # get the cover URL
return result image = str(html.xpath('//a[@class="bigImage"]/@href')[0])
def getYear(htmlcode): # get the year def getRelease(html): # get the release date
html = etree.fromstring(htmlcode,etree.HTMLParser()) def getRelease(html): #获取出版日期
result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[2]/text()')).strip(" ['']") result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[2]/text()')).strip(" ['']")
return result return result
def getCover(htmlcode): # get the cover URL def getRuntime(html): # get the runtime in minutes (modified)
doc = pq(htmlcode)
image = doc('a.bigImage')
return urljoin("https://www.javbus.com", image.attr('href'))
def getRelease(htmlcode): # get the release date
html = etree.fromstring(htmlcode, etree.HTMLParser())
result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[2]/text()')).strip(" ['']")
return result
def getRuntime(htmlcode): # get the runtime in minutes (modified)
html = etree.fromstring(htmlcode, etree.HTMLParser())
result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[3]/text()')).strip(" ['']分鐘") result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[3]/text()')).strip(" ['']分鐘")
return result return result
def getActor(htmlcode): # get the actresses def getActor(html): # get the actresses
b=[] b=[]
soup=BeautifulSoup(htmlcode,'lxml') actors = html.xpath('//div[@class="star-name"]/a')
a=soup.find_all(attrs={'class':'star-name'}) for i in actors:
for i in a: b.append(i.attrib['title'])
b.append(i.get_text())
return b return b
def getNum(htmlcode): # get the ID number def getNum(html): # get the ID number
html = etree.fromstring(htmlcode, etree.HTMLParser()) kwdlist = html.xpath('/html/head/meta[@name="keywords"]/@content')[0].split(',')
result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[1]/span[2]/text()')).strip(" ['']") return kwdlist[0]
return result def getDirectorJa(html):
def getDirector(htmlcode): # get the director (modified) x = html.xpath('//span[contains(text(),"監督:")]/../a/text()')
html = etree.fromstring(htmlcode, etree.HTMLParser()) return str(x[0]) if len(x) else ''
if '導演:' == str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[4]/span/text()')).strip(" ['']"): def getDirector(html): #获取导演
result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[4]/a/text()')).strip(" ['']") x = html.xpath('//span[contains(text(),"導演:")]/../a/text()')
else: return str(x[0]) if len(x) else ''
result = '' # the record may have no director data def getDirector(html): # get the director
return result
def getCID(htmlcode):
html = etree.fromstring(htmlcode, etree.HTMLParser())
#print(htmlcode)
string = html.xpath("//a[contains(@class,'sample-box')][1]/@href")[0].replace('https://pics.dmm.co.jp/digital/video/','') string = html.xpath("//a[contains(@class,'sample-box')][1]/@href")[0].replace('https://pics.dmm.co.jp/digital/video/','')
result = re.sub('/.*?.jpg','',string) result = re.sub('/.*?.jpg','',string)
return result return result
def getOutline(number): # get the storyline def getOutline(number, title): # get the storyline, querying several sites concurrently
try: return getStoryline(number,title)
response = json.loads(airav.main(number)) def getSeriseJa(html):
result = response['outline'] x = html.xpath('//span[contains(text(),"シリーズ:")]/../a/text()')
return result return str(x[0]) if len(x) else ''
except: def getSerise(html): #获取系列
return '' x = html.xpath('//span[contains(text(),"系列:")]/../a/text()')
def getSerise(htmlcode): # get the series (modified) return str(x[0]) if len(x) else ''
html = etree.fromstring(htmlcode, etree.HTMLParser()) def getTag(html): # get the tags
# if the record has no director, the series sits in the 6th field klist = html.xpath('/html/head/meta[@name="keywords"]/@content')[0].split(',')
if '系列:' == str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[6]/span/text()')).strip(" ['']"): taglist = [translateTag_to_sc(v) for v in klist[1:]]
result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[6]/a/text()')).strip(" ['']") return taglist
# if the record has a director, the series sits in the 7th field
elif '系列:' == str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[7]/span/text()')).strip(" ['']"):
result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[7]/a/text()')).strip(" ['']")
else:
result = ''
return result
def getTag(htmlcode): # get the tags
tag = []
soup = BeautifulSoup(htmlcode, 'lxml')
a = soup.find_all(attrs={'class': 'genre'})
for i in a:
if 'onmouseout' in str(i) or '多選提交' in str(i):
continue
tag.append(translateTag_to_sc(i.get_text()))
return tag
def getExtrafanart(htmlcode): # get the still shots def getExtrafanart(htmlcode): # get the still shots
html_pather = re.compile(r'<div id=\"sample-waterfall\">[\s\S]*?</div></a>\s*?</div>') html_pather = re.compile(r'<div id=\"sample-waterfall\">[\s\S]*?</div></a>\s*?</div>')
html = html_pather.search(htmlcode) html = html_pather.search(htmlcode)
@@ -117,32 +79,34 @@ def getExtrafanart(htmlcode): # 获取剧照
extrafanart_pather = re.compile(r'<a class=\"sample-box\" href=\"(.*?)\"') extrafanart_pather = re.compile(r'<a class=\"sample-box\" href=\"(.*?)\"')
extrafanart_imgs = extrafanart_pather.findall(html) extrafanart_imgs = extrafanart_pather.findall(html)
if extrafanart_imgs: if extrafanart_imgs:
return extrafanart_imgs return [urljoin('https://www.javbus.com',img) for img in extrafanart_imgs]
return '' return ''
def main_uncensored(number): def main_uncensored(number):
htmlcode = get_html('https://www.javbus.com/ja/' + number) htmlcode = get_html('https://www.javbus.com/ja/' + number)
if getTitle(htmlcode) == '': if "<title>404 Page Not Found" in htmlcode:
htmlcode = get_html('https://www.javbus.com/ja/' + number.replace('-','_')) raise Exception('404 page not found')
lx = etree.fromstring(htmlcode, etree.HTMLParser())
title = getTitle(lx)
dic = { dic = {
'title': str(re.sub('\w+-\d+-','',getTitle(htmlcode))).replace(getNum(htmlcode)+'-',''), 'title': title,
'studio': getStudio(htmlcode), 'studio': getStudioJa(lx),
'year': getYear(htmlcode), 'year': getYear(lx),
'outline': getOutline(number), 'outline': getOutline(number, title),
'runtime': getRuntime(htmlcode), 'runtime': getRuntime(lx),
'director': getDirector(htmlcode), 'director': getDirectorJa(lx),
'actor': getActor(htmlcode), 'actor': getActor(lx),
'release': getRelease(htmlcode), 'release': getRelease(lx),
'number': getNum(htmlcode), 'number': getNum(lx),
'cover': getCover(htmlcode), 'cover': getCover(lx),
'tag': getTag(htmlcode), 'tag': getTag(lx),
'extrafanart': getExtrafanart(htmlcode), 'extrafanart': getExtrafanart(htmlcode),
'label': getSerise(htmlcode), 'label': getSeriseJa(lx),
'imagecut': 0, 'imagecut': 0,
'actor_photo': '', # 'actor_photo': '',
'website': 'https://www.javbus.com/ja/' + number, 'website': 'https://www.javbus.com/ja/' + number,
'source': 'javbus.py', 'source': 'javbus.py',
'series': getSerise(htmlcode), 'series': getSeriseJa(lx),
} }
js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8') js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8')
return js return js
@@ -155,32 +119,36 @@ def main(number):
htmlcode = get_html('https://www.fanbus.us/' + number) htmlcode = get_html('https://www.fanbus.us/' + number)
except: except:
htmlcode = get_html('https://www.javbus.com/' + number) htmlcode = get_html('https://www.javbus.com/' + number)
if "<title>404 Page Not Found" in htmlcode:
raise Exception('404 page not found')
lx = etree.fromstring(htmlcode,etree.HTMLParser())
title = getTitle(lx)
dic = { dic = {
'title': str(re.sub('\w+-\d+-', '', getTitle(htmlcode))), 'title': title,
'studio': getStudio(htmlcode), 'studio': getStudio(lx),
'year': str(re.search('\d{4}', getYear(htmlcode)).group()), 'year': getYear(lx),
'outline': getOutline(number), 'outline': getOutline(number, title),
'runtime': getRuntime(htmlcode), 'runtime': getRuntime(lx),
'director': getDirector(htmlcode), 'director': getDirector(lx),
'actor': getActor(htmlcode), 'actor': getActor(lx),
'release': getRelease(htmlcode), 'release': getRelease(lx),
'number': getNum(htmlcode), 'number': getNum(lx),
'cover': getCover(htmlcode), 'cover': getCover(lx),
'imagecut': 1, 'imagecut': 1,
'tag': getTag(htmlcode), 'tag': getTag(lx),
'extrafanart': getExtrafanart(htmlcode), 'extrafanart': getExtrafanart(htmlcode),
'label': getSerise(htmlcode), 'label': getSerise(lx),
'actor_photo': getActorPhoto(htmlcode), # 'actor_photo': getActorPhoto(lx),
'website': 'https://www.javbus.com/' + number, 'website': 'https://www.javbus.com/' + number,
'source': 'javbus.py', 'source': 'javbus.py',
'series': getSerise(htmlcode), 'series': getSerise(lx),
} }
js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4,separators=(',', ':'), ) # .encode('UTF-8') js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4,separators=(',', ':'), ) # .encode('UTF-8')
return js return js
except: except:
return main_uncensored(number) return main_uncensored(number)
except Exception as e: except Exception as e:
if config.Config().debug(): if config.getInstance().debug():
print(e) print(e)
data = { data = {
"title": "", "title": "",
@@ -191,5 +159,13 @@ def main(number):
return js return js
if __name__ == "__main__" : if __name__ == "__main__" :
config.G_conf_override['debug_mode:switch'] = True
print(main('ABP-888'))
print(main('ABP-960'))
print(main('ADV-R0624')) # 404
print(main('MMNT-010'))
print(main('ipx-292')) print(main('ipx-292'))
print(main('CEMD-011')) print(main('CEMD-011'))
print(main('CJOD-278'))
print(main('100221_001'))
print(main('AVSW-061'))
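Most of the rewritten javbus getters share one shape: locate the span whose text is the field label, step up to its parent, and take the first <a> text, falling back to '' when the field is absent. A hedged sketch of that pattern; the helper name and sample HTML are invented:

from lxml import etree

def label_text(html, label):
    # '<span>label</span>' sits beside the value link inside the same <p>
    x = html.xpath(f'//span[contains(text(),"{label}")]/../a/text()')
    return str(x[0]) if len(x) else ''

page = '<html><body><p><span>製作商:</span> <a href="/studio/1">ExampleStudio</a></p></body></html>'
lx = etree.fromstring(page, etree.HTMLParser())
print(label_text(lx, '製作商:'))  # -> ExampleStudio
print(label_text(lx, '導演:'))    # -> '' when the field is missing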

@@ -3,25 +3,22 @@ sys.path.append('../')
import re import re
from lxml import etree from lxml import etree
import json import json
from bs4 import BeautifulSoup
from ADC_function import * from ADC_function import *
from WebCrawler import airav from mechanicalsoup.stateful_browser import StatefulBrowser
# import sys from WebCrawler.storyline import getStoryline
# import io # import io
# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True) # sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)
def getTitle(a): def getTitle(html):
html = etree.fromstring(a, etree.HTMLParser())
browser_title = str(html.xpath("/html/head/title/text()")[0]) browser_title = str(html.xpath("/html/head/title/text()")[0])
return browser_title[:browser_title.find(' | JavDB')].strip() return browser_title[:browser_title.find(' | JavDB')].strip()
def getActor(a): def getActor(html):
html = etree.fromstring(a, etree.HTMLParser())
actors = html.xpath('//span[@class="value"]/a[contains(@href,"/actors/")]/text()') actors = html.xpath('//span[@class="value"]/a[contains(@href,"/actors/")]/text()')
genders = html.xpath('//span[@class="value"]/a[contains(@href,"/actors/")]/../strong/@class') genders = html.xpath('//span[@class="value"]/a[contains(@href,"/actors/")]/../strong/@class')
r = [] r = []
idx = 0 idx = 0
actor_gendor = config.Config().actor_gender() actor_gendor = config.getInstance().actor_gender()
if not actor_gendor in ['female','male','both','all']: if not actor_gendor in ['female','male','both','all']:
actor_gendor = 'female' actor_gendor = 'female'
for act in actors: for act in actors:
@@ -33,8 +30,8 @@ def getActor(a):
idx = idx + 1 idx = idx + 1
return r return r
def getaphoto(url): def getaphoto(url, browser):
html_page = get_html(url) html_page = browser.open_relative(url).text if isinstance(browser, StatefulBrowser) else get_html(url)
img_prether = re.compile(r'<span class\=\"avatar\" style\=\"background\-image\: url\((.*?)\)') img_prether = re.compile(r'<span class\=\"avatar\" style\=\"background\-image\: url\((.*?)\)')
img_url = img_prether.findall(html_page) img_url = img_prether.findall(html_page)
if img_url: if img_url:
@@ -42,24 +39,18 @@ def getaphoto(url):
else: else:
return '' return ''
def getActorPhoto(html): #//*[@id="star_qdt"]/li/a/img def getActorPhoto(html, javdb_site, browser): #//*[@id="star_qdt"]/li/a/img
actorall_prether = re.compile(r'<strong>演員\:</strong>\s*?.*?<span class=\"value\">(.*)\s*?</div>') actorall = html.xpath('//strong[contains(text(),"演員:")]/../span/a[starts-with(@href,"/actors/")]')
actorall = actorall_prether.findall(html) if not actorall:
return {}
if actorall: a = getActor(html)
actoralls = actorall[0]
actor_prether = re.compile(r'<a href\=\"(.*?)\">(.*?)</a>')
actor = actor_prether.findall(actoralls)
actor_photo = {} actor_photo = {}
for i in actor: for i in actorall:
actor_photo[i[1]] = getaphoto('https://' + javdb_site + '.com'+i[0]) if i.text in a:
actor_photo[i.text] = getaphoto(urljoin(f'https://{javdb_site}.com', i.attrib['href']), browser)
return actor_photo return actor_photo
else: def getStudio(a, html):
return {}
def getStudio(a):
# html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() # html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
# result1 = str(html.xpath('//strong[contains(text(),"片商")]/../span/text()')).strip(" ['']") # result1 = str(html.xpath('//strong[contains(text(),"片商")]/../span/text()')).strip(" ['']")
# result2 = str(html.xpath('//strong[contains(text(),"片商")]/../span/a/text()')).strip(" ['']") # result2 = str(html.xpath('//strong[contains(text(),"片商")]/../span/a/text()')).strip(" ['']")
@@ -67,23 +58,25 @@ def getStudio(a):
patherr = re.compile(r'<strong>片商\:</strong>[\s\S]*?<a href=\".*?>(.*?)</a></span>') patherr = re.compile(r'<strong>片商\:</strong>[\s\S]*?<a href=\".*?>(.*?)</a></span>')
pianshang = patherr.findall(a) pianshang = patherr.findall(a)
if pianshang: if pianshang:
result = pianshang[0] result = pianshang[0].strip()
else: if len(result):
result = "" return result
# fall back to the seller as the studio
try:
result = str(html.xpath('//strong[contains(text(),"賣家:")]/../span/a/text()')).strip(" ['']")
except:
result = ''
return result return result
def getRuntime(a): def getRuntime(html):
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
result1 = str(html.xpath('//strong[contains(text(),"時長")]/../span/text()')).strip(" ['']") result1 = str(html.xpath('//strong[contains(text(),"時長")]/../span/text()')).strip(" ['']")
result2 = str(html.xpath('//strong[contains(text(),"時長")]/../span/a/text()')).strip(" ['']") result2 = str(html.xpath('//strong[contains(text(),"時長")]/../span/a/text()')).strip(" ['']")
return str(result1 + result2).strip('+').rstrip('mi') return str(result1 + result2).strip('+').rstrip('mi')
def getLabel(a): def getLabel(html):
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
result1 = str(html.xpath('//strong[contains(text(),"系列")]/../span/text()')).strip(" ['']") result1 = str(html.xpath('//strong[contains(text(),"系列")]/../span/text()')).strip(" ['']")
result2 = str(html.xpath('//strong[contains(text(),"系列")]/../span/a/text()')).strip(" ['']") result2 = str(html.xpath('//strong[contains(text(),"系列")]/../span/a/text()')).strip(" ['']")
return str(result1 + result2).strip('+').replace("', '", '').replace('"', '') return str(result1 + result2).strip('+').replace("', '", '').replace('"', '')
def getNum(a): def getNum(html):
html = etree.fromstring(a, etree.HTMLParser())
result1 = str(html.xpath('//strong[contains(text(),"番號")]/../span/text()')).strip(" ['']") result1 = str(html.xpath('//strong[contains(text(),"番號")]/../span/text()')).strip(" ['']")
result2 = str(html.xpath('//strong[contains(text(),"番號")]/../span/a/text()')).strip(" ['']") result2 = str(html.xpath('//strong[contains(text(),"番號")]/../span/a/text()')).strip(" ['']")
return str(result2 + result1).strip('+') return str(result2 + result1).strip('+')
@@ -113,8 +106,7 @@ def getRelease(a):
else: else:
result = '' result = ''
return result return result
def getTag(a): def getTag(html):
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
try: try:
result = html.xpath('//strong[contains(text(),"類別")]/../span/a/text()') result = html.xpath('//strong[contains(text(),"類別")]/../span/a/text()')
total = [] total = []
@@ -135,11 +127,10 @@ def getTag(a):
pass pass
return total return total
def getCover_small(a, index=0): def getCover_small(html, index=0):
# same issue mentioned below, # same issue mentioned below,
# javdb sometimes returns multiple results # javdb sometimes returns multiple results
# DO NOT just get the first one, get the one with the correct index number
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
try: try:
result = html.xpath("//div[@class='item-image fix-scale-cover']/img/@src")[index] result = html.xpath("//div[@class='item-image fix-scale-cover']/img/@src")[index]
if not 'https' in result: if not 'https' in result:
@@ -170,66 +161,76 @@ def getTrailer(htmlcode): # get the trailer
video_url = '' video_url = ''
return video_url return video_url
def getExtrafanart(htmlcode): # get the still shots def getExtrafanart(html): # get the still shots result = []
html_pather = re.compile(r'<div class=\"tile\-images preview\-images\">[\s\S]*?</a>\s+?</div>\s+?</div>') result = []
html = html_pather.search(htmlcode) try:
if html: result = html.xpath("//article[@class='message video-panel']/div[@class='message-body']/div[@class='tile-images preview-images']/a[contains(@href,'/samples/')]/@href")
html = html.group() except:
extrafanart_pather = re.compile(r'<a class="tile-item" href=\"(.*?)\"') pass
extrafanart_imgs = extrafanart_pather.findall(html) return result
if extrafanart_imgs: def getCover(html):
return extrafanart_imgs
return ''
def getCover(htmlcode):
html = etree.fromstring(htmlcode, etree.HTMLParser())
try: try:
result = html.xpath("//div[contains(@class, 'column-video-cover')]/a/img/@src")[0] result = html.xpath("//div[contains(@class, 'column-video-cover')]/a/img/@src")[0]
except: # 2020.7.17 Repair Cover Url crawl except: # 2020.7.17 Repair Cover Url crawl
result = html.xpath("//div[contains(@class, 'column-video-cover')]/img/@src")[0] result = html.xpath("//div[contains(@class, 'column-video-cover')]/img/@src")[0]
return result return result
def getDirector(a): def getDirector(html):
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
result1 = str(html.xpath('//strong[contains(text(),"導演")]/../span/text()')).strip(" ['']") result1 = str(html.xpath('//strong[contains(text(),"導演")]/../span/text()')).strip(" ['']")
result2 = str(html.xpath('//strong[contains(text(),"導演")]/../span/a/text()')).strip(" ['']") result2 = str(html.xpath('//strong[contains(text(),"導演")]/../span/a/text()')).strip(" ['']")
return str(result1 + result2).strip('+').replace("', '", '').replace('"', '') return str(result1 + result2).strip('+').replace("', '", '').replace('"', '')
def getOutline(number): # get the storyline def getOutline0(number): # get the storyline; airav.wiki returns 404, function renamed for now, delete it once the site is confirmed dead
try: try:
response = json.loads(airav.main(number)) htmlcode = get_html('https://cn.airav.wiki/video/' + number)
result = response['outline'] from WebCrawler.airav import getOutline as airav_getOutline
result = airav_getOutline(htmlcode)
return result return result
except: except:
pass
return '' return ''
def getSeries(a): def getOutline(number, title): # get the storyline, querying several sites concurrently
#/html/body/section/div/div[3]/div[2]/nav/div[7]/span/a return getStoryline(number,title)
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() def getSeries(html):
result1 = str(html.xpath('//strong[contains(text(),"系列")]/../span/text()')).strip(" ['']") result1 = str(html.xpath('//strong[contains(text(),"系列")]/../span/text()')).strip(" ['']")
result2 = str(html.xpath('//strong[contains(text(),"系列")]/../span/a/text()')).strip(" ['']") result2 = str(html.xpath('//strong[contains(text(),"系列")]/../span/a/text()')).strip(" ['']")
return str(result1 + result2).strip('+').replace("', '", '').replace('"', '') return str(result1 + result2).strip('+').replace("', '", '').replace('"', '')
def main(number): def main(number):
javdb_site = random.choice(["javdb9", "javdb30"]) # Since the javdb update, only one numbered site can be logged in at a time; the newest login kicks out the old one, so pick the site from the first javdb*.json file found.
# If there is no .json file, or it is past its validity period, pick a site at random.
javdb_sites = ["javdb31", "javdb32"]
debug = config.getInstance().debug()
try: try:
# if re.search(r'[a-zA-Z]+\.\d{2}\.\d{2}\.\d{2}', number).group(): # if re.search(r'[a-zA-Z]+\.\d{2}\.\d{2}\.\d{2}', number).group():
# pass # pass
# else: # else:
# number = number.upper() # number = number.upper()
number = number.upper() number = number.upper()
cookie_json = './' + javdb_site + '.json'
javdb_cookies = {'over18':'1', 'theme':'auto', 'locale':'zh'} javdb_cookies = {'over18':'1', 'theme':'auto', 'locale':'zh'}
# Do not load expired cookies; javdb's login page advertises 7 days without re-login, so assume cookies stay valid for 7 days # Do not load expired cookies; javdb's login page advertises 7 days without re-login, so assume cookies stay valid for 7 days
has_json = False
for cj in javdb_sites:
javdb_site = cj
cookie_json = javdb_site + '.json'
cookies_dict, cookies_filepath = load_cookies(cookie_json) cookies_dict, cookies_filepath = load_cookies(cookie_json)
if isinstance(cookies_dict, dict) and isinstance(cookies_filepath, str): if isinstance(cookies_dict, dict) and isinstance(cookies_filepath, str):
cdays = file_modification_days(cookies_filepath) cdays = file_modification_days(cookies_filepath)
if cdays < 7: if cdays < 7:
javdb_cookies = cookies_dict javdb_cookies = cookies_dict
has_json = True
break
elif cdays != 9999: elif cdays != 9999:
print( print(f'[!]Cookies file {cookies_filepath} was updated {cdays} days ago, it will not be used for HTTP requests.')
f'[!]Cookies file {cookies_filepath} was updated {cdays} days ago, it will not be used for HTTP requests.') if not has_json:
javdb_site = secrets.choice(javdb_sites)
if debug:
print(f'[!]javdb:select site {javdb_site}')
browser = None
try: try:
javdb_url = 'https://' + javdb_site + '.com/search?q=' + number + '&f=all' javdb_url = 'https://' + javdb_site + '.com/search?q=' + number + '&f=all'
query_result = get_html(javdb_url, cookies=javdb_cookies) res, browser = get_html_by_browser(javdb_url, cookies=javdb_cookies, return_type='browser')
if not res.ok:
raise ValueError('javdb search request failed')
query_result = res.text
except: except:
query_result = get_html('https://javdb.com/search?q=' + number + '&f=all', cookies=javdb_cookies) query_result = get_html('https://javdb.com/search?q=' + number + '&f=all', cookies=javdb_cookies)
html = etree.fromstring(query_result, etree.HTMLParser()) # //table/tr[1]/td[1]/text() html = etree.fromstring(query_result, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
@@ -250,61 +251,74 @@ f'[!]Cookies file {cookies_filepath} was updated {cdays} days ago, it will not b
raise ValueError("number not found") raise ValueError("number not found")
correct_url = urls[0] correct_url = urls[0]
try: try:
if isinstance(browser, StatefulBrowser): # faster thanks to HTTP keep-alive
detail_page = browser.open_relative(correct_url).text
else:
javdb_detail_url = 'https://' + javdb_site + '.com' + correct_url javdb_detail_url = 'https://' + javdb_site + '.com' + correct_url
detail_page = get_html(javdb_detail_url, cookies=javdb_cookies) detail_page = get_html(javdb_detail_url, cookies=javdb_cookies)
except: except:
detail_page = get_html('https://javdb.com' + correct_url, cookies=javdb_cookies) detail_page = get_html('https://javdb.com' + correct_url, cookies=javdb_cookies)
# etree.fromstring is expensive, so call it only once; its xpath is fast, faster than bs4 find/select, so use it freely
lx = etree.fromstring(detail_page, etree.HTMLParser())
# no cut image by default # no cut image by default
imagecut = 3 imagecut = 3
# If gray image exists ,then replace with normal cover # If gray image exists ,then replace with normal cover
if re.search(r'[a-zA-Z]+\.\d{2}\.\d{2}\.\d{2}', number): if re.search(r'[a-zA-Z]+\.\d{2}\.\d{2}\.\d{2}', number):
cover_small = getCover_small(query_result) cover_small = getCover_small(html)
else: else:
try: try:
cover_small = getCover_small(query_result, index=ids.index(number)) cover_small = getCover_small(html, index=ids.index(number))
except: except:
# if input number is "STAR438" not "STAR-438", use first search result. # if input number is "STAR438" not "STAR-438", use first search result.
cover_small = getCover_small(query_result) cover_small = getCover_small(html)
if 'placeholder' in cover_small: if 'placeholder' in cover_small:
# replace with the normal cover and cut it # replace with the normal cover and cut it
imagecut = 1 imagecut = 1
cover_small = getCover(detail_page) cover_small = getCover(lx)
dp_number = getNum(detail_page) dp_number = getNum(lx)
if dp_number.upper() != number: if dp_number.upper() != number:
raise ValueError("number not found") raise ValueError("number not found")
title = getTitle(detail_page) title = getTitle(lx)
if title and dp_number: if title and dp_number:
number = dp_number number = dp_number
# remove duplicate title # remove duplicate title
title = title.replace(number, '').strip() title = title.replace(number, '').strip()
dic = { dic = {
'actor': getActor(detail_page), 'actor': getActor(lx),
'title': title, 'title': title,
'studio': getStudio(detail_page), 'studio': getStudio(detail_page, lx),
'outline': getOutline(number), 'outline': getOutline(number, title),
'runtime': getRuntime(detail_page), 'runtime': getRuntime(lx),
'director': getDirector(detail_page), 'director': getDirector(lx),
'release': getRelease(detail_page), 'release': getRelease(detail_page),
'number': number, 'number': number,
'cover': getCover(detail_page), 'cover': getCover(lx),
'cover_small': cover_small, 'cover_small': cover_small,
'trailer': getTrailer(detail_page), 'trailer': getTrailer(detail_page),
'extrafanart': getExtrafanart(detail_page), 'extrafanart': getExtrafanart(lx),
'imagecut': imagecut, 'imagecut': imagecut,
'tag': getTag(detail_page), 'tag': getTag(lx),
'label': getLabel(detail_page), 'label': getLabel(lx),
'year': getYear(detail_page), # str(re.search('\d{4}',getRelease(a)).group()), 'year': getYear(detail_page), # str(re.search('\d{4}',getRelease(a)).group()),
'actor_photo': getActorPhoto(detail_page), # 'actor_photo': getActorPhoto(lx, javdb_site, browser),
'website': 'https://javdb.com' + correct_url, 'website': 'https://javdb.com' + correct_url,
'source': 'javdb.py', 'source': 'javdb.py',
'series': getSeries(detail_page), 'series': getSeries(lx),
} }
if not dic['actor'] and re.match(r'FC2-[\d]+', number, re.A):
dic['actor'].append('素人')
if not dic['series']:
dic['series'] = dic['studio']
if not dic['label']:
dic['label'] = dic['studio']
except Exception as e: except Exception as e:
if config.Config().debug(): if config.getInstance().debug():
print(e) print(e)
dic = {"title": ""} dic = {"title": ""}
js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8') js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8')
@@ -313,10 +327,18 @@ f'[!]Cookies file {cookies_filepath} was updated {cdays} days ago, it will not b
# main('DV-1562') # main('DV-1562')
# input("[+][+]Press enter key exit, you can check the error messge before you exit.\n[+][+]按回车键结束,你可以在结束之前查看和错误信息。") # input("[+][+]Press enter key exit, you can check the error messge before you exit.\n[+][+]按回车键结束,你可以在结束之前查看和错误信息。")
if __name__ == "__main__": if __name__ == "__main__":
config.G_conf_override['debug_mode:switch'] = True
# print(main('blacked.20.05.30')) # print(main('blacked.20.05.30'))
# print(main('AGAV-042')) # print(main('AGAV-042'))
# print(main('BANK-022')) # print(main('BANK-022'))
print(main('FC2-735670')) # print(main('070116-197'))
print(main('FC2-1174949')) # not found # print(main('093021_539')) # no still shots; studio pacopacomama
# print(main('FC2-2278260'))
# print(main('FC2-735670'))
# print(main('FC2-1174949')) # not found
print(main('MVSD-439')) print(main('MVSD-439'))
print(main('EHM0001')) # not found # print(main('EHM0001')) # not found
# print(main('FC2-2314275'))
# print(main('EBOD-646'))
# print(main('LOVE-262'))
print(main('ABP-890'))
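The cookie-selection loop above relies on the project's load_cookies and file_modification_days helpers; a self-contained approximation of the same policy, using only the standard library and the file names from the diff:

import json, time, secrets
from pathlib import Path

javdb_sites = ['javdb31', 'javdb32']
javdb_cookies = {'over18': '1', 'theme': 'auto', 'locale': 'zh'}
javdb_site = None
for s in javdb_sites:
    p = Path(s + '.json')
    # only trust cookie files younger than the assumed 7-day login lifetime
    if p.is_file() and (time.time() - p.stat().st_mtime) < 7 * 86400:
        javdb_cookies = json.loads(p.read_text(encoding='utf-8'))
        javdb_site = s
        break
if javdb_site is None:
    javdb_site = secrets.choice(javdb_sites)  # no fresh cookies: pick a site at random
print(javdb_site, sorted(javdb_cookies))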

@@ -137,7 +137,7 @@ def main(number2):
'series': getSeries(a), 'series': getSeries(a),
} }
except Exception as e: except Exception as e:
if config.Config().debug(): if config.getInstance().debug():
print(e) print(e)
dic = {"title": ""} dic = {"title": ""}

WebCrawler/storyline.py Normal file
@@ -0,0 +1,334 @@
import sys
sys.path.append('../')
import re
import json
import builtins
from ADC_function import *
from multiprocessing import Pool
from multiprocessing.dummy import Pool as ThreadPool
from difflib import SequenceMatcher
from unicodedata import category
from number_parser import is_uncensored
G_registered_storyline_site = {"airav", "avno1", "xcity", "amazon", "58avgo"}
G_mode_txt = ('顺序执行','线程池','进程池')
class noThread(object):
def map(self, fn, param):
return builtins.map(fn, param)
def __enter__(self):
return self
def __exit__(self, exc_type, exc_val, exc_tb):
pass
# Fetch the storyline: the listed sites are queried concurrently; priority runs from first to last
def getStoryline(number, title, sites: list=None):
start_time = time.time()
conf = config.getInstance()
debug = conf.debug() or conf.storyline_show() == 2
storyine_sites = conf.storyline_site().split(',') if sites is None else sites
if is_uncensored(number):
storyine_sites += conf.storyline_uncensored_site().split(',')
else:
storyine_sites += conf.storyline_censored_site().split(',')
r_dup = set()
apply_sites = []
for s in storyine_sites:
if s in G_registered_storyline_site and s not in r_dup:
apply_sites.append(s)
r_dup.add(s)
mp_args = ((site, number, title, debug) for site in apply_sites)
cores = min(len(apply_sites), os.cpu_count())
if cores == 0:
return ''
run_mode = conf.storyline_mode()
assert run_mode in (0,1,2)
with ThreadPool(cores) if run_mode == 1 else Pool(cores) if run_mode == 2 else noThread() as pool:
result = pool.map(getStoryline_mp, mp_args)
result = list(result) if run_mode == 0 else result
if not debug and conf.storyline_show() == 0:
for value in result:
if isinstance(value, str) and len(value):
return value
return ''
# The debug output below is written to the log; output from the process pool is not, it only appears on stdout
cnt = len(apply_sites)
s = f'[!]Storyline{G_mode_txt[run_mode]}模式运行{cnt}个进程总用时(含启动开销){time.time() - start_time:.3f}秒,结束于{time.strftime("%H:%M:%S")}'
first = True
sel = ''
for i in range(cnt):
sl = len(result[i]) if isinstance(result[i], str) else 0
if sl and first:
s += f'[选中{apply_sites[i]}字数:{sl}]'
first = False
sel = result[i]
elif sl:
s += f'{apply_sites[i]}字数:{sl}'
else:
s += f'{apply_sites[i]}:空'
print(s)
return sel
def getStoryline_mp(args):
return _getStoryline_mp(*args)
# Note: print() from a spawned process is not written to the log; when debugging a broken data source, watch stdout directly, and attach a screenshot when filing an issue
def _getStoryline_mp(site, number, title, debug):
start_time = time.time()
storyline = None
if not isinstance(site, str):
return storyline
elif site == "airav":
storyline = getStoryline_airav(number, debug)
elif site == "avno1":
storyline = getStoryline_avno1(number, debug)
elif site == "xcity":
storyline = getStoryline_xcity(number, debug)
elif site == "amazon":
storyline = getStoryline_amazon(title, number, debug)
elif site == "58avgo":
storyline = getStoryline_58avgo(number, debug)
if not debug:
return storyline
print("[!]MP 进程[{}]运行{:.3f}秒,结束于{}返回结果: {}".format(
site,
time.time() - start_time,
time.strftime("%H:%M:%S"),
storyline if isinstance(storyline, str) and len(storyline) else '[空]')
)
return storyline
def getStoryline_airav(number, debug):
try:
number_up = number.upper()
site = secrets.choice(('airav.cc','airav4.club'))
url = f'https://{site}/searchresults.aspx?Search={number}&Type=0'
res, browser = get_html_by_browser(url, return_type='browser')
if not res.ok:
raise ValueError(f"get_html_by_browser('{url}') failed")
avs = browser.page.select_one('div.resultcontent > ul > li:nth-child(1) > div')
if number_up not in avs.select_one('a > h3').text.upper():
raise ValueError("number not found")
detail_url = avs.select_one('a')['href']
res = browser.open_relative(detail_url)
if not res.ok:
raise ValueError(f"browser.open_relative('{detail_url}') failed")
t = browser.page.select_one('head > title').text
airav_number = str(re.findall(r'^\s*\[(.*?)]', t)[0]).upper()
if number.upper() != airav_number:
raise ValueError(f"page number ->[{airav_number}] not match")
desc = browser.page.select_one('li.introduction > span').text.strip()
return desc
except Exception as e:
if debug:
print(f"[-]MP getOutline_amazon Error: {e},number [{number}].")
pass
return None
def getStoryline_58avgo(number, debug):
try:
url = 'http://58avgo.com/cn/index.aspx' + secrets.choice([
'', '?status=3', '?status=4', '?status=7', '?status=9', '?status=10', '?status=11', '?status=12',
'?status=1&Sort=Playon', '?status=1&Sort=dateupload', 'status=1&Sort=dateproduce'
]) # 随机选一个避免网站httpd日志中单个ip的请求太过单一
kwd = number[:6] if re.match(r'\d{6}[\-_]\d{2,3}', number) else number
result, browser = get_html_by_form(url,
fields = {'ctl00$TextBox_SearchKeyWord' : kwd},
return_type = 'browser')
if not result.ok:
raise ValueError(f"get_html_by_form('{url}','{number}') failed")
if f'searchresults.aspx?Search={kwd}' not in browser.url:
raise ValueError("number not found")
s = browser.page.select('div.resultcontent > ul > li.listItem > div.one-info-panel.one > a.ga_click')
link = None
for i in range(len(s)):
title = s[i].h3.text.strip()
if re.search(number, title, re.I):
link = s[i]
break
if link is None:
raise ValueError("number not found")
result = browser.follow_link(link)
if not result.ok or 'playon.aspx' not in browser.url:
raise ValueError("detail page not found")
title = browser.page.select('head > title')[0].text.strip()
detail_number = str(re.findall('\[(.*?)]', title)[0])
if not re.search(number, detail_number, re.I):
raise ValueError("detail page number not match, got ->[{detail_number}]")
return browser.page.select('#ContentPlaceHolder1_Label2')[0].text.strip()
except Exception as e:
if debug:
print(f"[-]MP getOutline_58avgo Error: {e}, number [{number}].")
pass
return ''
def getStoryline_avno1(number, debug): # fetch the storyline from avno1.cc
try:
url = 'http://www.avno1.cc/cn/' + secrets.choice(['usercenter.php?item=' +
secrets.choice(['pay_support', 'qa', 'contact', 'guide-vpn']),
'?top=1&cat=hd', '?top=1', '?cat=hd', 'porn', '?cat=jp', '?cat=us', 'recommend_category.php'
]) # pick one at random so a single IP's requests don't look too uniform in the site's httpd logs
result, browser = get_html_by_form(url,
form_select='div.wrapper > div.header > div.search > form',
fields = {'kw' : number},
return_type = 'browser')
if not result.ok:
raise ValueError(f"get_html_by_form('{url}','{number}') failed")
s = browser.page.select('div.type_movie > div > ul > li > div')
for i in range(len(s)):
title = s[i].a.h3.text.strip()
page_number = title[title.rfind(' '):].strip()
if re.search(number, page_number, re.I):
return s[i]['data-description'].strip()
raise ValueError(f"page number ->[{page_number}] not match")
except Exception as e:
if debug:
print(f"[-]MP getOutline_avno1 Error: {e}, number [{number}].")
pass
return ''
def getStoryline_xcity(number, debug): # fetch the storyline from xcity
try:
xcity_number = number.replace('-','')
query_result, browser = get_html_by_form(
'https://xcity.jp/' + secrets.choice(['about/','sitemap/','policy/','law/','help/','main/']),
fields = {'q' : xcity_number.lower()},
return_type = 'browser')
if not query_result or not query_result.ok:
raise ValueError("page not found")
result = browser.follow_link(browser.links('avod\/detail')[0])
if not result.ok:
raise ValueError("detail page not found")
return browser.page.select_one('h2.title-detail + p.lead').text.strip()
except Exception as e:
if debug:
print(f"[-]MP getOutline_xcity Error: {e}, number [{number}].")
pass
return ''
def getStoryline_amazon(q_title, number, debug):
if not isinstance(q_title, str) or not len(q_title):
return None
try:
amazon_cookie, _ = load_cookies('amazon.json')
cookie = amazon_cookie if isinstance(amazon_cookie, dict) else None
url = "https://www.amazon.co.jp/s?k=" + q_title
res, browser = get_html_by_browser(url, cookies=cookie, return_type='browser')
if not res.ok:
raise ValueError("get_html_by_browser() failed")
lks = browser.links(r'/black-curtain/save-eligibility/black-curtain')
if isinstance(lks, list) and len(lks):
browser.follow_link(lks[0])
cookie = None
html = etree.fromstring(str(browser.page), etree.HTMLParser())
titles = html.xpath("//span[contains(@class,'a-color-base a-text-normal')]/text()")
urls = html.xpath("//span[contains(@class,'a-color-base a-text-normal')]/../@href")
if not len(urls) or len(urls) != len(titles):
raise ValueError("titles not found")
idx = amazon_select_one(titles, q_title, number, debug)
if not isinstance(idx, int) or idx < 0:
raise ValueError("title and number not found")
furl = urls[idx]
r = browser.open_relative(furl)
if not r.ok:
raise ValueError("browser.open_relative()) failed.")
lks = browser.links(r'/black-curtain/save-eligibility/black-curtain')
if isinstance(lks, list) and len(lks):
browser.follow_link(lks[0])
cookie = None
ama_t = browser.page.select_one('#productDescription > p').text.replace('\n',' ').strip()
ama_t = re.sub(r'審査番号:\d+', '', ama_t)
if cookie is None:
# The auto-created cookies file sits at the end of the search path list, with the lowest priority. Users with an amazon.co.jp account can export cookies from their browser into a path earlier in the list.
ama_save = Path.home() / ".local/share/avdc/amazon.json"
ama_save.parent.mkdir(parents=True, exist_ok=True)
ama_save.write_text(json.dumps(browser.session.cookies.get_dict(), sort_keys=True, indent=4), encoding='utf-8')
return ama_t
except Exception as e:
if debug:
print(f'[-]MP getOutline_amazon Error: {e}, number [{number}], title: {q_title}')
pass
return None
# From the DVD and Blu-ray listings on the shelf, pick the one whose title is most similar
def amazon_select_one(a_titles, q_title, number, debug):
sel = -1
ratio = 0
que_t = ''.join(c for c in q_title if not re.match(r'(P|S|Z).*', category(c), re.A))
for loc in range(len(a_titles)):
t = a_titles[loc]
if re.search(number, t, re.I): # titles rarely carry the ID number, but a very few do; an exact number match passes immediately
return loc
if not re.search('DVD|Blu-ray', t, re.I):
continue
ama_t = str(re.sub('DVD|Blu-ray', "", t, flags=re.I))
ama_t = ''.join(c for c in ama_t if not re.match(r'(P|S|Z).*', category(c), re.A))
findlen = 0
lastpos = -1
cnt = len(ama_t)
for c in reversed(ama_t):
cnt -= 1
pos = que_t.rfind(c)
if lastpos >= 0:
pos_near = que_t[:lastpos].rfind(c)
if pos_near < 0:
findlen = 0
lastpos = -1
ama_t = ama_t[:cnt+1]
else:
pos = pos_near
if pos < 0:
if category(c) == 'Nd':
return -1
ama_t = ama_t[:cnt]
findlen = 0
lastpos = -1
continue
if findlen > 0 and len(que_t) > 1 and lastpos == pos+1:
findlen += 1
lastpos = pos
if findlen >= 4:
break
continue
findlen = 1
lastpos = pos
if findlen==0:
return -1
r = SequenceMatcher(None, ama_t, que_t).ratio()
if r > ratio:
sel = loc
ratio = r
save_t_ = ama_t
if ratio > 0.999:
break
if ratio < 0.5:
return -1
if not debug:
# for now, only results with similarity of 0.9 or higher are trusted
return sel if ratio >= 0.9 else -1
# in debug mode, log data on match accuracy
if ratio < 0.9:
# rejected results with similarity in [0.5, 0.9) are logged separately
(Path.home() / '.avlogs/ratio0.5.txt').open('a', encoding='utf-8').write(
f' [{number}] Ratio:{ratio}\n{a_titles[sel]}\n{q_title}\n{save_t_}\n{que_t}\n')
return -1
# log of accepted results
(Path.home() / '.avlogs/ratio.txt').open('a', encoding='utf-8').write(
f' [{number}] Ratio:{ratio}\n{a_titles[sel]}\n{q_title}\n{save_t_}\n{que_t}\n')
return sel
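The pool dispatch in getStoryline stands on its own: a single with-statement selects sequential execution, a thread pool, or a process pool. A trimmed, runnable sketch with a trivial worker standing in for _getStoryline_mp:

from multiprocessing import Pool
from multiprocessing.dummy import Pool as ThreadPool

class noThread(object):
    # mimics the slice of the Pool API that getStoryline needs
    def map(self, fn, param):
        return map(fn, param)
    def __enter__(self):
        return self
    def __exit__(self, exc_type, exc_val, exc_tb):
        pass

def work(x):  # stand-in for _getStoryline_mp
    return x * x

def run(run_mode, items, cores=2):
    with ThreadPool(cores) if run_mode == 1 else Pool(cores) if run_mode == 2 else noThread() as pool:
        result = pool.map(work, items)
        return list(result)  # mode 0 yields a lazy map object, so materialize it

if __name__ == '__main__':
    for mode in (0, 1, 2):
        print(mode, run(mode, [1, 2, 3]))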

@@ -3,16 +3,12 @@ sys.path.append('../')
import re import re
from lxml import etree from lxml import etree
import json import json
from bs4 import BeautifulSoup
from ADC_function import * from ADC_function import *
from WebCrawler.storyline import getStoryline
# import sys
# import io # import io
# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True) # sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)
def getTitle(a): def getTitle(html):
html = etree.fromstring(a, etree.HTMLParser())
result = html.xpath('//*[@id="program_detail_title"]/text()')[0] result = html.xpath('//*[@id="program_detail_title"]/text()')[0]
return result return result
@@ -43,8 +39,7 @@ def getActorPhoto(browser):
return o return o
def getStudio(a): def getStudio(html):
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
try: try:
result = str(html.xpath('//*[@id="avodDetails"]/div/div[3]/div[2]/div/ul[1]/li[4]/a/span/text()')).strip(" ['']") result = str(html.xpath('//*[@id="avodDetails"]/div/div[3]/div[2]/div/ul[1]/li[4]/a/span/text()')).strip(" ['']")
except: except:
@@ -52,20 +47,14 @@ def getStudio(a):
return result.strip('+').replace("', '", '').replace('"', '') return result.strip('+').replace("', '", '').replace('"', '')
def getRuntime(a): def getRuntime(html):
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
try: try:
result1 = html.xpath('//*[@id="avodDetails"]/div/div[3]/div[2]/div/ul[2]/li[3]/text()')[0] x = html.xpath('//span[@class="koumoku" and text()="収録時間"]/../text()')[1].strip()
except: return x
return ''
try:
return re.findall('\d+',result1)[0]
except: except:
return '' return ''
def getLabel(html):
def getLabel(a):
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
try: try:
result = html.xpath('//*[@id="avodDetails"]/div/div[3]/div[2]/div/ul[1]/li[5]/a/span/text()')[0] result = html.xpath('//*[@id="avodDetails"]/div/div[3]/div[2]/div/ul[1]/li[5]/a/span/text()')[0]
return result return result
@@ -73,8 +62,7 @@ def getLabel(a):
return '' return ''
def getNum(a): def getNum(html):
html = etree.fromstring(a, etree.HTMLParser())
try: try:
result = html.xpath('//*[@id="hinban"]/text()')[0] result = html.xpath('//*[@id="hinban"]/text()')[0]
return result return result
@@ -90,8 +78,7 @@ def getYear(getRelease):
return getRelease return getRelease
def getRelease(a): def getRelease(html):
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
try: try:
result = str(html.xpath('//*[@id="avodDetails"]/div/div[3]/div[2]/div/ul[1]/li[2]/text()')[1]) result = str(html.xpath('//*[@id="avodDetails"]/div/div[3]/div[2]/div/ul[1]/li[2]/text()')[1])
except: except:
@@ -102,31 +89,22 @@ def getRelease(a):
return '' return ''
def getTag(a): def getTag(html):
result2=[] x = html.xpath('//span[@class="koumoku" and text()="ジャンル"]/../a[starts-with(@href,"/avod/genre/")]/text()')
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() return [translateTag_to_sc(i.strip()) for i in x if len(i.strip())] if len(x) and len(x[0]) else []
result1 = html.xpath('//*[@id="avodDetails"]/div/div[3]/div[2]/div/ul[1]/li[6]/a/text()')
for i in result1:
i=i.replace(u'\n','')
i=i.replace(u'\t','')
if len(i):
result2.append(i)
return result2
def getCover_small(a, index=0): def getCover_small(html, index=0):
# same issue mentioned below, # same issue mentioned below,
# javdb sometimes returns multiple results # javdb sometimes returns multiple results
# DO NOT just get the first one, get the one with the correct index number # DO NOT just get the first one, get the one with the correct index number
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
result = html.xpath("//div[@class='item-image fix-scale-cover']/img/@src")[index] result = html.xpath("//div[@class='item-image fix-scale-cover']/img/@src")[index]
if not 'https' in result: if not 'https' in result:
result = 'https:' + result result = 'https:' + result
return result return result
def getCover(htmlcode): def getCover(html):
html = etree.fromstring(htmlcode, etree.HTMLParser())
try: try:
result = html.xpath('//*[@id="avodDetails"]/div/div[3]/div[1]/p/a/@href')[0] result = html.xpath('//*[@id="avodDetails"]/div/div[3]/div[1]/p/a/@href')[0]
return 'https:' + result return 'https:' + result
@@ -134,8 +112,7 @@ def getCover(htmlcode):
return '' return ''
def getDirector(a): def getDirector(html):
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
try: try:
result = html.xpath('//*[@id="program_detail_director"]/text()')[0].replace(u'\n','').replace(u'\t', '') result = html.xpath('//*[@id="program_detail_director"]/text()')[0].replace(u'\n','').replace(u'\t', '')
return result return result
@@ -143,19 +120,21 @@ def getDirector(a):
return '' return ''
def getOutline(htmlcode): def getOutline(html, number, title):
html = etree.fromstring(htmlcode, etree.HTMLParser()) storyline_site = config.getInstance().storyline_site().split(',')
a = set(storyline_site) & {'airav', 'avno1'} # only sites that return Chinese synopsis text
if len(a):
site = [n for n in storyline_site if n in a]
g = getStoryline(number, title, site)
if len(g):
return g
try: try:
result = html.xpath('//*[@id="avodDetails"]/div/div[3]/div[2]/div/ul[2]/li[5]/p/text()')[0] x = html.xpath('//h2[@class="title-detail"]/../p[@class="lead"]/text()')[0]
return x.replace(getNum(html), '')
except: except:
return '' return ''
try:
return re.sub('\\\\\w*\d+','',result)
except:
return result
def getSeries(htmlcode): def getSeries(html):
html = etree.fromstring(htmlcode, etree.HTMLParser())
try: try:
try: try:
result = html.xpath("//span[contains(text(),'シリーズ')]/../a/span/text()")[0] result = html.xpath("//span[contains(text(),'シリーズ')]/../a/span/text()")[0]
@@ -181,11 +160,10 @@ def getExtrafanart(htmlcode): # get the still shots
return s return s
return '' return ''
def main(number): def open_by_browser(number):
try:
xcity_number = number.replace('-','') xcity_number = number.replace('-','')
query_result, browser = get_html_by_form( query_result, browser = get_html_by_form(
'https://xcity.jp/about/', 'https://xcity.jp/' + secrets.choice(['about/','sitemap/','policy/','law/','help/','main/']),
fields = {'q' : xcity_number.lower()}, fields = {'q' : xcity_number.lower()},
return_type = 'browser') return_type = 'browser')
if not query_result or not query_result.ok: if not query_result or not query_result.ok:
@@ -193,38 +171,44 @@ def main(number):
result = browser.follow_link(browser.links('avod\/detail')[0]) result = browser.follow_link(browser.links('avod\/detail')[0])
if not result.ok: if not result.ok:
raise ValueError("xcity.py: detail page not found") raise ValueError("xcity.py: detail page not found")
detail_page = str(browser.page) return str(browser.page), browser
def main(number):
try:
detail_page, browser = open_by_browser(number)
url = browser.url url = browser.url
newnum = getNum(detail_page).upper() lx = etree.fromstring(detail_page, etree.HTMLParser())
newnum = getNum(lx).upper()
number_up = number.upper() number_up = number.upper()
if newnum != number_up: if newnum != number_up:
if newnum == xcity_number.upper(): if newnum == number.replace('-','').upper():
newnum = number_up newnum = number_up
else: else:
raise ValueError("xcity.py: number not found") raise ValueError("xcity.py: number not found")
title = getTitle(lx)
dic = { dic = {
'actor': getActor(browser), 'actor': getActor(browser),
'title': getTitle(detail_page), 'title': title,
'studio': getStudio(detail_page), 'studio': getStudio(lx),
'outline': getOutline(detail_page), 'outline': getOutline(lx, number, title),
'runtime': getRuntime(detail_page), 'runtime': getRuntime(lx),
'director': getDirector(detail_page), 'director': getDirector(lx),
'release': getRelease(detail_page), 'release': getRelease(lx),
'number': newnum, 'number': newnum,
'cover': getCover(detail_page), 'cover': getCover(lx),
'cover_small': '', 'cover_small': '',
'extrafanart': getExtrafanart(detail_page), 'extrafanart': getExtrafanart(detail_page),
'imagecut': 1, 'imagecut': 1,
'tag': getTag(detail_page), 'tag': getTag(lx),
'label': getLabel(detail_page), 'label': getLabel(lx),
'year': getYear(getRelease(detail_page)), # str(re.search('\d{4}',getRelease(a)).group()), 'year': getYear(getRelease(lx)), # str(re.search('\d{4}',getRelease(a)).group()),
# 'actor_photo': getActorPhoto(browser), # 'actor_photo': getActorPhoto(browser),
'website': url, 'website': url,
'source': 'xcity.py', 'source': 'xcity.py',
'series': getSeries(detail_page), 'series': getSeries(lx),
} }
except Exception as e: except Exception as e:
if config.Config().debug(): if config.getInstance().debug():
print(e) print(e)
dic = {"title": ""} dic = {"title": ""}

config.ini
@@ -1,12 +1,13 @@
 [common]
 main_mode=1
+source_folder=./
 failed_output_folder=failed
 success_output_folder=JAV_output
 soft_link=0
 failed_move=1
 auto_exit=0
 transalte_to_sc=0
-multi_threading=1
+multi_threading=0
 ;actor_gender value: female(♀) or male(♂) or both(♀ ♂) or all(♂ ♀ ⚧)
 actor_gender=female
 del_empty_folder=1
@@ -16,6 +17,8 @@ nfo_skip_days=30
 ; stop after processing this many video files; 0 processes all of them
 stop_counter=0
 ; combined, the two options above let you scrape or organize thousands of files in several small batches without triggering bans from translation or metadata sites
+ignore_failed_list=0
+download_only_missing_images=1

 [proxy]
 ;proxytype: http or socks5 or socks5h switch: 0 1
@@ -62,8 +65,7 @@ switch=0
 ; used to determine whether a movie is uncensored
 [uncensored]
-uncensored_prefix=S2M,BT,LAF,SMD
+uncensored_prefix=S2M,BT,LAF,SMD,SMBD,SM3D2DBD,SKY-,SKYHD,CWP,CWDV,CWBD,CW3D2DBD,MKD,MKBD,MXBD,MK3D2DBD,MCB3DBD,MCBD,RHJ,RED

 [media]
 ; video file extensions
@@ -82,3 +84,20 @@ water=2
 switch=0
 extrafanart_folder=extrafanart
+
+; storyline
+[storyline]
+; when website is javbus, javdb, avsox, xcity or carib, the lists site / censored_site / uncensored_site
+; name the optional data sources for storyline text. Sites in a list are queried concurrently; priority
+; runs left to right, and a site further right is only used when everything before it returned nothing.
+; airav, avno1 and 58avgo serve Chinese storylines: airav only covers censored titles, avno1 covers both
+; censored and uncensored, and 58avgo only covers uncensored or leaked/decensored titles (feature unused).
+; xcity and amazon are Japanese; since the amazon store carries no release IDs, picking the matching DVD
+; is only 99.6% accurate. If all three lists are empty nothing is queried, which greatly speeds up scraping.
+; site=
+site=avno1
+censored_site=airav,xcity,amazon
+uncensored_site=58avgo
+; run mode 0: sequential (slowest) 1: thread pool (default) 2: process pool (bigger startup cost than threads; faster the more sites run concurrently)
+run_mode=1
+; show_result: storyline debug info, 0 off, 1 brief, 2 verbose (the verbose part is not logged); turn on 2 to see why storyline lookup fails
+show_result=0
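
Editor's note: a minimal sketch of how these comma-separated lists can be consumed, applying the left-to-right priority after all sites have answered (the fetch() helper is hypothetical):

    import configparser
    cfg = configparser.ConfigParser()
    cfg.read('config.ini', encoding='utf-8-sig')
    sites = [s.strip() for s in cfg.get('storyline', 'censored_site').split(',') if s.strip()]
    # results = {s: fetch(s) for s in sites}                       # fetch() is hypothetical
    # text = next((results[s] for s in sites if results[s]), '')   # first non-empty wins
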

config.py

@@ -1,33 +1,82 @@
 import os
+import re
 import sys
 import configparser
-import codecs
 from pathlib import Path

+G_conf_override = {
+    # index 0 stores the first Config() instance for quick access via getInstance()
+    0 : None,
+    # registered override config items
+    "common:main_mode" : None,
+    "common:source_folder" : None,
+    "common:auto_exit" : None,
+    "common:nfo_skip_days" : None,
+    "common:stop_counter" : None,
+    "common:ignore_failed_list" : None,
+    "debug_mode:switch" : None
+}
+
+def getInstance():
+    if isinstance(G_conf_override[0], Config):
+        return G_conf_override[0]
+    return Config()
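
Editor's note: slot 0 caches the first Config() ever built, so getInstance() hands back the already-parsed instance instead of re-reading the ini, and the string keys let callers override single items process-wide. A short usage sketch of the API added above:

    # The first construction parses config.ini and registers itself in G_conf_override[0].
    c1 = Config()
    assert getInstance() is c1                    # cached; no second parse
    G_conf_override["common:main_mode"] = 2       # override applies to every instance
    assert getInstance().main_mode() == 2
    G_conf_override["common:main_mode"] = None    # fall back to the ini value
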
 class Config:
     def __init__(self, path: str = "config.ini"):
-        path_search_order = [
-            path,
-            "./config.ini",
-            os.path.join(Path.home(), "avdc.ini"),
-            os.path.join(Path.home(), ".avdc.ini"),
-            os.path.join(Path.home(), ".avdc/config.ini"),
-            os.path.join(Path.home(), ".config/avdc/config.ini")
-        ]
+        path_search_order = (
+            Path(path),
+            Path.cwd() / "config.ini",
+            Path.home() / "avdc.ini",
+            Path.home() / ".avdc.ini",
+            Path.home() / ".avdc/config.ini",
+            Path.home() / ".config/avdc/config.ini"
+        )
         ini_path = None
         for p in path_search_order:
-            if os.path.isfile(p):
-                ini_path = p
+            if p.is_file():
+                ini_path = p.resolve()
                 break
         if ini_path:
             self.conf = configparser.ConfigParser()
+            self.ini_path = ini_path
             try:
-                self.conf.read(ini_path, encoding="utf-8-sig")
+                if self.conf.read(ini_path, encoding="utf-8-sig"):
+                    if G_conf_override[0] is None:
+                        G_conf_override[0] = self
             except:
-                self.conf.read(ini_path, encoding="utf-8")
+                if self.conf.read(ini_path, encoding="utf-8"):
+                    if G_conf_override[0] is None:
+                        G_conf_override[0] = self
         else:
-            print("[-]Config file not found!")
-            sys.exit(2)
+            print("ERROR: Config file not found!")
+            print("Please put config file into one of the following path:")
+            print('\n'.join([str(p.resolve()) for p in path_search_order[2:]]))
+            # When no config file is found, shipping the matching default config inside the build and
+            # generating it on a search path when needed is more reliable than having the user hunt down
+            # a version-mismatched copy. This way the single executable is fully functional and can be
+            # run safely from any path.
+            res_path = None
+            # pyinstaller build: look for config.ini inside the bundle
+            if hasattr(sys, '_MEIPASS') and (Path(getattr(sys, '_MEIPASS')) / 'config.ini').is_file():
+                res_path = Path(getattr(sys, '_MEIPASS')) / 'config.ini'
+            # when running as a script, look next to the script itself
+            elif (Path(__file__).resolve().parent / 'config.ini').is_file():
+                res_path = Path(__file__).resolve().parent / 'config.ini'
+            if res_path is None:
+                sys.exit(2)
+            ins = input("Or, Do you want me create a config file for you? (Yes/No)[Y]:")
+            if re.search('n', ins, re.I):
+                sys.exit(2)
+            # Only the home directory is guaranteed writable, so ~/avdc.ini is used as the generated
+            # config path rather than the current directory, which may not be writable. Current versions
+            # no longer encourage keeping the config in the working directory; that survives only as a
+            # trick for switching between multiple config files.
+            write_path = path_search_order[2]  # Path.home() / "avdc.ini"
+            write_path.write_text(res_path.read_text(encoding='utf-8'), encoding='utf-8')
+            print("Config file '{}' created.".format(write_path.resolve()))
+            input("Press Enter key exit...")
+            sys.exit(0)
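
Editor's note: sys._MEIPASS is the temporary directory where PyInstaller's onefile bootloader unpacks everything passed via --add-data, which is why the bundled config.ini is searched there first. A generic access helper in the same spirit (a sketch, not project code):

    import sys
    from pathlib import Path

    def resource_path(name: str) -> Path:
        # Inside a PyInstaller onefile build, data files live under sys._MEIPASS;
        # running from source, fall back to the directory of this script.
        base = Path(getattr(sys, '_MEIPASS', Path(__file__).resolve().parent))
        return base / name
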
         # self.conf = self._default_config()
         # try:
         #     self.conf = configparser.ConfigParser()
@@ -40,13 +89,24 @@ class Config:
         #     print("[-]",e)
         #     sys.exit(3)
         #     #self.conf = self._default_config()

+    def getboolean_override(self, section, item) -> bool:
+        return self.conf.getboolean(section, item) if G_conf_override[f"{section}:{item}"] is None else bool(G_conf_override[f"{section}:{item}"])
+
+    def getint_override(self, section, item) -> int:
+        return self.conf.getint(section, item) if G_conf_override[f"{section}:{item}"] is None else int(G_conf_override[f"{section}:{item}"])
+
+    def get_override(self, section, item) -> str:
+        return self.conf.get(section, item) if G_conf_override[f"{section}:{item}"] is None else str(G_conf_override[f"{section}:{item}"])
+
-    def main_mode(self) -> str:
+    def main_mode(self) -> int:
         try:
-            return self.conf.getint("common", "main_mode")
+            return self.getint_override("common", "main_mode")
         except ValueError:
             self._exit("common:main_mode")

+    def source_folder(self) -> str:
+        return self.get_override("common", "source_folder")
+
     def failed_folder(self) -> str:
         return self.conf.get("common", "failed_output_folder")
@@ -61,7 +121,7 @@ class Config:
     def failed_move(self) -> bool:
         return self.conf.getboolean("common", "failed_move")

     def auto_exit(self) -> bool:
-        return self.conf.getboolean("common", "auto_exit")
+        return self.getboolean_override("common", "auto_exit")

     def transalte_to_sc(self) -> bool:
         return self.conf.getboolean("common", "transalte_to_sc")

     def multi_threading(self) -> bool:
@@ -70,14 +130,18 @@ class Config:
         return self.conf.getboolean("common", "del_empty_folder")

     def nfo_skip_days(self) -> int:
         try:
-            return self.conf.getint("common", "nfo_skip_days")
+            return self.getint_override("common", "nfo_skip_days")
         except:
             return 30

     def stop_counter(self) -> int:
         try:
-            return self.conf.getint("common", "stop_counter")
+            return self.getint_override("common", "stop_counter")
         except:
             return 0

+    def ignore_failed_list(self) -> bool:
+        return self.getboolean_override("common", "ignore_failed_list")
+
+    def download_only_missing_images(self) -> bool:
+        return self.conf.getboolean("common", "download_only_missing_images")
+
     def is_transalte(self) -> bool:
         return self.conf.getboolean("transalte", "switch")

     def is_trailer(self) -> bool:
@@ -173,7 +237,39 @@ class Config:
         return self.conf.get("escape", "folders")

     def debug(self) -> bool:
-        return self.conf.getboolean("debug_mode", "switch")
+        return self.getboolean_override("debug_mode", "switch")
+
+    def storyline_site(self) -> str:
+        try:
+            return self.conf.get("storyline", "site")
+        except:
+            return "avno1"
+
+    def storyline_censored_site(self) -> str:
+        try:
+            return self.conf.get("storyline", "censored_site")
+        except:
+            return "airav,xcity,amazon"
+
+    def storyline_uncensored_site(self) -> str:
+        try:
+            return self.conf.get("storyline", "uncensored_site")
+        except:
+            return "58avgo"
+
+    def storyline_show(self) -> int:
+        try:
+            v = self.conf.getint("storyline", "show_result")
+            return v if v in (0,1,2) else 2 if v > 2 else 0
+        except:
+            return 0
+
+    def storyline_mode(self) -> int:
+        try:
+            v = self.conf.getint("storyline", "run_mode")
+            return v if v in (0,1,2) else 2 if v > 2 else 0
+        except:
+            return 1
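
Editor's note: the conditional expression in storyline_show and storyline_mode simply clamps the configured integer into the 0..2 range; an equivalent, more conventional form:

    # Equivalent clamp for the 0..2 range used by show_result / run_mode:
    clamp = lambda v: min(max(v, 0), 2)
    assert clamp(-5) == 0 and clamp(1) == 1 and clamp(9) == 2
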
     @staticmethod
     def _exit(sec: str) -> None:
@@ -188,6 +284,7 @@ class Config:
         sec1 = "common"
         conf.add_section(sec1)
         conf.set(sec1, "main_mode", "1")
+        conf.set(sec1, "source_folder", "./")
         conf.set(sec1, "failed_output_folder", "failed")
         conf.set(sec1, "success_output_folder", "JAV_output")
         conf.set(sec1, "soft_link", "0")
@@ -199,6 +296,8 @@ class Config:
         conf.set(sec1, "del_empty_folder", "1")
         conf.set(sec1, "nfo_skip_days", 30)
         conf.set(sec1, "stop_counter", 0)
+        conf.set(sec1, "ignore_failed_list", 0)
+        conf.set(sec1, "download_only_missing_images", 1)

         sec2 = "proxy"
         conf.add_section(sec2)
@@ -265,6 +364,14 @@ class Config:
         conf.set(sec13, "switch", 1)
         conf.set(sec13, "extrafanart_folder", "extrafanart")

+        sec14 = "storyline"
+        conf.add_section(sec14)
+        conf.set(sec14, "site", "avno1")
+        conf.set(sec14, "censored_site", "airav,xcity,amazon")
+        conf.set(sec14, "uncensored_site", "58avgo")
+        conf.set(sec14, "show_result", 0)
+        conf.set(sec14, "run_mode", 1)
+
         return conf
@@ -308,9 +415,45 @@ if __name__ == "__main__":
         code = compile(evstr, "<string>", "eval")
         print('{}: "{}"'.format(evstr, eval(code)))
     config = Config()
-    mfilter = ('conf', 'proxy', '_exit', '_default_config')
+    mfilter = {'conf', 'proxy', '_exit', '_default_config', 'getboolean_override', 'getint_override', 'get_override', 'ini_path'}
     for _m in [m for m in dir(config) if not m.startswith('__') and m not in mfilter]:
         evprint(f'config.{_m}()')
-    pfilter = ('proxies', 'SUPPORT_PROXY_TYPE')
-    for _p in [p for p in dir(config.proxy()) if not p.startswith('__') and p not in pfilter]:
-        evprint(f'config.proxy().{_p}')
+    pfilter = {'proxies', 'SUPPORT_PROXY_TYPE'}
+    # test getInstance()
+    assert(getInstance() == config)
+    for _p in [p for p in dir(getInstance().proxy()) if not p.startswith('__') and p not in pfilter]:
+        evprint(f'getInstance().proxy().{_p}')
+    # Override Test
+    G_conf_override["common:nfo_skip_days"] = 4321
+    G_conf_override["common:stop_counter"] = 1234
+    assert config.nfo_skip_days() == 4321
+    assert getInstance().stop_counter() == 1234
+    # remove override
+    G_conf_override["common:stop_counter"] = None
+    G_conf_override["common:nfo_skip_days"] = None
+    assert config.nfo_skip_days() != 4321
+    assert config.stop_counter() != 1234
+    # Create new instance
+    conf2 = Config()
+    assert getInstance() != conf2
+    assert getInstance() == config
+    G_conf_override["common:main_mode"] = 9
+    G_conf_override["common:source_folder"] = "A:/b/c"
+    # Overrides affect all instances
+    assert config.main_mode() == 9
+    assert conf2.main_mode() == 9
+    assert getInstance().main_mode() == 9
+    assert conf2.source_folder() == "A:/b/c"
+    print("### Override Test ###".center(36))
+    evprint('getInstance().main_mode()')
+    evprint('config.source_folder()')
+    G_conf_override["common:main_mode"] = None
+    evprint('conf2.main_mode()')
+    evprint('config.main_mode()')
+    # accessing an unregistered key raises KeyError
+    try:
+        print(G_conf_override["common:actor_gender"])
+    except KeyError as ke:
+        print(f'Caught KeyError: {ke} is not a registered key of the G_conf_override dict.', file=sys.stderr)
+    print(f"Load Config file '{conf2.ini_path}'.")

core.py

@@ -3,8 +3,6 @@ import os.path
 import pathlib
 import re
 import shutil
-import platform
-import errno
 import sys

 from PIL import Image
@@ -14,7 +12,7 @@ from datetime import datetime
 from ADC_function import *
 from WebCrawler import get_data_from_json
+from number_parser import is_uncensored

 def escape_path(path, escape_literals: str):  # Remove escape literals
     backslash = '\\'
@@ -23,7 +21,8 @@ def escape_path(path, escape_literals: str):  # Remove escape literals
     return path

-def moveFailedFolder(filepath, conf):
+def moveFailedFolder(filepath):
+    conf = config.getInstance()
     failed_folder = conf.failed_folder()
     soft_link = conf.soft_link()
     # in mode 3 or with soft links, maintain a failed list instead; it is loaded when the scan starts so these paths are excluded and never reprocessed
@@ -33,7 +32,6 @@ def moveFailedFolder(filepath):
         print("[-]Add to Failed List file, see '%s'" % ftxt)
         with open(ftxt, 'a', encoding='utf-8') as flt:
             flt.write(f'{filepath}\n')
-            flt.close()
     elif conf.failed_move() and not soft_link:
         failed_name = os.path.join(failed_folder, os.path.basename(filepath))
         mtxt = os.path.abspath(os.path.join(failed_folder, 'where_was_i_before_being_moved.txt'))
@@ -41,8 +39,13 @@ def moveFailedFolder(filepath):
         with open(mtxt, 'a', encoding='utf-8') as wwibbmt:
             tmstr = datetime.now().strftime("%Y-%m-%d %H:%M")
             wwibbmt.write(f'{tmstr} FROM[{filepath}]TO[{failed_name}]\n')
-            wwibbmt.close()
+        try:
+            if os.path.exists(failed_name):
+                print('[-]File Exists while moving to FailedFolder')
+                return
             shutil.move(filepath, failed_name)
+        except:
+            print('[-]File Moving to FailedFolder unsuccessful!')
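
Editor's note: the failed list written above is meant to be read back on the next scan so listed paths are skipped unless ignore_failed_list=1. The writer's filename comes from an elided line (ftxt), so the name below is a stand-in; a hypothetical reader assuming one path per line:

    import os

    def load_failed_list(failed_folder: str) -> set:
        # Hypothetical: 'failed_list.txt' stands in for the elided ftxt filename.
        ftxt = os.path.join(failed_folder, 'failed_list.txt')
        if not os.path.isfile(ftxt):
            return set()
        with open(ftxt, encoding='utf-8') as f:
            return {line.strip() for line in f if line.strip()}
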
def get_info(json_data):  # return the fields stored in the json
@@ -63,14 +66,15 @@ def get_info(json_data):
     return title, studio, year, outline, runtime, director, actor_photo, release, number, cover, trailer, website, series, label

-def small_cover_check(path, number, cover_small, leak_word, c_word, conf: config.Config, filepath):
+def small_cover_check(path, number, cover_small, leak_word, c_word, filepath):
     filename = f"{number}{leak_word}{c_word}-poster.jpg"
-    download_file_with_filename(cover_small, filename, path, conf, filepath)
+    download_file_with_filename(cover_small, filename, path, filepath)
     print('[+]Image Downloaded! ' + os.path.join(path, filename))

-def create_folder(json_data, conf: config.Config):  # create the destination folder
+def create_folder(json_data):  # create the destination folder
     title, studio, year, outline, runtime, director, actor_photo, release, number, cover, trailer, website, series, label = get_info(json_data)
+    conf = config.getInstance()
     success_folder = conf.success_folder()
     actor = json_data.get('actor')
     location_rule = eval(conf.location_rule(), json_data)
@@ -81,35 +85,40 @@ def create_folder(json_data):
     if 'title' in conf.location_rule() and len(title) > maxlen:
         shorttitle = title[0:maxlen]
         location_rule = location_rule.replace(title, shorttitle)
-    path = os.path.join(success_folder, location_rule).strip()
-    if not os.path.isdir(path):
+    # when actor is empty, location_rule evaluates to an absolute path like '/number', which makes the
+    # path join ignore its first argument; prefixing './' keeps it relative in every case
+    path = os.path.join(success_folder, f'./{location_rule.strip()}')
+    if not os.path.exists(path):
         path = escape_path(path, conf.escape_literals())
         try:
             os.makedirs(path)
-            if not os.path.isdir(path):
-                raise
         except:
             path = success_folder + '/' + location_rule.replace('/[' + number + ')-' + title, "/number")
             path = escape_path(path, conf.escape_literals())
+            try:
                 os.makedirs(path)
-        return path
+            except:
+                print(f"[-]Fatal error! Can not make folder '{path}'")
+                sys.exit(0)
+    return os.path.normpath(path)
# ===================== download section ===========================
# path = example: photo, video in the project folder!
-def download_file_with_filename(url, filename, path, conf: config.Config, filepath):
+def download_file_with_filename(url, filename, path, filepath):
+    conf = config.getInstance()
     configProxy = conf.proxy()

     for i in range(configProxy.retry):
         try:
             if configProxy.enable:
-                if not os.path.isdir(path):
+                if not os.path.exists(path):
+                    try:
                         os.makedirs(path)
-                    if not os.path.isdir(path):
-                        raise IOError
+                    except:
+                        print(f"[-]Fatal error! Can not make folder '{path}'")
+                        sys.exit(0)
                 proxies = configProxy.proxies()
                 headers = {
                     'User-Agent': G_USER_AGENT}
@@ -121,10 +130,12 @@ def download_file_with_filename(...)
                     code.write(r.content)
                     return
             else:
-                if not os.path.isdir(path):
+                if not os.path.exists(path):
+                    try:
                         os.makedirs(path)
-                    if not os.path.isdir(path):
-                        raise IOError
+                    except:
+                        print(f"[-]Fatal error! Can not make folder '{path}'")
+                        sys.exit(0)
                 headers = {
                     'User-Agent': G_USER_AGENT}
                 r = requests.get(url, timeout=configProxy.timeout, headers=headers)
@@ -148,46 +159,50 @@ def download_file_with_filename(...)
             print('[-]Image Download : Connect retry ' + str(i) + '/' + str(configProxy.retry))
     except IOError:
         print(f"[-]Create Directory '{path}' failed!")
-        moveFailedFolder(filepath, conf)
+        moveFailedFolder(filepath)
         return
     print('[-]Connect Failed! Please check your Proxy or Network!')
-    moveFailedFolder(filepath, conf)
+    moveFailedFolder(filepath)
     return

-def trailer_download(trailer, leak_word, c_word, number, path, filepath, conf: config.Config):
-    if download_file_with_filename(trailer, number + leak_word + c_word + '-trailer.mp4', path, conf, filepath) == 'failed':
+def trailer_download(trailer, leak_word, c_word, number, path, filepath):
+    if download_file_with_filename(trailer, number + leak_word + c_word + '-trailer.mp4', path, filepath) == 'failed':
         return
-    configProxy = conf.proxy()
+    configProxy = config.getInstance().proxy()
     for i in range(configProxy.retry):
-        if os.path.getsize(path+'/' + number + leak_word + c_word + '-trailer.mp4') == 0:
+        if file_not_exist_or_empty(path+'/' + number + leak_word + c_word + '-trailer.mp4'):
             print('[!]Video Download Failed! Trying again. [{}/3]', i + 1)
-            download_file_with_filename(trailer, number + leak_word + c_word + '-trailer.mp4', path, conf, filepath)
+            download_file_with_filename(trailer, number + leak_word + c_word + '-trailer.mp4', path, filepath)
             continue
         else:
             break
-    if os.path.getsize(path + '/' + number + leak_word + c_word + '-trailer.mp4') == 0:
+    if file_not_exist_or_empty(path + '/' + number + leak_word + c_word + '-trailer.mp4'):
        return
    print('[+]Video Downloaded!', path + '/' + number + leak_word + c_word + '-trailer.mp4')
# stills downloaded successfully, otherwise move to failed
-def extrafanart_download(data, path, conf: config.Config, filepath):
+def extrafanart_download(data, path, filepath):
     j = 1
+    conf = config.getInstance()
     path = os.path.join(path, conf.get_extrafanart())
+    configProxy = conf.proxy()
+    download_only_missing_images = conf.download_only_missing_images()
     for url in data:
         jpg_filename = f'extrafanart-{j}.jpg'
         jpg_fullpath = os.path.join(path, jpg_filename)
-        if download_file_with_filename(url, jpg_filename, path, conf, filepath) == 'failed':
-            moveFailedFolder(filepath, conf)
+        if download_only_missing_images and not file_not_exist_or_empty(jpg_fullpath):
+            continue
+        if download_file_with_filename(url, jpg_filename, path, filepath) == 'failed':
+            moveFailedFolder(filepath)
             return
-        configProxy = conf.proxy()
         for i in range(configProxy.retry):
-            if os.path.getsize(jpg_fullpath) == 0:
+            if file_not_exist_or_empty(jpg_fullpath):
                 print('[!]Image Download Failed! Trying again. [{}/3]', i + 1)
-                download_file_with_filename(url, jpg_filename, path, conf, filepath)
+                download_file_with_filename(url, jpg_filename, path, filepath)
                 continue
             else:
                 break
-        if os.path.getsize(jpg_fullpath) == 0:
+        if file_not_exist_or_empty(jpg_fullpath):
             return
         print('[+]Image Downloaded!', jpg_fullpath)
         j += 1
@@ -195,39 +210,46 @@ def extrafanart_download(data, path, filepath):
# check the cover downloaded successfully, otherwise move to failed
-def image_download(cover, number, leak_word, c_word, path, conf: config.Config, filepath):
+def image_download(cover, number, leak_word, c_word, path, filepath):
     filename = f"{number}{leak_word}{c_word}-fanart.jpg"
     full_filepath = os.path.join(path, filename)
-    if download_file_with_filename(cover, filename, path, conf, filepath) == 'failed':
-        moveFailedFolder(filepath, conf)
+    if config.getInstance().download_only_missing_images() and not file_not_exist_or_empty(full_filepath):
+        return
+    if download_file_with_filename(cover, filename, path, filepath) == 'failed':
+        moveFailedFolder(filepath)
         return
-    configProxy = conf.proxy()
+    configProxy = config.getInstance().proxy()
     for i in range(configProxy.retry):
-        if os.path.getsize(full_filepath) == 0:
+        if file_not_exist_or_empty(full_filepath):
             print('[!]Image Download Failed! Trying again. [{}/3]', i + 1)
-            download_file_with_filename(cover, filename, path, conf, filepath)
+            download_file_with_filename(cover, filename, path, filepath)
             continue
         else:
             break
-    if os.path.getsize(full_filepath) == 0:
+    if file_not_exist_or_empty(full_filepath):
         return
     print('[+]Image Downloaded!', full_filepath)
     shutil.copyfile(full_filepath, os.path.join(path, f"{number}{leak_word}{c_word}-thumb.jpg"))

-def print_files(path, leak_word, c_word, naming_rule, part, cn_sub, json_data, filepath, tag, actor_list, liuchu, uncensored, conf):
+def print_files(path, leak_word, c_word, naming_rule, part, cn_sub, json_data, filepath, tag, actor_list, liuchu, uncensored):
     title, studio, year, outline, runtime, director, actor_photo, release, number, cover, trailer, website, series, label = get_info(json_data)
-    failed_folder = conf.failed_folder()
-    if conf.main_mode() == 3:  # in mode 3 the video file is untouched, so the .nfo must match the video filename (apart from the extension) for KODI and friends to find it
+    if config.getInstance().main_mode() == 3:  # in mode 3 the video file is untouched, so the .nfo must match the video filename (apart from the extension) for KODI and friends to find it
         nfo_path = str(Path(filepath).with_suffix('.nfo'))
     else:
         nfo_path = os.path.join(path,f"{number}{part}{leak_word}{c_word}.nfo")
     try:
-        if not os.path.isdir(path):
+        if not os.path.exists(path):
+            try:
                 os.makedirs(path)
-            if not os.path.isdir(path):
-                raise IOError
+            except:
+                print(f"[-]Fatal error! can not make folder '{path}'")
+                sys.exit(0)
+        # KODI's movie-info view has nowhere to show the number; naming_rule=number+'#'+title would fix
+        # that but makes titles too long. The usually-empty outline is a better home for it, and the
+        # display area players reserve for outline is larger anyway
+        outline = f"{number}#{outline}"
         with open(nfo_path, "wt", encoding='UTF-8') as code:
             print('<?xml version="1.0" encoding="UTF-8" ?>', file=code)
             print("<movie>", file=code)
@@ -279,7 +301,7 @@ def print_files(...)
             print(" <num>" + number + "</num>", file=code)
             print(" <premiered>" + release + "</premiered>", file=code)
             print(" <cover>" + cover + "</cover>", file=code)
-            if config.Config().is_trailer():
+            if config.getInstance().is_trailer():
                 print(" <trailer>" + trailer + "</trailer>", file=code)
             print(" <website>" + website + "</website>", file=code)
             print("</movie>", file=code)
@@ -287,12 +309,12 @@ def print_files(...)
     except IOError as e:
         print("[-]Write Failed!")
         print("[-]", e)
-        moveFailedFolder(filepath, conf)
+        moveFailedFolder(filepath)
         return
     except Exception as e1:
         print("[-]Write Failed!")
         print("[-]", e1)
-        moveFailedFolder(filepath, conf)
+        moveFailedFolder(filepath)
         return
@@ -321,7 +343,7 @@ def cutImage(imagecut, path, number, leak_word, c_word):
# leak (leaked): value 1 or 0
# uncensored: value 1 or 0
# ======================================================================== add watermark
-def add_mark(poster_path, thumb_path, cn_sub, leak, uncensored, conf:config.Config):
+def add_mark(poster_path, thumb_path, cn_sub, leak, uncensored):
     mark_type = ''
     if cn_sub:
         mark_type += ',字幕'
@@ -331,17 +353,17 @@ def add_mark(poster_path, thumb_path, cn_sub, leak, uncensored):
         mark_type += ',无码'
     if mark_type == '':
         return
-    add_mark_thread(thumb_path, cn_sub, leak, uncensored, conf)
+    add_mark_thread(thumb_path, cn_sub, leak, uncensored)
     print('[+]Thumb Add Mark: ' + mark_type.strip(','))
-    add_mark_thread(poster_path, cn_sub, leak, uncensored, conf)
+    add_mark_thread(poster_path, cn_sub, leak, uncensored)
     print('[+]Poster Add Mark: ' + mark_type.strip(','))

-def add_mark_thread(pic_path, cn_sub, leak, uncensored, conf):
+def add_mark_thread(pic_path, cn_sub, leak, uncensored):
     size = 14
     img_pic = Image.open(pic_path)
     # read the configured corner; the modulo below, together with pos, walks the corners clockwise
     # top-left 0, top-right 1, bottom-right 2, bottom-left 3
-    count = conf.watermark_type()
+    count = config.getInstance().watermark_type()
     if cn_sub == 1 or cn_sub == '1':
         add_to_pic(pic_path, img_pic, size, count, 1)  # add
         count = (count + 1) % 4
@@ -391,29 +413,38 @@ def add_to_pic(pic_path, img_pic, size, count, mode):
     img_pic.save(pic_path, quality=95)
# ======================== end =================================
-def paste_file_to_folder(filepath, path, number, leak_word, c_word, conf: config.Config):  # file path, number, suffix, destination
+def paste_file_to_folder(filepath, path, number, leak_word, c_word):  # file path, number, suffix, destination
     filepath_obj = pathlib.Path(filepath)
     houzhui = filepath_obj.suffix
     file_parent_origin_path = str(filepath_obj.parent)
     try:
         targetpath = os.path.join(path, f"{number}{leak_word}{c_word}{houzhui}")
+        # never overwrite, under any circumstances: if a data-source or engine bug handed every file the
+        # same number, renaming them onto each other one by one would lose everything beyond recovery
+        if os.path.exists(targetpath):
+            raise FileExistsError('File Exists on destination path, we will never overwriting.')
+        soft_link = config.getInstance().soft_link()
         # if soft_link=1, use a symlink
-        if conf.soft_link() == 0:
+        if soft_link == 0:
             shutil.move(filepath, targetpath)
-        elif conf.soft_link() == 1:
-            # use a relative path so the video still opens when accessed over the network
+        elif soft_link == 1:
+            # first try a relative path, so the video still opens when accessed over the network; if that
+            # fails (e.g. across drive letters, where relative paths are unsupported), retry the symlink
+            # with an absolute path
+            try:
                 filerelpath = os.path.relpath(filepath, path)
                 os.symlink(filerelpath, targetpath)
-        elif conf.soft_link() == 2:
+            except:
+                os.symlink(filepath_obj.resolve(), targetpath)
+        elif soft_link == 2:
             shutil.move(filepath, targetpath)
             # after the move, leave a traceable symlink at the old location pointing at the new one,
             # so a file can be tracked down if a wrong number led to a bad rename and move, and
-            # recovered by hand. The symlink's extension was changed so it would not be scraped again
+            # recovered by hand. Since symlinks are no longer scraped, the extension needs no change
             targetabspath = os.path.abspath(targetpath)
             if targetabspath != os.path.abspath(filepath):
                 targetrelpath = os.path.relpath(targetabspath, file_parent_origin_path)
-                os.symlink(targetrelpath, filepath + '#sym')
+                os.symlink(targetrelpath, filepath)
-        sub_res = conf.sub_rule()
+        sub_res = config.getInstance().sub_rule()
         for subname in sub_res:
             sub_filepath = str(filepath_obj.with_suffix(subname))
@@ -422,9 +453,9 @@ def paste_file_to_folder(filepath, path, number, leak_word, c_word):
                 print('[+]Sub moved!')
         return True
-    except FileExistsError:
-        print('[-]File Exists! Please check your movie!')
-        print('[-]move to the root folder of the program.')
+    except FileExistsError as fee:
+        print(f'[-]FileExistsError: {fee}')
+        moveFailedFolder(filepath)
         return
     except PermissionError:
         print('[-]Error! Please run as administrator!')
@@ -434,19 +465,22 @@ def paste_file_to_folder(filepath, path, number, leak_word, c_word):
         return
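
Editor's note: the relative-then-absolute fallback exists because os.path.relpath cannot bridge two Windows drive letters; on such input it raises instead of returning a usable path. A quick illustration (Windows-only behavior):

    import os
    # On Windows, relpath across drives raises ValueError -- the case the
    # try/except above falls back from by linking to the absolute path instead:
    try:
        os.path.relpath('D:/movies/a.mp4', 'E:/JAV_output')
    except ValueError as e:
        print(e)  # path is on mount 'D:', start on mount 'E:'
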
-def paste_file_to_folder_mode2(filepath, path, multi_part, number, part, leak_word, c_word, conf):  # file path, number, suffix, destination
+def paste_file_to_folder_mode2(filepath, path, multi_part, number, part, leak_word, c_word):  # file path, number, suffix, destination
     if multi_part == 1:
         number += part  # number gets the CD1 suffix appended here
     filepath_obj = pathlib.Path(filepath)
     houzhui = filepath_obj.suffix
     file_parent_origin_path = str(filepath_obj.parent)
+    targetpath = os.path.join(path, f"{number}{part}{leak_word}{c_word}{houzhui}")
+    if os.path.exists(targetpath):
+        raise FileExistsError('File Exists on destination path, we will never overwriting.')
     try:
-        if conf.soft_link():
-            os.symlink(filepath, os.path.join(path, f"{number}{part}{leak_word}{c_word}{houzhui}"))
+        if config.getInstance().soft_link():
+            os.symlink(filepath, targetpath)
         else:
-            shutil.move(filepath, os.path.join(path, f"{number}{part}{leak_word}{c_word}{houzhui}"))
+            shutil.move(filepath, targetpath)
-        sub_res = conf.sub_rule()
+        sub_res = config.getInstance().sub_rule()
         for subname in sub_res:
             sub_filepath = str(filepath_obj.with_suffix(subname))
             if os.path.isfile(sub_filepath):  # move subtitles
@@ -454,9 +488,8 @@ def paste_file_to_folder_mode2(filepath, path, multi_part, number, part, leak_word, c_word):
                 print('[+]Sub moved!')
         print('[!]Success')
         return True
-    except FileExistsError:
-        print('[-]File Exists! Please check your movie!')
-        print('[-]move to the root folder of the program.')
+    except FileExistsError as fee:
+        print(f'[-]FileExistsError: {fee}')
         return
     except PermissionError:
         print('[-]Error! Please run as administrator!')
@@ -465,7 +498,7 @@ def paste_file_to_folder_mode2(filepath, path, multi_part, number, part, leak_word, c_word):
         print(f'[-]OS Error errno {oserr.errno}')
         return

-def get_part(filepath, conf):
+def get_part(filepath):
     try:
         if re.search('-CD\d+', filepath):
             return re.findall('-CD\d+', filepath)[0]
@@ -473,7 +506,7 @@ def get_part(filepath):
             return re.findall('-cd\d+', filepath)[0]
     except:
         print("[-]failed!Please rename the filename again!")
-        moveFailedFolder(filepath, conf)
+        moveFailedFolder(filepath)
         return
@@ -493,7 +526,8 @@ def debug_print(data: json):
     pass

-def core_main(file_path, number_th, conf: config.Config):
+def core_main(file_path, number_th):
+    conf = config.getInstance()
     # ======================================================================= initialize needed variables
     multi_part = 0
     part = ''
@@ -507,11 +541,11 @@ def core_main(file_path, number_th):
     # the commented-out variable below is not needed
     #rootpath= os.getcwd
     number = number_th
-    json_data = get_data_from_json(number, conf)  # resolve the number
+    json_data = get_data_from_json(number)  # resolve the number
     # Return if blank dict returned (data not found)
     if not json_data:
-        moveFailedFolder(filepath, conf)
+        moveFailedFolder(filepath)
         return

     if json_data["number"] != number:
@@ -526,16 +560,13 @@ def core_main(file_path, number_th):
     # ======================================================================= detect -C / -CD suffixes
     if '-CD' in filepath or '-cd' in filepath:
         multi_part = 1
-        part = get_part(filepath, conf)
+        part = get_part(filepath)
     if '-c.' in filepath or '-C.' in filepath or '中文' in filepath or '字幕' in filepath:
         cn_sub = '1'
         c_word = '-C'  # suffix for movies with Chinese subtitles

     # determine whether the movie is uncensored
-    if is_uncensored(number):
-        uncensored = 1
-    else:
-        uncensored = 0
+    uncensored = 1 if is_uncensored(number) else 0

     if '流出' in filepath or 'uncensored' in filepath:
@@ -550,7 +581,7 @@ def core_main(file_path, number_th):
     debug_print(json_data)

     # create the destination folder
-    #path = create_folder(rootpath + '/' + conf.success_folder(), json_data.get('location_rule'), json_data, conf)
+    #path = create_folder(rootpath + '/' + conf.success_folder(), json_data.get('location_rule'), json_data)

     # main_mode
     # 1: Scraping mode
@@ -558,54 +589,55 @@ def core_main(file_path, number_th):
     # 3: scrape without changing any paths
     if conf.main_mode() == 1:
         # create the folder
-        path = create_folder(json_data, conf)
+        path = create_folder(json_data)
         if multi_part == 1:
             number += part  # number gets the CD1 suffix appended here

         # check the small cover; when imagecut is 3, download it
         if imagecut == 3:
-            small_cover_check(path, number, json_data.get('cover_small'), leak_word, c_word, conf, filepath)
+            small_cover_check(path, number, json_data.get('cover_small'), leak_word, c_word, filepath)

         # create_folder returned the numbered path
-        image_download( json_data.get('cover'), number, leak_word, c_word, path, conf, filepath)
+        image_download( json_data.get('cover'), number, leak_word, c_word, path, filepath)

         if not multi_part or part.lower() == '-cd1':
             try:
                 # download the trailer
                 if conf.is_trailer() and json_data.get('trailer'):
-                    trailer_download(json_data.get('trailer'), leak_word, c_word, number, path, filepath, conf)
+                    trailer_download(json_data.get('trailer'), leak_word, c_word, number, path, filepath)
             except:
                 pass

             try:
-                # download stills: data, path, conf: config.Config, filepath
+                # download stills: data, path, filepath
                 if conf.is_extrafanart() and json_data.get('extrafanart'):
-                    extrafanart_download(json_data.get('extrafanart'), path, conf, filepath)
+                    extrafanart_download(json_data.get('extrafanart'), path, filepath)
             except:
                 pass

         # crop the cover
         cutImage(imagecut, path, number, leak_word, c_word)

-        # write the files
-        print_files(path, leak_word, c_word, json_data.get('naming_rule'), part, cn_sub, json_data, filepath, tag, json_data.get('actor_list'), liuchu, uncensored, conf)
-        # move the file
-        paste_file_to_folder(filepath, path, number, leak_word, c_word, conf)
+        # add watermarks
         poster_path = os.path.join(path, f"{number}{leak_word}{c_word}-poster.jpg")
         thumb_path = os.path.join(path, f"{number}{leak_word}{c_word}-thumb.jpg")
         if conf.is_watermark():
-            add_mark(poster_path, thumb_path, cn_sub, leak, uncensored, conf)
+            add_mark(poster_path, thumb_path, cn_sub, leak, uncensored)

+        # move the movie
+        paste_file_to_folder(filepath, path, number, leak_word, c_word)
+
+        # write the .nfo metadata file last; its creation marks the task as successful
+        print_files(path, leak_word, c_word, json_data.get('naming_rule'), part, cn_sub, json_data, filepath, tag, json_data.get('actor_list'), liuchu, uncensored)

     elif conf.main_mode() == 2:
         # create the folder
-        path = create_folder(json_data, conf)
+        path = create_folder(json_data)
         # move the file
-        paste_file_to_folder_mode2(filepath, path, multi_part, number, part, leak_word, c_word, conf)
+        paste_file_to_folder_mode2(filepath, path, multi_part, number, part, leak_word, c_word)

         poster_path = os.path.join(path, f"{number}{leak_word}{c_word}-poster.jpg")
         thumb_path = os.path.join(path, f"{number}{leak_word}{c_word}-thumb.jpg")
         if conf.is_watermark():
-            add_mark(poster_path, thumb_path, cn_sub, leak, uncensored, conf)
+            add_mark(poster_path, thumb_path, cn_sub, leak, uncensored)

     elif conf.main_mode() == 3:
         path = str(Path(file_path).parent)
@@ -614,28 +646,29 @@ def core_main(file_path, number_th):
         # check the small cover; when imagecut is 3, download it
         if imagecut == 3:
-            small_cover_check(path, number, json_data.get('cover_small'), leak_word, c_word, conf, filepath)
+            small_cover_check(path, number, json_data.get('cover_small'), leak_word, c_word, filepath)

         # create_folder returned the numbered path
-        image_download(json_data.get('cover'), number, leak_word, c_word, path, conf, filepath)
+        image_download(json_data.get('cover'), number, leak_word, c_word, path, filepath)

         if not multi_part or part.lower() == '-cd1':
             # download the trailer
             if conf.is_trailer() and json_data.get('trailer'):
-                trailer_download(json_data.get('trailer'), leak_word, c_word, number, path, filepath, conf)
+                trailer_download(json_data.get('trailer'), leak_word, c_word, number, path, filepath)

-            # download stills: data, path, conf: config.Config, filepath
+            # download stills: data, path, filepath
             if conf.is_extrafanart() and json_data.get('extrafanart'):
-                extrafanart_download(json_data.get('extrafanart'), path, conf, filepath)
+                extrafanart_download(json_data.get('extrafanart'), path, filepath)

         # crop the cover
         cutImage(imagecut, path, number, leak_word, c_word)

-        # write the files
-        print_files(path, leak_word, c_word, json_data.get('naming_rule'), part, cn_sub, json_data, filepath,
-                    tag, json_data.get('actor_list'), liuchu, uncensored, conf)
+        # add watermarks
         poster_path = os.path.join(path, f"{number}{leak_word}{c_word}-poster.jpg")
         thumb_path = os.path.join(path, f"{number}{leak_word}{c_word}-thumb.jpg")
         if conf.is_watermark():
-            add_mark(poster_path, thumb_path, cn_sub, leak, uncensored, conf)
+            add_mark(poster_path, thumb_path, cn_sub, leak, uncensored)

+        # write the .nfo metadata file last; its creation marks the task as successful
+        print_files(path, leak_word, c_word, json_data.get('naming_rule'), part, cn_sub, json_data, filepath,
+                    tag, json_data.get('actor_list'), liuchu, uncensored)

number_parser.py

@@ -1,14 +1,14 @@
 import os
 import re
-from core import *
+import sys
+import config

 G_spat = re.compile(
     "^22-sht\.me|-fhd|_fhd|^fhd_|^fhd-|-hd|_hd|^hd_|^hd-|-sd|_sd|-1080p|_1080p|-720p|_720p|^hhd800\.com@",
     re.IGNORECASE)

-def get_number(debug,filepath: str) -> str:
+def get_number(debug,file_path: str) -> str:
     # """
     # >>> from number_parser import get_number
     # >>> get_number("/Users/Guest/AV_Data_Capture/snis-829.mp4")
@@ -32,77 +32,174 @@ def get_number(debug,file_path: str) -> str:
     # >>> get_number("snis-829-C.mp4")
     # 'snis-829'
     # """
-    filepath = os.path.basename(filepath)
-    if debug == False:
+    filepath = os.path.basename(file_path)
+    # the debug==True and debug==False blocks were merged: this module only does string work
+    # with no I/O, so in debug mode printing the exception message is all that is needed
     try:
-        if '-' in filepath or '_' in filepath:  # regular extraction, mainly for numbers containing '-' or '_'
-            #filepath = filepath.replace("_", "-")
+        file_number = get_number_by_dict(filepath)
+        if file_number:
+            return file_number
+        elif '-' in filepath or '_' in filepath:  # regular extraction, mainly for numbers containing '-' or '_'
             filepath = G_spat.sub("", filepath)
             filename = str(re.sub("\[\d{4}-\d{1,2}-\d{1,2}\] - ", "", filepath))  # strip dates from the filename
             lower_check = filename.lower()
             if 'fc2' in lower_check:
                 filename = lower_check.replace('ppv', '').replace('--', '-').replace('_', '-').upper()
-            file_number = get_number_by_dict(lower_check)
-            if file_number:
-                return file_number
             return str(re.search(r'\w+(-|_)\w+', filename, re.A).group())
         else:  # extract numbers without '-' (FANZA CID)
             # western-release matching rule
             oumei = re.search(r'[a-zA-Z]+\.\d{2}\.\d{2}\.\d{2}', filepath)
             if oumei:
                 return oumei.group()
             try:
                 return str(
                     re.findall(r'(.+?)\.',
                                str(re.search('([^<>/\\\\|:""\\*\\?]+)\\.\\w+$', filepath).group()))).strip(
                     "['']").replace('_', '-')
             except:
-                return re.search(r'(.+?)\.', filepath)[0]
+                return str(re.search(r'(.+?)\.', filepath)[0])
     except Exception as e:
-        print('[-]' + str(e))
-        return
-    elif debug == True:
-        if '-' in filepath or '_' in filepath:  # regular extraction, mainly for numbers containing '-' or '_'
-            #filepath = filepath.replace("_", "-")
-            filepath = G_spat.sub("", filepath)
-            filename = str(re.sub("\[\d{4}-\d{1,2}-\d{1,2}\] - ", "", filepath))  # strip dates from the filename
-            lower_check = filename.lower()
-            if 'fc2' in lower_check:
-                filename = lower_check.replace('ppv', '').replace('--', '-').replace('_', '-').upper()
-            file_number = get_number_by_dict(lower_check)
-            if file_number:
-                return file_number
-            return str(re.search(r'\w+(-|_)\w+', filename, re.A).group())
-        else:  # extract numbers without '-' (FANZA CID)
-            # western-release matching rule
-            oumei = re.search(r'[a-zA-Z]+\.\d{2}\.\d{2}\.\d{2}', filepath)
-            if oumei:
-                return oumei.group()
-            try:
-                return str(
-                    re.findall(r'(.+?)\.',
-                               str(re.search('([^<>/\\\\|:""\\*\\?]+)\\.\\w+$', filepath).group()))).strip(
-                    "['']").replace('_', '-')
-            except:
-                return re.search(r'(.+?)\.', filepath)[0]
-
-G_TAKE_NUM_RULES = {
-    'tokyo' : lambda x:str(re.search(r'(cz|gedo|k|n|red-|se)\d{2,4}', x, re.A).group()),
-    'carib' : lambda x:str(re.search(r'\d{6}(-|_)\d{3}', x, re.A).group()).replace('_', '-'),
-    '1pon' : lambda x:str(re.search(r'\d{6}(-|_)\d{3}', x, re.A).group()).replace('-', '_'),
-    '10mu' : lambda x:str(re.search(r'\d{6}(-|_)\d{2}', x, re.A).group()).replace('-', '_'),
-    'x-art' : lambda x:str(re.search(r'x-art\.\d{2}\.\d{2}\.\d{2}', x, re.A).group())
-}
-
-def get_number_by_dict(lower_filename: str) -> str:
-    for k,v in G_TAKE_NUM_RULES.items():
-        if k in lower_filename:
-            return v(lower_filename)
+        if debug:
+            print(f'[-]Number Parser exception: {e} [{file_path}]')
     return None
# if __name__ == "__main__":
# 按javdb数据源的命名规范提取number
G_TAKE_NUM_RULES = {
'tokyo.*hot' : lambda x:str(re.search(r'(cz|gedo|k|n|red-|se)\d{2,4}', x, re.I).group()),
'carib' : lambda x:str(re.search(r'\d{6}(-|_)\d{3}', x, re.I).group()).replace('_', '-'),
'1pon|mura|paco' : lambda x:str(re.search(r'\d{6}(-|_)\d{3}', x, re.I).group()).replace('-', '_'),
'10mu' : lambda x:str(re.search(r'\d{6}(-|_)\d{2}', x, re.I).group()).replace('-', '_'),
'x-art' : lambda x:str(re.search(r'x-art\.\d{2}\.\d{2}\.\d{2}', x, re.I).group()),
'xxx-av': lambda x:''.join(['xxx-av-', re.findall(r'xxx-av[^\d]*(\d{3,5})[^\d]*', x, re.I)[0]]),
'heydouga': lambda x:'heydouga-' + '-'.join(re.findall(r'(\d{4})[\-_](\d{3,4})[^\d]*', x, re.I)[0])
}
def get_number_by_dict(filename: str) -> str:
try:
for k,v in G_TAKE_NUM_RULES.items():
if re.search(k, filename, re.I):
return v(filename)
except:
pass
return None
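
Editor's note: each dict key is itself a regex over studio markers, so one rule can cover several labels ('1pon|mura|paco'). Quick usage against the rules above:

    # Case-insensitive matching (re.I), so mixed-case filenames work too:
    print(get_number_by_dict('Carib-020317_001.mp4'))      # 020317-001
    print(get_number_by_dict('pacopacomama-093021_539'))   # 093021_539
    print(get_number_by_dict('XXX-AV 22061.iso'))          # xxx-av-22061
    print(get_number_by_dict('heydouga-4102-023.mp4'))     # heydouga-4102-023
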
+class Cache_uncensored_conf:
+    prefix = None
+
+    def is_empty(self):
+        return bool(self.prefix is None)
+
+    def set(self, v: list):
+        if not v or not len(v) or not len(v[0]):
+            raise ValueError('input prefix list empty or None')
+        s = v[0]
+        if len(v) > 1:
+            for i in v[1:]:
+                s += f"|{i}.+"
+        self.prefix = re.compile(s, re.I)
+
+    def check(self, number):
+        if self.prefix is None:
+            raise ValueError('No init re compile')
+        return self.prefix.match(number)
+
+G_cache_uncensored_conf = Cache_uncensored_conf()
+
+# ======================================================================== uncensored check
+def is_uncensored(number):
+    if re.match(
+        r'[\d-]{4,}|\d{6}_\d{2,3}|(cz|gedo|k|n|red-|se)\d{2,4}|heyzo.+|xxx-av-.+|heydouga-.+|x-art\.\d{2}\.\d{2}\.\d{2}',
+        number,
+        re.I
+    ):
+        return True
+    if G_cache_uncensored_conf.is_empty():
+        G_cache_uncensored_conf.set(config.getInstance().get_uncensored().split(','))
+    return G_cache_uncensored_conf.check(number)
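
Editor's note: set() joins the configured uncensored_prefix entries into a single alternation compiled once, so is_uncensored() only pays the regex-build cost on first use. A usage sketch of the class above:

    # How the cached prefix matcher behaves once built from uncensored_prefix:
    c = Cache_uncensored_conf()
    c.set('S2M,BT,LAF,SMD'.split(','))   # compiles r'S2M|BT.+|LAF.+|SMD.+' with re.I
    print(bool(c.check('SMD-115')))      # True  -- anchored at the start via match()
    print(bool(c.check('ABP-123')))      # False
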
if __name__ == "__main__":
# import doctest # import doctest
# doctest.testmod(raise_on_error=True) # doctest.testmod(raise_on_error=True)
test_use_cases = (
"Tokyo Hot n9001 FHD.mp4", # 无-号,以前无法正确提取
"TokyoHot-n1287-HD SP2006 .mp4",
"caribean-020317_001.nfo", # -号误命名为_号的
"257138_3xplanet_1Pondo_080521_001.mp4",
"ADV-R0624-CD3.wmv", # 多碟影片
"XXX-AV 22061-CD5.iso", # 新支持片商格式 xxx-av-22061 命名规则来自javdb数据源
"xxx-av 20589.mp4",
"Muramura-102114_145-HD.wmv", # 新支持片商格式 102114_145 命名规则来自javdb数据源
"heydouga-4102-023-CD2.iso", # 新支持片商格式 heydouga-4102-023 命名规则来自javdb数据源
"HeyDOuGa4236-1048 Ai Qiu - .mp4", # heydouga-4236-1048 命名规则来自javdb数据源
"pacopacomama-093021_539-FHD.mkv" # 新支持片商格式 093021_539 命名规则来自javdb数据源
)
def evprint(evstr):
code = compile(evstr, "<string>", "eval")
print("{1:>20} # '{0}'".format(evstr[18:-2], eval(code)))
for t in test_use_cases:
evprint(f'get_number(True, "{t}")')
if len(sys.argv)<=1 or not re.search('^[A-Z]:?', sys.argv[1], re.IGNORECASE):
sys.exit(0)
+    # Use Everything's ES command-line tool to collect video filenames from whole disks as number-parser
+    # test data. The argument is a drive letter A..Z or a path that includes the drive letter.
+    # https://www.voidtools.com/support/everything/command_line_interface/
+    # The ES CLI needs the Everything search engine running, and the single es.exe binary must be on PATH.
+    # Everything is free software.
+    # Examples:
+    # python.exe .\number_parser.py ALL              # search all disks for videos
+    # python.exe .\number_parser.py D                # search drive D
+    # python.exe .\number_parser.py D:               # same as above
+    # python.exe .\number_parser.py D:\download\JAVs # search \download\JAVs on drive D (path must include the drive letter)
+    # ==================
+    # Linux/WSL1|2: use mlocate (Ubuntu/Debian) or plocate (Debian sid) to collect filenames as test data.
+    # Install with 'sudo apt install mlocate' (or plocate) and run 'sudo updatedb' once to build the index.
+    # macOS: use glocate from findutils; install with 'sudo brew install findutils' and run 'sudo gupdatedb' once to build the index.
+    # Example:
+    # python3 ./number_parser.py ALL
+    import subprocess
+    ES_search_path = "ALL disks"
+    if sys.argv[1] == "ALL":
+        if sys.platform == "win32":
+            # ES_prog_path = 'C:/greensoft/es/es.exe'
+            ES_prog_path = 'es.exe'  # es.exe must live in a directory on PATH
+            ES_cmdline = f'{ES_prog_path} -name size:gigantic ext:mp4;avi;rmvb;wmv;mov;mkv;flv;ts;webm;iso;mpg;m4v'
+            out_bytes = subprocess.check_output(ES_cmdline.split(' '))
+            out_text = out_bytes.decode('gb18030')  # Chinese Windows 10 x64 outputs GB18030 by default; it maps bijectively onto Unicode, so nothing is lost in transcoding
+            out_list = out_text.splitlines()
+        elif sys.platform in ("linux", "darwin"):
+            ES_prog_path = 'locate' if sys.platform == 'linux' else 'glocate'
+            ES_cmdline = r"{} -b -i --regex '\.mp4$|\.avi$|\.rmvb$|\.wmv$|\.mov$|\.mkv$|\.webm$|\.iso$|\.mpg$|\.m4v$'".format(ES_prog_path)
+            out_bytes = subprocess.check_output(ES_cmdline.split(' '))
+            out_text = out_bytes.decode('utf-8')
+            out_list = [os.path.basename(line) for line in out_text.splitlines()]
+        else:
+            print('[-]Unsupported platform! Please run on OS Windows/Linux/MacOSX. Exit.')
+            sys.exit(1)
+    else:  # Windows, single disk
+        if sys.platform != "win32":
+            print('[!]Usage: python3 ./number_parser.py ALL')
+            sys.exit(0)
+        # ES_prog_path = 'C:/greensoft/es/es.exe'
+        ES_prog_path = 'es.exe'  # es.exe must live in a directory on PATH
+        if os.path.isdir(sys.argv[1]):
+            ES_search_path = sys.argv[1]
+        else:
+            ES_search_path = sys.argv[1][0] + ':/'
+            if not os.path.isdir(ES_search_path):
+                ES_search_path = 'C:/'
+        ES_search_path = os.path.normcase(ES_search_path)
+        ES_cmdline = f'{ES_prog_path} -path {ES_search_path} -name size:gigantic ext:mp4;avi;rmvb;wmv;mov;mkv;webm;iso;mpg;m4v'
+        out_bytes = subprocess.check_output(ES_cmdline.split(' '))
+        out_text = out_bytes.decode('gb18030')  # Chinese Windows 10 x64 outputs GB18030 by default; it maps bijectively onto Unicode, so nothing is lost in transcoding
+        out_list = out_text.splitlines()
+    print(f'\n[!]{ES_prog_path} is searching {ES_search_path} for movies as number parser test cases...')
+    print(f'[+]Find {len(out_list)} Movies.')
+    for filename in out_list:
+        try:
+            n = get_number(True, filename)
+            if n:
+                print(' [{0}] {2}# {1}'.format(n, filename, '#uncensored ' if is_uncensored(n) else ''))
+            else:
+                print(f'[-]Number return None. # {filename}')
+        except Exception as e:
+            print(f'[-]Number Parser exception: {e} [{filename}]')
+    sys.exit(0)


@@ -10,7 +10,8 @@ pyinstaller --onefile AV_Data_Capture.py `
     --hidden-import ADC_function.py `
    --hidden-import core.py `
    --add-data "$CLOUDSCRAPER_PATH;cloudscraper" `
-    --add-data "Img;Img"
+    --add-data "Img;Img" `
+    --add-data "config.ini;." `

 rmdir -Recurse -Force build
 rmdir -Recurse -Force __pycache__


@@ -1,4 +1,8 @@
 pkg install python38 py38-requests py38-pip py38-lxml py38-pillow py38-cloudscraper py38-pysocks git zip py38-beautifulsoup448
 pip install pyquery pyinstaller
-pyinstaller --onefile AV_Data_Capture.py --hidden-import ADC_function.py --hidden-import core.py --add-data "$(python3.8 -c 'import cloudscraper as _; print(_.__path__[0])' | tail -n 1):cloudscraper" --add-data "Img:Img"
+pyinstaller --onefile AV_Data_Capture.py --hidden-import ADC_function.py --hidden-import core.py \
+    --add-data "$(python3.8 -c 'import cloudscraper as _; print(_.__path__[0])' | tail -n 1):cloudscraper" \
+    --add-data "Img:Img" \
+    --add-data "config.ini:." \
 cp config.ini ./dist


@@ -12,5 +12,9 @@
 #fi
 pip3 install -r requirements.txt
 pip3 install cloudscraper==1.2.52
-pyinstaller --onefile AV_Data_Capture.py --hidden-import ADC_function.py --hidden-import core.py --add-data "$(python3 -c 'import cloudscraper as _; print(_.__path__[0])' | tail -n 1):cloudscraper" --add-data "Img:Img"
+pyinstaller --onefile AV_Data_Capture.py --hidden-import ADC_function.py --hidden-import core.py \
+    --add-data "$(python3 -c 'import cloudscraper as _; print(_.__path__[0])' | tail -n 1):cloudscraper" \
+    --add-data "Img:Img" \
+    --add-data "config.ini:." \
 cp config.ini ./dist
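
Editor's note: across these build scripts the only per-platform difference in the --add-data flags is the separator between source and destination: ';' on Windows and ':' elsewhere, which matches Python's os.pathsep. A tiny sketch for scripts that must emit the right form on either platform:

    import sys
    # os.pathsep is ';' on Windows and ':' on POSIX -- the same split PyInstaller expects.
    sep = ';' if sys.platform == 'win32' else ':'
    print(f'--add-data "config.ini{sep}."')
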