Merge pull request #1008 from yoshiko2/master

1
2023-05-05 01:19:39 +08:00
parent 39b88090a0 3e4a59d4c2
commit 3ac5c6f971
25 changed files with 463 additions and 208 deletions
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -19,6 +19,16 @@ jobs:
    steps:
      - uses: actions/checkout@v2
      - name: Install UPX
        uses: crazy-max/ghaction-upx@v2
        if: matrix.os == 'windows-latest' || matrix.os == 'ubuntu-latest'
        with:
          install-only: true
      - name: UPX version
        if: matrix.os == 'windows-latest' || matrix.os == 'ubuntu-latest'
        run: upx --version
      - name: Setup Python 3.10
        uses: actions/setup-python@v2
        with:
@@ -28,6 +38,7 @@ jobs:
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt
          pip install face_recognition --no-deps
          pip install pyinstaller
      - name: Test number_perser.get_number
@@ -39,11 +50,11 @@ jobs:
        run: |
          pyinstaller \
            --onefile Movie_Data_Capture.py \
-            --python-option u \
+            --collect-submodules "scrapinglib" \
-            --hidden-import "ImageProcessing.cnn" \
+            --collect-submodules "ImageProcessing" \
-            --add-data "$(python -c 'import cloudscraper as _; print(_.__path__[0])' | tail -n 1):cloudscraper" \
+            --collect-data "face_recognition_models" \
-            --add-data "$(python -c 'import opencc as _; print(_.__path__[0])' | tail -n 1):opencc" \
+            --collect-data "cloudscraper" \
-            --add-data "$(python -c 'import face_recognition_models as _; print(_.__path__[0])' | tail -n 1):face_recognition_models" \
+            --collect-data "opencc" \
            --add-data "Img:Img" \
            --add-data "config.ini:." \
@@ -52,11 +63,11 @@ jobs:
        run: |
          pyinstaller `
            --onefile Movie_Data_Capture.py `
-            --python-option u `
+            --collect-submodules "scrapinglib" `
-            --hidden-import "ImageProcessing.cnn" `
+            --collect-submodules "ImageProcessing" `
-            --add-data "$(python -c 'import cloudscraper as _; print(_.__path__[0])' | tail -n 1);cloudscraper" `
+            --collect-data "face_recognition_models" `
-            --add-data "$(python -c 'import opencc as _; print(_.__path__[0])' | tail -n 1);opencc" `
+            --collect-data "cloudscraper" `
-            --add-data "$(python -c 'import face_recognition_models as _; print(_.__path__[0])' | tail -n 1);face_recognition_models" `
+            --collect-data "opencc" `
            --add-data "Img;Img" `
            --add-data "config.ini;." `
@@ -77,5 +88,5 @@ jobs:
      - name: Upload build artifact
        uses: actions/upload-artifact@v1
        with:
-          name: Movie_Data_Capture-CLI-${{ env.VERSION }}-${{ runner.os }}-amd64
+          name: MDC-${{ env.VERSION }}-${{ runner.os }}-amd64
          path: dist
--- a/ADC_function.py
+++ b/ADC_function.py
@@ -301,7 +301,7 @@ def get_html_by_scraper(url: str = None, cookies: dict = None, ua: str = None, r
 def translate(
        src: str,
-        target_language: str = "zh_cn",
+        target_language: str = config.getInstance().get_target_language(),
        engine: str = "google-free",
        app_id: str = "",
        key: str = "",
@@ -342,7 +342,15 @@ def translate(
        result = post_html(url=url, query=body, headers=headers)
        translate_list = [i["text"] for i in result.json()[0]["translations"]]
        trans_result = trans_result.join(translate_list)
-
+    elif engine == "deeplx":
        url = config.getInstance().get_translate_service_site()
        res = requests.post(f"{url}/translate", json={
            'text': src,
            'source_lang': 'auto',
            'target_lang': target_language,
        })
        if res.text.strip():
            trans_result = res.json().get('data')
    else:
        raise ValueError("Non-existent translation engine")
--- a/ImageProcessing/init.py
+++ b/ImageProcessing/init.py
@@ -60,9 +60,9 @@ def face_crop_height(filename, width, height):
    return (0, 0, width, cropHeight)
-def cutImage(imagecut, path, fanart_path, poster_path, skip_facerec=False):
+def cutImage(imagecut, path, thumb_path, poster_path, skip_facerec=False):
    conf = config.getInstance()
-    fullpath_fanart = os.path.join(path, fanart_path)
+    fullpath_fanart = os.path.join(path, thumb_path)
    fullpath_poster = os.path.join(path, poster_path)
    aspect_ratio = conf.face_aspect_ratio()
    if conf.face_aways_imagecut():
--- a/Movie_Data_Capture.py
+++ b/Movie_Data_Capture.py
@@ -104,9 +104,9 @@ is performed. It may help you correct wrong numbers before real job.""")
    set_str_or_none("common:source_folder", args.path)
    set_bool_or_none("common:auto_exit", args.auto_exit)
    set_natural_number_or_none("common:nfo_skip_days", args.days)
-    set_natural_number_or_none("common:stop_counter", args.cnt)
+    set_natural_number_or_none("advenced_sleep:stop_counter", args.cnt)
    set_bool_or_none("common:ignore_failed_list", args.ignore_failed_list)
-    set_str_or_none("common:rerun_delay", args.delaytm)
+    set_str_or_none("advenced_sleep:rerun_delay", args.delaytm)
    set_str_or_none("priority:website", args.site)
    if isinstance(args.dnimg, bool) and args.dnimg:
        conf.set_override("common:download_only_missing_images=0")
@@ -119,7 +119,7 @@ is performed. It may help you correct wrong numbers before real job.""")
    if conf.main_mode() == 3:
        no_net_op = args.no_network_operation
        if no_net_op:
-            conf.set_override("common:stop_counter=0;rerun_delay=0s;face:aways_imagecut=1")
+            conf.set_override("advenced_sleep:stop_counter=0;advenced_sleep:rerun_delay=0s;face:aways_imagecut=1")
    return args.file, args.number, args.logdir, args.regexstr, args.zero_op, no_net_op, args.specified_source, args.specified_url
@@ -681,7 +681,7 @@ def period(delta, pattern):
 if __name__ == '__main__':
-    version = '6.4.1'
+    version = '6.6.2'
    urllib3.disable_warnings()  # Ignore http proxy warning
    app_start = time.time()
--- a/README.md
+++ b/README.md
@@ -24,8 +24,12 @@
 # 申明
 当你查阅、下载了本项目源代码或二进制程序，即代表你接受了以下条款
 * 本项目和项目成果仅供技术，学术交流和Python3性能测试使用
 * 用户必须确保获取影片的途径在用户当地是合法的
 * 运行时和运行后所获取的元数据和封面图片等数据的版权，归版权持有人持有
 * 本项目贡献者编写该项目旨在学习Python3 ，提高编程水平
 * 本项目不提供任何影片下载的线索
 * 请勿将运行时和运行后获取的数据提供给可能有非法目的的第三方，例如用于非法交易、侵犯未成年人的权利等
 * 用户仅能在自己的私人计算机或者测试环境中使用该工具，禁止将获取到的数据用于商业目的或其他目的，如销售、传播等
 * 用户在使用本项目和项目成果前，请用户了解并遵守当地法律法规，如果本项目及项目成果使用过程中存在违反当地法律法规的行为，请勿使用该项目及项目成果
 * 法律后果及使用后果由使用者承担
 * [GPL LICENSE](https://github.com/yoshiko2/Movie_Data_Capture/blob/master/LICENSE)
@@ -36,3 +40,6 @@
 # 贡献者
 [![](https://opencollective.com/movie_data_capture/contributors.svg?width=890)](https://github.com/yoshiko2/movie_data_Capture/graphs/contributors)
 # Star History
 [![Star History Chart](https://api.star-history.com/svg?repos=yoshiko2/Movie_Data_Capture&type=Date)](https://star-history.com/#yoshiko2/Movie_Data_Capture&Date)
--- a/README_ZH.md
+++ b/README_ZH.md
@@ -32,3 +32,7 @@
 # 贡献者
 [![](https://opencollective.com/movie_data_capture/contributors.svg?width=890)](https://github.com/yoshiko2/movie_data_Capture/graphs/contributors)
 # Star History
 [![Star History Chart](https://api.star-history.com/svg?repos=yoshiko2/Movie_Data_Capture&type=Date)](https://star-history.com/#yoshiko2/Movie_Data_Capture&Date)
--- a/config.ini
+++ b/config.ini
@@ -21,11 +21,14 @@ nfo_skip_days = 30
 ignore_failed_list = 0
 download_only_missing_images = 1
 mapping_table_validity = 7
-; 在jellyfin中tags和genres重复，因此可以只保存genres到nfo中
+; 一些jellyfin中特有的设置 (0:不开启， 1：开启) 比如
-donot_save_tags = 0
+; 在jellyfin中tags和genres重复，因此可以只需保存genres到nfo中
 ; jellyfin中只需要保存thumb，不需要保存fanart
 jellyfin = 0
 ; 开启后tag和genre只显示演员
 actor_only_tag = 0
 sleep = 3
 anonymous_fill = 1
 [advenced_sleep]
 ; 处理完多少个视频文件后停止，0为处理所有视频文件
@@ -48,13 +51,14 @@ cacert_file =
 location_rule = actor+'/'+number
 naming_rule = number+'-'+title
 max_title_len = 50
-image_naming_with_number = 1
+; 刮削后图片是否命名为番号
 image_naming_with_number = 0
 [update]
 update_check = 1
 [priority]
-website = javbus,airav,jav321,fanza,xcity,mgstage,fc2,avsox,dlsite,carib,madou,mv91,getchu,javdb,gcolle
+website = javbus,airav,jav321,fanza,xcity,mgstage,fc2,avsox,dlsite,carib,madou,getchu,javdb,gcolle,javday,javmenu
 [escape]
 literals = \()/
@@ -66,13 +70,15 @@ switch = 0
 ; 机器翻译
 [translate]
 switch = 0
-;可选项 google-free,azure
+;可选项 google-free,azure,deeplx
 engine = google-free
-; azure翻译密钥
+target_language = zh_cn
 ; azure翻译密钥key
 key =
 ; 翻译延迟
 delay = 1
 values = title,outline
 ; google翻译服务站点，或deeplx访问链接
 service_site = translate.google.cn
 ; 预告片
--- a/config.py
+++ b/config.py
@@ -169,13 +169,13 @@ class Config:
            self._exit("common:main_mode")
    def source_folder(self) -> str:
-        return self.conf.get("common", "source_folder")
+        return self.conf.get("common", "source_folder").replace("\\\\", "/").replace("\\", "/")
    def failed_folder(self) -> str:
-        return self.conf.get("common", "failed_output_folder")
+        return self.conf.get("common", "failed_output_folder").replace("\\\\", "/").replace("\\", "/")
    def success_folder(self) -> str:
-        return self.conf.get("common", "success_output_folder")
+        return self.conf.get("common", "success_output_folder").replace("\\\\", "/").replace("\\", "/")
    def actor_gender(self) -> str:
        return self.conf.get("common", "actor_gender")
@@ -213,8 +213,8 @@ class Config:
    def mapping_table_validity(self) -> int:
        return self.conf.getint("common", "mapping_table_validity")
-    def donot_save_tags(self) -> int:
+    def jellyfin(self) -> int:
-        return self.conf.getint("common", "donot_save_tags")
+        return self.conf.getint("common", "jellyfin")
    def actor_only_tag(self) -> bool:
        """Return the ``actor_only_tag`` option of ``[common]`` parsed as a boolean."""
        section, option = "common", "actor_only_tag"
        return self.conf.getboolean(section, option)
@@ -222,13 +222,16 @@ class Config:
    def sleep(self) -> int:
        """Return the ``sleep`` option of ``[common]`` parsed as an integer."""
        section, option = "common", "sleep"
        return self.conf.getint(section, option)
    def anonymous_fill(self) -> bool:
        """Return whether the ``anonymous_fill`` option of ``[common]`` is enabled.

        The option is read as an integer and then converted to a real ``bool``
        so the returned value matches the declared return type. Any non-zero
        integer counts as enabled.
        """
        return bool(self.conf.getint("common", "anonymous_fill"))
    def stop_counter(self) -> int:
        """Return ``stop_counter`` from ``[advenced_sleep]`` as an integer.

        Falls back to ``0`` when the section or option is absent.
        """
        section, option = "advenced_sleep", "stop_counter"
        return self.conf.getint(section, option, fallback=0)
    def rerun_delay(self) -> int:
        value = self.conf.get("advenced_sleep", "rerun_delay")
        if not (isinstance(value, str) and re.match(r'^[\dsmh]+$', value, re.I)):
-            return 0   # not match '1h30m45s' or '30' or '1s2m1h4s5m'
+            return 0  # not match '1h30m45s' or '30' or '1s2m1h4s5m'
        if value.isnumeric() and int(value) >= 0:
            return int(value)
        sec = 0
@@ -279,6 +282,9 @@ class Config:
    def get_translate_engine(self) -> str:
        """Return the ``engine`` option of the ``[translate]`` section."""
        section, option = "translate", "engine"
        return self.conf.get(section, option)
    def get_target_language(self) -> str:
        """Return the ``target_language`` option of the ``[translate]`` section."""
        section, option = "translate", "target_language"
        return self.conf.get(section, option)
    # def get_translate_appId(self) ->str:
    #     return self.conf.get("translate","appid")
@@ -439,16 +445,19 @@ class Config:
        # actor_gender value: female or male or both or all(含人妖)
        conf.set(sec1, "actor_gender", "female")
        conf.set(sec1, "del_empty_folder", "1")
-        conf.set(sec1, "nfo_skip_days", 30)
+        conf.set(sec1, "nfo_skip_days", "30")
-        conf.set(sec1, "ignore_failed_list", 0)
+        conf.set(sec1, "ignore_failed_list", "0")
-        conf.set(sec1, "download_only_missing_images", 1)
+        conf.set(sec1, "download_only_missing_images", "1")
-        conf.set(sec1, "mapping_table_validity", 7)
+        conf.set(sec1, "mapping_table_validity", "7")
-        conf.set(sec1, "donot_save_tags", 0)
+        conf.set(sec1, "jellyfin", "0")
        conf.set(sec1, "actor_only_tag", "0")
        conf.set(sec1, "sleep", "3")
        conf.set(sec1, "anonymous_fill", "0")
        sec2 = "advenced_sleep"
        conf.add_section(sec2)
-        conf.set(sec2, "stop_counter", 0)
+        conf.set(sec2, "stop_counter", "0")
-        conf.set(sec2, "rerun_delay", 0)
+        conf.set(sec2, "rerun_delay", "0")
        sec3 = "proxy"
        conf.add_section(sec3)
@@ -463,6 +472,7 @@ class Config:
        conf.set(sec4, "location_rule", "actor + '/' + number")
        conf.set(sec4, "naming_rule", "number + '-' + title")
        conf.set(sec4, "max_title_len", "50")
        conf.set(sec4, "image_naming_with_number", "0")
        sec5 = "update"
        conf.add_section(sec5)
@@ -485,6 +495,7 @@ class Config:
        conf.add_section(sec9)
        conf.set(sec9, "switch", "0")
        conf.set(sec9, "engine", "google-free")
        conf.set(sec9, "target_language", "zh_cn")
        # conf.set(sec8, "appid", "")
        conf.set(sec9, "key", "")
        conf.set(sec9, "delay", "1")
@@ -508,28 +519,28 @@ class Config:
        sec13 = "watermark"
        conf.add_section(sec13)
-        conf.set(sec13, "switch", 1)
+        conf.set(sec13, "switch", "1")
-        conf.set(sec13, "water", 2)
+        conf.set(sec13, "water", "2")
        sec14 = "extrafanart"
        conf.add_section(sec14)
-        conf.set(sec14, "switch", 1)
+        conf.set(sec14, "switch", "1")
        conf.set(sec14, "extrafanart_folder", "extrafanart")
-        conf.set(sec14, "parallel_download", 1)
+        conf.set(sec14, "parallel_download", "1")
        sec15 = "storyline"
        conf.add_section(sec15)
-        conf.set(sec15, "switch", 1)
+        conf.set(sec15, "switch", "1")
        conf.set(sec15, "site", "1:avno1,4:airavwiki")
        conf.set(sec15, "censored_site", "2:airav,5:xcity,6:amazon")
        conf.set(sec15, "uncensored_site", "3:58avgo")
-        conf.set(sec15, "show_result", 0)
+        conf.set(sec15, "show_result", "0")
-        conf.set(sec15, "run_mode", 1)
+        conf.set(sec15, "run_mode", "1")
-        conf.set(sec15, "cc_convert", 1)
+        conf.set(sec15, "cc_convert", "1")
        sec16 = "cc_convert"
        conf.add_section(sec16)
-        conf.set(sec16, "mode", 1)
+        conf.set(sec16, "mode", "1")
        conf.set(sec16, "vars", "actor,director,label,outline,series,studio,tag,title")
        sec17 = "javdb"
--- a/core.py
+++ b/core.py
@@ -272,22 +272,25 @@ def extrafanart_download_threadpool(url_list, save_dir, number, json_data=None):
 def image_ext(url):
    """Return the image file extension found at the end of *url*.

    Only a fixed set of image suffixes is accepted. Any other suffix, a
    missing suffix, or a value that cannot be split as a path yields the
    default ``".jpg"``.
    """
    # '.jpeg' replaces the previous '.jpge' typo, which never matched real URLs.
    known_exts = {'.jpg', '.jpeg', '.bmp', '.png', '.gif'}
    try:
        ext = os.path.splitext(url)[-1]
    except (TypeError, ValueError):
        # url is not a str/bytes/path-like value: use the default extension.
        return ".jpg"
    return ext if ext in known_exts else ".jpg"
 # 封面是否下载成功，否则移动到failed
 def image_download(cover, fanart_path, thumb_path, path, filepath, json_headers=None):
-    full_filepath = os.path.join(path, fanart_path)
+    full_filepath = os.path.join(path, thumb_path)
    if config.getInstance().download_only_missing_images() and not file_not_exist_or_empty(full_filepath):
        return
    if json_headers != None:
-        if download_file_with_filename(cover, fanart_path, path, filepath, json_headers['headers']) == 'failed':
+        if download_file_with_filename(cover, thumb_path, path, filepath, json_headers['headers']) == 'failed':
            moveFailedFolder(filepath)
            return
    else:
-        if download_file_with_filename(cover, fanart_path, path, filepath) == 'failed':
+        if download_file_with_filename(cover, thumb_path, path, filepath) == 'failed':
            moveFailedFolder(filepath)
            return
@@ -296,20 +299,21 @@ def image_download(cover, fanart_path, thumb_path, path, filepath, json_headers=
        if file_not_exist_or_empty(full_filepath):
            print('[!]Image Download Failed! Trying again. [{}/3]', i + 1)
            if json_headers != None:
-                download_file_with_filename(cover, fanart_path, path, filepath, json_headers['headers'])
+                download_file_with_filename(cover, thumb_path, path, filepath, json_headers['headers'])
            else:
-                download_file_with_filename(cover, fanart_path, path, filepath)
+                download_file_with_filename(cover, thumb_path, path, filepath)
            continue
        else:
            break
    if file_not_exist_or_empty(full_filepath):
        return
    print('[+]Image Downloaded!', Path(full_filepath).name)
-    shutil.copyfile(full_filepath, os.path.join(path, thumb_path))
+    if not config.getInstance().jellyfin():
        shutil.copyfile(full_filepath, os.path.join(path, fanart_path))
 def print_files(path, leak_word, c_word, naming_rule, part, cn_sub, json_data, filepath, tag, actor_list, liuchu,
-                uncensored, hack_word, _4k, fanart_path, poster_path, thumb_path):
+                uncensored, hack, hack_word, _4k, fanart_path, poster_path, thumb_path):
    title, studio, year, outline, runtime, director, actor_photo, release, number, cover, trailer, website, series, label = get_info(
        json_data)
    if config.getInstance().main_mode() == 3:  # 模式3下，由于视频文件不做任何改变，.nfo文件必须和视频文件名称除后缀外完全一致，KODI等软件方可支持
@@ -332,13 +336,23 @@ def print_files(path, leak_word, c_word, naming_rule, part, cn_sub, json_data, f
            pass
        # KODI内查看影片信息时找不到number，配置naming_rule=number+'#'+title虽可解决
        # 但使得标题太长，放入时常为空的outline内会更适合，软件给outline留出的显示版面也较大
-        outline = f"{number}#{outline}"
+        if not outline:
            pass
        elif json_data['source'] == 'pissplay':
            outline = f"{outline}"
        else:
            outline = f"{number}#{outline}"
        with open(nfo_path, "wt", encoding='UTF-8') as code:
            print('<?xml version="1.0" encoding="UTF-8" ?>', file=code)
            print("<movie>", file=code)
-            print("  <title><![CDATA[" + naming_rule + "]]></title>", file=code)
+            if not config.getInstance().jellyfin():
-            print("  <originaltitle><![CDATA[" + naming_rule + "]]></originaltitle>", file=code)
+                print("  <title><![CDATA[" + naming_rule + "]]></title>", file=code)
-            print("  <sorttitle><![CDATA[" + naming_rule + "]]></sorttitle>", file=code)
+                print("  <originaltitle><![CDATA[" + json_data['original_naming_rule'] + "]]></originaltitle>", file=code)
                print("  <sorttitle><![CDATA[" + naming_rule + "]]></sorttitle>", file=code)
            else:
                print("  <title>" + naming_rule + "</title>", file=code)
                print("  <originaltitle>" + json_data['original_naming_rule'] + "</originaltitle>", file=code)
                print("  <sorttitle>" + naming_rule + "</sorttitle>", file=code)    
            print("  <customrating>JP-18+</customrating>", file=code)
            print("  <mpaa>JP-18+</mpaa>", file=code)
            try:
@@ -347,13 +361,18 @@ def print_files(path, leak_word, c_word, naming_rule, part, cn_sub, json_data, f
                print("  <set></set>", file=code)
            print("  <studio>" + studio + "</studio>", file=code)
            print("  <year>" + year + "</year>", file=code)
-            print("  <outline><![CDATA[" + outline + "]]></outline>", file=code)
+            if not config.getInstance().jellyfin():
-            print("  <plot><![CDATA[" + outline + "]]></plot>", file=code)
+                print("  <outline><![CDATA[" + outline + "]]></outline>", file=code)
                print("  <plot><![CDATA[" + outline + "]]></plot>", file=code)
            else:
                print("  <outline>" + outline + "</outline>", file=code)
                print("  <plot>" + outline + "</plot>", file=code)
            print("  <runtime>" + str(runtime).replace(" ", "") + "</runtime>", file=code)
            print("  <director>" + director + "</director>", file=code)
            print("  <poster>" + poster_path + "</poster>", file=code)
            print("  <thumb>" + thumb_path + "</thumb>", file=code)
-            print("  <fanart>" + fanart_path + "</fanart>", file=code)
+            if not config.getInstance().jellyfin(): # jellyfin 不需要保存fanart
                print("  <fanart>" + fanart_path + "</fanart>", file=code)
            try:
                for key in actor_list:
                    print("  <actor>", file=code)
@@ -368,8 +387,8 @@ def print_files(path, leak_word, c_word, naming_rule, part, cn_sub, json_data, f
            print("  <maker>" + studio + "</maker>", file=code)
            print("  <label>" + label + "</label>", file=code)
-            skip_tags = config.getInstance().donot_save_tags()
+            jellyfin = config.getInstance().jellyfin()
-            if not skip_tags:
+            if not jellyfin:
                if config.getInstance().actor_only_tag():
                    for key in actor_list:
                        try:
@@ -377,27 +396,27 @@ def print_files(path, leak_word, c_word, naming_rule, part, cn_sub, json_data, f
                        except:
                            pass
                else:
-                    if cn_sub == '1':
+                    if cn_sub:
                        print("  <tag>中文字幕</tag>", file=code)
-                    if liuchu == '流出':
+                    if liuchu:
                        print("  <tag>流出</tag>", file=code)
-                    if uncensored == 1:
+                    if uncensored:
                        print("  <tag>无码</tag>", file=code)
-                    if hack_word != '':
+                    if hack:
                        print("  <tag>破解</tag>", file=code)
-                    if _4k == '1':
+                    if _4k:
                        print("  <tag>4k</tag>", file=code)
                    for i in tag:
                        print("  <tag>" + i + "</tag>", file=code)
-            if cn_sub == '1':
+            if cn_sub:
                print("  <genre>中文字幕</genre>", file=code)
-            if liuchu == '流出':
+            if liuchu:
                print("  <genre>无码流出</genre>", file=code)
-            if uncensored == 1:
+            if uncensored:
                print("  <genre>无码</genre>", file=code)
-            if hack_word != '':
+            if hack:
                print("  <genre>破解</genre>", file=code)
-            if _4k == '1':
+            if _4k:
                print("  <genre>4k</genre>", file=code)
            try:
                for i in tag:
@@ -470,6 +489,7 @@ def add_mark(poster_path, thumb_path, cn_sub, leak, uncensored, hack, _4k) -> No
    :cn_sub: 中文字幕 可选值：1,"1" 或其他值
    :uncensored 无码 可选值：1,"1" 或其他值
    :hack 破解 可选值：1,"1" 或其他值
    :_4k Bool
    """
    mark_type = ''
    if cn_sub:
@@ -495,17 +515,17 @@ def add_mark_thread(pic_path, cn_sub, leak, uncensored, hack, _4k):
    # 获取自定义位置，取余配合pos达到顺时针添加的效果
    # 左上 0, 右上 1, 右下 2， 左下 3
    count = config.getInstance().watermark_type()
-    if cn_sub == 1 or cn_sub == '1':
+    if cn_sub:
        add_to_pic(pic_path, img_pic, size, count, 1)  # 添加
        count = (count + 1) % 4
-    if leak == 1 or leak == '1':
+    if leak:
        add_to_pic(pic_path, img_pic, size, count, 2)
        count = (count + 1) % 4
-    if uncensored == 1 or uncensored == '1':
+    if uncensored:
        add_to_pic(pic_path, img_pic, size, count, 3)
-    if hack == 1 or hack == '1':
+    if hack:
        add_to_pic(pic_path, img_pic, size, count, 4)
-    if _4k == 1 or _4k == '1':
+    if _4k:
        add_to_pic(pic_path, img_pic, size, count, 5)
    img_pic.close()
@@ -613,6 +633,8 @@ def paste_file_to_folder_mode2(filepath, path, multi_part, number, part, leak_wo
        create_softlink = False
        if link_mode not in (1, 2):
            shutil.move(filepath, targetpath)
            print("[!]Move =>          ", path)
            return
        elif link_mode == 2:
            try:
                os.link(filepath, targetpath, follow_symlinks=False)
@@ -624,16 +646,13 @@ def paste_file_to_folder_mode2(filepath, path, multi_part, number, part, leak_wo
                os.symlink(filerelpath, targetpath)
            except:
                os.symlink(str(filepath_obj.resolve()), targetpath)
-        return
+        print("[!]Link =>          ", path)
    except FileExistsError as fee:
        print(f'[-]FileExistsError: {fee}')
        return
    except PermissionError:
        print('[-]Error! Please run as administrator!')
        return
    except OSError as oserr:
        print(f'[-]OS Error errno  {oserr.errno}')
        return
 def linkImage(path, number, part, leak_word, c_word, hack_word, ext):
@@ -693,12 +712,12 @@ def core_main_no_net_op(movie_path, number):
    conf = config.getInstance()
    part = ''
    leak_word = ''
-    leak = 0
+    leak = False
    c_word = ''
-    cn_sub = ''
+    cn_sub = False
-    hack = ''
+    hack = False
    hack_word = ''
-    _4k = ''
+    _4k = False
    imagecut = 1
    multi = False
    part = ''
@@ -709,30 +728,30 @@ def core_main_no_net_op(movie_path, number):
        multi = True
    if re.search(r'[-_]C(\.\w+$|-\w+)|\d+ch(\.\w+$|-\w+)', movie_path,
                 re.I) or '中文' in movie_path or '字幕' in movie_path or ".chs" in movie_path or '.cht' in movie_path:
-        cn_sub = '1'
+        cn_sub = True
        c_word = '-C'  # 中文字幕影片后缀
-    uncensored = 1 if is_uncensored(number) else 0
+    uncensored = True if is_uncensored(number) else 0
    if '流出' in movie_path or 'uncensored' in movie_path.lower():
        leak_word = '-无码流出'  # 无码流出影片后缀
-        leak = 1
+        leak = True
    if 'hack'.upper() in str(movie_path).upper() or '破解' in movie_path:
-        hack = 1
+        hack = True
        hack_word = "-hack"
    # try:
    #     props = get_video_properties(movie_path)  # 判断是否为4K视频
    #     if props['width'] >= 4096 or props['height'] >= 2160:
-    #         _4k = '1'
+    #         _4k = True
    # except:
    #     pass
    prestr = f"{number}{leak_word}{c_word}{hack_word}"
    full_nfo = Path(path) / f"{prestr}{part}.nfo"
    if full_nfo.is_file():
        if full_nfo.read_text(encoding='utf-8').find(r'<tag>无码</tag>') >= 0:
-            uncensored = 1
+            uncensored = True
        try:
            nfo_xml = etree.parse(full_nfo)
            nfo_fanart_path = nfo_xml.xpath('//fanart/text()')[0]
@@ -791,15 +810,15 @@ def move_subtitles(filepath, path, multi_part, number, part, leak_word, c_word,
 def core_main(movie_path, number_th, oCC, specified_source=None, specified_url=None):
    conf = config.getInstance()
    # =======================================================================初始化所需变量
-    multi_part = 0
+    multi_part = False
    part = ''
    leak_word = ''
    c_word = ''
-    cn_sub = ''
+    cn_sub = False
-    liuchu = ''
+    liuchu = False
-    hack = ''
+    hack = False
    hack_word = ''
-    _4k = ''
+    _4k = False
    # 下面被注释的变量不需要
    # rootpath = os.getcwd
@@ -822,11 +841,11 @@ def core_main(movie_path, number_th, oCC, specified_source=None, specified_url=N
    tag = json_data.get('tag')
    # =======================================================================判断-C,-CD后缀
    if re.search('[-_]CD\d+', movie_path, re.IGNORECASE):
-        multi_part = 1
+        multi_part = True
        part = re.findall('[-_]CD\d+', movie_path, re.IGNORECASE)[0].upper()
    if re.search(r'[-_]C(\.\w+$|-\w+)|\d+ch(\.\w+$|-\w+)', movie_path,
                 re.I) or '中文' in movie_path or '字幕' in movie_path:
-        cn_sub = '1'
+        cn_sub = True
        c_word = '-C'  # 中文字幕影片后缀
    # 判断是否无码
@@ -835,19 +854,22 @@ def core_main(movie_path, number_th, oCC, specified_source=None, specified_url=N
    if '流出' in movie_path or 'uncensored' in movie_path.lower():
        liuchu = '流出'
-        leak = 1
+        leak = True
        leak_word = '-无码流出'  # 流出影片后缀
    else:
-        leak = 0
+        leak = False
    if 'hack'.upper() in str(movie_path).upper() or '破解' in movie_path:
-        hack = 1
+        hack = True
        hack_word = "-hack"
    if '4k'.upper() in str(movie_path).upper() or '4k' in movie_path:
        _4k = True
    # 判断是否4k
    if '4K' in tag:
        tag.remove('4K')  # 从tag中移除'4K'
-        
+
    # 判断是否为无码破解
    if '无码破解' in tag:
        tag.remove('无码破解')  # 从tag中移除'无码破解'
@@ -855,7 +877,7 @@ def core_main(movie_path, number_th, oCC, specified_source=None, specified_url=N
    # try:
    #     props = get_video_properties(movie_path)  # 判断是否为4K视频
    #     if props['width'] >= 4096 or props['height'] >= 2160:
-    #         _4k = '1'
+    #         _4k = True
    # except:
    #     pass
@@ -920,7 +942,7 @@ def core_main(movie_path, number_th, oCC, specified_source=None, specified_url=N
                pass
        # 裁剪图
-        cutImage(imagecut, path, fanart_path, poster_path, bool(conf.face_uncensored_only() and not uncensored))
+        cutImage(imagecut, path, thumb_path, poster_path, bool(conf.face_uncensored_only() and not uncensored))
        # 兼容Jellyfin封面图文件名规则
        if multi_part and conf.jellyfin_multi_part_fanart():
@@ -932,7 +954,7 @@ def core_main(movie_path, number_th, oCC, specified_source=None, specified_url=N
        # Move subtitles
        move_status = move_subtitles(movie_path, path, multi_part, number, part, leak_word, c_word, hack_word)
        if move_status:
-            cn_sub = "1"
+            cn_sub = True
        # 添加水印
        if conf.is_watermark():
            add_mark(os.path.join(path, poster_path), os.path.join(path, thumb_path), cn_sub, leak, uncensored,
@@ -940,7 +962,7 @@ def core_main(movie_path, number_th, oCC, specified_source=None, specified_url=N
        # 最后输出.nfo元数据文件，以完成.nfo文件创建作为任务成功标志
        print_files(path, leak_word, c_word, json_data.get('naming_rule'), part, cn_sub, json_data, movie_path, tag,
-                    json_data.get('actor_list'), liuchu, uncensored, hack_word
+                    json_data.get('actor_list'), liuchu, uncensored, hack, hack_word
                    , _4k, fanart_path, poster_path, thumb_path)
    elif conf.main_mode() == 2:
@@ -948,13 +970,9 @@ def core_main(movie_path, number_th, oCC, specified_source=None, specified_url=N
        path = create_folder(json_data)
        # 移动文件
        paste_file_to_folder_mode2(movie_path, path, multi_part, number, part, leak_word, c_word, hack_word)
        # Move subtitles
-        move_status = move_subtitles(movie_path, path, multi_part, number, part, leak_word, c_word, hack_word)
+        move_subtitles(movie_path, path, multi_part, number, part, leak_word, c_word, hack_word)
        if move_status:
            cn_sub = "1"
        if conf.is_watermark():
            add_mark(os.path.join(path, poster_path), os.path.join(path, thumb_path), cn_sub, leak, uncensored, hack,
                     _4k)
    elif conf.main_mode() == 3:
        path = str(Path(movie_path).parent)
@@ -998,7 +1016,7 @@ def core_main(movie_path, number_th, oCC, specified_source=None, specified_url=N
        # 添加水印
        if conf.is_watermark():
-            add_mark(os.path.join(path, poster_path), os.path.join(path, thumb_path), cn_sub, leak, uncensored, hack,
+            add_mark(os.path.join(path, poster_path), os.path.join(path, fanart_path), cn_sub, leak, uncensored, hack,
                     _4k)
        # 兼容Jellyfin封面图文件名规则
@@ -1007,5 +1025,5 @@ def core_main(movie_path, number_th, oCC, specified_source=None, specified_url=N
        # 最后输出.nfo元数据文件，以完成.nfo文件创建作为任务成功标志
        print_files(path, leak_word, c_word, json_data.get('naming_rule'), part, cn_sub, json_data, movie_path,
-                    tag, json_data.get('actor_list'), liuchu, uncensored, hack_word, fanart_path, poster_path,
+                    tag, json_data.get('actor_list'), liuchu, uncensored, hack, hack_word, _4k, fanart_path, poster_path,
                    thumb_path)
--- a/donate.png
+++ b/donate.png
--- a/number_parser.py
+++ b/number_parser.py
@@ -7,7 +7,7 @@ import typing
 G_spat = re.compile(
    "^\w+\.(cc|com|net|me|club|jp|tv|xyz|biz|wiki|info|tw|us|de)@|^22-sht\.me|"
    "^(fhd|hd|sd|1080p|720p|4K)(-|_)|"
-    "(-|_)(fhd|hd|sd|1080p|720p|4K|x264|x265|uncensored|leak)",
+    "(-|_)(fhd|hd|sd|1080p|720p|4K|x264|x265|uncensored|hack|leak)",
    re.IGNORECASE)
@@ -54,12 +54,12 @@ def get_number(debug: bool, file_path: str) -> str:
            filename = str(re.sub("\[\d{4}-\d{1,2}-\d{1,2}\] - ", "", filepath))  # 去除文件名中时间
            lower_check = filename.lower()
            if 'fc2' in lower_check:
-                filename = lower_check.replace('ppv', '').replace('--', '-').replace('_', '-').upper()
+                filename = lower_check.replace('--', '-').replace('_', '-').upper()
            filename = re.sub("[-_]cd\d{1,2}", "", filename, flags=re.IGNORECASE)
            if not re.search("-|_", filename): # 去掉-CD1之后再无-的情况，例如n1012-CD1.wmv
                return str(re.search(r'\w+', filename[:filename.find('.')], re.A).group())
            file_number =  os.path.splitext(filename)
-            filename = re.search(r'\w+(-|_)\w+', filename, re.A)
+            filename = re.search(r'[\w\-_]+', filename, re.A)
            if filename:
                file_number = str(filename.group())
            else:
@@ -85,34 +85,7 @@ def get_number(debug: bool, file_path: str) -> str:
            print(f'[-]Number Parser exception: {e} [{file_path}]')
        return None
 # modou提取number
 def md(filename):
    """Build the normalized ``MD``-series number found in *filename*.

    Searches case-insensitively for ``md`` plus up to two letters, an
    optional hyphen, at least two digits and an optional ``-ep<N>`` / ``-<N>``
    suffix.  Returns the upper-cased prefix with hyphens removed, the digits
    zero-padded to four characters, and the suffix when one was captured.
    Raises AttributeError when the pattern does not occur in *filename*.
    """
    found = re.search(r'(md[a-z]{0,2}-?)(\d{2,})(-ep\d*|-\d*)*', filename, re.I)
    prefix, digits, suffix = found.groups()
    return prefix.replace("-", "").upper() + digits.zfill(4) + (suffix or "")
 def mmz(filename):
    """Build the normalized ``MMZ``-series number found in *filename*.

    Returns the upper-cased prefix with hyphens removed, the digits
    zero-padded to three characters, and the ``-ep<N>`` suffix when one was
    captured.  Raises AttributeError when the pattern does not occur.
    """
    found = re.search(r'(mmz-?)(\d{2,})(-ep\d*)*', filename, re.I)
    prefix, digits, suffix = found.groups()
    return prefix.replace("-", "").upper() + digits.zfill(3) + (suffix or "")
 def msd(filename):
    """Build the normalized ``MSD``-series number found in *filename*.

    Returns the upper-cased prefix with hyphens removed, the digits
    zero-padded to three characters, and the ``-ep<N>`` suffix when one was
    captured.  Raises AttributeError when the pattern does not occur.
    """
    found = re.search(r'(msd-?)(\d{2,})(-ep\d*)*', filename, re.I)
    prefix, digits, suffix = found.groups()
    return prefix.replace("-", "").upper() + digits.zfill(3) + (suffix or "")
 def mky(filename):
    """Build the normalized ``MKY-xx``-series number found in *filename*.

    The prefix is ``mky-`` followed by exactly two letters.  Returns the
    upper-cased prefix with hyphens removed, the digits zero-padded to three
    characters, and the ``-ep<N>`` suffix when one was captured.  Raises
    AttributeError when the pattern does not occur.
    """
    found = re.search(r'(mky-[a-z]{2,2}-?)(\d{2,})(-ep\d*)*', filename, re.I)
    prefix, digits, suffix = found.groups()
    return prefix.replace("-", "").upper() + digits.zfill(3) + (suffix or "")
 def yk(filename):
    """Build the normalized ``YK``-series number found in *filename*.

    Returns the upper-cased prefix with hyphens removed, the digits
    zero-padded to three characters, and the ``-ep<N>`` suffix when one was
    captured.  Raises AttributeError when the pattern does not occur.
    """
    found = re.search(r'(yk-?)(\d{2,})(-ep\d*)*', filename, re.I)
    prefix, digits, suffix = found.groups()
    return prefix.replace("-", "").upper() + digits.zfill(3) + (suffix or "")
 def pm(filename):
    """Build the normalized ``PM``-series number found in *filename*.

    The prefix is ``pm`` with at most one extra letter.  Returns the
    upper-cased prefix with hyphens removed, the digits zero-padded to three
    characters, and the ``-ep<N>`` suffix when one was captured.  Raises
    AttributeError when the pattern does not occur.
    """
    found = re.search(r'(pm[a-z]?-?)(\d{2,})(-ep\d*)*', filename, re.I)
    prefix, digits, suffix = found.groups()
    return prefix.replace("-", "").upper() + digits.zfill(3) + (suffix or "")
 def fsog(filename):
    """Build the normalized ``FSOG``-series number found in *filename*.

    Returns the upper-cased prefix with hyphens removed, the digits
    zero-padded to three characters, and the ``-ep<N>`` suffix when one was
    captured.  Raises AttributeError when the pattern does not occur.
    """
    found = re.search(r'(fsog-?)(\d{2,})(-ep\d*)*', filename, re.I)
    prefix, digits, suffix = found.groups()
    return prefix.replace("-", "").upper() + digits.zfill(3) + (suffix or "")
 # 按javdb数据源的命名规范提取number
 G_TAKE_NUM_RULES = {
@@ -126,13 +99,6 @@ G_TAKE_NUM_RULES = {
    'heyzo': lambda x: 'HEYZO-' + re.findall(r'heyzo[^\d]*(\d{4})', x, re.I)[0],
    'mdbk': lambda x: str(re.search(r'mdbk(-|_)(\d{4})', x, re.I).group()),
    'mdtm': lambda x: str(re.search(r'mdtm(-|_)(\d{4})', x, re.I).group()),
    r'\bmd[a-z]{0,2}-\d{2,}': md,
    r'\bmmz-\d{2,}':mmz,
    r'\bmsd-\d{2,}':msd,
    r'\bmky-[a-z]{2,2}-\d{2,}':mky,
    r'\byk-\d{2,3}': yk,
    r'\bpm[a-z]?-?\d{2,}':pm,
    r'\bfsog-?\d{2,}':fsog
 }
--- a/py_to_exe.ps1
+++ b/py_to_exe.ps1
@@ -1,25 +1,25 @@
-# If you can't run this script, please execute the following command in PowerShell.
+# If you can't run this script, please execute the following command in PowerShell.
-# Set-ExecutionPolicy RemoteSigned -Scope CurrentUser -Force
+# Set-ExecutionPolicy RemoteSigned -Scope CurrentUser -Force
-
+
-$CLOUDSCRAPER_PATH=$(python -c 'import cloudscraper as _; print(_.__path__[0])' | select -Last 1)
+# bugfix：set submodules find path
-$OPENCC_PATH=$(python -c 'import opencc as _; print(_.__path__[0])' | select -Last 1)
+$Env:PYTHONPATH=$pwd.path
-$FACE_RECOGNITION_MODELS=$(python -c 'import face_recognition_models as _; print(_.__path__[0])' | select -Last 1)
+$PYTHONPATH=$pwd.path
-
+mkdir build
-mkdir build
+mkdir __pycache__
-mkdir __pycache__
+
-
+pyinstaller --collect-submodules "scrapinglib" `
-pyinstaller --onefile Movie_Data_Capture.py `
+    --collect-submodules "ImageProcessing" `
-    --hidden-import "ImageProcessing.cnn" `
+    --collect-data "face_recognition_models" `
-    --python-option u `
+    --collect-data "cloudscraper" `
-    --add-data "$FACE_RECOGNITION_MODELS;face_recognition_models" `
+    --collect-data "opencc" `
-    --add-data "$CLOUDSCRAPER_PATH;cloudscraper" `
+    --add-data "Img;Img" `
-    --add-data "$OPENCC_PATH;opencc" `
+    --add-data "config.ini;." `
-    --add-data "Img;Img" `
+    --onefile Movie_Data_Capture.py
-    --add-data "config.ini;." `
+
-
+
-rmdir -Recurse -Force build
+rmdir -Recurse -Force build
-rmdir -Recurse -Force __pycache__
+rmdir -Recurse -Force __pycache__
-rmdir -Recurse -Force Movie_Data_Capture.spec
+rmdir -Recurse -Force Movie_Data_Capture.spec
-
+
-echo "[Make]Finish"
+echo "[Make]Finish"
-pause
+pause
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,8 @@
 requests
 dlib-bin
 Click
 numpy
 face-recognition-models
 lxml
 beautifulsoup4
 pillow
@@ -8,5 +12,3 @@ urllib3
 certifi
 MechanicalSoup
 opencc-python-reimplemented
 face_recognition
 get-video-properties
--- a/scraper.py
+++ b/scraper.py
@@ -99,6 +99,10 @@ def get_data_from_json(
    # ================================================网站规则添加结束================================================
    if json_data.get('title') == '':
        print('[-]Movie Number or Title not found!')
        return None
    title = json_data.get('title')
    actor_list = str(json_data.get('actor')).strip("[ ]").replace("'", '').split(',')  # 字符串转列表
    actor_list = [actor.strip() for actor in actor_list]  # 去除空白
@@ -134,11 +138,10 @@ def get_data_from_json(
        tag.remove('XXXX')
    while 'xxx' in tag:
        tag.remove('xxx')
-    actor = str(actor_list).strip("[ ]").replace("'", '').replace(" ", '')
+    if json_data['source'] =='pissplay': # pissplay actor为英文名，不用去除空格
-
+        actor = str(actor_list).strip("[ ]").replace("'", '')
-    if title == '' or number == '':
+    else:
-        print('[-]Movie Number or Title not found!')
+        actor = str(actor_list).strip("[ ]").replace("'", '').replace(" ", '')
        return None
    # if imagecut == '3':
    #     DownloadFileWithFilename()
@@ -266,14 +269,22 @@ def get_data_from_json(
                    pass
    naming_rule = ""
    original_naming_rule = ""
    for i in conf.naming_rule().split("+"):
        if i not in json_data:
            naming_rule += i.strip("'").strip('"')
            original_naming_rule += i.strip("'").strip('"')
        else:
            item = json_data.get(i)
            naming_rule += item if type(item) is not list else "&".join(item)
            # PATCH：处理[title]存在翻译的情况，后续NFO文件的original_name只会直接沿用naming_rule,这导致original_name非原始名
            # 理应在翻译处处理 naming_rule和original_naming_rule
            if i == 'title':
                item = json_data.get('original_title')
            original_naming_rule += item if type(item) is not list else "&".join(item)
    json_data['naming_rule'] = naming_rule
    json_data['original_naming_rule'] = original_naming_rule
    return json_data
--- a/scrapinglib/init.py
+++ b/scrapinglib/init.py
@@ -1,3 +1,2 @@
 # -*- coding: utf-8 -*-
-
+from .api import search, getSupportedSources
 from .api import search, getSupportedSources
--- a/scrapinglib/api.py
+++ b/scrapinglib/api.py
@@ -20,6 +20,8 @@ from .xcity import Xcity
 from .avsox import Avsox
 from .javlibrary import Javlibrary
 from .javday import Javday
 from .pissplay import Pissplay
 from .javmenu import Javmenu
 from .tmdb import Tmdb
 from .imdb import Imdb
@@ -51,8 +53,8 @@ class Scraping:
    """
    """
    adult_full_sources = ['javlibrary', 'javdb', 'javbus', 'airav', 'fanza', 'xcity', 'jav321',
-                          'mgstage', 'fc2', 'avsox', 'dlsite', 'carib', 'madou', 
+                          'mgstage', 'fc2', 'avsox', 'dlsite', 'carib', 'madou',
-                          'getchu', 'gcolle','javday'
+                          'getchu', 'gcolle', 'javday', 'pissplay', 'javmenu'
                          ]
    adult_func_mapping = {
        'avsox': Avsox().scrape,
@@ -70,7 +72,9 @@ class Scraping:
        'javdb': Javdb().scrape,
        'getchu': Getchu().scrape,
        'javlibrary': Javlibrary().scrape,
-        'javday': Javday().scrape
+        'javday': Javday().scrape,
        'pissplay': Pissplay().scrape,
        'javmenu': Javmenu().scrape
    }
    general_full_sources = ['tmdb', 'imdb']
@@ -143,6 +147,14 @@ class Scraping:
            print(f'[-]Movie Number [{name}] not found!')
            return None
        # If actor is anonymous, Fill in Anonymous
        if len(json_data['actor']) == 0:
            if config.getInstance().anonymous_fill() == True:
                if "zh_" in config.getInstance().get_target_language():
                    json_data['actor'] = "佚名"
                else:
                    json_data['actor'] = "Anonymous"
        return json_data
    def searchAdult(self, number, sources):
@@ -174,13 +186,13 @@ class Scraping:
                    break
            except:
                continue
-            
+
        # javdb的封面有水印，如果可以用其他源的封面来替换javdb的封面
        if 'source' in json_data and json_data['source'] == 'javdb':
            # search other sources
            other_sources = sources[sources.index('javdb') + 1:]
            while other_sources:
-            # If cover not found in other source, then skip using other sources using javdb cover instead
+                # If cover not found in other source, then skip using other sources using javdb cover instead
                try:
                    other_json_data = self.searchAdult(number, other_sources)
                    if other_json_data is not None and 'cover' in other_json_data and other_json_data['cover'] != '':
@@ -195,12 +207,20 @@ class Scraping:
                    other_sources = sources[sources.index(other_json_data['source']) + 1:]
                except:
                    pass
-            
+
        # Return if data not found in all sources
        if not json_data:
            print(f'[-]Movie Number [{number}] not found!')
            return None
        # If actor is anonymous, Fill in Anonymous
        if len(json_data['actor']) == 0:
            if config.getInstance().anonymous_fill() == True:
                if "zh_" in config.getInstance().get_target_language():
                    json_data['actor'] = "佚名"
                else:
                    json_data['actor'] = "Anonymous"
        return json_data
    def checkGeneralSources(self, c_sources, name):
@@ -283,4 +303,8 @@ class Scraping:
            return False
        if data["number"] is None or data["number"] == "" or data["number"] == "null":
            return False
        if (data["cover"] is None or data["cover"] == "" or data["cover"] == "null") \
                and (data["cover_small"] is None or data["cover_small"] == "" or
                     data["cover_small"] == "null"):
            return False
        return True
--- a/scrapinglib/avsox.py
+++ b/scrapinglib/avsox.py
@@ -31,12 +31,14 @@ class Avsox(Parser):
        site = self.getTreeElement(qurySiteTree, '//div[@class="container"]/div/a/@href')
        self.searchtree = self.getHtmlTree(site + '/cn/search/' + number)
        result1 = self.getTreeElement(self.searchtree, '//*[@id="waterfall"]/div/a/@href')
-        if result1 == '' or result1 == 'null' or result1 == 'None':
+        if result1 == '' or result1 == 'null' or result1 == 'None' or result1.find('movie') == -1:
            self.searchtree = self.getHtmlTree(site + '/cn/search/' + number.replace('-', '_'))
            result1 = self.getTreeElement(self.searchtree, '//*[@id="waterfall"]/div/a/@href')
-            if result1 == '' or result1 == 'null' or result1 == 'None':
+            if result1 == '' or result1 == 'null' or result1 == 'None' or result1.find('movie') == -1:
                self.searchtree = self.getHtmlTree(site + '/cn/search/' + number.replace('_', ''))
                result1 = self.getTreeElement(self.searchtree, '//*[@id="waterfall"]/div/a/@href')
                if result1 == '' or result1 == 'null' or result1 == 'None' or result1.find('movie') == -1:
                    return None
        return "https:" + result1
    def getNum(self, htmltree):
--- a/scrapinglib/fanza.py
+++ b/scrapinglib/fanza.py
@@ -49,13 +49,13 @@ class Fanza(Parser):
            self.detailurl = url + fanza_search_number
            url = "https://www.dmm.co.jp/age_check/=/declared=yes/?"+ urlencode({"rurl": self.detailurl})
            self.htmlcode = self.getHtml(url)
-            if self.htmlcode != 404:
+            if self.htmlcode != 404 \
                    and 'Sorry! This content is not available in your region.' not in self.htmlcode:
                self.htmltree = etree.HTML(self.htmlcode)
-                break
+                if self.htmltree is not None:
-        if self.htmlcode == 404:
+                    result = self.dictformat(self.htmltree)
-            return 404
+                    return result
-        result = self.dictformat(self.htmltree)
+        return 404
        return result
    def getNum(self, htmltree):
        # for some old page, the input number does not match the page
--- a/scrapinglib/fc2.py
+++ b/scrapinglib/fc2.py
@@ -22,6 +22,7 @@ class Fc2(Parser):
    def extraInit(self):
        """Apply this source's defaults: ``imagecut`` 0, number change allowed."""
        # The two settings are independent of each other.
        self.allow_number_change = True
        self.imagecut = 0
    def search(self, number):
        self.number = number.lower().replace('fc2-ppv-', '').replace('fc2-', '')
--- a/scrapinglib/javbus.py
+++ b/scrapinglib/javbus.py
@@ -128,7 +128,7 @@ class Javbus(Parser):
    def getTags(self, htmltree):
        tags = self.getTreeElement(htmltree, self.expr_tags).split(',')
-        return tags[1:]
+        return tags[2:]
    def getOutline(self, htmltree):
        if self.morestoryline:
--- a/scrapinglib/javday.py
+++ b/scrapinglib/javday.py
@@ -39,3 +39,8 @@ class Javday(Parser):
        # 删除番号和网站名
        result = title.replace(self.number,"").replace("- JAVDAY.TV","").strip()
        return result
    def getTags(self, htmltree) -> list:
        """Return the parent parser's tags minus any that mention the site name."""
        kept = []
        for label in super().getTags(htmltree):
            # Skip tags that contain the site's own branding.
            if 'JAVDAY.TV' in label:
                continue
            kept.append(label)
        return kept
--- a/scrapinglib/javmenu.py
+++ b/scrapinglib/javmenu.py
@@ -0,0 +1,61 @@
 # -*- coding: utf-8 -*-
 import re
 from lxml import etree
 from urllib.parse import urljoin
 from .parser import Parser
 class Javmenu(Parser):
    """Scraper for javmenu.com detail pages.

    Metadata is read through the XPath expressions below; ``search`` fetches
    ``https://javmenu.com/zh/<number>/`` unless a URL was specified.
    """

    source = 'javmenu'
    expr_title = '/html/head/meta[@property="og:title"]/@content'
    expr_cover = '/html/head/meta[@property="og:image"]/@content'
    expr_number = '//span[contains(text(),"番號") or contains(text(),"番号")]/../a/text()'
    expr_number2 = '//span[contains(text(),"番號") or contains(text(),"番号")]/../span[2]/text()'
    # NOTE(review): the ';' inside "時長;" looks accidental - confirm against the live page.
    expr_runtime = '//span[contains(text(),"時長;") or contains(text(),"时长")]/../span[2]/text()'
    expr_release = '//span[contains(text(),"日期")]/../span[2]/text()'
    expr_studio = '//span[contains(text(),"製作")]/../span[2]/a/text()'
    expr_actor = '//a[contains(@class,"actress")]/text()'
    expr_tags = '//a[contains(@class,"genre")]/text()'

    def extraInit(self):
        """Set this source's defaults (``imagecut`` 4, ``uncensored`` True)."""
        self.imagecut = 4
        self.uncensored = True

    def search(self, number):
        """Fetch the detail page for *number* and return the parsed data.

        Returns 404 when the page could not be fetched, otherwise whatever
        ``dictformat`` produces for the parsed HTML tree.
        """
        self.number = number
        if self.specifiedUrl:
            self.detailurl = self.specifiedUrl
        else:
            self.detailurl = 'https://javmenu.com/zh/' + self.number + '/'
        self.htmlcode = self.getHtml(self.detailurl)
        if self.htmlcode == 404:
            return 404
        htmltree = etree.HTML(self.htmlcode)
        result = self.dictformat(htmltree)
        return result

    def getNum(self, htmltree):
        """Return the number shown on the page, verifying it matches the query.

        Raises:
            Exception: when the page's number differs (case-insensitively)
                from the number that was searched for.
        """
        # The number is split across two elements and must be joined.
        part1 = self.getTreeElement(htmltree, self.expr_number)
        part2 = self.getTreeElement(htmltree, self.expr_number2)
        dp_number = part1 + part2
        # NOTE: check the match, then adopt the page's spelling as self.number.
        if dp_number.upper() != self.number.upper():
            raise Exception(f'[!] {self.number}: find [{dp_number}] in javmenu, not match')
        self.number = dp_number
        return self.number

    def getTitle(self, htmltree):
        """Return the page title with the number and site suffix removed."""
        browser_title = super().getTitle(htmltree)
        # Drop everything up to and including the number's digits.  The second
        # digit group is preferred (e.g. the id in "FC2-1234567"); numbers with
        # a single group such as "ABC-123" fall back to that group instead of
        # raising IndexError, and numbers without digits skip the split.
        digit_groups = re.findall(r"\d+", self.number)
        if len(digit_groups) > 1:
            marker = digit_groups[1]
        elif digit_groups:
            marker = digit_groups[0]
        else:
            marker = ''
        title = browser_title.split(marker, 1)[-1] if marker else browser_title
        # Strip the site suffix in both its traditional and simplified forms.
        title = title.replace(' | JAV目錄大全 | 每日更新', "")
        title = title.replace(' | JAV目录大全 | 每日更新', "").strip()
        return title.replace(self.number, '').strip()
--- a/scrapinglib/madou.py
+++ b/scrapinglib/madou.py
@@ -6,6 +6,28 @@ from urllib.parse import urlparse, unquote
 from .parser import Parser
 # Series whose numeric part is zero-padded to three digits.  Each pattern
 # captures (prefix, digits, optional "-ep<N>" / "-<N>" suffix).
 NUM_RULES3=[
    # Was '(mmz{2,4})', which quantified only the 'z' and so never matched "mmz".
    r'(mmz)-?(\d{2,})(-ep\d*|-\d*)?.*',
    r'(msd)-?(\d{2,})(-ep\d*|-\d*)?.*',
    r'(yk)-?(\d{2,})(-ep\d*|-\d*)?.*',
    r'(pm)-?(\d{2,})(-ep\d*|-\d*)?.*',
    r'(mky-[a-z]{2,2})-?(\d{2,})(-ep\d*|-\d*)?.*',
 ]
 # Extract the madou-style number.
 def change_number(number):
    """Normalize *number* into the lower-case form used for madou lookups.

    The input is lower-cased and stripped.  ``md`` numbers (``md`` plus up to
    two letters) get their digits zero-padded to four characters; the series
    in ``NUM_RULES3`` are padded to three.  The hyphen between prefix and
    digits is dropped and an ``-ep<N>`` / ``-<N>`` suffix is kept.  Input that
    matches no rule is returned lower-cased and stripped, otherwise unchanged.
    """
    number = number.lower().strip()
    m = re.search(r'(md[a-z]{0,2})-?(\d{2,})(-ep\d*|-\d*)?.*', number, re.I)
    if m:
        return f'{m.group(1)}{m.group(2).zfill(4)}{m.group(3) or ""}'
    for rules in NUM_RULES3:
        m = re.search(rules, number, re.I)
        if m:
            return f'{m.group(1)}{m.group(2).zfill(3)}{m.group(3) or ""}'
    return number
 class Madou(Parser):
    source = 'madou'
@@ -14,12 +36,15 @@ class Madou(Parser):
    expr_studio = '//a[@rel="category tag"]/text()'
    expr_tags = '/html/head/meta[@name="keywords"]/@content'
    def extraInit(self):
-        self.imagecut = 0
+        self.imagecut = 4
        self.uncensored = True
        self.allow_number_change = True
    def search(self, number):
-        self.number = number.lower().strip()
+        self.number = change_number(number)
        if self.specifiedUrl:
            self.detailurl = self.specifiedUrl
        else:
@@ -65,5 +90,5 @@ class Madou(Parser):
    def getTags(self, htmltree):
        studio = self.getStudio(htmltree)
-        x = super().getTags(htmltree)
+        tags = super().getTags(htmltree)
-        return [i.strip() for i in x if len(i.strip()) and studio not in i and '麻豆' not in i]
+        return [tag for tag in tags if studio not in tag and '麻豆' not in tag]
--- a/scrapinglib/parser.py
+++ b/scrapinglib/parser.py
@@ -85,7 +85,7 @@ class Parser:
        else:
            self.detailurl = self.queryNumberUrl(number)
        if not self.detailurl:
-            return None
+            return 404
        htmltree = self.getHtmlTree(self.detailurl)
        result = self.dictformat(htmltree)
        return result
@@ -210,6 +210,13 @@ class Parser:
    def getTags(self, htmltree) -> list:
        """Return the tags selected by ``self.expr_tags`` as a flat list.

        Every value returned by ``getTreeAll`` is split on commas; each piece
        is stripped and empty pieces are dropped.
        """
        alls = self.getTreeAll(htmltree, self.expr_tags)
        tags = []
        for t in alls:
            # One value may hold several comma-separated tags.
            for tag in t.strip().split(','):
                tag = tag.strip()
                if tag:
                    tags.append(tag)
        return tags
    def getStudio(self, htmltree):
--- a/scrapinglib/pissplay.py
+++ b/scrapinglib/pissplay.py
@@ -0,0 +1,87 @@
 # -*- coding: utf-8 -*-
 import re
 from lxml import etree
 from .parser import Parser
 from datetime import datetime
 # 搜刮 https://pissplay.com/ 中的视频
 # pissplay中的视频没有番号，所以要通过文件名搜索
 # 只用文件名和网站视频名完全一致时才可以被搜刮
 class Pissplay(Parser):
    """Scraper for video pages on pissplay.com.

    The site has no catalogue numbers, so the video title doubles as the
    number and the detail URL is derived from the name that is searched for.
    """

    source = 'pissplay'
    # Videos on this site have no number, so the title is used in its place.
    expr_number = '//*[@id="video_title"]/text()'
    expr_title = '//*[@id="video_title"]/text()'
    expr_cover = '/html/head//meta[@property="og:image"]/@content'
    expr_tags = '//div[@id="video_tags"]/a/text()'
    expr_release = '//div[@class="video_date"]/text()'
    expr_outline = '//*[@id="video_description"]/p//text()'

    def extraInit(self):
        """Set this source's defaults."""
        self.specifiedSource = None
        self.imagecut = 0  # do not crop the cover

    def search(self, number):
        """Fetch the page for *number* and return the parsed data, or 404."""
        self.number = number.strip().upper()
        if not self.specifiedUrl:
            # Remove special characters, then turn the name into a URL slug.
            slug = re.sub(r"[^a-zA-Z0-9 ]", "", number).lower().replace(" ", "-")
            self.detailurl = "https://pissplay.com/videos/" + slug + "/"
        else:
            self.detailurl = self.specifiedUrl
        self.htmlcode = self.getHtml(self.detailurl)
        if self.htmlcode == 404:
            return 404
        return self.dictformat(etree.fromstring(self.htmlcode, etree.HTMLParser()))

    def getNum(self, htmltree):
        """Return the cleaned title, which stands in for the number."""
        return self.getTitle(htmltree)

    def getTitle(self, htmltree):
        """Return the title with everything but letters, digits and spaces removed."""
        return re.sub(r"[^a-zA-Z0-9 ]", "", super().getTitle(htmltree))

    def getCover(self, htmltree):
        """Return the cover URL, prefixing ``https:`` when it lacks a scheme."""
        cover = super().getCover(htmltree)
        return cover if cover.startswith('http') else 'https:' + cover

    def getRelease(self, htmltree):
        """Return the release date converted from ``%d %b %Y`` to ``%Y-%m-%d``."""
        shown = super().getRelease(htmltree)
        return datetime.strptime(shown, '%d %b %Y').strftime('%Y-%m-%d')

    def getStudio(self, htmltree):
        """Return the fixed studio name."""
        return 'PissPlay'

    def getTags(self, htmltree):
        """Return the page tags without the guest-name entry.

        When a ``Guests`` tag is present, the entry at index 1 is dropped if
        the first tag is ``Collaboration`` or ``Toilet for a Day``; otherwise
        the first tag is dropped.  Without ``Guests`` the tags are unchanged.
        """
        labels = self.getTreeAll(htmltree, self.expr_tags)
        if 'Guests' not in labels:
            return labels
        if labels[0] in ('Collaboration', 'Toilet for a Day'):
            del labels[1]
            return labels
        return labels[1:]

    def getActors(self, htmltree) -> list:
        """Return the guest named in the tags, or the default performers.

        Uses the same position rule as ``getTags``: index 1 when the first tag
        is ``Collaboration`` or ``Toilet for a Day``, index 0 otherwise.
        """
        labels = self.getTreeAll(htmltree, self.expr_tags)
        if 'Guests' not in labels:
            return ['Bruce and Morgan']
        position = 1 if labels[0] in ('Collaboration', 'Toilet for a Day') else 0
        return [labels[position]]

    def getOutline(self, htmltree):
        """Return the description text, cut before the sign-off line if present."""
        parts = self.getTreeAll(htmltree, self.expr_outline)
        sign_off = '– Morgan xx'
        if sign_off in parts:
            parts = parts[:parts.index(sign_off)]
        return ''.join(parts).replace("&","and")