Merge branch 'master' into skip_tags2

2022-11-22 20:02:15 +08:00
parent a86cfc71d7 73d73b91fd
commit 8b63ef00d9
7 changed files with 111 additions and 147 deletions
@@ -45,12 +45,13 @@ zh_cn/zh_tw/jp：指对应语言输出的词，按设置的对应语言输出。
  <a zh_cn="删除" zh_tw="删除" jp="删除" keyword=",懸疑,悬疑,"/>
  <a zh_cn="删除" zh_tw="删除" jp="删除" keyword=",亞洲,亚洲,"/>
  <a zh_cn="删除" zh_tw="删除" jp="删除" keyword=",ハロウィーンキャンペーン,"/>
  <a zh_cn="删除" zh_tw="删除" jp="删除" keyword=",DMM獨家,DMM独家,DMM專屬,DMM专属,"/>
  <a zh_cn="删除" zh_tw="删除" jp="删除" keyword=",數位馬賽克,数位马赛克,"/>
  <a zh_cn="16小时+" zh_tw="16小時+" jp="16時間以上作品" keyword=",16小時以上作品,16小时以上作品,16時間以上作品,16小时+,16小時+,"/>
  <a zh_cn="3D" zh_tw="3D" jp="3D" keyword=",3D,"/>
  <a zh_cn="3D卡通" zh_tw="3D卡通" jp="3Dエロアニメ" keyword=",3D卡通,3Dエロアニメ,"/>
  <a zh_cn="4K" zh_tw="4K" jp="4K" keyword=",4K,"/>
  <a zh_cn="DMM独家" zh_tw="DMM獨家" jp="DMM獨家" keyword=",DMM獨家,DMM独家,DMM專屬,DMM专属,"/>
  <a zh_cn="M女" zh_tw="M女" jp="M女" keyword=",M女,"/>
  <a zh_cn="SM" zh_tw="SM" jp="SM" keyword=",SM,"/>
  <a zh_cn="轻虐" zh_tw="輕虐" jp="微SM" keyword=",微SM,轻虐,輕虐,"/>
@@ -115,8 +116,7 @@ zh_cn/zh_tw/jp：指对应语言输出的词，按设置的对应语言输出。
  <a zh_cn="男优潮吹" zh_tw="男優潮吹" jp="男の潮吹き" keyword=",男潮吹,男の潮吹き,男优潮吹,男優潮吹,"/>
  <a zh_cn="巴士导游" zh_tw="巴士導遊" jp="車掌小姐" keyword=",車掌小姐,车掌小姐,巴士乘务员,巴士乘務員,巴士导游,巴士導遊,バスガイド,"/>
  <a zh_cn="熟女" zh_tw="熟女" jp="熟女" keyword=",熟女,成熟的女人,"/>
-  <a zh_cn="出轨" zh_tw="出軌" jp="出軌" keyword=",出軌,出轨,"/>
+  <a zh_cn="出轨" zh_tw="出軌" jp="出軌" keyword=",出軌,出轨,白天出軌,白天出轨,通姦,"/>
  <a zh_cn="白天出轨" zh_tw="白天出軌" jp="白天出轨" keyword=",白天出軌,白天出轨,通姦,"/>
  <a zh_cn="处男" zh_tw="處男" jp="處男" keyword=",處男,处男,"/>
  <a zh_cn="处女" zh_tw="處女" jp="處女" keyword=",處女,处女,処女,童貞,"/>
  <a zh_cn="触手" zh_tw="觸手" jp="觸手" keyword=",觸手,触手,"/>
@@ -133,7 +133,7 @@ zh_cn/zh_tw/jp：指对应语言输出的词，按设置的对应语言输出。
  <a zh_cn="放尿" zh_tw="放尿" jp="放尿" keyword=",放尿,"/>
  <a zh_cn="女服务生" zh_tw="女服務生" jp="ウェイトレス" keyword=",服務生,服务生,女服务生,女服務生,ウェイトレス,"/>
  <a zh_cn="蒙面" zh_tw="蒙面" jp="覆面・マスク" keyword=",蒙面・面罩,蒙面・面具,覆面・マスク,"/>
-  <a zh_cn="肛交" zh_tw="肛交" jp="肛交" keyword=",肛交,アナル,"/>
+  <a zh_cn="肛交" zh_tw="肛交" jp="肛交" keyword=",肛門・肛交,肛交,アナル,"/>
  <a zh_cn="肛内中出" zh_tw="肛內中出" jp="肛內中出" keyword=",肛内中出,肛內中出,"/>
  <a zh_cn="个子高" zh_tw="個子高" jp="个子高" keyword=",高,个子高,個子高,"/>
  <a zh_cn="高中生" zh_tw="高中生" jp="高中生" keyword=",高中女生,高中生,"/>
@@ -268,7 +268,6 @@ zh_cn/zh_tw/jp：指对应语言输出的词，按设置的对应语言输出。
  <a zh_cn="插入手指" zh_tw="插入手指" jp="手指插入" keyword=",手指插入,插入手指,"/>
  <a zh_cn="首次亮相" zh_tw="首次亮相" jp="首次亮相" keyword=",首次亮相,"/>
  <a zh_cn="叔母" zh_tw="叔母" jp="叔母さん" keyword=",叔母,叔母さん,"/>
  <a zh_cn="数位马赛克" zh_tw="數位馬賽克" jp="數位馬賽克" keyword=",數位馬賽克,数位马赛克,"/>
  <a zh_cn="双性人" zh_tw="雙性人" jp="雙性人" keyword=",雙性人,双性人,"/>
  <a zh_cn="韵律服" zh_tw="韻律服" jp="レオタード" keyword=",韵律服,韻律服,レオタード,"/>
  <a zh_cn="水手服" zh_tw="水手服" jp="セーラー服" keyword=",水手服,セーラー服,"/>
@@ -290,7 +289,7 @@ zh_cn/zh_tw/jp：指对应语言输出的词，按设置的对应语言输出。
  <a zh_cn="玩物" zh_tw="玩物" jp="玩具" keyword=",玩具,玩物,"/>
  <a zh_cn="适合手机垂直播放" zh_tw="適合手機垂直播放" jp="為智能手機推薦垂直視頻" keyword=",スマホ専用縦動画,為智能手機推薦垂直視頻,适合手机垂直播放,適合手機垂直播放,"/>
  <a zh_cn="猥亵穿着" zh_tw="猥褻穿着" jp="猥褻穿著" keyword=",猥褻穿著,猥亵穿着,猥褻穿着,"/>
-  <a zh_cn="无码流出" zh_tw="無碼流出" jp="无码流出" keyword=",無碼流出,无码流出,"/>
+  <a zh_cn="无码流出" zh_tw="無碼流出" jp="无码流出" keyword=",流出,無碼流出,无码流出,"/>
  <a zh_cn="无码破解" zh_tw="無碼破解" jp="無碼破解" keyword=",無碼破解,无码破解,"/>
  <a zh_cn="无毛" zh_tw="無毛" jp="無毛" keyword=",無毛,无毛,剃毛,白虎,パイパン,"/>
  <a zh_cn="剧情" zh_tw="劇情" jp="戲劇" keyword=",戲劇,戏剧,剧情,劇情,戲劇x,戏剧、连续剧,戲劇、連續劇,ドラマ,"/>
@@ -15,6 +15,8 @@
 [English](https://github.com/yoshiko2/Movie_Data_Capture/blob/master/README_EN.md)
 ## 广告位招租 yoshiko2.dev@gmail.com
 # 文档
 * [官方教程WIKI](https://github.com/yoshiko2/Movie_Data_Capture/wiki)
 * [VergilGao's Docker部署](https://github.com/VergilGao/docker-mdc)
@@ -84,14 +84,15 @@ def small_cover_check(path, filename, cover_small, movie_path, json_headers=None
 def create_folder(json_data):  # 创建文件夹
-    title, studio, year, outline, runtime, director, actor_photo, release, number, cover, trailer, website, series, label = get_info(json_data)
+    title, studio, year, outline, runtime, director, actor_photo, release, number, cover, trailer, website, series, label = get_info(
        json_data)
    conf = config.getInstance()
    success_folder = conf.success_folder()
    actor = json_data.get('actor')
    location_rule = eval(conf.location_rule(), json_data)
    if 'actor' in conf.location_rule() and len(actor) > 100:
        print(conf.location_rule())
-        location_rule = eval(conf.location_rule().replace("actor","'多人作品'"), json_data)
+        location_rule = eval(conf.location_rule().replace("actor", "'多人作品'"), json_data)
    maxlen = conf.max_title_len()
    if 'title' in conf.location_rule() and len(title) > maxlen:
        shorttitle = title[0:maxlen]
@@ -129,7 +130,7 @@ def download_file_with_filename(url, filename, path, filepath, json_headers=None
                except:
                    print(f"[-]Fatal error! Can not make folder '{path}'")
                    os._exit(0)
-            r = get_html(url=url,return_type='content',json_headers=json_headers)
+            r = get_html(url=url, return_type='content', json_headers=json_headers)
            if r == '':
                print('[-]Movie Download Data not found!')
                return
@@ -144,20 +145,22 @@ def download_file_with_filename(url, filename, path, filepath, json_headers=None
        #     moveFailedFolder(filepath)
        #     return
        except Exception as e:
-            print('[-]Image Download :Error',e)
+            print('[-]Image Download :Error', e)
    print('[-]Connect Failed! Please check your Proxy or Network!')
    moveFailedFolder(filepath)
    return
 def trailer_download(trailer, leak_word, c_word, hack_word, number, path, filepath):
-    if download_file_with_filename(trailer, number + leak_word + c_word + hack_word + '-trailer.mp4', path, filepath) == 'failed':
+    if download_file_with_filename(trailer, number + leak_word + c_word + hack_word + '-trailer.mp4', path,
                                   filepath) == 'failed':
        return
    configProxy = config.getInstance().proxy()
    for i in range(configProxy.retry):
-        if file_not_exist_or_empty(path+'/' + number + leak_word + c_word + hack_word + '-trailer.mp4'):
+        if file_not_exist_or_empty(path + '/' + number + leak_word + c_word + hack_word + '-trailer.mp4'):
            print('[!]Video Download Failed! Trying again. [{}/3]', i + 1)
-            download_file_with_filename(trailer, number + leak_word + c_word + hack_word + '-trailer.mp4', path, filepath)
+            download_file_with_filename(trailer, number + leak_word + c_word + hack_word + '-trailer.mp4', path,
                                        filepath)
            continue
        else:
            break
@@ -197,7 +200,8 @@ def actor_photo_download(actors, save_dir, number):
            failed += 1
            print(f"[-]Actor photo '{dn_list[i][0]}' to '{dn_list[i][1]}' download failed!")
    if failed:  # 非致命错误，电影不移入失败文件夹，将来可以用模式3补齐
-        print(f"[-]Failed downloaded {failed}/{len(result)} actor photo for [{number}] to '{actors_dir}', you may retry run mode 3 later.")
+        print(
            f"[-]Failed downloaded {failed}/{len(result)} actor photo for [{number}] to '{actors_dir}', you may retry run mode 3 later.")
    else:
        print(f"[+]Successfully downloaded {len(result)} actor photo.")
@@ -239,7 +243,7 @@ def extrafanart_download_one_by_one(data, path, filepath, json_data=None):
        print(f'[!]Extrafanart download one by one mode runtime {time.perf_counter() - tm_start:.3f}s')
-def extrafanart_download_threadpool(url_list, save_dir, number,json_data=None):
+def extrafanart_download_threadpool(url_list, save_dir, number, json_data=None):
    tm_start = time.perf_counter()
    conf = config.getInstance()
    extrafanart_dir = Path(save_dir) / conf.get_extrafanart()
@@ -262,7 +266,8 @@ def extrafanart_download_threadpool(url_list, save_dir, number,json_data=None):
            failed += 1
            print(f'[-]Extrafanart {i} for [{number}] download failed!')
    if failed:  # 非致命错误，电影不移入失败文件夹，将来可以用模式3补齐
-        print(f"[-]Failed downloaded {failed}/{len(result)} extrafanart images for [{number}] to '{extrafanart_dir}', you may retry run mode 3 later.")
+        print(
            f"[-]Failed downloaded {failed}/{len(result)} extrafanart images for [{number}] to '{extrafanart_dir}', you may retry run mode 3 later.")
    else:
        print(f"[+]Successfully downloaded {len(result)} extrafanarts.")
    if conf.debug():
@@ -307,12 +312,14 @@ def image_download(cover, fanart_path, thumb_path, path, filepath, json_headers=
    shutil.copyfile(full_filepath, os.path.join(path, thumb_path))
-def print_files(path, leak_word, c_word, naming_rule, part, cn_sub, json_data, filepath, tag, actor_list, liuchu, uncensored, hack_word,_4k,fanart_path,poster_path,thumb_path):
+def print_files(path, leak_word, c_word, naming_rule, part, cn_sub, json_data, filepath, tag, actor_list, liuchu,
-    title, studio, year, outline, runtime, director, actor_photo, release, number, cover, trailer, website, series, label = get_info(json_data)
+                uncensored, hack_word, _4k, fanart_path, poster_path, thumb_path):
    title, studio, year, outline, runtime, director, actor_photo, release, number, cover, trailer, website, series, label = get_info(
        json_data)
    if config.getInstance().main_mode() == 3:  # 模式3下，由于视频文件不做任何改变，.nfo文件必须和视频文件名称除后缀外完全一致，KODI等软件方可支持
        nfo_path = str(Path(filepath).with_suffix('.nfo'))
    else:
-        nfo_path = os.path.join(path,f"{number}{part}{leak_word}{c_word}{hack_word}.nfo")
+        nfo_path = os.path.join(path, f"{number}{part}{leak_word}{c_word}{hack_word}.nfo")
    try:
        if not os.path.exists(path):
            try:
@@ -364,6 +371,7 @@ def print_files(path, leak_word, c_word, naming_rule, part, cn_sub, json_data, f
                pass
            print("  <maker>" + studio + "</maker>", file=code)
            print("  <label>" + label + "</label>", file=code)
            skip_tags = config.getInstance().donot_save_tags()
            if not skip_tags:
                if cn_sub == '1':
@@ -385,7 +393,7 @@ def print_files(path, leak_word, c_word, naming_rule, part, cn_sub, json_data, f
            if cn_sub == '1':
                print("  <genre>中文字幕</genre>", file=code)
            if liuchu == '流出':
-                print("  <genre>流出</genre>", file=code)
+                print("  <genre>无码流出</genre>", file=code)
            if uncensored == 1:
                print("  <genre>无码</genre>", file=code)
            if hack_word != '':
@@ -471,7 +479,7 @@ def add_mark(poster_path, thumb_path, cn_sub, leak, uncensored, hack) -> None:
    if cn_sub:
        mark_type += ',字幕'
    if leak:
-        mark_type += ',流出'
+        mark_type += ',无码流出'
    if uncensored:
        mark_type += ',无码'
    if hack:
@@ -541,6 +549,8 @@ def add_to_pic(pic_path, img_pic, size, count, mode):
    ]
    img_pic.paste(img_subt, (pos[count]['x'], pos[count]['y']), mask=a)
    img_pic.save(pic_path, quality=95)
 # ========================结束=================================
@@ -602,7 +612,8 @@ def paste_file_to_folder(filepath, path, multi_part, number, part, leak_word, c_
        return
-def paste_file_to_folder_mode2(filepath, path, multi_part, number, part, leak_word, c_word, hack_word):  # 文件路径，番号，后缀，要移动至的位置
+def paste_file_to_folder_mode2(filepath, path, multi_part, number, part, leak_word, c_word,
                               hack_word):  # 文件路径，番号，后缀，要移动至的位置
    if multi_part == 1:
        number += part  # 这时number会被附加上CD1后缀
    filepath_obj = pathlib.Path(filepath)
@@ -727,7 +738,7 @@ def core_main_no_net_op(movie_path, number):
        c_word = '-C'  # 中文字幕影片后缀
    uncensored = 1 if is_uncensored(number) else 0
    if '流出' in movie_path or 'uncensored' in movie_path.lower():
-        leak_word = '-流出' # 流出影片后缀
+        leak_word = '-无码流出'  # 无码流出影片后缀
        leak = 1
    if 'hack'.upper() in str(movie_path).upper() or '破解' in movie_path:
@@ -814,7 +825,7 @@ def core_main(movie_path, number_th, oCC, specified_source=None, specified_url=N
    if '流出' in movie_path or 'uncensored' in movie_path.lower():
        liuchu = '流出'
        leak = 1
-        leak_word = '-流出' # 流出影片后缀
+        leak_word = '-无码流出'  # 流出影片后缀
    else:
        leak = 0
@@ -823,9 +834,10 @@ def core_main(movie_path, number_th, oCC, specified_source=None, specified_url=N
        hack_word = "-hack"
    # 判断是否4k
-    if '4K' in tag: tag.remove('4K') # 从tag中移除'4K'
+    if '4K' in tag:
        tag.remove('4K')  # 从tag中移除'4K'
    props = get_video_properties(movie_path)  # 判断是否为4K视频
-    if props['width'] >=4096 or props['height'] >= 2160:
+    if props['width'] >= 4096 or props['height'] >= 2160:
        _4k = '4k'
        _4k_world = '-4k'
@@ -834,8 +846,7 @@ def core_main(movie_path, number_th, oCC, specified_source=None, specified_url=N
        debug_print(json_data)
    # 创建文件夹
-    #path = create_folder(rootpath + '/' + conf.success_folder(),  json_data.get('location_rule'), json_data)
+    # path = create_folder(rootpath + '/' + conf.success_folder(),  json_data.get('location_rule'), json_data)
    cover = json_data.get('cover')
    ext = image_ext(cover)
@@ -889,7 +900,7 @@ def core_main(movie_path, number_th, oCC, specified_source=None, specified_url=N
        # 添加水印
        if conf.is_watermark():
-            add_mark(os.path.join(path,poster_path), os.path.join(path,thumb_path), cn_sub, leak, uncensored, hack)
+            add_mark(os.path.join(path, poster_path), os.path.join(path, thumb_path), cn_sub, leak, uncensored, hack)
        # 兼容Jellyfin封面图文件名规则
        if multi_part and conf.jellyfin_multi_part_fanart():
@@ -899,8 +910,9 @@ def core_main(movie_path, number_th, oCC, specified_source=None, specified_url=N
        paste_file_to_folder(movie_path, path, multi_part, number, part, leak_word, c_word, hack_word)
        # 最后输出.nfo元数据文件，以完成.nfo文件创建作为任务成功标志
-        print_files(path, leak_word, c_word,  json_data.get('naming_rule'), part, cn_sub, json_data, movie_path, tag,  json_data.get('actor_list'), liuchu, uncensored, hack_word
+        print_files(path, leak_word, c_word, json_data.get('naming_rule'), part, cn_sub, json_data, movie_path, tag,
-        ,_4k,fanart_path,poster_path,thumb_path)
+                    json_data.get('actor_list'), liuchu, uncensored, hack_word
                    , _4k, fanart_path, poster_path, thumb_path)
    elif conf.main_mode() == 2:
        # 创建文件夹
@@ -908,7 +920,7 @@ def core_main(movie_path, number_th, oCC, specified_source=None, specified_url=N
        # 移动文件
        paste_file_to_folder_mode2(movie_path, path, multi_part, number, part, leak_word, c_word, hack_word)
        if conf.is_watermark():
-            add_mark(os.path.join(path,poster_path), os.path.join(path,thumb_path), cn_sub, leak, uncensored, hack)
+            add_mark(os.path.join(path, poster_path), os.path.join(path, thumb_path), cn_sub, leak, uncensored, hack)
    elif conf.main_mode() == 3:
        path = str(Path(movie_path).parent)
@@ -952,7 +964,7 @@ def core_main(movie_path, number_th, oCC, specified_source=None, specified_url=N
        # 添加水印
        if conf.is_watermark():
-            add_mark(os.path.join(path,poster_path), os.path.join(path,thumb_path), cn_sub, leak, uncensored, hack)
+            add_mark(os.path.join(path, poster_path), os.path.join(path, thumb_path), cn_sub, leak, uncensored, hack)
        # 兼容Jellyfin封面图文件名规则
        if multi_part and conf.jellyfin_multi_part_fanart():
@@ -960,4 +972,5 @@ def core_main(movie_path, number_th, oCC, specified_source=None, specified_url=N
        # 最后输出.nfo元数据文件，以完成.nfo文件创建作为任务成功标志
        print_files(path, leak_word, c_word, json_data.get('naming_rule'), part, cn_sub, json_data, movie_path,
-                    tag, json_data.get('actor_list'), liuchu, uncensored, hack_word,fanart_path,poster_path,thumb_path)
+                    tag, json_data.get('actor_list'), liuchu, uncensored, hack_word, fanart_path, poster_path,
                    thumb_path)
@@ -9,3 +9,4 @@ certifi
 MechanicalSoup
 opencc-python-reimplemented
 face_recognition
 get-video-properties
@@ -11,7 +11,6 @@ from .gcolle import Gcolle
 from .getchu import Getchu
 from .jav321 import Jav321
 from .javdb import Javdb
 from .mv91 import Mv91
 from .fc2 import Fc2
 from .madou import Madou
 from .mgstage import Mgstage
@@ -19,6 +18,7 @@ from .javbus import Javbus
 from .xcity import Xcity
 from .avsox import Avsox
 from .javlibrary import Javlibrary
 from .javday import Javday
 from .tmdb import Tmdb
 from .imdb import Imdb
@@ -50,8 +50,8 @@ class Scraping:
    """
    """
    adult_full_sources = ['javlibrary', 'javdb', 'javbus', 'airav', 'fanza', 'xcity', 'jav321',
-                          'mgstage', 'fc2', 'avsox', 'dlsite', 'carib', 'madou', 'mv91',
+                          'mgstage', 'fc2', 'avsox', 'dlsite', 'carib', 'madou', 
-                          'getchu', 'gcolle'
+                          'getchu', 'gcolle','javday'
                          ]
    adult_func_mapping = {
        'avsox': Avsox().scrape,
@@ -65,11 +65,11 @@ class Scraping:
        'fanza': Fanza().scrape,
        'airav': Airav().scrape,
        'carib': Carib().scrape,
        'mv91': Mv91().scrape,
        'gcolle': Gcolle().scrape,
        'javdb': Javdb().scrape,
        'getchu': Getchu().scrape,
        'javlibrary': Javlibrary().scrape,
        'javday': Javday().scrape
    }
    general_full_sources = ['tmdb', 'imdb']
@@ -0,0 +1,43 @@
 # -*- coding: utf-8 -*-
 import re
 from lxml import etree
 from urllib.parse import urlparse, unquote
 from .parser import Parser
 class Javday(Parser):
    """Scraper for javday.tv detail pages.

    The ``expr_*`` attributes are XPath expressions; ``expr_url`` is used
    directly by ``search`` below, the others are presumably consumed by the
    ``Parser`` base class (confirm against ``Parser``).
    """
    source = 'javday'

    expr_url = '/html/head/meta[@property="og:url"]/@content'
    expr_cover = '/html/head/meta[@property="og:image"]/@content'
    expr_tags = '/html/head/meta[@name="keywords"]/@content'
    expr_title = "/html/head/title/text()"
    expr_actor = "//span[@class='vod_actor']/a/text()"
    expr_studio = '//span[@class="producer"]/a/text()'
    expr_number = '//span[@class="jpnum"]/text()'

    def extraInit(self):
        """Set the per-source flags ``imagecut`` and ``uncensored``."""
        # NOTE(review): the meaning of imagecut == 4 is defined outside this class.
        self.imagecut = 4
        self.uncensored = True

    def search(self, number):
        """Load the detail page for ``number`` and return the scraped result.

        The number is stripped and upper-cased. Unless ``specifiedUrl`` is
        set, the page URL is built from the number with hyphens removed.
        Returns 404 when ``getHtml`` returns 404, otherwise whatever
        ``dictformat`` produces for the parsed page.
        """
        self.number = number.strip().upper()
        if not self.specifiedUrl:
            compact_number = self.number.replace("-", "")
            self.detailurl = "https://javday.tv/videos/" + compact_number + "/"
        else:
            self.detailurl = self.specifiedUrl

        page = self.getHtml(self.detailurl)
        self.htmlcode = page
        if page == 404:
            return 404

        tree = etree.fromstring(page, etree.HTMLParser())
        # Replace the requested URL with the og:url value read from the page.
        self.detailurl = self.getTreeElement(tree, self.expr_url)
        return self.dictformat(tree)

    def getTitle(self, htmltree):
        """Return the base-class title with the number and site name removed."""
        cleaned = super().getTitle(htmltree)
        # Strip the number and the site name, in that order.
        for unwanted in (self.number, "- JAVDAY.TV"):
            cleaned = cleaned.replace(unwanted, "")
        return cleaned.strip()
@@ -1,94 +0,0 @@
 # -*- coding: utf-8 -*-
 import re
 from lxml import etree
 from .parser import Parser
 class Mv91(Parser):
    """Scraper for 91mv.org pages.

    The ``expr_*`` attributes are XPath expressions; ``expr_actor`` is used
    directly by ``getActors``, the others are presumably consumed by the
    ``Parser`` base class (confirm against ``Parser``). Most getters are
    best-effort: on any ordinary error they return an empty string.
    """
    source = 'mv91'

    expr_number = '//div[@class="player-title"]/text()'
    expr_title = '//div[@class="player-title"]/text()'
    expr_release = '//p[@class="date"]/text()'
    expr_outline = '//div[@class="play-text"]/text()'
    expr_tags = '//div[@class="player-tag"]/text()'
    expr_actor = '//p[@class="player-name"]/text()'

    def extraInit(self):
        """Set the per-source flags ``imagecut`` and ``uncensored``."""
        self.imagecut = 0
        self.uncensored = True

    def getHtmlTree(self, url, type=None):
        """Fetch ``url``, keep the raw page in ``self.htmlcode`` and parse it.

        Returns 404 when ``getHtml`` returns 404, otherwise the parsed tree.
        """
        self.htmlcode = self.getHtml(url, type)
        if self.htmlcode == 404:
            return 404
        return etree.fromstring(self.htmlcode, etree.HTMLParser())

    def queryNumberUrl(self, number):
        """Search the site for ``number`` and return the first hit's URL.

        The ``91CM-`` / ``91MS-`` prefixes are dropped from the search keyword.
        """
        keyword = number.replace('91CM-', '').replace('91MS-', '')
        search_html = self.getHtml('https://www.91mv.org/index/search?keywords=' + keyword)
        html = etree.fromstring(search_html, etree.HTMLParser())
        # NOTE(review): raises IndexError when the search page has no result
        # link; left unchanged because callers may rely on that propagating.
        endurl = html.xpath('//a[@class="video-list"]/@href')[0]
        return 'https://www.91mv.org' + endurl

    def getNum(self, htmltree):
        """Return the number found in the page title, or '' if unavailable.

        When the title does not match the ``91...-digits`` pattern, the second
        whitespace/slash separated token is used, and it must equal
        ``self.number`` (case-insensitively) or '' is returned.
        """
        try:
            num = super().getNum(htmltree)
            finds = re.findall(r'(.*)(91.*-\d*)', num)
            if finds:
                result = str(finds[0][1])
            else:
                result = num.replace('/', ' ').split()[1]
                if self.number.upper() != result.upper():
                    # The page is for a different number: treat as not found.
                    return ''
            return result.strip()
        except Exception:
            return ''

    def getTitle(self, htmltree):
        """Return the title with the number part removed, or '' on error."""
        try:
            title = super().getTitle(htmltree)
            finds = re.findall(r'(.*)(91.*-\d*)', title)
            if finds:
                result = str(finds[0][0])
            else:
                result = title.replace('/', ' ').split()[0]
            return result.replace('「预告」', '').strip('/ ')
        except Exception:
            return ''

    def getStudio(self, htmltree):
        """Return the fixed studio name used for this source."""
        return '91制片厂'

    def getActors(self, htmltree):
        """Return the actor names with the label prefix removed.

        For entries containing '/', only the part before the first '/' is
        kept and digits are stripped from it.
        """
        actors = []
        for player in self.getTreeAll(htmltree, self.expr_actor):
            player = player.replace('主演：', '')
            if '/' in player:
                player = player.split('/')[0]
                player = re.sub(r'[0-9]+', '', player)
            actors.append(player)
        return actors

    def getRelease(self, htmltree):
        """Return the release date without its label, or '' if unavailable."""
        try:
            result = super().getRelease(htmltree)
            date = result.replace('日期：', '')
            if isinstance(date, str) and date:
                return date
        except Exception:
            # Best-effort: fall through to the empty default below.
            pass
        return ''

    def getCover(self, htmltree):
        """Return the cover URL taken from the page's ``pic_url`` script variable.

        Reads ``self.htmlcode`` rather than ``htmltree``; returns '' when the
        variable is not found or the page is not text.
        """
        try:
            url = str(re.findall('var pic_url = "(.*?)"', self.htmlcode)[0])
            return url.strip()
        except Exception:
            return ''