From 2a3c50a2dd54695d41f692eae9663ba9432857a7 Mon Sep 17 00:00:00 2001
From: lededev <lededev@noreplay.github.com>
Date: Sun, 24 Apr 2022 19:50:29 +0800
Subject: [PATCH] =?UTF-8?q?=E4=B8=8B=E8=BD=BD=E6=BC=94=E5=91=98=E5=A4=B4?=
 =?UTF-8?q?=E5=83=8F=E5=88=B0.actors=E7=9B=AE=E5=BD=95=EF=BC=8CKODI?=
 =?UTF-8?q?=E7=94=A8=EF=BC=9B=E4=B8=8D=E8=81=94=E7=BD=91=E7=9A=84Jellyfin?=
 =?UTF-8?q?=E5=B0=81=E9=9D=A2=E5=9B=BE=E6=96=87=E4=BB=B6=E5=90=8D?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 WebCrawler/airav.py  |  5 ++-
 WebCrawler/carib.py  |  3 +-
 WebCrawler/fc2.py    |  6 ++-
 WebCrawler/javbus.py | 36 ++++++++---------
 WebCrawler/javdb.py  | 40 +++++++++++--------
 WebCrawler/xcity.py  | 27 +++++++------
 config.ini           |  3 ++
 config.py            |  3 ++
 core.py              | 95 +++++++++++++++++++++++++++++++++++---------
 9 files changed, 149 insertions(+), 69 deletions(-)

diff --git a/WebCrawler/airav.py b/WebCrawler/airav.py
index f7b144c..030e8c7 100644
--- a/WebCrawler/airav.py
+++ b/WebCrawler/airav.py
@@ -202,8 +202,7 @@ def main(number):
             'tag': getTag(htmlcode),
             # 使用javbus
             'label': getSerise(javbus_json),
-            # 妈的，airav不提供作者图片
-#            'actor_photo': getActorPhoto(javbus_json),
+            'actor_photo': getActorPhoto(javbus_json),
             'website': 'https://www.airav.wiki/video/' + number,
             'source': 'airav.py',
             # 使用javbus
@@ -224,6 +223,8 @@ def main(number):
 
 
 if __name__ == '__main__':
+    config.getInstance().set_override("actor_photo:download_for_kodi=1")
+    config.getInstance().set_override("debug_mode:switch=1")
     print(main('ADV-R0624'))  # javbus页面返回404, airav有数据
     print(main('ADN-188'))    # 一人
     print(main('CJOD-278'))   # 多人 javbus演员名称采用日语假名，airav采用日文汉字
diff --git a/WebCrawler/carib.py b/WebCrawler/carib.py
index 0d917a2..462dc61 100755
--- a/WebCrawler/carib.py
+++ b/WebCrawler/carib.py
@@ -36,12 +36,13 @@ def main(number: str) -> json:
             'extrafanart': get_extrafanart(lx),
             'label': get_series(lx),
             'imagecut': 1,
-#            'actor_photo': get_actor_photo(lx, session),
             'website': f'{G_SITE}/moviepages/{number}/index.html',
             'source': 'carib.py',
             'series': get_series(lx),
             '无码': True
         }
+        if config.getInstance().download_actor_photo_for_kodi():
+            dic['actor_photo'] = get_actor_photo(lx, session)
         js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), )
         return js
 
diff --git a/WebCrawler/fc2.py b/WebCrawler/fc2.py
index 8c201ec..a908269 100644
--- a/WebCrawler/fc2.py
+++ b/WebCrawler/fc2.py
@@ -3,6 +3,7 @@ sys.path.append('../')
 import re
 from lxml import etree#need install
 import json
+import config
 import ADC_function
 from WebCrawler.crawler import *
 # import sys
@@ -77,4 +78,7 @@ def main(number):
     return js
 
 if __name__ == '__main__':
-    print(main('FC2-2182382'))
\ No newline at end of file
+    config.getInstance().set_override("debug_mode:switch=1")
+    #print(main('FC2-2182382'))
+    #print(main('FC2-607854'))
+    print(main('FC2-2787433'))
diff --git a/WebCrawler/javbus.py b/WebCrawler/javbus.py
index bb2f986..3829d16 100644
--- a/WebCrawler/javbus.py
+++ b/WebCrawler/javbus.py
@@ -8,16 +8,14 @@ from WebCrawler.storyline import getStoryline
 import inspect
 
 def getActorPhoto(html):
-    actors = html.xpath('//div[@class="star-name"]/a')
-    d={}
+    actors = html.xpath('//div[@class="star-name"]/../a/img')  # <img> inside the <a> siblings of each "star-name" div
+    d = {}  # image title (used as the actor name) -> absolute photo URL
     for i in actors:
-        url=i.attrib['href']
-        t=i.attrib['title']
-        html = etree.fromstring(get_html(url), etree.HTMLParser())
-        p=urljoin("https://www.javbus.com",
-                  str(html.xpath('//*[@id="waterfall"]/div[1]/div/div[1]/img/@src')).strip(" ['']"))
-        p2={t:p}
-        d.update(p2)
+        p = i.attrib['src']  # taken from the already-parsed page; no per-actor page request is made any more
+        if "nowprinting.gif" in p:  # presumably the site's "no photo yet" placeholder -- TODO confirm
+            continue
+        t = i.attrib['title']  # the <img> title attribute serves as the dictionary key
+        d[t] = urljoin("https://www.javbus.com", p)  # resolve a relative src against the site root
     return d
 def getTitle(html):  #获取标题
     title = str(html.xpath('/html/head/title/text()')[0])
@@ -109,7 +107,7 @@ def main_uncensored(number):
         'extrafanart': getExtrafanart(htmlcode),
         'label': getSeriseJa(lx),
         'imagecut': 0,
-#        'actor_photo': '',
+        'actor_photo': getActorPhoto(lx),
         'website': 'https://www.javbus.red/' + w_number,
         'source': 'javbus.py',
         'series': getSeriseJa(lx),
@@ -152,7 +150,7 @@ def main(number):
                 'tag': getTag(lx),
                 'extrafanart': getExtrafanart(htmlcode),
                 'label': getSerise(lx),
-#                'actor_photo': getActorPhoto(lx),
+                'actor_photo': getActorPhoto(lx),
                 'website': 'https://www.javbus.com/' + number,
                 'source': 'javbus.py',
                 'series': getSerise(lx),
@@ -174,14 +172,16 @@ def main(number):
         return js
 
 if __name__ == "__main__" :
+    config.getInstance().set_override("storyline:switch=0")
+    config.getInstance().set_override("actor_photo:download_for_kodi=1")
     config.getInstance().set_override("debug_mode:switch=1")
-    # print(main('ABP-888'))
-    # print(main('ABP-960'))
-    # print(main('ADV-R0624'))    # 404
-    # print(main('MMNT-010'))
-    # print(main('ipx-292'))
-    # print(main('CEMD-011'))
-    # print(main('CJOD-278'))
+    print(main('ABP-888'))
+    print(main('ABP-960'))
+    print(main('ADV-R0624'))    # 404
+    print(main('MMNT-010'))
+    print(main('ipx-292'))
+    print(main('CEMD-011'))
+    print(main('CJOD-278'))
     print(main('BrazzersExxtra.21.02.01'))
     print(main('100221_001'))
     print(main('AVSW-061'))
diff --git a/WebCrawler/javdb.py b/WebCrawler/javdb.py
index a622c35..fac4023 100755
--- a/WebCrawler/javdb.py
+++ b/WebCrawler/javdb.py
@@ -30,13 +30,9 @@ def getActor(html):
     return r
 
 def getaphoto(url, session):
-    html_page = session.get(url).text if session is not None else get_html(url)
-    img_prether = re.compile(r'<span class\=\"avatar\" style\=\"background\-image\: url\((.*?)\)')
-    img_url = img_prether.findall(html_page)
-    if img_url:
-        return img_url[0]
-    else:
-        return ''
+    html_page = session.get(url).text  # session must not be None here: the get_html() fallback was dropped
+    img_url = re.findall(r'<span class\=\"avatar\" style\=\"background\-image\: url\((.*?)\)', html_page)  # all avatar background-image URLs in the page
+    return img_url[0] if img_url else ''  # first match, or '' when the page has no avatar span
 
 def getActorPhoto(html, javdb_site, session):
     actorall = html.xpath('//strong[contains(text(),"演員:")]/../span/a[starts-with(@href,"/actors/")]')
@@ -44,9 +40,18 @@ def getActorPhoto(html, javdb_site, session):
         return {}
     a = getActor(html)
     actor_photo = {}
+    if not session:
+        session = get_html_session()
     for i in actorall:
-        if i.text in a:
-            actor_photo[i.text] = getaphoto(urljoin(f'https://{javdb_site}.com', i.attrib['href']), session)
+        x = re.findall(r'/actors/(.*)', i.attrib['href'], re.A)
+        if not len(x) or not len(x[0]) or i.text not in a:
+            continue
+        actor_id = x[0]
+        pic_url = f"https://c1.jdbstatic.com/avatars/{actor_id[:2].lower()}/{actor_id}.jpg"
+        if not session.head(pic_url).ok:
+            pic_url = getaphoto(urljoin(f'https://{javdb_site}.com', i.attrib['href']), session)
+        if len(pic_url):
+            actor_photo[i.text] = pic_url
     return actor_photo
 
 def getStudio(a, html):
@@ -300,7 +305,6 @@ def main(number):
             'tag': getTag(lx),
             'label': getLabel(lx),
             'year': getYear(detail_page),  # str(re.search('\d{4}',getRelease(a)).group()),
-#            'actor_photo': getActorPhoto(lx, javdb_site,  session),
             'website': urljoin('https://javdb.com', correct_url),
             'source': 'javdb.py',
             'series': getSeries(lx),
@@ -316,6 +320,8 @@ def main(number):
                 dic['series'] = dic['studio']
             if not dic['label']:
                 dic['label'] = dic['studio']
+        if config.getInstance().download_actor_photo_for_kodi():
+            dic['actor_photo'] = getActorPhoto(lx, javdb_site,  session)
 
 
     except Exception as e:
@@ -328,19 +334,21 @@ def main(number):
 # main('DV-1562')
 # input("[+][+]Press enter key exit, you can check the error messge before you exit.\n[+][+]按回车键结束，你可以在结束之前查看和错误信息。")
 if __name__ == "__main__":
+    config.getInstance().set_override("storyline:switch=0")
+    config.getInstance().set_override("actor_photo:download_for_kodi=1")
     config.getInstance().set_override("debug_mode:switch=1")
     # print(main('blacked.20.05.30'))
-    # print(main('AGAV-042'))
-    # print(main('BANK-022'))
+    print(main('AGAV-042'))
+    print(main('BANK-022'))
     print(main('070116-197'))
-    # print(main('093021_539'))  # 没有剧照 片商pacopacomama
+    print(main('093021_539'))  # 没有剧照 片商pacopacomama
     #print(main('FC2-2278260'))
     # print(main('FC2-735670'))
     # print(main('FC2-1174949')) # not found
-    #print(main('MVSD-439'))
+    print(main('MVSD-439'))
     # print(main('EHM0001')) # not found
     #print(main('FC2-2314275'))
-    # print(main('EBOD-646'))
-    # print(main('LOVE-262'))
+    print(main('EBOD-646'))
+    print(main('LOVE-262'))
     print(main('ABP-890'))
     print(main('blacked.14.12.08'))
diff --git a/WebCrawler/xcity.py b/WebCrawler/xcity.py
index c117ca0..1218b03 100644
--- a/WebCrawler/xcity.py
+++ b/WebCrawler/xcity.py
@@ -23,19 +23,16 @@ def getActor(browser):
 
 def getActorPhoto(browser):
     htmla = browser.page.select('#avodDetails > div > div.frame > div.content > div > ul.profileCL > li.credit-links > a')
-    t = {}
-    for i in htmla:
-        p = {i.text.strip(): i['href']}
-        t.update(p)
+    t = {i.text.strip(): i['href'] for i in htmla}  # actor name -> relative link to the actor's profile page
     o = {}
     for k, v in t.items():
         r = browser.open_relative(v)
-        if r.ok:
-            pic = browser.page.select_one('#avidolDetails > div > div.frame > div > p > img')
-            p = {k: urljoin(browser.url, pic['src'])}
-        else:
-            p = {k, ''}
-        o.update(p)
+        if not r.ok:  # profile page could not be loaded: leave this actor out of the result
+            continue
+        pic = browser.page.select_one('#avidolDetails > div > div.frame > div > p > img')  # None when the page has no such <img>
+        if pic is None or not pic.get('src') or 'noimage.gif' in pic['src']:  # no portrait, or (presumably) the placeholder image
+            continue
+        o[k] = urljoin(browser.url, pic['src'])  # absolute portrait URL keyed by actor name
     return o
 
 
@@ -205,11 +202,12 @@ def main(number):
             'tag': getTag(lx),
             'label': getLabel(lx),
             'year': getYear(getRelease(lx)),  # str(re.search('\d{4}',getRelease(a)).group()),
-#            'actor_photo': getActorPhoto(browser),
             'website': url,
             'source': 'xcity.py',
             'series': getSeries(lx),
         }
+        if config.getInstance().download_actor_photo_for_kodi():
+            dic['actor_photo'] = getActorPhoto(browser)
     except Exception as e:
         if config.getInstance().debug():
             print(e)
@@ -219,6 +217,9 @@ def main(number):
     return js
 
 if __name__ == '__main__':
+    config.getInstance().set_override("storyline:switch=0")
+    config.getInstance().set_override("actor_photo:download_for_kodi=1")
+    config.getInstance().set_override("debug_mode:switch=1")
     print(main('RCTD-288'))
-    #print(main('VNDS-2624'))
-    #print(main('ABP-345'))
+    print(main('VNDS-2624'))
+    print(main('ABP-345'))
diff --git a/config.ini b/config.ini
index cc9c127..71b3642 100755
--- a/config.ini
+++ b/config.ini
@@ -131,3 +131,6 @@ aspect_ratio=2.12
 
 [jellyfin]
 multi_part_fanart=0
+
+[actor_photo]
+download_for_kodi=0
diff --git a/config.py b/config.py
index 2cd4015..0d38890 100644
--- a/config.py
+++ b/config.py
@@ -397,6 +397,9 @@ class Config:
     def jellyfin_multi_part_fanart(self) -> bool:
         return self.conf.getboolean("jellyfin", "multi_part_fanart", fallback=False)
 
+    def download_actor_photo_for_kodi(self) -> bool:  # reads option "download_for_kodi" of the [actor_photo] section
+        return self.conf.getboolean("actor_photo", "download_for_kodi", fallback=False)  # False when the section or option is absent
+
     @staticmethod
     def _exit(sec: str) -> None:
         print("[-] Read config error! Please check the {} section in config.ini", sec)
diff --git a/core.py b/core.py
index da8b6c9..3563eb3 100644
--- a/core.py
+++ b/core.py
@@ -172,6 +172,7 @@ def download_file_with_filename(url, filename, path, filepath):
     moveFailedFolder(filepath)
     return
 
+
 def trailer_download(trailer, leak_word, c_word, hack_word, number, path, filepath):
     if download_file_with_filename(trailer, number + leak_word + c_word + hack_word + '-trailer.mp4', path, filepath) == 'failed':
         return
@@ -187,12 +188,50 @@ def trailer_download(trailer, leak_word, c_word, hack_word, number, path, filepa
         return
     print('[+]Video Downloaded!', path + '/' + number + leak_word + c_word + hack_word + '-trailer.mp4')
 
+
+def actor_photo_download(actors, save_dir, number):
+    """Fetch actor portraits ({actor name: image URL}) to '<save_dir>/.actors/<name><ext>'; per-file failures are printed only."""
+    if not isinstance(actors, dict) or not actors or not save_dir:
+        return
+    save_dir = Path(save_dir)
+    if not save_dir.is_dir():
+        return
+    conf = config.getInstance()
+    actors_dir = save_dir / '.actors'
+    download_only_missing_images = conf.download_only_missing_images()
+    dn_list = []
+    for actor_name, url in actors.items():
+        res = re.match(r'^http.*(\.\w+)$', url, re.A) if isinstance(url, str) else None  # need http... ending in ".ext"
+        if not res:
+            continue
+        pic_fullpath = actors_dir / f'{actor_name}{res.group(1)}'  # keep the extension found in the URL
+        if download_only_missing_images and not file_not_exist_or_empty(pic_fullpath):
+            continue
+        dn_list.append((url, pic_fullpath))
+    if not dn_list:
+        return
+    parallel = min(len(dn_list), conf.extrafanart_thread_pool_download())
+    if parallel > 100:
+        print('[!]Warning: Parallel download thread too large may cause website ban IP!')
+    result = parallel_download_files(dn_list, parallel)
+    failed = 0
+    for (url, pic_fullpath), r in zip(dn_list, result):  # a falsy entry in result marks a failed download
+        if not r:
+            failed += 1
+            print(f"[-]Actor photo '{url}' to '{pic_fullpath}' download failed!")
+    if failed: # non-fatal: the movie is not moved to the failed folder; run mode 3 can fill in the gaps later
+        print(f"[-]Failed downloaded {failed}/{len(result)} actor photo for [{number}] to '{actors_dir}', you may retry run mode 3 later.")
+    else:
+        print(f"[+]Successfully downloaded {len(result)} actor photo.")
+
+
 # 剧照下载成功，否则移动到failed
 def extrafanart_download(data, path, number, filepath):
     if config.getInstance().extrafanart_thread_pool_download():
         return extrafanart_download_threadpool(data, path, number)
     extrafanart_download_one_by_one(data, path, filepath)
 
+
 def extrafanart_download_one_by_one(data, path, filepath):
     tm_start = time.perf_counter()
     j = 1
@@ -252,12 +291,14 @@ def extrafanart_download_threadpool(url_list, save_dir, number):
     if conf.debug():
         print(f'[!]Extrafanart download ThreadPool mode runtime {time.perf_counter() - tm_start:.3f}s')
 
+
 def image_ext(url):
     try:
         return os.path.splitext(url)[-1]
     except:
         return ".jpg"
 
+
 # 封面是否下载成功，否则移动到failed
 def image_download(cover, fanart_path, thumb_path, path, filepath):
     full_filepath = os.path.join(path, fanart_path)
@@ -676,12 +717,14 @@ def core_main_no_net_op(movie_path, number):
     cn_sub = ''
     hack = ''
     hack_word = ''
-    ext = '.jpg'
     imagecut = 1
+    multi = False
+    part = ''
     path = str(Path(movie_path).parent)
 
     if re.search('[-_]CD\d+', movie_path, re.IGNORECASE):
         part = re.findall('[-_]CD\d+', movie_path, re.IGNORECASE)[0].upper()
+        multi = True
     if re.search(r'[-_]C(\.\w+$|-\w+)|\d+ch(\.\w+$|-\w+)', movie_path,
             re.I) or '中文' in movie_path or '字幕' in movie_path:
         cn_sub = '1'
@@ -696,19 +739,24 @@ def core_main_no_net_op(movie_path, number):
         hack_word = "-hack"
 
     prestr = f"{number}{leak_word}{c_word}{hack_word}"
+    full_nfo = Path(path) / f"{prestr}{part}.nfo"
+    if full_nfo.is_file():
+        if full_nfo.read_text(encoding='utf-8').find(r'<tag>无码</tag>') >= 0:
+            uncensored = 1
+        try:
+            nfo_xml = etree.parse(full_nfo)
+            nfo_fanart_path = nfo_xml.xpath('//fanart/text()')[0]
+            ext = Path(nfo_fanart_path).suffix
+        except:
+            return
+    else:
+        return
     fanart_path =  f"{prestr}-fanart{ext}"
     poster_path = f"{prestr}-poster{ext}"
     thumb_path =  f"{prestr}-thumb{ext}"
     full_fanart_path = os.path.join(path, fanart_path)
     full_poster_path = os.path.join(path, poster_path)
     full_thumb_path = os.path.join(path, thumb_path)
-    full_nfo = Path(path) / f"{prestr}{part}.nfo"
-
-    if full_nfo.is_file():
-        if full_nfo.read_text(encoding='utf-8').find(r'<tag>无码</tag>') >= 0:
-            uncensored = 1
-    else:
-        return
 
     if not all(os.path.isfile(f) for f in (full_fanart_path, full_thumb_path)):
         return
@@ -717,6 +765,9 @@ def core_main_no_net_op(movie_path, number):
     if conf.is_watermark():
         add_mark(full_poster_path, full_thumb_path, cn_sub, leak, uncensored, hack)
 
+    if multi and conf.jellyfin_multi_part_fanart():
+        linkImage(path, number, part, leak_word, c_word, hack_word, ext)
+
 
 def core_main(movie_path, number_th, oCC):
     conf = config.getInstance()
@@ -808,16 +859,17 @@ def core_main(movie_path, number_th, oCC):
                 # 下载预告片
                 if conf.is_trailer() and json_data.get('trailer'):
                     trailer_download(json_data.get('trailer'), leak_word, c_word, hack_word, number, path, movie_path)
-            except:
-                pass
-            try:
+
                 # 下载剧照 data, path, filepath
                 if conf.is_extrafanart() and json_data.get('extrafanart'):
                     extrafanart_download(json_data.get('extrafanart'), path, number, movie_path)
+
+                # 下载演员头像 KODI .actors 目录位置
+                if conf.download_actor_photo_for_kodi():
+                    actor_photo_download(json_data.get('actor_photo'), path, number)
             except:
                 pass
 
-
         # 裁剪图
         cutImage(imagecut, path, fanart_path, poster_path, bool(conf.face_uncensored_only() and not uncensored))
 
@@ -857,13 +909,20 @@ def core_main(movie_path, number_th, oCC):
         image_download( cover, fanart_path, thumb_path, path, movie_path)
 
         if not multi_part or part.lower() == '-cd1':
-            # 下载预告片
-            if conf.is_trailer() and json_data.get('trailer'):
-                trailer_download(json_data.get('trailer'), leak_word, c_word, hack_word, number, path, movie_path)
+            try:
+                # 下载预告片
+                if conf.is_trailer() and json_data.get('trailer'):
+                    trailer_download(json_data.get('trailer'), leak_word, c_word, hack_word, number, path, movie_path)
 
-            # 下载剧照 data, path, filepath
-            if conf.is_extrafanart() and json_data.get('extrafanart'):
-                extrafanart_download(json_data.get('extrafanart'), path, number, movie_path)
+                # 下载剧照 data, path, filepath
+                if conf.is_extrafanart() and json_data.get('extrafanart'):
+                    extrafanart_download(json_data.get('extrafanart'), path, number, movie_path)
+
+                # 下载演员头像 KODI .actors 目录位置
+                if conf.download_actor_photo_for_kodi():
+                    actor_photo_download(json_data.get('actor_photo'), path, number)
+            except:
+                pass
 
         # 裁剪图
         cutImage(imagecut, path, fanart_path, poster_path, bool(conf.face_uncensored_only() and not uncensored))