From 6a45b6057ade6b3405e2d9dc3d68799f64bc7ea9 Mon Sep 17 00:00:00 2001 From: lededev Date: Fri, 22 Apr 2022 12:02:56 +0800 Subject: [PATCH 1/8] resolve issue #772 --- number_parser.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/number_parser.py b/number_parser.py index 8ab9d4b..b51e89c 100755 --- a/number_parser.py +++ b/number_parser.py @@ -121,7 +121,7 @@ G_cache_uncensored_conf = Cache_uncensored_conf() # ========================================================================是否为无码 -def is_uncensored(number): +def is_uncensored(number) -> bool: if re.match( r'[\d-]{4,}|\d{6}_\d{2,3}|(cz|gedo|k|n|red-|se)\d{2,4}|heyzo.+|xxx-av-.+|heydouga-.+|x-art\.\d{2}\.\d{2}\.\d{2}', number, @@ -130,7 +130,7 @@ def is_uncensored(number): return True if G_cache_uncensored_conf.is_empty(): G_cache_uncensored_conf.set(config.getInstance().get_uncensored().split(',')) - return G_cache_uncensored_conf.check(number) + return bool(G_cache_uncensored_conf.check(number)) if __name__ == "__main__": From 41d214f391baae4e27daff9db115da045cea0445 Mon Sep 17 00:00:00 2001 From: lededev Date: Sun, 24 Apr 2022 00:54:05 +0800 Subject: [PATCH 2/8] =?UTF-8?q?=E6=96=B0=E5=A2=9E=E9=80=89=E9=A1=B9?= =?UTF-8?q?=E5=85=BC=E5=AE=B9Jellyfin=E5=B0=81=E9=9D=A2=E5=9B=BE=E6=96=87?= =?UTF-8?q?=E4=BB=B6=E5=90=8D=E8=A7=84=E5=88=99?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- config.ini | 3 +++ config.py | 3 +++ core.py | 42 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 48 insertions(+) diff --git a/config.ini b/config.ini index c17acc7..cc9c127 100755 --- a/config.ini +++ b/config.ini @@ -128,3 +128,6 @@ locations_model=hog uncensored_only=1 aways_imagecut=0 aspect_ratio=2.12 + +[jellyfin] +multi_part_fanart=0 diff --git a/config.py b/config.py index 783c2fe..2cd4015 100644 --- a/config.py +++ b/config.py @@ -394,6 +394,9 @@ class Config: def face_aspect_ratio(self) -> float: return self.conf.getfloat("face", "aspect_ratio", fallback=2.12) + def jellyfin_multi_part_fanart(self) -> bool: + return self.conf.getboolean("jellyfin", "multi_part_fanart", fallback=False) + @staticmethod def _exit(sec: str) -> None: print("[-] Read config error! Please check the {} section in config.ini", sec) diff --git a/core.py b/core.py index 7b493cf..da8b6c9 100644 --- a/core.py +++ b/core.py @@ -614,6 +614,40 @@ def paste_file_to_folder_mode2(filepath, path, multi_part, number, part, leak_wo return +def linkImage(path, number, part, leak_word, c_word, hack_word, ext): + """ + 首先尝试为图片建立符合Jellyfin封面图文件名规则的硬连接以节省磁盘空间 + 如果目标目录无法建立硬链接则将图片复制一份成为常规文件 + 常规文件日期已经存在时,若修改日期比源文件更旧,则将被新的覆盖,否则忽略 + """ + if not all(len(v) for v in (path, number, part, ext)): + return + covers = ("-fanart", "-poster", "-thumb") + normal_prefix = f"{number}{leak_word}{c_word}{hack_word}" + multi_prefix = f"{number}{part}{leak_word}{c_word}{hack_word}" + normal_pathes = (Path(path) / f"{normal_prefix}{c}{ext}" for c in covers) + multi_pathes = (Path(path) / f"{multi_prefix}{c}{ext}" for c in covers) + for normal_path, multi_path in zip(normal_pathes, multi_pathes): + if not normal_path.is_file(): + continue + mkLink = False + if not multi_path.exists(): + mkLink = True + elif multi_path.is_file(): + if multi_path.stat().st_nlink > 1: + continue + elif normal_path.stat().st_mtime <= multi_path.stat().st_mtime: + continue + mkLink = True + multi_path.unlink(missing_ok=True) + if not mkLink: + continue + try: + os.link(str(normal_path), str(multi_path), follow_symlinks=False) + except: + shutil.copyfile(str(normal_path), str(multi_path)) + + def debug_print(data: json): try: print("[+] ------- DEBUG INFO -------") @@ -791,6 +825,10 @@ def core_main(movie_path, number_th, oCC): if conf.is_watermark(): add_mark(os.path.join(path,poster_path), os.path.join(path,thumb_path), cn_sub, leak, uncensored, hack) + # 兼容Jellyfin封面图文件名规则 + if multi_part and conf.jellyfin_multi_part_fanart(): + linkImage(path, number_th, part, leak_word, c_word, hack_word, ext) + # 移动电影 paste_file_to_folder(movie_path, path, multi_part, number, part, leak_word, c_word, hack_word) @@ -834,6 +872,10 @@ def core_main(movie_path, number_th, oCC): if conf.is_watermark(): add_mark(os.path.join(path,poster_path), os.path.join(path,thumb_path), cn_sub, leak, uncensored, hack) + # 兼容Jellyfin封面图文件名规则 + if multi_part and conf.jellyfin_multi_part_fanart(): + linkImage(path, number_th, part, leak_word, c_word, hack_word, ext) + # 最后输出.nfo元数据文件,以完成.nfo文件创建作为任务成功标志 print_files(path, leak_word, c_word, json_data.get('naming_rule'), part, cn_sub, json_data, movie_path, tag, json_data.get('actor_list'), liuchu, uncensored, hack_word,fanart_path,poster_path,thumb_path) From 736e249ad5936e999e028087caef48f5d9585053 Mon Sep 17 00:00:00 2001 From: code-review-doctor Date: Sat, 23 Apr 2022 23:57:04 +0100 Subject: [PATCH 3/8] Fix issue probably-meant-fstring found at https://codereview.doctor --- WebCrawler/storyline.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/WebCrawler/storyline.py b/WebCrawler/storyline.py index 40117b5..16ab3cc 100644 --- a/WebCrawler/storyline.py +++ b/WebCrawler/storyline.py @@ -169,7 +169,7 @@ def getStoryline_airavwiki(number, debug): title = browser.page.select('head > title')[0].text.strip() detail_number = str(re.findall('\[(.*?)]', title)[0]) if not re.search(number, detail_number, re.I): - raise ValueError("detail page number not match, got ->[{detail_number}]") + raise ValueError(f"detail page number not match, got ->[{detail_number}]") desc = browser.page.select_one('div.d-flex.videoDataBlock > div.synopsis > p').text.strip() return desc except Exception as e: @@ -209,7 +209,7 @@ def getStoryline_58avgo(number, debug): title = browser.page.select_one('head > title').text.strip() detail_number = str(re.findall('\[(.*?)]', title)[0]) if not re.search(number, detail_number, re.I): - raise ValueError("detail page number not match, got ->[{detail_number}]") + raise ValueError(f"detail page number not match, got ->[{detail_number}]") return browser.page.select_one('#ContentPlaceHolder1_Label2').text.strip() except Exception as e: if debug: From 42d9986c163246891cd82a7ce87878ce694679a6 Mon Sep 17 00:00:00 2001 From: lededev Date: Sun, 24 Apr 2022 19:46:22 +0800 Subject: [PATCH 4/8] =?UTF-8?q?=E6=94=AF=E6=8C=81=E5=91=BD=E4=BB=A4?= =?UTF-8?q?=E8=A1=8C=E5=8C=85=E5=90=AB=E5=A4=9A=E4=B8=AA-C=E5=8F=82?= =?UTF-8?q?=E6=95=B0=EF=BC=8C=E4=BE=9D=E6=AC=A1=E8=BF=9E=E7=BB=AD=E6=89=A7?= =?UTF-8?q?=E8=A1=8C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Movie_Data_Capture.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/Movie_Data_Capture.py b/Movie_Data_Capture.py index 14dfd20..aa00cb5 100644 --- a/Movie_Data_Capture.py +++ b/Movie_Data_Capture.py @@ -76,8 +76,8 @@ def argparse_function(ver: str) -> typing.Tuple[str, str, str, str, bool, bool]: help="Override [priority]website= in config.") parser.add_argument("-D", "--download-images", dest='dnimg', action="store_true", help="Override [common]download_only_missing_images=0 force invoke image downloading.") - parser.add_argument("-C", "--config-override", dest='cfgcmd', default='', nargs='?', - help="Common use config override. grammar: section:key=value[;[section:]key=value] eg. 'de:s=1' or 'debug_mode:switch=1' override[debug_mode]switch=1") + parser.add_argument("-C", "--config-override", dest='cfgcmd', action='append', nargs=1, + help="Common use config override. Grammar: section:key=value[;[section:]key=value] eg. 'de:s=1' or 'debug_mode:switch=1' override[debug_mode]switch=1 Note:this parameters can be used multiple times") parser.add_argument("-z", "--zero-operation", dest='zero_op', action="store_true", help="""Only show job list of files and numbers, and **NO** actual operation is performed. It may help you correct wrong numbers before real job.""") @@ -109,8 +109,9 @@ is performed. It may help you correct wrong numbers before real job.""") if isinstance(args.dnimg, bool) and args.dnimg: conf.set_override("common:download_only_missing_images=0") set_bool_or_none("debug_mode:switch", args.debug) - if isinstance(args.cfgcmd, str) and len(args.cfgcmd.strip()): - conf.set_override(args.cfgcmd.strip()) + if isinstance(args.cfgcmd, list): + for cmd in args.cfgcmd: + conf.set_override(cmd[0]) no_net_op = False if conf.main_mode() == 3: From 2a3c50a2dd54695d41f692eae9663ba9432857a7 Mon Sep 17 00:00:00 2001 From: lededev Date: Sun, 24 Apr 2022 19:50:29 +0800 Subject: [PATCH 5/8] =?UTF-8?q?=E4=B8=8B=E8=BD=BD=E6=BC=94=E5=91=98?= =?UTF-8?q?=E5=A4=B4=E5=83=8F=E5=88=B0.actors=E7=9B=AE=E5=BD=95=EF=BC=8CKO?= =?UTF-8?q?DI=E7=94=A8=EF=BC=9B=E4=B8=8D=E8=81=94=E7=BD=91=E7=9A=84Jellyfi?= =?UTF-8?q?n=E5=B0=81=E9=9D=A2=E5=9B=BE=E6=96=87=E4=BB=B6=E5=90=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- WebCrawler/airav.py | 5 ++- WebCrawler/carib.py | 3 +- WebCrawler/fc2.py | 6 ++- WebCrawler/javbus.py | 36 ++++++++--------- WebCrawler/javdb.py | 40 +++++++++++-------- WebCrawler/xcity.py | 27 +++++++------ config.ini | 3 ++ config.py | 3 ++ core.py | 95 +++++++++++++++++++++++++++++++++++--------- 9 files changed, 149 insertions(+), 69 deletions(-) diff --git a/WebCrawler/airav.py b/WebCrawler/airav.py index f7b144c..030e8c7 100644 --- a/WebCrawler/airav.py +++ b/WebCrawler/airav.py @@ -202,8 +202,7 @@ def main(number): 'tag': getTag(htmlcode), # 使用javbus 'label': getSerise(javbus_json), - # 妈的,airav不提供作者图片 -# 'actor_photo': getActorPhoto(javbus_json), + 'actor_photo': getActorPhoto(javbus_json), 'website': 'https://www.airav.wiki/video/' + number, 'source': 'airav.py', # 使用javbus @@ -224,6 +223,8 @@ def main(number): if __name__ == '__main__': + config.getInstance().set_override("actor_photo:download_for_kodi=1") + config.getInstance().set_override("debug_mode:switch=1") print(main('ADV-R0624')) # javbus页面返回404, airav有数据 print(main('ADN-188')) # 一人 print(main('CJOD-278')) # 多人 javbus演员名称采用日语假名,airav采用日文汉字 diff --git a/WebCrawler/carib.py b/WebCrawler/carib.py index 0d917a2..462dc61 100755 --- a/WebCrawler/carib.py +++ b/WebCrawler/carib.py @@ -36,12 +36,13 @@ def main(number: str) -> json: 'extrafanart': get_extrafanart(lx), 'label': get_series(lx), 'imagecut': 1, -# 'actor_photo': get_actor_photo(lx, session), 'website': f'{G_SITE}/moviepages/{number}/index.html', 'source': 'carib.py', 'series': get_series(lx), '无码': True } + if config.getInstance().download_actor_photo_for_kodi(): + dic['actor_photo'] = get_actor_photo(lx, session) js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) return js diff --git a/WebCrawler/fc2.py b/WebCrawler/fc2.py index 8c201ec..a908269 100644 --- a/WebCrawler/fc2.py +++ b/WebCrawler/fc2.py @@ -3,6 +3,7 @@ sys.path.append('../') import re from lxml import etree#need install import json +import config import ADC_function from WebCrawler.crawler import * # import sys @@ -77,4 +78,7 @@ def main(number): return js if __name__ == '__main__': - print(main('FC2-2182382')) \ No newline at end of file + config.getInstance().set_override("debug_mode:switch=1") + #print(main('FC2-2182382')) + #print(main('FC2-607854')) + print(main('FC2-2787433')) diff --git a/WebCrawler/javbus.py b/WebCrawler/javbus.py index bb2f986..3829d16 100644 --- a/WebCrawler/javbus.py +++ b/WebCrawler/javbus.py @@ -8,16 +8,14 @@ from WebCrawler.storyline import getStoryline import inspect def getActorPhoto(html): - actors = html.xpath('//div[@class="star-name"]/a') - d={} + actors = html.xpath('//div[@class="star-name"]/../a/img') + d = {} for i in actors: - url=i.attrib['href'] - t=i.attrib['title'] - html = etree.fromstring(get_html(url), etree.HTMLParser()) - p=urljoin("https://www.javbus.com", - str(html.xpath('//*[@id="waterfall"]/div[1]/div/div[1]/img/@src')).strip(" ['']")) - p2={t:p} - d.update(p2) + p = i.attrib['src'] + if "nowprinting.gif" in p: + continue + t = i.attrib['title'] + d[t] = urljoin("https://www.javbus.com", p) return d def getTitle(html): #获取标题 title = str(html.xpath('/html/head/title/text()')[0]) @@ -109,7 +107,7 @@ def main_uncensored(number): 'extrafanart': getExtrafanart(htmlcode), 'label': getSeriseJa(lx), 'imagecut': 0, -# 'actor_photo': '', + 'actor_photo': getActorPhoto(lx), 'website': 'https://www.javbus.red/' + w_number, 'source': 'javbus.py', 'series': getSeriseJa(lx), @@ -152,7 +150,7 @@ def main(number): 'tag': getTag(lx), 'extrafanart': getExtrafanart(htmlcode), 'label': getSerise(lx), -# 'actor_photo': getActorPhoto(lx), + 'actor_photo': getActorPhoto(lx), 'website': 'https://www.javbus.com/' + number, 'source': 'javbus.py', 'series': getSerise(lx), @@ -174,14 +172,16 @@ def main(number): return js if __name__ == "__main__" : + config.getInstance().set_override("storyline:switch=0") + config.getInstance().set_override("actor_photo:download_for_kodi=1") config.getInstance().set_override("debug_mode:switch=1") - # print(main('ABP-888')) - # print(main('ABP-960')) - # print(main('ADV-R0624')) # 404 - # print(main('MMNT-010')) - # print(main('ipx-292')) - # print(main('CEMD-011')) - # print(main('CJOD-278')) + print(main('ABP-888')) + print(main('ABP-960')) + print(main('ADV-R0624')) # 404 + print(main('MMNT-010')) + print(main('ipx-292')) + print(main('CEMD-011')) + print(main('CJOD-278')) print(main('BrazzersExxtra.21.02.01')) print(main('100221_001')) print(main('AVSW-061')) diff --git a/WebCrawler/javdb.py b/WebCrawler/javdb.py index a622c35..fac4023 100755 --- a/WebCrawler/javdb.py +++ b/WebCrawler/javdb.py @@ -30,13 +30,9 @@ def getActor(html): return r def getaphoto(url, session): - html_page = session.get(url).text if session is not None else get_html(url) - img_prether = re.compile(r' div > div.frame > div.content > div > ul.profileCL > li.credit-links > a') - t = {} - for i in htmla: - p = {i.text.strip(): i['href']} - t.update(p) + t = {i.text.strip(): i['href'] for i in htmla} o = {} for k, v in t.items(): r = browser.open_relative(v) - if r.ok: - pic = browser.page.select_one('#avidolDetails > div > div.frame > div > p > img') - p = {k: urljoin(browser.url, pic['src'])} - else: - p = {k, ''} - o.update(p) + if not r.ok: + continue + pic = browser.page.select_one('#avidolDetails > div > div.frame > div > p > img') + if 'noimage.gif' in pic['src']: + continue + o[k] = urljoin(browser.url, pic['src']) return o @@ -205,11 +202,12 @@ def main(number): 'tag': getTag(lx), 'label': getLabel(lx), 'year': getYear(getRelease(lx)), # str(re.search('\d{4}',getRelease(a)).group()), -# 'actor_photo': getActorPhoto(browser), 'website': url, 'source': 'xcity.py', 'series': getSeries(lx), } + if config.getInstance().download_actor_photo_for_kodi(): + dic['actor_photo'] = getActorPhoto(browser) except Exception as e: if config.getInstance().debug(): print(e) @@ -219,6 +217,9 @@ def main(number): return js if __name__ == '__main__': + config.getInstance().set_override("storyline:switch=0") + config.getInstance().set_override("actor_photo:download_for_kodi=1") + config.getInstance().set_override("debug_mode:switch=1") print(main('RCTD-288')) - #print(main('VNDS-2624')) - #print(main('ABP-345')) + print(main('VNDS-2624')) + print(main('ABP-345')) diff --git a/config.ini b/config.ini index cc9c127..71b3642 100755 --- a/config.ini +++ b/config.ini @@ -131,3 +131,6 @@ aspect_ratio=2.12 [jellyfin] multi_part_fanart=0 + +[actor_photo] +download_for_kodi=0 diff --git a/config.py b/config.py index 2cd4015..0d38890 100644 --- a/config.py +++ b/config.py @@ -397,6 +397,9 @@ class Config: def jellyfin_multi_part_fanart(self) -> bool: return self.conf.getboolean("jellyfin", "multi_part_fanart", fallback=False) + def download_actor_photo_for_kodi(self) -> bool: + return self.conf.getboolean("actor_photo", "download_for_kodi", fallback=False) + @staticmethod def _exit(sec: str) -> None: print("[-] Read config error! Please check the {} section in config.ini", sec) diff --git a/core.py b/core.py index da8b6c9..3563eb3 100644 --- a/core.py +++ b/core.py @@ -172,6 +172,7 @@ def download_file_with_filename(url, filename, path, filepath): moveFailedFolder(filepath) return + def trailer_download(trailer, leak_word, c_word, hack_word, number, path, filepath): if download_file_with_filename(trailer, number + leak_word + c_word + hack_word + '-trailer.mp4', path, filepath) == 'failed': return @@ -187,12 +188,50 @@ def trailer_download(trailer, leak_word, c_word, hack_word, number, path, filepa return print('[+]Video Downloaded!', path + '/' + number + leak_word + c_word + hack_word + '-trailer.mp4') + +def actor_photo_download(actors, save_dir, number): + if not isinstance(actors, dict) or not len(actors) or not len(save_dir): + return + save_dir = Path(save_dir) + if not save_dir.is_dir(): + return + conf = config.getInstance() + actors_dir = save_dir / '.actors' + download_only_missing_images = conf.download_only_missing_images() + dn_list = [] + for actor_name, url in actors.items(): + res = re.match(r'^http.*(\.\w+)$', url, re.A) + if not res: + continue + ext = res.group(1) + pic_fullpath = actors_dir / f'{actor_name}{ext}' + if download_only_missing_images and not file_not_exist_or_empty(pic_fullpath): + continue + dn_list.append((url, pic_fullpath)) + if not len(dn_list): + return + parallel = min(len(dn_list), conf.extrafanart_thread_pool_download()) + if parallel > 100: + print('[!]Warrning: Parallel download thread too large may cause website ban IP!') + result = parallel_download_files(dn_list, parallel) + failed = 0 + for i, r in enumerate(result): + if not r: + failed += 1 + print(f"[-]Actor photo '{dn_list[i][0]}' to '{dn_list[i][1]}' download failed!") + if failed: # 非致命错误,电影不移入失败文件夹,将来可以用模式3补齐 + print(f"[-]Failed downloaded {failed}/{len(result)} actor photo for [{number}] to '{actors_dir}', you may retry run mode 3 later.") + else: + print(f"[+]Successfully downloaded {len(result)} actor photo.") + + # 剧照下载成功,否则移动到failed def extrafanart_download(data, path, number, filepath): if config.getInstance().extrafanart_thread_pool_download(): return extrafanart_download_threadpool(data, path, number) extrafanart_download_one_by_one(data, path, filepath) + def extrafanart_download_one_by_one(data, path, filepath): tm_start = time.perf_counter() j = 1 @@ -252,12 +291,14 @@ def extrafanart_download_threadpool(url_list, save_dir, number): if conf.debug(): print(f'[!]Extrafanart download ThreadPool mode runtime {time.perf_counter() - tm_start:.3f}s') + def image_ext(url): try: return os.path.splitext(url)[-1] except: return ".jpg" + # 封面是否下载成功,否则移动到failed def image_download(cover, fanart_path, thumb_path, path, filepath): full_filepath = os.path.join(path, fanart_path) @@ -676,12 +717,14 @@ def core_main_no_net_op(movie_path, number): cn_sub = '' hack = '' hack_word = '' - ext = '.jpg' imagecut = 1 + multi = False + part = '' path = str(Path(movie_path).parent) if re.search('[-_]CD\d+', movie_path, re.IGNORECASE): part = re.findall('[-_]CD\d+', movie_path, re.IGNORECASE)[0].upper() + multi = True if re.search(r'[-_]C(\.\w+$|-\w+)|\d+ch(\.\w+$|-\w+)', movie_path, re.I) or '中文' in movie_path or '字幕' in movie_path: cn_sub = '1' @@ -696,19 +739,24 @@ def core_main_no_net_op(movie_path, number): hack_word = "-hack" prestr = f"{number}{leak_word}{c_word}{hack_word}" + full_nfo = Path(path) / f"{prestr}{part}.nfo" + if full_nfo.is_file(): + if full_nfo.read_text(encoding='utf-8').find(r'无码') >= 0: + uncensored = 1 + try: + nfo_xml = etree.parse(full_nfo) + nfo_fanart_path = nfo_xml.xpath('//fanart/text()')[0] + ext = Path(nfo_fanart_path).suffix + except: + return + else: + return fanart_path = f"{prestr}-fanart{ext}" poster_path = f"{prestr}-poster{ext}" thumb_path = f"{prestr}-thumb{ext}" full_fanart_path = os.path.join(path, fanart_path) full_poster_path = os.path.join(path, poster_path) full_thumb_path = os.path.join(path, thumb_path) - full_nfo = Path(path) / f"{prestr}{part}.nfo" - - if full_nfo.is_file(): - if full_nfo.read_text(encoding='utf-8').find(r'无码') >= 0: - uncensored = 1 - else: - return if not all(os.path.isfile(f) for f in (full_fanart_path, full_thumb_path)): return @@ -717,6 +765,9 @@ def core_main_no_net_op(movie_path, number): if conf.is_watermark(): add_mark(full_poster_path, full_thumb_path, cn_sub, leak, uncensored, hack) + if multi and conf.jellyfin_multi_part_fanart(): + linkImage(path, number, part, leak_word, c_word, hack_word, ext) + def core_main(movie_path, number_th, oCC): conf = config.getInstance() @@ -808,16 +859,17 @@ def core_main(movie_path, number_th, oCC): # 下载预告片 if conf.is_trailer() and json_data.get('trailer'): trailer_download(json_data.get('trailer'), leak_word, c_word, hack_word, number, path, movie_path) - except: - pass - try: + # 下载剧照 data, path, filepath if conf.is_extrafanart() and json_data.get('extrafanart'): extrafanart_download(json_data.get('extrafanart'), path, number, movie_path) + + # 下载演员头像 KODI .actors 目录位置 + if conf.download_actor_photo_for_kodi(): + actor_photo_download(json_data.get('actor_photo'), path, number) except: pass - # 裁剪图 cutImage(imagecut, path, fanart_path, poster_path, bool(conf.face_uncensored_only() and not uncensored)) @@ -857,13 +909,20 @@ def core_main(movie_path, number_th, oCC): image_download( cover, fanart_path, thumb_path, path, movie_path) if not multi_part or part.lower() == '-cd1': - # 下载预告片 - if conf.is_trailer() and json_data.get('trailer'): - trailer_download(json_data.get('trailer'), leak_word, c_word, hack_word, number, path, movie_path) + try: + # 下载预告片 + if conf.is_trailer() and json_data.get('trailer'): + trailer_download(json_data.get('trailer'), leak_word, c_word, hack_word, number, path, movie_path) - # 下载剧照 data, path, filepath - if conf.is_extrafanart() and json_data.get('extrafanart'): - extrafanart_download(json_data.get('extrafanart'), path, number, movie_path) + # 下载剧照 data, path, filepath + if conf.is_extrafanart() and json_data.get('extrafanart'): + extrafanart_download(json_data.get('extrafanart'), path, number, movie_path) + + # 下载演员头像 KODI .actors 目录位置 + if conf.download_actor_photo_for_kodi(): + actor_photo_download(json_data.get('actor_photo'), path, number) + except: + pass # 裁剪图 cutImage(imagecut, path, fanart_path, poster_path, bool(conf.face_uncensored_only() and not uncensored)) From ae15e0815ef3f1532dfb79d3b1aba7428f0bb19e Mon Sep 17 00:00:00 2001 From: jop6__ Date: Mon, 25 Apr 2022 13:19:22 +0800 Subject: [PATCH 6/8] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E6=A0=87=E7=AD=BE?= =?UTF-8?q?=E7=BF=BB=E8=AF=91bug?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit '''mapping_data.xpath('a[contains(@Keyword, $name)]/@' + language, name=i)[0]'''中使用了contains匹配,会导致原标签如“内S”错误命中标签“体内SJ”,因为他们也构成包含关系,xpath匹配时在name两侧添加逗号可解决该问题。 --- WebCrawler/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/WebCrawler/__init__.py b/WebCrawler/__init__.py index 9e51623..baaf0df 100644 --- a/WebCrawler/__init__.py +++ b/WebCrawler/__init__.py @@ -283,8 +283,8 @@ def get_data_from_json(file_number, oCC): def convert_list(mapping_data,language,vars): total = [] for i in vars: - if len(mapping_data.xpath('a[contains(@keyword, $name)]/@' + language, name=i)) != 0: - i = mapping_data.xpath('a[contains(@keyword, $name)]/@' + language, name=i)[0] + if len(mapping_data.xpath('a[contains(@keyword, $name)]/@' + language, name=f",{i},")) != 0: + i = mapping_data.xpath('a[contains(@keyword, $name)]/@' + language, name=f",{i},")[0] total.append(i) return total def convert(mapping_data,language,vars): From 20dbe31b49dd79519d70e4286aa9894cbd1e431b Mon Sep 17 00:00:00 2001 From: lededev Date: Fri, 29 Apr 2022 22:45:11 +0800 Subject: [PATCH 7/8] update UserAgent --- ADC_function.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/ADC_function.py b/ADC_function.py index c827a59..76250fd 100644 --- a/ADC_function.py +++ b/ADC_function.py @@ -27,8 +27,7 @@ def getXpathSingle(htmlcode, xpath): return result1 -G_USER_AGENT = r'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.75 Safari/537.36' - +G_USER_AGENT = r'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.133 Safari/537.36' def get_html(url, cookies: dict = None, ua: str = None, return_type: str = None, encoding: str = None): """ From 2fd0a7a02b2970f634dc2ef98e3e99f2330d1b68 Mon Sep 17 00:00:00 2001 From: lededev Date: Fri, 29 Apr 2022 22:45:46 +0800 Subject: [PATCH 8/8] javdb.py:sync website --- WebCrawler/javdb.py | 21 +++------------------ 1 file changed, 3 insertions(+), 18 deletions(-) diff --git a/WebCrawler/javdb.py b/WebCrawler/javdb.py index fac4023..e4e2eb9 100755 --- a/WebCrawler/javdb.py +++ b/WebCrawler/javdb.py @@ -242,12 +242,12 @@ def main(number): # javdb sometime returns multiple results, # and the first elememt maybe not the one we are looking for # iterate all candidates and find the match one - urls = html.xpath('//*[@id="videos"]/div/div/a/@href') + urls = html.xpath('//div[@class="item"]/a[@class="box"]/@href') # 记录一下欧美的ids ['Blacked','Blacked'] if re.search(r'[a-zA-Z]+\.\d{2}\.\d{2}\.\d{2}', number): correct_url = urls[0] else: - ids = html.xpath('//*[@id="videos"]/div/div/a/div[contains(@class, "uid")]/text()') + ids = html.xpath('//div[@class="item"]/a[@class="box"]/div[@class="video-title"]/strong/text()') try: correct_url = urls[ids.index(number)] except: @@ -265,21 +265,7 @@ def main(number): # etree.fromstring开销很大,最好只用一次,而它的xpath很快,比bs4 find/select快,可以多用 lx = etree.fromstring(detail_page, etree.HTMLParser()) - # no cut image by default - imagecut = 3 - # If gray image exists ,then replace with normal cover - if re.search(r'[a-zA-Z]+\.\d{2}\.\d{2}\.\d{2}', number): - cover_small = getCover_small(html) - else: - try: - cover_small = getCover_small(html, index=ids.index(number)) - except: - # if input number is "STAR438" not "STAR-438", use first search result. - cover_small = getCover_small(html) - if 'placeholder' in cover_small: - # replace wit normal cover and cut it - imagecut = 1 - cover_small = getCover(lx) + imagecut = 1 dp_number = getNum(lx) if dp_number.upper() != number.upper(): raise ValueError("number not eq"+dp_number) @@ -298,7 +284,6 @@ def main(number): 'release': getRelease(detail_page), 'number': number, 'cover': getCover(lx), - 'cover_small': cover_small, 'trailer': getTrailer(detail_page), 'extrafanart': getExtrafanart(lx), 'imagecut': imagecut,