diff --git a/WebCrawler/carib.py b/WebCrawler/carib.py index 47aa0d7..4dac7ba 100755 --- a/WebCrawler/carib.py +++ b/WebCrawler/carib.py @@ -40,6 +40,7 @@ def main(number: str) -> json: 'website': f'{G_SITE}/moviepages/{number}/index.html', 'source': 'carib.py', 'series': get_series(lx), + '无码': True } js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) return js diff --git a/WebCrawler/javbus.py b/WebCrawler/javbus.py index 46493da..2a5a303 100644 --- a/WebCrawler/javbus.py +++ b/WebCrawler/javbus.py @@ -83,6 +83,9 @@ def getExtrafanart(htmlcode): # 获取剧照 if extrafanart_imgs: return [urljoin('https://www.javbus.com',img) for img in extrafanart_imgs] return '' +def getUncensored(html): + x = html.xpath('//*[@id="navbar"]/ul[1]/li[@class="active"]/a[contains(@href,"uncensored")]') + return bool(x) def main_uncensored(number): htmlcode = get_html('https://www.javbus.com/ja/' + number) @@ -109,6 +112,7 @@ def main_uncensored(number): 'website': 'https://www.javbus.com/ja/' + number, 'source': 'javbus.py', 'series': getSeriseJa(lx), + '无码': getUncensored(lx) } js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8') return js @@ -151,6 +155,7 @@ def main(number): 'website': 'https://www.javbus.com/' + number, 'source': 'javbus.py', 'series': getSerise(lx), + '无码': getUncensored(lx) } js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4,separators=(',', ':'), ) # .encode('UTF-8') return js diff --git a/WebCrawler/javdb.py b/WebCrawler/javdb.py index a0d8a38..2d21e29 100755 --- a/WebCrawler/javdb.py +++ b/WebCrawler/javdb.py @@ -179,6 +179,9 @@ def getUserRating(html): return float(v[0][0]), int(v[0][1]) except: return +def getUncensored(html): + x = html.xpath('//strong[contains(text(),"類別")]/../span/a[contains(@href,"/tags/uncensored?")]') + return bool(x) def main(number): # javdb更新后同一时间只能登录一个数字站,最新登录站会踢出旧的登录,因此按找到的第一个javdb*.json文件选择站点, @@ -300,7 +303,7 @@ def 
main(number): 'website': urljoin('https://javdb.com', correct_url), 'source': 'javdb.py', 'series': getSeries(lx), - + '无码': getUncensored(lx) } userrating = getUserRating(lx) if isinstance(userrating, tuple) and len(userrating) == 2: @@ -328,7 +331,7 @@ if __name__ == "__main__": # print(main('blacked.20.05.30')) # print(main('AGAV-042')) # print(main('BANK-022')) - # print(main('070116-197')) + print(main('070116-197')) # print(main('093021_539')) # 没有剧照 片商pacopacomama #print(main('FC2-2278260')) # print(main('FC2-735670')) diff --git a/WebCrawler/madou.py b/WebCrawler/madou.py index 6cf9132..eb1f365 100644 --- a/WebCrawler/madou.py +++ b/WebCrawler/madou.py @@ -146,7 +146,8 @@ def main(number): 'website': url, 'source': 'madou.py', # 使用 - 'series': getSerise(html) + 'series': getSerise(html), + '无码': True } js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8') diff --git a/core.py b/core.py index e898e01..ed0ad51 100644 --- a/core.py +++ b/core.py @@ -617,6 +617,7 @@ def debug_print(data: json): def core_main_no_net_op(movie_path, number): conf = config.getInstance() + part = '' leak_word = '' leak = 0 c_word = '' @@ -627,6 +628,8 @@ def core_main_no_net_op(movie_path, number): imagecut = 1 path = str(Path(movie_path).parent) + if re.search(r'-CD\d+', movie_path, re.IGNORECASE): + part = re.findall(r'-CD\d+', movie_path, re.IGNORECASE)[0] if '-c.' in movie_path or '-C.'
in movie_path or '中文' in movie_path or '字幕' in movie_path: cn_sub = '1' c_word = '-C' # 中文字幕影片后缀 @@ -639,12 +642,19 @@ def core_main_no_net_op(movie_path, number): hack = 1 hack_word = "-hack" - fanart_path = f"{number}{leak_word}{c_word}{hack_word}-fanart{ext}" - poster_path = f"{number}{leak_word}{c_word}{hack_word}-poster{ext}" - thumb_path = f"{number}{leak_word}{c_word}{hack_word}-thumb{ext}" + prestr = f"{number}{leak_word}{c_word}{hack_word}" + fanart_path = f"{prestr}-fanart{ext}" + poster_path = f"{prestr}-poster{ext}" + thumb_path = f"{prestr}-thumb{ext}" full_fanart_path = os.path.join(path, fanart_path) full_poster_path = os.path.join(path, poster_path) full_thumb_path = os.path.join(path, thumb_path) + full_nfo = Path(path) / f"{prestr}{part}.nfo" + + if full_nfo.is_file(): + nfo = full_nfo.read_text(encoding='utf-8') + if '无码' in nfo: + uncensored = 1 if not all(os.path.isfile(f) for f in (full_fanart_path, full_thumb_path)): return @@ -695,7 +705,8 @@ def core_main(movie_path, number_th, oCC): # 判断是否无码 uncensored = 1 if is_uncensored(number) else 0 - + if json_data.get('无码'): + uncensored = 1 if '流出' in movie_path or 'uncensored' in movie_path: liuchu = '流出'