更可靠的无码识别方法

This commit is contained in:
lededev
2022-04-10 14:48:25 +08:00
parent 8add9fe424
commit 02692becfe
5 changed files with 28 additions and 7 deletions

View File

@@ -40,6 +40,7 @@ def main(number: str) -> json:
'website': f'{G_SITE}/moviepages/{number}/index.html', 'website': f'{G_SITE}/moviepages/{number}/index.html',
'source': 'carib.py', 'source': 'carib.py',
'series': get_series(lx), 'series': get_series(lx),
'无码': True
} }
js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), )
return js return js

View File

@@ -83,6 +83,9 @@ def getExtrafanart(htmlcode): # 获取剧照
if extrafanart_imgs: if extrafanart_imgs:
return [urljoin('https://www.javbus.com',img) for img in extrafanart_imgs] return [urljoin('https://www.javbus.com',img) for img in extrafanart_imgs]
return '' return ''
def getUncensored(html):
    """Tell whether the page's active navbar entry points at an uncensored section.

    Runs an XPath query for an ``<a>`` whose ``href`` contains
    ``"uncensored"`` inside the ``li`` with class ``active`` of the first
    ``ul`` under the element with id ``navbar``.

    :param html: object exposing an ``xpath(query)`` method
        (presumably a parsed lxml tree -- confirm against the callers).
    :return: ``True`` when the query yields a truthy result, else ``False``.
    """
    active_uncensored_link = (
        '//*[@id="navbar"]/ul[1]/li[@class="active"]/a[contains(@href,"uncensored")]'
    )
    return bool(html.xpath(active_uncensored_link))
def main_uncensored(number): def main_uncensored(number):
htmlcode = get_html('https://www.javbus.com/ja/' + number) htmlcode = get_html('https://www.javbus.com/ja/' + number)
@@ -109,6 +112,7 @@ def main_uncensored(number):
'website': 'https://www.javbus.com/ja/' + number, 'website': 'https://www.javbus.com/ja/' + number,
'source': 'javbus.py', 'source': 'javbus.py',
'series': getSeriseJa(lx), 'series': getSeriseJa(lx),
'无码': getUncensored(lx)
} }
js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8') js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8')
return js return js
@@ -151,6 +155,7 @@ def main(number):
'website': 'https://www.javbus.com/' + number, 'website': 'https://www.javbus.com/' + number,
'source': 'javbus.py', 'source': 'javbus.py',
'series': getSerise(lx), 'series': getSerise(lx),
'无码': getUncensored(lx)
} }
js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4,separators=(',', ':'), ) # .encode('UTF-8') js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4,separators=(',', ':'), ) # .encode('UTF-8')
return js return js

View File

@@ -179,6 +179,9 @@ def getUserRating(html):
return float(v[0][0]), int(v[0][1]) return float(v[0][0]), int(v[0][1])
except: except:
return return
def getUncensored(html):
    """Tell whether the detail page carries an uncensored tag link.

    Runs an XPath query that finds a ``<strong>`` whose text contains
    "類別", steps to its parent, and looks under ``span`` for an ``<a>``
    whose ``href`` contains ``"/tags/uncensored?"``.

    :param html: object exposing an ``xpath(query)`` method
        (presumably a parsed lxml tree -- confirm against the callers).
    :return: ``True`` when the query yields a truthy result, else ``False``.
    """
    uncensored_tag_link = (
        '//strong[contains(text(),"類別")]/../span/a[contains(@href,"/tags/uncensored?")]'
    )
    return bool(html.xpath(uncensored_tag_link))
def main(number): def main(number):
# javdb更新后同一时间只能登录一个数字站最新登录站会踢出旧的登录因此按找到的第一个javdb*.json文件选择站点 # javdb更新后同一时间只能登录一个数字站最新登录站会踢出旧的登录因此按找到的第一个javdb*.json文件选择站点
@@ -300,7 +303,7 @@ def main(number):
'website': urljoin('https://javdb.com', correct_url), 'website': urljoin('https://javdb.com', correct_url),
'source': 'javdb.py', 'source': 'javdb.py',
'series': getSeries(lx), 'series': getSeries(lx),
'无码': getUncensored(lx)
} }
userrating = getUserRating(lx) userrating = getUserRating(lx)
if isinstance(userrating, tuple) and len(userrating) == 2: if isinstance(userrating, tuple) and len(userrating) == 2:
@@ -328,7 +331,7 @@ if __name__ == "__main__":
# print(main('blacked.20.05.30')) # print(main('blacked.20.05.30'))
# print(main('AGAV-042')) # print(main('AGAV-042'))
# print(main('BANK-022')) # print(main('BANK-022'))
# print(main('070116-197')) print(main('070116-197'))
# print(main('093021_539')) # 没有剧照 片商pacopacomama # print(main('093021_539')) # 没有剧照 片商pacopacomama
#print(main('FC2-2278260')) #print(main('FC2-2278260'))
# print(main('FC2-735670')) # print(main('FC2-735670'))

View File

@@ -146,7 +146,8 @@ def main(number):
'website': url, 'website': url,
'source': 'madou.py', 'source': 'madou.py',
# 使用 # 使用
'series': getSerise(html) 'series': getSerise(html),
'无码': True
} }
js = json.dumps(dic, ensure_ascii=False, sort_keys=True, js = json.dumps(dic, ensure_ascii=False, sort_keys=True,
indent=4, separators=(',', ':'), ) # .encode('UTF-8') indent=4, separators=(',', ':'), ) # .encode('UTF-8')

19
core.py
View File

@@ -617,6 +617,7 @@ def debug_print(data: json):
def core_main_no_net_op(movie_path, number): def core_main_no_net_op(movie_path, number):
conf = config.getInstance() conf = config.getInstance()
part = ''
leak_word = '' leak_word = ''
leak = 0 leak = 0
c_word = '' c_word = ''
@@ -627,6 +628,8 @@ def core_main_no_net_op(movie_path, number):
imagecut = 1 imagecut = 1
path = str(Path(movie_path).parent) path = str(Path(movie_path).parent)
if re.search('-CD\d+', movie_path, re.IGNORECASE):
part = re.findall('-CD\d+', movie_path, re.IGNORECASE)[0]
if '-c.' in movie_path or '-C.' in movie_path or '中文' in movie_path or '字幕' in movie_path: if '-c.' in movie_path or '-C.' in movie_path or '中文' in movie_path or '字幕' in movie_path:
cn_sub = '1' cn_sub = '1'
c_word = '-C' # 中文字幕影片后缀 c_word = '-C' # 中文字幕影片后缀
@@ -639,12 +642,19 @@ def core_main_no_net_op(movie_path, number):
hack = 1 hack = 1
hack_word = "-hack" hack_word = "-hack"
fanart_path = f"{number}{leak_word}{c_word}{hack_word}-fanart{ext}" prestr = f"{number}{leak_word}{c_word}{hack_word}"
poster_path = f"{number}{leak_word}{c_word}{hack_word}-poster{ext}" fanart_path = f"{prestr}-fanart{ext}"
thumb_path = f"{number}{leak_word}{c_word}{hack_word}-thumb{ext}" poster_path = f"{prestr}-poster{ext}"
thumb_path = f"{prestr}-thumb{ext}"
full_fanart_path = os.path.join(path, fanart_path) full_fanart_path = os.path.join(path, fanart_path)
full_poster_path = os.path.join(path, poster_path) full_poster_path = os.path.join(path, poster_path)
full_thumb_path = os.path.join(path, thumb_path) full_thumb_path = os.path.join(path, thumb_path)
full_nfo = Path(path) / f"{prestr}{part}.nfo"
if full_nfo.is_file():
nfo = full_nfo.read_text(encoding='utf-8')
if nfo.find(r'<tag>无码</tag>'):
uncensored = 1
if not all(os.path.isfile(f) for f in (full_fanart_path, full_thumb_path)): if not all(os.path.isfile(f) for f in (full_fanart_path, full_thumb_path)):
return return
@@ -695,7 +705,8 @@ def core_main(movie_path, number_th, oCC):
# 判断是否无码 # 判断是否无码
uncensored = 1 if is_uncensored(number) else 0 uncensored = 1 if is_uncensored(number) else 0
if json_data.get('无码'):
uncensored = 1
if '流出' in movie_path or 'uncensored' in movie_path: if '流出' in movie_path or 'uncensored' in movie_path:
liuchu = '流出' liuchu = '流出'