更精确的有码无码处理

2022-04-17 23:36:41 +08:00
parent 0e0b92a9fa
commit c94ef3cf4a
6 changed files with 13 additions and 14 deletions
--- a/WebCrawler/carib.py
+++ b/WebCrawler/carib.py
@@ -60,7 +60,7 @@ def get_year(lx: html.HtmlElement) -> str:
 def get_outline(lx: html.HtmlElement, number: str, title: str) -> str:
    o = lx.xpath("//div[@class='movie-info section']/p[@itemprop='description']/text()")[0].strip()
-    g = getStoryline(number, title)
+    g = getStoryline(number, title, 无码=True)
    if len(g):
        return g
    return o
--- a/WebCrawler/javbus.py
+++ b/WebCrawler/javbus.py
@@ -60,10 +60,10 @@ def getCID(html):
    string = html.xpath("//a[contains(@class,'sample-box')][1]/@href")[0].replace('https://pics.dmm.co.jp/digital/video/','')
    result = re.sub('/.*?.jpg','',string)
    return result
-def getOutline(number, title):  #获取剧情介绍 多进程并发查询
+def getOutline(number, title, uncensored):  #获取剧情介绍 多进程并发查询
    if any(caller for caller in inspect.stack() if os.path.basename(caller.filename) == 'airav.py'):
        return ''   # 从airav.py过来的调用不计算outline直接返回，避免重复抓取数据拖慢处理速度
-    return getStoryline(number,title)
+    return getStoryline(number,title, 无码=uncensored)
 def getSeriseJa(html):
    x = html.xpath('//span[contains(text(),"シリーズ:")]/../a/text()')
    return str(x[0]) if len(x) else ''
@@ -98,7 +98,7 @@ def main_uncensored(number):
        'title': title,
        'studio': getStudioJa(lx),
        'year': getYear(lx),
-        'outline': getOutline(w_number, title),
+        'outline': getOutline(w_number, title, True),
        'runtime': getRuntime(lx),
        'director': getDirectorJa(lx),
        'actor': getActor(lx),
@@ -141,7 +141,7 @@ def main(number):
                'title': title,
                'studio': getStudio(lx),
                'year': getYear(lx),
-                'outline': getOutline(number, title),
+                'outline': getOutline(number, title, getUncensored(lx)),
                'runtime': getRuntime(lx),
                'director': getDirector(lx),
                'actor': getActor(lx),
--- a/WebCrawler/javdb.py
+++ b/WebCrawler/javdb.py
@@ -166,8 +166,8 @@ def getDirector(html):
    result1 = str(html.xpath('//strong[contains(text(),"導演")]/../span/text()')).strip(" ['']")
    result2 = str(html.xpath('//strong[contains(text(),"導演")]/../span/a/text()')).strip(" ['']")
    return str(result1 + result2).strip('+').replace("', '", '').replace('"', '')
-def getOutline(number, title):  #获取剧情介绍 多进程并发查询
+def getOutline(number, title, uncensored):  #获取剧情介绍 多进程并发查询
-    return getStoryline(number,title)
+    return getStoryline(number, title, 无码=uncensored)
 def getSeries(html):
    result1 = str(html.xpath('//strong[contains(text(),"系列")]/../span/text()')).strip(" ['']")
    result2 = str(html.xpath('//strong[contains(text(),"系列")]/../span/a/text()')).strip(" ['']")
@@ -287,7 +287,7 @@ def main(number):
            'actor': getActor(lx),
            'title': title,
            'studio': getStudio(detail_page, lx),
-            'outline': getOutline(number, title),
+            'outline': getOutline(number, title, getUncensored(lx)),
            'runtime': getRuntime(lx),
            'director': getDirector(lx),
            'release': getRelease(detail_page),
--- a/WebCrawler/storyline.py
+++ b/WebCrawler/storyline.py
@@ -25,14 +25,15 @@ class noThread(object):
 # 获取剧情介绍 从列表中的站点同时查，取值优先级从前到后
-def getStoryline(number, title, sites: list=None):
+def getStoryline(number, title, sites: list=None, 无码=None):
    start_time = time.time()
    conf = config.getInstance()
    if not conf.is_storyline():
        return ''
    debug = conf.debug() or conf.storyline_show() == 2
    storyine_sites = conf.storyline_site().split(',') if sites is None else sites
-    if is_uncensored(number):
+    unc = 无码 if isinstance(无码, bool) else is_uncensored(number)
+    if unc:
        storyine_sites += conf.storyline_uncensored_site().split(',')
    else:
        storyine_sites += conf.storyline_censored_site().split(',')
--- a/WebCrawler/xcity.py
+++ b/WebCrawler/xcity.py
@@ -128,7 +128,7 @@ def getOutline(html, number, title):
    a = set(storyline_site) & {'airav', 'avno1'}  # 只要中文的简介文字
    if len(a):
        site = [n for n in storyline_site if n in a]
-        g = getStoryline(number, title, site)
+        g = getStoryline(number, title, site, 无码=False)
        if len(g):
            return g
    try:
--- a/core.py
+++ b/core.py
@@ -724,10 +724,8 @@ def core_main(movie_path, number_th, oCC):
        c_word = '-C'  # 中文字幕影片后缀
    # 判断是否无码
-    uncensored = 1 if is_uncensored(number) else 0
    unce = json_data.get('无码')
-    if type(unce) is bool:
+    uncensored = int(unce) if isinstance(unce, bool) else int(is_uncensored(number))
-        uncensored = 1 if unce else 0
    if '流出' in movie_path or 'uncensored' in movie_path.lower():
        liuchu = '流出'