更可靠的无码识别方法

2022-04-10 14:48:25 +08:00
parent 8add9fe424
commit 02692becfe
5 changed files with 28 additions and 7 deletions
--- a/WebCrawler/carib.py
+++ b/WebCrawler/carib.py
@@ -40,6 +40,7 @@ def main(number: str) -> json:
            'website': f'{G_SITE}/moviepages/{number}/index.html',
            'source': 'carib.py',
            'series': get_series(lx),
            '无码': True
        }
        js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), )
        return js
--- a/WebCrawler/javbus.py
+++ b/WebCrawler/javbus.py
@@ -83,6 +83,9 @@ def getExtrafanart(htmlcode):  # 获取剧照
        if extrafanart_imgs:
            return [urljoin('https://www.javbus.com',img) for img in extrafanart_imgs]
    return ''
 def getUncensored(html):
    """Tell whether the active navbar entry points at an uncensored listing.

    Runs an XPath query on ``html`` looking for ``<a>`` elements whose
    ``href`` contains "uncensored", located under the ``li`` with
    ``class="active"`` in the first ``ul`` of the element with id "navbar".

    :param html: object exposing an ``xpath()`` method
                 (presumably a parsed lxml tree - confirm against callers)
    :return: True when the query result is non-empty, otherwise False
    """
    query = '//*[@id="navbar"]/ul[1]/li[@class="active"]/a[contains(@href,"uncensored")]'
    matches = html.xpath(query)
    return bool(matches)
 def main_uncensored(number):
    htmlcode = get_html('https://www.javbus.com/ja/' + number)
@@ -109,6 +112,7 @@ def main_uncensored(number):
        'website': 'https://www.javbus.com/ja/' + number,
        'source': 'javbus.py',
        'series': getSeriseJa(lx),
        '无码': getUncensored(lx)
    }
    js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), )  # .encode('UTF-8')
    return js
@@ -151,6 +155,7 @@ def main(number):
                'website': 'https://www.javbus.com/' + number,
                'source': 'javbus.py',
                'series': getSerise(lx),
                '无码': getUncensored(lx)
            }
            js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4,separators=(',', ':'), )  # .encode('UTF-8')
            return js
--- a/WebCrawler/javdb.py
+++ b/WebCrawler/javdb.py
@@ -179,6 +179,9 @@ def getUserRating(html):
        return float(v[0][0]), int(v[0][1])
    except:
        return
 def getUncensored(html):
    """Tell whether the detail page carries the uncensored tag link.

    Runs an XPath query on ``html``: starting from a ``<strong>`` whose text
    contains "類別", it goes to the parent, then into its ``<span>`` children,
    and collects ``<a>`` elements whose ``href`` contains "/tags/uncensored?".

    :param html: object exposing an ``xpath()`` method
                 (presumably a parsed lxml tree - confirm against callers)
    :return: True when the query result is non-empty, otherwise False
    """
    query = '//strong[contains(text(),"類別")]/../span/a[contains(@href,"/tags/uncensored?")]'
    matches = html.xpath(query)
    return bool(matches)
 def main(number):
    # javdb更新后同一时间只能登录一个数字站，最新登录站会踢出旧的登录，因此按找到的第一个javdb*.json文件选择站点，
@@ -300,7 +303,7 @@ def main(number):
            'website': urljoin('https://javdb.com', correct_url),
            'source': 'javdb.py',
            'series': getSeries(lx),
-
+            '无码': getUncensored(lx)
        }
        userrating = getUserRating(lx)
        if isinstance(userrating, tuple) and len(userrating) == 2:
@@ -328,7 +331,7 @@ if __name__ == "__main__":
    # print(main('blacked.20.05.30'))
    # print(main('AGAV-042'))
    # print(main('BANK-022'))
-    # print(main('070116-197'))
+    print(main('070116-197'))
    # print(main('093021_539'))  # 没有剧照 片商pacopacomama
    #print(main('FC2-2278260'))
    # print(main('FC2-735670'))
--- a/WebCrawler/madou.py
+++ b/WebCrawler/madou.py
@@ -146,7 +146,8 @@ def main(number):
            'website': url,
            'source': 'madou.py',
            # 使用
-            'series': getSerise(html)
+            'series': getSerise(html),
            '无码': True
        }
        js = json.dumps(dic, ensure_ascii=False, sort_keys=True,
                        indent=4, separators=(',', ':'), )  # .encode('UTF-8')
--- a/core.py
+++ b/core.py
@@ -617,6 +617,7 @@ def debug_print(data: json):
 def core_main_no_net_op(movie_path, number):
    conf = config.getInstance()
    part = ''
    leak_word = ''
    leak = 0
    c_word = ''
@@ -627,6 +628,8 @@ def core_main_no_net_op(movie_path, number):
    imagecut = 1
    path = str(Path(movie_path).parent)
    if re.search('-CD\d+', movie_path, re.IGNORECASE):
        part = re.findall('-CD\d+', movie_path, re.IGNORECASE)[0]
    if '-c.' in movie_path or '-C.' in movie_path or '中文' in movie_path or '字幕' in movie_path:
        cn_sub = '1'
        c_word = '-C'  # 中文字幕影片后缀
@@ -639,12 +642,19 @@ def core_main_no_net_op(movie_path, number):
        hack = 1
        hack_word = "-hack"
-    fanart_path =  f"{number}{leak_word}{c_word}{hack_word}-fanart{ext}"
+    prestr = f"{number}{leak_word}{c_word}{hack_word}"
-    poster_path = f"{number}{leak_word}{c_word}{hack_word}-poster{ext}"
+    fanart_path =  f"{prestr}-fanart{ext}"
-    thumb_path =  f"{number}{leak_word}{c_word}{hack_word}-thumb{ext}"
+    poster_path = f"{prestr}-poster{ext}"
    thumb_path =  f"{prestr}-thumb{ext}"
    full_fanart_path = os.path.join(path, fanart_path)
    full_poster_path = os.path.join(path, poster_path)
    full_thumb_path = os.path.join(path, thumb_path)
    full_nfo = Path(path) / f"{prestr}{part}.nfo"
    if full_nfo.is_file():
        nfo = full_nfo.read_text(encoding='utf-8')
        # str.find() returns -1 (truthy) when the tag is absent and 0 (falsy)
        # when it sits at the very start, so its result must not be used as a
        # boolean. A membership test expresses the intended "NFO carries the
        # uncensored tag" check correctly.
        if r'<tag>无码</tag>' in nfo:
            uncensored = 1
    if not all(os.path.isfile(f) for f in (full_fanart_path, full_thumb_path)):
        return
@@ -695,7 +705,8 @@ def core_main(movie_path, number_th, oCC):
    # 判断是否无码
    uncensored = 1 if is_uncensored(number) else 0
-
+    if json_data.get('无码'):
        uncensored = 1
    if '流出' in movie_path or 'uncensored' in movie_path:
        liuchu = '流出'