Merge pull request #286 from jnozsc/fix_fanza

Fix fanza
This commit is contained in:
Yoshiko2
2020-07-27 03:48:00 +08:00
committed by GitHub
3 changed files with 27 additions and 17 deletions

View File

@@ -1,6 +1,5 @@
import requests
from lxml import etree
-import cloudscraper
import config
@@ -87,6 +86,7 @@ def post_html(url: str, query: dict) -> requests.Response:
def get_javlib_cookie() -> [dict, str]:
+import cloudscraper
proxy, timeout, retry_count, proxytype = config.Config().proxy()
proxies = get_proxy(proxy, proxytype)

10
core.py
View File

@@ -168,7 +168,8 @@ def get_info(json_data): # 返回json里的数据
cover = json_data['cover']
website = json_data['website']
series = json_data['series']
-return title, studio, year, outline, runtime, director, actor_photo, release, number, cover, website, series
+label = json_data.get('label', "")
+return title, studio, year, outline, runtime, director, actor_photo, release, number, cover, website, series, label
def small_cover_check(path, number, cover_small, c_word, conf: config.Config, filepath, failed_folder):
@@ -177,7 +178,7 @@ def small_cover_check(path, number, cover_small, c_word, conf: config.Config, fi
def create_folder(success_folder, location_rule, json_data, conf: config.Config): # 创建文件夹
-title, studio, year, outline, runtime, director, actor_photo, release, number, cover, website, series= get_info(json_data)
+title, studio, year, outline, runtime, director, actor_photo, release, number, cover, website, series, label = get_info(json_data)
if len(location_rule) > 240: # 新建成功输出文件夹
path = success_folder + '/' + location_rule.replace("'actor'", "'manypeople'", 3).replace("actor","'manypeople'",3) # path为影片+元数据所在目录
else:
@@ -265,7 +266,7 @@ def image_download(cover, number, c_word, path, conf: config.Config, filepath, f
def print_files(path, c_word, naming_rule, part, cn_sub, json_data, filepath, failed_folder, tag, actor_list, liuchu):
-title, studio, year, outline, runtime, director, actor_photo, release, number, cover, website, series = get_info(json_data)
+title, studio, year, outline, runtime, director, actor_photo, release, number, cover, website, series, label = get_info(json_data)
try:
if not os.path.exists(path):
@@ -293,8 +294,7 @@ def print_files(path, c_word, naming_rule, part, cn_sub, json_data, filepath, fa
except:
aaaa = ''
print(" <maker>" + studio + "</maker>", file=code)
-print(" <label>", file=code)
-print(" </label>", file=code)
+print(" <label>" + label + "</label>", file=code)
if cn_sub == '1':
print(" <tag>中文字幕</tag>", file=code)
if liuchu == '流出':

View File

@@ -2,6 +2,7 @@
# -*- coding: utf-8 -*-
import json
import re
+from urllib.parse import urlencode
from lxml import etree
@@ -14,7 +15,7 @@ from ADC_function import *
def getTitle(text):
html = etree.fromstring(text, etree.HTMLParser())
-result = html.xpath('//*[@id="title"]/text()')[0]
+result = html.xpath('//*[starts-with(@id, "title")]/text()')[0]
return result
@@ -56,11 +57,11 @@ def getLabel(text):
html = etree.fromstring(text, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
try:
result = html.xpath(
-"//td[contains(text(),'シリーズ')]/following-sibling::td/a/text()"
+"//td[contains(text(),'レーベル')]/following-sibling::td/a/text()"
)[0]
except:
result = html.xpath(
-"//td[contains(text(),'シリーズ')]/following-sibling::td/text()"
+"//td[contains(text(),'レーベル')]/following-sibling::td/text()"
)[0]
return result
@@ -93,9 +94,12 @@ def getRelease(text):
"//td[contains(text(),'発売日:')]/following-sibling::td/a/text()"
)[0].lstrip("\n")
except:
-result = html.xpath(
-"//td[contains(text(),'発売日:')]/following-sibling::td/text()"
-)[0].lstrip("\n")
+try:
+result = html.xpath(
+"//td[contains(text(),'発売日:')]/following-sibling::td/text()"
+)[0].lstrip("\n")
+except:
+result = "----"
if result == "----":
try:
result = html.xpath(
@@ -108,7 +112,7 @@ def getRelease(text):
)[0].lstrip("\n")
except:
pass
-return result.replace('/','-')
+return result.replace("/", "-")
def getTag(text):
@@ -187,8 +191,7 @@ def getSeries(text):
)[0]
return result
except:
-return ''
+return ""
def main(number):
@@ -208,11 +211,17 @@ def main(number):
"https://www.dmm.co.jp/mono/anime/-/detail/=/cid=",
"https://www.dmm.co.jp/digital/videoc/-/detail/=/cid=",
"https://www.dmm.co.jp/digital/nikkatsu/-/detail/=/cid=",
+"https://www.dmm.co.jp/rental/-/detail/=/cid=",
]
+chosen_url = ""
for url in fanza_urls:
chosen_url = url + fanza_search_number
-htmlcode = get_html(chosen_url)
+htmlcode = get_html(
+"https://www.dmm.co.jp/age_check/=/declared=yes/?{}".format(
+urlencode({"rurl": chosen_url})
+)
+)
if "404 Not Found" not in htmlcode:
break
if "404 Not Found" in htmlcode:
@@ -284,4 +293,5 @@ def main_htmlcode(number):
if __name__ == "__main__":
-print(main("DV-1562"))
+print(main("DV-1562"))
+print(main("96fad1217"))