Merge pull request #286 from jnozsc/fix_fanza

Fix fanza
This commit is contained in:
Yoshiko2
2020-07-27 03:48:00 +08:00
committed by GitHub
3 changed files with 27 additions and 17 deletions

View File

@@ -1,6 +1,5 @@
 import requests
 from lxml import etree
-import cloudscraper
 import config
@@ -87,6 +86,7 @@ def post_html(url: str, query: dict) -> requests.Response:
 def get_javlib_cookie() -> [dict, str]:
+import cloudscraper
 proxy, timeout, retry_count, proxytype = config.Config().proxy()
 proxies = get_proxy(proxy, proxytype)

10
core.py
View File

@@ -168,7 +168,8 @@ def get_info(json_data): # 返回json里的数据
 cover = json_data['cover']
 website = json_data['website']
 series = json_data['series']
-return title, studio, year, outline, runtime, director, actor_photo, release, number, cover, website, series
+label = json_data.get('label', "")
+return title, studio, year, outline, runtime, director, actor_photo, release, number, cover, website, series, label
 def small_cover_check(path, number, cover_small, c_word, conf: config.Config, filepath, failed_folder):
@@ -177,7 +178,7 @@ def small_cover_check(path, number, cover_small, c_word, conf: config.Config, fi
 def create_folder(success_folder, location_rule, json_data, conf: config.Config): # 创建文件夹
-title, studio, year, outline, runtime, director, actor_photo, release, number, cover, website, series= get_info(json_data)
+title, studio, year, outline, runtime, director, actor_photo, release, number, cover, website, series, label = get_info(json_data)
 if len(location_rule) > 240: # 新建成功输出文件夹
 path = success_folder + '/' + location_rule.replace("'actor'", "'manypeople'", 3).replace("actor","'manypeople'",3) # path为影片+元数据所在目录
 else:
@@ -265,7 +266,7 @@ def image_download(cover, number, c_word, path, conf: config.Config, filepath, f
 def print_files(path, c_word, naming_rule, part, cn_sub, json_data, filepath, failed_folder, tag, actor_list, liuchu):
-title, studio, year, outline, runtime, director, actor_photo, release, number, cover, website, series = get_info(json_data)
+title, studio, year, outline, runtime, director, actor_photo, release, number, cover, website, series, label = get_info(json_data)
 try:
 if not os.path.exists(path):
@@ -293,8 +294,7 @@ def print_files(path, c_word, naming_rule, part, cn_sub, json_data, filepath, fa
 except:
 aaaa = ''
 print(" <maker>" + studio + "</maker>", file=code)
-print(" <label>", file=code)
-print(" </label>", file=code)
+print(" <label>" + label + "</label>", file=code)
 if cn_sub == '1':
 print(" <tag>中文字幕</tag>", file=code)
 if liuchu == '流出':

View File

@@ -2,6 +2,7 @@
 # -*- coding: utf-8 -*-
 import json
 import re
+from urllib.parse import urlencode
 from lxml import etree
@@ -14,7 +15,7 @@ from ADC_function import *
 def getTitle(text):
 html = etree.fromstring(text, etree.HTMLParser())
-result = html.xpath('//*[@id="title"]/text()')[0]
+result = html.xpath('//*[starts-with(@id, "title")]/text()')[0]
 return result
@@ -56,11 +57,11 @@ def getLabel(text):
 html = etree.fromstring(text, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
 try:
 result = html.xpath(
-"//td[contains(text(),'シリーズ')]/following-sibling::td/a/text()"
+"//td[contains(text(),'レーベル')]/following-sibling::td/a/text()"
 )[0]
 except:
 result = html.xpath(
-"//td[contains(text(),'シリーズ')]/following-sibling::td/text()"
+"//td[contains(text(),'レーベル')]/following-sibling::td/text()"
 )[0]
 return result
@@ -93,9 +94,12 @@ def getRelease(text):
 "//td[contains(text(),'発売日:')]/following-sibling::td/a/text()"
 )[0].lstrip("\n")
 except:
-result = html.xpath(
-"//td[contains(text(),'発売日:')]/following-sibling::td/text()"
-)[0].lstrip("\n")
+try:
+result = html.xpath(
+"//td[contains(text(),'発売日:')]/following-sibling::td/text()"
+)[0].lstrip("\n")
+except:
+result = "----"
 if result == "----":
 try:
 result = html.xpath(
@@ -108,7 +112,7 @@ def getRelease(text):
 )[0].lstrip("\n")
 except:
 pass
-return result.replace('/','-')
+return result.replace("/", "-")
 def getTag(text):
@@ -187,8 +191,7 @@ def getSeries(text):
 )[0]
 return result
 except:
-return ''
+return ""
 def main(number):
@@ -208,11 +211,17 @@ def main(number):
 "https://www.dmm.co.jp/mono/anime/-/detail/=/cid=",
 "https://www.dmm.co.jp/digital/videoc/-/detail/=/cid=",
 "https://www.dmm.co.jp/digital/nikkatsu/-/detail/=/cid=",
+"https://www.dmm.co.jp/rental/-/detail/=/cid=",
 ]
 chosen_url = ""
 for url in fanza_urls:
 chosen_url = url + fanza_search_number
-htmlcode = get_html(chosen_url)
+htmlcode = get_html(
+"https://www.dmm.co.jp/age_check/=/declared=yes/?{}".format(
+urlencode({"rurl": chosen_url})
+)
+)
 if "404 Not Found" not in htmlcode:
 break
 if "404 Not Found" in htmlcode:
@@ -284,4 +293,5 @@ def main_htmlcode(number):
 if __name__ == "__main__":
 print(main("DV-1562"))
+print(main("96fad1217"))