Merge pull request #286 from jnozsc/fix_fanza

Fix fanza
This commit is contained in:
Yoshiko2
2020-07-27 03:48:00 +08:00
committed by GitHub
3 changed files with 27 additions and 17 deletions

View File

@@ -1,6 +1,5 @@
import requests
from lxml import etree
-import cloudscraper
import config
@@ -87,6 +86,7 @@ def post_html(url: str, query: dict) -> requests.Response:
def get_javlib_cookie() -> [dict, str]:
+import cloudscraper
proxy, timeout, retry_count, proxytype = config.Config().proxy()
proxies = get_proxy(proxy, proxytype)

10
core.py
View File

@@ -168,7 +168,8 @@ def get_info(json_data): # 返回json里的数据
cover = json_data['cover']
website = json_data['website']
series = json_data['series']
-return title, studio, year, outline, runtime, director, actor_photo, release, number, cover, website, series
+label = json_data.get('label', "")
+return title, studio, year, outline, runtime, director, actor_photo, release, number, cover, website, series, label
def small_cover_check(path, number, cover_small, c_word, conf: config.Config, filepath, failed_folder):
@@ -177,7 +178,7 @@ def small_cover_check(path, number, cover_small, c_word, conf: config.Config, fi
def create_folder(success_folder, location_rule, json_data, conf: config.Config): # 创建文件夹
-title, studio, year, outline, runtime, director, actor_photo, release, number, cover, website, series= get_info(json_data)
+title, studio, year, outline, runtime, director, actor_photo, release, number, cover, website, series, label = get_info(json_data)
if len(location_rule) > 240: # 新建成功输出文件夹
path = success_folder + '/' + location_rule.replace("'actor'", "'manypeople'", 3).replace("actor","'manypeople'",3) # path为影片+元数据所在目录
else:
@@ -265,7 +266,7 @@ def image_download(cover, number, c_word, path, conf: config.Config, filepath, f
def print_files(path, c_word, naming_rule, part, cn_sub, json_data, filepath, failed_folder, tag, actor_list, liuchu):
-title, studio, year, outline, runtime, director, actor_photo, release, number, cover, website, series = get_info(json_data)
+title, studio, year, outline, runtime, director, actor_photo, release, number, cover, website, series, label = get_info(json_data)
try:
if not os.path.exists(path):
@@ -293,8 +294,7 @@ def print_files(path, c_word, naming_rule, part, cn_sub, json_data, filepath, fa
except:
aaaa = ''
print(" <maker>" + studio + "</maker>", file=code)
-print(" <label>", file=code)
-print(" </label>", file=code)
+print(" <label>" + label + "</label>", file=code)
if cn_sub == '1':
print(" <tag>中文字幕</tag>", file=code)
if liuchu == '流出':

View File

@@ -2,6 +2,7 @@
# -*- coding: utf-8 -*-
import json
import re
+from urllib.parse import urlencode
from lxml import etree
@@ -14,7 +15,7 @@ from ADC_function import *
def getTitle(text):
html = etree.fromstring(text, etree.HTMLParser())
-result = html.xpath('//*[@id="title"]/text()')[0]
+result = html.xpath('//*[starts-with(@id, "title")]/text()')[0]
return result
@@ -56,11 +57,11 @@ def getLabel(text):
html = etree.fromstring(text, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
try:
result = html.xpath(
-"//td[contains(text(),'シリーズ')]/following-sibling::td/a/text()"
+"//td[contains(text(),'レーベル')]/following-sibling::td/a/text()"
)[0]
except:
result = html.xpath(
-"//td[contains(text(),'シリーズ')]/following-sibling::td/text()"
+"//td[contains(text(),'レーベル')]/following-sibling::td/text()"
)[0]
return result
@@ -93,9 +94,12 @@ def getRelease(text):
"//td[contains(text(),'発売日:')]/following-sibling::td/a/text()"
)[0].lstrip("\n")
except:
-result = html.xpath(
-"//td[contains(text(),'発売日:')]/following-sibling::td/text()"
-)[0].lstrip("\n")
+try:
+result = html.xpath(
+"//td[contains(text(),'発売日:')]/following-sibling::td/text()"
+)[0].lstrip("\n")
+except:
+result = "----"
if result == "----":
try:
result = html.xpath(
@@ -108,7 +112,7 @@ def getRelease(text):
)[0].lstrip("\n")
except:
pass
-return result.replace('/','-')
+return result.replace("/", "-")
def getTag(text):
@@ -187,8 +191,7 @@ def getSeries(text):
)[0]
return result
except:
-return ''
+return ""
def main(number):
@@ -208,11 +211,17 @@ def main(number):
"https://www.dmm.co.jp/mono/anime/-/detail/=/cid=",
"https://www.dmm.co.jp/digital/videoc/-/detail/=/cid=",
"https://www.dmm.co.jp/digital/nikkatsu/-/detail/=/cid=",
+"https://www.dmm.co.jp/rental/-/detail/=/cid=",
]
+chosen_url = ""
for url in fanza_urls:
chosen_url = url + fanza_search_number
-htmlcode = get_html(chosen_url)
+htmlcode = get_html(
+"https://www.dmm.co.jp/age_check/=/declared=yes/?{}".format(
+urlencode({"rurl": chosen_url})
+)
+)
if "404 Not Found" not in htmlcode:
break
if "404 Not Found" in htmlcode:
@@ -284,4 +293,5 @@ def main_htmlcode(number):
if __name__ == "__main__":
-print(main("DV-1562"))
+print(main("DV-1562"))
+print(main("96fad1217"))