Fix outline retrieval for javbus and javdb, which broke because airav is down

This commit is contained in:
lededev
2021-10-10 17:41:33 +08:00
parent b0959d1b18
commit 0933e87944
3 changed files with 26 additions and 5 deletions

View File

@@ -103,7 +103,7 @@ def get_html_by_browser(url, cookies: dict = None, ua: str = None, return_type:
return result.text
def get_html_by_form(url, form_name: str = None, fields: dict = None, cookies: dict = None, ua: str = None, return_type: str = None):
def get_html_by_form(url, form_select: str = None, fields: dict = None, cookies: dict = None, ua: str = None, return_type: str = None):
browser = mechanicalsoup.StatefulBrowser(user_agent=G_USER_AGENT if ua is None else ua)
if isinstance(cookies, dict):
requests.utils.add_dict_to_cookiejar(browser.session.cookies, cookies)
@@ -113,7 +113,7 @@ def get_html_by_form(url, form_name: str = None, fields: dict = None, cookies: d
result = browser.open(url)
if not result.ok:
return ''
form = browser.select_form() if form_name is None else browser.select_form(form_name)
form = browser.select_form() if form_select is None else browser.select_form(form_select)
if isinstance(fields, dict):
for k, v in fields.items():
browser[k] = v

View File

@@ -80,7 +80,7 @@ def getCID(htmlcode):
string = html.xpath("//a[contains(@class,'sample-box')][1]/@href")[0].replace('https://pics.dmm.co.jp/digital/video/','')
result = re.sub('/.*?.jpg','',string)
return result
def getOutline(number): #获取剧情介绍
def getOutline0(number): #获取剧情介绍 airav.wiki站点404函数暂时更名等无法恢复时删除
if any(caller for caller in inspect.stack() if os.path.basename(caller.filename) == 'airav.py'):
return '' # 从airav.py过来的调用不计算outline直接返回避免重复抓取数据拖慢处理速度
try:
@@ -91,6 +91,23 @@ def getOutline(number): #获取剧情介绍
except:
pass
return ''
def getOutline(number):  # Fetch the plot outline from avno1.cc
    """Return the plot outline for ``number`` as found through avno1.cc.

    The number is upper-cased and submitted through the site's search form
    (field ``kw``). The first search hit is accepted only if the last
    space-separated token of its title contains the upper-cased number; the
    outline is then read from that entry's ``data-description`` attribute.

    This is a best-effort lookup: any failure (bad input, network error,
    unexpected page layout, non-matching first hit) yields an empty string
    instead of an exception.

    :param number: movie number to search for (a string such as 'ipx-292').
    :return: the stripped outline text, or '' when it cannot be obtained.
    """
    try:
        number_up = number.upper()
        result, browser = get_html_by_form(
            'http://www.avno1.cc/cn/usercenter.php?item=pay_support',
            form_select='div.wrapper > div.header > div.search > form',
            fields={'kw': number_up},
            return_type='browser')
        if not result.ok:
            return ''
        title = browser.page.select('div.type_movie > div > ul > li > div > a > h3')[0].text.strip()
        # Only the part of the title after its last space is compared with
        # the requested number.
        page_number = title[title.rfind(' '):].upper()
        if number_up not in page_number:
            # The first hit belongs to a different title; do not return its outline.
            return ''
        return browser.page.select('div.type_movie > div > ul > li:nth-child(1) > div')[0]['data-description'].strip()
    except Exception:
        # Deliberately best-effort: a missing outline must not abort scraping.
        # KeyboardInterrupt/SystemExit are intentionally no longer swallowed.
        return ''
def getSerise(htmlcode): #获取系列 已修改
html = etree.fromstring(htmlcode, etree.HTMLParser())
# 如果记录中冇导演系列排在第6位
@@ -198,7 +215,7 @@ def main(number):
return js
if __name__ == "__main__" :
print(main('ADV-R0624')) # 404
#print(main('ADV-R0624')) # 404
print(main('ipx-292'))
print(main('CEMD-011'))
print(main('CJOD-278'))

View File

@@ -196,7 +196,7 @@ def getDirector(a):
result1 = str(html.xpath('//strong[contains(text(),"導演")]/../span/text()')).strip(" ['']")
result2 = str(html.xpath('//strong[contains(text(),"導演")]/../span/a/text()')).strip(" ['']")
return str(result1 + result2).strip('+').replace("', '", '').replace('"', '')
def getOutline(number): #获取剧情介绍
def getOutline0(number): #获取剧情介绍 airav.wiki站点404函数暂时更名等无法恢复时删除
try:
htmlcode = get_html('https://cn.airav.wiki/video/' + number)
from WebCrawler.airav import getOutline as airav_getOutline
@@ -205,6 +205,9 @@ def getOutline(number): #获取剧情介绍
except:
pass
return ''
def getOutline(number):  # Fetch the plot outline
    """Return the plot outline for ``number`` by delegating to the javbus crawler.

    :param number: movie number, passed through unchanged.
    :return: whatever ``WebCrawler.javbus.getOutline`` returns for ``number``.
    """
    # Imported at call time rather than at module level
    # (presumably to avoid a circular import between crawlers -- confirm).
    from WebCrawler.javbus import getOutline as fetch_outline_from_javbus
    outline = fetch_outline_from_javbus(number)
    return outline
def getSeries(a):
#/html/body/section/div/div[3]/div[2]/nav/div[7]/span/a
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
@@ -340,6 +343,7 @@ if __name__ == "__main__":
# print(main('blacked.20.05.30'))
# print(main('AGAV-042'))
# print(main('BANK-022'))
print(main('070116-197'))
print(main('093021_539')) # 没有剧照 片商pacopacomama
# print(main('FC2-2278260'))
# print(main('FC2-735670'))