storyline:add data source airavwiki
This commit is contained in:
@@ -5,6 +5,7 @@ from lxml import etree#need install
|
|||||||
import json
|
import json
|
||||||
from ADC_function import *
|
from ADC_function import *
|
||||||
from WebCrawler.storyline import getStoryline
|
from WebCrawler.storyline import getStoryline
|
||||||
|
import inspect
|
||||||
|
|
||||||
def getActorPhoto(html):
|
def getActorPhoto(html):
|
||||||
actors = html.xpath('//div[@class="star-name"]/a')
|
actors = html.xpath('//div[@class="star-name"]/a')
|
||||||
@@ -60,6 +61,8 @@ def getCID(html):
|
|||||||
result = re.sub('/.*?.jpg','',string)
|
result = re.sub('/.*?.jpg','',string)
|
||||||
return result
|
return result
|
||||||
def getOutline(number, title):
    """Return the storyline (plot outline) for a movie via ``getStoryline``.

    The lookup is skipped and an empty string is returned when any frame on
    the current call stack comes from a file named ``airav.py``: calls that
    arrive through that module do not compute the outline, which avoids
    fetching the same data twice and slowing processing down.

    Args:
        number: Movie number, forwarded unchanged to ``getStoryline``.
        title: Movie title, forwarded unchanged to ``getStoryline``.

    Returns:
        ``''`` when called through ``airav.py``, otherwise whatever
        ``getStoryline(number, title)`` returns.
    """
    for frame in inspect.stack():
        # Only the file name is compared, not the full path.
        if os.path.basename(frame.filename) == 'airav.py':
            return ''
    return getStoryline(number, title)
|
||||||
def getSeriseJa(html):
|
def getSeriseJa(html):
|
||||||
x = html.xpath('//span[contains(text(),"シリーズ:")]/../a/text()')
|
x = html.xpath('//span[contains(text(),"シリーズ:")]/../a/text()')
|
||||||
|
|||||||
@@ -10,7 +10,7 @@ from difflib import SequenceMatcher
|
|||||||
from unicodedata import category
|
from unicodedata import category
|
||||||
from number_parser import is_uncensored
|
from number_parser import is_uncensored
|
||||||
|
|
||||||
G_registered_storyline_site = {"airav", "avno1", "xcity", "amazon", "58avgo"}
|
G_registered_storyline_site = {"airavwiki", "airav", "avno1", "xcity", "amazon", "58avgo"}
|
||||||
|
|
||||||
G_mode_txt = ('顺序执行','线程池','进程池')
|
G_mode_txt = ('顺序执行','线程池','进程池')
|
||||||
|
|
||||||
@@ -83,6 +83,8 @@ def _getStoryline_mp(site, number, title, debug):
|
|||||||
storyline = None
|
storyline = None
|
||||||
if not isinstance(site, str):
|
if not isinstance(site, str):
|
||||||
return storyline
|
return storyline
|
||||||
|
elif site == "airavwiki":
|
||||||
|
storyline = getStoryline_airavwiki(number, debug)
|
||||||
elif site == "airav":
|
elif site == "airav":
|
||||||
storyline = getStoryline_airav(number, debug)
|
storyline = getStoryline_airav(number, debug)
|
||||||
elif site == "avno1":
|
elif site == "avno1":
|
||||||
@@ -113,9 +115,9 @@ def getStoryline_airav(number, debug):
|
|||||||
if not res.ok:
|
if not res.ok:
|
||||||
raise ValueError(f"get_html_by_browser('{url}') failed")
|
raise ValueError(f"get_html_by_browser('{url}') failed")
|
||||||
avs = browser.page.select_one('div.resultcontent > ul > li:nth-child(1) > div')
|
avs = browser.page.select_one('div.resultcontent > ul > li:nth-child(1) > div')
|
||||||
if number_up not in avs.select_one('a > h3').text.upper():
|
if number_up not in avs.a.h3.text.upper():
|
||||||
raise ValueError("number not found")
|
raise ValueError("number not found")
|
||||||
detail_url = avs.select_one('a')['href']
|
detail_url = avs.a['href']
|
||||||
res = browser.open_relative(detail_url)
|
res = browser.open_relative(detail_url)
|
||||||
if not res.ok:
|
if not res.ok:
|
||||||
raise ValueError(f"browser.open_relative('{detail_url}') failed")
|
raise ValueError(f"browser.open_relative('{detail_url}') failed")
|
||||||
@@ -132,6 +134,38 @@ def getStoryline_airav(number, debug):
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def getStoryline_airavwiki(number, debug):
    """Fetch the storyline (synopsis) text for ``number`` from cn.airav.wiki.

    The site search is queried, the first result whose image title contains
    ``number`` (case-insensitive) is followed, and the detail page is
    cross-checked against ``number`` (URL and the bracketed number in the
    page title) before the synopsis paragraph is returned.

    This is a best-effort lookup: every failure (network error, unexpected
    page layout, number mismatch) is swallowed and reported only when
    ``debug`` is truthy.

    Args:
        number: Movie number to look up.
        debug: When truthy, print the reason a lookup failed.

    Returns:
        The stripped synopsis text, or ``''`` when nothing could be found.
    """
    try:
        # Numbers shaped like "123456-789" / "123456_01" are searched by
        # their first six digits only; everything else is searched verbatim.
        kwd = number[:6] if re.match(r'\d{6}[\-_]\d{2,3}', number) else number
        url = f'https://cn.airav.wiki/?search={kwd}'
        # NOTE(review): get_html_by_browser is a project helper; the object it
        # returns is presumably a MechanicalSoup-style browser -- confirm.
        result, browser = get_html_by_browser(url, return_type='browser')
        if not result.ok:
            raise ValueError(f"get_html_by_browser('{url}','{number}') failed")

        # Match the number literally: escaping keeps regex metacharacters in
        # a number from altering the match or raising re.error.
        number_re = re.compile(re.escape(number), re.I)

        link = None
        anchors = browser.page.select('div.row > div > div.videoList.row > div > a.d-block')
        for a in anchors:
            # Skip result cards without an <img title="..."> instead of
            # aborting the whole scan on the first malformed card.
            img = a.img
            title = img.get('title') if img is not None else None
            if title and number_re.search(title):
                link = a
                break
        if link is None:
            raise ValueError("number not found")

        result = browser.follow_link(link)
        if not result.ok or not number_re.search(browser.url):
            raise ValueError("detail page not found")

        title_tag = browser.page.select_one('head > title')
        if title_tag is None:
            raise ValueError("detail page title not found")
        title = title_tag.text.strip()
        # The detail page title is expected to carry the number in brackets.
        bracketed = re.search(r'\[(.*?)]', title)
        if bracketed is None:
            raise ValueError(f"detail page number not found in title [{title}]")
        detail_number = bracketed.group(1)
        if not number_re.search(detail_number):
            raise ValueError(f"detail page number not match, got ->[{detail_number}]")

        synopsis = browser.page.select_one('div.d-flex.videoDataBlock > div.synopsis > p')
        if synopsis is None:
            raise ValueError("synopsis not found")
        return synopsis.text.strip()
    except Exception as e:
        # Deliberate best-effort boundary: a failing site must not break the
        # other storyline sources, so the error is only reported in debug mode.
        if debug:
            print(f"[-]MP def getStoryline_airavwiki Error: {e}, number [{number}].")
    return ''
|
||||||
|
|
||||||
|
|
||||||
def getStoryline_58avgo(number, debug):
|
def getStoryline_58avgo(number, debug):
|
||||||
try:
|
try:
|
||||||
url = 'http://58avgo.com/cn/index.aspx' + secrets.choice([
|
url = 'http://58avgo.com/cn/index.aspx' + secrets.choice([
|
||||||
|
|||||||
10
config.ini
10
config.ini
@@ -91,14 +91,14 @@ extrafanart_folder=extrafanart
|
|||||||
[storyline]
|
[storyline]
|
||||||
; website为javbus javdb avsox xcity carib时,site censored_site uncensored_site 为获取剧情简介信息的
|
; website为javbus javdb avsox xcity carib时,site censored_site uncensored_site 为获取剧情简介信息的
|
||||||
; 可选数据源站点列表。列表内站点同时并发查询,取值优先级由冒号前的序号决定,从小到大,数字小的站点没数据才会采用后面站点获得的。
|
; 可选数据源站点列表。列表内站点同时并发查询,取值优先级由冒号前的序号决定,从小到大,数字小的站点没数据才会采用后面站点获得的。
|
||||||
; 其中airav avno1 58avgo是中文剧情简介,区别是airav只能查有码,avno1有码无码都能查,58avgo只能查无码或者
|
; 其中airavwiki airav avno1 58avgo是中文剧情简介,区别是airav只能查有码,airavwiki avno1有码无码都能查,
|
||||||
; 流出破解马赛克的影片(此功能没使用)。
|
; 58avgo只能查无码或者流出破解马赛克的影片(此功能没使用)。
|
||||||
; xcity和amazon是日语的,由于amazon商城没有番号信息,选中对应DVD的准确率仅99.6%。如果三个列表全部为空则不查询,
|
; xcity和amazon是日语的,由于amazon商城没有番号信息,选中对应DVD的准确率仅99.6%。如果三个列表全部为空则不查询,
|
||||||
; 设置成不查询可大幅提高刮削速度。
|
; 设置成不查询可大幅提高刮削速度。
|
||||||
; site=
|
; site=
|
||||||
site=3:avno1
|
site=1:airavwiki,4:avno1
|
||||||
censored_site=1:airav,4:xcity,5:amazon
|
censored_site=2:airav,5:xcity,6:amazon
|
||||||
uncensored_site=2:58avgo
|
uncensored_site=3:58avgo
|
||||||
; 运行模式:0:顺序执行(最慢) 1:线程池(默认值) 2:进程池(启动开销比线程池大,并发站点越多越快)
|
; 运行模式:0:顺序执行(最慢) 1:线程池(默认值) 2:进程池(启动开销比线程池大,并发站点越多越快)
|
||||||
run_mode=1
|
run_mode=1
|
||||||
; show_result剧情简介调试信息 0关闭 1简略 2详细(详细部分不记入日志),剧情简介失效时可打开2查看原因
|
; show_result剧情简介调试信息 0关闭 1简略 2详细(详细部分不记入日志),剧情简介失效时可打开2查看原因
|
||||||
|
|||||||
Reference in New Issue
Block a user