fix storyline
This commit is contained in:
@@ -63,7 +63,7 @@ class Avsox(Parser):
|
|||||||
def getOutline(self, htmltree):
|
def getOutline(self, htmltree):
|
||||||
if self.morestoryline:
|
if self.morestoryline:
|
||||||
from .storyline import getStoryline
|
from .storyline import getStoryline
|
||||||
return getStoryline(self.number)
|
return getStoryline(self.number, proxies=self.proxies, verify=self.verify)
|
||||||
return ''
|
return ''
|
||||||
|
|
||||||
def getActors(self, htmltree):
|
def getActors(self, htmltree):
|
||||||
|
|||||||
@@ -92,7 +92,8 @@ class Carib(Parser):
|
|||||||
def getOutline(self, htmltree):
|
def getOutline(self, htmltree):
|
||||||
if self.morestoryline:
|
if self.morestoryline:
|
||||||
from .storyline import getStoryline
|
from .storyline import getStoryline
|
||||||
result = getStoryline(self.number, uncensored=self.uncensored)
|
result = getStoryline(self.number, uncensored=self.uncensored,
|
||||||
|
proxies=self.proxies, verify=self.verify)
|
||||||
if len(result):
|
if len(result):
|
||||||
return result
|
return result
|
||||||
return super().getOutline(htmltree)
|
return super().getOutline(htmltree)
|
||||||
|
|||||||
@@ -44,7 +44,7 @@ def get(url: str, cookies=None, ua: str=None, extra_headers=None, return_type: s
|
|||||||
raise Exception('Connect Failed')
|
raise Exception('Connect Failed')
|
||||||
|
|
||||||
|
|
||||||
def post(url: str, data: dict, files=None, cookies=None, ua: str=None, return_type: str=None, encoding: str=None,
|
def post(url: str, data: dict=None, files=None, cookies=None, ua: str=None, return_type: str=None, encoding: str=None,
|
||||||
retry: int=3, timeout: int=G_DEFAULT_TIMEOUT, proxies=None, verify=None):
|
retry: int=3, timeout: int=G_DEFAULT_TIMEOUT, proxies=None, verify=None):
|
||||||
"""
|
"""
|
||||||
是否使用代理应由上层处理
|
是否使用代理应由上层处理
|
||||||
@@ -109,46 +109,6 @@ def request_session(cookies=None, ua: str=None, retry: int=3, timeout: int=G_DEF
|
|||||||
return session
|
return session
|
||||||
|
|
||||||
|
|
||||||
# storyline only
|
|
||||||
# 使用 cloudscraper....
|
|
||||||
def get_html_by_browser(url: str = None, cookies: dict = None, ua: str = None, return_type: str = None,
|
|
||||||
encoding: str = None, use_scraper: bool = False,
|
|
||||||
retry: int = 3, timeout: int = G_DEFAULT_TIMEOUT, proxies=None, verify=None):
|
|
||||||
session = create_scraper(browser={'custom': ua or G_USER_AGENT, }) if use_scraper else requests.Session()
|
|
||||||
if isinstance(cookies, dict) and len(cookies):
|
|
||||||
requests.utils.add_dict_to_cookiejar(session.cookies, cookies)
|
|
||||||
retries = Retry(total=retry, connect=retry, backoff_factor=1,
|
|
||||||
status_forcelist=[429, 500, 502, 503, 504])
|
|
||||||
session.mount("https://", TimeoutHTTPAdapter(max_retries=retries, timeout=timeout))
|
|
||||||
session.mount("http://", TimeoutHTTPAdapter(max_retries=retries, timeout=timeout))
|
|
||||||
if verify:
|
|
||||||
session.verify = verify
|
|
||||||
if proxies:
|
|
||||||
session.proxies = proxies
|
|
||||||
try:
|
|
||||||
browser = mechanicalsoup.StatefulBrowser(user_agent=ua or G_USER_AGENT, session=session)
|
|
||||||
if isinstance(url, str) and len(url):
|
|
||||||
result = browser.open(url)
|
|
||||||
else:
|
|
||||||
return browser
|
|
||||||
if not result.ok:
|
|
||||||
return None
|
|
||||||
|
|
||||||
if return_type == "object":
|
|
||||||
return result
|
|
||||||
elif return_type == "content":
|
|
||||||
return result.content
|
|
||||||
elif return_type == "browser":
|
|
||||||
return result, browser
|
|
||||||
else:
|
|
||||||
result.encoding = encoding or "utf-8"
|
|
||||||
return result.text
|
|
||||||
except requests.exceptions.ProxyError:
|
|
||||||
print("[-]get_html_by_browser() Proxy error! Please check your Proxy")
|
|
||||||
except Exception as e:
|
|
||||||
print(f'[-]get_html_by_browser() Failed! {e}')
|
|
||||||
return None
|
|
||||||
|
|
||||||
# storyline xcity only
|
# storyline xcity only
|
||||||
def get_html_by_form(url, form_select: str = None, fields: dict = None, cookies: dict = None, ua: str = None,
|
def get_html_by_form(url, form_select: str = None, fields: dict = None, cookies: dict = None, ua: str = None,
|
||||||
return_type: str = None, encoding: str = None,
|
return_type: str = None, encoding: str = None,
|
||||||
|
|||||||
@@ -136,5 +136,6 @@ class Javbus(Parser):
|
|||||||
if any(caller for caller in inspect.stack() if os.path.basename(caller.filename) == 'airav.py'):
|
if any(caller for caller in inspect.stack() if os.path.basename(caller.filename) == 'airav.py'):
|
||||||
return '' # 从airav.py过来的调用不计算outline直接返回,避免重复抓取数据拖慢处理速度
|
return '' # 从airav.py过来的调用不计算outline直接返回,避免重复抓取数据拖慢处理速度
|
||||||
from .storyline import getStoryline
|
from .storyline import getStoryline
|
||||||
return getStoryline(self.number , uncensored = self.uncensored)
|
return getStoryline(self.number , uncensored = self.uncensored,
|
||||||
|
proxies=self.proxies, verify=self.verify)
|
||||||
return ''
|
return ''
|
||||||
|
|||||||
@@ -176,7 +176,8 @@ class Javdb(Parser):
|
|||||||
def getOutline(self, htmltree):
|
def getOutline(self, htmltree):
|
||||||
if self.morestoryline:
|
if self.morestoryline:
|
||||||
from .storyline import getStoryline
|
from .storyline import getStoryline
|
||||||
return getStoryline(self.number, self.getUncensored(htmltree))
|
return getStoryline(self.number, self.getUncensored(htmltree),
|
||||||
|
proxies=self.proxies, verify=self.verify)
|
||||||
return ''
|
return ''
|
||||||
|
|
||||||
def getTrailer(self, htmltree):
|
def getTrailer(self, htmltree):
|
||||||
|
|||||||
@@ -76,5 +76,6 @@ class Javlibrary(Parser):
|
|||||||
def getOutline(self, htmltree):
|
def getOutline(self, htmltree):
|
||||||
if self.morestoryline:
|
if self.morestoryline:
|
||||||
from .storyline import getStoryline
|
from .storyline import getStoryline
|
||||||
return getStoryline(self.number, self.getUncensored(htmltree))
|
return getStoryline(self.number, self.getUncensored(htmltree),
|
||||||
|
proxies=self.proxies, verify=self.verify)
|
||||||
return ''
|
return ''
|
||||||
|
|||||||
@@ -88,6 +88,8 @@ class Parser:
|
|||||||
针对需要传递的参数: cookies, proxy等
|
针对需要传递的参数: cookies, proxy等
|
||||||
子类继承后修改
|
子类继承后修改
|
||||||
"""
|
"""
|
||||||
|
if not core:
|
||||||
|
return
|
||||||
if core.proxies:
|
if core.proxies:
|
||||||
self.proxies = core.proxies
|
self.proxies = core.proxies
|
||||||
if core.verify:
|
if core.verify:
|
||||||
|
|||||||
@@ -5,6 +5,7 @@
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
import json
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import time
|
import time
|
||||||
@@ -13,7 +14,10 @@ import builtins
|
|||||||
from urllib.parse import urljoin
|
from urllib.parse import urljoin
|
||||||
from lxml.html import fromstring
|
from lxml.html import fromstring
|
||||||
from multiprocessing.dummy import Pool as ThreadPool
|
from multiprocessing.dummy import Pool as ThreadPool
|
||||||
from .httprequest import get_html_by_browser, get_html_by_form, get_html_by_scraper, request_session
|
|
||||||
|
from scrapinglib.airav import Airav
|
||||||
|
from scrapinglib.xcity import Xcity
|
||||||
|
from .httprequest import get_html_by_form, get_html_by_scraper, request_session
|
||||||
|
|
||||||
# 舍弃 Amazon 源
|
# 舍弃 Amazon 源
|
||||||
G_registered_storyline_site = {"airavwiki", "airav", "avno1", "xcity", "58avgo"}
|
G_registered_storyline_site = {"airavwiki", "airav", "avno1", "xcity", "58avgo"}
|
||||||
@@ -35,7 +39,7 @@ class noThread(object):
|
|||||||
|
|
||||||
|
|
||||||
# 获取剧情介绍 从列表中的站点同时查,取值优先级从前到后
|
# 获取剧情介绍 从列表中的站点同时查,取值优先级从前到后
|
||||||
def getStoryline(number, title = None, sites: list=None, uncensored=None):
|
def getStoryline(number, title=None, sites: list=None, uncensored=None, proxies=None, verify=None):
|
||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
debug = False
|
debug = False
|
||||||
storyine_sites = "1:avno1,4:airavwiki".split(',')
|
storyine_sites = "1:avno1,4:airavwiki".split(',')
|
||||||
@@ -52,7 +56,7 @@ def getStoryline(number, title = None, sites: list=None, uncensored=None):
|
|||||||
r_dup.add(ns)
|
r_dup.add(ns)
|
||||||
sort_sites.sort()
|
sort_sites.sort()
|
||||||
apply_sites = [re.sub(r'.*?:', '', s, re.A) for s in sort_sites]
|
apply_sites = [re.sub(r'.*?:', '', s, re.A) for s in sort_sites]
|
||||||
mp_args = ((site, number, title, debug) for site in apply_sites)
|
mp_args = ((site, number, title, debug, proxies, verify) for site in apply_sites)
|
||||||
cores = min(len(apply_sites), os.cpu_count())
|
cores = min(len(apply_sites), os.cpu_count())
|
||||||
if cores == 0:
|
if cores == 0:
|
||||||
return ''
|
return ''
|
||||||
@@ -79,24 +83,21 @@ def getStoryline(number, title = None, sites: list=None, uncensored=None):
|
|||||||
|
|
||||||
|
|
||||||
def getStoryline_mp(args):
|
def getStoryline_mp(args):
|
||||||
(site, number, title, debug) = args
|
(site, number, title, debug, proxies, verify) = args
|
||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
storyline = None
|
storyline = None
|
||||||
if not isinstance(site, str):
|
if not isinstance(site, str):
|
||||||
return storyline
|
return storyline
|
||||||
elif site == "airavwiki":
|
elif site == "airavwiki":
|
||||||
storyline = getStoryline_airavwiki(number, debug)
|
storyline = getStoryline_airavwiki(number, debug, proxies, verify)
|
||||||
#storyline = getStoryline_airavwiki_super(number, debug)
|
|
||||||
elif site == "airav":
|
elif site == "airav":
|
||||||
storyline = getStoryline_airav(number, debug)
|
storyline = getStoryline_airav(number, debug, proxies, verify)
|
||||||
elif site == "avno1":
|
elif site == "avno1":
|
||||||
storyline = getStoryline_avno1(number, debug)
|
storyline = getStoryline_avno1(number, debug, proxies, verify)
|
||||||
elif site == "xcity":
|
elif site == "xcity":
|
||||||
storyline = getStoryline_xcity(number, debug)
|
storyline = getStoryline_xcity(number, debug, proxies, verify)
|
||||||
# elif site == "amazon":
|
|
||||||
# storyline = getStoryline_amazon(title, number, debug)
|
|
||||||
elif site == "58avgo":
|
elif site == "58avgo":
|
||||||
storyline = getStoryline_58avgo(number, debug)
|
storyline = getStoryline_58avgo(number, debug, proxies, verify)
|
||||||
if not debug:
|
if not debug:
|
||||||
return storyline
|
return storyline
|
||||||
print("[!]MP 线程[{}]运行{:.3f}秒,结束于{}返回结果: {}".format(
|
print("[!]MP 线程[{}]运行{:.3f}秒,结束于{}返回结果: {}".format(
|
||||||
@@ -108,11 +109,11 @@ def getStoryline_mp(args):
|
|||||||
return storyline
|
return storyline
|
||||||
|
|
||||||
|
|
||||||
def getStoryline_airav(number, debug):
|
def getStoryline_airav(number, debug, proxies, verify):
|
||||||
try:
|
try:
|
||||||
site = secrets.choice(('airav.cc','airav4.club'))
|
site = secrets.choice(('airav.cc','airav4.club'))
|
||||||
url = f'https://{site}/searchresults.aspx?Search={number}&Type=0'
|
url = f'https://{site}/searchresults.aspx?Search={number}&Type=0'
|
||||||
session = request_session()
|
session = request_session(proxies=proxies, verify=verify)
|
||||||
res = session.get(url)
|
res = session.get(url)
|
||||||
if not res:
|
if not res:
|
||||||
raise ValueError(f"get_html_by_session('{url}') failed")
|
raise ValueError(f"get_html_by_session('{url}') failed")
|
||||||
@@ -143,36 +144,16 @@ def getStoryline_airav(number, debug):
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
def getStoryline_airavwiki(number, debug):
|
def getStoryline_airavwiki(number, debug, proxies, verify):
|
||||||
try:
|
try:
|
||||||
kwd = number[:6] if re.match(r'\d{6}[\-_]\d{2,3}', number) else number
|
kwd = number[:6] if re.match(r'\d{6}[\-_]\d{2,3}', number) else number
|
||||||
url = f'https://cn.airav.wiki/?search={kwd}'
|
airavwiki = Airav()
|
||||||
result, browser = get_html_by_browser(url, return_type='browser', use_scraper=True)
|
airavwiki.addtion_Javbus = False
|
||||||
if not result.ok:
|
airavwiki.proxies = proxies
|
||||||
raise ValueError(f"get_html_by_browser('{url}','{number}') failed")
|
airavwiki.verify = verify
|
||||||
s = browser.page.select('div.row > div > div.videoList.row > div > a.d-block')
|
jsons = airavwiki.search(kwd)
|
||||||
link = None
|
outline = json.loads(jsons).get('outline')
|
||||||
for a in s:
|
return outline
|
||||||
title = a.img['title']
|
|
||||||
list_number = re.findall('^(.*?)\s+', title, re.A)[0].strip()
|
|
||||||
if kwd == number: # 番号PRED-164 和 RED-164需要能够区分
|
|
||||||
if re.match(f'^{number}$', list_number, re.I):
|
|
||||||
link = a
|
|
||||||
break
|
|
||||||
elif re.search(number, list_number, re.I):
|
|
||||||
link = a
|
|
||||||
break
|
|
||||||
if link is None:
|
|
||||||
raise ValueError("number not found")
|
|
||||||
result = browser.follow_link(link)
|
|
||||||
if not result.ok or not re.search(number, browser.url, re.I):
|
|
||||||
raise ValueError("detail page not found")
|
|
||||||
title = browser.page.select('head > title')[0].text.strip()
|
|
||||||
detail_number = str(re.findall('\[(.*?)]', title)[0])
|
|
||||||
if not re.search(number, detail_number, re.I):
|
|
||||||
raise ValueError(f"detail page number not match, got ->[{detail_number}]")
|
|
||||||
desc = browser.page.select_one('div.d-flex.videoDataBlock > div.synopsis > p').text.strip()
|
|
||||||
return desc
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
if debug:
|
if debug:
|
||||||
print(f"[-]MP def getStoryline_airavwiki Error: {e}, number [{number}].")
|
print(f"[-]MP def getStoryline_airavwiki Error: {e}, number [{number}].")
|
||||||
@@ -180,7 +161,7 @@ def getStoryline_airavwiki(number, debug):
|
|||||||
return ''
|
return ''
|
||||||
|
|
||||||
|
|
||||||
def getStoryline_58avgo(number, debug):
|
def getStoryline_58avgo(number, debug, proxies, verify):
|
||||||
try:
|
try:
|
||||||
url = 'http://58avgo.com/cn/index.aspx' + secrets.choice([
|
url = 'http://58avgo.com/cn/index.aspx' + secrets.choice([
|
||||||
'', '?status=3', '?status=4', '?status=7', '?status=9', '?status=10', '?status=11', '?status=12',
|
'', '?status=3', '?status=4', '?status=7', '?status=9', '?status=10', '?status=11', '?status=12',
|
||||||
@@ -189,6 +170,7 @@ def getStoryline_58avgo(number, debug):
|
|||||||
kwd = number[:6] if re.match(r'\d{6}[\-_]\d{2,3}', number) else number
|
kwd = number[:6] if re.match(r'\d{6}[\-_]\d{2,3}', number) else number
|
||||||
result, browser = get_html_by_form(url,
|
result, browser = get_html_by_form(url,
|
||||||
fields = {'ctl00$TextBox_SearchKeyWord' : kwd},
|
fields = {'ctl00$TextBox_SearchKeyWord' : kwd},
|
||||||
|
proxies=proxies, verify=verify,
|
||||||
return_type = 'browser')
|
return_type = 'browser')
|
||||||
if not result:
|
if not result:
|
||||||
raise ValueError(f"get_html_by_form('{url}','{number}') failed")
|
raise ValueError(f"get_html_by_form('{url}','{number}') failed")
|
||||||
@@ -219,13 +201,13 @@ def getStoryline_58avgo(number, debug):
|
|||||||
return ''
|
return ''
|
||||||
|
|
||||||
|
|
||||||
def getStoryline_avno1(number, debug): #获取剧情介绍 从avno1.cc取得
|
def getStoryline_avno1(number, debug, proxies, verify): #获取剧情介绍 从avno1.cc取得
|
||||||
try:
|
try:
|
||||||
site = secrets.choice(['1768av.club','2nine.net','av999.tv','avno1.cc',
|
site = secrets.choice(['1768av.club','2nine.net','av999.tv','avno1.cc',
|
||||||
'hotav.biz','iqq2.xyz','javhq.tv',
|
'hotav.biz','iqq2.xyz','javhq.tv',
|
||||||
'www.hdsex.cc','www.porn18.cc','www.xxx18.cc',])
|
'www.hdsex.cc','www.porn18.cc','www.xxx18.cc',])
|
||||||
url = f'http://{site}/cn/search.php?kw_type=key&kw={number}'
|
url = f'http://{site}/cn/search.php?kw_type=key&kw={number}'
|
||||||
lx = fromstring(get_html_by_scraper(url))
|
lx = fromstring(get_html_by_scraper(url, proxies=proxies, verify=verify))
|
||||||
descs = lx.xpath('//div[@class="type_movie"]/div/ul/li/div/@data-description')
|
descs = lx.xpath('//div[@class="type_movie"]/div/ul/li/div/@data-description')
|
||||||
titles = lx.xpath('//div[@class="type_movie"]/div/ul/li/div/a/h3/text()')
|
titles = lx.xpath('//div[@class="type_movie"]/div/ul/li/div/a/h3/text()')
|
||||||
if not descs or not len(descs):
|
if not descs or not len(descs):
|
||||||
@@ -246,7 +228,7 @@ def getStoryline_avno1(number, debug): #获取剧情介绍 从avno1.cc取得
|
|||||||
return ''
|
return ''
|
||||||
|
|
||||||
|
|
||||||
def getStoryline_avno1OLD(number, debug): #获取剧情介绍 从avno1.cc取得
|
def getStoryline_avno1OLD(number, debug, proxies, verify): #获取剧情介绍 从avno1.cc取得
|
||||||
try:
|
try:
|
||||||
url = 'http://www.avno1.cc/cn/' + secrets.choice(['usercenter.php?item=' +
|
url = 'http://www.avno1.cc/cn/' + secrets.choice(['usercenter.php?item=' +
|
||||||
secrets.choice(['pay_support', 'qa', 'contact', 'guide-vpn']),
|
secrets.choice(['pay_support', 'qa', 'contact', 'guide-vpn']),
|
||||||
@@ -255,6 +237,7 @@ def getStoryline_avno1OLD(number, debug): #获取剧情介绍 从avno1.cc取得
|
|||||||
result, browser = get_html_by_form(url,
|
result, browser = get_html_by_form(url,
|
||||||
form_select='div.wrapper > div.header > div.search > form',
|
form_select='div.wrapper > div.header > div.search > form',
|
||||||
fields = {'kw' : number},
|
fields = {'kw' : number},
|
||||||
|
proxies=proxies, verify=verify,
|
||||||
return_type = 'browser')
|
return_type = 'browser')
|
||||||
if not result:
|
if not result:
|
||||||
raise ValueError(f"get_html_by_form('{url}','{number}') failed")
|
raise ValueError(f"get_html_by_form('{url}','{number}') failed")
|
||||||
@@ -272,19 +255,14 @@ def getStoryline_avno1OLD(number, debug): #获取剧情介绍 从avno1.cc取得
|
|||||||
return ''
|
return ''
|
||||||
|
|
||||||
|
|
||||||
def getStoryline_xcity(number, debug): #获取剧情介绍 从xcity取得
|
def getStoryline_xcity(number, debug, proxies, verify): #获取剧情介绍 从xcity取得
|
||||||
try:
|
try:
|
||||||
xcity_number = number.replace('-','')
|
xcityEngine = Xcity()
|
||||||
query_result, browser = get_html_by_form(
|
xcityEngine.proxies = proxies
|
||||||
'https://xcity.jp/' + secrets.choice(['about/','sitemap/','policy/','law/','help/','main/']),
|
xcityEngine.verify = verify
|
||||||
fields = {'q' : xcity_number.lower()},
|
jsons = xcityEngine.search(number)
|
||||||
return_type = 'browser')
|
outline = json.loads(jsons).get('outline')
|
||||||
if not query_result or not query_result.ok:
|
return outline
|
||||||
raise ValueError("page not found")
|
|
||||||
result = browser.follow_link(browser.links('avod\/detail')[0])
|
|
||||||
if not result.ok:
|
|
||||||
raise ValueError("detail page not found")
|
|
||||||
return browser.page.select_one('h2.title-detail + p.lead').text.strip()
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
if debug:
|
if debug:
|
||||||
print(f"[-]MP getOutline_xcity Error: {e}, number [{number}].")
|
print(f"[-]MP getOutline_xcity Error: {e}, number [{number}].")
|
||||||
|
|||||||
@@ -3,7 +3,6 @@
|
|||||||
import re
|
import re
|
||||||
import secrets
|
import secrets
|
||||||
from urllib.parse import urljoin
|
from urllib.parse import urljoin
|
||||||
from lxml import etree
|
|
||||||
from .httprequest import get_html_by_form
|
from .httprequest import get_html_by_form
|
||||||
from .parser import Parser
|
from .parser import Parser
|
||||||
|
|
||||||
@@ -27,6 +26,19 @@ class Xcity(Parser):
|
|||||||
expr_series = "//span[contains(text(),'シリーズ')]/../a/span/text()"
|
expr_series = "//span[contains(text(),'シリーズ')]/../a/span/text()"
|
||||||
expr_series2 = "//span[contains(text(),'シリーズ')]/../span/text()"
|
expr_series2 = "//span[contains(text(),'シリーズ')]/../span/text()"
|
||||||
expr_extrafanart = '//div[@id="sample_images"]/div/a/@href'
|
expr_extrafanart = '//div[@id="sample_images"]/div/a/@href'
|
||||||
|
expr_outline = '//head/meta[@property="og:description"]/@content'
|
||||||
|
|
||||||
|
def queryNumberUrl(self, number):
|
||||||
|
xcity_number = number.replace('-','')
|
||||||
|
query_result, browser = get_html_by_form(
|
||||||
|
'https://xcity.jp/' + secrets.choice(['sitemap/','policy/','law/','help/','main/']),
|
||||||
|
fields = {'q' : xcity_number.lower()},
|
||||||
|
cookies=self.cookies, proxies=self.proxies, verify=self.verify,
|
||||||
|
return_type = 'browser')
|
||||||
|
if not query_result or not query_result.ok:
|
||||||
|
raise ValueError("xcity.py: page not found")
|
||||||
|
prelink = browser.links('avod\/detail')[0]['href']
|
||||||
|
return urljoin('https://xcity.jp', prelink)
|
||||||
|
|
||||||
def getStudio(self, htmltree):
|
def getStudio(self, htmltree):
|
||||||
return super().getStudio(htmltree).strip('+').replace("', '", '').replace('"', '')
|
return super().getStudio(htmltree).strip('+').replace("', '", '').replace('"', '')
|
||||||
@@ -55,12 +67,6 @@ class Xcity(Parser):
|
|||||||
except:
|
except:
|
||||||
return ''
|
return ''
|
||||||
|
|
||||||
def getOutline(self, htmltree):
|
|
||||||
if self.morestoryline:
|
|
||||||
from .storyline import getStoryline
|
|
||||||
return getStoryline(self.number, uncensored=False)
|
|
||||||
return ''
|
|
||||||
|
|
||||||
def getActorPhoto(self, htmltree):
|
def getActorPhoto(self, htmltree):
|
||||||
treea = self.getTreeAll(htmltree, self.expr_actor_link)
|
treea = self.getTreeAll(htmltree, self.expr_actor_link)
|
||||||
t = {i.text.strip(): i.attrib['href'] for i in treea}
|
t = {i.text.strip(): i.attrib['href'] for i in treea}
|
||||||
@@ -84,28 +90,3 @@ class Xcity(Parser):
|
|||||||
i = "https:" + i
|
i = "https:" + i
|
||||||
extrafanart.append(i)
|
extrafanart.append(i)
|
||||||
return extrafanart
|
return extrafanart
|
||||||
|
|
||||||
def open_by_browser(self, number):
|
|
||||||
xcity_number = number.replace('-','')
|
|
||||||
query_result, browser = get_html_by_form(
|
|
||||||
'https://xcity.jp/' + secrets.choice(['about/','sitemap/','policy/','law/','help/','main/']),
|
|
||||||
fields = {'q' : xcity_number.lower()},
|
|
||||||
return_type = 'browser')
|
|
||||||
if not query_result or not query_result.ok:
|
|
||||||
raise ValueError("xcity.py: page not found")
|
|
||||||
result = browser.follow_link(browser.links('avod\/detail')[0])
|
|
||||||
if not result.ok:
|
|
||||||
raise ValueError("xcity.py: detail page not found")
|
|
||||||
return str(browser.page), browser
|
|
||||||
|
|
||||||
def search(self, number):
|
|
||||||
self.number = number
|
|
||||||
if self.specifiedUrl:
|
|
||||||
self.detailurl = self.specifiedUrl
|
|
||||||
lx = self.getHtmlTree(self.detailurl)
|
|
||||||
else:
|
|
||||||
self.detail_page, self.browser = self.open_by_browser(number)
|
|
||||||
self.detailurl = self.browser.url
|
|
||||||
lx = etree.fromstring(self.detail_page, etree.HTMLParser())
|
|
||||||
result = self.dictformat(lx)
|
|
||||||
return result
|
|
||||||
|
|||||||
Reference in New Issue
Block a user