# -*- coding: utf-8 -*-
|
|
|
|
import re
|
|
import secrets
|
|
from urllib.parse import urljoin
|
|
from lxml import etree
|
|
from .httprequest import get_html_by_form
|
|
from .parser import Parser
|
|
|
|
|
|
class Xcity(Parser):
    """Scraper that extracts movie metadata from xcity.jp detail pages.

    The ``expr_*`` class attributes are XPath expressions evaluated against
    the parsed detail page (or, for ``expr_actorphoto``, against an actor's
    profile page).  The ``get*`` methods override the corresponding hooks of
    ``Parser`` where xcity.jp needs site-specific post-processing.
    """

    source = 'xcity'

    # XPath expressions for the individual metadata fields.
    expr_number = '//*[@id="hinban"]/text()'
    expr_title = '//*[@id="program_detail_title"]/text()'
    expr_actor = '//ul/li[@class="credit-links"]/a/text()'
    expr_actor_link = '//ul/li[@class="credit-links"]/a'
    expr_actorphoto = '//div[@class="frame"]/div/p/img/@src'
    expr_studio = '//*[@id="avodDetails"]/div/div[3]/div[2]/div/ul[1]/li[4]/a/span/text()'
    expr_studio2 = '//strong[contains(text(),"片商")]/../following-sibling::span/a/text()'
    expr_runtime = '//span[@class="koumoku" and text()="収録時間"]/../text()'
    expr_label = '//*[@id="avodDetails"]/div/div[3]/div[2]/div/ul[1]/li[5]/a/span/text()'
    expr_release = '//*[@id="avodDetails"]/div/div[3]/div[2]/div/ul[1]/li[2]/text()'
    expr_tags = '//span[@class="koumoku" and text()="ジャンル"]/../a[starts-with(@href,"/avod/genre/")]/text()'
    expr_cover = '//*[@id="avodDetails"]/div/div[3]/div[1]/p/a/@href'
    expr_director = '//*[@id="program_detail_director"]/text()'
    expr_series = "//span[contains(text(),'シリーズ')]/../a/span/text()"
    expr_series2 = "//span[contains(text(),'シリーズ')]/../span/text()"
    expr_extrafanart = '//div[@id="sample_images"]/div/a/@href'

    @staticmethod
    def _absolute_url(url):
        """Resolve *url* against https://xcity.jp; return '' for empty input.

        Protocol-relative (``//host/path``), site-relative (``/path``) and
        already-absolute URLs are all turned into a usable absolute URL.
        """
        if not url:
            return ''
        return urljoin("https://xcity.jp", url)

    def getStudio(self, htmltree):
        """Return the parent's studio value with list/quote residue removed.

        Strips leading/trailing ``+`` characters and removes ``', '``
        sequences and double quotes from the value produced by ``Parser``.
        """
        studio = super().getStudio(htmltree)
        return studio.strip('+').replace("', '", '').replace('"', '')

    def getRuntime(self, htmltree):
        """Return the whitespace-stripped runtime text from the detail page."""
        # NOTE(review): the third argument (1) presumably selects which XPath
        # match to use -- confirm against Parser.getTreeElement.
        return self.getTreeElement(htmltree, self.expr_runtime, 1).strip()

    def getRelease(self, htmltree):
        """Return the release date as ``YYYY-MM-DD``, or '' if none is found."""
        try:
            result = self.getTreeElement(htmltree, self.expr_release, 1)
            # Raw string: '\d' in a plain literal is an invalid escape sequence.
            return re.findall(r'\d{4}/\d{2}/\d{2}', result)[0].replace('/', '-')
        except Exception:
            # No date in the text, or the lookup itself failed.
            return ''

    def getCover(self, htmltree):
        """Return the absolute cover URL, or '' when it cannot be determined."""
        try:
            # An empty parent result yields '' rather than a bare 'https:'.
            return self._absolute_url(super().getCover(htmltree))
        except Exception:
            return ''

    def getDirector(self, htmltree):
        """Return the director with newlines and tabs removed, or '' on error."""
        try:
            return super().getDirector(htmltree).replace('\n', '').replace('\t', '')
        except Exception:
            return ''

    def getOutline(self, htmltree):
        """Return the storyline for ``self.number`` if enabled, else ''.

        The storyline is only fetched when ``self.morestoryline`` is truthy.
        """
        if self.morestoryline:
            # Imported lazily, as in the original code.
            from .storyline import getStoryline
            return getStoryline(self.number, uncensored=False)
        return ''

    def getActorPhoto(self, htmltree):
        """Return a mapping of actor name -> absolute photo URL.

        Each actor link on the detail page is followed to the actor's own
        page, where the photo is looked up.  Actors whose page cannot be
        fetched, or whose photo is missing or the ``noimage.gif``
        placeholder, are left out (best effort).
        """
        anchors = self.getTreeAll(htmltree, self.expr_actor_link)

        # name -> href; anchors lacking text or an href cannot be resolved.
        actor_pages = {}
        for anchor in anchors:
            name = (anchor.text or '').strip()
            href = anchor.attrib.get('href')
            if name and href:
                actor_pages[name] = href

        photos = {}
        for name, href in actor_pages.items():
            try:
                actor_tree = self.getHtmlTree(self._absolute_url(href))
                pic_url = self.getTreeElement(actor_tree, self.expr_actorphoto)
                if not pic_url or 'noimage.gif' in pic_url:
                    continue
                photos[name] = self._absolute_url(pic_url)
            except Exception:
                # Deliberate best effort: one failing actor page must not
                # abort the whole lookup.
                continue
        return photos

    def getExtrafanart(self, htmltree):
        """Return the list of absolute URLs of the sample images."""
        arts = self.getTreeAll(htmltree, self.expr_extrafanart)
        return [self._absolute_url(href) for href in arts if href]

    def open_by_browser(self, number):
        """Search xcity.jp for *number* and open the first detail page.

        The search form is submitted from a randomly chosen static page of
        the site, with the dashes removed from *number* and the result
        lower-cased.

        Returns:
            A ``(page_text, browser)`` tuple: the detail page converted with
            ``str()`` and the browser object positioned on that page.

        Raises:
            ValueError: if the search request or the detail page request
                did not succeed.
            IndexError: if the search result contains no ``avod/detail``
                link (presumably meaning the number was not found).
        """
        xcity_number = number.replace('-', '')
        entry_page = secrets.choice(
            ['about/', 'sitemap/', 'policy/', 'law/', 'help/', 'main/'])
        query_result, browser = get_html_by_form(
            'https://xcity.jp/' + entry_page,
            fields={'q': xcity_number.lower()},
            return_type='browser')
        if not query_result or not query_result.ok:
            raise ValueError("xcity.py: page not found")

        # Raw string: '\/' in a plain literal is an invalid escape sequence.
        detail_links = browser.links(r'avod\/detail')
        result = browser.follow_link(detail_links[0])
        if not result.ok:
            raise ValueError("xcity.py: detail page not found")
        return str(browser.page), browser

    def search(self, number):
        """Look up *number* and return the formatted metadata.

        When ``self.specifiedUrl`` is set, that URL is fetched directly;
        otherwise the site search is driven through ``open_by_browser``.
        The parsed page is passed to ``self.dictformat`` and its result is
        returned.
        """
        self.number = number
        if self.specifiedUrl:
            self.detailurl = self.specifiedUrl
            lx = self.getHtmlTree(self.detailurl)
        else:
            self.detail_page, self.browser = self.open_by_browser(number)
            self.detailurl = self.browser.url
            lx = etree.fromstring(self.detail_page, etree.HTMLParser())
        return self.dictformat(lx)
|