update scrapinglib
- 优化提取extrafanart,trailer等,直接使用xpath expr,不需要正则匹配
- 优化 getchu 获取cover方法,直接使用og标签信息
- 优化 www.getchu 识别 getchu-id 的资源
- 统一获取 tag 方法,返回值 list
This commit is contained in:
@@ -12,15 +12,15 @@ class Jav321(Parser):
|
||||
# XPath expressions used to pull each metadata field out of a detail page.
# Positional expressions (title, cover, outline) depend on the page layout.
expr_title = "/html/body/div[2]/div[1]/div[1]/div[1]/h3/text()"
expr_cover = "/html/body/div[2]/div[2]/div[1]/p/a/img/@src"
expr_outline = "/html/body/div[2]/div[1]/div[1]/div[2]/div[3]/div/text()"
# NOTE: use XPath uniformly for every field.
# Label-anchored expressions: locate the <b> caption, then take its siblings.
expr_number = '//b[contains(text(),"品番")]/following-sibling::node()'
expr_actor = '//b[contains(text(),"出演者")]/following-sibling::a[starts-with(@href,"/star")]/text()'
# NOTE(review): expr_label is the same expression as expr_studio — confirm
# this is intended.
expr_label = '//b[contains(text(),"メーカー")]/following-sibling::a[starts-with(@href,"/company")]/text()'
expr_tags = '//b[contains(text(),"ジャンル")]/following-sibling::a[starts-with(@href,"/genre")]/text()'
expr_studio = '//b[contains(text(),"メーカー")]/following-sibling::a[starts-with(@href,"/company")]/text()'
expr_release = '//b[contains(text(),"配信開始日")]/following-sibling::node()'
expr_runtime = '//b[contains(text(),"収録時間")]/following-sibling::node()'
expr_series = '//b[contains(text(),"シリーズ")]/following-sibling::node()'
expr_extrafanart = '//div[@class="col-md-3"]/div[@class="col-xs-12 col-md-12"]/p/a/img/@src'
|
||||
|
||||
def queryNumberUrl(self, number):
|
||||
return 'https://www.jav321.com/search'
|
||||
@@ -45,39 +45,8 @@ class Jav321(Parser):
|
||||
else:
|
||||
return ''
|
||||
|
||||
def getExtrafanart(self, htmltree):
    """Collect extra fan-art image URLs from the raw detail page HTML.

    The gallery section is cut out of ``self.detailhtml`` with a regular
    expression (from the ``col-md-3`` container up to the juicyads script
    tag), and the ``src`` of every ``<img>`` inside it is collected.

    Args:
        htmltree: Unused; the raw HTML in ``self.detailhtml`` is searched.

    Returns:
        A list of ``src`` strings, or ``''`` when the gallery section is not
        found or contains no images.
    """
    section = re.search(
        r'<div class=\"col\-md\-3\"><div class=\"col\-xs\-12 col\-md\-12\">[\s\S]*?</script><script async src=\"\/\/adserver\.juicyads\.com/js/jads\.js\">',
        self.detailhtml,
    )
    if not section:
        return ''
    image_urls = re.findall(r'<img.*?src=\"(.*?)\"', section.group())
    return image_urls if image_urls else ''
|
||||
|
||||
def getRelease(self, htmltree):
    """Return the release date with its leading ``": "`` separator removed.

    The parent implementation's result is split on ``": "`` and the second
    field is returned.

    Args:
        htmltree: Parsed detail page, passed through to the parent class.

    Returns:
        The text after the first ``": "``, or ``''`` when the separator is
        absent (previously this raised ``IndexError``).
    """
    fields = super().getRelease(htmltree).split(": ")
    # Fall back to '' like the other getters' not-found paths.
    return fields[1] if len(fields) > 1 else ''
|
||||
|
||||
def getRuntime(self, htmltree):
    """Return the runtime with its leading ``": "`` separator removed.

    The parent implementation's result is split on ``": "`` and the second
    field is returned.

    Args:
        htmltree: Parsed detail page, passed through to the parent class.

    Returns:
        The text after the first ``": "``, or ``''`` when the separator is
        absent (previously this raised ``IndexError``).
    """
    fields = super().getRuntime(htmltree).split(": ")
    # Fall back to '' like the other getters' not-found paths.
    return fields[1] if len(fields) > 1 else ''
|
||||
|
||||
def parseElement(self, all):
    """Join the ``.text`` of each matched element into a comma-separated string.

    Args:
        all: Iterable of objects exposing a ``.text`` attribute; may be empty
            or ``None``.

    Returns:
        The texts joined with ``","``, or ``''`` when ``all`` is empty/falsy.
        Elements whose ``.text`` is ``None`` are skipped, since joining
        ``None`` would raise ``TypeError``.
    """
    if not all:
        return ''
    return ",".join(node.text for node in all if node.text is not None)
|
||||
|
||||
def getActors(self, htmltree):
    """Return the actors as one comma-separated string.

    The parent class's ``getActors`` result is handed to ``parseElement``,
    which joins the entries.
    """
    matched = super().getActors(htmltree)
    return self.parseElement(matched)
|
||||
|
||||
def getLabel(self, htmltree):
    """Return the label as one comma-separated string.

    Evaluates ``self.expr_label`` against ``htmltree`` via ``getTreeAll`` and
    joins the matches with ``parseElement``.
    """
    matched = self.getTreeAll(htmltree, self.expr_label)
    return self.parseElement(matched)
|
||||
|
||||
def getTags(self, htmltree):
    """Return the tags as one comma-separated string.

    Evaluates ``self.expr_tags`` against ``htmltree`` via ``getTreeAll`` and
    joins the matches with ``parseElement``.
    """
    matched = self.getTreeAll(htmltree, self.expr_tags)
    return self.parseElement(matched)
|
||||
|
||||
def getStudio(self, htmltree):
    """Return the studio as one comma-separated string.

    Evaluates ``self.expr_studio`` against ``htmltree`` via ``getTreeAll`` and
    joins the matches with ``parseElement``.
    """
    matched = self.getTreeAll(htmltree, self.expr_studio)
    return self.parseElement(matched)
|
||||
|
||||
Reference in New Issue
Block a user