62 lines
2.3 KiB
Python
62 lines
2.3 KiB
Python
# -*- coding: utf-8 -*-
|
|
|
|
import re
|
|
from lxml import etree
|
|
from urllib.parse import urljoin
|
|
|
|
from .parser import Parser
|
|
|
|
|
|
class Javmenu(Parser):
    """Parser for movie detail pages on javmenu.com.

    The XPath expressions below are class attributes named ``expr_*``;
    they are presumably consumed by the ``Parser`` base class when it
    builds the result dictionary (verify against ``Parser``).
    """

    source = 'javmenu'

    # Title and cover are read from the Open Graph <meta> tags in <head>.
    expr_title = '/html/head/meta[@property="og:title"]/@content'
    expr_cover = '/html/head/meta[@property="og:image"]/@content'

    # Each label is matched in both traditional and simplified Chinese.
    # The number is rendered in two pieces: a link and a following <span>.
    expr_number = '//span[contains(text(),"番號") or contains(text(),"番号")]/../a/text()'
    expr_number2 = '//span[contains(text(),"番號") or contains(text(),"番号")]/../span[2]/text()'
    # The traditional-Chinese label is matched without a trailing
    # punctuation mark so it works regardless of what follows the label.
    expr_runtime = '//span[contains(text(),"時長") or contains(text(),"时长")]/../span[2]/text()'
    expr_release = '//span[contains(text(),"日期")]/../span[2]/text()'
    expr_studio = '//span[contains(text(),"製作")]/../span[2]/a/text()'

    expr_actor = '//a[contains(@class,"actress")]/text()'
    expr_tags = '//a[contains(@class,"genre")]/text()'

    def extraInit(self):
        """Set the source-specific defaults ``imagecut`` and ``uncensored``."""
        self.imagecut = 4
        self.uncensored = True

    def search(self, number):
        """Fetch the detail page for *number* and return the parsed result.

        Uses ``self.specifiedUrl`` as the detail URL when it is set;
        otherwise builds ``https://javmenu.com/zh/<number>/``.

        Returns ``404`` when ``self.getHtml`` returns ``404``; otherwise
        the value produced by ``self.dictformat`` for the parsed tree.
        """
        self.number = number
        if self.specifiedUrl:
            self.detailurl = self.specifiedUrl
        else:
            self.detailurl = 'https://javmenu.com/zh/' + self.number + '/'
        self.htmlcode = self.getHtml(self.detailurl)
        if self.htmlcode == 404:
            return 404
        htmltree = etree.HTML(self.htmlcode)
        result = self.dictformat(htmltree)
        return result

    def getNum(self, htmltree):
        """Return the number shown on the page and store it in ``self.number``.

        Raises:
            Exception: if the number on the page does not match the
                requested ``self.number`` (compared case-insensitively).
        """
        # The page splits the number into two elements; join them to get
        # the complete number.
        part1 = self.getTreeElement(htmltree, self.expr_number)
        part2 = self.getTreeElement(htmltree, self.expr_number2)
        dp_number = part1 + part2
        # NOTE: verify the match, then update self.number with the page's
        # own spelling of the number.
        if dp_number.upper() != self.number.upper():
            raise Exception(f'[!] {self.number}: find [{dp_number}] in javmenu, not match')
        self.number = dp_number
        return self.number

    def getTitle(self, htmltree):
        """Return the page title with the number and site suffix removed."""
        browser_title = super().getTitle(htmltree)
        # Strip the number: cut the title after a run of digits taken from
        # self.number. The second run is preferred (as before); numbers
        # with a single run fall back to it instead of raising IndexError,
        # and numbers without digits skip the cut entirely.
        digit_runs = re.findall(r"\d+", self.number)
        if len(digit_runs) > 1:
            marker = digit_runs[1]
        elif digit_runs:
            marker = digit_runs[0]
        else:
            marker = None
        if marker is None:
            title = browser_title
        else:
            title = browser_title.split(marker, 1)[-1]
        # Drop the site suffix in both traditional and simplified Chinese.
        title = title.replace(' | JAV目錄大全 | 每日更新', "")
        title = title.replace(' | JAV目录大全 | 每日更新', "").strip()
        return title.replace(self.number, '').strip()
|
|
|