update scrapinglib
- 优化提取 extrafanart、trailer 等,直接使用 xpath expr,不需要正则匹配
- 优化 getchu 获取 cover 方法,直接使用 og 标签信息
- 优化 www.getchu 识别 getchu-id 的资源
- 统一获取 tag 方法,返回值为 list
This commit is contained in:
@@ -104,9 +104,6 @@ class Airav(Parser):
|
|||||||
return result
|
return result
|
||||||
return super().getCover(htmltree)
|
return super().getCover(htmltree)
|
||||||
|
|
||||||
def getTags(self, htmltree):
|
|
||||||
return self.getTreeAll(htmltree, self.expr_tags)
|
|
||||||
|
|
||||||
def getSeries(self, htmltree):
|
def getSeries(self, htmltree):
|
||||||
result = self.javbus.get('series')
|
result = self.javbus.get('series')
|
||||||
if isinstance(result, str) and len(result):
|
if isinstance(result, str) and len(result):
|
||||||
|
|||||||
@@ -53,7 +53,7 @@ class Avsox(Parser):
|
|||||||
return self.getTreeElement(self.searchtree, self.expr_smallcover)
|
return self.getTreeElement(self.searchtree, self.expr_smallcover)
|
||||||
|
|
||||||
def getTags(self, htmltree):
|
def getTags(self, htmltree):
|
||||||
tags = super().getTags(htmltree).split(',')
|
tags = self.getTreeElement(htmltree).split(',')
|
||||||
return [i.strip() for i in tags[2:]] if len(tags) > 2 else []
|
return [i.strip() for i in tags[2:]] if len(tags) > 2 else []
|
||||||
|
|
||||||
def getOutline(self, htmltree):
|
def getOutline(self, htmltree):
|
||||||
@@ -70,7 +70,7 @@ class Avsox(Parser):
|
|||||||
return d
|
return d
|
||||||
|
|
||||||
def getActorPhoto(self, htmltree):
|
def getActorPhoto(self, htmltree):
|
||||||
a = super().getActorPhoto(htmltree)
|
a = self.getTreeAll(htmltree, self.expr_actorphoto)
|
||||||
d = {}
|
d = {}
|
||||||
for i in a:
|
for i in a:
|
||||||
l = i.find('.//img').attrib['src']
|
l = i.find('.//img').attrib['src']
|
||||||
|
|||||||
@@ -47,9 +47,6 @@ class Carib(Parser):
|
|||||||
def getCover(self, htmltree):
|
def getCover(self, htmltree):
|
||||||
return f'https://www.caribbeancom.com/moviepages/{self.number}/images/l_l.jpg'
|
return f'https://www.caribbeancom.com/moviepages/{self.number}/images/l_l.jpg'
|
||||||
|
|
||||||
def getTags(self, htmltree):
|
|
||||||
return self.getTreeAll(htmltree, self.expr_tags)
|
|
||||||
|
|
||||||
def getExtrafanart(self, htmltree):
|
def getExtrafanart(self, htmltree):
|
||||||
r = []
|
r = []
|
||||||
genres = self.getTreeAll(htmltree, self.expr_extrafanart)
|
genres = self.getTreeAll(htmltree, self.expr_extrafanart)
|
||||||
@@ -62,7 +59,6 @@ class Carib(Parser):
|
|||||||
return r
|
return r
|
||||||
|
|
||||||
def getActorPhoto(self, htmltree):
|
def getActorPhoto(self, htmltree):
|
||||||
# return super().getActorPhoto(htmltree)
|
|
||||||
htmla = htmltree.xpath("//*[@id='moviepages']/div[@class='container']/div[@class='inner-container']/div[@class='movie-info section']/ul/li[@class='movie-spec']/span[@class='spec-content']/a[@itemprop='actor']")
|
htmla = htmltree.xpath("//*[@id='moviepages']/div[@class='container']/div[@class='inner-container']/div[@class='movie-info section']/ul/li[@class='movie-spec']/span[@class='spec-content']/a[@itemprop='actor']")
|
||||||
names = htmltree.xpath("//*[@id='moviepages']/div[@class='container']/div[@class='inner-container']/div[@class='movie-info section']/ul/li[@class='movie-spec']/span[@class='spec-content']/a[@itemprop='actor']/span[@itemprop='name']/text()")
|
names = htmltree.xpath("//*[@id='moviepages']/div[@class='container']/div[@class='inner-container']/div[@class='movie-info section']/ul/li[@class='movie-spec']/span[@class='spec-content']/a[@itemprop='actor']/span[@itemprop='name']/text()")
|
||||||
t = {}
|
t = {}
|
||||||
|
|||||||
@@ -74,9 +74,8 @@ class Dlsite(Parser):
|
|||||||
def getOutline(self, htmltree):
|
def getOutline(self, htmltree):
|
||||||
total = []
|
total = []
|
||||||
result = self.getTreeAll(htmltree, self.expr_outline)
|
result = self.getTreeAll(htmltree, self.expr_outline)
|
||||||
for i in result:
|
total = [ x.strip() for x in result if x.strip()]
|
||||||
total.append(i.strip('\r\n'))
|
return '\n'.join(total)
|
||||||
return str(total).strip(" ['']").replace("', '', '",r'\n').replace("', '",r'\n').strip(", '', '")
|
|
||||||
|
|
||||||
def getRelease(self, htmltree):
|
def getRelease(self, htmltree):
|
||||||
return super().getRelease(htmltree).replace('年','-').replace('月','-').replace('日','')
|
return super().getRelease(htmltree).replace('年','-').replace('月','-').replace('日','')
|
||||||
@@ -84,9 +83,6 @@ class Dlsite(Parser):
|
|||||||
def getCover(self, htmltree):
|
def getCover(self, htmltree):
|
||||||
return 'https:' + super().getCover(htmltree).replace('.webp', '.jpg')
|
return 'https:' + super().getCover(htmltree).replace('.webp', '.jpg')
|
||||||
|
|
||||||
def getTags(self, htmltree):
|
|
||||||
return self.getTreeAll(htmltree, self.expr_tags)
|
|
||||||
|
|
||||||
def getExtrafanart(self, htmltree):
|
def getExtrafanart(self, htmltree):
|
||||||
try:
|
try:
|
||||||
result = []
|
result = []
|
||||||
|
|||||||
@@ -106,13 +106,13 @@ class Fanza(Parser):
|
|||||||
return self.getFanzaStrings('ジャンル:')
|
return self.getFanzaStrings('ジャンル:')
|
||||||
|
|
||||||
def getLabel(self, htmltree):
|
def getLabel(self, htmltree):
|
||||||
ret = self.getFanzaStrings('レーベル')
|
ret = self.getFanzaString('レーベル')
|
||||||
if ret == "----":
|
if ret == "----":
|
||||||
return ''
|
return ''
|
||||||
return ret
|
return ret
|
||||||
|
|
||||||
def getSeries(self, htmltree):
|
def getSeries(self, htmltree):
|
||||||
ret = self.getFanzaStrings('シリーズ:')
|
ret = self.getFanzaString('シリーズ:')
|
||||||
if ret == "----":
|
if ret == "----":
|
||||||
return ''
|
return ''
|
||||||
return ret
|
return ret
|
||||||
|
|||||||
@@ -18,6 +18,7 @@ class Fc2(Parser):
|
|||||||
expr_director = '//*[@id="top"]/div[1]/section[1]/div/section/div[2]/ul/li[3]/a/text()'
|
expr_director = '//*[@id="top"]/div[1]/section[1]/div/section/div[2]/ul/li[3]/a/text()'
|
||||||
expr_actor = '//*[@id="top"]/div[1]/section[1]/div/section/div[2]/ul/li[3]/a/text()'
|
expr_actor = '//*[@id="top"]/div[1]/section[1]/div/section/div[2]/ul/li[3]/a/text()'
|
||||||
expr_cover = "//div[@class='items_article_MainitemThumb']/span/img/@src"
|
expr_cover = "//div[@class='items_article_MainitemThumb']/span/img/@src"
|
||||||
|
expr_extrafanart = '//ul[@class="items_article_SampleImagesArea"]/li/a/@href'
|
||||||
expr_tags = "//a[@class='tag tagTag']/text()"
|
expr_tags = "//a[@class='tag tagTag']/text()"
|
||||||
|
|
||||||
def search(self, number):
|
def search(self, number):
|
||||||
@@ -45,17 +46,6 @@ class Fc2(Parser):
|
|||||||
def getCover(self, htmltree):
|
def getCover(self, htmltree):
|
||||||
return urljoin('https://adult.contents.fc2.com', super().getCover(htmltree))
|
return urljoin('https://adult.contents.fc2.com', super().getCover(htmltree))
|
||||||
|
|
||||||
def getExtrafanart(self, htmltree):
|
|
||||||
html_pather = re.compile(r'<ul class=\"items_article_SampleImagesArea\"[\s\S]*?</ul>')
|
|
||||||
html = html_pather.search(self.htmlcode)
|
|
||||||
if html:
|
|
||||||
html = html.group()
|
|
||||||
extrafanart_pather = re.compile(r'<a href=\"(.*?)\"')
|
|
||||||
extrafanart_imgs = extrafanart_pather.findall(html)
|
|
||||||
if extrafanart_imgs:
|
|
||||||
return extrafanart_imgs
|
|
||||||
return ''
|
|
||||||
|
|
||||||
def getTrailer(self, htmltree):
|
def getTrailer(self, htmltree):
|
||||||
video_pather = re.compile(r'\'[a-zA-Z0-9]{32}\'')
|
video_pather = re.compile(r'\'[a-zA-Z0-9]{32}\'')
|
||||||
video = video_pather.findall(self.htmlcode)
|
video = video_pather.findall(self.htmlcode)
|
||||||
|
|||||||
@@ -58,9 +58,6 @@ class Gcolle(Parser):
|
|||||||
def getCover(self, htmltree):
|
def getCover(self, htmltree):
|
||||||
return "https:" + super().getCover(htmltree)
|
return "https:" + super().getCover(htmltree)
|
||||||
|
|
||||||
def getTags(self, htmltree):
|
|
||||||
return self.getTreeAll(htmltree, self.expr_tags)
|
|
||||||
|
|
||||||
def getExtrafanart(self, htmltree):
|
def getExtrafanart(self, htmltree):
|
||||||
extrafanart = self.getTreeAll(htmltree, self.expr_extrafanart)
|
extrafanart = self.getTreeAll(htmltree, self.expr_extrafanart)
|
||||||
if len(extrafanart) == 0:
|
if len(extrafanart) == 0:
|
||||||
|
|||||||
@@ -35,7 +35,7 @@ class wwwGetchu(Parser):
|
|||||||
GETCHU_WWW_SEARCH_URL = 'http://www.getchu.com/php/search.phtml?genre=anime_dvd&search_keyword=_WORD_&check_key_dtl=1&submit='
|
GETCHU_WWW_SEARCH_URL = 'http://www.getchu.com/php/search.phtml?genre=anime_dvd&search_keyword=_WORD_&check_key_dtl=1&submit='
|
||||||
|
|
||||||
expr_title = '//*[@id="soft-title"]/text()'
|
expr_title = '//*[@id="soft-title"]/text()'
|
||||||
expr_cover = "/html/body/div[1]/table[2]/tr[1]/td/a/@href"
|
expr_cover = '//head/meta[@property="og:image"]'
|
||||||
expr_director = "//td[contains(text(),'ブランド')]/following-sibling::td/a[1]/text()"
|
expr_director = "//td[contains(text(),'ブランド')]/following-sibling::td/a[1]/text()"
|
||||||
expr_studio = "//td[contains(text(),'ブランド')]/following-sibling::td/a[1]/text()"
|
expr_studio = "//td[contains(text(),'ブランド')]/following-sibling::td/a[1]/text()"
|
||||||
expr_actor = "//td[contains(text(),'ブランド')]/following-sibling::td/a[1]/text()"
|
expr_actor = "//td[contains(text(),'ブランド')]/following-sibling::td/a[1]/text()"
|
||||||
@@ -47,8 +47,12 @@ class wwwGetchu(Parser):
|
|||||||
expr_series = "//td[contains(text(),'ジャンル:')]/following-sibling::td/text()"
|
expr_series = "//td[contains(text(),'ジャンル:')]/following-sibling::td/text()"
|
||||||
|
|
||||||
def queryNumberUrl(self, number):
|
def queryNumberUrl(self, number):
|
||||||
self.number = quote(number, encoding="euc_jp")
|
if 'GETCHU' in number.upper():
|
||||||
queryUrl = self.GETCHU_WWW_SEARCH_URL.replace("_WORD_", self.number)
|
idn = re.findall('\d+',number)[0]
|
||||||
|
return "http://www.getchu.com/soft.phtml?id=" + idn
|
||||||
|
else:
|
||||||
|
self.number = quote(number, encoding="euc_jp")
|
||||||
|
queryUrl = self.GETCHU_WWW_SEARCH_URL.replace("_WORD_", self.number)
|
||||||
# NOTE dont know why will try 2 times
|
# NOTE dont know why will try 2 times
|
||||||
retry = 2
|
retry = 2
|
||||||
for i in range(retry):
|
for i in range(retry):
|
||||||
@@ -64,14 +68,11 @@ class wwwGetchu(Parser):
|
|||||||
return 'GETCHU-' + re.findall('\d+', self.detailurl.replace("http://www.getchu.com/soft.phtml?id=", ""))[0]
|
return 'GETCHU-' + re.findall('\d+', self.detailurl.replace("http://www.getchu.com/soft.phtml?id=", ""))[0]
|
||||||
|
|
||||||
def getCover(self, htmltree):
|
def getCover(self, htmltree):
|
||||||
return "http://www.getchu.com" + super().getCover(htmltree).replace("./", '/')
|
return self.getTreeElement(htmltree, self.expr_cover).get('content')
|
||||||
|
|
||||||
def getActors(self, htmltree):
|
def getActors(self, htmltree):
|
||||||
return super().getDirector(htmltree)
|
return super().getDirector(htmltree)
|
||||||
|
|
||||||
def getTags(self, htmltree):
|
|
||||||
return self.getTreeAll(htmltree, self.expr_tags)
|
|
||||||
|
|
||||||
def getOutline(self, htmltree):
|
def getOutline(self, htmltree):
|
||||||
outline = ''
|
outline = ''
|
||||||
_list = self.getTreeAll(htmltree, self.expr_outline)
|
_list = self.getTreeAll(htmltree, self.expr_outline)
|
||||||
@@ -109,7 +110,6 @@ class dlGetchu(wwwGetchu):
|
|||||||
GETCHU_DL_URL = 'https://dl.getchu.com/i/item_WORD_'
|
GETCHU_DL_URL = 'https://dl.getchu.com/i/item_WORD_'
|
||||||
|
|
||||||
expr_title = "//div[contains(@style,'color: #333333; padding: 3px 0px 0px 5px;')]/text()"
|
expr_title = "//div[contains(@style,'color: #333333; padding: 3px 0px 0px 5px;')]/text()"
|
||||||
expr_cover = "//td[contains(@bgcolor,'#ffffff')]/img/@src"
|
|
||||||
expr_director = "//td[contains(text(),'作者')]/following-sibling::td/text()"
|
expr_director = "//td[contains(text(),'作者')]/following-sibling::td/text()"
|
||||||
expr_studio = "//td[contains(text(),'サークル')]/following-sibling::td/a/text()"
|
expr_studio = "//td[contains(text(),'サークル')]/following-sibling::td/a/text()"
|
||||||
expr_label = "//td[contains(text(),'サークル')]/following-sibling::td/a/text()"
|
expr_label = "//td[contains(text(),'サークル')]/following-sibling::td/a/text()"
|
||||||
@@ -135,9 +135,6 @@ class dlGetchu(wwwGetchu):
|
|||||||
def getNum(self, htmltree):
|
def getNum(self, htmltree):
|
||||||
return 'GETCHU-' + re.findall('\d+', self.number)[0]
|
return 'GETCHU-' + re.findall('\d+', self.number)[0]
|
||||||
|
|
||||||
def getCover(self, htmltree):
|
|
||||||
return "https://dl.getchu.com" + super().getCover(htmltree)
|
|
||||||
|
|
||||||
def extradict(self, dic: dict):
|
def extradict(self, dic: dict):
|
||||||
return dic
|
return dic
|
||||||
|
|
||||||
|
|||||||
@@ -12,15 +12,15 @@ class Jav321(Parser):
|
|||||||
expr_title = "/html/body/div[2]/div[1]/div[1]/div[1]/h3/text()"
|
expr_title = "/html/body/div[2]/div[1]/div[1]/div[1]/h3/text()"
|
||||||
expr_cover = "/html/body/div[2]/div[2]/div[1]/p/a/img/@src"
|
expr_cover = "/html/body/div[2]/div[2]/div[1]/p/a/img/@src"
|
||||||
expr_outline = "/html/body/div[2]/div[1]/div[1]/div[2]/div[3]/div/text()"
|
expr_outline = "/html/body/div[2]/div[1]/div[1]/div[2]/div[3]/div/text()"
|
||||||
# NOTE: 统一使用 xpath
|
|
||||||
expr_number = '//b[contains(text(),"品番")]/following-sibling::node()'
|
expr_number = '//b[contains(text(),"品番")]/following-sibling::node()'
|
||||||
expr_actor = '//b[contains(text(),"出演者")]/following-sibling::a[starts-with(@href,"/star")]'
|
expr_actor = '//b[contains(text(),"出演者")]/following-sibling::a[starts-with(@href,"/star")]/text()'
|
||||||
expr_label = '//b[contains(text(),"メーカー")]/following-sibling::a[starts-with(@href,"/company")]'
|
expr_label = '//b[contains(text(),"メーカー")]/following-sibling::a[starts-with(@href,"/company")]/text()'
|
||||||
expr_tags = '//b[contains(text(),"ジャンル")]/following-sibling::a[starts-with(@href,"/genre")]'
|
expr_tags = '//b[contains(text(),"ジャンル")]/following-sibling::a[starts-with(@href,"/genre")]/text()'
|
||||||
expr_studio = '//b[contains(text(),"メーカー")]/following-sibling::a[starts-with(@href,"/company")]'
|
expr_studio = '//b[contains(text(),"メーカー")]/following-sibling::a[starts-with(@href,"/company")]/text()'
|
||||||
expr_release = '//b[contains(text(),"配信開始日")]/following-sibling::node()'
|
expr_release = '//b[contains(text(),"配信開始日")]/following-sibling::node()'
|
||||||
expr_runtime = '//b[contains(text(),"収録時間")]/following-sibling::node()'
|
expr_runtime = '//b[contains(text(),"収録時間")]/following-sibling::node()'
|
||||||
# expr_series = '//b[contains(text(),"シリーズ")]'
|
expr_series = '//b[contains(text(),"シリーズ")]/following-sibling::node()'
|
||||||
|
expr_extrafanart = '//div[@class="col-md-3"]/div[@class="col-xs-12 col-md-12"]/p/a/img/@src'
|
||||||
|
|
||||||
def queryNumberUrl(self, number):
|
def queryNumberUrl(self, number):
|
||||||
return 'https://www.jav321.com/search'
|
return 'https://www.jav321.com/search'
|
||||||
@@ -45,39 +45,8 @@ class Jav321(Parser):
|
|||||||
else:
|
else:
|
||||||
return ''
|
return ''
|
||||||
|
|
||||||
def getExtrafanart(self, htmltree):
|
|
||||||
html_pather = re.compile(r'<div class=\"col\-md\-3\"><div class=\"col\-xs\-12 col\-md\-12\">[\s\S]*?</script><script async src=\"\/\/adserver\.juicyads\.com/js/jads\.js\">')
|
|
||||||
html = html_pather.search(self.detailhtml)
|
|
||||||
if html:
|
|
||||||
html = html.group()
|
|
||||||
extrafanart_pather = re.compile(r'<img.*?src=\"(.*?)\"')
|
|
||||||
extrafanart_imgs = extrafanart_pather.findall(html)
|
|
||||||
if extrafanart_imgs:
|
|
||||||
return extrafanart_imgs
|
|
||||||
return ''
|
|
||||||
|
|
||||||
def getRelease(self, htmltree):
|
def getRelease(self, htmltree):
|
||||||
return super().getRelease(htmltree).split(": ")[1]
|
return super().getRelease(htmltree).split(": ")[1]
|
||||||
|
|
||||||
def getRuntime(self, htmltree):
|
def getRuntime(self, htmltree):
|
||||||
return super().getRuntime(htmltree).split(": ")[1]
|
return super().getRuntime(htmltree).split(": ")[1]
|
||||||
|
|
||||||
def parseElement(self, all):
|
|
||||||
if all:
|
|
||||||
ret = []
|
|
||||||
for si in all:
|
|
||||||
ret.append(si.text)
|
|
||||||
return ",".join(ret)
|
|
||||||
return ''
|
|
||||||
|
|
||||||
def getActors(self, htmltree):
|
|
||||||
return self.parseElement(super().getActors(htmltree))
|
|
||||||
|
|
||||||
def getLabel(self, htmltree):
|
|
||||||
return self.parseElement(self.getTreeAll(htmltree, self.expr_label))
|
|
||||||
|
|
||||||
def getTags(self, htmltree):
|
|
||||||
return self.parseElement(self.getTreeAll(htmltree, self.expr_tags))
|
|
||||||
|
|
||||||
def getStudio(self, htmltree):
|
|
||||||
return self.parseElement(self.getTreeAll(htmltree, self.expr_studio))
|
|
||||||
|
|||||||
@@ -26,6 +26,7 @@ class Javbus(Parser):
|
|||||||
expr_runtime = '/html/body/div[5]/div[1]/div[2]/p[3]/text()'
|
expr_runtime = '/html/body/div[5]/div[1]/div[2]/p[3]/text()'
|
||||||
expr_actor = '//div[@class="star-name"]/a'
|
expr_actor = '//div[@class="star-name"]/a'
|
||||||
expr_actorphoto = '//div[@class="star-name"]/../a/img'
|
expr_actorphoto = '//div[@class="star-name"]/../a/img'
|
||||||
|
expr_extrafanart = '//div[@id="sample-waterfall"]/a/@href'
|
||||||
expr_tags = '/html/head/meta[@name="keywords"]/@content'
|
expr_tags = '/html/head/meta[@name="keywords"]/@content'
|
||||||
expr_uncensored = '//*[@id="navbar"]/ul[1]/li[@class="active"]/a[contains(@href,"uncensored")]'
|
expr_uncensored = '//*[@id="navbar"]/ul[1]/li[@class="active"]/a[contains(@href,"uncensored")]'
|
||||||
|
|
||||||
@@ -85,9 +86,6 @@ class Javbus(Parser):
|
|||||||
def getCover(self, htmltree):
|
def getCover(self, htmltree):
|
||||||
return urljoin("https://www.javbus.com", super().getCover(htmltree))
|
return urljoin("https://www.javbus.com", super().getCover(htmltree))
|
||||||
|
|
||||||
def getRelease(self, htmltree):
|
|
||||||
return super().getRelease(htmltree).strip(" ['']")
|
|
||||||
|
|
||||||
def getRuntime(self, htmltree):
|
def getRuntime(self, htmltree):
|
||||||
return super().getRuntime(htmltree).strip(" ['']分鐘")
|
return super().getRuntime(htmltree).strip(" ['']分鐘")
|
||||||
|
|
||||||
@@ -99,7 +97,7 @@ class Javbus(Parser):
|
|||||||
return b
|
return b
|
||||||
|
|
||||||
def getActorPhoto(self, htmltree):
|
def getActorPhoto(self, htmltree):
|
||||||
actors = super().getActorPhoto(htmltree)
|
actors = self.getTreeAll(htmltree, self.expr_actorphoto)
|
||||||
d = {}
|
d = {}
|
||||||
for i in actors:
|
for i in actors:
|
||||||
p = i.attrib['src']
|
p = i.attrib['src']
|
||||||
@@ -122,20 +120,9 @@ class Javbus(Parser):
|
|||||||
return self.getTreeElement(htmltree, self.expr_series)
|
return self.getTreeElement(htmltree, self.expr_series)
|
||||||
|
|
||||||
def getTags(self, htmltree):
|
def getTags(self, htmltree):
|
||||||
tags = super().getTags(htmltree).split(',')
|
tags = self.getTreeElement(htmltree, self.expr_tags).split(',')
|
||||||
return tags[1:]
|
return tags[1:]
|
||||||
|
|
||||||
def getExtrafanart(self, htmltree):
|
|
||||||
html_pather = re.compile(r'<div id=\"sample-waterfall\">[\s\S]*?</div></a>\s*?</div>')
|
|
||||||
html = html_pather.search(self.htmlcode)
|
|
||||||
if html:
|
|
||||||
html = html.group()
|
|
||||||
extrafanart_pather = re.compile(r'<a class=\"sample-box\" href=\"(.*?)\"')
|
|
||||||
extrafanart_imgs = extrafanart_pather.findall(html)
|
|
||||||
if extrafanart_imgs:
|
|
||||||
return [urljoin('https://www.javbus.com',img) for img in extrafanart_imgs]
|
|
||||||
return ''
|
|
||||||
|
|
||||||
def getOutline(self, htmltree):
|
def getOutline(self, htmltree):
|
||||||
if self.morestoryline:
|
if self.morestoryline:
|
||||||
if any(caller for caller in inspect.stack() if os.path.basename(caller.filename) == 'airav.py'):
|
if any(caller for caller in inspect.stack() if os.path.basename(caller.filename) == 'airav.py'):
|
||||||
|
|||||||
@@ -32,6 +32,7 @@ class Javdb(Parser):
|
|||||||
expr_cover = "//div[contains(@class, 'column-video-cover')]/a/img/@src"
|
expr_cover = "//div[contains(@class, 'column-video-cover')]/a/img/@src"
|
||||||
expr_cover2 = "//div[contains(@class, 'column-video-cover')]/img/@src"
|
expr_cover2 = "//div[contains(@class, 'column-video-cover')]/img/@src"
|
||||||
expr_cover_no = '//*[contains(@class,"movie-list")]/div/a/div[contains(@class, "cover")]/img/@src'
|
expr_cover_no = '//*[contains(@class,"movie-list")]/div/a/div[contains(@class, "cover")]/img/@src'
|
||||||
|
expr_trailer = '//span[contains(text(),"預告片")]/../../video/source/@src'
|
||||||
expr_extrafanart = "//article[@class='message video-panel']/div[@class='message-body']/div[@class='tile-images preview-images']/a[contains(@href,'/samples/')]/@href"
|
expr_extrafanart = "//article[@class='message video-panel']/div[@class='message-body']/div[@class='tile-images preview-images']/a[contains(@href,'/samples/')]/@href"
|
||||||
expr_tags = '//strong[contains(text(),"類別")]/../span/a/text()'
|
expr_tags = '//strong[contains(text(),"類別")]/../span/a/text()'
|
||||||
expr_tags2 = '//strong[contains(text(),"類別")]/../span/text()'
|
expr_tags2 = '//strong[contains(text(),"類別")]/../span/text()'
|
||||||
@@ -105,9 +106,10 @@ class Javdb(Parser):
|
|||||||
def getNum(self, htmltree):
|
def getNum(self, htmltree):
|
||||||
if self.noauth:
|
if self.noauth:
|
||||||
return self.number
|
return self.number
|
||||||
result1 = str(self.getTreeAll(htmltree, self.expr_number)).strip(" ['']")
|
# 番号被分割开,需要合并后才是完整番号
|
||||||
result2 = str(self.getTreeAll(htmltree, self.expr_number2)).strip(" ['']")
|
part1 = self.getTreeElement(htmltree, self.expr_number)
|
||||||
dp_number = str(result2 + result1).strip('+')
|
part2 = self.getTreeElement(htmltree, self.expr_number2)
|
||||||
|
dp_number = part2 + part1
|
||||||
# NOTE 检测匹配与更新 self.number
|
# NOTE 检测匹配与更新 self.number
|
||||||
if dp_number.upper() != self.number.upper():
|
if dp_number.upper() != self.number.upper():
|
||||||
raise Exception(f'[!] {self.number}: find [{dp_number}] in javdb, not match')
|
raise Exception(f'[!] {self.number}: find [{dp_number}] in javdb, not match')
|
||||||
@@ -131,28 +133,20 @@ class Javdb(Parser):
|
|||||||
return self.getTreeElement(htmltree, self.expr_release_no, self.queryid).strip()
|
return self.getTreeElement(htmltree, self.expr_release_no, self.queryid).strip()
|
||||||
return super().getRelease(htmltree)
|
return super().getRelease(htmltree)
|
||||||
|
|
||||||
def getRuntime(self, htmltree):
|
|
||||||
result1 = str(self.getTreeAll(htmltree, self.expr_runtime)).strip(" ['']")
|
|
||||||
result2 = str(self.getTreeAll(htmltree, self.expr_runtime2)).strip(" ['']")
|
|
||||||
return str(result1 + result2).strip('+').rstrip('mi')
|
|
||||||
|
|
||||||
def getDirector(self, htmltree):
|
def getDirector(self, htmltree):
|
||||||
result1 = str(self.getTreeAll(htmltree, self.expr_director)).strip(" ['']")
|
return self.getTreeElementbyExprs(htmltree, self.expr_director, self.expr_director2)
|
||||||
result2 = str(self.getTreeAll(htmltree, self.expr_director2)).strip(" ['']")
|
|
||||||
return str(result1 + result2).strip('+').replace("', '", '').replace('"', '')
|
|
||||||
|
|
||||||
def getSeries(self, htmltree):
|
def getSeries(self, htmltree):
|
||||||
result1 = str(self.getTreeAll(htmltree, self.expr_series)).strip(" ['']")
|
# NOTE 不清楚javdb是否有一部影片多个series的情况,暂时保留
|
||||||
result2 = str(self.getTreeAll(htmltree, self.expr_series2)).strip(" ['']")
|
results = self.getTreeAllbyExprs(htmltree, self.expr_series, self.expr_series2)
|
||||||
result = str(result1 + result2).strip('+').replace("', '", '').replace('"', '')
|
result = ''.join(results)
|
||||||
if not result and self.fixstudio:
|
if not result and self.fixstudio:
|
||||||
result = self.getStudio(htmltree)
|
result = self.getStudio(htmltree)
|
||||||
return result
|
return result
|
||||||
|
|
||||||
def getLabel(self, htmltree):
|
def getLabel(self, htmltree):
|
||||||
result1 = str(self.getTreeAll(htmltree, self.expr_label)).strip(" ['']")
|
results = self.getTreeAllbyExprs(htmltree, self.expr_label, self.expr_label2)
|
||||||
result2 = str(self.getTreeAll(htmltree, self.expr_label2)).strip(" ['']")
|
result = ''.join(results)
|
||||||
result = str(result1 + result2).strip('+').replace("', '", '').replace('"', '')
|
|
||||||
if not result and self.fixstudio:
|
if not result and self.fixstudio:
|
||||||
result = self.getStudio(htmltree)
|
result = self.getStudio(htmltree)
|
||||||
return result
|
return result
|
||||||
@@ -182,38 +176,20 @@ class Javdb(Parser):
|
|||||||
return getStoryline(self.number, self.getUncensored(htmltree))
|
return getStoryline(self.number, self.getUncensored(htmltree))
|
||||||
return ''
|
return ''
|
||||||
|
|
||||||
def getStudio(self, htmltree):
|
|
||||||
try:
|
|
||||||
return self.getTreeAll(htmltree, self.expr_studio).strip(" ['']")
|
|
||||||
except:
|
|
||||||
pass
|
|
||||||
try:
|
|
||||||
return self.getTreeAll(htmltree, self.expr_studio2).strip(" ['']")
|
|
||||||
except:
|
|
||||||
return ''
|
|
||||||
|
|
||||||
def getTrailer(self, htmltree):
|
def getTrailer(self, htmltree):
|
||||||
video_pather = re.compile(r'<video id\=\".*?>\s*?<source src=\"(.*?)\"')
|
video = super().getTrailer(htmltree)
|
||||||
video = video_pather.findall(self.deatilpage)
|
|
||||||
# 加上数组判空
|
# 加上数组判空
|
||||||
if video and video[0] != "":
|
if video:
|
||||||
if not 'https:' in video[0]:
|
if not 'https:' in video:
|
||||||
video_url = 'https:' + video[0]
|
video_url = 'https:' + video
|
||||||
else:
|
else:
|
||||||
video_url = video[0]
|
video_url = video
|
||||||
else:
|
else:
|
||||||
video_url = ''
|
video_url = ''
|
||||||
return video_url
|
return video_url
|
||||||
|
|
||||||
def getTags(self, htmltree):
|
def getTags(self, htmltree):
|
||||||
try:
|
return self.getTreeAllbyExprs(htmltree, self.expr_tags, self.expr_tags2)
|
||||||
return self.getTreeAll(htmltree, self.expr_tags)
|
|
||||||
except:
|
|
||||||
pass
|
|
||||||
try:
|
|
||||||
return self.getTreeAll(htmltree, self.expr_tags2)
|
|
||||||
except:
|
|
||||||
return ''
|
|
||||||
|
|
||||||
def getUserRating(self, htmltree):
|
def getUserRating(self, htmltree):
|
||||||
try:
|
try:
|
||||||
|
|||||||
@@ -16,7 +16,7 @@ class Mgstage(Parser):
|
|||||||
expr_actor = '//th[contains(text(),"出演:")]/../td/a/text()'
|
expr_actor = '//th[contains(text(),"出演:")]/../td/a/text()'
|
||||||
expr_release = '//th[contains(text(),"配信開始日:")]/../td/a/text()'
|
expr_release = '//th[contains(text(),"配信開始日:")]/../td/a/text()'
|
||||||
expr_cover = '//*[@id="EnlargeImage"]/@href'
|
expr_cover = '//*[@id="EnlargeImage"]/@href'
|
||||||
expr_label = '//th[contains(text(),"シリーズ:")]/../td/a/text()'
|
expr_label = '//th[contains(text(),"レーベル:")]/../td/a/text()'
|
||||||
expr_tags = '//th[contains(text(),"ジャンル:")]/../td/a/text()'
|
expr_tags = '//th[contains(text(),"ジャンル:")]/../td/a/text()'
|
||||||
expr_tags2 = '//th[contains(text(),"ジャンル:")]/../td/text()'
|
expr_tags2 = '//th[contains(text(),"ジャンル:")]/../td/text()'
|
||||||
expr_series = '//th[contains(text(),"シリーズ")]/../td/a/text()'
|
expr_series = '//th[contains(text(),"シリーズ")]/../td/a/text()'
|
||||||
@@ -34,19 +34,17 @@ class Mgstage(Parser):
|
|||||||
return super().getTitle(htmltree).replace('/', ',').strip()
|
return super().getTitle(htmltree).replace('/', ',').strip()
|
||||||
|
|
||||||
def getTags(self, htmltree):
|
def getTags(self, htmltree):
|
||||||
results = self.getTreeAll(htmltree, self.expr_tags)
|
return self.getTreeAllbyExprs(htmltree, self.expr_tags, self.expr_tags2)
|
||||||
results2 = self.getTreeAll(htmltree, self.expr_tags2)
|
|
||||||
return [ x.strip() for x in (results + results2) if x.strip()]
|
|
||||||
|
|
||||||
def getTreeAll(self, tree, expr):
|
def getTreeAll(self, tree, expr):
|
||||||
alls = super().getTreeAll(tree, expr)
|
alls = super().getTreeAll(tree, expr)
|
||||||
return [ x.strip() for x in alls ]
|
return [ x.strip() for x in alls if x.strip()]
|
||||||
|
|
||||||
def getTreeElement(self, tree, expr, index=0):
|
def getTreeElement(self, tree, expr, index=0):
|
||||||
if expr == '':
|
if expr == '':
|
||||||
return ''
|
return ''
|
||||||
result1 = getTreeElement(tree, expr).strip().replace("', '", '').strip(" ['']")
|
result1 = ''.join(self.getTreeAll(tree, expr))
|
||||||
result2 = getTreeElement(tree, expr.replace('td/a/','td/')).strip().replace("', '", '').strip(" ['']")
|
result2 = ''.join(self.getTreeAll(tree, expr.replace('td/a/','td/')))
|
||||||
if result1 == result2:
|
if result1 == result2:
|
||||||
return str(result1).strip('+').replace("', '",'').replace('"','')
|
return result1
|
||||||
return str(result1 + result2).strip('+').replace("', '",'').replace('"','')
|
return result1 + result2
|
||||||
|
|||||||
@@ -61,9 +61,6 @@ class Mv91(Parser):
|
|||||||
def getStudio(self, htmltree):
|
def getStudio(self, htmltree):
|
||||||
return '91制片厂'
|
return '91制片厂'
|
||||||
|
|
||||||
def getTags(self, htmltree):
|
|
||||||
return self.getTreeAll(htmltree, self.expr_tags)
|
|
||||||
|
|
||||||
def getActors(self, htmltree):
|
def getActors(self, htmltree):
|
||||||
b=[]
|
b=[]
|
||||||
for player in self.getTreeAll(htmltree, self.expr_actor):
|
for player in self.getTreeAll(htmltree, self.expr_actor):
|
||||||
|
|||||||
@@ -8,7 +8,8 @@ from . import httprequest
|
|||||||
from .utils import getTreeElement, getTreeAll
|
from .utils import getTreeElement, getTreeAll
|
||||||
|
|
||||||
class Parser:
|
class Parser:
|
||||||
|
""" 基础刮削类
|
||||||
|
"""
|
||||||
source = 'base'
|
source = 'base'
|
||||||
# poster: `0` 复制 `1` 裁剪
|
# poster: `0` 复制 `1` 裁剪
|
||||||
imagecut = 1
|
imagecut = 1
|
||||||
@@ -139,7 +140,7 @@ class Parser:
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(e)
|
print(e)
|
||||||
dic = {"title": ""}
|
dic = {"title": ""}
|
||||||
js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), )
|
js = json.dumps(dic, ensure_ascii=False, sort_keys=True, separators=(',', ':'))
|
||||||
return js
|
return js
|
||||||
|
|
||||||
def extradict(self, dic:dict):
|
def extradict(self, dic:dict):
|
||||||
@@ -155,15 +156,8 @@ class Parser:
|
|||||||
def getTitle(self, htmltree):
|
def getTitle(self, htmltree):
|
||||||
return self.getTreeElement(htmltree, self.expr_title).strip()
|
return self.getTreeElement(htmltree, self.expr_title).strip()
|
||||||
|
|
||||||
def getStudio(self, htmltree):
|
def getRelease(self, htmltree):
|
||||||
try:
|
return self.getTreeElement(htmltree, self.expr_release).strip().replace('/','-')
|
||||||
return self.getTreeElement(htmltree, self.expr_studio).strip(" ['']")
|
|
||||||
except:
|
|
||||||
pass
|
|
||||||
try:
|
|
||||||
return self.getTreeElement(htmltree, self.expr_studio2).strip(" ['']")
|
|
||||||
except:
|
|
||||||
return ''
|
|
||||||
|
|
||||||
def getYear(self, htmltree):
|
def getYear(self, htmltree):
|
||||||
""" year基本都是从release中解析的
|
""" year基本都是从release中解析的
|
||||||
@@ -175,73 +169,46 @@ class Parser:
|
|||||||
return release
|
return release
|
||||||
|
|
||||||
def getRuntime(self, htmltree):
|
def getRuntime(self, htmltree):
|
||||||
try:
|
return self.getTreeElementbyExprs(htmltree, self.expr_runtime, self.expr_runtime2).strip().rstrip('mi')
|
||||||
return self.getTreeElement(htmltree, self.expr_runtime).strip("\n\t ['']").rstrip('mi')
|
|
||||||
except:
|
|
||||||
pass
|
|
||||||
try:
|
|
||||||
return self.getTreeElement(htmltree, self.expr_runtime2).strip("\n\t ['']").rstrip('mi')
|
|
||||||
except:
|
|
||||||
return ''
|
|
||||||
|
|
||||||
def getRelease(self, htmltree):
|
|
||||||
return self.getTreeElement(htmltree, self.expr_release).strip().replace('/','-')
|
|
||||||
|
|
||||||
def getOutline(self, htmltree):
|
def getOutline(self, htmltree):
|
||||||
return self.getTreeElement(htmltree, self.expr_outline).strip().replace("\n","")
|
return self.getTreeElement(htmltree, self.expr_outline).strip()
|
||||||
|
|
||||||
def getDirector(self, htmltree):
|
def getDirector(self, htmltree):
|
||||||
return self.getTreeElement(htmltree, self.expr_director)
|
return self.getTreeElement(htmltree, self.expr_director).strip()
|
||||||
|
|
||||||
def getActors(self, htmltree) -> list:
    """Return every match of ``self.expr_actor`` as produced by ``self.getTreeAll``."""
    actors = self.getTreeAll(htmltree, self.expr_actor)
    return actors
|
||||||
|
|
||||||
def getTags(self, htmltree) -> list:
    """Return the tags matched by ``self.expr_tags`` as a list.

    Each match is stripped; matches that are empty after stripping are dropped.
    """
    tags = []
    for raw in self.getTreeAll(htmltree, self.expr_tags):
        tag = raw.strip()
        if tag:
            tags.append(tag)
    return tags
|
||||||
|
|
||||||
|
def getStudio(self, htmltree):
    """Return the studio found via ``expr_studio``, falling back to ``expr_studio2``."""
    primary, fallback = self.expr_studio, self.expr_studio2
    return self.getTreeElementbyExprs(htmltree, primary, fallback)
|
||||||
|
|
||||||
def getLabel(self, htmltree):
    """Return the label found via ``expr_label``, falling back to ``expr_label2``."""
    primary, fallback = self.expr_label, self.expr_label2
    return self.getTreeElementbyExprs(htmltree, primary, fallback)
|
|
||||||
|
|
||||||
def getSeries(self, htmltree):
    """Return the series found via ``expr_series``, falling back to ``expr_series2``."""
    primary, fallback = self.expr_series, self.expr_series2
    return self.getTreeElementbyExprs(htmltree, primary, fallback)
|
|
||||||
|
|
||||||
def getCover(self, htmltree):
    """Return the cover value found via ``expr_cover``, falling back to ``expr_cover2``."""
    primary, fallback = self.expr_cover, self.expr_cover2
    return self.getTreeElementbyExprs(htmltree, primary, fallback)
|
|
||||||
|
|
||||||
def getSmallCover(self, htmltree):
    """Return the first match of ``self.expr_smallcover`` as produced by ``self.getTreeElement``."""
    small_cover = self.getTreeElement(htmltree, self.expr_smallcover)
    return small_cover
|
||||||
|
|
||||||
def getExtrafanart(self, htmltree) -> list:
    """Return every match of ``self.expr_extrafanart`` as produced by ``self.getTreeAll``."""
    fanart = self.getTreeAll(htmltree, self.expr_extrafanart)
    return fanart
|
||||||
|
|
||||||
def getTrailer(self, htmltree):
    """Return the first match of ``self.expr_trailer`` as produced by ``self.getTreeElement``."""
    trailer = self.getTreeElement(htmltree, self.expr_trailer)
    return trailer
|
||||||
|
|
||||||
def getActorPhoto(self, htmltree) -> dict:
    """Return an empty dict; this implementation extracts no actor photos from ``htmltree``."""
    photos = {}
    return photos
|
||||||
|
|
||||||
def getUncensored(self, htmlree):
|
def getUncensored(self, htmlree) -> bool:
|
||||||
if self.expr_uncensored:
|
if self.expr_uncensored:
|
||||||
u = self.getTreeAll(htmlree, self.expr_uncensored)
|
u = self.getTreeAll(htmlree, self.expr_uncensored)
|
||||||
return bool(u)
|
return bool(u)
|
||||||
@@ -249,10 +216,10 @@ class Parser:
|
|||||||
return self.uncensored
|
return self.uncensored
|
||||||
|
|
||||||
def getUserRating(self, htmltree):
    """Return the first match of ``self.expr_userrating`` as produced by ``self.getTreeElement``."""
    rating = self.getTreeElement(htmltree, self.expr_userrating)
    return rating
|
||||||
|
|
||||||
def getUserVotes(self, htmltree):
    """Return the first match of ``self.expr_uservotes`` as produced by ``self.getTreeElement``."""
    votes = self.getTreeElement(htmltree, self.expr_uservotes)
    return votes
|
||||||
|
|
||||||
def getTreeElement(self, tree: html.HtmlElement, expr, index=0):
|
def getTreeElement(self, tree: html.HtmlElement, expr, index=0):
|
||||||
""" 根据表达式从`xmltree`中获取匹配值,默认 index 为 0
|
""" 根据表达式从`xmltree`中获取匹配值,默认 index 为 0
|
||||||
@@ -263,3 +230,32 @@ class Parser:
|
|||||||
""" 根据表达式从`xmltree`中获取全部匹配值
|
""" 根据表达式从`xmltree`中获取全部匹配值
|
||||||
"""
|
"""
|
||||||
return getTreeAll(tree, expr)
|
return getTreeAll(tree, expr)
|
||||||
|
|
||||||
|
def getTreeElementbyExprs(self, tree: html.HtmlElement, expr, expr2=''):
    """Return the first non-empty, stripped match among two expressions.

    ``expr`` is looked up first through ``self.getTreeElement``; ``expr2`` is
    consulted only when the first lookup yields nothing usable. Each
    expression is evaluated independently, so a failure on the first one
    (for example a match that has no ``strip``) no longer prevents the
    fallback from being tried.

    :param tree: tree handed to ``self.getTreeElement``
    :param expr: primary expression
    :param expr2: fallback expression, defaults to ``''``
    :return: the stripped match, or ``''`` when neither expression gives one
    """
    for candidate in (expr, expr2):
        try:
            value = self.getTreeElement(tree, candidate).strip()
        except Exception:
            # Best-effort lookup: a broken expression just means "no match".
            continue
        if value:
            return value
    return ''
|
||||||
|
|
||||||
|
def getTreeAllbyExprs(self, tree: html.HtmlElement, expr, expr2=''):
    """Collect the matches of two expressions into one de-duplicated list.

    Matches are stripped; empty strings and lone ``','`` separators are
    discarded. Duplicates are removed while keeping first-seen order
    (matches of ``expr`` before those of ``expr2``), so the result is
    deterministic between runs.

    :param tree: tree handed to ``self.getTreeAll``
    :param expr: first expression
    :param expr2: second expression, defaults to ``''``
    :return: list of unique cleaned matches, or ``[]`` if collecting them fails
    """
    try:
        first = self.getTreeAll(tree, expr)
        second = self.getTreeAll(tree, expr2)
        cleaned = [item.strip() for source in (first, second) for item in source]
        # dict.fromkeys de-duplicates while preserving insertion order.
        return list(dict.fromkeys(c for c in cleaned if c and c != ','))
    except Exception:
        return []
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
from lxml.html import HtmlElement
|
from lxml.html import HtmlElement
|
||||||
|
|
||||||
def getTreeElement(tree: HtmlElement, expr, index=0):
|
def getTreeElement(tree: HtmlElement, expr='', index=0):
|
||||||
""" 根据表达式从`xmltree`中获取匹配值,默认 index 为 0
|
""" 根据表达式从`xmltree`中获取匹配值,默认 index 为 0
|
||||||
:param tree (html.HtmlElement)
|
:param tree (html.HtmlElement)
|
||||||
:param expr
|
:param expr
|
||||||
@@ -16,16 +16,16 @@ def getTreeElement(tree: HtmlElement, expr, index=0):
|
|||||||
except:
|
except:
|
||||||
return ''
|
return ''
|
||||||
|
|
||||||
def getTreeAll(tree: HtmlElement, expr=''):
    """Return all matches of ``expr`` in ``tree``.

    :param tree (html.HtmlElement)
    :param expr: XPath expression; an empty string short-circuits to ``[]``
    :return: the result of ``tree.xpath(expr)``, or ``[]`` when evaluation fails
    """
    if expr == '':
        return []
    try:
        # The xpath call is what can raise, so it must sit inside the try
        # for the empty-list fallback to be reachable.
        return tree.xpath(expr)
    except Exception:
        return []
|
||||||
|
|||||||
@@ -28,7 +28,7 @@ class Xcity(Parser):
|
|||||||
return super().getStudio(htmltree).strip('+').replace("', '", '').replace('"', '')
|
return super().getStudio(htmltree).strip('+').replace("', '", '').replace('"', '')
|
||||||
|
|
||||||
def getRuntime(self, htmltree):
    """Return the match at index 1 of ``self.expr_runtime``, stripped of surrounding whitespace."""
    runtime = self.getTreeElement(htmltree, self.expr_runtime, 1)
    return runtime.strip()
|
||||||
|
|
||||||
def getRelease(self, htmltree):
|
def getRelease(self, htmltree):
|
||||||
try:
|
try:
|
||||||
@@ -37,13 +37,6 @@ class Xcity(Parser):
|
|||||||
except:
|
except:
|
||||||
return ''
|
return ''
|
||||||
|
|
||||||
def getTags(self, htmltree):
    """Return every match of ``self.expr_tags`` with newline and tab characters removed."""
    matches = self.getTreeAll(htmltree, self.expr_tags)
    return [match.replace("\n", "").replace("\t", "") for match in matches]
|
|
||||||
|
|
||||||
def getCover(self, htmltree):
|
def getCover(self, htmltree):
|
||||||
try:
|
try:
|
||||||
result = super().getCover(htmltree)
|
result = super().getCover(htmltree)
|
||||||
|
|||||||
Reference in New Issue
Block a user