update scrapinglib
This commit is contained in:
@@ -69,7 +69,7 @@ class Airav(Parser):
|
||||
return str(re.findall('\d{4}', release)).strip(" ['']")
|
||||
|
||||
def getOutline(self, htmltree):
|
||||
return self.getAll(htmltree, self.expr_outline).replace('\n','').strip()
|
||||
return self.getTreeAll(htmltree, self.expr_outline).replace('\n','').strip()
|
||||
|
||||
def getRuntime(self, htmltree):
|
||||
result = self.javbus.get('runtime')
|
||||
|
||||
@@ -23,19 +23,19 @@ class Avsox(Parser):
|
||||
|
||||
def queryNumberUrl(self, number):
|
||||
qurySiteTree = self.getHtmlTree('https://tellme.pw/avsox')
|
||||
site = self.getTreeIndex(qurySiteTree, '//div[@class="container"]/div/a/@href')
|
||||
site = self.getTreeElement(qurySiteTree, '//div[@class="container"]/div/a/@href')
|
||||
self.searchtree = self.getHtmlTree(site + '/cn/search/' + number)
|
||||
result1 = self.getTreeIndex(self.searchtree, '//*[@id="waterfall"]/div/a/@href')
|
||||
result1 = self.getTreeElement(self.searchtree, '//*[@id="waterfall"]/div/a/@href')
|
||||
if result1 == '' or result1 == 'null' or result1 == 'None':
|
||||
self.searchtree = self.getHtmlTree(site + '/cn/search/' + number.replace('-', '_'))
|
||||
result1 = self.getTreeIndex(self.searchtree, '//*[@id="waterfall"]/div/a/@href')
|
||||
result1 = self.getTreeElement(self.searchtree, '//*[@id="waterfall"]/div/a/@href')
|
||||
if result1 == '' or result1 == 'null' or result1 == 'None':
|
||||
self.searchtree = self.getHtmlTree(site + '/cn/search/' + number.replace('_', ''))
|
||||
result1 = self.getTreeIndex(self.searchtree, '//*[@id="waterfall"]/div/a/@href')
|
||||
result1 = self.getTreeElement(self.searchtree, '//*[@id="waterfall"]/div/a/@href')
|
||||
return "https:" + result1
|
||||
|
||||
def getNum(self, htmltree):
|
||||
new_number = self.getTreeIndex(htmltree, self.expr_number)
|
||||
new_number = self.getTreeElement(htmltree, self.expr_number)
|
||||
if new_number.upper() != self.number.upper():
|
||||
raise ValueError('number not found in ' + self.source)
|
||||
self.number = new_number
|
||||
@@ -50,7 +50,7 @@ class Avsox(Parser):
|
||||
def getSmallCover(self, htmltree):
|
||||
""" 使用搜索页面的预览小图
|
||||
"""
|
||||
return self.getTreeIndex(self.searchtree, self.expr_smallcover)
|
||||
return self.getTreeElement(self.searchtree, self.expr_smallcover)
|
||||
|
||||
def getTags(self, htmltree):
|
||||
tags = super().getTags(htmltree).split(',')
|
||||
|
||||
@@ -48,11 +48,11 @@ class Carib(Parser):
|
||||
return f'https://www.caribbeancom.com/moviepages/{self.number}/images/l_l.jpg'
|
||||
|
||||
def getTags(self, htmltree):
|
||||
return self.getAll(htmltree, self.expr_tags)
|
||||
return self.getTreeAll(htmltree, self.expr_tags)
|
||||
|
||||
def getExtrafanart(self, htmltree):
|
||||
r = []
|
||||
genres = self.getAll(htmltree, self.expr_extrafanart)
|
||||
genres = self.getTreeAll(htmltree, self.expr_extrafanart)
|
||||
for g in genres:
|
||||
jpg = str(g)
|
||||
if '/member/' in jpg:
|
||||
|
||||
@@ -36,22 +36,22 @@ class Dlsite(Parser):
|
||||
else:
|
||||
self.detailurl = f'https://www.dlsite.com/maniax/fsr/=/language/jp/sex_category/male/keyword/{number}/order/trend/work_type_category/movie'
|
||||
htmltree = self.getHtmlTree(self.detailurl)
|
||||
search_result = self.getAll(htmltree, '//*[@id="search_result_img_box"]/li[1]/dl/dd[2]/div[2]/a/@href')
|
||||
search_result = self.getTreeAll(htmltree, '//*[@id="search_result_img_box"]/li[1]/dl/dd[2]/div[2]/a/@href')
|
||||
if len(search_result) == 0:
|
||||
number = number.replace("THE ANIMATION", "").replace("he Animation", "").replace("t", "").replace("T","")
|
||||
htmltree = self.getHtmlTree(f'https://www.dlsite.com/maniax/fsr/=/language/jp/sex_category/male/keyword/{number}/order/trend/work_type_category/movie')
|
||||
search_result = self.getAll(htmltree, '//*[@id="search_result_img_box"]/li[1]/dl/dd[2]/div[2]/a/@href')
|
||||
search_result = self.getTreeAll(htmltree, '//*[@id="search_result_img_box"]/li[1]/dl/dd[2]/div[2]/a/@href')
|
||||
if len(search_result) == 0:
|
||||
if "~" in number:
|
||||
number = number.replace("~","〜")
|
||||
elif "〜" in number:
|
||||
number = number.replace("〜","~")
|
||||
htmltree = self.getHtmlTree(f'https://www.dlsite.com/maniax/fsr/=/language/jp/sex_category/male/keyword/{number}/order/trend/work_type_category/movie')
|
||||
search_result = self.getAll(htmltree, '//*[@id="search_result_img_box"]/li[1]/dl/dd[2]/div[2]/a/@href')
|
||||
search_result = self.getTreeAll(htmltree, '//*[@id="search_result_img_box"]/li[1]/dl/dd[2]/div[2]/a/@href')
|
||||
if len(search_result) == 0:
|
||||
number = number.replace('上巻', '').replace('下巻', '').replace('前編', '').replace('後編', '')
|
||||
htmltree = self.getHtmlTree(f'https://www.dlsite.com/maniax/fsr/=/language/jp/sex_category/male/keyword/{number}/order/trend/work_type_category/movie')
|
||||
search_result = self.getAll(htmltree, '//*[@id="search_result_img_box"]/li[1]/dl/dd[2]/div[2]/a/@href')
|
||||
search_result = self.getTreeAll(htmltree, '//*[@id="search_result_img_box"]/li[1]/dl/dd[2]/div[2]/a/@href')
|
||||
self.detailurl = search_result[0]
|
||||
htmltree = self.getHtmlTree(self.detailurl)
|
||||
self.number = str(re.findall("\wJ\w+", self.detailurl)).strip(" [']")
|
||||
@@ -71,7 +71,7 @@ class Dlsite(Parser):
|
||||
|
||||
def getOutline(self, htmltree):
|
||||
total = []
|
||||
result = self.getAll(htmltree, self.expr_outline)
|
||||
result = self.getTreeAll(htmltree, self.expr_outline)
|
||||
for i in result:
|
||||
total.append(i.strip('\r\n'))
|
||||
return str(total).strip(" ['']").replace("', '', '",r'\n').replace("', '",r'\n').strip(", '', '")
|
||||
@@ -83,12 +83,12 @@ class Dlsite(Parser):
|
||||
return 'https:' + super().getCover(htmltree).replace('.webp', '.jpg')
|
||||
|
||||
def getTags(self, htmltree):
|
||||
return self.getAll(htmltree, self.expr_tags)
|
||||
return self.getTreeAll(htmltree, self.expr_tags)
|
||||
|
||||
def getExtrafanart(self, htmltree):
|
||||
try:
|
||||
result = []
|
||||
for i in self.getAll(self.expr_extrafanart):
|
||||
for i in self.getTreeAll(self.expr_extrafanart):
|
||||
result.append("https:" + i)
|
||||
except:
|
||||
result = ''
|
||||
|
||||
@@ -68,9 +68,9 @@ class Fanza(Parser):
|
||||
|
||||
def getOutline(self, htmltree):
|
||||
try:
|
||||
result = self.getTreeIndex(htmltree, self.expr_outline).replace("\n", "")
|
||||
result = self.getTreeElement(htmltree, self.expr_outline).replace("\n", "")
|
||||
if result == '':
|
||||
result = self.getTreeIndex(htmltree, self.expr_outline2).replace("\n", "")
|
||||
result = self.getTreeElement(htmltree, self.expr_outline2).replace("\n", "")
|
||||
return result
|
||||
except:
|
||||
return ''
|
||||
@@ -98,13 +98,13 @@ class Fanza(Parser):
|
||||
# return super().getCover(htmltree)
|
||||
cover_number = self.fanza_hinban
|
||||
try:
|
||||
result = self.getTreeIndex(htmltree, '//*[@id="' + cover_number + '"]/@href')
|
||||
result = self.getTreeElement(htmltree, '//*[@id="' + cover_number + '"]/@href')
|
||||
except:
|
||||
# sometimes fanza changes _ to \u005f in the image id
|
||||
if "_" in cover_number:
|
||||
cover_number = cover_number.replace("_", r"\u005f")
|
||||
try:
|
||||
result = self.getTreeIndex(htmltree, '//*[@id="' + cover_number + '"]/@href')
|
||||
result = self.getTreeElement(htmltree, '//*[@id="' + cover_number + '"]/@href')
|
||||
except:
|
||||
# (TODO) handle more edge cases
|
||||
# print(html)
|
||||
|
||||
@@ -32,7 +32,7 @@ class Gcolle(Parser):
|
||||
htmlcode = session.get('https://gcolle.net/product_info.php/products_id/' + number).text
|
||||
htmltree = etree.HTML(htmlcode)
|
||||
|
||||
r18url = self.getTreeIndex(htmltree, self.expr_r18)
|
||||
r18url = self.getTreeElement(htmltree, self.expr_r18)
|
||||
if r18url and r18url.startswith('http'):
|
||||
htmlcode = session.get(r18url).text
|
||||
htmltree = etree.HTML(htmlcode)
|
||||
@@ -46,7 +46,7 @@ class Gcolle(Parser):
|
||||
return "GCOLLE-" + str(num)
|
||||
|
||||
def getOutline(self, htmltree):
|
||||
result = self.getAll(htmltree, self.expr_outline)
|
||||
result = self.getTreeAll(htmltree, self.expr_outline)
|
||||
try:
|
||||
return "\n".join(result)
|
||||
except:
|
||||
@@ -59,12 +59,12 @@ class Gcolle(Parser):
|
||||
return "https:" + super().getCover(htmltree)
|
||||
|
||||
def getTags(self, htmltree):
|
||||
return self.getAll(htmltree, self.expr_tags)
|
||||
return self.getTreeAll(htmltree, self.expr_tags)
|
||||
|
||||
def getExtrafanart(self, htmltree):
|
||||
extrafanart = self.getAll(htmltree, self.expr_extrafanart)
|
||||
extrafanart = self.getTreeAll(htmltree, self.expr_extrafanart)
|
||||
if len(extrafanart) == 0:
|
||||
extrafanart = self.getAll(htmltree, self.expr_extrafanart2)
|
||||
extrafanart = self.getTreeAll(htmltree, self.expr_extrafanart2)
|
||||
# Prepend "https:" to each extrafanart url
|
||||
for i in range(len(extrafanart)):
|
||||
extrafanart[i] = 'https:' + extrafanart[i]
|
||||
|
||||
@@ -53,7 +53,7 @@ class wwwGetchu(Parser):
|
||||
retry = 2
|
||||
for i in range(retry):
|
||||
queryTree = self.getHtmlTree(queryUrl)
|
||||
detailurl = self.getTreeIndex(queryTree, '//*[@id="detail_block"]/div/table/tr[1]/td/a[1]/@href')
|
||||
detailurl = self.getTreeElement(queryTree, '//*[@id="detail_block"]/div/table/tr[1]/td/a[1]/@href')
|
||||
if detailurl:
|
||||
break
|
||||
if detailurl == "":
|
||||
@@ -70,11 +70,11 @@ class wwwGetchu(Parser):
|
||||
return super().getDirector(htmltree)
|
||||
|
||||
def getTags(self, htmltree):
|
||||
return self.getAll(htmltree, self.expr_tags)
|
||||
return self.getTreeAll(htmltree, self.expr_tags)
|
||||
|
||||
def getOutline(self, htmltree):
|
||||
outline = ''
|
||||
_list = self.getAll(htmltree, self.expr_outline)
|
||||
_list = self.getTreeAll(htmltree, self.expr_outline)
|
||||
for i in _list:
|
||||
outline = outline + i.strip()
|
||||
return outline
|
||||
@@ -116,7 +116,7 @@ class dlGetchu(wwwGetchu):
|
||||
else:
|
||||
queryUrl = self.GETCHU_DL_SEARCH_URL.replace("_WORD_", number)
|
||||
queryTree = self.getHtmlTree(queryUrl)
|
||||
detailurl = self.getTreeIndex(queryTree, '/html/body/div[1]/table/tr/td/table[4]/tr/td[2]/table/tr[2]/td/table/tr/td/table/tr/td[2]/div/a[1]/@href')
|
||||
detailurl = self.getTreeElement(queryTree, '/html/body/div[1]/table/tr/td/table[4]/tr/td[2]/table/tr[2]/td/table/tr/td/table/tr/td[2]/div/a[1]/@href')
|
||||
if detailurl == "":
|
||||
return None
|
||||
self.number = re.findall('\d+', detailurl)[0]
|
||||
|
||||
@@ -74,10 +74,10 @@ class Jav321(Parser):
|
||||
return self.parseElement(super().getActors(htmltree))
|
||||
|
||||
def getLabel(self, htmltree):
|
||||
return self.parseElement(self.getAll(htmltree, self.expr_label))
|
||||
return self.parseElement(self.getTreeAll(htmltree, self.expr_label))
|
||||
|
||||
def getTags(self, htmltree):
|
||||
return self.parseElement(self.getAll(htmltree, self.expr_tags))
|
||||
return self.parseElement(self.getTreeAll(htmltree, self.expr_tags))
|
||||
|
||||
def getStudio(self, htmltree):
|
||||
return self.parseElement(self.getAll(htmltree, self.expr_studio))
|
||||
return self.parseElement(self.getTreeAll(htmltree, self.expr_studio))
|
||||
|
||||
@@ -78,9 +78,9 @@ class Javbus(Parser):
|
||||
|
||||
def getStudio(self, htmltree):
|
||||
if self.uncensored:
|
||||
return self.getTreeIndex(htmltree, self.expr_studio2)
|
||||
return self.getTreeElement(htmltree, self.expr_studio2)
|
||||
else:
|
||||
return self.getTreeIndex(htmltree, self.expr_studio)
|
||||
return self.getTreeElement(htmltree, self.expr_studio)
|
||||
|
||||
def getCover(self, htmltree):
|
||||
return urljoin("https://www.javbus.com", super().getCover(htmltree))
|
||||
@@ -111,15 +111,15 @@ class Javbus(Parser):
|
||||
|
||||
def getDirector(self, htmltree):
|
||||
if self.uncensored:
|
||||
return self.getTreeIndex(htmltree, self.expr_directorJa)
|
||||
return self.getTreeElement(htmltree, self.expr_directorJa)
|
||||
else:
|
||||
return self.getTreeIndex(htmltree, self.expr_director)
|
||||
return self.getTreeElement(htmltree, self.expr_director)
|
||||
|
||||
def getSeries(self, htmltree):
|
||||
if self.uncensored:
|
||||
return self.getTreeIndex(htmltree, self.expr_series2)
|
||||
return self.getTreeElement(htmltree, self.expr_series2)
|
||||
else:
|
||||
return self.getTreeIndex(htmltree, self.expr_series)
|
||||
return self.getTreeElement(htmltree, self.expr_series)
|
||||
|
||||
def getTags(self, htmltree):
|
||||
tags = super().getTags(htmltree).split(',')
|
||||
|
||||
@@ -86,12 +86,12 @@ class Javdb(Parser):
|
||||
# javdb sometimes returns multiple results,
|
||||
# and the first element may not be the one we are looking for
|
||||
# iterate over all candidates and find the matching one
|
||||
urls = self.getAll(self.querytree, '//*[contains(@class,"movie-list")]/div/a/@href')
|
||||
urls = self.getTreeAll(self.querytree, '//*[contains(@class,"movie-list")]/div/a/@href')
|
||||
# 记录一下欧美的ids ['Blacked','Blacked']
|
||||
if re.search(r'[a-zA-Z]+\.\d{2}\.\d{2}\.\d{2}', number):
|
||||
correct_url = urls[0]
|
||||
else:
|
||||
ids = self.getAll(self.querytree, '//*[contains(@class,"movie-list")]/div/a/div[contains(@class, "video-title")]/strong/text()')
|
||||
ids = self.getTreeAll(self.querytree, '//*[contains(@class,"movie-list")]/div/a/div[contains(@class, "video-title")]/strong/text()')
|
||||
try:
|
||||
self.queryid = ids.index(number)
|
||||
correct_url = urls[self.queryid]
|
||||
@@ -105,8 +105,8 @@ class Javdb(Parser):
|
||||
def getNum(self, htmltree):
|
||||
if self.noauth:
|
||||
return self.number
|
||||
result1 = str(self.getAll(htmltree, self.expr_number)).strip(" ['']")
|
||||
result2 = str(self.getAll(htmltree, self.expr_number2)).strip(" ['']")
|
||||
result1 = str(self.getTreeAll(htmltree, self.expr_number)).strip(" ['']")
|
||||
result2 = str(self.getTreeAll(htmltree, self.expr_number2)).strip(" ['']")
|
||||
dp_number = str(result2 + result1).strip('+')
|
||||
# NOTE 检测匹配与更新 self.number
|
||||
if dp_number.upper() != self.number.upper():
|
||||
@@ -116,50 +116,50 @@ class Javdb(Parser):
|
||||
|
||||
def getTitle(self, htmltree):
|
||||
if self.noauth:
|
||||
return self.getTreeIndex(htmltree, self.expr_title_no, self.queryid)
|
||||
return self.getTreeElement(htmltree, self.expr_title_no, self.queryid)
|
||||
browser_title = super().getTitle(htmltree)
|
||||
title = browser_title[:browser_title.find(' | JavDB')].strip()
|
||||
return title.replace(self.number, '').strip()
|
||||
|
||||
def getCover(self, htmltree):
|
||||
if self.noauth:
|
||||
return self.getTreeIndex(htmltree, self.expr_cover_no, self.queryid)
|
||||
return self.getTreeElement(htmltree, self.expr_cover_no, self.queryid)
|
||||
return super().getCover(htmltree)
|
||||
|
||||
def getRelease(self, htmltree):
|
||||
if self.noauth:
|
||||
return self.getTreeIndex(htmltree, self.expr_release_no, self.queryid).strip()
|
||||
return self.getTreeElement(htmltree, self.expr_release_no, self.queryid).strip()
|
||||
return super().getRelease(htmltree)
|
||||
|
||||
def getRuntime(self, htmltree):
|
||||
result1 = str(self.getAll(htmltree, self.expr_runtime)).strip(" ['']")
|
||||
result2 = str(self.getAll(htmltree, self.expr_runtime2)).strip(" ['']")
|
||||
result1 = str(self.getTreeAll(htmltree, self.expr_runtime)).strip(" ['']")
|
||||
result2 = str(self.getTreeAll(htmltree, self.expr_runtime2)).strip(" ['']")
|
||||
return str(result1 + result2).strip('+').rstrip('mi')
|
||||
|
||||
def getDirector(self, htmltree):
|
||||
result1 = str(self.getAll(htmltree, self.expr_director)).strip(" ['']")
|
||||
result2 = str(self.getAll(htmltree, self.expr_director2)).strip(" ['']")
|
||||
result1 = str(self.getTreeAll(htmltree, self.expr_director)).strip(" ['']")
|
||||
result2 = str(self.getTreeAll(htmltree, self.expr_director2)).strip(" ['']")
|
||||
return str(result1 + result2).strip('+').replace("', '", '').replace('"', '')
|
||||
|
||||
def getSeries(self, htmltree):
|
||||
result1 = str(self.getAll(htmltree, self.expr_series)).strip(" ['']")
|
||||
result2 = str(self.getAll(htmltree, self.expr_series2)).strip(" ['']")
|
||||
result1 = str(self.getTreeAll(htmltree, self.expr_series)).strip(" ['']")
|
||||
result2 = str(self.getTreeAll(htmltree, self.expr_series2)).strip(" ['']")
|
||||
result = str(result1 + result2).strip('+').replace("', '", '').replace('"', '')
|
||||
if not result and self.fixstudio:
|
||||
result = self.getStudio(htmltree)
|
||||
return result
|
||||
|
||||
def getLabel(self, htmltree):
|
||||
result1 = str(self.getAll(htmltree, self.expr_label)).strip(" ['']")
|
||||
result2 = str(self.getAll(htmltree, self.expr_label2)).strip(" ['']")
|
||||
result1 = str(self.getTreeAll(htmltree, self.expr_label)).strip(" ['']")
|
||||
result2 = str(self.getTreeAll(htmltree, self.expr_label2)).strip(" ['']")
|
||||
result = str(result1 + result2).strip('+').replace("', '", '').replace('"', '')
|
||||
if not result and self.fixstudio:
|
||||
result = self.getStudio(htmltree)
|
||||
return result
|
||||
|
||||
def getActors(self, htmltree):
|
||||
actors = self.getAll(htmltree, self.expr_actor)
|
||||
genders = self.getAll(htmltree, self.expr_actor2)
|
||||
actors = self.getTreeAll(htmltree, self.expr_actor)
|
||||
genders = self.getTreeAll(htmltree, self.expr_actor2)
|
||||
r = []
|
||||
idx = 0
|
||||
# NOTE only female, we don't care about the others
|
||||
@@ -184,11 +184,11 @@ class Javdb(Parser):
|
||||
|
||||
def getStudio(self, htmltree):
|
||||
try:
|
||||
return self.getAll(htmltree, self.expr_studio).strip(" ['']")
|
||||
return self.getTreeAll(htmltree, self.expr_studio).strip(" ['']")
|
||||
except:
|
||||
pass
|
||||
try:
|
||||
return self.getAll(htmltree, self.expr_studio2).strip(" ['']")
|
||||
return self.getTreeAll(htmltree, self.expr_studio2).strip(" ['']")
|
||||
except:
|
||||
return ''
|
||||
|
||||
@@ -207,17 +207,17 @@ class Javdb(Parser):
|
||||
|
||||
def getTags(self, htmltree):
|
||||
try:
|
||||
return self.getAll(htmltree, self.expr_tags)
|
||||
return self.getTreeAll(htmltree, self.expr_tags)
|
||||
except:
|
||||
pass
|
||||
try:
|
||||
return self.getAll(htmltree, self.expr_tags2)
|
||||
return self.getTreeAll(htmltree, self.expr_tags2)
|
||||
except:
|
||||
return ''
|
||||
|
||||
def getUserRating(self, htmltree):
|
||||
try:
|
||||
result = str(self.getTreeIndex(htmltree, self.expr_userrating))
|
||||
result = str(self.getTreeElement(htmltree, self.expr_userrating))
|
||||
v = re.findall(r'(\d+|\d+\.\d+)分, 由(\d+)人評價', result)
|
||||
return float(v[0][0])
|
||||
except:
|
||||
@@ -225,7 +225,7 @@ class Javdb(Parser):
|
||||
|
||||
def getUserVotes(self, htmltree):
|
||||
try:
|
||||
result = str(self.getTreeIndex(htmltree, self.expr_uservotes))
|
||||
result = str(self.getTreeElement(htmltree, self.expr_uservotes))
|
||||
v = re.findall(r'(\d+|\d+\.\d+)分, 由(\d+)人評價', result)
|
||||
return int(v[0][1])
|
||||
except:
|
||||
@@ -237,7 +237,7 @@ class Javdb(Parser):
|
||||
return img_url[0] if img_url else ''
|
||||
|
||||
def getActorPhoto(self, htmltree):
|
||||
actorall = self.getAll(htmltree, self.expr_actorphoto)
|
||||
actorall = self.getTreeAll(htmltree, self.expr_actorphoto)
|
||||
if not actorall:
|
||||
return {}
|
||||
actors = self.getActors(htmltree)
|
||||
|
||||
@@ -22,7 +22,7 @@ class Madou(Parser):
|
||||
if self.htmlcode == 404:
|
||||
return 404
|
||||
htmltree = etree.fromstring(self.htmlcode, etree.HTMLParser())
|
||||
self.detailurl = self.getTreeIndex(htmltree, self.expr_url)
|
||||
self.detailurl = self.getTreeElement(htmltree, self.expr_url)
|
||||
|
||||
result = self.dictformat(htmltree)
|
||||
return result
|
||||
|
||||
@@ -49,8 +49,8 @@ class Mgstage(Parser):
|
||||
return super().getCover(self.htmlcodetree)
|
||||
|
||||
def getTags(self, htmltree):
|
||||
result1 = str(self.getAll(htmltree, self.expr_tags)).strip(" ['']").strip('\\n ').strip('\\n')
|
||||
result2 = str(self.getAll(htmltree, self.expr_tags2)).strip(" ['']").strip('\\n ').strip('\\n')
|
||||
result1 = str(self.getTreeAll(htmltree, self.expr_tags)).strip(" ['']").strip('\\n ').strip('\\n')
|
||||
result2 = str(self.getTreeAll(htmltree, self.expr_tags2)).strip(" ['']").strip('\\n ').strip('\\n')
|
||||
result = str(result1 + result2).strip('+').replace("', '\\n",",").replace("', '","").replace('"','').replace(',,','').split(',')
|
||||
return result
|
||||
|
||||
@@ -65,7 +65,7 @@ class Mgstage(Parser):
|
||||
return extrafanart_imgs
|
||||
return ''
|
||||
|
||||
def getTreeIndex(self, tree, expr, index=0):
|
||||
def getTreeElement(self, tree, expr, index=0):
|
||||
if expr == '':
|
||||
return ''
|
||||
if tree == self.detailtree:
|
||||
|
||||
@@ -62,11 +62,11 @@ class Mv91(Parser):
|
||||
return '91制片厂'
|
||||
|
||||
def getTags(self, htmltree):
|
||||
return self.getAll(htmltree, self.expr_tags)
|
||||
return self.getTreeAll(htmltree, self.expr_tags)
|
||||
|
||||
def getActors(self, htmltree):
|
||||
b=[]
|
||||
for player in self.getAll(htmltree, self.expr_actor):
|
||||
for player in self.getTreeAll(htmltree, self.expr_actor):
|
||||
player = player.replace('主演:','')
|
||||
if '/' in player:
|
||||
player = player.split('/')[0]
|
||||
|
||||
@@ -144,18 +144,18 @@ class Parser:
|
||||
def getNum(self, htmltree):
|
||||
""" 增加 strip 过滤
|
||||
"""
|
||||
return self.getTreeIndex(htmltree, self.expr_number)
|
||||
return self.getTreeElement(htmltree, self.expr_number)
|
||||
|
||||
def getTitle(self, htmltree):
|
||||
return self.getTreeIndex(htmltree, self.expr_title).strip()
|
||||
return self.getTreeElement(htmltree, self.expr_title).strip()
|
||||
|
||||
def getStudio(self, htmltree):
|
||||
try:
|
||||
return self.getTreeIndex(htmltree, self.expr_studio).strip(" ['']")
|
||||
return self.getTreeElement(htmltree, self.expr_studio).strip(" ['']")
|
||||
except:
|
||||
pass
|
||||
try:
|
||||
return self.getTreeIndex(htmltree, self.expr_studio2).strip(" ['']")
|
||||
return self.getTreeElement(htmltree, self.expr_studio2).strip(" ['']")
|
||||
except:
|
||||
return ''
|
||||
|
||||
@@ -170,90 +170,90 @@ class Parser:
|
||||
|
||||
def getRuntime(self, htmltree):
|
||||
try:
|
||||
return self.getTreeIndex(htmltree, self.expr_runtime).strip("\n\t ['']").rstrip('mi')
|
||||
return self.getTreeElement(htmltree, self.expr_runtime).strip("\n\t ['']").rstrip('mi')
|
||||
except:
|
||||
pass
|
||||
try:
|
||||
return self.getTreeIndex(htmltree, self.expr_runtime2).strip("\n\t ['']").rstrip('mi')
|
||||
return self.getTreeElement(htmltree, self.expr_runtime2).strip("\n\t ['']").rstrip('mi')
|
||||
except:
|
||||
return ''
|
||||
|
||||
def getRelease(self, htmltree):
|
||||
return self.getTreeIndex(htmltree, self.expr_release).strip().replace('/','-')
|
||||
return self.getTreeElement(htmltree, self.expr_release).strip().replace('/','-')
|
||||
|
||||
def getOutline(self, htmltree):
|
||||
return self.getTreeIndex(htmltree, self.expr_outline).strip().replace("\n","")
|
||||
return self.getTreeElement(htmltree, self.expr_outline).strip().replace("\n","")
|
||||
|
||||
def getDirector(self, htmltree):
|
||||
return self.getTreeIndex(htmltree, self.expr_director)
|
||||
return self.getTreeElement(htmltree, self.expr_director)
|
||||
|
||||
def getActors(self, htmltree):
|
||||
return self.getAll(htmltree, self.expr_actor)
|
||||
return self.getTreeAll(htmltree, self.expr_actor)
|
||||
|
||||
def getTags(self, htmltree):
|
||||
return self.getTreeIndex(htmltree, self.expr_tags)
|
||||
return self.getTreeElement(htmltree, self.expr_tags)
|
||||
|
||||
def getLabel(self, htmltree):
|
||||
try:
|
||||
return self.getTreeIndex(htmltree, self.expr_label).strip(" ['']")
|
||||
return self.getTreeElement(htmltree, self.expr_label).strip(" ['']")
|
||||
except:
|
||||
pass
|
||||
try:
|
||||
return self.getTreeIndex(htmltree, self.expr_label2).strip(" ['']")
|
||||
return self.getTreeElement(htmltree, self.expr_label2).strip(" ['']")
|
||||
except:
|
||||
return ''
|
||||
|
||||
def getSeries(self, htmltree):
|
||||
try:
|
||||
return self.getTreeIndex(htmltree, self.expr_series).strip(" ['']")
|
||||
return self.getTreeElement(htmltree, self.expr_series).strip(" ['']")
|
||||
except:
|
||||
pass
|
||||
try:
|
||||
return self.getTreeIndex(htmltree, self.expr_series2).strip(" ['']")
|
||||
return self.getTreeElement(htmltree, self.expr_series2).strip(" ['']")
|
||||
except:
|
||||
return ''
|
||||
|
||||
def getCover(self, htmltree):
|
||||
try:
|
||||
return self.getTreeIndex(htmltree, self.expr_cover).strip(" ['']")
|
||||
return self.getTreeElement(htmltree, self.expr_cover).strip(" ['']")
|
||||
except:
|
||||
pass
|
||||
try:
|
||||
return self.getTreeIndex(htmltree, self.expr_cover2).strip(" ['']")
|
||||
return self.getTreeElement(htmltree, self.expr_cover2).strip(" ['']")
|
||||
except:
|
||||
return ''
|
||||
|
||||
def getSmallCover(self, htmltree):
|
||||
return self.getTreeIndex(htmltree, self.expr_smallcover)
|
||||
return self.getTreeElement(htmltree, self.expr_smallcover)
|
||||
|
||||
def getExtrafanart(self, htmltree):
|
||||
return self.getAll(htmltree, self.expr_extrafanart)
|
||||
return self.getTreeAll(htmltree, self.expr_extrafanart)
|
||||
|
||||
def getTrailer(self, htmltree):
|
||||
return self.getTreeIndex(htmltree, self.expr_trailer)
|
||||
return self.getTreeElement(htmltree, self.expr_trailer)
|
||||
|
||||
def getActorPhoto(self, htmltree):
|
||||
return self.getAll(htmltree, self.expr_actorphoto)
|
||||
return self.getTreeAll(htmltree, self.expr_actorphoto)
|
||||
|
||||
def getUncensored(self, htmlree):
|
||||
if self.expr_uncensored:
|
||||
u = self.getAll(htmlree, self.expr_uncensored)
|
||||
u = self.getTreeAll(htmlree, self.expr_uncensored)
|
||||
return bool(u)
|
||||
else:
|
||||
return self.uncensored
|
||||
|
||||
def getUserRating(self, htmltree):
|
||||
return self.getAll(htmltree, self.expr_userrating)
|
||||
return self.getTreeAll(htmltree, self.expr_userrating)
|
||||
|
||||
def getUserVotes(self, htmltree):
|
||||
return self.getAll(htmltree, self.expr_uservotes)
|
||||
return self.getTreeAll(htmltree, self.expr_uservotes)
|
||||
|
||||
def getTreeIndex(self, tree: html.HtmlElement, expr, index=0):
|
||||
def getTreeElement(self, tree: html.HtmlElement, expr, index=0):
|
||||
""" 根据表达式从`xmltree`中获取匹配值,默认 index 为 0
|
||||
"""
|
||||
return getTreeElement(tree, expr, index)
|
||||
|
||||
def getAll(self, tree: html.HtmlElement, expr):
|
||||
def getTreeAll(self, tree: html.HtmlElement, expr):
|
||||
""" 根据表达式从`xmltree`中获取全部匹配值
|
||||
"""
|
||||
return getTreeAll(tree, expr)
|
||||
|
||||
@@ -31,10 +31,10 @@ class Tmdb(Parser):
|
||||
return movieUrl
|
||||
|
||||
def getTitle(self, htmltree):
|
||||
return self.getTreeIndex(htmltree, self.expr_title).get('content')
|
||||
return self.getTreeElement(htmltree, self.expr_title).get('content')
|
||||
|
||||
def getCover(self, htmltree):
|
||||
return "https://www.themoviedb.org" + self.getTreeIndex(htmltree, self.expr_cover).get('content')
|
||||
return "https://www.themoviedb.org" + self.getTreeElement(htmltree, self.expr_cover).get('content')
|
||||
|
||||
def getOutline(self, htmltree):
|
||||
return self.getTreeIndex(htmltree, self.expr_outline).get('content')
|
||||
return self.getTreeElement(htmltree, self.expr_outline).get('content')
|
||||
|
||||
@@ -28,17 +28,17 @@ class Xcity(Parser):
|
||||
return super().getStudio(htmltree).strip('+').replace("', '", '').replace('"', '')
|
||||
|
||||
def getRuntime(self, htmltree):
|
||||
return self.getAll(htmltree, self.expr_runtime)[1].strip()
|
||||
return self.getTreeAll(htmltree, self.expr_runtime)[1].strip()
|
||||
|
||||
def getRelease(self, htmltree):
|
||||
try:
|
||||
result = self.getTreeIndex(htmltree, self.expr_release, 1)
|
||||
result = self.getTreeElement(htmltree, self.expr_release, 1)
|
||||
return re.findall('\d{4}/\d{2}/\d{2}', result)[0].replace('/','-')
|
||||
except:
|
||||
return ''
|
||||
|
||||
def getTags(self, htmltree):
|
||||
result = self.getAll(htmltree, self.expr_tags)
|
||||
result = self.getTreeAll(htmltree, self.expr_tags)
|
||||
total = []
|
||||
for i in result:
|
||||
total.append(i.replace("\n","").replace("\t",""))
|
||||
|
||||
Reference in New Issue
Block a user