Merge remote-tracking branch 'origin/master'

This commit is contained in:
yoshiko2
2022-09-09 01:39:06 +08:00
3 changed files with 47 additions and 3 deletions

View File

@@ -64,6 +64,9 @@ class Carib(Parser):
r.append('https://www.caribbeancom.com' + jpg) r.append('https://www.caribbeancom.com' + jpg)
return r return r
def getTrailer(self, htmltree):
    """Build the sample-trailer URL for this movie.

    The URL is derived purely from ``self.number``; ``htmltree`` is accepted
    for signature compatibility and is not inspected.
    """
    trailer_url = f'https://smovie.caribbeancom.com/sample/movies/{self.number}/1080p.mp4'
    return trailer_url
def getActorPhoto(self, htmltree): def getActorPhoto(self, htmltree):
htmla = htmltree.xpath("//*[@id='moviepages']/div[@class='container']/div[@class='inner-container']/div[@class='movie-info section']/ul/li[@class='movie-spec']/span[@class='spec-content']/a[@itemprop='actor']") htmla = htmltree.xpath("//*[@id='moviepages']/div[@class='container']/div[@class='inner-container']/div[@class='movie-info section']/ul/li[@class='movie-spec']/span[@class='spec-content']/a[@itemprop='actor']")
names = htmltree.xpath("//*[@id='moviepages']/div[@class='container']/div[@class='inner-container']/div[@class='movie-info section']/ul/li[@class='movie-spec']/span[@class='spec-content']/a[@itemprop='actor']/span[@itemprop='name']/text()") names = htmltree.xpath("//*[@id='moviepages']/div[@class='container']/div[@class='inner-container']/div[@class='movie-info section']/ul/li[@class='movie-spec']/span[@class='spec-content']/a[@itemprop='actor']/span[@itemprop='name']/text()")

View File

@@ -11,8 +11,8 @@ class Fanza(Parser):
expr_title = '//*[starts-with(@id, "title")]/text()' expr_title = '//*[starts-with(@id, "title")]/text()'
expr_actor = "//td[contains(text(),'出演者')]/following-sibling::td/span/a/text()" expr_actor = "//td[contains(text(),'出演者')]/following-sibling::td/span/a/text()"
expr_cover = './/head/meta[@property="og:image"]/@content' # expr_cover = './/head/meta[@property="og:image"]/@content'
expr_extrafanart = '//a[@name="sample-image"]/img/@src' # expr_extrafanart = '//a[@name="sample-image"]/img/@src'
expr_outline = "//div[@class='mg-b20 lh4']/text()" expr_outline = "//div[@class='mg-b20 lh4']/text()"
expr_outline2 = "//div[@class='mg-b20 lh4']//p/text()" expr_outline2 = "//div[@class='mg-b20 lh4']//p/text()"
expr_outline_og = '//head/meta[@property="og:description"]/@content' expr_outline_og = '//head/meta[@property="og:description"]/@content'
@@ -120,6 +120,48 @@ class Fanza(Parser):
return '' return ''
return ret return ret
def getCover(self, htmltree):
    """Return the cover image link for ``self.number``.

    The cover is the ``href`` of the element whose ``id`` equals the movie
    number. When the number contains an underscore, a second spelling is
    tried in which every underscore is replaced by the six-character text
    backslash + "u005f".

    Args:
        htmltree: parsed page exposing ``xpath(expr)`` that returns a list
            of matches.

    Returns:
        The first matching ``href`` value.

    Raises:
        ValueError: if none of the candidate ids yields a match.
    """
    candidates = [self.number]
    if "_" in self.number:
        # Fanza sometimes writes "_" as the literal text \u005f in the image id.
        candidates.append(self.number.replace("_", r"\u005f"))

    last_error = None
    for cover_id in candidates:
        try:
            matches = htmltree.xpath('//*[@id="' + cover_id + '"]/@href')
        except Exception as err:
            # A failing query (e.g. an id that breaks the XPath expression)
            # must not hide the fallback spelling; remember it for chaining.
            last_error = err
            continue
        if matches:
            return matches[0]

    # TODO: handle more edge cases. Raising keeps the previous behaviour:
    # fetching the picture is the main requirement, so a miss is an error.
    raise ValueError("can not find image") from last_error
def getExtrafanart(self, htmltree):
    """Collect the sample-image URLs from the page's sample-image block.

    The block is located in the raw markup held in ``self.htmlcode``;
    ``htmltree`` is not used. Each ``<img src>`` found inside it is rewritten
    by inserting ``jp`` before the last ``-`` of the URL.

    Returns:
        A list of rewritten URLs, or ``''`` when the block or its images are
        missing.
    """
    block = re.search(r'<div id=\"sample-image-block\"[\s\S]*?<br></div>\s*?</div>', self.htmlcode)
    if not block:
        return ''

    thumbnails = re.findall(r'<img.*?src=\"(.*?)\"', block.group())
    if not thumbnails:
        return ''

    sheet = []
    for img_url in thumbnails:
        head, hyphen, tail = img_url.rpartition('-')
        if hyphen:
            sheet.append(head + 'jp-' + tail)
        else:
            # No hyphen to anchor the rewrite: keep the URL as found instead
            # of failing the whole gallery with an IndexError.
            sheet.append(img_url)
    return sheet
def getTrailer(self, htmltree):
    """Derive the trailer URL from the page's JSON-LD ``contentUrl``.

    The raw markup in ``self.htmlcode`` is searched for an
    ``application/ld+json`` script; ``htmltree`` is not used. The last two
    ``_``-separated parts of the ``contentUrl`` value are replaced with
    ``_mhb_w.mp4``.

    Returns:
        The rewritten URL, or ``''`` when no script block or ``contentUrl``
        is found.
    """
    # [\s\S]*? lets the JSON span several lines; the lazy quantifier stops at
    # the first closing brace that is followed by </script>.
    script = re.search(r'<script type=\"application/ld\+json\">[\s\S]*?}\s*</script>', self.htmlcode)
    if not script:
        return ''

    # Allow optional whitespace around the colon (pretty-printed JSON).
    content_url = re.search(r'\"contentUrl\"\s*:\s*\"(.*?)\"', script.group())
    if not content_url:
        return ''

    return content_url.group(1).rsplit('_', 2)[0] + '_mhb_w.mp4'
def getFanzaString(self, expr): def getFanzaString(self, expr):
result1 = str(self.htmltree.xpath("//td[contains(text(),'"+expr+"')]/following-sibling::td/a/text()")).strip(" ['']") result1 = str(self.htmltree.xpath("//td[contains(text(),'"+expr+"')]/following-sibling::td/a/text()")).strip(" ['']")
result2 = str(self.htmltree.xpath("//td[contains(text(),'"+expr+"')]/following-sibling::td/text()")).strip(" ['']") result2 = str(self.htmltree.xpath("//td[contains(text(),'"+expr+"')]/following-sibling::td/text()")).strip(" ['']")

View File

@@ -42,7 +42,6 @@ class Javbus(Parser):
'buscdn.fun', 'busdmm.fun', 'busfan.fun', 'busjav.fun', 'buscdn.fun', 'busdmm.fun', 'busfan.fun', 'busjav.fun',
'cdnbus.fun', 'cdnbus.fun',
'dmmbus.fun', 'dmmsee.fun', 'dmmbus.fun', 'dmmsee.fun',
'fanbus.us',
'seedmm.fun', 'seedmm.fun',
]) + "/" ]) + "/"
try: try: