From 75aedf8601e39fded2404974909ce34d72080d5a Mon Sep 17 00:00:00 2001
From: "Wayne.S.Lui" <waynelui@outlook.com>
Date: Tue, 30 Aug 2022 22:46:40 +0800
Subject: [PATCH 1/6] Update fanza.py

Restore fetching of the trailer, extra fanart and cover, using the methods from an older version.
---
 scrapinglib/fanza.py | 54 ++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 52 insertions(+), 2 deletions(-)
diff --git a/scrapinglib/fanza.py b/scrapinglib/fanza.py
index 2706b91..0ccf285 100644
--- a/scrapinglib/fanza.py
+++ b/scrapinglib/fanza.py
@@ -11,8 +11,8 @@ class Fanza(Parser):
 
     expr_title = '//*[starts-with(@id, "title")]/text()'
     expr_actor = "//td[contains(text(),'出演者')]/following-sibling::td/span/a/text()"
-    expr_cover = './/head/meta[@property="og:image"]/@content'
-    expr_extrafanart = '//a[@name="sample-image"]/img/@src'
+    # expr_cover = './/head/meta[@property="og:image"]/@content'
+    # expr_extrafanart = '//a[@name="sample-image"]/img/@src'
     expr_outline = "//div[@class='mg-b20 lh4']/text()"
     expr_outline2 = "//div[@class='mg-b20 lh4']//p/text()"
     expr_outline_og = '//head/meta[@property="og:description"]/@content'
@@ -119,6 +119,56 @@ class Fanza(Parser):
         if ret == "----":
             return ''
         return ret
+    
+    def getCover(self, htmltree):
+        cover_number = self.number
+        try:
+            result = htmltree.xpath('//*[@id="' + cover_number + '"]/@href')[0]
+        except:
+            # sometimes fanza changes _ to \u005f in the image id
+            if "_" in cover_number:
+                cover_number = cover_number.replace("_", r"\u005f")
+            try:
+                result = htmltree.xpath('//*[@id="' + cover_number + '"]/@href')[0]
+            except:
+                # (TODO) handle more edge cases
+                # print(html)
+                # raise an exception here (same behavior as before):
+                # fetching the picture is the main thing people need
+                raise ValueError("can not find image")
+        return result
+
+    def getExtrafanart(self, htmltree):
+        html_parent = re.compile(r'<div id=\"sample-image-block\"[\s\S]*?<br></div>\s*?</div>')
+        html = html_parent.search(
+            self.htmlcode)  
+        if html:
+            html = html.group()
+            extrafanart_parent = re.compile(r'<img.*?src=\"(.*?)\"')
+            extrafanart_images = extrafanart_parent.findall(html)
+            if extrafanart_images:
+                sheet = []
+                for img_url in extrafanart_images:
+                    img_urls = img_url.rsplit('-', 1)
+                    img_url = img_urls[0] + 'jp-' + img_urls[1]
+                    sheet.append(img_url)
+                return sheet
+        return ''
+
+    def getTrailer(self, htmltree):
+        html_parent = re.compile(r'<script type=\"application/ld\+json\">[\s\S].*}\s*?</script>')
+        html = html_parent.search(
+            self.htmlcode)  
+        if html:
+            html = html.group()
+            trailer_parent = re.compile(r'\"contentUrl\":\"(.*?)\"')
+            trailer_url = trailer_parent.search(html)
+            if trailer_url:
+                trailer_url = trailer_url.group(1)
+                trailer_cuts = trailer_url.rsplit('_', 2)
+                trailer_url = trailer_cuts[0] + '_mhb_w.mp4'
+                return trailer_url
+        return ''
 
     def getFanzaString(self, expr):
         result1 = str(self.htmltree.xpath("//td[contains(text(),'"+expr+"')]/following-sibling::td/a/text()")).strip(" ['']")

From 7af659bda701c45b19fa5dfdacb651710d698d0e Mon Sep 17 00:00:00 2001
From: "Wayne.S.Lui" <waynelui@outlook.com>
Date: Tue, 30 Aug 2022 22:56:54 +0800
Subject: [PATCH 2/6] Update fanza.py

---
 scrapinglib/fanza.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/scrapinglib/fanza.py b/scrapinglib/fanza.py
index 0ccf285..698a888 100644
--- a/scrapinglib/fanza.py
+++ b/scrapinglib/fanza.py
@@ -140,8 +140,7 @@ class Fanza(Parser):
 
     def getExtrafanart(self, htmltree):
         html_parent = re.compile(r'<div id=\"sample-image-block\"[\s\S]*?<br></div>\s*?</div>')
-        html = html_parent.search(
-            self.htmlcode)  
+        html = html_parent.search(self.htmlcode)  
         if html:
             html = html.group()
             extrafanart_parent = re.compile(r'<img.*?src=\"(.*?)\"')
@@ -157,8 +156,7 @@ class Fanza(Parser):
 
     def getTrailer(self, htmltree):
         html_parent = re.compile(r'<script type=\"application/ld\+json\">[\s\S].*}\s*?</script>')
-        html = html_parent.search(
-            self.htmlcode)  
+        html = html_parent.search(self.htmlcode)  
         if html:
             html = html.group()
             trailer_parent = re.compile(r'\"contentUrl\":\"(.*?)\"')

From f848a4ec244425a5a09acd009252709e64bf2595 Mon Sep 17 00:00:00 2001
From: "Wayne.S.Lui" <waynelui@outlook.com>
Date: Tue, 30 Aug 2022 22:56:54 +0800
Subject: [PATCH 3/6] Update fanza.py

---
 scrapinglib/fanza.py | 33 +++++++++++----------------------
 1 file changed, 11 insertions(+), 22 deletions(-)

diff --git a/scrapinglib/fanza.py b/scrapinglib/fanza.py
index 0ccf285..5ccd0b3 100644
--- a/scrapinglib/fanza.py
+++ b/scrapinglib/fanza.py
@@ -139,35 +139,24 @@ class Fanza(Parser):
         return result
 
     def getExtrafanart(self, htmltree):
-        html_parent = re.compile(r'<div id=\"sample-image-block\"[\s\S]*?<br></div>\s*?</div>')
-        html = html_parent.search(
-            self.htmlcode)  
-        if html:
-            html = html.group()
-            extrafanart_parent = re.compile(r'<img.*?src=\"(.*?)\"')
-            extrafanart_images = extrafanart_parent.findall(html)
+        htmltext = re.search(r'<div id=\"sample-image-block\"[\s\S]*?<br></div>\s*?</div>', self.htmlcode).group()
+        if htmltext:
+            extrafanart_images = re.findall(r'<img.*?src=\"(.*?)\"', htmltext)
             if extrafanart_images:
                 sheet = []
                 for img_url in extrafanart_images:
-                    img_urls = img_url.rsplit('-', 1)
-                    img_url = img_urls[0] + 'jp-' + img_urls[1]
-                    sheet.append(img_url)
+                    url_cuts = img_url.rsplit('-', 1)
+                    sheet.append(url_cuts[0] + 'jp-' + url_cuts[1])
                 return sheet
         return ''
 
     def getTrailer(self, htmltree):
-        html_parent = re.compile(r'<script type=\"application/ld\+json\">[\s\S].*}\s*?</script>')
-        html = html_parent.search(
-            self.htmlcode)  
-        if html:
-            html = html.group()
-            trailer_parent = re.compile(r'\"contentUrl\":\"(.*?)\"')
-            trailer_url = trailer_parent.search(html)
-            if trailer_url:
-                trailer_url = trailer_url.group(1)
-                trailer_cuts = trailer_url.rsplit('_', 2)
-                trailer_url = trailer_cuts[0] + '_mhb_w.mp4'
-                return trailer_url
+        htmltext = re.search(r'<script type=\"application/ld\+json\">[\s\S].*}\s*?</script>', self.htmlcode).group()
+        if htmltext:
+            url = re.search(r'\"contentUrl\":\"(.*?)\"', htmltext).group(1)
+            if url:
+                url = url.rsplit('_', 2)[0] + '_mhb_w.mp4'
+                return url
         return ''
 
     def getFanzaString(self, expr):

From 45021672166b750df0431b39dbadf93ab9b7eee1 Mon Sep 17 00:00:00 2001
From: "Wayne.S.Lui" <waynelui@outlook.com>
Date: Wed, 31 Aug 2022 14:17:52 +0800
Subject: [PATCH 4/6] Merge remote-tracking branch 'origin/master'

# Conflicts:
#	scrapinglib/fanza.py
---
 scrapinglib/fanza.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/scrapinglib/fanza.py b/scrapinglib/fanza.py
index 5ccd0b3..858115a 100644
--- a/scrapinglib/fanza.py
+++ b/scrapinglib/fanza.py
@@ -139,8 +139,9 @@ class Fanza(Parser):
         return result
 
     def getExtrafanart(self, htmltree):
-        htmltext = re.search(r'<div id=\"sample-image-block\"[\s\S]*?<br></div>\s*?</div>', self.htmlcode).group()
+        htmltext = re.search(r'<div id=\"sample-image-block\"[\s\S]*?<br></div>\s*?</div>', self.htmlcode)
         if htmltext:
+            htmltext = htmltext.group()
             extrafanart_images = re.findall(r'<img.*?src=\"(.*?)\"', htmltext)
             if extrafanart_images:
                 sheet = []
@@ -151,10 +152,12 @@ class Fanza(Parser):
         return ''
 
     def getTrailer(self, htmltree):
-        htmltext = re.search(r'<script type=\"application/ld\+json\">[\s\S].*}\s*?</script>', self.htmlcode).group()
+        htmltext = re.search(r'<script type=\"application/ld\+json\">[\s\S].*}\s*?</script>', self.htmlcode)
         if htmltext:
-            url = re.search(r'\"contentUrl\":\"(.*?)\"', htmltext).group(1)
+            htmltext = htmltext.group()
+            url = re.search(r'\"contentUrl\":\"(.*?)\"', htmltext)
             if url:
+                url = url.group(1)
                 url = url.rsplit('_', 2)[0] + '_mhb_w.mp4'
                 return url
         return ''

From 6cb4be22aec00eb4dcd82481bb0daf38b27e687f Mon Sep 17 00:00:00 2001
From: aedvoan <aedvoan@gmail.com>
Date: Sun, 4 Sep 2022 23:14:38 +0800
Subject: [PATCH 5/6] Update javbus.py

fanbus.us has already been decommissioned; remove it to avoid pointless retries.
---
 scrapinglib/javbus.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/scrapinglib/javbus.py b/scrapinglib/javbus.py
index eb559c0..cf3122e 100644
--- a/scrapinglib/javbus.py
+++ b/scrapinglib/javbus.py
@@ -42,7 +42,6 @@ class Javbus(Parser):
                 'buscdn.fun', 'busdmm.fun', 'busfan.fun', 'busjav.fun',
                 'cdnbus.fun',
                 'dmmbus.fun', 'dmmsee.fun',
-                'fanbus.us',
                 'seedmm.fun',
                 ]) + "/"
             try:

From 2a62b59346fbc2d9250225e7de26ffa968c7f1dd Mon Sep 17 00:00:00 2001
From: "Wayne.Lui" <waynelui@outlook.com>
Date: Tue, 6 Sep 2022 12:46:03 +0800
Subject: [PATCH 6/6] Update carib.py to download trailer

---
 scrapinglib/carib.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/scrapinglib/carib.py b/scrapinglib/carib.py
index 0a561de..567ec57 100644
--- a/scrapinglib/carib.py
+++ b/scrapinglib/carib.py
@@ -63,6 +63,9 @@ class Carib(Parser):
                 r.append('https://www.caribbeancom.com' + jpg)
         return r
 
+    def getTrailer(self, htmltree):
+        return f'https://smovie.caribbeancom.com/sample/movies/{self.number}/1080p.mp4'
+
     def getActorPhoto(self, htmltree):
         htmla = htmltree.xpath("//*[@id='moviepages']/div[@class='container']/div[@class='inner-container']/div[@class='movie-info section']/ul/li[@class='movie-spec']/span[@class='spec-content']/a[@itemprop='actor']")
         names = htmltree.xpath("//*[@id='moviepages']/div[@class='container']/div[@class='inner-container']/div[@class='movie-info section']/ul/li[@class='movie-spec']/span[@class='spec-content']/a[@itemprop='actor']/span[@itemprop='name']/text()")