From 7af0951b825eb1697f7821c8dc52cb9966ab6bc1 Mon Sep 17 00:00:00 2001
From: Feng4
Date: Sun, 20 Dec 2020 00:34:42 +0800
Subject: [PATCH 1/4] Update number_parser.py

---
 number_parser.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/number_parser.py b/number_parser.py
index 6e7f7b4..554d3d3 100644
--- a/number_parser.py
+++ b/number_parser.py
@@ -61,6 +61,11 @@ def get_number(debug,filepath: str) -> str:
             file_number = re.search(r'\w+-\w+', filename, re.A).group()
             return file_number
         else: # Extract IDs that contain no hyphen (FANZA CID)
+            # Matching rule for European/American (Western) release IDs
+            oumei = re.search(r'[a-zA-Z]+\.\d{2}\.\d{2}\.\d{2}', filepath)
+            if oumei:
+                return oumei.group()
+
             try:
                 return str(
                     re.findall(r'(.+?)\.',
@@ -72,4 +77,4 @@ def get_number(debug,filepath: str) -> str:
 
 # if __name__ == "__main__":
 #     import doctest
-#     doctest.testmod(raise_on_error=True)
\ No newline at end of file
+#     doctest.testmod(raise_on_error=True)

From c94fcd47facc18942358434d3b25b0a49d065034 Mon Sep 17 00:00:00 2001
From: Feng4
Date: Sun, 20 Dec 2020 00:37:03 +0800
Subject: [PATCH 2/4] Update number_parser.py

---
 number_parser.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/number_parser.py b/number_parser.py
index 554d3d3..025e2cf 100644
--- a/number_parser.py
+++ b/number_parser.py
@@ -41,6 +41,11 @@ def get_number(debug,filepath: str) -> str:
             file_number = re.search(r'\w+-\w+', filename, re.A).group()
             return file_number
         else: # Extract IDs that contain no hyphen (FANZA CID)
+            # Matching rule for European/American (Western) release IDs
+            oumei = re.search(r'[a-zA-Z]+\.\d{2}\.\d{2}\.\d{2}', filepath)
+            if oumei:
+                return oumei.group()
+
             try:
                 return str(
                     re.findall(r'(.+?)\.',

From fc4cc4c122785da7690f11465a5cd2bc301b0e2f Mon Sep 17 00:00:00 2001
From: Feng4
Date: Sun, 20 Dec 2020 00:42:58 +0800
Subject: [PATCH 3/4] Add scraping detection for European/American releases
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 WebCrawler/javdb.py | 30 +++++++++++++++++++++++-------
 1 file changed, 23 insertions(+), 7 deletions(-)

diff --git a/WebCrawler/javdb.py b/WebCrawler/javdb.py
index eac8d7a..fa38bae 100644
--- a/WebCrawler/javdb.py
+++ b/WebCrawler/javdb.py
@@ -103,10 +103,17 @@ def getCover_small(a, index=0):
             result = 'https:' + result
         return result
     except: # 2020.7.17 Repair Cover Url crawl
-        result = html.xpath("//div[@class='item-image fix-scale-cover']/img/@data-src")[index]
-        if not 'https' in result:
-            result = 'https:' + result
-        return result
+        try:
+            result = html.xpath("//div[@class='item-image fix-scale-cover']/img/@data-src")[index]
+            if not 'https' in result:
+                result = 'https:' + result
+            return result
+        except:
+            result = html.xpath("//div[@class='item-image']/img/@data-src")[index]
+            if not 'https' in result:
+                result = 'https:' + result
+            return result
+
 def getCover(htmlcode):
     html = etree.fromstring(htmlcode, etree.HTMLParser())
     try:
@@ -141,14 +148,23 @@ def main(number):
     # and the first element may not be the one we are looking for
     # iterate all candidates and find the match one
     urls = html.xpath('//*[@id="videos"]/div/div/a/@href')
-    ids =html.xpath('//*[@id="videos"]/div/div/a/div[contains(@class, "uid")]/text()')
-    correct_url = urls[ids.index(number)]
+    # Note: for Western releases the ids list looks like ['Blacked','Blacked']
+    if re.search(r'[a-zA-Z]+\.\d{2}\.\d{2}\.\d{2}', number):
+        correct_url = urls[0]
+    else:
+        ids =html.xpath('//*[@id="videos"]/div/div/a/div[contains(@class, "uid")]/text()')
+        correct_url = urls[ids.index(number)]
+
     detail_page = get_html('https://javdb.com' + correct_url)
 
     # no cut image by default
     imagecut = 3
     # If gray image exists, then replace with normal cover
-    cover_small = getCover_small(query_result, index=ids.index(number))
+    if re.search(r'[a-zA-Z]+\.\d{2}\.\d{2}\.\d{2}', number):
+        cover_small = getCover_small(query_result)
+    else:
+        cover_small = getCover_small(query_result, index=ids.index(number))
+
     if 'placeholder' in cover_small: # replace with normal cover and cut it
         imagecut = 1
 

From 23281a4a64417207d1f9c31f7bf8a3d1b06aae19 Mon Sep 17 00:00:00 2001
From: Feng4
Date: Sun, 20 Dec 2020 00:49:55 +0800
Subject: [PATCH 4/4] Update javdb.py

---
 WebCrawler/javdb.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/WebCrawler/javdb.py b/WebCrawler/javdb.py
index fa38bae..41a4775 100644
--- a/WebCrawler/javdb.py
+++ b/WebCrawler/javdb.py
@@ -160,7 +160,7 @@ def main(number):
     # no cut image by default
     imagecut = 3
     # If gray image exists, then replace with normal cover
-    if re.search(r'[a-zA-Z]+\.\d{2}\.\d{2}\.\d{2}', number):
+    if re.search(r'[a-zA-Z]+\.\d{2}\.\d{2}\.\d{2}', number):
         cover_small = getCover_small(query_result)
     else:
         cover_small = getCover_small(query_result, index=ids.index(number))
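
The number_parser.py patches key Western releases off a dotted studio-plus-date pattern. Below is a minimal sketch, not part of the patches themselves, showing which part of a filename the regex r'[a-zA-Z]+\.\d{2}\.\d{2}\.\d{2}' captures; the sample filenames are hypothetical and only illustrate which styles fall through to the existing hyphen / FANZA CID handling.

import re

# The same pattern the patches add to get_number() and to javdb.py's main().
WESTERN_ID = re.compile(r'[a-zA-Z]+\.\d{2}\.\d{2}\.\d{2}')

# Hypothetical filenames, chosen only to illustrate the match behaviour.
for name in ('Blacked.20.12.19.Some.Title.1080p.mp4',  # Western studio.date style
             'ABP-123.mp4',                            # hyphenated ID, handled by the earlier branch
             'ssis00123.mp4'):                         # FANZA CID, handled by the existing try block
    m = WESTERN_ID.search(name)
    print(name, '->', m.group() if m else 'no Western match')
    # -> 'Blacked.20.12.19' for the first name, 'no Western match' for the other two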
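
On the javdb.py side, the result picker cannot rely on the uid column for Western titles: as the patch comment notes, the ids list then holds the studio name (e.g. ['Blacked','Blacked']) rather than the full number, so ids.index(number) would raise ValueError. A sketch of the selection logic the patch adds, using hypothetical urls/ids values:

import re

number = 'Blacked.20.12.19'                        # hypothetical Western-style number
urls = ['/v/hypothetical1', '/v/hypothetical2']    # hypothetical result hrefs
ids = ['Blacked', 'Blacked']                       # shape described in the patch comment

if re.search(r'[a-zA-Z]+\.\d{2}\.\d{2}\.\d{2}', number):
    correct_url = urls[0]                   # uid column is unusable here, take the first hit
else:
    correct_url = urls[ids.index(number)]   # JAV numbers: pick the exact uid match

print(correct_url)                          # -> /v/hypothetical1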