From 78619f5909ccb41569906d0b17a774f63c395eb1 Mon Sep 17 00:00:00 2001
From: yoshiko2
Date: Sat, 8 Jul 2023 04:51:30 +0800
Subject: [PATCH 1/8] Set lower pillow version

---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index da1c7e3..e481111 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -5,7 +5,7 @@ numpy
 face-recognition-models
 lxml
 beautifulsoup4
-pillow
+pillow==9.5.0
 cloudscraper
 pysocks==1.7.1
 urllib3==1.25.11

From 47a271f938e2d341ce055173568f649ee99b4b3e Mon Sep 17 00:00:00 2001
From: yoshiko2
Date: Sat, 8 Jul 2023 04:52:23 +0800
Subject: [PATCH 2/8] Update sites

---
 scrapinglib/api.py    |  7 ++++---
 scrapinglib/dlsite.py |  5 +++++
 scrapinglib/getchu.py | 15 +++++++++++++--
 scrapinglib/parser.py |  1 -
 4 files changed, 22 insertions(+), 6 deletions(-)

diff --git a/scrapinglib/api.py b/scrapinglib/api.py
index e1b3c09..1889206 100644
--- a/scrapinglib/api.py
+++ b/scrapinglib/api.py
@@ -221,8 +221,9 @@ class Scraping:
             sources = ["pcolle"]
         elif "fc2" in lo_file_number:
             if "fc2" in sources:
-                sources = insert(sources, "msin")
-                sources = insert(sources, "fc2")
+                sources = ["msin", "fc2"]
+                # sources = insert(sources, "msin")
+                # sources = insert(sources, "fc2")
         elif "mgstage" in sources and \
             (re.search(r"\d+\D+", file_number) or "siro" in lo_file_number):
             sources = insert(sources, "mgstage")
@@ -245,7 +246,7 @@ class Scraping:
                 todel.append(s)
         for d in todel:
             if config.getInstance().debug():
-                print('[!] Remove Source : ' + s)
+                print('[!] Remove Source : ' + d)
             sources.remove(d)
         return sources
 
diff --git a/scrapinglib/dlsite.py b/scrapinglib/dlsite.py
index d307f02..5c6f003 100644
--- a/scrapinglib/dlsite.py
+++ b/scrapinglib/dlsite.py
@@ -97,3 +97,8 @@ class Dlsite(Parser):
         except:
             result = ''
         return result
+
+    def getTags(self, htmltree):
+        tags = super().getTags(htmltree)
+        tags.append("DLsite")
+        return tags
diff --git a/scrapinglib/getchu.py b/scrapinglib/getchu.py
index 5c1649d..d5b67ef 100644
--- a/scrapinglib/getchu.py
+++ b/scrapinglib/getchu.py
@@ -95,9 +95,15 @@ class wwwGetchu(Parser):
     def extradict(self, dic: dict):
         """ 额外新增的 headers
         """
-        dic['headers'] = {'referer': self.detailurl}
+        dic['headers'] = {'referer': self.detailurl}
         return dic
 
+    def getTags(self, htmltree):
+        tags = super().getTags(htmltree)
+        tags.append("Getchu")
+        return tags
+
+
 class dlGetchu(wwwGetchu):
     """ 二者基本一致
     headers extrafanart 略有区别
@@ -140,7 +146,7 @@ class dlGetchu(wwwGetchu):
 
     def extradict(self, dic: dict):
         return dic
-    
+
     def getExtrafanart(self, htmltree):
         arts = self.getTreeAll(htmltree, self.expr_extrafanart)
         extrafanart = []
@@ -148,3 +154,8 @@ class dlGetchu(wwwGetchu):
             i = "https://dl.getchu.com" + i
             extrafanart.append(i)
         return extrafanart
+
+    def getTags(self, htmltree):
+        tags = super().getTags(htmltree)
+        tags.append("Getchu")
+        return tags
diff --git a/scrapinglib/parser.py b/scrapinglib/parser.py
index b0cdfb7..05a7994 100644
--- a/scrapinglib/parser.py
+++ b/scrapinglib/parser.py
@@ -219,7 +219,6 @@ class Parser:
                 if tag:
                     tags.append(tag)
             return tags
-        return [ x.strip() for x in alls if x.strip()]
 
     def getStudio(self, htmltree):
         return self.getTreeElementbyExprs(htmltree, self.expr_studio, self.expr_studio2)

From 43e9d7727e60fbd3292a673f737fa312da0f58c3 Mon Sep 17 00:00:00 2001
From: yoshiko2
Date: Sat, 8 Jul 2023 04:55:31 +0800
Subject: [PATCH 3/8] Update 6.6.6

---
 Movie_Data_Capture.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Movie_Data_Capture.py b/Movie_Data_Capture.py
index 99ddf6c..a96657a 100644
--- a/Movie_Data_Capture.py
+++ b/Movie_Data_Capture.py
@@ -681,7 +681,7 @@ def period(delta, pattern):
 
 
 if __name__ == '__main__':
-    version = '6.6.5'
+    version = '6.6.6'
     urllib3.disable_warnings()  # Ignore http proxy warning
     app_start = time.time()
 

From 9b5af4beddc141d37f7fe89711b97f9ddc673f62 Mon Sep 17 00:00:00 2001
From: yoshiko2
Date: Sat, 8 Jul 2023 04:57:57 +0800
Subject: [PATCH 4/8] Update 6.6.6

---
 config.ini | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/config.ini b/config.ini
index 2afe5cf..dfb0375 100755
--- a/config.ini
+++ b/config.ini
@@ -58,7 +58,7 @@ image_naming_with_number = 0
 update_check = 1
 
 [priority]
-website = javbus,airav,jav321,fanza,xcity,mgstage,fc2,avsox,dlsite,carib,madou,getchu,javdb,gcolle,javday,javmenu,pcolle,caribpr,msin
+website = javdb,javdb,javdb
 
 [escape]
 literals = \()/
@@ -129,7 +129,7 @@ mode = 1
 vars = outline,series,studio,tag,title
 
 [javdb]
-sites = 38,39,40
+sites = 521
 
 ; 人脸识别 locations_model=hog:方向梯度直方图(不太准确,速度快) cnn:深度学习模型(准确,需要GPU/CUDA,速度慢)
 ; uncensored_only=0:对全部封面进行人脸识别 1:只识别无码封面,有码封面直接切右半部分

From c3e5fdb09fefa11c614e519786e942b08b2a8fb0 Mon Sep 17 00:00:00 2001
From: yoshiko2
Date: Sat, 8 Jul 2023 04:59:27 +0800
Subject: [PATCH 5/8] Update 6.6.6

---
 config.ini | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/config.ini b/config.ini
index dfb0375..7aa6487 100755
--- a/config.ini
+++ b/config.ini
@@ -58,7 +58,7 @@ image_naming_with_number = 0
 update_check = 1
 
 [priority]
-website = javdb,javdb,javdb
+website = javbus,airav,jav321,fanza,xcity,mgstage,fc2,avsox,dlsite,carib,madou,getchu,javdb,gcolle,javday,javmenu,pcolle,caribpr,msin
 
 [escape]
 literals = \()/

From 3597a9590d94175dc61a64e60c88fe0025e24e99 Mon Sep 17 00:00:00 2001
From: yoshiko2
Date: Sat, 8 Jul 2023 16:53:01 +0800
Subject: [PATCH 6/8] Add site pcolle

---
 scrapinglib/pcolle.py | 58 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 58 insertions(+)
 create mode 100644 scrapinglib/pcolle.py

diff --git a/scrapinglib/pcolle.py b/scrapinglib/pcolle.py
new file mode 100644
index 0000000..3cfc620
--- /dev/null
+++ b/scrapinglib/pcolle.py
@@ -0,0 +1,58 @@
+# -*- coding: utf-8 -*-
+
+import re
+from lxml import etree
+from .httprequest import request_session
+from .parser import Parser
+
+
+class Pcolle(Parser):
+    source = 'pcolle'
+
+    expr_number = '//th[contains(text(),"商品ID")]/../td/text()'
+    expr_title = '//div[@class="title-04"]/div/text()'
+    expr_studio = '//th[contains(text(),"販売会員")]/../td/a/text()'
+    expr_director = '//th[contains(text(),"販売会員")]/../td/a/text()'
+    expr_actor = '//th[contains(text(),"販売会員")]/../td/a/text()'
+    expr_label = '//th[contains(text(),"カテゴリー")]/../td/ul/li/a/text()'
+    expr_series = '//th[contains(text(),"カテゴリー")]/../td/ul/li/a/text()'
+    expr_release = '//th[contains(text(),"販売開始日")]/../td/text()'
+    expr_cover = '/html/body/div[1]/div/div[4]/div[2]/div/div[1]/div/article/a/img/@src'
+    expr_tags = '//p[contains(text(),"商品タグ")]/../ul/li/a/text()'
+    expr_outline = '//p[@class="fo-14"]/text()'
+    expr_extrafanart = '//*[@class="item-nav"]/ul/li/a/img/@src'
+
+    # expr_extrafanart2 = '//*[@id="cart_quantity"]/table/tr[3]/td/div/a/img/@src'
+
+    def extraInit(self):
+        self.imagecut = 4
+
+    def search(self, number: str):
+        self.number = number.upper().replace('PCOLLE-', '')
+        self.detailurl = 'https://www.pcolle.com/product/detail/?product_id=' + self.number
+        session = request_session(cookies=self.cookies, proxies=self.proxies, verify=self.verify)
+        htmlcode = session.get(self.detailurl).text
+        htmltree = etree.HTML(htmlcode)
+        result = self.dictformat(htmltree)
+        return result
+
+    def getNum(self, htmltree):
+        num = super().getNum(htmltree).upper()
+        if self.number != num:
+            raise Exception(f'[!] {self.number}: find [{num}] in pcolle, not match')
+        return "PCOLLE-" + str(num)
+
+    def getOutline(self, htmltree):
+        result = self.getTreeAll(htmltree, self.expr_outline)
+        try:
+            return "\n".join(result)
+        except:
+            return ""
+
+    def getRelease(self, htmltree):
+        return super().getRelease(htmltree).replace('年', '-').replace('月', '-').replace('日', '')
+
+    def getCover(self, htmltree):
+        if ".gif" in super().getCover(htmltree) and len(super().getExtrafanart(htmltree)) != 0:
+            return super().getExtrafanart(htmltree)[0]
+        return super().getCover(htmltree)

From 0a263f665ceb1f20cb812438f6874635b5126b40 Mon Sep 17 00:00:00 2001
From: Hakusai Zhang
Date: Sat, 8 Jul 2023 18:20:30 +0800
Subject: [PATCH 7/8] only process file with suffix, not directory.

---
 Movie_Data_Capture.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Movie_Data_Capture.py b/Movie_Data_Capture.py
index a96657a..aece3c4 100644
--- a/Movie_Data_Capture.py
+++ b/Movie_Data_Capture.py
@@ -351,6 +351,8 @@ def movie_lists(source_folder, regexstr: str) -> typing.List[str]:
     for full_name in source.glob(r'**/*'):
         if main_mode != 3 and set(full_name.parent.parts) & escape_folder_set:
             continue
+        if not full_name.is_file():
+            continue
         if not full_name.suffix.lower() in file_type:
             continue
         absf = str(full_name)

From 26b82b17255b134e2cbfa296d342decd1ae2273a Mon Sep 17 00:00:00 2001
From: yoshiko2
Date: Sun, 9 Jul 2023 00:42:29 +0800
Subject: [PATCH 8/8] Update sites #2

---
 scrapinglib/api.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/scrapinglib/api.py b/scrapinglib/api.py
index 1889206..cf51ea4 100644
--- a/scrapinglib/api.py
+++ b/scrapinglib/api.py
@@ -220,10 +220,7 @@ class Scraping:
         elif "pcolle" in sources and "pcolle" in lo_file_number:
             sources = ["pcolle"]
         elif "fc2" in lo_file_number:
-            if "fc2" in sources:
-                sources = ["msin", "fc2"]
-                # sources = insert(sources, "msin")
-                # sources = insert(sources, "fc2")
+            sources = ["fc2", "msin"]
         elif "mgstage" in sources and \
             (re.search(r"\d+\D+", file_number) or "siro" in lo_file_number):
             sources = insert(sources, "mgstage")