From 3042001df522083498d34e9d1a2835cbe4845cc7 Mon Sep 17 00:00:00 2001
From: lededev <lededev@noreplay.github.com>
Date: Sun, 9 May 2021 12:23:21 +0800
Subject: [PATCH] javdb enable user login cookies

---
 ADC_function.py     | 26 +++++++++++++++++++++++++-
 WebCrawler/javdb.py | 20 +++++++++++++++-----
 2 files changed, 40 insertions(+), 6 deletions(-)

diff --git a/ADC_function.py b/ADC_function.py
index 79b985d..0d29ea7 100644
--- a/ADC_function.py
+++ b/ADC_function.py
@@ -545,4 +545,28 @@ def is_uncensored(number):
     for pre in prefix_list:
         if pre.upper() in number.upper():
             return True
-    return False
\ No newline at end of file
+    return False
+
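+# Cookies holding a site's login credentials can be exported from a browser so that pages guests cannot access are opened as a logged-in member
+# Example: FC2-755670 url https://javdb9.com/v/vO8Mn
+# JSON file format
+# File name: <site name>.json, e.g. javdb9.json
+# Content (file encoding: UTF-8):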
+'''
+{
+    "over18":"1",
+    "redirect_to":"%2Fv%2FvO8Mn",
+    "remember_me_token":"cbJdeaFpbHMiOnsibWVzc2FnZSI6IklrNVJjbTAzZFVSRVlVaEtPWEpUVFhOVU0yNXhJZz09IiwiZXhwIjoiMjAyMS0wNS0xNVQxMzoyODoxNy4wMDBaIiwicHVyIjoiY29va2llLnJlbWVtYmVyX21lX3Rva2VuIn19--a7131611e844cf75f9db4cd411b635889bff3fe3",
+    "_jdb_session":"asddefqfwfwwrfdsdaAmqKj1%2FvOrDQP4b7h%2BvGp7brvIShi2Y%2FHBUr%2BklApk06TfhBOK3g5gRImZzoi49GINH%2FK49o3W%2FX64ugBiUAcudN9b27Mg6Ohu%2Bx9Z7A4bbqmqCt7XR%2Bao8PRuOjMcdDG5czoYHJCPIPZQFU28Gd7Awc2jc5FM5CoIgSRyaYDy9ulTO7DlavxoNL%2F6OFEL%2FyaA6XUYTB2Gs1kpPiUDqwi854mo5%2FrNxMhTeBK%2BjXciazMtN5KlE5JIOfiWAjNrnx7SV3Hj%2FqPNxRxXFQyEwHr5TZa0Vk1%2FjbwWQ0wcIFfh%2FMLwwqKydAh%2FLndc%2Bmdv3e%2FJ%2BiL2--xhqYnMyVRlxJajdN--u7nl0M7Oe7tZtPd4kIaEbg%3D%3D",
+    "locale":"zh",
+    "__cfduid":"dee27116d98c432a5cabc1fe0e7c2f3c91620479752",
+    "theme":"auto"
+}
+'''
+def load_cookies(filename):
+    """Load cookies from the UTF-8 JSON file described above; fill it after logging in by copying/exporting the site's cookies with a browser extension (CookieBro or EditThisCookie) or from the address bar's site-info panel. Returns the parsed JSON object, or None if the file is missing or invalid."""
+    try:
+        with open(filename, encoding="utf-8") as f:
+            return json.load(f)
+    except (OSError, ValueError):
+        return None
diff --git a/WebCrawler/javdb.py b/WebCrawler/javdb.py
index 678dbc5..78c2aeb 100644
--- a/WebCrawler/javdb.py
+++ b/WebCrawler/javdb.py
@@ -44,7 +44,7 @@ def getActorPhoto(html): #//*[@id="star_qdt"]/li/a/img
 
     else:
         return {}
-    
+
 def getStudio(a):
     # html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
     # result1 = str(html.xpath('//strong[contains(text(),"片商")]/../span/text()')).strip(" ['']")
@@ -57,7 +57,7 @@ def getStudio(a):
     else:
         result = ""
     return result
-    
+
 def getRuntime(a):
     html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
     result1 = str(html.xpath('//strong[contains(text(),"時長")]/../span/text()')).strip(" ['']")
@@ -189,6 +189,9 @@ def getSeries(a):
     result1 = str(html.xpath('//strong[contains(text(),"系列")]/../span/text()')).strip(" ['']")
     result2 = str(html.xpath('//strong[contains(text(),"系列")]/../span/a/text()')).strip(" ['']")
     return str(result1 + result2).strip('+').replace("', '", '').replace('"', '')
+
+javdb_site = "javdb9"
+
 def main(number):
     try:
         # if re.search(r'[a-zA-Z]+\.\d{2}\.\d{2}\.\d{2}', number).group():
@@ -196,10 +199,12 @@ def main(number):
         # else:
         #     number = number.upper()
         number = number.upper()
+        javdb_cookies = load_cookies(javdb_site + ".json")
         try:
-            query_result = get_html('https://javdb8.com/search?q=' + number + '&f=all')
+            javdb_url = 'https://' + javdb_site + '.com/search?q=' + number + '&f=all'
+            query_result = get_html(javdb_url, cookies=javdb_cookies)
         except:
-            query_result = get_html('https://javdb.com/search?q=' + number + '&f=all')
+            query_result = get_html('https://javdb8.com/search?q=' + number + '&f=all')
         html = etree.fromstring(query_result, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
         # javdb sometime returns multiple results,
         # and the first elememt maybe not the one we are looking for
@@ -211,7 +216,11 @@ def main(number):
         else:
             ids =html.xpath('//*[@id="videos"]/div/div/a/div[contains(@class, "uid")]/text()')
             correct_url = urls[ids.index(number)]
-        detail_page = get_html('https://javdb8.com' + correct_url)
+        try:
+            javdb_detail_url = 'https://' + javdb_site + '.com' + correct_url
+            detail_page = get_html(javdb_detail_url, cookies=javdb_cookies)
+        except:
+            detail_page = get_html('https://javdb8.com' + correct_url)
 
         # no cut image by default
         imagecut = 3
@@ -266,3 +275,4 @@ if __name__ == "__main__":
     # print(main('blacked.20.05.30'))
     # print(main('AGAV-042'))
     print(main('BANK-022'))
+    print(main('FC2-735670'))