fix scrape parameters

2022-05-27 15:25:08 +08:00
parent 9898f2918f
commit 8871355787
1 changed files with 25 additions and 7 deletions
--- a/scraper.py
+++ b/scraper.py
@@ -1,10 +1,10 @@
 import json
 import re
 from multiprocessing.pool import ThreadPool
+import secrets

 import ADC_function
 import config
-from ADC_function import translate
 from lxml import etree
 from pathlib import Path

@@ -162,17 +162,35 @@ def get_data_from_json(file_number, oCC):
    #             break

    # TODO 准备参数
-    # 1. javdb 的额外参数，cookies及sites区分
-    # 2. storyline sites参数
-    # 3. getchu仍在变更，未添加
-    # 4. 清理 ADC_function, webcrawler
-    # 5. ......
+    # - 清理 ADC_function, webcrawler
    proxies = None
    configProxy = conf.proxy()
    if configProxy.enable:
        proxies = configProxy.proxies()
+    
+    javdb_sites = conf.javdb_sites().split(',')
+    for i in javdb_sites:
+        javdb_sites[javdb_sites.index(i)] = "javdb" + i
+    javdb_sites.append("javdb")
+    # 不加载过期的cookie，javdb登录界面显示为7天免登录，故假定cookie有效期为7天
+    has_json = False
+    for cj in javdb_sites:
+        javdb_site = cj
+        cookie_json = javdb_site + '.json'
+        cookies_dict, cookies_filepath = ADC_function.load_cookies(cookie_json)
+        if isinstance(cookies_dict, dict) and isinstance(cookies_filepath, str):
+            cdays = ADC_function.file_modification_days(cookies_filepath)
+            if cdays < 7:
+                javdb_cookies = cookies_dict
+                has_json = True
+                break
+            elif cdays != 9999:
+                print(f'[!]Cookies file {cookies_filepath} was updated {cdays} days ago, it will not be used for HTTP requests.')
+    if not has_json:
+        javdb_site = secrets.choice(javdb_sites)
+        javdb_cookies = None

-    json_data = search(file_number, sources, proxies=proxies)
+    json_data = search(file_number, sources, proxies=proxies, dbsites=javdb_site, dbcookies=javdb_cookies, morestoryline=conf.is_storyline())
    # Return if data not found in all sources
    if not json_data:
        print('[-]Movie Number not found!')