fix scrape parameters
This commit is contained in:
32
scraper.py
32
scraper.py
@@ -1,10 +1,10 @@
|
|||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
from multiprocessing.pool import ThreadPool
|
from multiprocessing.pool import ThreadPool
|
||||||
|
import secrets
|
||||||
|
|
||||||
import ADC_function
|
import ADC_function
|
||||||
import config
|
import config
|
||||||
from ADC_function import translate
|
|
||||||
from lxml import etree
|
from lxml import etree
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
@@ -162,17 +162,35 @@ def get_data_from_json(file_number, oCC):
|
|||||||
# break
|
# break
|
||||||
|
|
||||||
# TODO 准备参数
|
# TODO 准备参数
|
||||||
# 1. javdb 的额外参数,cookies及sites区分
|
# - 清理 ADC_function, webcrawler
|
||||||
# 2. storyline sites参数
|
|
||||||
# 3. getchu仍在变更,未添加
|
|
||||||
# 4. 清理 ADC_function, webcrawler
|
|
||||||
# 5. ......
|
|
||||||
proxies = None
|
proxies = None
|
||||||
configProxy = conf.proxy()
|
configProxy = conf.proxy()
|
||||||
if configProxy.enable:
|
if configProxy.enable:
|
||||||
proxies = configProxy.proxies()
|
proxies = configProxy.proxies()
|
||||||
|
|
||||||
|
javdb_sites = conf.javdb_sites().split(',')
|
||||||
|
for i in javdb_sites:
|
||||||
|
javdb_sites[javdb_sites.index(i)] = "javdb" + i
|
||||||
|
javdb_sites.append("javdb")
|
||||||
|
# 不加载过期的cookie,javdb登录界面显示为7天免登录,故假定cookie有效期为7天
|
||||||
|
has_json = False
|
||||||
|
for cj in javdb_sites:
|
||||||
|
javdb_site = cj
|
||||||
|
cookie_json = javdb_site + '.json'
|
||||||
|
cookies_dict, cookies_filepath = ADC_function.load_cookies(cookie_json)
|
||||||
|
if isinstance(cookies_dict, dict) and isinstance(cookies_filepath, str):
|
||||||
|
cdays = ADC_function.file_modification_days(cookies_filepath)
|
||||||
|
if cdays < 7:
|
||||||
|
javdb_cookies = cookies_dict
|
||||||
|
has_json = True
|
||||||
|
break
|
||||||
|
elif cdays != 9999:
|
||||||
|
print(f'[!]Cookies file {cookies_filepath} was updated {cdays} days ago, it will not be used for HTTP requests.')
|
||||||
|
if not has_json:
|
||||||
|
javdb_site = secrets.choice(javdb_sites)
|
||||||
|
javdb_cookies = None
|
||||||
|
|
||||||
json_data = search(file_number, sources, proxies=proxies)
|
json_data = search(file_number, sources, proxies=proxies, dbsites=javdb_site, dbcookies=javdb_cookies, morestoryline=conf.is_storyline())
|
||||||
# Return if data not found in all sources
|
# Return if data not found in all sources
|
||||||
if not json_data:
|
if not json_data:
|
||||||
print('[-]Movie Number not found!')
|
print('[-]Movie Number not found!')
|
||||||
|
|||||||
Reference in New Issue
Block a user