From d771b4e985793b67bc6b817a129fb31610727caa Mon Sep 17 00:00:00 2001 From: lededev Date: Thu, 15 Jul 2021 12:18:55 +0800 Subject: [PATCH 1/2] javdb:always include over18 cookies --- WebCrawler/javdb.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/WebCrawler/javdb.py b/WebCrawler/javdb.py index 1a81c97..e890b70 100755 --- a/WebCrawler/javdb.py +++ b/WebCrawler/javdb.py @@ -216,7 +216,7 @@ def main(number): # number = number.upper() number = number.upper() cookie_json = './' + javdb_site + '.json' - javdb_cookies = None + javdb_cookies = {'over18':'1', 'theme': 'auto', 'locale': 'zh'} # 不加载过期的cookie,javdb登录界面显示为7天免登录,故假定cookie有效期为7天 cdays = file_modification_days(cookie_json) if cdays < 7: @@ -229,7 +229,7 @@ def main(number): javdb_url = 'https://' + javdb_site + '.com/search?q=' + number + '&f=all' query_result = get_html(javdb_url, cookies=javdb_cookies) except: - query_result = get_html('https://javdb.com/search?q=' + number + '&f=all') + query_result = get_html('https://javdb.com/search?q=' + number + '&f=all', cookies=javdb_cookies) html = etree.fromstring(query_result, etree.HTMLParser()) # //table/tr[1]/td[1]/text() # javdb sometime returns multiple results, # and the first elememt maybe not the one we are looking for @@ -251,7 +251,7 @@ def main(number): javdb_detail_url = 'https://' + javdb_site + '.com' + correct_url detail_page = get_html(javdb_detail_url, cookies=javdb_cookies) except: - detail_page = get_html('https://' + javdb_site + '.com' + correct_url) + detail_page = get_html('https://' + javdb_site + '.com' + correct_url, cookies=javdb_cookies) # no cut image by default imagecut = 3 From f51d91b2271db0bd785677880e92b419b5d29f81 Mon Sep 17 00:00:00 2001 From: lededev Date: Thu, 15 Jul 2021 12:34:21 +0800 Subject: [PATCH 2/2] try another domain name --- WebCrawler/javdb.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/WebCrawler/javdb.py b/WebCrawler/javdb.py index e890b70..6b8fef5 100755 --- a/WebCrawler/javdb.py +++ b/WebCrawler/javdb.py @@ -216,7 +216,7 @@ def main(number): # number = number.upper() number = number.upper() cookie_json = './' + javdb_site + '.json' - javdb_cookies = {'over18':'1', 'theme': 'auto', 'locale': 'zh'} + javdb_cookies = {'over18':'1', 'theme':'auto', 'locale':'zh'} # 不加载过期的cookie,javdb登录界面显示为7天免登录,故假定cookie有效期为7天 cdays = file_modification_days(cookie_json) if cdays < 7: @@ -251,7 +251,7 @@ def main(number): javdb_detail_url = 'https://' + javdb_site + '.com' + correct_url detail_page = get_html(javdb_detail_url, cookies=javdb_cookies) except: - detail_page = get_html('https://' + javdb_site + '.com' + correct_url, cookies=javdb_cookies) + detail_page = get_html('https://javdb.com' + correct_url, cookies=javdb_cookies) # no cut image by default imagecut = 3