diff --git a/ADC_function.py b/ADC_function.py
index 0d29ea7..c219a50 100644
--- a/ADC_function.py
+++ b/ADC_function.py
@@ -1,5 +1,6 @@
 import requests
 import hashlib
+import pathlib
 import random
 import uuid
 import json
@@ -570,3 +571,22 @@ def load_cookies(filename):
         return json.load(open(filename))
     except:
         return None
+
+# Number of whole days elapsed since the file was last modified
+def file_modification_days(filename) -> int:
+    """Return the age of *filename* in whole days, or 9999 if it is unusable.
+
+    9999 is returned when the file cannot be stat'ed (e.g. it does not exist)
+    or when its modification time lies a full day or more in the future.
+    """
+    try:
+        # stat() directly instead of exists() + stat(): the file may disappear
+        # between the two calls, which would raise instead of returning 9999
+        mtime = int(pathlib.Path(filename).stat().st_mtime)
+    except OSError:
+        return 9999
+    now = int(time.time())
+    days = int((now - mtime) / (24 * 60 * 60))
+    if days < 0:
+        return 9999
+    return days
diff --git a/WebCrawler/javdb.py b/WebCrawler/javdb.py
index 7acda58..7a09538 100644
--- a/WebCrawler/javdb.py
+++ b/WebCrawler/javdb.py
@@ -11,8 +11,10 @@ from ADC_function import *
 
 def getTitle(a):
     html = etree.fromstring(a, etree.HTMLParser())
-    result = html.xpath("/html/body/section/div/h2/strong/text()")[0]
-    return result
+    browser_title = str(html.xpath("/html/head/title/text()")[0])
+    # partition() keeps the whole title when the ' | JavDB' suffix is absent;
+    # slicing with find() would use -1 there and drop the last character
+    return browser_title.partition(' | JavDB')[0].strip()
 
 def getActor(a):
     html = etree.fromstring(a, etree.HTMLParser())
@@ -212,7 +214,11 @@ def main(number):
         # else:
         #     number = number.upper()
         number = number.upper()
-        javdb_cookies = load_cookies(javdb_site + ".json")
+        cookie_json = './' + javdb_site + '.json'
+        javdb_cookies = None
+        # Skip expired cookies: the javdb login page advertises a 7-day login-free period, so cookies are assumed valid for 7 days
+        if file_modification_days(cookie_json) < 7:
+            javdb_cookies = load_cookies(cookie_json)
         try:
             javdb_url = 'https://' + javdb_site + '.com/search?q=' + number + '&f=all'
             query_result = get_html(javdb_url, cookies=javdb_cookies)