From 3b498d32caa6c556de13c613a4614662bcbc157a Mon Sep 17 00:00:00 2001 From: lededev Date: Mon, 1 Nov 2021 03:49:35 +0800 Subject: [PATCH] replace browser by session in some places --- ADC_function.py | 154 +++++++++++++++++++++++++++++++--------- WebCrawler/__init__.py | 4 +- WebCrawler/carib.py | 42 +++++------ WebCrawler/javbus.py | 9 ++- WebCrawler/javdb.py | 33 ++++----- WebCrawler/storyline.py | 95 ++++++++++++++----------- 6 files changed, 215 insertions(+), 122 deletions(-) diff --git a/ADC_function.py b/ADC_function.py index 6ae5d2a..21fda6a 100755 --- a/ADC_function.py +++ b/ADC_function.py @@ -98,59 +98,113 @@ class TimeoutHTTPAdapter(HTTPAdapter): kwargs["timeout"] = self.timeout return super().send(request, **kwargs) -def get_html_by_browser(url, cookies: dict = None, ua: str = None, return_type: str = None): + +# with keep-alive feature +def get_html_session(url:str = None, cookies: dict = None, ua: str = None, return_type: str = None): + configProxy = config.getInstance().proxy() + session = requests.Session() + if isinstance(cookies, dict) and len(cookies): + requests.utils.add_dict_to_cookiejar(session.cookies, cookies) + retries = Retry(total=configProxy.retry, connect=configProxy.retry, backoff_factor=1, status_forcelist=[429, 500, 502, 503, 504]) + session.mount("https://", TimeoutHTTPAdapter(max_retries=retries, timeout=configProxy.timeout)) + session.mount("http://", TimeoutHTTPAdapter(max_retries=retries, timeout=configProxy.timeout)) + if configProxy.enable: + session.verify = config.getInstance().cacert_file() + session.proxies = configProxy.proxies() + headers = {"User-Agent": ua or G_USER_AGENT} + session.headers = headers + try: + if isinstance(url, str) and len(url): + result = session.get(str(url)) + else: # 空url参数直接返回可重用session对象,无需设置return_type + return session + if not result.ok: + return None + if return_type == "object": + return result + elif return_type == "content": + return result.content + elif return_type == "session": + return result, session + else: + result.encoding = "utf-8" + return result.text + except requests.exceptions.ProxyError: + print("[-]get_html_session() Proxy error! Please check your Proxy") + except Exception as e: + print(f"[-]get_html_session() failed. 
{e}") + return None + + +def get_html_by_browser(url:str = None, cookies: dict = None, ua: str = None, return_type: str = None): configProxy = config.getInstance().proxy() s = requests.Session() if isinstance(cookies, dict) and len(cookies): requests.utils.add_dict_to_cookiejar(s.cookies, cookies) - retries = Retry(connect=configProxy.retry, backoff_factor=1, status_forcelist=[429, 500, 502, 503, 504]) + retries = Retry(total=configProxy.retry, connect=configProxy.retry, backoff_factor=1, status_forcelist=[429, 500, 502, 503, 504]) s.mount("https://", TimeoutHTTPAdapter(max_retries=retries, timeout=configProxy.timeout)) s.mount("http://", TimeoutHTTPAdapter(max_retries=retries, timeout=configProxy.timeout)) if configProxy.enable: + s.verify = config.getInstance().cacert_file() s.proxies = configProxy.proxies() - browser = mechanicalsoup.StatefulBrowser(user_agent=ua or G_USER_AGENT, session=s) - result = browser.open(url) - if not result.ok: - return '' - result.encoding = "utf-8" - if return_type == "object": - return result - elif return_type == "content": - return result.content - elif return_type == "browser": - return result, browser - else: - return result.text - + try: + browser = mechanicalsoup.StatefulBrowser(user_agent=ua or G_USER_AGENT, session=s) + if isinstance(url, str) and len(url): + result = browser.open(url) + else: + return browser + if not result.ok: + return None + result.encoding = "utf-8" + if return_type == "object": + return result + elif return_type == "content": + return result.content + elif return_type == "browser": + return result, browser + else: + return result.text + except requests.exceptions.ProxyError: + print("[-]get_html_by_browser() Proxy error! Please check your Proxy") + except Exception as e: + print(f'[-]get_html_by_browser() Failed! 
{e}') + return None def get_html_by_form(url, form_select: str = None, fields: dict = None, cookies: dict = None, ua: str = None, return_type: str = None): configProxy = config.getInstance().proxy() s = requests.Session() if isinstance(cookies, dict) and len(cookies): requests.utils.add_dict_to_cookiejar(s.cookies, cookies) - retries = Retry(connect=configProxy.retry, backoff_factor=1, status_forcelist=[429, 500, 502, 503, 504]) + retries = Retry(total=configProxy.retry, connect=configProxy.retry, backoff_factor=1, status_forcelist=[429, 500, 502, 503, 504]) s.mount("https://", TimeoutHTTPAdapter(max_retries=retries, timeout=configProxy.timeout)) s.mount("http://", TimeoutHTTPAdapter(max_retries=retries, timeout=configProxy.timeout)) if configProxy.enable: + s.verify = config.getInstance().cacert_file() s.proxies = configProxy.proxies() - browser = mechanicalsoup.StatefulBrowser(user_agent=ua or G_USER_AGENT, session=s) - result = browser.open(url) - if not result.ok: - return '' - form = browser.select_form() if form_select is None else browser.select_form(form_select) - if isinstance(fields, dict): - for k, v in fields.items(): - browser[k] = v - response = browser.submit_selected() - response.encoding = "utf-8" - if return_type == "object": - return response - elif return_type == "content": - return response.content - elif return_type == "browser": - return response, browser - else: - return response.text + try: + browser = mechanicalsoup.StatefulBrowser(user_agent=ua or G_USER_AGENT, session=s) + result = browser.open(url) + if not result.ok: + return None + form = browser.select_form() if form_select is None else browser.select_form(form_select) + if isinstance(fields, dict): + for k, v in fields.items(): + browser[k] = v + response = browser.submit_selected() + response.encoding = "utf-8" + if return_type == "object": + return response + elif return_type == "content": + return response.content + elif return_type == "browser": + return response, browser + else: + return response.text + except requests.exceptions.ProxyError: + print("[-]get_html_by_form() Proxy error! Please check your Proxy") + except Exception as e: + print(f'[-]get_html_by_form() Failed! 
{e}') + return None # def get_javlib_cookie() -> [dict, str]: @@ -645,3 +699,33 @@ def file_not_exist_or_empty(filepath) -> bool: # 日语简单检测 def is_japanese(s) -> bool: return bool(re.search(r'[\u3040-\u309F\u30A0-\u30FF\uFF66-\uFF9F]', s, re.UNICODE)) + + +if __name__ == "__main__": + import sys, timeit + from http.client import HTTPConnection + s = get_html_session() + def benchmark(t, url): + print(f"HTTP GET Benchmark times:{t} url:{url}") + tm = timeit.timeit(f"_ = session1.get('{url}')", + "from __main__ import get_html_session;session1=get_html_session()", + number=t) + print(f'===={tm:2.5f}s get_html_session() Keep-Alive enable====') + tm = timeit.timeit(f"_ = browser1.open('{url}')", + "from __main__ import get_html_by_browser;browser1=get_html_by_browser()", + number=t) + print(f'===={tm:2.5f}s get_html_by_browser() Keep-Alive enable====') + tm = timeit.timeit(f"_ = get_html('{url}')", + "from __main__ import get_html", + number=t) + print(f'===={tm:2.5f}s get_html() ====') + t = 100 + #url = "https://www.189.cn/" + url = "http://www.chinaunicom.com" + HTTPConnection.debuglevel = 1 + _ = s.get(url) + HTTPConnection.debuglevel = 0 + # Usage: python ./ADC_function.py https://cn.bing.com/ + if len(sys.argv)>1: + url = sys.argv[1] + benchmark(t, url) diff --git a/WebCrawler/__init__.py b/WebCrawler/__init__.py index 2ff6762..275a86a 100644 --- a/WebCrawler/__init__.py +++ b/WebCrawler/__init__.py @@ -328,6 +328,6 @@ def special_characters_replacement(text) -> str: replace('|', 'ǀ'). # U+01C0 LATIN LETTER DENTAL CLICK @ Basic Multilingual Plane replace('‘', '‘'). # U+02018 LEFT SINGLE QUOTATION MARK replace('’', '’'). # U+02019 RIGHT SINGLE QUOTATION MARK - replace('&', '&'). - replace('…','…') + replace('…','…'). + replace('&', '&') ) diff --git a/WebCrawler/carib.py b/WebCrawler/carib.py index 790b910..17bf23e 100755 --- a/WebCrawler/carib.py +++ b/WebCrawler/carib.py @@ -6,17 +6,16 @@ import re from ADC_function import * from WebCrawler.storyline import getStoryline + +G_SITE = 'https://www.caribbeancom.com' + + def main(number: str) -> json: try: - # 因演员图片功能还未使用,为提速暂时注释,改为用get_html() - #r, browser = get_html_by_browser('https://www.caribbeancom.com/moviepages/'+number+'/index.html', - # return_type='browser') - #if not r.ok: - # raise ValueError("page not found") - #htmlcode = str(browser.page) - htmlbyte = get_html('https://www.caribbeancom.com/moviepages/'+number+'/index.html', return_type='content') - htmlcode = htmlbyte.decode('euc-jp') - if not htmlcode or '404' in htmlcode or 'class="movie-info section"' not in htmlcode: + url = f'{G_SITE}/moviepages/{number}/index.html' + result, session = get_html_session(url, return_type='session') + htmlcode = result.content.decode('euc-jp') + if not result or not htmlcode or '<title>404' in htmlcode or 'class="movie-info section"' not in htmlcode: raise ValueError("page not found") lx = html.fromstring(htmlcode) @@ -32,13 +31,13 @@ def main(number: str) -> json: 'actor': get_actor(lx), 'release': get_release(lx), 'number': number, - 'cover': 'https://www.caribbeancom.com/moviepages/' + number + '/images/l_l.jpg', + 'cover': f'{G_SITE}/moviepages/{number}/images/l_l.jpg', 'tag': get_tag(lx), 'extrafanart': get_extrafanart(lx), 'label': get_series(lx), 'imagecut': 1, -# 'actor_photo': get_actor_photo(browser), - 'website': 'https://www.caribbeancom.com/moviepages/' + number + '/index.html', +# 'actor_photo': get_actor_photo(lx, session), + 'website': f'{G_SITE}/moviepages/{number}/index.html', 'source': 'carib.py', 'series': get_series(lx), 
} @@ -101,24 +100,25 @@ def get_series(lx: html.HtmlElement) -> str: return '' def get_runtime(lx: html.HtmlElement) -> str: - return str(lx.xpath( "//span[@class='spec-content']/span[@itemprop='duration']/text()")[0]).strip() + return str(lx.xpath("//span[@class='spec-content']/span[@itemprop='duration']/text()")[0]).strip() -def get_actor_photo(browser): - htmla = browser.page.select('#moviepages > div > div:nth-child(1) > div.movie-info.section > ul > li:nth-child(1) > span.spec-content > a') +def get_actor_photo(lx, session): + htmla = lx.xpath("//*[@id='moviepages']/div[@class='container']/div[@class='inner-container']/div[@class='movie-info section']/ul/li[@class='movie-spec']/span[@class='spec-content']/a[@itemprop='actor']") + names = lx.xpath("//*[@id='moviepages']/div[@class='container']/div[@class='inner-container']/div[@class='movie-info section']/ul/li[@class='movie-spec']/span[@class='spec-content']/a[@itemprop='actor']/span[@itemprop='name']/text()") t = {} - for a in htmla: - if a.text.strip() == '他': + for i, name in enumerate(names): + if name.strip() == '他': continue - p = {a.text.strip(): a['href']} + p = {name.strip(): htmla[i].attrib['href']} t.update(p) o = {} for k, v in t.items(): if '/search_act/' not in v: continue - r = browser.open_relative(v) + r = session.get(urljoin(G_SITE, v)) if not r.ok: continue - html = browser.page.prettify() + html = r.text pos = html.find('.full-bg') if pos<0: continue @@ -126,7 +126,7 @@ def get_actor_photo(browser): cssBGjpgs = re.findall(r'background: url\((.+\.jpg)', css, re.I) if not cssBGjpgs or not len(cssBGjpgs[0]): continue - p = {k: urljoin(browser.url, cssBGjpgs[0])} + p = {k: urljoin(r.url, cssBGjpgs[0])} o.update(p) return o diff --git a/WebCrawler/javbus.py b/WebCrawler/javbus.py index b17a382..d61db8d 100644 --- a/WebCrawler/javbus.py +++ b/WebCrawler/javbus.py @@ -118,8 +118,15 @@ def main_uncensored(number): def main(number): try: try: + url = "https://www." 
+ secrets.choice([ + 'buscdn.fun', 'busdmm.fun', 'busfan.fun', 'busjav.fun', + 'cdnbus.fun', + 'dmmbus.fun', 'dmmsee.fun', + 'fanbus.us', + 'seedmm.fun', + ]) + "/" try: - htmlcode = get_html('https://www.fanbus.us/' + number) + htmlcode = get_html(url + number) except: htmlcode = get_html('https://www.javbus.com/' + number) if "<title>404 Page Not Found" in htmlcode: diff --git a/WebCrawler/javdb.py b/WebCrawler/javdb.py index e4e803c..c2ab57b 100755 --- a/WebCrawler/javdb.py +++ b/WebCrawler/javdb.py @@ -4,7 +4,6 @@ import re from lxml import etree import json from ADC_function import * -from mechanicalsoup.stateful_browser import StatefulBrowser from WebCrawler.storyline import getStoryline # import io # sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True) @@ -30,8 +29,8 @@ def getActor(html): idx = idx + 1 return r -def getaphoto(url, browser): - html_page = browser.open_relative(url).text if isinstance(browser, StatefulBrowser) else get_html(url) +def getaphoto(url, session): + html_page = session.get(url).text if isinstance(session, requests.Session) else get_html(url) img_prether = re.compile(r'<span class\=\"avatar\" style\=\"background\-image\: url\((.*?)\)') img_url = img_prether.findall(html_page) if img_url: @@ -39,7 +38,7 @@ def getaphoto(url, browser): else: return '' -def getActorPhoto(html, javdb_site, browser): #//*[@id="star_qdt"]/li/a/img +def getActorPhoto(html, javdb_site, session): actorall = html.xpath('//strong[contains(text(),"演員:")]/../span/a[starts-with(@href,"/actors/")]') if not actorall: return {} @@ -47,7 +46,7 @@ def getActorPhoto(html, javdb_site, browser): #//*[@id="star_qdt"]/li/a/img actor_photo = {} for i in actorall: if i.text in a: - actor_photo[i.text] = getaphoto(urljoin(f'https://{javdb_site}.com', i.attrib['href']), browser) + actor_photo[i.text] = getaphoto(urljoin(f'https://{javdb_site}.com', i.attrib['href']), session) return actor_photo def getStudio(a, html): @@ -178,15 +177,6 @@ def getDirector(html): result1 = str(html.xpath('//strong[contains(text(),"導演")]/../span/text()')).strip(" ['']") result2 = str(html.xpath('//strong[contains(text(),"導演")]/../span/a/text()')).strip(" ['']") return str(result1 + result2).strip('+').replace("', '", '').replace('"', '') -def getOutline0(number): #获取剧情介绍 airav.wiki站点404,函数暂时更名,等无法恢复时删除 - try: - htmlcode = get_html('https://cn.airav.wiki/video/' + number) - from WebCrawler.airav import getOutline as airav_getOutline - result = airav_getOutline(htmlcode) - return result - except: - pass - return '' def getOutline(number, title): #获取剧情介绍 多进程并发查询 return getStoryline(number,title) def getSeries(html): @@ -224,11 +214,11 @@ def main(number): javdb_site = secrets.choice(javdb_sites) if debug: print(f'[!]javdb:select site {javdb_site}') - browser = None + session = None try: javdb_url = 'https://' + javdb_site + '.com/search?q=' + number + '&f=all' - res, browser = get_html_by_browser(javdb_url, cookies=javdb_cookies, return_type='browser') - if not res.ok: + res, session = get_html_session(javdb_url, cookies=javdb_cookies, return_type='session') + if not res: raise query_result = res.text except: @@ -251,8 +241,9 @@ def main(number): raise ValueError("number not found") correct_url = urls[0] try: - if isinstance(browser, StatefulBrowser): # get faster benefit from http keep-alive - detail_page = browser.open_relative(correct_url).text + if isinstance(session, requests.Session): # get faster benefit from http keep-alive + javdb_detail_url = urljoin(res.url, correct_url) + 
detail_page = session.get(javdb_detail_url).text else: javdb_detail_url = 'https://' + javdb_site + '.com' + correct_url detail_page = get_html(javdb_detail_url, cookies=javdb_cookies) @@ -303,8 +294,8 @@ def main(number): 'tag': getTag(lx), 'label': getLabel(lx), 'year': getYear(detail_page), # str(re.search('\d{4}',getRelease(a)).group()), -# 'actor_photo': getActorPhoto(lx, javdb_site, browser), - 'website': 'https://javdb.com' + correct_url, +# 'actor_photo': getActorPhoto(lx, javdb_site, session), + 'website': urljoin('https://javdb.com', correct_url), 'source': 'javdb.py', 'series': getSeries(lx), diff --git a/WebCrawler/storyline.py b/WebCrawler/storyline.py index 4543694..d9a6c89 100644 --- a/WebCrawler/storyline.py +++ b/WebCrawler/storyline.py @@ -4,6 +4,7 @@ import re import json import builtins from ADC_function import * +from lxml.html import fromstring from multiprocessing import Pool from multiprocessing.dummy import Pool as ThreadPool from difflib import SequenceMatcher @@ -110,24 +111,30 @@ def _getStoryline_mp(site, number, title, debug): def getStoryline_airav(number, debug): try: - number_up = number site = secrets.choice(('airav.cc','airav4.club')) url = f'https://{site}/searchresults.aspx?Search={number}&Type=0' - res, browser = get_html_by_browser(url, return_type='browser') - if not res.ok: - raise ValueError(f"get_html_by_browser('{url}') failed") - avs = browser.page.select_one('div.resultcontent > ul > li:nth-child(1) > div') - if number_up not in avs.a.h3.text.upper(): + res, session = get_html_session(url, return_type='session') + if not res: + raise ValueError(f"get_html_by_session('{url}') failed") + lx = fromstring(res.text) + urls = lx.xpath('//div[@class="resultcontent"]/ul/li/div/a[@class="ga_click"]/@href') + txts = lx.xpath('//div[@class="resultcontent"]/ul/li/div/a[@class="ga_click"]/h3[@class="one_name ga_name"]/text()') + detail_url = None + for i, txt in enumerate(txts): + if re.search(number, txt, re.I): + detail_url = urljoin(res.url, urls[i]) + break + if detail_url is None: raise ValueError("number not found") - detail_url = avs.a['href'] - res = browser.open_relative(detail_url) + res = session.get(detail_url) if not res.ok: - raise ValueError(f"browser.open_relative('{detail_url}') failed") - t = browser.page.select_one('head > title').text - airav_number = str(re.findall(r'^\s*\[(.*?)]', t)[0]).upper() - if number.upper() != airav_number: + raise ValueError(f"session.get('{detail_url}') failed") + lx = fromstring(res.text) + t = str(lx.xpath('/html/head/title/text()')[0]).strip() + airav_number = str(re.findall(r'^\s*\[(.*?)]', t)[0]) + if not re.search(number, airav_number, re.I): raise ValueError(f"page number ->[{airav_number}] not match") - desc = browser.page.select_one('li.introduction > span').text.strip() + desc = str(lx.xpath('//span[@id="ContentPlaceHolder1_Label2"]/text()')[0]).strip() return desc except Exception as e: if debug: @@ -140,9 +147,9 @@ def getStoryline_airavwiki(number, debug): try: kwd = number[:6] if re.match(r'\d{6}[\-_]\d{2,3}', number) else number url = f'https://www.airav.wiki/api/video/list?lang=zh-TW&lng=zh-TW&search={kwd}' - result, browser = get_html_by_browser(url, return_type='browser') - if not result.ok: - raise ValueError(f"get_html_by_browser('{url}','{number}') failed") + result, session = get_html_session(url, return_type='session') + if not result: + raise ValueError(f"get_html_session('{url}','{number}') failed") j = json.loads(result.content) if int(j.get('count')) == 0: raise ValueError("number 
not found") @@ -150,12 +157,12 @@ def getStoryline_airavwiki(number, debug): for r in j["result"]: n = r['barcode'] if re.search(number, n, re.I): - link = f'/api/video/barcode/{n}?lng=zh-TW' + link = urljoin(result.url, f'/api/video/barcode/{n}?lng=zh-TW') break if link is None: raise ValueError("number not found") - result = browser.open_relative(link) - if not result.ok or not re.search(number, browser.url, re.I): + result = session.get(link) + if not result.ok or not re.search(number, result.url, re.I): raise ValueError("detail page not found") j = json.loads(result.content) if int(j.get('count')) != 1: @@ -221,7 +228,7 @@ def getStoryline_avno1(number, debug): #获取剧情介绍 从avno1.cc取得 form_select='div.wrapper > div.header > div.search > form', fields = {'kw' : number}, return_type = 'browser') - if not result.ok: + if not result: raise ValueError(f"get_html_by_form('{url}','{number}') failed") s = browser.page.select('div.type_movie > div > ul > li > div') for div in s: @@ -261,41 +268,45 @@ def getStoryline_amazon(q_title, number, debug): if not isinstance(q_title, str) or not len(q_title): return None try: - amazon_cookie, _ = load_cookies('amazon.json') - cookie = amazon_cookie if isinstance(amazon_cookie, dict) else None + cookie, cookies_filepath = load_cookies('amazon.json') url = "https://www.amazon.co.jp/s?k=" + q_title - res, browser = get_html_by_browser(url, cookies=cookie, return_type='browser') - if not res.ok: - raise ValueError("get_html_by_browser() failed") - lks = browser.links(r'/black-curtain/save-eligibility/black-curtain') - if isinstance(lks, list) and len(lks): - browser.follow_link(lks[0]) + res, session = get_html_session(url, cookies=cookie, return_type='session') + if not res: + raise ValueError("get_html_session() failed") + lx = fromstring(res.text) + lks = lx.xpath('//a[contains(@href, "/black-curtain/save-eligibility/black-curtain")]/@href') + if len(lks) and lks[0].startswith('/'): + res = session.get(urljoin(res.url, lks[0])) cookie = None - html = etree.fromstring(str(browser.page), etree.HTMLParser()) - titles = html.xpath("//span[contains(@class,'a-color-base a-text-normal')]/text()") - urls = html.xpath("//span[contains(@class,'a-color-base a-text-normal')]/../@href") + lx = fromstring(res.text) + titles = lx.xpath("//span[contains(@class,'a-color-base a-text-normal')]/text()") + urls = lx.xpath("//span[contains(@class,'a-color-base a-text-normal')]/../@href") if not len(urls) or len(urls) != len(titles): raise ValueError("titles not found") idx = amazon_select_one(titles, q_title, number, debug) if not isinstance(idx, int) or idx < 0: raise ValueError("title and number not found") - furl = urls[idx] - r = browser.open_relative(furl) - if not r.ok: + furl = urljoin(res.url, urls[idx]) + res = session.get(furl) + if not res.ok: raise ValueError("browser.open_relative()) failed.") - lks = browser.links(r'/black-curtain/save-eligibility/black-curtain') - if isinstance(lks, list) and len(lks): - browser.follow_link(lks[0]) + lx = fromstring(res.text) + lks = lx.xpath('//a[contains(@href, "/black-curtain/save-eligibility/black-curtain")]/@href') + if len(lks) and lks[0].startswith('/'): + res = session.get(urljoin(res.url, lks[0])) cookie = None - - ama_t = browser.page.select_one('#productDescription > p').text.replace('\n',' ').strip() - ama_t = re.sub(r'審査番号:\d+', '', ama_t) + lx = fromstring(res.text) + div = lx.xpath('//*[@id="productDescription"]')[0] + ama_t = ' '.join([e.text.strip() for e in div if not re.search('Comment|h3', str(e.tag), re.I) and 
isinstance(e.text, str)])
+        ama_t = re.sub(r'審査番号:\d+', '', ama_t).strip()
         if cookie is None:
-            # 自动创建的cookies文件放在搜索路径表的末端,最低优先级。有amazon.co.jp帐号的用户可以从浏览器导出cookie放在靠前搜索路径
+            # Drop the stale cookies file (user-supplied or auto-created) so a bad cookie cannot keep breaking lookups
+            if cookies_filepath: Path(cookies_filepath).unlink(missing_ok=True)
+            # The auto-created cookies file sits at the end of the search-path list (lowest priority); users with an amazon.co.jp account can export cookies from a browser into a higher-priority path
             ama_save = Path.home() / ".local/share/avdc/amazon.json"
             ama_save.parent.mkdir(parents=True, exist_ok=True)
-            ama_save.write_text(json.dumps(browser.session.cookies.get_dict(), sort_keys=True, indent=4), encoding='utf-8')
+            ama_save.write_text(json.dumps(session.cookies.get_dict(), sort_keys=True, indent=4), encoding='utf-8')
         return ama_t
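
Usage sketch (not part of the patch itself): a minimal illustration of how the crawlers above are expected to call the new get_html_session() helper once this change is applied. The URL and variable names are placeholders, not taken from the patch.

    from urllib.parse import urljoin
    from ADC_function import get_html_session

    # No URL argument: returns a configured, reusable requests.Session
    # (retry, timeout, proxy and User-Agent already applied).
    session = get_html_session()

    # Default call: returns the decoded response text, or None on failure.
    text = get_html_session('https://example.com/')

    # return_type='session': returns (response, session) so follow-up requests
    # reuse the same keep-alive connection. On failure the helper returns None
    # instead of a tuple, which is why the crawlers unpack it inside try/except.
    res, session = get_html_session('https://example.com/search', return_type='session')
    detail = session.get(urljoin(res.url, '/detail/1'))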