From b67acd256b56ad899ecc37d111e154502f7a6e8f Mon Sep 17 00:00:00 2001
From: lededev
Date: Sun, 31 Oct 2021 02:51:30 +0800
Subject: [PATCH] data from json

---
 WebCrawler/storyline.py | 24 ++++++++++++++----------
 config.ini              |  5 ++---
 2 files changed, 16 insertions(+), 13 deletions(-)

diff --git a/WebCrawler/storyline.py b/WebCrawler/storyline.py
index 75ee517..1da1446 100644
--- a/WebCrawler/storyline.py
+++ b/WebCrawler/storyline.py
@@ -137,27 +137,31 @@ def getStoryline_airav(number, debug):
 def getStoryline_airavwiki(number, debug):
     try:
         kwd = number[:6] if re.match(r'\d{6}[\-_]\d{2,3}', number) else number
-        url = f'https://cn.airav.wiki/?search={kwd}'
+        url = f'https://www.airav.wiki/api/video/list?barcode=GZAP-055&lang=zh-TW&search={kwd}&lng=zh-CN'
         result, browser = get_html_by_browser(url, return_type='browser')
         if not result.ok:
             raise ValueError(f"get_html_by_browser('{url}','{number}') failed")
-        s = browser.page.select('div.row > div > div.videoList.row > div > a.d-block')
+        j = json.loads(result.content)
+        if int(j.get('count')) == 0:
+            raise ValueError("number not found")
         link = None
-        for a in s:
-            title = a.img['title']
-            if re.search(number, title, re.I):
-                link = a
+        for r in j["result"]:
+            n = r['barcode']
+            if re.search(number, n, re.I):
+                link = f'/api/video/barcode/{n}?lng=zh-CN'
                 break
         if link is None:
             raise ValueError("number not found")
-        result = browser.follow_link(link)
+        result = browser.open_relative(link)
         if not result.ok or not re.search(number, browser.url, re.I):
             raise ValueError("detail page not found")
-        title = browser.page.select('head > title')[0].text.strip()
-        detail_number = str(re.findall('\[(.*?)]', title)[0])
+        j = json.loads(result.content)
+        if int(j.get('count')) != 1:
+            raise ValueError("number not found")
+        detail_number = j["result"]['barcode']
         if not re.search(number, detail_number, re.I):
             raise ValueError("detail page number not match, got ->[{detail_number}]")
-        desc = browser.page.select_one('div.d-flex.videoDataBlock > div.synopsis > p').text.strip()
+        desc = j["result"]['description']
         return desc
     except Exception as e:
         if debug:
diff --git a/config.ini b/config.ini
index c06f26d..723453f 100755
--- a/config.ini
+++ b/config.ini
@@ -91,16 +91,15 @@ extrafanart_folder=extrafanart
 [storyline]
 ; website为javbus javdb avsox xcity carib时,site censored_site uncensored_site 为获取剧情简介信息的
 ; 可选数据源站点列表。列表内站点同时并发查询,取值优先级由冒号前的序号决定,从小到大,数字小的站点没数据才会采用后面站点获得的。
-; 其中airavwiki airav avno1 58avgo是中文剧情简介,区别是airav只能查有码,airavwiki avno1有码无码都能查,
+; 其中airavwiki airav avno1 58avgo是中文剧情简介,区别是airav只能查有码,avno1 airavwiki 有码无码都能查,
 ; 58avgo只能查无码或者流出破解马赛克的影片(此功能没使用)。
 ; xcity和amazon是日语的,由于amazon商城没有番号信息,选中对应DVD的准确率仅99.6%。如果三个列表全部为空则不查询,
 ; 设置成不查询可大幅提高刮削速度。
 ; site=
-site=1:airavwiki,4:avno1
+site=1:avno1,4:airavwiki
 censored_site=2:airav,5:xcity,6:amazon
 uncensored_site=3:58avgo
 ; 运行模式:0:顺序执行(最慢) 1:线程池(默认值) 2:进程池(启动开销比线程池大,并发站点越多越快)
 run_mode=1
 ; show_result剧情简介调试信息 0关闭 1简略 2详细(详细部分不记入日志),剧情简介失效时可打开2查看原因
 show_result=0
-