From f82d6a5317122aabd0d5d7d46c7fb30ebb773f6a Mon Sep 17 00:00:00 2001 From: yoshiko2 Date: Fri, 20 May 2022 20:07:10 +0800 Subject: [PATCH] Fix search func in source `dlsite` --- WebCrawler/dlsite.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/WebCrawler/dlsite.py b/WebCrawler/dlsite.py index 9fdaf17..cf0dc42 100644 --- a/WebCrawler/dlsite.py +++ b/WebCrawler/dlsite.py @@ -120,7 +120,15 @@ def main(number): else: htmlcode = get_html(f'https://www.dlsite.com/maniax/fsr/=/language/jp/sex_category/male/keyword/{number}/order/trend/work_type_category/movie',cookies={'locale': 'zh-cn'}) html = etree.HTML(htmlcode) - a = html.xpath('//*[@id="search_result_img_box"]/li[1]/dl/dd[2]/div[2]/a/@href')[0] + search_result = html.xpath('//*[@id="search_result_img_box"]/li[1]/dl/dd[2]/div[2]/a/@href') + if len(search_result) == 0: + number = number.replace("THE ANIMATION", "").replace("THE ANIMATION", "").replace('上巻','').replace('下巻','').replace('前編','').replace('後編','') + htmlcode = get_html( + f'https://www.dlsite.com/maniax/fsr/=/language/jp/sex_category/male/keyword/{number}/order/trend/work_type_category/movie', + cookies={'locale': 'zh-cn'}) + html = etree.HTML(htmlcode) + search_result = html.xpath('//*[@id="search_result_img_box"]/li[1]/dl/dd[2]/div[2]/a/@href') + a = search_result[0] html = etree.HTML(get_html(a,cookies={'locale': 'zh-cn'})) number = str(re.findall("\wJ\w+",a)).strip(" [']") dic = {