From 7656c63afee3c459a09e659e52a0a22b9aea0077 Mon Sep 17 00:00:00 2001 From: Deng Zhou <553531284@qq.com> Date: Thu, 5 May 2022 22:04:51 +0800 Subject: [PATCH] javdb xpath --- WebCrawler/javdb.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/WebCrawler/javdb.py b/WebCrawler/javdb.py index 4eb95cb..941c774 100755 --- a/WebCrawler/javdb.py +++ b/WebCrawler/javdb.py @@ -124,7 +124,7 @@ def getCover_small(html, index=0): # javdb sometime returns multiple results # DO NOT just get the firt one, get the one with correct index number try: - result = html.xpath("//*[@class='movie-list h cols-4']/div/a/div[contains(@class, 'cover')]/img/@src")[index] + result = html.xpath("//*[contains(@class,'movie-list')]/div/a/div[contains(@class, 'cover')]/img/@src")[index] if not 'https' in result: result = 'https:' + result return result @@ -242,12 +242,12 @@ def main(number): # javdb sometime returns multiple results, # and the first elememt maybe not the one we are looking for # iterate all candidates and find the match one - urls = html.xpath('//*[@class="movie-list h cols-4"]/div/a/@href') + urls = html.xpath('//*[contains(@class,"movie-list")]/div/a/@href') # 记录一下欧美的ids ['Blacked','Blacked'] if re.search(r'[a-zA-Z]+\.\d{2}\.\d{2}\.\d{2}', number): correct_url = urls[0] else: - ids = html.xpath('//*[@class="movie-list h cols-4"]/div/a/div[contains(@class, "video-title")]/strong/text()') + ids = html.xpath('//*[contains(@class,"movie-list")]/div/a/div[contains(@class, "video-title")]/strong/text()') try: correct_url = urls[ids.index(number)] except: