Merge pull request #787 from 553531284/master

修正javdb、fanza剧照
2022-04-30 19:47:23 +08:00
parent 2eb50e9b8d 7efb3aeba7
commit e6af7c0520
3 changed files with 6 additions and 4 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -110,3 +110,5 @@ venv.bak/
 JAV_output/**/*
 failed/*
 .vscode/launch.json
 .idea
--- a/WebCrawler/fanza.py
+++ b/WebCrawler/fanza.py
@@ -67,7 +67,7 @@ def getOutline(html):
 def getExtrafanart(htmlcode):  # 获取剧照
-    html_pather = re.compile(r'<div id=\"sample-image-block\"[\s\S]*?<br></div></div>')
+    html_pather = re.compile(r'<div id=\"sample-image-block\"[\s\S]*?<br></div>\n</div>')
    html = html_pather.search(htmlcode)
    if html:
        html = html.group()
--- a/WebCrawler/javdb.py
+++ b/WebCrawler/javdb.py
@@ -124,7 +124,7 @@ def getCover_small(html, index=0):
    # javdb sometimes returns multiple results
    # DO NOT just get the first one, get the one with the correct index number
    try:
-        result = html.xpath("//div[@class='item-image fix-scale-cover']/img/@src")[index]
+        result = html.xpath("//*[@class='movie-list h cols-4']/div/a/div[contains(@class, 'cover')]/img/@src")[index]
        if not 'https' in result:
            result = 'https:' + result
        return result
@@ -242,12 +242,12 @@ def main(number):
        # javdb sometimes returns multiple results,
        # and the first element may not be the one we are looking for;
        # iterate over all candidates and find the matching one
-        urls = html.xpath('//div[@class="item"]/a[@class="box"]/@href')
+        urls = html.xpath('//*[@class="movie-list h cols-4"]/div/a/@href')
        # 记录一下欧美的ids  ['Blacked','Blacked']
        if re.search(r'[a-zA-Z]+\.\d{2}\.\d{2}\.\d{2}', number):
            correct_url = urls[0]
        else:
-            ids = html.xpath('//div[@class="item"]/a[@class="box"]/div[@class="video-title"]/strong/text()')
+            ids = html.xpath('//*[@class="movie-list h cols-4"]/div/a/div[contains(@class, "video-title")]/strong/text()')
            try:
                correct_url = urls[ids.index(number)]
            except: