增加欧美的刮削判断

This commit is contained in:
Feng4
2020-12-20 00:42:58 +08:00
committed by GitHub
parent c94fcd47fa
commit fc4cc4c122

View File

@@ -103,10 +103,17 @@ def getCover_small(a, index=0):
result = 'https:' + result result = 'https:' + result
return result return result
except: # 2020.7.17 Repair Cover Url crawl except: # 2020.7.17 Repair Cover Url crawl
result = html.xpath("//div[@class='item-image fix-scale-cover']/img/@data-src")[index] try:
if not 'https' in result: result = html.xpath("//div[@class='item-image fix-scale-cover']/img/@data-src")[index]
result = 'https:' + result if not 'https' in result:
return result result = 'https:' + result
return result
except:
result = html.xpath("//div[@class='item-image']/img/@data-src")[index]
if not 'https' in result:
result = 'https:' + result
return result
def getCover(htmlcode): def getCover(htmlcode):
html = etree.fromstring(htmlcode, etree.HTMLParser()) html = etree.fromstring(htmlcode, etree.HTMLParser())
try: try:
@@ -141,14 +148,23 @@ def main(number):
# and the first element may not be the one we are looking for # and the first element may not be the one we are looking for
# iterate over all candidates and find the matching one # iterate over all candidates and find the matching one
urls = html.xpath('//*[@id="videos"]/div/div/a/@href') urls = html.xpath('//*[@id="videos"]/div/div/a/@href')
ids =html.xpath('//*[@id="videos"]/div/div/a/div[contains(@class, "uid")]/text()') # 记录一下欧美的ids ['Blacked','Blacked']
correct_url = urls[ids.index(number)] if re.search(r'[a-zA-Z]+\.\d{2}\.\d{2}\.\d{2}', number):
correct_url = urls[0]
else:
ids =html.xpath('//*[@id="videos"]/div/div/a/div[contains(@class, "uid")]/text()')
correct_url = urls[ids.index(number)]
detail_page = get_html('https://javdb.com' + correct_url) detail_page = get_html('https://javdb.com' + correct_url)
# no cut image by default # no cut image by default
imagecut = 3 imagecut = 3
# If a gray image exists, then replace it with the normal cover # If a gray image exists, then replace it with the normal cover
cover_small = getCover_small(query_result, index=ids.index(number)) if re.search(r'[a-zA-Z]+\.\d{2}\.\d{2}\.\d{2}', number):
cover_small = getCover_small(query_result)
else:
cover_small = getCover_small(query_result, index=ids.index(number))
if 'placeholder' in cover_small: if 'placeholder' in cover_small:
# replace with normal cover and cut it # replace with normal cover and cut it
imagecut = 1 imagecut = 1