增加欧美的刮削判断
This commit is contained in:
@@ -103,10 +103,17 @@ def getCover_small(a, index=0):
|
|||||||
result = 'https:' + result
|
result = 'https:' + result
|
||||||
return result
|
return result
|
||||||
except: # 2020.7.17 Repair Cover Url crawl
|
except: # 2020.7.17 Repair Cover Url crawl
|
||||||
result = html.xpath("//div[@class='item-image fix-scale-cover']/img/@data-src")[index]
|
try:
|
||||||
if not 'https' in result:
|
result = html.xpath("//div[@class='item-image fix-scale-cover']/img/@data-src")[index]
|
||||||
result = 'https:' + result
|
if not 'https' in result:
|
||||||
return result
|
result = 'https:' + result
|
||||||
|
return result
|
||||||
|
except:
|
||||||
|
result = html.xpath("//div[@class='item-image']/img/@data-src")[index]
|
||||||
|
if not 'https' in result:
|
||||||
|
result = 'https:' + result
|
||||||
|
return result
|
||||||
|
|
||||||
def getCover(htmlcode):
|
def getCover(htmlcode):
|
||||||
html = etree.fromstring(htmlcode, etree.HTMLParser())
|
html = etree.fromstring(htmlcode, etree.HTMLParser())
|
||||||
try:
|
try:
|
||||||
@@ -141,14 +148,23 @@ def main(number):
|
|||||||
# and the first element may not be the one we are looking for
|
# and the first element may not be the one we are looking for
|
||||||
# iterate all candidates and find the matching one
|
# iterate all candidates and find the matching one
|
||||||
urls = html.xpath('//*[@id="videos"]/div/div/a/@href')
|
urls = html.xpath('//*[@id="videos"]/div/div/a/@href')
|
||||||
ids =html.xpath('//*[@id="videos"]/div/div/a/div[contains(@class, "uid")]/text()')
|
# 记录一下欧美的ids ['Blacked','Blacked']
|
||||||
correct_url = urls[ids.index(number)]
|
if re.search(r'[a-zA-Z]+\.\d{2}\.\d{2}\.\d{2}', number):
|
||||||
|
correct_url = urls[0]
|
||||||
|
else:
|
||||||
|
ids =html.xpath('//*[@id="videos"]/div/div/a/div[contains(@class, "uid")]/text()')
|
||||||
|
correct_url = urls[ids.index(number)]
|
||||||
|
|
||||||
detail_page = get_html('https://javdb.com' + correct_url)
|
detail_page = get_html('https://javdb.com' + correct_url)
|
||||||
|
|
||||||
# no cut image by default
|
# no cut image by default
|
||||||
imagecut = 3
|
imagecut = 3
|
||||||
# If gray image exists, then replace with normal cover
|
# If gray image exists, then replace with normal cover
|
||||||
cover_small = getCover_small(query_result, index=ids.index(number))
|
if re.search(r'[a-zA-Z]+\.\d{2}\.\d{2}\.\d{2}', number):
|
||||||
|
cover_small = getCover_small(query_result)
|
||||||
|
else:
|
||||||
|
cover_small = getCover_small(query_result, index=ids.index(number))
|
||||||
|
|
||||||
if 'placeholder' in cover_small:
|
if 'placeholder' in cover_small:
|
||||||
# replace with normal cover and cut it
|
# replace with normal cover and cut it
|
||||||
imagecut = 1
|
imagecut = 1
|
||||||
|
|||||||
Reference in New Issue
Block a user