Merge pull request #375 from Feng4/patch-4
欧美番号规则添加,欧美番号命名,例如xxx.20.12.20
This commit is contained in:
@@ -103,10 +103,17 @@ def getCover_small(a, index=0):
|
|||||||
result = 'https:' + result
|
result = 'https:' + result
|
||||||
return result
|
return result
|
||||||
except: # 2020.7.17 Repair Cover Url crawl
|
except: # 2020.7.17 Repair Cover Url crawl
|
||||||
result = html.xpath("//div[@class='item-image fix-scale-cover']/img/@data-src")[index]
|
try:
|
||||||
if not 'https' in result:
|
result = html.xpath("//div[@class='item-image fix-scale-cover']/img/@data-src")[index]
|
||||||
result = 'https:' + result
|
if not 'https' in result:
|
||||||
return result
|
result = 'https:' + result
|
||||||
|
return result
|
||||||
|
except:
|
||||||
|
result = html.xpath("//div[@class='item-image']/img/@data-src")[index]
|
||||||
|
if not 'https' in result:
|
||||||
|
result = 'https:' + result
|
||||||
|
return result
|
||||||
|
|
||||||
def getCover(htmlcode):
|
def getCover(htmlcode):
|
||||||
html = etree.fromstring(htmlcode, etree.HTMLParser())
|
html = etree.fromstring(htmlcode, etree.HTMLParser())
|
||||||
try:
|
try:
|
||||||
@@ -141,14 +148,23 @@ def main(number):
|
|||||||
# and the first element may not be the one we are looking for
|
# and the first element may not be the one we are looking for
|
||||||
# iterate all candidates and find the matching one
|
# iterate all candidates and find the matching one
|
||||||
urls = html.xpath('//*[@id="videos"]/div/div/a/@href')
|
urls = html.xpath('//*[@id="videos"]/div/div/a/@href')
|
||||||
ids =html.xpath('//*[@id="videos"]/div/div/a/div[contains(@class, "uid")]/text()')
|
# 记录一下欧美的ids ['Blacked','Blacked']
|
||||||
correct_url = urls[ids.index(number)]
|
if re.search(r'[a-zA-Z]+\.\d{2}\.\d{2}\.\d{2}', number):
|
||||||
|
correct_url = urls[0]
|
||||||
|
else:
|
||||||
|
ids =html.xpath('//*[@id="videos"]/div/div/a/div[contains(@class, "uid")]/text()')
|
||||||
|
correct_url = urls[ids.index(number)]
|
||||||
|
|
||||||
detail_page = get_html('https://javdb.com' + correct_url)
|
detail_page = get_html('https://javdb.com' + correct_url)
|
||||||
|
|
||||||
# no cut image by default
|
# no cut image by default
|
||||||
imagecut = 3
|
imagecut = 3
|
||||||
# If gray image exists, then replace with normal cover
|
# If gray image exists, then replace with normal cover
|
||||||
cover_small = getCover_small(query_result, index=ids.index(number))
|
if re.search(r'[a-zA-Z]+\.\d{2}\.\d{2}\.\d{2}', number):
|
||||||
|
cover_small = getCover_small(query_result)
|
||||||
|
else:
|
||||||
|
cover_small = getCover_small(query_result, index=ids.index(number))
|
||||||
|
|
||||||
if 'placeholder' in cover_small:
|
if 'placeholder' in cover_small:
|
||||||
# replace with normal cover and cut it
|
# replace with normal cover and cut it
|
||||||
imagecut = 1
|
imagecut = 1
|
||||||
|
|||||||
@@ -41,6 +41,11 @@ def get_number(debug,filepath: str) -> str:
|
|||||||
file_number = re.search(r'\w+-\w+', filename, re.A).group()
|
file_number = re.search(r'\w+-\w+', filename, re.A).group()
|
||||||
return file_number
|
return file_number
|
||||||
else: # 提取不含减号-的番号,FANZA CID
|
else: # 提取不含减号-的番号,FANZA CID
|
||||||
|
# 欧美番号匹配规则
|
||||||
|
oumei = re.search(r'[a-zA-Z]+\.\d{2}\.\d{2}\.\d{2}', filepath)
|
||||||
|
if oumei:
|
||||||
|
return oumei.group()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
return str(
|
return str(
|
||||||
re.findall(r'(.+?)\.',
|
re.findall(r'(.+?)\.',
|
||||||
@@ -61,6 +66,11 @@ def get_number(debug,filepath: str) -> str:
|
|||||||
file_number = re.search(r'\w+-\w+', filename, re.A).group()
|
file_number = re.search(r'\w+-\w+', filename, re.A).group()
|
||||||
return file_number
|
return file_number
|
||||||
else: # 提取不含减号-的番号,FANZA CID
|
else: # 提取不含减号-的番号,FANZA CID
|
||||||
|
# 欧美番号匹配规则
|
||||||
|
oumei = re.search(r'[a-zA-Z]+\.\d{2}\.\d{2}\.\d{2}', filepath)
|
||||||
|
if oumei:
|
||||||
|
return oumei.group()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
return str(
|
return str(
|
||||||
re.findall(r'(.+?)\.',
|
re.findall(r'(.+?)\.',
|
||||||
@@ -72,4 +82,4 @@ def get_number(debug,filepath: str) -> str:
|
|||||||
|
|
||||||
# if __name__ == "__main__":
|
# if __name__ == "__main__":
|
||||||
# import doctest
|
# import doctest
|
||||||
# doctest.testmod(raise_on_error=True)
|
# doctest.testmod(raise_on_error=True)
|
||||||
|
|||||||
Reference in New Issue
Block a user