javdb: only accept an exact number match

This commit is contained in:
lededev
2021-06-29 02:13:46 +08:00
parent b85977e16b
commit 5bd044dc61

13
WebCrawler/javdb.py Normal file → Executable file
View File

@@ -216,7 +216,6 @@ def main(number):
# else:
# number = number.upper()
number = number.upper()
isFC2PPV = bool(re.search(r'^FC2-\d+', number))
cookie_json = './' + javdb_site + '.json'
javdb_cookies = None
# 不加载过期的cookiejavdb登录界面显示为7天免登录故假定cookie有效期为7天
@@ -241,14 +240,13 @@ def main(number):
if re.search(r'[a-zA-Z]+\.\d{2}\.\d{2}\.\d{2}', number):
correct_url = urls[0]
else:
ids =html.xpath('//*[@id="videos"]/div/div/a/div[contains(@class, "uid")]/text()')
ids = html.xpath('//*[@id="videos"]/div/div/a/div[contains(@class, "uid")]/text()')
try:
correct_url = urls[ids.index(number)]
except:
# 为避免获得错误番号,FC2 PPV 只要精确对应的结果
if isFC2PPV and ids[0] != number:
# 为避免获得错误番号,只要精确对应的结果
if ids[0].upper() != number:
raise ValueError("number not found")
# if input number is "STAR438" not "STAR-438", use first search result.
correct_url = urls[0]
try:
javdb_detail_url = 'https://' + javdb_site + '.com' + correct_url
@@ -273,7 +271,7 @@ def main(number):
cover_small = getCover(detail_page)
dp_number = getNum(detail_page)
if isFC2PPV and dp_number != number:
if dp_number.upper() != number:
raise ValueError("number not found")
title = getTitle(detail_page)
if title and dp_number:
@@ -318,5 +316,6 @@ if __name__ == "__main__":
# print(main('AGAV-042'))
# print(main('BANK-022'))
print(main('FC2-735670'))
print(main('FC2-1174949'))
print(main('FC2-1174949')) # not found
print(main('MVSD-439'))
print(main('EHM0001')) # not found