javdb: only accept results whose number exactly matches the requested one
This commit is contained in:
13
WebCrawler/javdb.py
Normal file → Executable file
13
WebCrawler/javdb.py
Normal file → Executable file
@@ -216,7 +216,6 @@ def main(number):
|
|||||||
# else:
|
# else:
|
||||||
# number = number.upper()
|
# number = number.upper()
|
||||||
number = number.upper()
|
number = number.upper()
|
||||||
isFC2PPV = bool(re.search(r'^FC2-\d+', number))
|
|
||||||
cookie_json = './' + javdb_site + '.json'
|
cookie_json = './' + javdb_site + '.json'
|
||||||
javdb_cookies = None
|
javdb_cookies = None
|
||||||
# 不加载过期的cookie,javdb登录界面显示为7天免登录,故假定cookie有效期为7天
|
# 不加载过期的cookie,javdb登录界面显示为7天免登录,故假定cookie有效期为7天
|
||||||
@@ -241,14 +240,13 @@ def main(number):
|
|||||||
if re.search(r'[a-zA-Z]+\.\d{2}\.\d{2}\.\d{2}', number):
|
if re.search(r'[a-zA-Z]+\.\d{2}\.\d{2}\.\d{2}', number):
|
||||||
correct_url = urls[0]
|
correct_url = urls[0]
|
||||||
else:
|
else:
|
||||||
ids =html.xpath('//*[@id="videos"]/div/div/a/div[contains(@class, "uid")]/text()')
|
ids = html.xpath('//*[@id="videos"]/div/div/a/div[contains(@class, "uid")]/text()')
|
||||||
try:
|
try:
|
||||||
correct_url = urls[ids.index(number)]
|
correct_url = urls[ids.index(number)]
|
||||||
except:
|
except:
|
||||||
# 为避免获得错误番号,FC2 PPV 只要精确对应的结果
|
# 为避免获得错误番号,只要精确对应的结果
|
||||||
if isFC2PPV and ids[0] != number:
|
if ids[0].upper() != number:
|
||||||
raise ValueError("number not found")
|
raise ValueError("number not found")
|
||||||
# if input number is "STAR438" not "STAR-438", use first search result.
|
|
||||||
correct_url = urls[0]
|
correct_url = urls[0]
|
||||||
try:
|
try:
|
||||||
javdb_detail_url = 'https://' + javdb_site + '.com' + correct_url
|
javdb_detail_url = 'https://' + javdb_site + '.com' + correct_url
|
||||||
@@ -273,7 +271,7 @@ def main(number):
|
|||||||
cover_small = getCover(detail_page)
|
cover_small = getCover(detail_page)
|
||||||
|
|
||||||
dp_number = getNum(detail_page)
|
dp_number = getNum(detail_page)
|
||||||
if isFC2PPV and dp_number != number:
|
if dp_number.upper() != number:
|
||||||
raise ValueError("number not found")
|
raise ValueError("number not found")
|
||||||
title = getTitle(detail_page)
|
title = getTitle(detail_page)
|
||||||
if title and dp_number:
|
if title and dp_number:
|
||||||
@@ -318,5 +316,6 @@ if __name__ == "__main__":
|
|||||||
# print(main('AGAV-042'))
|
# print(main('AGAV-042'))
|
||||||
# print(main('BANK-022'))
|
# print(main('BANK-022'))
|
||||||
print(main('FC2-735670'))
|
print(main('FC2-735670'))
|
||||||
print(main('FC2-1174949'))
|
print(main('FC2-1174949')) # not found
|
||||||
print(main('MVSD-439'))
|
print(main('MVSD-439'))
|
||||||
|
print(main('EHM0001')) # not found
|
||||||
|
|||||||
Reference in New Issue
Block a user