From 2ab40201b55d4c4cff390b5462f0dfcfbbc1507b Mon Sep 17 00:00:00 2001 From: lededev Date: Mon, 31 May 2021 19:12:06 +0800 Subject: [PATCH] javdb:FC2 PPV number precise matching --- WebCrawler/javdb.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/WebCrawler/javdb.py b/WebCrawler/javdb.py index 45f2509..0ca6b4d 100644 --- a/WebCrawler/javdb.py +++ b/WebCrawler/javdb.py @@ -212,6 +212,9 @@ def main(number): # else: # number = number.upper() number = number.upper() + isFC2PPV = False + if re.search(r'^FC2-\d+', number): + isFC2PPV = True cookie_json = './' + javdb_site + '.json' javdb_cookies = None # 不加载过期的cookie,javdb登录界面显示为7天免登录,故假定cookie有效期为7天 @@ -240,6 +243,9 @@ def main(number): try: correct_url = urls[ids.index(number)] except: + # 为避免获得错误番号,FC2 PPV 只要精确对应的结果 + if isFC2PPV and ids[0] != number: + raise ValueError("number not found") # if input number is "STAR438" not "STAR-438", use first search result. correct_url = urls[0] try: @@ -264,7 +270,9 @@ def main(number): imagecut = 1 cover_small = getCover(detail_page) - number = getNum(detail_page) + dp_number = getNum(detail_page) + if isFC2PPV and dp_number != number: + raise ValueError("number not found") title = getTitle(detail_page) if title and number: # remove duplicate title @@ -305,6 +313,7 @@ def main(number): if __name__ == "__main__": # print(main('blacked.20.05.30')) # print(main('AGAV-042')) - print(main('BANK-022')) + # print(main('BANK-022')) print(main('FC2-735670')) - print(main('MVSD-439')) \ No newline at end of file + print(main('FC2-1174949')) + print(main('MVSD-439'))