From e5bc900b40970a9c1f444cd615957ebf85b0b4ac Mon Sep 17 00:00:00 2001 From: lededev Date: Sun, 10 Apr 2022 15:29:46 +0800 Subject: [PATCH] =?UTF-8?q?=E5=B0=86=E6=AC=A7=E7=BE=8E=E5=85=A8=E9=83=A8?= =?UTF-8?q?=E5=BD=92=E7=B1=BB=E5=88=B0=E6=97=A0=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- WebCrawler/javbus.py | 24 +++++++++++++----------- WebCrawler/javdb.py | 4 +++- 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/WebCrawler/javbus.py b/WebCrawler/javbus.py index 2a5a303..5218016 100644 --- a/WebCrawler/javbus.py +++ b/WebCrawler/javbus.py @@ -88,7 +88,8 @@ def getUncensored(html): return bool(x) def main_uncensored(number): - htmlcode = get_html('https://www.javbus.com/ja/' + number) + w_number = number.replace('.', '-') + htmlcode = get_html('https://www.javbus.red/' + w_number) if "404 Page Not Found" in htmlcode: raise Exception('404 page not found') lx = etree.fromstring(htmlcode, etree.HTMLParser()) @@ -97,7 +98,7 @@ def main_uncensored(number): 'title': title, 'studio': getStudioJa(lx), 'year': getYear(lx), - 'outline': getOutline(number, title), + 'outline': getOutline(w_number, title), 'runtime': getRuntime(lx), 'director': getDirectorJa(lx), 'actor': getActor(lx), @@ -109,10 +110,10 @@ def main_uncensored(number): 'label': getSeriseJa(lx), 'imagecut': 0, # 'actor_photo': '', - 'website': 'https://www.javbus.com/ja/' + number, + 'website': 'https://www.javbus.red/' + w_number, 'source': 'javbus.py', 'series': getSeriseJa(lx), - '无码': getUncensored(lx) + '无码': True } js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8') return js @@ -174,12 +175,13 @@ def main(number): if __name__ == "__main__" : config.G_conf_override['debug_mode:switch'] = True - print(main('ABP-888')) - print(main('ABP-960')) - print(main('ADV-R0624')) # 404 - print(main('MMNT-010')) - print(main('ipx-292')) - print(main('CEMD-011')) - print(main('CJOD-278')) + # print(main('ABP-888')) + # print(main('ABP-960')) + # print(main('ADV-R0624')) # 404 + # print(main('MMNT-010')) + # print(main('ipx-292')) + # print(main('CEMD-011')) + # print(main('CJOD-278')) + print(main('BrazzersExxtra.21.02.01')) print(main('100221_001')) print(main('AVSW-061')) diff --git a/WebCrawler/javdb.py b/WebCrawler/javdb.py index 2d21e29..3dfff16 100755 --- a/WebCrawler/javdb.py +++ b/WebCrawler/javdb.py @@ -180,7 +180,8 @@ def getUserRating(html): except: return def getUncensored(html): - x = html.xpath('//strong[contains(text(),"類別")]/../span/a[contains(@href,"/tags/uncensored?")]') + x = html.xpath('//strong[contains(text(),"類別")]/../span/a[contains(@href,"/tags/uncensored?")' + ' or contains(@href,"/tags/western?")]') return bool(x) def main(number): @@ -342,3 +343,4 @@ if __name__ == "__main__": # print(main('EBOD-646')) # print(main('LOVE-262')) print(main('ABP-890')) + print(main('blacked.14.12.08'))