From 4c8665f633a4292117b2c8c82d0b3a8b0e9eb635 Mon Sep 17 00:00:00 2001
From: Feng4
Date: Sun, 20 Dec 2020 21:10:43 +0800
Subject: [PATCH] =?UTF-8?q?=E8=A7=A3=E5=86=B3=E5=8F=91=E5=B8=83=E6=97=A5?=
 =?UTF-8?q?=E6=9C=9F=E7=9A=84=E9=97=AE=E9=A2=98?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
Notes (review):
- Subject, decoded: "解决发布日期的问题" ("Fix the release-date problem").
- getRelease() now captures the date explicitly. The earlier revision of this
  patch ended its pattern in a lazy group, `(.*?)`, which always matches the
  empty string, so the function returned '' for every input.
- The commented-out copy of the old XPath code is no longer re-added; the
  removed lines in the hunk already record it.
- Leading whitespace inside the hunk was reconstructed (4 spaces per level);
  confirm against WebCrawler/javdb.py before applying. The post-image hash on
  the `index` line was not recomputed for this revision.

 WebCrawler/javdb.py | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/WebCrawler/javdb.py b/WebCrawler/javdb.py
index 41a4775..b463b3d 100644
--- a/WebCrawler/javdb.py
+++ b/WebCrawler/javdb.py
@@ -66,10 +66,16 @@ def getYear(getRelease):
         result = ''
     return result
 def getRelease(a):
-    html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
-    result1 = str(html.xpath('//strong[contains(text(),"時間")]/../span/text()')).strip(" ['']")
-    result2 = str(html.xpath('//strong[contains(text(),"時間")]/../span/a/text()')).strip(" ['']")
-    return str(result1 + result2).strip('+')
+    """Return the first date found after the '日期:' label in `a`, or ''."""
+    # Match the date explicitly: a trailing lazy `(.*?)` group would always
+    # capture the empty string and make this function return '' every time.
+    patherr = re.compile(r'日期\:\s*?.*?(\d{4}-\d{1,2}-\d{1,2})')
+    dates = patherr.findall(a)
+    if dates:
+        result = dates[0]
+    else:
+        result = ''
+    return result
 def getTag(a):
     html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
     try: