def getRelease(a):
    """Return the release-date text that follows the ``日期:`` label in *a*.

    Args:
        a: Page source (HTML or plain text) as a string.

    Returns:
        The first run of text found after the ``日期:`` label, with
        surrounding whitespace removed, or ``''`` when *a* is empty or the
        label is not present.
    """
    if not a:
        return ''
    # The previous pattern ended in a lazy, unanchored group -- (.*?) -- which
    # always matched the empty string, so the function returned '' even when
    # the label was present.  Instead, skip any whitespace and markup tags
    # that separate the label from its value, then capture the text up to the
    # next tag or line break.
    found = re.search(r'日期:(?:\s|<[^>]*>)*([^<\s][^<\r\n]*)', a)
    if found is None:
        return ''
    return found.group(1).strip()