From 3d9c92aac537d95bfa1bcceeab0c6e2f408e5452 Mon Sep 17 00:00:00 2001 From: Max Zhao Date: Sun, 6 Sep 2020 17:36:17 +0800 Subject: [PATCH] [WebCrawler/javdb] remove actor when actor is 'N/A' --- WebCrawler/javdb.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/WebCrawler/javdb.py b/WebCrawler/javdb.py index 5beefa0..2e4924b 100644 --- a/WebCrawler/javdb.py +++ b/WebCrawler/javdb.py @@ -17,7 +17,7 @@ def getActor(a): # //*[@id="center_column"]/div[2]/div[1]/div/table/tbody/tr[1] html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() result1 = str(html.xpath('//strong[contains(text(),"演員")]/../span/text()')).strip(" ['']") result2 = str(html.xpath('//strong[contains(text(),"演員")]/../span/a/text()')).strip(" ['']") - return str(result1 + result2).strip('+').replace(",\\xa0", "").replace("'", "").replace(' ', '').replace(',,', '').lstrip(',').replace(',', ', ') + return str(result1 + result2).strip('+').replace(",\\xa0", "").replace("'", "").replace(' ', '').replace(',,', '').replace('N/A', '').lstrip(',').replace(',', ', ') def getActorPhoto(actor): #//*[@id="star_qdt"]/li/a/img a = actor.split(',') d={}