[WebCrawler/javdb] Strip the 'N/A' placeholder from the actor string

This commit is contained in:
Max Zhao
2020-09-06 17:36:17 +08:00
parent b7e0845582
commit 3d9c92aac5

View File

@@ -17,7 +17,7 @@ def getActor(a): # //*[@id="center_column"]/div[2]/div[1]/div/table/tbody/tr[1]
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
result1 = str(html.xpath('//strong[contains(text(),"演員")]/../span/text()')).strip(" ['']") result1 = str(html.xpath('//strong[contains(text(),"演員")]/../span/text()')).strip(" ['']")
result2 = str(html.xpath('//strong[contains(text(),"演員")]/../span/a/text()')).strip(" ['']") result2 = str(html.xpath('//strong[contains(text(),"演員")]/../span/a/text()')).strip(" ['']")
return str(result1 + result2).strip('+').replace(",\\xa0", "").replace("'", "").replace(' ', '').replace(',,', '').lstrip(',').replace(',', ', ') return str(result1 + result2).strip('+').replace(",\\xa0", "").replace("'", "").replace(' ', '').replace(',,', '').replace('N/A', '').lstrip(',').replace(',', ', ')
def getActorPhoto(actor): #//*[@id="star_qdt"]/li/a/img def getActorPhoto(actor): #//*[@id="star_qdt"]/li/a/img
a = actor.split(',') a = actor.split(',')
d={} d={}