add airav outline to javdb crawler

This commit is contained in:
xingfan_xia
2021-06-03 12:29:19 -07:00
parent 863dd3bb81
commit b88b2ead7e
2 changed files with 9 additions and 6 deletions

View File

@@ -82,7 +82,7 @@ def getCID(htmlcode):
string = html.xpath("//a[contains(@class,'sample-box')][1]/@href")[0].replace('https://pics.dmm.co.jp/digital/video/','') string = html.xpath("//a[contains(@class,'sample-box')][1]/@href")[0].replace('https://pics.dmm.co.jp/digital/video/','')
result = re.sub('/.*?.jpg','',string) result = re.sub('/.*?.jpg','',string)
return result return result
def getOutline(number): #获取演员 def getOutline(number): #获取剧情介绍
try: try:
response = json.loads(airav.main(number)) response = json.loads(airav.main(number))
result = response['outline'] result = response['outline']

View File

@@ -192,10 +192,13 @@ def getDirector(a):
result1 = str(html.xpath('//strong[contains(text(),"導演")]/../span/text()')).strip(" ['']") result1 = str(html.xpath('//strong[contains(text(),"導演")]/../span/text()')).strip(" ['']")
result2 = str(html.xpath('//strong[contains(text(),"導演")]/../span/a/text()')).strip(" ['']") result2 = str(html.xpath('//strong[contains(text(),"導演")]/../span/a/text()')).strip(" ['']")
return str(result1 + result2).strip('+').replace("', '", '').replace('"', '') return str(result1 + result2).strip('+').replace("', '", '').replace('"', '')
def getOutline(htmlcode): def getOutline(number): #获取剧情介绍
html = etree.fromstring(htmlcode, etree.HTMLParser()) try:
result = str(html.xpath('//*[@id="introduction"]/dd/p[1]/text()')).strip(" ['']") response = json.loads(airav.main(number))
return result result = response['outline']
return result
except:
return ''
def getSeries(a): def getSeries(a):
#/html/body/section/div/div[3]/div[2]/nav/div[7]/span/a #/html/body/section/div/div[3]/div[2]/nav/div[7]/span/a
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
@@ -281,7 +284,7 @@ def main(number):
'actor': getActor(detail_page), 'actor': getActor(detail_page),
'title': title, 'title': title,
'studio': getStudio(detail_page), 'studio': getStudio(detail_page),
'outline': getOutline(detail_page), 'outline': getOutline(number),
'runtime': getRuntime(detail_page), 'runtime': getRuntime(detail_page),
'director': getDirector(detail_page), 'director': getDirector(detail_page),
'release': getRelease(detail_page), 'release': getRelease(detail_page),