add airav outline to javdb crawler
This commit is contained in:
@@ -82,7 +82,7 @@ def getCID(htmlcode):
|
|||||||
string = html.xpath("//a[contains(@class,'sample-box')][1]/@href")[0].replace('https://pics.dmm.co.jp/digital/video/','')
|
string = html.xpath("//a[contains(@class,'sample-box')][1]/@href")[0].replace('https://pics.dmm.co.jp/digital/video/','')
|
||||||
result = re.sub('/.*?.jpg','',string)
|
result = re.sub('/.*?.jpg','',string)
|
||||||
return result
|
return result
|
||||||
def getOutline(number): #获取演员
|
def getOutline(number): #获取剧情介绍
|
||||||
try:
|
try:
|
||||||
response = json.loads(airav.main(number))
|
response = json.loads(airav.main(number))
|
||||||
result = response['outline']
|
result = response['outline']
|
||||||
|
|||||||
@@ -192,10 +192,13 @@ def getDirector(a):
|
|||||||
result1 = str(html.xpath('//strong[contains(text(),"導演")]/../span/text()')).strip(" ['']")
|
result1 = str(html.xpath('//strong[contains(text(),"導演")]/../span/text()')).strip(" ['']")
|
||||||
result2 = str(html.xpath('//strong[contains(text(),"導演")]/../span/a/text()')).strip(" ['']")
|
result2 = str(html.xpath('//strong[contains(text(),"導演")]/../span/a/text()')).strip(" ['']")
|
||||||
return str(result1 + result2).strip('+').replace("', '", '').replace('"', '')
|
return str(result1 + result2).strip('+').replace("', '", '').replace('"', '')
|
||||||
def getOutline(htmlcode):
|
def getOutline(number): #获取剧情介绍
|
||||||
html = etree.fromstring(htmlcode, etree.HTMLParser())
|
try:
|
||||||
result = str(html.xpath('//*[@id="introduction"]/dd/p[1]/text()')).strip(" ['']")
|
response = json.loads(airav.main(number))
|
||||||
return result
|
result = response['outline']
|
||||||
|
return result
|
||||||
|
except:
|
||||||
|
return ''
|
||||||
def getSeries(a):
|
def getSeries(a):
|
||||||
#/html/body/section/div/div[3]/div[2]/nav/div[7]/span/a
|
#/html/body/section/div/div[3]/div[2]/nav/div[7]/span/a
|
||||||
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
|
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
|
||||||
@@ -281,7 +284,7 @@ def main(number):
|
|||||||
'actor': getActor(detail_page),
|
'actor': getActor(detail_page),
|
||||||
'title': title,
|
'title': title,
|
||||||
'studio': getStudio(detail_page),
|
'studio': getStudio(detail_page),
|
||||||
'outline': getOutline(detail_page),
|
'outline': getOutline(number),
|
||||||
'runtime': getRuntime(detail_page),
|
'runtime': getRuntime(detail_page),
|
||||||
'director': getDirector(detail_page),
|
'director': getDirector(detail_page),
|
||||||
'release': getRelease(detail_page),
|
'release': getRelease(detail_page),
|
||||||
|
|||||||
Reference in New Issue
Block a user