Convert relative image URLs to absolute URLs
This commit is contained in:
@@ -25,7 +25,8 @@ def getActorPhoto(htmlcode): #//*[@id="star_qdt"]/li/a/img
|
||||
l=i.a['href']
|
||||
t=i.get_text()
|
||||
html = etree.fromstring(get_html(l), etree.HTMLParser())
|
||||
p=str(html.xpath('//*[@id="waterfall"]/div[1]/div/div[1]/img/@src')).strip(" ['']")
|
||||
p=abs_url("https://www.javbus.com",
|
||||
str(html.xpath('//*[@id="waterfall"]/div[1]/div/div[1]/img/@src')).strip(" ['']"))
|
||||
p2={t:p}
|
||||
d.update(p2)
|
||||
return d
|
||||
@@ -59,7 +60,7 @@ def getYear(htmlcode): #获取年份
|
||||
def getCover(htmlcode): #获取封面链接
|
||||
doc = pq(htmlcode)
|
||||
image = doc('a.bigImage')
|
||||
return image.attr('href')
|
||||
return abs_url("https://www.javbus.com", image.attr('href'))
|
||||
def getRelease(htmlcode): #获取出版日期
|
||||
html = etree.fromstring(htmlcode, etree.HTMLParser())
|
||||
result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[2]/text()')).strip(" ['']")
|
||||
@@ -226,9 +227,5 @@ def main(number):
|
||||
|
||||
if __name__ == '__main__':
|
||||
#print(main('ADN-188'))
|
||||
|
||||
print(main('ADN-188'))
|
||||
print(main('012717_472'))
|
||||
print(main('080719-976'))
|
||||
print(main('姫川ゆうな'))
|
||||
|
||||
print(main('CJOD-278'))
|
||||
|
||||
@@ -17,7 +17,8 @@ def getActorPhoto(htmlcode): #//*[@id="star_qdt"]/li/a/img
|
||||
l=i.a['href']
|
||||
t=i.get_text()
|
||||
html = etree.fromstring(get_html(l), etree.HTMLParser())
|
||||
p=str(html.xpath('//*[@id="waterfall"]/div[1]/div/div[1]/img/@src')).strip(" ['']")
|
||||
p=abs_url("https://www.javbus.com",
|
||||
str(html.xpath('//*[@id="waterfall"]/div[1]/div/div[1]/img/@src')).strip(" ['']"))
|
||||
p2={t:p}
|
||||
d.update(p2)
|
||||
return d
|
||||
@@ -47,12 +48,7 @@ def getYear(htmlcode): #获取年份
|
||||
def getCover(htmlcode): #获取封面链接
|
||||
doc = pq(htmlcode)
|
||||
image = doc('a.bigImage')
|
||||
uri = image.attr('href')
|
||||
if uri.startswith('http'):
|
||||
return uri
|
||||
if uri[0] != '/':
|
||||
uri = '/' + uri
|
||||
return "https://www.javbus.com" + uri
|
||||
return abs_url("https://www.javbus.com", image.attr('href'))
|
||||
def getRelease(htmlcode): #获取出版日期
|
||||
html = etree.fromstring(htmlcode, etree.HTMLParser())
|
||||
result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[2]/text()')).strip(" ['']")
|
||||
|
||||
Reference in New Issue
Block a user