Convert relative image URLs to absolute URLs

2021-06-11 13:53:08 +08:00
parent 8fbe101196
commit 3cce315100
3 changed files with 14 additions and 14 deletions
@@ -9,6 +9,7 @@ import time
 from lxml import etree
 import re
 import config
 from urllib.parse import urljoin
 def get_data_state(data: dict) -> bool:  # 元数据获取失败检测
@@ -576,3 +577,9 @@ def is_link(filename: str):
    elif os.stat(filename).st_nlink > 1:
        return True # hard link Linux MAC OSX Windows NTFS
    return False
 # Convert a relative URL into an absolute URL
 def abs_url(base_url: str, href: str) -> str:
    """Resolve ``href`` against ``base_url`` and return the absolute URL.

    Args:
        base_url: Absolute URL that relative references are resolved against.
        href: The reference to resolve. It may already be absolute, in which
            case ``urljoin`` returns it unchanged.

    Returns:
        The absolute URL, or an empty string when ``href`` is empty or
        ``None`` (there is nothing to resolve, and returning ``base_url``
        would present the base itself as the resolved target).
    """
    if not href:
        # urljoin() would hand back base_url for an empty reference; report
        # "no URL" instead of a misleading link to the base.
        return ''
    # No explicit "starts with http" shortcut: urljoin() already leaves
    # absolute URLs alone, and a prefix test would wrongly skip relative
    # paths such as "httpdocs/a.jpg".
    return urljoin(base_url, href)
@@ -25,7 +25,8 @@ def getActorPhoto(htmlcode): #//*[@id="star_qdt"]/li/a/img
        l=i.a['href']
        t=i.get_text()
        html = etree.fromstring(get_html(l), etree.HTMLParser())
-        p=str(html.xpath('//*[@id="waterfall"]/div[1]/div/div[1]/img/@src')).strip(" ['']")
+        p=abs_url("https://www.javbus.com",
                  str(html.xpath('//*[@id="waterfall"]/div[1]/div/div[1]/img/@src')).strip(" ['']"))
        p2={t:p}
        d.update(p2)
    return d
@@ -59,7 +60,7 @@ def getYear(htmlcode):   #获取年份
 def getCover(htmlcode):  #获取封面链接
    doc = pq(htmlcode)
    image = doc('a.bigImage')
-    return image.attr('href')
+    return abs_url("https://www.javbus.com", image.attr('href'))
 def getRelease(htmlcode): #获取出版日期
    html = etree.fromstring(htmlcode, etree.HTMLParser())
    result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[2]/text()')).strip(" ['']")
@@ -226,9 +227,5 @@ def main(number):
 if __name__ == '__main__':
    #print(main('ADN-188'))
    print(main('ADN-188'))
-    print(main('012717_472'))
+    print(main('CJOD-278'))
    print(main('080719-976'))
    print(main('姫川ゆうな'))
@@ -17,7 +17,8 @@ def getActorPhoto(htmlcode): #//*[@id="star_qdt"]/li/a/img
        l=i.a['href']
        t=i.get_text()
        html = etree.fromstring(get_html(l), etree.HTMLParser())
-        p=str(html.xpath('//*[@id="waterfall"]/div[1]/div/div[1]/img/@src')).strip(" ['']")
+        p=abs_url("https://www.javbus.com",
                  str(html.xpath('//*[@id="waterfall"]/div[1]/div/div[1]/img/@src')).strip(" ['']"))
        p2={t:p}
        d.update(p2)
    return d
@@ -47,12 +48,7 @@ def getYear(htmlcode):   #获取年份
 def getCover(htmlcode):  #获取封面链接
    doc = pq(htmlcode)
    image = doc('a.bigImage')
-    uri = image.attr('href')
+    return abs_url("https://www.javbus.com", image.attr('href'))
    if uri.startswith('http'):
        return uri
    if uri[0] != '/':
        uri = '/' + uri
    return "https://www.javbus.com" + uri
 def getRelease(htmlcode): #获取出版日期
    html = etree.fromstring(htmlcode, etree.HTMLParser())
    result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[2]/text()')).strip(" ['']")