Remove abs_url(); urljoin() already returns an absolute href unchanged, so the wrapper is redundant
This commit is contained in:
@@ -593,9 +593,3 @@ def is_link(filename: str):
|
|||||||
return True # hard link Linux MAC OSX Windows NTFS
|
return True # hard link Linux MAC OSX Windows NTFS
|
||||||
return False
|
return False
|
||||||
|
|
||||||
# URL相对路径转绝对路径
|
|
||||||
def abs_url(base_url: str, href: str) -> str:
|
|
||||||
if href.startswith('http'):
|
|
||||||
return href
|
|
||||||
return urljoin(base_url, href)
|
|
||||||
|
|
||||||
|
|||||||
@@ -25,7 +25,7 @@ def getActorPhoto(htmlcode): #//*[@id="star_qdt"]/li/a/img
|
|||||||
l=i.a['href']
|
l=i.a['href']
|
||||||
t=i.get_text()
|
t=i.get_text()
|
||||||
html = etree.fromstring(get_html(l), etree.HTMLParser())
|
html = etree.fromstring(get_html(l), etree.HTMLParser())
|
||||||
p=abs_url("https://www.javbus.com",
|
p=urljoin("https://www.javbus.com",
|
||||||
str(html.xpath('//*[@id="waterfall"]/div[1]/div/div[1]/img/@src')).strip(" ['']"))
|
str(html.xpath('//*[@id="waterfall"]/div[1]/div/div[1]/img/@src')).strip(" ['']"))
|
||||||
p2={t:p}
|
p2={t:p}
|
||||||
d.update(p2)
|
d.update(p2)
|
||||||
@@ -60,7 +60,7 @@ def getYear(htmlcode): #获取年份
|
|||||||
def getCover(htmlcode): #获取封面链接
|
def getCover(htmlcode): #获取封面链接
|
||||||
doc = pq(htmlcode)
|
doc = pq(htmlcode)
|
||||||
image = doc('a.bigImage')
|
image = doc('a.bigImage')
|
||||||
return abs_url("https://www.javbus.com", image.attr('href'))
|
return urljoin("https://www.javbus.com", image.attr('href'))
|
||||||
def getRelease(htmlcode): #获取出版日期
|
def getRelease(htmlcode): #获取出版日期
|
||||||
html = etree.fromstring(htmlcode, etree.HTMLParser())
|
html = etree.fromstring(htmlcode, etree.HTMLParser())
|
||||||
result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[2]/text()')).strip(" ['']")
|
result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[2]/text()')).strip(" ['']")
|
||||||
|
|||||||
@@ -17,7 +17,7 @@ def getActorPhoto(htmlcode): #//*[@id="star_qdt"]/li/a/img
|
|||||||
l=i.a['href']
|
l=i.a['href']
|
||||||
t=i.get_text()
|
t=i.get_text()
|
||||||
html = etree.fromstring(get_html(l), etree.HTMLParser())
|
html = etree.fromstring(get_html(l), etree.HTMLParser())
|
||||||
p=abs_url("https://www.javbus.com",
|
p=urljoin("https://www.javbus.com",
|
||||||
str(html.xpath('//*[@id="waterfall"]/div[1]/div/div[1]/img/@src')).strip(" ['']"))
|
str(html.xpath('//*[@id="waterfall"]/div[1]/div/div[1]/img/@src')).strip(" ['']"))
|
||||||
p2={t:p}
|
p2={t:p}
|
||||||
d.update(p2)
|
d.update(p2)
|
||||||
@@ -48,7 +48,7 @@ def getYear(htmlcode): #获取年份
|
|||||||
def getCover(htmlcode): #获取封面链接
|
def getCover(htmlcode): #获取封面链接
|
||||||
doc = pq(htmlcode)
|
doc = pq(htmlcode)
|
||||||
image = doc('a.bigImage')
|
image = doc('a.bigImage')
|
||||||
return abs_url("https://www.javbus.com", image.attr('href'))
|
return urljoin("https://www.javbus.com", image.attr('href'))
|
||||||
def getRelease(htmlcode): #获取出版日期
|
def getRelease(htmlcode): #获取出版日期
|
||||||
html = etree.fromstring(htmlcode, etree.HTMLParser())
|
html = etree.fromstring(htmlcode, etree.HTMLParser())
|
||||||
result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[2]/text()')).strip(" ['']")
|
result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[2]/text()')).strip(" ['']")
|
||||||
|
|||||||
@@ -36,7 +36,7 @@ def getActorPhoto(browser):
|
|||||||
r = browser.open_relative(v)
|
r = browser.open_relative(v)
|
||||||
if r.ok:
|
if r.ok:
|
||||||
pic = browser.page.select_one('#avidolDetails > div > div.frame > div > p > img')
|
pic = browser.page.select_one('#avidolDetails > div > div.frame > div > p > img')
|
||||||
p = {k: abs_url(browser.url, pic['src'])}
|
p = {k: urljoin(browser.url, pic['src'])}
|
||||||
else:
|
else:
|
||||||
p = {k, ''}
|
p = {k, ''}
|
||||||
o.update(p)
|
o.update(p)
|
||||||
|
|||||||
Reference in New Issue
Block a user