Rewrite getActorPhoto() to fetch each actor's real photo URL from their profile page
This commit is contained in:
@@ -17,19 +17,30 @@ def getTitle(a):
|
|||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
def getActor(a): # //*[@id="center_column"]/div[2]/div[1]/div/table/tbody/tr[1]/td/text()
|
def getActor(browser):
|
||||||
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
|
htmla = browser.page.select('#avodDetails > div > div.frame > div.content > div > ul.profileCL > li.credit-links > a')
|
||||||
result1 = html.xpath('//*[@id="avodDetails"]/div/div[3]/div[2]/div/ul[1]/li[3]/a/text()')[0]
|
t = []
|
||||||
return result1
|
for i in htmla:
|
||||||
|
t.append(i.text.strip())
|
||||||
|
return t
|
||||||
|
|
||||||
|
|
||||||
def getActorPhoto(actor): # //*[@id="star_qdt"]/li/a/img
|
def getActorPhoto(browser):
|
||||||
a = actor.split(',')
|
htmla = browser.page.select('#avodDetails > div > div.frame > div.content > div > ul.profileCL > li.credit-links > a')
|
||||||
d = {}
|
t = {}
|
||||||
for i in a:
|
for i in htmla:
|
||||||
p = {i: ''}
|
p = {i.text.strip(): i['href']}
|
||||||
d.update(p)
|
t.update(p)
|
||||||
return d
|
o = {}
|
||||||
|
for k, v in t.items():
|
||||||
|
r = browser.open_relative(v)
|
||||||
|
if r.ok:
|
||||||
|
pic = browser.page.select_one('#avidolDetails > div > div.frame > div > p > img')
|
||||||
|
p = {k: abs_url(browser.url, pic['src'])}
|
||||||
|
else:
|
||||||
|
p = {k: ''}
|
||||||
|
o.update(p)
|
||||||
|
return o
|
||||||
|
|
||||||
|
|
||||||
def getStudio(a):
|
def getStudio(a):
|
||||||
@@ -181,8 +192,9 @@ def main(number):
|
|||||||
if not result.ok:
|
if not result.ok:
|
||||||
raise ValueError("xcity.py: detail page not found")
|
raise ValueError("xcity.py: detail page not found")
|
||||||
detail_page = str(browser.page)
|
detail_page = str(browser.page)
|
||||||
|
url = browser.url
|
||||||
dic = {
|
dic = {
|
||||||
'actor': getActor(detail_page),
|
'actor': getActor(browser),
|
||||||
'title': getTitle(detail_page),
|
'title': getTitle(detail_page),
|
||||||
'studio': getStudio(detail_page),
|
'studio': getStudio(detail_page),
|
||||||
'outline': getOutline(detail_page),
|
'outline': getOutline(detail_page),
|
||||||
@@ -197,8 +209,8 @@ def main(number):
|
|||||||
'tag': getTag(detail_page),
|
'tag': getTag(detail_page),
|
||||||
'label': getLabel(detail_page),
|
'label': getLabel(detail_page),
|
||||||
'year': getYear(getRelease(detail_page)), # str(re.search('\d{4}',getRelease(a)).group()),
|
'year': getYear(getRelease(detail_page)), # str(re.search('\d{4}',getRelease(a)).group()),
|
||||||
'actor_photo': getActorPhoto(getActor(detail_page)),
|
'actor_photo': getActorPhoto(browser),
|
||||||
'website': browser.url,
|
'website': url,
|
||||||
'source': 'xcity.py',
|
'source': 'xcity.py',
|
||||||
'series': getSeries(detail_page),
|
'series': getSeries(detail_page),
|
||||||
}
|
}
|
||||||
@@ -211,5 +223,6 @@ def main(number):
|
|||||||
return js
|
return js
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
print(main('VNDS-2624'))
|
print(main('RCTD-288'))
|
||||||
print(main('ABP-345'))
|
#print(main('VNDS-2624'))
|
||||||
|
#print(main('ABP-345'))
|
||||||
|
|||||||
Reference in New Issue
Block a user