javbus, javlib use outline in airav

This commit is contained in:
yoshiko2
2021-04-22 03:22:24 +08:00
parent 98c8585327
commit f761e5bccc
3 changed files with 25 additions and 21 deletions

View File

@@ -225,8 +225,8 @@ def main(number):
if __name__ == '__main__': if __name__ == '__main__':
#print(main('ADN-188')) #print(main('ADN-188'))
print(search('ADN-188')) print(main('ADN-188'))
print(search('012717_472')) print(main('012717_472'))
print(search('080719-976')) print(main('080719-976'))
print(search('姫川ゆうな')) print(main('姫川ゆうな'))

View File

@@ -7,6 +7,7 @@ from bs4 import BeautifulSoup#need install
import json import json
from ADC_function import * from ADC_function import *
from WebCrawler import fanza from WebCrawler import fanza
import airav
def getActorPhoto(htmlcode): #//*[@id="star_qdt"]/li/a/img def getActorPhoto(htmlcode): #//*[@id="star_qdt"]/li/a/img
soup = BeautifulSoup(htmlcode, 'lxml') soup = BeautifulSoup(htmlcode, 'lxml')
@@ -79,12 +80,13 @@ def getCID(htmlcode):
string = html.xpath("//a[contains(@class,'sample-box')][1]/@href")[0].replace('https://pics.dmm.co.jp/digital/video/','') string = html.xpath("//a[contains(@class,'sample-box')][1]/@href")[0].replace('https://pics.dmm.co.jp/digital/video/','')
result = re.sub('/.*?.jpg','',string) result = re.sub('/.*?.jpg','',string)
return result return result
def getOutline(htmlcode): #获取演员 def getOutline(number): #获取演员
html = etree.fromstring(htmlcode, etree.HTMLParser())
try: try:
result = html.xpath("string(//div[contains(@class,'mg-b20 lh4')])").replace('\n','') response = json.loads(airav.main(number))
result = response['outline']
return result return result
except: except Exception as e:
print(e)
return '' return ''
def getSerise(htmlcode): #获取系列 已修改 def getSerise(htmlcode): #获取系列 已修改
html = etree.fromstring(htmlcode, etree.HTMLParser()) html = etree.fromstring(htmlcode, etree.HTMLParser())
@@ -122,15 +124,11 @@ def main_uncensored(number):
htmlcode = get_html('https://www.javbus.com/ja/' + number) htmlcode = get_html('https://www.javbus.com/ja/' + number)
if getTitle(htmlcode) == '': if getTitle(htmlcode) == '':
htmlcode = get_html('https://www.javbus.com/ja/' + number.replace('-','_')) htmlcode = get_html('https://www.javbus.com/ja/' + number.replace('-','_'))
try:
dww_htmlcode = fanza.main_htmlcode(getCID(htmlcode))
except:
dww_htmlcode = ''
dic = { dic = {
'title': str(re.sub('\w+-\d+-','',getTitle(htmlcode))).replace(getNum(htmlcode)+'-',''), 'title': str(re.sub('\w+-\d+-','',getTitle(htmlcode))).replace(getNum(htmlcode)+'-',''),
'studio': getStudio(htmlcode), 'studio': getStudio(htmlcode),
'year': getYear(htmlcode), 'year': getYear(htmlcode),
'outline': getOutline(dww_htmlcode), 'outline': getOutline(number),
'runtime': getRuntime(htmlcode), 'runtime': getRuntime(htmlcode),
'director': getDirector(htmlcode), 'director': getDirector(htmlcode),
'actor': getActor(htmlcode), 'actor': getActor(htmlcode),
@@ -157,15 +155,11 @@ def main(number):
htmlcode = get_html('https://www.fanbus.us/' + number) htmlcode = get_html('https://www.fanbus.us/' + number)
except: except:
htmlcode = get_html('https://www.javbus.com/' + number) htmlcode = get_html('https://www.javbus.com/' + number)
try:
dww_htmlcode = fanza.main_htmlcode(getCID(htmlcode))
except:
dww_htmlcode = ''
dic = { dic = {
'title': str(re.sub('\w+-\d+-', '', getTitle(htmlcode))), 'title': str(re.sub('\w+-\d+-', '', getTitle(htmlcode))),
'studio': getStudio(htmlcode), 'studio': getStudio(htmlcode),
'year': str(re.search('\d{4}', getYear(htmlcode)).group()), 'year': str(re.search('\d{4}', getYear(htmlcode)).group()),
'outline': getOutline(dww_htmlcode), 'outline': getOutline(number),
'runtime': getRuntime(htmlcode), 'runtime': getRuntime(htmlcode),
'director': getDirector(htmlcode), 'director': getDirector(htmlcode),
'actor': getActor(htmlcode), 'actor': getActor(htmlcode),

View File

@@ -3,6 +3,7 @@ sys.path.append('../')
import json import json
import bs4 import bs4
import re import re
import airav
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from lxml import html from lxml import html
from http.cookies import SimpleCookie from http.cookies import SimpleCookie
@@ -42,7 +43,7 @@ def main(number: str):
"title": get_title(lx, soup), "title": get_title(lx, soup),
"studio": get_table_el_single_anchor(soup, "video_maker"), "studio": get_table_el_single_anchor(soup, "video_maker"),
"year": get_table_el_td(soup, "video_date")[:4], "year": get_table_el_td(soup, "video_date")[:4],
"outline": "", "outline": get_outline(number),
"director": get_table_el_single_anchor(soup, "video_director"), "director": get_table_el_single_anchor(soup, "video_director"),
"cover": get_cover(lx), "cover": get_cover(lx),
"imagecut": 1, "imagecut": 1,
@@ -77,7 +78,7 @@ def main(number: str):
"title": get_title(lx, soup), "title": get_title(lx, soup),
"studio": get_table_el_single_anchor(soup, "video_maker"), "studio": get_table_el_single_anchor(soup, "video_maker"),
"year": get_table_el_td(soup, "video_date")[:4], "year": get_table_el_td(soup, "video_date")[:4],
"outline": "", "outline": get_outline(number),
"director": get_table_el_single_anchor(soup, "video_director"), "director": get_table_el_single_anchor(soup, "video_director"),
"cover": get_cover(lx), "cover": get_cover(lx),
"imagecut": 1, "imagecut": 1,
@@ -102,6 +103,15 @@ def get_from_xpath(lx: html.HtmlElement, xpath: str) -> str:
return lx.xpath(xpath)[0].strip() return lx.xpath(xpath)[0].strip()
def get_outline(number):
    """Return the outline text that airav reports for ``number``.

    The string returned by ``airav.main(number)`` is parsed as JSON and its
    ``'outline'`` entry is returned unchanged.

    The lookup is best-effort: if airav raises, the payload is not valid
    JSON, or the ``'outline'`` key is missing, an empty string is returned
    instead of propagating the error.
    """
    try:
        response = json.loads(airav.main(number))
        return response['outline']
    except Exception:
        # Deliberately ``Exception`` rather than a bare ``except:`` so that
        # KeyboardInterrupt and SystemExit still reach the caller.
        return ''
def get_table_el_single_anchor(soup: BeautifulSoup, tag_id: str) -> str: def get_table_el_single_anchor(soup: BeautifulSoup, tag_id: str) -> str:
tag = soup.find(id=tag_id).find("a") tag = soup.find(id=tag_id).find("a")
@@ -145,7 +155,7 @@ def get_cover(lx: html.HtmlComment) -> str:
if __name__ == "__main__": if __name__ == "__main__":
lists = ["DVMC-003", "GS-0167", "JKREZ-001", "KMHRS-010", "KNSD-023"] lists = ["IPX-292", "STAR-438", "JKREZ-001", "KMHRS-010", "KNSD-023"]
#lists = ["DVMC-003"] #lists = ["DVMC-003"]
for num in lists: for num in lists:
print(main(num)) print(main(num))