javbus, javlib use outline in airav
This commit is contained in:
@@ -225,8 +225,8 @@ def main(number):
|
|||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
#print(main('ADN-188'))
|
#print(main('ADN-188'))
|
||||||
|
|
||||||
print(search('ADN-188'))
|
print(main('ADN-188'))
|
||||||
print(search('012717_472'))
|
print(main('012717_472'))
|
||||||
print(search('080719-976'))
|
print(main('080719-976'))
|
||||||
print(search('姫川ゆうな'))
|
print(main('姫川ゆうな'))
|
||||||
|
|
||||||
|
|||||||
@@ -7,6 +7,7 @@ from bs4 import BeautifulSoup#need install
|
|||||||
import json
|
import json
|
||||||
from ADC_function import *
|
from ADC_function import *
|
||||||
from WebCrawler import fanza
|
from WebCrawler import fanza
|
||||||
|
import airav
|
||||||
|
|
||||||
def getActorPhoto(htmlcode): #//*[@id="star_qdt"]/li/a/img
|
def getActorPhoto(htmlcode): #//*[@id="star_qdt"]/li/a/img
|
||||||
soup = BeautifulSoup(htmlcode, 'lxml')
|
soup = BeautifulSoup(htmlcode, 'lxml')
|
||||||
@@ -79,12 +80,13 @@ def getCID(htmlcode):
|
|||||||
string = html.xpath("//a[contains(@class,'sample-box')][1]/@href")[0].replace('https://pics.dmm.co.jp/digital/video/','')
|
string = html.xpath("//a[contains(@class,'sample-box')][1]/@href")[0].replace('https://pics.dmm.co.jp/digital/video/','')
|
||||||
result = re.sub('/.*?.jpg','',string)
|
result = re.sub('/.*?.jpg','',string)
|
||||||
return result
|
return result
|
||||||
def getOutline(htmlcode): #获取演员
|
def getOutline(number): #获取演员
|
||||||
html = etree.fromstring(htmlcode, etree.HTMLParser())
|
|
||||||
try:
|
try:
|
||||||
result = html.xpath("string(//div[contains(@class,'mg-b20 lh4')])").replace('\n','')
|
response = json.loads(airav.main(number))
|
||||||
|
result = response['outline']
|
||||||
return result
|
return result
|
||||||
except:
|
except Exception as e:
|
||||||
|
print(e)
|
||||||
return ''
|
return ''
|
||||||
def getSerise(htmlcode): #获取系列 已修改
|
def getSerise(htmlcode): #获取系列 已修改
|
||||||
html = etree.fromstring(htmlcode, etree.HTMLParser())
|
html = etree.fromstring(htmlcode, etree.HTMLParser())
|
||||||
@@ -122,15 +124,11 @@ def main_uncensored(number):
|
|||||||
htmlcode = get_html('https://www.javbus.com/ja/' + number)
|
htmlcode = get_html('https://www.javbus.com/ja/' + number)
|
||||||
if getTitle(htmlcode) == '':
|
if getTitle(htmlcode) == '':
|
||||||
htmlcode = get_html('https://www.javbus.com/ja/' + number.replace('-','_'))
|
htmlcode = get_html('https://www.javbus.com/ja/' + number.replace('-','_'))
|
||||||
try:
|
|
||||||
dww_htmlcode = fanza.main_htmlcode(getCID(htmlcode))
|
|
||||||
except:
|
|
||||||
dww_htmlcode = ''
|
|
||||||
dic = {
|
dic = {
|
||||||
'title': str(re.sub('\w+-\d+-','',getTitle(htmlcode))).replace(getNum(htmlcode)+'-',''),
|
'title': str(re.sub('\w+-\d+-','',getTitle(htmlcode))).replace(getNum(htmlcode)+'-',''),
|
||||||
'studio': getStudio(htmlcode),
|
'studio': getStudio(htmlcode),
|
||||||
'year': getYear(htmlcode),
|
'year': getYear(htmlcode),
|
||||||
'outline': getOutline(dww_htmlcode),
|
'outline': getOutline(number),
|
||||||
'runtime': getRuntime(htmlcode),
|
'runtime': getRuntime(htmlcode),
|
||||||
'director': getDirector(htmlcode),
|
'director': getDirector(htmlcode),
|
||||||
'actor': getActor(htmlcode),
|
'actor': getActor(htmlcode),
|
||||||
@@ -157,15 +155,11 @@ def main(number):
|
|||||||
htmlcode = get_html('https://www.fanbus.us/' + number)
|
htmlcode = get_html('https://www.fanbus.us/' + number)
|
||||||
except:
|
except:
|
||||||
htmlcode = get_html('https://www.javbus.com/' + number)
|
htmlcode = get_html('https://www.javbus.com/' + number)
|
||||||
try:
|
|
||||||
dww_htmlcode = fanza.main_htmlcode(getCID(htmlcode))
|
|
||||||
except:
|
|
||||||
dww_htmlcode = ''
|
|
||||||
dic = {
|
dic = {
|
||||||
'title': str(re.sub('\w+-\d+-', '', getTitle(htmlcode))),
|
'title': str(re.sub('\w+-\d+-', '', getTitle(htmlcode))),
|
||||||
'studio': getStudio(htmlcode),
|
'studio': getStudio(htmlcode),
|
||||||
'year': str(re.search('\d{4}', getYear(htmlcode)).group()),
|
'year': str(re.search('\d{4}', getYear(htmlcode)).group()),
|
||||||
'outline': getOutline(dww_htmlcode),
|
'outline': getOutline(number),
|
||||||
'runtime': getRuntime(htmlcode),
|
'runtime': getRuntime(htmlcode),
|
||||||
'director': getDirector(htmlcode),
|
'director': getDirector(htmlcode),
|
||||||
'actor': getActor(htmlcode),
|
'actor': getActor(htmlcode),
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ sys.path.append('../')
|
|||||||
import json
|
import json
|
||||||
import bs4
|
import bs4
|
||||||
import re
|
import re
|
||||||
|
import airav
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
from lxml import html
|
from lxml import html
|
||||||
from http.cookies import SimpleCookie
|
from http.cookies import SimpleCookie
|
||||||
@@ -42,7 +43,7 @@ def main(number: str):
|
|||||||
"title": get_title(lx, soup),
|
"title": get_title(lx, soup),
|
||||||
"studio": get_table_el_single_anchor(soup, "video_maker"),
|
"studio": get_table_el_single_anchor(soup, "video_maker"),
|
||||||
"year": get_table_el_td(soup, "video_date")[:4],
|
"year": get_table_el_td(soup, "video_date")[:4],
|
||||||
"outline": "",
|
"outline": get_outline(number),
|
||||||
"director": get_table_el_single_anchor(soup, "video_director"),
|
"director": get_table_el_single_anchor(soup, "video_director"),
|
||||||
"cover": get_cover(lx),
|
"cover": get_cover(lx),
|
||||||
"imagecut": 1,
|
"imagecut": 1,
|
||||||
@@ -77,7 +78,7 @@ def main(number: str):
|
|||||||
"title": get_title(lx, soup),
|
"title": get_title(lx, soup),
|
||||||
"studio": get_table_el_single_anchor(soup, "video_maker"),
|
"studio": get_table_el_single_anchor(soup, "video_maker"),
|
||||||
"year": get_table_el_td(soup, "video_date")[:4],
|
"year": get_table_el_td(soup, "video_date")[:4],
|
||||||
"outline": "",
|
"outline": get_outline(number),
|
||||||
"director": get_table_el_single_anchor(soup, "video_director"),
|
"director": get_table_el_single_anchor(soup, "video_director"),
|
||||||
"cover": get_cover(lx),
|
"cover": get_cover(lx),
|
||||||
"imagecut": 1,
|
"imagecut": 1,
|
||||||
@@ -102,6 +103,15 @@ def get_from_xpath(lx: html.HtmlElement, xpath: str) -> str:
|
|||||||
return lx.xpath(xpath)[0].strip()
|
return lx.xpath(xpath)[0].strip()
|
||||||
|
|
||||||
|
|
||||||
|
def get_outline(number):
|
||||||
|
try:
|
||||||
|
response = json.loads(airav.main(number))
|
||||||
|
result = response['outline']
|
||||||
|
return result
|
||||||
|
except:
|
||||||
|
return ''
|
||||||
|
|
||||||
|
|
||||||
def get_table_el_single_anchor(soup: BeautifulSoup, tag_id: str) -> str:
|
def get_table_el_single_anchor(soup: BeautifulSoup, tag_id: str) -> str:
|
||||||
tag = soup.find(id=tag_id).find("a")
|
tag = soup.find(id=tag_id).find("a")
|
||||||
|
|
||||||
@@ -145,7 +155,7 @@ def get_cover(lx: html.HtmlComment) -> str:
|
|||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
lists = ["DVMC-003", "GS-0167", "JKREZ-001", "KMHRS-010", "KNSD-023"]
|
lists = ["IPX-292", "STAR-438", "JKREZ-001", "KMHRS-010", "KNSD-023"]
|
||||||
#lists = ["DVMC-003"]
|
#lists = ["DVMC-003"]
|
||||||
for num in lists:
|
for num in lists:
|
||||||
print(main(num))
|
print(main(num))
|
||||||
|
|||||||
Reference in New Issue
Block a user