改爲使用🗾日本語Metadata,同時對監修、Studio刮削進行細微調整

改爲使用🗾日本語Metadata,同時對監修、Studio刮削進行細微調整
This commit is contained in:
yobailover
2020-09-15 00:05:17 +08:00
committed by GitHub
parent a2793e2723
commit 5d4bc3454a

View File

@@ -29,9 +29,16 @@ def getTitle(htmlcode): #获取标题
return title2 return title2
except: except:
return title return title
def getStudio(htmlcode):  # get the studio (maker)
    """Return the studio (maker) name scraped from a detail page.

    The maker row has no fixed position in the info panel: it is the 4th
    ``<p>`` when the record has no director row and the 5th ``<p>`` when it
    has one. Each candidate row is therefore checked for the 'メーカー:' label
    before its link text is read.

    Args:
        htmlcode: HTML source of the detail page.

    Returns:
        The maker name, or '' when neither candidate row carries the label.
    """
    html = etree.fromstring(htmlcode, etree.HTMLParser())
    for row in (4, 5):
        base = '/html/body/div[5]/div[1]/div[2]/p[' + str(row) + ']'
        label = ''.join(html.xpath(base + '/span/text()')).strip()
        if label != 'メーカー:':
            continue
        # Read the text nodes themselves instead of stripping the repr of the
        # result list: repr-stripping drops leading/trailing brackets and
        # quotes that belong to the name and leaves a stray '"' behind when
        # the name contains an apostrophe.
        names = [text.strip() for text in html.xpath(base + '/a/text()')]
        return ', '.join(name for name in names if name)
    return ''
def getYear(htmlcode): #获取年份 def getYear(htmlcode): #获取年份
html = etree.fromstring(htmlcode,etree.HTMLParser()) html = etree.fromstring(htmlcode,etree.HTMLParser())
@@ -45,10 +52,10 @@ def getRelease(htmlcode): #获取出版日期
html = etree.fromstring(htmlcode, etree.HTMLParser()) html = etree.fromstring(htmlcode, etree.HTMLParser())
result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[2]/text()')).strip(" ['']") result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[2]/text()')).strip(" ['']")
return result return result
def getRuntime(htmlcode):  # get the runtime in minutes
    """Return the runtime scraped from the 3rd row of the info panel.

    Args:
        htmlcode: HTML source of the detail page.

    Returns:
        The row's own text with surrounding whitespace and any leading or
        trailing '分' / '鐘' characters removed, or '' when the row has no
        text.
    """
    html = etree.fromstring(htmlcode, etree.HTMLParser())
    nodes = html.xpath('/html/body/div[5]/div[1]/div[2]/p[3]/text()')
    # Work on the text nodes directly rather than on the repr of the result
    # list, so extra whitespace-only nodes or escaped characters cannot leak
    # list punctuation into the value.
    # str.strip('分鐘') treats its argument as a character set, so it removes
    # both a bare '分' suffix and the longer '分鐘' suffix.
    return ''.join(nodes).strip().strip('分鐘').strip()
def getActor(htmlcode): #获取女优 def getActor(htmlcode): #获取女优
b=[] b=[]
soup=BeautifulSoup(htmlcode,'lxml') soup=BeautifulSoup(htmlcode,'lxml')
@@ -60,9 +67,12 @@ def getNum(htmlcode): #获取番号
html = etree.fromstring(htmlcode, etree.HTMLParser()) html = etree.fromstring(htmlcode, etree.HTMLParser())
result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[1]/span[2]/text()')).strip(" ['']") result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[1]/span[2]/text()')).strip(" ['']")
return result return result
def getDirector(htmlcode):  # get the director
    """Return the director name scraped from the 4th row of the info panel.

    A record may have no director row at all, in which case the 4th row holds
    some other field; the row is only used when its label is '監督:'.

    Args:
        htmlcode: HTML source of the detail page.

    Returns:
        The director name, or '' when the 4th row is not the director row.
    """
    html = etree.fromstring(htmlcode, etree.HTMLParser())
    base = '/html/body/div[5]/div[1]/div[2]/p[4]'
    label = ''.join(html.xpath(base + '/span/text()')).strip()
    if label != '監督:':
        return ''  # the record may carry no director data
    # Read the text nodes themselves instead of stripping the repr of the
    # result list, which would mangle names that start or end with brackets
    # or quotes, or that contain an apostrophe.
    names = [text.strip() for text in html.xpath(base + '/a/text()')]
    return ', '.join(name for name in names if name)
def getCID(htmlcode): def getCID(htmlcode):
html = etree.fromstring(htmlcode, etree.HTMLParser()) html = etree.fromstring(htmlcode, etree.HTMLParser())
@@ -77,14 +87,18 @@ def getOutline(htmlcode): #获取演员
return result return result
except: except:
return '' return ''
def getSerise(htmlcode):  # get the series
    """Return the series name scraped from a detail page.

    The series row has no fixed position in the info panel: it is the 6th
    ``<p>`` when the record has no director row and the 7th ``<p>`` when it
    has one. Each candidate row is therefore checked for the 'シリーズ:' label
    before its link text is read.

    Args:
        htmlcode: HTML source of the detail page.

    Returns:
        The series name, or '' when neither candidate row carries the label.
    """
    html = etree.fromstring(htmlcode, etree.HTMLParser())
    for row in (6, 7):
        base = '/html/body/div[5]/div[1]/div[2]/p[' + str(row) + ']'
        label = ''.join(html.xpath(base + '/span/text()')).strip()
        if label != 'シリーズ:':
            continue
        # Read the text nodes themselves instead of stripping the repr of the
        # result list: repr-stripping drops leading/trailing brackets and
        # quotes that belong to the title and leaves a stray '"' behind when
        # the title contains an apostrophe.
        names = [text.strip() for text in html.xpath(base + '/a/text()')]
        return ', '.join(name for name in names if name)
    return ''
def getTag(htmlcode): # 获取演员 def getTag(htmlcode): # 获取标签
tag = [] tag = []
soup = BeautifulSoup(htmlcode, 'lxml') soup = BeautifulSoup(htmlcode, 'lxml')
a = soup.find_all(attrs={'class': 'genre'}) a = soup.find_all(attrs={'class': 'genre'})
@@ -94,10 +108,11 @@ def getTag(htmlcode): # 获取演员
tag.append(i.get_text()) tag.append(i.get_text())
return tag return tag
def main_uncensored(number): def main_uncensored(number):
htmlcode = get_html('https://www.javbus.com/' + number) htmlcode = get_html('https://www.javbus.com/ja/' + number)
if getTitle(htmlcode) == '': if getTitle(htmlcode) == '':
htmlcode = get_html('https://www.javbus.com/' + number.replace('-','_')) htmlcode = get_html('https://www.javbus.com/ja/' + number.replace('-','_'))
try: try:
dww_htmlcode = fanza.main_htmlcode(getCID(htmlcode)) dww_htmlcode = fanza.main_htmlcode(getCID(htmlcode))
except: except:
@@ -117,7 +132,7 @@ def main_uncensored(number):
'label': getSerise(htmlcode), 'label': getSerise(htmlcode),
'imagecut': 0, 'imagecut': 0,
'actor_photo': '', 'actor_photo': '',
'website': 'https://www.javbus.com/' + number, 'website': 'https://www.javbus.com/ja/' + number,
'source': 'javbus.py', 'source': 'javbus.py',
'series': getSerise(htmlcode), 'series': getSerise(htmlcode),
} }
@@ -128,7 +143,7 @@ def main_uncensored(number):
def main(number): def main(number):
try: try:
try: try:
htmlcode = get_html('https://www.javbus.com/' + number) htmlcode = get_html('https://www.javbus.com/ja/' + number)
try: try:
dww_htmlcode = fanza.main_htmlcode(getCID(htmlcode)) dww_htmlcode = fanza.main_htmlcode(getCID(htmlcode))
except: except:
@@ -148,7 +163,7 @@ def main(number):
'tag': getTag(htmlcode), 'tag': getTag(htmlcode),
'label': getSerise(htmlcode), 'label': getSerise(htmlcode),
'actor_photo': getActorPhoto(htmlcode), 'actor_photo': getActorPhoto(htmlcode),
'website': 'https://www.javbus.com/' + number, 'website': 'https://www.javbus.com/ja/' + number,
'source': 'javbus.py', 'source': 'javbus.py',
'series': getSerise(htmlcode), 'series': getSerise(htmlcode),
} }