改爲使用🗾日本語Metadata,同時對監修、Studio刮削進行細微調整
改爲使用🗾日本語Metadata,同時對監修、Studio刮削進行細微調整
This commit is contained in:
@@ -29,9 +29,16 @@ def getTitle(htmlcode): #获取标题
|
||||
return title2
|
||||
except:
|
||||
return title
|
||||
def getStudio(htmlcode):  # get the maker (studio)
    """Return the maker/studio name scraped from a detail page, or ''.

    The info panel lists its fields as consecutive <p> rows whose <span>
    holds the field label. The maker row is the 4th <p> when the record has
    no director row and the 5th <p> when it has one, so both positions are
    probed and the label is checked before the value is read.

    :param htmlcode: HTML text of the detail page.
    :return: the maker name, or '' when neither row is labelled 'メーカー:'.
    """
    html = etree.fromstring(htmlcode, etree.HTMLParser())
    panel = '/html/body/div[5]/div[1]/div[2]'
    # Row 4: record without a director; row 5: record with a director.
    for row in (4, 5):
        label = str(html.xpath('%s/p[%d]/span/text()' % (panel, row))).strip(" ['']")
        if label == 'メーカー:':
            # str(list).strip(...) peels the list/quote punctuation off the
            # repr of the xpath result, same as the rest of this module.
            return str(html.xpath('%s/p[%d]/a/text()' % (panel, row))).strip(" ['']")
    return ''
|
||||
def getYear(htmlcode): #获取年份
|
||||
html = etree.fromstring(htmlcode,etree.HTMLParser())
|
||||
@@ -45,10 +52,10 @@ def getRelease(htmlcode): #获取出版日期
|
||||
html = etree.fromstring(htmlcode, etree.HTMLParser())
|
||||
result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[2]/text()')).strip(" ['']")
|
||||
return result
|
||||
def getRuntime(htmlcode):  # get the runtime in minutes
    """Return the runtime text from the 3rd <p> row of the info panel.

    Only the xpath-based definition is kept; the earlier BeautifulSoup
    variant in this span was shadowed by the redefinition and never ran.

    :param htmlcode: HTML text of the detail page.
    :return: the runtime with surrounding list/quote punctuation and the
        minute suffix stripped; '' when the row has no text.
    """
    html = etree.fromstring(htmlcode, etree.HTMLParser())
    raw = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[3]/text()'))
    # str.strip() treats its argument as a set of characters, so this removes
    # the repr punctuation as well as a trailing '分' or '分鐘' suffix.
    return raw.strip(" ['']分鐘")
|
||||
def getActor(htmlcode): #获取女优
|
||||
b=[]
|
||||
soup=BeautifulSoup(htmlcode,'lxml')
|
||||
@@ -60,9 +67,12 @@ def getNum(htmlcode): #获取番号
|
||||
html = etree.fromstring(htmlcode, etree.HTMLParser())
|
||||
result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[1]/span[2]/text()')).strip(" ['']")
|
||||
return result
|
||||
def getDirector(htmlcode):  # get the director
    """Return the director name scraped from a detail page, or ''.

    The director, when present, is the 4th <p> row of the info panel. The
    row's <span> label is checked first because records without a director
    have a different field in that position.

    :param htmlcode: HTML text of the detail page.
    :return: the director name, or '' when row 4 is not labelled '監督:'.
    """
    html = etree.fromstring(htmlcode, etree.HTMLParser())
    label = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[4]/span/text()')).strip(" ['']")
    if label != '監督:':
        # The record may have no director data at all.
        return ''
    return str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[4]/a/text()')).strip(" ['']")
|
||||
def getCID(htmlcode):
|
||||
html = etree.fromstring(htmlcode, etree.HTMLParser())
|
||||
@@ -77,14 +87,18 @@ def getOutline(htmlcode): #获取演员
|
||||
return result
|
||||
except:
|
||||
return ''
|
||||
def getSerise(htmlcode):  # get the series
    """Return the series name scraped from a detail page, or ''.

    The series row is the 6th <p> of the info panel when the record has no
    director row and the 7th <p> when it has one, so both positions are
    probed and the <span> label is checked before the value is read.

    :param htmlcode: HTML text of the detail page.
    :return: the series name, or '' when neither row is labelled 'シリーズ:'.
    """
    html = etree.fromstring(htmlcode, etree.HTMLParser())
    panel = '/html/body/div[5]/div[1]/div[2]'
    # Row 6: record without a director; row 7: record with a director.
    for row in (6, 7):
        label = str(html.xpath('%s/p[%d]/span/text()' % (panel, row))).strip(" ['']")
        if label == 'シリーズ:':
            return str(html.xpath('%s/p[%d]/a/text()' % (panel, row))).strip(" ['']")
    return ''
|
||||
def getTag(htmlcode): # 获取标签
|
||||
tag = []
|
||||
soup = BeautifulSoup(htmlcode, 'lxml')
|
||||
a = soup.find_all(attrs={'class': 'genre'})
|
||||
@@ -94,10 +108,11 @@ def getTag(htmlcode): # 获取演员
|
||||
tag.append(i.get_text())
|
||||
return tag
|
||||
|
||||
|
||||
def main_uncensored(number):
|
||||
htmlcode = get_html('https://www.javbus.com/' + number)
|
||||
htmlcode = get_html('https://www.javbus.com/ja/' + number)
|
||||
if getTitle(htmlcode) == '':
|
||||
htmlcode = get_html('https://www.javbus.com/' + number.replace('-','_'))
|
||||
htmlcode = get_html('https://www.javbus.com/ja/' + number.replace('-','_'))
|
||||
try:
|
||||
dww_htmlcode = fanza.main_htmlcode(getCID(htmlcode))
|
||||
except:
|
||||
@@ -117,7 +132,7 @@ def main_uncensored(number):
|
||||
'label': getSerise(htmlcode),
|
||||
'imagecut': 0,
|
||||
'actor_photo': '',
|
||||
'website': 'https://www.javbus.com/' + number,
|
||||
'website': 'https://www.javbus.com/ja/' + number,
|
||||
'source': 'javbus.py',
|
||||
'series': getSerise(htmlcode),
|
||||
}
|
||||
@@ -128,7 +143,7 @@ def main_uncensored(number):
|
||||
def main(number):
|
||||
try:
|
||||
try:
|
||||
htmlcode = get_html('https://www.javbus.com/' + number)
|
||||
htmlcode = get_html('https://www.javbus.com/ja/' + number)
|
||||
try:
|
||||
dww_htmlcode = fanza.main_htmlcode(getCID(htmlcode))
|
||||
except:
|
||||
@@ -148,7 +163,7 @@ def main(number):
|
||||
'tag': getTag(htmlcode),
|
||||
'label': getSerise(htmlcode),
|
||||
'actor_photo': getActorPhoto(htmlcode),
|
||||
'website': 'https://www.javbus.com/' + number,
|
||||
'website': 'https://www.javbus.com/ja/' + number,
|
||||
'source': 'javbus.py',
|
||||
'series': getSerise(htmlcode),
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user