改爲使用🗾日本語Metadata,同時對監修、Studio刮削進行細微調整
改爲使用🗾日本語Metadata,同時對監修、Studio刮削進行細微調整
This commit is contained in:
@@ -29,9 +29,16 @@ def getTitle(htmlcode): #获取标题
|
|||||||
return title2
|
return title2
|
||||||
except:
|
except:
|
||||||
return title
|
return title
|
||||||
def getStudio(htmlcode): #获取厂商
|
def getStudio(htmlcode): #获取厂商 已修改
|
||||||
html = etree.fromstring(htmlcode,etree.HTMLParser())
|
html = etree.fromstring(htmlcode,etree.HTMLParser())
|
||||||
result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[5]/a/text()')).strip(" ['']")
|
# 如果记录中冇导演,厂商排在第4位
|
||||||
|
if 'メーカー:' == str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[4]/span/text()')).strip(" ['']"):
|
||||||
|
result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[4]/a/text()')).strip(" ['']")
|
||||||
|
# 如果记录中有导演,厂商排在第5位
|
||||||
|
elif 'メーカー:' == str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[5]/span/text()')).strip(" ['']"):
|
||||||
|
result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[5]/a/text()')).strip(" ['']")
|
||||||
|
else:
|
||||||
|
result = ''
|
||||||
return result
|
return result
|
||||||
def getYear(htmlcode): #获取年份
|
def getYear(htmlcode): #获取年份
|
||||||
html = etree.fromstring(htmlcode,etree.HTMLParser())
|
html = etree.fromstring(htmlcode,etree.HTMLParser())
|
||||||
@@ -45,10 +52,10 @@ def getRelease(htmlcode): #获取出版日期
|
|||||||
html = etree.fromstring(htmlcode, etree.HTMLParser())
|
html = etree.fromstring(htmlcode, etree.HTMLParser())
|
||||||
result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[2]/text()')).strip(" ['']")
|
result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[2]/text()')).strip(" ['']")
|
||||||
return result
|
return result
|
||||||
def getRuntime(htmlcode): #获取分钟
|
def getRuntime(htmlcode): #获取分钟 已修改
|
||||||
soup = BeautifulSoup(htmlcode, 'lxml')
|
html = etree.fromstring(htmlcode, etree.HTMLParser())
|
||||||
a = soup.find(text=re.compile('分鐘'))
|
result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[3]/text()')).strip(" ['']分鐘")
|
||||||
return a
|
return result
|
||||||
def getActor(htmlcode): #获取女优
|
def getActor(htmlcode): #获取女优
|
||||||
b=[]
|
b=[]
|
||||||
soup=BeautifulSoup(htmlcode,'lxml')
|
soup=BeautifulSoup(htmlcode,'lxml')
|
||||||
@@ -60,9 +67,12 @@ def getNum(htmlcode): #获取番号
|
|||||||
html = etree.fromstring(htmlcode, etree.HTMLParser())
|
html = etree.fromstring(htmlcode, etree.HTMLParser())
|
||||||
result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[1]/span[2]/text()')).strip(" ['']")
|
result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[1]/span[2]/text()')).strip(" ['']")
|
||||||
return result
|
return result
|
||||||
def getDirector(htmlcode): #获取导演
|
def getDirector(htmlcode): #获取导演 已修改
|
||||||
html = etree.fromstring(htmlcode, etree.HTMLParser())
|
html = etree.fromstring(htmlcode, etree.HTMLParser())
|
||||||
result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[4]/a/text()')).strip(" ['']")
|
if '監督:' == str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[4]/span/text()')).strip(" ['']"):
|
||||||
|
result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[4]/a/text()')).strip(" ['']")
|
||||||
|
else:
|
||||||
|
result = '' # 记录中有可能没有导演数据
|
||||||
return result
|
return result
|
||||||
def getCID(htmlcode):
|
def getCID(htmlcode):
|
||||||
html = etree.fromstring(htmlcode, etree.HTMLParser())
|
html = etree.fromstring(htmlcode, etree.HTMLParser())
|
||||||
@@ -77,14 +87,18 @@ def getOutline(htmlcode): #获取演员
|
|||||||
return result
|
return result
|
||||||
except:
|
except:
|
||||||
return ''
|
return ''
|
||||||
def getSerise(htmlcode):
|
def getSerise(htmlcode): #获取系列 已修改
|
||||||
try:
|
html = etree.fromstring(htmlcode, etree.HTMLParser())
|
||||||
html = etree.fromstring(htmlcode, etree.HTMLParser())
|
# 如果记录中冇导演,系列排在第6位
|
||||||
|
if 'シリーズ:' == str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[6]/span/text()')).strip(" ['']"):
|
||||||
|
result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[6]/a/text()')).strip(" ['']")
|
||||||
|
# 如果记录中有导演,系列排在第7位
|
||||||
|
elif 'シリーズ:' == str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[7]/span/text()')).strip(" ['']"):
|
||||||
result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[7]/a/text()')).strip(" ['']")
|
result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[7]/a/text()')).strip(" ['']")
|
||||||
return result
|
else:
|
||||||
except:
|
result = ''
|
||||||
return ''
|
return result
|
||||||
def getTag(htmlcode): # 获取演员
|
def getTag(htmlcode): # 获取标签
|
||||||
tag = []
|
tag = []
|
||||||
soup = BeautifulSoup(htmlcode, 'lxml')
|
soup = BeautifulSoup(htmlcode, 'lxml')
|
||||||
a = soup.find_all(attrs={'class': 'genre'})
|
a = soup.find_all(attrs={'class': 'genre'})
|
||||||
@@ -94,10 +108,11 @@ def getTag(htmlcode): # 获取演员
|
|||||||
tag.append(i.get_text())
|
tag.append(i.get_text())
|
||||||
return tag
|
return tag
|
||||||
|
|
||||||
|
|
||||||
def main_uncensored(number):
|
def main_uncensored(number):
|
||||||
htmlcode = get_html('https://www.javbus.com/' + number)
|
htmlcode = get_html('https://www.javbus.com/ja/' + number)
|
||||||
if getTitle(htmlcode) == '':
|
if getTitle(htmlcode) == '':
|
||||||
htmlcode = get_html('https://www.javbus.com/' + number.replace('-','_'))
|
htmlcode = get_html('https://www.javbus.com/ja/' + number.replace('-','_'))
|
||||||
try:
|
try:
|
||||||
dww_htmlcode = fanza.main_htmlcode(getCID(htmlcode))
|
dww_htmlcode = fanza.main_htmlcode(getCID(htmlcode))
|
||||||
except:
|
except:
|
||||||
@@ -117,7 +132,7 @@ def main_uncensored(number):
|
|||||||
'label': getSerise(htmlcode),
|
'label': getSerise(htmlcode),
|
||||||
'imagecut': 0,
|
'imagecut': 0,
|
||||||
'actor_photo': '',
|
'actor_photo': '',
|
||||||
'website': 'https://www.javbus.com/' + number,
|
'website': 'https://www.javbus.com/ja/' + number,
|
||||||
'source': 'javbus.py',
|
'source': 'javbus.py',
|
||||||
'series': getSerise(htmlcode),
|
'series': getSerise(htmlcode),
|
||||||
}
|
}
|
||||||
@@ -128,7 +143,7 @@ def main_uncensored(number):
|
|||||||
def main(number):
|
def main(number):
|
||||||
try:
|
try:
|
||||||
try:
|
try:
|
||||||
htmlcode = get_html('https://www.javbus.com/' + number)
|
htmlcode = get_html('https://www.javbus.com/ja/' + number)
|
||||||
try:
|
try:
|
||||||
dww_htmlcode = fanza.main_htmlcode(getCID(htmlcode))
|
dww_htmlcode = fanza.main_htmlcode(getCID(htmlcode))
|
||||||
except:
|
except:
|
||||||
@@ -148,7 +163,7 @@ def main(number):
|
|||||||
'tag': getTag(htmlcode),
|
'tag': getTag(htmlcode),
|
||||||
'label': getSerise(htmlcode),
|
'label': getSerise(htmlcode),
|
||||||
'actor_photo': getActorPhoto(htmlcode),
|
'actor_photo': getActorPhoto(htmlcode),
|
||||||
'website': 'https://www.javbus.com/' + number,
|
'website': 'https://www.javbus.com/ja/' + number,
|
||||||
'source': 'javbus.py',
|
'source': 'javbus.py',
|
||||||
'series': getSerise(htmlcode),
|
'series': getSerise(htmlcode),
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user