Translate to Simplified Chinese

This commit is contained in:
root
2020-09-26 16:57:27 +08:00
parent 4d7bf88ba2
commit c319671c5d
7 changed files with 53 additions and 22 deletions

View File

@@ -8,7 +8,6 @@ import json
from ADC_function import *
from WebCrawler import fanza
def getActorPhoto(htmlcode): #//*[@id="star_qdt"]/li/a/img
soup = BeautifulSoup(htmlcode, 'lxml')
a = soup.find_all(attrs={'class': 'star-name'})
@@ -32,10 +31,10 @@ def getTitle(htmlcode): #获取标题
def getStudio(htmlcode): #获取厂商 已修改
html = etree.fromstring(htmlcode,etree.HTMLParser())
# 如果记录中冇导演厂商排在第4位
if 'メーカー:' == str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[4]/span/text()')).strip(" ['']"):
if '製作商:' == str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[4]/span/text()')).strip(" ['']"):
result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[4]/a/text()')).strip(" ['']")
# 如果记录中有导演厂商排在第5位
elif 'メーカー:' == str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[5]/span/text()')).strip(" ['']"):
elif '製作商:' == str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[5]/span/text()')).strip(" ['']"):
result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[5]/a/text()')).strip(" ['']")
else:
result = ''
@@ -69,7 +68,7 @@ def getNum(htmlcode): #获取番号
return result
def getDirector(htmlcode): #获取导演 已修改
html = etree.fromstring(htmlcode, etree.HTMLParser())
if '監督:' == str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[4]/span/text()')).strip(" ['']"):
if '導演:' == str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[4]/span/text()')).strip(" ['']"):
result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[4]/a/text()')).strip(" ['']")
else:
result = '' # 记录中有可能没有导演数据
@@ -90,10 +89,10 @@ def getOutline(htmlcode): #获取演员
def getSerise(htmlcode): #获取系列 已修改
html = etree.fromstring(htmlcode, etree.HTMLParser())
# 如果记录中冇导演系列排在第6位
if 'シリーズ:' == str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[6]/span/text()')).strip(" ['']"):
if '系列:' == str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[6]/span/text()')).strip(" ['']"):
result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[6]/a/text()')).strip(" ['']")
# 如果记录中有导演系列排在第7位
elif 'シリーズ:' == str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[7]/span/text()')).strip(" ['']"):
elif '系列:' == str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[7]/span/text()')).strip(" ['']"):
result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[7]/a/text()')).strip(" ['']")
else:
result = ''
@@ -105,10 +104,9 @@ def getTag(htmlcode): # 获取标签
for i in a:
if 'onmouseout' in str(i):
continue
tag.append(i.get_text())
tag.append(translateTag_to_sc(i.get_text()))
return tag
def main_uncensored(number):
htmlcode = get_html('https://www.javbus.com/ja/' + number)
if getTitle(htmlcode) == '':
@@ -143,7 +141,7 @@ def main_uncensored(number):
def main(number):
try:
try:
htmlcode = get_html('https://www.javbus.com/ja/' + number)
htmlcode = get_html('https://www.javbus.com/' + number)
try:
dww_htmlcode = fanza.main_htmlcode(getCID(htmlcode))
except:
@@ -163,7 +161,7 @@ def main(number):
'tag': getTag(htmlcode),
'label': getSerise(htmlcode),
'actor_photo': getActorPhoto(htmlcode),
'website': 'https://www.javbus.com/ja/' + number,
'website': 'https://www.javbus.com/' + number,
'source': 'javbus.py',
'series': getSerise(htmlcode),
}