Translate to Simplified Chinese

This commit is contained in:
root
2020-09-26 16:57:27 +08:00
parent 4d7bf88ba2
commit c319671c5d
7 changed files with 53 additions and 22 deletions

View File

@@ -123,10 +123,24 @@ def getTag(text):
result = html.xpath( result = html.xpath(
"//td[contains(text(),'ジャンル:')]/following-sibling::td/a/text()" "//td[contains(text(),'ジャンル:')]/following-sibling::td/a/text()"
) )
total = []
for i in result:
try:
total.append(translateTag_to_sc(i))
except:
pass
return total
except: except:
result = html.xpath( result = html.xpath(
"//td[contains(text(),'ジャンル:')]/following-sibling::td/text()" "//td[contains(text(),'ジャンル:')]/following-sibling::td/text()"
) )
total = []
for i in result:
try:
total.append(translateTag_to_sc(i))
except:
pass
return total
return result return result

View File

@@ -8,7 +8,6 @@ import json
from ADC_function import * from ADC_function import *
from WebCrawler import fanza from WebCrawler import fanza
def getActorPhoto(htmlcode): #//*[@id="star_qdt"]/li/a/img def getActorPhoto(htmlcode): #//*[@id="star_qdt"]/li/a/img
soup = BeautifulSoup(htmlcode, 'lxml') soup = BeautifulSoup(htmlcode, 'lxml')
a = soup.find_all(attrs={'class': 'star-name'}) a = soup.find_all(attrs={'class': 'star-name'})
@@ -32,10 +31,10 @@ def getTitle(htmlcode): #获取标题
def getStudio(htmlcode): #获取厂商 已修改 def getStudio(htmlcode): #获取厂商 已修改
html = etree.fromstring(htmlcode,etree.HTMLParser()) html = etree.fromstring(htmlcode,etree.HTMLParser())
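# If the record has no director, the studio is in the 4th <p> element # If the record has no director, the studio is in the 4th <p> element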
if 'メーカー:' == str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[4]/span/text()')).strip(" ['']"): if '製作商:' == str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[4]/span/text()')).strip(" ['']"):
result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[4]/a/text()')).strip(" ['']") result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[4]/a/text()')).strip(" ['']")
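# If the record has a director, the studio is in the 5th <p> element # If the record has a director, the studio is in the 5th <p> element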
elif 'メーカー:' == str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[5]/span/text()')).strip(" ['']"): elif '製作商:' == str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[5]/span/text()')).strip(" ['']"):
result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[5]/a/text()')).strip(" ['']") result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[5]/a/text()')).strip(" ['']")
else: else:
result = '' result = ''
@@ -69,7 +68,7 @@ def getNum(htmlcode): #获取番号
return result return result
def getDirector(htmlcode): #获取导演 已修改 def getDirector(htmlcode): #获取导演 已修改
html = etree.fromstring(htmlcode, etree.HTMLParser()) html = etree.fromstring(htmlcode, etree.HTMLParser())
if '監督:' == str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[4]/span/text()')).strip(" ['']"): if '導演:' == str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[4]/span/text()')).strip(" ['']"):
result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[4]/a/text()')).strip(" ['']") result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[4]/a/text()')).strip(" ['']")
else: else:
result = '' # 记录中有可能没有导演数据 result = '' # 记录中有可能没有导演数据
@@ -90,10 +89,10 @@ def getOutline(htmlcode): #获取演员
def getSerise(htmlcode): #获取系列 已修改 def getSerise(htmlcode): #获取系列 已修改
html = etree.fromstring(htmlcode, etree.HTMLParser()) html = etree.fromstring(htmlcode, etree.HTMLParser())
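# If the record has no director, the series is in the 6th <p> element # If the record has no director, the series is in the 6th <p> element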
if 'シリーズ:' == str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[6]/span/text()')).strip(" ['']"): if '系列:' == str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[6]/span/text()')).strip(" ['']"):
result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[6]/a/text()')).strip(" ['']") result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[6]/a/text()')).strip(" ['']")
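# If the record has a director, the series is in the 7th <p> element # If the record has a director, the series is in the 7th <p> element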
elif 'シリーズ:' == str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[7]/span/text()')).strip(" ['']"): elif '系列:' == str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[7]/span/text()')).strip(" ['']"):
result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[7]/a/text()')).strip(" ['']") result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[7]/a/text()')).strip(" ['']")
else: else:
result = '' result = ''
@@ -105,10 +104,9 @@ def getTag(htmlcode): # 获取标签
for i in a: for i in a:
if 'onmouseout' in str(i): if 'onmouseout' in str(i):
continue continue
tag.append(i.get_text()) tag.append(translateTag_to_sc(i.get_text()))
return tag return tag
def main_uncensored(number): def main_uncensored(number):
htmlcode = get_html('https://www.javbus.com/ja/' + number) htmlcode = get_html('https://www.javbus.com/ja/' + number)
if getTitle(htmlcode) == '': if getTitle(htmlcode) == '':
@@ -143,7 +141,7 @@ def main_uncensored(number):
def main(number): def main(number):
try: try:
try: try:
htmlcode = get_html('https://www.javbus.com/ja/' + number) htmlcode = get_html('https://www.javbus.com/' + number)
try: try:
dww_htmlcode = fanza.main_htmlcode(getCID(htmlcode)) dww_htmlcode = fanza.main_htmlcode(getCID(htmlcode))
except: except:
@@ -163,7 +161,7 @@ def main(number):
'tag': getTag(htmlcode), 'tag': getTag(htmlcode),
'label': getSerise(htmlcode), 'label': getSerise(htmlcode),
'actor_photo': getActorPhoto(htmlcode), 'actor_photo': getActorPhoto(htmlcode),
'website': 'https://www.javbus.com/ja/' + number, 'website': 'https://www.javbus.com/' + number,
'source': 'javbus.py', 'source': 'javbus.py',
'series': getSerise(htmlcode), 'series': getSerise(htmlcode),
} }

View File

@@ -60,10 +60,23 @@ def getTag(a):
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
try: try:
result = html.xpath('//strong[contains(text(),"類別")]/../span/a/text()') result = html.xpath('//strong[contains(text(),"類別")]/../span/a/text()')
return result total = []
for i in result:
try:
total.append(translateTag_to_sc(i))
except:
pass
return total
except: except:
result = html.xpath('//strong[contains(text(),"類別")]/../span/text()') result = html.xpath('//strong[contains(text(),"類別")]/../span/text()')
return result total = []
for i in result:
try:
total.append(translateTag_to_sc(i))
except:
pass
return total
def getCover_small(a, index=0): def getCover_small(a, index=0):
# same issue mentioned below, # same issue mentioned below,
@@ -162,4 +175,4 @@ def main(number):
# main('DV-1562') # main('DV-1562')
# input("[+][+]Press the Enter key to exit; you can check the error message before you exit.\n[+][+]按回车键结束,你可以在结束之前查看错误信息。") # input("[+][+]Press the Enter key to exit; you can check the error message before you exit.\n[+][+]按回车键结束,你可以在结束之前查看错误信息。")
if __name__ == "__main__": if __name__ == "__main__":
print(main('GS-351')) print(main('ipx-292'))

View File

@@ -64,7 +64,14 @@ def getTag(a):
'\\n') '\\n')
result2 = str(html.xpath('//th[contains(text(),"ジャンル:")]/../td/text()')).strip(" ['']").strip('\\n ').strip( result2 = str(html.xpath('//th[contains(text(),"ジャンル:")]/../td/text()')).strip(" ['']").strip('\\n ').strip(
'\\n') '\\n')
return str(result1 + result2).strip('+').replace("', '\\n",",").replace("', '","").replace('"','').replace(',,','').split(',') result = str(result1 + result2).strip('+').replace("', '\\n",",").replace("', '","").replace('"','').replace(',,','').split(',')
total = []
for i in result:
try:
total.append(translateTag_to_sc(i))
except:
pass
return total
def getCover(htmlcode): def getCover(htmlcode):
html = etree.fromstring(htmlcode, etree.HTMLParser()) html = etree.fromstring(htmlcode, etree.HTMLParser())
result = str(html.xpath('//*[@id="center_column"]/div[1]/div[1]/div/div/h2/img/@src')).strip(" ['']") result = str(html.xpath('//*[@id="center_column"]/div[1]/div[1]/div/div/h2/img/@src')).strip(" ['']")

View File

@@ -5,6 +5,7 @@ success_output_folder=JAV_output
soft_link=0 soft_link=0
failed_move=1 failed_move=1
auto_exit=0 auto_exit=0
transalte_to_sc=1
[proxy] [proxy]
;proxytype: http or socks5 or socks5h ;proxytype: http or socks5 or socks5h

View File

@@ -33,6 +33,8 @@ class Config:
return self.conf.getboolean("common", "failed_move") return self.conf.getboolean("common", "failed_move")
def auto_exit(self) -> bool: def auto_exit(self) -> bool:
return self.conf.getboolean("common", "auto_exit") return self.conf.getboolean("common", "auto_exit")
def transalte_to_sc(self) -> bool:
return self.conf.getboolean("common", "transalte_to_sc")
def proxy(self) -> [str, int, int, str]: def proxy(self) -> [str, int, int, str]:
try: try:
@@ -87,13 +89,14 @@ class Config:
conf.set(sec1, "soft_link", "0") conf.set(sec1, "soft_link", "0")
conf.set(sec1, "failed_move", "1") conf.set(sec1, "failed_move", "1")
conf.set(sec1, "auto_exit", "0") conf.set(sec1, "auto_exit", "0")
conf.set(sec1, "transalte_to_sc", "1")
sec2 = "proxy" sec2 = "proxy"
conf.add_section(sec2) conf.add_section(sec2)
conf.set(sec2, "proxy", "127.0.0.1:1080") conf.set(sec2, "proxy", "127.0.0.1:1080")
conf.set(sec2, "timeout", "10") conf.set(sec2, "timeout", "10")
conf.set(sec2, "retry", "3") conf.set(sec2, "retry", "3")
conf.set(sec2, "type", "http") conf.set(sec2, "type", "socks5")
sec3 = "Name_Rule" sec3 = "Name_Rule"
conf.add_section(sec3) conf.add_section(sec3)

View File

@@ -484,13 +484,8 @@ def get_part(filepath, failed_folder):
def debug_print(data: json): def debug_print(data: json):
try: try:
print("[+] ---Debug info---") print("[+] ---Debug info---")
for i, v in data.items():
if i == "outline":
print("[+] -", i, " :", len(v), "characters")
continue
if i == "actor_photo" or i == "year":
continue
print("[+] -", "%-11s" % i, ":", v)
print("[+] ---Debug info---") print("[+] ---Debug info---")
except: except:
pass pass