Translate to Simplified Chinese
This commit is contained in:
@@ -123,10 +123,24 @@ def getTag(text):
|
||||
result = html.xpath(
|
||||
"//td[contains(text(),'ジャンル:')]/following-sibling::td/a/text()"
|
||||
)
|
||||
total = []
|
||||
for i in result:
|
||||
try:
|
||||
total.append(translateTag_to_sc(i))
|
||||
except:
|
||||
pass
|
||||
return total
|
||||
except:
|
||||
result = html.xpath(
|
||||
"//td[contains(text(),'ジャンル:')]/following-sibling::td/text()"
|
||||
)
|
||||
total = []
|
||||
for i in result:
|
||||
try:
|
||||
total.append(translateTag_to_sc(i))
|
||||
except:
|
||||
pass
|
||||
return total
|
||||
return result
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,6 @@ import json
|
||||
from ADC_function import *
|
||||
from WebCrawler import fanza
|
||||
|
||||
|
||||
def getActorPhoto(htmlcode): #//*[@id="star_qdt"]/li/a/img
|
||||
soup = BeautifulSoup(htmlcode, 'lxml')
|
||||
a = soup.find_all(attrs={'class': 'star-name'})
|
||||
@@ -32,10 +31,10 @@ def getTitle(htmlcode): #获取标题
|
||||
def getStudio(htmlcode): #获取厂商 已修改
|
||||
html = etree.fromstring(htmlcode,etree.HTMLParser())
|
||||
# 如果记录中没有导演,厂商排在第4位
|
||||
if 'メーカー:' == str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[4]/span/text()')).strip(" ['']"):
|
||||
if '製作商:' == str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[4]/span/text()')).strip(" ['']"):
|
||||
result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[4]/a/text()')).strip(" ['']")
|
||||
# 如果记录中有导演,厂商排在第5位
|
||||
elif 'メーカー:' == str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[5]/span/text()')).strip(" ['']"):
|
||||
elif '製作商:' == str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[5]/span/text()')).strip(" ['']"):
|
||||
result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[5]/a/text()')).strip(" ['']")
|
||||
else:
|
||||
result = ''
|
||||
@@ -69,7 +68,7 @@ def getNum(htmlcode): #获取番号
|
||||
return result
|
||||
def getDirector(htmlcode): #获取导演 已修改
|
||||
html = etree.fromstring(htmlcode, etree.HTMLParser())
|
||||
if '監督:' == str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[4]/span/text()')).strip(" ['']"):
|
||||
if '導演:' == str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[4]/span/text()')).strip(" ['']"):
|
||||
result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[4]/a/text()')).strip(" ['']")
|
||||
else:
|
||||
result = '' # 记录中有可能没有导演数据
|
||||
@@ -90,10 +89,10 @@ def getOutline(htmlcode): #获取演员
|
||||
def getSerise(htmlcode): #获取系列 已修改
|
||||
html = etree.fromstring(htmlcode, etree.HTMLParser())
|
||||
# 如果记录中没有导演,系列排在第6位
|
||||
if 'シリーズ:' == str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[6]/span/text()')).strip(" ['']"):
|
||||
if '系列:' == str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[6]/span/text()')).strip(" ['']"):
|
||||
result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[6]/a/text()')).strip(" ['']")
|
||||
# 如果记录中有导演,系列排在第7位
|
||||
elif 'シリーズ:' == str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[7]/span/text()')).strip(" ['']"):
|
||||
elif '系列:' == str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[7]/span/text()')).strip(" ['']"):
|
||||
result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[7]/a/text()')).strip(" ['']")
|
||||
else:
|
||||
result = ''
|
||||
@@ -105,10 +104,9 @@ def getTag(htmlcode): # 获取标签
|
||||
for i in a:
|
||||
if 'onmouseout' in str(i):
|
||||
continue
|
||||
tag.append(i.get_text())
|
||||
tag.append(translateTag_to_sc(i.get_text()))
|
||||
return tag
|
||||
|
||||
|
||||
def main_uncensored(number):
|
||||
htmlcode = get_html('https://www.javbus.com/ja/' + number)
|
||||
if getTitle(htmlcode) == '':
|
||||
@@ -143,7 +141,7 @@ def main_uncensored(number):
|
||||
def main(number):
|
||||
try:
|
||||
try:
|
||||
htmlcode = get_html('https://www.javbus.com/ja/' + number)
|
||||
htmlcode = get_html('https://www.javbus.com/' + number)
|
||||
try:
|
||||
dww_htmlcode = fanza.main_htmlcode(getCID(htmlcode))
|
||||
except:
|
||||
@@ -163,7 +161,7 @@ def main(number):
|
||||
'tag': getTag(htmlcode),
|
||||
'label': getSerise(htmlcode),
|
||||
'actor_photo': getActorPhoto(htmlcode),
|
||||
'website': 'https://www.javbus.com/ja/' + number,
|
||||
'website': 'https://www.javbus.com/' + number,
|
||||
'source': 'javbus.py',
|
||||
'series': getSerise(htmlcode),
|
||||
}
|
||||
|
||||
@@ -60,10 +60,23 @@ def getTag(a):
|
||||
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
|
||||
try:
|
||||
result = html.xpath('//strong[contains(text(),"類別")]/../span/a/text()')
|
||||
return result
|
||||
total = []
|
||||
for i in result:
|
||||
try:
|
||||
total.append(translateTag_to_sc(i))
|
||||
except:
|
||||
pass
|
||||
return total
|
||||
|
||||
except:
|
||||
result = html.xpath('//strong[contains(text(),"類別")]/../span/text()')
|
||||
return result
|
||||
total = []
|
||||
for i in result:
|
||||
try:
|
||||
total.append(translateTag_to_sc(i))
|
||||
except:
|
||||
pass
|
||||
return total
|
||||
|
||||
def getCover_small(a, index=0):
|
||||
# same issue mentioned below,
|
||||
@@ -162,4 +175,4 @@ def main(number):
|
||||
# main('DV-1562')
|
||||
# input("[+][+]Press enter key exit, you can check the error message before you exit.\n[+][+]按回车键结束,你可以在结束之前查看错误信息。")
|
||||
if __name__ == "__main__":
|
||||
print(main('GS-351'))
|
||||
print(main('ipx-292'))
|
||||
|
||||
@@ -64,7 +64,14 @@ def getTag(a):
|
||||
'\\n')
|
||||
result2 = str(html.xpath('//th[contains(text(),"ジャンル:")]/../td/text()')).strip(" ['']").strip('\\n ').strip(
|
||||
'\\n')
|
||||
return str(result1 + result2).strip('+').replace("', '\\n",",").replace("', '","").replace('"','').replace(',,','').split(',')
|
||||
result = str(result1 + result2).strip('+').replace("', '\\n",",").replace("', '","").replace('"','').replace(',,','').split(',')
|
||||
total = []
|
||||
for i in result:
|
||||
try:
|
||||
total.append(translateTag_to_sc(i))
|
||||
except:
|
||||
pass
|
||||
return total
|
||||
def getCover(htmlcode):
|
||||
html = etree.fromstring(htmlcode, etree.HTMLParser())
|
||||
result = str(html.xpath('//*[@id="center_column"]/div[1]/div[1]/div/div/h2/img/@src')).strip(" ['']")
|
||||
|
||||
@@ -5,6 +5,7 @@ success_output_folder=JAV_output
|
||||
soft_link=0
|
||||
failed_move=1
|
||||
auto_exit=0
|
||||
transalte_to_sc=1
|
||||
|
||||
[proxy]
|
||||
;proxytype: http or socks5 or socks5h
|
||||
|
||||
@@ -33,6 +33,8 @@ class Config:
|
||||
return self.conf.getboolean("common", "failed_move")
|
||||
def auto_exit(self) -> bool:
|
||||
return self.conf.getboolean("common", "auto_exit")
|
||||
def transalte_to_sc(self) -> bool:
|
||||
return self.conf.getboolean("common", "transalte_to_sc")
|
||||
|
||||
def proxy(self) -> [str, int, int, str]:
|
||||
try:
|
||||
@@ -87,13 +89,14 @@ class Config:
|
||||
conf.set(sec1, "soft_link", "0")
|
||||
conf.set(sec1, "failed_move", "1")
|
||||
conf.set(sec1, "auto_exit", "0")
|
||||
conf.set(sec1, "transalte_to_sc", "1")
|
||||
|
||||
sec2 = "proxy"
|
||||
conf.add_section(sec2)
|
||||
conf.set(sec2, "proxy", "127.0.0.1:1080")
|
||||
conf.set(sec2, "timeout", "10")
|
||||
conf.set(sec2, "retry", "3")
|
||||
conf.set(sec2, "type", "http")
|
||||
conf.set(sec2, "type", "socks5")
|
||||
|
||||
sec3 = "Name_Rule"
|
||||
conf.add_section(sec3)
|
||||
|
||||
9
core.py
9
core.py
@@ -484,13 +484,8 @@ def get_part(filepath, failed_folder):
|
||||
def debug_print(data: json):
|
||||
try:
|
||||
print("[+] ---Debug info---")
|
||||
for i, v in data.items():
|
||||
if i == "outline":
|
||||
print("[+] -", i, " :", len(v), "characters")
|
||||
continue
|
||||
if i == "actor_photo" or i == "year":
|
||||
continue
|
||||
print("[+] -", "%-11s" % i, ":", v)
|
||||
|
||||
|
||||
print("[+] ---Debug info---")
|
||||
except:
|
||||
pass
|
||||
|
||||
Reference in New Issue
Block a user