Translate to Simplified Chinese
@@ -123,10 +123,24 @@ def getTag(text):
         result = html.xpath(
             "//td[contains(text(),'ジャンル:')]/following-sibling::td/a/text()"
         )
+        total = []
+        for i in result:
+            try:
+                total.append(translateTag_to_sc(i))
+            except:
+                pass
+        return total
     except:
         result = html.xpath(
             "//td[contains(text(),'ジャンル:')]/following-sibling::td/text()"
         )
+        total = []
+        for i in result:
+            try:
+                total.append(translateTag_to_sc(i))
+            except:
+                pass
+        return total
     return result


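Every getTag() touched by this commit gains the same post-processing loop: each tag from the xpath result is passed through translateTag_to_sc, and a tag whose translation fails is silently skipped instead of aborting the scrape. Below is a minimal, self-contained sketch of that loop; the translateTag_to_sc stub and its mapping are placeholders, since the real helper lives elsewhere in the repository (these files pull it in through an existing star import) and its internals are not part of this diff.

def translateTag_to_sc(tag):
    # placeholder stand-in for the repository's real translation helper
    mapping = {'ジャンル': '类别', '単体作品': '单体作品'}  # hypothetical entries
    return mapping[tag]

def translate_tags(result):
    total = []
    for i in result:
        try:
            total.append(translateTag_to_sc(i))
        except:
            pass  # a tag that cannot be translated is dropped, not fatal
    return total

print(translate_tags(['ジャンル', '単体作品', 'unknown']))  # -> ['类别', '单体作品']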
@@ -8,7 +8,6 @@ import json
 from ADC_function import *
 from WebCrawler import fanza

-
 def getActorPhoto(htmlcode): #//*[@id="star_qdt"]/li/a/img
     soup = BeautifulSoup(htmlcode, 'lxml')
     a = soup.find_all(attrs={'class': 'star-name'})
@@ -32,10 +31,10 @@ def getTitle(htmlcode): #获取标题
 def getStudio(htmlcode): #获取厂商 已修改
     html = etree.fromstring(htmlcode,etree.HTMLParser())
     # 如果记录中冇导演,厂商排在第4位
-    if 'メーカー:' == str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[4]/span/text()')).strip(" ['']"):
+    if '製作商:' == str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[4]/span/text()')).strip(" ['']"):
         result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[4]/a/text()')).strip(" ['']")
     # 如果记录中有导演,厂商排在第5位
-    elif 'メーカー:' == str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[5]/span/text()')).strip(" ['']"):
+    elif '製作商:' == str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[5]/span/text()')).strip(" ['']"):
         result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[5]/a/text()')).strip(" ['']")
     else:
         result = ''
@@ -69,7 +68,7 @@ def getNum(htmlcode): #获取番号
     return result
 def getDirector(htmlcode): #获取导演 已修改
     html = etree.fromstring(htmlcode, etree.HTMLParser())
-    if '監督:' == str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[4]/span/text()')).strip(" ['']"):
+    if '導演:' == str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[4]/span/text()')).strip(" ['']"):
         result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[4]/a/text()')).strip(" ['']")
     else:
         result = '' # 记录中有可能没有导演数据
@@ -90,10 +89,10 @@ def getOutline(htmlcode): #获取演员
 def getSerise(htmlcode): #获取系列 已修改
     html = etree.fromstring(htmlcode, etree.HTMLParser())
     # 如果记录中冇导演,系列排在第6位
-    if 'シリーズ:' == str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[6]/span/text()')).strip(" ['']"):
+    if '系列:' == str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[6]/span/text()')).strip(" ['']"):
         result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[6]/a/text()')).strip(" ['']")
     # 如果记录中有导演,系列排在第7位
-    elif 'シリーズ:' == str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[7]/span/text()')).strip(" ['']"):
+    elif '系列:' == str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[7]/span/text()')).strip(" ['']"):
         result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[7]/a/text()')).strip(" ['']")
     else:
         result = ''
@@ -105,10 +104,9 @@ def getTag(htmlcode): # 获取标签
     for i in a:
         if 'onmouseout' in str(i):
             continue
-        tag.append(i.get_text())
+        tag.append(translateTag_to_sc(i.get_text()))
     return tag

-
 def main_uncensored(number):
     htmlcode = get_html('https://www.javbus.com/ja/' + number)
     if getTitle(htmlcode) == '':
@@ -143,7 +141,7 @@ def main_uncensored(number):
 def main(number):
     try:
         try:
-            htmlcode = get_html('https://www.javbus.com/ja/' + number)
+            htmlcode = get_html('https://www.javbus.com/' + number)
             try:
                 dww_htmlcode = fanza.main_htmlcode(getCID(htmlcode))
             except:
@@ -163,7 +161,7 @@ def main(number):
             'tag': getTag(htmlcode),
             'label': getSerise(htmlcode),
             'actor_photo': getActorPhoto(htmlcode),
-            'website': 'https://www.javbus.com/ja/' + number,
+            'website': 'https://www.javbus.com/' + number,
             'source': 'javbus.py',
             'series': getSerise(htmlcode),
         }
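Besides wrapping tags in translateTag_to_sc, main() above now fetches https://www.javbus.com/ without the /ja/ prefix, so the pages come back in Chinese and the positional label checks in getStudio, getDirector and getSerise compare against the Chinese labels 製作商:, 導演: and 系列: instead of the Japanese ones. A hedged sketch of that label-then-sibling pattern follows; the markup is a simplified stand-in, not the real javbus page.

from lxml import etree

# Simplified stand-in markup: the field rows are sibling <p> elements,
# and the studio row is the 4th one when there is no director row.
SAMPLE = """
<html><body><div>
<p><span>識別碼:</span> <a>ABC-123</a></p>
<p><span>發行日期:</span> 2020-01-01</p>
<p><span>長度:</span> 120分鐘</p>
<p><span>製作商:</span> <a>Example Studio</a></p>
</div></body></html>
"""

def get_studio(html):
    # check the label in p[4] (no director) and then p[5] (director present),
    # and only read the sibling <a> text once the label matches
    for pos in (4, 5):
        label = html.xpath('//p[%d]/span/text()' % pos)
        if label and label[0] == '製作商:':
            value = html.xpath('//p[%d]/a/text()' % pos)
            return value[0] if value else ''
    return ''

html = etree.fromstring(SAMPLE, etree.HTMLParser())
print(get_studio(html))  # -> Example Studio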
@@ -60,10 +60,23 @@ def getTag(a):
     html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
     try:
         result = html.xpath('//strong[contains(text(),"類別")]/../span/a/text()')
-        return result
+        total = []
+        for i in result:
+            try:
+                total.append(translateTag_to_sc(i))
+            except:
+                pass
+        return total
+
     except:
         result = html.xpath('//strong[contains(text(),"類別")]/../span/text()')
-        return result
+        total = []
+        for i in result:
+            try:
+                total.append(translateTag_to_sc(i))
+            except:
+                pass
+        return total

 def getCover_small(a, index=0):
     # same issue mentioned below,
@@ -162,4 +175,4 @@ def main(number):
 # main('DV-1562')
 # input("[+][+]Press enter key exit, you can check the error messge before you exit.\n[+][+]按回车键结束,你可以在结束之前查看和错误信息。")
 if __name__ == "__main__":
-    print(main('GS-351'))
+    print(main('ipx-292'))
@@ -64,7 +64,14 @@ def getTag(a):
         '\\n')
     result2 = str(html.xpath('//th[contains(text(),"ジャンル:")]/../td/text()')).strip(" ['']").strip('\\n ').strip(
         '\\n')
-    return str(result1 + result2).strip('+').replace("', '\\n",",").replace("', '","").replace('"','').replace(',,','').split(',')
+    result = str(result1 + result2).strip('+').replace("', '\\n",",").replace("', '","").replace('"','').replace(',,','').split(',')
+    total = []
+    for i in result:
+        try:
+            total.append(translateTag_to_sc(i))
+        except:
+            pass
+    return total
 def getCover(htmlcode):
     html = etree.fromstring(htmlcode, etree.HTMLParser())
     result = str(html.xpath('//*[@id="center_column"]/div[1]/div[1]/div/div/h2/img/@src')).strip(" ['']")
@@ -5,6 +5,7 @@ success_output_folder=JAV_output
 soft_link=0
 failed_move=1
 auto_exit=0
+transalte_to_sc=1

 [proxy]
 ;proxytype: http or socks5 or socks5h
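config.ini gains a transalte_to_sc switch in the [common] section (the spelling is the repository's own and matches the Config accessor added below). A small sketch of reading such a flag back with configparser; the filename and the fallback behaviour here are illustrative assumptions, not taken from the diff.

import configparser

conf = configparser.ConfigParser()
conf.read("config.ini")  # a missing file simply leaves the parser empty
# getboolean accepts 1/0, yes/no, true/false; assume enabled when absent
if conf.getboolean("common", "transalte_to_sc", fallback=True):
    print("tag translation to Simplified Chinese is enabled")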
@@ -33,6 +33,8 @@ class Config:
         return self.conf.getboolean("common", "failed_move")
     def auto_exit(self) -> bool:
         return self.conf.getboolean("common", "auto_exit")
+    def transalte_to_sc(self) -> bool:
+        return self.conf.getboolean("common", "transalte_to_sc")

     def proxy(self) -> [str, int, int, str]:
         try:
@@ -87,13 +89,14 @@ class Config:
         conf.set(sec1, "soft_link", "0")
         conf.set(sec1, "failed_move", "1")
         conf.set(sec1, "auto_exit", "0")
+        conf.set(sec1, "transalte_to_sc", "1")

         sec2 = "proxy"
         conf.add_section(sec2)
         conf.set(sec2, "proxy", "127.0.0.1:1080")
         conf.set(sec2, "timeout", "10")
         conf.set(sec2, "retry", "3")
-        conf.set(sec2, "type", "http")
+        conf.set(sec2, "type", "socks5")

         sec3 = "Name_Rule"
         conf.add_section(sec3)
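The two Config hunks add a matching transalte_to_sc() accessor and switch the generated defaults on (they also change the default proxy type from http to socks5). A compressed, self-contained sketch of that accessor pattern, assuming the surrounding class keeps a configparser instance in self.conf as the context lines suggest; the real class reads and writes config.ini, which is omitted here.

import configparser

class Config:
    def __init__(self):
        # in-memory defaults only, mirroring the values written by the hunk above
        self.conf = configparser.ConfigParser()
        self.conf.add_section("common")
        self.conf.set("common", "transalte_to_sc", "1")

    def transalte_to_sc(self) -> bool:
        # same accessor shape as the one added to the real Config class
        return self.conf.getboolean("common", "transalte_to_sc")

print(Config().transalte_to_sc())  # -> True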
core.py (9 lines changed)
@@ -484,13 +484,8 @@ def get_part(filepath, failed_folder):
 def debug_print(data: json):
     try:
         print("[+] ---Debug info---")
-        for i, v in data.items():
-            if i == "outline":
-                print("[+] -", i, " :", len(v), "characters")
-                continue
-            if i == "actor_photo" or i == "year":
-                continue
-            print("[+] -", "%-11s" % i, ":", v)
         print("[+] ---Debug info---")
     except:
         pass