Translate to Simplified Chinese
@@ -123,10 +123,24 @@ def getTag(text):
         result = html.xpath(
             "//td[contains(text(),'ジャンル:')]/following-sibling::td/a/text()"
         )
+        total = []
+        for i in result:
+            try:
+                total.append(translateTag_to_sc(i))
+            except:
+                pass
+        return total
     except:
         result = html.xpath(
             "//td[contains(text(),'ジャンル:')]/following-sibling::td/text()"
         )
+        total = []
+        for i in result:
+            try:
+                total.append(translateTag_to_sc(i))
+            except:
+                pass
+        return total
     return result


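Every getTag() touched by this commit gains the same post-processing loop: each tag from the xpath result is passed through translateTag_to_sc, and a tag whose translation fails is silently skipped instead of aborting the scrape. Below is a minimal, self-contained sketch of that loop; the translateTag_to_sc stub and its mapping are placeholders, since the real helper lives elsewhere in the repository (these files pull it in through an existing star import) and its internals are not part of this diff.

def translateTag_to_sc(tag):
    # placeholder stand-in for the repository's real translation helper
    mapping = {'ジャンル': '类别', '単体作品': '单体作品'}  # hypothetical entries
    return mapping[tag]

def translate_tags(result):
    total = []
    for i in result:
        try:
            total.append(translateTag_to_sc(i))
        except:
            pass  # a tag that cannot be translated is dropped, not fatal
    return total

print(translate_tags(['ジャンル', '単体作品', 'unknown']))  # -> ['类别', '单体作品']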
@@ -8,7 +8,6 @@ import json
 from ADC_function import *
 from WebCrawler import fanza

-
 def getActorPhoto(htmlcode): #//*[@id="star_qdt"]/li/a/img
     soup = BeautifulSoup(htmlcode, 'lxml')
     a = soup.find_all(attrs={'class': 'star-name'})
@@ -32,10 +31,10 @@ def getTitle(htmlcode): #获取标题
 def getStudio(htmlcode): #获取厂商 已修改
     html = etree.fromstring(htmlcode,etree.HTMLParser())
     # 如果记录中冇导演,厂商排在第4位
-    if 'メーカー:' == str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[4]/span/text()')).strip(" ['']"):
+    if '製作商:' == str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[4]/span/text()')).strip(" ['']"):
         result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[4]/a/text()')).strip(" ['']")
     # 如果记录中有导演,厂商排在第5位
-    elif 'メーカー:' == str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[5]/span/text()')).strip(" ['']"):
+    elif '製作商:' == str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[5]/span/text()')).strip(" ['']"):
         result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[5]/a/text()')).strip(" ['']")
     else:
         result = ''
@@ -69,7 +68,7 @@ def getNum(htmlcode): #获取番号
     return result
 def getDirector(htmlcode): #获取导演 已修改
     html = etree.fromstring(htmlcode, etree.HTMLParser())
-    if '監督:' == str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[4]/span/text()')).strip(" ['']"):
+    if '導演:' == str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[4]/span/text()')).strip(" ['']"):
         result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[4]/a/text()')).strip(" ['']")
     else:
         result = '' # 记录中有可能没有导演数据
@@ -90,10 +89,10 @@ def getOutline(htmlcode): #获取演员
 def getSerise(htmlcode): #获取系列 已修改
     html = etree.fromstring(htmlcode, etree.HTMLParser())
     # 如果记录中冇导演,系列排在第6位
-    if 'シリーズ:' == str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[6]/span/text()')).strip(" ['']"):
+    if '系列:' == str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[6]/span/text()')).strip(" ['']"):
         result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[6]/a/text()')).strip(" ['']")
     # 如果记录中有导演,系列排在第7位
-    elif 'シリーズ:' == str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[7]/span/text()')).strip(" ['']"):
+    elif '系列:' == str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[7]/span/text()')).strip(" ['']"):
         result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[7]/a/text()')).strip(" ['']")
     else:
         result = ''
@@ -105,10 +104,9 @@ def getTag(htmlcode): # 获取标签
     for i in a:
         if 'onmouseout' in str(i):
             continue
-        tag.append(i.get_text())
+        tag.append(translateTag_to_sc(i.get_text()))
     return tag

-
 def main_uncensored(number):
     htmlcode = get_html('https://www.javbus.com/ja/' + number)
     if getTitle(htmlcode) == '':
@@ -143,7 +141,7 @@ def main_uncensored(number):
 def main(number):
     try:
         try:
-            htmlcode = get_html('https://www.javbus.com/ja/' + number)
+            htmlcode = get_html('https://www.javbus.com/' + number)
             try:
                 dww_htmlcode = fanza.main_htmlcode(getCID(htmlcode))
             except:
@@ -163,7 +161,7 @@ def main(number):
             'tag': getTag(htmlcode),
             'label': getSerise(htmlcode),
             'actor_photo': getActorPhoto(htmlcode),
-            'website': 'https://www.javbus.com/ja/' + number,
+            'website': 'https://www.javbus.com/' + number,
             'source': 'javbus.py',
             'series': getSerise(htmlcode),
         }
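Besides wrapping tags in translateTag_to_sc, main() above now fetches https://www.javbus.com/ without the /ja/ prefix, so the pages come back in Chinese and the positional label checks in getStudio, getDirector and getSerise compare against the Chinese labels 製作商:, 導演: and 系列: instead of the Japanese ones. A hedged sketch of that label-then-sibling pattern follows; the markup is a simplified stand-in, not the real javbus page.

from lxml import etree

# Simplified stand-in markup: the field rows are sibling <p> elements,
# and the studio row is the 4th one when there is no director row.
SAMPLE = """
<html><body><div>
<p><span>識別碼:</span> <a>ABC-123</a></p>
<p><span>發行日期:</span> 2020-01-01</p>
<p><span>長度:</span> 120分鐘</p>
<p><span>製作商:</span> <a>Example Studio</a></p>
</div></body></html>
"""

def get_studio(html):
    # check the label in p[4] (no director) and then p[5] (director present),
    # and only read the sibling <a> text once the label matches
    for pos in (4, 5):
        label = html.xpath('//p[%d]/span/text()' % pos)
        if label and label[0] == '製作商:':
            value = html.xpath('//p[%d]/a/text()' % pos)
            return value[0] if value else ''
    return ''

html = etree.fromstring(SAMPLE, etree.HTMLParser())
print(get_studio(html))  # -> Example Studio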
@@ -60,10 +60,23 @@ def getTag(a):
     html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
     try:
         result = html.xpath('//strong[contains(text(),"類別")]/../span/a/text()')
-        return result
+        total = []
+        for i in result:
+            try:
+                total.append(translateTag_to_sc(i))
+            except:
+                pass
+        return total
+
     except:
         result = html.xpath('//strong[contains(text(),"類別")]/../span/text()')
-        return result
+        total = []
+        for i in result:
+            try:
+                total.append(translateTag_to_sc(i))
+            except:
+                pass
+        return total

 def getCover_small(a, index=0):
     # same issue mentioned below,
@@ -162,4 +175,4 @@ def main(number):
 # main('DV-1562')
 # input("[+][+]Press enter key exit, you can check the error messge before you exit.\n[+][+]按回车键结束,你可以在结束之前查看和错误信息。")
 if __name__ == "__main__":
-    print(main('GS-351'))
+    print(main('ipx-292'))
@@ -64,7 +64,14 @@ def getTag(a):
         '\\n')
     result2 = str(html.xpath('//th[contains(text(),"ジャンル:")]/../td/text()')).strip(" ['']").strip('\\n ').strip(
         '\\n')
-    return str(result1 + result2).strip('+').replace("', '\\n",",").replace("', '","").replace('"','').replace(',,','').split(',')
+    result = str(result1 + result2).strip('+').replace("', '\\n",",").replace("', '","").replace('"','').replace(',,','').split(',')
+    total = []
+    for i in result:
+        try:
+            total.append(translateTag_to_sc(i))
+        except:
+            pass
+    return total
 def getCover(htmlcode):
     html = etree.fromstring(htmlcode, etree.HTMLParser())
     result = str(html.xpath('//*[@id="center_column"]/div[1]/div[1]/div/div/h2/img/@src')).strip(" ['']")
@@ -5,6 +5,7 @@ success_output_folder=JAV_output
 soft_link=0
 failed_move=1
 auto_exit=0
+transalte_to_sc=1

 [proxy]
 ;proxytype: http or socks5 or socks5h
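config.ini gains a transalte_to_sc switch in the [common] section (the spelling is the repository's own and matches the Config accessor added below). A small sketch of reading such a flag back with configparser; the filename and the fallback behaviour here are illustrative assumptions, not taken from the diff.

import configparser

conf = configparser.ConfigParser()
conf.read("config.ini")  # a missing file simply leaves the parser empty
# getboolean accepts 1/0, yes/no, true/false; assume enabled when absent
if conf.getboolean("common", "transalte_to_sc", fallback=True):
    print("tag translation to Simplified Chinese is enabled")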
@@ -33,6 +33,8 @@ class Config:
         return self.conf.getboolean("common", "failed_move")
     def auto_exit(self) -> bool:
         return self.conf.getboolean("common", "auto_exit")
+    def transalte_to_sc(self) -> bool:
+        return self.conf.getboolean("common", "transalte_to_sc")

     def proxy(self) -> [str, int, int, str]:
         try:
@@ -87,13 +89,14 @@ class Config:
         conf.set(sec1, "soft_link", "0")
         conf.set(sec1, "failed_move", "1")
         conf.set(sec1, "auto_exit", "0")
+        conf.set(sec1, "transalte_to_sc", "1")

         sec2 = "proxy"
         conf.add_section(sec2)
         conf.set(sec2, "proxy", "127.0.0.1:1080")
         conf.set(sec2, "timeout", "10")
         conf.set(sec2, "retry", "3")
-        conf.set(sec2, "type", "http")
+        conf.set(sec2, "type", "socks5")

         sec3 = "Name_Rule"
         conf.add_section(sec3)
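The two Config hunks add a matching transalte_to_sc() accessor and switch the generated defaults on (they also change the default proxy type from http to socks5). A compressed, self-contained sketch of that accessor pattern, assuming the surrounding class keeps a configparser instance in self.conf as the context lines suggest; the real class reads and writes config.ini, which is omitted here.

import configparser

class Config:
    def __init__(self):
        # in-memory defaults only, mirroring the values written by the hunk above
        self.conf = configparser.ConfigParser()
        self.conf.add_section("common")
        self.conf.set("common", "transalte_to_sc", "1")

    def transalte_to_sc(self) -> bool:
        # same accessor shape as the one added to the real Config class
        return self.conf.getboolean("common", "transalte_to_sc")

print(Config().transalte_to_sc())  # -> True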
core.py (9 lines changed)
@@ -484,13 +484,8 @@ def get_part(filepath, failed_folder):
 def debug_print(data: json):
     try:
         print("[+] ---Debug info---")
-        for i, v in data.items():
-            if i == "outline":
-                print("[+] -", i, " :", len(v), "characters")
-                continue
-            if i == "actor_photo" or i == "year":
-                continue
-            print("[+] -", "%-11s" % i, ":", v)
         print("[+] ---Debug info---")
     except:
         pass