Delete all translate func in all WebCrawlers
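Every hunk in this commit follows the same pattern: the per-site getTag-style helpers stop piping scraped genre strings through translateTag_to_sc (the project's tag-translation helper) and return the raw xpath results instead. A minimal before/after sketch of that shape, reusing the first hunk's xpath; the sample HTML and the identity stub for translateTag_to_sc are illustrative assumptions, not code from this repository:

# Illustrative sketch only: SAMPLE and the translateTag_to_sc stub are assumptions.
from lxml import etree

SAMPLE = "<html><head><meta name='keywords' content='SITE,CODE-123,tagA,tagB'></head></html>"

def translateTag_to_sc(tag):
    # assumed signature str -> str; the real helper maps a tag to Simplified Chinese
    return tag

def getTag_old(html):
    # pre-commit shape: translate every scraped tag
    x = html.xpath('/html/head/meta[@name="keywords"]/@content')[0].split(',')
    return [translateTag_to_sc(i.strip()) for i in x[2:]] if len(x) > 2 else []

def getTag_new(html):
    # post-commit shape: return the raw keyword list untouched
    result = html.xpath('/html/head/meta[@name="keywords"]/@content')[0].split(',')
    return result

html = etree.fromstring(SAMPLE, etree.HTMLParser())
print(getTag_old(html))  # ['tagA', 'tagB']
print(getTag_new(html))  # ['SITE', 'CODE-123', 'tagA', 'tagB']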
@@ -57,8 +57,8 @@ def getCover_small(html):
     result = str(html.xpath('//*[@id="waterfall"]/div/a/div[1]/img/@src')).strip(" ['']")
     return result
 def getTag(html):
-    x = html.xpath('/html/head/meta[@name="keywords"]/@content')[0].split(',')
-    return [translateTag_to_sc(i.strip()) for i in x[2:]] if len(x) > 2 else []
+    result = html.xpath('/html/head/meta[@name="keywords"]/@content')[0].split(',')
+    return result
 def getSeries(html):
     try:
         result1 = str(html.xpath('//span[contains(text(),"系列:")]/../span[2]/text()')).strip(" ['']")

@@ -76,11 +76,8 @@ def get_actor(lx: html.HtmlElement):
     return r

 def get_tag(lx: html.HtmlElement) -> str:
-    r = []
     genres = lx.xpath("//span[@class='spec-content']/a[@itemprop='genre']/text()")
-    for g in genres:
-        r.append(translateTag_to_sc(str(g)))
-    return r
+    return genres

 def get_extrafanart(lx: html.HtmlElement) -> str:
     r = []

@@ -123,25 +123,12 @@ def getTag(text):
         result = html.xpath(
             "//td[contains(text(),'ジャンル:')]/following-sibling::td/a/text()"
         )
-        total = []
-        for i in result:
-            try:
-                total.append(translateTag_to_sc(i))
-            except:
-                pass
-        return total
+        return result
     except:
         result = html.xpath(
             "//td[contains(text(),'ジャンル:')]/following-sibling::td/text()"
         )
-        total = []
-        for i in result:
-            try:
-                total.append(translateTag_to_sc(i))
-            except:
-                pass
-        return total
-        return result
+        return result


 def getCover(text, number):

@@ -14,7 +14,7 @@ def getTitle_fc2com(htmlcode): #获取厂商
         return result
 def getActor_fc2com(htmlcode):
     try:
-        html = etree.fromstring(htmlcode, etree.HTMLParser())
+        htmtml = etree.fromstring(htmlcode, etree.HTMLParser())
         result = html.xpath('//*[@id="top"]/div[1]/section[1]/div/section/div[2]/ul/li[3]/a/text()')[0]
         return result
     except:

@@ -48,10 +48,7 @@ def getCover_fc2com(htmlcode2): #获取厂商 #
 #     return result
 def getTag_fc2com(lx):
     result = lx.xpath("//a[@class='tag tagTag']/text()")
-    tag = []
-    for i in result:
-        tag.append(ADC_function.translateTag_to_sc(i))
-    return tag
+    return result
 def getYear_fc2com(release):
     try:
         result = re.search('\d{4}',release).group()

@@ -72,8 +72,7 @@ def getSerise(html): #获取系列
     return str(x[0]) if len(x) else ''
 def getTag(html): # 获取标签
     klist = html.xpath('/html/head/meta[@name="keywords"]/@content')[0].split(',')
-    taglist = [translateTag_to_sc(v) for v in klist[1:]]
-    return taglist
+    return klist
 def getExtrafanart(htmlcode): # 获取剧照
     html_pather = re.compile(r'<div id=\"sample-waterfall\">[\s\S]*?</div></a>\s*?</div>')
     html = html_pather.search(htmlcode)

@@ -108,23 +108,11 @@ def getRelease(a):
 def getTag(html):
     try:
         result = html.xpath('//strong[contains(text(),"類別")]/../span/a/text()')
-        total = []
-        for i in result:
-            try:
-                total.append(translateTag_to_sc(i))
-            except:
-                pass
-        return total
+        return result

     except:
         result = html.xpath('//strong[contains(text(),"類別")]/../span/text()')
-        total = []
-        for i in result:
-            try:
-                total.append(translateTag_to_sc(i))
-            except:
-                pass
-        return total
+        return result

 def getCover_small(html, index=0):
     # same issue mentioned below,

@@ -65,13 +65,7 @@ def getTag(a):
     result2 = str(html.xpath('//th[contains(text(),"ジャンル:")]/../td/text()')).strip(" ['']").strip('\\n ').strip(
         '\\n')
     result = str(result1 + result2).strip('+').replace("', '\\n",",").replace("', '","").replace('"','').replace(',,','').split(',')
-    total = []
-    for i in result:
-        try:
-            total.append(translateTag_to_sc(i))
-        except:
-            pass
-    return total
+    return result
 def getCover(htmlcode):
     html = etree.fromstring(htmlcode, etree.HTMLParser())
     result = str(html.xpath('//*[@id="EnlargeImage"]/@href')).strip(" ['']")

@@ -90,8 +90,11 @@ def getRelease(html):


 def getTag(html):
-    x = html.xpath('//span[@class="koumoku" and text()="ジャンル"]/../a[starts-with(@href,"/avod/genre/")]/text()')
-    return [translateTag_to_sc(i.strip()) for i in x if len(i.strip())] if len(x) and len(x[0]) else []
+    result = html.xpath('//span[@class="koumoku" and text()="ジャンル"]/../a[starts-with(@href,"/avod/genre/")]/text()')
+    total = []
+    for i in result:
+        total.append(i.replace("\n","").replace("\t",""))
+    return total


 def getCover_small(html, index=0):
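The final hunk is the only place where the removed translation step is replaced by extra logic rather than a bare return: the new getTag still normalises the scraped genre strings by stripping embedded newlines and tabs. A small standalone sketch of that cleanup, with hypothetical scraped values:

# Hypothetical scraped genre strings; only the cleanup mirrors the new getTag above.
result = ["\n\tDrama\n", "\n\tRomance\n"]
total = []
for i in result:
    total.append(i.replace("\n", "").replace("\t", ""))
print(total)  # ['Drama', 'Romance']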