From 41f474314923e2efdf31c0c4b0c63a27017d5f83 Mon Sep 17 00:00:00 2001
From: unknown
Date: Sat, 6 Nov 2021 22:49:19 +0800
Subject: [PATCH] Delete all translate func in all WebCrawlers

---
 WebCrawler/avsox.py   |  4 ++--
 WebCrawler/carib.py   |  5 +----
 WebCrawler/fanza.py   | 17 ++---------------
 WebCrawler/fc2.py     |  7 ++-----
 WebCrawler/javbus.py  |  3 +--
 WebCrawler/javdb.py   | 16 ++--------------
 WebCrawler/mgstage.py |  8 +-------
 WebCrawler/xcity.py   |  7 +++++--
 8 files changed, 16 insertions(+), 51 deletions(-)

diff --git a/WebCrawler/avsox.py b/WebCrawler/avsox.py
index c6e6f00..8b73b83 100644
--- a/WebCrawler/avsox.py
+++ b/WebCrawler/avsox.py
@@ -57,8 +57,8 @@ def getCover_small(html):
     result = str(html.xpath('//*[@id="waterfall"]/div/a/div[1]/img/@src')).strip(" ['']")
     return result
 def getTag(html):
-    x = html.xpath('/html/head/meta[@name="keywords"]/@content')[0].split(',')
-    return [translateTag_to_sc(i.strip()) for i in x[2:]] if len(x) > 2 else []
+    result = html.xpath('/html/head/meta[@name="keywords"]/@content')[0].split(',')
+    return result
 def getSeries(html):
     try:
         result1 = str(html.xpath('//span[contains(text(),"系列:")]/../span[2]/text()')).strip(" ['']")
diff --git a/WebCrawler/carib.py b/WebCrawler/carib.py
index 02b5d5c..47aa0d7 100755
--- a/WebCrawler/carib.py
+++ b/WebCrawler/carib.py
@@ -76,11 +76,8 @@ def get_actor(lx: html.HtmlElement):
     return r
 
 def get_tag(lx: html.HtmlElement) -> str:
-    r = []
     genres = lx.xpath("//span[@class='spec-content']/a[@itemprop='genre']/text()")
-    for g in genres:
-        r.append(translateTag_to_sc(str(g)))
-    return r
+    return genres
 
 def get_extrafanart(lx: html.HtmlElement) -> str:
     r = []
diff --git a/WebCrawler/fanza.py b/WebCrawler/fanza.py
index 8dfb31d..00d8988 100644
--- a/WebCrawler/fanza.py
+++ b/WebCrawler/fanza.py
@@ -123,25 +123,12 @@ def getTag(text):
         result = html.xpath(
             "//td[contains(text(),'ジャンル:')]/following-sibling::td/a/text()"
         )
-        total = []
-        for i in result:
-            try:
-                total.append(translateTag_to_sc(i))
-            except:
-                pass
-        return total
+        return result
     except:
         result = html.xpath(
             "//td[contains(text(),'ジャンル:')]/following-sibling::td/text()"
         )
-        total = []
-        for i in result:
-            try:
-                total.append(translateTag_to_sc(i))
-            except:
-                pass
-        return total
-    return result
+        return result
 
 
 def getCover(text, number):
diff --git a/WebCrawler/fc2.py b/WebCrawler/fc2.py
index 0a51fdc..27bc1a0 100644
--- a/WebCrawler/fc2.py
+++ b/WebCrawler/fc2.py
@@ -14,7 +14,7 @@ def getTitle_fc2com(htmlcode): #获取厂商
         return result
 def getActor_fc2com(htmlcode):
     try:
-        html = etree.fromstring(htmlcode, etree.HTMLParser())
+        htmtml = etree.fromstring(htmlcode, etree.HTMLParser())
         result = html.xpath('//*[@id="top"]/div[1]/section[1]/div/section/div[2]/ul/li[3]/a/text()')[0]
         return result
     except:
@@ -48,10 +48,7 @@ def getCover_fc2com(htmlcode2): #获取厂商
 #
 #     return result
 def getTag_fc2com(lx):
     result = lx.xpath("//a[@class='tag tagTag']/text()")
-    tag = []
-    for i in result:
-        tag.append(ADC_function.translateTag_to_sc(i))
-    return tag
+    return result
 def getYear_fc2com(release):
     try:
         result = re.search('\d{4}',release).group()
diff --git a/WebCrawler/javbus.py b/WebCrawler/javbus.py
index d61db8d..09dc045 100644
--- a/WebCrawler/javbus.py
+++ b/WebCrawler/javbus.py
@@ -72,8 +72,7 @@ def getSerise(html): #获取系列
     return str(x[0]) if len(x) else ''
 def getTag(html): # 获取标签
     klist = html.xpath('/html/head/meta[@name="keywords"]/@content')[0].split(',')
-    taglist = [translateTag_to_sc(v) for v in klist[1:]]
-    return taglist
+    return klist
 def getExtrafanart(htmlcode): # 获取剧照
     html_pather = re.compile(r'[\s\S]*?\s*?')
     html = html_pather.search(htmlcode)
diff --git a/WebCrawler/javdb.py b/WebCrawler/javdb.py
index 9adb7f9..cf9b868 100755
--- a/WebCrawler/javdb.py
+++ b/WebCrawler/javdb.py
@@ -108,23 +108,11 @@ def getRelease(a):
 def getTag(html):
     try:
         result = html.xpath('//strong[contains(text(),"類別")]/../span/a/text()')
-        total = []
-        for i in result:
-            try:
-                total.append(translateTag_to_sc(i))
-            except:
-                pass
-        return total
+        return result
     except:
         result = html.xpath('//strong[contains(text(),"類別")]/../span/text()')
-        total = []
-        for i in result:
-            try:
-                total.append(translateTag_to_sc(i))
-            except:
-                pass
-        return total
+        return result
 
 
 def getCover_small(html, index=0):
     # same issue mentioned below,
diff --git a/WebCrawler/mgstage.py b/WebCrawler/mgstage.py
index 8f58cb6..231ceb4 100644
--- a/WebCrawler/mgstage.py
+++ b/WebCrawler/mgstage.py
@@ -65,13 +65,7 @@ def getTag(a):
     result2 = str(html.xpath('//th[contains(text(),"ジャンル:")]/../td/text()')).strip(" ['']").strip('\\n ').strip(
         '\\n')
     result = str(result1 + result2).strip('+').replace("', '\\n",",").replace("', '","").replace('"','').replace(',,','').split(',')
-    total = []
-    for i in result:
-        try:
-            total.append(translateTag_to_sc(i))
-        except:
-            pass
-    return total
+    return result
 def getCover(htmlcode):
     html = etree.fromstring(htmlcode, etree.HTMLParser())
     result = str(html.xpath('//*[@id="EnlargeImage"]/@href')).strip(" ['']")
diff --git a/WebCrawler/xcity.py b/WebCrawler/xcity.py
index ed381e7..b6851ca 100644
--- a/WebCrawler/xcity.py
+++ b/WebCrawler/xcity.py
@@ -90,8 +90,11 @@ def getRelease(html):
 
 
 def getTag(html):
-    x = html.xpath('//span[@class="koumoku" and text()="ジャンル"]/../a[starts-with(@href,"/avod/genre/")]/text()')
-    return [translateTag_to_sc(i.strip()) for i in x if len(i.strip())] if len(x) and len(x[0]) else []
+    result = html.xpath('//span[@class="koumoku" and text()="ジャンル"]/../a[starts-with(@href,"/avod/genre/")]/text()')
+    total = []
+    for i in result:
+        total.append(i.replace("\n","").replace("\t",""))
+    return total
 
 
 def getCover_small(html, index=0):