diff --git a/WebCrawler/avsox.py b/WebCrawler/avsox.py index c6e6f00..8b73b83 100644 --- a/WebCrawler/avsox.py +++ b/WebCrawler/avsox.py @@ -57,8 +57,8 @@ def getCover_small(html): result = str(html.xpath('//*[@id="waterfall"]/div/a/div[1]/img/@src')).strip(" ['']") return result def getTag(html): - x = html.xpath('/html/head/meta[@name="keywords"]/@content')[0].split(',') - return [translateTag_to_sc(i.strip()) for i in x[2:]] if len(x) > 2 else [] + result = html.xpath('/html/head/meta[@name="keywords"]/@content')[0].split(',') + return result def getSeries(html): try: result1 = str(html.xpath('//span[contains(text(),"系列:")]/../span[2]/text()')).strip(" ['']") diff --git a/WebCrawler/carib.py b/WebCrawler/carib.py index 02b5d5c..47aa0d7 100755 --- a/WebCrawler/carib.py +++ b/WebCrawler/carib.py @@ -76,11 +76,8 @@ def get_actor(lx: html.HtmlElement): return r def get_tag(lx: html.HtmlElement) -> str: - r = [] genres = lx.xpath("//span[@class='spec-content']/a[@itemprop='genre']/text()") - for g in genres: - r.append(translateTag_to_sc(str(g))) - return r + return genres def get_extrafanart(lx: html.HtmlElement) -> str: r = [] diff --git a/WebCrawler/fanza.py b/WebCrawler/fanza.py index 8dfb31d..00d8988 100644 --- a/WebCrawler/fanza.py +++ b/WebCrawler/fanza.py @@ -123,25 +123,12 @@ def getTag(text): result = html.xpath( "//td[contains(text(),'ジャンル:')]/following-sibling::td/a/text()" ) - total = [] - for i in result: - try: - total.append(translateTag_to_sc(i)) - except: - pass - return total + return result except: result = html.xpath( "//td[contains(text(),'ジャンル:')]/following-sibling::td/text()" ) - total = [] - for i in result: - try: - total.append(translateTag_to_sc(i)) - except: - pass - return total - return result + return result def getCover(text, number): diff --git a/WebCrawler/fc2.py b/WebCrawler/fc2.py index 0a51fdc..27bc1a0 100644 --- a/WebCrawler/fc2.py +++ b/WebCrawler/fc2.py @@ -14,7 +14,7 @@ def getTitle_fc2com(htmlcode): #获取厂商 return result def 
getActor_fc2com(htmlcode): try: - html = etree.fromstring(htmlcode, etree.HTMLParser()) + html = etree.fromstring(htmlcode, etree.HTMLParser()) result = html.xpath('//*[@id="top"]/div[1]/section[1]/div/section/div[2]/ul/li[3]/a/text()')[0] return result except: @@ -48,10 +48,7 @@ def getCover_fc2com(htmlcode2): #获取厂商 # # return result def getTag_fc2com(lx): result = lx.xpath("//a[@class='tag tagTag']/text()") - tag = [] - for i in result: - tag.append(ADC_function.translateTag_to_sc(i)) - return tag + return result def getYear_fc2com(release): try: result = re.search('\d{4}',release).group() diff --git a/WebCrawler/javbus.py b/WebCrawler/javbus.py index d61db8d..09dc045 100644 --- a/WebCrawler/javbus.py +++ b/WebCrawler/javbus.py @@ -72,8 +72,7 @@ def getSerise(html): #获取系列 return str(x[0]) if len(x) else '' def getTag(html): # 获取标签 klist = html.xpath('/html/head/meta[@name="keywords"]/@content')[0].split(',') - taglist = [translateTag_to_sc(v) for v in klist[1:]] - return taglist + return klist def getExtrafanart(htmlcode): # 获取剧照 html_pather = re.compile(r'