Clean out unused modules
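This commit strips imports that are no longer referenced in each scraper module. A quick way to double-check such removals is to compare the names a module imports against the names it actually uses; a minimal standard-library sketch (the path at the end is just an example, and star imports or attribute-only uses still need a manual look):

    import ast

    def unused_imports(path):
        # Collect imported top-level names and every Name reference,
        # then report the imports that are never referenced.
        tree = ast.parse(open(path, encoding='utf-8').read())
        imported, used = set(), set()
        for node in ast.walk(tree):
            if isinstance(node, ast.Import):
                imported.update(a.asname or a.name.split('.')[0] for a in node.names)
            elif isinstance(node, ast.ImportFrom):
                imported.update(a.asname or a.name for a in node.names)
            elif isinstance(node, ast.Name):
                used.add(node.id)
        return imported - used

    print(unused_imports('WebCrawler/javbus.py'))  # example path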
@@ -1,10 +1,6 @@
import sys
sys.path.append('../')
import re
from pyquery import PyQuery as pq  # needs install
from lxml import etree  # needs install
from bs4 import BeautifulSoup  # needs install
import json
from ADC_function import *
from WebCrawler import javbus
@@ -1,8 +1,5 @@
import sys
sys.path.append('..')
import re
from lxml import etree
import json
from ADC_function import *
from WebCrawler.storyline import getStoryline
from WebCrawler.crawler import *
@@ -1,8 +1,6 @@
import sys
sys.path.append('../')
import json
from lxml import html
import re
from ADC_function import *
from WebCrawler.storyline import getStoryline
@@ -4,20 +4,6 @@ import json
import sys
sys.path.append('../')
from ADC_function import *
# import sys
# import io
# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)
#print(get_html('https://www.dlsite.com/maniax/work/=/product_id/VJ013152.html'))
#title   /html/head/title/text()
#studio  //th[contains(text(),"ブランド名")]/../td/span[1]/a/text()
#release //th[contains(text(),"販売日")]/../td/a/text()
#story   //th[contains(text(),"シナリオ")]/../td/a/text()
#actor   //th[contains(text(),"声優")]/../td/a/text()
#tag     //th[contains(text(),"ジャンル")]/../td/div/a/text()
#outline //*[@id="main_inner"]/div[3]/text()
#photo   //*[@id="work_left"]/div/div/div[2]/div/div[1]/div[1]/ul/li/img/@src

#https://www.dlsite.com/maniax/work/=/product_id/VJ013152.html

def getTitle(html):
    result = str(html.xpath('/html/head/title/text()')[0])
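The comment block above maps each metadata field to the XPath that locates it on a dlsite work page. As a minimal sketch (not part of this commit), assuming get_html from ADC_function returns the page source, a getter applies those XPaths like this:

    from lxml import etree
    from ADC_function import *  # provides get_html in this repo

    # Sketch only; the URL is the sample from the comments above.
    htmlcode = get_html('https://www.dlsite.com/maniax/work/=/product_id/VJ013152.html')
    html = etree.fromstring(htmlcode, etree.HTMLParser())
    title = str(html.xpath('/html/head/title/text()')[0])
    studio = html.xpath('//th[contains(text(),"ブランド名")]/../td/span[1]/a/text()')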
@@ -2,17 +2,10 @@
# -*- coding: utf-8 -*-
import sys
sys.path.append('../')
import json
import re
from urllib.parse import urlencode

from lxml import etree

from ADC_function import *
from WebCrawler.crawler import *
# import sys
# import io
# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)

class fanzaCrawler(Crawler):
    def getFanzaString(self, string):
@@ -1,14 +1,10 @@
import sys
sys.path.append('../')
import re
from lxml import etree  # needs install
import json
import config
import ADC_function
from WebCrawler.crawler import *
# import sys
# import io
# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)

def getExtrafanart(htmlcode):  # get extra fanart (stills)
    html_pather = re.compile(r'<ul class=\"items_article_SampleImagesArea\"[\s\S]*?</ul>')
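getExtrafanart grabs the sample-images <ul> block with the regex above; a minimal sketch of how the extraction could continue (the img/src pattern is an assumption about the page markup, not taken from this commit):

    # Sketch: pull individual image URLs out of the matched <ul> block.
    block = html_pather.search(htmlcode)
    extrafanart = re.findall(r'<img[^>]+src="([^"]+)"', block.group()) if block else []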
@@ -1,12 +1,8 @@
import sys
sys.path.append('../')
import re
from lxml import etree  # needs install
import json
import ADC_function
# import sys
# import io
# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)

def getTitle_fc2com(htmlcode):  # get title
    html = etree.fromstring(htmlcode, etree.HTMLParser())
@@ -44,14 +40,6 @@ def getCover_fc2com(htmlcode2):  # get cover image
    imgUrl = imgUrl.replace('../', 'https://fc2club.net/')
    print(imgUrl)
    return imgUrl
# def getOutline_fc2com(htmlcode2):  # get outline
#     xpath_html = etree.fromstring(htmlcode2, etree.HTMLParser())
#     path = str(xpath_html.xpath('//*[@id="top"]/div[1]/section[4]/iframe/@src')).strip(" ['']")
#     html = etree.fromstring(ADC_function.get_html('https://adult.contents.fc2.com/'+path), etree.HTMLParser())
#     print('https://adult.contents.fc2.com'+path)
#     print(ADC_function.get_html('https://adult.contents.fc2.com'+path, cookies={'wei6H':'1'}))
#     result = str(html.xpath('/html/body/div/text()')).strip(" ['']").replace("\\n",'',10000).replace("'",'',10000).replace(', ,','').strip(' ').replace('。,',',')
#     return result
def getTag_fc2com(htmlcode):  # get tags
    html = etree.fromstring(htmlcode, etree.HTMLParser())
    a = html.xpath('//*[@class="show-top-grids"]/div[1]/h5[4]/a')
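getCover_fc2com in the hunk above absolutizes relative image paths by string replacement ('../' becomes 'https://fc2club.net/'). For reference, urllib.parse.urljoin resolves the same paths against the page URL; a small sketch with a made-up page path:

    from urllib.parse import urljoin

    # Hypothetical page URL; only its directory part matters for resolution.
    page_url = 'https://fc2club.net/html/FC2-123456.html'
    img_url = urljoin(page_url, '../uploadfile/cover.jpg')
    # -> 'https://fc2club.net/uploadfile/cover.jpg'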
@@ -3,8 +3,6 @@ sys.path.append('../')

from WebCrawler.crawler import *
from ADC_function import *
from lxml import etree


def main(number):
    save_cookies = False
@@ -1,8 +1,5 @@
import sys
sys.path.append('../')
import re
from lxml import etree  # needs install
import json
from ADC_function import *
from WebCrawler.storyline import getStoryline
import inspect
@@ -1,12 +1,7 @@
import sys
sys.path.append('../')
import re
from lxml import etree
import json
from ADC_function import *
from WebCrawler.storyline import getStoryline
# import io
# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)

def getTitle(html):
    browser_title = str(html.xpath("/html/head/title/text()")[0])
@@ -55,10 +50,6 @@ def getActorPhoto(html, javdb_site, session):
    return actor_photo

def getStudio(a, html):
    # html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
    # result1 = str(html.xpath('//strong[contains(text(),"片商")]/../span/text()')).strip(" ['']")
    # result2 = str(html.xpath('//strong[contains(text(),"片商")]/../span/a/text()')).strip(" ['']")
    # return str(result1 + result2).strip('+').replace("', '", '').replace('"', '')
    patherr = re.compile(r'<strong>片商\:</strong>[\s\S]*?<a href=\".*?>(.*?)</a></span>')
    pianshang = patherr.findall(a)
    if pianshang:
@@ -85,11 +76,6 @@ def getNum(html):
    result2 = str(html.xpath('//strong[contains(text(),"番號")]/../span/a/text()')).strip(" ['']")
    return str(result2 + result1).strip('+')
def getYear(getRelease):
    # try:
    #     result = str(re.search('\d{4}', getRelease).group())
    #     return result
    # except:
    #     return getRelease
    patherr = re.compile(r'<strong>日期\:</strong>\s*?.*?<span class="value">(.*?)\-.*?</span>')
    dates = patherr.findall(getRelease)
    if dates:
@@ -99,10 +85,6 @@ def getYear(getRelease):
    return result

def getRelease(a):
    # html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
    # result1 = str(html.xpath('//strong[contains(text(),"時間")]/../span/text()')).strip(" ['']")
    # result2 = str(html.xpath('//strong[contains(text(),"時間")]/../span/a/text()')).strip(" ['']")
    # return str(result1 + result2).strip('+')
    patherr = re.compile(r'<strong>日期\:</strong>\s*?.*?<span class="value">(.*?)</span>')
    dates = patherr.findall(a)
    if dates:
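getStudio, getYear, and getRelease above now run regexes over the raw page instead of the commented-out XPath lookups. A minimal sketch of the shared pattern, exercised against a made-up fragment in javdb's markup style:

    import re

    # Made-up HTML fragment, only for demonstrating the two date patterns.
    a = '<strong>日期:</strong> <span class="value">2020-01-15</span>'
    release_pat = re.compile(r'<strong>日期\:</strong>\s*?.*?<span class="value">(.*?)</span>')
    year_pat = re.compile(r'<strong>日期\:</strong>\s*?.*?<span class="value">(.*?)\-.*?</span>')
    dates = release_pat.findall(a)
    release = dates[0] if dates else ''   # '2020-01-15'
    years = year_pat.findall(a)
    year = years[0] if years else ''      # '2020'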
@@ -1,8 +1,5 @@
import sys
sys.path.append('../')
from bs4 import BeautifulSoup  # needs install
from lxml import etree  # needs install
from pyquery import PyQuery as pq  # needs install
from ADC_function import *
import json
import re
@@ -1,14 +1,8 @@
import sys
sys.path.append('../')
import re
from lxml import etree
import json
from bs4 import BeautifulSoup
from ADC_function import *
from WebCrawler.crawler import *
# import sys
# import io
# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)

class MgsCrawler(Crawler):
    def getMgsString(self, _xpath):
@@ -1,10 +1,5 @@
import sys
sys.path.append('../')
import re
from pyquery import PyQuery as pq  # needs install
from lxml import etree  # needs install
from bs4 import BeautifulSoup  # needs install
import json
from ADC_function import *
@@ -1,7 +1,5 @@
import sys
sys.path.append('../')
import re
import json
import builtins
from ADC_function import *
from lxml.html import fromstring
@@ -1,12 +1,7 @@
import sys
sys.path.append('../')
import re
from lxml import etree
import json
from ADC_function import *
from WebCrawler.storyline import getStoryline
# import io
# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)

def getTitle(html):
    result = html.xpath('//*[@id="program_detail_title"]/text()')[0]