Clean out unused modules
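Drop imports that nothing references anymore (re, json, lxml, pyquery, BeautifulSoup, and some commented-out io/sys.stdout shims) across the WebCrawler modules. Leftover imports like these can be found mechanically; below is a minimal sketch using the pyflakes API (pyflakes is an assumed extra tool here, not a dependency of this repo):

    import glob
    import sys

    # assumes: pip install pyflakes (third-party; not in this repo's requirements)
    from pyflakes.api import checkPath
    from pyflakes.reporter import Reporter

    reporter = Reporter(sys.stdout, sys.stderr)
    for path in sorted(glob.glob('WebCrawler/*.py')):
        # flags each "imported but unused" name with its file and line number
        checkPath(path, reporter)

Anything flagged as "imported but unused" is a candidate for the removals below.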
@@ -1,10 +1,6 @@
 import sys
 sys.path.append('../')
-import re
-from pyquery import PyQuery as pq#need install
-from lxml import etree#need install
 from bs4 import BeautifulSoup#need install
-import json
 from ADC_function import *
 from WebCrawler import javbus
 
@@ -1,8 +1,5 @@
 import sys
 sys.path.append('..')
-import re
-from lxml import etree
-import json
 from ADC_function import *
 from WebCrawler.storyline import getStoryline
 from WebCrawler.crawler import *
@@ -1,8 +1,6 @@
 import sys
 sys.path.append('../')
-import json
 from lxml import html
-import re
 from ADC_function import *
 from WebCrawler.storyline import getStoryline
 
@@ -4,20 +4,6 @@ import json
 import sys
 sys.path.append('../')
 from ADC_function import *
-# import sys
-# import io
-# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)
-#print(get_html('https://www.dlsite.com/maniax/work/=/product_id/VJ013152.html'))
-#title /html/head/title/text()
-#studio //th[contains(text(),"ブランド名")]/../td/span[1]/a/text()
-#release //th[contains(text(),"販売日")]/../td/a/text()
-#story //th[contains(text(),"シナリオ")]/../td/a/text()
-#senyo //th[contains(text(),"声優")]/../td/a/text()
-#tag //th[contains(text(),"ジャンル")]/../td/div/a/text()
-#jianjie //*[@id="main_inner"]/div[3]/text()
-#photo //*[@id="work_left"]/div/div/div[2]/div/div[1]/div[1]/ul/li/img/@src
-
-#https://www.dlsite.com/maniax/work/=/product_id/VJ013152.html
 
 def getTitle(html):
     result = str(html.xpath('/html/head/title/text()')[0])
@@ -2,17 +2,10 @@
 # -*- coding: utf-8 -*-
 import sys
 sys.path.append('../')
-import json
-import re
 from urllib.parse import urlencode
 
-from lxml import etree
-
 from ADC_function import *
 from WebCrawler.crawler import *
-# import sys
-# import io
-# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)
 
 class fanzaCrawler(Crawler):
     def getFanzaString(self,string):
@@ -1,14 +1,10 @@
 import sys
 sys.path.append('../')
 import re
-from lxml import etree#need install
 import json
 import config
 import ADC_function
 from WebCrawler.crawler import *
-# import sys
-# import io
-# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)
 
 def getExtrafanart(htmlcode): # get extra fanart (stills)
     html_pather = re.compile(r'<ul class=\"items_article_SampleImagesArea\"[\s\S]*?</ul>')
@@ -1,12 +1,8 @@
 import sys
 sys.path.append('../')
-import re
 from lxml import etree#need install
 import json
 import ADC_function
-# import sys
-# import io
-# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)
 
 def getTitle_fc2com(htmlcode): #get title
     html = etree.fromstring(htmlcode,etree.HTMLParser())
@@ -44,14 +40,6 @@ def getCover_fc2com(htmlcode2): #get img #
     imgUrl = imgUrl.replace('../','https://fc2club.net/')
     print(imgUrl)
     return imgUrl
-# def getOutline_fc2com(htmlcode2): #get number #
-# xpath_html = etree.fromstring(htmlcode2, etree.HTMLParser())
-# path = str(xpath_html.xpath('//*[@id="top"]/div[1]/section[4]/iframe/@src')).strip(" ['']")
-# html = etree.fromstring(ADC_function.get_html('https://adult.contents.fc2.com/'+path), etree.HTMLParser())
-# print('https://adult.contents.fc2.com'+path)
-# print(ADC_function.get_html('https://adult.contents.fc2.com'+path,cookies={'wei6H':'1'}))
-# result = str(html.xpath('/html/body/div/text()')).strip(" ['']").replace("\\n",'',10000).replace("'",'',10000).replace(', ,','').strip(' ').replace('。,',',')
-# return result
 def getTag_fc2com(htmlcode): #get tags
     html = etree.fromstring(htmlcode,etree.HTMLParser())
     a = html.xpath('//*[@class="show-top-grids"]/div[1]/h5[4]/a')
@@ -3,8 +3,6 @@ sys.path.append('../')
 
 from WebCrawler.crawler import *
 from ADC_function import *
-from lxml import etree
-
 
 def main(number):
     save_cookies = False
@@ -1,8 +1,5 @@
 import sys
 sys.path.append('../')
-import re
-from lxml import etree#need install
-import json
 from ADC_function import *
 from WebCrawler.storyline import getStoryline
 import inspect
@@ -1,12 +1,7 @@
 import sys
 sys.path.append('../')
-import re
-from lxml import etree
-import json
 from ADC_function import *
 from WebCrawler.storyline import getStoryline
-# import io
-# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)
 
 def getTitle(html):
     browser_title = str(html.xpath("/html/head/title/text()")[0])
@@ -55,10 +50,6 @@ def getActorPhoto(html, javdb_site, session):
     return actor_photo
 
 def getStudio(a, html):
-    # html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
-    # result1 = str(html.xpath('//strong[contains(text(),"片商")]/../span/text()')).strip(" ['']")
-    # result2 = str(html.xpath('//strong[contains(text(),"片商")]/../span/a/text()')).strip(" ['']")
-    # return str(result1 + result2).strip('+').replace("', '", '').replace('"', '')
     patherr = re.compile(r'<strong>片商\:</strong>[\s\S]*?<a href=\".*?>(.*?)</a></span>')
     pianshang = patherr.findall(a)
     if pianshang:
@@ -85,11 +76,6 @@ def getNum(html):
     result2 = str(html.xpath('//strong[contains(text(),"番號")]/../span/a/text()')).strip(" ['']")
     return str(result2 + result1).strip('+')
 def getYear(getRelease):
-    # try:
-    #     result = str(re.search('\d{4}', getRelease).group())
-    #     return result
-    # except:
-    #     return getRelease
     patherr = re.compile(r'<strong>日期\:</strong>\s*?.*?<span class="value">(.*?)\-.*?</span>')
     dates = patherr.findall(getRelease)
     if dates:
@@ -99,10 +85,6 @@ def getYear(getRelease):
     return result
 
 def getRelease(a):
-    # html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
-    # result1 = str(html.xpath('//strong[contains(text(),"時間")]/../span/text()')).strip(" ['']")
-    # result2 = str(html.xpath('//strong[contains(text(),"時間")]/../span/a/text()')).strip(" ['']")
-    # return str(result1 + result2).strip('+')
     patherr = re.compile(r'<strong>日期\:</strong>\s*?.*?<span class="value">(.*?)</span>')
     dates = patherr.findall(a)
     if dates:
@@ -1,8 +1,5 @@
 import sys
 sys.path.append('../')
-from bs4 import BeautifulSoup # need install
-from lxml import etree # need install
-from pyquery import PyQuery as pq # need install
 from ADC_function import *
 import json
 import re
@@ -1,14 +1,8 @@
 import sys
 sys.path.append('../')
-import re
-from lxml import etree
-import json
 from bs4 import BeautifulSoup
 from ADC_function import *
 from WebCrawler.crawler import *
-# import sys
-# import io
-# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)
 
 class MgsCrawler(Crawler):
     def getMgsString(self, _xpath):
@@ -1,10 +1,5 @@
 import sys
 sys.path.append('../')
-import re
-from pyquery import PyQuery as pq#need install
-from lxml import etree#need install
-from bs4 import BeautifulSoup#need install
-import json
 from ADC_function import *
 
 
@@ -1,7 +1,5 @@
 import sys
 sys.path.append('../')
-import re
-import json
 import builtins
 from ADC_function import *
 from lxml.html import fromstring
@@ -1,12 +1,7 @@
 import sys
 sys.path.append('../')
-import re
-from lxml import etree
-import json
 from ADC_function import *
 from WebCrawler.storyline import getStoryline
-# import io
-# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)
 
 def getTitle(html):
     result = html.xpath('//*[@id="program_detail_title"]/text()')[0]