diff --git a/WebCrawler/airav.py b/WebCrawler/airav.py
index 030e8c7..eeea95f 100644
--- a/WebCrawler/airav.py
+++ b/WebCrawler/airav.py
@@ -1,10 +1,6 @@
import sys
sys.path.append('../')
-import re
-from pyquery import PyQuery as pq#need install
-from lxml import etree#need install
from bs4 import BeautifulSoup#need install
-import json
from ADC_function import *
from WebCrawler import javbus
diff --git a/WebCrawler/avsox.py b/WebCrawler/avsox.py
index 0c4a9d7..a18eab6 100644
--- a/WebCrawler/avsox.py
+++ b/WebCrawler/avsox.py
@@ -1,8 +1,5 @@
import sys
sys.path.append('..')
-import re
-from lxml import etree
-import json
from ADC_function import *
from WebCrawler.storyline import getStoryline
from WebCrawler.crawler import *
diff --git a/WebCrawler/carib.py b/WebCrawler/carib.py
index 462dc61..50cbcc1 100755
--- a/WebCrawler/carib.py
+++ b/WebCrawler/carib.py
@@ -1,8 +1,6 @@
import sys
sys.path.append('../')
-import json
from lxml import html
-import re
from ADC_function import *
from WebCrawler.storyline import getStoryline
diff --git a/WebCrawler/dlsite.py b/WebCrawler/dlsite.py
index 19ab291..dc2c570 100644
--- a/WebCrawler/dlsite.py
+++ b/WebCrawler/dlsite.py
@@ -4,20 +4,6 @@ import json
import sys
sys.path.append('../')
from ADC_function import *
-# import sys
-# import io
-# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)
-#print(get_html('https://www.dlsite.com/maniax/work/=/product_id/VJ013152.html'))
-#title /html/head/title/text()
-#studio //th[contains(text(),"ブランド名")]/../td/span[1]/a/text()
-#release //th[contains(text(),"販売日")]/../td/a/text()
-#story //th[contains(text(),"シナリオ")]/../td/a/text()
-#senyo //th[contains(text(),"声優")]/../td/a/text()
-#tag //th[contains(text(),"ジャンル")]/../td/div/a/text()
-#jianjie //*[@id="main_inner"]/div[3]/text()
-#photo //*[@id="work_left"]/div/div/div[2]/div/div[1]/div[1]/ul/li/img/@src
-
-#https://www.dlsite.com/maniax/work/=/product_id/VJ013152.html
def getTitle(html):
result = str(html.xpath('/html/head/title/text()')[0])
diff --git a/WebCrawler/fanza.py b/WebCrawler/fanza.py
index 622f837..735bf72 100644
--- a/WebCrawler/fanza.py
+++ b/WebCrawler/fanza.py
@@ -2,17 +2,10 @@
# -*- coding: utf-8 -*-
import sys
sys.path.append('../')
-import json
-import re
from urllib.parse import urlencode
-from lxml import etree
-
from ADC_function import *
from WebCrawler.crawler import *
-# import sys
-# import io
-# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)
class fanzaCrawler(Crawler):
def getFanzaString(self,string):
diff --git a/WebCrawler/fc2.py b/WebCrawler/fc2.py
index a908269..7eae92b 100644
--- a/WebCrawler/fc2.py
+++ b/WebCrawler/fc2.py
@@ -1,14 +1,10 @@
import sys
sys.path.append('../')
import re
-from lxml import etree#need install
import json
import config
import ADC_function
from WebCrawler.crawler import *
-# import sys
-# import io
-# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)
def getExtrafanart(htmlcode): # 获取剧照
html_pather = re.compile(r'<ul class="items_article_SampleImagesArea">[\s\S]*?</ul>')
diff --git a/WebCrawler/fc2club.py b/WebCrawler/fc2club.py
index df14b3b..774c5fd 100644
--- a/WebCrawler/fc2club.py
+++ b/WebCrawler/fc2club.py
@@ -1,12 +1,8 @@
import sys
sys.path.append('../')
-import re
from lxml import etree#need install
import json
import ADC_function
-# import sys
-# import io
-# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)
def getTitle_fc2com(htmlcode): #获取标题
html = etree.fromstring(htmlcode,etree.HTMLParser())
@@ -44,14 +40,6 @@ def getCover_fc2com(htmlcode2): #获取img #
imgUrl = imgUrl.replace('../','https://fc2club.net/')
print(imgUrl)
return imgUrl
-# def getOutline_fc2com(htmlcode2): #获取番号 #
-# xpath_html = etree.fromstring(htmlcode2, etree.HTMLParser())
-# path = str(xpath_html.xpath('//*[@id="top"]/div[1]/section[4]/iframe/@src')).strip(" ['']")
-# html = etree.fromstring(ADC_function.get_html('https://adult.contents.fc2.com/'+path), etree.HTMLParser())
-# print('https://adult.contents.fc2.com'+path)
-# print(ADC_function.get_html('https://adult.contents.fc2.com'+path,cookies={'wei6H':'1'}))
-# result = str(html.xpath('/html/body/div/text()')).strip(" ['']").replace("\\n",'',10000).replace("'",'',10000).replace(', ,','').strip(' ').replace('。,',',')
-# return result
def getTag_fc2com(htmlcode): #获取tag
html = etree.fromstring(htmlcode,etree.HTMLParser())
a = html.xpath('//*[@class="show-top-grids"]/div[1]/h5[4]/a')
diff --git a/WebCrawler/gcolle.py b/WebCrawler/gcolle.py
index 078dd06..749da1f 100644
--- a/WebCrawler/gcolle.py
+++ b/WebCrawler/gcolle.py
@@ -3,8 +3,6 @@ sys.path.append('../')
from WebCrawler.crawler import *
from ADC_function import *
-from lxml import etree
-
def main(number):
save_cookies = False
diff --git a/WebCrawler/javbus.py b/WebCrawler/javbus.py
index 3829d16..2c914c2 100644
--- a/WebCrawler/javbus.py
+++ b/WebCrawler/javbus.py
@@ -1,8 +1,5 @@
import sys
sys.path.append('../')
-import re
-from lxml import etree#need install
-import json
from ADC_function import *
from WebCrawler.storyline import getStoryline
import inspect
diff --git a/WebCrawler/javdb.py b/WebCrawler/javdb.py
index 941c774..6529dd6 100755
--- a/WebCrawler/javdb.py
+++ b/WebCrawler/javdb.py
@@ -1,12 +1,7 @@
import sys
sys.path.append('../')
-import re
-from lxml import etree
-import json
from ADC_function import *
from WebCrawler.storyline import getStoryline
-# import io
-# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)
def getTitle(html):
browser_title = str(html.xpath("/html/head/title/text()")[0])
@@ -55,10 +50,6 @@ def getActorPhoto(html, javdb_site, session):
return actor_photo
def getStudio(a, html):
- # html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
- # result1 = str(html.xpath('//strong[contains(text(),"片商")]/../span/text()')).strip(" ['']")
- # result2 = str(html.xpath('//strong[contains(text(),"片商")]/../span/a/text()')).strip(" ['']")
- # return str(result1 + result2).strip('+').replace("', '", '').replace('"', '')
patherr = re.compile(r'片商\:[\s\S]*?(.*?)')
pianshang = patherr.findall(a)
if pianshang:
@@ -85,11 +76,6 @@ def getNum(html):
result2 = str(html.xpath('//strong[contains(text(),"番號")]/../span/a/text()')).strip(" ['']")
return str(result2 + result1).strip('+')
def getYear(getRelease):
- # try:
- # result = str(re.search('\d{4}', getRelease).group())
- # return result
- # except:
- # return getRelease
patherr = re.compile(r'日期\:\s*?.*?(.*?)\-.*?')
dates = patherr.findall(getRelease)
if dates:
@@ -99,10 +85,6 @@ def getYear(getRelease):
return result
def getRelease(a):
- # html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
- # result1 = str(html.xpath('//strong[contains(text(),"時間")]/../span/text()')).strip(" ['']")
- # result2 = str(html.xpath('//strong[contains(text(),"時間")]/../span/a/text()')).strip(" ['']")
- # return str(result1 + result2).strip('+')
patherr = re.compile(r'日期\:\s*?.*?(.*?)')
dates = patherr.findall(a)
if dates:
diff --git a/WebCrawler/madou.py b/WebCrawler/madou.py
index c04d9a2..70c4e46 100644
--- a/WebCrawler/madou.py
+++ b/WebCrawler/madou.py
@@ -1,8 +1,5 @@
import sys
sys.path.append('../')
-from bs4 import BeautifulSoup # need install
-from lxml import etree # need install
-from pyquery import PyQuery as pq # need install
from ADC_function import *
import json
import re
diff --git a/WebCrawler/mgstage.py b/WebCrawler/mgstage.py
index db727b4..bb344f9 100644
--- a/WebCrawler/mgstage.py
+++ b/WebCrawler/mgstage.py
@@ -1,14 +1,8 @@
import sys
sys.path.append('../')
-import re
-from lxml import etree
-import json
from bs4 import BeautifulSoup
from ADC_function import *
from WebCrawler.crawler import *
-# import sys
-# import io
-# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)
class MgsCrawler(Crawler):
def getMgsString(self, _xpath):
diff --git a/WebCrawler/mv91.py b/WebCrawler/mv91.py
index 301997e..a8c1cf3 100644
--- a/WebCrawler/mv91.py
+++ b/WebCrawler/mv91.py
@@ -1,10 +1,5 @@
import sys
sys.path.append('../')
-import re
-from pyquery import PyQuery as pq#need install
-from lxml import etree#need install
-from bs4 import BeautifulSoup#need install
-import json
from ADC_function import *
diff --git a/WebCrawler/storyline.py b/WebCrawler/storyline.py
index 16ab3cc..c4fc2c4 100644
--- a/WebCrawler/storyline.py
+++ b/WebCrawler/storyline.py
@@ -1,7 +1,5 @@
import sys
sys.path.append('../')
-import re
-import json
import builtins
from ADC_function import *
from lxml.html import fromstring
diff --git a/WebCrawler/xcity.py b/WebCrawler/xcity.py
index 1218b03..a22f417 100644
--- a/WebCrawler/xcity.py
+++ b/WebCrawler/xcity.py
@@ -1,12 +1,7 @@
import sys
sys.path.append('../')
-import re
-from lxml import etree
-import json
from ADC_function import *
from WebCrawler.storyline import getStoryline
-# import io
-# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)
def getTitle(html):
result = html.xpath('//*[@id="program_detail_title"]/text()')[0]