Clean out unused modules
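Drop imports that nothing references anymore (re, json, lxml, pyquery, BeautifulSoup, and some commented-out io/sys.stdout shims) across the WebCrawler modules. Leftover imports like these can be found mechanically; below is a minimal sketch using the pyflakes API (pyflakes is an assumed extra tool here, not a dependency of this repo):

    import glob
    import sys

    # assumes: pip install pyflakes (third-party; not in this repo's requirements)
    from pyflakes.api import checkPath
    from pyflakes.reporter import Reporter

    reporter = Reporter(sys.stdout, sys.stderr)
    for path in sorted(glob.glob('WebCrawler/*.py')):
        # flags each "imported but unused" name with its file and line number
        checkPath(path, reporter)

Anything flagged as "imported but unused" is a candidate for the removals below.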
@@ -1,10 +1,6 @@
 import sys
 sys.path.append('../')
-import re
-from pyquery import PyQuery as pq#need install
-from lxml import etree#need install
 from bs4 import BeautifulSoup#need install
-import json
 from ADC_function import *
 from WebCrawler import javbus
 
@@ -1,8 +1,5 @@
 import sys
 sys.path.append('..')
-import re
-from lxml import etree
-import json
 from ADC_function import *
 from WebCrawler.storyline import getStoryline
 from WebCrawler.crawler import *
@@ -1,8 +1,6 @@
 import sys
 sys.path.append('../')
-import json
 from lxml import html
-import re
 from ADC_function import *
 from WebCrawler.storyline import getStoryline
 
@@ -4,20 +4,6 @@ import json
 import sys
 sys.path.append('../')
 from ADC_function import *
-# import sys
-# import io
-# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)
-#print(get_html('https://www.dlsite.com/maniax/work/=/product_id/VJ013152.html'))
-#title /html/head/title/text()
-#studio //th[contains(text(),"ブランド名")]/../td/span[1]/a/text()
-#release //th[contains(text(),"販売日")]/../td/a/text()
-#story //th[contains(text(),"シナリオ")]/../td/a/text()
-#senyo //th[contains(text(),"声優")]/../td/a/text()
-#tag //th[contains(text(),"ジャンル")]/../td/div/a/text()
-#jianjie //*[@id="main_inner"]/div[3]/text()
-#photo //*[@id="work_left"]/div/div/div[2]/div/div[1]/div[1]/ul/li/img/@src
-
-#https://www.dlsite.com/maniax/work/=/product_id/VJ013152.html
 
 def getTitle(html):
     result = str(html.xpath('/html/head/title/text()')[0])
@@ -2,17 +2,10 @@
 # -*- coding: utf-8 -*-
 import sys
 sys.path.append('../')
-import json
-import re
 from urllib.parse import urlencode
 
-from lxml import etree
-
 from ADC_function import *
 from WebCrawler.crawler import *
-# import sys
-# import io
-# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)
 
 class fanzaCrawler(Crawler):
     def getFanzaString(self,string):
@@ -1,14 +1,10 @@
 import sys
 sys.path.append('../')
 import re
-from lxml import etree#need install
 import json
 import config
 import ADC_function
 from WebCrawler.crawler import *
-# import sys
-# import io
-# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)
 
 def getExtrafanart(htmlcode): # get extra fanart (stills)
     html_pather = re.compile(r'<ul class=\"items_article_SampleImagesArea\"[\s\S]*?</ul>')
@@ -1,12 +1,8 @@
 import sys
 sys.path.append('../')
-import re
 from lxml import etree#need install
 import json
 import ADC_function
-# import sys
-# import io
-# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)
 
 def getTitle_fc2com(htmlcode): #get title
     html = etree.fromstring(htmlcode,etree.HTMLParser())
@@ -44,14 +40,6 @@ def getCover_fc2com(htmlcode2): #get img #
     imgUrl = imgUrl.replace('../','https://fc2club.net/')
     print(imgUrl)
     return imgUrl
-# def getOutline_fc2com(htmlcode2): #get number #
-# xpath_html = etree.fromstring(htmlcode2, etree.HTMLParser())
-# path = str(xpath_html.xpath('//*[@id="top"]/div[1]/section[4]/iframe/@src')).strip(" ['']")
-# html = etree.fromstring(ADC_function.get_html('https://adult.contents.fc2.com/'+path), etree.HTMLParser())
-# print('https://adult.contents.fc2.com'+path)
-# print(ADC_function.get_html('https://adult.contents.fc2.com'+path,cookies={'wei6H':'1'}))
-# result = str(html.xpath('/html/body/div/text()')).strip(" ['']").replace("\\n",'',10000).replace("'",'',10000).replace(', ,','').strip(' ').replace('。,',',')
-# return result
 def getTag_fc2com(htmlcode): #get tags
     html = etree.fromstring(htmlcode,etree.HTMLParser())
     a = html.xpath('//*[@class="show-top-grids"]/div[1]/h5[4]/a')
@@ -3,8 +3,6 @@ sys.path.append('../')
 
 from WebCrawler.crawler import *
 from ADC_function import *
-from lxml import etree
-
 
 def main(number):
     save_cookies = False
@@ -1,8 +1,5 @@
 import sys
 sys.path.append('../')
-import re
-from lxml import etree#need install
-import json
 from ADC_function import *
 from WebCrawler.storyline import getStoryline
 import inspect
@@ -1,12 +1,7 @@
 import sys
 sys.path.append('../')
-import re
-from lxml import etree
-import json
 from ADC_function import *
 from WebCrawler.storyline import getStoryline
-# import io
-# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)
 
 def getTitle(html):
     browser_title = str(html.xpath("/html/head/title/text()")[0])
@@ -55,10 +50,6 @@ def getActorPhoto(html, javdb_site, session):
     return actor_photo
 
 def getStudio(a, html):
-    # html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
-    # result1 = str(html.xpath('//strong[contains(text(),"片商")]/../span/text()')).strip(" ['']")
-    # result2 = str(html.xpath('//strong[contains(text(),"片商")]/../span/a/text()')).strip(" ['']")
-    # return str(result1 + result2).strip('+').replace("', '", '').replace('"', '')
     patherr = re.compile(r'<strong>片商\:</strong>[\s\S]*?<a href=\".*?>(.*?)</a></span>')
     pianshang = patherr.findall(a)
     if pianshang:
@@ -85,11 +76,6 @@ def getNum(html):
     result2 = str(html.xpath('//strong[contains(text(),"番號")]/../span/a/text()')).strip(" ['']")
     return str(result2 + result1).strip('+')
 def getYear(getRelease):
-    # try:
-    #     result = str(re.search('\d{4}', getRelease).group())
-    #     return result
-    # except:
-    #     return getRelease
     patherr = re.compile(r'<strong>日期\:</strong>\s*?.*?<span class="value">(.*?)\-.*?</span>')
     dates = patherr.findall(getRelease)
     if dates:
@@ -99,10 +85,6 @@ def getYear(getRelease):
     return result
 
 def getRelease(a):
-    # html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
-    # result1 = str(html.xpath('//strong[contains(text(),"時間")]/../span/text()')).strip(" ['']")
-    # result2 = str(html.xpath('//strong[contains(text(),"時間")]/../span/a/text()')).strip(" ['']")
-    # return str(result1 + result2).strip('+')
     patherr = re.compile(r'<strong>日期\:</strong>\s*?.*?<span class="value">(.*?)</span>')
     dates = patherr.findall(a)
     if dates:
@@ -1,8 +1,5 @@
 import sys
 sys.path.append('../')
-from bs4 import BeautifulSoup # need install
-from lxml import etree # need install
-from pyquery import PyQuery as pq # need install
 from ADC_function import *
 import json
 import re
@@ -1,14 +1,8 @@
 import sys
 sys.path.append('../')
-import re
-from lxml import etree
-import json
 from bs4 import BeautifulSoup
 from ADC_function import *
 from WebCrawler.crawler import *
-# import sys
-# import io
-# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)
 
 class MgsCrawler(Crawler):
     def getMgsString(self, _xpath):
@@ -1,10 +1,5 @@
 import sys
 sys.path.append('../')
-import re
-from pyquery import PyQuery as pq#need install
-from lxml import etree#need install
-from bs4 import BeautifulSoup#need install
-import json
 from ADC_function import *
 
 
@@ -1,7 +1,5 @@
 import sys
 sys.path.append('../')
-import re
-import json
 import builtins
 from ADC_function import *
 from lxml.html import fromstring
@@ -1,12 +1,7 @@
 import sys
 sys.path.append('../')
-import re
-from lxml import etree
-import json
 from ADC_function import *
 from WebCrawler.storyline import getStoryline
-# import io
-# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)
 
 def getTitle(html):
     result = html.xpath('//*[@id="program_detail_title"]/text()')[0]