commit 9898f2918f
parent d6d0a1687b
Author: Mathhew
Date:   2022-05-27 15:24:29 +08:00

    update scrapinglib

16 changed files with 213 additions and 73 deletions

scrapinglib/airav.py

@@ -18,13 +18,11 @@ class Airav(Parser):
     expr_actor = '//ul[@class="videoAvstarList"]/li/a[starts-with(@href,"/idol/")]/text()'
     expr_cover = '//img[contains(@src,"/storage/big_pic/")]/@src'
 
-    def search(self, number, core: None):
+    def search(self, number):
         self.number = number
-        self.updateCore(core)
         self.detailurl = 'https://cn.airav.wiki/video/' + number
         engine = Javbus()
-        javbusinfo = engine.search(number, core)
+        javbusinfo = engine.search(number, self)
         if javbusinfo == 404:
             self.javbus = {"title": ""}
         else:

scrapinglib/api.py

@@ -8,6 +8,7 @@ from scrapinglib.carib import Carib
 from scrapinglib.dlsite import Dlsite
 from scrapinglib.fanza import Fanza
 from scrapinglib.gcolle import Gcolle
+from scrapinglib.getchu import Getchu
 from scrapinglib.jav321 import Jav321
 from scrapinglib.javdb import Javdb
 from scrapinglib.mv91 import Mv91
@@ -19,13 +20,15 @@ from .xcity import Xcity
 from .avsox import Avsox
 
-def search(number, souces=None, proxies=None, dbcookies=None):
+def search(number, souces=None, proxies=None, verify=None, dbcookies=None, dbsite=None, morestoryline=True):
     """
     TODO support more sites: douban, imdb, tmdb, anidb, etc.
     type distinguishes r18 from normal
     """
     sc = Scraping()
-    return sc.search(number, souces, proxies=proxies, dbcookies=dbcookies)
+    return sc.search(number, souces, proxies=proxies, verify=verify,
+                     dbcookies=dbcookies, dbsite=dbsite,
+                     morestoryline=morestoryline)
 
 
 class Scraping():
@@ -54,30 +57,39 @@ class Scraping():
     full_sources = ['avsox', 'javbus', 'xcity', 'mgstage', 'madou', 'fc2',
                     'dlsite', 'jav321', 'fanza', 'airav', 'carib', 'mv91',
-                    'gcolle', 'javdb']
+                    'gcolle', 'javdb', 'getchu']
     func_mapping = {
-        'avsox': Avsox().search,
-        'javbus': Javbus().search,
-        'xcity': Xcity().search,
-        'mgstage': Mgstage().search,
-        'madou': Madou().search,
-        'fc2': Fc2().search,
-        'dlsite': Dlsite().search,
-        'jav321': Jav321().search,
-        'fanza': Fanza().search,
-        'airav': Airav().search,
-        'carib': Carib().search,
-        'mv91': Mv91().search,
-        'gcolle': Gcolle().search,
-        'javdb': Javdb().search,
+        'avsox': Avsox().scrape,
+        'javbus': Javbus().scrape,
+        'xcity': Xcity().scrape,
+        'mgstage': Mgstage().scrape,
+        'madou': Madou().scrape,
+        'fc2': Fc2().scrape,
+        'dlsite': Dlsite().scrape,
+        'jav321': Jav321().scrape,
+        'fanza': Fanza().scrape,
+        'airav': Airav().scrape,
+        'carib': Carib().scrape,
+        'mv91': Mv91().scrape,
+        'gcolle': Gcolle().scrape,
+        'javdb': Javdb().scrape,
+        'getchu': Getchu().scrape,
     }
 
     proxies = None
+    verify = None
     dbcookies = None
+    dbsite = None
+    # use the storyline module to fetch a fuller plot summary
+    morestoryline = True
 
-    def search(self, number, sources=None, proxies=None, dbcookies=None):
+    def search(self, number, sources=None, proxies=None, verify=None,
+               dbcookies=None, dbsite=None, morestoryline=True):
         self.proxies = proxies
+        self.verify = verify
         self.dbcookies = dbcookies
+        self.dbsite = dbsite
+        self.morestoryline = morestoryline
         sources = self.checkSources(sources, number)
         json_data = {}
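The widened signature flows straight through to Scraping.search. A minimal usage sketch of the updated entry point, assuming `search` is importable from the package root; the number, source list, and proxy values below are illustrative, not part of the commit:

import scrapinglib

# All keyword arguments are optional; these values are examples only.
json_data = scrapinglib.search(
    'ABC-123',                                   # movie number to scrape
    souces='javbus,javdb',                       # note: upstream spells it 'souces'
    proxies={'https': 'http://127.0.0.1:7890'},  # requests-style proxy dict
    verify=None,                                 # CA bundle path, or None
    dbcookies={'over18': '1'},                   # cookies forwarded to javdb
    dbsite='javdb',                              # javdb mirror host prefix
    morestoryline=False,                         # skip the extra storyline fetch
)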

scrapinglib/avsox.py

@@ -57,8 +57,10 @@ class Avsox(Parser):
         return [i.strip() for i in tags[2:]] if len(tags) > 2 else []
 
     def getOutline(self, htmltree):
-        from .storyline import getStoryline
-        return getStoryline(self.number)
+        if self.morestoryline:
+            from .storyline import getStoryline
+            return getStoryline(self.number)
+        return ''
 
     def getActors(self, htmltree):
         a = super().getActors(htmltree)
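The same morestoryline gate recurs in javbus, javdb, and xcity below: the storyline module is imported lazily and its network lookups run only when the flag is set. A standalone sketch of the pattern, with the real getStoryline stubbed out:

class OutlineGateDemo:
    morestoryline = False  # set from the core object via updateCore()
    number = 'ABC-123'

    def getOutline(self, htmltree=None):
        if self.morestoryline:
            # the real parsers do: from .storyline import getStoryline
            return self._storyline_stub(self.number)
        return ''

    def _storyline_stub(self, number):
        return 'plot text for ' + number  # stand-in for getStoryline()

demo = OutlineGateDemo()
assert demo.getOutline() == ''  # flag off: no storyline fetch at all
demo.morestoryline = True
assert demo.getOutline() == 'plot text for ABC-123'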

scrapinglib/carib.py

@@ -20,10 +20,8 @@ class Carib(Parser):
     expr_series = "//span[@class='spec-title'][contains(text(),'シリーズ')]/../span[@class='spec-content']/a/text()"
     expr_outline = "//div[@class='movie-info section']/p[@itemprop='description']/text()"
 
-    def search(self, number, core: None):
+    def search(self, number):
         self.number = number
-        self.updateCore(core)
         self.detailurl = f'https://www.caribbeancom.com/moviepages/{number}/index.html'
         htmlcode = self.getHtml(self.detailurl)
         if htmlcode == 404 or 'class="movie-info section"' not in htmlcode:

scrapinglib/dlsite.py

@@ -8,6 +8,7 @@ from .parser import Parser
 class Dlsite(Parser):
     source = 'dlsite'
     imagecut = 4
+    allow_number_change = True
 
     expr_title = '/html/head/title/text()'
     expr_actor = '//th[contains(text(),"声优")]/../td/a/text()'
@@ -26,10 +27,8 @@ class Dlsite(Parser):
     expr_label2 = '//th[contains(text(),"社团名")]/../td/span[1]/a/text()'
     expr_extrafanart = '//*[@id="work_left"]/div/div/div[1]/div/@data-src'
 
-    def search(self, number, core: None):
-        self.updateCore(core)
+    def search(self, number):
         self.cookies = {'locale': 'zh-cn'}
         if "RJ" in number or "VJ" in number:
             self.number = number.upper()
             self.detailurl = 'https://www.dlsite.com/maniax/work/=/product_id/' + self.number + '.html/?locale=zh_CN'

scrapinglib/fanza.py

@@ -14,10 +14,8 @@ class Fanza(Parser):
     expr_outline2 = "//div[@class='mg-b20 lh4']//p/text()"
     expr_runtime = "//td[contains(text(),'収録時間')]/following-sibling::td/text()"
 
-    def search(self, number, core: None):
+    def search(self, number):
         self.number = number
-        self.updateCore(core)
         # fanza allows letter + number + underscore; normalize the input here
         # @note: the only underscore usage found so far is h_test123456789
         fanza_search_number = number

scrapinglib/fc2.py

@@ -20,10 +20,8 @@ class Fc2(Parser):
     expr_cover = "//div[@class='items_article_MainitemThumb']/span/img/@src"
     expr_tags = "//a[@class='tag tagTag']/text()"
 
-    def search(self, number, core: None):
+    def search(self, number):
         self.number = number.replace('FC2-', '').replace('fc2-', '')
-        self.updateCore(core)
         self.detailurl = 'https://adult.contents.fc2.com/article/' + self.number + '/'
         self.htmlcode = self.getHtml(self.detailurl)
         if self.htmlcode == 404:

scrapinglib/gcolle.py

@@ -25,10 +25,8 @@ class Gcolle(Parser):
     expr_extrafanart = '//*[@id="cart_quantity"]/table/tr[3]/td/div/img/@src'
     expr_extrafanart2 = '//*[@id="cart_quantity"]/table/tr[3]/td/div/a/img/@src'
 
-    def search(self, number, core: None):
+    def search(self, number):
         self.number = number.upper().replace('GCOLLE-','')
-        self.updateCore(core)
         self.detailurl = 'https://gcolle.net/product_info.php/products_id/' + self.number
         session = get_html_session(cookies=self.cookies, proxies=self.proxies, verify=self.verify)
         htmlcode = session.get('https://gcolle.net/product_info.php/products_id/' + number).text

scrapinglib/getchu.py

@@ -1,8 +1,129 @@
 # -*- coding: utf-8 -*-
+import re
+import json
+from urllib.parse import quote
 from .parser import Parser
 
-class Getchu(Parser):
+
+class Getchu():
     source = 'getchu'
+
+    def scrape(self, number, core: None):
+        dl = dlGetchu()
+        www = wwwGetchu()
+        number = number.replace("-C", "")
+        dic = {}
+        if "item" in number:
+            sort = ["dl.scrape(number, core)", "www.scrape(number, core)"]
+        else:
+            sort = ["www.scrape(number, core)", "dl.scrape(number, core)"]
+        for i in sort:
+            try:
+                dic = eval(i)
+                if dic != None and json.loads(dic).get('title') != '':
+                    break
+            except:
+                pass
+        return dic
+
+
+class wwwGetchu(Parser):
+    imagecut = 0
+    allow_number_change = True
+    cookies = {'getchu_adalt_flag': 'getchu.com', "adult_check_flag": "1"}
+
+    GETCHU_WWW_SEARCH_URL = 'http://www.getchu.com/php/search.phtml?genre=anime_dvd&search_keyword=_WORD_&check_key_dtl=1&submit='
+
+    expr_title = '//*[@id="soft-title"]/text()'
+    expr_cover = "/html/body/div[1]/table[2]/tr[1]/td/a/@href"
+    expr_director = "//td[contains(text(),'ブランド')]/following-sibling::td/a[1]/text()"
+    expr_studio = "//td[contains(text(),'ブランド')]/following-sibling::td/a[1]/text()"
+    expr_actor = "//td[contains(text(),'ブランド')]/following-sibling::td/a[1]/text()"
+    expr_label = "//td[contains(text(),'ジャンル:')]/following-sibling::td/text()"
+    expr_release = "//td[contains(text(),'発売日:')]/following-sibling::td/a/text()"
+    expr_tags = "//td[contains(text(),'カテゴリ')]/following-sibling::td/a/text()"
+    expr_outline = "//div[contains(text(),'商品紹介')]/following-sibling::div/text()"
+    expr_extrafanart = "//div[contains(text(),'サンプル画像')]/following-sibling::div/a/@href"
+    expr_series = "//td[contains(text(),'ジャンル:')]/following-sibling::td/text()"
+
+    def queryNumberUrl(self, number):
+        self.number = quote(number, encoding="euc_jp")
+        queryUrl = self.GETCHU_WWW_SEARCH_URL.replace("_WORD_", self.number)
+        # NOTE dont know why will try 2 times
+        retry = 2
+        for i in range(retry):
+            queryTree = self.getHtmlTree(queryUrl)
+            detailurl = self.getTreeIndex(queryTree, '//*[@id="detail_block"]/div/table/tr[1]/td/a[1]/@href')
+            if detailurl:
+                break
+        if detailurl == "":
+            return None
+        return detailurl.replace('../', 'http://www.getchu.com/')
+
+    def getNum(self, htmltree):
+        return 'GETCHU-' + re.findall('\d+', self.detailurl.replace("http://www.getchu.com/soft.phtml?id=", ""))[0]
+
+    def getCover(self, htmltree):
+        return "http://www.getchu.com" + super().getCover(htmltree).replace("./", '/')
+
+    def getActors(self, htmltree):
+        return super().getDirector(htmltree)
+
+    def getTags(self, htmltree):
+        return self.getAll(htmltree, self.expr_tags)
+
+    def getOutline(self, htmltree):
+        outline = ''
+        _list = self.getAll(htmltree, self.expr_outline)
+        for i in _list:
+            outline = outline + i.strip()
+        return outline
+
+    def getExtrafanart(self, htmltree):
+        arts = super().getExtrafanart(htmltree)
+        extrafanart = []
+        for i in arts:
+            i = "http://www.getchu.com" + i.replace("./", '/')
+            if 'jpg' in i:
+                extrafanart.append(i)
+        return extrafanart
+
+
+class dlGetchu(wwwGetchu):
+    imagecut = 4
+    allow_number_change = True
+    cookies = {"adult_check_flag": "1"}
+    extraheader = {"Referer": "https://dl.getchu.com/"}
+
+    GETCHU_DL_SEARCH_URL = 'https://dl.getchu.com/search/search_list.php?dojin=1&search_category_id=&search_keyword=_WORD_&btnWordSearch=%B8%A1%BA%F7&action=search&set_category_flag=1'
+    GETCHU_DL_URL = 'https://dl.getchu.com/i/item_WORD_'
+
+    expr_title = "//div[contains(@style,'color: #333333; padding: 3px 0px 0px 5px;')]/text()"
+    expr_cover = "//td[contains(@bgcolor,'#ffffff')]/img/@src"
+    expr_director = "//td[contains(text(),'作者')]/following-sibling::td/text()"
+    expr_studio = "//td[contains(text(),'サークル')]/following-sibling::td/a/text()"
+    expr_label = "//td[contains(text(),'サークル')]/following-sibling::td/a/text()"
+    expr_runtime = "//td[contains(text(),'画像数&ページ数')]/following-sibling::td/text()"
+    expr_release = "//td[contains(text(),'配信開始日')]/following-sibling::td/text()"
+    expr_tags = "//td[contains(text(),'趣向')]/following-sibling::td/a/text()"
+    expr_outline = "//*[contains(text(),'作品内容')]/following-sibling::td/text()"
+    expr_extrafanart = "//td[contains(@style,'background-color: #444444;')]/a/@href"
+    expr_series = "//td[contains(text(),'サークル')]/following-sibling::td/a/text()"
+
+    def queryNumberUrl(self, number):
+        if "item" in number or 'GETCHU' in number.upper():
+            self.number = re.findall('\d+', number)[0]
+        else:
+            queryUrl = self.GETCHU_DL_SEARCH_URL.replace("_WORD_", number)
+            queryTree = self.getHtmlTree(queryUrl)
+            detailurl = self.getTreeIndex(queryTree, '/html/body/div[1]/table/tr/td/table[4]/tr/td[2]/table/tr[2]/td/table/tr/td/table/tr/td[2]/div/a[1]/@href')
+            if detailurl == "":
+                return None
+            self.number = re.findall('\d+', detailurl)[0]
+        return self.GETCHU_DL_URL.replace("_WORD_", self.number)
+
+    def getNum(self, htmltree):
+        return 'GETCHU-' + re.findall('\d+', self.number)[0]
+
+    def getCover(self, htmltree):
+        return "https://dl.getchu.com" + super().getCover(htmltree)

scrapinglib/httprequest.py

@@ -9,7 +9,7 @@ from cloudscraper import create_scraper
 G_USER_AGENT = r'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.133 Safari/537.36'
 G_DEFAULT_TIMEOUT = 10
 
-def get(url: str, cookies = None, ua: str = None, return_type: str = None, encoding: str = None,
+def get(url: str, cookies=None, ua: str = None, extra_headers=None, return_type: str = None, encoding: str = None,
         retry: int = 3, timeout: int = G_DEFAULT_TIMEOUT, proxies=None, verify=None):
     """
     Core web-request function
@@ -18,7 +18,8 @@ def get(url: str, cookies = None, ua: str = None, return_type: str = None, encod
     """
     errors = ""
     headers = {"User-Agent": ua or G_USER_AGENT}
+    if extra_headers != None:
+        headers.update(extra_headers)
     for i in range(retry):
         try:
             result = requests.get(url, headers=headers, timeout=timeout, proxies=proxies,
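The merge order matters here: extra_headers is applied after the default header dict, so a caller-supplied User-Agent would win on collision. A standalone check of that behavior (the UA string is abbreviated):

G_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) ...'  # abbreviated

def build_headers(ua=None, extra_headers=None):
    headers = {"User-Agent": ua or G_USER_AGENT}
    if extra_headers is not None:
        headers.update(extra_headers)  # caller-supplied keys override defaults
    return headers

# dlGetchu relies on this path to send its Referer header:
print(build_headers(extra_headers={'Referer': 'https://dl.getchu.com/'}))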

scrapinglib/javbus.py

@@ -29,10 +29,8 @@ class Javbus(Parser):
     expr_tags = '/html/head/meta[@name="keywords"]/@content'
     expr_uncensored = '//*[@id="navbar"]/ul[1]/li[@class="active"]/a[contains(@href,"uncensored")]'
 
-    def search(self, number, core: None):
+    def search(self, number):
         self.number = number
-        self.updateCore(core)
         try:
             url = "https://www." + secrets.choice([
                 'buscdn.fun', 'busdmm.fun', 'busfan.fun', 'busjav.fun',
@@ -139,7 +137,9 @@ class Javbus(Parser):
         return ''
 
     def getOutline(self, htmltree):
-        if any(caller for caller in inspect.stack() if os.path.basename(caller.filename) == 'airav.py'):
-            return ''  # calls coming from airav.py return without computing the outline, to avoid duplicate scraping slowing things down
-        from .storyline import getStoryline
-        return getStoryline(self.number , uncensored = self.uncensored)
+        if self.morestoryline:
+            if any(caller for caller in inspect.stack() if os.path.basename(caller.filename) == 'airav.py'):
+                return ''  # calls coming from airav.py return without computing the outline, to avoid duplicate scraping slowing things down
+            from .storyline import getStoryline
+            return getStoryline(self.number , uncensored = self.uncensored)
+        return ''
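The caller test kept inside the new gate walks the interpreter stack looking for a frame whose file is airav.py, which lets the airav scraper reuse Javbus without triggering a second storyline fetch. A self-contained sketch of that check:

import inspect
import os

def called_from(filename):
    # True when any frame in the current call stack comes from `filename`
    return any(os.path.basename(f.filename) == filename
               for f in inspect.stack())

print(called_from('airav.py'))  # False when run directly from a script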

scrapinglib/javdb.py

@@ -43,15 +43,22 @@ class Javdb(Parser):
     def updateCore(self, core):
         if core.proxies:
             self.proxies = core.proxies
+        if core.verify:
+            self.verify = core.verify
+        if core.morestoryline:
+            self.morestoryline = True
+        # special
         if core.dbcookies:
             self.cookies = core.dbcookies
         else:
             self.cookies = {'over18':'1', 'theme':'auto', 'locale':'zh'}
+        if core.dbsite:
+            self.dbsite = core.dbsite
+        else:
+            self.dbsite = 'javdb'
 
-    def search(self, number, core: None):
+    def search(self, number):
         self.number = number
-        self.updateCore(core)
         self.session = get_html_session(cookies=self.cookies, proxies=self.proxies, verify=self.verify)
         self.detailurl = self.queryNumberUrl(number)
@@ -61,7 +68,7 @@ class Javdb(Parser):
         return result
 
     def queryNumberUrl(self, number):
-        javdb_url = 'https://javdb.com/search?q=' + number + '&f=all'
+        javdb_url = 'https://' + self.dbsite + '.com/search?q=' + number + '&f=all'
        try:
             resp = self.session.get(javdb_url)
         except Exception as e:
@@ -148,8 +155,10 @@ class Javdb(Parser):
         return r
 
     def getOutline(self, htmltree):
-        from .storyline import getStoryline
-        return getStoryline(self.number, self.getUncensored(htmltree))
+        if self.morestoryline:
+            from .storyline import getStoryline
+            return getStoryline(self.number, self.getUncensored(htmltree))
+        return ''
 
     def getStudio(self, htmltree):
         try:
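With dbsite configurable, the search URL is assembled from the mirror prefix rather than hard-coding javdb.com. A quick standalone check, using the 'javdb' default the code falls back to (the second mirror name is hypothetical):

def query_url(number, dbsite='javdb'):
    return 'https://' + dbsite + '.com/search?q=' + number + '&f=all'

assert query_url('ABC-123') == 'https://javdb.com/search?q=ABC-123&f=all'
# 'javdb008' below is an invented example of a mirror prefix
assert query_url('ABC-123', 'javdb008') == 'https://javdb008.com/search?q=ABC-123&f=all'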

scrapinglib/madou.py

@@ -15,10 +15,8 @@ class Madou(Parser):
     expr_studio = '//a[@rel="category tag"]/text()'
     expr_tags = '/html/head/meta[@name="keywords"]/@content'
 
-    def search(self, number, core: None):
+    def search(self, number):
         self.number = number.lower().strip()
-        self.updateCore(core)
         self.detailurl = "https://madou.club/" + number + ".html"
         self.htmlcode = self.getHtml(self.detailurl)
         if self.htmlcode == 404:

scrapinglib/mgstage.py

@@ -23,10 +23,8 @@ class Mgstage(Parser):
     expr_tags2 = '//th[contains(text(),"ジャンル:")]/../td/text()'
     expr_series = '//th[contains(text(),"シリーズ")]/../td/a/text()'
 
-    def search(self, number, core: None):
+    def search(self, number):
         self.number = number.upper()
-        self.updateCore(core)
         self.cookies = {'adc':'1'}
         self.detailurl = 'https://www.mgstage.com/product/product_detail/'+str(self.number)+'/'
         self.htmlcode = self.getHtml(self.detailurl)

scrapinglib/parser.py

@@ -11,10 +11,13 @@ class Parser:
     source = 'base'
     imagecut = 1
     uncensored = False
+    allow_number_change = False
     # update
     proxies = None
-    cookies = None
     verify = None
+    extraheader = None
+    cookies = None
+    morestoryline = False
 
     number = ''
     detailurl = ''
@@ -47,12 +50,15 @@ class Parser:
     def __init__(self) -> None:
         pass
 
-    def search(self, number, core: None):
-        """ Search the number
+    def scrape(self, number, core: None):
+        """ Scrape the number
         """
-        self.number = number
         self.updateCore(core)
+        result = self.search(number)
+        return result
+
+    def search(self, number):
+        self.number = number
         self.detailurl = self.queryNumberUrl(number)
         htmltree = self.getHtmlTree(self.detailurl)
         result = self.dictformat(htmltree)
@@ -66,6 +72,10 @@ class Parser:
         """
         if core.proxies:
             self.proxies = core.proxies
+        if core.verify:
+            self.verify = core.verify
+        if core.morestoryline:
+            self.morestoryline = True
 
     def queryNumberUrl(self, number):
         """ Look up the detail-page url for the number
@@ -78,7 +88,7 @@ class Parser:
     def getHtml(self, url, type = None):
         """ Fetch the page
         """
-        resp = httprequest.get(url, cookies=self.cookies, proxies=self.proxies, verify=self.verify, return_type=type)
+        resp = httprequest.get(url, cookies=self.cookies, proxies=self.proxies, extra_headers=self.extraheader, verify=self.verify, return_type=type)
         if '<title>404 Page Not Found' in resp \
             or '<title>未找到页面' in resp \
             or '404 Not Found' in resp \
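The split above turns scrape into a small template method: apply the shared config, then delegate to the per-site search. A usage sketch, assuming a config object exposing the attributes updateCore reads; every value below is illustrative:

from scrapinglib.javbus import Javbus

class DemoCore:
    # attributes consumed by Parser.updateCore(); example values only
    proxies = {'https': 'http://127.0.0.1:7890'}
    verify = None
    morestoryline = True
    dbcookies = None  # only read by Javdb.updateCore()
    dbsite = None     # ditto

result = Javbus().scrape('ABC-123', DemoCore())  # config first, then search()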

scrapinglib/xcity.py

@@ -59,8 +59,10 @@ class Xcity(Parser):
         return ''
 
     def getOutline(self, htmltree):
-        from .storyline import getStoryline
-        return getStoryline(self.number, uncensored=False)
+        if self.morestoryline:
+            from .storyline import getStoryline
+            return getStoryline(self.number, uncensored=False)
+        return ''
 
     def getActors(self, htmltree):
         htmla = self.browser.page.select('#avodDetails > div > div.frame > div.content > div > ul.profileCL > li.credit-links > a')
@@ -111,10 +113,8 @@ class Xcity(Parser):
             raise ValueError("xcity.py: detail page not found")
         return str(browser.page), browser
 
-    def search(self, number, core: None):
+    def search(self, number):
         self.number = number
-        self.updateCore(core)
         self.detail_page, self.browser = self.open_by_browser(number)
         self.detailurl = self.browser.url
         lx = etree.fromstring(self.detail_page, etree.HTMLParser())