update init
This commit is contained in:
@@ -8,9 +8,6 @@ from .javbus import Javbus
|
|||||||
|
|
||||||
class Airav(Parser):
|
class Airav(Parser):
|
||||||
source = 'airav'
|
source = 'airav'
|
||||||
# for javbus
|
|
||||||
specifiedSource = None
|
|
||||||
addtion_Javbus = True
|
|
||||||
|
|
||||||
expr_title = '/html/head/title/text()'
|
expr_title = '/html/head/title/text()'
|
||||||
expr_number = '/html/head/title/text()'
|
expr_number = '/html/head/title/text()'
|
||||||
@@ -22,6 +19,11 @@ class Airav(Parser):
|
|||||||
expr_tags = '//div[@class="tagBtnMargin"]/a/text()'
|
expr_tags = '//div[@class="tagBtnMargin"]/a/text()'
|
||||||
expr_extrafanart = '//div[@class="mobileImgThumbnail"]/a/@href'
|
expr_extrafanart = '//div[@class="mobileImgThumbnail"]/a/@href'
|
||||||
|
|
||||||
|
def extraInit(self):
|
||||||
|
# for javbus
|
||||||
|
self.specifiedSource = None
|
||||||
|
self.addtion_Javbus = True
|
||||||
|
|
||||||
def search(self, number):
|
def search(self, number):
|
||||||
self.number = number
|
self.number = number
|
||||||
if self.specifiedUrl:
|
if self.specifiedUrl:
|
||||||
|
|||||||
@@ -4,10 +4,7 @@ from .parser import Parser
|
|||||||
|
|
||||||
|
|
||||||
class Avsox(Parser):
|
class Avsox(Parser):
|
||||||
|
|
||||||
source = 'avsox'
|
source = 'avsox'
|
||||||
imagecut = 3
|
|
||||||
originalnum = ''
|
|
||||||
|
|
||||||
expr_number = '//span[contains(text(),"识别码:")]/../span[2]/text()'
|
expr_number = '//span[contains(text(),"识别码:")]/../span[2]/text()'
|
||||||
expr_actor = '//a[@class="avatar-box"]'
|
expr_actor = '//a[@class="avatar-box"]'
|
||||||
@@ -21,6 +18,10 @@ class Avsox(Parser):
|
|||||||
expr_label = '//p[contains(text(),"系列:")]/following-sibling::p[1]/a/text()'
|
expr_label = '//p[contains(text(),"系列:")]/following-sibling::p[1]/a/text()'
|
||||||
expr_series = '//span[contains(text(),"系列:")]/../span[2]/text()'
|
expr_series = '//span[contains(text(),"系列:")]/../span[2]/text()'
|
||||||
|
|
||||||
|
def extraInit(self):
|
||||||
|
self.imagecut = 3
|
||||||
|
self.originalnum = ''
|
||||||
|
|
||||||
def queryNumberUrl(self, number: str):
|
def queryNumberUrl(self, number: str):
|
||||||
upnum = number.upper()
|
upnum = number.upper()
|
||||||
if 'FC2' in upnum and 'FC2-PPV' not in upnum:
|
if 'FC2' in upnum and 'FC2-PPV' not in upnum:
|
||||||
|
|||||||
@@ -8,7 +8,6 @@ from .parser import Parser
|
|||||||
|
|
||||||
class Carib(Parser):
|
class Carib(Parser):
|
||||||
source = 'carib'
|
source = 'carib'
|
||||||
uncensored = True
|
|
||||||
|
|
||||||
expr_title = "//div[@class='movie-info section']/div[@class='heading']/h1[@itemprop='name']/text()"
|
expr_title = "//div[@class='movie-info section']/div[@class='heading']/h1[@itemprop='name']/text()"
|
||||||
expr_release = "//li[2]/span[@class='spec-content']/text()"
|
expr_release = "//li[2]/span[@class='spec-content']/text()"
|
||||||
@@ -20,6 +19,9 @@ class Carib(Parser):
|
|||||||
expr_series = "//span[@class='spec-title'][contains(text(),'シリーズ')]/../span[@class='spec-content']/a/text()"
|
expr_series = "//span[@class='spec-title'][contains(text(),'シリーズ')]/../span[@class='spec-content']/a/text()"
|
||||||
expr_outline = "//div[@class='movie-info section']/p[@itemprop='description']/text()"
|
expr_outline = "//div[@class='movie-info section']/p[@itemprop='description']/text()"
|
||||||
|
|
||||||
|
def extraInit(self):
|
||||||
|
self.uncensored = True
|
||||||
|
|
||||||
def search(self, number):
|
def search(self, number):
|
||||||
self.number = number
|
self.number = number
|
||||||
if self.specifiedUrl:
|
if self.specifiedUrl:
|
||||||
|
|||||||
@@ -6,8 +6,6 @@ from .parser import Parser
|
|||||||
|
|
||||||
class Dlsite(Parser):
|
class Dlsite(Parser):
|
||||||
source = 'dlsite'
|
source = 'dlsite'
|
||||||
imagecut = 4
|
|
||||||
allow_number_change = True
|
|
||||||
|
|
||||||
expr_title = '/html/head/title/text()'
|
expr_title = '/html/head/title/text()'
|
||||||
expr_actor = '//th[contains(text(),"声优")]/../td/a/text()'
|
expr_actor = '//th[contains(text(),"声优")]/../td/a/text()'
|
||||||
@@ -26,6 +24,10 @@ class Dlsite(Parser):
|
|||||||
expr_label2 = '//th[contains(text(),"社团名")]/../td/span[1]/a/text()'
|
expr_label2 = '//th[contains(text(),"社团名")]/../td/span[1]/a/text()'
|
||||||
expr_extrafanart = '//*[@id="work_left"]/div/div/div[1]/div/@data-src'
|
expr_extrafanart = '//*[@id="work_left"]/div/div/div[1]/div/@data-src'
|
||||||
|
|
||||||
|
def extraInit(self):
|
||||||
|
self.imagecut = 4
|
||||||
|
self.allow_number_change = True
|
||||||
|
|
||||||
def search(self, number):
|
def search(self, number):
|
||||||
self.cookies = {'locale': 'zh-cn'}
|
self.cookies = {'locale': 'zh-cn'}
|
||||||
if self.specifiedUrl:
|
if self.specifiedUrl:
|
||||||
|
|||||||
@@ -9,7 +9,6 @@ from .parser import Parser
|
|||||||
|
|
||||||
class Fc2(Parser):
|
class Fc2(Parser):
|
||||||
source = 'fc2'
|
source = 'fc2'
|
||||||
imagecut = 0
|
|
||||||
|
|
||||||
expr_title = '/html/head/title/text()'
|
expr_title = '/html/head/title/text()'
|
||||||
expr_studio = '//*[@id="top"]/div[1]/section[1]/div/section/div[2]/ul/li[3]/a/text()'
|
expr_studio = '//*[@id="top"]/div[1]/section[1]/div/section/div[2]/ul/li[3]/a/text()'
|
||||||
@@ -21,6 +20,9 @@ class Fc2(Parser):
|
|||||||
expr_extrafanart = '//ul[@class="items_article_SampleImagesArea"]/li/a/@href'
|
expr_extrafanart = '//ul[@class="items_article_SampleImagesArea"]/li/a/@href'
|
||||||
expr_tags = "//a[@class='tag tagTag']/text()"
|
expr_tags = "//a[@class='tag tagTag']/text()"
|
||||||
|
|
||||||
|
def extraInit(self):
|
||||||
|
self.imagecut = 0
|
||||||
|
|
||||||
def search(self, number):
|
def search(self, number):
|
||||||
self.number = number.lower().replace('fc2-ppv-', '').replace('fc2-', '')
|
self.number = number.lower().replace('fc2-ppv-', '').replace('fc2-', '')
|
||||||
if self.specifiedUrl:
|
if self.specifiedUrl:
|
||||||
|
|||||||
@@ -8,7 +8,6 @@ from .parser import Parser
|
|||||||
|
|
||||||
class Gcolle(Parser):
|
class Gcolle(Parser):
|
||||||
source = 'gcolle'
|
source = 'gcolle'
|
||||||
imagecut = 4
|
|
||||||
|
|
||||||
expr_r18 = '//*[@id="main_content"]/table[1]/tbody/tr/td[2]/table/tbody/tr/td/h4/a[2]/@href'
|
expr_r18 = '//*[@id="main_content"]/table[1]/tbody/tr/td[2]/table/tbody/tr/td/h4/a[2]/@href'
|
||||||
expr_number = '//td[contains(text(),"商品番号")]/../td[2]/text()'
|
expr_number = '//td[contains(text(),"商品番号")]/../td[2]/text()'
|
||||||
@@ -25,7 +24,10 @@ class Gcolle(Parser):
|
|||||||
expr_extrafanart = '//*[@id="cart_quantity"]/table/tr[3]/td/div/img/@src'
|
expr_extrafanart = '//*[@id="cart_quantity"]/table/tr[3]/td/div/img/@src'
|
||||||
expr_extrafanart2 = '//*[@id="cart_quantity"]/table/tr[3]/td/div/a/img/@src'
|
expr_extrafanart2 = '//*[@id="cart_quantity"]/table/tr[3]/td/div/a/img/@src'
|
||||||
|
|
||||||
def search(self, number):
|
def extraInit(self):
|
||||||
|
self.imagecut = 4
|
||||||
|
|
||||||
|
def search(self, number: str):
|
||||||
self.number = number.upper().replace('GCOLLE-', '')
|
self.number = number.upper().replace('GCOLLE-', '')
|
||||||
if self.specifiedUrl:
|
if self.specifiedUrl:
|
||||||
self.detailurl = self.specifiedUrl
|
self.detailurl = self.specifiedUrl
|
||||||
@@ -69,5 +71,3 @@ class Gcolle(Parser):
|
|||||||
for i in range(len(extrafanart)):
|
for i in range(len(extrafanart)):
|
||||||
extrafanart[i] = 'https:' + extrafanart[i]
|
extrafanart[i] = 'https:' + extrafanart[i]
|
||||||
return extrafanart
|
return extrafanart
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -28,12 +28,6 @@ class Getchu():
|
|||||||
return dic
|
return dic
|
||||||
|
|
||||||
class wwwGetchu(Parser):
|
class wwwGetchu(Parser):
|
||||||
imagecut = 0
|
|
||||||
allow_number_change = True
|
|
||||||
|
|
||||||
cookies = {'getchu_adalt_flag': 'getchu.com', "adult_check_flag": "1"}
|
|
||||||
GETCHU_WWW_SEARCH_URL = 'http://www.getchu.com/php/search.phtml?genre=anime_dvd&search_keyword=_WORD_&check_key_dtl=1&submit='
|
|
||||||
|
|
||||||
expr_title = '//*[@id="soft-title"]/text()'
|
expr_title = '//*[@id="soft-title"]/text()'
|
||||||
expr_cover = '//head/meta[@property="og:image"]/@content'
|
expr_cover = '//head/meta[@property="og:image"]/@content'
|
||||||
expr_director = "//td[contains(text(),'ブランド')]/following-sibling::td/a[1]/text()"
|
expr_director = "//td[contains(text(),'ブランド')]/following-sibling::td/a[1]/text()"
|
||||||
@@ -46,6 +40,13 @@ class wwwGetchu(Parser):
|
|||||||
expr_extrafanart = "//div[contains(text(),'サンプル画像')]/following-sibling::div/a/@href"
|
expr_extrafanart = "//div[contains(text(),'サンプル画像')]/following-sibling::div/a/@href"
|
||||||
expr_series = "//td[contains(text(),'ジャンル:')]/following-sibling::td/text()"
|
expr_series = "//td[contains(text(),'ジャンル:')]/following-sibling::td/text()"
|
||||||
|
|
||||||
|
def extraInit(self):
|
||||||
|
self.imagecut = 0
|
||||||
|
self.allow_number_change = True
|
||||||
|
|
||||||
|
self.cookies = {'getchu_adalt_flag': 'getchu.com', "adult_check_flag": "1"}
|
||||||
|
self.GETCHU_WWW_SEARCH_URL = 'http://www.getchu.com/php/search.phtml?genre=anime_dvd&search_keyword=_WORD_&check_key_dtl=1&submit='
|
||||||
|
|
||||||
def queryNumberUrl(self, number):
|
def queryNumberUrl(self, number):
|
||||||
if 'GETCHU' in number.upper():
|
if 'GETCHU' in number.upper():
|
||||||
idn = re.findall('\d+',number)[0]
|
idn = re.findall('\d+',number)[0]
|
||||||
@@ -96,16 +97,6 @@ class dlGetchu(wwwGetchu):
|
|||||||
""" 二者基本一致
|
""" 二者基本一致
|
||||||
headers extrafanart 略有区别
|
headers extrafanart 略有区别
|
||||||
"""
|
"""
|
||||||
|
|
||||||
imagecut = 4
|
|
||||||
allow_number_change = True
|
|
||||||
|
|
||||||
cookies = {"adult_check_flag": "1"}
|
|
||||||
extraheader = {"Referer": "https://dl.getchu.com/"}
|
|
||||||
|
|
||||||
GETCHU_DL_SEARCH_URL = 'https://dl.getchu.com/search/search_list.php?dojin=1&search_category_id=&search_keyword=_WORD_&btnWordSearch=%B8%A1%BA%F7&action=search&set_category_flag=1'
|
|
||||||
GETCHU_DL_URL = 'https://dl.getchu.com/i/item_WORD_'
|
|
||||||
|
|
||||||
expr_title = "//div[contains(@style,'color: #333333; padding: 3px 0px 0px 5px;')]/text()"
|
expr_title = "//div[contains(@style,'color: #333333; padding: 3px 0px 0px 5px;')]/text()"
|
||||||
expr_director = "//td[contains(text(),'作者')]/following-sibling::td/text()"
|
expr_director = "//td[contains(text(),'作者')]/following-sibling::td/text()"
|
||||||
expr_studio = "//td[contains(text(),'サークル')]/following-sibling::td/a/text()"
|
expr_studio = "//td[contains(text(),'サークル')]/following-sibling::td/a/text()"
|
||||||
@@ -117,6 +108,16 @@ class dlGetchu(wwwGetchu):
|
|||||||
expr_extrafanart = "//td[contains(@style,'background-color: #444444;')]/a/@href"
|
expr_extrafanart = "//td[contains(@style,'background-color: #444444;')]/a/@href"
|
||||||
expr_series = "//td[contains(text(),'サークル')]/following-sibling::td/a/text()"
|
expr_series = "//td[contains(text(),'サークル')]/following-sibling::td/a/text()"
|
||||||
|
|
||||||
|
def extraInit(self):
|
||||||
|
self.imagecut = 4
|
||||||
|
self.allow_number_change = True
|
||||||
|
|
||||||
|
self.cookies = {"adult_check_flag": "1"}
|
||||||
|
self.extraheader = {"Referer": "https://dl.getchu.com/"}
|
||||||
|
|
||||||
|
self.GETCHU_DL_SEARCH_URL = 'https://dl.getchu.com/search/search_list.php?dojin=1&search_category_id=&search_keyword=_WORD_&btnWordSearch=%B8%A1%BA%F7&action=search&set_category_flag=1'
|
||||||
|
self.GETCHU_DL_URL = 'https://dl.getchu.com/i/item_WORD_'
|
||||||
|
|
||||||
def queryNumberUrl(self, number):
|
def queryNumberUrl(self, number):
|
||||||
if "item" in number or 'GETCHU' in number.upper():
|
if "item" in number or 'GETCHU' in number.upper():
|
||||||
self.number = re.findall('\d+',number)[0]
|
self.number = re.findall('\d+',number)[0]
|
||||||
|
|||||||
@@ -11,9 +11,6 @@ from .parser import Parser
|
|||||||
class Javdb(Parser):
|
class Javdb(Parser):
|
||||||
source = 'javdb'
|
source = 'javdb'
|
||||||
|
|
||||||
fixstudio = False
|
|
||||||
noauth = False
|
|
||||||
|
|
||||||
expr_number = '//strong[contains(text(),"番號")]/../span/text()'
|
expr_number = '//strong[contains(text(),"番號")]/../span/text()'
|
||||||
expr_number2 = '//strong[contains(text(),"番號")]/../span/a/text()'
|
expr_number2 = '//strong[contains(text(),"番號")]/../span/a/text()'
|
||||||
expr_title = "/html/head/title/text()"
|
expr_title = "/html/head/title/text()"
|
||||||
@@ -44,6 +41,10 @@ class Javdb(Parser):
|
|||||||
expr_uservotes = '//span[@class="score-stars"]/../text()'
|
expr_uservotes = '//span[@class="score-stars"]/../text()'
|
||||||
expr_actorphoto = '//strong[contains(text(),"演員:")]/../span/a[starts-with(@href,"/actors/")]'
|
expr_actorphoto = '//strong[contains(text(),"演員:")]/../span/a[starts-with(@href,"/actors/")]'
|
||||||
|
|
||||||
|
def extraInit(self):
|
||||||
|
self.fixstudio = False
|
||||||
|
self.noauth = False
|
||||||
|
|
||||||
def updateCore(self, core):
|
def updateCore(self, core):
|
||||||
if core.proxies:
|
if core.proxies:
|
||||||
self.proxies = core.proxies
|
self.proxies = core.proxies
|
||||||
|
|||||||
@@ -8,8 +8,6 @@ from .parser import Parser
|
|||||||
class Javlibrary(Parser):
|
class Javlibrary(Parser):
|
||||||
source = 'javlibrary'
|
source = 'javlibrary'
|
||||||
|
|
||||||
htmltree = None
|
|
||||||
|
|
||||||
expr_number = '//div[@id="video_id"]/table/tr/td[@class="text"]/text()'
|
expr_number = '//div[@id="video_id"]/table/tr/td[@class="text"]/text()'
|
||||||
expr_title = '//div[@id="video_title"]/h3/a/text()'
|
expr_title = '//div[@id="video_title"]/h3/a/text()'
|
||||||
expr_actor = '//div[@id="video_cast"]/table/tr/td[@class="text"]/span/span[@class="star"]/a/text()'
|
expr_actor = '//div[@id="video_cast"]/table/tr/td[@class="text"]/span/span[@class="star"]/a/text()'
|
||||||
@@ -22,6 +20,9 @@ class Javlibrary(Parser):
|
|||||||
expr_director = '//div[@id="video_director"]/table/tr/td[@class="text"]/span/a/text()'
|
expr_director = '//div[@id="video_director"]/table/tr/td[@class="text"]/span/a/text()'
|
||||||
expr_extrafanart = '//div[@class="previewthumbs"]/img/@src'
|
expr_extrafanart = '//div[@class="previewthumbs"]/img/@src'
|
||||||
|
|
||||||
|
def extraInit(self):
|
||||||
|
self.htmltree = None
|
||||||
|
|
||||||
def updateCore(self, core):
|
def updateCore(self, core):
|
||||||
if core.proxies:
|
if core.proxies:
|
||||||
self.proxies = core.proxies
|
self.proxies = core.proxies
|
||||||
|
|||||||
@@ -8,14 +8,16 @@ from .parser import Parser
|
|||||||
|
|
||||||
class Madou(Parser):
|
class Madou(Parser):
|
||||||
source = 'madou'
|
source = 'madou'
|
||||||
imagecut = 0
|
|
||||||
uncensored = True
|
|
||||||
|
|
||||||
expr_url = '//a[@class="share-weixin"]/@data-url'
|
expr_url = '//a[@class="share-weixin"]/@data-url'
|
||||||
expr_title = "/html/head/title/text()"
|
expr_title = "/html/head/title/text()"
|
||||||
expr_studio = '//a[@rel="category tag"]/text()'
|
expr_studio = '//a[@rel="category tag"]/text()'
|
||||||
expr_tags = '/html/head/meta[@name="keywords"]/@content'
|
expr_tags = '/html/head/meta[@name="keywords"]/@content'
|
||||||
|
|
||||||
|
def extraInit(self):
|
||||||
|
self.imagecut = 0
|
||||||
|
self.uncensored = True
|
||||||
|
|
||||||
def search(self, number):
|
def search(self, number):
|
||||||
self.number = number.lower().strip()
|
self.number = number.lower().strip()
|
||||||
if self.specifiedUrl:
|
if self.specifiedUrl:
|
||||||
|
|||||||
@@ -8,8 +8,6 @@ from .parser import Parser
|
|||||||
|
|
||||||
class Mv91(Parser):
|
class Mv91(Parser):
|
||||||
source = 'mv91'
|
source = 'mv91'
|
||||||
imagecut = 0
|
|
||||||
uncensored = True
|
|
||||||
|
|
||||||
expr_number = '//div[@class="player-title"]/text()'
|
expr_number = '//div[@class="player-title"]/text()'
|
||||||
expr_title = '//div[@class="player-title"]/text()'
|
expr_title = '//div[@class="player-title"]/text()'
|
||||||
@@ -18,6 +16,10 @@ class Mv91(Parser):
|
|||||||
expr_tags = '//div[@class="player-tag"]/text()'
|
expr_tags = '//div[@class="player-tag"]/text()'
|
||||||
expr_actor = '//p[@class="player-name"]/text()'
|
expr_actor = '//p[@class="player-name"]/text()'
|
||||||
|
|
||||||
|
def extraInit(self):
|
||||||
|
self.imagecut = 0
|
||||||
|
self.uncensored = True
|
||||||
|
|
||||||
def getHtmlTree(self, url, type=None):
|
def getHtmlTree(self, url, type=None):
|
||||||
self.htmlcode = self.getHtml(url, type)
|
self.htmlcode = self.getHtml(url, type)
|
||||||
if self.htmlcode == 404:
|
if self.htmlcode == 404:
|
||||||
|
|||||||
@@ -11,23 +11,6 @@ class Parser:
|
|||||||
""" 基础刮削类
|
""" 基础刮削类
|
||||||
"""
|
"""
|
||||||
source = 'base'
|
source = 'base'
|
||||||
# 推荐剪切poster封面:
|
|
||||||
# `0` 复制cover
|
|
||||||
# `1` 裁剪cover
|
|
||||||
# `3` 下载小封面
|
|
||||||
imagecut = 1
|
|
||||||
uncensored = False
|
|
||||||
allow_number_change = False
|
|
||||||
# update
|
|
||||||
proxies = None
|
|
||||||
verify = None
|
|
||||||
extraheader = None
|
|
||||||
cookies = None
|
|
||||||
morestoryline = False
|
|
||||||
specifiedUrl = None
|
|
||||||
|
|
||||||
number = ''
|
|
||||||
detailurl = ''
|
|
||||||
# xpath expr
|
# xpath expr
|
||||||
expr_number = ''
|
expr_number = ''
|
||||||
expr_title = ''
|
expr_title = ''
|
||||||
@@ -54,12 +37,33 @@ class Parser:
|
|||||||
expr_userrating = ''
|
expr_userrating = ''
|
||||||
expr_uservotes = ''
|
expr_uservotes = ''
|
||||||
|
|
||||||
def __init__(self) -> None:
|
def __init__(self):
|
||||||
|
# 推荐剪切poster封面:
|
||||||
|
# `0` 复制cover
|
||||||
|
# `1` 裁剪cover
|
||||||
|
# `3` 下载小封面
|
||||||
|
self.imagecut = 1
|
||||||
|
self.uncensored = False
|
||||||
|
self.allow_number_change = False
|
||||||
|
# update
|
||||||
|
self.proxies = None
|
||||||
|
self.verify = None
|
||||||
|
self.extraheader = None
|
||||||
|
self.cookies = None
|
||||||
|
self.morestoryline = False
|
||||||
|
self.specifiedUrl = None
|
||||||
|
self.extraInit()
|
||||||
|
|
||||||
|
def extraInit(self):
|
||||||
|
""" 自定义初始化内容
|
||||||
|
"""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def scrape(self, number, core: None):
|
def scrape(self, number, core: None):
|
||||||
""" 刮削番号
|
""" 刮削番号
|
||||||
"""
|
"""
|
||||||
|
# 每次调用,初始化参数
|
||||||
|
self.__init__()
|
||||||
self.updateCore(core)
|
self.updateCore(core)
|
||||||
result = self.search(number)
|
result = self.search(number)
|
||||||
return result
|
return result
|
||||||
|
|||||||
Reference in New Issue
Block a user