fixes
- 优化avsox刮削 FC2 - 修复javdb与library的specifiedUrl - 其他
This commit is contained in:
@@ -1,6 +1,5 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import re
|
||||
from .parser import Parser
|
||||
|
||||
|
||||
@@ -8,6 +7,7 @@ class Avsox(Parser):
|
||||
|
||||
source = 'avsox'
|
||||
imagecut = 3
|
||||
originalnum = ''
|
||||
|
||||
expr_number = '//span[contains(text(),"识别码:")]/../span[2]/text()'
|
||||
expr_actor = '//a[@class="avatar-box"]'
|
||||
@@ -21,7 +21,11 @@ class Avsox(Parser):
|
||||
expr_label = '//p[contains(text(),"系列:")]/following-sibling::p[1]/a/text()'
|
||||
expr_series = '//span[contains(text(),"系列:")]/../span[2]/text()'
|
||||
|
||||
def queryNumberUrl(self, number):
|
||||
def queryNumberUrl(self, number: str):
|
||||
upnum = number.upper()
|
||||
if 'FC2' in upnum and 'FC2-PPV' not in upnum:
|
||||
number = upnum.replace('FC2', 'FC2-PPV')
|
||||
self.number = number
|
||||
qurySiteTree = self.getHtmlTree('https://tellme.pw/avsox')
|
||||
site = self.getTreeElement(qurySiteTree, '//div[@class="container"]/div/a/@href')
|
||||
self.searchtree = self.getHtmlTree(site + '/cn/search/' + number)
|
||||
@@ -38,11 +42,14 @@ class Avsox(Parser):
|
||||
new_number = self.getTreeElement(htmltree, self.expr_number)
|
||||
if new_number.upper() != self.number.upper():
|
||||
raise ValueError('number not found in ' + self.source)
|
||||
self.originalnum = new_number
|
||||
if 'FC2-PPV' in new_number.upper():
|
||||
new_number = new_number.upper().replace('FC2-PPV', 'FC2')
|
||||
self.number = new_number
|
||||
return new_number
|
||||
return self.number
|
||||
|
||||
def getTitle(self, htmltree):
    """Return the parent parser's title with slashes and the ID removed.

    The ID removed is ``self.originalnum`` (the number exactly as the site
    printed it). It is dropped only when it appears as a whole prefix or
    suffix of the title.

    ``str.strip(chars)`` is deliberately avoided: it treats its argument
    as a *set of characters*, so stripping e.g. "FC2-PPV-123" could also
    eat leading/trailing title characters such as "2", "P" or "-".
    """
    title = super().getTitle(htmltree).replace('/', '')
    number = self.originalnum
    if number:
        if title.startswith(number):
            title = title[len(number):]
        if title.endswith(number):
            title = title[:-len(number)]
    # Drop the whitespace that separated the number from the title text.
    return title.strip()
|
||||
|
||||
def getStudio(self, htmltree):
    """Return the parent parser's studio text with each "', '" replaced by a space."""
    studio = super().getStudio(htmltree)
    return studio.replace("', '", ' ')
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import re
|
||||
from lxml import etree
|
||||
from .parser import Parser
|
||||
|
||||
|
||||
|
||||
@@ -51,6 +51,8 @@ class Javdb(Parser):
|
||||
self.verify = core.verify
|
||||
if core.morestoryline:
|
||||
self.morestoryline = True
|
||||
if core.specifiedSource == self.source:
|
||||
self.specifiedUrl = core.specifiedUrl
|
||||
# special
|
||||
if core.dbcookies:
|
||||
self.cookies = core.dbcookies
|
||||
|
||||
@@ -29,6 +29,8 @@ class Javlibrary(Parser):
|
||||
self.verify = core.verify
|
||||
if core.morestoryline:
|
||||
self.morestoryline = True
|
||||
if core.specifiedSource == self.source:
|
||||
self.specifiedUrl = core.specifiedUrl
|
||||
self.cookies = {'over18':'1'}
|
||||
|
||||
def search(self, number):
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from .utils import getTreeElement
|
||||
from .parser import Parser
|
||||
|
||||
|
||||
|
||||
@@ -228,17 +228,28 @@ class Parser:
|
||||
def getActorPhoto(self, htmltree) -> dict:
    """Return an empty dict; this implementation extracts no actor photos."""
    return dict()
|
||||
|
||||
def getUncensored(self, htmltree) -> bool:
    """Report whether the parsed page describes an uncensored title.

    A truthy ``self.uncensored`` is returned immediately. Otherwise the
    result is determined in this order and stored on ``self.uncensored``
    when positive (or when the expression branch runs):

    1. ``self.expr_uncensored`` is set: uncensored iff the expression
       matches anything; tags and title are not consulted.
    2. A tag (lower-cased) equals one of: 無码, 無修正, uncensored, 无码.
    3. The title contains one of: 無码, 無碼, 無修正, uncensored
       (the latter compared case-insensitively).
    """
    if self.uncensored:
        return self.uncensored

    if self.expr_uncensored:
        matches = self.getTreeAll(htmltree, self.expr_uncensored)
        self.uncensored = bool(matches)
        return self.uncensored

    # Tags and title are only fetched when the expression cannot decide.
    tag_keywords = ('無码', '無修正', 'uncensored', '无码')
    tags = {tag.lower() for tag in self.getTags(htmltree) if tag}
    if any(word in tags for word in tag_keywords):
        self.uncensored = True
        return self.uncensored

    title = self.getTitle(htmltree)
    # '無碼' was documented as a title keyword but never actually tested.
    title_keywords = ('無码', '無碼', '無修正')
    if any(word in title for word in title_keywords) or 'uncensored' in title.lower():
        self.uncensored = True
    return self.uncensored
|
||||
|
||||
def getImagecut(self, htmltree):
    """Return the image-cut mode, switching it from 1 to 0 for uncensored titles.

    Original note (translated): correction - for uncensored titles the
    poster is not cropped from the cover.

    The change is written back to ``self.imagecut``. The uncensored check
    only runs when the current mode is 1.
    """
    if self.imagecut == 1 and self.getUncensored(htmltree):
        self.imagecut = 0
    return self.imagecut
|
||||
|
||||
|
||||
@@ -15,8 +15,8 @@ from urllib.parse import urljoin
|
||||
from lxml.html import fromstring
|
||||
from multiprocessing.dummy import Pool as ThreadPool
|
||||
|
||||
from scrapinglib.airav import Airav
|
||||
from scrapinglib.xcity import Xcity
|
||||
from .airav import Airav
|
||||
from .xcity import Xcity
|
||||
from .httprequest import get_html_by_form, get_html_by_scraper, request_session
|
||||
|
||||
# 舍弃 Amazon 源
|
||||
|
||||
Reference in New Issue
Block a user