fixes
- 优化avsox刮削 FC2
- 修复javdb与library的specifiedUrl
- 其他
This commit is contained in:
@@ -1,6 +1,5 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
import re
|
|
||||||
from .parser import Parser
|
from .parser import Parser
|
||||||
|
|
||||||
|
|
||||||
@@ -8,6 +7,7 @@ class Avsox(Parser):
|
|||||||
|
|
||||||
source = 'avsox'
|
source = 'avsox'
|
||||||
imagecut = 3
|
imagecut = 3
|
||||||
|
originalnum = ''
|
||||||
|
|
||||||
expr_number = '//span[contains(text(),"识别码:")]/../span[2]/text()'
|
expr_number = '//span[contains(text(),"识别码:")]/../span[2]/text()'
|
||||||
expr_actor = '//a[@class="avatar-box"]'
|
expr_actor = '//a[@class="avatar-box"]'
|
||||||
@@ -21,7 +21,11 @@ class Avsox(Parser):
|
|||||||
expr_label = '//p[contains(text(),"系列:")]/following-sibling::p[1]/a/text()'
|
expr_label = '//p[contains(text(),"系列:")]/following-sibling::p[1]/a/text()'
|
||||||
expr_series = '//span[contains(text(),"系列:")]/../span[2]/text()'
|
expr_series = '//span[contains(text(),"系列:")]/../span[2]/text()'
|
||||||
|
|
||||||
def queryNumberUrl(self, number):
|
def queryNumberUrl(self, number: str):
|
||||||
|
upnum = number.upper()
|
||||||
|
if 'FC2' in upnum and 'FC2-PPV' not in upnum:
|
||||||
|
number = upnum.replace('FC2', 'FC2-PPV')
|
||||||
|
self.number = number
|
||||||
qurySiteTree = self.getHtmlTree('https://tellme.pw/avsox')
|
qurySiteTree = self.getHtmlTree('https://tellme.pw/avsox')
|
||||||
site = self.getTreeElement(qurySiteTree, '//div[@class="container"]/div/a/@href')
|
site = self.getTreeElement(qurySiteTree, '//div[@class="container"]/div/a/@href')
|
||||||
self.searchtree = self.getHtmlTree(site + '/cn/search/' + number)
|
self.searchtree = self.getHtmlTree(site + '/cn/search/' + number)
|
||||||
@@ -38,11 +42,14 @@ class Avsox(Parser):
|
|||||||
new_number = self.getTreeElement(htmltree, self.expr_number)
|
new_number = self.getTreeElement(htmltree, self.expr_number)
|
||||||
if new_number.upper() != self.number.upper():
|
if new_number.upper() != self.number.upper():
|
||||||
raise ValueError('number not found in ' + self.source)
|
raise ValueError('number not found in ' + self.source)
|
||||||
|
self.originalnum = new_number
|
||||||
|
if 'FC2-PPV' in new_number.upper():
|
||||||
|
new_number = new_number.upper().replace('FC2-PPV', 'FC2')
|
||||||
self.number = new_number
|
self.number = new_number
|
||||||
return new_number
|
return self.number
|
||||||
|
|
||||||
def getTitle(self, htmltree):
|
def getTitle(self, htmltree):
|
||||||
return super().getTitle(htmltree).replace('/', '').strip(self.number)
|
return super().getTitle(htmltree).replace('/', '').strip(self.originalnum).strip()
|
||||||
|
|
||||||
def getStudio(self, htmltree):
|
def getStudio(self, htmltree):
|
||||||
return super().getStudio(htmltree).replace("', '", ' ')
|
return super().getStudio(htmltree).replace("', '", ' ')
|
||||||
|
|||||||
@@ -1,7 +1,6 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
import re
|
import re
|
||||||
from lxml import etree
|
|
||||||
from .parser import Parser
|
from .parser import Parser
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -51,6 +51,8 @@ class Javdb(Parser):
|
|||||||
self.verify = core.verify
|
self.verify = core.verify
|
||||||
if core.morestoryline:
|
if core.morestoryline:
|
||||||
self.morestoryline = True
|
self.morestoryline = True
|
||||||
|
if core.specifiedSource == self.source:
|
||||||
|
self.specifiedUrl = core.specifiedUrl
|
||||||
# special
|
# special
|
||||||
if core.dbcookies:
|
if core.dbcookies:
|
||||||
self.cookies = core.dbcookies
|
self.cookies = core.dbcookies
|
||||||
|
|||||||
@@ -29,6 +29,8 @@ class Javlibrary(Parser):
|
|||||||
self.verify = core.verify
|
self.verify = core.verify
|
||||||
if core.morestoryline:
|
if core.morestoryline:
|
||||||
self.morestoryline = True
|
self.morestoryline = True
|
||||||
|
if core.specifiedSource == self.source:
|
||||||
|
self.specifiedUrl = core.specifiedUrl
|
||||||
self.cookies = {'over18':'1'}
|
self.cookies = {'over18':'1'}
|
||||||
|
|
||||||
def search(self, number):
|
def search(self, number):
|
||||||
|
|||||||
@@ -1,6 +1,5 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
from .utils import getTreeElement
|
|
||||||
from .parser import Parser
|
from .parser import Parser
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -228,17 +228,28 @@ class Parser:
|
|||||||
def getActorPhoto(self, htmltree) -> dict:
|
def getActorPhoto(self, htmltree) -> dict:
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
def getUncensored(self, htmlree) -> bool:
|
def getUncensored(self, htmltree) -> bool:
|
||||||
if self.expr_uncensored:
|
"""
|
||||||
u = self.getTreeAll(htmlree, self.expr_uncensored)
|
tag: 無码 無修正 uncensored 无码
|
||||||
return bool(u)
|
title: 無碼 無修正 uncensored
|
||||||
else:
|
"""
|
||||||
|
if self.uncensored:
|
||||||
return self.uncensored
|
return self.uncensored
|
||||||
|
tags = [x.lower() for x in self.getTags(htmltree) if len(x)]
|
||||||
|
title = self.getTitle(htmltree)
|
||||||
|
if self.expr_uncensored:
|
||||||
|
u = self.getTreeAll(htmltree, self.expr_uncensored)
|
||||||
|
self.uncensored = bool(u)
|
||||||
|
elif '無码' in tags or '無修正' in tags or 'uncensored' in tags or '无码' in tags:
|
||||||
|
self.uncensored = True
|
||||||
|
elif '無码' in title or '無修正' in title or 'uncensored' in title.lower():
|
||||||
|
self.uncensored = True
|
||||||
|
return self.uncensored
|
||||||
|
|
||||||
def getImagecut(self, htmlree):
|
def getImagecut(self, htmltree):
|
||||||
""" 修正 无码poster不裁剪cover
|
""" 修正 无码poster不裁剪cover
|
||||||
"""
|
"""
|
||||||
if self.imagecut == 1 and self.getUncensored(htmlree):
|
if self.imagecut == 1 and self.getUncensored(htmltree):
|
||||||
self.imagecut = 0
|
self.imagecut = 0
|
||||||
return self.imagecut
|
return self.imagecut
|
||||||
|
|
||||||
|
|||||||
@@ -15,8 +15,8 @@ from urllib.parse import urljoin
|
|||||||
from lxml.html import fromstring
|
from lxml.html import fromstring
|
||||||
from multiprocessing.dummy import Pool as ThreadPool
|
from multiprocessing.dummy import Pool as ThreadPool
|
||||||
|
|
||||||
from scrapinglib.airav import Airav
|
from .airav import Airav
|
||||||
from scrapinglib.xcity import Xcity
|
from .xcity import Xcity
|
||||||
from .httprequest import get_html_by_form, get_html_by_scraper, request_session
|
from .httprequest import get_html_by_form, get_html_by_scraper, request_session
|
||||||
|
|
||||||
# 舍弃 Amazon 源
|
# 舍弃 Amazon 源
|
||||||
|
|||||||
Reference in New Issue
Block a user