- 优化avsox刮削 FC2
- 修复javdb与library的specifiedUrl
- 其他
This commit is contained in:
Mathhew
2022-08-16 09:24:16 +08:00
parent bb3688e67c
commit 153cdcde00
7 changed files with 35 additions and 15 deletions

View File

@@ -1,6 +1,5 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
import re
from .parser import Parser from .parser import Parser
@@ -8,6 +7,7 @@ class Avsox(Parser):
source = 'avsox' source = 'avsox'
imagecut = 3 imagecut = 3
originalnum = ''
expr_number = '//span[contains(text(),"识别码:")]/../span[2]/text()' expr_number = '//span[contains(text(),"识别码:")]/../span[2]/text()'
expr_actor = '//a[@class="avatar-box"]' expr_actor = '//a[@class="avatar-box"]'
@@ -21,7 +21,11 @@ class Avsox(Parser):
expr_label = '//p[contains(text(),"系列:")]/following-sibling::p[1]/a/text()' expr_label = '//p[contains(text(),"系列:")]/following-sibling::p[1]/a/text()'
expr_series = '//span[contains(text(),"系列:")]/../span[2]/text()' expr_series = '//span[contains(text(),"系列:")]/../span[2]/text()'
def queryNumberUrl(self, number): def queryNumberUrl(self, number: str):
upnum = number.upper()
if 'FC2' in upnum and 'FC2-PPV' not in upnum:
number = upnum.replace('FC2', 'FC2-PPV')
self.number = number
qurySiteTree = self.getHtmlTree('https://tellme.pw/avsox') qurySiteTree = self.getHtmlTree('https://tellme.pw/avsox')
site = self.getTreeElement(qurySiteTree, '//div[@class="container"]/div/a/@href') site = self.getTreeElement(qurySiteTree, '//div[@class="container"]/div/a/@href')
self.searchtree = self.getHtmlTree(site + '/cn/search/' + number) self.searchtree = self.getHtmlTree(site + '/cn/search/' + number)
@@ -38,11 +42,14 @@ class Avsox(Parser):
new_number = self.getTreeElement(htmltree, self.expr_number) new_number = self.getTreeElement(htmltree, self.expr_number)
if new_number.upper() != self.number.upper(): if new_number.upper() != self.number.upper():
raise ValueError('number not found in ' + self.source) raise ValueError('number not found in ' + self.source)
self.originalnum = new_number
if 'FC2-PPV' in new_number.upper():
new_number = new_number.upper().replace('FC2-PPV', 'FC2')
self.number = new_number self.number = new_number
return new_number return self.number
def getTitle(self, htmltree): def getTitle(self, htmltree):
return super().getTitle(htmltree).replace('/', '').strip(self.number) return super().getTitle(htmltree).replace('/', '').strip(self.originalnum).strip()
def getStudio(self, htmltree): def getStudio(self, htmltree):
return super().getStudio(htmltree).replace("', '", ' ') return super().getStudio(htmltree).replace("', '", ' ')

View File

@@ -1,7 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
import re import re
from lxml import etree
from .parser import Parser from .parser import Parser

View File

@@ -51,6 +51,8 @@ class Javdb(Parser):
self.verify = core.verify self.verify = core.verify
if core.morestoryline: if core.morestoryline:
self.morestoryline = True self.morestoryline = True
if core.specifiedSource == self.source:
self.specifiedUrl = core.specifiedUrl
# special # special
if core.dbcookies: if core.dbcookies:
self.cookies = core.dbcookies self.cookies = core.dbcookies

View File

@@ -29,6 +29,8 @@ class Javlibrary(Parser):
self.verify = core.verify self.verify = core.verify
if core.morestoryline: if core.morestoryline:
self.morestoryline = True self.morestoryline = True
if core.specifiedSource == self.source:
self.specifiedUrl = core.specifiedUrl
self.cookies = {'over18':'1'} self.cookies = {'over18':'1'}
def search(self, number): def search(self, number):

View File

@@ -1,6 +1,5 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from .utils import getTreeElement
from .parser import Parser from .parser import Parser

View File

@@ -228,17 +228,28 @@ class Parser:
def getActorPhoto(self, htmltree) -> dict: def getActorPhoto(self, htmltree) -> dict:
return {} return {}
def getUncensored(self, htmlree) -> bool: def getUncensored(self, htmltree) -> bool:
"""
tag: 無码 無修正 uncensored 无码
title: 無碼 無修正 uncensored
"""
if self.uncensored:
return self.uncensored
tags = [x.lower() for x in self.getTags(htmltree) if len(x)]
title = self.getTitle(htmltree)
if self.expr_uncensored: if self.expr_uncensored:
u = self.getTreeAll(htmlree, self.expr_uncensored) u = self.getTreeAll(htmltree, self.expr_uncensored)
return bool(u) self.uncensored = bool(u)
else: elif '無码' in tags or '無修正' in tags or 'uncensored' in tags or '无码' in tags:
self.uncensored = True
elif '無码' in title or '無修正' in title or 'uncensored' in title.lower():
self.uncensored = True
return self.uncensored return self.uncensored
def getImagecut(self, htmlree): def getImagecut(self, htmltree):
""" 修正 无码poster不裁剪cover """ 修正 无码poster不裁剪cover
""" """
if self.imagecut == 1 and self.getUncensored(htmlree): if self.imagecut == 1 and self.getUncensored(htmltree):
self.imagecut = 0 self.imagecut = 0
return self.imagecut return self.imagecut

View File

@@ -15,8 +15,8 @@ from urllib.parse import urljoin
from lxml.html import fromstring from lxml.html import fromstring
from multiprocessing.dummy import Pool as ThreadPool from multiprocessing.dummy import Pool as ThreadPool
from scrapinglib.airav import Airav from .airav import Airav
from scrapinglib.xcity import Xcity from .xcity import Xcity
from .httprequest import get_html_by_form, get_html_by_scraper, request_session from .httprequest import get_html_by_form, get_html_by_scraper, request_session
# 舍弃 Amazon 源 # 舍弃 Amazon 源