88 lines
3.1 KiB
Python
88 lines
3.1 KiB
Python
# -*- coding: utf-8 -*-
|
||
|
||
import re
|
||
from lxml import etree
|
||
from .parser import Parser
|
||
from datetime import datetime
|
||
|
||
# 搜刮 https://pissplay.com/ 中的视频
|
||
# pissplay中的视频没有番号,所以要通过文件名搜索
|
||
# 只用文件名和网站视频名完全一致时才可以被搜刮
|
||
class Pissplay(Parser):
|
||
source = 'pissplay'
|
||
|
||
expr_number = '//*[@id="video_title"]/text()' #这个网站上的视频没有番号,因此用标题代替
|
||
expr_title = '//*[@id="video_title"]/text()'
|
||
expr_cover = '/html/head//meta[@property="og:image"]/@content'
|
||
expr_tags = '//div[@id="video_tags"]/a/text()'
|
||
expr_release = '//div[@class="video_date"]/text()'
|
||
expr_outline = '//*[@id="video_description"]/p//text()'
|
||
|
||
def extraInit(self):
|
||
self.imagecut = 0 # 不裁剪封面
|
||
self.specifiedSource = None
|
||
|
||
def search(self, number):
|
||
self.number = number.strip().upper()
|
||
if self.specifiedUrl:
|
||
self.detailurl = self.specifiedUrl
|
||
else:
|
||
newName = re.sub(r"[^a-zA-Z0-9 ]", "", number) # 删除特殊符号
|
||
self.detailurl = "https://pissplay.com/videos/" + newName.lower().replace(" ","-") + "/"
|
||
self.htmlcode = self.getHtml(self.detailurl)
|
||
if self.htmlcode == 404:
|
||
return 404
|
||
htmltree = etree.fromstring(self.htmlcode, etree.HTMLParser())
|
||
result = self.dictformat(htmltree)
|
||
return result
|
||
|
||
def getNum(self, htmltree):
|
||
title = self.getTitle(htmltree)
|
||
return title
|
||
|
||
def getTitle(self, htmltree):
|
||
title = super().getTitle(htmltree)
|
||
title = re.sub(r"[^a-zA-Z0-9 ]", "", title) # 删除特殊符号
|
||
return title
|
||
|
||
def getCover(self, htmltree):
|
||
url = super().getCover(htmltree)
|
||
if not url.startswith('http'):
|
||
url = 'https:' + url
|
||
return url
|
||
|
||
def getRelease(self, htmltree):
|
||
releaseDate = super().getRelease(htmltree)
|
||
isoData = datetime.strptime(releaseDate, '%d %b %Y').strftime('%Y-%m-%d')
|
||
return isoData
|
||
|
||
def getStudio(self, htmltree):
|
||
return 'PissPlay'
|
||
|
||
def getTags(self, htmltree):
|
||
tags = self.getTreeAll(htmltree, self.expr_tags)
|
||
if 'Guests' in tags:
|
||
if tags[0] == 'Collaboration' or tags[0] == 'Toilet for a Day' or tags[0] == 'Collaboration':
|
||
del tags[1]
|
||
else:
|
||
tags = tags[1:]
|
||
return tags
|
||
|
||
def getActors(self, htmltree) -> list:
|
||
tags = self.getTreeAll(htmltree, self.expr_tags)
|
||
if 'Guests' in tags:
|
||
if tags[0] == 'Collaboration' or tags[0] == 'Toilet for a Day' or tags[0] == 'Collaboration':
|
||
return [tags[1]]
|
||
else:
|
||
return [tags[0]]
|
||
else:
|
||
return 'Bruce and Morgan'
|
||
|
||
def getOutline(self, htmltree):
|
||
outline = self.getTreeAll(htmltree, self.expr_outline)
|
||
if '– Morgan xx' in outline:
|
||
num = outline.index('– Morgan xx')
|
||
outline = outline[:num]
|
||
rstring = ''.join(outline).replace("&","and")
|
||
return rstring
|