add pissplay

This commit is contained in:
Marks
2022-12-04 21:01:19 -08:00
parent 39b88090a0
commit bb37d6ad09
3 changed files with 103 additions and 7 deletions

View File

@@ -100,8 +100,11 @@ def get_data_from_json(
# ================================================网站规则添加结束================================================ # ================================================网站规则添加结束================================================
title = json_data.get('title') title = json_data.get('title')
actor_list = str(json_data.get('actor')).strip("[ ]").replace("'", '').split(',') # 字符串转列表 if json_data['source'] =='pissplay': # pissplay actor为英文名不用去除空格
actor_list = [actor.strip() for actor in actor_list] # 去除空白 actor_list = [json_data.get('actor')]
else:
actor_list = str(json_data.get('actor')).strip("[ ]").replace("'", '').split(',') # 字符串转列表
actor_list = [actor.strip() for actor in actor_list] # 去除空白
director = json_data.get('director') director = json_data.get('director')
release = json_data.get('release') release = json_data.get('release')
number = json_data.get('number') number = json_data.get('number')
@@ -134,11 +137,15 @@ def get_data_from_json(
tag.remove('XXXX') tag.remove('XXXX')
while 'xxx' in tag: while 'xxx' in tag:
tag.remove('xxx') tag.remove('xxx')
actor = str(actor_list).strip("[ ]").replace("'", '').replace(" ", '') if json_data['source'] =='pissplay': # pissplay actor为英文名不用去除空格
actor = str(actor_list).strip("[ ]").replace("'", '')
else:
actor = str(actor_list).strip("[ ]").replace("'", '').replace(" ", '')
if title == '' or number == '': if title == '' or number == '':
print('[-]Movie Number or Title not found!') if json_data['source'] != 'pissplay': # pissplay 没有番号
return None print('[-]Movie Number or Title not found!')
return None
# if imagecut == '3': # if imagecut == '3':
# DownloadFileWithFilename() # DownloadFileWithFilename()

View File

@@ -20,6 +20,7 @@ from .xcity import Xcity
from .avsox import Avsox from .avsox import Avsox
from .javlibrary import Javlibrary from .javlibrary import Javlibrary
from .javday import Javday from .javday import Javday
from .pissplay import Pissplay
from .tmdb import Tmdb from .tmdb import Tmdb
from .imdb import Imdb from .imdb import Imdb
@@ -52,7 +53,7 @@ class Scraping:
""" """
adult_full_sources = ['javlibrary', 'javdb', 'javbus', 'airav', 'fanza', 'xcity', 'jav321', adult_full_sources = ['javlibrary', 'javdb', 'javbus', 'airav', 'fanza', 'xcity', 'jav321',
'mgstage', 'fc2', 'avsox', 'dlsite', 'carib', 'madou', 'mgstage', 'fc2', 'avsox', 'dlsite', 'carib', 'madou',
'getchu', 'gcolle','javday' 'getchu', 'gcolle','javday','pissplay'
] ]
adult_func_mapping = { adult_func_mapping = {
'avsox': Avsox().scrape, 'avsox': Avsox().scrape,
@@ -70,7 +71,8 @@ class Scraping:
'javdb': Javdb().scrape, 'javdb': Javdb().scrape,
'getchu': Getchu().scrape, 'getchu': Getchu().scrape,
'javlibrary': Javlibrary().scrape, 'javlibrary': Javlibrary().scrape,
'javday': Javday().scrape 'javday': Javday().scrape,
'pissplay': Pissplay().scrape
} }
general_full_sources = ['tmdb', 'imdb'] general_full_sources = ['tmdb', 'imdb']

87
scrapinglib/pissplay.py Normal file
View File

@@ -0,0 +1,87 @@
# -*- coding: utf-8 -*-
import re
from lxml import etree
from .parser import Parser
from datetime import datetime
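# Scrape videos from https://pissplay.com/
# Videos on pissplay have no catalogue number, so they are looked up by file name.
# A video can only be scraped when the file name exactly matches the video title on the site.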
class Pissplay(Parser):
    """Scraper for videos published on https://pissplay.com/.

    The site has no catalogue numbers, so a video is located by turning the
    file name into the URL slug of its detail page. A lookup therefore only
    succeeds when the file name matches the video title used by the site.
    """

    source = 'pissplay'
    # The site has no catalogue number, so the title is used in its place.
    expr_number = '//*[@id="video_title"]/text()'
    expr_title = '//*[@id="video_title"]/text()'
    expr_cover = '/html/head//meta[@property="og:image"]/@content'
    expr_tags = '//div[@id="video_tags"]/a/text()'
    expr_release = '//div[@class="video_date"]/text()'
    expr_outline = '//*[@id="video_description"]/p//text()'

    # Leading tags that name a series; when one of them comes first, the
    # performer tag is the second entry instead of the first.
    _series_tags = ('Collaboration', 'Toilet for a Day')

    def extraInit(self):
        """Set the per-source defaults for this scraper."""
        self.imagecut = 0  # do not crop the cover
        self.specifiedSource = None

    def search(self, number):
        """Fetch and parse the detail page for ``number``.

        Args:
            number: The video name (normally derived from the file name).

        Returns:
            ``404`` when ``getHtml`` reports 404, otherwise whatever
            ``dictformat`` produces for the parsed page.
        """
        self.number = number.strip().upper()
        if self.specifiedUrl:
            self.detailurl = self.specifiedUrl
        else:
            # Drop special characters, then join the remaining words with
            # single hyphens. Splitting on whitespace also discards leading,
            # trailing and repeated spaces (e.g. the gap left by a removed
            # "&"), which would otherwise yield stray hyphens in the slug.
            newName = re.sub(r"[^a-zA-Z0-9 ]", "", number)
            slug = "-".join(newName.lower().split())
            self.detailurl = "https://pissplay.com/videos/" + slug + "/"
        self.htmlcode = self.getHtml(self.detailurl)
        if self.htmlcode == 404:
            return 404
        htmltree = etree.fromstring(self.htmlcode, etree.HTMLParser())
        return self.dictformat(htmltree)

    def getNum(self, htmltree):
        """Return the cleaned title, which stands in for the number."""
        return self.getTitle(htmltree)

    def getTitle(self, htmltree):
        """Return the page title with special characters removed."""
        title = super().getTitle(htmltree)
        return re.sub(r"[^a-zA-Z0-9 ]", "", title)

    def getCover(self, htmltree):
        """Return the cover URL, prefixing ``https:`` when no scheme is given.

        An empty value is returned unchanged so that a missing cover does not
        turn into the meaningless URL ``'https:'``.
        """
        url = super().getCover(htmltree)
        if url and not url.startswith('http'):
            url = 'https:' + url
        return url

    def getRelease(self, htmltree):
        """Return the release date as ``YYYY-MM-DD``.

        The page value is parsed as ``'%d %b %Y'``. When it is missing or in
        another format, the raw value is returned instead of raising, so one
        bad field does not abort the whole scrape.
        """
        releaseDate = super().getRelease(htmltree)
        try:
            parsed = datetime.strptime(releaseDate.strip(), '%d %b %Y')
        except (AttributeError, TypeError, ValueError):
            return releaseDate
        return parsed.strftime('%Y-%m-%d')

    def getStudio(self, htmltree):
        """Return the fixed studio name for this source."""
        return 'PissPlay'

    def _actor_tag_index(self, tags):
        """Return the index of the performer tag, or ``None`` if there is none.

        A performer tag exists only when 'Guests' is among the tags. It is
        the second tag when the first one is a series name, otherwise the
        first tag.
        """
        if 'Guests' not in tags:
            return None
        # 'Guests' being present guarantees tags[0] exists; if tags[0] is a
        # series name, 'Guests' sits at index >= 1, so tags[1] exists too.
        return 1 if tags[0] in self._series_tags else 0

    def getTags(self, htmltree):
        """Return the page tags with the performer tag removed."""
        tags = self.getTreeAll(htmltree, self.expr_tags)
        index = self._actor_tag_index(tags)
        if index is not None:
            tags = tags[:index] + tags[index + 1:]
        return tags

    def getActors(self, htmltree) -> str:
        """Return the performer name as a single string.

        Falls back to 'Bruce and Morgan' when the page has no 'Guests' tag.
        """
        tags = self.getTreeAll(htmltree, self.expr_tags)
        index = self._actor_tag_index(tags)
        if index is None:
            return 'Bruce and Morgan'
        return tags[index]

    def getOutline(self, htmltree):
        """Return the description text, cut off at the ' Morgan xx' sign-off.

        Ampersands are replaced with the word "and".
        """
        outline = self.getTreeAll(htmltree, self.expr_outline)
        if ' Morgan xx' in outline:
            outline = outline[:outline.index(' Morgan xx')]
        return ''.join(outline).replace("&", "and")