add pissplay
This commit is contained in:
17
scraper.py
17
scraper.py
@@ -100,8 +100,11 @@ def get_data_from_json(
|
|||||||
# ================================================网站规则添加结束================================================
|
# ================================================网站规则添加结束================================================
|
||||||
|
|
||||||
title = json_data.get('title')
|
title = json_data.get('title')
|
||||||
actor_list = str(json_data.get('actor')).strip("[ ]").replace("'", '').split(',') # 字符串转列表
|
if json_data['source'] =='pissplay': # pissplay actor为英文名,不用去除空格
|
||||||
actor_list = [actor.strip() for actor in actor_list] # 去除空白
|
actor_list = [json_data.get('actor')]
|
||||||
|
else:
|
||||||
|
actor_list = str(json_data.get('actor')).strip("[ ]").replace("'", '').split(',') # 字符串转列表
|
||||||
|
actor_list = [actor.strip() for actor in actor_list] # 去除空白
|
||||||
director = json_data.get('director')
|
director = json_data.get('director')
|
||||||
release = json_data.get('release')
|
release = json_data.get('release')
|
||||||
number = json_data.get('number')
|
number = json_data.get('number')
|
||||||
@@ -134,11 +137,15 @@ def get_data_from_json(
|
|||||||
tag.remove('XXXX')
|
tag.remove('XXXX')
|
||||||
while 'xxx' in tag:
|
while 'xxx' in tag:
|
||||||
tag.remove('xxx')
|
tag.remove('xxx')
|
||||||
actor = str(actor_list).strip("[ ]").replace("'", '').replace(" ", '')
|
if json_data['source'] =='pissplay': # pissplay actor为英文名,不用去除空格
|
||||||
|
actor = str(actor_list).strip("[ ]").replace("'", '')
|
||||||
|
else:
|
||||||
|
actor = str(actor_list).strip("[ ]").replace("'", '').replace(" ", '')
|
||||||
|
|
||||||
if title == '' or number == '':
|
if title == '' or number == '':
|
||||||
print('[-]Movie Number or Title not found!')
|
if json_data['source'] != 'pissplay': # pissplay 没有番号
|
||||||
return None
|
print('[-]Movie Number or Title not found!')
|
||||||
|
return None
|
||||||
|
|
||||||
# if imagecut == '3':
|
# if imagecut == '3':
|
||||||
# DownloadFileWithFilename()
|
# DownloadFileWithFilename()
|
||||||
|
|||||||
@@ -20,6 +20,7 @@ from .xcity import Xcity
|
|||||||
from .avsox import Avsox
|
from .avsox import Avsox
|
||||||
from .javlibrary import Javlibrary
|
from .javlibrary import Javlibrary
|
||||||
from .javday import Javday
|
from .javday import Javday
|
||||||
|
from .pissplay import Pissplay
|
||||||
|
|
||||||
from .tmdb import Tmdb
|
from .tmdb import Tmdb
|
||||||
from .imdb import Imdb
|
from .imdb import Imdb
|
||||||
@@ -52,7 +53,7 @@ class Scraping:
|
|||||||
"""
|
"""
|
||||||
adult_full_sources = ['javlibrary', 'javdb', 'javbus', 'airav', 'fanza', 'xcity', 'jav321',
|
adult_full_sources = ['javlibrary', 'javdb', 'javbus', 'airav', 'fanza', 'xcity', 'jav321',
|
||||||
'mgstage', 'fc2', 'avsox', 'dlsite', 'carib', 'madou',
|
'mgstage', 'fc2', 'avsox', 'dlsite', 'carib', 'madou',
|
||||||
'getchu', 'gcolle','javday'
|
'getchu', 'gcolle','javday','pissplay'
|
||||||
]
|
]
|
||||||
adult_func_mapping = {
|
adult_func_mapping = {
|
||||||
'avsox': Avsox().scrape,
|
'avsox': Avsox().scrape,
|
||||||
@@ -70,7 +71,8 @@ class Scraping:
|
|||||||
'javdb': Javdb().scrape,
|
'javdb': Javdb().scrape,
|
||||||
'getchu': Getchu().scrape,
|
'getchu': Getchu().scrape,
|
||||||
'javlibrary': Javlibrary().scrape,
|
'javlibrary': Javlibrary().scrape,
|
||||||
'javday': Javday().scrape
|
'javday': Javday().scrape,
|
||||||
|
'pissplay': Pissplay().scrape
|
||||||
}
|
}
|
||||||
|
|
||||||
general_full_sources = ['tmdb', 'imdb']
|
general_full_sources = ['tmdb', 'imdb']
|
||||||
|
|||||||
87
scrapinglib/pissplay.py
Normal file
87
scrapinglib/pissplay.py
Normal file
@@ -0,0 +1,87 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
import re
|
||||||
|
from lxml import etree
|
||||||
|
from .parser import Parser
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
# 搜刮 https://pissplay.com/ 中的视频
|
||||||
|
# pissplay中的视频没有番号,所以要通过文件名搜索
|
||||||
|
# 只有文件名和网站视频名完全一致时才可以被搜刮
|
||||||
|
class Pissplay(Parser):
    """Scraper for videos published on https://pissplay.com/.

    Videos on this site carry no catalogue number, so a video is looked up
    by its file name: the name is turned into a URL slug and the matching
    detail page is fetched.  Only files named after the site's video title
    can therefore be found.
    """

    source = 'pissplay'

    # The site has no catalogue numbers, so the title doubles as the number.
    expr_number = '//*[@id="video_title"]/text()'
    expr_title = '//*[@id="video_title"]/text()'
    expr_cover = '/html/head//meta[@property="og:image"]/@content'
    expr_tags = '//div[@id="video_tags"]/a/text()'
    expr_release = '//div[@class="video_date"]/text()'
    expr_outline = '//*[@id="video_description"]/p//text()'

    # Everything except ASCII letters, digits and spaces is dropped from
    # titles and from the name used to build the detail-page URL.
    _SPECIAL_CHARS = re.compile(r"[^a-zA-Z0-9 ]")
    # Leading tags that are categories; when one of them comes first, the
    # guest's name is the second tag instead of the first.
    _CATEGORY_TAGS = ('Collaboration', 'Toilet for a Day')
    # Tag marking a video that features guest performers.
    _GUEST_MARKER = 'Guests'
    # Performers credited when a video has no guest tag.
    _DEFAULT_ACTORS = 'Bruce and Morgan'
    # Signature line that closes the description on the site.
    _OUTLINE_SIGNATURE = '– Morgan xx'

    def extraInit(self):
        """Set the per-source defaults: keep the cover uncropped."""
        self.imagecut = 0  # do not crop the cover
        self.specifiedSource = None

    def search(self, number):
        """Fetch and parse the detail page for the video named *number*.

        Args:
            number: The video's file name / title (the site has no real
                catalogue numbers).

        Returns:
            ``404`` when ``getHtml`` reports 404 for the page, otherwise
            whatever ``dictformat`` produces for the parsed page
            (both helpers are presumably inherited from ``Parser``).
        """
        self.number = number.strip().upper()
        if self.specifiedUrl:
            self.detailurl = self.specifiedUrl
        else:
            cleaned = self._SPECIAL_CHARS.sub("", number)
            # Split/join trims the ends and collapses runs of spaces, so a
            # removed symbol ("A & B") or a padded name never yields doubled
            # or dangling hyphens in the slug.
            slug = "-".join(cleaned.lower().split())
            self.detailurl = "https://pissplay.com/videos/" + slug + "/"
        self.htmlcode = self.getHtml(self.detailurl)
        if self.htmlcode == 404:
            return 404
        htmltree = etree.fromstring(self.htmlcode, etree.HTMLParser())
        return self.dictformat(htmltree)

    def getNum(self, htmltree):
        """Return the cleaned title, which stands in for the number."""
        return self.getTitle(htmltree)

    def getTitle(self, htmltree):
        """Return the page title with special characters removed."""
        title = super().getTitle(htmltree)
        return self._SPECIAL_CHARS.sub("", title)

    def getCover(self, htmltree):
        """Return the cover URL, adding ``https:`` when no scheme is present."""
        url = super().getCover(htmltree)
        if not url:
            # Nothing was extracted; do not fabricate the bogus URL "https:".
            return url
        if not url.startswith('http'):
            url = 'https:' + url
        return url

    def getRelease(self, htmltree):
        """Return the release date as ``YYYY-MM-DD``.

        The page is expected to show dates like ``05 Jan 2023``.  When the
        text cannot be parsed in that form, the stripped raw text is
        returned instead of raising, so one odd date does not abort the
        whole scrape.
        """
        raw_date = super().getRelease(htmltree).strip()
        try:
            # NOTE: %b is locale dependent; English month abbreviations are
            # assumed here.
            parsed = datetime.strptime(raw_date, '%d %b %Y')
        except ValueError:
            return raw_date
        return parsed.strftime('%Y-%m-%d')

    def getStudio(self, htmltree):
        """Return the fixed studio name."""
        return 'PissPlay'

    def _guestNameIndex(self, tags):
        """Return the index of the guest-name tag in *tags*, or ``None``.

        A guest name is only present when the ``Guests`` marker tag is; it
        is the first tag, or the second one when the first is a category.
        """
        if self._GUEST_MARKER not in tags:
            return None
        index = 1 if tags[0] in self._CATEGORY_TAGS else 0
        if tags[index] == self._GUEST_MARKER:
            # The marker sits where the name should be: no name was listed.
            return None
        return index

    def getTags(self, htmltree):
        """Return the video's tags without the guest-name tag."""
        tags = self.getTreeAll(htmltree, self.expr_tags)
        index = self._guestNameIndex(tags)
        if index is None:
            return tags
        return tags[:index] + tags[index + 1:]

    def getActors(self, htmltree) -> str:
        """Return the performer credit as a single string.

        Unlike the ``list`` the previous annotation promised, this returns
        one string; the scraper wraps it in a list itself for this source
        (``actor_list = [json_data.get('actor')]``).
        """
        tags = self.getTreeAll(htmltree, self.expr_tags)
        index = self._guestNameIndex(tags)
        if index is None:
            return self._DEFAULT_ACTORS
        return tags[index]

    def getOutline(self, htmltree):
        """Return the description text up to the closing signature line.

        Ampersands are spelled out as "and".
        """
        parts = self.getTreeAll(htmltree, self.expr_outline)
        if self._OUTLINE_SIGNATURE in parts:
            parts = parts[:parts.index(self._OUTLINE_SIGNATURE)]
        return ''.join(parts).replace("&", "and")
|
||||||
Reference in New Issue
Block a user