add pissplay

2022-12-04 21:01:19 -08:00
parent 39b88090a0
commit bb37d6ad09
3 changed files with 103 additions and 7 deletions
--- a/scraper.py
+++ b/scraper.py
@@ -100,8 +100,11 @@ def get_data_from_json(
    # ================================================网站规则添加结束================================================

    title = json_data.get('title')
-    actor_list = str(json_data.get('actor')).strip("[ ]").replace("'", '').split(',')  # 字符串转列表
-    actor_list = [actor.strip() for actor in actor_list]  # 去除空白
+    if json_data['source'] =='pissplay': # pissplay actor为英文名，不用去除空格
+        actor_list = [json_data.get('actor')]
+    else:   
+        actor_list = str(json_data.get('actor')).strip("[ ]").replace("'", '').split(',')  # 字符串转列表
+        actor_list = [actor.strip() for actor in actor_list]  # 去除空白
    director = json_data.get('director')
    release = json_data.get('release')
    number = json_data.get('number')
@@ -134,11 +137,15 @@ def get_data_from_json(
        tag.remove('XXXX')
    while 'xxx' in tag:
        tag.remove('xxx')
-    actor = str(actor_list).strip("[ ]").replace("'", '').replace(" ", '')
+    if json_data['source'] =='pissplay': # pissplay actor为英文名，不用去除空格
+        actor = str(actor_list).strip("[ ]").replace("'", '')
+    else:
+        actor = str(actor_list).strip("[ ]").replace("'", '').replace(" ", '')

    if title == '' or number == '':
-        print('[-]Movie Number or Title not found!')
-        return None
+        if json_data['source'] != 'pissplay': # pissplay 没有番号
+            print('[-]Movie Number or Title not found!')
+            return None

    # if imagecut == '3':
    #     DownloadFileWithFilename()
--- a/scrapinglib/api.py
+++ b/scrapinglib/api.py
@@ -20,6 +20,7 @@ from .xcity import Xcity
 from .avsox import Avsox
 from .javlibrary import Javlibrary
 from .javday import Javday
+from .pissplay import Pissplay

 from .tmdb import Tmdb
 from .imdb import Imdb
@@ -52,7 +53,7 @@ class Scraping:
    """
    adult_full_sources = ['javlibrary', 'javdb', 'javbus', 'airav', 'fanza', 'xcity', 'jav321',
                          'mgstage', 'fc2', 'avsox', 'dlsite', 'carib', 'madou', 
-                          'getchu', 'gcolle','javday'
+                          'getchu', 'gcolle','javday','pissplay'
                          ]
    adult_func_mapping = {
        'avsox': Avsox().scrape,
@@ -70,7 +71,8 @@ class Scraping:
        'javdb': Javdb().scrape,
        'getchu': Getchu().scrape,
        'javlibrary': Javlibrary().scrape,
-        'javday': Javday().scrape
+        'javday': Javday().scrape,
+        'pissplay': Pissplay().scrape
    }

    general_full_sources = ['tmdb', 'imdb']
--- a/scrapinglib/pissplay.py
+++ b/scrapinglib/pissplay.py
@@ -0,0 +1,87 @@
+# -*- coding: utf-8 -*-
+
+import re
+from lxml import etree
+from .parser import Parser
+from datetime import datetime
+
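+# Scrapes videos from https://pissplay.com/
+# Videos on pissplay have no ID number, so they are looked up by file name.
+# A video can only be scraped when the file name exactly matches the video title on the site.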
+class Pissplay(Parser):
+    """Scraper for videos hosted on https://pissplay.com/.
+
+    Videos on this site have no ID number, so the (sanitized) video title
+    doubles as the number, and the detail-page URL is derived from the name
+    passed to ``search`` unless a specific URL was supplied.
+    """
+
+    source = 'pissplay'
+
+    # The site has no ID numbers, so the title node is used for the number too.
+    expr_number = '//*[@id="video_title"]/text()'
+    expr_title = '//*[@id="video_title"]/text()'
+    expr_cover = '/html/head//meta[@property="og:image"]/@content'
+    expr_tags = '//div[@id="video_tags"]/a/text()'
+    expr_release = '//div[@class="video_date"]/text()'
+    expr_outline = '//*[@id="video_description"]/p//text()'
+
+    # Leading tags after which the tag treated as the actor name sits in
+    # second position instead of first (only relevant on 'Guests' pages).
+    _series_tags = ('Collaboration', 'Toilet for a Day')
+
+    def extraInit(self):
+        """Set the per-source defaults for this scraper."""
+        self.imagecut = 0  # do not crop the cover
+        self.specifiedSource = None
+
+    def search(self, number):
+        """Fetch and parse the detail page for ``number``.
+
+        ``self.specifiedUrl`` is used as the detail URL when it is set;
+        otherwise the URL is built from ``number``: special characters are
+        removed, the text is lower-cased and the words are joined by hyphens.
+
+        Returns 404 when ``getHtml`` reports 404, otherwise whatever
+        ``dictformat`` returns for the parsed page.
+        """
+        self.number = number.strip().upper()
+        if self.specifiedUrl:
+            self.detailurl = self.specifiedUrl
+        else:
+            # Remove special characters, keeping only letters, digits and spaces.
+            cleaned = re.sub(r"[^a-zA-Z0-9 ]", "", number)
+            # split() with no argument drops leading/trailing spaces and
+            # collapses runs of spaces (e.g. the gap left by a removed "&"),
+            # so the slug never gets leading, trailing or doubled hyphens.
+            slug = "-".join(cleaned.lower().split())
+            self.detailurl = "https://pissplay.com/videos/" + slug + "/"
+        self.htmlcode = self.getHtml(self.detailurl)
+        if self.htmlcode == 404:
+            return 404
+        htmltree = etree.fromstring(self.htmlcode, etree.HTMLParser())
+        return self.dictformat(htmltree)
+
+    def getNum(self, htmltree):
+        """Return the sanitized title, which stands in for the missing number."""
+        return self.getTitle(htmltree)
+
+    def getTitle(self, htmltree):
+        """Return the page title with every character other than ASCII
+        letters, digits and spaces removed."""
+        title = super().getTitle(htmltree)
+        return re.sub(r"[^a-zA-Z0-9 ]", "", title)
+
+    def getCover(self, htmltree):
+        """Return the cover URL, prefixing ``https:`` when it lacks a scheme.
+
+        An empty URL is returned unchanged rather than being turned into the
+        meaningless value ``'https:'``.
+        """
+        url = super().getCover(htmltree)
+        # Presumably the site emits protocol-relative URLs ("//host/path").
+        if url and not url.startswith('http'):
+            url = 'https:' + url
+        return url
+
+    def getRelease(self, htmltree):
+        """Return the release date converted from ``'%d %b %Y'`` text
+        (e.g. ``04 Dec 2022``) to ``YYYY-MM-DD``.
+
+        Raises:
+            ValueError: if the page text does not match the expected format.
+        """
+        # strptime rejects surrounding whitespace, so trim the text node first.
+        # NOTE: %b is locale dependent; English month abbreviations are assumed.
+        releaseDate = super().getRelease(htmltree).strip()
+        return datetime.strptime(releaseDate, '%d %b %Y').strftime('%Y-%m-%d')
+
+    def getStudio(self, htmltree):
+        """Return the fixed studio name for this site."""
+        return 'PissPlay'
+
+    def _guestTagIndex(self, tags):
+        """Return the index of the tag treated as the actor name, or None.
+
+        Only pages tagged 'Guests' carry such a tag: it is the second tag
+        when the first tag is one of ``_series_tags``, otherwise the first.
+        """
+        if 'Guests' not in tags:
+            return None
+        return 1 if tags[0] in self._series_tags else 0
+
+    def getTags(self, htmltree):
+        """Return the page tags without the tag that is used as the actor name."""
+        tags = self.getTreeAll(htmltree, self.expr_tags)
+        index = self._guestTagIndex(tags)
+        if index is not None:
+            tags = tags[:index] + tags[index + 1:]
+        return tags
+
+    def getActors(self, htmltree) -> str:
+        """Return the actor name as a single string (not a list).
+
+        On pages tagged 'Guests' the name is taken from the tags; on all
+        other pages the default 'Bruce and Morgan' is returned.
+        """
+        tags = self.getTreeAll(htmltree, self.expr_tags)
+        index = self._guestTagIndex(tags)
+        if index is None:
+            return 'Bruce and Morgan'
+        return tags[index]
+
+    def getOutline(self, htmltree):
+        """Return the description text as one string.
+
+        Everything from the text node '– Morgan xx' onward is dropped when
+        that node is present, and every "&" is replaced by "and".
+        """
+        outline = self.getTreeAll(htmltree, self.expr_outline)
+        if '– Morgan xx' in outline:
+            outline = outline[:outline.index('– Morgan xx')]
+        return ''.join(outline).replace("&", "and")