@@ -11,7 +11,6 @@ from .gcolle import Gcolle
|
||||
from .getchu import Getchu
|
||||
from .jav321 import Jav321
|
||||
from .javdb import Javdb
|
||||
from .mv91 import Mv91
|
||||
from .fc2 import Fc2
|
||||
from .madou import Madou
|
||||
from .mgstage import Mgstage
|
||||
@@ -19,6 +18,7 @@ from .javbus import Javbus
|
||||
from .xcity import Xcity
|
||||
from .avsox import Avsox
|
||||
from .javlibrary import Javlibrary
|
||||
from .javday import Javday
|
||||
|
||||
from .tmdb import Tmdb
|
||||
from .imdb import Imdb
|
||||
@@ -50,8 +50,8 @@ class Scraping:
|
||||
"""
|
||||
"""
|
||||
adult_full_sources = ['javlibrary', 'javdb', 'javbus', 'airav', 'fanza', 'xcity', 'jav321',
|
||||
'mgstage', 'fc2', 'avsox', 'dlsite', 'carib', 'madou', 'mv91',
|
||||
'getchu', 'gcolle'
|
||||
'mgstage', 'fc2', 'avsox', 'dlsite', 'carib', 'madou',
|
||||
'getchu', 'gcolle','javday'
|
||||
]
|
||||
adult_func_mapping = {
|
||||
'avsox': Avsox().scrape,
|
||||
@@ -65,11 +65,11 @@ class Scraping:
|
||||
'fanza': Fanza().scrape,
|
||||
'airav': Airav().scrape,
|
||||
'carib': Carib().scrape,
|
||||
'mv91': Mv91().scrape,
|
||||
'gcolle': Gcolle().scrape,
|
||||
'javdb': Javdb().scrape,
|
||||
'getchu': Getchu().scrape,
|
||||
'javlibrary': Javlibrary().scrape,
|
||||
'javday': Javday().scrape
|
||||
}
|
||||
|
||||
general_full_sources = ['tmdb', 'imdb']
|
||||
|
||||
43
scrapinglib/javday.py
Normal file
43
scrapinglib/javday.py
Normal file
@@ -0,0 +1,43 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import re
|
||||
from lxml import etree
|
||||
from urllib.parse import urlparse, unquote
|
||||
from .parser import Parser
|
||||
|
||||
|
||||
class Javday(Parser):
    """Scraper for detail pages served by javday.tv.

    Field values are pulled from the page with the XPath expressions
    declared as class attributes; the extraction helpers themselves
    (``getHtml``, ``getTreeElement``, ``dictformat`` ...) come from
    ``Parser``.
    """

    source = 'javday'

    # XPath expressions evaluated against the parsed detail page.
    expr_url = '/html/head/meta[@property="og:url"]/@content'
    expr_cover = '/html/head/meta[@property="og:image"]/@content'
    expr_tags = '/html/head/meta[@name="keywords"]/@content'
    expr_title = "/html/head/title/text()"
    expr_actor = "//span[@class='vod_actor']/a/text()"
    expr_studio = '//span[@class="producer"]/a/text()'
    expr_number = '//span[@class="jpnum"]/text()'

    def extraInit(self):
        """Set the per-source flags used by this scraper."""
        self.uncensored = True
        # NOTE(review): the meaning of the value 4 is defined outside this
        # class (by Parser or its consumers) -- confirm before changing.
        self.imagecut = 4

    def search(self, number):
        """Fetch and parse the detail page for ``number``.

        The number is stripped and upper-cased and stored on the instance.
        When ``self.specifiedUrl`` is set it is used as the page address;
        otherwise the address is built from the number with every ``-``
        removed.

        Returns:
            ``404`` when ``getHtml`` reports 404 for the page, otherwise
            whatever ``self.dictformat`` produces for the parsed tree.
        """
        self.number = number.strip().upper()

        if not self.specifiedUrl:
            compact_number = self.number.replace("-","")
            self.detailurl = "https://javday.tv/videos/" + compact_number + "/"
        else:
            self.detailurl = self.specifiedUrl

        self.htmlcode = page_source = self.getHtml(self.detailurl)
        if page_source == 404:
            return 404

        tree = etree.fromstring(page_source, etree.HTMLParser())
        # Replace the requested address with the one the page advertises
        # in its og:url meta tag.
        self.detailurl = self.getTreeElement(tree, self.expr_url)

        return self.dictformat(tree)

    def getTitle(self, htmltree):
        """Return the page title without the number and the site name."""
        raw_title = super().getTitle(htmltree)
        # Remove the number and the site name
        without_number = raw_title.replace(self.number,"")
        without_site = without_number.replace("- JAVDAY.TV","")
        return without_site.strip()
|
||||
@@ -1,94 +0,0 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
|
||||
import re
|
||||
from lxml import etree
|
||||
from .parser import Parser
|
||||
|
||||
|
||||
class Mv91(Parser):
    """Scraper for detail pages served by www.91mv.org.

    Field values are pulled from the page with the XPath expressions
    declared as class attributes; the generic extraction helpers
    (``getHtml``, ``getTreeAll`` and the ``super().get*`` methods) come
    from ``Parser``. Every ``get*`` override below is best-effort: on any
    ordinary error it yields an empty string instead of raising.
    """

    source = 'mv91'

    # XPath expressions evaluated against the parsed detail page.
    # Number and title are read from the same node and separated later.
    expr_number = '//div[@class="player-title"]/text()'
    expr_title = '//div[@class="player-title"]/text()'
    expr_release = '//p[@class="date"]/text()'
    expr_outline = '//div[@class="play-text"]/text()'
    expr_tags = '//div[@class="player-tag"]/text()'
    expr_actor = '//p[@class="player-name"]/text()'

    def extraInit(self):
        """Set the per-source flags used by this scraper."""
        # NOTE(review): the meaning of the value 0 is defined outside this
        # class (by Parser or its consumers) -- confirm before changing.
        self.imagecut = 0
        self.uncensored = True

    def getHtmlTree(self, url, type=None):
        """Download ``url`` and parse it into an lxml tree.

        The raw response is kept in ``self.htmlcode`` because ``getCover``
        reads the cover address from the page source rather than the tree.
        ``type`` is passed through to ``getHtml`` unchanged (the name
        shadows the builtin but is part of the existing signature).

        Returns:
            ``404`` when ``getHtml`` reports 404, otherwise the parsed tree.
        """
        self.htmlcode = self.getHtml(url, type)
        if self.htmlcode == 404:
            return 404
        return etree.fromstring(self.htmlcode, etree.HTMLParser())

    def queryNumberUrl(self, number):
        """Return the detail-page URL of the first search hit for ``number``.

        The ``91CM-`` / ``91MS-`` prefixes are removed before the site
        search is queried.

        Raises:
            IndexError: when the search page contains no result link.
        """
        keyword = number.replace('91CM-','').replace('91MS-','')
        search_html = self.getHtml('https://www.91mv.org/index/search?keywords=' + keyword)
        html = etree.fromstring(search_html, etree.HTMLParser())
        endurl = html.xpath('//a[@class="video-list"]/@href')[0]
        return 'https://www.91mv.org' + endurl

    def getNum(self, htmltree):
        """Return the number found in the page title node.

        Returns ``''`` when the number cannot be extracted or when it does
        not match ``self.number`` (compared case-insensitively).
        """
        try:
            num = super().getNum(htmltree)
            # Group 2 is the part of the text starting at "91" and ending
            # with "-<digits>".
            finds = re.findall(r'(.*)(91.*-\d*)', num)
            if finds:
                result = str(finds[0][1])
            else:
                # Fallback: treat "/" as whitespace and take the second token.
                result = num.replace('/',' ').split()[1]
            if self.number.upper() != result.upper():
                # The page describes a different item than the one requested.
                return ''
            return result.strip()
        except Exception:
            return ''

    def getTitle(self, htmltree):
        """Return the title text that precedes the number in the title node.

        Returns ``''`` when the title cannot be extracted.
        """
        try:
            title = super().getTitle(htmltree)
            # Group 1 is everything in front of the number.
            finds = re.findall(r'(.*)(91.*-\d*)', title)
            if finds:
                result = str(finds[0][0])
            else:
                # Fallback: treat "/" as whitespace and take the first token.
                result = title.replace('/',' ').split()[0]
            return result.replace('「预告」','').strip('/ ')
        except Exception:
            return ''

    def getStudio(self, htmltree):
        """Return the fixed studio name used for every item of this source."""
        return '91制片厂'

    def getActors(self, htmltree):
        """Return the list of actor names read from the page."""
        actors = []
        for player in self.getTreeAll(htmltree, self.expr_actor):
            player = player.replace('主演:','')
            if '/' in player:
                # Keep only the part in front of the first slash.
                player = player.split('/')[0]
                # NOTE(review): digits are stripped only for slash-separated
                # entries -- confirm this is intended for all entries or not.
                player = re.sub(r'[0-9]+', '', player)
            actors.append(player)
        return actors

    def getRelease(self, htmltree):
        """Return the release date text without its label, or ``''``."""
        try:
            date = super().getRelease(htmltree).replace('日期:','')
            if date:
                return date
        except Exception:
            # Best-effort field: fall through to the empty default.
            pass
        return ''

    def getCover(self, htmltree):
        """Return the cover URL taken from the page's ``pic_url`` script variable.

        The value is read from ``self.htmlcode`` (the raw page source), not
        from ``htmltree``. Returns ``''`` when it cannot be found.
        """
        try:
            url = str(re.findall('var pic_url = "(.*?)"', self.htmlcode)[0])
            return url.strip()
        except Exception:
            return ''
|
||||
|
||||
Reference in New Issue
Block a user