@@ -11,7 +11,6 @@ from .gcolle import Gcolle
|
|||||||
from .getchu import Getchu
|
from .getchu import Getchu
|
||||||
from .jav321 import Jav321
|
from .jav321 import Jav321
|
||||||
from .javdb import Javdb
|
from .javdb import Javdb
|
||||||
from .mv91 import Mv91
|
|
||||||
from .fc2 import Fc2
|
from .fc2 import Fc2
|
||||||
from .madou import Madou
|
from .madou import Madou
|
||||||
from .mgstage import Mgstage
|
from .mgstage import Mgstage
|
||||||
@@ -19,6 +18,7 @@ from .javbus import Javbus
|
|||||||
from .xcity import Xcity
|
from .xcity import Xcity
|
||||||
from .avsox import Avsox
|
from .avsox import Avsox
|
||||||
from .javlibrary import Javlibrary
|
from .javlibrary import Javlibrary
|
||||||
|
from .javday import Javday
|
||||||
|
|
||||||
from .tmdb import Tmdb
|
from .tmdb import Tmdb
|
||||||
from .imdb import Imdb
|
from .imdb import Imdb
|
||||||
@@ -50,8 +50,8 @@ class Scraping:
|
|||||||
"""
|
"""
|
||||||
"""
|
"""
|
||||||
adult_full_sources = ['javlibrary', 'javdb', 'javbus', 'airav', 'fanza', 'xcity', 'jav321',
|
adult_full_sources = ['javlibrary', 'javdb', 'javbus', 'airav', 'fanza', 'xcity', 'jav321',
|
||||||
'mgstage', 'fc2', 'avsox', 'dlsite', 'carib', 'madou', 'mv91',
|
'mgstage', 'fc2', 'avsox', 'dlsite', 'carib', 'madou',
|
||||||
'getchu', 'gcolle'
|
'getchu', 'gcolle','javday'
|
||||||
]
|
]
|
||||||
adult_func_mapping = {
|
adult_func_mapping = {
|
||||||
'avsox': Avsox().scrape,
|
'avsox': Avsox().scrape,
|
||||||
@@ -65,11 +65,11 @@ class Scraping:
|
|||||||
'fanza': Fanza().scrape,
|
'fanza': Fanza().scrape,
|
||||||
'airav': Airav().scrape,
|
'airav': Airav().scrape,
|
||||||
'carib': Carib().scrape,
|
'carib': Carib().scrape,
|
||||||
'mv91': Mv91().scrape,
|
|
||||||
'gcolle': Gcolle().scrape,
|
'gcolle': Gcolle().scrape,
|
||||||
'javdb': Javdb().scrape,
|
'javdb': Javdb().scrape,
|
||||||
'getchu': Getchu().scrape,
|
'getchu': Getchu().scrape,
|
||||||
'javlibrary': Javlibrary().scrape,
|
'javlibrary': Javlibrary().scrape,
|
||||||
|
'javday': Javday().scrape
|
||||||
}
|
}
|
||||||
|
|
||||||
general_full_sources = ['tmdb', 'imdb']
|
general_full_sources = ['tmdb', 'imdb']
|
||||||
|
|||||||
43
scrapinglib/javday.py
Normal file
43
scrapinglib/javday.py
Normal file
@@ -0,0 +1,43 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
import re
|
||||||
|
from lxml import etree
|
||||||
|
from urllib.parse import urlparse, unquote
|
||||||
|
from .parser import Parser
|
||||||
|
|
||||||
|
|
||||||
|
class Javday(Parser):
    """Parser for detail pages served by javday.tv.

    Most fields are read from the page's ``<head>`` metadata (Open Graph
    tags, keywords, title); actor, studio and number come from ``<span>``
    elements in the page body.
    """

    source = 'javday'

    # XPath expressions consumed by the generic getters of the base parser.
    expr_url = '/html/head/meta[@property="og:url"]/@content'
    expr_cover = '/html/head/meta[@property="og:image"]/@content'
    expr_tags = '/html/head/meta[@name="keywords"]/@content'
    expr_title = "/html/head/title/text()"
    expr_actor = "//span[@class='vod_actor']/a/text()"
    expr_studio = '//span[@class="producer"]/a/text()'
    expr_number = '//span[@class="jpnum"]/text()'

    def extraInit(self):
        """Set the per-source defaults for this parser."""
        # NOTE(review): the meaning of imagecut mode 4 is defined outside
        # this file -- confirm against the consumer of this value.
        self.uncensored = True
        self.imagecut = 4

    def search(self, number):
        """Fetch the detail page for *number* and return the parsed result.

        An explicitly specified URL takes precedence; otherwise the URL is
        built from the upper-cased number with its dashes removed.

        Returns 404 when the page fetch yields 404, else whatever
        ``dictformat`` produces for the parsed tree.
        """
        self.number = number.strip().upper()
        self.detailurl = self.specifiedUrl or (
            f"https://javday.tv/videos/{self.number.replace('-', '')}/"
        )

        self.htmlcode = self.getHtml(self.detailurl)
        if self.htmlcode == 404:
            return 404

        tree = etree.fromstring(self.htmlcode, etree.HTMLParser())
        # Replace the requested URL with the one the page declares in its
        # og:url meta tag.
        self.detailurl = self.getTreeElement(tree, self.expr_url)
        return self.dictformat(tree)

    def getTitle(self, htmltree):
        """Return the page title with the number and site name removed."""
        raw_title = super().getTitle(htmltree)
        # Remove the number and the site name.
        without_number = raw_title.replace(self.number, "")
        without_site = without_number.replace("- JAVDAY.TV", "")
        return without_site.strip()
|
||||||
@@ -1,94 +0,0 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
|
|
||||||
|
|
||||||
import re
|
|
||||||
from lxml import etree
|
|
||||||
from .parser import Parser
|
|
||||||
|
|
||||||
|
|
||||||
class Mv91(Parser):
    """Parser for video pages on www.91mv.org.

    Number and title share the same ``player-title`` element, so both
    getters split that text with the same regular expression.
    """

    source = 'mv91'

    # XPath expressions consumed by the generic getters of the base parser.
    expr_number = '//div[@class="player-title"]/text()'
    expr_title = '//div[@class="player-title"]/text()'
    expr_release = '//p[@class="date"]/text()'
    expr_outline = '//div[@class="play-text"]/text()'
    expr_tags = '//div[@class="player-tag"]/text()'
    expr_actor = '//p[@class="player-name"]/text()'

    def extraInit(self):
        """Set the per-source defaults for this parser."""
        # NOTE(review): the meaning of imagecut mode 0 is defined outside
        # this file -- confirm against the consumer of this value.
        self.imagecut = 0
        self.uncensored = True

    def getHtmlTree(self, url, type=None):
        """Fetch *url*, keep the raw HTML on ``self.htmlcode`` and parse it.

        The raw HTML is retained because ``getCover`` extracts the cover
        URL from the page source rather than from the element tree.

        Returns 404 when the fetch yields 404, else the parsed tree.
        """
        self.htmlcode = self.getHtml(url, type)
        if self.htmlcode == 404:
            return 404
        return etree.fromstring(self.htmlcode, etree.HTMLParser())

    def queryNumberUrl(self, number):
        """Search the site for *number* and return the first result's URL.

        The ``91CM-`` / ``91MS-`` prefixes are stripped before searching.
        NOTE(review): a failed fetch or an empty result list raises here
        (as in the original code); presumably the caller handles that --
        confirm.
        """
        keyword = number.replace('91CM-', '').replace('91MS-', '')
        search_html = self.getHtml('https://www.91mv.org/index/search?keywords=' + keyword)
        html = etree.fromstring(search_html, etree.HTMLParser())
        endurl = html.xpath('//a[@class="video-list"]/@href')[0]
        return 'https://www.91mv.org' + endurl

    def getNum(self, htmltree):
        """Return the number found on the page, or '' on any failure.

        An empty string is also returned when the number on the page does
        not match (case-insensitively) the number that was searched for.
        """
        try:
            num = super().getNum(htmltree)
            finds = re.findall(r'(.*)(91.*-\d*)', num)
            if finds:
                result = str(finds[0][1])
            else:
                # Fallback: treat '/' as whitespace and take the second token.
                result = num.replace('/', ' ').split()[1]
            if self.number.upper() != result.upper():
                # The page belongs to a different number: report "not found".
                return ''
            return result.strip()
        except Exception:
            return ''

    def getTitle(self, htmltree):
        """Return the title part of the player title, or '' on failure."""
        try:
            title = super().getTitle(htmltree)
            finds = re.findall(r'(.*)(91.*-\d*)', title)
            if finds:
                result = str(finds[0][0])
            else:
                # Fallback: treat '/' as whitespace and take the first token.
                result = title.replace('/', ' ').split()[0]
            return result.replace('「预告」', '').strip('/ ')
        except Exception:
            return ''

    def getStudio(self, htmltree):
        """Return the fixed studio name used for every entry of this source."""
        return '91制片厂'

    def getActors(self, htmltree):
        """Return the actor names with label, alternatives and digits removed."""
        actors = []
        for player in self.getTreeAll(htmltree, self.expr_actor):
            player = player.replace('主演:', '')
            # Keep only the part before the first '/'.
            player = player.split('/')[0]
            actors.append(re.sub(r'[0-9]+', '', player))
        return actors

    def getRelease(self, htmltree):
        """Return the release date text without its label, or ''."""
        try:
            date = super().getRelease(htmltree).replace('日期:', '')
        except Exception:
            return ''
        if isinstance(date, str) and date:
            return date
        return ''

    def getCover(self, htmltree):
        """Return the cover URL from the page's ``pic_url`` script variable.

        Reads ``self.htmlcode`` (set by ``getHtmlTree``); returns '' when
        the variable cannot be found.
        """
        try:
            url = str(re.findall('var pic_url = "(.*?)"', self.htmlcode)[0])
            return url.strip()
        except Exception:
            return ''
|
|
||||||
|
|
||||||
Reference in New Issue
Block a user