添加Javmenu

This commit is contained in:
hejianjun
2023-03-22 11:31:44 +08:00
parent e8505a89f6
commit e2669169ea
4 changed files with 70 additions and 4 deletions

View File

@@ -272,7 +272,10 @@ def extrafanart_download_threadpool(url_list, save_dir, number, json_data=None):
def image_ext(url):
    """Return the image file extension to use for *url*.

    The extension is taken from ``os.path.splitext(url)`` and accepted only
    when it is one of the known image suffixes (compared case-insensitively).
    Anything else, including input that ``splitext`` cannot handle, falls
    back to ``".jpg"``.

    Args:
        url: Image URL or path.

    Returns:
        The lower-cased extension including the leading dot, or ``".jpg"``.
    """
    try:
        ext = os.path.splitext(url)[-1].lower()
    except TypeError:
        # splitext rejects non-path input (e.g. None); keep the best-effort
        # default instead of propagating the error.
        return ".jpg"
    # The JPEG long form must be spelled '.jpeg' for real JPEG URLs to match.
    if ext in {'.jpg', '.jpeg', '.bmp', '.png', '.gif'}:
        return ext
    return ".jpg"

View File

@@ -54,7 +54,7 @@ def get_number(debug: bool, file_path: str) -> str:
filename = str(re.sub("\[\d{4}-\d{1,2}-\d{1,2}\] - ", "", filepath)) # 去除文件名中时间 filename = str(re.sub("\[\d{4}-\d{1,2}-\d{1,2}\] - ", "", filepath)) # 去除文件名中时间
lower_check = filename.lower() lower_check = filename.lower()
if 'fc2' in lower_check: if 'fc2' in lower_check:
filename = lower_check.replace('ppv', '').replace('--', '-').replace('_', '-').upper() filename = lower_check.replace('--', '-').replace('_', '-').upper()
filename = re.sub("[-_]cd\d{1,2}", "", filename, flags=re.IGNORECASE) filename = re.sub("[-_]cd\d{1,2}", "", filename, flags=re.IGNORECASE)
if not re.search("-|_", filename): # 去掉-CD1之后再无-的情况例如n1012-CD1.wmv if not re.search("-|_", filename): # 去掉-CD1之后再无-的情况例如n1012-CD1.wmv
return str(re.search(r'\w+', filename[:filename.find('.')], re.A).group()) return str(re.search(r'\w+', filename[:filename.find('.')], re.A).group())

View File

@@ -21,6 +21,7 @@ from .avsox import Avsox
from .javlibrary import Javlibrary from .javlibrary import Javlibrary
from .javday import Javday from .javday import Javday
from .pissplay import Pissplay from .pissplay import Pissplay
from .javmenu import Javmenu
from .tmdb import Tmdb from .tmdb import Tmdb
from .imdb import Imdb from .imdb import Imdb
@@ -53,7 +54,7 @@ class Scraping:
""" """
adult_full_sources = ['javlibrary', 'javdb', 'javbus', 'airav', 'fanza', 'xcity', 'jav321', adult_full_sources = ['javlibrary', 'javdb', 'javbus', 'airav', 'fanza', 'xcity', 'jav321',
'mgstage', 'fc2', 'avsox', 'dlsite', 'carib', 'madou', 'mgstage', 'fc2', 'avsox', 'dlsite', 'carib', 'madou',
'getchu', 'gcolle','javday','pissplay' 'getchu', 'gcolle','javday','pissplay','javmenu'
] ]
adult_func_mapping = { adult_func_mapping = {
'avsox': Avsox().scrape, 'avsox': Avsox().scrape,
@@ -72,7 +73,8 @@ class Scraping:
'getchu': Getchu().scrape, 'getchu': Getchu().scrape,
'javlibrary': Javlibrary().scrape, 'javlibrary': Javlibrary().scrape,
'javday': Javday().scrape, 'javday': Javday().scrape,
'pissplay': Pissplay().scrape 'pissplay': Pissplay().scrape,
'javmenu': Javmenu().scrape
} }
general_full_sources = ['tmdb', 'imdb'] general_full_sources = ['tmdb', 'imdb']

61
scrapinglib/javmenu.py Normal file
View File

@@ -0,0 +1,61 @@
# -*- coding: utf-8 -*-
import re
from lxml import etree
from urllib.parse import urljoin
from .parser import Parser
class Javmenu(Parser):
    """Scraper for javmenu.com detail pages.

    Field values are extracted with the XPath expressions declared below.
    Several expressions match both the traditional and the simplified
    Chinese label so that either page variant can be parsed.
    """

    source = 'javmenu'

    # Title and cover come from the Open Graph <meta> tags in <head>.
    expr_title = '/html/head/meta[@property="og:title"]/@content'
    expr_cover = '/html/head/meta[@property="og:image"]/@content'
    # The number is rendered in two pieces (an <a> and a following <span>);
    # getNum() joins them.
    expr_number = '//span[contains(text(),"番號") or contains(text(),"番号")]/../a/text()'
    expr_number2 = '//span[contains(text(),"番號") or contains(text(),"番号")]/../span[2]/text()'
    # Match on the bare label text so trailing punctuation on the page
    # (":" / ":") does not prevent a match.
    expr_runtime = '//span[contains(text(),"時長") or contains(text(),"时长")]/../span[2]/text()'
    expr_release = '//span[contains(text(),"日期")]/../span[2]/text()'
    expr_studio = '//span[contains(text(),"製作") or contains(text(),"制作")]/../span[2]/a/text()'
    expr_actor = '//a[contains(@class,"actress")]/text()'
    expr_tags = '//a[contains(@class,"genre")]/text()'

    def extraInit(self):
        """Set the source-specific defaults for this scraper."""
        # NOTE(review): meaning of imagecut value 4 is defined by the
        # Parser base class / callers -- confirm there.
        self.imagecut = 4
        self.uncensored = True

    def search(self, number):
        """Fetch and parse the detail page for *number*.

        Args:
            number: The number to look up; stored on ``self.number``.

        Returns:
            ``404`` when ``getHtml`` reports 404, otherwise the result of
            ``self.dictformat`` applied to the parsed HTML tree.
        """
        self.number = number
        # An explicitly specified URL takes precedence over the built one.
        if self.specifiedUrl:
            self.detailurl = self.specifiedUrl
        else:
            self.detailurl = 'https://javmenu.com/zh/' + self.number + '/'
        self.htmlcode = self.getHtml(self.detailurl)
        if self.htmlcode == 404:
            return 404
        htmltree = etree.HTML(self.htmlcode)
        return self.dictformat(htmltree)

    def getNum(self, htmltree):
        """Return the number shown on the page and store it on ``self.number``.

        Raises:
            Exception: if the number on the page does not match the
                requested number (compared case-insensitively).
        """
        # The number is split across two elements and must be joined to
        # obtain the complete value.
        part1 = self.getTreeElement(htmltree, self.expr_number)
        part2 = self.getTreeElement(htmltree, self.expr_number2)
        dp_number = part1 + part2
        # NOTE: verify the match, then update self.number to the page's form.
        if dp_number.upper() != self.number.upper():
            raise Exception(f'[!] {self.number}: find [{dp_number}] in javmenu, not match')
        self.number = dp_number
        return self.number

    def getTitle(self, htmltree):
        """Return the page title with the number and site suffix removed."""
        browser_title = super().getTitle(htmltree)
        # Drop everything up to and including the number's digits. The second
        # digit group is used when present; a number with a single digit
        # group (e.g. "ABC-123") falls back to that group, and a number with
        # no digits leaves the title untouched instead of raising IndexError.
        digit_groups = re.findall(r"\d+", self.number)
        if len(digit_groups) > 1:
            title = browser_title.split(digit_groups[1], 1)[-1]
        elif digit_groups:
            title = browser_title.split(digit_groups[0], 1)[-1]
        else:
            title = browser_title
        # Strip the site-name suffix (traditional and simplified variants).
        title = title.replace(' | JAV目錄大全 | 每日更新', "")
        title = title.replace(' | JAV目录大全 | 每日更新', "").strip()
        return title.replace(self.number, '').strip()