5
core.py
5
core.py
@@ -272,7 +272,10 @@ def extrafanart_download_threadpool(url_list, save_dir, number, json_data=None):
|
||||
|
||||
def image_ext(url):
    """Return the image file extension to use for *url*.

    The extension is taken from the last dot-suffix of *url*. Only a small
    whitelist of known image extensions is passed through unchanged; any
    other suffix (or no suffix at all) falls back to ``".jpg"``.

    :param url: URL or path of the image.
    :return: one of ``.jpg``, ``.jpeg``, ``.bmp``, ``.png``, ``.gif``.
    """
    try:
        ext = os.path.splitext(url)[-1]
    except TypeError:
        # splitext rejects values that are not str / bytes / path-like
        # (e.g. None); keep the best-effort default instead of crashing.
        return ".jpg"
    if ext in {'.jpg', '.jpeg', '.bmp', '.png', '.gif'}:
        return ext
    return ".jpg"
|
||||
|
||||
|
||||
@@ -54,7 +54,7 @@ def get_number(debug: bool, file_path: str) -> str:
|
||||
filename = str(re.sub("\[\d{4}-\d{1,2}-\d{1,2}\] - ", "", filepath)) # 去除文件名中时间
|
||||
lower_check = filename.lower()
|
||||
if 'fc2' in lower_check:
|
||||
filename = lower_check.replace('ppv', '').replace('--', '-').replace('_', '-').upper()
|
||||
filename = lower_check.replace('--', '-').replace('_', '-').upper()
|
||||
filename = re.sub("[-_]cd\d{1,2}", "", filename, flags=re.IGNORECASE)
|
||||
if not re.search("-|_", filename): # 去掉-CD1之后再无-的情况,例如n1012-CD1.wmv
|
||||
return str(re.search(r'\w+', filename[:filename.find('.')], re.A).group())
|
||||
|
||||
@@ -21,6 +21,7 @@ from .avsox import Avsox
|
||||
from .javlibrary import Javlibrary
|
||||
from .javday import Javday
|
||||
from .pissplay import Pissplay
|
||||
from .javmenu import Javmenu
|
||||
|
||||
from .tmdb import Tmdb
|
||||
from .imdb import Imdb
|
||||
@@ -53,7 +54,7 @@ class Scraping:
|
||||
"""
|
||||
adult_full_sources = ['javlibrary', 'javdb', 'javbus', 'airav', 'fanza', 'xcity', 'jav321',
|
||||
'mgstage', 'fc2', 'avsox', 'dlsite', 'carib', 'madou',
|
||||
'getchu', 'gcolle','javday','pissplay'
|
||||
'getchu', 'gcolle','javday','pissplay','javmenu'
|
||||
]
|
||||
adult_func_mapping = {
|
||||
'avsox': Avsox().scrape,
|
||||
@@ -72,7 +73,8 @@ class Scraping:
|
||||
'getchu': Getchu().scrape,
|
||||
'javlibrary': Javlibrary().scrape,
|
||||
'javday': Javday().scrape,
|
||||
'pissplay': Pissplay().scrape
|
||||
'pissplay': Pissplay().scrape,
|
||||
'javmenu': Javmenu().scrape
|
||||
}
|
||||
|
||||
general_full_sources = ['tmdb', 'imdb']
|
||||
|
||||
61
scrapinglib/javmenu.py
Normal file
61
scrapinglib/javmenu.py
Normal file
@@ -0,0 +1,61 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import re
|
||||
from lxml import etree
|
||||
from urllib.parse import urljoin
|
||||
|
||||
from .parser import Parser
|
||||
|
||||
|
||||
class Javmenu(Parser):
    """Parser for javmenu.com detail pages.

    Field extraction is driven by the ``expr_*`` XPath expressions below;
    how they are consumed is defined by the ``Parser`` base class (not
    visible here). This subclass overrides page lookup (``search``), number
    extraction (``getNum``) and title clean-up (``getTitle``).
    """
    source = 'javmenu'

    # Title and cover are read from the page's Open Graph <meta> tags.
    expr_title = '/html/head/meta[@property="og:title"]/@content'
    expr_cover = '/html/head/meta[@property="og:image"]/@content'

    # The number is rendered in two pieces (a link and a following span);
    # getNum() joins them. Both Traditional and Simplified labels are matched.
    expr_number = '//span[contains(text(),"番號") or contains(text(),"番号")]/../a/text()'
    expr_number2 = '//span[contains(text(),"番號") or contains(text(),"番号")]/../span[2]/text()'
    # NOTE(review): the Traditional-Chinese label below ends with an ASCII
    # ';' ("時長;"), which looks like a typo and may never match - confirm
    # against the live page before changing it.
    expr_runtime = '//span[contains(text(),"時長;") or contains(text(),"时长")]/../span[2]/text()'
    expr_release = '//span[contains(text(),"日期")]/../span[2]/text()'
    expr_studio = '//span[contains(text(),"製作")]/../span[2]/a/text()'

    expr_actor = '//a[contains(@class,"actress")]/text()'
    expr_tags = '//a[contains(@class,"genre")]/text()'

    def extraInit(self):
        """Set source-specific defaults.

        The meaning of ``imagecut = 4`` is defined by the consumers of this
        attribute, which are outside this file.
        """
        self.imagecut = 4
        self.uncensored = True

    def search(self, number):
        """Fetch and parse the detail page for *number*.

        :param number: the number to look up; stored on ``self.number``.
        :return: ``404`` when ``getHtml`` reports 404, otherwise the result
            of ``self.dictformat`` on the parsed page.
        """
        self.number = number
        if self.specifiedUrl:
            # A caller-supplied URL takes precedence over the built one.
            self.detailurl = self.specifiedUrl
        else:
            self.detailurl = 'https://javmenu.com/zh/' + self.number + '/'
        self.htmlcode = self.getHtml(self.detailurl)
        if self.htmlcode == 404:
            return 404
        htmltree = etree.HTML(self.htmlcode)
        result = self.dictformat(htmltree)
        return result

    def getNum(self, htmltree):
        """Return the number shown on the page, verifying it matches the query.

        :raises Exception: when the number on the page differs
            (case-insensitively) from ``self.number``.
        """
        # The number is split across two elements and must be joined to get
        # the complete value.
        part1 = self.getTreeElement(htmltree, self.expr_number)
        part2 = self.getTreeElement(htmltree, self.expr_number2)
        dp_number = part1 + part2
        # NOTE: verify the match, then update self.number with the page's
        # own spelling of the number.
        if dp_number.upper() != self.number.upper():
            raise Exception(f'[!] {self.number}: find [{dp_number}] in javmenu, not match')
        self.number = dp_number
        return self.number

    def getTitle(self, htmltree):
        """Return the page title with the number and site suffix removed."""
        browser_title = super().getTitle(htmltree)
        # Strip the leading number: everything up to and including the
        # numeric part of the number is dropped. The second digit group is
        # preferred (e.g. "123456" in "FC2-PPV-123456"); numbers with a
        # single digit group fall back to that group instead of raising
        # IndexError, and numbers without digits leave the title untouched.
        digit_groups = re.findall(r"\d+", self.number)
        if len(digit_groups) >= 2:
            marker = digit_groups[1]
        elif digit_groups:
            marker = digit_groups[0]
        else:
            marker = None
        title = browser_title.split(marker, 1)[-1] if marker else browser_title
        # Remove the site suffix in both Traditional and Simplified Chinese.
        title = title.replace(' | JAV目錄大全 | 每日更新', "")
        title = title.replace(' | JAV目录大全 | 每日更新', "").strip()
        return title.replace(self.number, '').strip()
|
||||
|
||||
Reference in New Issue
Block a user