Support the 91制片厂 (91mv) and 麻豆 (Madou) sources; improve image cropping by adding a face-recognition module

commit a84452ba1c
parent 9a9d36672f
Author: hejianjun
Date: 2022-01-30 03:37:08 +08:00
8 changed files with 449 additions and 45 deletions

View File

@@ -22,6 +22,8 @@ from . import xcity
from . import dlsite
from . import carib
from . import fc2club
from . import mv91
from . import madou
def get_data_state(data: dict) -> bool: # detect a failed metadata fetch
@@ -57,7 +59,9 @@ def get_data_from_json(file_number, oCC): # 从JSON返回元数据
# "javlib": javlib.main,
"dlsite": dlsite.main,
"carib": carib.main,
"fc2club": fc2club.main
"fc2club": fc2club.main,
"mv91": mv91.main,
"madou": madou.main
}
conf = config.getInstance()
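For context, get_data_from_json looks each configured source name up in this mapping and calls that crawler's main(number) until one of them returns usable metadata. A minimal sketch of that dispatch, with illustrative variable names (the real function adds config handling, source ordering and error reporting around this idea):

sources = ["mv91", "madou"]                          # e.g. taken from config.ini
for source in sources:
    json_text = func_mapping[source](file_number)    # e.g. madou.main("MD-0147")
    data = json.loads(json_text)
    if data.get("title"):                            # an empty title means the scrape failed
        return data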

View File

@@ -1,3 +1,4 @@
import logging
import sys
sys.path.append('../')
import re
@@ -139,7 +140,8 @@ def getCover_small(html, index=0):
def getTrailer(htmlcode): # get the trailer
video_pather = re.compile(r'<video id\=\".*?>\s*?<source src=\"(.*?)\"')
video = video_pather.findall(htmlcode)
if video[0] != "":
# guard against an empty match list
if video and video[0] != "":
if not 'https:' in video[0]:
video_url = 'https:' + video[0]
else:
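The added emptiness check matters because re.findall returns an empty list when the page contains no matching <video> tag, so indexing video[0] would raise IndexError before the URL check ever ran. Roughly:

video = video_pather.findall(htmlcode_without_trailer)   # hypothetical page with no trailer -> []
# `video[0] != ""` alone would raise IndexError here;
# `video and video[0] != ""` short-circuits to False instead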
@@ -263,16 +265,14 @@ def main(number):
# replace wit normal cover and cut it
imagecut = 1
cover_small = getCover(lx)
dp_number = getNum(lx)
if dp_number.upper() != number:
raise ValueError("number not found")
if dp_number.upper() != number.upper():
raise ValueError("number not eq"+dp_number)
title = getTitle(lx)
if title and dp_number:
number = dp_number
# remove duplicate title
title = title.replace(number, '').strip()
dic = {
'actor': getActor(lx),
'title': title,

WebCrawler/madou.py (new file, 164 lines)
View File

@@ -0,0 +1,164 @@
import sys
sys.path.append('../')
import json
import re
from urllib.parse import urlparse, unquote
from bs4 import BeautifulSoup  # need install
from lxml import etree  # need install
from pyquery import PyQuery as pq  # need install
from ADC_function import *
def getActorPhoto(html):
return ''
def getTitle(html, number): # get the title
title = str(html.xpath('//h1[@class="article-title"]/text()')[0])
try:
result = str(re.split(r'[/||-]', title)[1])
return result.strip()
except:
return title.replace(number.upper(), '').strip()
def getStudio(html): # get the studio (modified)
try:
category = str(html.xpath('//a[@rel="category tag"]/text()')[0])
return category.strip()
except:
return '麻豆社'
def getYear(html): # get the year
return ''
def getCover(htmlcode): # get the cover image
try:
url = str(re.findall("shareimage : '(.*?)'", htmlcode)[0])
return url.strip()
except:
return ''
def getRelease(html): # get the release date
return ''
def getRuntime(html): # get the runtime
return ''
def getActor(html): # get the actresses
b = []
for player in html.xpath('//div[@class="article-tags"]/a/text()'):
b.append(player)
return b
def getUrl(html):
return str(html.xpath('//a[@class="share-weixin"]/@data-url')[0])
def getNum(url, number): # get the movie number
try:
# decode the URL
filename = unquote(urlparse(url).path)
# strip the leading "/" and the trailing ".html"
result = filename[1:-5].upper().strip()
print(result)
# drop the Chinese part if it does not already match the requested number
if result.upper() != number.upper():
result = re.split(r'[^\x00-\x7F]+', result, 1)[0]
# strip stray hyphens
return result.strip('-')
except:
return ''
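A hypothetical walk-through of getNum (the URL and its slug format are made up for illustration):

url = "https://madou.club/md-0147-某某标题.html"       # hypothetical slug
filename = unquote(urlparse(url).path)                 # "/md-0147-某某标题.html"
result = filename[1:-5].upper().strip()                # "MD-0147-某某标题"
# this differs from the requested "MD-0147", so everything from the first
# non-ASCII character onward is dropped, leaving "MD-0147-"
result = re.split(r'[^\x00-\x7F]+', result, 1)[0]
result.strip('-')                                      # -> "MD-0147"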
def getDirector(html): # get the director (modified)
return ''
def getOutline(html): # get the outline
return ''
def getSerise(html): # get the series (modified)
return ''
def getTag(html): # get the tags
return html.xpath('//div[@class="article-tags"]/a/text()')
def getExtrafanart(html): # get the extra fanart (stills)
return ''
def main(number):
try:
try:
number = number.lower()
url = "https://madou.club/" + number + ".html"
htmlcode = get_html(url)
except:
print(number)
html = etree.fromstring(htmlcode, etree.HTMLParser())
url = getUrl(html)
dic = {
# title
'title': getTitle(html, number),
# studio
'studio': getStudio(html),
# year
'year': getYear(html),
# outline
'outline': getOutline(html),
# runtime
'runtime': getRuntime(html),
# director
'director': getDirector(html),
# actresses
'actor': getActor(html),
# release date
'release': getRelease(html),
# movie number
'number': getNum(url, number),
# cover URL
'cover': getCover(htmlcode),
# extra fanart (stills)
'extrafanart': getExtrafanart(html),
'imagecut': 1,
# tags
'tag': getTag(html),
# label
'label': getSerise(html),
# source page URL
'website': url,
'source': 'madou.py',
# series
'series': getSerise(html)
}
js = json.dumps(dic, ensure_ascii=False, sort_keys=True,
indent=4, separators=(',', ':'), ) # .encode('UTF-8')
return js
except Exception as e:
if config.getInstance().debug():
print(e)
data = {
"title": "",
}
js = json.dumps(
data, ensure_ascii=False, sort_keys=True, indent=4, separators=(",", ":")
)
return js
if __name__ == '__main__':
print(main('MD-0147'))
print(main('MD0147'))

WebCrawler/mv91.py (new file, 158 lines)
View File

@@ -0,0 +1,158 @@
import sys
sys.path.append('../')
import re
from pyquery import PyQuery as pq  # need install
from lxml import etree  # need install
from bs4 import BeautifulSoup  # need install
import json
from ADC_function import *
host = 'https://www.91mv.org'
def getActorPhoto(html):
return ''
def getTitle(html): # get the title
try:
title = str(html.xpath('//div[@class="player-title"]/text()')[0])
result = str(re.findall(r'(.*)(91.*-\d*)', title)[0][0])
return result.strip()
except:
return ''
def getStudio(html): # get the studio (modified)
return '91制片厂'
def getYear(html): # get the year
try:
result = str(html.xpath('//p[@class="date"]/text()')[0])
date = result.replace('日期:','')
if isinstance(date, str) and len(date):
return date
except:
return ''
return ''
def getCover(htmlcode): # get the cover image
try:
url = str(re.findall('var pic_url = "(.*?)"',htmlcode)[0])
return url.strip()
except:
return ''
def getRelease(html): # get the release date
try:
result = str(html.xpath('//p[@class="date"]/text()')[0])
date = result.replace('日期:','')
if isinstance(date, str) and len(date):
return date
except:
return ''
return ''
def getRuntime(htmlcode): # get the runtime
return ''
def getActor(html): # get the actresses
b=[]
for player in html.xpath('//p[@class="player-name"]/text()'):
player = player.replace('主演:','')
b.append(player)
return b
def getNum(html): # get the movie number
try:
title = str(html.xpath('//div[@class="player-title"]/text()')[0])
result = str(re.findall(r'(.*)(91.*-\d*)', title)[0][1])
return result.strip()
except:
return ''
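A made-up player title shows how the same regex feeds both getTitle and getNum:

title = "hypothetical title 91CM-121"              # illustrative only
groups = re.findall(r'(.*)(91.*-\d*)', title)[0]   # ('hypothetical title ', '91CM-121')
groups[0].strip()                                  # getTitle -> "hypothetical title"
groups[1].strip()                                  # getNum   -> "91CM-121"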
def getDirector(html): # get the director (modified)
return ''
def getOutline(html): # get the outline
try:
result = str(html.xpath('//div[@class="play-text"]/text()')[0])
return result.strip()
except:
return ''
def getSerise(htmlcode): # get the series (modified)
return ''
def getTag(html): # get the tags
return html.xpath('//div[@class="player-tag"]/text()')
def getExtrafanart(htmlcode): # get the extra fanart (stills)
return ''
def search(keyword): # search the site and return the first result's URL path
search_html = get_html(host + '/index/search?keywords=' + keyword)
html = etree.fromstring(search_html, etree.HTMLParser())
return html.xpath('//a[@class="video-list"]/@href')[0]
def main(number):
try:
try:
number = number.replace('91CM-','').replace('91MS-','')
url = host + str(search(number))
htmlcode = get_html(url)
except:
print(number)
html = etree.fromstring(htmlcode, etree.HTMLParser())
dic = {
# title
'title': getTitle(html),
# studio
'studio': getStudio(html),
# year
'year': getYear(html),
# outline
'outline': getOutline(html),
# runtime
'runtime': getRuntime(html),
# director
'director': getDirector(html),
# actresses
'actor': getActor(html),
# release date
'release': getRelease(html),
# movie number
'number': getNum(html),
# cover URL
'cover': getCover(htmlcode),
# extra fanart (stills)
'extrafanart': getExtrafanart(html),
'imagecut': 1,
# tags
'tag': getTag(html),
# label
'label': getSerise(html),
# source page URL
'website': url,
'source': 'mv91.py',
# series
'series': getSerise(html)
}
js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4,separators=(',', ':'), ) # .encode('UTF-8')
return js
except Exception as e:
if config.getInstance().debug():
print(e)
data = {
"title": "",
}
js = json.dumps(
data, ensure_ascii=False, sort_keys=True, indent=4, separators=(",", ":")
)
return js
if __name__ == '__main__':
print(main('91CM-121'))
print(main('91CM-122'))
print(main('91CM-143'))
print(main('91MS-006'))

View File

@@ -112,4 +112,8 @@ mode=1
vars=outline,series,studio,tag,title
[javdb]
sites=33,34
sites=33,34
; face recognition model. hog: Histogram of Oriented Gradients (less accurate, fast). cnn: deep-learning model (more accurate, needs GPU/CUDA, slow)
[face]
locations_model=hog
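locations_model also accepts a comma-separated fallback list; face_crop in core.py (added in this commit) splits the value on commas and tries each model in turn. A minimal sketch of that behaviour, assuming locations_model=hog,cnn:

for model in ['hog', 'cnn']:              # fast HOG first, CNN only as a fallback
    center = face_center(filename, model) # 0 when no face is found
    if center:
        break                             # first model that finds a face wins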

View File

@@ -314,6 +314,22 @@ class Config:
except:
return "33,34"
def face_locations_model(self) -> str:
try:
return self.conf.get("face", "locations_model")
except:
return "hog"
def face_app_id(self) -> str:
return self.conf.get("face", "appid")
def face_api_key(self) -> str:
return self.conf.get("face", "key")
def face_app_secret(self) -> str:
return self.conf.get("face", "secret")
@staticmethod
def _exit(sec: str) -> None:
print("[-] Read config error! Please check the {} section in config.ini", sec)

core.py (133 changed lines)
View File

@@ -5,6 +5,7 @@ import re
import shutil
import sys
from PIL import Image
from io import BytesIO
from pathlib import Path
@@ -14,6 +15,7 @@ from ADC_function import *
from WebCrawler import get_data_from_json
from number_parser import is_uncensored
def escape_path(path, escape_literals: str): # Remove escape literals
backslash = '\\'
for literal in escape_literals:
@@ -245,14 +247,18 @@ def extrafanart_download_threadpool(url_list, save_dir, number):
if conf.debug():
print(f'[!]Extrafanart download ThreadPool mode runtime {time.perf_counter() - tm_start:.3f}s')
def image_ext(url):
try:
return os.path.splitext(url)[-1]
except:
return ".jpg"
# if the cover download fails, move the source file to the failed folder
def image_download(cover, number, leak_word, c_word, hack_word, path, filepath):
filename = f"{number}{leak_word}{c_word}{hack_word}-fanart.jpg"
full_filepath = os.path.join(path, filename)
def image_download(cover, fanart_path, thumb_path, path, filepath):
full_filepath = os.path.join(path, fanart_path)
if config.getInstance().download_only_missing_images() and not file_not_exist_or_empty(full_filepath):
return
if download_file_with_filename(cover, filename, path, filepath) == 'failed':
if download_file_with_filename(cover, fanart_path, path, filepath) == 'failed':
moveFailedFolder(filepath)
return
@@ -260,17 +266,17 @@ def image_download(cover, number, leak_word, c_word, hack_word, path, filepath):
for i in range(configProxy.retry):
if file_not_exist_or_empty(full_filepath):
print('[!]Image Download Failed! Trying again. [{}/3]', i + 1)
download_file_with_filename(cover, filename, path, filepath)
download_file_with_filename(cover, fanart_path, path, filepath)
continue
else:
break
if file_not_exist_or_empty(full_filepath):
return
print('[+]Image Downloaded!', full_filepath)
shutil.copyfile(full_filepath, os.path.join(path, f"{number}{leak_word}{c_word}{hack_word}-thumb.jpg"))
shutil.copyfile(full_filepath, os.path.join(path, thumb_path))
def print_files(path, leak_word, c_word, naming_rule, part, cn_sub, json_data, filepath, tag, actor_list, liuchu, uncensored, hack_word):
def print_files(path, leak_word, c_word, naming_rule, part, cn_sub, json_data, filepath, tag, actor_list, liuchu, uncensored, hack_word, fanart_path, poster_path, thumb_path):
title, studio, year, outline, runtime, director, actor_photo, release, number, cover, trailer, website, series, label = get_info(json_data)
if config.getInstance().main_mode() == 3: # in mode 3 the video file is left untouched, so the .nfo must use exactly the video's filename (apart from the extension) for KODI and similar software to pick it up
nfo_path = str(Path(filepath).with_suffix('.nfo'))
@@ -303,9 +309,9 @@ def print_files(path, leak_word, c_word, naming_rule, part, cn_sub, json_data, f
print(" <plot><![CDATA[" + outline + "]]></plot>", file=code)
print(" <runtime>" + str(runtime).replace(" ", "") + "</runtime>", file=code)
print(" <director>" + director + "</director>", file=code)
print(" <poster>" + number + leak_word + c_word + hack_word + "-poster.jpg</poster>", file=code)
print(" <thumb>" + number + leak_word + c_word + hack_word + "-thumb.jpg</thumb>", file=code)
print(" <fanart>" + number + leak_word + c_word + hack_word + '-fanart.jpg' + "</fanart>", file=code)
print(" <poster>" + poster_path + "</poster>", file=code)
print(" <thumb>" + thumb_path + "</thumb>", file=code)
print(" <fanart>" + fanart_path + "</fanart>", file=code)
try:
for key in actor_list:
print(" <actor>", file=code)
@@ -365,23 +371,70 @@ def print_files(path, leak_word, c_word, naming_rule, part, cn_sub, json_data, f
return
def cutImage(imagecut, path, number, leak_word, c_word, hack_word):
fullpath_noext = os.path.join(path, f"{number}{leak_word}{c_word}{hack_word}")
if imagecut == 1: # 剪裁大封面
def face_center(filename, model):
print('[+]Image found face ' + model)
try:
import face_recognition
image = face_recognition.load_image_file(filename)
face_locations = face_recognition.face_locations(image, 0, model)
if face_locations:
top, right, bottom, left = face_locations[0]
# horizontal centre of the face
return int((right+left)/2)
except Exception as e:
print("[-]", e)
return 0
def face_crop(filename, width, height):
# the cropped width is 2/3 of the height (a 2:3 poster)
cropWidthHalf = int(height/3)
try:
locations_model = filter(lambda x: x, config.getInstance().face_locations_model().lower().split(','))
for model in locations_model:
center = face_center(filename, model)
# stop at the first model that finds a face
if center:
cropLeft = center-cropWidthHalf
cropRight = center+cropWidthHalf
# clamp the crop box to the image bounds
if cropLeft < 0:
cropLeft = 0
cropRight = cropWidthHalf*2
elif cropRight > width:
cropLeft = width-cropWidthHalf*2
cropRight = width
return (cropLeft, 0, cropRight, height)
except:
print('[-]Not found face! ' + filename)
# no face found: fall back to cropping from the right edge
return (width-cropWidthHalf*2, 0, width, height)
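A worked example with made-up numbers: for a 1920x1080 fanart whose detected face centre sits at x = 1700, the crop box works out as follows.

height, width, center = 1080, 1920, 1700          # hypothetical landscape fanart
cropWidthHalf = int(height/3)                     # 360 -> poster width 720 (2:3 of 1080)
cropLeft, cropRight = center - 360, center + 360  # (1340, 2060) overflows width 1920
cropLeft, cropRight = width - 720, width          # clamped to the right edge
box = (cropLeft, 0, cropRight, height)            # (1200, 0, 1920, 1080)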
def cutImage(imagecut, path, fanart_path, poster_path):
fullpath_fanart = os.path.join(path, fanart_path)
fullpath_poster = os.path.join(path, poster_path)
if imagecut == 1: # crop the large cover
try:
img = Image.open(fullpath_noext + '-fanart.jpg')
imgSize = img.size
w = img.width
h = img.height
img2 = img.crop((w / 1.9, 0, w, h))
img2.save(fullpath_noext + '-poster.jpg')
print('[+]Image Cutted! ' + fullpath_noext + '-poster.jpg')
img = Image.open(fullpath_fanart)
width, height = img.size
if width/height > 2/3: # wider than 2:3
# crop around the detected face
img2 = img.crop(face_crop(fullpath_fanart, width, height))
elif width/height < 2/3: # taller than 2:3
# keep the top and trim the bottom to a 2:3 box
cropBottom = int(width*3/2)
img2 = img.crop((0, 0, width, cropBottom))
else: # already 2:3
img2 = img
img2.save(fullpath_poster)
print('[+]Image Cut! ' + fullpath_poster)
except Exception as e:
print(e)
print('[-]Cover cut failed!')
elif imagecut == 0: # 复制封面
shutil.copyfile(fullpath_noext + '-fanart.jpg', fullpath_noext + '-poster.jpg')
print('[+]Image Copyed! ' + fullpath_noext + '-poster.jpg')
elif imagecut == 0: # copy the cover as-is
shutil.copyfile(fullpath_fanart, fullpath_poster)
print('[+]Image Copied! ' + fullpath_poster)
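For the portrait branch, PIL's Image.crop takes a single 4-tuple box; with a hypothetical 800x1600 fanart the arithmetic is:

width, height = 800, 1600                    # 800/1600 = 0.5 < 2/3, so trim the bottom
cropBottom = int(width*3/2)                  # 1200
img2 = img.crop((0, 0, width, cropBottom))   # keeps the top, yields an 800x1200 (2:3) poster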
# this function was copied over from the GUI version
# parameter description
@@ -652,6 +705,12 @@ def core_main(file_path, number_th, oCC):
# create the folder
#path = create_folder(rootpath + '/' + conf.success_folder(), json_data.get('location_rule'), json_data)
cover = json_data.get('cover')
ext = image_ext(cover)
fanart_path = f"{number}{leak_word}{c_word}{hack_word}-fanart{ext}"
poster_path = f"{number}{leak_word}{c_word}{hack_word}-poster{ext}"
thumb_path = f"{number}{leak_word}{c_word}{hack_word}-thumb{ext}"
# main_mode
# 1: 刮削模式 / Scraping mode
# 2: 整理模式 / Organizing mode
@@ -666,8 +725,9 @@ def core_main(file_path, number_th, oCC):
if imagecut == 3:
small_cover_check(path, number, json_data.get('cover_small'), leak_word, c_word, hack_word, filepath)
# creatFolder returns the numbered folder path
image_download(json_data.get('cover'), number, leak_word, c_word, hack_word, path, filepath)
image_download(cover, fanart_path, thumb_path, path, filepath)
if not multi_part or part.lower() == '-cd1':
try:
@@ -683,30 +743,29 @@ def core_main(file_path, number_th, oCC):
except:
pass
# crop the cover image
cutImage(imagecut, path, number, leak_word, c_word, hack_word)
cutImage(imagecut, path, fanart_path, poster_path)
# add watermarks
poster_path = os.path.join(path, f"{number}{leak_word}{c_word}{hack_word}-poster.jpg")
thumb_path = os.path.join(path, f"{number}{leak_word}{c_word}{hack_word}-thumb.jpg")
if conf.is_watermark():
add_mark(poster_path, thumb_path, cn_sub, leak, uncensored, hack)
add_mark(os.path.join(path,poster_path), os.path.join(path,thumb_path), cn_sub, leak, uncensored, hack)
# move the movie file
paste_file_to_folder(filepath, path, number, leak_word, c_word, hack_word)
# finally write the .nfo metadata file; creating it marks the task as successful
print_files(path, leak_word, c_word, json_data.get('naming_rule'), part, cn_sub, json_data, filepath, tag, json_data.get('actor_list'), liuchu, uncensored, hack_word)
print_files(path, leak_word, c_word, json_data.get('naming_rule'), part, cn_sub, json_data, filepath, tag, json_data.get('actor_list'), liuchu, uncensored, hack_word,
fanart_path, poster_path, thumb_path)
elif conf.main_mode() == 2:
# create the folder
path = create_folder(json_data)
# move the file
paste_file_to_folder_mode2(filepath, path, multi_part, number, part, leak_word, c_word, hack_word)
poster_path = os.path.join(path, f"{number}{leak_word}{c_word}{hack_word}-poster.jpg")
thumb_path = os.path.join(path, f"{number}{leak_word}{c_word}{hack_word}-thumb.jpg")
if conf.is_watermark():
add_mark(poster_path, thumb_path, cn_sub, leak, uncensored, hack)
add_mark(os.path.join(path,poster_path), os.path.join(path,thumb_path), cn_sub, leak, uncensored, hack)
elif conf.main_mode() == 3:
path = str(Path(file_path).parent)
@@ -718,7 +777,7 @@ def core_main(file_path, number_th, oCC):
small_cover_check(path, number, json_data.get('cover_small'), leak_word, c_word, hack_word, filepath)
# creatFolder returns the numbered folder path
image_download(json_data.get('cover'), number, leak_word, c_word, hack_word, path, filepath)
image_download(cover, fanart_path, thumb_path, path, filepath)
if not multi_part or part.lower() == '-cd1':
# download the trailer
@@ -730,14 +789,12 @@ def core_main(file_path, number_th, oCC):
extrafanart_download(json_data.get('extrafanart'), path, number, filepath)
# crop the cover image
cutImage(imagecut, path, number, leak_word, c_word, hack_word)
cutImage(imagecut, path, fanart_path, poster_path)
# add watermarks
poster_path = os.path.join(path, f"{number}{leak_word}{c_word}{hack_word}-poster.jpg")
thumb_path = os.path.join(path, f"{number}{leak_word}{c_word}{hack_word}-thumb.jpg")
if conf.is_watermark():
add_mark(poster_path, thumb_path, cn_sub, leak, uncensored, hack)
add_mark(os.path.join(path,poster_path), os.path.join(path,thumb_path), cn_sub, leak, uncensored, hack)
# finally write the .nfo metadata file; creating it marks the task as successful
print_files(path, leak_word, c_word, json_data.get('naming_rule'), part, cn_sub, json_data, filepath,
tag, json_data.get('actor_list'), liuchu, uncensored, hack_word)
tag, json_data.get('actor_list'), liuchu, uncensored, hack_word, fanart_path, poster_path, thumb_path)

View File

@@ -9,3 +9,4 @@ urllib3==1.24.3
certifi==2020.12.5
MechanicalSoup==1.1.0
opencc-python-reimplemented
face_recognition
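Note: face_recognition is built on dlib, so installing it usually needs CMake and a C++ toolchain available on the machine; the commands below are illustrative and platform-dependent.

pip install cmake
pip install face_recognition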