Merge branch 'master' into skip_tags2
@@ -45,12 +45,13 @@ zh_cn/zh_tw/jp:指对应语言输出的词,按设置的对应语言输出。
<a zh_cn="删除" zh_tw="删除" jp="删除" keyword=",懸疑,悬疑,"/>
<a zh_cn="删除" zh_tw="删除" jp="删除" keyword=",亞洲,亚洲,"/>
<a zh_cn="删除" zh_tw="删除" jp="删除" keyword=",ハロウィーンキャンペーン,"/>
<a zh_cn="删除" zh_tw="删除" jp="删除" keyword=",DMM獨家,DMM独家,DMM專屬,DMM专属,"/>
<a zh_cn="删除" zh_tw="删除" jp="删除" keyword=",數位馬賽克,数位马赛克,"/>

<a zh_cn="16小时+" zh_tw="16小時+" jp="16時間以上作品" keyword=",16小時以上作品,16小时以上作品,16時間以上作品,16小时+,16小時+,"/>
<a zh_cn="3D" zh_tw="3D" jp="3D" keyword=",3D,"/>
<a zh_cn="3D卡通" zh_tw="3D卡通" jp="3Dエロアニメ" keyword=",3D卡通,3Dエロアニメ,"/>
<a zh_cn="4K" zh_tw="4K" jp="4K" keyword=",4K,"/>
<a zh_cn="DMM独家" zh_tw="DMM獨家" jp="DMM獨家" keyword=",DMM獨家,DMM独家,DMM專屬,DMM专属,"/>
<a zh_cn="M女" zh_tw="M女" jp="M女" keyword=",M女,"/>
<a zh_cn="SM" zh_tw="SM" jp="SM" keyword=",SM,"/>
<a zh_cn="轻虐" zh_tw="輕虐" jp="微SM" keyword=",微SM,轻虐,輕虐,"/>
@@ -115,8 +116,7 @@ zh_cn/zh_tw/jp:指对应语言输出的词,按设置的对应语言输出。
<a zh_cn="男优潮吹" zh_tw="男優潮吹" jp="男の潮吹き" keyword=",男潮吹,男の潮吹き,男优潮吹,男優潮吹,"/>
<a zh_cn="巴士导游" zh_tw="巴士導遊" jp="車掌小姐" keyword=",車掌小姐,车掌小姐,巴士乘务员,巴士乘務員,巴士导游,巴士導遊,バスガイド,"/>
<a zh_cn="熟女" zh_tw="熟女" jp="熟女" keyword=",熟女,成熟的女人,"/>
<a zh_cn="出轨" zh_tw="出軌" jp="出軌" keyword=",出軌,出轨,"/>
<a zh_cn="白天出轨" zh_tw="白天出軌" jp="白天出轨" keyword=",白天出軌,白天出轨,通姦,"/>
<a zh_cn="出轨" zh_tw="出軌" jp="出軌" keyword=",出軌,出轨,白天出軌,白天出轨,通姦,"/>
<a zh_cn="处男" zh_tw="處男" jp="處男" keyword=",處男,处男,"/>
<a zh_cn="处女" zh_tw="處女" jp="處女" keyword=",處女,处女,処女,童貞,"/>
<a zh_cn="触手" zh_tw="觸手" jp="觸手" keyword=",觸手,触手,"/>
@@ -133,7 +133,7 @@ zh_cn/zh_tw/jp:指对应语言输出的词,按设置的对应语言输出。
<a zh_cn="放尿" zh_tw="放尿" jp="放尿" keyword=",放尿,"/>
<a zh_cn="女服务生" zh_tw="女服務生" jp="ウェイトレス" keyword=",服務生,服务生,女服务生,女服務生,ウェイトレス,"/>
<a zh_cn="蒙面" zh_tw="蒙面" jp="覆面・マスク" keyword=",蒙面・面罩,蒙面・面具,覆面・マスク,"/>
<a zh_cn="肛交" zh_tw="肛交" jp="肛交" keyword=",肛交,アナル,"/>
<a zh_cn="肛交" zh_tw="肛交" jp="肛交" keyword=",肛門・肛交,肛交,アナル,"/>
<a zh_cn="肛内中出" zh_tw="肛內中出" jp="肛內中出" keyword=",肛内中出,肛內中出,"/>
<a zh_cn="个子高" zh_tw="個子高" jp="个子高" keyword=",高,个子高,個子高,"/>
<a zh_cn="高中生" zh_tw="高中生" jp="高中生" keyword=",高中女生,高中生,"/>
@@ -268,7 +268,6 @@ zh_cn/zh_tw/jp:指对应语言输出的词,按设置的对应语言输出。
<a zh_cn="插入手指" zh_tw="插入手指" jp="手指插入" keyword=",手指插入,插入手指,"/>
<a zh_cn="首次亮相" zh_tw="首次亮相" jp="首次亮相" keyword=",首次亮相,"/>
<a zh_cn="叔母" zh_tw="叔母" jp="叔母さん" keyword=",叔母,叔母さん,"/>
<a zh_cn="数位马赛克" zh_tw="數位馬賽克" jp="數位馬賽克" keyword=",數位馬賽克,数位马赛克,"/>
<a zh_cn="双性人" zh_tw="雙性人" jp="雙性人" keyword=",雙性人,双性人,"/>
<a zh_cn="韵律服" zh_tw="韻律服" jp="レオタード" keyword=",韵律服,韻律服,レオタード,"/>
<a zh_cn="水手服" zh_tw="水手服" jp="セーラー服" keyword=",水手服,セーラー服,"/>
@@ -290,7 +289,7 @@ zh_cn/zh_tw/jp:指对应语言输出的词,按设置的对应语言输出。
<a zh_cn="玩物" zh_tw="玩物" jp="玩具" keyword=",玩具,玩物,"/>
<a zh_cn="适合手机垂直播放" zh_tw="適合手機垂直播放" jp="為智能手機推薦垂直視頻" keyword=",スマホ専用縦動画,為智能手機推薦垂直視頻,适合手机垂直播放,適合手機垂直播放,"/>
<a zh_cn="猥亵穿着" zh_tw="猥褻穿着" jp="猥褻穿著" keyword=",猥褻穿著,猥亵穿着,猥褻穿着,"/>
<a zh_cn="无码流出" zh_tw="無碼流出" jp="无码流出" keyword=",無碼流出,无码流出,"/>
<a zh_cn="无码流出" zh_tw="無碼流出" jp="无码流出" keyword=",流出,無碼流出,无码流出,"/>
<a zh_cn="无码破解" zh_tw="無碼破解" jp="無碼破解" keyword=",無碼破解,无码破解,"/>
<a zh_cn="无毛" zh_tw="無毛" jp="無毛" keyword=",無毛,无毛,剃毛,白虎,パイパン,"/>
<a zh_cn="剧情" zh_tw="劇情" jp="戲劇" keyword=",戲劇,戏剧,剧情,劇情,戲劇x,戏剧、连续剧,戲劇、連續劇,ドラマ,"/>
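The entries above follow the tag-mapping format described in the hunk header: `keyword` lists every alias of a tag (comma-delimited and comma-wrapped), and `zh_cn` / `zh_tw` / `jp` give the word emitted for the configured output language, with `删除` entries dropping the tag entirely. A minimal lookup sketch, assuming the table is loaded from an XML file named `mapping.xml` (the project's actual file name and loader may differ):

```python
from lxml import etree

def translate_tag(tag, lang="zh_cn", mapping_file="mapping.xml"):
    """Return the configured-language word for a tag, or the tag itself if unmapped."""
    tree = etree.parse(mapping_file)
    for node in tree.xpath("//a"):
        # keyword is comma-wrapped, so an exact-alias match is a substring test on ",tag,"
        if "," + tag + "," in node.get("keyword", ""):
            word = node.get(lang, "")
            return None if word == "删除" else word  # interpretation: "删除" entries drop the tag
    return tag

# e.g. translate_tag("アナル") -> "肛交"; translate_tag("悬疑") -> None (dropped)
```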
@@ -15,6 +15,8 @@

[English](https://github.com/yoshiko2/Movie_Data_Capture/blob/master/README_EN.md)

## 广告位招租 yoshiko2.dev@gmail.com

# 文档
* [官方教程WIKI](https://github.com/yoshiko2/Movie_Data_Capture/wiki)
* [VergilGao's Docker部署](https://github.com/VergilGao/docker-mdc)
core.py
@@ -84,14 +84,15 @@ def small_cover_check(path, filename, cover_small, movie_path, json_headers=None

def create_folder(json_data): # 创建文件夹
title, studio, year, outline, runtime, director, actor_photo, release, number, cover, trailer, website, series, label = get_info(json_data)
title, studio, year, outline, runtime, director, actor_photo, release, number, cover, trailer, website, series, label = get_info(
json_data)
conf = config.getInstance()
success_folder = conf.success_folder()
actor = json_data.get('actor')
location_rule = eval(conf.location_rule(), json_data)
if 'actor' in conf.location_rule() and len(actor) > 100:
print(conf.location_rule())
location_rule = eval(conf.location_rule().replace("actor","'多人作品'"), json_data)
location_rule = eval(conf.location_rule().replace("actor", "'多人作品'"), json_data)
maxlen = conf.max_title_len()
if 'title' in conf.location_rule() and len(title) > maxlen:
shorttitle = title[0:maxlen]
@@ -129,7 +130,7 @@ def download_file_with_filename(url, filename, path, filepath, json_headers=None
except:
print(f"[-]Fatal error! Can not make folder '{path}'")
os._exit(0)
r = get_html(url=url,return_type='content',json_headers=json_headers)
r = get_html(url=url, return_type='content', json_headers=json_headers)
if r == '':
print('[-]Movie Download Data not found!')
return
@@ -144,20 +145,22 @@ def download_file_with_filename(url, filename, path, filepath, json_headers=None
# moveFailedFolder(filepath)
# return
except Exception as e:
print('[-]Image Download :Error',e)
print('[-]Image Download :Error', e)
print('[-]Connect Failed! Please check your Proxy or Network!')
moveFailedFolder(filepath)
return

def trailer_download(trailer, leak_word, c_word, hack_word, number, path, filepath):
if download_file_with_filename(trailer, number + leak_word + c_word + hack_word + '-trailer.mp4', path, filepath) == 'failed':
if download_file_with_filename(trailer, number + leak_word + c_word + hack_word + '-trailer.mp4', path,
filepath) == 'failed':
return
configProxy = config.getInstance().proxy()
for i in range(configProxy.retry):
if file_not_exist_or_empty(path+'/' + number + leak_word + c_word + hack_word + '-trailer.mp4'):
if file_not_exist_or_empty(path + '/' + number + leak_word + c_word + hack_word + '-trailer.mp4'):
print('[!]Video Download Failed! Trying again. [{}/3]', i + 1)
download_file_with_filename(trailer, number + leak_word + c_word + hack_word + '-trailer.mp4', path, filepath)
download_file_with_filename(trailer, number + leak_word + c_word + hack_word + '-trailer.mp4', path,
filepath)
continue
else:
break
@@ -197,7 +200,8 @@ def actor_photo_download(actors, save_dir, number):
failed += 1
print(f"[-]Actor photo '{dn_list[i][0]}' to '{dn_list[i][1]}' download failed!")
if failed: # 非致命错误,电影不移入失败文件夹,将来可以用模式3补齐
print(f"[-]Failed downloaded {failed}/{len(result)} actor photo for [{number}] to '{actors_dir}', you may retry run mode 3 later.")
print(
f"[-]Failed downloaded {failed}/{len(result)} actor photo for [{number}] to '{actors_dir}', you may retry run mode 3 later.")
else:
print(f"[+]Successfully downloaded {len(result)} actor photo.")

@@ -239,7 +243,7 @@ def extrafanart_download_one_by_one(data, path, filepath, json_data=None):
print(f'[!]Extrafanart download one by one mode runtime {time.perf_counter() - tm_start:.3f}s')

def extrafanart_download_threadpool(url_list, save_dir, number,json_data=None):
def extrafanart_download_threadpool(url_list, save_dir, number, json_data=None):
tm_start = time.perf_counter()
conf = config.getInstance()
extrafanart_dir = Path(save_dir) / conf.get_extrafanart()
@@ -262,7 +266,8 @@ def extrafanart_download_threadpool(url_list, save_dir, number,json_data=None):
failed += 1
print(f'[-]Extrafanart {i} for [{number}] download failed!')
if failed: # 非致命错误,电影不移入失败文件夹,将来可以用模式3补齐
print(f"[-]Failed downloaded {failed}/{len(result)} extrafanart images for [{number}] to '{extrafanart_dir}', you may retry run mode 3 later.")
print(
f"[-]Failed downloaded {failed}/{len(result)} extrafanart images for [{number}] to '{extrafanart_dir}', you may retry run mode 3 later.")
else:
print(f"[+]Successfully downloaded {len(result)} extrafanarts.")
if conf.debug():
@@ -307,12 +312,14 @@ def image_download(cover, fanart_path, thumb_path, path, filepath, json_headers=
shutil.copyfile(full_filepath, os.path.join(path, thumb_path))

def print_files(path, leak_word, c_word, naming_rule, part, cn_sub, json_data, filepath, tag, actor_list, liuchu, uncensored, hack_word,_4k,fanart_path,poster_path,thumb_path):
title, studio, year, outline, runtime, director, actor_photo, release, number, cover, trailer, website, series, label = get_info(json_data)
def print_files(path, leak_word, c_word, naming_rule, part, cn_sub, json_data, filepath, tag, actor_list, liuchu,
uncensored, hack_word, _4k, fanart_path, poster_path, thumb_path):
title, studio, year, outline, runtime, director, actor_photo, release, number, cover, trailer, website, series, label = get_info(
json_data)
if config.getInstance().main_mode() == 3: # 模式3下,由于视频文件不做任何改变,.nfo文件必须和视频文件名称除后缀外完全一致,KODI等软件方可支持
nfo_path = str(Path(filepath).with_suffix('.nfo'))
else:
nfo_path = os.path.join(path,f"{number}{part}{leak_word}{c_word}{hack_word}.nfo")
nfo_path = os.path.join(path, f"{number}{part}{leak_word}{c_word}{hack_word}.nfo")
try:
if not os.path.exists(path):
try:
@@ -364,6 +371,7 @@ def print_files(path, leak_word, c_word, naming_rule, part, cn_sub, json_data, f
pass
print(" <maker>" + studio + "</maker>", file=code)
print(" <label>" + label + "</label>", file=code)

skip_tags = config.getInstance().donot_save_tags()
if not skip_tags:
if cn_sub == '1':
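The hunk above is the heart of the skip_tags change: tag/genre lines are only written into the .nfo when a new `donot_save_tags()` config switch is off. A minimal stand-alone sketch of that gating, assuming `donot_save_tags()` returns a truthy value when the user opts out of tag output (not the full `print_files` body):

```python
import io

code = io.StringIO()  # stands in for the .nfo file handle opened by print_files
skip_tags = True      # assumed result of config.getInstance().donot_save_tags()
cn_sub = '1'

if not skip_tags:
    if cn_sub == '1':
        print("  <genre>中文字幕</genre>", file=code)  # emitted only when tags are not skipped

print(code.getvalue())  # empty here, because skip_tags suppressed the genre line
```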
@@ -385,7 +393,7 @@ def print_files(path, leak_word, c_word, naming_rule, part, cn_sub, json_data, f
if cn_sub == '1':
print(" <genre>中文字幕</genre>", file=code)
if liuchu == '流出':
print(" <genre>流出</genre>", file=code)
print(" <genre>无码流出</genre>", file=code)
if uncensored == 1:
print(" <genre>无码</genre>", file=code)
if hack_word != '':
@@ -471,7 +479,7 @@ def add_mark(poster_path, thumb_path, cn_sub, leak, uncensored, hack) -> None:
if cn_sub:
mark_type += ',字幕'
if leak:
mark_type += ',流出'
mark_type += ',无码流出'
if uncensored:
mark_type += ',无码'
if hack:
@@ -541,6 +549,8 @@ def add_to_pic(pic_path, img_pic, size, count, mode):
]
img_pic.paste(img_subt, (pos[count]['x'], pos[count]['y']), mask=a)
img_pic.save(pic_path, quality=95)

# ========================结束=================================
@@ -602,7 +612,8 @@ def paste_file_to_folder(filepath, path, multi_part, number, part, leak_word, c_
return

def paste_file_to_folder_mode2(filepath, path, multi_part, number, part, leak_word, c_word, hack_word): # 文件路径,番号,后缀,要移动至的位置
def paste_file_to_folder_mode2(filepath, path, multi_part, number, part, leak_word, c_word,
hack_word): # 文件路径,番号,后缀,要移动至的位置
if multi_part == 1:
number += part # 这时number会被附加上CD1后缀
filepath_obj = pathlib.Path(filepath)
@@ -727,7 +738,7 @@ def core_main_no_net_op(movie_path, number):
c_word = '-C' # 中文字幕影片后缀
uncensored = 1 if is_uncensored(number) else 0
if '流出' in movie_path or 'uncensored' in movie_path.lower():
leak_word = '-流出' # 流出影片后缀
leak_word = '-无码流出' # 无码流出影片后缀
leak = 1

if 'hack'.upper() in str(movie_path).upper() or '破解' in movie_path:
@@ -814,7 +825,7 @@ def core_main(movie_path, number_th, oCC, specified_source=None, specified_url=N
if '流出' in movie_path or 'uncensored' in movie_path.lower():
liuchu = '流出'
leak = 1
leak_word = '-流出' # 流出影片后缀
leak_word = '-无码流出' # 流出影片后缀
else:
leak = 0

@@ -823,9 +834,10 @@ def core_main(movie_path, number_th, oCC, specified_source=None, specified_url=N
hack_word = "-hack"

# 判断是否4k
if '4K' in tag: tag.remove('4K') # 从tag中移除'4K'
if '4K' in tag:
tag.remove('4K') # 从tag中移除'4K'
props = get_video_properties(movie_path) # 判断是否为4K视频
if props['width'] >=4096 or props['height'] >= 2160:
if props['width'] >= 4096 or props['height'] >= 2160:
_4k = '4k'
_4k_world = '-4k'

@@ -834,8 +846,7 @@ def core_main(movie_path, number_th, oCC, specified_source=None, specified_url=N
debug_print(json_data)

# 创建文件夹
#path = create_folder(rootpath + '/' + conf.success_folder(), json_data.get('location_rule'), json_data)

# path = create_folder(rootpath + '/' + conf.success_folder(), json_data.get('location_rule'), json_data)

cover = json_data.get('cover')
ext = image_ext(cover)
@@ -889,7 +900,7 @@ def core_main(movie_path, number_th, oCC, specified_source=None, specified_url=N

# 添加水印
if conf.is_watermark():
add_mark(os.path.join(path,poster_path), os.path.join(path,thumb_path), cn_sub, leak, uncensored, hack)
add_mark(os.path.join(path, poster_path), os.path.join(path, thumb_path), cn_sub, leak, uncensored, hack)

# 兼容Jellyfin封面图文件名规则
if multi_part and conf.jellyfin_multi_part_fanart():
@@ -899,8 +910,9 @@ def core_main(movie_path, number_th, oCC, specified_source=None, specified_url=N
paste_file_to_folder(movie_path, path, multi_part, number, part, leak_word, c_word, hack_word)

# 最后输出.nfo元数据文件,以完成.nfo文件创建作为任务成功标志
print_files(path, leak_word, c_word, json_data.get('naming_rule'), part, cn_sub, json_data, movie_path, tag, json_data.get('actor_list'), liuchu, uncensored, hack_word
,_4k,fanart_path,poster_path,thumb_path)
print_files(path, leak_word, c_word, json_data.get('naming_rule'), part, cn_sub, json_data, movie_path, tag,
json_data.get('actor_list'), liuchu, uncensored, hack_word
, _4k, fanart_path, poster_path, thumb_path)

elif conf.main_mode() == 2:
# 创建文件夹
@@ -908,7 +920,7 @@ def core_main(movie_path, number_th, oCC, specified_source=None, specified_url=N
# 移动文件
paste_file_to_folder_mode2(movie_path, path, multi_part, number, part, leak_word, c_word, hack_word)
if conf.is_watermark():
add_mark(os.path.join(path,poster_path), os.path.join(path,thumb_path), cn_sub, leak, uncensored, hack)
add_mark(os.path.join(path, poster_path), os.path.join(path, thumb_path), cn_sub, leak, uncensored, hack)

elif conf.main_mode() == 3:
path = str(Path(movie_path).parent)
@@ -952,7 +964,7 @@ def core_main(movie_path, number_th, oCC, specified_source=None, specified_url=N

# 添加水印
if conf.is_watermark():
add_mark(os.path.join(path,poster_path), os.path.join(path,thumb_path), cn_sub, leak, uncensored, hack)
add_mark(os.path.join(path, poster_path), os.path.join(path, thumb_path), cn_sub, leak, uncensored, hack)

# 兼容Jellyfin封面图文件名规则
if multi_part and conf.jellyfin_multi_part_fanart():
@@ -960,4 +972,5 @@ def core_main(movie_path, number_th, oCC, specified_source=None, specified_url=N

# 最后输出.nfo元数据文件,以完成.nfo文件创建作为任务成功标志
print_files(path, leak_word, c_word, json_data.get('naming_rule'), part, cn_sub, json_data, movie_path,
tag, json_data.get('actor_list'), liuchu, uncensored, hack_word,fanart_path,poster_path,thumb_path)
tag, json_data.get('actor_list'), liuchu, uncensored, hack_word, fanart_path, poster_path,
thumb_path)
@@ -9,3 +9,4 @@ certifi
MechanicalSoup
opencc-python-reimplemented
face_recognition
get-video-properties
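The new get-video-properties dependency backs the 4K probe added in core.py above. A minimal sketch of that check, assuming the package is imported via its usual `videoprops` module name and that a local video file exists at the given path:

```python
from videoprops import get_video_properties  # assumption: import name of the get-video-properties package

movie_path = "sample.mp4"                 # hypothetical local file
props = get_video_properties(movie_path)  # ffprobe-based stream properties, including width/height
if props['width'] >= 4096 or props['height'] >= 2160:
    _4k, _4k_world = '4k', '-4k'          # same thresholds and names as the hunk above
else:
    _4k, _4k_world = '', ''
```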
@@ -11,7 +11,6 @@ from .gcolle import Gcolle
from .getchu import Getchu
from .jav321 import Jav321
from .javdb import Javdb
from .mv91 import Mv91
from .fc2 import Fc2
from .madou import Madou
from .mgstage import Mgstage
@@ -19,6 +18,7 @@ from .javbus import Javbus
from .xcity import Xcity
from .avsox import Avsox
from .javlibrary import Javlibrary
from .javday import Javday

from .tmdb import Tmdb
from .imdb import Imdb
@@ -50,8 +50,8 @@ class Scraping:
"""
"""
adult_full_sources = ['javlibrary', 'javdb', 'javbus', 'airav', 'fanza', 'xcity', 'jav321',
'mgstage', 'fc2', 'avsox', 'dlsite', 'carib', 'madou', 'mv91',
'getchu', 'gcolle'
'mgstage', 'fc2', 'avsox', 'dlsite', 'carib', 'madou',
'getchu', 'gcolle','javday'
]
adult_func_mapping = {
'avsox': Avsox().scrape,
@@ -65,11 +65,11 @@ class Scraping:
'fanza': Fanza().scrape,
'airav': Airav().scrape,
'carib': Carib().scrape,
'mv91': Mv91().scrape,
'gcolle': Gcolle().scrape,
'javdb': Javdb().scrape,
'getchu': Getchu().scrape,
'javlibrary': Javlibrary().scrape,
'javday': Javday().scrape
}

general_full_sources = ['tmdb', 'imdb']
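For context, this is the registration pattern the diff edits: a scraper is wired into the Scraping class by importing it, listing its name in `adult_full_sources`, and mapping the name to its `scrape` callable. A sketch of the dispatch this enables, with a stub class and a hypothetical number, since the real `scrape` signature is not shown in this hunk:

```python
class Javday:                                   # stub standing in for scrapinglib.javday.Javday
    def scrape(self, number):
        return {"number": number, "source": "javday"}

adult_full_sources = ['javlibrary', 'javdb', 'javbus', 'getchu', 'gcolle', 'javday']
adult_func_mapping = {'javday': Javday().scrape}

source = 'javday'
if source in adult_full_sources:
    result = adult_func_mapping[source]('HYPOTHETICAL-001')  # illustrative call only
```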
scrapinglib/javday.py (new file)
@@ -0,0 +1,43 @@
# -*- coding: utf-8 -*-

import re
from lxml import etree
from urllib.parse import urlparse, unquote
from .parser import Parser


class Javday(Parser):
    source = 'javday'

    expr_url = '/html/head/meta[@property="og:url"]/@content'
    expr_cover = '/html/head/meta[@property="og:image"]/@content'
    expr_tags = '/html/head/meta[@name="keywords"]/@content'
    expr_title = "/html/head/title/text()"
    expr_actor = "//span[@class='vod_actor']/a/text()"
    expr_studio = '//span[@class="producer"]/a/text()'
    expr_number = '//span[@class="jpnum"]/text()'

    def extraInit(self):
        self.imagecut = 4
        self.uncensored = True

    def search(self, number):
        self.number = number.strip().upper()
        if self.specifiedUrl:
            self.detailurl = self.specifiedUrl
        else:
            self.detailurl = "https://javday.tv/videos/" + self.number.replace("-","") + "/"
        self.htmlcode = self.getHtml(self.detailurl)
        if self.htmlcode == 404:
            return 404
        htmltree = etree.fromstring(self.htmlcode, etree.HTMLParser())
        self.detailurl = self.getTreeElement(htmltree, self.expr_url)

        result = self.dictformat(htmltree)
        return result

    def getTitle(self, htmltree):
        title = super().getTitle(htmltree)
        # 删除番号和网站名
        result = title.replace(self.number,"").replace("- JAVDAY.TV","").strip()
        return result
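A quick illustration of how the new parser's `search()` derives its detail URL: the hyphen is stripped from the upper-cased number before it is appended to the javday.tv path (the number below is hypothetical):

```python
number = "abc-123"  # hypothetical input
detailurl = "https://javday.tv/videos/" + number.strip().upper().replace("-", "") + "/"
print(detailurl)    # https://javday.tv/videos/ABC123/
```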
@@ -1,94 +0,0 @@
# -*- coding: utf-8 -*-


import re
from lxml import etree
from .parser import Parser


class Mv91(Parser):
    source = 'mv91'

    expr_number = '//div[@class="player-title"]/text()'
    expr_title = '//div[@class="player-title"]/text()'
    expr_release = '//p[@class="date"]/text()'
    expr_outline = '//div[@class="play-text"]/text()'
    expr_tags = '//div[@class="player-tag"]/text()'
    expr_actor = '//p[@class="player-name"]/text()'

    def extraInit(self):
        self.imagecut = 0
        self.uncensored = True

    def getHtmlTree(self, url, type=None):
        self.htmlcode = self.getHtml(url, type)
        if self.htmlcode == 404:
            return 404
        ret = etree.fromstring(self.htmlcode, etree.HTMLParser())
        return ret

    def queryNumberUrl(self, number):
        keyword = number.replace('91CM-','').replace('91MS-','')
        search_html = self.getHtml('https://www.91mv.org/index/search?keywords=' + keyword)
        html = etree.fromstring(search_html, etree.HTMLParser())
        endurl = html.xpath('//a[@class="video-list"]/@href')[0]
        return 'https://www.91mv.org' + endurl

    def getNum(self, htmltree):
        try:
            num = super().getNum(htmltree)
            finds = re.findall('(.*)(91.*-\d*)',num)
            if finds:
                result = str(finds[0][1])
            else:
                result = ' '.join(num.replace('/',' ').split())
                result = result.split()[1]
            if self.number.upper() != result.upper():
                raise Exception(f'[!] {self.number}: find {result} in mv91, not match')
            return result.strip()
        except:
            return ''

    def getTitle(self, htmltree):
        try:
            title = super().getTitle(htmltree)
            finds = re.findall('(.*)(91.*-\d*)',title)
            if finds:
                result = str(finds[0][0])
            else:
                result = ' '.join(title.replace('/',' ').split())
                result = result.split()[0]
            return result.replace('「预告」','').strip('/ ')
        except:
            return ''

    def getStudio(self, htmltree):
        return '91制片厂'

    def getActors(self, htmltree):
        b=[]
        for player in self.getTreeAll(htmltree, self.expr_actor):
            player = player.replace('主演:','')
            if '/' in player:
                player = player.split('/')[0]
            player = re.sub(r'[0-9]+', '', player)
            b.append(player)
        return b

    def getRelease(self, htmltree):
        try:
            result = super().getRelease(htmltree)
            date = result.replace('日期:','')
            if isinstance(date, str) and len(date):
                return date
        except:
            pass
        return ''

    def getCover(self, htmltree):
        try:
            url = str(re.findall('var pic_url = "(.*?)"', self.htmlcode)[0])
            return url.strip()
        except:
            return ''