Merge branch 'master' into skip_tags2

This commit is contained in:
Yoshiko2
2022-11-22 20:02:15 +08:00
committed by GitHub
7 changed files with 111 additions and 147 deletions

View File

@@ -45,12 +45,13 @@ zh_cn/zh_tw/jp指对应语言输出的词按设置的对应语言输出。
<a zh_cn="删除" zh_tw="删除" jp="删除" keyword=",懸疑,悬疑,"/> <a zh_cn="删除" zh_tw="删除" jp="删除" keyword=",懸疑,悬疑,"/>
<a zh_cn="删除" zh_tw="删除" jp="删除" keyword=",亞洲,亚洲,"/> <a zh_cn="删除" zh_tw="删除" jp="删除" keyword=",亞洲,亚洲,"/>
<a zh_cn="删除" zh_tw="删除" jp="删除" keyword=",ハロウィーンキャンペーン,"/> <a zh_cn="删除" zh_tw="删除" jp="删除" keyword=",ハロウィーンキャンペーン,"/>
<a zh_cn="删除" zh_tw="删除" jp="删除" keyword=",DMM獨家,DMM独家,DMM專屬,DMM专属,"/>
<a zh_cn="删除" zh_tw="删除" jp="删除" keyword=",數位馬賽克,数位马赛克,"/>
<a zh_cn="16小时+" zh_tw="16小時+" jp="16時間以上作品" keyword=",16小時以上作品,16小时以上作品,16時間以上作品,16小时+,16小時+,"/> <a zh_cn="16小时+" zh_tw="16小時+" jp="16時間以上作品" keyword=",16小時以上作品,16小时以上作品,16時間以上作品,16小时+,16小時+,"/>
<a zh_cn="3D" zh_tw="3D" jp="3D" keyword=",3D,"/> <a zh_cn="3D" zh_tw="3D" jp="3D" keyword=",3D,"/>
<a zh_cn="3D卡通" zh_tw="3D卡通" jp="3Dエロアニメ" keyword=",3D卡通,3Dエロアニメ,"/> <a zh_cn="3D卡通" zh_tw="3D卡通" jp="3Dエロアニメ" keyword=",3D卡通,3Dエロアニメ,"/>
<a zh_cn="4K" zh_tw="4K" jp="4K" keyword=",4K,"/> <a zh_cn="4K" zh_tw="4K" jp="4K" keyword=",4K,"/>
<a zh_cn="DMM独家" zh_tw="DMM獨家" jp="DMM獨家" keyword=",DMM獨家,DMM独家,DMM專屬,DMM专属,"/>
<a zh_cn="M女" zh_tw="M女" jp="M女" keyword=",M女,"/> <a zh_cn="M女" zh_tw="M女" jp="M女" keyword=",M女,"/>
<a zh_cn="SM" zh_tw="SM" jp="SM" keyword=",SM,"/> <a zh_cn="SM" zh_tw="SM" jp="SM" keyword=",SM,"/>
<a zh_cn="轻虐" zh_tw="輕虐" jp="微SM" keyword=",微SM,轻虐,輕虐,"/> <a zh_cn="轻虐" zh_tw="輕虐" jp="微SM" keyword=",微SM,轻虐,輕虐,"/>
@@ -115,8 +116,7 @@ zh_cn/zh_tw/jp指对应语言输出的词按设置的对应语言输出。
<a zh_cn="男优潮吹" zh_tw="男優潮吹" jp="男の潮吹き" keyword=",男潮吹,男の潮吹き,男优潮吹,男優潮吹,"/> <a zh_cn="男优潮吹" zh_tw="男優潮吹" jp="男の潮吹き" keyword=",男潮吹,男の潮吹き,男优潮吹,男優潮吹,"/>
<a zh_cn="巴士导游" zh_tw="巴士導遊" jp="車掌小姐" keyword=",車掌小姐,车掌小姐,巴士乘务员,巴士乘務員,巴士导游,巴士導遊,バスガイド,"/> <a zh_cn="巴士导游" zh_tw="巴士導遊" jp="車掌小姐" keyword=",車掌小姐,车掌小姐,巴士乘务员,巴士乘務員,巴士导游,巴士導遊,バスガイド,"/>
<a zh_cn="熟女" zh_tw="熟女" jp="熟女" keyword=",熟女,成熟的女人,"/> <a zh_cn="熟女" zh_tw="熟女" jp="熟女" keyword=",熟女,成熟的女人,"/>
<a zh_cn="出轨" zh_tw="出軌" jp="出軌" keyword=",出軌,出轨,"/> <a zh_cn="出轨" zh_tw="出軌" jp="出軌" keyword=",出軌,出轨,白天出軌,白天出轨,通姦,"/>
<a zh_cn="白天出轨" zh_tw="白天出軌" jp="白天出轨" keyword=",白天出軌,白天出轨,通姦,"/>
<a zh_cn="处男" zh_tw="處男" jp="處男" keyword=",處男,处男,"/> <a zh_cn="处男" zh_tw="處男" jp="處男" keyword=",處男,处男,"/>
<a zh_cn="处女" zh_tw="處女" jp="處女" keyword=",處女,处女,処女,童貞,"/> <a zh_cn="处女" zh_tw="處女" jp="處女" keyword=",處女,处女,処女,童貞,"/>
<a zh_cn="触手" zh_tw="觸手" jp="觸手" keyword=",觸手,触手,"/> <a zh_cn="触手" zh_tw="觸手" jp="觸手" keyword=",觸手,触手,"/>
@@ -133,7 +133,7 @@ zh_cn/zh_tw/jp指对应语言输出的词按设置的对应语言输出。
<a zh_cn="放尿" zh_tw="放尿" jp="放尿" keyword=",放尿,"/> <a zh_cn="放尿" zh_tw="放尿" jp="放尿" keyword=",放尿,"/>
<a zh_cn="女服务生" zh_tw="女服務生" jp="ウェイトレス" keyword=",服務生,服务生,女服务生,女服務生,ウェイトレス,"/> <a zh_cn="女服务生" zh_tw="女服務生" jp="ウェイトレス" keyword=",服務生,服务生,女服务生,女服務生,ウェイトレス,"/>
<a zh_cn="蒙面" zh_tw="蒙面" jp="覆面・マスク" keyword=",蒙面・面罩,蒙面・面具,覆面・マスク,"/> <a zh_cn="蒙面" zh_tw="蒙面" jp="覆面・マスク" keyword=",蒙面・面罩,蒙面・面具,覆面・マスク,"/>
<a zh_cn="肛交" zh_tw="肛交" jp="肛交" keyword=",肛交,アナル,"/> <a zh_cn="肛交" zh_tw="肛交" jp="肛交" keyword=",肛門・肛交,肛交,アナル,"/>
<a zh_cn="肛内中出" zh_tw="肛內中出" jp="肛內中出" keyword=",肛内中出,肛內中出,"/> <a zh_cn="肛内中出" zh_tw="肛內中出" jp="肛內中出" keyword=",肛内中出,肛內中出,"/>
<a zh_cn="个子高" zh_tw="個子高" jp="个子高" keyword=",高,个子高,個子高,"/> <a zh_cn="个子高" zh_tw="個子高" jp="个子高" keyword=",高,个子高,個子高,"/>
<a zh_cn="高中生" zh_tw="高中生" jp="高中生" keyword=",高中女生,高中生,"/> <a zh_cn="高中生" zh_tw="高中生" jp="高中生" keyword=",高中女生,高中生,"/>
@@ -268,7 +268,6 @@ zh_cn/zh_tw/jp指对应语言输出的词按设置的对应语言输出。
<a zh_cn="插入手指" zh_tw="插入手指" jp="手指插入" keyword=",手指插入,插入手指,"/> <a zh_cn="插入手指" zh_tw="插入手指" jp="手指插入" keyword=",手指插入,插入手指,"/>
<a zh_cn="首次亮相" zh_tw="首次亮相" jp="首次亮相" keyword=",首次亮相,"/> <a zh_cn="首次亮相" zh_tw="首次亮相" jp="首次亮相" keyword=",首次亮相,"/>
<a zh_cn="叔母" zh_tw="叔母" jp="叔母さん" keyword=",叔母,叔母さん,"/> <a zh_cn="叔母" zh_tw="叔母" jp="叔母さん" keyword=",叔母,叔母さん,"/>
<a zh_cn="数位马赛克" zh_tw="數位馬賽克" jp="數位馬賽克" keyword=",數位馬賽克,数位马赛克,"/>
<a zh_cn="双性人" zh_tw="雙性人" jp="雙性人" keyword=",雙性人,双性人,"/> <a zh_cn="双性人" zh_tw="雙性人" jp="雙性人" keyword=",雙性人,双性人,"/>
<a zh_cn="韵律服" zh_tw="韻律服" jp="レオタード" keyword=",韵律服,韻律服,レオタード,"/> <a zh_cn="韵律服" zh_tw="韻律服" jp="レオタード" keyword=",韵律服,韻律服,レオタード,"/>
<a zh_cn="水手服" zh_tw="水手服" jp="セーラー服" keyword=",水手服,セーラー服,"/> <a zh_cn="水手服" zh_tw="水手服" jp="セーラー服" keyword=",水手服,セーラー服,"/>
@@ -290,7 +289,7 @@ zh_cn/zh_tw/jp指对应语言输出的词按设置的对应语言输出。
<a zh_cn="玩物" zh_tw="玩物" jp="玩具" keyword=",玩具,玩物,"/> <a zh_cn="玩物" zh_tw="玩物" jp="玩具" keyword=",玩具,玩物,"/>
<a zh_cn="适合手机垂直播放" zh_tw="適合手機垂直播放" jp="為智能手機推薦垂直視頻" keyword=",スマホ専用縦動画,為智能手機推薦垂直視頻,适合手机垂直播放,適合手機垂直播放,"/> <a zh_cn="适合手机垂直播放" zh_tw="適合手機垂直播放" jp="為智能手機推薦垂直視頻" keyword=",スマホ専用縦動画,為智能手機推薦垂直視頻,适合手机垂直播放,適合手機垂直播放,"/>
<a zh_cn="猥亵穿着" zh_tw="猥褻穿着" jp="猥褻穿著" keyword=",猥褻穿著,猥亵穿着,猥褻穿着,"/> <a zh_cn="猥亵穿着" zh_tw="猥褻穿着" jp="猥褻穿著" keyword=",猥褻穿著,猥亵穿着,猥褻穿着,"/>
<a zh_cn="无码流出" zh_tw="無碼流出" jp="无码流出" keyword=",無碼流出,无码流出,"/> <a zh_cn="无码流出" zh_tw="無碼流出" jp="无码流出" keyword=",流出,無碼流出,无码流出,"/>
<a zh_cn="无码破解" zh_tw="無碼破解" jp="無碼破解" keyword=",無碼破解,无码破解,"/> <a zh_cn="无码破解" zh_tw="無碼破解" jp="無碼破解" keyword=",無碼破解,无码破解,"/>
<a zh_cn="无毛" zh_tw="無毛" jp="無毛" keyword=",無毛,无毛,剃毛,白虎,パイパン,"/> <a zh_cn="无毛" zh_tw="無毛" jp="無毛" keyword=",無毛,无毛,剃毛,白虎,パイパン,"/>
<a zh_cn="剧情" zh_tw="劇情" jp="戲劇" keyword=",戲劇,戏剧,剧情,劇情,戲劇x,戏剧、连续剧,戲劇、連續劇,ドラマ,"/> <a zh_cn="剧情" zh_tw="劇情" jp="戲劇" keyword=",戲劇,戏剧,剧情,劇情,戲劇x,戏剧、连续剧,戲劇、連續劇,ドラマ,"/>

View File

@@ -15,6 +15,8 @@
[English](https://github.com/yoshiko2/Movie_Data_Capture/blob/master/README_EN.md) [English](https://github.com/yoshiko2/Movie_Data_Capture/blob/master/README_EN.md)
## 广告位招租 yoshiko2.dev@gmail.com
# 文档 # 文档
* [官方教程WIKI](https://github.com/yoshiko2/Movie_Data_Capture/wiki) * [官方教程WIKI](https://github.com/yoshiko2/Movie_Data_Capture/wiki)
* [VergilGao's Docker部署](https://github.com/VergilGao/docker-mdc) * [VergilGao's Docker部署](https://github.com/VergilGao/docker-mdc)

69
core.py
View File

@@ -84,14 +84,15 @@ def small_cover_check(path, filename, cover_small, movie_path, json_headers=None
def create_folder(json_data): # 创建文件夹 def create_folder(json_data): # 创建文件夹
title, studio, year, outline, runtime, director, actor_photo, release, number, cover, trailer, website, series, label = get_info(json_data) title, studio, year, outline, runtime, director, actor_photo, release, number, cover, trailer, website, series, label = get_info(
json_data)
conf = config.getInstance() conf = config.getInstance()
success_folder = conf.success_folder() success_folder = conf.success_folder()
actor = json_data.get('actor') actor = json_data.get('actor')
location_rule = eval(conf.location_rule(), json_data) location_rule = eval(conf.location_rule(), json_data)
if 'actor' in conf.location_rule() and len(actor) > 100: if 'actor' in conf.location_rule() and len(actor) > 100:
print(conf.location_rule()) print(conf.location_rule())
location_rule = eval(conf.location_rule().replace("actor","'多人作品'"), json_data) location_rule = eval(conf.location_rule().replace("actor", "'多人作品'"), json_data)
maxlen = conf.max_title_len() maxlen = conf.max_title_len()
if 'title' in conf.location_rule() and len(title) > maxlen: if 'title' in conf.location_rule() and len(title) > maxlen:
shorttitle = title[0:maxlen] shorttitle = title[0:maxlen]
@@ -129,7 +130,7 @@ def download_file_with_filename(url, filename, path, filepath, json_headers=None
except: except:
print(f"[-]Fatal error! Can not make folder '{path}'") print(f"[-]Fatal error! Can not make folder '{path}'")
os._exit(0) os._exit(0)
r = get_html(url=url,return_type='content',json_headers=json_headers) r = get_html(url=url, return_type='content', json_headers=json_headers)
if r == '': if r == '':
print('[-]Movie Download Data not found!') print('[-]Movie Download Data not found!')
return return
@@ -144,20 +145,22 @@ def download_file_with_filename(url, filename, path, filepath, json_headers=None
# moveFailedFolder(filepath) # moveFailedFolder(filepath)
# return # return
except Exception as e: except Exception as e:
print('[-]Image Download :Error',e) print('[-]Image Download :Error', e)
print('[-]Connect Failed! Please check your Proxy or Network!') print('[-]Connect Failed! Please check your Proxy or Network!')
moveFailedFolder(filepath) moveFailedFolder(filepath)
return return
def trailer_download(trailer, leak_word, c_word, hack_word, number, path, filepath): def trailer_download(trailer, leak_word, c_word, hack_word, number, path, filepath):
if download_file_with_filename(trailer, number + leak_word + c_word + hack_word + '-trailer.mp4', path, filepath) == 'failed': if download_file_with_filename(trailer, number + leak_word + c_word + hack_word + '-trailer.mp4', path,
filepath) == 'failed':
return return
configProxy = config.getInstance().proxy() configProxy = config.getInstance().proxy()
for i in range(configProxy.retry): for i in range(configProxy.retry):
if file_not_exist_or_empty(path+'/' + number + leak_word + c_word + hack_word + '-trailer.mp4'): if file_not_exist_or_empty(path + '/' + number + leak_word + c_word + hack_word + '-trailer.mp4'):
print('[!]Video Download Failed! Trying again. [{}/3]', i + 1) print('[!]Video Download Failed! Trying again. [{}/3]', i + 1)
download_file_with_filename(trailer, number + leak_word + c_word + hack_word + '-trailer.mp4', path, filepath) download_file_with_filename(trailer, number + leak_word + c_word + hack_word + '-trailer.mp4', path,
filepath)
continue continue
else: else:
break break
@@ -197,7 +200,8 @@ def actor_photo_download(actors, save_dir, number):
failed += 1 failed += 1
print(f"[-]Actor photo '{dn_list[i][0]}' to '{dn_list[i][1]}' download failed!") print(f"[-]Actor photo '{dn_list[i][0]}' to '{dn_list[i][1]}' download failed!")
if failed: # 非致命错误电影不移入失败文件夹将来可以用模式3补齐 if failed: # 非致命错误电影不移入失败文件夹将来可以用模式3补齐
print(f"[-]Failed downloaded {failed}/{len(result)} actor photo for [{number}] to '{actors_dir}', you may retry run mode 3 later.") print(
f"[-]Failed downloaded {failed}/{len(result)} actor photo for [{number}] to '{actors_dir}', you may retry run mode 3 later.")
else: else:
print(f"[+]Successfully downloaded {len(result)} actor photo.") print(f"[+]Successfully downloaded {len(result)} actor photo.")
@@ -239,7 +243,7 @@ def extrafanart_download_one_by_one(data, path, filepath, json_data=None):
print(f'[!]Extrafanart download one by one mode runtime {time.perf_counter() - tm_start:.3f}s') print(f'[!]Extrafanart download one by one mode runtime {time.perf_counter() - tm_start:.3f}s')
def extrafanart_download_threadpool(url_list, save_dir, number,json_data=None): def extrafanart_download_threadpool(url_list, save_dir, number, json_data=None):
tm_start = time.perf_counter() tm_start = time.perf_counter()
conf = config.getInstance() conf = config.getInstance()
extrafanart_dir = Path(save_dir) / conf.get_extrafanart() extrafanart_dir = Path(save_dir) / conf.get_extrafanart()
@@ -262,7 +266,8 @@ def extrafanart_download_threadpool(url_list, save_dir, number,json_data=None):
failed += 1 failed += 1
print(f'[-]Extrafanart {i} for [{number}] download failed!') print(f'[-]Extrafanart {i} for [{number}] download failed!')
if failed: # 非致命错误电影不移入失败文件夹将来可以用模式3补齐 if failed: # 非致命错误电影不移入失败文件夹将来可以用模式3补齐
print(f"[-]Failed downloaded {failed}/{len(result)} extrafanart images for [{number}] to '{extrafanart_dir}', you may retry run mode 3 later.") print(
f"[-]Failed downloaded {failed}/{len(result)} extrafanart images for [{number}] to '{extrafanart_dir}', you may retry run mode 3 later.")
else: else:
print(f"[+]Successfully downloaded {len(result)} extrafanarts.") print(f"[+]Successfully downloaded {len(result)} extrafanarts.")
if conf.debug(): if conf.debug():
@@ -307,12 +312,14 @@ def image_download(cover, fanart_path, thumb_path, path, filepath, json_headers=
shutil.copyfile(full_filepath, os.path.join(path, thumb_path)) shutil.copyfile(full_filepath, os.path.join(path, thumb_path))
def print_files(path, leak_word, c_word, naming_rule, part, cn_sub, json_data, filepath, tag, actor_list, liuchu, uncensored, hack_word,_4k,fanart_path,poster_path,thumb_path): def print_files(path, leak_word, c_word, naming_rule, part, cn_sub, json_data, filepath, tag, actor_list, liuchu,
title, studio, year, outline, runtime, director, actor_photo, release, number, cover, trailer, website, series, label = get_info(json_data) uncensored, hack_word, _4k, fanart_path, poster_path, thumb_path):
title, studio, year, outline, runtime, director, actor_photo, release, number, cover, trailer, website, series, label = get_info(
json_data)
if config.getInstance().main_mode() == 3: # 模式3下由于视频文件不做任何改变.nfo文件必须和视频文件名称除后缀外完全一致KODI等软件方可支持 if config.getInstance().main_mode() == 3: # 模式3下由于视频文件不做任何改变.nfo文件必须和视频文件名称除后缀外完全一致KODI等软件方可支持
nfo_path = str(Path(filepath).with_suffix('.nfo')) nfo_path = str(Path(filepath).with_suffix('.nfo'))
else: else:
nfo_path = os.path.join(path,f"{number}{part}{leak_word}{c_word}{hack_word}.nfo") nfo_path = os.path.join(path, f"{number}{part}{leak_word}{c_word}{hack_word}.nfo")
try: try:
if not os.path.exists(path): if not os.path.exists(path):
try: try:
@@ -364,6 +371,7 @@ def print_files(path, leak_word, c_word, naming_rule, part, cn_sub, json_data, f
pass pass
print(" <maker>" + studio + "</maker>", file=code) print(" <maker>" + studio + "</maker>", file=code)
print(" <label>" + label + "</label>", file=code) print(" <label>" + label + "</label>", file=code)
skip_tags = config.getInstance().donot_save_tags() skip_tags = config.getInstance().donot_save_tags()
if not skip_tags: if not skip_tags:
if cn_sub == '1': if cn_sub == '1':
@@ -385,7 +393,7 @@ def print_files(path, leak_word, c_word, naming_rule, part, cn_sub, json_data, f
if cn_sub == '1': if cn_sub == '1':
print(" <genre>中文字幕</genre>", file=code) print(" <genre>中文字幕</genre>", file=code)
if liuchu == '流出': if liuchu == '流出':
print(" <genre>流出</genre>", file=code) print(" <genre>无码流出</genre>", file=code)
if uncensored == 1: if uncensored == 1:
print(" <genre>无码</genre>", file=code) print(" <genre>无码</genre>", file=code)
if hack_word != '': if hack_word != '':
@@ -471,7 +479,7 @@ def add_mark(poster_path, thumb_path, cn_sub, leak, uncensored, hack) -> None:
if cn_sub: if cn_sub:
mark_type += ',字幕' mark_type += ',字幕'
if leak: if leak:
mark_type += ',流出' mark_type += ',无码流出'
if uncensored: if uncensored:
mark_type += ',无码' mark_type += ',无码'
if hack: if hack:
@@ -541,6 +549,8 @@ def add_to_pic(pic_path, img_pic, size, count, mode):
] ]
img_pic.paste(img_subt, (pos[count]['x'], pos[count]['y']), mask=a) img_pic.paste(img_subt, (pos[count]['x'], pos[count]['y']), mask=a)
img_pic.save(pic_path, quality=95) img_pic.save(pic_path, quality=95)
# ========================结束================================= # ========================结束=================================
@@ -602,7 +612,8 @@ def paste_file_to_folder(filepath, path, multi_part, number, part, leak_word, c_
return return
def paste_file_to_folder_mode2(filepath, path, multi_part, number, part, leak_word, c_word, hack_word): # 文件路径,番号,后缀,要移动至的位置 def paste_file_to_folder_mode2(filepath, path, multi_part, number, part, leak_word, c_word,
hack_word): # 文件路径,番号,后缀,要移动至的位置
if multi_part == 1: if multi_part == 1:
number += part # 这时number会被附加上CD1后缀 number += part # 这时number会被附加上CD1后缀
filepath_obj = pathlib.Path(filepath) filepath_obj = pathlib.Path(filepath)
@@ -727,7 +738,7 @@ def core_main_no_net_op(movie_path, number):
c_word = '-C' # 中文字幕影片后缀 c_word = '-C' # 中文字幕影片后缀
uncensored = 1 if is_uncensored(number) else 0 uncensored = 1 if is_uncensored(number) else 0
if '流出' in movie_path or 'uncensored' in movie_path.lower(): if '流出' in movie_path or 'uncensored' in movie_path.lower():
leak_word = '-流出' # 流出影片后缀 leak_word = '-无码流出' # 无码流出影片后缀
leak = 1 leak = 1
if 'hack'.upper() in str(movie_path).upper() or '破解' in movie_path: if 'hack'.upper() in str(movie_path).upper() or '破解' in movie_path:
@@ -814,7 +825,7 @@ def core_main(movie_path, number_th, oCC, specified_source=None, specified_url=N
if '流出' in movie_path or 'uncensored' in movie_path.lower(): if '流出' in movie_path or 'uncensored' in movie_path.lower():
liuchu = '流出' liuchu = '流出'
leak = 1 leak = 1
leak_word = '-流出' # 流出影片后缀 leak_word = '-无码流出' # 流出影片后缀
else: else:
leak = 0 leak = 0
@@ -823,9 +834,10 @@ def core_main(movie_path, number_th, oCC, specified_source=None, specified_url=N
hack_word = "-hack" hack_word = "-hack"
# 判断是否4k # 判断是否4k
if '4K' in tag: tag.remove('4K') # 从tag中移除'4K' if '4K' in tag:
tag.remove('4K') # 从tag中移除'4K'
props = get_video_properties(movie_path) # 判断是否为4K视频 props = get_video_properties(movie_path) # 判断是否为4K视频
if props['width'] >=4096 or props['height'] >= 2160: if props['width'] >= 4096 or props['height'] >= 2160:
_4k = '4k' _4k = '4k'
_4k_world = '-4k' _4k_world = '-4k'
@@ -834,8 +846,7 @@ def core_main(movie_path, number_th, oCC, specified_source=None, specified_url=N
debug_print(json_data) debug_print(json_data)
# 创建文件夹 # 创建文件夹
#path = create_folder(rootpath + '/' + conf.success_folder(), json_data.get('location_rule'), json_data) # path = create_folder(rootpath + '/' + conf.success_folder(), json_data.get('location_rule'), json_data)
cover = json_data.get('cover') cover = json_data.get('cover')
ext = image_ext(cover) ext = image_ext(cover)
@@ -889,7 +900,7 @@ def core_main(movie_path, number_th, oCC, specified_source=None, specified_url=N
# 添加水印 # 添加水印
if conf.is_watermark(): if conf.is_watermark():
add_mark(os.path.join(path,poster_path), os.path.join(path,thumb_path), cn_sub, leak, uncensored, hack) add_mark(os.path.join(path, poster_path), os.path.join(path, thumb_path), cn_sub, leak, uncensored, hack)
# 兼容Jellyfin封面图文件名规则 # 兼容Jellyfin封面图文件名规则
if multi_part and conf.jellyfin_multi_part_fanart(): if multi_part and conf.jellyfin_multi_part_fanart():
@@ -899,8 +910,9 @@ def core_main(movie_path, number_th, oCC, specified_source=None, specified_url=N
paste_file_to_folder(movie_path, path, multi_part, number, part, leak_word, c_word, hack_word) paste_file_to_folder(movie_path, path, multi_part, number, part, leak_word, c_word, hack_word)
# 最后输出.nfo元数据文件以完成.nfo文件创建作为任务成功标志 # 最后输出.nfo元数据文件以完成.nfo文件创建作为任务成功标志
print_files(path, leak_word, c_word, json_data.get('naming_rule'), part, cn_sub, json_data, movie_path, tag, json_data.get('actor_list'), liuchu, uncensored, hack_word print_files(path, leak_word, c_word, json_data.get('naming_rule'), part, cn_sub, json_data, movie_path, tag,
,_4k,fanart_path,poster_path,thumb_path) json_data.get('actor_list'), liuchu, uncensored, hack_word
, _4k, fanart_path, poster_path, thumb_path)
elif conf.main_mode() == 2: elif conf.main_mode() == 2:
# 创建文件夹 # 创建文件夹
@@ -908,7 +920,7 @@ def core_main(movie_path, number_th, oCC, specified_source=None, specified_url=N
# 移动文件 # 移动文件
paste_file_to_folder_mode2(movie_path, path, multi_part, number, part, leak_word, c_word, hack_word) paste_file_to_folder_mode2(movie_path, path, multi_part, number, part, leak_word, c_word, hack_word)
if conf.is_watermark(): if conf.is_watermark():
add_mark(os.path.join(path,poster_path), os.path.join(path,thumb_path), cn_sub, leak, uncensored, hack) add_mark(os.path.join(path, poster_path), os.path.join(path, thumb_path), cn_sub, leak, uncensored, hack)
elif conf.main_mode() == 3: elif conf.main_mode() == 3:
path = str(Path(movie_path).parent) path = str(Path(movie_path).parent)
@@ -952,7 +964,7 @@ def core_main(movie_path, number_th, oCC, specified_source=None, specified_url=N
# 添加水印 # 添加水印
if conf.is_watermark(): if conf.is_watermark():
add_mark(os.path.join(path,poster_path), os.path.join(path,thumb_path), cn_sub, leak, uncensored, hack) add_mark(os.path.join(path, poster_path), os.path.join(path, thumb_path), cn_sub, leak, uncensored, hack)
# 兼容Jellyfin封面图文件名规则 # 兼容Jellyfin封面图文件名规则
if multi_part and conf.jellyfin_multi_part_fanart(): if multi_part and conf.jellyfin_multi_part_fanart():
@@ -960,4 +972,5 @@ def core_main(movie_path, number_th, oCC, specified_source=None, specified_url=N
# 最后输出.nfo元数据文件以完成.nfo文件创建作为任务成功标志 # 最后输出.nfo元数据文件以完成.nfo文件创建作为任务成功标志
print_files(path, leak_word, c_word, json_data.get('naming_rule'), part, cn_sub, json_data, movie_path, print_files(path, leak_word, c_word, json_data.get('naming_rule'), part, cn_sub, json_data, movie_path,
tag, json_data.get('actor_list'), liuchu, uncensored, hack_word,fanart_path,poster_path,thumb_path) tag, json_data.get('actor_list'), liuchu, uncensored, hack_word, fanart_path, poster_path,
thumb_path)

View File

@@ -9,3 +9,4 @@ certifi
MechanicalSoup MechanicalSoup
opencc-python-reimplemented opencc-python-reimplemented
face_recognition face_recognition
get-video-properties

View File

@@ -11,7 +11,6 @@ from .gcolle import Gcolle
from .getchu import Getchu from .getchu import Getchu
from .jav321 import Jav321 from .jav321 import Jav321
from .javdb import Javdb from .javdb import Javdb
from .mv91 import Mv91
from .fc2 import Fc2 from .fc2 import Fc2
from .madou import Madou from .madou import Madou
from .mgstage import Mgstage from .mgstage import Mgstage
@@ -19,6 +18,7 @@ from .javbus import Javbus
from .xcity import Xcity from .xcity import Xcity
from .avsox import Avsox from .avsox import Avsox
from .javlibrary import Javlibrary from .javlibrary import Javlibrary
from .javday import Javday
from .tmdb import Tmdb from .tmdb import Tmdb
from .imdb import Imdb from .imdb import Imdb
@@ -50,8 +50,8 @@ class Scraping:
""" """
""" """
adult_full_sources = ['javlibrary', 'javdb', 'javbus', 'airav', 'fanza', 'xcity', 'jav321', adult_full_sources = ['javlibrary', 'javdb', 'javbus', 'airav', 'fanza', 'xcity', 'jav321',
'mgstage', 'fc2', 'avsox', 'dlsite', 'carib', 'madou', 'mv91', 'mgstage', 'fc2', 'avsox', 'dlsite', 'carib', 'madou',
'getchu', 'gcolle' 'getchu', 'gcolle','javday'
] ]
adult_func_mapping = { adult_func_mapping = {
'avsox': Avsox().scrape, 'avsox': Avsox().scrape,
@@ -65,11 +65,11 @@ class Scraping:
'fanza': Fanza().scrape, 'fanza': Fanza().scrape,
'airav': Airav().scrape, 'airav': Airav().scrape,
'carib': Carib().scrape, 'carib': Carib().scrape,
'mv91': Mv91().scrape,
'gcolle': Gcolle().scrape, 'gcolle': Gcolle().scrape,
'javdb': Javdb().scrape, 'javdb': Javdb().scrape,
'getchu': Getchu().scrape, 'getchu': Getchu().scrape,
'javlibrary': Javlibrary().scrape, 'javlibrary': Javlibrary().scrape,
'javday': Javday().scrape
} }
general_full_sources = ['tmdb', 'imdb'] general_full_sources = ['tmdb', 'imdb']

43
scrapinglib/javday.py Normal file
View File

@@ -0,0 +1,43 @@
# -*- coding: utf-8 -*-
import re
from lxml import etree
from urllib.parse import urlparse, unquote
from .parser import Parser
class Javday(Parser):
    """Parser subclass for the 'javday' source.

    Detail pages are addressed as ``https://javday.tv/videos/<NUMBER>/``
    (number upper-cased, dashes removed) unless a specific URL was supplied.
    Every field is read from the page with the XPath expressions below.
    """
    source = 'javday'

    # Values taken from <head> metadata.
    expr_url = '/html/head/meta[@property="og:url"]/@content'
    expr_cover = '/html/head/meta[@property="og:image"]/@content'
    expr_tags = '/html/head/meta[@name="keywords"]/@content'
    expr_title = "/html/head/title/text()"
    # Values taken from <span> elements of the page.
    expr_actor = "//span[@class='vod_actor']/a/text()"
    expr_studio = '//span[@class="producer"]/a/text()'
    expr_number = '//span[@class="jpnum"]/text()'

    def extraInit(self):
        """Set the per-source defaults for this scraper."""
        self.uncensored = True
        # NOTE(review): the meaning of imagecut mode 4 is defined outside
        # this class -- confirm against the base Parser.
        self.imagecut = 4

    def search(self, number):
        """Fetch the detail page for *number* and return the scraped data.

        Returns 404 when ``getHtml`` yields 404; otherwise returns whatever
        ``dictformat`` produces for the parsed page.
        """
        self.number = number.strip().upper()
        if not self.specifiedUrl:
            self.detailurl = "https://javday.tv/videos/" + self.number.replace("-","") + "/"
        else:
            self.detailurl = self.specifiedUrl

        self.htmlcode = self.getHtml(self.detailurl)
        if self.htmlcode == 404:
            return 404

        page_tree = etree.fromstring(self.htmlcode, etree.HTMLParser())
        # Replace the requested URL with the one the page itself reports
        # in its og:url metadata.
        self.detailurl = self.getTreeElement(page_tree, self.expr_url)
        return self.dictformat(page_tree)

    def getTitle(self, htmltree):
        """Return the page title without the number and the site name."""
        raw_title = super().getTitle(htmltree)
        # Remove the number and the site name.
        without_number = raw_title.replace(self.number,"")
        return without_number.replace("- JAVDAY.TV","").strip()

View File

@@ -1,94 +0,0 @@
# -*- coding: utf-8 -*-
import re
from lxml import etree
from .parser import Parser
class Mv91(Parser):
    """Parser subclass for the 'mv91' source (www.91mv.org).

    Fields are read from the detail page with the XPath expressions below,
    except the cover URL, which is taken from an inline
    ``var pic_url = "..."`` assignment in the raw page text.
    """
    source = 'mv91'

    # The number and the title are read from the same node;
    # getNum() and getTitle() each extract their own part of it.
    expr_number = '//div[@class="player-title"]/text()'
    expr_title = '//div[@class="player-title"]/text()'
    expr_release = '//p[@class="date"]/text()'
    expr_outline = '//div[@class="play-text"]/text()'
    expr_tags = '//div[@class="player-tag"]/text()'
    expr_actor = '//p[@class="player-name"]/text()'

    # Group 1: text before the number; group 2: the "91...-<digits>" number.
    # Raw string so that ``\d`` is a regex class, not a string escape.
    _TITLE_NUMBER_RE = re.compile(r'(.*)(91.*-\d*)')

    def extraInit(self):
        """Set the per-source defaults for this scraper."""
        self.imagecut = 0
        self.uncensored = True

    def getHtmlTree(self, url, type=None):
        """Download *url*, keep the raw page in ``self.htmlcode`` and parse it.

        Returns 404 when ``getHtml`` yields 404, otherwise the parsed tree.
        """
        self.htmlcode = self.getHtml(url, type)
        if self.htmlcode == 404:
            return 404
        return etree.fromstring(self.htmlcode, etree.HTMLParser())

    def queryNumberUrl(self, number):
        """Search the site for *number* and return the first result's URL.

        The ``91CM-`` / ``91MS-`` prefixes are dropped from the search
        keyword. Raises ``IndexError`` when the result page contains no
        ``a.video-list`` link.
        """
        keyword = number.replace('91CM-','').replace('91MS-','')
        search_html = self.getHtml('https://www.91mv.org/index/search?keywords=' + keyword)
        html = etree.fromstring(search_html, etree.HTMLParser())
        endurl = html.xpath('//a[@class="video-list"]/@href')[0]
        return 'https://www.91mv.org' + endurl

    def getNum(self, htmltree):
        """Return the number found on the page, or '' when it cannot be used.

        '' is returned when extraction fails or when the number on the page
        differs (case-insensitively) from the requested ``self.number``.
        """
        try:
            num = super().getNum(htmltree)
            finds = self._TITLE_NUMBER_RE.findall(num)
            if finds:
                result = str(finds[0][1])
            else:
                # Fallback: second whitespace/slash separated token.
                result = num.replace('/', ' ').split()[1]
            if self.number.upper() != result.upper():
                # A different number means the search hit the wrong page.
                return ''
            return result.strip()
        except Exception:
            return ''

    def getTitle(self, htmltree):
        """Return the title part of the page heading, or '' on failure."""
        try:
            title = super().getTitle(htmltree)
            finds = self._TITLE_NUMBER_RE.findall(title)
            if finds:
                result = str(finds[0][0])
            else:
                # Fallback: first whitespace/slash separated token.
                result = title.replace('/', ' ').split()[0]
            return result.replace('「预告」','').strip('/ ')
        except Exception:
            return ''

    def getStudio(self, htmltree):
        """Return the fixed studio name used for this source."""
        return '91制片厂'

    def getActors(self, htmltree):
        """Return the list of actor names found on the page.

        For each entry the '主演:' label is removed, only the part before
        the first '/' is kept, and digits are stripped from the name.
        """
        actors = []
        for player in self.getTreeAll(htmltree, self.expr_actor):
            player = player.replace('主演:','')
            if '/' in player:
                player = player.split('/')[0]
            player = re.sub(r'[0-9]+', '', player)
            actors.append(player)
        return actors

    def getRelease(self, htmltree):
        """Return the release date without its '日期:' label, or ''."""
        try:
            date = super().getRelease(htmltree).replace('日期:','')
        except Exception:
            return ''
        if isinstance(date, str) and date:
            return date
        return ''

    def getCover(self, htmltree):
        """Return the cover URL from the page's ``pic_url`` script variable.

        Reads ``self.htmlcode`` (set by ``getHtmlTree``); returns '' when
        the variable is not present or the page text is unavailable.
        """
        try:
            url = str(re.findall('var pic_url = "(.*?)"', self.htmlcode)[0])
            return url.strip()
        except Exception:
            return ''