Merge pull request #1008 from yoshiko2/master

1
This commit is contained in:
Yoshiko2
2023-05-05 01:19:39 +08:00
committed by GitHub
25 changed files with 463 additions and 208 deletions

View File

@@ -19,6 +19,16 @@ jobs:
steps:
- uses: actions/checkout@v2
- name: Install UPX
uses: crazy-max/ghaction-upx@v2
if: matrix.os == 'windows-latest' || matrix.os == 'ubuntu-latest'
with:
install-only: true
- name: UPX version
if: matrix.os == 'windows-latest' || matrix.os == 'ubuntu-latest'
run: upx --version
- name: Setup Python 3.10
uses: actions/setup-python@v2
with:
@@ -28,6 +38,7 @@ jobs:
run: |
python -m pip install --upgrade pip
pip install -r requirements.txt
pip install face_recognition --no-deps
pip install pyinstaller
- name: Test number_perser.get_number
@@ -39,11 +50,11 @@ jobs:
run: |
pyinstaller \
--onefile Movie_Data_Capture.py \
--python-option u \
--hidden-import "ImageProcessing.cnn" \
--add-data "$(python -c 'import cloudscraper as _; print(_.__path__[0])' | tail -n 1):cloudscraper" \
--add-data "$(python -c 'import opencc as _; print(_.__path__[0])' | tail -n 1):opencc" \
--add-data "$(python -c 'import face_recognition_models as _; print(_.__path__[0])' | tail -n 1):face_recognition_models" \
--collect-submodules "scrapinglib" \
--collect-submodules "ImageProcessing" \
--collect-data "face_recognition_models" \
--collect-data "cloudscraper" \
--collect-data "opencc" \
--add-data "Img:Img" \
--add-data "config.ini:." \
@@ -52,11 +63,11 @@ jobs:
run: |
pyinstaller `
--onefile Movie_Data_Capture.py `
--python-option u `
--hidden-import "ImageProcessing.cnn" `
--add-data "$(python -c 'import cloudscraper as _; print(_.__path__[0])' | tail -n 1);cloudscraper" `
--add-data "$(python -c 'import opencc as _; print(_.__path__[0])' | tail -n 1);opencc" `
--add-data "$(python -c 'import face_recognition_models as _; print(_.__path__[0])' | tail -n 1);face_recognition_models" `
--collect-submodules "scrapinglib" `
--collect-submodules "ImageProcessing" `
--collect-data "face_recognition_models" `
--collect-data "cloudscraper" `
--collect-data "opencc" `
--add-data "Img;Img" `
--add-data "config.ini;." `
@@ -77,5 +88,5 @@ jobs:
- name: Upload build artifact
uses: actions/upload-artifact@v1
with:
name: Movie_Data_Capture-CLI-${{ env.VERSION }}-${{ runner.os }}-amd64
name: MDC-${{ env.VERSION }}-${{ runner.os }}-amd64
path: dist

View File

@@ -301,7 +301,7 @@ def get_html_by_scraper(url: str = None, cookies: dict = None, ua: str = None, r
def translate(
src: str,
target_language: str = "zh_cn",
target_language: str = config.getInstance().get_target_language(),
engine: str = "google-free",
app_id: str = "",
key: str = "",
@@ -342,7 +342,15 @@ def translate(
result = post_html(url=url, query=body, headers=headers)
translate_list = [i["text"] for i in result.json()[0]["translations"]]
trans_result = trans_result.join(translate_list)
elif engine == "deeplx":
url = config.getInstance().get_translate_service_site()
res = requests.post(f"{url}/translate", json={
'text': src,
'source_lang': 'auto',
'target_lang': target_language,
})
if res.text.strip():
trans_result = res.json().get('data')
else:
raise ValueError("Non-existent translation engine")

View File

@@ -60,9 +60,9 @@ def face_crop_height(filename, width, height):
return (0, 0, width, cropHeight)
def cutImage(imagecut, path, fanart_path, poster_path, skip_facerec=False):
def cutImage(imagecut, path, thumb_path, poster_path, skip_facerec=False):
conf = config.getInstance()
fullpath_fanart = os.path.join(path, fanart_path)
fullpath_fanart = os.path.join(path, thumb_path)
fullpath_poster = os.path.join(path, poster_path)
aspect_ratio = conf.face_aspect_ratio()
if conf.face_aways_imagecut():

View File

@@ -104,9 +104,9 @@ is performed. It may help you correct wrong numbers before real job.""")
set_str_or_none("common:source_folder", args.path)
set_bool_or_none("common:auto_exit", args.auto_exit)
set_natural_number_or_none("common:nfo_skip_days", args.days)
set_natural_number_or_none("common:stop_counter", args.cnt)
set_natural_number_or_none("advenced_sleep:stop_counter", args.cnt)
set_bool_or_none("common:ignore_failed_list", args.ignore_failed_list)
set_str_or_none("common:rerun_delay", args.delaytm)
set_str_or_none("advenced_sleep:rerun_delay", args.delaytm)
set_str_or_none("priority:website", args.site)
if isinstance(args.dnimg, bool) and args.dnimg:
conf.set_override("common:download_only_missing_images=0")
@@ -119,7 +119,7 @@ is performed. It may help you correct wrong numbers before real job.""")
if conf.main_mode() == 3:
no_net_op = args.no_network_operation
if no_net_op:
conf.set_override("common:stop_counter=0;rerun_delay=0s;face:aways_imagecut=1")
conf.set_override("advenced_sleep:stop_counter=0;advenced_sleep:rerun_delay=0s;face:aways_imagecut=1")
return args.file, args.number, args.logdir, args.regexstr, args.zero_op, no_net_op, args.specified_source, args.specified_url
@@ -681,7 +681,7 @@ def period(delta, pattern):
if __name__ == '__main__':
version = '6.4.1'
version = '6.6.2'
urllib3.disable_warnings() # Ignore http proxy warning
app_start = time.time()

View File

@@ -24,8 +24,12 @@
# 申明
当你查阅、下载了本项目源代码或二进制程序,即代表你接受了以下条款
* 本项目和项目成果仅供技术学术交流和Python3性能测试使用
* 用户必须确保获取影片的途径在用户当地是合法的
* 运行时和运行后所获取的元数据和封面图片等数据的版权,归版权持有人持有
* 本项目贡献者编写该项目旨在学习Python3 ,提高编程水平
* 本项目不提供任何影片下载的线索
* 请勿提供运行时和运行后获取的数据提供给可能有非法目的的第三方,例如用于非法交易、侵犯未成年人的权利等
* 用户仅能在自己的私人计算机或者测试环境中使用该工具,禁止将获取到的数据用于商业目的或其他目的,如销售、传播等
* 用户在使用本项目和项目成果前,请用户了解并遵守当地法律法规,如果本项目及项目成果使用过程中存在违反当地法律法规的行为,请勿使用该项目及项目成果
* 法律后果及使用后果由使用者承担
* [GPL LICENSE](https://github.com/yoshiko2/Movie_Data_Capture/blob/master/LICENSE)
@@ -36,3 +40,6 @@
# 贡献者
[![](https://opencollective.com/movie_data_capture/contributors.svg?width=890)](https://github.com/yoshiko2/movie_data_Capture/graphs/contributors)
# Star History
[![Star History Chart](https://api.star-history.com/svg?repos=yoshiko2/Movie_Data_Capture&type=Date)](https://star-history.com/#yoshiko2/Movie_Data_Capture&Date)

View File

@@ -32,3 +32,7 @@
# 贡献者
[![](https://opencollective.com/movie_data_capture/contributors.svg?width=890)](https://github.com/yoshiko2/movie_data_Capture/graphs/contributors)
# Star History
[![Star History Chart](https://api.star-history.com/svg?repos=yoshiko2/Movie_Data_Capture&type=Date)](https://star-history.com/#yoshiko2/Movie_Data_Capture&Date)

View File

@@ -21,11 +21,14 @@ nfo_skip_days = 30
ignore_failed_list = 0
download_only_missing_images = 1
mapping_table_validity = 7
; jellyfin中tags和genres重复因此可以只保存genres到nfo中
donot_save_tags = 0
; 一些jellyfin中特有的设置 (0:不开启, 1开启) 比如
; 在jellyfin中tags和genres重复因此可以只需保存genres到nfo中
; jellyfin中只需要保存thumb不需要保存fanart
jellyfin = 0
; 开启后tag和genere只显示演员
actor_only_tag = 0
sleep = 3
anonymous_fill = 1
[advenced_sleep]
; 处理完多少个视频文件后停止0为处理所有视频文件
@@ -48,13 +51,14 @@ cacert_file =
location_rule = actor+'/'+number
naming_rule = number+'-'+title
max_title_len = 50
image_naming_with_number = 1
; 刮削后图片是否命名为番号
image_naming_with_number = 0
[update]
update_check = 1
[priority]
website = javbus,airav,jav321,fanza,xcity,mgstage,fc2,avsox,dlsite,carib,madou,mv91,getchu,javdb,gcolle
website = javbus,airav,jav321,fanza,xcity,mgstage,fc2,avsox,dlsite,carib,madou,getchu,javdb,gcolle,javday,javmenu
[escape]
literals = \()/
@@ -66,13 +70,15 @@ switch = 0
; 机器翻译
[translate]
switch = 0
;可选项 google-free,azure
;可选项 google-free,azure,deeplx
engine = google-free
; azure翻译密钥
target_language = zh_cn
; azure翻译密钥key
key =
; 翻译延迟
delay = 1
values = title,outline
; google翻译服务站点或deeplx访问链接
service_site = translate.google.cn
; 预告片

View File

@@ -169,13 +169,13 @@ class Config:
self._exit("common:main_mode")
def source_folder(self) -> str:
return self.conf.get("common", "source_folder")
return self.conf.get("common", "source_folder").replace("\\\\", "/").replace("\\", "/")
def failed_folder(self) -> str:
return self.conf.get("common", "failed_output_folder")
return self.conf.get("common", "failed_output_folder").replace("\\\\", "/").replace("\\", "/")
def success_folder(self) -> str:
return self.conf.get("common", "success_output_folder")
return self.conf.get("common", "success_output_folder").replace("\\\\", "/").replace("\\", "/")
def actor_gender(self) -> str:
return self.conf.get("common", "actor_gender")
@@ -213,8 +213,8 @@ class Config:
def mapping_table_validity(self) -> int:
return self.conf.getint("common", "mapping_table_validity")
def donot_save_tags(self) -> int:
return self.conf.getint("common", "donot_save_tags")
def jellyfin(self) -> int:
return self.conf.getint("common", "jellyfin")
def actor_only_tag(self) -> bool:
return self.conf.getboolean("common", "actor_only_tag")
@@ -222,6 +222,9 @@ class Config:
def sleep(self) -> int:
return self.conf.getint("common", "sleep")
def anonymous_fill(self) -> bool:
return self.conf.getint("common", "anonymous_fill")
def stop_counter(self) -> int:
return self.conf.getint("advenced_sleep", "stop_counter", fallback=0)
@@ -279,6 +282,9 @@ class Config:
def get_translate_engine(self) -> str:
return self.conf.get("translate", "engine")
def get_target_language(self) -> str:
return self.conf.get("translate", "target_language")
# def get_translate_appId(self) ->str:
# return self.conf.get("translate","appid")
@@ -439,16 +445,19 @@ class Config:
# actor_gender value: female or male or both or all(含人妖)
conf.set(sec1, "actor_gender", "female")
conf.set(sec1, "del_empty_folder", "1")
conf.set(sec1, "nfo_skip_days", 30)
conf.set(sec1, "ignore_failed_list", 0)
conf.set(sec1, "download_only_missing_images", 1)
conf.set(sec1, "mapping_table_validity", 7)
conf.set(sec1, "donot_save_tags", 0)
conf.set(sec1, "nfo_skip_days", "30")
conf.set(sec1, "ignore_failed_list", "0")
conf.set(sec1, "download_only_missing_images", "1")
conf.set(sec1, "mapping_table_validity", "7")
conf.set(sec1, "jellyfin", "0")
conf.set(sec1, "actor_only_tag", "0")
conf.set(sec1, "sleep", "3")
conf.set(sec1, "anonymous_fill", "0")
sec2 = "advenced_sleep"
conf.add_section(sec2)
conf.set(sec2, "stop_counter", 0)
conf.set(sec2, "rerun_delay", 0)
conf.set(sec2, "stop_counter", "0")
conf.set(sec2, "rerun_delay", "0")
sec3 = "proxy"
conf.add_section(sec3)
@@ -463,6 +472,7 @@ class Config:
conf.set(sec4, "location_rule", "actor + '/' + number")
conf.set(sec4, "naming_rule", "number + '-' + title")
conf.set(sec4, "max_title_len", "50")
conf.set(sec4, "image_naming_with_number", "0")
sec5 = "update"
conf.add_section(sec5)
@@ -485,6 +495,7 @@ class Config:
conf.add_section(sec9)
conf.set(sec9, "switch", "0")
conf.set(sec9, "engine", "google-free")
conf.set(sec9, "target_language", "zh_cn")
# conf.set(sec8, "appid", "")
conf.set(sec9, "key", "")
conf.set(sec9, "delay", "1")
@@ -508,28 +519,28 @@ class Config:
sec13 = "watermark"
conf.add_section(sec13)
conf.set(sec13, "switch", 1)
conf.set(sec13, "water", 2)
conf.set(sec13, "switch", "1")
conf.set(sec13, "water", "2")
sec14 = "extrafanart"
conf.add_section(sec14)
conf.set(sec14, "switch", 1)
conf.set(sec14, "switch", "1")
conf.set(sec14, "extrafanart_folder", "extrafanart")
conf.set(sec14, "parallel_download", 1)
conf.set(sec14, "parallel_download", "1")
sec15 = "storyline"
conf.add_section(sec15)
conf.set(sec15, "switch", 1)
conf.set(sec15, "switch", "1")
conf.set(sec15, "site", "1:avno1,4:airavwiki")
conf.set(sec15, "censored_site", "2:airav,5:xcity,6:amazon")
conf.set(sec15, "uncensored_site", "3:58avgo")
conf.set(sec15, "show_result", 0)
conf.set(sec15, "run_mode", 1)
conf.set(sec15, "cc_convert", 1)
conf.set(sec15, "show_result", "0")
conf.set(sec15, "run_mode", "1")
conf.set(sec15, "cc_convert", "1")
sec16 = "cc_convert"
conf.add_section(sec16)
conf.set(sec16, "mode", 1)
conf.set(sec16, "mode", "1")
conf.set(sec16, "vars", "actor,director,label,outline,series,studio,tag,title")
sec17 = "javdb"

144
core.py
View File

@@ -272,22 +272,25 @@ def extrafanart_download_threadpool(url_list, save_dir, number, json_data=None):
def image_ext(url):
try:
return os.path.splitext(url)[-1]
ext = os.path.splitext(url)[-1]
if ext in {'.jpg','.jpge','.bmp','.png','.gif'}:
return ext
return ".jpg"
except:
return ".jpg"
# 封面是否下载成功否则移动到failed
def image_download(cover, fanart_path, thumb_path, path, filepath, json_headers=None):
full_filepath = os.path.join(path, fanart_path)
full_filepath = os.path.join(path, thumb_path)
if config.getInstance().download_only_missing_images() and not file_not_exist_or_empty(full_filepath):
return
if json_headers != None:
if download_file_with_filename(cover, fanart_path, path, filepath, json_headers['headers']) == 'failed':
if download_file_with_filename(cover, thumb_path, path, filepath, json_headers['headers']) == 'failed':
moveFailedFolder(filepath)
return
else:
if download_file_with_filename(cover, fanart_path, path, filepath) == 'failed':
if download_file_with_filename(cover, thumb_path, path, filepath) == 'failed':
moveFailedFolder(filepath)
return
@@ -296,20 +299,21 @@ def image_download(cover, fanart_path, thumb_path, path, filepath, json_headers=
if file_not_exist_or_empty(full_filepath):
print('[!]Image Download Failed! Trying again. [{}/3]', i + 1)
if json_headers != None:
download_file_with_filename(cover, fanart_path, path, filepath, json_headers['headers'])
download_file_with_filename(cover, thumb_path, path, filepath, json_headers['headers'])
else:
download_file_with_filename(cover, fanart_path, path, filepath)
download_file_with_filename(cover, thumb_path, path, filepath)
continue
else:
break
if file_not_exist_or_empty(full_filepath):
return
print('[+]Image Downloaded!', Path(full_filepath).name)
shutil.copyfile(full_filepath, os.path.join(path, thumb_path))
if not config.getInstance().jellyfin():
shutil.copyfile(full_filepath, os.path.join(path, fanart_path))
def print_files(path, leak_word, c_word, naming_rule, part, cn_sub, json_data, filepath, tag, actor_list, liuchu,
uncensored, hack_word, _4k, fanart_path, poster_path, thumb_path):
uncensored, hack, hack_word, _4k, fanart_path, poster_path, thumb_path):
title, studio, year, outline, runtime, director, actor_photo, release, number, cover, trailer, website, series, label = get_info(
json_data)
if config.getInstance().main_mode() == 3: # 模式3下由于视频文件不做任何改变.nfo文件必须和视频文件名称除后缀外完全一致KODI等软件方可支持
@@ -332,13 +336,23 @@ def print_files(path, leak_word, c_word, naming_rule, part, cn_sub, json_data, f
pass
# KODI内查看影片信息时找不到number配置naming_rule=number+'#'+title虽可解决
# 但使得标题太长放入时常为空的outline内会更适合软件给outline留出的显示版面也较大
if not outline:
pass
elif json_data['source'] == 'pissplay':
outline = f"{outline}"
else:
outline = f"{number}#{outline}"
with open(nfo_path, "wt", encoding='UTF-8') as code:
print('<?xml version="1.0" encoding="UTF-8" ?>', file=code)
print("<movie>", file=code)
if not config.getInstance().jellyfin():
print(" <title><![CDATA[" + naming_rule + "]]></title>", file=code)
print(" <originaltitle><![CDATA[" + naming_rule + "]]></originaltitle>", file=code)
print(" <originaltitle><![CDATA[" + json_data['original_naming_rule'] + "]]></originaltitle>", file=code)
print(" <sorttitle><![CDATA[" + naming_rule + "]]></sorttitle>", file=code)
else:
print(" <title>" + naming_rule + "</title>", file=code)
print(" <originaltitle>" + json_data['original_naming_rule'] + "</originaltitle>", file=code)
print(" <sorttitle>" + naming_rule + "</sorttitle>", file=code)
print(" <customrating>JP-18+</customrating>", file=code)
print(" <mpaa>JP-18+</mpaa>", file=code)
try:
@@ -347,12 +361,17 @@ def print_files(path, leak_word, c_word, naming_rule, part, cn_sub, json_data, f
print(" <set></set>", file=code)
print(" <studio>" + studio + "</studio>", file=code)
print(" <year>" + year + "</year>", file=code)
if not config.getInstance().jellyfin():
print(" <outline><![CDATA[" + outline + "]]></outline>", file=code)
print(" <plot><![CDATA[" + outline + "]]></plot>", file=code)
else:
print(" <outline>" + outline + "</outline>", file=code)
print(" <plot>" + outline + "</plot>", file=code)
print(" <runtime>" + str(runtime).replace(" ", "") + "</runtime>", file=code)
print(" <director>" + director + "</director>", file=code)
print(" <poster>" + poster_path + "</poster>", file=code)
print(" <thumb>" + thumb_path + "</thumb>", file=code)
if not config.getInstance().jellyfin(): # jellyfin 不需要保存fanart
print(" <fanart>" + fanart_path + "</fanart>", file=code)
try:
for key in actor_list:
@@ -368,8 +387,8 @@ def print_files(path, leak_word, c_word, naming_rule, part, cn_sub, json_data, f
print(" <maker>" + studio + "</maker>", file=code)
print(" <label>" + label + "</label>", file=code)
skip_tags = config.getInstance().donot_save_tags()
if not skip_tags:
jellyfin = config.getInstance().jellyfin()
if not jellyfin:
if config.getInstance().actor_only_tag():
for key in actor_list:
try:
@@ -377,27 +396,27 @@ def print_files(path, leak_word, c_word, naming_rule, part, cn_sub, json_data, f
except:
pass
else:
if cn_sub == '1':
if cn_sub:
print(" <tag>中文字幕</tag>", file=code)
if liuchu == '流出':
if liuchu:
print(" <tag>流出</tag>", file=code)
if uncensored == 1:
if uncensored:
print(" <tag>无码</tag>", file=code)
if hack_word != '':
if hack:
print(" <tag>破解</tag>", file=code)
if _4k == '1':
if _4k:
print(" <tag>4k</tag>", file=code)
for i in tag:
print(" <tag>" + i + "</tag>", file=code)
if cn_sub == '1':
if cn_sub:
print(" <genre>中文字幕</genre>", file=code)
if liuchu == '流出':
if liuchu:
print(" <genre>无码流出</genre>", file=code)
if uncensored == 1:
if uncensored:
print(" <genre>无码</genre>", file=code)
if hack_word != '':
if hack:
print(" <genre>破解</genre>", file=code)
if _4k == '1':
if _4k:
print(" <genre>4k</genre>", file=code)
try:
for i in tag:
@@ -470,6 +489,7 @@ def add_mark(poster_path, thumb_path, cn_sub, leak, uncensored, hack, _4k) -> No
:cn_sub: 中文字幕 可选值1,"1" 或其他值
:uncensored 无码 可选值1,"1" 或其他值
:hack 破解 可选值1,"1" 或其他值
:_4k Bool
"""
mark_type = ''
if cn_sub:
@@ -495,17 +515,17 @@ def add_mark_thread(pic_path, cn_sub, leak, uncensored, hack, _4k):
# 获取自定义位置取余配合pos达到顺时针添加的效果
# 左上 0, 右上 1, 右下 2 左下 3
count = config.getInstance().watermark_type()
if cn_sub == 1 or cn_sub == '1':
if cn_sub:
add_to_pic(pic_path, img_pic, size, count, 1) # 添加
count = (count + 1) % 4
if leak == 1 or leak == '1':
if leak:
add_to_pic(pic_path, img_pic, size, count, 2)
count = (count + 1) % 4
if uncensored == 1 or uncensored == '1':
if uncensored:
add_to_pic(pic_path, img_pic, size, count, 3)
if hack == 1 or hack == '1':
if hack:
add_to_pic(pic_path, img_pic, size, count, 4)
if _4k == 1 or _4k == '1':
if _4k:
add_to_pic(pic_path, img_pic, size, count, 5)
img_pic.close()
@@ -613,6 +633,8 @@ def paste_file_to_folder_mode2(filepath, path, multi_part, number, part, leak_wo
create_softlink = False
if link_mode not in (1, 2):
shutil.move(filepath, targetpath)
print("[!]Move => ", path)
return
elif link_mode == 2:
try:
os.link(filepath, targetpath, follow_symlinks=False)
@@ -624,16 +646,13 @@ def paste_file_to_folder_mode2(filepath, path, multi_part, number, part, leak_wo
os.symlink(filerelpath, targetpath)
except:
os.symlink(str(filepath_obj.resolve()), targetpath)
return
print("[!]Link => ", path)
except FileExistsError as fee:
print(f'[-]FileExistsError: {fee}')
return
except PermissionError:
print('[-]Error! Please run as administrator!')
return
except OSError as oserr:
print(f'[-]OS Error errno {oserr.errno}')
return
def linkImage(path, number, part, leak_word, c_word, hack_word, ext):
@@ -693,12 +712,12 @@ def core_main_no_net_op(movie_path, number):
conf = config.getInstance()
part = ''
leak_word = ''
leak = 0
leak = False
c_word = ''
cn_sub = ''
hack = ''
cn_sub = False
hack = False
hack_word = ''
_4k = ''
_4k = False
imagecut = 1
multi = False
part = ''
@@ -709,30 +728,30 @@ def core_main_no_net_op(movie_path, number):
multi = True
if re.search(r'[-_]C(\.\w+$|-\w+)|\d+ch(\.\w+$|-\w+)', movie_path,
re.I) or '中文' in movie_path or '字幕' in movie_path or ".chs" in movie_path or '.cht' in movie_path:
cn_sub = '1'
cn_sub = True
c_word = '-C' # 中文字幕影片后缀
uncensored = 1 if is_uncensored(number) else 0
uncensored = True if is_uncensored(number) else 0
if '流出' in movie_path or 'uncensored' in movie_path.lower():
leak_word = '-无码流出' # 无码流出影片后缀
leak = 1
leak = True
if 'hack'.upper() in str(movie_path).upper() or '破解' in movie_path:
hack = 1
hack = True
hack_word = "-hack"
# try:
# props = get_video_properties(movie_path) # 判断是否为4K视频
# if props['width'] >= 4096 or props['height'] >= 2160:
# _4k = '1'
# _4k = True
# except:
# pass
prestr = f"{number}{leak_word}{c_word}{hack_word}"
full_nfo = Path(path) / f"{prestr}{part}.nfo"
if full_nfo.is_file():
if full_nfo.read_text(encoding='utf-8').find(r'<tag>无码</tag>') >= 0:
uncensored = 1
uncensored = True
try:
nfo_xml = etree.parse(full_nfo)
nfo_fanart_path = nfo_xml.xpath('//fanart/text()')[0]
@@ -791,15 +810,15 @@ def move_subtitles(filepath, path, multi_part, number, part, leak_word, c_word,
def core_main(movie_path, number_th, oCC, specified_source=None, specified_url=None):
conf = config.getInstance()
# =======================================================================初始化所需变量
multi_part = 0
multi_part = False
part = ''
leak_word = ''
c_word = ''
cn_sub = ''
liuchu = ''
hack = ''
cn_sub = False
liuchu = False
hack = False
hack_word = ''
_4k = ''
_4k = False
# 下面被注释的变量不需要
# rootpath = os.getcwd
@@ -822,11 +841,11 @@ def core_main(movie_path, number_th, oCC, specified_source=None, specified_url=N
tag = json_data.get('tag')
# =======================================================================判断-C,-CD后缀
if re.search('[-_]CD\d+', movie_path, re.IGNORECASE):
multi_part = 1
multi_part = True
part = re.findall('[-_]CD\d+', movie_path, re.IGNORECASE)[0].upper()
if re.search(r'[-_]C(\.\w+$|-\w+)|\d+ch(\.\w+$|-\w+)', movie_path,
re.I) or '中文' in movie_path or '字幕' in movie_path:
cn_sub = '1'
cn_sub = True
c_word = '-C' # 中文字幕影片后缀
# 判断是否无码
@@ -835,15 +854,18 @@ def core_main(movie_path, number_th, oCC, specified_source=None, specified_url=N
if '流出' in movie_path or 'uncensored' in movie_path.lower():
liuchu = '流出'
leak = 1
leak = True
leak_word = '-无码流出' # 流出影片后缀
else:
leak = 0
leak = False
if 'hack'.upper() in str(movie_path).upper() or '破解' in movie_path:
hack = 1
hack = True
hack_word = "-hack"
if '4k'.upper() in str(movie_path).upper() or '4k' in movie_path:
_4k = True
# 判断是否4k
if '4K' in tag:
tag.remove('4K') # 从tag中移除'4K'
@@ -855,7 +877,7 @@ def core_main(movie_path, number_th, oCC, specified_source=None, specified_url=N
# try:
# props = get_video_properties(movie_path) # 判断是否为4K视频
# if props['width'] >= 4096 or props['height'] >= 2160:
# _4k = '1'
# _4k = True
# except:
# pass
@@ -920,7 +942,7 @@ def core_main(movie_path, number_th, oCC, specified_source=None, specified_url=N
pass
# 裁剪图
cutImage(imagecut, path, fanart_path, poster_path, bool(conf.face_uncensored_only() and not uncensored))
cutImage(imagecut, path, thumb_path, poster_path, bool(conf.face_uncensored_only() and not uncensored))
# 兼容Jellyfin封面图文件名规则
if multi_part and conf.jellyfin_multi_part_fanart():
@@ -932,7 +954,7 @@ def core_main(movie_path, number_th, oCC, specified_source=None, specified_url=N
# Move subtitles
move_status = move_subtitles(movie_path, path, multi_part, number, part, leak_word, c_word, hack_word)
if move_status:
cn_sub = "1"
cn_sub = True
# 添加水印
if conf.is_watermark():
add_mark(os.path.join(path, poster_path), os.path.join(path, thumb_path), cn_sub, leak, uncensored,
@@ -940,7 +962,7 @@ def core_main(movie_path, number_th, oCC, specified_source=None, specified_url=N
# 最后输出.nfo元数据文件以完成.nfo文件创建作为任务成功标志
print_files(path, leak_word, c_word, json_data.get('naming_rule'), part, cn_sub, json_data, movie_path, tag,
json_data.get('actor_list'), liuchu, uncensored, hack_word
json_data.get('actor_list'), liuchu, uncensored, hack, hack_word
, _4k, fanart_path, poster_path, thumb_path)
elif conf.main_mode() == 2:
@@ -948,13 +970,9 @@ def core_main(movie_path, number_th, oCC, specified_source=None, specified_url=N
path = create_folder(json_data)
# 移动文件
paste_file_to_folder_mode2(movie_path, path, multi_part, number, part, leak_word, c_word, hack_word)
# Move subtitles
move_status = move_subtitles(movie_path, path, multi_part, number, part, leak_word, c_word, hack_word)
if move_status:
cn_sub = "1"
if conf.is_watermark():
add_mark(os.path.join(path, poster_path), os.path.join(path, thumb_path), cn_sub, leak, uncensored, hack,
_4k)
move_subtitles(movie_path, path, multi_part, number, part, leak_word, c_word, hack_word)
elif conf.main_mode() == 3:
path = str(Path(movie_path).parent)
@@ -998,7 +1016,7 @@ def core_main(movie_path, number_th, oCC, specified_source=None, specified_url=N
# 添加水印
if conf.is_watermark():
add_mark(os.path.join(path, poster_path), os.path.join(path, thumb_path), cn_sub, leak, uncensored, hack,
add_mark(os.path.join(path, poster_path), os.path.join(path, fanart_path), cn_sub, leak, uncensored, hack,
_4k)
# 兼容Jellyfin封面图文件名规则
@@ -1007,5 +1025,5 @@ def core_main(movie_path, number_th, oCC, specified_source=None, specified_url=N
# 最后输出.nfo元数据文件以完成.nfo文件创建作为任务成功标志
print_files(path, leak_word, c_word, json_data.get('naming_rule'), part, cn_sub, json_data, movie_path,
tag, json_data.get('actor_list'), liuchu, uncensored, hack_word, fanart_path, poster_path,
tag, json_data.get('actor_list'), liuchu, uncensored, hack, hack_word, _4k, fanart_path, poster_path,
thumb_path)

Binary file not shown.

Before

Width:  |  Height:  |  Size: 146 KiB

After

Width:  |  Height:  |  Size: 163 KiB

View File

@@ -7,7 +7,7 @@ import typing
G_spat = re.compile(
"^\w+\.(cc|com|net|me|club|jp|tv|xyz|biz|wiki|info|tw|us|de)@|^22-sht\.me|"
"^(fhd|hd|sd|1080p|720p|4K)(-|_)|"
"(-|_)(fhd|hd|sd|1080p|720p|4K|x264|x265|uncensored|leak)",
"(-|_)(fhd|hd|sd|1080p|720p|4K|x264|x265|uncensored|hack|leak)",
re.IGNORECASE)
@@ -54,12 +54,12 @@ def get_number(debug: bool, file_path: str) -> str:
filename = str(re.sub("\[\d{4}-\d{1,2}-\d{1,2}\] - ", "", filepath)) # 去除文件名中时间
lower_check = filename.lower()
if 'fc2' in lower_check:
filename = lower_check.replace('ppv', '').replace('--', '-').replace('_', '-').upper()
filename = lower_check.replace('--', '-').replace('_', '-').upper()
filename = re.sub("[-_]cd\d{1,2}", "", filename, flags=re.IGNORECASE)
if not re.search("-|_", filename): # 去掉-CD1之后再无-的情况例如n1012-CD1.wmv
return str(re.search(r'\w+', filename[:filename.find('.')], re.A).group())
file_number = os.path.splitext(filename)
filename = re.search(r'\w+(-|_)\w+', filename, re.A)
filename = re.search(r'[\w\-_]+', filename, re.A)
if filename:
file_number = str(filename.group())
else:
@@ -85,34 +85,7 @@ def get_number(debug: bool, file_path: str) -> str:
print(f'[-]Number Parser exception: {e} [{file_path}]')
return None
# modou提取number
def md(filename):
m = re.search(r'(md[a-z]{0,2}-?)(\d{2,})(-ep\d*|-\d*)*', filename, re.I)
return f'{m.group(1).replace("-","").upper()}{m.group(2).zfill(4)}{m.group(3) or ""}'
def mmz(filename):
m = re.search(r'(mmz-?)(\d{2,})(-ep\d*)*', filename, re.I)
return f'{m.group(1).replace("-","").upper()}{m.group(2).zfill(3)}{m.group(3) or ""}'
def msd(filename):
m = re.search(r'(msd-?)(\d{2,})(-ep\d*)*', filename, re.I)
return f'{m.group(1).replace("-","").upper()}{m.group(2).zfill(3)}{m.group(3) or ""}'
def mky(filename):
m = re.search(r'(mky-[a-z]{2,2}-?)(\d{2,})(-ep\d*)*', filename, re.I)
return f'{m.group(1).replace("-","").upper()}{m.group(2).zfill(3)}{m.group(3) or ""}'
def yk(filename):
m = re.search(r'(yk-?)(\d{2,})(-ep\d*)*', filename, re.I)
return f'{m.group(1).replace("-","").upper()}{m.group(2).zfill(3)}{m.group(3) or ""}'
def pm(filename):
m = re.search(r'(pm[a-z]?-?)(\d{2,})(-ep\d*)*', filename, re.I)
return f'{m.group(1).replace("-","").upper()}{m.group(2).zfill(3)}{m.group(3) or ""}'
def fsog(filename):
m = re.search(r'(fsog-?)(\d{2,})(-ep\d*)*', filename, re.I)
return f'{m.group(1).replace("-","").upper()}{m.group(2).zfill(3)}{m.group(3) or ""}'
# 按javdb数据源的命名规范提取number
G_TAKE_NUM_RULES = {
@@ -126,13 +99,6 @@ G_TAKE_NUM_RULES = {
'heyzo': lambda x: 'HEYZO-' + re.findall(r'heyzo[^\d]*(\d{4})', x, re.I)[0],
'mdbk': lambda x: str(re.search(r'mdbk(-|_)(\d{4})', x, re.I).group()),
'mdtm': lambda x: str(re.search(r'mdtm(-|_)(\d{4})', x, re.I).group()),
r'\bmd[a-z]{0,2}-\d{2,}': md,
r'\bmmz-\d{2,}':mmz,
r'\bmsd-\d{2,}':msd,
r'\bmky-[a-z]{2,2}-\d{2,}':mky,
r'\byk-\d{2,3}': yk,
r'\bpm[a-z]?-?\d{2,}':pm,
r'\bfsog-?\d{2,}':fsog
}

View File

@@ -1,21 +1,21 @@
# If you can't run this script, please execute the following command in PowerShell.
# Set-ExecutionPolicy RemoteSigned -Scope CurrentUser -Force
$CLOUDSCRAPER_PATH=$(python -c 'import cloudscraper as _; print(_.__path__[0])' | select -Last 1)
$OPENCC_PATH=$(python -c 'import opencc as _; print(_.__path__[0])' | select -Last 1)
$FACE_RECOGNITION_MODELS=$(python -c 'import face_recognition_models as _; print(_.__path__[0])' | select -Last 1)
# bugfixset submodules find path
$Env:PYTHONPATH=$pwd.path
$PYTHONPATH=$pwd.path
mkdir build
mkdir __pycache__
pyinstaller --onefile Movie_Data_Capture.py `
--hidden-import "ImageProcessing.cnn" `
--python-option u `
--add-data "$FACE_RECOGNITION_MODELS;face_recognition_models" `
--add-data "$CLOUDSCRAPER_PATH;cloudscraper" `
--add-data "$OPENCC_PATH;opencc" `
pyinstaller --collect-submodules "scrapinglib" `
--collect-submodules "ImageProcessing" `
--collect-data "face_recognition_models" `
--collect-data "cloudscraper" `
--collect-data "opencc" `
--add-data "Img;Img" `
--add-data "config.ini;." `
--onefile Movie_Data_Capture.py
rmdir -Recurse -Force build
rmdir -Recurse -Force __pycache__

View File

@@ -1,4 +1,8 @@
requests
dlib-bin
Click
numpy
face-recognition-models
lxml
beautifulsoup4
pillow
@@ -8,5 +12,3 @@ urllib3
certifi
MechanicalSoup
opencc-python-reimplemented
face_recognition
get-video-properties

View File

@@ -99,6 +99,10 @@ def get_data_from_json(
# ================================================网站规则添加结束================================================
if json_data.get('title') == '':
print('[-]Movie Number or Title not found!')
return None
title = json_data.get('title')
actor_list = str(json_data.get('actor')).strip("[ ]").replace("'", '').split(',') # 字符串转列表
actor_list = [actor.strip() for actor in actor_list] # 去除空白
@@ -134,12 +138,11 @@ def get_data_from_json(
tag.remove('XXXX')
while 'xxx' in tag:
tag.remove('xxx')
if json_data['source'] =='pissplay': # pissplay actor为英文名不用去除空格
actor = str(actor_list).strip("[ ]").replace("'", '')
else:
actor = str(actor_list).strip("[ ]").replace("'", '').replace(" ", '')
if title == '' or number == '':
print('[-]Movie Number or Title not found!')
return None
# if imagecut == '3':
# DownloadFileWithFilename()
@@ -266,14 +269,22 @@ def get_data_from_json(
pass
naming_rule = ""
original_naming_rule = ""
for i in conf.naming_rule().split("+"):
if i not in json_data:
naming_rule += i.strip("'").strip('"')
original_naming_rule += i.strip("'").strip('"')
else:
item = json_data.get(i)
naming_rule += item if type(item) is not list else "&".join(item)
# PATCH处理[title]存在翻译的情况后续NFO文件的original_name只会直接沿用naming_rule,这导致original_name非原始名
# 理应在翻译处处理 naming_rule和original_naming_rule
if i == 'title':
item = json_data.get('original_title')
original_naming_rule += item if type(item) is not list else "&".join(item)
json_data['naming_rule'] = naming_rule
json_data['original_naming_rule'] = original_naming_rule
return json_data

View File

@@ -1,3 +1,2 @@
# -*- coding: utf-8 -*-
from .api import search, getSupportedSources

View File

@@ -20,6 +20,8 @@ from .xcity import Xcity
from .avsox import Avsox
from .javlibrary import Javlibrary
from .javday import Javday
from .pissplay import Pissplay
from .javmenu import Javmenu
from .tmdb import Tmdb
from .imdb import Imdb
@@ -52,7 +54,7 @@ class Scraping:
"""
adult_full_sources = ['javlibrary', 'javdb', 'javbus', 'airav', 'fanza', 'xcity', 'jav321',
'mgstage', 'fc2', 'avsox', 'dlsite', 'carib', 'madou',
'getchu', 'gcolle','javday'
'getchu', 'gcolle', 'javday', 'pissplay', 'javmenu'
]
adult_func_mapping = {
'avsox': Avsox().scrape,
@@ -70,7 +72,9 @@ class Scraping:
'javdb': Javdb().scrape,
'getchu': Getchu().scrape,
'javlibrary': Javlibrary().scrape,
'javday': Javday().scrape
'javday': Javday().scrape,
'pissplay': Pissplay().scrape,
'javmenu': Javmenu().scrape
}
general_full_sources = ['tmdb', 'imdb']
@@ -143,6 +147,14 @@ class Scraping:
print(f'[-]Movie Number [{name}] not found!')
return None
# If actor is anonymous, Fill in Anonymous
if len(json_data['actor']) == 0:
if config.getInstance().anonymous_fill() == True:
if "zh_" in config.getInstance().get_target_language():
json_data['actor'] = "佚名"
else:
json_data['actor'] = "Anonymous"
return json_data
def searchAdult(self, number, sources):
@@ -201,6 +213,14 @@ class Scraping:
print(f'[-]Movie Number [{number}] not found!')
return None
# If actor is anonymous, Fill in Anonymous
if len(json_data['actor']) == 0:
if config.getInstance().anonymous_fill() == True:
if "zh_" in config.getInstance().get_target_language():
json_data['actor'] = "佚名"
else:
json_data['actor'] = "Anonymous"
return json_data
def checkGeneralSources(self, c_sources, name):
@@ -283,4 +303,8 @@ class Scraping:
return False
if data["number"] is None or data["number"] == "" or data["number"] == "null":
return False
if (data["cover"] is None or data["cover"] == "" or data["cover"] == "null") \
and (data["cover_small"] is None or data["cover_small"] == "" or
data["cover_small"] == "null"):
return False
return True

View File

@@ -31,12 +31,14 @@ class Avsox(Parser):
site = self.getTreeElement(qurySiteTree, '//div[@class="container"]/div/a/@href')
self.searchtree = self.getHtmlTree(site + '/cn/search/' + number)
result1 = self.getTreeElement(self.searchtree, '//*[@id="waterfall"]/div/a/@href')
if result1 == '' or result1 == 'null' or result1 == 'None':
if result1 == '' or result1 == 'null' or result1 == 'None' or result1.find('movie') == -1:
self.searchtree = self.getHtmlTree(site + '/cn/search/' + number.replace('-', '_'))
result1 = self.getTreeElement(self.searchtree, '//*[@id="waterfall"]/div/a/@href')
if result1 == '' or result1 == 'null' or result1 == 'None':
if result1 == '' or result1 == 'null' or result1 == 'None' or result1.find('movie') == -1:
self.searchtree = self.getHtmlTree(site + '/cn/search/' + number.replace('_', ''))
result1 = self.getTreeElement(self.searchtree, '//*[@id="waterfall"]/div/a/@href')
if result1 == '' or result1 == 'null' or result1 == 'None' or result1.find('movie') == -1:
return None
return "https:" + result1
def getNum(self, htmltree):

View File

@@ -49,13 +49,13 @@ class Fanza(Parser):
self.detailurl = url + fanza_search_number
url = "https://www.dmm.co.jp/age_check/=/declared=yes/?"+ urlencode({"rurl": self.detailurl})
self.htmlcode = self.getHtml(url)
if self.htmlcode != 404:
if self.htmlcode != 404 \
and 'Sorry! This content is not available in your region.' not in self.htmlcode:
self.htmltree = etree.HTML(self.htmlcode)
break
if self.htmlcode == 404:
return 404
if self.htmltree is not None:
result = self.dictformat(self.htmltree)
return result
return 404
def getNum(self, htmltree):
# for some old page, the input number does not match the page

View File

@@ -22,6 +22,7 @@ class Fc2(Parser):
def extraInit(self):
self.imagecut = 0
self.allow_number_change = True
def search(self, number):
self.number = number.lower().replace('fc2-ppv-', '').replace('fc2-', '')

View File

@@ -128,7 +128,7 @@ class Javbus(Parser):
def getTags(self, htmltree):
tags = self.getTreeElement(htmltree, self.expr_tags).split(',')
return tags[1:]
return tags[2:]
def getOutline(self, htmltree):
if self.morestoryline:

View File

@@ -39,3 +39,8 @@ class Javday(Parser):
# 删除番号和网站名
result = title.replace(self.number,"").replace("- JAVDAY.TV","").strip()
return result
def getTags(self, htmltree) -> list:
    """Return the parent parser's tags with the site watermark removed.

    Javday pages include their own site name ("JAVDAY.TV") as a pseudo-tag;
    filter out any tag containing it.
    """
    all_tags = super().getTags(htmltree)
    return list(filter(lambda t: 'JAVDAY.TV' not in t, all_tags))

61
scrapinglib/javmenu.py Normal file
View File

@@ -0,0 +1,61 @@
# -*- coding: utf-8 -*-
import re
from lxml import etree
from urllib.parse import urljoin
from .parser import Parser
class Javmenu(Parser):
    """Parser for javmenu.com (a Chinese-language JAV index site).

    The detail page is addressed directly by number
    (``https://javmenu.com/zh/<number>/``), so no search step is needed.
    """

    source = 'javmenu'

    expr_title = '/html/head/meta[@property="og:title"]/@content'
    expr_cover = '/html/head/meta[@property="og:image"]/@content'
    # The number is rendered split across two sibling nodes
    # (prefix in an <a>, digits in the second <span>); see getNum().
    expr_number = '//span[contains(text(),"番號") or contains(text(),"番号")]/../a/text()'
    expr_number2 = '//span[contains(text(),"番號") or contains(text(),"番号")]/../span[2]/text()'
    # NOTE(review): the literal "時長;" (with a trailing semicolon) looks odd —
    # confirm it really matches the site's markup.
    expr_runtime = '//span[contains(text(),"時長;") or contains(text(),"时长")]/../span[2]/text()'
    expr_release = '//span[contains(text(),"日期")]/../span[2]/text()'
    expr_studio = '//span[contains(text(),"製作")]/../span[2]/a/text()'
    expr_actor = '//a[contains(@class,"actress")]/text()'
    expr_tags = '//a[contains(@class,"genre")]/text()'

    def extraInit(self):
        # imagecut = 4: cover-handling mode used by the image pipeline
        # (value mirrors other uncensored parsers — confirm meaning there).
        self.imagecut = 4
        self.uncensored = True

    def search(self, number):
        """Fetch and parse the detail page for *number*.

        Returns the formatted metadata dict, or 404 when the page is missing.
        """
        self.number = number
        if self.specifiedUrl:
            self.detailurl = self.specifiedUrl
        else:
            self.detailurl = 'https://javmenu.com/zh/' + self.number + '/'
        self.htmlcode = self.getHtml(self.detailurl)
        if self.htmlcode == 404:
            return 404
        htmltree = etree.HTML(self.htmlcode)
        result = self.dictformat(htmltree)
        return result

    def getNum(self, htmltree):
        """Reassemble the number from its two page fragments and verify it.

        Raises:
            Exception: when the number found on the page does not match the
                number that was searched for.
        """
        part1 = self.getTreeElement(htmltree, self.expr_number)
        part2 = self.getTreeElement(htmltree, self.expr_number2)
        dp_number = part1 + part2
        if dp_number.upper() != self.number.upper():
            raise Exception(f'[!] {self.number}: find [{dp_number}] in javmenu, not match')
        self.number = dp_number
        return self.number

    def getTitle(self, htmltree):
        """Strip the number prefix and the site watermark from the page title."""
        browser_title = super().getTitle(htmltree)
        # The title starts with the number, so cut at a digit run of the
        # number. The original code indexed re.findall(...)[1] unconditionally,
        # which raised IndexError for numbers with fewer than two digit groups
        # (e.g. "ABC-123"); keep the second group when present, otherwise fall
        # back to the first.
        digit_groups = re.findall(r"\d+", self.number)
        if len(digit_groups) > 1:
            title = browser_title.split(digit_groups[1], 1)[-1]
        elif digit_groups:
            title = browser_title.split(digit_groups[0], 1)[-1]
        else:
            title = browser_title
        title = title.replace(' | JAV目錄大全 | 每日更新', "")
        title = title.replace(' | JAV目录大全 | 每日更新', "").strip()
        return title.replace(self.number, '').strip()

View File

@@ -6,6 +6,28 @@ from urllib.parse import urlparse, unquote
from .parser import Parser
# Number-normalisation rules for non-"md" studios: each pattern captures
# (prefix)(digits)(optional episode/part suffix).
NUM_RULES3 = [
    r'(mmz{2,4})-?(\d{2,})(-ep\d*|-\d*)?.*',
    r'(msd)-?(\d{2,})(-ep\d*|-\d*)?.*',
    r'(yk)-?(\d{2,})(-ep\d*|-\d*)?.*',
    r'(pm)-?(\d{2,})(-ep\d*|-\d*)?.*',
    r'(mky-[a-z]{2,2})-?(\d{2,})(-ep\d*|-\d*)?.*',
]


def change_number(number):
    """Normalise a madou-style id to prefix + zero-padded digits + suffix.

    "md"-prefixed numbers are padded to 4 digits; the studios in NUM_RULES3
    are padded to 3 digits; anything else is returned lower-cased/stripped.
    """
    normalized = number.lower().strip()
    md_match = re.search(r'(md[a-z]{0,2})-?(\d{2,})(-ep\d*|-\d*)?.*', normalized, re.I)
    if md_match:
        prefix, digits, suffix = md_match.group(1, 2, 3)
        return f'{prefix}{digits.zfill(4)}{suffix or ""}'
    for pattern in NUM_RULES3:
        other_match = re.search(pattern, normalized, re.I)
        if other_match:
            prefix, digits, suffix = other_match.group(1, 2, 3)
            return f'{prefix}{digits.zfill(3)}{suffix or ""}'
    return normalized
class Madou(Parser):
source = 'madou'
@@ -14,12 +36,15 @@ class Madou(Parser):
expr_studio = '//a[@rel="category tag"]/text()'
expr_tags = '/html/head/meta[@name="keywords"]/@content'
def extraInit(self):
self.imagecut = 0
self.imagecut = 4
self.uncensored = True
self.allow_number_change = True
def search(self, number):
self.number = number.lower().strip()
self.number = change_number(number)
if self.specifiedUrl:
self.detailurl = self.specifiedUrl
else:
@@ -65,5 +90,5 @@ class Madou(Parser):
def getTags(self, htmltree):
studio = self.getStudio(htmltree)
x = super().getTags(htmltree)
return [i.strip() for i in x if len(i.strip()) and studio not in i and '麻豆' not in i]
tags = super().getTags(htmltree)
return [tag for tag in tags if studio not in tag and '麻豆' not in tag]

View File

@@ -85,7 +85,7 @@ class Parser:
else:
self.detailurl = self.queryNumberUrl(number)
if not self.detailurl:
return None
return 404
htmltree = self.getHtmlTree(self.detailurl)
result = self.dictformat(htmltree)
return result
@@ -210,6 +210,13 @@ class Parser:
def getTags(self, htmltree) -> list:
alls = self.getTreeAll(htmltree, self.expr_tags)
tags = []
for t in alls:
for tag in t.strip().split(','):
tag = tag.strip()
if tag:
tags.append(tag)
return tags
return [ x.strip() for x in alls if x.strip()]
def getStudio(self, htmltree):

87
scrapinglib/pissplay.py Normal file
View File

@@ -0,0 +1,87 @@
# -*- coding: utf-8 -*-
import re
from lxml import etree
from .parser import Parser
from datetime import datetime
# Scrapes videos from https://pissplay.com/
# PissPlay videos have no release numbers, so lookup is done by title:
# a video can only be scraped when the file name exactly matches the
# video's title on the site.
class Pissplay(Parser):
    """Parser for pissplay.com, a site whose videos carry no release number.

    The sanitized file name is used both as the search key (it is turned
    into the detail-page slug) and as the "number" in the resulting metadata.
    """

    source = 'pissplay'

    # No release numbers on this site, so the title doubles as the number.
    expr_number = '//*[@id="video_title"]/text()'
    expr_title = '//*[@id="video_title"]/text()'
    expr_cover = '/html/head//meta[@property="og:image"]/@content'
    expr_tags = '//div[@id="video_tags"]/a/text()'
    expr_release = '//div[@class="video_date"]/text()'
    expr_outline = '//*[@id="video_description"]/p//text()'

    # First-position tags that mark a collaboration video: for these the
    # guest's name is the *second* tag, otherwise it is the first one.
    # (The original condition listed 'Collaboration' twice — deduplicated.)
    _COLLAB_LEAD_TAGS = ('Collaboration', 'Toilet for a Day')

    def extraInit(self):
        self.imagecut = 0  # do not crop the cover image
        self.specifiedSource = None

    def search(self, number):
        """Resolve *number* (a title-like file name) to a detail page and parse it.

        Returns the formatted metadata dict, or 404 when the page is missing.
        """
        self.number = number.strip().upper()
        if self.specifiedUrl:
            self.detailurl = self.specifiedUrl
        else:
            # Build the slug: drop everything but letters/digits/spaces,
            # then hyphenate.
            newName = re.sub(r"[^a-zA-Z0-9 ]", "", number)
            self.detailurl = "https://pissplay.com/videos/" + newName.lower().replace(" ", "-") + "/"
        self.htmlcode = self.getHtml(self.detailurl)
        if self.htmlcode == 404:
            return 404
        htmltree = etree.fromstring(self.htmlcode, etree.HTMLParser())
        result = self.dictformat(htmltree)
        return result

    def getNum(self, htmltree):
        """The site has no numbers, so the (sanitized) title stands in for one."""
        title = self.getTitle(htmltree)
        return title

    def getTitle(self, htmltree):
        """Return the page title with all special characters removed."""
        title = super().getTitle(htmltree)
        title = re.sub(r"[^a-zA-Z0-9 ]", "", title)
        return title

    def getCover(self, htmltree):
        """Return an absolute cover URL.

        NOTE(review): prefixing 'https:' only handles protocol-relative
        ('//...') URLs correctly; a path-relative URL would break — confirm
        the site always emits absolute or protocol-relative og:image URLs.
        """
        url = super().getCover(htmltree)
        if not url.startswith('http'):
            url = 'https:' + url
        return url

    def getRelease(self, htmltree):
        """Convert the site's 'DD Mon YYYY' date to ISO 'YYYY-MM-DD'.

        NOTE(review): '%b' is locale-dependent; assumes an English locale.
        """
        releaseDate = super().getRelease(htmltree)
        isoData = datetime.strptime(releaseDate, '%d %b %Y').strftime('%Y-%m-%d')
        return isoData

    def getStudio(self, htmltree):
        return 'PissPlay'

    def getTags(self, htmltree):
        """Return the tag list with the guest's name removed.

        When the video has a guest ('Guests' present in the tags), the
        guest's name occupies position 1 for collaboration videos and
        position 0 otherwise; it belongs in the actor list, not the tags.
        """
        tags = self.getTreeAll(htmltree, self.expr_tags)
        if 'Guests' in tags:
            if tags[0] in self._COLLAB_LEAD_TAGS:
                del tags[1]
            else:
                tags = tags[1:]
        return tags

    def getActors(self, htmltree) -> list:
        """Derive the actor list from the tags.

        The guest's name is tag 1 for collaboration videos, tag 0 otherwise;
        videos without guests default to the site owners.
        """
        tags = self.getTreeAll(htmltree, self.expr_tags)
        if 'Guests' in tags:
            if tags[0] in self._COLLAB_LEAD_TAGS:
                return [tags[1]]
            else:
                return [tags[0]]
        else:
            return ['Bruce and Morgan']

    def getOutline(self, htmltree):
        """Join the description paragraphs, dropping the trailing sign-off."""
        outline = self.getTreeAll(htmltree, self.expr_outline)
        # Everything from the ' Morgan xx' sign-off onwards is boilerplate.
        if ' Morgan xx' in outline:
            num = outline.index(' Morgan xx')
            outline = outline[:num]
        rstring = ''.join(outline).replace("&", "and")
        return rstring