33
.github/workflows/main.yml
vendored
33
.github/workflows/main.yml
vendored
@@ -19,6 +19,16 @@ jobs:
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
|
||||
- name: Install UPX
|
||||
uses: crazy-max/ghaction-upx@v2
|
||||
if: matrix.os == 'windows-latest' || matrix.os == 'ubuntu-latest'
|
||||
with:
|
||||
install-only: true
|
||||
|
||||
- name: UPX version
|
||||
if: matrix.os == 'windows-latest' || matrix.os == 'ubuntu-latest'
|
||||
run: upx --version
|
||||
|
||||
- name: Setup Python 3.10
|
||||
uses: actions/setup-python@v2
|
||||
with:
|
||||
@@ -28,6 +38,7 @@ jobs:
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install -r requirements.txt
|
||||
pip install face_recognition --no-deps
|
||||
pip install pyinstaller
|
||||
|
||||
- name: Test number_perser.get_number
|
||||
@@ -39,11 +50,11 @@ jobs:
|
||||
run: |
|
||||
pyinstaller \
|
||||
--onefile Movie_Data_Capture.py \
|
||||
--python-option u \
|
||||
--hidden-import "ImageProcessing.cnn" \
|
||||
--add-data "$(python -c 'import cloudscraper as _; print(_.__path__[0])' | tail -n 1):cloudscraper" \
|
||||
--add-data "$(python -c 'import opencc as _; print(_.__path__[0])' | tail -n 1):opencc" \
|
||||
--add-data "$(python -c 'import face_recognition_models as _; print(_.__path__[0])' | tail -n 1):face_recognition_models" \
|
||||
--collect-submodules "scrapinglib" \
|
||||
--collect-submodules "ImageProcessing" \
|
||||
--collect-data "face_recognition_models" \
|
||||
--collect-data "cloudscraper" \
|
||||
--collect-data "opencc" \
|
||||
--add-data "Img:Img" \
|
||||
--add-data "config.ini:." \
|
||||
|
||||
@@ -52,11 +63,11 @@ jobs:
|
||||
run: |
|
||||
pyinstaller `
|
||||
--onefile Movie_Data_Capture.py `
|
||||
--python-option u `
|
||||
--hidden-import "ImageProcessing.cnn" `
|
||||
--add-data "$(python -c 'import cloudscraper as _; print(_.__path__[0])' | tail -n 1);cloudscraper" `
|
||||
--add-data "$(python -c 'import opencc as _; print(_.__path__[0])' | tail -n 1);opencc" `
|
||||
--add-data "$(python -c 'import face_recognition_models as _; print(_.__path__[0])' | tail -n 1);face_recognition_models" `
|
||||
--collect-submodules "scrapinglib" `
|
||||
--collect-submodules "ImageProcessing" `
|
||||
--collect-data "face_recognition_models" `
|
||||
--collect-data "cloudscraper" `
|
||||
--collect-data "opencc" `
|
||||
--add-data "Img;Img" `
|
||||
--add-data "config.ini;." `
|
||||
|
||||
@@ -77,5 +88,5 @@ jobs:
|
||||
- name: Upload build artifact
|
||||
uses: actions/upload-artifact@v1
|
||||
with:
|
||||
name: Movie_Data_Capture-CLI-${{ env.VERSION }}-${{ runner.os }}-amd64
|
||||
name: MDC-${{ env.VERSION }}-${{ runner.os }}-amd64
|
||||
path: dist
|
||||
|
||||
@@ -301,7 +301,7 @@ def get_html_by_scraper(url: str = None, cookies: dict = None, ua: str = None, r
|
||||
|
||||
def translate(
|
||||
src: str,
|
||||
target_language: str = "zh_cn",
|
||||
target_language: str = config.getInstance().get_target_language(),
|
||||
engine: str = "google-free",
|
||||
app_id: str = "",
|
||||
key: str = "",
|
||||
@@ -342,7 +342,15 @@ def translate(
|
||||
result = post_html(url=url, query=body, headers=headers)
|
||||
translate_list = [i["text"] for i in result.json()[0]["translations"]]
|
||||
trans_result = trans_result.join(translate_list)
|
||||
|
||||
elif engine == "deeplx":
|
||||
url = config.getInstance().get_translate_service_site()
|
||||
res = requests.post(f"{url}/translate", json={
|
||||
'text': src,
|
||||
'source_lang': 'auto',
|
||||
'target_lang': target_language,
|
||||
})
|
||||
if res.text.strip():
|
||||
trans_result = res.json().get('data')
|
||||
else:
|
||||
raise ValueError("Non-existent translation engine")
|
||||
|
||||
|
||||
@@ -60,9 +60,9 @@ def face_crop_height(filename, width, height):
|
||||
return (0, 0, width, cropHeight)
|
||||
|
||||
|
||||
def cutImage(imagecut, path, fanart_path, poster_path, skip_facerec=False):
|
||||
def cutImage(imagecut, path, thumb_path, poster_path, skip_facerec=False):
|
||||
conf = config.getInstance()
|
||||
fullpath_fanart = os.path.join(path, fanart_path)
|
||||
fullpath_fanart = os.path.join(path, thumb_path)
|
||||
fullpath_poster = os.path.join(path, poster_path)
|
||||
aspect_ratio = conf.face_aspect_ratio()
|
||||
if conf.face_aways_imagecut():
|
||||
|
||||
@@ -104,9 +104,9 @@ is performed. It may help you correct wrong numbers before real job.""")
|
||||
set_str_or_none("common:source_folder", args.path)
|
||||
set_bool_or_none("common:auto_exit", args.auto_exit)
|
||||
set_natural_number_or_none("common:nfo_skip_days", args.days)
|
||||
set_natural_number_or_none("common:stop_counter", args.cnt)
|
||||
set_natural_number_or_none("advenced_sleep:stop_counter", args.cnt)
|
||||
set_bool_or_none("common:ignore_failed_list", args.ignore_failed_list)
|
||||
set_str_or_none("common:rerun_delay", args.delaytm)
|
||||
set_str_or_none("advenced_sleep:rerun_delay", args.delaytm)
|
||||
set_str_or_none("priority:website", args.site)
|
||||
if isinstance(args.dnimg, bool) and args.dnimg:
|
||||
conf.set_override("common:download_only_missing_images=0")
|
||||
@@ -119,7 +119,7 @@ is performed. It may help you correct wrong numbers before real job.""")
|
||||
if conf.main_mode() == 3:
|
||||
no_net_op = args.no_network_operation
|
||||
if no_net_op:
|
||||
conf.set_override("common:stop_counter=0;rerun_delay=0s;face:aways_imagecut=1")
|
||||
conf.set_override("advenced_sleep:stop_counter=0;advenced_sleep:rerun_delay=0s;face:aways_imagecut=1")
|
||||
|
||||
return args.file, args.number, args.logdir, args.regexstr, args.zero_op, no_net_op, args.specified_source, args.specified_url
|
||||
|
||||
@@ -681,7 +681,7 @@ def period(delta, pattern):
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
version = '6.4.1'
|
||||
version = '6.6.2'
|
||||
urllib3.disable_warnings() # Ignore http proxy warning
|
||||
app_start = time.time()
|
||||
|
||||
|
||||
@@ -24,8 +24,12 @@
|
||||
# 申明
|
||||
当你查阅、下载了本项目源代码或二进制程序,即代表你接受了以下条款
|
||||
* 本项目和项目成果仅供技术,学术交流和Python3性能测试使用
|
||||
* 用户必须确保获取影片的途径在用户当地是合法的
|
||||
* 运行时和运行后所获取的元数据和封面图片等数据的版权,归版权持有人持有
|
||||
* 本项目贡献者编写该项目旨在学习Python3 ,提高编程水平
|
||||
* 本项目不提供任何影片下载的线索
|
||||
* 请勿将运行时和运行后获取的数据提供给可能有非法目的的第三方,例如用于非法交易、侵犯未成年人的权利等
|
||||
* 用户仅能在自己的私人计算机或者测试环境中使用该工具,禁止将获取到的数据用于商业目的或其他目的,如销售、传播等
|
||||
* 用户在使用本项目和项目成果前,请用户了解并遵守当地法律法规,如果本项目及项目成果使用过程中存在违反当地法律法规的行为,请勿使用该项目及项目成果
|
||||
* 法律后果及使用后果由使用者承担
|
||||
* [GPL LICENSE](https://github.com/yoshiko2/Movie_Data_Capture/blob/master/LICENSE)
|
||||
@@ -36,3 +40,6 @@
|
||||
|
||||
# 贡献者
|
||||
[贡献者列表](https://github.com/yoshiko2/movie_data_Capture/graphs/contributors)
|
||||
|
||||
# Star History
|
||||
[Star History Chart](https://star-history.com/#yoshiko2/Movie_Data_Capture&Date)
|
||||
|
||||
@@ -32,3 +32,7 @@
|
||||
|
||||
# 贡献者
|
||||
[贡献者列表](https://github.com/yoshiko2/movie_data_Capture/graphs/contributors)
|
||||
|
||||
# Star History
|
||||
|
||||
[Star History Chart](https://star-history.com/#yoshiko2/Movie_Data_Capture&Date)
|
||||
|
||||
18
config.ini
18
config.ini
@@ -21,11 +21,14 @@ nfo_skip_days = 30
|
||||
ignore_failed_list = 0
|
||||
download_only_missing_images = 1
|
||||
mapping_table_validity = 7
|
||||
; 在jellyfin中tags和genres重复,因此可以只保存genres到nfo中
|
||||
donot_save_tags = 0
|
||||
; 一些jellyfin中特有的设置 (0:不开启, 1:开启) 比如
|
||||
; 在jellyfin中tags和genres重复,因此只需保存genres到nfo中
|
||||
; jellyfin中只需要保存thumb,不需要保存fanart
|
||||
jellyfin = 0
|
||||
; 开启后tag和genre只显示演员
|
||||
actor_only_tag = 0
|
||||
sleep = 3
|
||||
anonymous_fill = 1
|
||||
|
||||
[advenced_sleep]
|
||||
; 处理完多少个视频文件后停止,0为处理所有视频文件
|
||||
@@ -48,13 +51,14 @@ cacert_file =
|
||||
location_rule = actor+'/'+number
|
||||
naming_rule = number+'-'+title
|
||||
max_title_len = 50
|
||||
image_naming_with_number = 1
|
||||
; 刮削后图片是否命名为番号
|
||||
image_naming_with_number = 0
|
||||
|
||||
[update]
|
||||
update_check = 1
|
||||
|
||||
[priority]
|
||||
website = javbus,airav,jav321,fanza,xcity,mgstage,fc2,avsox,dlsite,carib,madou,mv91,getchu,javdb,gcolle
|
||||
website = javbus,airav,jav321,fanza,xcity,mgstage,fc2,avsox,dlsite,carib,madou,getchu,javdb,gcolle,javday,javmenu
|
||||
|
||||
[escape]
|
||||
literals = \()/
|
||||
@@ -66,13 +70,15 @@ switch = 0
|
||||
; 机器翻译
|
||||
[translate]
|
||||
switch = 0
|
||||
;可选项 google-free,azure
|
||||
;可选项 google-free,azure,deeplx
|
||||
engine = google-free
|
||||
; azure翻译密钥
|
||||
target_language = zh_cn
|
||||
; azure翻译密钥key
|
||||
key =
|
||||
; 翻译延迟
|
||||
delay = 1
|
||||
values = title,outline
|
||||
; google翻译服务站点,或deeplx访问链接
|
||||
service_site = translate.google.cn
|
||||
|
||||
; 预告片
|
||||
|
||||
55
config.py
55
config.py
@@ -169,13 +169,13 @@ class Config:
|
||||
self._exit("common:main_mode")
|
||||
|
||||
def source_folder(self) -> str:
|
||||
return self.conf.get("common", "source_folder")
|
||||
return self.conf.get("common", "source_folder").replace("\\\\", "/").replace("\\", "/")
|
||||
|
||||
def failed_folder(self) -> str:
|
||||
return self.conf.get("common", "failed_output_folder")
|
||||
return self.conf.get("common", "failed_output_folder").replace("\\\\", "/").replace("\\", "/")
|
||||
|
||||
def success_folder(self) -> str:
|
||||
return self.conf.get("common", "success_output_folder")
|
||||
return self.conf.get("common", "success_output_folder").replace("\\\\", "/").replace("\\", "/")
|
||||
|
||||
def actor_gender(self) -> str:
|
||||
return self.conf.get("common", "actor_gender")
|
||||
@@ -213,8 +213,8 @@ class Config:
|
||||
def mapping_table_validity(self) -> int:
|
||||
return self.conf.getint("common", "mapping_table_validity")
|
||||
|
||||
def donot_save_tags(self) -> int:
|
||||
return self.conf.getint("common", "donot_save_tags")
|
||||
def jellyfin(self) -> int:
|
||||
return self.conf.getint("common", "jellyfin")
|
||||
|
||||
def actor_only_tag(self) -> bool:
|
||||
return self.conf.getboolean("common", "actor_only_tag")
|
||||
@@ -222,13 +222,16 @@ class Config:
|
||||
def sleep(self) -> int:
|
||||
return self.conf.getint("common", "sleep")
|
||||
|
||||
def anonymous_fill(self) -> bool:
    """Return whether the ``common:anonymous_fill`` option is switched on.

    The option is read with ``getint`` so the same config values are accepted
    as before; the integer is then converted to ``bool`` so the result matches
    the declared return type (any non-zero value is ``True``).

    :return: ``True`` when the configured integer is non-zero.
    :raises ValueError: if the configured value is not an integer
        (raised by ``getint``).
    """
    return bool(self.conf.getint("common", "anonymous_fill"))
|
||||
|
||||
def stop_counter(self) -> int:
|
||||
return self.conf.getint("advenced_sleep", "stop_counter", fallback=0)
|
||||
|
||||
def rerun_delay(self) -> int:
|
||||
value = self.conf.get("advenced_sleep", "rerun_delay")
|
||||
if not (isinstance(value, str) and re.match(r'^[\dsmh]+$', value, re.I)):
|
||||
return 0 # not match '1h30m45s' or '30' or '1s2m1h4s5m'
|
||||
return 0 # not match '1h30m45s' or '30' or '1s2m1h4s5m'
|
||||
if value.isnumeric() and int(value) >= 0:
|
||||
return int(value)
|
||||
sec = 0
|
||||
@@ -279,6 +282,9 @@ class Config:
|
||||
def get_translate_engine(self) -> str:
|
||||
return self.conf.get("translate", "engine")
|
||||
|
||||
def get_target_language(self) -> str:
|
||||
return self.conf.get("translate", "target_language")
|
||||
|
||||
# def get_translate_appId(self) ->str:
|
||||
# return self.conf.get("translate","appid")
|
||||
|
||||
@@ -439,16 +445,19 @@ class Config:
|
||||
# actor_gender value: female or male or both or all(含人妖)
|
||||
conf.set(sec1, "actor_gender", "female")
|
||||
conf.set(sec1, "del_empty_folder", "1")
|
||||
conf.set(sec1, "nfo_skip_days", 30)
|
||||
conf.set(sec1, "ignore_failed_list", 0)
|
||||
conf.set(sec1, "download_only_missing_images", 1)
|
||||
conf.set(sec1, "mapping_table_validity", 7)
|
||||
conf.set(sec1, "donot_save_tags", 0)
|
||||
conf.set(sec1, "nfo_skip_days", "30")
|
||||
conf.set(sec1, "ignore_failed_list", "0")
|
||||
conf.set(sec1, "download_only_missing_images", "1")
|
||||
conf.set(sec1, "mapping_table_validity", "7")
|
||||
conf.set(sec1, "jellyfin", "0")
|
||||
conf.set(sec1, "actor_only_tag", "0")
|
||||
conf.set(sec1, "sleep", "3")
|
||||
conf.set(sec1, "anonymous_fill", "0")
|
||||
|
||||
sec2 = "advenced_sleep"
|
||||
conf.add_section(sec2)
|
||||
conf.set(sec2, "stop_counter", 0)
|
||||
conf.set(sec2, "rerun_delay", 0)
|
||||
conf.set(sec2, "stop_counter", "0")
|
||||
conf.set(sec2, "rerun_delay", "0")
|
||||
|
||||
sec3 = "proxy"
|
||||
conf.add_section(sec3)
|
||||
@@ -463,6 +472,7 @@ class Config:
|
||||
conf.set(sec4, "location_rule", "actor + '/' + number")
|
||||
conf.set(sec4, "naming_rule", "number + '-' + title")
|
||||
conf.set(sec4, "max_title_len", "50")
|
||||
conf.set(sec4, "image_naming_with_number", "0")
|
||||
|
||||
sec5 = "update"
|
||||
conf.add_section(sec5)
|
||||
@@ -485,6 +495,7 @@ class Config:
|
||||
conf.add_section(sec9)
|
||||
conf.set(sec9, "switch", "0")
|
||||
conf.set(sec9, "engine", "google-free")
|
||||
conf.set(sec9, "target_language", "zh_cn")
|
||||
# conf.set(sec8, "appid", "")
|
||||
conf.set(sec9, "key", "")
|
||||
conf.set(sec9, "delay", "1")
|
||||
@@ -508,28 +519,28 @@ class Config:
|
||||
|
||||
sec13 = "watermark"
|
||||
conf.add_section(sec13)
|
||||
conf.set(sec13, "switch", 1)
|
||||
conf.set(sec13, "water", 2)
|
||||
conf.set(sec13, "switch", "1")
|
||||
conf.set(sec13, "water", "2")
|
||||
|
||||
sec14 = "extrafanart"
|
||||
conf.add_section(sec14)
|
||||
conf.set(sec14, "switch", 1)
|
||||
conf.set(sec14, "switch", "1")
|
||||
conf.set(sec14, "extrafanart_folder", "extrafanart")
|
||||
conf.set(sec14, "parallel_download", 1)
|
||||
conf.set(sec14, "parallel_download", "1")
|
||||
|
||||
sec15 = "storyline"
|
||||
conf.add_section(sec15)
|
||||
conf.set(sec15, "switch", 1)
|
||||
conf.set(sec15, "switch", "1")
|
||||
conf.set(sec15, "site", "1:avno1,4:airavwiki")
|
||||
conf.set(sec15, "censored_site", "2:airav,5:xcity,6:amazon")
|
||||
conf.set(sec15, "uncensored_site", "3:58avgo")
|
||||
conf.set(sec15, "show_result", 0)
|
||||
conf.set(sec15, "run_mode", 1)
|
||||
conf.set(sec15, "cc_convert", 1)
|
||||
conf.set(sec15, "show_result", "0")
|
||||
conf.set(sec15, "run_mode", "1")
|
||||
conf.set(sec15, "cc_convert", "1")
|
||||
|
||||
sec16 = "cc_convert"
|
||||
conf.add_section(sec16)
|
||||
conf.set(sec16, "mode", 1)
|
||||
conf.set(sec16, "mode", "1")
|
||||
conf.set(sec16, "vars", "actor,director,label,outline,series,studio,tag,title")
|
||||
|
||||
sec17 = "javdb"
|
||||
|
||||
158
core.py
158
core.py
@@ -272,22 +272,25 @@ def extrafanart_download_threadpool(url_list, save_dir, number, json_data=None):
|
||||
|
||||
def image_ext(url):
    """Return the image file extension to use for ``url``.

    The suffix is taken from ``os.path.splitext(url)`` and returned only when
    it is one of the recognised image suffixes; anything else (no suffix, an
    unknown suffix, or input that cannot be split) yields ``".jpg"``.

    :param url: image URL or path.
    :return: the recognised suffix including the leading dot, else ``".jpg"``.
    """
    # The whitelist previously spelled ".jpeg" as ".jpge", so URLs ending in
    # ".jpeg" were never recognised and always fell through to ".jpg".
    known_exts = {'.jpg', '.jpeg', '.bmp', '.png', '.gif'}
    try:
        ext = os.path.splitext(url)[-1]
    except (TypeError, AttributeError, ValueError):
        # Input that is not path-like (e.g. None): keep the best-effort
        # default instead of propagating the error.
        return ".jpg"
    if ext in known_exts:
        return ext
    return ".jpg"
|
||||
|
||||
|
||||
# 封面是否下载成功,否则移动到failed
|
||||
def image_download(cover, fanart_path, thumb_path, path, filepath, json_headers=None):
|
||||
full_filepath = os.path.join(path, fanart_path)
|
||||
full_filepath = os.path.join(path, thumb_path)
|
||||
if config.getInstance().download_only_missing_images() and not file_not_exist_or_empty(full_filepath):
|
||||
return
|
||||
if json_headers != None:
|
||||
if download_file_with_filename(cover, fanart_path, path, filepath, json_headers['headers']) == 'failed':
|
||||
if download_file_with_filename(cover, thumb_path, path, filepath, json_headers['headers']) == 'failed':
|
||||
moveFailedFolder(filepath)
|
||||
return
|
||||
else:
|
||||
if download_file_with_filename(cover, fanart_path, path, filepath) == 'failed':
|
||||
if download_file_with_filename(cover, thumb_path, path, filepath) == 'failed':
|
||||
moveFailedFolder(filepath)
|
||||
return
|
||||
|
||||
@@ -296,20 +299,21 @@ def image_download(cover, fanart_path, thumb_path, path, filepath, json_headers=
|
||||
if file_not_exist_or_empty(full_filepath):
|
||||
print('[!]Image Download Failed! Trying again. [{}/3]', i + 1)
|
||||
if json_headers != None:
|
||||
download_file_with_filename(cover, fanart_path, path, filepath, json_headers['headers'])
|
||||
download_file_with_filename(cover, thumb_path, path, filepath, json_headers['headers'])
|
||||
else:
|
||||
download_file_with_filename(cover, fanart_path, path, filepath)
|
||||
download_file_with_filename(cover, thumb_path, path, filepath)
|
||||
continue
|
||||
else:
|
||||
break
|
||||
if file_not_exist_or_empty(full_filepath):
|
||||
return
|
||||
print('[+]Image Downloaded!', Path(full_filepath).name)
|
||||
shutil.copyfile(full_filepath, os.path.join(path, thumb_path))
|
||||
if not config.getInstance().jellyfin():
|
||||
shutil.copyfile(full_filepath, os.path.join(path, fanart_path))
|
||||
|
||||
|
||||
def print_files(path, leak_word, c_word, naming_rule, part, cn_sub, json_data, filepath, tag, actor_list, liuchu,
|
||||
uncensored, hack_word, _4k, fanart_path, poster_path, thumb_path):
|
||||
uncensored, hack, hack_word, _4k, fanart_path, poster_path, thumb_path):
|
||||
title, studio, year, outline, runtime, director, actor_photo, release, number, cover, trailer, website, series, label = get_info(
|
||||
json_data)
|
||||
if config.getInstance().main_mode() == 3: # 模式3下,由于视频文件不做任何改变,.nfo文件必须和视频文件名称除后缀外完全一致,KODI等软件方可支持
|
||||
@@ -332,13 +336,23 @@ def print_files(path, leak_word, c_word, naming_rule, part, cn_sub, json_data, f
|
||||
pass
|
||||
# KODI内查看影片信息时找不到number,配置naming_rule=number+'#'+title虽可解决
|
||||
# 但使得标题太长,放入时常为空的outline内会更适合,软件给outline留出的显示版面也较大
|
||||
outline = f"{number}#{outline}"
|
||||
if not outline:
|
||||
pass
|
||||
elif json_data['source'] == 'pissplay':
|
||||
outline = f"{outline}"
|
||||
else:
|
||||
outline = f"{number}#{outline}"
|
||||
with open(nfo_path, "wt", encoding='UTF-8') as code:
|
||||
print('<?xml version="1.0" encoding="UTF-8" ?>', file=code)
|
||||
print("<movie>", file=code)
|
||||
print(" <title><![CDATA[" + naming_rule + "]]></title>", file=code)
|
||||
print(" <originaltitle><![CDATA[" + naming_rule + "]]></originaltitle>", file=code)
|
||||
print(" <sorttitle><![CDATA[" + naming_rule + "]]></sorttitle>", file=code)
|
||||
if not config.getInstance().jellyfin():
|
||||
print(" <title><![CDATA[" + naming_rule + "]]></title>", file=code)
|
||||
print(" <originaltitle><![CDATA[" + json_data['original_naming_rule'] + "]]></originaltitle>", file=code)
|
||||
print(" <sorttitle><![CDATA[" + naming_rule + "]]></sorttitle>", file=code)
|
||||
else:
|
||||
print(" <title>" + naming_rule + "</title>", file=code)
|
||||
print(" <originaltitle>" + json_data['original_naming_rule'] + "</originaltitle>", file=code)
|
||||
print(" <sorttitle>" + naming_rule + "</sorttitle>", file=code)
|
||||
print(" <customrating>JP-18+</customrating>", file=code)
|
||||
print(" <mpaa>JP-18+</mpaa>", file=code)
|
||||
try:
|
||||
@@ -347,13 +361,18 @@ def print_files(path, leak_word, c_word, naming_rule, part, cn_sub, json_data, f
|
||||
print(" <set></set>", file=code)
|
||||
print(" <studio>" + studio + "</studio>", file=code)
|
||||
print(" <year>" + year + "</year>", file=code)
|
||||
print(" <outline><![CDATA[" + outline + "]]></outline>", file=code)
|
||||
print(" <plot><![CDATA[" + outline + "]]></plot>", file=code)
|
||||
if not config.getInstance().jellyfin():
|
||||
print(" <outline><![CDATA[" + outline + "]]></outline>", file=code)
|
||||
print(" <plot><![CDATA[" + outline + "]]></plot>", file=code)
|
||||
else:
|
||||
print(" <outline>" + outline + "</outline>", file=code)
|
||||
print(" <plot>" + outline + "</plot>", file=code)
|
||||
print(" <runtime>" + str(runtime).replace(" ", "") + "</runtime>", file=code)
|
||||
print(" <director>" + director + "</director>", file=code)
|
||||
print(" <poster>" + poster_path + "</poster>", file=code)
|
||||
print(" <thumb>" + thumb_path + "</thumb>", file=code)
|
||||
print(" <fanart>" + fanart_path + "</fanart>", file=code)
|
||||
if not config.getInstance().jellyfin(): # jellyfin 不需要保存fanart
|
||||
print(" <fanart>" + fanart_path + "</fanart>", file=code)
|
||||
try:
|
||||
for key in actor_list:
|
||||
print(" <actor>", file=code)
|
||||
@@ -368,8 +387,8 @@ def print_files(path, leak_word, c_word, naming_rule, part, cn_sub, json_data, f
|
||||
print(" <maker>" + studio + "</maker>", file=code)
|
||||
print(" <label>" + label + "</label>", file=code)
|
||||
|
||||
skip_tags = config.getInstance().donot_save_tags()
|
||||
if not skip_tags:
|
||||
jellyfin = config.getInstance().jellyfin()
|
||||
if not jellyfin:
|
||||
if config.getInstance().actor_only_tag():
|
||||
for key in actor_list:
|
||||
try:
|
||||
@@ -377,27 +396,27 @@ def print_files(path, leak_word, c_word, naming_rule, part, cn_sub, json_data, f
|
||||
except:
|
||||
pass
|
||||
else:
|
||||
if cn_sub == '1':
|
||||
if cn_sub:
|
||||
print(" <tag>中文字幕</tag>", file=code)
|
||||
if liuchu == '流出':
|
||||
if liuchu:
|
||||
print(" <tag>流出</tag>", file=code)
|
||||
if uncensored == 1:
|
||||
if uncensored:
|
||||
print(" <tag>无码</tag>", file=code)
|
||||
if hack_word != '':
|
||||
if hack:
|
||||
print(" <tag>破解</tag>", file=code)
|
||||
if _4k == '1':
|
||||
if _4k:
|
||||
print(" <tag>4k</tag>", file=code)
|
||||
for i in tag:
|
||||
print(" <tag>" + i + "</tag>", file=code)
|
||||
if cn_sub == '1':
|
||||
if cn_sub:
|
||||
print(" <genre>中文字幕</genre>", file=code)
|
||||
if liuchu == '流出':
|
||||
if liuchu:
|
||||
print(" <genre>无码流出</genre>", file=code)
|
||||
if uncensored == 1:
|
||||
if uncensored:
|
||||
print(" <genre>无码</genre>", file=code)
|
||||
if hack_word != '':
|
||||
if hack:
|
||||
print(" <genre>破解</genre>", file=code)
|
||||
if _4k == '1':
|
||||
if _4k:
|
||||
print(" <genre>4k</genre>", file=code)
|
||||
try:
|
||||
for i in tag:
|
||||
@@ -470,6 +489,7 @@ def add_mark(poster_path, thumb_path, cn_sub, leak, uncensored, hack, _4k) -> No
|
||||
:cn_sub: 中文字幕 可选值:1,"1" 或其他值
|
||||
:uncensored 无码 可选值:1,"1" 或其他值
|
||||
:hack 破解 可选值:1,"1" 或其他值
|
||||
:_4k Bool
|
||||
"""
|
||||
mark_type = ''
|
||||
if cn_sub:
|
||||
@@ -495,17 +515,17 @@ def add_mark_thread(pic_path, cn_sub, leak, uncensored, hack, _4k):
|
||||
# 获取自定义位置,取余配合pos达到顺时针添加的效果
|
||||
# 左上 0, 右上 1, 右下 2, 左下 3
|
||||
count = config.getInstance().watermark_type()
|
||||
if cn_sub == 1 or cn_sub == '1':
|
||||
if cn_sub:
|
||||
add_to_pic(pic_path, img_pic, size, count, 1) # 添加
|
||||
count = (count + 1) % 4
|
||||
if leak == 1 or leak == '1':
|
||||
if leak:
|
||||
add_to_pic(pic_path, img_pic, size, count, 2)
|
||||
count = (count + 1) % 4
|
||||
if uncensored == 1 or uncensored == '1':
|
||||
if uncensored:
|
||||
add_to_pic(pic_path, img_pic, size, count, 3)
|
||||
if hack == 1 or hack == '1':
|
||||
if hack:
|
||||
add_to_pic(pic_path, img_pic, size, count, 4)
|
||||
if _4k == 1 or _4k == '1':
|
||||
if _4k:
|
||||
add_to_pic(pic_path, img_pic, size, count, 5)
|
||||
img_pic.close()
|
||||
|
||||
@@ -613,6 +633,8 @@ def paste_file_to_folder_mode2(filepath, path, multi_part, number, part, leak_wo
|
||||
create_softlink = False
|
||||
if link_mode not in (1, 2):
|
||||
shutil.move(filepath, targetpath)
|
||||
print("[!]Move => ", path)
|
||||
return
|
||||
elif link_mode == 2:
|
||||
try:
|
||||
os.link(filepath, targetpath, follow_symlinks=False)
|
||||
@@ -624,16 +646,13 @@ def paste_file_to_folder_mode2(filepath, path, multi_part, number, part, leak_wo
|
||||
os.symlink(filerelpath, targetpath)
|
||||
except:
|
||||
os.symlink(str(filepath_obj.resolve()), targetpath)
|
||||
return
|
||||
print("[!]Link => ", path)
|
||||
except FileExistsError as fee:
|
||||
print(f'[-]FileExistsError: {fee}')
|
||||
return
|
||||
except PermissionError:
|
||||
print('[-]Error! Please run as administrator!')
|
||||
return
|
||||
except OSError as oserr:
|
||||
print(f'[-]OS Error errno {oserr.errno}')
|
||||
return
|
||||
|
||||
|
||||
def linkImage(path, number, part, leak_word, c_word, hack_word, ext):
|
||||
@@ -693,12 +712,12 @@ def core_main_no_net_op(movie_path, number):
|
||||
conf = config.getInstance()
|
||||
part = ''
|
||||
leak_word = ''
|
||||
leak = 0
|
||||
leak = False
|
||||
c_word = ''
|
||||
cn_sub = ''
|
||||
hack = ''
|
||||
cn_sub = False
|
||||
hack = False
|
||||
hack_word = ''
|
||||
_4k = ''
|
||||
_4k = False
|
||||
imagecut = 1
|
||||
multi = False
|
||||
part = ''
|
||||
@@ -709,30 +728,30 @@ def core_main_no_net_op(movie_path, number):
|
||||
multi = True
|
||||
if re.search(r'[-_]C(\.\w+$|-\w+)|\d+ch(\.\w+$|-\w+)', movie_path,
|
||||
re.I) or '中文' in movie_path or '字幕' in movie_path or ".chs" in movie_path or '.cht' in movie_path:
|
||||
cn_sub = '1'
|
||||
cn_sub = True
|
||||
c_word = '-C' # 中文字幕影片后缀
|
||||
uncensored = 1 if is_uncensored(number) else 0
|
||||
uncensored = True if is_uncensored(number) else 0
|
||||
if '流出' in movie_path or 'uncensored' in movie_path.lower():
|
||||
leak_word = '-无码流出' # 无码流出影片后缀
|
||||
leak = 1
|
||||
leak = True
|
||||
|
||||
if 'hack'.upper() in str(movie_path).upper() or '破解' in movie_path:
|
||||
hack = 1
|
||||
hack = True
|
||||
hack_word = "-hack"
|
||||
|
||||
# try:
|
||||
|
||||
# props = get_video_properties(movie_path) # 判断是否为4K视频
|
||||
# if props['width'] >= 4096 or props['height'] >= 2160:
|
||||
# _4k = '1'
|
||||
# _4k = True
|
||||
# except:
|
||||
# pass
|
||||
|
||||
prestr = f"{number}{leak_word}{c_word}{hack_word}"
|
||||
|
||||
full_nfo = Path(path) / f"{prestr}{part}.nfo"
|
||||
if full_nfo.is_file():
|
||||
if full_nfo.read_text(encoding='utf-8').find(r'<tag>无码</tag>') >= 0:
|
||||
uncensored = 1
|
||||
uncensored = True
|
||||
try:
|
||||
nfo_xml = etree.parse(full_nfo)
|
||||
nfo_fanart_path = nfo_xml.xpath('//fanart/text()')[0]
|
||||
@@ -791,15 +810,15 @@ def move_subtitles(filepath, path, multi_part, number, part, leak_word, c_word,
|
||||
def core_main(movie_path, number_th, oCC, specified_source=None, specified_url=None):
|
||||
conf = config.getInstance()
|
||||
# =======================================================================初始化所需变量
|
||||
multi_part = 0
|
||||
multi_part = False
|
||||
part = ''
|
||||
leak_word = ''
|
||||
c_word = ''
|
||||
cn_sub = ''
|
||||
liuchu = ''
|
||||
hack = ''
|
||||
cn_sub = False
|
||||
liuchu = False
|
||||
hack = False
|
||||
hack_word = ''
|
||||
_4k = ''
|
||||
_4k = False
|
||||
|
||||
# 下面被注释的变量不需要
|
||||
# rootpath = os.getcwd
|
||||
@@ -822,11 +841,11 @@ def core_main(movie_path, number_th, oCC, specified_source=None, specified_url=N
|
||||
tag = json_data.get('tag')
|
||||
# =======================================================================判断-C,-CD后缀
|
||||
if re.search('[-_]CD\d+', movie_path, re.IGNORECASE):
|
||||
multi_part = 1
|
||||
multi_part = True
|
||||
part = re.findall('[-_]CD\d+', movie_path, re.IGNORECASE)[0].upper()
|
||||
if re.search(r'[-_]C(\.\w+$|-\w+)|\d+ch(\.\w+$|-\w+)', movie_path,
|
||||
re.I) or '中文' in movie_path or '字幕' in movie_path:
|
||||
cn_sub = '1'
|
||||
cn_sub = True
|
||||
c_word = '-C' # 中文字幕影片后缀
|
||||
|
||||
# 判断是否无码
|
||||
@@ -835,19 +854,22 @@ def core_main(movie_path, number_th, oCC, specified_source=None, specified_url=N
|
||||
|
||||
if '流出' in movie_path or 'uncensored' in movie_path.lower():
|
||||
liuchu = '流出'
|
||||
leak = 1
|
||||
leak = True
|
||||
leak_word = '-无码流出' # 流出影片后缀
|
||||
else:
|
||||
leak = 0
|
||||
leak = False
|
||||
|
||||
if 'hack'.upper() in str(movie_path).upper() or '破解' in movie_path:
|
||||
hack = 1
|
||||
hack = True
|
||||
hack_word = "-hack"
|
||||
|
||||
if '4k'.upper() in str(movie_path).upper() or '4k' in movie_path:
|
||||
_4k = True
|
||||
|
||||
# 判断是否4k
|
||||
if '4K' in tag:
|
||||
tag.remove('4K') # 从tag中移除'4K'
|
||||
|
||||
|
||||
# 判断是否为无码破解
|
||||
if '无码破解' in tag:
|
||||
tag.remove('无码破解') # 从tag中移除'无码破解'
|
||||
@@ -855,7 +877,7 @@ def core_main(movie_path, number_th, oCC, specified_source=None, specified_url=N
|
||||
# try:
|
||||
# props = get_video_properties(movie_path) # 判断是否为4K视频
|
||||
# if props['width'] >= 4096 or props['height'] >= 2160:
|
||||
# _4k = '1'
|
||||
# _4k = True
|
||||
# except:
|
||||
# pass
|
||||
|
||||
@@ -920,7 +942,7 @@ def core_main(movie_path, number_th, oCC, specified_source=None, specified_url=N
|
||||
pass
|
||||
|
||||
# 裁剪图
|
||||
cutImage(imagecut, path, fanart_path, poster_path, bool(conf.face_uncensored_only() and not uncensored))
|
||||
cutImage(imagecut, path, thumb_path, poster_path, bool(conf.face_uncensored_only() and not uncensored))
|
||||
|
||||
# 兼容Jellyfin封面图文件名规则
|
||||
if multi_part and conf.jellyfin_multi_part_fanart():
|
||||
@@ -932,7 +954,7 @@ def core_main(movie_path, number_th, oCC, specified_source=None, specified_url=N
|
||||
# Move subtitles
|
||||
move_status = move_subtitles(movie_path, path, multi_part, number, part, leak_word, c_word, hack_word)
|
||||
if move_status:
|
||||
cn_sub = "1"
|
||||
cn_sub = True
|
||||
# 添加水印
|
||||
if conf.is_watermark():
|
||||
add_mark(os.path.join(path, poster_path), os.path.join(path, thumb_path), cn_sub, leak, uncensored,
|
||||
@@ -940,7 +962,7 @@ def core_main(movie_path, number_th, oCC, specified_source=None, specified_url=N
|
||||
|
||||
# 最后输出.nfo元数据文件,以完成.nfo文件创建作为任务成功标志
|
||||
print_files(path, leak_word, c_word, json_data.get('naming_rule'), part, cn_sub, json_data, movie_path, tag,
|
||||
json_data.get('actor_list'), liuchu, uncensored, hack_word
|
||||
json_data.get('actor_list'), liuchu, uncensored, hack, hack_word
|
||||
, _4k, fanart_path, poster_path, thumb_path)
|
||||
|
||||
elif conf.main_mode() == 2:
|
||||
@@ -948,13 +970,9 @@ def core_main(movie_path, number_th, oCC, specified_source=None, specified_url=N
|
||||
path = create_folder(json_data)
|
||||
# 移动文件
|
||||
paste_file_to_folder_mode2(movie_path, path, multi_part, number, part, leak_word, c_word, hack_word)
|
||||
|
||||
# Move subtitles
|
||||
move_status = move_subtitles(movie_path, path, multi_part, number, part, leak_word, c_word, hack_word)
|
||||
if move_status:
|
||||
cn_sub = "1"
|
||||
if conf.is_watermark():
|
||||
add_mark(os.path.join(path, poster_path), os.path.join(path, thumb_path), cn_sub, leak, uncensored, hack,
|
||||
_4k)
|
||||
move_subtitles(movie_path, path, multi_part, number, part, leak_word, c_word, hack_word)
|
||||
|
||||
elif conf.main_mode() == 3:
|
||||
path = str(Path(movie_path).parent)
|
||||
@@ -998,7 +1016,7 @@ def core_main(movie_path, number_th, oCC, specified_source=None, specified_url=N
|
||||
|
||||
# 添加水印
|
||||
if conf.is_watermark():
|
||||
add_mark(os.path.join(path, poster_path), os.path.join(path, thumb_path), cn_sub, leak, uncensored, hack,
|
||||
add_mark(os.path.join(path, poster_path), os.path.join(path, fanart_path), cn_sub, leak, uncensored, hack,
|
||||
_4k)
|
||||
|
||||
# 兼容Jellyfin封面图文件名规则
|
||||
@@ -1007,5 +1025,5 @@ def core_main(movie_path, number_th, oCC, specified_source=None, specified_url=N
|
||||
|
||||
# 最后输出.nfo元数据文件,以完成.nfo文件创建作为任务成功标志
|
||||
print_files(path, leak_word, c_word, json_data.get('naming_rule'), part, cn_sub, json_data, movie_path,
|
||||
tag, json_data.get('actor_list'), liuchu, uncensored, hack_word, fanart_path, poster_path,
|
||||
tag, json_data.get('actor_list'), liuchu, uncensored, hack, hack_word, _4k, fanart_path, poster_path,
|
||||
thumb_path)
|
||||
|
||||
BIN
donate.png
BIN
donate.png
Binary file not shown.
|
Before Width: | Height: | Size: 146 KiB After Width: | Height: | Size: 163 KiB |
@@ -7,7 +7,7 @@ import typing
|
||||
G_spat = re.compile(
|
||||
"^\w+\.(cc|com|net|me|club|jp|tv|xyz|biz|wiki|info|tw|us|de)@|^22-sht\.me|"
|
||||
"^(fhd|hd|sd|1080p|720p|4K)(-|_)|"
|
||||
"(-|_)(fhd|hd|sd|1080p|720p|4K|x264|x265|uncensored|leak)",
|
||||
"(-|_)(fhd|hd|sd|1080p|720p|4K|x264|x265|uncensored|hack|leak)",
|
||||
re.IGNORECASE)
|
||||
|
||||
|
||||
@@ -54,12 +54,12 @@ def get_number(debug: bool, file_path: str) -> str:
|
||||
filename = str(re.sub("\[\d{4}-\d{1,2}-\d{1,2}\] - ", "", filepath)) # 去除文件名中时间
|
||||
lower_check = filename.lower()
|
||||
if 'fc2' in lower_check:
|
||||
filename = lower_check.replace('ppv', '').replace('--', '-').replace('_', '-').upper()
|
||||
filename = lower_check.replace('--', '-').replace('_', '-').upper()
|
||||
filename = re.sub("[-_]cd\d{1,2}", "", filename, flags=re.IGNORECASE)
|
||||
if not re.search("-|_", filename): # 去掉-CD1之后再无-的情况,例如n1012-CD1.wmv
|
||||
return str(re.search(r'\w+', filename[:filename.find('.')], re.A).group())
|
||||
file_number = os.path.splitext(filename)
|
||||
filename = re.search(r'\w+(-|_)\w+', filename, re.A)
|
||||
filename = re.search(r'[\w\-_]+', filename, re.A)
|
||||
if filename:
|
||||
file_number = str(filename.group())
|
||||
else:
|
||||
@@ -85,34 +85,7 @@ def get_number(debug: bool, file_path: str) -> str:
|
||||
print(f'[-]Number Parser exception: {e} [{file_path}]')
|
||||
return None
|
||||
|
||||
# modou提取number
|
||||
def md(filename):
|
||||
m = re.search(r'(md[a-z]{0,2}-?)(\d{2,})(-ep\d*|-\d*)*', filename, re.I)
|
||||
return f'{m.group(1).replace("-","").upper()}{m.group(2).zfill(4)}{m.group(3) or ""}'
|
||||
|
||||
def mmz(filename):
|
||||
m = re.search(r'(mmz-?)(\d{2,})(-ep\d*)*', filename, re.I)
|
||||
return f'{m.group(1).replace("-","").upper()}{m.group(2).zfill(3)}{m.group(3) or ""}'
|
||||
|
||||
def msd(filename):
|
||||
m = re.search(r'(msd-?)(\d{2,})(-ep\d*)*', filename, re.I)
|
||||
return f'{m.group(1).replace("-","").upper()}{m.group(2).zfill(3)}{m.group(3) or ""}'
|
||||
|
||||
def mky(filename):
|
||||
m = re.search(r'(mky-[a-z]{2,2}-?)(\d{2,})(-ep\d*)*', filename, re.I)
|
||||
return f'{m.group(1).replace("-","").upper()}{m.group(2).zfill(3)}{m.group(3) or ""}'
|
||||
|
||||
def yk(filename):
|
||||
m = re.search(r'(yk-?)(\d{2,})(-ep\d*)*', filename, re.I)
|
||||
return f'{m.group(1).replace("-","").upper()}{m.group(2).zfill(3)}{m.group(3) or ""}'
|
||||
|
||||
def pm(filename):
|
||||
m = re.search(r'(pm[a-z]?-?)(\d{2,})(-ep\d*)*', filename, re.I)
|
||||
return f'{m.group(1).replace("-","").upper()}{m.group(2).zfill(3)}{m.group(3) or ""}'
|
||||
|
||||
def fsog(filename):
|
||||
m = re.search(r'(fsog-?)(\d{2,})(-ep\d*)*', filename, re.I)
|
||||
return f'{m.group(1).replace("-","").upper()}{m.group(2).zfill(3)}{m.group(3) or ""}'
|
||||
|
||||
# 按javdb数据源的命名规范提取number
|
||||
G_TAKE_NUM_RULES = {
|
||||
@@ -126,13 +99,6 @@ G_TAKE_NUM_RULES = {
|
||||
'heyzo': lambda x: 'HEYZO-' + re.findall(r'heyzo[^\d]*(\d{4})', x, re.I)[0],
|
||||
'mdbk': lambda x: str(re.search(r'mdbk(-|_)(\d{4})', x, re.I).group()),
|
||||
'mdtm': lambda x: str(re.search(r'mdtm(-|_)(\d{4})', x, re.I).group()),
|
||||
r'\bmd[a-z]{0,2}-\d{2,}': md,
|
||||
r'\bmmz-\d{2,}':mmz,
|
||||
r'\bmsd-\d{2,}':msd,
|
||||
r'\bmky-[a-z]{2,2}-\d{2,}':mky,
|
||||
r'\byk-\d{2,3}': yk,
|
||||
r'\bpm[a-z]?-?\d{2,}':pm,
|
||||
r'\bfsog-?\d{2,}':fsog
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -1,25 +1,25 @@
|
||||
# If you can't run this script, please execute the following command in PowerShell.
|
||||
# Set-ExecutionPolicy RemoteSigned -Scope CurrentUser -Force
|
||||
|
||||
$CLOUDSCRAPER_PATH=$(python -c 'import cloudscraper as _; print(_.__path__[0])' | select -Last 1)
|
||||
$OPENCC_PATH=$(python -c 'import opencc as _; print(_.__path__[0])' | select -Last 1)
|
||||
$FACE_RECOGNITION_MODELS=$(python -c 'import face_recognition_models as _; print(_.__path__[0])' | select -Last 1)
|
||||
|
||||
mkdir build
|
||||
mkdir __pycache__
|
||||
|
||||
pyinstaller --onefile Movie_Data_Capture.py `
|
||||
--hidden-import "ImageProcessing.cnn" `
|
||||
--python-option u `
|
||||
--add-data "$FACE_RECOGNITION_MODELS;face_recognition_models" `
|
||||
--add-data "$CLOUDSCRAPER_PATH;cloudscraper" `
|
||||
--add-data "$OPENCC_PATH;opencc" `
|
||||
--add-data "Img;Img" `
|
||||
--add-data "config.ini;." `
|
||||
|
||||
rmdir -Recurse -Force build
|
||||
rmdir -Recurse -Force __pycache__
|
||||
rmdir -Recurse -Force Movie_Data_Capture.spec
|
||||
|
||||
echo "[Make]Finish"
|
||||
pause
|
||||
# If you can't run this script, please execute the following command in PowerShell.
|
||||
# Set-ExecutionPolicy RemoteSigned -Scope CurrentUser -Force
|
||||
|
||||
# bugfix:set submodules find path
|
||||
$Env:PYTHONPATH=$pwd.path
|
||||
$PYTHONPATH=$pwd.path
|
||||
mkdir build
|
||||
mkdir __pycache__
|
||||
|
||||
pyinstaller --collect-submodules "scrapinglib" `
|
||||
--collect-submodules "ImageProcessing" `
|
||||
--collect-data "face_recognition_models" `
|
||||
--collect-data "cloudscraper" `
|
||||
--collect-data "opencc" `
|
||||
--add-data "Img;Img" `
|
||||
--add-data "config.ini;." `
|
||||
--onefile Movie_Data_Capture.py
|
||||
|
||||
|
||||
rmdir -Recurse -Force build
|
||||
rmdir -Recurse -Force __pycache__
|
||||
rmdir -Recurse -Force Movie_Data_Capture.spec
|
||||
|
||||
echo "[Make]Finish"
|
||||
pause
|
||||
|
||||
@@ -1,4 +1,8 @@
|
||||
requests
|
||||
dlib-bin
|
||||
Click
|
||||
numpy
|
||||
face-recognition-models
|
||||
lxml
|
||||
beautifulsoup4
|
||||
pillow
|
||||
@@ -8,5 +12,3 @@ urllib3
|
||||
certifi
|
||||
MechanicalSoup
|
||||
opencc-python-reimplemented
|
||||
face_recognition
|
||||
get-video-properties
|
||||
|
||||
21
scraper.py
21
scraper.py
@@ -99,6 +99,10 @@ def get_data_from_json(
|
||||
|
||||
# ================================================网站规则添加结束================================================
|
||||
|
||||
if json_data.get('title') == '':
|
||||
print('[-]Movie Number or Title not found!')
|
||||
return None
|
||||
|
||||
title = json_data.get('title')
|
||||
actor_list = str(json_data.get('actor')).strip("[ ]").replace("'", '').split(',') # 字符串转列表
|
||||
actor_list = [actor.strip() for actor in actor_list] # 去除空白
|
||||
@@ -134,11 +138,10 @@ def get_data_from_json(
|
||||
tag.remove('XXXX')
|
||||
while 'xxx' in tag:
|
||||
tag.remove('xxx')
|
||||
actor = str(actor_list).strip("[ ]").replace("'", '').replace(" ", '')
|
||||
|
||||
if title == '' or number == '':
|
||||
print('[-]Movie Number or Title not found!')
|
||||
return None
|
||||
if json_data['source'] =='pissplay': # pissplay actor为英文名,不用去除空格
|
||||
actor = str(actor_list).strip("[ ]").replace("'", '')
|
||||
else:
|
||||
actor = str(actor_list).strip("[ ]").replace("'", '').replace(" ", '')
|
||||
|
||||
# if imagecut == '3':
|
||||
# DownloadFileWithFilename()
|
||||
@@ -266,14 +269,22 @@ def get_data_from_json(
|
||||
pass
|
||||
|
||||
naming_rule = ""
|
||||
original_naming_rule = ""
|
||||
for i in conf.naming_rule().split("+"):
|
||||
if i not in json_data:
|
||||
naming_rule += i.strip("'").strip('"')
|
||||
original_naming_rule += i.strip("'").strip('"')
|
||||
else:
|
||||
item = json_data.get(i)
|
||||
naming_rule += item if type(item) is not list else "&".join(item)
|
||||
# PATCH:处理[title]存在翻译的情况,后续NFO文件的original_name只会直接沿用naming_rule,这导致original_name非原始名
|
||||
# 理应在翻译处处理 naming_rule和original_naming_rule
|
||||
if i == 'title':
|
||||
item = json_data.get('original_title')
|
||||
original_naming_rule += item if type(item) is not list else "&".join(item)
|
||||
|
||||
json_data['naming_rule'] = naming_rule
|
||||
json_data['original_naming_rule'] = original_naming_rule
|
||||
return json_data
|
||||
|
||||
|
||||
|
||||
@@ -1,3 +1,2 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from .api import search, getSupportedSources
|
||||
from .api import search, getSupportedSources
|
||||
@@ -20,6 +20,8 @@ from .xcity import Xcity
|
||||
from .avsox import Avsox
|
||||
from .javlibrary import Javlibrary
|
||||
from .javday import Javday
|
||||
from .pissplay import Pissplay
|
||||
from .javmenu import Javmenu
|
||||
|
||||
from .tmdb import Tmdb
|
||||
from .imdb import Imdb
|
||||
@@ -51,8 +53,8 @@ class Scraping:
|
||||
"""
|
||||
"""
|
||||
adult_full_sources = ['javlibrary', 'javdb', 'javbus', 'airav', 'fanza', 'xcity', 'jav321',
|
||||
'mgstage', 'fc2', 'avsox', 'dlsite', 'carib', 'madou',
|
||||
'getchu', 'gcolle','javday'
|
||||
'mgstage', 'fc2', 'avsox', 'dlsite', 'carib', 'madou',
|
||||
'getchu', 'gcolle', 'javday', 'pissplay', 'javmenu'
|
||||
]
|
||||
adult_func_mapping = {
|
||||
'avsox': Avsox().scrape,
|
||||
@@ -70,7 +72,9 @@ class Scraping:
|
||||
'javdb': Javdb().scrape,
|
||||
'getchu': Getchu().scrape,
|
||||
'javlibrary': Javlibrary().scrape,
|
||||
'javday': Javday().scrape
|
||||
'javday': Javday().scrape,
|
||||
'pissplay': Pissplay().scrape,
|
||||
'javmenu': Javmenu().scrape
|
||||
}
|
||||
|
||||
general_full_sources = ['tmdb', 'imdb']
|
||||
@@ -143,6 +147,14 @@ class Scraping:
|
||||
print(f'[-]Movie Number [{name}] not found!')
|
||||
return None
|
||||
|
||||
# If actor is anonymous, Fill in Anonymous
|
||||
if len(json_data['actor']) == 0:
|
||||
if config.getInstance().anonymous_fill() == True:
|
||||
if "zh_" in config.getInstance().get_target_language():
|
||||
json_data['actor'] = "佚名"
|
||||
else:
|
||||
json_data['actor'] = "Anonymous"
|
||||
|
||||
return json_data
|
||||
|
||||
def searchAdult(self, number, sources):
|
||||
@@ -174,13 +186,13 @@ class Scraping:
|
||||
break
|
||||
except:
|
||||
continue
|
||||
|
||||
|
||||
# javdb的封面有水印,如果可以用其他源的封面来替换javdb的封面
|
||||
if 'source' in json_data and json_data['source'] == 'javdb':
|
||||
# search other sources
|
||||
other_sources = sources[sources.index('javdb') + 1:]
|
||||
while other_sources:
|
||||
# If cover not found in other source, then skip using other sources using javdb cover instead
|
||||
# If cover not found in other source, then skip using other sources using javdb cover instead
|
||||
try:
|
||||
other_json_data = self.searchAdult(number, other_sources)
|
||||
if other_json_data is not None and 'cover' in other_json_data and other_json_data['cover'] != '':
|
||||
@@ -195,12 +207,20 @@ class Scraping:
|
||||
other_sources = sources[sources.index(other_json_data['source']) + 1:]
|
||||
except:
|
||||
pass
|
||||
|
||||
|
||||
# Return if data not found in all sources
|
||||
if not json_data:
|
||||
print(f'[-]Movie Number [{number}] not found!')
|
||||
return None
|
||||
|
||||
# If actor is anonymous, Fill in Anonymous
|
||||
if len(json_data['actor']) == 0:
|
||||
if config.getInstance().anonymous_fill() == True:
|
||||
if "zh_" in config.getInstance().get_target_language():
|
||||
json_data['actor'] = "佚名"
|
||||
else:
|
||||
json_data['actor'] = "Anonymous"
|
||||
|
||||
return json_data
|
||||
|
||||
def checkGeneralSources(self, c_sources, name):
|
||||
@@ -283,4 +303,8 @@ class Scraping:
|
||||
return False
|
||||
if data["number"] is None or data["number"] == "" or data["number"] == "null":
|
||||
return False
|
||||
if (data["cover"] is None or data["cover"] == "" or data["cover"] == "null") \
|
||||
and (data["cover_small"] is None or data["cover_small"] == "" or
|
||||
data["cover_small"] == "null"):
|
||||
return False
|
||||
return True
|
||||
|
||||
@@ -31,12 +31,14 @@ class Avsox(Parser):
|
||||
site = self.getTreeElement(qurySiteTree, '//div[@class="container"]/div/a/@href')
|
||||
self.searchtree = self.getHtmlTree(site + '/cn/search/' + number)
|
||||
result1 = self.getTreeElement(self.searchtree, '//*[@id="waterfall"]/div/a/@href')
|
||||
if result1 == '' or result1 == 'null' or result1 == 'None':
|
||||
if result1 == '' or result1 == 'null' or result1 == 'None' or result1.find('movie') == -1:
|
||||
self.searchtree = self.getHtmlTree(site + '/cn/search/' + number.replace('-', '_'))
|
||||
result1 = self.getTreeElement(self.searchtree, '//*[@id="waterfall"]/div/a/@href')
|
||||
if result1 == '' or result1 == 'null' or result1 == 'None':
|
||||
if result1 == '' or result1 == 'null' or result1 == 'None' or result1.find('movie') == -1:
|
||||
self.searchtree = self.getHtmlTree(site + '/cn/search/' + number.replace('_', ''))
|
||||
result1 = self.getTreeElement(self.searchtree, '//*[@id="waterfall"]/div/a/@href')
|
||||
if result1 == '' or result1 == 'null' or result1 == 'None' or result1.find('movie') == -1:
|
||||
return None
|
||||
return "https:" + result1
|
||||
|
||||
def getNum(self, htmltree):
|
||||
|
||||
@@ -49,13 +49,13 @@ class Fanza(Parser):
|
||||
self.detailurl = url + fanza_search_number
|
||||
url = "https://www.dmm.co.jp/age_check/=/declared=yes/?"+ urlencode({"rurl": self.detailurl})
|
||||
self.htmlcode = self.getHtml(url)
|
||||
if self.htmlcode != 404:
|
||||
if self.htmlcode != 404 \
|
||||
and 'Sorry! This content is not available in your region.' not in self.htmlcode:
|
||||
self.htmltree = etree.HTML(self.htmlcode)
|
||||
break
|
||||
if self.htmlcode == 404:
|
||||
return 404
|
||||
result = self.dictformat(self.htmltree)
|
||||
return result
|
||||
if self.htmltree is not None:
|
||||
result = self.dictformat(self.htmltree)
|
||||
return result
|
||||
return 404
|
||||
|
||||
def getNum(self, htmltree):
|
||||
# for some old page, the input number does not match the page
|
||||
|
||||
@@ -22,6 +22,7 @@ class Fc2(Parser):
|
||||
|
||||
def extraInit(self):
|
||||
self.imagecut = 0
|
||||
self.allow_number_change = True
|
||||
|
||||
def search(self, number):
|
||||
self.number = number.lower().replace('fc2-ppv-', '').replace('fc2-', '')
|
||||
|
||||
@@ -128,7 +128,7 @@ class Javbus(Parser):
|
||||
|
||||
def getTags(self, htmltree):
|
||||
tags = self.getTreeElement(htmltree, self.expr_tags).split(',')
|
||||
return tags[1:]
|
||||
return tags[2:]
|
||||
|
||||
def getOutline(self, htmltree):
|
||||
if self.morestoryline:
|
||||
|
||||
@@ -39,3 +39,8 @@ class Javday(Parser):
|
||||
# 删除番号和网站名
|
||||
result = title.replace(self.number,"").replace("- JAVDAY.TV","").strip()
|
||||
return result
|
||||
|
||||
def getTags(self, htmltree) -> list:
    """Return the parent parser's tags, dropping any that mention the site name."""
    kept = []
    for candidate in super().getTags(htmltree):
        # Tags containing the site's own name are branding, not content tags.
        if 'JAVDAY.TV' in candidate:
            continue
        kept.append(candidate)
    return kept
|
||||
|
||||
61
scrapinglib/javmenu.py
Normal file
61
scrapinglib/javmenu.py
Normal file
@@ -0,0 +1,61 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import re
|
||||
from lxml import etree
|
||||
from urllib.parse import urljoin
|
||||
|
||||
from .parser import Parser
|
||||
|
||||
|
||||
class Javmenu(Parser):
    """Scraper for javmenu.com detail pages.

    The detail URL is built directly from the movie number (or taken from
    ``specifiedUrl`` when one is set), and the fields are read from the page
    with the XPath expressions below.
    """
    source = 'javmenu'

    # Title and cover are read from the page's Open Graph <meta> tags.
    expr_title = '/html/head/meta[@property="og:title"]/@content'
    expr_cover = '/html/head/meta[@property="og:image"]/@content'

    # Field labels are matched in both Traditional and Simplified Chinese.
    expr_number = '//span[contains(text(),"番號") or contains(text(),"番号")]/../a/text()'
    expr_number2 = '//span[contains(text(),"番號") or contains(text(),"番号")]/../span[2]/text()'
    # The Traditional label used to be matched as "時長;" (stray semicolon);
    # matching "時長" still covers that spelling and the bare label as well.
    expr_runtime = '//span[contains(text(),"時長") or contains(text(),"时长")]/../span[2]/text()'
    expr_release = '//span[contains(text(),"日期")]/../span[2]/text()'
    expr_studio = '//span[contains(text(),"製作")]/../span[2]/a/text()'

    expr_actor = '//a[contains(@class,"actress")]/text()'
    expr_tags = '//a[contains(@class,"genre")]/text()'

    def extraInit(self):
        """Set the parser flags specific to this source."""
        self.imagecut = 4
        self.uncensored = True

    def search(self, number):
        """Fetch the detail page for ``number`` and return the parsed result.

        Returns 404 when the page fetch reports 404; otherwise returns
        whatever ``dictformat`` produces for the parsed HTML tree.
        """
        self.number = number
        if self.specifiedUrl:
            self.detailurl = self.specifiedUrl
        else:
            self.detailurl = 'https://javmenu.com/zh/' + self.number + '/'
        self.htmlcode = self.getHtml(self.detailurl)
        if self.htmlcode == 404:
            return 404
        htmltree = etree.HTML(self.htmlcode)
        result = self.dictformat(htmltree)
        return result

    def getNum(self, htmltree):
        """Return the number shown on the page after checking it matches the request.

        Raises:
            Exception: when the number found on the page differs
                (case-insensitively) from the requested one.
        """
        # The number is split across two elements; join them to get the full number.
        part1 = self.getTreeElement(htmltree, self.expr_number)
        part2 = self.getTreeElement(htmltree, self.expr_number2)
        dp_number = part1 + part2
        # NOTE: verify the match first, then adopt the page's spelling of the number.
        if dp_number.upper() != self.number.upper():
            raise Exception(f'[!] {self.number}: find [{dp_number}] in javmenu, not match')
        self.number = dp_number
        return self.number

    def getTitle(self, htmltree):
        """Return the page title with the number and the site suffix removed."""
        browser_title = super().getTitle(htmltree)
        # Remove the number: keep only the text after its numeric part.
        # The second digit group is preferred (as before); numbers with a single
        # digit group such as "ABC-123" fall back to that group instead of
        # raising IndexError, and numbers without digits skip the split.
        digit_groups = re.findall(r"\d+", self.number)
        if len(digit_groups) > 1:
            marker = digit_groups[1]
        elif digit_groups:
            marker = digit_groups[0]
        else:
            marker = ''
        title = browser_title.split(marker, 1)[-1] if marker else browser_title
        # Strip the site suffix in both Traditional and Simplified Chinese.
        title = title.replace(' | JAV目錄大全 | 每日更新', "")
        title = title.replace(' | JAV目录大全 | 每日更新', "").strip()
        return title.replace(self.number, '').strip()
|
||||
|
||||
@@ -6,6 +6,28 @@ from urllib.parse import urlparse, unquote
|
||||
from .parser import Parser
|
||||
|
||||
|
||||
# Prefixes whose numeric part is zero-padded to three digits.
# Each pattern captures (prefix, digits, optional "-epN" / "-N" suffix).
# The first rule used to read "(mmz{2,4})", which means "mm" followed by two
# to four "z" and therefore never matched the real "mmz" prefix.
NUM_RULES3 = [
    r'(mmz)-?(\d{2,})(-ep\d*|-\d*)?.*',
    r'(msd)-?(\d{2,})(-ep\d*|-\d*)?.*',
    r'(yk)-?(\d{2,})(-ep\d*|-\d*)?.*',
    r'(pm)-?(\d{2,})(-ep\d*|-\d*)?.*',
    r'(mky-[a-z]{2,2})-?(\d{2,})(-ep\d*|-\d*)?.*',
]


# Extract the number for Madou.
def change_number(number):
    """Normalize a movie number to the form used in Madou lookups.

    The input is lower-cased and stripped. A "md" prefix (with up to two extra
    letters) gets its digits zero-padded to four places; the prefixes listed
    in ``NUM_RULES3`` get theirs padded to three. The separator between prefix
    and digits is dropped and an optional "-epN" / "-N" suffix is kept.
    Anything that matches no rule is returned lower-cased and stripped.
    """
    number = number.lower().strip()
    m = re.search(r'(md[a-z]{0,2})-?(\d{2,})(-ep\d*|-\d*)?.*', number, re.I)
    if m:
        return f'{m.group(1)}{m.group(2).zfill(4)}{m.group(3) or ""}'
    for rule in NUM_RULES3:
        m = re.search(rule, number, re.I)
        if m:
            return f'{m.group(1)}{m.group(2).zfill(3)}{m.group(3) or ""}'
    return number
|
||||
|
||||
|
||||
|
||||
class Madou(Parser):
|
||||
source = 'madou'
|
||||
|
||||
@@ -14,12 +36,15 @@ class Madou(Parser):
|
||||
expr_studio = '//a[@rel="category tag"]/text()'
|
||||
expr_tags = '/html/head/meta[@name="keywords"]/@content'
|
||||
|
||||
|
||||
|
||||
def extraInit(self):
|
||||
self.imagecut = 0
|
||||
self.imagecut = 4
|
||||
self.uncensored = True
|
||||
self.allow_number_change = True
|
||||
|
||||
def search(self, number):
|
||||
self.number = number.lower().strip()
|
||||
self.number = change_number(number)
|
||||
if self.specifiedUrl:
|
||||
self.detailurl = self.specifiedUrl
|
||||
else:
|
||||
@@ -65,5 +90,5 @@ class Madou(Parser):
|
||||
|
||||
def getTags(self, htmltree):
|
||||
studio = self.getStudio(htmltree)
|
||||
x = super().getTags(htmltree)
|
||||
return [i.strip() for i in x if len(i.strip()) and studio not in i and '麻豆' not in i]
|
||||
tags = super().getTags(htmltree)
|
||||
return [tag for tag in tags if studio not in tag and '麻豆' not in tag]
|
||||
|
||||
@@ -85,7 +85,7 @@ class Parser:
|
||||
else:
|
||||
self.detailurl = self.queryNumberUrl(number)
|
||||
if not self.detailurl:
|
||||
return None
|
||||
return 404
|
||||
htmltree = self.getHtmlTree(self.detailurl)
|
||||
result = self.dictformat(htmltree)
|
||||
return result
|
||||
@@ -210,6 +210,13 @@ class Parser:
|
||||
|
||||
def getTags(self, htmltree) -> list:
|
||||
alls = self.getTreeAll(htmltree, self.expr_tags)
|
||||
tags = []
|
||||
for t in alls:
|
||||
for tag in t.strip().split(','):
|
||||
tag = tag.strip()
|
||||
if tag:
|
||||
tags.append(tag)
|
||||
return tags
|
||||
return [ x.strip() for x in alls if x.strip()]
|
||||
|
||||
def getStudio(self, htmltree):
|
||||
|
||||
87
scrapinglib/pissplay.py
Normal file
87
scrapinglib/pissplay.py
Normal file
@@ -0,0 +1,87 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import re
|
||||
from lxml import etree
|
||||
from .parser import Parser
|
||||
from datetime import datetime
|
||||
|
||||
# 搜刮 https://pissplay.com/ 中的视频
|
||||
# pissplay中的视频没有番号,所以要通过文件名搜索
|
||||
# 只用文件名和网站视频名完全一致时才可以被搜刮
|
||||
class Pissplay(Parser):
    """Scraper for pissplay.com video pages.

    The detail URL is derived from the given name (or taken from
    ``specifiedUrl`` when one is set); the page title doubles as the number.
    """
    source = 'pissplay'

    expr_number = '//*[@id="video_title"]/text()'  # videos on this site have no number, so the title is used instead
    expr_title = '//*[@id="video_title"]/text()'
    expr_cover = '/html/head//meta[@property="og:image"]/@content'
    expr_tags = '//div[@id="video_tags"]/a/text()'
    expr_release = '//div[@class="video_date"]/text()'
    expr_outline = '//*[@id="video_description"]/p//text()'

    def extraInit(self):
        """Set the parser flags specific to this source."""
        self.imagecut = 0  # do not crop the cover
        self.specifiedSource = None

    def search(self, number):
        """Fetch the video page for ``number`` and return the parsed result.

        Returns 404 when the page fetch reports 404; otherwise returns
        whatever ``dictformat`` produces for the parsed HTML tree.
        """
        self.number = number.strip().upper()
        if self.specifiedUrl:
            self.detailurl = self.specifiedUrl
        else:
            # Build the URL slug: drop special characters, lower-case, spaces -> dashes.
            newName = re.sub(r"[^a-zA-Z0-9 ]", "", number)
            self.detailurl = "https://pissplay.com/videos/" + newName.lower().replace(" ", "-") + "/"
        self.htmlcode = self.getHtml(self.detailurl)
        if self.htmlcode == 404:
            return 404
        htmltree = etree.fromstring(self.htmlcode, etree.HTMLParser())
        result = self.dictformat(htmltree)
        return result

    def getNum(self, htmltree):
        """Return the cleaned title, which stands in for the number."""
        title = self.getTitle(htmltree)
        return title

    def getTitle(self, htmltree):
        """Return the page title with everything but letters, digits and spaces removed."""
        title = super().getTitle(htmltree)
        title = re.sub(r"[^a-zA-Z0-9 ]", "", title)  # remove special characters
        return title

    def getCover(self, htmltree):
        """Return the cover URL, adding an ``https:`` scheme when it lacks one."""
        url = super().getCover(htmltree)
        # Leave an empty result untouched: prefixing it would yield the bare
        # string 'https:', which no longer looks like a missing cover.
        if url and not url.startswith('http'):
            url = 'https:' + url
        return url

    def getRelease(self, htmltree):
        """Return the release date converted from 'DD Mon YYYY' to 'YYYY-MM-DD'."""
        releaseDate = super().getRelease(htmltree)
        isoData = datetime.strptime(releaseDate, '%d %b %Y').strftime('%Y-%m-%d')
        return isoData

    def getStudio(self, htmltree):
        """Return the fixed studio name for this source."""
        return 'PissPlay'

    def getTags(self, htmltree):
        """Return the page's tags with the guest performer's name removed.

        When a 'Guests' tag is present, the guest's name is the first tag, or
        the second one when the first tag is 'Collaboration' or
        'Toilet for a Day'.
        """
        tags = self.getTreeAll(htmltree, self.expr_tags)
        if 'Guests' in tags:
            if tags[0] in ('Collaboration', 'Toilet for a Day'):
                del tags[1]
            else:
                tags = tags[1:]
        return tags

    def getActors(self, htmltree) -> list:
        """Return the guest performer as a one-element list, or the default pair.

        Uses the same tag position as ``getTags`` to locate the guest's name.
        """
        tags = self.getTreeAll(htmltree, self.expr_tags)
        if 'Guests' in tags:
            if tags[0] in ('Collaboration', 'Toilet for a Day'):
                return [tags[1]]
            else:
                return [tags[0]]
        else:
            return ['Bruce and Morgan']

    def getOutline(self, htmltree):
        """Return the description text, cut off at the '– Morgan xx' sign-off."""
        outline = self.getTreeAll(htmltree, self.expr_outline)
        if '– Morgan xx' in outline:
            num = outline.index('– Morgan xx')
            outline = outline[:num]
        rstring = ''.join(outline).replace("&", "and")
        return rstring
|
||||
Reference in New Issue
Block a user