Merge branch 'upstream'
# Conflicts:
#	WebCrawler/fanza.py
.github/workflows/main.yml (vendored, 4 changes)
@@ -39,7 +39,7 @@ jobs:
       run: |
         pyinstaller \
         --onefile Movie_Data_Capture.py \
-        --hidden-import "ImageProcessing.hog" \
+        --hidden-import "ImageProcessing.cnn" \
         --add-data "$(python -c 'import cloudscraper as _; print(_.__path__[0])' | tail -n 1):cloudscraper" \
         --add-data "$(python -c 'import opencc as _; print(_.__path__[0])' | tail -n 1):opencc" \
         --add-data "$(python -c 'import face_recognition_models as _; print(_.__path__[0])' | tail -n 1):face_recognition_models" \
@@ -51,7 +51,7 @@ jobs:
       run: |
         pyinstaller `
         --onefile Movie_Data_Capture.py `
-        --hidden-import "ImageProcessing.hog" `
+        --hidden-import "ImageProcessing.cnn" `
        --add-data "$(python -c 'import cloudscraper as _; print(_.__path__[0])' | tail -n 1);cloudscraper" `
        --add-data "$(python -c 'import opencc as _; print(_.__path__[0])' | tail -n 1);opencc" `
        --add-data "$(python -c 'import face_recognition_models as _; print(_.__path__[0])' | tail -n 1);face_recognition_models" `
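Note on the two hunks above: PyInstaller's --add-data separates SRC and DEST with the platform path separator, hence ':' in the bash step and ';' in the PowerShell step. The command substitution just resolves the installed package directory to bundle; a minimal sketch of what it computes (cloudscraper stands in for any of the three packages):

    import cloudscraper as _
    print(_.__path__[0])  # e.g. .../site-packages/cloudscraper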
@@ -18,6 +18,7 @@ from requests.adapters import HTTPAdapter
 from urllib3.util.retry import Retry
 from cloudscraper import create_scraper
 from concurrent.futures import ThreadPoolExecutor
+from unicodedata import category


 def getXpathSingle(htmlcode, xpath):
@@ -26,7 +27,7 @@ def getXpathSingle(htmlcode, xpath):
     return result1


-G_USER_AGENT = r'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36'
+G_USER_AGENT = r'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.75 Safari/537.36'


 def get_html(url, cookies: dict = None, ua: str = None, return_type: str = None, encoding: str = None):
@@ -69,7 +70,6 @@ def get_html(url, cookies: dict = None, ua: str = None, return_type: str = None, encoding: str = None):
         print('[-]Connect Failed! Please check your Proxy or Network!')
         raise Exception('Connect Failed')

-
 def post_html(url: str, query: dict, headers: dict = None) -> requests.Response:
     configProxy = config.getInstance().proxy()
     errors = ""
@@ -381,7 +381,7 @@ def load_cookies(cookie_json_filename: str):
                 break
         if not cookies_filename:
             return None, None
-        return json.load(open(cookies_filename)), cookies_filename
+        return json.loads(Path(cookies_filename).read_text(encoding='utf-8')), cookies_filename
     except:
         return None, None

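The one-line load_cookies change swaps an unclosed open() handle, decoded with the platform default encoding, for an explicit UTF-8 read that opens and closes the file in one call. A standalone sketch of the difference (the filename is illustrative):

    import json
    from pathlib import Path

    # old: the file object is never closed; decoding uses the locale encoding
    # cookies = json.load(open('cookies.json'))

    # new: read as UTF-8 and close immediately
    cookies = json.loads(Path('cookies.json').read_text(encoding='utf-8'))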
@@ -466,7 +466,7 @@ def download_file_with_filename(url: str, filename: str, path: str) -> None:
             os.makedirs(path)
         except:
             print(f"[-]Fatal error! Can not make folder '{path}'")
-            sys.exit(0)
+            os._exit(0)
     proxies = configProxy.proxies()
     headers = {
         'User-Agent': G_USER_AGENT}
@@ -483,7 +483,7 @@ def download_file_with_filename(url: str, filename: str, path: str) -> None:
             os.makedirs(path)
         except:
             print(f"[-]Fatal error! Can not make folder '{path}'")
-            sys.exit(0)
+            os._exit(0)
     headers = {
         'User-Agent': G_USER_AGENT}
     r = requests.get(url, timeout=configProxy.timeout, headers=headers)
@@ -519,14 +519,13 @@ def download_one_file(args) -> str:
     wrapped for map function
     """

-    def _inner(url: str, save_path: Path):
-        filebytes = get_html(url, return_type='content')
-        if isinstance(filebytes, bytes) and len(filebytes):
-            if len(filebytes) == save_path.open('wb').write(filebytes):
-                return str(save_path)
-
-    return _inner(*args)
+    (url, save_path) = args
+    filebytes = get_html(url, return_type='content')
+    if isinstance(filebytes, bytes) and len(filebytes):
+        with save_path.open('wb') as fpbyte:
+            if len(filebytes) == fpbyte.write(filebytes):
+                return str(save_path)


 def parallel_download_files(dn_list: typing.Iterable[typing.Sequence], parallel: int = 0):
     """
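After this rewrite, download_one_file unpacks its (url, save_path) tuple directly instead of going through an inner closure, and the with-block guarantees the file handle is closed. It stays a one-argument callable, so it can be fed straight to a pool map; a sketch with placeholder URLs:

    from concurrent.futures import ThreadPoolExecutor
    from pathlib import Path

    dn_list = [('https://example.com/poster.jpg', Path('poster.jpg')),
               ('https://example.com/fanart.jpg', Path('fanart.jpg'))]
    with ThreadPoolExecutor(max_workers=2) as pool:
        # each tuple is one `args`; a result is the saved path, or None on failure
        results = list(pool.map(download_one_file, dn_list))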
@@ -567,6 +566,7 @@ def delete_all_elements_in_list(string: str, lists: typing.Iterable[str]):
             new_lists.append(i)
     return new_lists


 def delete_all_elements_in_str(string_delete: str, string: str):
     """
     delete same string in given list
@@ -574,4 +574,9 @@ def delete_all_elements_in_str(string_delete: str, string: str):
     for i in string:
         if i == string_delete:
             string = string.replace(i,"")
     return string
+
+
+# space-padding calculation for print-format alignment when the content contains Chinese characters
+def cnspace(v: str, n: int) -> int:
+    return n - [category(c) for c in v].count('Lo')
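The new cnspace() helper compensates for full-width CJK characters, whose Unicode category is 'Lo' (Letter, other) and which occupy two terminal cells: the pad target is reduced by one per such character. A worked example, assuming a monospace terminal:

    from unicodedata import category

    def cnspace(v: str, n: int) -> int:
        return n - [category(c) for c in v].count('Lo')

    # '中文标题' has four 'Lo' chars -> ljust to 16, which displays as 20 cells,
    # so both rows end at the same column.
    print('中文标题'.ljust(cnspace('中文标题', 20)), '|')
    print('ascii'.ljust(cnspace('ascii', 20)), '|')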
@@ -1,12 +1,18 @@
+import sys
+sys.path.append('../')
+
 import logging
 import os
 import config
 import importlib
+from pathlib import Path
 from PIL import Image
 import shutil
+from ADC_function import file_not_exist_or_empty


 def face_crop_width(filename, width, height):
+    aspect_ratio = config.getInstance().face_aspect_ratio()
     # the new width is 2/3 of the height
     cropWidthHalf = int(height/3)
     try:
@@ -21,15 +27,15 @@ def face_crop_width(filename, width, height):
         # clamp to the image bounds
         if cropLeft < 0:
             cropLeft = 0
-            cropRight = cropWidthHalf*2
+            cropRight = cropWidthHalf * aspect_ratio
         elif cropRight > width:
-            cropLeft = width-cropWidthHalf*2
+            cropLeft = width - cropWidthHalf * aspect_ratio
             cropRight = width
         return (cropLeft, 0, cropRight, height)
     except:
         print('[-]Not found face! ' + filename)
         # crop from the right edge by default
-        return (width-cropWidthHalf*2, 0, width, height)
+        return (width-cropWidthHalf * aspect_ratio, 0, width, height)


 def face_crop_height(filename, width, height):
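Worked numbers for the aspect_ratio change, assuming face_aspect_ratio() defaults to 2 so the old hard-coded 2:3 poster shape is preserved (the default value is an assumption, not shown in this diff):

    height = 600
    cropWidthHalf = int(height / 3)            # 200
    old_width = cropWidthHalf * 2              # 400, fixed 2:3 poster
    aspect_ratio = 2                           # assumed config default
    new_width = cropWidthHalf * aspect_ratio   # 400 here; configurable otherwise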
@@ -54,29 +60,43 @@ def face_crop_height(filename, width, height):
         return (0, 0, width, cropHeight)


-def cutImage(imagecut, path, fanart_path, poster_path):
+def cutImage(imagecut, path, fanart_path, poster_path, skip_facerec=False):
+    conf = config.getInstance()
     fullpath_fanart = os.path.join(path, fanart_path)
     fullpath_poster = os.path.join(path, poster_path)
-    if imagecut == 1:  # crop the big cover
+    aspect_ratio = conf.face_aspect_ratio()
+    if conf.face_aways_imagecut():
+        imagecut = 1
+    elif conf.download_only_missing_images() and not file_not_exist_or_empty(fullpath_poster):
+        return
+    # imagecut == 4 also marks a censored movie; crop its cover with face recognition too
+    if imagecut == 1 or imagecut == 4:  # crop the big cover
         try:
             img = Image.open(fullpath_fanart)
             width, height = img.size
             if width/height > 2/3:  # wider than 2:3
-                # crop centered on the face
-                img2 = img.crop(face_crop_width(fullpath_fanart, width, height))
+                if imagecut == 4:
+                    # crop centered on the face
+                    img2 = img.crop(face_crop_width(fullpath_fanart, width, height))
+                elif skip_facerec:
+                    # censored cover: crop from the right edge by default
+                    img2 = img.crop((width - int(height / 3) * aspect_ratio, 0, width, height))
+                else:
+                    # crop centered on the face
+                    img2 = img.crop(face_crop_width(fullpath_fanart, width, height))
             elif width/height < 2/3:  # taller than 2:3
                 # crop from the bottom up
                 img2 = img.crop(face_crop_height(fullpath_fanart, width, height))
             else:  # exactly 2/3
                 img2 = img
             img2.save(fullpath_poster)
-            print('[+]Image Cutted! ' + fullpath_poster)
+            print(f"[+]Image Cutted! {Path(fullpath_poster).name}")
         except Exception as e:
             print(e)
             print('[-]Cover cut failed!')
     elif imagecut == 0:  # copy the cover as-is
         shutil.copyfile(fullpath_fanart, fullpath_poster)
-        print('[+]Image Copyed! ' + fullpath_poster)
+        print(f"[+]Image Copyed! {Path(fullpath_poster).name}")


 def face_center(filename, model):
@@ -91,5 +111,5 @@ def face_center(filename, model):
         return (0, 0)

 if __name__ == '__main__':
-    cutImage(1,'H:\\test\\','12.jpg','test.jpg')
+    cutImage(1,'z:/t/','p.jpg','o.jpg')
+    #cutImage(1,'H:\\test\\','12.jpg','test.jpg')
@@ -1,4 +1,8 @@
-import hog
+import sys
+sys.path.append('../')
+
+from ImageProcessing.hog import face_center as hog_face_center


 def face_center(filename, model):
-    return hog.face_center(filename, model)
+    return hog_face_center(filename, model)
Makefile (12 changes)
@@ -7,18 +7,20 @@ SHELL = /bin/bash

 .DEFAULT: make
 make:
-	#@echo "[+]make prepare-dev"
-	#sudo apt-get -y install python3.7 python3-pip
-	#pip3 install -r requirements.txt
-	#pip3 install pyinstaller
+	@echo "[+]make prepare-dev"
+	#sudo apt-get -y install python3 python3-pip
+	pip3 install -r requirements.txt
+	pip3 install pyinstaller

 	#@echo "[+]Set CLOUDSCRAPER_PATH variable"
 	#export cloudscraper_path=$(python3 -c 'import cloudscraper as _; print(_.__path__[0])' | tail -n 1)

 	@echo "[+]Pyinstaller make"
-	pyinstaller --onefile AV_Data_Capture.py --hidden-import ADC_function.py --hidden-import core.py \
+	pyinstaller --onefile Movie_Data_Capture.py --hidden-import ADC_function.py --hidden-import core.py \
+	--hidden-import "ImageProcessing.cnn" \
 	--add-data "`python3 -c 'import cloudscraper as _; print(_.__path__[0])' | tail -n 1`:cloudscraper" \
 	--add-data "`python3 -c 'import opencc as _; print(_.__path__[0])' | tail -n 1`:opencc" \
+	--add-data "`python3 -c 'import face_recognition_models as _; print(_.__path__[0])' | tail -n 1`:face_recognition_models" \
 	--add-data "Img:Img" \
 	--add-data "config.ini:." \
File diff suppressed because it is too large
@@ -1,4 +1,4 @@
 <?xml version="1.0" encoding="UTF-8" standalone="yes"?>
 <!-- Note: this file can be opened in a text editor and edited by hand.
 keyword: keywords used to match tags/directors/series/studios/publishers; every name must be surrounded by commas. When a scraped keyword is contained in the list, the word for the configured language is output.
 zh_cn/zh_tw/jp: the output word for each language. When the output word is "删除" (delete), the keyword is removed from the corresponding field. -->
@@ -575,7 +575,7 @@
 <a zh_cn="一本道" zh_tw="一本道" jp="一本道" keyword=",一本道,"/>
 <a zh_cn="加勒比" zh_tw="加勒比" jp="加勒比" keyword=",加勒比,カリビアンコム,"/>
 <a zh_cn="东京热" zh_tw="東京熱" jp="TOKYO-HOT" keyword=",东京热,東京熱,東熱,TOKYO-HOT,"/>
-<a zh_cn="SOD" zh_tw="SOD" jp="SOD" keyword=",SOD,SODクリエイト,サディスティックヴィレッジ,"/>
+<a zh_cn="SOD" zh_tw="SOD" jp="SOD" keyword=",SOD,SODクリエイト,"/>
 <a zh_cn="PRESTIGE" zh_tw="PRESTIGE" jp="PRESTIGE" keyword=",PRESTIGE,プレステージ,"/>
 <a zh_cn="MOODYZ" zh_tw="MOODYZ" jp="MOODYZ" keyword=",MOODYZ,ムーディーズ,"/>
 <a zh_cn="ROCKET" zh_tw="ROCKET" jp="ROCKET" keyword=",ROCKET,"/>
@@ -600,28 +600,5 @@
 <a zh_cn="WANZ" zh_tw="WANZ" jp="WANZ" keyword=",WANZ,ワンズファクトリー,"/>
 <a zh_cn="BeFree" zh_tw="BeFree" jp="BeFree" keyword=",BeFree,"/>
 <a zh_cn="MAX-A" zh_tw="MAX-A" jp="MAX-A" keyword=",MAX-A,マックスエー,"/>
-<!-- 2021-11-8 Update -->
-<a zh_cn="Energy" zh_tw="Energy" jp="アイエナジー" keyword=",アイエナジー,"/>
-<a zh_cn="Idea Pocket" zh_tw="Idea Pocket" jp="アイデアポケット" keyword=",アイデアポケット,"/>
-<a zh_cn="AKNR" zh_tw="AKNR" jp="アキノリ" keyword=",アキノリ,"/>
-<a zh_cn="Attackers" zh_tw="Attackers" jp="アタッカーズ" keyword=",アタッカーズ,"/>
-<a zh_cn="Alice Japan" zh_tw="Alice Japan" jp="アリスJAPAN" keyword=",アリスJAPAN,"/>
-<a zh_cn="Aurora Project Annex" zh_tw="Aurora Project Annex" jp="オーロラプロジェクト・アネックス" keyword=",オーロラプロジェクト・アネックス,"/>
-<a zh_cn="Crystal 映像" zh_tw="Crystal 映像" jp="クリスタル映像" keyword=",クリスタル映像,"/>
-<a zh_cn="Glory Quest" zh_tw="Glory Quest" jp="グローリークエスト" keyword=",グローリークエスト,"/>
-<a zh_cn="DAS!" zh_tw="DAS!" jp="ダスッ!" keyword=",ダスッ!,"/>
-<a zh_cn="DEEP’s" zh_tw="DEEP’s" jp="ディープス" keyword=",ディープス,"/>
-<a zh_cn="Dogma" zh_tw="Dogma" jp="ドグマ" keyword=",ドグマ,"/>
-<a zh_cn="宇宙企画" zh_tw="宇宙企画" jp="メディアステーション" keyword=",メディアステーション,"/>
-<a zh_cn="WANZ FACTORY" zh_tw="WANZ FACTORY" jp="ワンズファクトリー" keyword=",ワンズファクトリー,"/>
-<a zh_cn="V&R PRODUCE" zh_tw="V&R PRODUCE" jp="V&Rプロダクツ" keyword=",V&Rプロダクツ,V&RPRODUCE,"/>
-<a zh_cn="Real Works" zh_tw="Real Works" jp="レアルワークス" keyword=",レアルワークス,"/>
-<a zh_cn="MAX-A" zh_tw="MAX-A" jp="マックスエー" keyword=",マックスエー,"/>
-<a zh_cn="PETERS MAX" zh_tw="PETERS MAX" jp="ピーターズMAX" keyword=",ピーターズMAX,"/>
-<a zh_cn="NATURAL HIGH" zh_tw="NATURAL HIGH" jp="ナチュラルハイ" keyword=",ナチュラルハイ,"/>
-<a zh_cn="MAXING" zh_tw="MAXING" jp="マキシング" keyword=",マキシング,"/>
-<a zh_cn="M’s Video Group" zh_tw="M’s Video Group" jp="エムズビデオグループ" keyword=",エムズビデオグループ,"/>
-<a zh_cn="Minimum" zh_tw="Minimum" jp="ミニマム" keyword=",ミニマム,"/>
-<a zh_cn="WAAP Entertainment" zh_tw="WAAP Entertainment" jp="ワープエンタテインメント" keyword=",ワープエンタテインメント,"/>
-<a zh_cn="pacopacomama" zh_tw="pacopacomama" jp="パコパコママ" keyword=",pacopacomama,パコパコママ,"/>
 </info>
@@ -18,7 +18,7 @@ from opencc import OpenCC
 import config
 from ADC_function import file_modification_days, get_html, parallel_download_files
 from number_parser import get_number
-from core import core_main, moveFailedFolder
+from core import core_main, core_main_no_net_op, moveFailedFolder


 def check_update(local_version):
@@ -40,7 +40,7 @@ def check_update(local_version):
     print("[*]======================================================")


-def argparse_function(ver: str) -> typing.Tuple[str, str, str, str, bool]:
+def argparse_function(ver: str) -> typing.Tuple[str, str, str, str, bool, bool]:
     conf = config.getInstance()
     parser = argparse.ArgumentParser(epilog=f"Load Config file '{conf.ini_path}'.")
     parser.add_argument("file", default='', nargs='?', help="Single Movie file path.")
@@ -49,6 +49,8 @@ def argparse_function(ver: str) -> typing.Tuple[str, str, str, str, bool]:
                         help="Main mode. 1:Scraping 2:Organizing 3:Scraping in analysis folder")
     parser.add_argument("-n", "--number", default='', nargs='?', help="Custom file number of single movie file.")
     # parser.add_argument("-C", "--config", default='config.ini', nargs='?', help="The config file Path.")
+    parser.add_argument("-L", "--link-mode", default='', nargs='?',
+                        help="Create movie file link. 0:moving movie file, do not create link 1:soft link 2:try hard link first")
     default_logdir = str(Path.home() / '.mlogs')
     parser.add_argument("-o", "--log-dir", dest='logdir', default=default_logdir, nargs='?',
                         help=f"""Duplicate stdout and stderr to logfiles in logging folder, default on.
@@ -60,12 +62,22 @@ def argparse_function(ver: str) -> typing.Tuple[str, str, str, str, bool]:
                         help="Override nfo_skip_days value in config.")
     parser.add_argument("-c", "--stop-counter", dest='cnt', default='', nargs='?',
                         help="Override stop_counter value in config.")
+    parser.add_argument("-R", "--rerun-delay", dest='delaytm', default='', nargs='?',
+                        help="Delay (eg. 1h10m30s or 60 (second)) time and rerun, until all movies proceed. Note: stop_counter value in config or -c must none zero.")
     parser.add_argument("-i", "--ignore-failed-list", action="store_true", help="Ignore failed list '{}'".format(
         os.path.join(os.path.abspath(conf.failed_folder()), 'failed_list.txt')))
     parser.add_argument("-a", "--auto-exit", action="store_true",
                         help="Auto exit after program complete")
     parser.add_argument("-g", "--debug", action="store_true",
                         help="Turn on debug mode to generate diagnostic log for issue report.")
+    parser.add_argument("-N", "--no-network-operation", action="store_true",
+                        help="No network query, do not get metadata, for cover cropping purposes, only takes effect when main mode is 3.")
+    parser.add_argument("-w", "--website", dest='site', default='', nargs='?',
+                        help="Override [priority]website= in config.")
+    parser.add_argument("-D", "--download-images", dest='dnimg', action="store_true",
+                        help="Override [common]download_only_missing_images=0 force invoke image downloading.")
+    parser.add_argument("-C", "--config-override", dest='cfgcmd', default='', nargs='?',
+                        help="Common use config override. grammar: section:key=value[;[section:]key=value] eg. 'de:s=1' or 'debug_mode:switch=1' override[debug_mode]switch=1")
     parser.add_argument("-z", "--zero-operation", dest='zero_op', action="store_true",
                         help="""Only show job list of files and numbers, and **NO** actual operation
 is performed. It may help you correct wrong numbers before real job.""")
@@ -73,24 +85,40 @@ is performed. It may help you correct wrong numbers before real job.""")

     args = parser.parse_args()

-    def get_natural_number_or_none(value):
-        return int(value) if isinstance(value, str) and value.isnumeric() and int(value) >= 0 else None
+    def set_natural_number_or_none(sk, value):
+        if isinstance(value, str) and value.isnumeric() and int(value) >= 0:
+            conf.set_override(f'{sk}={value}')

-    def get_str_or_none(value):
-        return value if isinstance(value, str) and len(value) else None
+    def set_str_or_none(sk, value):
+        if isinstance(value, str) and len(value):
+            conf.set_override(f'{sk}={value}')

-    def get_bool_or_none(value):
-        return True if isinstance(value, bool) and value else None
+    def set_bool_or_none(sk, value):
+        if isinstance(value, bool) and value:
+            conf.set_override(f'{sk}=1')

-    config.G_conf_override["common:main_mode"] = get_natural_number_or_none(args.main_mode)
-    config.G_conf_override["common:source_folder"] = get_str_or_none(args.path)
-    config.G_conf_override["common:auto_exit"] = get_bool_or_none(args.auto_exit)
-    config.G_conf_override["common:nfo_skip_days"] = get_natural_number_or_none(args.days)
-    config.G_conf_override["common:stop_counter"] = get_natural_number_or_none(args.cnt)
-    config.G_conf_override["common:ignore_failed_list"] = get_bool_or_none(args.ignore_failed_list)
-    config.G_conf_override["debug_mode:switch"] = get_bool_or_none(args.debug)
+    set_natural_number_or_none("common:main_mode", args.main_mode)
+    set_natural_number_or_none("common:link_mode", args.link_mode)
+    set_str_or_none("common:source_folder", args.path)
+    set_bool_or_none("common:auto_exit", args.auto_exit)
+    set_natural_number_or_none("common:nfo_skip_days", args.days)
+    set_natural_number_or_none("common:stop_counter", args.cnt)
+    set_bool_or_none("common:ignore_failed_list", args.ignore_failed_list)
+    set_str_or_none("common:rerun_delay", args.delaytm)
+    set_str_or_none("priority:website", args.site)
+    if isinstance(args.dnimg, bool) and args.dnimg:
+        conf.set_override("common:download_only_missing_images=0")
+    set_bool_or_none("debug_mode:switch", args.debug)
+    if isinstance(args.cfgcmd, str) and len(args.cfgcmd.strip()):
+        conf.set_override(args.cfgcmd.strip())

-    return args.file, args.number, args.logdir, args.regexstr, args.zero_op
+    no_net_op = False
+    if conf.main_mode() == 3:
+        no_net_op = args.no_network_operation
+        if no_net_op:
+            conf.set_override("common:stop_counter=0;rerun_delay=0s;face:aways_imagecut=1")
+
+    return args.file, args.number, args.logdir, args.regexstr, args.zero_op, no_net_op


 class OutLogger(object):
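The get_* helpers become set_* helpers that push values straight into the config override instead of returning them for assignment into config.G_conf_override. The -C/--config-override grammar quoted in the help text allows abbreviated section and key names and ';'-chained pairs; illustrative calls (the exact abbreviation rules live in config.py, not shown here):

    conf = config.getInstance()
    conf.set_override('de:s=1')                              # [debug_mode] switch=1
    conf.set_override('common:main_mode=3;stop_counter=20')  # two keys in one call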
@@ -113,9 +141,12 @@ class OutLogger(object):
         self.log.write(msg)

     def flush(self):
-        self.term.flush()
-        self.log.flush()
-        os.fsync(self.log.fileno())
+        if 'flush' in dir(self.term):
+            self.term.flush()
+        if 'flush' in dir(self.log):
+            self.log.flush()
+        if 'fileno' in dir(self.log):
+            os.fsync(self.log.fileno())

     def close(self):
         if self.term is not None:
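The guards presumably matter because stand-ins for sys.stdout in windowed or frozen builds may lack flush() or a usable fileno(). A hypothetical stand-in shows how the checks degrade gracefully:

    class NullWriter:  # hypothetical minimal stream: write() only
        def write(self, msg):
            pass

    term = NullWriter()
    if 'flush' in dir(term):  # False, so flush() is safely skipped
        term.flush()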
@@ -244,39 +275,42 @@ def close_logfile(logdir: str):
         except:
             pass
     # step 3: merge monthly logs into the yearly log
-    if today.month < 4:
-        return
-    mons = [f for f in log_dir.glob(r'*.txt') if re.match(r'^mdc_\d{6}$', f.stem, re.A)]
-    if not mons or not len(mons):
-        return
-    mons.sort()
-    deadline_year = f'mdc_{today.year - 1}13'
-    year_merge = [f for f in mons if f.stem < deadline_year]
-    if not year_merge or not len(year_merge):
-        return
-    toyear = len('12.txt')  # cut length mdc_2020|12.txt
-    for f in year_merge:
-        try:
-            year_file_name = str(f)[:-toyear] + '.txt'  # mdc_2020.txt
-            with open(year_file_name, 'a', encoding='utf-8') as y:
-                y.write(f.read_text(encoding='utf-8'))
-            f.unlink(missing_ok=True)
-        except:
-            pass
+    for i in range(1):
+        if today.month < 4:
+            break
+        mons = [f for f in log_dir.glob(r'*.txt') if re.match(r'^mdc_\d{6}$', f.stem, re.A)]
+        if not mons or not len(mons):
+            break
+        mons.sort()
+        deadline_year = f'mdc_{today.year - 1}13'
+        year_merge = [f for f in mons if f.stem < deadline_year]
+        if not year_merge or not len(year_merge):
+            break
+        toyear = len('12.txt')  # cut length mdc_2020|12.txt
+        for f in year_merge:
+            try:
+                year_file_name = str(f)[:-toyear] + '.txt'  # mdc_2020.txt
+                with open(year_file_name, 'a', encoding='utf-8') as y:
+                    y.write(f.read_text(encoding='utf-8'))
+                f.unlink(missing_ok=True)
+            except:
+                pass
     # step 4: compressing yearly logs. If compression is needed, do it by hand or with an
     # external scheduled script. nongnu lzip is recommended: for text logs of this
     # granularity its compression ratio is currently the best. With lzip -9 the ratio
     # beats xz -9 while using less memory, scaling better across cores (plzip, the
     # multithreaded build), and decompressing faster. Compressed size is roughly 2.4%
     # to 3.7% of the original; a 100MB log shrinks to about 3.7MB.
+    return filepath


 def signal_handler(*args):
     print('[!]Ctrl+C detected, Exit.')
-    sys.exit(9)
+    os._exit(9)


 def sigdebug_handler(*args):
-    config.G_conf_override["debug_mode:switch"] = not config.G_conf_override["debug_mode:switch"]
-    print('[!]Debug {}'.format('On' if config.getInstance().debug() else 'oFF'))
+    conf = config.getInstance()
+    conf.set_override(f"debug_mode:switch={int(not conf.debug())}")
+    print(f"[!]Debug {('oFF', 'On')[int(conf.debug())]}")


 # Added: skip files on the failed list, skip by .nfo modification age, report the total
 # number of skipped videos, list each skipped file in debug mode (-g), skip small ad files
@@ -285,7 +319,7 @@ def movie_lists(source_folder, regexstr: str) -> typing.List[str]:
     main_mode = conf.main_mode()
     debug = conf.debug()
     nfo_skip_days = conf.nfo_skip_days()
-    soft_link = conf.soft_link()
+    link_mode = conf.link_mode()
     file_type = conf.media_type().lower().split(",")
     trailerRE = re.compile(r'-trailer\.', re.IGNORECASE)
     cliRE = None
@@ -296,7 +330,7 @@ def movie_lists(source_folder, regexstr: str) -> typing.List[str]:
         pass
     failed_list_txt_path = Path(conf.failed_folder()).resolve() / 'failed_list.txt'
     failed_set = set()
-    if (main_mode == 3 or soft_link) and not conf.ignore_failed_list():
+    if (main_mode == 3 or link_mode) and not conf.ignore_failed_list():
         try:
             flist = failed_list_txt_path.read_text(encoding='utf-8').splitlines()
             failed_set = set(flist)
@@ -327,20 +361,24 @@ def movie_lists(source_folder, regexstr: str) -> typing.List[str]:
             print('[!]Skip failed movie:', absf)
             continue
         is_sym = full_name.is_symlink()
-        if main_mode != 3 and (is_sym or full_name.stat().st_nlink > 1):  # short-circuit boolean; don't stat() symlinks, they may point at missing targets
-            continue  # file is symlink or hardlink(Linux/NTFS/Darwin)
+        if main_mode != 3 and (is_sym or (full_name.stat().st_nlink > 1 and not conf.scan_hardlink())):  # short-circuit boolean; don't stat() symlinks, they may point at missing targets
+            continue  # outside mode 3, skip symlinks, and skip hardlinks unless hardlink scanning is configured
         # let 0-byte debug samples through; drop sub-120MB ad files such as '苍老师强力推荐.mp4'(102.2MB) '黑道总裁.mp4'(98.4MB) '有趣的妹子激情表演.MP4'(95MB) '有趣的臺灣妹妹直播.mp4'(15.1MB)
         movie_size = 0 if is_sym else full_name.stat().st_size  # as above, don't stat() symlinks for st_size; 0 skips the small-video check
         if 0 < movie_size < 125829120:  # 1024*1024*120=125829120
             continue
         if cliRE and not cliRE.search(absf) or trailerRE.search(full_name.name):
             continue
-        if main_mode == 3 and nfo_skip_days > 0 and file_modification_days(
-                full_name.with_suffix('.nfo')) <= nfo_skip_days:
-            skip_nfo_days_cnt += 1
-            if debug:
-                print(f"[!]Skip movie by it's .nfo which modified within {nfo_skip_days} days: '{absf}'")
-            continue
+        if main_mode == 3:
+            nfo = full_name.with_suffix('.nfo')
+            if not nfo.is_file():
+                if debug:
+                    print(f"[!]Metadata {nfo.name} not found for '{absf}'")
+            elif nfo_skip_days > 0 and file_modification_days(nfo) <= nfo_skip_days:
+                skip_nfo_days_cnt += 1
+                if debug:
+                    print(f"[!]Skip movie by it's .nfo which modified within {nfo_skip_days} days: '{absf}'")
+                continue
         total.append(absf)

     if skip_failed_cnt:
@@ -348,13 +386,13 @@ def movie_lists(source_folder, regexstr: str) -> typing.List[str]:
     if skip_nfo_days_cnt:
         print(
             f"[!]Skip {skip_nfo_days_cnt} movies in source folder '{source}' who's .nfo modified within {nfo_skip_days} days.")
-    if nfo_skip_days <= 0 or not soft_link or main_mode == 3:
+    if nfo_skip_days <= 0 or not link_mode or main_mode == 3:
         return total
     # in link mode, titles already scraped successfully also need their .nfo age checked in the success folder; skip any updated within N days
     skip_numbers = set()
     success_folder = Path(conf.success_folder()).resolve()
     for f in success_folder.glob(r'**/*'):
-        if not re.match(r'\.nfo', f.suffix, re.IGNORECASE):
+        if not re.match(r'\.nfo$', f.suffix, re.IGNORECASE):
             continue
         if file_modification_days(f) > nfo_skip_days:
             continue
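The added '$' anchor tightens the suffix check: re.match() anchors only at the start of the string, so the old pattern also accepted suffixes that merely begin with '.nfo'. For example:

    import re

    print(bool(re.match(r'\.nfo', '.nfoo', re.IGNORECASE)))   # True  (old, too loose)
    print(bool(re.match(r'\.nfo$', '.nfoo', re.IGNORECASE)))  # False (new)
    print(bool(re.match(r'\.nfo$', '.NFO', re.IGNORECASE)))   # True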
@@ -388,7 +426,7 @@ def create_failed_folder(failed_folder: str):
         os.makedirs(failed_folder)
     except:
         print(f"[-]Fatal error! Can not make folder '{failed_folder}'")
-        sys.exit(0)
+        os._exit(0)


 def rm_empty_folder(path):
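This is one of several sys.exit() to os._exit() swaps in the commit. sys.exit() raises SystemExit, which surrounding code or atexit hooks can intercept; os._exit() terminates the process immediately, presumably so a fatal error cannot be swallowed once stdout/stderr are duplicated into log files. A minimal demonstration:

    import os
    import sys

    try:
        sys.exit(1)
    except SystemExit:
        print('SystemExit was caught; execution continued')
    os._exit(1)  # nothing after this line ever runs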
@@ -405,38 +443,44 @@ def rm_empty_folder(path):
         pass


-def create_data_and_move(file_path: str, zero_op, oCC):
+def create_data_and_move(movie_path: str, zero_op: bool, no_net_op: bool, oCC):
     # Normalized number, eg: 111xxx-222.mp4 -> xxx-222.mp4
     debug = config.getInstance().debug()
-    n_number = get_number(debug, os.path.basename(file_path))
-    file_path = os.path.abspath(file_path)
+    n_number = get_number(debug, os.path.basename(movie_path))
+    movie_path = os.path.abspath(movie_path)

     if debug is True:
-        print(f"[!] [{n_number}] As Number making data for '{file_path}'")
+        print(f"[!] [{n_number}] As Number making data for '{movie_path}'")
         if zero_op:
             return
         if n_number:
-            core_main(file_path, n_number, oCC)
+            if no_net_op:
+                core_main_no_net_op(movie_path, n_number)
+            else:
+                core_main(movie_path, n_number, oCC)
         else:
             print("[-] number empty ERROR")
-            moveFailedFolder(file_path)
+            moveFailedFolder(movie_path)
         print("[*]======================================================")
     else:
         try:
-            print(f"[!] [{n_number}] As Number making data for '{file_path}'")
+            print(f"[!] [{n_number}] As Number making data for '{movie_path}'")
             if zero_op:
                 return
             if n_number:
-                core_main(file_path, n_number, oCC)
+                if no_net_op:
+                    core_main_no_net_op(movie_path, n_number)
+                else:
+                    core_main(movie_path, n_number, oCC)
             else:
                 raise ValueError("number empty")
             print("[*]======================================================")
         except Exception as err:
-            print(f"[-] [{file_path}] ERROR:")
+            print(f"[-] [{movie_path}] ERROR:")
             print('[-]', err)

             try:
-                moveFailedFolder(file_path)
+                moveFailedFolder(movie_path)
             except Exception as err:
                 print('[!]', err)

@@ -455,7 +499,7 @@ def create_data_and_move_with_custom_number(file_path: str, custom_number, oCC):
         print("[-] [{}] ERROR:".format(file_path))
         print('[-]', err)

-        if conf.soft_link():
+        if conf.link_mode():
             print("[-]Link {} to failed folder".format(file_path))
             os.symlink(file_path, os.path.join(conf.failed_folder(), file_name))
         else:
@@ -466,23 +510,14 @@ def create_data_and_move_with_custom_number(file_path: str, custom_number, oCC):
         print('[!]', err)


-def main():
-    version = '6.0.2'
-    urllib3.disable_warnings()  # Ignore http proxy warning
-
-    # Read config.ini first, in argparse_function() need conf.failed_folder()
-    conf = config.Config("config.ini")
-
-    # Parse command line args
-    single_file_path, custom_number, logdir, regexstr, zero_op = argparse_function(version)
-
+def main(args: tuple) -> Path:
+    (single_file_path, custom_number, logdir, regexstr, zero_op, no_net_op) = args
+    conf = config.getInstance()

     main_mode = conf.main_mode()
     folder_path = ""
     if main_mode not in (1, 2, 3):
         print(f"[-]Main mode must be 1 or 2 or 3! You can run '{os.path.basename(sys.argv[0])} --help' for more help.")
-        sys.exit(4)
+        os._exit(4)

     signal.signal(signal.SIGINT, signal_handler)
     if sys.platform == 'win32':
@@ -508,8 +543,8 @@ def main(args: tuple) -> Path:
     print(f"[+]Load Config file '{conf.ini_path}'.")
     if conf.debug():
         print('[+]Enable debug')
-    if conf.soft_link():
-        print('[!]Enable soft link')
+    if conf.link_mode() in (1, 2):
+        print('[!]Enable {} link'.format(('soft', 'hard')[conf.link_mode() - 1]))
     if len(sys.argv) > 1:
         print('[!]CmdLine:', " ".join(sys.argv[1:]))
     print('[+]Main Working mode ## {}: {} ## {}{}{}'
@@ -521,7 +556,10 @@ def main(args: tuple) -> Path:
           )

     if conf.update_check():
-        check_update(version)
+        try:
+            check_update(version)
+        except Exception as e:
+            print('[-]Update check failed!',e)

     create_failed_folder(conf.failed_folder())

@@ -539,15 +577,21 @@ def main(args: tuple) -> Path:
         if file_modification_days(str(v)) >= conf.mapping_table_validity():
             print("[+]Mapping Table Out of date! Remove", str(v))
             os.remove(str(v))
-    res = parallel_download_files(((k, v) for k, v in map_tab if not v.exists()))
-    for i, fp in enumerate(res, start=1):
-        if fp and len(fp):
-            print(f"[+] [{i}/{len(res)}] Mapping Table Downloaded to {fp}")
-        else:
-            print(f"[-] [{i}/{len(res)}] Mapping Table Download failed")
-            print("[-] --- AUTO EXIT AFTER 30s !!! --- ")
-            time.sleep(30)
-            os._exit(-1)
+    try:
+        res = parallel_download_files(((k, v) for k, v in map_tab if not v.exists()))
+        for i, fp in enumerate(res, start=1):
+            if fp and len(fp):
+                print(f"[+] [{i}/{len(res)}] Mapping Table Downloaded to {fp}")
+            else:
+                print(f"[-] [{i}/{len(res)}] Mapping Table Download failed")
+    except Exception as e:
+        print("[!] ==================== ERROR ====================")
+        print("[!] " + "Mapping Table Download FAILED".center(47))
+        print("[!] " + "无法连接github".center(47))
+        print("[!] " + "请过几小时再试试".center(47))
+        print("[-] " + "------ AUTO EXIT AFTER 30s !!! ------ ".center(47))
+        time.sleep(30)
+        os._exit(-1)

     # create OpenCC converter
     ccm = conf.cc_convert_mode()
@@ -587,7 +631,7 @@ def main(args: tuple) -> Path:
             percentage = str(count / int(count_all) * 100)[:4] + '%'
             print('[!] {:>30}{:>21}'.format('- ' + percentage + ' [' + str(count) + '/' + count_all + '] -',
                                             time.strftime("%H:%M:%S")))
-            create_data_and_move(movie_path, zero_op, oCC)
+            create_data_and_move(movie_path, zero_op, no_net_op, oCC)
             if count >= stop_count:
                 print("[!]Stop counter triggered!")
                 break
@@ -605,14 +649,68 @@ def main(args: tuple) -> Path:

     print("[+]All finished!!!")

-    close_logfile(logdir)
-
-    if not conf.auto_exit():
-        input("Press enter key exit, you can check the error message before you exit...")
-
-    sys.exit(0)
+    return close_logfile(logdir)
+
+
+def 分析日志文件(logfile):
+    try:
+        if not (isinstance(logfile, Path) and logfile.is_file()):
+            raise FileNotFoundError('log file not found')
+        logtxt = logfile.read_text(encoding='utf-8')
+        扫描电影数 = int(re.findall(r'\[\+]Find (.*) movies\.', logtxt)[0])
+        已处理 = int(re.findall(r'\[1/(.*?)] -', logtxt)[0])
+        完成数 = logtxt.count(r'[+]Wrote!')
+        return 扫描电影数, 已处理, 完成数
+    except:
+        return None, None, None
+
+
+def period(delta, pattern):
+    d = {'d': delta.days}
+    d['h'], rem = divmod(delta.seconds, 3600)
+    d['m'], d['s'] = divmod(rem, 60)
+    return pattern.format(**d)
+

 if __name__ == '__main__':
-    multiprocessing.freeze_support()
-    main()
+    version = '6.1.1'
+    urllib3.disable_warnings()  # Ignore http proxy warning
+    app_start = time.time()
+
+    # Read config.ini first, in argparse_function() need conf.failed_folder()
+    conf = config.Config("config.ini")
+
+    # Parse command line args
+    args = tuple(argparse_function(version))
+
+    再运行延迟 = conf.rerun_delay()
+    if 再运行延迟 > 0 and conf.stop_counter() > 0:
+        while True:
+            try:
+                logfile = main(args)
+                (扫描电影数, 已处理, 完成数) = 分析结果元组 = tuple(分析日志文件(logfile))
+                if all(isinstance(v, int) for v in 分析结果元组):
+                    剩余个数 = 扫描电影数 - 已处理
+                    总用时 = timedelta(seconds=time.time() - app_start)
+                    print(f'All movies:{扫描电影数} processed:{已处理} successes:{完成数} remain:{剩余个数}' +
+                          ' Elapsed time {}'.format(
+                              period(总用时, "{d} day {h}:{m:02}:{s:02}") if 总用时.days == 1
+                              else period(总用时, "{d} days {h}:{m:02}:{s:02}") if 总用时.days > 1
+                              else period(总用时, "{h}:{m:02}:{s:02}")))
+                    if 剩余个数 == 0:
+                        break
+                    下次运行 = datetime.now() + timedelta(seconds=再运行延迟)
+                    print(f'Next run time: {下次运行.strftime("%H:%M:%S")}, rerun_delay={再运行延迟}, press Ctrl+C stop run.')
+                    time.sleep(再运行延迟)
+                else:
+                    break
+            except:
+                break
+    else:
+        main(args)
+
+    if not conf.auto_exit():
+        if sys.platform == 'win32':
+            input("Press enter key exit, you can check the error message before you exit...")
+
+    sys.exit(0)
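A worked example of the new period() helper used for the elapsed-time report:

    from datetime import timedelta

    def period(delta, pattern):  # as introduced in the hunk above
        d = {'d': delta.days}
        d['h'], rem = divmod(delta.seconds, 3600)
        d['m'], d['s'] = divmod(rem, 60)
        return pattern.format(**d)

    print(period(timedelta(days=1, hours=2, minutes=3, seconds=4),
                 '{d} day {h}:{m:02}:{s:02}'))  # 1 day 2:03:04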
README.md (51 changes)
@@ -25,7 +25,7 @@ CLI version

 # Documentation
 * [Official tutorial WIKI](https://github.com/yoshiko2/Movie_Data_Capture/wiki)
-* [VergilGao's Docker deployment](https://github.com/VergilGao/docker-avdc)
+* [VergilGao's Docker deployment](https://github.com/VergilGao/docker-mdc)

 # Download
 * [Releases](https://github.com/yoshiko2/Movie_Data_Capture/releases/latest)
@@ -36,43 +36,40 @@ CLI version
 # Disclaimer
 By viewing or downloading this project's source code or binary program, you accept the following terms

-* This software is for technical exchange and academic exchange only
+* This project and its results are for technical exchange, academic exchange and Python3 performance testing only
 * **Do not promote this project on social platforms inside the GFW**
-* The author wrote this software to learn Python and improve programming skills
+* The project contributors wrote this project to learn Python3 and improve programming skills
-* This software does not provide any clues for video downloads
+* This project does not provide any clues for video downloads
-* Before using this software, understand and comply with local laws and regulations; if using this software would violate them, do not use it
+* Before using this project and its results, understand and comply with local laws and regulations; if using them would violate those laws, do not use them
-* Any illegal acts committed locally while using this software are borne by the user
+* Any illegal acts committed locally while using this project and its results are borne by the user
-* Users are strictly forbidden to use this software for commercial or other personal intentions
+* Users are strictly forbidden to use this project and its results for commercial or other personal intentions
 * Delete the source code and binary program within 24 hours of downloading
-* The mother of anyone who sells this source code will ascend to heaven
-* The project founder yoshiko2 reserves the right of final decision and final interpretation
-* If you disagree with any one of the above terms, do not use this software
+* All consequences arising from the user's use of this project and its results are borne by the user alone; the contributors accept no responsibility
+* If you disagree with any one of the above terms, do not use this project or its results
 ---
 When you view and download the source code or binary program of this project, it means that you have accepted the following terms

-* This software is only for technical exchange and academic exchange
+* This project is only for technical exchange, academic exchange and Python3 performance test
 * **Please do not promote this project on popular social platforms**
-* The software author wrote this software to learn Python and improve programming
+* The project contributors wrote this project to learn Python and improve programming
-* This software does not provide any clues for video download
+* This project does not provide any clues for video download
-* Before using this software, please understand and abide by local laws and regulations. If there is any violation of local laws and regulations during the use of this software, * please do not use this software
+* Before using this project results, please understand and abide by local laws and regulations. If there is any violation of local laws and regulations during the use of this project results, * please do not use this project results
-* When the user uses this software, if the user has any illegal acts in the local area, the user shall bear
+* When the user uses this project results, if the user has any illegal acts in the local area, the user shall bear
-* It is strictly forbidden for users to use this software for commercial and personal intentions
+* It is strictly forbidden for users to use this project and project results for commercial and personal intentions
 * Please delete the source code and binary program within 24 hours after downloading
-* The mother of the source seller will die
-* The author of this software yoshiko2 reserves the right of final decision and final interpretation
-* If the user does not agree with any of the above terms, please do not use this software
+* All consequences caused by the user's use of this project and project results shall be borne by the user, and the contributors shall not be responsible
+* If the user does not agree with any of the above terms, please do not use this project results and project
 ---
 By downloading or viewing this project's source code or binary files, you are deemed to have accepted these terms.
-* This software may be used only for learning development techniques
+* This project may be used only for learning development techniques and Python3 performance testing
 * **Please refrain from promoting this project on social media**
-* The author created this software to study Python and improve technical skill
+* The contributors created this software to study Python and improve technical skill
-* This software provides no video download functionality of any kind
+* This project provides no video download functionality of any kind
-* Before using this software, you must understand the local legal norms; you are responsible for complying with the applicable local laws
+* Before using this project and its results, you must understand the local legal norms; you are responsible for complying with the applicable local laws
-* The author accepts no responsibility for damages or legal liability arising from use of this software
+* The author accepts no responsibility for damages or legal liability arising from use of this project and its results
-* Any use of this software for commercial, business, or other profit-making purposes is prohibited
+* Any use of this project and its results for commercial, business, or other profit-making purposes is prohibited
 * If you download this project's source code or binary files, delete them within 24 hours
-* The mother of the source seller will be called to heaven
+* All consequences caused by the user's use of this project and its results shall be borne by the user; the contributors accept no responsibility
-* The right of final interpretation belongs to the author yoshiko2
 * You may use this software only in compliance with these terms and all applicable laws, terms and rules

@@ -24,6 +24,7 @@ from . import carib
 from . import fc2club
 from . import mv91
 from . import madou
+from . import gcolle


 def get_data_state(data: dict) -> bool:  # detect metadata-fetch failure
@@ -62,7 +63,8 @@ def get_data_from_json(file_number, oCC):
         "carib": carib.main,
         "fc2club": fc2club.main,
         "mv91": mv91.main,
-        "madou": madou.main
+        "madou": madou.main,
+        "gcolle": gcolle.main,
     }

     conf = config.getInstance()
@@ -91,6 +93,8 @@ def get_data_from_json(file_number, oCC):
         sources.insert(0, sources.pop(sources.index("fc2")))
     if "fc2club" in sources:
         sources.insert(0, sources.pop(sources.index("fc2club")))
+    elif "gcolle" in sources and (re.search("\d{6}", file_number)):
+        sources.insert(0, sources.pop(sources.index("gcolle")))
     elif "dlsite" in sources and (
         "rj" in lo_file_number or "vj" in lo_file_number
     ):
@@ -100,6 +104,12 @@ def get_data_from_json(file_number, oCC):
         sources.insert(0, sources.pop(sources.index("javdb")))
     if "xcity" in sources:
         sources.insert(0, sources.pop(sources.index("xcity")))
+    if "madou" in sources:
+        sources.insert(0, sources.pop(sources.index("madou")))
+    elif "madou" in sources and (
+        re.match(r"^[a-z0-9]{3,}-[0-9]{1,}$", lo_file_number)
+    ):
+        sources.insert(0, sources.pop(sources.index("madou")))

     # check sources in func_mapping
     todel = []
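What the madou number pattern in the hunk above accepts: at least three lower-case alphanumerics, a dash, then digits, checked against the lower-cased file number. For example:

    import re

    pat = re.compile(r'^[a-z0-9]{3,}-[0-9]{1,}$')
    print(bool(pat.match('md0094-2')))  # True
    print(bool(pat.match('ab-123')))    # False: prefix shorter than 3 chars
    print(bool(pat.match('abp-123x')))  # False: trailing non-digit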
@@ -124,7 +134,10 @@ def get_data_from_json(file_number, oCC):
     for source in sources:
         if conf.debug() == True:
             print('[+]select', source)
-        json_data = json.loads(pool.apply_async(func_mapping[source], (file_number,)).get())
+        try:
+            json_data = json.loads(pool.apply_async(func_mapping[source], (file_number,)).get())
+        except:
+            json_data = pool.apply_async(func_mapping[source], (file_number,)).get()
         # if any service return a valid return, break
         if get_data_state(json_data):
             print(f"[+]Find movie [{file_number}] metadata on website '{source}'")
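The try/except fallback, repeated in the non-threaded path below, copes with crawler modules that return an already-parsed dict rather than a JSON string; json.loads() raises TypeError on a dict, so the raw value is kept. The same idea as a standalone sketch:

    import json

    def normalize(raw):
        try:
            return json.loads(raw)   # crawler returned a JSON string
        except (TypeError, json.JSONDecodeError):
            return raw               # already a parsed object

    print(normalize('{"title": "t"}'))  # {'title': 't'}
    print(normalize({'title': 't'}))    # passed through unchanged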
@@ -136,7 +149,10 @@ def get_data_from_json(file_number, oCC):
         try:
             if conf.debug() == True:
                 print('[+]select', source)
-            json_data = json.loads(func_mapping[source](file_number))
+            try:
+                json_data = json.loads(func_mapping[source](file_number))
+            except:
+                json_data = func_mapping[source](file_number)
             # if any service return a valid return, break
             if get_data_state(json_data):
                 print(f"[+]Find movie [{file_number}] metadata on website '{source}'")
@@ -242,8 +258,8 @@ def get_data_from_json(file_number, oCC):
             if json_data[translate_value] == "":
                 continue
             if translate_value == "title":
-                title_dict = json.load(
-                    open(str(Path.home() / '.local' / 'share' / 'mdc' / 'c_number.json'), 'r', encoding="utf-8"))
+                title_dict = json.loads(
+                    (Path.home() / '.local' / 'share' / 'mdc' / 'c_number.json').read_text(encoding="utf-8"))
                 try:
                     json_data[translate_value] = title_dict[number]
                     continue
@@ -5,6 +5,7 @@ from lxml import etree
 import json
 from ADC_function import *
 from WebCrawler.storyline import getStoryline
+from WebCrawler.crawler import *
 # import io
 # sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)

@@ -17,95 +18,64 @@ def getActorPhoto(html):
|
|||||||
p2 = {t: l}
|
p2 = {t: l}
|
||||||
d.update(p2)
|
d.update(p2)
|
||||||
return d
|
return d
|
||||||
def getTitle(html):
|
|
||||||
try:
|
|
||||||
result = str(html.xpath('/html/body/div[2]/h3/text()')).strip(" ['']") #[0]
|
|
||||||
return result.replace('/', '')
|
|
||||||
except:
|
|
||||||
return ''
|
|
||||||
def getActor(html):
|
def getActor(html):
|
||||||
a = html.xpath('//a[@class="avatar-box"]')
|
a = html.xpath('//a[@class="avatar-box"]')
|
||||||
d = []
|
d = []
|
||||||
for i in a:
|
for i in a:
|
||||||
d.append(i.find('span').text)
|
d.append(i.find('span').text)
|
||||||
return d
|
return d
|
||||||
def getStudio(html):
|
|
||||||
result1 = str(html.xpath('//p[contains(text(),"制作商: ")]/following-sibling::p[1]/a/text()')).strip(" ['']").replace("', '",' ')
|
|
||||||
return result1
|
|
||||||
def getRuntime(html):
|
|
||||||
result1 = str(html.xpath('//span[contains(text(),"长度:")]/../text()')).strip(" ['分钟']")
|
|
||||||
return result1
|
|
||||||
def getLabel(html):
|
|
||||||
result1 = str(html.xpath('//p[contains(text(),"系列:")]/following-sibling::p[1]/a/text()')).strip(" ['']")
|
|
||||||
return result1
|
|
||||||
def getNum(html):
|
|
||||||
result1 = str(html.xpath('//span[contains(text(),"识别码:")]/../span[2]/text()')).strip(" ['']")
|
|
||||||
return result1
|
|
||||||
def getYear(release):
|
|
||||||
try:
|
|
||||||
result = str(re.search('\d{4}',release).group())
|
|
||||||
return result
|
|
||||||
except:
|
|
||||||
return release
|
|
||||||
def getRelease(html):
|
|
||||||
result1 = str(html.xpath('//span[contains(text(),"发行时间:")]/../text()')).strip(" ['']")
|
|
||||||
return result1
|
|
||||||
def getCover(html):
|
|
||||||
result = str(html.xpath('/html/body/div[2]/div[1]/div[1]/a/img/@src')).strip(" ['']")
|
|
||||||
return result
|
|
||||||
def getCover_small(html):
|
def getCover_small(html):
|
||||||
result = str(html.xpath('//*[@id="waterfall"]/div/a/div[1]/img/@src')).strip(" ['']")
|
result = str(html.xpath('//*[@id="waterfall"]/div/a/div[1]/img/@src')).strip(" ['']")
|
||||||
return result
|
return result
|
||||||
def getTag(html):
|
def getTag(html):
|
||||||
x = html.xpath('/html/head/meta[@name="keywords"]/@content')[0].split(',')
|
x = html.xpath('/html/head/meta[@name="keywords"]/@content')[0].split(',')
|
||||||
return [i.strip() for i in x[2:]] if len(x) > 2 else []
|
return [i.strip() for i in x[2:]] if len(x) > 2 else []
|
||||||
def getSeries(html):
|
|
||||||
try:
|
|
||||||
result1 = str(html.xpath('//span[contains(text(),"系列:")]/../span[2]/text()')).strip(" ['']")
|
|
||||||
return result1
|
|
||||||
except:
|
|
||||||
return ''
|
|
||||||
|
|
||||||
def main(number):
|
def main(number):
|
||||||
html = get_html('https://tellme.pw/avsox')
|
html = get_html('https://tellme.pw/avsox')
|
||||||
site = etree.HTML(html).xpath('//div[@class="container"]/div/a/@href')[0]
|
site = Crawler(html).getString('//div[@class="container"]/div/a/@href')
|
||||||
a = get_html(site + '/cn/search/' + number)
|
a = get_html(site + '/cn/search/' + number)
|
||||||
html = etree.fromstring(a, etree.HTMLParser())
|
html = Crawler(a)
|
||||||
result1 = str(html.xpath('//*[@id="waterfall"]/div/a/@href')).strip(" ['']")
|
result1 = html.getString('//*[@id="waterfall"]/div/a/@href')
|
||||||
if result1 == '' or result1 == 'null' or result1 == 'None':
|
if result1 == '' or result1 == 'null' or result1 == 'None':
|
||||||
a = get_html(site + '/cn/search/' + number.replace('-', '_'))
|
a = get_html(site + '/cn/search/' + number.replace('-', '_'))
|
||||||
html = etree.fromstring(a, etree.HTMLParser())
|
html = Crawler(a)
|
||||||
result1 = str(html.xpath('//*[@id="waterfall"]/div/a/@href')).strip(" ['']")
|
result1 = html.getString('//*[@id="waterfall"]/div/a/@href')
|
||||||
if result1 == '' or result1 == 'null' or result1 == 'None':
|
if result1 == '' or result1 == 'null' or result1 == 'None':
|
||||||
a = get_html(site + '/cn/search/' + number.replace('_', ''))
|
a = get_html(site + '/cn/search/' + number.replace('_', ''))
|
||||||
html = etree.fromstring(a, etree.HTMLParser())
|
html = Crawler(a)
|
||||||
result1 = str(html.xpath('//*[@id="waterfall"]/div/a/@href')).strip(" ['']")
|
result1 = html.getString('//*[@id="waterfall"]/div/a/@href')
|
||||||
detail = get_html("https:" + result1)
|
detail = get_html("https:" + result1)
|
||||||
lx = etree.fromstring(detail, etree.HTMLParser())
|
lx = etree.fromstring(detail, etree.HTMLParser())
|
||||||
|
avsox_crawler2 = Crawler(a)
|
||||||
|
avsox_crawler = Crawler(detail)
|
||||||
try:
|
try:
|
||||||
new_number = getNum(lx)
|
new_number = avsox_crawler.getString('//span[contains(text(),"识别码:")]/../span[2]/text()')
|
||||||
if new_number.upper() != number.upper():
|
if new_number.upper() != number.upper():
|
||||||
raise ValueError('number not found')
|
raise ValueError('number not found')
|
||||||
title = getTitle(lx).strip(new_number)
|
title = avsox_crawler.getString('/html/body/div[2]/h3/text()').replace('/','').strip(new_number)
|
||||||
dic = {
|
dic = {
|
||||||
'actor': getActor(lx),
|
'actor': getActor(lx),
|
||||||
'title': title,
|
'title': title,
|
||||||
'studio': getStudio(lx),
|
'studio': avsox_crawler.getString('//p[contains(text(),"制作商: ")]/following-sibling::p[1]/a/text()').replace("', '",' '),
|
||||||
'outline': getStoryline(number, title),
|
'outline': getStoryline(number, title),
|
||||||
'runtime': getRuntime(lx),
|
'runtime': avsox_crawler.getString('//span[contains(text(),"长度:")]/../text()').replace('分钟',''),
|
||||||
'director': '', #
|
'director': '', #
|
||||||
'release': getRelease(lx),
|
'release': avsox_crawler.getString('//span[contains(text(),"发行时间:")]/../text()'),
|
||||||
'number': new_number,
|
'number': new_number,
|
||||||
'cover': getCover(lx),
|
'cover': avsox_crawler.getString('/html/body/div[2]/div[1]/div[1]/a/img/@src'),
|
||||||
'cover_small': getCover_small(html),
|
#'cover_small' : getCover_small(html),
|
||||||
|
'cover_small': avsox_crawler2.getString('//*[@id="waterfall"]/div/a/div[1]/img/@src'),
|
||||||
'imagecut': 3,
|
'imagecut': 3,
|
||||||
'tag': getTag(lx),
|
'tag': getTag(lx),
|
||||||
'label': getLabel(lx),
|
'label': avsox_crawler.getString('//p[contains(text(),"系列:")]/following-sibling::p[1]/a/text()'),
|
||||||
'year': getYear(getRelease(lx)),
|
'year': re.findall('\d{4}',avsox_crawler.getString('//span[contains(text(),"发行时间:")]/../text()'))[0],
|
||||||
'actor_photo': getActorPhoto(lx),
|
'actor_photo': getActorPhoto(lx),
|
||||||
'website': "https:" + result1,
|
'website': "https:" + result1,
|
||||||
'source': 'avsox.py',
|
'source': 'avsox.py',
|
||||||
'series': getSeries(lx),
|
'series': avsox_crawler.getString('//span[contains(text(),"系列:")]/../span[2]/text()'),
|
||||||
}
|
}
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
if config.getInstance().debug():
|
if config.getInstance().debug():
|
||||||
|
|||||||
@@ -40,6 +40,7 @@ def main(number: str) -> json:
         'website': f'{G_SITE}/moviepages/{number}/index.html',
         'source': 'carib.py',
         'series': get_series(lx),
+        '无码': True
     }
     js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), )
     return js
@@ -59,7 +60,7 @@ def get_year(lx: html.HtmlElement) -> str:
 
 def get_outline(lx: html.HtmlElement, number: str, title: str) -> str:
     o = lx.xpath("//div[@class='movie-info section']/p[@itemprop='description']/text()")[0].strip()
-    g = getStoryline(number, title)
+    g = getStoryline(number, title, 无码=True)
     if len(g):
         return g
     return o
28 WebCrawler/crawler.py Normal file
@@ -0,0 +1,28 @@
+from lxml import etree
+
+class Crawler:
+    def __init__(self, htmlcode):
+        self.html = etree.HTML(htmlcode)
+
+    def getString(self, _xpath):
+        if _xpath == "":
+            return ""
+        result = self.html.xpath(_xpath)
+        try:
+            return result[0]
+        except:
+            return ""
+
+    def getStrings(self, _xpath):
+        result = self.html.xpath(_xpath)
+        try:
+            return result
+        except:
+            return ""
+
+    def getOutline(self, _xpath):
+        result = self.html.xpath(_xpath)
+        try:
+            return "\n".join(result)
+        except:
+            return ""
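
This new Crawler class centralizes the str(html.xpath(...)).strip(" ['']") idiom that every scraper in this commit previously hand-rolled, and makes a missing node degrade to an empty string instead of raising IndexError. A quick usage sketch; the HTML snippet is invented for illustration:

    from WebCrawler.crawler import Crawler

    page = ('<html><body><div class="container"><div>'
            '<a href="https://example.org">mirror</a></div></div></body></html>')
    c = Crawler(page)
    print(c.getString('//div[@class="container"]/div/a/@href'))  # -> https://example.org
    print(c.getString('//span[@id="missing"]/text()'))           # -> '' rather than IndexError
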
@@ -1,15 +1,14 @@
 import re
 from lxml import etree
 import json
-from bs4 import BeautifulSoup
 import sys
 sys.path.append('../')
 from ADC_function import *
 # import sys
 # import io
 # sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)
-#print(get_html('https://www.dlsite.com/pro/work/=/product_id/VJ013152.html'))
+#print(get_html('https://www.dlsite.com/maniax/work/=/product_id/VJ013152.html'))
-#title //*[@id="work_name"]/a/text()
+#title /html/head/title/text()
 #studio //th[contains(text(),"ブランド名")]/../td/span[1]/a/text()
 #release //th[contains(text(),"販売日")]/../td/a/text()
 #story //th[contains(text(),"シナリオ")]/../td/a/text()
@@ -18,14 +17,14 @@ from ADC_function import *
 #jianjie //*[@id="main_inner"]/div[3]/text()
 #photo //*[@id="work_left"]/div/div/div[2]/div/div[1]/div[1]/ul/li/img/@src
 
-#https://www.dlsite.com/pro/work/=/product_id/VJ013152.html
+#https://www.dlsite.com/maniax/work/=/product_id/VJ013152.html
 
-def getTitle(a):
-    html = etree.fromstring(a, etree.HTMLParser())
-    result = html.xpath('//*[@id="work_name"]/a/text()')[0]
+def getTitle(html):
+    result = str(html.xpath('/html/head/title/text()')[0])
+    result = result[:result.rfind(' | DLsite')]
+    result = result[:result.rfind(' [')]
     return result
-def getActor(a): # //*[@id="center_column"]/div[2]/div[1]/div/table/tbody/tr[1]/td/text()
-    html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
+def getActor(html): # //*[@id="center_column"]/div[2]/div[1]/div/table/tbody/tr[1]/td/text()
     try:
         result1 = html.xpath('//th[contains(text(),"声优")]/../td/a/text()')
     except:
@@ -38,8 +37,7 @@ def getActorPhoto(actor): #//*[@id="star_qdt"]/li/a/img
         p={i:''}
         d.update(p)
     return d
-def getStudio(a):
-    html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
+def getStudio(html):
     try:
         try:
             result = html.xpath('//th[contains(text(),"系列名")]/../td/span[1]/a/text()')[0]
@@ -53,8 +51,7 @@ def getRuntime(a):
     result1 = str(html.xpath('//strong[contains(text(),"時長")]/../span/text()')).strip(" ['']")
     result2 = str(html.xpath('//strong[contains(text(),"時長")]/../span/a/text()')).strip(" ['']")
     return str(result1 + result2).strip('+').rstrip('mi')
-def getLabel(a):
-    html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
+def getLabel(html):
     try:
         try:
             result = html.xpath('//th[contains(text(),"系列名")]/../td/span[1]/a/text()')[0]
@@ -69,12 +66,10 @@ def getYear(getRelease):
         return result
     except:
         return getRelease
-def getRelease(a):
-    html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
+def getRelease(html):
     result1 = html.xpath('//th[contains(text(),"贩卖日")]/../td/a/text()')[0]
     return result1.replace('年','-').replace('月','-').replace('日','')
-def getTag(a):
-    html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
+def getTag(html):
     try:
         result = html.xpath('//th[contains(text(),"分类")]/../td/div/a/text()')
         return result
@@ -96,26 +91,22 @@ def getCover_small(a, index=0):
     if not 'https' in result:
         result = 'https:' + result
     return result
-def getCover(htmlcode):
-    html = etree.fromstring(htmlcode, etree.HTMLParser())
-    result = html.xpath('//*[@id="work_left"]/div/div/div[2]/div/div[1]/div[1]/ul/li/img/@src')[0]
-    return result
-def getDirector(a):
-    html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
+def getCover(html):
+    result = html.xpath('//*[@id="work_left"]/div/div/div[2]/div/div[1]/div[1]/ul/li[1]/picture/source/@srcset')[0]
+    return result.replace('.webp', '.jpg')
+def getDirector(html):
     try:
         result = html.xpath('//th[contains(text(),"剧情")]/../td/a/text()')[0]
     except:
         result = ''
     return result
-def getOutline(htmlcode):
-    html = etree.fromstring(htmlcode, etree.HTMLParser())
+def getOutline(html):
     total = []
-    result = html.xpath('//*[@id="main_inner"]/div[3]/text()')
+    result = html.xpath('//*[@class="work_parts_area"]/p/text()')
     for i in result:
         total.append(i.strip('\r\n'))
     return str(total).strip(" ['']").replace("', '', '",r'\n').replace("', '",r'\n').strip(", '', '")
-def getSeries(a):
-    html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
+def getSeries(html):
     try:
         try:
             result = html.xpath('//th[contains(text(),"系列名")]/../td/span[1]/a/text()')[0]
@@ -127,28 +118,28 @@ def getSeries(a):
 def main(number):
     try:
         number = number.upper()
-        htmlcode = get_html('https://www.dlsite.com/pro/work/=/product_id/' + number + '.html',
+        htmlcode = get_html('https://www.dlsite.com/maniax/work/=/product_id/' + number + '.html/?locale=zh_CN',
                             cookies={'locale': 'zh-cn'})
+        html = etree.fromstring(htmlcode, etree.HTMLParser())
         dic = {
-            'actor': getActor(htmlcode),
+            'actor': getActor(html),
-            'title': getTitle(htmlcode),
+            'title': getTitle(html),
-            'studio': getStudio(htmlcode),
+            'studio': getStudio(html),
-            'outline': getOutline(htmlcode),
+            'outline': getOutline(html),
             'runtime': '',
-            'director': getDirector(htmlcode),
+            'director': getDirector(html),
-            'release': getRelease(htmlcode),
+            'release': getRelease(html),
             'number': number,
-            'cover': 'https:' + getCover(htmlcode),
+            'cover': 'https:' + getCover(html),
             'cover_small': '',
             'imagecut': 0,
-            'tag': getTag(htmlcode),
+            'tag': getTag(html),
-            'label': getLabel(htmlcode),
+            'label': getLabel(html),
-            'year': getYear(getRelease(htmlcode)), # str(re.search('\d{4}',getRelease(a)).group()),
+            'year': getYear(getRelease(html)), # str(re.search('\d{4}',getRelease(a)).group()),
             'actor_photo': '',
-            'website': 'https://www.dlsite.com/pro/work/=/product_id/' + number + '.html',
+            'website': 'https://www.dlsite.com/maniax/work/=/product_id/' + number + '.html',
             'source': 'dlsite.py',
-            'series': getSeries(htmlcode),
+            'series': getSeries(html),
         }
         js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8')
         return js
@@ -166,4 +157,6 @@ def main(number):
 # main('DV-1562')
 # input("[+][+]Press enter key exit, you can check the error messge before you exit.\n[+][+]按回车键结束,你可以在结束之前查看和错误信息。")
 if __name__ == "__main__":
+    config.getInstance().set_override("debug_mode:switch=1")
     print(main('VJ013178'))
+    print(main('RJ329607'))
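
The rewritten getTitle reads the page <title> instead of the #work_name node, then trims the trailing ' | DLsite' suffix and the bracketed circle name. A standalone sketch with a made-up title; note both rfind calls assume their marker exists, since rfind returns -1 on a miss and the slice would then shave off the last character:

    t = 'サンプル作品名 [サンプルサークル] | DLsite'
    t = t[:t.rfind(' | DLsite')]  # drop the site suffix
    t = t[:t.rfind(' [')]         # drop the bracketed circle name
    print(t)                      # -> サンプル作品名
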
@@ -9,130 +9,33 @@ from urllib.parse import urlencode
 from lxml import etree
 
 from ADC_function import *
+from WebCrawler.crawler import *
 # import sys
 # import io
 # sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)
 
-def getTitle(text):
-    html = etree.fromstring(text, etree.HTMLParser())
-    result = html.xpath('//*[starts-with(@id, "title")]/text()')[0]
-    return result
-
-def getActor(text):
-    # //*[@id="center_column"]/div[2]/div[1]/div/table/tbody/tr[1]/td/text()
-    html = etree.fromstring(text, etree.HTMLParser())
-    result = (
-        str(
-            html.xpath(
-                "//td[contains(text(),'出演者')]/following-sibling::td/span/a/text()"
-            )
-        )
-        .strip(" ['']")
-        .replace("', '", ",")
-    )
-    return result
-
-def getStudio(text):
-    html = etree.fromstring(text, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
-    try:
-        result = html.xpath(
-            "//td[contains(text(),'メーカー')]/following-sibling::td/a/text()"
-        )[0]
-    except:
-        result = html.xpath(
-            "//td[contains(text(),'メーカー')]/following-sibling::td/text()"
-        )[0]
-    return result
-
-def getRuntime(text):
-    html = etree.fromstring(text, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
-    result = html.xpath("//td[contains(text(),'収録時間')]/following-sibling::td/text()")[0]
-    return re.search(r"\d+", str(result)).group()
-
-def getLabel(text):
-    html = etree.fromstring(text, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
-    try:
-        result = html.xpath(
-            "//td[contains(text(),'レーベル:')]/following-sibling::td/a/text()"
-        )[0]
-    except:
-        result = html.xpath(
-            "//td[contains(text(),'レーベル:')]/following-sibling::td/text()"
-        )[0]
-    return result
-
-def getNum(text):
-    html = etree.fromstring(text, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
-    try:
-        result = html.xpath(
-            "//td[contains(text(),'品番:')]/following-sibling::td/a/text()"
-        )[0]
-    except:
-        result = html.xpath(
-            "//td[contains(text(),'品番:')]/following-sibling::td/text()"
-        )[0]
-    return result
-
-def getYear(getRelease):
-    try:
-        result = str(re.search(r"\d{4}", getRelease).group())
-        return result
-    except:
-        return getRelease
-
-def getRelease(text):
-    html = etree.fromstring(text, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
-    try:
-        result = html.xpath(
-            "//td[contains(text(),'発売日:')]/following-sibling::td/a/text()"
-        )[0].lstrip("\n")
-    except:
-        try:
-            result = html.xpath(
-                "//td[contains(text(),'発売日:')]/following-sibling::td/text()"
-            )[0].lstrip("\n")
-        except:
-            result = "----"
-    if result == "----":
-        try:
-            result = html.xpath(
-                "//td[contains(text(),'配信開始日:')]/following-sibling::td/a/text()"
-            )[0].lstrip("\n")
-        except:
-            try:
-                result = html.xpath(
-                    "//td[contains(text(),'配信開始日:')]/following-sibling::td/text()"
-                )[0].lstrip("\n")
-            except:
-                pass
-    return result.replace("/", "-")
-
-def getTag(text):
-    html = etree.fromstring(text, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
-    try:
-        result = html.xpath(
-            "//td[contains(text(),'ジャンル:')]/following-sibling::td/a/text()"
-        )
-        return result
-    except:
-        result = html.xpath(
-            "//td[contains(text(),'ジャンル:')]/following-sibling::td/text()"
-        )
-        return result
-
-def getCover(text, number):
-    html = etree.fromstring(text, etree.HTMLParser())
+class fanzaCrawler(Crawler):
+    def getFanzaString(self, string):
+        result1 = str(self.html.xpath("//td[contains(text(),'"+string+"')]/following-sibling::td/a/text()")).strip(" ['']")
+        result2 = str(self.html.xpath("//td[contains(text(),'"+string+"')]/following-sibling::td/text()")).strip(" ['']")
+        return result1+result2
+
+    def getFanzaStrings(self, string):
+        result1 = self.html.xpath("//td[contains(text(),'" + string + "')]/following-sibling::td/a/text()")
+        if len(result1) > 0:
+            return result1
+        result2 = self.html.xpath("//td[contains(text(),'" + string + "')]/following-sibling::td/text()")
+        return result2
+
+def getRelease(fanza_Crawler):
+    result = fanza_Crawler.getFanzaString('発売日:')
+    if result == '----':
+        result = fanza_Crawler.getFanzaString('配信開始日:')
+    return result.replace("/", "-").strip('\\n')
+
+def getCover(html, number):
     cover_number = number
     try:
         result = html.xpath('//*[@id="' + cover_number + '"]/@href')[0]
@@ -151,29 +54,11 @@ def getCover(text, number):
     return result
 
-def getDirector(text):
-    html = etree.fromstring(text, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
-    try:
-        result = html.xpath(
-            "//td[contains(text(),'監督:')]/following-sibling::td/a/text()"
-        )[0]
-    except:
-        result = html.xpath(
-            "//td[contains(text(),'監督:')]/following-sibling::td/text()"
-        )[0]
-    return result
-
-def getOutline(text):
-    html = etree.fromstring(text, etree.HTMLParser())
+def getOutline(html):
     try:
-        result = str(html.xpath("//div[@class='mg-b20 lh4']/text()")[0]).replace(
-            "\n", ""
-        )
+        result = str(html.xpath("//div[@class='mg-b20 lh4']/text()")[0]).replace("\n", "")
         if result == "":
-            result = str(html.xpath("//div[@class='mg-b20 lh4']//p/text()")[0]).replace(
-                "\n", ""
-            )
+            result = str(html.xpath("//div[@class='mg-b20 lh4']//p/text()")[0]).replace("\n", "")
     except:
         # (TODO) handle more edge case
         # print(html)
@@ -181,23 +66,8 @@ def getOutline(text):
     return result
 
-def getSeries(text):
-    try:
-        html = etree.fromstring(text, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
-        try:
-            result = html.xpath(
-                "//td[contains(text(),'シリーズ:')]/following-sibling::td/a/text()"
-            )[0]
-        except:
-            result = html.xpath(
-                "//td[contains(text(),'シリーズ:')]/following-sibling::td/text()"
-            )[0]
-        return result
-    except:
-        return ""
-
 def getExtrafanart(htmlcode): # 获取剧照
-    html_pather = re.compile(r'<div id=\"sample-image-block\"[\s\S]*?<br></div>\n</div>')
+    html_pather = re.compile(r'<div id=\"sample-image-block\"[\s\S]*?<br></div></div>')
     html = html_pather.search(htmlcode)
     if html:
         html = html.group()
@@ -232,6 +102,7 @@ def main(number):
         "https://www.dmm.co.jp/rental/-/detail/=/cid=",
     ]
     chosen_url = ""
+    fanza_Crawler = ''
 
     for url in fanza_urls:
         chosen_url = url + fanza_search_number
@@ -240,6 +111,7 @@ def main(number):
                 urlencode({"rurl": chosen_url})
            )
        )
+        fanza_Crawler = fanzaCrawler(htmlcode)
         if "404 Not Found" not in htmlcode:
             break
     if "404 Not Found" in htmlcode:
@@ -249,28 +121,34 @@ def main(number):
         # for example, the url will be cid=test012
         # but the hinban on the page is test00012
         # so get the hinban first, and then pass it to following functions
-        fanza_hinban = getNum(htmlcode)
+        fanza_hinban = fanza_Crawler.getFanzaString('品番:')
+        out_num = fanza_hinban
+        number_lo = number.lower()
+        html = etree.fromstring(htmlcode, etree.HTMLParser())
+        if (re.sub('-|_', '', number_lo) == fanza_hinban or
+            number_lo.replace('-', '00') == fanza_hinban or
+            number_lo.replace('-', '') + 'so' == fanza_hinban
+        ):
+            out_num = number
         data = {
-            "title": getTitle(htmlcode).strip(),
+            "title": fanza_Crawler.getString('//*[starts-with(@id, "title")]/text()').strip(),
-            "studio": getStudio(htmlcode),
+            "studio": fanza_Crawler.getFanzaString('メーカー'),
-            "outline": getOutline(htmlcode),
+            "outline": getOutline(html),
-            "runtime": getRuntime(htmlcode),
+            "runtime": str(re.search(r'\d+',fanza_Crawler.getString("//td[contains(text(),'収録時間')]/following-sibling::td/text()")).group()).strip(" ['']"),
-            "director": getDirector(htmlcode) if "anime" not in chosen_url else "",
+            "director": fanza_Crawler.getFanzaString('監督:') if "anime" not in chosen_url else "",
-            "actor": getActor(htmlcode) if "anime" not in chosen_url else "",
+            "actor": fanza_Crawler.getString("//td[contains(text(),'出演者')]/following-sibling::td/span/a/text()").replace("', '", ",") if "anime" not in chosen_url else "",
-            "release": getRelease(htmlcode),
+            "release": getRelease(fanza_Crawler),
-            "number": fanza_hinban,
+            "number": out_num,
-            "cover": getCover(htmlcode, fanza_hinban),
+            "cover": getCover(html, fanza_hinban),
             "imagecut": 1,
-            "tag": getTag(htmlcode),
+            "tag": fanza_Crawler.getFanzaStrings('ジャンル:'),
             "extrafanart": getExtrafanart(htmlcode),
-            "label": getLabel(htmlcode),
+            "label": fanza_Crawler.getFanzaString('レーベル'),
-            "year": getYear(
-                getRelease(htmlcode)
-            ), # str(re.search('\d{4}',getRelease(a)).group()),
+            "year": re.findall('\d{4}',getRelease(fanza_Crawler))[0], # str(re.search('\d{4}',getRelease(a)).group()),
             "actor_photo": "",
             "website": chosen_url,
             "source": "fanza.py",
-            "series": getSeries(htmlcode),
+            "series": fanza_Crawler.getFanzaString('シリーズ:'),
         }
     except:
         data = {
@@ -314,4 +192,6 @@ def main_htmlcode(number):
 if __name__ == "__main__":
     # print(main("DV-1562"))
     # print(main("96fad1217"))
-    print(main("h_173ghmt68"))
+    print(main("pred00251"))
+    print(main("MIAA-391"))
+    print(main("OBA-326"))
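
getFanzaString encodes fanza's two layouts for a detail row: the value is either a link (<td><a>…</a></td>) or bare td text, and exactly one of the two XPaths yields anything, so concatenating the stripped forms returns whichever is present. A self-contained check against a made-up table snippet:

    from lxml import etree

    snippet = ('<table>'
               "<tr><td>メーカー</td><td><a href='#'>サンプルメーカー</a></td></tr>"
               '<tr><td>品番:</td><td>pred00251</td></tr>'
               '</table>')
    doc = etree.HTML(snippet)

    def fanza_string(label):
        # Linked value first, plain td text second; one of the two is empty.
        linked = doc.xpath(f"//td[contains(text(),'{label}')]/following-sibling::td/a/text()")
        plain = doc.xpath(f"//td[contains(text(),'{label}')]/following-sibling::td/text()")
        return str(linked).strip(" ['']") + str(plain).strip(" ['']")

    print(fanza_string('メーカー'))  # -> サンプルメーカー
    print(fanza_string('品番:'))     # -> pred00251
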
@@ -4,58 +4,11 @@ import re
 from lxml import etree#need install
 import json
 import ADC_function
+from WebCrawler.crawler import *
 # import sys
 # import io
 # sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)
 
-def getTitle_fc2com(htmlcode): #获取厂商
-    html = etree.fromstring(htmlcode,etree.HTMLParser())
-    result = html.xpath('//*[@id="top"]/div[1]/section[1]/div/section/div[2]/h3/text()')[0]
-    return result
-def getActor_fc2com(htmlcode):
-    try:
-        html = etree.fromstring(htmlcode, etree.HTMLParser())
-        result = html.xpath('//*[@id="top"]/div[1]/section[1]/div/section/div[2]/ul/li[3]/a/text()')[0]
-        return result
-    except:
-        return ''
-def getStudio_fc2com(htmlcode): #获取厂商
-    try:
-        html = etree.fromstring(htmlcode, etree.HTMLParser())
-        result = str(html.xpath('//*[@id="top"]/div[1]/section[1]/div/section/div[2]/ul/li[3]/a/text()')).strip(" ['']")
-        return result
-    except:
-        return ''
-def getNum_fc2com(htmlcode): #获取番号
-    html = etree.fromstring(htmlcode, etree.HTMLParser())
-    result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[1]/span[2]/text()')).strip(" ['']")
-    return result
-def getRelease_fc2com(htmlcode2): #
-    html=etree.fromstring(htmlcode2,etree.HTMLParser())
-    result = str(html.xpath('//*[@id="top"]/div[1]/section[1]/div/section/div[2]/div[2]/p/text()')).strip(" ['販売日 : ']").replace('/','-')
-    return result
-def getCover_fc2com(htmlcode2): #获取厂商 #
-    html = etree.fromstring(htmlcode2, etree.HTMLParser())
-    result = str(html.xpath('//*[@id="top"]/div[1]/section[1]/div/section/div[1]/span/img/@src')).strip(" ['']")
-    return 'http:' + result
-# def getOutline_fc2com(htmlcode2): #获取番号 #
-#     xpath_html = etree.fromstring(htmlcode2, etree.HTMLParser())
-#     path = str(xpath_html.xpath('//*[@id="top"]/div[1]/section[4]/iframe/@src')).strip(" ['']")
-#     html = etree.fromstring(ADC_function.get_html('https://adult.contents.fc2.com/'+path), etree.HTMLParser())
-#     print('https://adult.contents.fc2.com'+path)
-#     print(ADC_function.get_html('https://adult.contents.fc2.com'+path,cookies={'wei6H':'1'}))
-#     result = str(html.xpath('/html/body/div/text()')).strip(" ['']").replace("\\n",'',10000).replace("'",'',10000).replace(', ,','').strip(' ').replace('。,',',')
-#     return result
-def getTag_fc2com(lx):
-    result = lx.xpath("//a[@class='tag tagTag']/text()")
-    return result
-def getYear_fc2com(release):
-    try:
-        result = re.search('\d{4}',release).group()
-        return result
-    except:
-        return ''
-
 def getExtrafanart(htmlcode): # 获取剧照
     html_pather = re.compile(r'<ul class=\"items_article_SampleImagesArea\"[\s\S]*?</ul>')
     html = html_pather.search(htmlcode)
@@ -79,27 +32,30 @@ def getTrailer(htmlcode, number):
     except:
         return ''
     else:
-        video_url = ''
+        return ''
 
 def main(number):
     try:
         number = number.replace('FC2-', '').replace('fc2-', '')
-        htmlcode2 = ADC_function.get_html('https://adult.contents.fc2.com/article/' + number + '/')
+        htmlcode2 = ADC_function.get_html('https://adult.contents.fc2.com/article/' + number + '/', encoding='utf-8')
-        actor = getActor_fc2com(htmlcode2)
-        if not actor:
+        fc2_crawler = Crawler(htmlcode2)
+        actor = fc2_crawler.getString('//*[@id="top"]/div[1]/section[1]/div/section/div[2]/ul/li[3]/a/text()')
+        if actor == "":
             actor = '素人'
         lx = etree.fromstring(htmlcode2, etree.HTMLParser())
-        cover = str(lx.xpath("//div[@class='items_article_MainitemThumb']/span/img/@src")).strip(" ['']")
+        cover = fc2_crawler.getString("//div[@class='items_article_MainitemThumb']/span/img/@src")
         cover = ADC_function.urljoin('https://adult.contents.fc2.com', cover)
+        release = fc2_crawler.getString('//*[@id="top"]/div[1]/section[1]/div/section/div[2]/div[2]/p/text()').\
+            strip(" ['販売日 : ']").replace('/','-')
         dic = {
-            'title': lx.xpath('/html/head/title/text()')[0],
+            'title': fc2_crawler.getString('/html/head/title/text()'),
-            'studio': getStudio_fc2com(htmlcode2),
+            'studio': fc2_crawler.getString('//*[@id="top"]/div[1]/section[1]/div/section/div[2]/ul/li[3]/a/text()'),
-            'year': getYear_fc2com(getRelease_fc2com(htmlcode2)),
+            'year': re.findall('\d{4}',release)[0],
             'outline': '', # getOutline_fc2com(htmlcode2),
             'runtime': str(lx.xpath("//p[@class='items_article_info']/text()")[0]),
-            'director': getStudio_fc2com(htmlcode2),
+            'director': fc2_crawler.getString('//*[@id="top"]/div[1]/section[1]/div/section/div[2]/ul/li[3]/a/text()'),
             'actor': actor,
-            'release': getRelease_fc2com(htmlcode2),
+            'release': release,
             'number': 'FC2-' + number,
             'label': '',
             'cover': cover,
@@ -107,7 +63,7 @@ def main(number):
             'extrafanart': getExtrafanart(htmlcode2),
             "trailer": getTrailer(htmlcode2, number),
             'imagecut': 0,
-            'tag': getTag_fc2com(lx),
+            'tag': fc2_crawler.getStrings("//a[@class='tag tagTag']/text()"),
             'actor_photo': '',
             'website': 'https://adult.contents.fc2.com/article/' + number + '/',
             'source': 'https://adult.contents.fc2.com/article/' + number + '/',
@@ -121,6 +77,4 @@ def main(number):
     return js
 
 if __name__ == '__main__':
-    print(main('FC2-1787685'))
-    print(main('FC2-2086710'))
+    print(main('FC2-2182382'))
88 WebCrawler/gcolle.py Normal file
@@ -0,0 +1,88 @@
+import sys
+sys.path.append('../')
+
+from WebCrawler.crawler import *
+from ADC_function import *
+from lxml import etree
+
+
+def main(number):
+    save_cookies = False
+    cookie_filename = 'gcolle.json'
+    try:
+        gcolle_cookies, cookies_filepath = load_cookies(cookie_filename)
+        session = get_html_session(cookies=gcolle_cookies)
+        number = number.upper().replace('GCOLLE-','')
+
+        htmlcode = session.get('https://gcolle.net/product_info.php/products_id/' + number).text
+        gcolle_crawler = Crawler(htmlcode)
+        r18_continue = gcolle_crawler.getString('//*[@id="main_content"]/table[1]/tbody/tr/td[2]/table/tbody/tr/td/h4/a[2]/@href')
+        if r18_continue and r18_continue.startswith('http'):
+            htmlcode = session.get(r18_continue).text
+            gcolle_crawler = Crawler(htmlcode)
+            save_cookies = True
+            cookies_filepath and len(cookies_filepath) and Path(cookies_filepath).is_file() and Path(cookies_filepath).unlink(missing_ok=True)
+
+        number_html = gcolle_crawler.getString('//td[contains(text(),"商品番号")]/../td[2]/text()')
+        if number != number_html:
+            raise Exception('[-]gcolle.py: number not match')
+
+        if save_cookies:
+            cookies_save = Path.home() / f".local/share/mdc/{cookie_filename}"
+            cookies_save.parent.mkdir(parents=True, exist_ok=True)
+            cookies_save.write_text(json.dumps(session.cookies.get_dict(), sort_keys=True, indent=4), encoding='utf-8')
+
+        # get extrafanart url
+        if len(gcolle_crawler.getStrings('//*[@id="cart_quantity"]/table/tr[3]/td/div/img/@src')) == 0:
+            extrafanart = gcolle_crawler.getStrings('//*[@id="cart_quantity"]/table/tr[3]/td/div/a/img/@src')
+        else:
+            extrafanart = gcolle_crawler.getStrings('//*[@id="cart_quantity"]/table/tr[3]/td/div/img/@src')
+        # Add "https:" in each extrafanart url
+        for i in range(len(extrafanart)):
+            extrafanart[i] = 'https:' + extrafanart[i]
+
+        dic = {
+            "title": gcolle_crawler.getString('//*[@id="cart_quantity"]/table/tr[1]/td/h1/text()').strip(),
+            "studio": gcolle_crawler.getString('//td[contains(text(),"アップロード会員名")]/b/text()'),
+            "year": re.findall('\d{4}',gcolle_crawler.getString('//td[contains(text(),"商品登録日")]/../td[2]/time/@datetime'))[0],
+            "outline": gcolle_crawler.getOutline('//*[@id="cart_quantity"]/table/tr[3]/td/p/text()'),
+            "runtime": '',
+            "director": gcolle_crawler.getString('//td[contains(text(),"アップロード会員名")]/b/text()'),
+            "actor": gcolle_crawler.getString('//td[contains(text(),"アップロード会員名")]/b/text()'),
+            "release": re.findall('\d{4}-\d{2}-\d{2}',gcolle_crawler.getString('//td[contains(text(),"商品登録日")]/../td[2]/time/@datetime'))[0],
+            "number": "GCOLLE-" + str(number_html),
+            "cover": "https:" + gcolle_crawler.getString('//*[@id="cart_quantity"]/table/tr[3]/td/table/tr/td/a/@href'),
+            "thumb": "https:" + gcolle_crawler.getString('//*[@id="cart_quantity"]/table/tr[3]/td/table/tr/td/a/@href'),
+            "trailer": '',
+            "actor_photo": '',
+            "imagecut": 4,  # 该值为4时同时也是有码影片 也用人脸识别裁剪封面
+            "tag": gcolle_crawler.getStrings('//*[@id="cart_quantity"]/table/tr[4]/td/a/text()'),
+            "extrafanart": extrafanart,
+            "label": gcolle_crawler.getString('//td[contains(text(),"アップロード会員名")]/b/text()'),
+            "website": 'https://gcolle.net/product_info.php/products_id/' + number,
+            "source": 'gcolle.py',
+            "series": gcolle_crawler.getString('//td[contains(text(),"アップロード会員名")]/b/text()'),
+            '无码': False,
+        }
+        # for k,v in dic.items():
+        #     if k == 'outline':
+        #         print(k,len(v))
+        #     else:
+        #         print(k,v)
+        # print('===============================================================')
+    except Exception as e:
+        dic = {'title':''}
+        if config.getInstance().debug():
+            print(e)
+
+    return dic
+
+
+if __name__ == '__main__':
+    from pprint import pprint
+    config.getInstance().set_override("debug_mode:switch=1")
+    pprint(main('840724'))
+    pprint(main('840386'))
+    pprint(main('838671'))
+    pprint(main('814179'))
+    pprint(main('834255'))
+    pprint(main('814179'))
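
gcolle.py gates the R18 confirmation page behind a cookie jar saved under ~/.local/share/mdc/. load_cookies and get_html_session are project helpers from ADC_function, so this hedged sketch of the round-trip stands in with plain pathlib/json I/O:

    import json
    from pathlib import Path

    COOKIE_DIR = Path.home() / '.local' / 'share' / 'mdc'

    def save_session_cookies(cookie_dict, filename='gcolle.json'):
        # Persist the session cookies so the R18 confirmation is only needed once.
        COOKIE_DIR.mkdir(parents=True, exist_ok=True)
        (COOKIE_DIR / filename).write_text(
            json.dumps(cookie_dict, sort_keys=True, indent=4), encoding='utf-8')

    def load_session_cookies(filename='gcolle.json'):
        path = COOKIE_DIR / filename
        return json.loads(path.read_text(encoding='utf-8')) if path.is_file() else {}
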
@@ -56,9 +56,9 @@ def parse_info(soup: BeautifulSoup) -> dict:
             "label": get_label(data_dic),
             "studio": get_studio(data_dic),
             "tag": get_tag(data_dic),
-            "number": get_number(data_dic),
+            "number": get_number(data_dic).upper(),
             "release": get_release(data_dic),
-            "runtime": get_runtime(data_dic),
+            "runtime": get_runtime(data_dic).replace(" minutes", ""),
             "series": get_series(data_dic),
         }
     else:
@@ -60,10 +60,10 @@ def getCID(html):
     string = html.xpath("//a[contains(@class,'sample-box')][1]/@href")[0].replace('https://pics.dmm.co.jp/digital/video/','')
     result = re.sub('/.*?.jpg','',string)
     return result
-def getOutline(number, title): #获取剧情介绍 多进程并发查询
+def getOutline(number, title, uncensored): #获取剧情介绍 多进程并发查询
     if any(caller for caller in inspect.stack() if os.path.basename(caller.filename) == 'airav.py'):
         return '' # 从airav.py过来的调用不计算outline直接返回,避免重复抓取数据拖慢处理速度
-    return getStoryline(number,title)
+    return getStoryline(number,title, 无码=uncensored)
 def getSeriseJa(html):
     x = html.xpath('//span[contains(text(),"シリーズ:")]/../a/text()')
     return str(x[0]) if len(x) else ''
@@ -83,9 +83,13 @@ def getExtrafanart(htmlcode): # 获取剧照
     if extrafanart_imgs:
         return [urljoin('https://www.javbus.com',img) for img in extrafanart_imgs]
     return ''
+def getUncensored(html):
+    x = html.xpath('//*[@id="navbar"]/ul[1]/li[@class="active"]/a[contains(@href,"uncensored")]')
+    return bool(x)
+
 def main_uncensored(number):
-    htmlcode = get_html('https://www.javbus.com/ja/' + number)
+    w_number = number.replace('.', '-')
+    htmlcode = get_html('https://www.javbus.red/' + w_number)
     if "<title>404 Page Not Found" in htmlcode:
         raise Exception('404 page not found')
     lx = etree.fromstring(htmlcode, etree.HTMLParser())
@@ -94,7 +98,7 @@ def main_uncensored(number):
         'title': title,
         'studio': getStudioJa(lx),
         'year': getYear(lx),
-        'outline': getOutline(number, title),
+        'outline': getOutline(w_number, title, True),
         'runtime': getRuntime(lx),
         'director': getDirectorJa(lx),
         'actor': getActor(lx),
@@ -106,9 +110,10 @@ def main_uncensored(number):
         'label': getSeriseJa(lx),
         'imagecut': 0,
         # 'actor_photo': '',
-        'website': 'https://www.javbus.com/ja/' + number,
+        'website': 'https://www.javbus.red/' + w_number,
         'source': 'javbus.py',
         'series': getSeriseJa(lx),
+        '无码': True
     }
     js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8')
     return js
@@ -136,7 +141,7 @@ def main(number):
         'title': title,
         'studio': getStudio(lx),
         'year': getYear(lx),
-        'outline': getOutline(number, title),
+        'outline': getOutline(number, title, getUncensored(lx)),
         'runtime': getRuntime(lx),
         'director': getDirector(lx),
         'actor': getActor(lx),
@@ -151,6 +156,7 @@ def main(number):
         'website': 'https://www.javbus.com/' + number,
         'source': 'javbus.py',
         'series': getSerise(lx),
+        '无码': getUncensored(lx)
     }
     js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4,separators=(',', ':'), ) # .encode('UTF-8')
     return js
@@ -168,13 +174,14 @@ def main(number):
     return js
 
 if __name__ == "__main__" :
-    config.G_conf_override['debug_mode:switch'] = True
+    config.getInstance().set_override("debug_mode:switch=1")
-    print(main('ABP-888'))
+    # print(main('ABP-888'))
-    print(main('ABP-960'))
+    # print(main('ABP-960'))
-    print(main('ADV-R0624')) # 404
+    # print(main('ADV-R0624')) # 404
-    print(main('MMNT-010'))
+    # print(main('MMNT-010'))
-    print(main('ipx-292'))
+    # print(main('ipx-292'))
-    print(main('CEMD-011'))
+    # print(main('CEMD-011'))
-    print(main('CJOD-278'))
+    # print(main('CJOD-278'))
+    print(main('BrazzersExxtra.21.02.01'))
     print(main('100221_001'))
     print(main('AVSW-061'))
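
getStoryline now takes the uncensored flag through a keyword literally named 无码. That is valid Python 3, where identifiers may be non-ASCII, so the call sites above behave like any other keyword argument. A two-line demonstration with a placeholder body standing in for the real getStoryline:

    def storyline(number, title, 无码=False):  # placeholder mirroring getStoryline's signature
        return f'{number} {title} uncensored={无码}'

    print(storyline('ABC-123', 'demo', 无码=True))  # -> ABC-123 demo uncensored=True
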
@@ -166,12 +166,23 @@ def getDirector(html):
     result1 = str(html.xpath('//strong[contains(text(),"導演")]/../span/text()')).strip(" ['']")
     result2 = str(html.xpath('//strong[contains(text(),"導演")]/../span/a/text()')).strip(" ['']")
     return str(result1 + result2).strip('+').replace("', '", '').replace('"', '')
-def getOutline(number, title): #获取剧情介绍 多进程并发查询
-    return getStoryline(number,title)
+def getOutline(number, title, uncensored): #获取剧情介绍 多进程并发查询
+    return getStoryline(number, title, 无码=uncensored)
 def getSeries(html):
     result1 = str(html.xpath('//strong[contains(text(),"系列")]/../span/text()')).strip(" ['']")
     result2 = str(html.xpath('//strong[contains(text(),"系列")]/../span/a/text()')).strip(" ['']")
     return str(result1 + result2).strip('+').replace("', '", '').replace('"', '')
+def getUserRating(html):
+    try:
+        result = str(html.xpath('//span[@class="score-stars"]/../text()')[0])
+        v = re.findall(r'(\d+|\d+\.\d+)分, 由(\d+)人評價', result)
+        return float(v[0][0]), int(v[0][1])
+    except:
+        return
+def getUncensored(html):
+    x = html.xpath('//strong[contains(text(),"類別")]/../span/a[contains(@href,"/tags/uncensored?")'
+                   ' or contains(@href,"/tags/western?")]')
+    return bool(x)
+
 def main(number):
     # javdb更新后同一时间只能登录一个数字站,最新登录站会踢出旧的登录,因此按找到的第一个javdb*.json文件选择站点,
@@ -276,7 +287,7 @@ def main(number):
         'actor': getActor(lx),
         'title': title,
         'studio': getStudio(detail_page, lx),
-        'outline': getOutline(number, title),
+        'outline': getOutline(number, title, getUncensored(lx)),
         'runtime': getRuntime(lx),
         'director': getDirector(lx),
         'release': getRelease(detail_page),
@@ -293,8 +304,12 @@ def main(number):
         'website': urljoin('https://javdb.com', correct_url),
         'source': 'javdb.py',
         'series': getSeries(lx),
+        '无码': getUncensored(lx)
     }
+    userrating = getUserRating(lx)
+    if isinstance(userrating, tuple) and len(userrating) == 2:
+        dic['用户评分'] = userrating[0]
+        dic['评分人数'] = userrating[1]
     if not dic['actor'] and re.match(r'FC2-[\d]+', number, re.A):
         dic['actor'].append('素人')
     if not dic['series']:
@@ -313,18 +328,19 @@ def main(number):
 # main('DV-1562')
 # input("[+][+]Press enter key exit, you can check the error messge before you exit.\n[+][+]按回车键结束,你可以在结束之前查看和错误信息。")
 if __name__ == "__main__":
-    config.G_conf_override['debug_mode:switch'] = True
+    config.getInstance().set_override("debug_mode:switch=1")
     # print(main('blacked.20.05.30'))
     # print(main('AGAV-042'))
     # print(main('BANK-022'))
-    # print(main('070116-197'))
+    print(main('070116-197'))
     # print(main('093021_539')) # 没有剧照 片商pacopacomama
     #print(main('FC2-2278260'))
     # print(main('FC2-735670'))
     # print(main('FC2-1174949')) # not found
     #print(main('MVSD-439'))
     # print(main('EHM0001')) # not found
-    print(main('FC2-2314275'))
+    #print(main('FC2-2314275'))
     # print(main('EBOD-646'))
     # print(main('LOVE-262'))
-    #print(main('ABP-890'))
+    print(main('ABP-890'))
+    print(main('blacked.14.12.08'))
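
getUserRating above pulls the score and vote count out of javdb's score-stars sibling text in a single findall. A standalone check; the sample string mimics that text and is made up for illustration:

    import re

    sample = '\u00a04.52分, 由629人評價'
    v = re.findall(r'(\d+|\d+\.\d+)分, 由(\d+)人評價', sample)
    score, votes = float(v[0][0]), int(v[0][1])
    print(score, votes)  # -> 4.52 629
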
@@ -1,3 +1,5 @@
|
|||||||
|
import sys
|
||||||
|
sys.path.append('../')
|
||||||
from bs4 import BeautifulSoup # need install
|
from bs4 import BeautifulSoup # need install
|
||||||
from lxml import etree # need install
|
from lxml import etree # need install
|
||||||
from pyquery import PyQuery as pq # need install
|
from pyquery import PyQuery as pq # need install
|
||||||
@@ -5,24 +7,22 @@ from ADC_function import *
|
|||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
from lib2to3.pgen2 import parse
|
from lib2to3.pgen2 import parse
|
||||||
import sys
|
|
||||||
|
|
||||||
from urllib.parse import urlparse, unquote
|
from urllib.parse import urlparse, unquote
|
||||||
sys.path.append('../')
|
|
||||||
|
|
||||||
|
|
||||||
def getActorPhoto(html):
|
def getActorPhoto(html):
|
||||||
return ''
|
return ''
|
||||||
|
|
||||||
|
|
||||||
def getTitle(html, number): # 获取标题
|
def getTitle(html): # 获取标题
|
||||||
title = str(html.xpath('//h1[@class="article-title"]/text()')[0])
|
# <title>MD0140-2 / 家有性事EP2 爱在身边-麻豆社</title>
|
||||||
try:
|
# <title>MAD039 机灵可爱小叫花 强诱僧人迫犯色戒-麻豆社</title>
|
||||||
result = str(re.split(r'[/|/|-]', title)[1])
|
# <title>MD0094/贫嘴贱舌中出大嫂/坏嫂嫂和小叔偷腥内射受孕-麻豆社</title>
|
||||||
return result.strip()
|
# <title>TM0002-我的痴女女友-麻豆社</title>
|
||||||
except:
|
browser_title = str(html.xpath("/html/head/title/text()")[0])
|
||||||
return title.replace(number.upper(), '').strip()
|
title = str(re.findall(r'^[A-Z0-9 //\-]*(.*)-麻豆社$', browser_title)[0]).strip()
|
||||||
|
return title
|
||||||
|
|
||||||
def getStudio(html): # 获取厂商 已修改
|
def getStudio(html): # 获取厂商 已修改
|
||||||
try:
|
try:
|
||||||
@@ -61,7 +61,6 @@ def getNum(url, number): # 获取番号
|
|||||||
filename = unquote(urlparse(url).path)
|
filename = unquote(urlparse(url).path)
|
||||||
# 裁剪文件名
|
# 裁剪文件名
|
||||||
result = filename[1:-5].upper().strip()
|
result = filename[1:-5].upper().strip()
|
||||||
print(result)
|
|
||||||
# 移除中文
|
# 移除中文
|
||||||
if result.upper() != number.upper():
|
if result.upper() != number.upper():
|
||||||
result = re.split(r'[^\x00-\x7F]+', result, 1)[0]
|
result = re.split(r'[^\x00-\x7F]+', result, 1)[0]
|
||||||
@@ -83,13 +82,15 @@ def getSerise(html): # 获取系列 已修改
|
|||||||
return ''
|
return ''
|
||||||
|
|
||||||
|
|
||||||
def getTag(html): # 获取标签
|
def getTag(html, studio): # 获取标签
|
||||||
return html.xpath('//div[@class="article-tags"]/a/text()')
|
x = html.xpath('/html/head/meta[@name="keywords"]/@content')[0].split(',')
|
||||||
|
return [i.strip() for i in x if len(i.strip()) and studio not in i and '麻豆' not in i]
|
||||||
|
|
||||||
|
|
||||||
def getExtrafanart(html): # 获取剧照
|
def getExtrafanart(html): # 获取剧照
|
||||||
return ''
|
return ''
|
||||||
|
|
||||||
|
|
||||||
def cutTags(tags):
|
def cutTags(tags):
|
||||||
actors = []
|
actors = []
|
||||||
tags = []
|
tags = []
|
||||||
@@ -109,13 +110,15 @@ def main(number):
|
|||||||
|
|
||||||
html = etree.fromstring(htmlcode, etree.HTMLParser())
|
html = etree.fromstring(htmlcode, etree.HTMLParser())
|
||||||
url = getUrl(html)
|
url = getUrl(html)
|
||||||
tags = getTag(html)
|
studio = getStudio(html)
|
||||||
actor,tags = cutTags(tags);
|
tags = getTag(html, studio)
|
||||||
|
#actor,tags = cutTags(tags) # 演员在tags中的位置不固定,放弃尝试获取
|
||||||
|
actor = ''
|
||||||
dic = {
|
dic = {
|
||||||
# 标题
|
# 标题
|
||||||
'title': getTitle(html, number),
|
'title': getTitle(html),
|
||||||
# 制作商
|
# 制作商
|
||||||
'studio': getStudio(html),
|
'studio': studio,
|
||||||
# 年份
|
# 年份
|
||||||
'year': getYear(html),
|
'year': getYear(html),
|
||||||
# 简介
|
# 简介
|
||||||
@@ -143,7 +146,8 @@ def main(number):
|
|||||||
'website': url,
|
'website': url,
|
||||||
'source': 'madou.py',
|
'source': 'madou.py',
|
||||||
# 使用
|
# 使用
|
||||||
'series': getSerise(html)
|
'series': getSerise(html),
|
||||||
|
'无码': True
|
||||||
}
|
}
|
||||||
js = json.dumps(dic, ensure_ascii=False, sort_keys=True,
|
js = json.dumps(dic, ensure_ascii=False, sort_keys=True,
|
||||||
indent=4, separators=(',', ':'), ) # .encode('UTF-8')
|
indent=4, separators=(',', ':'), ) # .encode('UTF-8')
|
||||||
@@ -161,4 +165,11 @@ def main(number):
|
|||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
print(main('MD0094'))
|
config.getInstance().set_override("debug_mode:switch=1")
|
||||||
|
print(main('MD0129'))
|
||||||
|
# print(main('TM0002'))
|
||||||
|
# print(main('MD0222'))
|
||||||
|
# print(main('MD0140-2'))
|
||||||
|
# print(main('MAD039'))
|
||||||
|
# print(main('JDMY027'))
|
||||||
|
|
||||||
|
|||||||
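A quick sanity check of the new title regex against the sample <title> strings quoted in the comments above. This is a standalone sketch; the real getTitle pulls browser_title out of the page with lxml first:

import re

samples = [
    'MD0140-2 / 家有性事EP2 爱在身边-麻豆社',
    'MAD039 机灵可爱小叫花 强诱僧人迫犯色戒-麻豆社',
    'MD0094/贫嘴贱舌中出大嫂/坏嫂嫂和小叔偷腥内射受孕-麻豆社',
    'TM0002-我的痴女女友-麻豆社',
]
for browser_title in samples:
    # the leading [A-Z0-9 //\-]* swallows the ID-number prefix; the trailing -麻豆社 anchor drops the site suffix
    print(re.findall(r'^[A-Z0-9 //\-]*(.*)-麻豆社$', browser_title)[0].strip())

All four samples yield the bare Chinese title, which is what the fixed-position re.split of the old version could not do reliably.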
@@ -5,95 +5,28 @@ from lxml import etree
 import json
 from bs4 import BeautifulSoup
 from ADC_function import *
+from WebCrawler.crawler import *
 # import sys
 # import io
 # sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)
 
-def getTitle(a):
-    try:
-        html = etree.fromstring(a, etree.HTMLParser())
-        result = str(html.xpath('//*[@id="center_column"]/div[1]/h1/text()')).strip(" ['']")
-        return result.replace('/', ',')
-    except:
-        return ''
-def getActor(a):  #//*[@id="center_column"]/div[2]/div[1]/div/table/tbody/tr[1]/td/text()
-    html = etree.fromstring(a, etree.HTMLParser())  #//table/tr[1]/td[1]/text()
-    result1=str(html.xpath('//th[contains(text(),"出演:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip('\\n')
-    result2=str(html.xpath('//th[contains(text(),"出演:")]/../td/text()')).strip(" ['']").strip('\\n ').strip('\\n')
-    return str(result1+result2).strip('+').replace("', '",'').replace('"','').replace('/',',')
-def getStudio(a):
-    html = etree.fromstring(a, etree.HTMLParser())  #//table/tr[1]/td[1]/text()
-    result1=str(html.xpath('//th[contains(text(),"メーカー:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip('\\n')
-    result2=str(html.xpath('//th[contains(text(),"メーカー:")]/../td/text()')).strip(" ['']").strip('\\n ').strip('\\n')
-    return str(result1+result2).strip('+').replace("', '",'').replace('"','')
-def getRuntime(a):
-    html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
-    result1 = str(html.xpath('//th[contains(text(),"収録時間:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip('\\n')
-    result2 = str(html.xpath('//th[contains(text(),"収録時間:")]/../td/text()')).strip(" ['']").strip('\\n ').strip('\\n')
-    return str(result1 + result2).strip('+').rstrip('mi')
-def getLabel(a):
-    html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
-    result1 = str(html.xpath('//th[contains(text(),"シリーズ:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip('\\n')
-    result2 = str(html.xpath('//th[contains(text(),"シリーズ:")]/../td/text()')).strip(" ['']").strip('\\n ').strip('\\n')
-    return str(result1 + result2).strip('+').replace("', '",'').replace('"','')
-def getNum(a):
-    html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
-    result1 = str(html.xpath('//th[contains(text(),"品番:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip('\\n')
-    result2 = str(html.xpath('//th[contains(text(),"品番:")]/../td/text()')).strip(" ['']").strip('\\n ').strip('\\n')
-    return str(result1 + result2).strip('+')
-def getYear(getRelease):
-    try:
-        result = str(re.search('\d{4}', getRelease).group())
-        return result
-    except:
-        return getRelease
-def getRelease(a):
-    html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
-    result1 = str(html.xpath('//th[contains(text(),"配信開始日:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip('\\n')
-    result2 = str(html.xpath('//th[contains(text(),"配信開始日:")]/../td/text()')).strip(" ['']").strip('\\n ').strip('\\n')
-    return str(result1 + result2).strip('+').replace('/','-')
+class MgsCrawler(Crawler):
+    def getMgsString(self, _xpath):
+        html = self.html
+        result1 = str(html.xpath(_xpath)).strip(" ['']").strip('\\n ').strip('\\n').strip(" ['']").replace(u'\\n', '').replace("', '', '", '')
+        result2 = str(html.xpath(_xpath.replace('td/a/','td/'))).strip(" ['']").strip('\\n ').strip('\\n')
+        return str(result1 + result2).strip('+').replace("', '",'').replace('"','')
 def getTag(a):
     html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
-    result1 = str(html.xpath('//th[contains(text(),"ジャンル:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip(
-        '\\n')
-    result2 = str(html.xpath('//th[contains(text(),"ジャンル:")]/../td/text()')).strip(" ['']").strip('\\n ').strip(
-        '\\n')
+    result1 = str(html.xpath('//th[contains(text(),"ジャンル:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip('\\n')
+    result2 = str(html.xpath('//th[contains(text(),"ジャンル:")]/../td/text()')).strip(" ['']").strip('\\n ').strip('\\n')
     result = str(result1 + result2).strip('+').replace("', '\\n",",").replace("', '","").replace('"','').replace(',,','').split(',')
     return result
-def getCover(htmlcode):
-    html = etree.fromstring(htmlcode, etree.HTMLParser())
-    result = str(html.xpath('//*[@id="EnlargeImage"]/@href')).strip(" ['']")
-    # result = str(html.xpath('//*[@id="center_column"]/div[1]/div[1]/div/div/h2/img/@src')).strip(" ['']")
-    # /html/body/div[2]/article[2]/div[1]/div[1]/div/div/h2/img/@src
-    return result
-def getDirector(a):
-    html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
-    result1 = str(html.xpath('//th[contains(text(),"シリーズ")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip('\\n')
-    result2 = str(html.xpath('//th[contains(text(),"シリーズ")]/../td/text()')).strip(" ['']").strip('\\n ').strip('\\n')
-    return str(result1 + result2).strip('+').replace("', '",'').replace('"','')
-def getOutline(htmlcode):
-    html = etree.fromstring(htmlcode, etree.HTMLParser())
-    result = str(html.xpath('//p/text()')).strip(" ['']").replace(u'\\n', '').replace("', '', '", '')
-    return result
-def getSeries(a):
-    html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
-    result1 = str(html.xpath('//th[contains(text(),"シリーズ")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip('\\n')
-    result2 = str(html.xpath('//th[contains(text(),"シリーズ")]/../td/text()')).strip(" ['']").strip('\\n ').strip('\\n')
-    return str(result1 + result2).strip('+').replace("', '", '').replace('"', '')
 
-def getExtrafanart(htmlcode):  # get extrafanart stills
+def getExtrafanart(htmlcode2):  # get extrafanart stills
     html_pather = re.compile(r'<dd>\s*?<ul>[\s\S]*?</ul>\s*?</dd>')
-    html = html_pather.search(htmlcode)
+    html = html_pather.search(htmlcode2)
     if html:
         html = html.group()
         extrafanart_pather = re.compile(r'<a class=\"sample_image\" href=\"(.*?)\"')
@@ -104,36 +37,35 @@ def getExtrafanart(htmlcode):  # get extrafanart stills
 
 def main(number2):
     number=number2.upper()
-    htmlcode=str(get_html('https://www.mgstage.com/product/product_detail/'+str(number)+'/',cookies={'adc':'1'}))
-    soup = BeautifulSoup(htmlcode, 'lxml')
-    a = str(soup.find(attrs={'class': 'detail_data'})).replace('\n ','').replace(' ','').replace('\n ','').replace('\n ','')
-    b = str(soup.find(attrs={'id': 'introduction'})).replace('\n ','').replace(' ','').replace('\n ','').replace('\n ','')
+    htmlcode2=str(get_html('https://www.mgstage.com/product/product_detail/'+str(number)+'/',cookies={'adc':'1'}))
+    soup = BeautifulSoup(htmlcode2, 'lxml')
+    a2 = str(soup.find(attrs={'class': 'detail_data'})).replace('\n ','').replace(' ','').replace('\n ','').replace('\n ','')
+    b2 = str(soup.find(attrs={'id': 'introduction'})).replace('\n ','').replace(' ','').replace('\n ','').replace('\n ','')
+    htmlcode = MgsCrawler(htmlcode2)
+    a = MgsCrawler(a2)
+    b = MgsCrawler(b2)
     #print(b)
-    try:
-        dic = {
-            'title': getTitle(htmlcode).replace("\\n", '').replace(' ', ''),
-            'studio': getStudio(a),
-            'outline': getOutline(b),
-            'runtime': getRuntime(a),
-            'director': getDirector(a),
-            'actor': getActor(a),
-            'release': getRelease(a),
-            'number': getNum(a),
-            'cover': getCover(htmlcode),
-            'imagecut': 1,
-            'tag': getTag(a),
-            'label': getLabel(a),
-            'extrafanart': getExtrafanart(htmlcode),
-            'year': getYear(getRelease(a)),  # str(re.search('\d{4}',getRelease(a)).group()),
-            'actor_photo': '',
-            'website': 'https://www.mgstage.com/product/product_detail/' + str(number) + '/',
-            'source': 'mgstage.py',
-            'series': getSeries(a),
-        }
-    except Exception as e:
-        if config.getInstance().debug():
-            print(e)
-        dic = {"title": ""}
+    dic = {
+        'title': htmlcode.getString('//*[@id="center_column"]/div[1]/h1/text()').replace('/', ',').replace("\\n",'').replace(' ', '').strip(),
+        'studio': a.getMgsString('//th[contains(text(),"メーカー:")]/../td/a/text()'),
+        'outline': b.getString('//p/text()').strip(" ['']").replace(u'\\n', '').replace("', '', '", ''),
+        'runtime': a.getMgsString('//th[contains(text(),"収録時間:")]/../td/a/text()').rstrip('mi'),
+        'director': a.getMgsString('//th[contains(text(),"シリーズ")]/../td/a/text()'),
+        'actor': a.getMgsString('//th[contains(text(),"出演:")]/../td/a/text()'),
+        'release': a.getMgsString('//th[contains(text(),"配信開始日:")]/../td/a/text()').replace('/','-'),
+        'number': a.getMgsString('//th[contains(text(),"品番:")]/../td/a/text()'),
+        'cover': htmlcode.getString('//*[@id="EnlargeImage"]/@href'),
+        'imagecut': 1,
+        'tag': getTag(a2),
+        'label': a.getMgsString('//th[contains(text(),"シリーズ:")]/../td/a/text()'),
+        'extrafanart': getExtrafanart(htmlcode2),
+        'year': str(re.findall('\d{4}',a.getMgsString('//th[contains(text(),"配信開始日:")]/../td/a/text()'))).strip(" ['']"),
+        # str(re.search('\d{4}',getRelease(a)).group()),
+        'actor_photo': '',
+        'website': 'https://www.mgstage.com/product/product_detail/' + str(number) + '/',
+        'source': 'mgstage.py',
+        'series': a.getMgsString('//th[contains(text(),"シリーズ")]/../td/a/text()'),
+    }
 
     js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), )  # .encode('UTF-8')
     return js
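The mgstage refactor folds eight near-identical scraper functions into one XPath helper on a crawler object: try the value wrapped in <td><a>, then fall back to bare <td> text. A minimal sketch of that fold, assuming a Crawler base class that parses the page into self.html (FakeCrawler below is a stand-in for that assumption):

from lxml import etree

class FakeCrawler:  # stand-in for WebCrawler.crawler.Crawler
    def __init__(self, htmlcode):
        self.html = etree.fromstring(htmlcode, etree.HTMLParser())

page = FakeCrawler('<table><tr><th>品番:</th><td><a>SIRO-4800</a></td></tr></table>')
xp = '//th[contains(text(),"品番:")]/../td/a/text()'
print(page.html.xpath(xp))                          # value wrapped in <a>: ['SIRO-4800']
print(page.html.xpath(xp.replace('td/a/', 'td/')))  # fallback for bare <td> text: []

getMgsString concatenates both results and strips the list-repr noise, so a field is found whether or not the site wraps it in a link.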
@@ -5,7 +5,6 @@ import json
 import builtins
 from ADC_function import *
 from lxml.html import fromstring
-from multiprocessing import Pool
 from multiprocessing.dummy import Pool as ThreadPool
 from difflib import SequenceMatcher
 from unicodedata import category
@@ -13,7 +12,7 @@ from number_parser import is_uncensored
 
 G_registered_storyline_site = {"airavwiki", "airav", "avno1", "xcity", "amazon", "58avgo"}
 
-G_mode_txt = ('顺序执行','线程池','进程池')
+G_mode_txt = ('顺序执行','线程池')
 
 class noThread(object):
     def map(self, fn, param):
@@ -25,14 +24,15 @@ class noThread(object):
 
 
 # Fetch the storyline: query the listed sites concurrently; earlier entries take priority
-def getStoryline(number, title, sites: list=None):
+def getStoryline(number, title, sites: list=None, 无码=None):
     start_time = time.time()
     conf = config.getInstance()
     if not conf.is_storyline():
         return ''
     debug = conf.debug() or conf.storyline_show() == 2
     storyine_sites = conf.storyline_site().split(',') if sites is None else sites
-    if is_uncensored(number):
+    unc = 无码 if isinstance(无码, bool) else is_uncensored(number)
+    if unc:
         storyine_sites += conf.storyline_uncensored_site().split(',')
     else:
         storyine_sites += conf.storyline_censored_site().split(',')
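The new 无码 (uncensored) parameter lets a caller pin the censored/uncensored decision instead of re-deriving it from the ID number; only an explicit bool overrides, anything else falls through to the heuristic. The resolution rule, isolated as a sketch:

from number_parser import is_uncensored  # the same helper the diff imports

def resolve_uncensored(number, 无码=None):
    # an explicit bool wins; anything else (None included) falls back to the ID-number heuristic
    return 无码 if isinstance(无码, bool) else is_uncensored(number)

print(resolve_uncensored('ABP-890', 无码=False))  # False, regardless of the heuristic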
@@ -49,9 +49,8 @@ def getStoryline(number, title, sites: list=None):
     cores = min(len(apply_sites), os.cpu_count())
     if cores == 0:
         return ''
-    run_mode = conf.storyline_mode()
-    assert run_mode in (0,1,2)
-    with ThreadPool(cores) if run_mode == 1 else Pool(cores) if run_mode == 2 else noThread() as pool:
+    run_mode = 1 if conf.storyline_mode() > 0 else 0
+    with ThreadPool(cores) if run_mode > 0 else noThread() as pool:
         results = pool.map(getStoryline_mp, mp_args)
     sel = ''
     if not debug and conf.storyline_show() == 0:
@@ -62,7 +61,7 @@ def getStoryline(number, title, sites: list=None):
         if not len(sel):
             sel = value
         return sel
-    # The debug output below is written to the log; in process-pool mode it is not, and only shows on stdout
+    # The debug output below is written to the log
    s = f'[!]Storyline{G_mode_txt[run_mode]}模式运行{len(apply_sites)}个任务共耗时(含启动开销){time.time() - start_time:.3f}秒,结束于{time.strftime("%H:%M:%S")}'
     sel_site = ''
     for site, desc in zip(apply_sites, results):
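With the process pool removed, run_mode collapses to a binary: any positive value means thread pool, zero means the sequential noThread shim. A self-contained sketch of that selection; the toy noThread here only mimics the context-manager surface the real one must expose to appear in a with statement:

from multiprocessing.dummy import Pool as ThreadPool

class noThread:
    def map(self, fn, param):
        return list(map(fn, param))
    def __enter__(self):
        return self
    def __exit__(self, *args):
        return False

run_mode = 1  # i.e. 1 if conf.storyline_mode() > 0 else 0
with ThreadPool(2) if run_mode > 0 else noThread() as pool:
    print(pool.map(len, ['a', 'bb', 'ccc']))  # [1, 2, 3] either way

Dropping the process pool also removes the logging blind spot the deleted comment described: thread-pool workers share the parent's stdout redirection, child processes did not.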
@@ -80,34 +79,33 @@ def getStoryline(number, title, sites: list=None):
 
 
 def getStoryline_mp(args):
-    def _inner(site, number, title, debug):
-        start_time = time.time()
-        storyline = None
-        if not isinstance(site, str):
-            return storyline
-        elif site == "airavwiki":
-            storyline = getStoryline_airavwiki(number, debug)
-        elif site == "airav":
-            storyline = getStoryline_airav(number, debug)
-        elif site == "avno1":
-            storyline = getStoryline_avno1(number, debug)
-        elif site == "xcity":
-            storyline = getStoryline_xcity(number, debug)
-        elif site == "amazon":
-            storyline = getStoryline_amazon(title, number, debug)
-        elif site == "58avgo":
-            storyline = getStoryline_58avgo(number, debug)
-        if not debug:
-            return storyline
-        # In process-pool mode, print() from the getStoryline_*() children is not written to the log; thread pool and sequential runs are unaffected
-        print("[!]MP 进程[{}]运行{:.3f}秒,结束于{}返回结果: {}".format(
-            site,
-            time.time() - start_time,
-            time.strftime("%H:%M:%S"),
-            storyline if isinstance(storyline, str) and len(storyline) else '[空]')
-        )
-        return storyline
-    return _inner(*args)
+    (site, number, title, debug) = args
+    start_time = time.time()
+    storyline = None
+    if not isinstance(site, str):
+        return storyline
+    elif site == "airavwiki":
+        storyline = getStoryline_airavwiki(number, debug)
+        #storyline = getStoryline_airavwiki_super(number, debug)
+    elif site == "airav":
+        storyline = getStoryline_airav(number, debug)
+    elif site == "avno1":
+        storyline = getStoryline_avno1(number, debug)
+    elif site == "xcity":
+        storyline = getStoryline_xcity(number, debug)
+    elif site == "amazon":
+        storyline = getStoryline_amazon(title, number, debug)
+    elif site == "58avgo":
+        storyline = getStoryline_58avgo(number, debug)
+    if not debug:
+        return storyline
+    print("[!]MP 线程[{}]运行{:.3f}秒,结束于{}返回结果: {}".format(
+        site,
+        time.time() - start_time,
+        time.strftime("%H:%M:%S"),
+        storyline if isinstance(storyline, str) and len(storyline) else '[空]')
+    )
+    return storyline
 
 
 def getStoryline_airav(number, debug):
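Pool.map hands each worker exactly one argument, which is why the call site packs (site, number, title, debug) into a tuple (mp_args in the diff) and why the refactor unpacks it at the top of getStoryline_mp rather than forwarding through an inner closure. In miniature:

mp_args = [('airav', 'ABP-890', 'some title', False)]  # hypothetical task list

def worker(args):
    (site, number, title, debug) = args  # one tuple in, unpacked immediately
    return site

print(list(map(worker, mp_args)))  # ['airav']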
@@ -308,8 +306,8 @@ def getStoryline_amazon(q_title, number, debug):
     res = session.get(urljoin(res.url, lks[0]))
     cookie = None
     lx = fromstring(res.text)
-    titles = lx.xpath("//span[contains(@class,'a-color-base a-text-normal')]/text()")
-    urls = lx.xpath("//span[contains(@class,'a-color-base a-text-normal')]/../@href")
+    titles = lx.xpath("//span[contains(@class,'a-size-base-plus a-color-base a-text-normal')]/text()")
+    urls = lx.xpath("//span[contains(@class,'a-size-base-plus a-color-base a-text-normal')]/../@href")
     if not len(urls) or len(urls) != len(titles):
         raise ValueError("titles not found")
     idx = amazon_select_one(titles, q_title, number, debug)
@@ -325,8 +323,9 @@ def getStoryline_amazon(q_title, number, debug):
     res = session.get(urljoin(res.url, lks[0]))
     cookie = None
     lx = fromstring(res.text)
-    div = lx.xpath('//*[@id="productDescription"]')[0]
-    ama_t = ' '.join([e.text.strip() for e in div if not re.search('Comment|h3', str(e.tag), re.I) and isinstance(e.text, str)])
+    p1 = lx.xpath('//*[@id="productDescription"]/p[1]/span/text()')
+    p2 = lx.xpath('//*[@id="productDescription"]/p[2]/span/text()')
+    ama_t = ' '.join(p1) + ' '.join(p2)
     ama_t = re.sub(r'審査番号:\d+', '', ama_t).strip()
 
     if cookie is None:
@@ -406,10 +405,10 @@ def amazon_select_one(a_titles, q_title, number, debug):
     # In debug mode, log match-accuracy records
     if ratio < 0.9:
         # Rejected results with similarity in [0.5, 0.9) get their own log
-        (Path.home() / '.avlogs/ratio0.5.txt').open('a', encoding='utf-8').write(
-            f' [{number}] Ratio:{ratio}\n{a_titles[sel]}\n{q_title}\n{save_t_}\n{que_t}\n')
+        with (Path.home() / '.mlogs/ratio0.5.txt').open('a', encoding='utf-8') as hrt:
+            hrt.write(f' [{number}] Ratio:{ratio}\n{a_titles[sel]}\n{q_title}\n{save_t_}\n{que_t}\n')
         return -1
     # Log of accepted results
-    (Path.home() / '.avlogs/ratio.txt').open('a', encoding='utf-8').write(
-        f' [{number}] Ratio:{ratio}\n{a_titles[sel]}\n{q_title}\n{save_t_}\n{que_t}\n')
+    with (Path.home() / '.mlogs/ratio.txt').open('a', encoding='utf-8') as hrt:
+        hrt.write(f' [{number}] Ratio:{ratio}\n{a_titles[sel]}\n{q_title}\n{save_t_}\n{que_t}\n')
     return sel
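Besides renaming the log directory to .mlogs, this change fixes a file-handle leak: the old one-liner opened the ratio log and never closed it, while the with block guarantees the handle is flushed and closed. The pattern, reduced to its core (the mkdir line is an addition for self-containment; the real code relies on the directory already existing):

from pathlib import Path

logfile = Path.home() / '.mlogs/ratio.txt'
logfile.parent.mkdir(exist_ok=True)  # assumed here only so the sketch runs standalone
with logfile.open('a', encoding='utf-8') as hrt:
    hrt.write(' [ABP-890] Ratio:0.93\n')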
@@ -128,7 +128,7 @@ def getOutline(html, number, title):
     a = set(storyline_site) & {'airav', 'avno1'}  # Chinese-language outlines only
     if len(a):
         site = [n for n in storyline_site if n in a]
-        g = getStoryline(number, title, site)
+        g = getStoryline(number, title, site, 无码=False)
         if len(g):
             return g
     try:
249  config.ini
@@ -1,119 +1,130 @@
 # For the full guide, see
 # - https://github.com/yoshiko2/Movie_Data_Capture/wiki#%E9%85%8D%E7%BD%AEconfigini
 [common]
 main_mode=1
 source_folder=./
 failed_output_folder=failed
 success_output_folder=JAV_output
-soft_link=0
-failed_move=1
+link_mode=0
+; 0: do not scrape hardlinked files 1: scrape hardlinked files
+scan_hardlink=0
+failed_move=0
 auto_exit=0
 translate_to_sc=0
 multi_threading=0
 ;actor_gender value: female(♀) or male(♂) or both(♀ ♂) or all(♂ ♀ ⚧)
 actor_gender=female
 del_empty_folder=1
 ; Skip .NFO files modified within the last N days (default: 30), so organize mode (main_mode=3) and soft links (soft_link=0)
 ; do not repeatedly re-scrape the leading video files; 0 processes all video files
 nfo_skip_days=30
 ; Stop after processing this many video files; 0 processes all of them
 stop_counter=0
-; Combining the two options above lets you scrape or organize thousands of files in small batches without triggering bans from translation or metadata sites
+; Re-run delay; units: h hours, m minutes, s seconds. Examples: 1h30m45s (1 h 30 min 45 s), 45 (45 s)
+; Only effective when stop_counter is non-zero: after every stop_counter movies, wait rerun_delay seconds, then run again
+rerun_delay=0
+; Combining the three options above lets you scrape or organize thousands of files in small batches without triggering bans from translation or metadata sites
 ignore_failed_list=0
 download_only_missing_images=1
 mapping_table_validity=7
 
 [proxy]
 ;proxytype: http or socks5 or socks5h switch: 0 1
 switch=0
 type=socks5
 proxy=127.0.0.1:1080
 timeout=10
 retry=3
 cacert_file=
 
 [Name_Rule]
 location_rule=actor+'/'+number
 naming_rule=number+'-'+title
 max_title_len=50
 
 [update]
 update_check=1
 
 [priority]
-website=javbus,airav,fanza,xcity,javdb,mgstage,fc2,avsox,dlsite,carib,fc2club
+website=javbus,airav,jav321,fanza,xcity,mgstage,fc2,avsox,dlsite,carib,fc2club,madou,mv91,javdb,gcolle
 
 [escape]
 literals=\()/
 folders=failed,JAV_output
 
 [debug_mode]
 switch=0
 
 ; Machine translation
 [translate]
 switch=0
 ; options: google-free, azure
 engine=google-free
 ; Azure translation key
 key=
 ; translation delay
 delay=1
 values=title,outline
 service_site=translate.google.cn
 
 ; Trailer
 [trailer]
 switch=0
 
 ; Used to decide whether a movie is uncensored
 [uncensored]
 uncensored_prefix=S2M,BT,LAF,SMD,SMBD,SM3D2DBD,SKY-,SKYHD,CWP,CWDV,CWBD,CW3D2DBD,MKD,MKBD,MXBD,MK3D2DBD,MCB3DBD,MCBD,RHJ,MMDV
 
 [media]
 ; video extensions
 media_type=.mp4,.avi,.rmvb,.wmv,.mov,.mkv,.flv,.ts,.webm,.iso,.mpg,.m4v
 ; subtitle extensions
-sub_type=.smi,.srt,.idx,.sub,.sup,.psb,.ssa,.ass,.txt,.usf,.xss,.ssf,.rt,.lrc,.sbv,.vtt,.ttml
+sub_type=.smi,.srt,.idx,.sub,.sup,.psb,.ssa,.ass,.usf,.xss,.ssf,.rt,.lrc,.sbv,.vtt,.ttml
 
 ; Watermark
 [watermark]
 switch=1
 water=2
 ; top-left 0, top-right 1, bottom-right 2, bottom-left 3
 
 ; Extrafanart stills
 [extrafanart]
 switch=1
 parallel_download=5
 extrafanart_folder=extrafanart
 
 ; Storyline
 [storyline]
 switch=1
 ; When website is javbus javdb avsox xcity carib, site/censored_site/uncensored_site are the candidate
 ; storyline data sources. Listed sites are queried concurrently; priority follows the number before the colon, ascending, and a later site's data is used only when lower-numbered sites return nothing.
 ; airavwiki airav avno1 58avgo return Chinese storylines; airav covers censored titles only, avno1 and airavwiki cover both,
 ; 58avgo covers only uncensored or leaked/decensored movies (this capability is unused).
 ; xcity and amazon are Japanese; since the Amazon store carries no ID numbers, picking the matching DVD is only 99.6% accurate. If all three lists are empty no lookup is made,
 ; and disabling lookups speeds up scraping considerably.
 ; site=
 site=1:avno1,4:airavwiki
 censored_site=2:airav,5:xcity,6:amazon
 uncensored_site=3:58avgo
 ; Run mode: 0 sequential (slowest) 1 thread pool (default) 2 process pool (higher startup cost; faster with more concurrent sites)
 run_mode=1
 ; show_result storyline debug output: 0 off 1 brief 2 verbose (the verbose part is not logged); set 2 to diagnose storyline failures
 show_result=0
 
 ; Traditional/Simplified Chinese conversion; mode=0: off 1: Traditional to Simplified 2: Simplified to Traditional
 [cc_convert]
 mode=1
 vars=outline,series,studio,tag,title
 
 [javdb]
-sites=33,34
+sites=38,39
 
-; Face detection: hog: histogram of oriented gradients (less accurate, fast) cnn: deep-learning model (accurate, needs GPU/CUDA, slow)
+; Face detection: locations_model=hog: histogram of oriented gradients (less accurate, fast) cnn: deep-learning model (accurate, needs GPU/CUDA, slow)
+; uncensored_only=0: run face detection on all covers 1: only on uncensored covers; censored covers just crop the right half
+; aways_imagecut=0: follow each site's default behavior 1: always crop the cover; when enabled this ignores [common]download_only_missing_images=1 and always overwrites the cover
+; The cover crop's width:height ratio is configurable as aspect_ratio/3. Default aspect_ratio=2.12 fits most censored covers; the previous release used 2/3, i.e. aspect_ratio=2
 [face]
 locations_model=hog
+uncensored_only=1
+aways_imagecut=0
+aspect_ratio=2.12
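On the new [face] keys: per the comment above, the crop's width:height ratio is aspect_ratio/3, so the default 2.12 crops covers at roughly 0.707 width per unit of height, where the old default of 2 gave the classic 2/3. A check with a hypothetical cover size:

height = 538                      # hypothetical cover height in pixels
aspect_ratio = 2.12
crop_width = int(height * aspect_ratio / 3)
print(crop_width)                 # 380, vs. int(538 * 2 / 3) == 358 under the old default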
226  config.py
@@ -3,19 +3,14 @@ import re
 import sys
 import configparser
 import time
+import typing
 from pathlib import Path
 
 G_conf_override = {
     # index 0 save Config() first instance for quick access by using getInstance()
     0: None,
     # register override config items
-    "common:main_mode": None,
-    "common:source_folder": None,
-    "common:auto_exit": None,
-    "common:nfo_skip_days": None,
-    "common:stop_counter": None,
-    "common:ignore_failed_list": None,
-    "debug_mode:switch": None
+    # no need anymore
 }
 
 
@@ -74,17 +69,17 @@ class Config:
         elif (Path(__file__).resolve().parent / 'config.ini').is_file():
             res_path = Path(__file__).resolve().parent / 'config.ini'
         if res_path is None:
-            sys.exit(2)
+            os._exit(2)
         ins = input("Or, Do you want me create a config file for you? (Yes/No)[Y]:")
         if re.search('n', ins, re.I):
-            sys.exit(2)
+            os._exit(2)
         # Only the home directory is guaranteed writable, so ~/mdc.ini is the generated-config path rather than
        # the current directory, which may lack write permission. Current-directory configs are no longer encouraged and survive only as a multi-config switching trick.
         write_path = path_search_order[2]  # Path.home() / "mdc.ini"
         write_path.write_text(res_path.read_text(encoding='utf-8'), encoding='utf-8')
         print("Config file '{}' created.".format(write_path.resolve()))
         input("Press Enter key exit...")
-        sys.exit(0)
+        os._exit(0)
     # self.conf = self._default_config()
     # try:
     #     self.conf = configparser.ConfigParser()
@@ -95,29 +90,86 @@ class Config:
     #     except Exception as e:
     #         print("[-]Config file not found! Use the default settings")
     #         print("[-]",e)
-    #         sys.exit(3)
+    #         os._exit(3)
     #     #self.conf = self._default_config()
 
-    def getboolean_override(self, section, item) -> bool:
-        return self.conf.getboolean(section, item) if G_conf_override[f"{section}:{item}"] is None else bool(
-            G_conf_override[f"{section}:{item}"])
-
-    def getint_override(self, section, item) -> int:
-        return self.conf.getint(section, item) if G_conf_override[f"{section}:{item}"] is None else int(
-            G_conf_override[f"{section}:{item}"])
-
-    def get_override(self, section, item) -> str:
-        return self.conf.get(section, item) if G_conf_override[f"{section}:{item}"] is None else str(
-            G_conf_override[f"{section}:{item}"])
+    def set_override(self, option_cmd: str):
+        """
+        Generic config-override option: -C 'override string'
+        Syntax: section:key=value[;[section:]key=value][;[section:]key+=value]  multiple keys are separated by ';'; names may drop trailing characters
+        or section:key+=value[...] to append to an existing value; '=' and '+=' may be mixed across keys
+        Example: face:aspect_ratio=2;aways_imagecut=1;priority:website=javdb
+        The section name must appear at least once at the start; after a ';' a bare key=value may omit the section
+        name as long as all subsequent keys belong to the same section.
+        If the config file has the two sections [proxy] and [priority], then 'pro' can stand for proxy and 'pri' for priority.
+        [face]  ; the face section has 4 keys: locations_model= uncensored_only= aways_imagecut= aspect_ratio=
+        l,lo,loc,loca,locat,locati... up to the full name locations_model all denote the locations_model= key
+        u,un,unc... up to the full name uncensored_only all denote the uncensored_only= key
+        aw,awa... up to the full name aways_imagecut all denote the aways_imagecut= key
+        as,asp... up to the full name aspect_ratio all denote the aspect_ratio= key
+        'a' alone is ambiguous and therefore not a valid abbreviation
+        """
+        def err_exit(str):
+            print(str)
+            os._exit(2)
+
+        sections = self.conf.sections()
+        sec_name = None
+        for cmd in option_cmd.split(';'):
+            syntax_err = True
+            rex = re.findall(r'^(.*?):(.*?)(=|\+=)(.*)$', cmd, re.U)
+            if len(rex) and len(rex[0]) == 4:
+                (sec, key, assign, val) = rex[0]
+                sec_lo = sec.lower().strip()
+                key_lo = key.lower().strip()
+                syntax_err = False
+            elif sec_name:  # a section name already appeared; later keys in the same section may omit it
+                rex = re.findall(r'^(.*?)(=|\+=)(.*)$', cmd, re.U)
+                if len(rex) and len(rex[0]) == 3:
+                    (key, assign, val) = rex[0]
+                    sec_lo = sec_name.lower()
+                    key_lo = key.lower().strip()
+                    syntax_err = False
+            if syntax_err:
+                err_exit(f"[-]Config override syntax incorrect. example: 'd:s=1' or 'debug_mode:switch=1'. cmd='{cmd}' all='{option_cmd}'")
+            if not len(sec_lo):
+                err_exit(f"[-]Config override Section name '{sec}' is empty! cmd='{cmd}'")
+            if not len(key_lo):
+                err_exit(f"[-]Config override Key name '{key}' is empty! cmd='{cmd}'")
+            if not len(val.strip()):
+                print(f"[!]Conig overide value '{val}' is empty! cmd='{cmd}'")
+            sec_name = None
+            for s in sections:
+                if not s.lower().startswith(sec_lo):
+                    continue
+                if sec_name:
+                    err_exit(f"[-]Conig overide Section short name '{sec_lo}' is not unique! dup1='{sec_name}' dup2='{s}' cmd='{cmd}'")
+                sec_name = s
+            if sec_name is None:
+                err_exit(f"[-]Conig overide Section name '{sec}' not found! cmd='{cmd}'")
+            key_name = None
+            keys = self.conf[sec_name]
+            for k in keys:
+                if not k.lower().startswith(key_lo):
+                    continue
+                if key_name:
+                    err_exit(f"[-]Conig overide Key short name '{key_lo}' is not unique! dup1='{key_name}' dup2='{k}' cmd='{cmd}'")
+                key_name = k
+            if key_name is None:
+                err_exit(f"[-]Conig overide Key name '{key}' not found! cmd='{cmd}'")
+            if assign == "+=":
+                val = keys[key_name] + val
+            if self.debug():
+                print(f"[!]Set config override [{sec_name}]{key_name}={val} by cmd='{cmd}'")
+            self.conf.set(sec_name, key_name, val)
 
     def main_mode(self) -> int:
         try:
-            return self.getint_override("common", "main_mode")
+            return self.conf.getint("common", "main_mode")
         except ValueError:
             self._exit("common:main_mode")
 
     def source_folder(self) -> str:
-        return self.get_override("common", "source_folder")
+        return self.conf.get("common", "source_folder")
 
     def failed_folder(self) -> str:
         return self.conf.get("common", "failed_output_folder")
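A worked example of the abbreviation rules the docstring describes, assuming the module-level config singleton and the default sections from config.ini above. Each fragment expands against the loaded section and key lists:

conf = config.getInstance()
conf.set_override("d:s=1;face:asp=2;f:aw=0;pri:w=javdb")
# d:s=1       -> [debug_mode] switch=1       ('d' uniquely prefixes debug_mode, 's' prefixes switch)
# face:asp=2  -> [face] aspect_ratio=2
# f:aw=0      -> [face] aways_imagecut=0     ('f' uniquely prefixes face)
# pri:w=javdb -> [priority] website=javdb
assert conf.face_aspect_ratio() == 2

An ambiguous prefix such as 'a' for a [face] key aborts with the "not unique" error, which is the safety net that makes the shorthand usable.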
@@ -128,14 +180,17 @@ class Config:
     def actor_gender(self) -> str:
         return self.conf.get("common", "actor_gender")
 
-    def soft_link(self) -> bool:
-        return self.conf.getboolean("common", "soft_link")
+    def link_mode(self) -> int:
+        return self.conf.getint("common", "link_mode")
+
+    def scan_hardlink(self) -> bool:
+        return self.conf.getboolean("common", "scan_hardlink", fallback=False)  # option missing from config: default to not scraping hardlinks
 
     def failed_move(self) -> bool:
         return self.conf.getboolean("common", "failed_move")
 
     def auto_exit(self) -> bool:
-        return self.getboolean_override("common", "auto_exit")
+        return self.conf.getboolean("common", "auto_exit")
 
     def translate_to_sc(self) -> bool:
         return self.conf.getboolean("common", "translate_to_sc")
@@ -147,19 +202,13 @@ class Config:
         return self.conf.getboolean("common", "del_empty_folder")
 
     def nfo_skip_days(self) -> int:
-        try:
-            return self.getint_override("common", "nfo_skip_days")
-        except:
-            return 30
+        return self.conf.getint("common", "nfo_skip_days", fallback=30)
 
     def stop_counter(self) -> int:
-        try:
-            return self.getint_override("common", "stop_counter")
-        except:
-            return 0
+        return self.conf.getint("common", "stop_counter", fallback=0)
 
     def ignore_failed_list(self) -> bool:
-        return self.getboolean_override("common", "ignore_failed_list")
+        return self.conf.getboolean("common", "ignore_failed_list")
 
     def download_only_missing_images(self) -> bool:
         return self.conf.getboolean("common", "download_only_missing_images")
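The try/except wrappers disappear because configparser's typed getters already take a fallback= keyword that covers both a missing section and a missing key. A minimal check of the equivalence:

import configparser

cp = configparser.ConfigParser()
cp.read_string('[common]\nnfo_skip_days = 15\n')
print(cp.getint('common', 'nfo_skip_days', fallback=30))        # 15: present, parsed
print(cp.getint('common', 'stop_counter', fallback=0))          # 0: key missing, fallback used
print(cp.getboolean('face', 'uncensored_only', fallback=True))  # True: whole section missing

Note the fallback does not catch a malformed value; main_mode() above keeps its explicit except ValueError for exactly that case.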
@@ -167,6 +216,18 @@ class Config:
     def mapping_table_validity(self) -> int:
         return self.conf.getint("common", "mapping_table_validity")
 
+    def rerun_delay(self) -> int:
+        value = self.conf.get("common", "rerun_delay")
+        if not (isinstance(value, str) and re.match(r'^[\dsmh]+$', value, re.I)):
+            return 0  # not match '1h30m45s' or '30' or '1s2m1h4s5m'
+        if value.isnumeric() and int(value) >= 0:
+            return int(value)
+        sec = 0
+        sec += sum(int(v) for v in re.findall(r'(\d+)s', value, re.I))
+        sec += sum(int(v) for v in re.findall(r'(\d+)m', value, re.I)) * 60
+        sec += sum(int(v) for v in re.findall(r'(\d+)h', value, re.I)) * 3600
+        return sec
+
     def is_translate(self) -> bool:
         return self.conf.getboolean("translate", "switch")
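rerun_delay() accepts either a bare number of seconds or an h/m/s string; segments may repeat and appear in any order, since each unit is summed independently. The parsing core, lifted into a standalone sketch:

import re

def parse_delay(value: str) -> int:
    if not re.match(r'^[\dsmh]+$', value, re.I):
        return 0  # anything else is treated as no delay
    if value.isnumeric():
        return int(value)  # bare digits are already seconds
    sec = sum(int(v) for v in re.findall(r'(\d+)s', value, re.I))
    sec += sum(int(v) for v in re.findall(r'(\d+)m', value, re.I)) * 60
    sec += sum(int(v) for v in re.findall(r'(\d+)h', value, re.I)) * 3600
    return sec

print(parse_delay('1h30m45s'))    # 5445
print(parse_delay('1s2m1h4s5m'))  # 3600 + 7*60 + 5 = 4025
print(parse_delay('45'))          # 45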
@@ -243,8 +304,8 @@ class Config:
     def media_type(self) -> str:
         return self.conf.get('media', 'media_type')
 
-    def sub_rule(self):
-        return self.conf.get('media', 'sub_type').split(',')
+    def sub_rule(self) -> typing.Set[str]:
+        return set(self.conf.get('media', 'sub_type').lower().split(','))
 
     def naming_rule(self) -> str:
         return self.conf.get("Name_Rule", "naming_rule")
@@ -277,7 +338,7 @@ class Config:
         return self.conf.get("escape", "folders")
 
     def debug(self) -> bool:
-        return self.getboolean_override("debug_mode", "switch")
+        return self.conf.getboolean("debug_mode", "switch")
 
     def is_storyline(self) -> bool:
         try:
@@ -304,43 +365,34 @@ class Config:
             return "3:58avgo"
 
     def storyline_show(self) -> int:
-        try:
-            v = self.conf.getint("storyline", "show_result")
-            return v if v in (0, 1, 2) else 2 if v > 2 else 0
-        except:
-            return 0
+        v = self.conf.getint("storyline", "show_result", fallback=0)
+        return v if v in (0, 1, 2) else 2 if v > 2 else 0
 
     def storyline_mode(self) -> int:
-        try:
-            v = self.conf.getint("storyline", "run_mode")
-            return v if v in (0, 1, 2) else 2 if v > 2 else 0
-        except:
-            return 1
+        return 1 if self.conf.getint("storyline", "run_mode", fallback=1) > 0 else 0
 
     def cc_convert_mode(self) -> int:
-        try:
-            v = self.conf.getint("cc_convert", "mode")
-            return v if v in (0, 1, 2) else 2 if v > 2 else 0
-        except:
-            return 1
+        v = self.conf.getint("cc_convert", "mode", fallback=1)
+        return v if v in (0, 1, 2) else 2 if v > 2 else 0
 
     def cc_convert_vars(self) -> str:
-        try:
-            return self.conf.get("cc_convert", "vars")
-        except:
-            return "actor,director,label,outline,series,studio,tag,title"
+        return self.conf.get("cc_convert", "vars",
+                             fallback="actor,director,label,outline,series,studio,tag,title")
 
     def javdb_sites(self) -> str:
-        try:
-            return self.conf.get("javdb", "sites")
-        except:
-            return "33,34"
+        return self.conf.get("javdb", "sites", fallback="38,39")
 
     def face_locations_model(self) -> str:
-        try:
-            return self.conf.get("face", "locations_model")
-        except:
-            return "hog"
+        return self.conf.get("face", "locations_model", fallback="hog")
+
+    def face_uncensored_only(self) -> bool:
+        return self.conf.getboolean("face", "uncensored_only", fallback=True)
+
+    def face_aways_imagecut(self) -> bool:
+        return self.conf.getboolean("face", "aways_imagecut", fallback=False)
+
+    def face_aspect_ratio(self) -> float:
+        return self.conf.getfloat("face", "aspect_ratio", fallback=2.12)
 
     @staticmethod
     def _exit(sec: str) -> None:
@@ -358,7 +410,8 @@ class Config:
         conf.set(sec1, "source_folder", "./")
         conf.set(sec1, "failed_output_folder", "failed")
         conf.set(sec1, "success_output_folder", "JAV_output")
-        conf.set(sec1, "soft_link", "0")
+        conf.set(sec1, "link_mode", "0")
+        conf.set(sec1, "scan_hardlink", "0")
         conf.set(sec1, "failed_move", "1")
         conf.set(sec1, "auto_exit", "0")
         conf.set(sec1, "translate_to_sc", "1")
@@ -370,6 +423,7 @@ class Config:
         conf.set(sec1, "ignore_failed_list", 0)
         conf.set(sec1, "download_only_missing_images", 1)
         conf.set(sec1, "mapping_table_validity", 7)
+        conf.set(sec1, "rerun_delay", 0)
 
         sec2 = "proxy"
         conf.add_section(sec2)
@@ -423,9 +477,9 @@ class Config:
         sec11 = "media"
         conf.add_section(sec11)
         conf.set(sec11, "media_type",
-                 ".mp4,.avi,.rmvb,.wmv,.mov,.mkv,.flv,.ts,.webm,.MP4,.AVI,.RMVB,.WMV,.MOV,.MKV,.FLV,.TS,.WEBM,iso,ISO")
+                 ".mp4,.avi,.rmvb,.wmv,.mov,.mkv,.flv,.ts,.webm,iso")
         conf.set(sec11, "sub_type",
-                 ".smi,.srt,.idx,.sub,.sup,.psb,.ssa,.ass,.txt,.usf,.xss,.ssf,.rt,.lrc,.sbv,.vtt,.ttml")
+                 ".smi,.srt,.idx,.sub,.sup,.psb,.ssa,.ass,.usf,.xss,.ssf,.rt,.lrc,.sbv,.vtt,.ttml")
 
         sec12 = "watermark"
         conf.add_section(sec12)
@@ -503,8 +557,7 @@ if __name__ == "__main__":
 
 
     config = Config()
-    mfilter = {'conf', 'proxy', '_exit', '_default_config', 'getboolean_override', 'getint_override', 'get_override',
-               'ini_path'}
+    mfilter = {'conf', 'proxy', '_exit', '_default_config', 'ini_path', 'set_override'}
     for _m in [m for m in dir(config) if not m.startswith('__') and m not in mfilter]:
         evprint(f'config.{_m}()')
     pfilter = {'proxies', 'SUPPORT_PROXY_TYPE'}
@@ -513,36 +566,13 @@ if __name__ == "__main__":
     for _p in [p for p in dir(getInstance().proxy()) if not p.startswith('__') and p not in pfilter]:
         evprint(f'getInstance().proxy().{_p}')
 
-    # Override Test
-    G_conf_override["common:nfo_skip_days"] = 4321
-    G_conf_override["common:stop_counter"] = 1234
-    assert config.nfo_skip_days() == 4321
-    assert getInstance().stop_counter() == 1234
-    # remove override
-    G_conf_override["common:stop_counter"] = None
-    G_conf_override["common:nfo_skip_days"] = None
-    assert config.nfo_skip_days() != 4321
-    assert config.stop_counter() != 1234
     # Create new instance
     conf2 = Config()
     assert getInstance() != conf2
     assert getInstance() == config
-    G_conf_override["common:main_mode"] = 9
-    G_conf_override["common:source_folder"] = "A:/b/c"
-    # Override effect to all instances
-    assert config.main_mode() == 9
-    assert conf2.main_mode() == 9
-    assert getInstance().main_mode() == 9
-    assert conf2.source_folder() == "A:/b/c"
-    print("### Override Test ###".center(36))
-    evprint('getInstance().main_mode()')
-    evprint('config.source_folder()')
-    G_conf_override["common:main_mode"] = None
-    evprint('conf2.main_mode()')
-    evprint('config.main_mode()')
-    # unregister key acess will raise except
-    try:
-        print(G_conf_override["common:actor_gender"])
-    except KeyError as ke:
-        print(f'Catched KeyError: {ke} is not a register key of G_conf_override dict.', file=sys.stderr)
+    conf2.set_override("d:s=1;face:asp=2;f:aw=0;pri:w=javdb;f:l=")
+    assert conf2.face_aspect_ratio() == 2
+    assert conf2.face_aways_imagecut() == False
+    assert conf2.sources() == "javdb"
     print(f"Load Config file '{conf2.ini_path}'.")
288  core.py
@@ -1,5 +1,6 @@
 import json
 import os.path
+import os
 import pathlib
 import re
 import shutil
@@ -10,6 +11,7 @@ from PIL import Image
 from io import BytesIO
 from pathlib import Path
 from datetime import datetime
+from lxml import etree
 
 from ADC_function import *
 from WebCrawler import get_data_from_json
@@ -27,15 +29,15 @@ def escape_path(path, escape_literals: str):  # Remove escape literals
 def moveFailedFolder(filepath):
     conf = config.getInstance()
     failed_folder = conf.failed_folder()
-    soft_link = conf.soft_link()
+    link_mode = conf.link_mode()
     # For mode 3 or link mode, maintain a failure list instead; it is loaded at scan time to exclude those paths and avoid reprocessing
     # The old behavior of soft-linking into the failed folder was unintuitive and made failed files hard to locate; recording the path directly is better
-    if conf.main_mode() == 3 or soft_link:
+    if conf.main_mode() == 3 or link_mode:
         ftxt = os.path.abspath(os.path.join(failed_folder, 'failed_list.txt'))
         print("[-]Add to Failed List file, see '%s'" % ftxt)
         with open(ftxt, 'a', encoding='utf-8') as flt:
             flt.write(f'{filepath}\n')
-    elif conf.failed_move() and not soft_link:
+    elif conf.failed_move() and not link_mode:
         failed_name = os.path.join(failed_folder, os.path.basename(filepath))
         mtxt = os.path.abspath(os.path.join(failed_folder, 'where_was_i_before_being_moved.txt'))
         print("'[-]Move to Failed output folder, see '%s'" % mtxt)
@@ -69,10 +71,12 @@ def get_info(json_data):  # return the fields from the json
     return title, studio, year, outline, runtime, director, actor_photo, release, number, cover, trailer, website, series, label
 
 
-def small_cover_check(path, number, cover_small, leak_word, c_word, hack_word, filepath):
-    filename = f"{number}{leak_word}{c_word}{hack_word}-poster.jpg"
-    download_file_with_filename(cover_small, filename, path, filepath)
-    print('[+]Image Downloaded! ' + os.path.join(path, filename))
+def small_cover_check(path, filename, cover_small, movie_path):
+    full_filepath = Path(path) / filename
+    if config.getInstance().download_only_missing_images() and not file_not_exist_or_empty(str(full_filepath)):
+        return
+    download_file_with_filename(cover_small, filename, path, movie_path)
+    print('[+]Image Downloaded! ' + full_filepath.name)
 
 
 def create_folder(json_data):  # create the folder
@@ -101,7 +105,7 @@ def create_folder(json_data):  # create the folder
             os.makedirs(path)
         except:
             print(f"[-]Fatal error! Can not make folder '{path}'")
-            sys.exit(0)
+            os._exit(0)
 
     return os.path.normpath(path)
 
@@ -121,7 +125,7 @@ def download_file_with_filename(url, filename, path, filepath):
             os.makedirs(path)
         except:
             print(f"[-]Fatal error! Can not make folder '{path}'")
-            sys.exit(0)
+            os._exit(0)
     proxies = configProxy.proxies()
     headers = {
         'User-Agent': G_USER_AGENT}
@@ -138,7 +142,7 @@ def download_file_with_filename(url, filename, path, filepath):
             os.makedirs(path)
         except:
             print(f"[-]Fatal error! Can not make folder '{path}'")
-            sys.exit(0)
+            os._exit(0)
     headers = {
         'User-Agent': G_USER_AGENT}
     r = requests.get(url, timeout=configProxy.timeout, headers=headers)
@@ -213,7 +217,7 @@ def extrafanart_download_one_by_one(data, path, filepath):
             break
     if file_not_exist_or_empty(jpg_fullpath):
         return
-    print('[+]Image Downloaded!', jpg_fullpath)
+    print('[+]Image Downloaded!', Path(jpg_fullpath).name)
     j += 1
     if conf.debug():
         print(f'[!]Extrafanart download one by one mode runtime {time.perf_counter() - tm_start:.3f}s')
@@ -244,7 +248,7 @@ def extrafanart_download_threadpool(url_list, save_dir, number):
     if failed:  # non-fatal: the movie is not moved to failed; mode 3 can fill the gaps later
         print(f"[-]Failed downloaded {failed}/{len(result)} extrafanart images for [{number}] to '{extrafanart_dir}', you may retry run mode 3 later.")
     else:
-        print(f"[+]Successfully downloaded {len(result)} extrafanart to '{extrafanart_dir}'")
+        print(f"[+]Successfully downloaded {len(result)} extrafanarts.")
     if conf.debug():
         print(f'[!]Extrafanart download ThreadPool mode runtime {time.perf_counter() - tm_start:.3f}s')
 
@@ -255,7 +259,7 @@ def image_ext(url):
     return ".jpg"
 
 # check whether the cover downloaded successfully; otherwise move to failed
-def image_download(cover, fanart_path,thumb_path, path, filepath):
+def image_download(cover, fanart_path, thumb_path, path, filepath):
     full_filepath = os.path.join(path, fanart_path)
     if config.getInstance().download_only_missing_images() and not file_not_exist_or_empty(full_filepath):
         return
@@ -273,7 +277,7 @@ def image_download(cover, fanart_path, thumb_path, path, filepath):
             break
     if file_not_exist_or_empty(full_filepath):
         return
-    print('[+]Image Downloaded!', full_filepath)
+    print('[+]Image Downloaded!', Path(full_filepath).name)
     shutil.copyfile(full_filepath, os.path.join(path, thumb_path))
@@ -289,8 +293,14 @@ def print_files(path, leak_word, c_word, naming_rule, part, cn_sub, json_data, f
             os.makedirs(path)
         except:
             print(f"[-]Fatal error! can not make folder '{path}'")
-            sys.exit(0)
+            os._exit(0)
 
+    old_nfo = None
+    try:
+        if os.path.isfile(nfo_path):
+            old_nfo = etree.parse(nfo_path)
+    except:
+        pass
     # KODI cannot find the ID number when viewing movie info; naming_rule=number+'#'+title would fix it
     # but makes titles too long, so the number goes into outline instead, which is usually empty and gets a larger display area
     outline = f"{number}#{outline}"
@@ -354,6 +364,41 @@ def print_files(path, leak_word, c_word, naming_rule, part, cn_sub, json_data, f
|
|||||||
print(" <premiered>" + release + "</premiered>", file=code)
|
print(" <premiered>" + release + "</premiered>", file=code)
|
||||||
print(" <releasedate>" + release + "</releasedate>", file=code)
|
print(" <releasedate>" + release + "</releasedate>", file=code)
|
||||||
print(" <release>" + release + "</release>", file=code)
|
print(" <release>" + release + "</release>", file=code)
|
||||||
|
if old_nfo:
|
||||||
|
try:
|
||||||
|
xur = old_nfo.xpath('//userrating/text()')[0]
|
||||||
|
if isinstance(xur, str) and re.match('\d+\.\d+|\d+', xur.strip()):
|
||||||
|
print(f" <userrating>{xur.strip()}</userrating>", file=code)
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
try:
|
||||||
|
f_rating = json_data['用户评分']
|
||||||
|
uc = json_data['评分人数']
|
||||||
|
print(f""" <rating>{round(f_rating * 2.0, 1)}</rating>
|
||||||
|
<criticrating>{round(f_rating * 20.0, 1)}</criticrating>
|
||||||
|
<ratings>
|
||||||
|
<rating name="javdb" max="5" default="true">
|
||||||
|
<value>{f_rating}</value>
|
||||||
|
<votes>{uc}</votes>
|
||||||
|
</rating>
|
||||||
|
</ratings>""", file=code)
|
||||||
|
except:
|
||||||
|
if old_nfo:
|
||||||
|
try:
|
||||||
|
for rtag in ('rating', 'criticrating'):
|
||||||
|
xur = old_nfo.xpath(f'//{rtag}/text()')[0]
|
||||||
|
if isinstance(xur, str) and re.match('\d+\.\d+|\d+', xur.strip()):
|
||||||
|
print(f" <{rtag}>{xur.strip()}</{rtag}>", file=code)
|
||||||
|
f_rating = old_nfo.xpath(f"//ratings/rating[@name='javdb']/value/text()")[0]
|
||||||
|
uc = old_nfo.xpath(f"//ratings/rating[@name='javdb']/votes/text()")[0]
|
||||||
|
print(f""" <ratings>
|
||||||
|
<rating name="javdb" max="5" default="true">
|
||||||
|
<value>{f_rating}</value>
|
||||||
|
<votes>{uc}</votes>
|
||||||
|
</rating>
|
||||||
|
</ratings>""", file=code)
|
||||||
|
except:
|
||||||
|
pass
|
||||||
print(" <cover>" + cover + "</cover>", file=code)
|
print(" <cover>" + cover + "</cover>", file=code)
|
||||||
if config.getInstance().is_trailer():
|
if config.getInstance().is_trailer():
|
||||||
print(" <trailer>" + trailer + "</trailer>", file=code)
|
print(" <trailer>" + trailer + "</trailer>", file=code)
|
||||||
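
The new print_files logic above parses the previous .nfo before overwriting it, so a user's <userrating> and the javdb <ratings> block survive a re-scrape even when the fresh json_data lacks the '用户评分'/'评分人数' keys. The read-back step in isolation (a sketch, not the project's exact code path):

import re
from lxml import etree

def read_old_ratings(nfo_path):
    """Return (userrating, votes) from an existing .nfo, or None when absent."""
    try:
        old_nfo = etree.parse(nfo_path)
        xur = old_nfo.xpath('//userrating/text()')[0].strip()
        votes = old_nfo.xpath("//ratings/rating[@name='javdb']/votes/text()")[0]
        if re.match(r'\d+\.\d+|\d+', xur):
            return xur, votes
    except Exception:
        pass
    return None
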
@@ -462,51 +507,51 @@ def add_to_pic(pic_path, img_pic, size, count, mode):
 # ======================== end =================================


-def paste_file_to_folder(filepath, path, number, leak_word, c_word, hack_word):  # file path, number, suffix, destination folder
+def paste_file_to_folder(filepath, path, multi_part, number, part, leak_word, c_word, hack_word):  # file path, number, suffix, destination folder
     filepath_obj = pathlib.Path(filepath)
     houzhui = filepath_obj.suffix
-    file_parent_origin_path = str(filepath_obj.parent)
     try:
         targetpath = os.path.join(path, f"{number}{leak_word}{c_word}{hack_word}{houzhui}")
         # never overwrite under any circumstances, lest a data-source or engine bug give every file
         # the same number and one-by-one overwrites destroy all files beyond recovery
         if os.path.exists(targetpath):
             raise FileExistsError('File Exists on destination path, we will never overwriting.')
-        soft_link = config.getInstance().soft_link()
-        # soft_link=1 means use a symlink
-        if soft_link == 0:
+        link_mode = config.getInstance().link_mode()
+        # link_mode 1: create a symlink; 2: prefer a hardlink, then fall back to a symlink.
+        # the old soft_link=2 code was removed: the default log already makes file origins traceable
+        create_softlink = False
+        if link_mode not in (1, 2):
             shutil.move(filepath, targetpath)
-        elif soft_link == 1:
+        elif link_mode == 2:
+            # hardlinks can't cross volumes or drive letters; fall back to a symlink
+            try:
+                os.link(filepath, targetpath, follow_symlinks=False)
+            except:
+                create_softlink = True
+        if link_mode == 1 or create_softlink:
             # try a relative path first so the video still opens over network shares; on failure
             # (e.g. across drive letters) relative paths are unsupported, so retry with an absolute path
             try:
                 filerelpath = os.path.relpath(filepath, path)
                 os.symlink(filerelpath, targetpath)
             except:
-                os.symlink(filepath_obj.resolve(), targetpath)
-        elif soft_link == 2:
-            shutil.move(filepath, targetpath)
-            # after moving the file, leave a traceable symlink at the original location pointing to
-            # the new one, so a file renamed under a wrong number can still be tracked down and
-            # recovered by hand; symlinks are no longer scraped, so the suffix needn't change
-            targetabspath = os.path.abspath(targetpath)
-            if targetabspath != os.path.abspath(filepath):
-                targetrelpath = os.path.relpath(targetabspath, file_parent_origin_path)
-                os.symlink(targetrelpath, filepath)
-        sub_res = config.getInstance().sub_rule()
+                os.symlink(str(filepath_obj.resolve()), targetpath)

-        for subname in sub_res:
-            sub_filepath = str(filepath_obj.with_suffix(subname))
-            if os.path.isfile(sub_filepath.replace(subname, ".chs" + subname)):
-                sub_filepath = sub_filepath.replace(subname, ".chs" + subname)
-                subname = ".chs" + subname
-            elif os.path.isfile(sub_filepath.replace(subname, ".cht" + subname)):
-                sub_filepath = sub_filepath.replace(subname, ".cht" + subname)
-                subname = ".cht" + subname
-            if os.path.isfile(sub_filepath):
-                shutil.move(sub_filepath, os.path.join(path, f"{number}{leak_word}{c_word}{hack_word}{subname}"))
-                print('[+]Sub moved!')
-                return True
+        sub_res = config.getInstance().sub_rule()
+        for subfile in filepath_obj.parent.glob('**/*'):
+            if subfile.is_file() and subfile.suffix.lower() in sub_res:
+                if multi_part and part.lower() not in subfile.name.lower():
+                    continue
+                if filepath_obj.stem.split('.')[0].lower() != subfile.stem.split('.')[0].lower():
+                    continue
+                sub_targetpath = Path(path) / f"{number}{leak_word}{c_word}{hack_word}{''.join(subfile.suffixes)}"
+                if link_mode not in (1, 2):
+                    shutil.move(str(subfile), str(sub_targetpath))
+                    print(f"[+]Sub Moved! {sub_targetpath.name}")
+                else:
+                    shutil.copyfile(str(subfile), str(sub_targetpath))
+                    print(f"[+]Sub Copied! {sub_targetpath.name}")
+        return
     except FileExistsError as fee:
         print(f'[-]FileExistsError: {fee}')
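
The new link_mode handling replaces the old soft_link switch: 0 (or any value outside (1, 2)) moves the file, 1 symlinks it, and 2 tries a hardlink first, falling back to a symlink when os.link raises, which typically happens across volumes or drive letters. The pattern in isolation:

import os
import pathlib
import shutil

def place_file(src, dst, link_mode):
    """link_mode 0: move; 1: symlink; 2: hardlink, falling back to symlink."""
    if link_mode not in (1, 2):
        shutil.move(src, dst)
        return
    if link_mode == 2:
        try:
            os.link(src, dst, follow_symlinks=False)
            return
        except OSError:
            pass  # cross-volume / cross-device: fall through to a symlink
    try:
        # relative link first, so the target resolves when browsed over a share
        os.symlink(os.path.relpath(src, os.path.dirname(dst)), dst)
    except OSError:
        os.symlink(str(pathlib.Path(src).resolve()), dst)
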
@@ -525,24 +570,39 @@ def paste_file_to_folder_mode2(filepath, path, multi_part, number, part, leak_wo
         number += part  # number gets the CD1-style suffix appended here
     filepath_obj = pathlib.Path(filepath)
     houzhui = filepath_obj.suffix
-    file_parent_origin_path = str(filepath_obj.parent)
     targetpath = os.path.join(path, f"{number}{part}{leak_word}{c_word}{hack_word}{houzhui}")
     if os.path.exists(targetpath):
         raise FileExistsError('File Exists on destination path, we will never overwriting.')
     try:
-        if config.getInstance().soft_link():
-            os.symlink(filepath, targetpath)
-        else:
+        link_mode = config.getInstance().link_mode()
+        create_softlink = False
+        if link_mode not in (1, 2):
             shutil.move(filepath, targetpath)
+        elif link_mode == 2:
+            try:
+                os.link(filepath, targetpath, follow_symlinks=False)
+            except:
+                create_softlink = True
+        if link_mode == 1 or create_softlink:
+            try:
+                filerelpath = os.path.relpath(filepath, path)
+                os.symlink(filerelpath, targetpath)
+            except:
+                os.symlink(str(filepath_obj.resolve()), targetpath)

         sub_res = config.getInstance().sub_rule()
-        for subname in sub_res:
-            sub_filepath = str(filepath_obj.with_suffix(subname))
-            if os.path.isfile(sub_filepath):  # move subtitles
-                shutil.move(sub_filepath, os.path.join(path, f"{number}{part}{leak_word}{c_word}{hack_word}{subname}"))
-                print('[+]Sub moved!')
-                print('[!]Success')
-                return True
+        for subfile in filepath_obj.parent.glob('**/*'):
+            if subfile.is_file() and subfile.suffix.lower() in sub_res:
+                if multi_part and part.lower() not in subfile.name.lower():
+                    continue
+                sub_targetpath = Path(path) / f"{number}{leak_word}{c_word}{hack_word}{''.join(subfile.suffixes)}"
+                if link_mode not in (1, 2):
+                    shutil.move(str(subfile), str(sub_targetpath))
+                    print(f"[+]Sub Moved! {sub_targetpath.name}")
+                else:
+                    shutil.copyfile(str(subfile), str(sub_targetpath))
+                    print(f"[+]Sub Copied! {sub_targetpath.name}")
+        return
     except FileExistsError as fee:
         print(f'[-]FileExistsError: {fee}')
         return
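
Both paste functions now find subtitles by globbing the movie's directory and filtering on the configured sub_rule suffixes, rather than probing fixed .chs/.cht name variants; joining subfile.suffixes keeps compound extensions such as .chs.srt intact. A sketch of the matching step, with sub_exts standing in for the configured sub_rule set:

from pathlib import Path

def find_subs(movie_file: Path, sub_exts=frozenset({'.srt', '.ass', '.ssa'}), part=''):
    """Yield (subtitle, full_extension) pairs found next to movie_file."""
    for f in movie_file.parent.glob('**/*'):
        if not (f.is_file() and f.suffix.lower() in sub_exts):
            continue
        if part and part.lower() not in f.name.lower():
            continue  # skip subtitles belonging to another -CDn part
        yield f, ''.join(f.suffixes)  # '.chs.srt' survives as a whole
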
@@ -554,18 +614,6 @@ def paste_file_to_folder_mode2(filepath, path, multi_part, number, part, leak_wo
         return


-def get_part(filepath):
-    try:
-        if re.search('-CD\d+', filepath):
-            return re.findall('-CD\d+', filepath)[0]
-        if re.search('-cd\d+', filepath):
-            return re.findall('-cd\d+', filepath)[0]
-    except:
-        print("[-]failed!Please rename the filename again!")
-        moveFailedFolder(filepath)
-        return
-
-
 def debug_print(data: json):
     try:
         print("[+] ------- DEBUG INFO -------")
@@ -578,14 +626,65 @@ def debug_print(data: json):
             if i == 'extrafanart':
                 print('[+] -', "%-14s" % i, ':', len(v), 'links')
                 continue
-            print('[+] -', "%-14s" % i, ':', v)
+            print(f'[+] - {i:<{cnspace(i,14)}} : {v}')

         print("[+] ------- DEBUG INFO -------")
     except:
         pass


-def core_main(file_path, number_th, oCC):
+def core_main_no_net_op(movie_path, number):
+    conf = config.getInstance()
+    part = ''
+    leak_word = ''
+    leak = 0
+    c_word = ''
+    cn_sub = ''
+    hack = ''
+    hack_word = ''
+    ext = '.jpg'
+    imagecut = 1
+    path = str(Path(movie_path).parent)
+
+    if re.search('[-_]CD\d+', movie_path, re.IGNORECASE):
+        part = re.findall('[-_]CD\d+', movie_path, re.IGNORECASE)[0].upper()
+    if re.search(r'[-_]C(\.\w+$|-\w+)|\d+ch(\.\w+$|-\w+)', movie_path,
+                 re.I) or '中文' in movie_path or '字幕' in movie_path:
+        cn_sub = '1'
+        c_word = '-C'  # suffix for films with Chinese subtitles
+    uncensored = 1 if is_uncensored(number) else 0
+    if '流出' in movie_path or 'uncensored' in movie_path.lower():
+        leak_word = '-流出'  # suffix for leaked films
+        leak = 1
+
+    if 'hack'.upper() in str(movie_path).upper() or '破解' in movie_path:
+        hack = 1
+        hack_word = "-hack"
+
+    prestr = f"{number}{leak_word}{c_word}{hack_word}"
+    fanart_path = f"{prestr}-fanart{ext}"
+    poster_path = f"{prestr}-poster{ext}"
+    thumb_path = f"{prestr}-thumb{ext}"
+    full_fanart_path = os.path.join(path, fanart_path)
+    full_poster_path = os.path.join(path, poster_path)
+    full_thumb_path = os.path.join(path, thumb_path)
+    full_nfo = Path(path) / f"{prestr}{part}.nfo"
+
+    if full_nfo.is_file():
+        if full_nfo.read_text(encoding='utf-8').find(r'<tag>无码</tag>') >= 0:
+            uncensored = 1
+    else:
+        return
+
+    if not all(os.path.isfile(f) for f in (full_fanart_path, full_thumb_path)):
+        return
+
+    cutImage(imagecut, path, fanart_path, poster_path, bool(conf.face_uncensored_only() and not uncensored))
+    if conf.is_watermark():
+        add_mark(full_poster_path, full_thumb_path, cn_sub, leak, uncensored, hack)
+
+
+def core_main(movie_path, number_th, oCC):
     conf = config.getInstance()
     # ======================================================================= initialize required variables
     multi_part = 0
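
In the debug_print hunk above, the old "%-14s" pad counts characters rather than terminal columns, so CJK keys (two columns wide each) pushed the colons out of line. The cnspace helper it now calls is not shown in this diff; an assumed implementation that would produce that alignment looks like:

from unicodedata import east_asian_width

def cnspace(s: str, width: int) -> int:
    # hypothetical stand-in for the helper used above: shrink the pad target
    # by one per wide (CJK) character so columns line up in a terminal
    wide = sum(1 for ch in s if east_asian_width(ch) in ('W', 'F'))
    return max(width - wide, len(s))

print(f"[+] - {'title':<{cnspace('title', 14)}} : ...")
print(f"[+] - {'导演':<{cnspace('导演', 14)}} : ...")
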
@@ -597,8 +696,6 @@ def core_main(file_path, number_th, oCC):
     hack = ''
     hack_word = ''


-    filepath = file_path  # absolute path of the movie file
     # the commented-out variables below are not needed
     #rootpath= os.getcwd
     number = number_th
@@ -606,7 +703,7 @@

     # Return if blank dict returned (data not found)
     if not json_data:
-        moveFailedFolder(filepath)
+        moveFailedFolder(movie_path)
         return

     if json_data["number"] != number:
@@ -619,25 +716,26 @@
     imagecut = json_data.get('imagecut')
     tag = json_data.get('tag')
     # ======================================================================= detect -C / -CD suffixes
-    if '-CD' in filepath or '-cd' in filepath:
+    if re.search('[-_]CD\d+', movie_path, re.IGNORECASE):
         multi_part = 1
-        part = get_part(filepath)
-    if '-c.' in filepath or '-C.' in filepath or '中文' in filepath or '字幕' in filepath:
+        part = re.findall('[-_]CD\d+', movie_path, re.IGNORECASE)[0].upper()
+    if re.search(r'[-_]C(\.\w+$|-\w+)|\d+ch(\.\w+$|-\w+)', movie_path,
+                 re.I) or '中文' in movie_path or '字幕' in movie_path:
         cn_sub = '1'
         c_word = '-C'  # suffix for films with Chinese subtitles

     # determine whether the film is uncensored
-    uncensored = 1 if is_uncensored(number) else 0
+    unce = json_data.get('无码')
+    uncensored = int(unce) if isinstance(unce, bool) else int(is_uncensored(number))

-    if '流出' in filepath or 'uncensored' in filepath:
+    if '流出' in movie_path or 'uncensored' in movie_path.lower():
         liuchu = '流出'
         leak = 1
         leak_word = '-流出'  # suffix for leaked films
     else:
         leak = 0

-    if 'hack'.upper() in str(filepath).upper() or '破解' in filepath:
+    if 'hack'.upper() in str(movie_path).upper() or '破解' in movie_path:
         hack = 1
         hack_word = "-hack"
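
The new suffix regex accepts more than the old '-c.'/'-C.' substring test: it also matches a -C followed by another tag (for example a CD part) and the ch hard-subtitle marker. A quick check with illustrative filenames:

import re

pat = re.compile(r'[-_]C(\.\w+$|-\w+)|\d+ch(\.\w+$|-\w+)', re.I)

for name in ('ABC-123-C.mp4', 'ABC-123-C-CD2.mp4', 'rctd-460ch.mp4', 'ABC-123.mp4'):
    print(name, '->', bool(pat.search(name)))
# ABC-123-C.mp4 -> True, ABC-123-C-CD2.mp4 -> True, rctd-460ch.mp4 -> True, ABC-123.mp4 -> False
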
@@ -666,78 +764,76 @@ def core_main(file_path, number_th, oCC):

         # check the small cover; if imagecut is 3, download the small cover
         if imagecut == 3:
-            small_cover_check(path, number, json_data.get('cover_small'), leak_word, c_word, hack_word, filepath)
+            small_cover_check(path, poster_path, json_data.get('cover_small'), movie_path)


         # creatFolder returns the folder path for the number
-        image_download( cover, fanart_path,thumb_path, path, filepath)
+        image_download( cover, fanart_path,thumb_path, path, movie_path)

         if not multi_part or part.lower() == '-cd1':
             try:
                 # download trailer
                 if conf.is_trailer() and json_data.get('trailer'):
-                    trailer_download(json_data.get('trailer'), leak_word, c_word, hack_word, number, path, filepath)
+                    trailer_download(json_data.get('trailer'), leak_word, c_word, hack_word, number, path, movie_path)
             except:
                 pass
             try:
                 # download extrafanart stills: data, path, filepath
                 if conf.is_extrafanart() and json_data.get('extrafanart'):
-                    extrafanart_download(json_data.get('extrafanart'), path, number, filepath)
+                    extrafanart_download(json_data.get('extrafanart'), path, number, movie_path)
             except:
                 pass



         # crop the cover image
-        cutImage(imagecut, path , fanart_path, poster_path)
+        cutImage(imagecut, path, fanart_path, poster_path, bool(conf.face_uncensored_only() and not uncensored))

         # add watermarks
         if conf.is_watermark():
             add_mark(os.path.join(path,poster_path), os.path.join(path,thumb_path), cn_sub, leak, uncensored, hack)

         # move the movie
-        paste_file_to_folder(filepath, path, number, leak_word, c_word, hack_word)
+        paste_file_to_folder(movie_path, path, multi_part, number, part, leak_word, c_word, hack_word)

         # finally, write the .nfo metadata file; successful .nfo creation marks the task complete
-        print_files(path, leak_word, c_word, json_data.get('naming_rule'), part, cn_sub, json_data, filepath, tag, json_data.get('actor_list'), liuchu, uncensored, hack_word
+        print_files(path, leak_word, c_word, json_data.get('naming_rule'), part, cn_sub, json_data, movie_path, tag, json_data.get('actor_list'), liuchu, uncensored, hack_word
                     ,fanart_path,poster_path,thumb_path)

     elif conf.main_mode() == 2:
         # create the folder
         path = create_folder(json_data)
         # move the file
-        paste_file_to_folder_mode2(filepath, path, multi_part, number, part, leak_word, c_word, hack_word)
+        paste_file_to_folder_mode2(movie_path, path, multi_part, number, part, leak_word, c_word, hack_word)
         if conf.is_watermark():
             add_mark(os.path.join(path,poster_path), os.path.join(path,thumb_path), cn_sub, leak, uncensored, hack)

     elif conf.main_mode() == 3:
-        path = str(Path(file_path).parent)
+        path = str(Path(movie_path).parent)
         if multi_part == 1:
             number += part  # number gets the CD1-style suffix appended here

         # check the small cover; if imagecut is 3, download the small cover
         if imagecut == 3:
-            small_cover_check(path, number, json_data.get('cover_small'), leak_word, c_word, hack_word, filepath)
+            small_cover_check(path, poster_path, json_data.get('cover_small'), movie_path)

         # creatFolder returns the folder path for the number
-        image_download( cover, fanart_path,thumb_path, path, filepath)
+        image_download( cover, fanart_path, thumb_path, path, movie_path)

         if not multi_part or part.lower() == '-cd1':
             # download trailer
             if conf.is_trailer() and json_data.get('trailer'):
-                trailer_download(json_data.get('trailer'), leak_word, c_word, hack_word, number, path, filepath)
+                trailer_download(json_data.get('trailer'), leak_word, c_word, hack_word, number, path, movie_path)

             # download extrafanart stills: data, path, filepath
             if conf.is_extrafanart() and json_data.get('extrafanart'):
-                extrafanart_download(json_data.get('extrafanart'), path, number, filepath)
+                extrafanart_download(json_data.get('extrafanart'), path, number, movie_path)

         # crop the cover image
-        cutImage(imagecut, path , fanart_path, poster_path)
+        cutImage(imagecut, path, fanart_path, poster_path, bool(conf.face_uncensored_only() and not uncensored))

         # add watermarks
         if conf.is_watermark():
             add_mark(os.path.join(path,poster_path), os.path.join(path,thumb_path), cn_sub, leak, uncensored, hack)

         # finally, write the .nfo metadata file; successful .nfo creation marks the task complete
-        print_files(path, leak_word, c_word, json_data.get('naming_rule'), part, cn_sub, json_data, filepath,
+        print_files(path, leak_word, c_word, json_data.get('naming_rule'), part, cn_sub, json_data, movie_path,
                     tag, json_data.get('actor_list'), liuchu, uncensored, hack_word,fanart_path,poster_path,thumb_path)
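
cutImage now receives bool(conf.face_uncensored_only() and not uncensored) as a final flag, so face-based cropping can be restricted to uncensored titles. Given the ImageProcessing.cnn hidden-import added elsewhere in this commit, face locating presumably goes through face_recognition's CNN model; a hedged sketch of such a toggle (not the project's actual cutImage signature):

import face_recognition

def locate_face(image_path, skip_face=False):
    """Return the first face box (top, right, bottom, left), or None to signal
    a plain fixed-ratio crop. A sketch only; cutImage lives in ImageProcessing."""
    if skip_face:
        return None
    img = face_recognition.load_image_file(image_path)
    boxes = face_recognition.face_locations(img, model='cnn')
    return boxes[0] if boxes else None
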
@@ -2,7 +2,7 @@
 main_mode=1
 failed_output_folder=data/failure_output
 success_output_folder=data/organized
-soft_link=0
+link_mode=0

 [proxy]
 proxy=
@@ -5,8 +5,9 @@ import config
 import typing

 G_spat = re.compile(
-    "^22-sht\.me|-fhd|_fhd|^fhd_|^fhd-|-hd|_hd|^hd_|^hd-|-sd|_sd|-1080p|_1080p|-720p|_720p|"
-    "^hhd800\.com@|-uncensored|_uncensored|-leak|_leak|-4K|_4K",
+    "^\w+\.(cc|com|net|me|club|jp|tv|xyz|biz|wiki|info|tw|us|de)@|^22-sht\.me|"
+    "^(fhd|hd|sd|1080p|720p|4K)(-|_)|"
+    "(-|_)(fhd|hd|sd|1080p|720p|4K|x264|x265|uncensored|leak)",
     re.IGNORECASE)

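
The rewritten G_spat folds the hardcoded hhd800.com@ special case into a general site-prefix rule (any domain followed by @) and normalizes the quality-tag alternatives into two compact groups. Applied with re.sub before number extraction, it strips noise like this (sample names taken from the test list below):

import re

G_spat = re.compile(
    "^\w+\.(cc|com|net|me|club|jp|tv|xyz|biz|wiki|info|tw|us|de)@|^22-sht\.me|"
    "^(fhd|hd|sd|1080p|720p|4K)(-|_)|"
    "(-|_)(fhd|hd|sd|1080p|720p|4K|x264|x265|uncensored|leak)",
    re.IGNORECASE)

for name in ('hhd800.com@STARS-566-HD.mp4', 'jav20s8.com@GIGL-677_4K.mp4', '4K-ABP-358_C.mkv'):
    print(G_spat.sub('', name))
# -> STARS-566.mp4, GIGL-677.mp4, ABP-358_C.mkv
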
@@ -46,9 +47,13 @@ def get_number(debug: bool, file_path: str) -> str:
         lower_check = filename.lower()
         if 'fc2' in lower_check:
             filename = lower_check.replace('ppv', '').replace('--', '-').replace('_', '-').upper()
-        filename = re.sub("(-|_)cd\d{1,2}", "", filename, flags=re.IGNORECASE)
+        filename = re.sub("[-_]cd\d{1,2}", "", filename, flags=re.IGNORECASE)
+        if not re.search("-|_", filename):  # no - or _ left after stripping -CD1, e.g. n1012-CD1.wmv
+            return str(re.search(r'\w+', filename[:filename.find('.')], re.A).group())
         file_number = str(re.search(r'\w+(-|_)\w+', filename, re.A).group())
         file_number = re.sub("(-|_)c$", "", file_number, flags=re.IGNORECASE)
+        if re.search("\d+ch$", file_number, flags=re.I):
+            file_number = file_number[:-2]
         return file_number.upper()
     else:  # extract numbers without a dash: FANZA CID
         # matching rules for western numbers
@@ -124,7 +129,8 @@ def is_uncensored(number):
     ):
         return True
     if G_cache_uncensored_conf.is_empty():
-        G_cache_uncensored_conf.set(config.getInstance().get_uncensored().split(','))
+        if G_cache_uncensored_conf.set(config.getInstance().get_uncensored().split(',')) == None:
+            return False
     return G_cache_uncensored_conf.check(number)

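
The extra check means a cache that fails to populate (set() returning None) now short-circuits to False instead of consulting a half-built list. G_cache_uncensored_conf itself is outside this hunk; a sketch of the prefix cache this implies, assuming set() returns the compiled matcher on success and None otherwise:

import re

class UncensoredCache:
    """Sketch of a number-prefix cache: set() compiles the configured prefixes
    and returns the regex on success, or None when the list is empty/invalid."""
    def __init__(self):
        self._re = None

    def is_empty(self):
        return self._re is None

    def set(self, prefixes):
        prefixes = [p.strip() for p in prefixes if p.strip()]
        if not prefixes:
            return None
        self._re = re.compile('|'.join(map(re.escape, prefixes)), re.I)
        return self._re

    def check(self, number):
        return bool(self._re and self._re.match(number))
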
@@ -146,13 +152,23 @@ if __name__ == "__main__":
         "caribean-020317_001.nfo",  # a - mistyped as _
         "257138_3xplanet_1Pondo_080521_001.mp4",
         "ADV-R0624-CD3.wmv",  # multi-disc film
-        "XXX-AV 22061-CD5.iso",  # newly supported studio format xxx-av-22061, naming rule from the javdb data source
+        "XXX-AV 22061-CD5.iso",  # supported studio format xxx-av-22061, naming rule from the javdb data source
         "xxx-av 20589.mp4",
-        "Muramura-102114_145-HD.wmv",  # newly supported studio format 102114_145, naming rule from the javdb data source
-        "heydouga-4102-023-CD2.iso",  # newly supported studio format heydouga-4102-023, naming rule from the javdb data source
+        "Muramura-102114_145-HD.wmv",  # supported studio format 102114_145, naming rule from the javdb data source
+        "heydouga-4102-023-CD2.iso",  # supported studio format heydouga-4102-023, naming rule from the javdb data source
         "HeyDOuGa4236-1048 Ai Qiu - .mp4",  # heydouga-4236-1048, naming rule from the javdb data source
-        "pacopacomama-093021_539-FHD.mkv",  # newly supported studio format 093021_539, naming rule from the javdb data source
-        "sbw99.cc@heyzo_hd_2636_full.mp4"
+        "pacopacomama-093021_539-FHD.mkv",  # supported studio format 093021_539, naming rule from the javdb data source
+        "sbw99.cc@heyzo_hd_2636_full.mp4",
+        "hhd800.com@STARS-566-HD.mp4",
+        "jav20s8.com@GIGL-677_4K.mp4",
+        "sbw99.cc@iesp-653-4K.mp4",
+        "4K-ABP-358_C.mkv",
+        "n1012-CD1.wmv",
+        "[]n1012-CD2.wmv",
+        "rctd-460ch.mp4",  # besides -C hard-subs, ch hard-subs are now supported
+        "rctd-461CH-CD2.mp4",  # CDn may follow ch
+        "rctd-461-Cd3-C.mp4",  # -C may follow CDn
+        "rctd-461-C-cD4.mp4",  # cD1/Cd1/cd1/CD1 are normalized to uppercase CD1 in the generated .nfo
     )

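
Two of the new test names, n1012-CD1.wmv and rctd-460ch.mp4, exercise the added branches: stripping a -CDn part can leave a number with no separator at all (n1012), and a trailing ...ch hard-subtitle marker is now dropped from the extracted number. The two new steps in isolation:

import re

filename = "n1012-CD1.wmv"
filename = re.sub("[-_]cd\d{1,2}", "", filename, flags=re.IGNORECASE)  # -> "n1012.wmv"
if not re.search("-|_", filename):  # no separator left after removing -CD1
    print(re.search(r'\w+', filename[:filename.find('.')], re.A).group())  # -> n1012

file_number = "RCTD-460CH"
if re.search("\d+ch$", file_number, flags=re.I):
    file_number = file_number[:-2]  # drop the hard-subtitle marker
print(file_number.upper())  # -> RCTD-460
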
@@ -9,7 +9,7 @@ mkdir build
 mkdir __pycache__

 pyinstaller --onefile Movie_Data_Capture.py `
-    --hidden-import "ImageProcessing.hog" `
+    --hidden-import "ImageProcessing.cnn" `
     --add-data "$FACE_RECOGNITION_MODELS;face_recognition_models" `
     --add-data "$CLOUDSCRAPER_PATH;cloudscraper" `
     --add-data "$OPENCC_PATH;opencc" `
@@ -9,4 +9,4 @@ urllib3==1.24.3
 certifi==2020.12.5
 MechanicalSoup==1.1.0
 opencc-python-reimplemented
 face_recognition
@@ -1,8 +1,10 @@
 pkg install python38 py38-requests py38-pip py38-lxml py38-pillow py38-cloudscraper py38-pysocks git zip py38-beautifulsoup448 py38-mechanicalsoup
 pip install pyquery pyinstaller
 pyinstaller --onefile Movie_Data_Capture.py --hidden-import ADC_function.py --hidden-import core.py \
+    --hidden-import "ImageProcessing.cnn" \
     --add-data "$(python3.8 -c 'import cloudscraper as _; print(_.__path__[0])' | tail -n 1):cloudscraper" \
     --add-data "$(python3.8 -c 'import opencc as _; print(_.__path__[0])' | tail -n 1):opencc" \
+    --add-data "$(python3.8 -c 'import face_recognition_models as _; print(_.__path__[0])' | tail -n 1):face_recognition_models" \
     --add-data "Img:Img" \
     --add-data "config.ini:." \
@@ -13,8 +13,10 @@
 pip3 install -r requirements.txt
 pip3 install cloudscraper==1.2.52
 pyinstaller --onefile Movie_Data_Capture.py --hidden-import ADC_function.py --hidden-import core.py \
+    --hidden-import "ImageProcessing.cnn" \
     --add-data "$(python3 -c 'import cloudscraper as _; print(_.__path__[0])' | tail -n 1):cloudscraper" \
     --add-data "$(python3 -c 'import opencc as _; print(_.__path__[0])' | tail -n 1):opencc" \
+    --add-data "$(python3 -c 'import face_recognition_models as _; print(_.__path__[0])' | tail -n 1):face_recognition_models" \
     --add-data "Img:Img" \
     --add-data "config.ini:." \
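
All four build scripts now bundle ImageProcessing.cnn and the face_recognition_models data. PyInstaller discovers modules by scanning static import statements, so a module chosen at runtime is invisible to it and must be declared with --hidden-import. An illustrative shape of the kind of dynamic import that defeats the scan (the project's actual loader may differ):

import importlib

def load_image_backend(name: str):
    # resolved at runtime from a config value, so PyInstaller's static
    # analysis never sees "ImageProcessing.cnn" -- hence --hidden-import
    return importlib.import_module(f"ImageProcessing.{name}")

backend = load_image_backend("cnn")  # or "hog"
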