Merge remote-tracking branch 'origin/master'
# Conflicts: # .github/workflows/main.yml
This commit is contained in:
154
ADC_function.py
154
ADC_function.py
@@ -1,6 +1,6 @@
|
||||
from os import replace
|
||||
import requests
|
||||
#import hashlib
|
||||
# import hashlib
|
||||
from pathlib import Path
|
||||
import secrets
|
||||
import os.path
|
||||
@@ -11,6 +11,7 @@ import time
|
||||
from lxml import etree
|
||||
import re
|
||||
import config
|
||||
import typing
|
||||
from urllib.parse import urljoin
|
||||
import mechanicalsoup
|
||||
from requests.adapters import HTTPAdapter
|
||||
@@ -25,10 +26,13 @@ def getXpathSingle(htmlcode, xpath):
|
||||
return result1
|
||||
|
||||
|
||||
G_USER_AGENT = r'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.54 Safari/537.36'
|
||||
G_USER_AGENT = r'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36'
|
||||
|
||||
|
||||
# 网页请求核心
|
||||
def get_html(url, cookies: dict = None, ua: str = None, return_type: str = None, encoding: str = None):
|
||||
"""
|
||||
网页请求核心函数
|
||||
"""
|
||||
verify = config.getInstance().cacert_file()
|
||||
configProxy = config.getInstance().proxy()
|
||||
errors = ""
|
||||
@@ -39,7 +43,8 @@ def get_html(url, cookies: dict = None, ua: str = None, return_type: str = None,
|
||||
try:
|
||||
if configProxy.enable:
|
||||
proxies = configProxy.proxies()
|
||||
result = requests.get(str(url), headers=headers, timeout=configProxy.timeout, proxies=proxies, verify=verify,
|
||||
result = requests.get(str(url), headers=headers, timeout=configProxy.timeout, proxies=proxies,
|
||||
verify=verify,
|
||||
cookies=cookies)
|
||||
else:
|
||||
result = requests.get(str(url), headers=headers, timeout=configProxy.timeout, cookies=cookies)
|
||||
@@ -91,6 +96,7 @@ def post_html(url: str, query: dict, headers: dict = None) -> requests.Response:
|
||||
|
||||
G_DEFAULT_TIMEOUT = 10 # seconds
|
||||
|
||||
|
||||
class TimeoutHTTPAdapter(HTTPAdapter):
|
||||
def __init__(self, *args, **kwargs):
|
||||
self.timeout = G_DEFAULT_TIMEOUT
|
||||
@@ -98,6 +104,7 @@ class TimeoutHTTPAdapter(HTTPAdapter):
|
||||
self.timeout = kwargs["timeout"]
|
||||
del kwargs["timeout"]
|
||||
super().__init__(*args, **kwargs)
|
||||
|
||||
def send(self, request, **kwargs):
|
||||
timeout = kwargs.get("timeout")
|
||||
if timeout is None:
|
||||
@@ -106,12 +113,14 @@ class TimeoutHTTPAdapter(HTTPAdapter):
|
||||
|
||||
|
||||
# with keep-alive feature
|
||||
def get_html_session(url:str = None, cookies: dict = None, ua: str = None, return_type: str = None, encoding: str = None):
|
||||
def get_html_session(url: str = None, cookies: dict = None, ua: str = None, return_type: str = None,
|
||||
encoding: str = None):
|
||||
configProxy = config.getInstance().proxy()
|
||||
session = requests.Session()
|
||||
if isinstance(cookies, dict) and len(cookies):
|
||||
requests.utils.add_dict_to_cookiejar(session.cookies, cookies)
|
||||
retries = Retry(total=configProxy.retry, connect=configProxy.retry, backoff_factor=1, status_forcelist=[429, 500, 502, 503, 504])
|
||||
retries = Retry(total=configProxy.retry, connect=configProxy.retry, backoff_factor=1,
|
||||
status_forcelist=[429, 500, 502, 503, 504])
|
||||
session.mount("https://", TimeoutHTTPAdapter(max_retries=retries, timeout=configProxy.timeout))
|
||||
session.mount("http://", TimeoutHTTPAdapter(max_retries=retries, timeout=configProxy.timeout))
|
||||
if configProxy.enable:
|
||||
@@ -142,12 +151,14 @@ def get_html_session(url:str = None, cookies: dict = None, ua: str = None, retur
|
||||
return None
|
||||
|
||||
|
||||
def get_html_by_browser(url:str = None, cookies: dict = None, ua: str = None, return_type: str = None, encoding: str = None, use_scraper: bool = False):
|
||||
def get_html_by_browser(url: str = None, cookies: dict = None, ua: str = None, return_type: str = None,
|
||||
encoding: str = None, use_scraper: bool = False):
|
||||
configProxy = config.getInstance().proxy()
|
||||
s = create_scraper(browser={'custom': ua or G_USER_AGENT,}) if use_scraper else requests.Session()
|
||||
s = create_scraper(browser={'custom': ua or G_USER_AGENT, }) if use_scraper else requests.Session()
|
||||
if isinstance(cookies, dict) and len(cookies):
|
||||
requests.utils.add_dict_to_cookiejar(s.cookies, cookies)
|
||||
retries = Retry(total=configProxy.retry, connect=configProxy.retry, backoff_factor=1, status_forcelist=[429, 500, 502, 503, 504])
|
||||
retries = Retry(total=configProxy.retry, connect=configProxy.retry, backoff_factor=1,
|
||||
status_forcelist=[429, 500, 502, 503, 504])
|
||||
s.mount("https://", TimeoutHTTPAdapter(max_retries=retries, timeout=configProxy.timeout))
|
||||
s.mount("http://", TimeoutHTTPAdapter(max_retries=retries, timeout=configProxy.timeout))
|
||||
if configProxy.enable:
|
||||
@@ -178,12 +189,14 @@ def get_html_by_browser(url:str = None, cookies: dict = None, ua: str = None, re
|
||||
return None
|
||||
|
||||
|
||||
def get_html_by_form(url, form_select: str = None, fields: dict = None, cookies: dict = None, ua: str = None, return_type: str = None, encoding: str = None):
|
||||
def get_html_by_form(url, form_select: str = None, fields: dict = None, cookies: dict = None, ua: str = None,
|
||||
return_type: str = None, encoding: str = None):
|
||||
configProxy = config.getInstance().proxy()
|
||||
s = requests.Session()
|
||||
if isinstance(cookies, dict) and len(cookies):
|
||||
requests.utils.add_dict_to_cookiejar(s.cookies, cookies)
|
||||
retries = Retry(total=configProxy.retry, connect=configProxy.retry, backoff_factor=1, status_forcelist=[429, 500, 502, 503, 504])
|
||||
retries = Retry(total=configProxy.retry, connect=configProxy.retry, backoff_factor=1,
|
||||
status_forcelist=[429, 500, 502, 503, 504])
|
||||
s.mount("https://", TimeoutHTTPAdapter(max_retries=retries, timeout=configProxy.timeout))
|
||||
s.mount("http://", TimeoutHTTPAdapter(max_retries=retries, timeout=configProxy.timeout))
|
||||
if configProxy.enable:
|
||||
@@ -216,12 +229,14 @@ def get_html_by_form(url, form_select: str = None, fields: dict = None, cookies:
|
||||
return None
|
||||
|
||||
|
||||
def get_html_by_scraper(url:str = None, cookies: dict = None, ua: str = None, return_type: str = None, encoding: str = None):
|
||||
def get_html_by_scraper(url: str = None, cookies: dict = None, ua: str = None, return_type: str = None,
|
||||
encoding: str = None):
|
||||
configProxy = config.getInstance().proxy()
|
||||
session = create_scraper(browser={'custom': ua or G_USER_AGENT,})
|
||||
session = create_scraper(browser={'custom': ua or G_USER_AGENT, })
|
||||
if isinstance(cookies, dict) and len(cookies):
|
||||
requests.utils.add_dict_to_cookiejar(session.cookies, cookies)
|
||||
retries = Retry(total=configProxy.retry, connect=configProxy.retry, backoff_factor=1, status_forcelist=[429, 500, 502, 503, 504])
|
||||
retries = Retry(total=configProxy.retry, connect=configProxy.retry, backoff_factor=1,
|
||||
status_forcelist=[429, 500, 502, 503, 504])
|
||||
session.mount("https://", TimeoutHTTPAdapter(max_retries=retries, timeout=configProxy.timeout))
|
||||
session.mount("http://", TimeoutHTTPAdapter(max_retries=retries, timeout=configProxy.timeout))
|
||||
if configProxy.enable:
|
||||
@@ -285,7 +300,12 @@ def translate(
|
||||
app_id: str = "",
|
||||
key: str = "",
|
||||
delay: int = 0,
|
||||
):
|
||||
) -> str:
|
||||
"""
|
||||
translate japanese kana to simplified chinese
|
||||
翻译日语假名到简体中文
|
||||
:raises ValueError: Non-existent translation engine
|
||||
"""
|
||||
trans_result = ""
|
||||
# 中文句子如果包含&等符号会被谷歌翻译截断损失内容,而且中文翻译到中文也没有意义,故而忽略,只翻译带有日语假名的
|
||||
if not is_japanese(src):
|
||||
@@ -295,7 +315,7 @@ def translate(
|
||||
if not re.match('^translate\.google\.(com|com\.\w{2}|\w{2})$', gsite):
|
||||
gsite = 'translate.google.cn'
|
||||
url = (
|
||||
f"https://{gsite}/translate_a/single?client=gtx&dt=t&dj=1&ie=UTF-8&sl=auto&tl={target_language}&q={src}"
|
||||
f"https://{gsite}/translate_a/single?client=gtx&dt=t&dj=1&ie=UTF-8&sl=auto&tl={target_language}&q={src}"
|
||||
)
|
||||
result = get_html(url=url, return_type="object")
|
||||
if not result.ok:
|
||||
@@ -324,26 +344,27 @@ f"https://{gsite}/translate_a/single?client=gtx&dt=t&dj=1&ie=UTF-8&sl=auto&tl={t
|
||||
return trans_result
|
||||
|
||||
|
||||
# 从浏览器中导出网站登录验证信息的cookies,能够以会员方式打开游客无法访问到的页面
|
||||
# 示例: FC2-755670 url https://javdb9.com/v/vO8Mn
|
||||
# json 文件格式
|
||||
# 文件名: 站点名.json,示例 javdb9.json
|
||||
# 内容(文件编码:UTF-8):
|
||||
'''
|
||||
{
|
||||
def load_cookies(cookie_json_filename: str):
|
||||
"""
|
||||
加载cookie,用于以会员方式访问非游客内容
|
||||
|
||||
:filename: cookie文件名。获取cookie方式:从网站登录后,通过浏览器插件(CookieBro或EdittThisCookie)或者直接在地址栏网站链接信息处都可以复制或者导出cookie内容,以JSON方式保存
|
||||
|
||||
# 示例: FC2-755670 url https://javdb9.com/v/vO8Mn
|
||||
# json 文件格式
|
||||
# 文件名: 站点名.json,示例 javdb9.json
|
||||
# 内容(文件编码:UTF-8):
|
||||
{
|
||||
"over18":"1",
|
||||
"redirect_to":"%2Fv%2FvO8Mn",
|
||||
"remember_me_token":"cbJdeaFpbHMiOnsibWVzc2FnZSI6IklrNVJjbTAzZFVSRVlVaEtPWEpUVFhOVU0yNXhJZz09IiwiZXhwIjoiMjAyMS0wNS0xNVQxMzoyODoxNy4wMDBaIiwicHVyIjoiY29va2llLnJlbWVtYmVyX21lX3Rva2VuIn19--a7131611e844cf75f9db4cd411b635889bff3fe3",
|
||||
"_jdb_session":"asddefqfwfwwrfdsdaAmqKj1%2FvOrDQP4b7h%2BvGp7brvIShi2Y%2FHBUr%2BklApk06TfhBOK3g5gRImZzoi49GINH%2FK49o3W%2FX64ugBiUAcudN9b27Mg6Ohu%2Bx9Z7A4bbqmqCt7XR%2Bao8PRuOjMcdDG5czoYHJCPIPZQFU28Gd7Awc2jc5FM5CoIgSRyaYDy9ulTO7DlavxoNL%2F6OFEL%2FyaA6XUYTB2Gs1kpPiUDqwi854mo5%2FrNxMhTeBK%2BjXciazMtN5KlE5JIOfiWAjNrnx7SV3Hj%2FqPNxRxXFQyEwHr5TZa0Vk1%2FjbwWQ0wcIFfh%2FMLwwqKydAh%2FLndc%2Bmdv3e%2FJ%2BiL2--xhqYnMyVRlxJajdN--u7nl0M7Oe7tZtPd4kIaEbg%3D%3D",
|
||||
"remember_me_token":"***********",
|
||||
"_jdb_session":"************",
|
||||
"locale":"zh",
|
||||
"__cfduid":"dee27116d98c432a5cabc1fe0e7c2f3c91620479752",
|
||||
"__cfduid":"*********",
|
||||
"theme":"auto"
|
||||
}
|
||||
'''
|
||||
# 从网站登录后,通过浏览器插件(CookieBro或EdittThisCookie)或者直接在地址栏网站链接信息处都可以复制或者导出cookie内容,
|
||||
# 并填写到以上json文件的相应字段中
|
||||
def load_cookies(filename):
|
||||
filename = os.path.basename(filename)
|
||||
}
|
||||
"""
|
||||
filename = os.path.basename(cookie_json_filename)
|
||||
if not len(filename):
|
||||
return None, None
|
||||
path_search_order = (
|
||||
@@ -364,8 +385,11 @@ def load_cookies(filename):
|
||||
except:
|
||||
return None, None
|
||||
|
||||
# 文件修改时间距此时的天数
|
||||
def file_modification_days(filename) -> int:
|
||||
|
||||
def file_modification_days(filename: str) -> int:
|
||||
"""
|
||||
文件修改时间距此时的天数
|
||||
"""
|
||||
mfile = Path(filename)
|
||||
if not mfile.is_file():
|
||||
return 9999
|
||||
@@ -376,18 +400,24 @@ def file_modification_days(filename) -> int:
|
||||
return 9999
|
||||
return days
|
||||
|
||||
|
||||
def file_not_exist_or_empty(filepath) -> bool:
|
||||
return not os.path.isfile(filepath) or os.path.getsize(filepath) == 0
|
||||
|
||||
# 日语简单检测
|
||||
def is_japanese(s) -> bool:
|
||||
return bool(re.search(r'[\u3040-\u309F\u30A0-\u30FF\uFF66-\uFF9F]', s, re.UNICODE))
|
||||
|
||||
def is_japanese(raw: str) -> bool:
|
||||
"""
|
||||
日语简单检测
|
||||
"""
|
||||
return bool(re.search(r'[\u3040-\u309F\u30A0-\u30FF\uFF66-\uFF9F]', raw, re.UNICODE))
|
||||
|
||||
|
||||
# Usage: python ./ADC_function.py https://cn.bing.com/
|
||||
if __name__ == "__main__":
|
||||
import sys, timeit
|
||||
from http.client import HTTPConnection
|
||||
|
||||
|
||||
def benchmark(t, url):
|
||||
print(f"HTTP GET Benchmark times:{t} url:{url}")
|
||||
tm = timeit.timeit(f"_ = session1.get('{url}')",
|
||||
@@ -406,18 +436,25 @@ if __name__ == "__main__":
|
||||
"from __main__ import get_html",
|
||||
number=t)
|
||||
print(f' *{tm:>10.5f}s get_html()')
|
||||
|
||||
|
||||
t = 100
|
||||
#url = "https://www.189.cn/"
|
||||
|
||||
# url = "https://www.189.cn/"
|
||||
url = "http://www.chinaunicom.com"
|
||||
HTTPConnection.debuglevel = 1
|
||||
s = get_html_session()
|
||||
_ = s.get(url)
|
||||
HTTPConnection.debuglevel = 0
|
||||
if len(sys.argv)>1:
|
||||
if len(sys.argv) > 1:
|
||||
url = sys.argv[1]
|
||||
benchmark(t, url)
|
||||
|
||||
def download_file_with_filename(url, filename, path):
|
||||
|
||||
def download_file_with_filename(url: str, filename: str, path: str) -> None:
|
||||
"""
|
||||
download file save to give path with given name from given url
|
||||
"""
|
||||
conf = config.getInstance()
|
||||
configProxy = conf.proxy()
|
||||
|
||||
@@ -475,38 +512,55 @@ def download_file_with_filename(url, filename, path):
|
||||
raise ValueError('[-]Connect Failed! Please check your Proxy or Network!')
|
||||
return
|
||||
|
||||
def download_one_file(args):
|
||||
|
||||
def download_one_file(args) -> str:
|
||||
"""
|
||||
download file save to given path from given url
|
||||
wrapped for map function
|
||||
"""
|
||||
|
||||
def _inner(url: str, save_path: Path):
|
||||
filebytes = get_html(url, return_type='content')
|
||||
if isinstance(filebytes, bytes) and len(filebytes):
|
||||
if len(filebytes) == save_path.open('wb').write(filebytes):
|
||||
return str(save_path)
|
||||
|
||||
return _inner(*args)
|
||||
|
||||
'''用法示例: 2线程同时下载两个不同文件,并保存到不同路径,路径目录可未创建,但需要具备对目标目录和文件的写权限
|
||||
parallel_download_files([
|
||||
|
||||
def parallel_download_files(dn_list: typing.Iterable[typing.Sequence], parallel: int = 0):
|
||||
"""
|
||||
download files in parallel 多线程下载文件
|
||||
|
||||
用法示例: 2线程同时下载两个不同文件,并保存到不同路径,路径目录可未创建,但需要具备对目标目录和文件的写权限
|
||||
parallel_download_files([
|
||||
('https://site1/img/p1.jpg', 'C:/temp/img/p1.jpg'),
|
||||
('https://site2/cover/n1.xml', 'C:/tmp/cover/n1.xml')
|
||||
])
|
||||
'''
|
||||
# dn_list 可以是 tuple或者list: ((url1, save_fullpath1),(url2, save_fullpath2),)
|
||||
# parallel: 并行下载的线程池线程数,为0则由函数自己决定
|
||||
def parallel_download_files(dn_list, parallel: int = 0):
|
||||
|
||||
:dn_list: 可以是 tuple或者list: ((url1, save_fullpath1),(url2, save_fullpath2),) fullpath可以是str或Path
|
||||
:parallel: 并行下载的线程池线程数,为0则由函数自己决定
|
||||
"""
|
||||
mp_args = []
|
||||
for url, fullpath in dn_list:
|
||||
if url and isinstance(url, str) and url.startswith('http') and fullpath and isinstance(fullpath, (str, Path)) and len(str(fullpath)):
|
||||
if url and isinstance(url, str) and url.startswith('http') \
|
||||
and fullpath and isinstance(fullpath, (str, Path)) and len(str(fullpath)):
|
||||
fullpath = Path(fullpath)
|
||||
fullpath.parent.mkdir(parents=True, exist_ok=True)
|
||||
mp_args.append((url, fullpath))
|
||||
if not len(mp_args):
|
||||
return []
|
||||
if not isinstance(parallel, int) or parallel not in range(1,200):
|
||||
if not isinstance(parallel, int) or parallel not in range(1, 200):
|
||||
parallel = min(5, len(mp_args))
|
||||
with ThreadPoolExecutor(parallel) as pool:
|
||||
results = list(pool.map(download_one_file, mp_args))
|
||||
return results
|
||||
|
||||
def delete_all_elements_in_list(string,lists):
|
||||
|
||||
def delete_all_elements_in_list(string: str, lists: typing.Iterable[str]):
|
||||
"""
|
||||
delete same string in given list
|
||||
"""
|
||||
new_lists = []
|
||||
for i in lists:
|
||||
if i != string:
|
||||
|
||||
@@ -3,18 +3,19 @@ import json
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import time
|
||||
import shutil
|
||||
import typing
|
||||
import urllib3
|
||||
import signal
|
||||
import platform
|
||||
import multiprocessing
|
||||
from datetime import datetime, timedelta
|
||||
from pathlib import Path
|
||||
|
||||
from opencc import OpenCC
|
||||
|
||||
import ADC_function
|
||||
import config
|
||||
from datetime import datetime, timedelta
|
||||
import time
|
||||
from pathlib import Path
|
||||
from ADC_function import file_modification_days, get_html, parallel_download_files
|
||||
from number_parser import get_number
|
||||
from core import core_main, moveFailedFolder
|
||||
@@ -30,7 +31,7 @@ def check_update(local_version):
|
||||
time.sleep(60)
|
||||
os._exit(-1)
|
||||
data = json.loads(htmlcode)
|
||||
remote = int(data["tag_name"].replace(".",""))
|
||||
remote = int(data["tag_name"].replace(".", ""))
|
||||
local_version = int(local_version.replace(".", ""))
|
||||
if local_version < remote:
|
||||
print("[*]" + ("* New update " + str(data["tag_name"]) + " *").center(54))
|
||||
@@ -43,36 +44,44 @@ def argparse_function(ver: str) -> typing.Tuple[str, str, str, str, bool]:
|
||||
conf = config.getInstance()
|
||||
parser = argparse.ArgumentParser(epilog=f"Load Config file '{conf.ini_path}'.")
|
||||
parser.add_argument("file", default='', nargs='?', help="Single Movie file path.")
|
||||
parser.add_argument("-p","--path",default='',nargs='?',help="Analysis folder path.")
|
||||
parser.add_argument("-m","--main-mode",default='',nargs='?',help="Main mode. 1:Scraping 2:Organizing 3:Scraping in analysis folder")
|
||||
parser.add_argument("-p", "--path", default='', nargs='?', help="Analysis folder path.")
|
||||
parser.add_argument("-m", "--main-mode", default='', nargs='?',
|
||||
help="Main mode. 1:Scraping 2:Organizing 3:Scraping in analysis folder")
|
||||
parser.add_argument("-n", "--number", default='', nargs='?', help="Custom file number of single movie file.")
|
||||
# parser.add_argument("-C", "--config", default='config.ini', nargs='?', help="The config file Path.")
|
||||
default_logdir = str(Path.home() / '.mlogs')
|
||||
parser.add_argument("-o","--log-dir",dest='logdir',default=default_logdir,nargs='?',
|
||||
parser.add_argument("-o", "--log-dir", dest='logdir', default=default_logdir, nargs='?',
|
||||
help=f"""Duplicate stdout and stderr to logfiles in logging folder, default on.
|
||||
default folder for current user: '{default_logdir}'. Change default folder to an empty file,
|
||||
or use --log-dir= to turn log off.""")
|
||||
parser.add_argument("-q","--regex-query",dest='regexstr',default='',nargs='?',help="python re module regex filepath filtering.")
|
||||
parser.add_argument("-d","--nfo-skip-days",dest='days',default='',nargs='?', help="Override nfo_skip_days value in config.")
|
||||
parser.add_argument("-c","--stop-counter",dest='cnt',default='',nargs='?', help="Override stop_counter value in config.")
|
||||
parser.add_argument("-q", "--regex-query", dest='regexstr', default='', nargs='?',
|
||||
help="python re module regex filepath filtering.")
|
||||
parser.add_argument("-d", "--nfo-skip-days", dest='days', default='', nargs='?',
|
||||
help="Override nfo_skip_days value in config.")
|
||||
parser.add_argument("-c", "--stop-counter", dest='cnt', default='', nargs='?',
|
||||
help="Override stop_counter value in config.")
|
||||
parser.add_argument("-i", "--ignore-failed-list", action="store_true", help="Ignore failed list '{}'".format(
|
||||
os.path.join(os.path.abspath(conf.failed_folder()), 'failed_list.txt')))
|
||||
parser.add_argument("-a", "--auto-exit", action="store_true",
|
||||
help="Auto exit after program complete")
|
||||
parser.add_argument("-g","--debug", action="store_true",
|
||||
parser.add_argument("-g", "--debug", action="store_true",
|
||||
help="Turn on debug mode to generate diagnostic log for issue report.")
|
||||
parser.add_argument("-z","--zero-operation",dest='zero_op', action="store_true",
|
||||
parser.add_argument("-z", "--zero-operation", dest='zero_op', action="store_true",
|
||||
help="""Only show job list of files and numbers, and **NO** actual operation
|
||||
is performed. It may help you correct wrong numbers before real job.""")
|
||||
parser.add_argument("-v", "--version", action="version", version=ver)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
def get_natural_number_or_none(value):
|
||||
return int(value) if isinstance(value, str) and value.isnumeric() and int(value)>=0 else None
|
||||
return int(value) if isinstance(value, str) and value.isnumeric() and int(value) >= 0 else None
|
||||
|
||||
def get_str_or_none(value):
|
||||
return value if isinstance(value, str) and len(value) else None
|
||||
|
||||
def get_bool_or_none(value):
|
||||
return True if isinstance(value, bool) and value else None
|
||||
|
||||
config.G_conf_override["common:main_mode"] = get_natural_number_or_none(args.main_mode)
|
||||
config.G_conf_override["common:source_folder"] = get_str_or_none(args.path)
|
||||
config.G_conf_override["common:auto_exit"] = get_bool_or_none(args.auto_exit)
|
||||
@@ -83,43 +92,53 @@ is performed. It may help you correct wrong numbers before real job.""")
|
||||
|
||||
return args.file, args.number, args.logdir, args.regexstr, args.zero_op
|
||||
|
||||
|
||||
class OutLogger(object):
|
||||
def __init__(self, logfile) -> None:
|
||||
self.term = sys.stdout
|
||||
self.log = open(logfile,"w",encoding='utf-8',buffering=1)
|
||||
self.log = open(logfile, "w", encoding='utf-8', buffering=1)
|
||||
self.filepath = logfile
|
||||
|
||||
def __del__(self):
|
||||
self.close()
|
||||
|
||||
def __enter__(self):
|
||||
pass
|
||||
|
||||
def __exit__(self, *args):
|
||||
self.close()
|
||||
def write(self,msg):
|
||||
|
||||
def write(self, msg):
|
||||
self.term.write(msg)
|
||||
self.log.write(msg)
|
||||
|
||||
def flush(self):
|
||||
self.term.flush()
|
||||
self.log.flush()
|
||||
os.fsync(self.log.fileno())
|
||||
|
||||
def close(self):
|
||||
if self.term != None:
|
||||
if self.term is not None:
|
||||
sys.stdout = self.term
|
||||
self.term = None
|
||||
if self.log != None:
|
||||
if self.log is not None:
|
||||
self.log.close()
|
||||
self.log = None
|
||||
|
||||
|
||||
class ErrLogger(OutLogger):
|
||||
|
||||
def __init__(self, logfile) -> None:
|
||||
self.term = sys.stderr
|
||||
self.log = open(logfile,"w",encoding='utf-8',buffering=1)
|
||||
self.log = open(logfile, "w", encoding='utf-8', buffering=1)
|
||||
self.filepath = logfile
|
||||
|
||||
def close(self):
|
||||
if self.term != None:
|
||||
if self.term is not None:
|
||||
sys.stderr = self.term
|
||||
self.term = None
|
||||
if self.log != None:
|
||||
|
||||
if self.log is not None:
|
||||
self.log.close()
|
||||
self.log = None
|
||||
|
||||
@@ -130,7 +149,7 @@ def dupe_stdout_to_logfile(logdir: str):
|
||||
log_dir = Path(logdir)
|
||||
if not log_dir.exists():
|
||||
try:
|
||||
log_dir.mkdir(parents=True,exist_ok=True)
|
||||
log_dir.mkdir(parents=True, exist_ok=True)
|
||||
except:
|
||||
pass
|
||||
if not log_dir.is_dir():
|
||||
@@ -147,7 +166,7 @@ def dupe_stdout_to_logfile(logdir: str):
|
||||
def close_logfile(logdir: str):
|
||||
if not isinstance(logdir, str) or len(logdir) == 0 or not os.path.isdir(logdir):
|
||||
return
|
||||
#日志关闭前保存日志路径
|
||||
# 日志关闭前保存日志路径
|
||||
filepath = None
|
||||
try:
|
||||
filepath = sys.stdout.filepath
|
||||
@@ -158,7 +177,7 @@ def close_logfile(logdir: str):
|
||||
log_dir = Path(logdir).resolve()
|
||||
if isinstance(filepath, Path):
|
||||
print(f"Log file '{filepath}' saved.")
|
||||
assert(filepath.parent.samefile(log_dir))
|
||||
assert (filepath.parent.samefile(log_dir))
|
||||
# 清理空文件
|
||||
for f in log_dir.glob(r'*_err.txt'):
|
||||
if f.stat().st_size == 0:
|
||||
@@ -210,7 +229,7 @@ def close_logfile(logdir: str):
|
||||
if not txts or not len(txts):
|
||||
break
|
||||
txts.sort()
|
||||
tmstr_3_month_ago = (today.replace(day=1) - timedelta(days=3*30)).strftime("%Y%m32")
|
||||
tmstr_3_month_ago = (today.replace(day=1) - timedelta(days=3 * 30)).strftime("%Y%m32")
|
||||
deadline_month = f'mdc_{tmstr_3_month_ago}'
|
||||
month_merge = [f for f in txts if f.stem < deadline_month]
|
||||
if not month_merge or not len(month_merge):
|
||||
@@ -231,7 +250,7 @@ def close_logfile(logdir: str):
|
||||
if not mons or not len(mons):
|
||||
return
|
||||
mons.sort()
|
||||
deadline_year = f'mdc_{today.year-1}13'
|
||||
deadline_year = f'mdc_{today.year - 1}13'
|
||||
year_merge = [f for f in mons if f.stem < deadline_year]
|
||||
if not year_merge or not len(year_merge):
|
||||
return
|
||||
@@ -254,13 +273,14 @@ def signal_handler(*args):
|
||||
print('[!]Ctrl+C detected, Exit.')
|
||||
sys.exit(9)
|
||||
|
||||
|
||||
def sigdebug_handler(*args):
|
||||
config.G_conf_override["debug_mode:switch"] = not config.G_conf_override["debug_mode:switch"]
|
||||
print('[!]Debug {}'.format('On' if config.getInstance().debug() else 'oFF'))
|
||||
|
||||
|
||||
# 新增失败文件列表跳过处理,及.nfo修改天数跳过处理,提示跳过视频总数,调试模式(-g)下详细被跳过文件,跳过小广告
|
||||
def movie_lists(source_folder, regexstr):
|
||||
def movie_lists(source_folder, regexstr: str) -> typing.List[str]:
|
||||
conf = config.getInstance()
|
||||
main_mode = conf.main_mode()
|
||||
debug = conf.debug()
|
||||
@@ -282,7 +302,7 @@ def movie_lists(source_folder, regexstr):
|
||||
failed_set = set(flist)
|
||||
if len(flist) != len(failed_set): # 检查去重并写回,但是不改变failed_list.txt内条目的先后次序,重复的只保留最后的
|
||||
fset = failed_set.copy()
|
||||
for i in range(len(flist)-1, -1, -1):
|
||||
for i in range(len(flist) - 1, -1, -1):
|
||||
fset.remove(flist[i]) if flist[i] in fset else flist.pop(i)
|
||||
failed_list_txt_path.write_text('\n'.join(flist) + '\n', encoding='utf-8')
|
||||
assert len(fset) == 0 and len(flist) == len(failed_set)
|
||||
@@ -311,11 +331,12 @@ def movie_lists(source_folder, regexstr):
|
||||
continue # file is symlink or hardlink(Linux/NTFS/Darwin)
|
||||
# 调试用0字节样本允许通过,去除小于120MB的广告'苍老师强力推荐.mp4'(102.2MB)'黑道总裁.mp4'(98.4MB)'有趣的妹子激情表演.MP4'(95MB)'有趣的臺灣妹妹直播.mp4'(15.1MB)
|
||||
movie_size = 0 if is_sym else full_name.stat().st_size # 同上 符号链接不取stat()及st_size,直接赋0跳过小视频检测
|
||||
if movie_size > 0 and movie_size < 125829120: # 1024*1024*120=125829120
|
||||
if 0 < movie_size < 125829120: # 1024*1024*120=125829120
|
||||
continue
|
||||
if cliRE and not cliRE.search(absf) or trailerRE.search(full_name.name):
|
||||
continue
|
||||
if main_mode == 3 and nfo_skip_days > 0 and file_modification_days(full_name.with_suffix('.nfo')) <= nfo_skip_days:
|
||||
if main_mode == 3 and nfo_skip_days > 0 and file_modification_days(
|
||||
full_name.with_suffix('.nfo')) <= nfo_skip_days:
|
||||
skip_nfo_days_cnt += 1
|
||||
if debug:
|
||||
print(f"[!]Skip movie by it's .nfo which modified within {nfo_skip_days} days: '{absf}'")
|
||||
@@ -325,7 +346,8 @@ def movie_lists(source_folder, regexstr):
|
||||
if skip_failed_cnt:
|
||||
print(f"[!]Skip {skip_failed_cnt} movies in failed list '{failed_list_txt_path}'.")
|
||||
if skip_nfo_days_cnt:
|
||||
print(f"[!]Skip {skip_nfo_days_cnt} movies in source folder '{source}' who's .nfo modified within {nfo_skip_days} days.")
|
||||
print(
|
||||
f"[!]Skip {skip_nfo_days_cnt} movies in source folder '{source}' who's .nfo modified within {nfo_skip_days} days.")
|
||||
if nfo_skip_days <= 0 or not soft_link or main_mode == 3:
|
||||
return total
|
||||
# 软连接方式,已经成功削刮的也需要从成功目录中检查.nfo更新天数,跳过N天内更新过的
|
||||
@@ -351,13 +373,17 @@ def movie_lists(source_folder, regexstr):
|
||||
if debug:
|
||||
print(f"[!]Skip file successfully processed within {nfo_skip_days} days: '{f}'")
|
||||
if len(rm_list):
|
||||
print(f"[!]Skip {len(rm_list)} movies in success folder '{success_folder}' who's .nfo modified within {nfo_skip_days} days.")
|
||||
print(
|
||||
f"[!]Skip {len(rm_list)} movies in success folder '{success_folder}' who's .nfo modified within {nfo_skip_days} days.")
|
||||
|
||||
return total
|
||||
|
||||
|
||||
def create_failed_folder(failed_folder):
|
||||
if not os.path.exists(failed_folder): # 新建failed文件夹
|
||||
def create_failed_folder(failed_folder: str):
|
||||
"""
|
||||
新建failed文件夹
|
||||
"""
|
||||
if not os.path.exists(failed_folder):
|
||||
try:
|
||||
os.makedirs(failed_folder)
|
||||
except:
|
||||
@@ -370,9 +396,7 @@ def rm_empty_folder(path):
|
||||
deleted = set()
|
||||
for current_dir, subdirs, files in os.walk(abspath, topdown=False):
|
||||
try:
|
||||
still_has_subdirs = any(
|
||||
_ for subdir in subdirs if os.path.join(current_dir, subdir) not in deleted
|
||||
)
|
||||
still_has_subdirs = any(_ for subdir in subdirs if os.path.join(current_dir, subdir) not in deleted)
|
||||
if not any(files) and not still_has_subdirs and not os.path.samefile(path, current_dir):
|
||||
os.rmdir(current_dir)
|
||||
deleted.add(current_dir)
|
||||
@@ -387,7 +411,7 @@ def create_data_and_move(file_path: str, zero_op, oCC):
|
||||
n_number = get_number(debug, os.path.basename(file_path))
|
||||
file_path = os.path.abspath(file_path)
|
||||
|
||||
if debug == True:
|
||||
if debug is True:
|
||||
print(f"[!] [{n_number}] As Number making data for '{file_path}'")
|
||||
if zero_op:
|
||||
return
|
||||
@@ -443,8 +467,8 @@ def create_data_and_move_with_custom_number(file_path: str, custom_number, oCC):
|
||||
|
||||
|
||||
def main():
|
||||
version = '6.0.1'
|
||||
urllib3.disable_warnings() #Ignore http proxy warning
|
||||
version = '6.0.2'
|
||||
urllib3.disable_warnings() # Ignore http proxy warning
|
||||
|
||||
# Read config.ini first, in argparse_function() need conf.failed_folder()
|
||||
conf = config.Config("config.ini")
|
||||
@@ -456,7 +480,7 @@ def main():
|
||||
|
||||
main_mode = conf.main_mode()
|
||||
folder_path = ""
|
||||
if not main_mode in (1, 2, 3):
|
||||
if main_mode not in (1, 2, 3):
|
||||
print(f"[-]Main mode must be 1 or 2 or 3! You can run '{os.path.basename(sys.argv[0])} --help' for more help.")
|
||||
sys.exit(4)
|
||||
|
||||
@@ -467,7 +491,8 @@ def main():
|
||||
signal.signal(signal.SIGWINCH, sigdebug_handler)
|
||||
dupe_stdout_to_logfile(logdir)
|
||||
|
||||
platform_total = str(' - ' + platform.platform() + ' \n[*] - ' + platform.machine() + ' - Python-' + platform.python_version())
|
||||
platform_total = str(
|
||||
' - ' + platform.platform() + ' \n[*] - ' + platform.machine() + ' - Python-' + platform.python_version())
|
||||
|
||||
print('[*]================= Movie Data Capture =================')
|
||||
print('[*]' + version.center(54))
|
||||
@@ -485,14 +510,14 @@ def main():
|
||||
print('[+]Enable debug')
|
||||
if conf.soft_link():
|
||||
print('[!]Enable soft link')
|
||||
if len(sys.argv)>1:
|
||||
print('[!]CmdLine:'," ".join(sys.argv[1:]))
|
||||
if len(sys.argv) > 1:
|
||||
print('[!]CmdLine:', " ".join(sys.argv[1:]))
|
||||
print('[+]Main Working mode ## {}: {} ## {}{}{}'
|
||||
.format(*(main_mode, ['Scraping', 'Organizing', 'Scraping in analysis folder'][main_mode-1],
|
||||
.format(*(main_mode, ['Scraping', 'Organizing', 'Scraping in analysis folder'][main_mode - 1],
|
||||
"" if not conf.multi_threading() else ", multi_threading on",
|
||||
"" if conf.nfo_skip_days() == 0 else f", nfo_skip_days={conf.nfo_skip_days()}",
|
||||
"" if conf.stop_counter() == 0 else f", stop_counter={conf.stop_counter()}"
|
||||
) if not single_file_path else ('-','Single File', '','',''))
|
||||
) if not single_file_path else ('-', 'Single File', '', '', ''))
|
||||
)
|
||||
|
||||
if conf.update_check():
|
||||
@@ -501,11 +526,15 @@ def main():
|
||||
create_failed_folder(conf.failed_folder())
|
||||
|
||||
# Download Mapping Table, parallel version
|
||||
def fmd(f):
|
||||
def fmd(f) -> typing.Tuple[str, Path]:
|
||||
"""
|
||||
|
||||
"""
|
||||
return ('https://raw.githubusercontent.com/yoshiko2/Movie_Data_Capture/master/MappingTable/' + f,
|
||||
Path.home() / '.local' / 'share' / 'mdc' / f)
|
||||
|
||||
map_tab = (fmd('mapping_actor.xml'), fmd('mapping_info.xml'), fmd('c_number.json'))
|
||||
for k,v in map_tab:
|
||||
for k, v in map_tab:
|
||||
if v.exists():
|
||||
if file_modification_days(str(v)) >= conf.mapping_table_validity():
|
||||
print("[+]Mapping Table Out of date! Remove", str(v))
|
||||
@@ -525,14 +554,15 @@ def main():
|
||||
try:
|
||||
oCC = None if ccm == 0 else OpenCC('t2s.json' if ccm == 1 else 's2t.json')
|
||||
except:
|
||||
# some OS no OpennCC cpython, try opencc-python-reimplemented.
|
||||
# some OS no OpenCC cpython, try opencc-python-reimplemented.
|
||||
# pip uninstall opencc && pip install opencc-python-reimplemented
|
||||
oCC = None if ccm == 0 else OpenCC('t2s' if ccm == 1 else 's2t')
|
||||
|
||||
if not single_file_path == '': #Single File
|
||||
if not single_file_path == '': # Single File
|
||||
print('[+]==================== Single File =====================')
|
||||
if custom_number == '':
|
||||
create_data_and_move_with_custom_number(single_file_path, get_number(conf.debug(), os.path.basename(single_file_path)), oCC)
|
||||
create_data_and_move_with_custom_number(single_file_path,
|
||||
get_number(conf.debug(), os.path.basename(single_file_path)), oCC)
|
||||
else:
|
||||
create_data_and_move_with_custom_number(single_file_path, custom_number, oCC)
|
||||
else:
|
||||
@@ -547,7 +577,7 @@ def main():
|
||||
print('[+]Find', count_all, 'movies.')
|
||||
print('[*]======================================================')
|
||||
stop_count = conf.stop_counter()
|
||||
if stop_count<1:
|
||||
if stop_count < 1:
|
||||
stop_count = 999999
|
||||
else:
|
||||
count_all = str(min(len(movie_list), stop_count))
|
||||
@@ -555,7 +585,8 @@ def main():
|
||||
for movie_path in movie_list: # 遍历电影列表 交给core处理
|
||||
count = count + 1
|
||||
percentage = str(count / int(count_all) * 100)[:4] + '%'
|
||||
print('[!] {:>30}{:>21}'.format('- ' + percentage + ' [' + str(count) + '/' + count_all + '] -', time.strftime("%H:%M:%S")))
|
||||
print('[!] {:>30}{:>21}'.format('- ' + percentage + ' [' + str(count) + '/' + count_all + '] -',
|
||||
time.strftime("%H:%M:%S")))
|
||||
create_data_and_move(movie_path, zero_op, oCC)
|
||||
if count >= stop_count:
|
||||
print("[!]Stop counter triggered!")
|
||||
@@ -581,7 +612,7 @@ def main():
|
||||
|
||||
sys.exit(0)
|
||||
|
||||
import multiprocessing
|
||||
|
||||
if __name__ == '__main__':
|
||||
multiprocessing.freeze_support()
|
||||
main()
|
||||
|
||||
@@ -38,9 +38,10 @@ def get_data_state(data: dict) -> bool: # 元数据获取失败检测
|
||||
|
||||
return True
|
||||
|
||||
def get_data_from_json(file_number, oCC): # 从JSON返回元数据
|
||||
|
||||
def get_data_from_json(file_number, oCC):
|
||||
"""
|
||||
iterate through all services and fetch the data
|
||||
iterate through all services and fetch the data 从JSON返回元数据
|
||||
"""
|
||||
|
||||
actor_mapping_data = etree.parse(str(Path.home() / '.local' / 'share' / 'mdc' / 'mapping_actor.xml'))
|
||||
@@ -67,7 +68,7 @@ def get_data_from_json(file_number, oCC): # 从JSON返回元数据
|
||||
conf = config.getInstance()
|
||||
# default fetch order list, from the beginning to the end
|
||||
sources = conf.sources().split(',')
|
||||
if not len(conf.sources()) > 80:
|
||||
if len(sources) <= len(func_mapping):
|
||||
# if the input file name matches certain rules,
|
||||
# move some web service to the beginning of the list
|
||||
lo_file_number = file_number.lower()
|
||||
@@ -235,8 +236,8 @@ def get_data_from_json(file_number, oCC): # 从JSON返回元数据
|
||||
json_data['studio'] = studio
|
||||
json_data['director'] = director
|
||||
|
||||
if conf.is_transalte():
|
||||
translate_values = conf.transalte_values().split(",")
|
||||
if conf.is_translate():
|
||||
translate_values = conf.translate_values().split(",")
|
||||
for translate_value in translate_values:
|
||||
if json_data[translate_value] == "":
|
||||
continue
|
||||
@@ -248,12 +249,12 @@ def get_data_from_json(file_number, oCC): # 从JSON返回元数据
|
||||
continue
|
||||
except:
|
||||
pass
|
||||
if conf.get_transalte_engine() == "azure":
|
||||
if conf.get_translate_engine() == "azure":
|
||||
t = translate(
|
||||
json_data[translate_value],
|
||||
target_language="zh-Hans",
|
||||
engine=conf.get_transalte_engine(),
|
||||
key=conf.get_transalte_key(),
|
||||
engine=conf.get_translate_engine(),
|
||||
key=conf.get_translate_key(),
|
||||
)
|
||||
else:
|
||||
t = translate(json_data[translate_value])
|
||||
@@ -326,11 +327,13 @@ def get_data_from_json(file_number, oCC): # 从JSON返回元数据
|
||||
if i not in json_data:
|
||||
naming_rule += i.strip("'").strip('"')
|
||||
else:
|
||||
naming_rule += json_data.get(i)
|
||||
item = json_data.get(i)
|
||||
naming_rule += item if type(item) is not list else "&".join(item)
|
||||
|
||||
json_data['naming_rule'] = naming_rule
|
||||
return json_data
|
||||
|
||||
|
||||
def special_characters_replacement(text) -> str:
|
||||
if not isinstance(text, str):
|
||||
return text
|
||||
|
||||
@@ -8,7 +8,7 @@ success_output_folder=JAV_output
|
||||
soft_link=0
|
||||
failed_move=1
|
||||
auto_exit=0
|
||||
transalte_to_sc=0
|
||||
translate_to_sc=0
|
||||
multi_threading=0
|
||||
;actor_gender value: female(♀) or male(♂) or both(♀ ♂) or all(♂ ♀ ⚧)
|
||||
actor_gender=female
|
||||
@@ -51,7 +51,7 @@ folders=failed,JAV_output
|
||||
switch=0
|
||||
|
||||
; 机器翻译
|
||||
[transalte]
|
||||
[translate]
|
||||
switch=0
|
||||
;可选项 google-free,azure
|
||||
engine=google-free
|
||||
|
||||
104
config.py
104
config.py
@@ -5,18 +5,17 @@ import configparser
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
G_conf_override = {
|
||||
# index 0 save Config() first instance for quick access by using getInstance()
|
||||
0 : None,
|
||||
0: None,
|
||||
# register override config items
|
||||
"common:main_mode" : None,
|
||||
"common:source_folder" : None,
|
||||
"common:auto_exit" : None,
|
||||
"common:nfo_skip_days" : None,
|
||||
"common:stop_counter" : None,
|
||||
"common:ignore_failed_list" : None,
|
||||
"debug_mode:switch" : None
|
||||
"common:main_mode": None,
|
||||
"common:source_folder": None,
|
||||
"common:auto_exit": None,
|
||||
"common:nfo_skip_days": None,
|
||||
"common:stop_counter": None,
|
||||
"common:ignore_failed_list": None,
|
||||
"debug_mode:switch": None
|
||||
}
|
||||
|
||||
|
||||
@@ -98,14 +97,18 @@ class Config:
|
||||
# print("[-]",e)
|
||||
# sys.exit(3)
|
||||
# #self.conf = self._default_config()
|
||||
|
||||
def getboolean_override(self, section, item) -> bool:
|
||||
return self.conf.getboolean(section, item) if G_conf_override[f"{section}:{item}"] is None else bool(G_conf_override[f"{section}:{item}"])
|
||||
return self.conf.getboolean(section, item) if G_conf_override[f"{section}:{item}"] is None else bool(
|
||||
G_conf_override[f"{section}:{item}"])
|
||||
|
||||
def getint_override(self, section, item) -> int:
|
||||
return self.conf.getint(section, item) if G_conf_override[f"{section}:{item}"] is None else int(G_conf_override[f"{section}:{item}"])
|
||||
return self.conf.getint(section, item) if G_conf_override[f"{section}:{item}"] is None else int(
|
||||
G_conf_override[f"{section}:{item}"])
|
||||
|
||||
def get_override(self, section, item) -> str:
|
||||
return self.conf.get(section, item) if G_conf_override[f"{section}:{item}"] is None else str(G_conf_override[f"{section}:{item}"])
|
||||
return self.conf.get(section, item) if G_conf_override[f"{section}:{item}"] is None else str(
|
||||
G_conf_override[f"{section}:{item}"])
|
||||
|
||||
def main_mode(self) -> int:
|
||||
try:
|
||||
@@ -127,34 +130,46 @@ class Config:
|
||||
|
||||
def soft_link(self) -> bool:
|
||||
return self.conf.getboolean("common", "soft_link")
|
||||
|
||||
def failed_move(self) -> bool:
|
||||
return self.conf.getboolean("common", "failed_move")
|
||||
|
||||
def auto_exit(self) -> bool:
|
||||
return self.getboolean_override("common", "auto_exit")
|
||||
def transalte_to_sc(self) -> bool:
|
||||
return self.conf.getboolean("common", "transalte_to_sc")
|
||||
|
||||
def translate_to_sc(self) -> bool:
|
||||
return self.conf.getboolean("common", "translate_to_sc")
|
||||
|
||||
def multi_threading(self) -> bool:
|
||||
return self.conf.getboolean("common", "multi_threading")
|
||||
|
||||
def del_empty_folder(self) -> bool:
|
||||
return self.conf.getboolean("common", "del_empty_folder")
|
||||
|
||||
def nfo_skip_days(self) -> int:
|
||||
try:
|
||||
return self.getint_override("common", "nfo_skip_days")
|
||||
except:
|
||||
return 30
|
||||
|
||||
def stop_counter(self) -> int:
|
||||
try:
|
||||
return self.getint_override("common", "stop_counter")
|
||||
except:
|
||||
return 0
|
||||
|
||||
def ignore_failed_list(self) -> bool:
|
||||
return self.getboolean_override("common", "ignore_failed_list")
|
||||
|
||||
def download_only_missing_images(self) -> bool:
|
||||
return self.conf.getboolean("common", "download_only_missing_images")
|
||||
|
||||
def mapping_table_validity(self) -> int:
|
||||
return self.conf.getint("common", "mapping_table_validity")
|
||||
def is_transalte(self) -> bool:
|
||||
return self.conf.getboolean("transalte", "switch")
|
||||
|
||||
def is_translate(self) -> bool:
|
||||
return self.conf.getboolean("translate", "switch")
|
||||
|
||||
def is_trailer(self) -> bool:
|
||||
return self.conf.getboolean("trailer", "switch")
|
||||
|
||||
@@ -190,18 +205,25 @@ class Config:
|
||||
return extrafanart_download
|
||||
except ValueError:
|
||||
self._exit("extrafanart_folder")
|
||||
def get_transalte_engine(self) -> str:
|
||||
return self.conf.get("transalte","engine")
|
||||
# def get_transalte_appId(self) ->str:
|
||||
# return self.conf.get("transalte","appid")
|
||||
def get_transalte_key(self) -> str:
|
||||
return self.conf.get("transalte","key")
|
||||
def get_transalte_delay(self) -> int:
|
||||
return self.conf.getint("transalte","delay")
|
||||
def transalte_values(self) -> str:
|
||||
return self.conf.get("transalte", "values")
|
||||
|
||||
def get_translate_engine(self) -> str:
|
||||
return self.conf.get("translate", "engine")
|
||||
|
||||
# def get_translate_appId(self) ->str:
|
||||
# return self.conf.get("translate","appid")
|
||||
|
||||
def get_translate_key(self) -> str:
|
||||
return self.conf.get("translate", "key")
|
||||
|
||||
def get_translate_delay(self) -> int:
|
||||
return self.conf.getint("translate", "delay")
|
||||
|
||||
def translate_values(self) -> str:
|
||||
return self.conf.get("translate", "values")
|
||||
|
||||
def get_translate_service_site(self) -> str:
|
||||
return self.conf.get("transalte", "service_site")
|
||||
return self.conf.get("translate", "service_site")
|
||||
|
||||
def proxy(self):
|
||||
try:
|
||||
sec = "proxy"
|
||||
@@ -284,21 +306,21 @@ class Config:
|
||||
def storyline_show(self) -> int:
|
||||
try:
|
||||
v = self.conf.getint("storyline", "show_result")
|
||||
return v if v in (0,1,2) else 2 if v > 2 else 0
|
||||
return v if v in (0, 1, 2) else 2 if v > 2 else 0
|
||||
except:
|
||||
return 0
|
||||
|
||||
def storyline_mode(self) -> int:
|
||||
try:
|
||||
v = self.conf.getint("storyline", "run_mode")
|
||||
return v if v in (0,1,2) else 2 if v > 2 else 0
|
||||
return v if v in (0, 1, 2) else 2 if v > 2 else 0
|
||||
except:
|
||||
return 1
|
||||
|
||||
def cc_convert_mode(self) -> int:
|
||||
try:
|
||||
v = self.conf.getint("cc_convert", "mode")
|
||||
return v if v in (0,1,2) else 2 if v > 2 else 0
|
||||
return v if v in (0, 1, 2) else 2 if v > 2 else 0
|
||||
except:
|
||||
return 1
|
||||
|
||||
@@ -320,7 +342,6 @@ class Config:
|
||||
except:
|
||||
return "hog"
|
||||
|
||||
|
||||
@staticmethod
|
||||
def _exit(sec: str) -> None:
|
||||
print("[-] Read config error! Please check the {} section in config.ini", sec)
|
||||
@@ -340,7 +361,7 @@ class Config:
|
||||
conf.set(sec1, "soft_link", "0")
|
||||
conf.set(sec1, "failed_move", "1")
|
||||
conf.set(sec1, "auto_exit", "0")
|
||||
conf.set(sec1, "transalte_to_sc", "1")
|
||||
conf.set(sec1, "translate_to_sc", "1")
|
||||
# actor_gender value: female or male or both or all(含人妖)
|
||||
conf.set(sec1, "actor_gender", "female")
|
||||
conf.set(sec1, "del_empty_folder", "1")
|
||||
@@ -358,7 +379,6 @@ class Config:
|
||||
conf.set(sec2, "type", "socks5")
|
||||
conf.set(sec2, "cacert_file", "")
|
||||
|
||||
|
||||
sec3 = "Name_Rule"
|
||||
conf.add_section(sec3)
|
||||
conf.set(sec3, "location_rule", "actor + '/' + number")
|
||||
@@ -382,7 +402,7 @@ class Config:
|
||||
conf.add_section(sec7)
|
||||
conf.set(sec7, "switch", "0")
|
||||
|
||||
sec8 = "transalte"
|
||||
sec8 = "translate"
|
||||
conf.add_section(sec8)
|
||||
conf.set(sec8, "switch", "0")
|
||||
conf.set(sec8, "engine", "google-free")
|
||||
@@ -402,8 +422,10 @@ class Config:
|
||||
|
||||
sec11 = "media"
|
||||
conf.add_section(sec11)
|
||||
conf.set(sec11, "media_type", ".mp4,.avi,.rmvb,.wmv,.mov,.mkv,.flv,.ts,.webm,.MP4,.AVI,.RMVB,.WMV,.MOV,.MKV,.FLV,.TS,.WEBM,iso,ISO")
|
||||
conf.set(sec11, "sub_type", ".smi,.srt,.idx,.sub,.sup,.psb,.ssa,.ass,.txt,.usf,.xss,.ssf,.rt,.lrc,.sbv,.vtt,.ttml")
|
||||
conf.set(sec11, "media_type",
|
||||
".mp4,.avi,.rmvb,.wmv,.mov,.mkv,.flv,.ts,.webm,.MP4,.AVI,.RMVB,.WMV,.MOV,.MKV,.FLV,.TS,.WEBM,iso,ISO")
|
||||
conf.set(sec11, "sub_type",
|
||||
".smi,.srt,.idx,.sub,.sup,.psb,.ssa,.ass,.txt,.usf,.xss,.ssf,.rt,.lrc,.sbv,.vtt,.ttml")
|
||||
|
||||
sec12 = "watermark"
|
||||
conf.add_section(sec12)
|
||||
@@ -464,7 +486,8 @@ class IniProxy():
|
||||
'''
|
||||
if self.address:
|
||||
if self.proxytype in self.SUPPORT_PROXY_TYPE:
|
||||
proxies = {"http": self.proxytype + "://" + self.address, "https": self.proxytype + "://" + self.address}
|
||||
proxies = {"http": self.proxytype + "://" + self.address,
|
||||
"https": self.proxytype + "://" + self.address}
|
||||
else:
|
||||
proxies = {"http": "http://" + self.address, "https": "https://" + self.address}
|
||||
else:
|
||||
@@ -477,13 +500,16 @@ if __name__ == "__main__":
|
||||
def evprint(evstr):
|
||||
code = compile(evstr, "<string>", "eval")
|
||||
print('{}: "{}"'.format(evstr, eval(code)))
|
||||
|
||||
|
||||
config = Config()
|
||||
mfilter = {'conf', 'proxy', '_exit', '_default_config', 'getboolean_override', 'getint_override', 'get_override', 'ini_path'}
|
||||
mfilter = {'conf', 'proxy', '_exit', '_default_config', 'getboolean_override', 'getint_override', 'get_override',
|
||||
'ini_path'}
|
||||
for _m in [m for m in dir(config) if not m.startswith('__') and m not in mfilter]:
|
||||
evprint(f'config.{_m}()')
|
||||
pfilter = {'proxies', 'SUPPORT_PROXY_TYPE'}
|
||||
# test getInstance()
|
||||
assert(getInstance() == config)
|
||||
assert (getInstance() == config)
|
||||
for _p in [p for p in dir(getInstance().proxy()) if not p.startswith('__') and p not in pfilter]:
|
||||
evprint(f'getInstance().proxy().{_p}')
|
||||
|
||||
|
||||
26
core.py
26
core.py
@@ -371,15 +371,19 @@ def print_files(path, leak_word, c_word, naming_rule, part, cn_sub, json_data, f
|
||||
moveFailedFolder(filepath)
|
||||
return
|
||||
|
||||
# 此函数从gui版copy过来用用
|
||||
# 参数说明
|
||||
# poster_path
|
||||
# thumb_path
|
||||
# cn_sub 中文字幕 参数值为 1 0
|
||||
# leak 流出 参数值为 1 0
|
||||
# uncensored 无码 参数值为 1 0
|
||||
# ========================================================================加水印
|
||||
def add_mark(poster_path, thumb_path, cn_sub, leak, uncensored, hack):
|
||||
|
||||
def add_mark(poster_path, thumb_path, cn_sub, leak, uncensored, hack) -> None:
|
||||
"""
|
||||
add watermark on poster or thumb for describe extra properties 给海报和缩略图加属性水印
|
||||
|
||||
此函数从gui版copy过来用用
|
||||
|
||||
:poster_path 海报位置
|
||||
:thumb_path 缩略图位置
|
||||
:cn_sub: 中文字幕 可选值:1,"1" 或其他值
|
||||
:uncensored 无码 可选值:1,"1" 或其他值
|
||||
:hack 破解 可选值:1,"1" 或其他值
|
||||
"""
|
||||
mark_type = ''
|
||||
if cn_sub:
|
||||
mark_type += ',字幕'
|
||||
@@ -396,6 +400,7 @@ def add_mark(poster_path, thumb_path, cn_sub, leak, uncensored, hack):
|
||||
add_mark_thread(poster_path, cn_sub, leak, uncensored, hack)
|
||||
print('[+]Poster Add Mark: ' + mark_type.strip(','))
|
||||
|
||||
|
||||
def add_mark_thread(pic_path, cn_sub, leak, uncensored, hack):
|
||||
size = 9
|
||||
img_pic = Image.open(pic_path)
|
||||
@@ -414,6 +419,7 @@ def add_mark_thread(pic_path, cn_sub, leak, uncensored, hack):
|
||||
add_to_pic(pic_path, img_pic, size, count, 4)
|
||||
img_pic.close()
|
||||
|
||||
|
||||
def add_to_pic(pic_path, img_pic, size, count, mode):
|
||||
mark_pic_path = ''
|
||||
pngpath = ''
|
||||
@@ -455,6 +461,7 @@ def add_to_pic(pic_path, img_pic, size, count, mode):
|
||||
img_pic.save(pic_path, quality=95)
|
||||
# ========================结束=================================
|
||||
|
||||
|
||||
def paste_file_to_folder(filepath, path, number, leak_word, c_word, hack_word): # 文件路径,番号,后缀,要移动至的位置
|
||||
filepath_obj = pathlib.Path(filepath)
|
||||
houzhui = filepath_obj.suffix
|
||||
@@ -546,6 +553,7 @@ def paste_file_to_folder_mode2(filepath, path, multi_part, number, part, leak_wo
|
||||
print(f'[-]OS Error errno {oserr.errno}')
|
||||
return
|
||||
|
||||
|
||||
def get_part(filepath):
|
||||
try:
|
||||
if re.search('-CD\d+', filepath):
|
||||
|
||||
@@ -2,6 +2,7 @@ import os
|
||||
import re
|
||||
import sys
|
||||
import config
|
||||
import typing
|
||||
|
||||
G_spat = re.compile(
|
||||
"^22-sht\.me|-fhd|_fhd|^fhd_|^fhd-|-hd|_hd|^hd_|^hd-|-sd|_sd|-1080p|_1080p|-720p|_720p|"
|
||||
@@ -9,30 +10,30 @@ G_spat = re.compile(
|
||||
re.IGNORECASE)
|
||||
|
||||
|
||||
def get_number(debug,file_path: str) -> str:
|
||||
# """
|
||||
# >>> from number_parser import get_number
|
||||
# >>> get_number("/Users/Guest/AV_Data_Capture/snis-829.mp4")
|
||||
# 'snis-829'
|
||||
# >>> get_number("/Users/Guest/AV_Data_Capture/snis-829-C.mp4")
|
||||
# 'snis-829'
|
||||
# >>> get_number("C:¥Users¥Guest¥snis-829.mp4")
|
||||
# 'snis-829'
|
||||
# >>> get_number("C:¥Users¥Guest¥snis-829-C.mp4")
|
||||
# 'snis-829'
|
||||
# >>> get_number("./snis-829.mp4")
|
||||
# 'snis-829'
|
||||
# >>> get_number("./snis-829-C.mp4")
|
||||
# 'snis-829'
|
||||
# >>> get_number(".¥snis-829.mp4")
|
||||
# 'snis-829'
|
||||
# >>> get_number(".¥snis-829-C.mp4")
|
||||
# 'snis-829'
|
||||
# >>> get_number("snis-829.mp4")
|
||||
# 'snis-829'
|
||||
# >>> get_number("snis-829-C.mp4")
|
||||
# 'snis-829'
|
||||
# """
|
||||
def get_number(debug: bool, file_path: str) -> str:
|
||||
"""
|
||||
从文件路径中提取番号 from number_parser import get_number
|
||||
>>> get_number(False, "/Users/Guest/AV_Data_Capture/snis-829.mp4")
|
||||
'snis-829'
|
||||
>>> get_number(False, "/Users/Guest/AV_Data_Capture/snis-829-C.mp4")
|
||||
'snis-829'
|
||||
>>> get_number(False, "C:¥Users¥Guest¥snis-829.mp4")
|
||||
'snis-829'
|
||||
>>> get_number(False, "C:¥Users¥Guest¥snis-829-C.mp4")
|
||||
'snis-829'
|
||||
>>> get_number(False, "./snis-829.mp4")
|
||||
'snis-829'
|
||||
>>> get_number(False, "./snis-829-C.mp4")
|
||||
'snis-829'
|
||||
>>> get_number(False, ".¥snis-829.mp4")
|
||||
'snis-829'
|
||||
>>> get_number(False, ".¥snis-829-C.mp4")
|
||||
'snis-829'
|
||||
>>> get_number(False, "snis-829.mp4")
|
||||
'snis-829'
|
||||
>>> get_number(False, "snis-829-C.mp4")
|
||||
'snis-829'
|
||||
"""
|
||||
filepath = os.path.basename(file_path)
|
||||
# debug True 和 False 两块代码块合并,原因是此模块及函数只涉及字符串计算,没有IO操作,debug on时输出导致异常信息即可
|
||||
try:
|
||||
@@ -69,29 +70,33 @@ def get_number(debug,file_path: str) -> str:
|
||||
|
||||
# 按javdb数据源的命名规范提取number
|
||||
G_TAKE_NUM_RULES = {
|
||||
'tokyo.*hot' : lambda x:str(re.search(r'(cz|gedo|k|n|red-|se)\d{2,4}', x, re.I).group()),
|
||||
'carib' : lambda x:str(re.search(r'\d{6}(-|_)\d{3}', x, re.I).group()).replace('_', '-'),
|
||||
'1pon|mura|paco' : lambda x:str(re.search(r'\d{6}(-|_)\d{3}', x, re.I).group()).replace('-', '_'),
|
||||
'10mu' : lambda x:str(re.search(r'\d{6}(-|_)\d{2}', x, re.I).group()).replace('-', '_'),
|
||||
'x-art' : lambda x:str(re.search(r'x-art\.\d{2}\.\d{2}\.\d{2}', x, re.I).group()),
|
||||
'xxx-av': lambda x:''.join(['xxx-av-', re.findall(r'xxx-av[^\d]*(\d{3,5})[^\d]*', x, re.I)[0]]),
|
||||
'heydouga': lambda x:'heydouga-' + '-'.join(re.findall(r'(\d{4})[\-_](\d{3,4})[^\d]*', x, re.I)[0]),
|
||||
'heyzo' : lambda x: 'HEYZO-' + re.findall(r'heyzo[^\d]*(\d{4})', x, re.I)[0]
|
||||
'tokyo.*hot': lambda x: str(re.search(r'(cz|gedo|k|n|red-|se)\d{2,4}', x, re.I).group()),
|
||||
'carib': lambda x: str(re.search(r'\d{6}(-|_)\d{3}', x, re.I).group()).replace('_', '-'),
|
||||
'1pon|mura|paco': lambda x: str(re.search(r'\d{6}(-|_)\d{3}', x, re.I).group()).replace('-', '_'),
|
||||
'10mu': lambda x: str(re.search(r'\d{6}(-|_)\d{2}', x, re.I).group()).replace('-', '_'),
|
||||
'x-art': lambda x: str(re.search(r'x-art\.\d{2}\.\d{2}\.\d{2}', x, re.I).group()),
|
||||
'xxx-av': lambda x: ''.join(['xxx-av-', re.findall(r'xxx-av[^\d]*(\d{3,5})[^\d]*', x, re.I)[0]]),
|
||||
'heydouga': lambda x: 'heydouga-' + '-'.join(re.findall(r'(\d{4})[\-_](\d{3,4})[^\d]*', x, re.I)[0]),
|
||||
'heyzo': lambda x: 'HEYZO-' + re.findall(r'heyzo[^\d]*(\d{4})', x, re.I)[0]
|
||||
}
|
||||
|
||||
def get_number_by_dict(filename: str) -> str:
|
||||
|
||||
def get_number_by_dict(filename: str) -> typing.Optional[str]:
|
||||
try:
|
||||
for k,v in G_TAKE_NUM_RULES.items():
|
||||
for k, v in G_TAKE_NUM_RULES.items():
|
||||
if re.search(k, filename, re.I):
|
||||
return v(filename)
|
||||
except:
|
||||
pass
|
||||
return None
|
||||
|
||||
|
||||
class Cache_uncensored_conf:
|
||||
prefix = None
|
||||
|
||||
def is_empty(self):
|
||||
return bool(self.prefix is None)
|
||||
|
||||
def set(self, v: list):
|
||||
if not v or not len(v) or not len(v[0]):
|
||||
raise ValueError('input prefix list empty or None')
|
||||
@@ -100,17 +105,20 @@ class Cache_uncensored_conf:
|
||||
for i in v[1:]:
|
||||
s += f"|{i}.+"
|
||||
self.prefix = re.compile(s, re.I)
|
||||
|
||||
def check(self, number):
|
||||
if self.prefix is None:
|
||||
raise ValueError('No init re compile')
|
||||
return self.prefix.match(number)
|
||||
|
||||
|
||||
G_cache_uncensored_conf = Cache_uncensored_conf()
|
||||
|
||||
|
||||
# ========================================================================是否为无码
|
||||
def is_uncensored(number):
|
||||
if re.match(
|
||||
r'[\d-]{4,}|\d{6}_\d{2,3}|(cz|gedo|k|n|red-|se)\d{2,4}|heyzo.+|xxx-av-.+|heydouga-.+|x-art\.\d{2}\.\d{2}\.\d{2}',
|
||||
r'[\d-]{4,}|\d{6}_\d{2,3}|(cz|gedo|k|n|red-|se)\d{2,4}|heyzo.+|xxx-av-.+|heydouga-.+|x-art\.\d{2}\.\d{2}\.\d{2}',
|
||||
number,
|
||||
re.I
|
||||
):
|
||||
@@ -119,9 +127,10 @@ r'[\d-]{4,}|\d{6}_\d{2,3}|(cz|gedo|k|n|red-|se)\d{2,4}|heyzo.+|xxx-av-.+|heydoug
|
||||
G_cache_uncensored_conf.set(config.getInstance().get_uncensored().split(','))
|
||||
return G_cache_uncensored_conf.check(number)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
# import doctest
|
||||
# doctest.testmod(raise_on_error=True)
|
||||
# import doctest
|
||||
# doctest.testmod(raise_on_error=True)
|
||||
test_use_cases = (
|
||||
"MEYD-594-C.mp4",
|
||||
"SSIS-001_C.mp4",
|
||||
@@ -145,13 +154,17 @@ if __name__ == "__main__":
|
||||
"pacopacomama-093021_539-FHD.mkv", # 新支持片商格式 093021_539 命名规则来自javdb数据源
|
||||
"sbw99.cc@heyzo_hd_2636_full.mp4"
|
||||
)
|
||||
|
||||
|
||||
def evprint(evstr):
|
||||
code = compile(evstr, "<string>", "eval")
|
||||
print("{1:>20} # '{0}'".format(evstr[18:-2], eval(code)))
|
||||
|
||||
|
||||
for t in test_use_cases:
|
||||
evprint(f'get_number(True, "{t}")')
|
||||
|
||||
if len(sys.argv)<=1 or not re.search('^[A-Z]:?', sys.argv[1], re.IGNORECASE):
|
||||
if len(sys.argv) <= 1 or not re.search('^[A-Z]:?', sys.argv[1], re.IGNORECASE):
|
||||
sys.exit(0)
|
||||
|
||||
# 使用Everything的ES命令行工具搜集全盘视频文件名作为用例测试number数据,参数为盘符 A .. Z 或带盘符路径
|
||||
@@ -170,6 +183,7 @@ if __name__ == "__main__":
|
||||
# 示例:
|
||||
# python3 ./number_parser.py ALL
|
||||
import subprocess
|
||||
|
||||
ES_search_path = "ALL disks"
|
||||
if sys.argv[1] == "ALL":
|
||||
if sys.platform == "win32":
|
||||
@@ -181,10 +195,11 @@ if __name__ == "__main__":
|
||||
out_list = out_text.splitlines()
|
||||
elif sys.platform in ("linux", "darwin"):
|
||||
ES_prog_path = 'locate' if sys.platform == 'linux' else 'glocate'
|
||||
ES_cmdline = r"{} -b -i --regex '\.mp4$|\.avi$|\.rmvb$|\.wmv$|\.mov$|\.mkv$|\.webm$|\.iso$|\.mpg$|\.m4v$'".format(ES_prog_path)
|
||||
ES_cmdline = r"{} -b -i --regex '\.mp4$|\.avi$|\.rmvb$|\.wmv$|\.mov$|\.mkv$|\.webm$|\.iso$|\.mpg$|\.m4v$'".format(
|
||||
ES_prog_path)
|
||||
out_bytes = subprocess.check_output(ES_cmdline.split(' '))
|
||||
out_text = out_bytes.decode('utf-8')
|
||||
out_list = [ os.path.basename(line) for line in out_text.splitlines()]
|
||||
out_list = [os.path.basename(line) for line in out_text.splitlines()]
|
||||
else:
|
||||
print('[-]Unsupported platform! Please run on OS Windows/Linux/MacOSX. Exit.')
|
||||
sys.exit(1)
|
||||
|
||||
Reference in New Issue
Block a user