Merge branch 'master' of https://github.com/yoshiko2/av_data_capture
This commit is contained in:
42
core.py
Executable file → Normal file
42
core.py
Executable file → Normal file
@@ -9,7 +9,6 @@ from PIL import Image
|
||||
from io import BytesIO
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
|
||||
from ADC_function import *
|
||||
from WebCrawler import get_data_from_json
|
||||
@@ -216,33 +215,24 @@ def extrafanart_download_one_by_one(data, path, filepath):
|
||||
if conf.debug():
|
||||
print(f'[!]Extrafanart download one by one mode runtime {time.perf_counter() - tm_start:.3f}s')
|
||||
|
||||
def download_one_file(args):
|
||||
def _inner(url: str, save_path: Path):
|
||||
filebytes = get_html(url, return_type='content')
|
||||
if isinstance(filebytes, bytes) and len(filebytes):
|
||||
if len(filebytes) == save_path.open('wb').write(filebytes):
|
||||
return str(save_path)
|
||||
return _inner(*args)
|
||||
|
||||
def extrafanart_download_threadpool(url_list, save_dir, number):
|
||||
tm_start = time.perf_counter()
|
||||
conf = config.getInstance()
|
||||
extrafanart_dir = Path(save_dir) / conf.get_extrafanart()
|
||||
download_only_missing_images = conf.download_only_missing_images()
|
||||
mp_args = []
|
||||
dn_list = []
|
||||
for i, url in enumerate(url_list, start=1):
|
||||
jpg_fullpath = extrafanart_dir / f'extrafanart-{i}.jpg'
|
||||
if download_only_missing_images and not file_not_exist_or_empty(jpg_fullpath):
|
||||
continue
|
||||
mp_args.append((url, jpg_fullpath))
|
||||
if not len(mp_args):
|
||||
dn_list.append((url, jpg_fullpath))
|
||||
if not len(dn_list):
|
||||
return
|
||||
extrafanart_dir.mkdir(parents=True, exist_ok=True)
|
||||
parallel = min(len(mp_args), conf.extrafanart_thread_pool_download())
|
||||
parallel = min(len(dn_list), conf.extrafanart_thread_pool_download())
|
||||
if parallel > 100:
|
||||
print('[!]Warrning: Parallel download thread too large may cause website ban IP!')
|
||||
with ThreadPoolExecutor(parallel) as pool:
|
||||
result = list(pool.map(download_one_file, mp_args))
|
||||
result = parallel_download_files(dn_list, parallel)
|
||||
failed = 0
|
||||
for i, r in enumerate(result, start=1):
|
||||
if not r:
|
||||
@@ -255,6 +245,7 @@ def extrafanart_download_threadpool(url_list, save_dir, number):
|
||||
if conf.debug():
|
||||
print(f'[!]Extrafanart download ThreadPool mode runtime {time.perf_counter() - tm_start:.3f}s')
|
||||
|
||||
|
||||
# 封面是否下载成功,否则移动到failed
|
||||
def image_download(cover, number, leak_word, c_word, path, filepath):
|
||||
filename = f"{number}{leak_word}{c_word}-fanart.jpg"
|
||||
@@ -299,7 +290,11 @@ def print_files(path, leak_word, c_word, naming_rule, part, cn_sub, json_data, f
|
||||
with open(nfo_path, "wt", encoding='UTF-8') as code:
|
||||
print('<?xml version="1.0" encoding="UTF-8" ?>', file=code)
|
||||
print("<movie>", file=code)
|
||||
print(" <title>" + naming_rule + "</title>", file=code)
|
||||
print(" <title>" + naming_rule + "</title>", file=code)
|
||||
print(" <originaltitle>" + naming_rule + "</originaltitle>", file=code)
|
||||
print(" <sorttitle>" + naming_rule + "</sorttitle>", file=code)
|
||||
print(" <customrating>JP-18+</customrating>", file=code)
|
||||
print(" <mpaa>JP-18+</mpaa>", file=code)
|
||||
print(" <set>", file=code)
|
||||
print(" </set>", file=code)
|
||||
print(" <studio>" + studio + "</studio>", file=code)
|
||||
@@ -314,7 +309,7 @@ def print_files(path, leak_word, c_word, naming_rule, part, cn_sub, json_data, f
|
||||
try:
|
||||
for key in actor_list:
|
||||
print(" <actor>", file=code)
|
||||
print(" <name>" + key + "</name>", file=code)
|
||||
print(" <name>" + key + "</name>", file=code)
|
||||
print(" </actor>", file=code)
|
||||
except:
|
||||
aaaa = ''
|
||||
@@ -346,6 +341,8 @@ def print_files(path, leak_word, c_word, naming_rule, part, cn_sub, json_data, f
|
||||
aaaaaaaa = ''
|
||||
print(" <num>" + number + "</num>", file=code)
|
||||
print(" <premiered>" + release + "</premiered>", file=code)
|
||||
print(" <releasedate>" + release + "</releasedate>", file=code)
|
||||
print(" <release>" + release + "</release>", file=code)
|
||||
print(" <cover>" + cover + "</cover>", file=code)
|
||||
if config.getInstance().is_trailer():
|
||||
print(" <trailer>" + trailer + "</trailer>", file=code)
|
||||
@@ -564,16 +561,19 @@ def get_part(filepath):
|
||||
|
||||
def debug_print(data: json):
|
||||
try:
|
||||
print("[+] ---Debug info---")
|
||||
print("[+] ------- DEBUG INFO -------")
|
||||
for i, v in data.items():
|
||||
if i == 'outline':
|
||||
print('[+] -', i, ' :', len(v), 'characters')
|
||||
print('[+] -', "%-14s" % i, ':', len(v), 'characters')
|
||||
continue
|
||||
if i == 'actor_photo' or i == 'year':
|
||||
continue
|
||||
print('[+] -', "%-11s" % i, ':', v)
|
||||
if i == 'extrafanart':
|
||||
print('[+] -', "%-14s" % i, ':', len(v), 'links')
|
||||
continue
|
||||
print('[+] -', "%-14s" % i, ':', v)
|
||||
|
||||
print("[+] ---Debug info---")
|
||||
print("[+] ------- DEBUG INFO -------")
|
||||
except:
|
||||
pass
|
||||
|
||||
|
||||
Reference in New Issue
Block a user