From c6efec91dd6dea924b88392431ff1eed70bff39d Mon Sep 17 00:00:00 2001 From: lededev Date: Sun, 26 Sep 2021 04:25:25 +0800 Subject: [PATCH] =?UTF-8?q?=E6=96=B0=E5=A2=9E=E5=A4=B1=E8=B4=A5=E6=96=87?= =?UTF-8?q?=E4=BB=B6=E5=88=97=E8=A1=A8=E4=BB=A5=E9=81=BF=E5=85=8D=E9=87=8D?= =?UTF-8?q?=E5=A4=8D=E5=88=AE=E5=89=8A=EF=BC=8C=E6=A8=A1=E5=BC=8F3?= =?UTF-8?q?=E4=B8=8E=E8=BD=AF=E8=BF=9E=E6=8E=A5=E9=80=82=E7=94=A8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- AV_Data_Capture.py | 50 +++++++++++++++++++++++++++--------------- WebCrawler/__init__.py | 8 +++---- core.py | 46 +++++++++++++++++++------------------- 3 files changed, 60 insertions(+), 44 deletions(-) diff --git a/AV_Data_Capture.py b/AV_Data_Capture.py index 7ce0bb1..4fbf25e 100755 --- a/AV_Data_Capture.py +++ b/AV_Data_Capture.py @@ -126,26 +126,40 @@ def close_logfile(logdir: str): -G_trailerRE = re.compile(r'-trailer\.', re.IGNORECASE) - -def movie_lists(root, escape_folder): - if os.path.basename(root) in escape_folder: - return [] +# 重写视频文件扫描,消除递归,取消全局变量,新增失败文件列表跳过处理 +def movie_lists(root, conf): + escape_folder = re.split("[,,]", conf.escape_folder()) + failed_folder = conf.failed_folder() + main_mode = conf.main_mode() total = [] file_type = conf.media_type().upper().split(",") - dirs = os.listdir(root) - for entry in dirs: - f = os.path.join(root, entry) - if os.path.isdir(f): - total += movie_lists(f, escape_folder) - elif os.path.splitext(f)[1].upper() in file_type: - absf = os.path.abspath(f) - if conf.main_mode() == 3 and conf.mode3_nfo_skip_days() > 0: - nfo = Path(absf).with_suffix('.nfo') - if file_modification_days(nfo) <= conf.mode3_nfo_skip_days(): + trailerRE = re.compile(r'-trailer\.', re.IGNORECASE) + try: + failed_list = open(os.path.join(failed_folder, 'failed_list.txt'), 'r', encoding='utf-8').read().splitlines() + except: + failed_list = [] + pass + for current_dir, subdirs, files in os.walk(root, topdown=False): + try: + if current_dir in escape_folder: + continue + for f in files: + full_name = os.path.join(current_dir, f) + if not os.path.splitext(full_name)[1].upper() in file_type: continue - if (conf.main_mode() == 3 or not is_link(absf)) and not G_trailerRE.search(f): - total.append(absf) + absf = os.path.abspath(full_name) + if absf in failed_list: + if conf.debug(): + print('[!]Skip failed file:', absf) + continue + if main_mode == 3 and conf.mode3_nfo_skip_days() > 0: + nfo = Path(absf).with_suffix('.nfo') + if file_modification_days(nfo) <= conf.mode3_nfo_skip_days(): + continue + if (main_mode == 3 or not is_link(absf)) and not trailerRE.search(f): + total.append(absf) + except: + pass return total @@ -275,7 +289,7 @@ if __name__ == '__main__': if folder_path == '': folder_path = os.path.abspath(".") - movie_list = movie_lists(folder_path, re.split("[,,]", conf.escape_folder())) + movie_list = movie_lists(folder_path, conf) count = 0 count_all = str(len(movie_list)) diff --git a/WebCrawler/__init__.py b/WebCrawler/__init__.py index 364f7c3..cadc5d6 100644 --- a/WebCrawler/__init__.py +++ b/WebCrawler/__init__.py @@ -126,8 +126,8 @@ def get_data_from_json(file_number, conf: config.Config): # 从JSON返回元数 # Return if data not found in all sources if not json_data: - print('[-]Movie Data not found!') - return + print('[-]Movie Number not found!') + return None # ================================================网站规则添加结束================================================ @@ -165,8 +165,8 @@ def get_data_from_json(file_number, conf: config.Config): # 从JSON返回元数 actor = str(actor_list).strip("[ ]").replace("'", '').replace(" ", '') if title == '' or number == '': - print('[-]Movie Data not found!') - return + print('[-]Movie Number or Title not found!') + return None # if imagecut == '3': # DownloadFileWithFilename() diff --git a/core.py b/core.py index f549f35..ea92cb5 100755 --- a/core.py +++ b/core.py @@ -22,18 +22,20 @@ def escape_path(path, escape_literals: str): # Remove escape literals return path -def moveFailedFolder(filepath): - conf = config.Config() - if conf.failed_move(): - failed_folder = conf.failed_folder() +def moveFailedFolder(filepath, conf): + failed_folder = conf.failed_folder() + soft_link = conf.soft_link() + # 模式3或软连接,改为维护一个失败列表,启动扫描时加载用于排除该路径,以免反复处理 + # 原先的创建软连接到失败目录,并不直观,不方便找到失败文件位置,不如直接记录该文件路径 + if conf.main_mode() == 3 or soft_link: + with open(os.path.join(failed_folder, 'failed_list.txt'), 'a', encoding='utf-8') as m3f: + m3f.write(f'{filepath}\n') + m3f.close() + print('[-]Add to failed list file') + elif conf.failed_move() and not soft_link: file_name = os.path.basename(filepath) - if conf.soft_link(): - print('[-]Create symlink to Failed output folder') - os.symlink(filepath, os.path.join(failed_folder, file_name)) - else: - print('[-]Move to Failed output folder') - shutil.move(filepath, os.path.join(failed_folder, file_name)) - return + print('[-]Move to Failed output folder') + shutil.move(filepath, os.path.join(failed_folder, file_name)) def get_info(json_data): # 返回json里的数据 @@ -112,7 +114,7 @@ def download_file_with_filename(url, filename, path, conf: config.Config, filepa 'User-Agent': G_USER_AGENT} r = requests.get(url, headers=headers, timeout=configProxy.timeout, proxies=proxies) if r == '': - print('[-]Movie Data not found!') + print('[-]Movie Download Data not found!') return with open(os.path.join(path, filename), "wb") as code: code.write(r.content) @@ -124,7 +126,7 @@ def download_file_with_filename(url, filename, path, conf: config.Config, filepa 'User-Agent': G_USER_AGENT} r = requests.get(url, timeout=configProxy.timeout, headers=headers) if r == '': - print('[-]Movie Data not found!') + print('[-]Movie Download Data not found!') return with open(os.path.join(path, filename), "wb") as code: code.write(r.content) @@ -142,7 +144,7 @@ def download_file_with_filename(url, filename, path, conf: config.Config, filepa i += 1 print('[-]Image Download : Connect retry ' + str(i) + '/' + str(configProxy.retry)) print('[-]Connect Failed! Please check your Proxy or Network!') - moveFailedFolder(filepath) + moveFailedFolder(filepath, conf) return def trailer_download(trailer, leak_word, c_word, number, path, filepath, conf: config.Config): @@ -168,7 +170,7 @@ def extrafanart_download(data, path, conf: config.Config, filepath): jpg_filename = f'extrafanart-{j}.jpg' jpg_fullpath = os.path.join(path, jpg_filename) if download_file_with_filename(url, jpg_filename, path, conf, filepath) == 'failed': - moveFailedFolder(filepath) + moveFailedFolder(filepath, conf) return configProxy = conf.proxy() for i in range(configProxy.retry): @@ -190,7 +192,7 @@ def image_download(cover, number, leak_word, c_word, path, conf: config.Config, filename = f"{number}{leak_word}{c_word}-fanart.jpg" full_filepath = os.path.join(path, filename) if download_file_with_filename(cover, filename, path, conf, filepath) == 'failed': - moveFailedFolder(filepath) + moveFailedFolder(filepath, conf) return configProxy = conf.proxy() @@ -276,12 +278,12 @@ def print_files(path, leak_word, c_word, naming_rule, part, cn_sub, json_data, f except IOError as e: print("[-]Write Failed!") print(e) - moveFailedFolder(filepath) + moveFailedFolder(filepath, conf) return except Exception as e1: print(e1) print("[-]Write Failed!") - moveFailedFolder(filepath) + moveFailedFolder(filepath, conf) return @@ -450,7 +452,7 @@ def paste_file_to_folder_mode2(filepath, path, multi_part, number, part, leak_wo print('[-]OS Error errno ' + oserr.errno) return -def get_part(filepath): +def get_part(filepath, conf): try: if re.search('-CD\d+', filepath): return re.findall('-CD\d+', filepath)[0] @@ -458,7 +460,7 @@ def get_part(filepath): return re.findall('-cd\d+', filepath)[0] except: print("[-]failed!Please rename the filename again!") - moveFailedFolder(filepath) + moveFailedFolder(filepath, conf) return @@ -496,7 +498,7 @@ def core_main(file_path, number_th, conf: config.Config): # Return if blank dict returned (data not found) if not json_data: - moveFailedFolder(filepath) + moveFailedFolder(filepath, conf) return if json_data["number"] != number: @@ -511,7 +513,7 @@ def core_main(file_path, number_th, conf: config.Config): # =======================================================================判断-C,-CD后缀 if '-CD' in filepath or '-cd' in filepath: multi_part = 1 - part = get_part(filepath) + part = get_part(filepath, conf) if '-c.' in filepath or '-C.' in filepath or '中文' in filepath or '字幕' in filepath: cn_sub = '1' c_word = '-C' # 中文字幕影片后缀