From c6efec91dd6dea924b88392431ff1eed70bff39d Mon Sep 17 00:00:00 2001
From: lededev <lededev@noreplay.github.com>
Date: Sun, 26 Sep 2021 04:25:25 +0800
Subject: [PATCH] =?UTF-8?q?=E6=96=B0=E5=A2=9E=E5=A4=B1=E8=B4=A5=E6=96=87?=
 =?UTF-8?q?=E4=BB=B6=E5=88=97=E8=A1=A8=E4=BB=A5=E9=81=BF=E5=85=8D=E9=87=8D?=
 =?UTF-8?q?=E5=A4=8D=E5=88=AE=E5=89=8A=EF=BC=8C=E6=A8=A1=E5=BC=8F3?=
 =?UTF-8?q?=E4=B8=8E=E8=BD=AF=E8=BF=9E=E6=8E=A5=E9=80=82=E7=94=A8?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 AV_Data_Capture.py     | 50 +++++++++++++++++++++++++++---------------
 WebCrawler/__init__.py |  8 +++----
 core.py                | 46 +++++++++++++++++++-------------------
 3 files changed, 60 insertions(+), 44 deletions(-)

diff --git a/AV_Data_Capture.py b/AV_Data_Capture.py
index 7ce0bb1..4fbf25e 100755
--- a/AV_Data_Capture.py
+++ b/AV_Data_Capture.py
@@ -126,26 +126,40 @@ def close_logfile(logdir: str):
 
 
 
-G_trailerRE = re.compile(r'-trailer\.', re.IGNORECASE)
-
-def movie_lists(root, escape_folder):
-    if os.path.basename(root) in escape_folder:
-        return []
+def movie_lists(root, conf):
+    """Return absolute paths of the media files under ``root`` that should be processed, skipping escaped
+    folders, paths recorded in failed_list.txt, '-trailer.' files and (outside mode 3) is_link() files.
+    In mode 3, files whose .nfo is within conf.mode3_nfo_skip_days() are skipped as well."""
+    main_mode = conf.main_mode()
     total = []
     file_type = conf.media_type().upper().split(",")
-    dirs = os.listdir(root)
-    for entry in dirs:
-        f = os.path.join(root, entry)
-        if os.path.isdir(f):
-            total += movie_lists(f, escape_folder)
-        elif os.path.splitext(f)[1].upper() in file_type:
-            absf = os.path.abspath(f)
-            if conf.main_mode() == 3 and conf.mode3_nfo_skip_days() > 0:
-                nfo = Path(absf).with_suffix('.nfo')
-                if file_modification_days(nfo) <= conf.mode3_nfo_skip_days():
+    escape_folder = set(filter(None, re.split("[,，]", conf.escape_folder())))  # folder *names*, not paths
+    trailerRE = re.compile(r'-trailer\.', re.IGNORECASE)
+    try:  # failed_list.txt is optional; a missing/unreadable file just means nothing is skipped
+        failed_list = set(Path(conf.failed_folder(), 'failed_list.txt').read_text(encoding='utf-8').splitlines())
+    except (OSError, UnicodeError):
+        failed_list = set()
+    for current_dir, subdirs, files in os.walk(root):  # top-down so escaped subtrees can be pruned
+        try:
+            if os.path.basename(current_dir) in escape_folder:
+                subdirs.clear()  # os.walk will not descend into an escaped folder
+                continue
+            for f in files:
+                if os.path.splitext(f)[1].upper() not in file_type:
                     continue
-            if (conf.main_mode() == 3 or not is_link(absf)) and not G_trailerRE.search(f):
-                total.append(absf)
+                absf = os.path.abspath(os.path.join(current_dir, f))
+                if absf in failed_list:
+                    if conf.debug():
+                        print('[!]Skip failed file:', absf)
+                    continue
+                if main_mode == 3 and conf.mode3_nfo_skip_days() > 0:
+                    nfo = Path(absf).with_suffix('.nfo')
+                    if file_modification_days(nfo) <= conf.mode3_nfo_skip_days():
+                        continue
+                if (main_mode == 3 or not is_link(absf)) and not trailerRE.search(f):
+                    total.append(absf)
+        except Exception as e:  # best effort: one unreadable folder must not abort the whole scan
+            print('[!]Skip folder on scan error:', current_dir, e)
     return total
 
 
@@ -275,7 +289,7 @@ if __name__ == '__main__':
         if folder_path == '':
             folder_path = os.path.abspath(".")
 
-        movie_list = movie_lists(folder_path, re.split("[,，]", conf.escape_folder()))
+        movie_list = movie_lists(folder_path, conf)
 
         count = 0
         count_all = str(len(movie_list))
diff --git a/WebCrawler/__init__.py b/WebCrawler/__init__.py
index 364f7c3..cadc5d6 100644
--- a/WebCrawler/__init__.py
+++ b/WebCrawler/__init__.py
@@ -126,8 +126,8 @@ def get_data_from_json(file_number, conf: config.Config):  # 从JSON返回元数
 
     # Return if data not found in all sources
     if not json_data:
-        print('[-]Movie Data not found!')
-        return
+        print('[-]Movie Number not found!')
+        return None
 
     # ================================================网站规则添加结束================================================
 
@@ -165,8 +165,8 @@ def get_data_from_json(file_number, conf: config.Config):  # 从JSON返回元数
     actor = str(actor_list).strip("[ ]").replace("'", '').replace(" ", '')
 
     if title == '' or number == '':
-        print('[-]Movie Data not found!')
-        return
+        print('[-]Movie Number or Title not found!')
+        return None
 
     # if imagecut == '3':
     #     DownloadFileWithFilename()
diff --git a/core.py b/core.py
index f549f35..ea92cb5 100755
--- a/core.py
+++ b/core.py
@@ -22,18 +22,20 @@ def escape_path(path, escape_literals: str):  # Remove escape literals
     return path
 
 
-def moveFailedFolder(filepath):
-    conf = config.Config()
-    if conf.failed_move():
-        failed_folder = conf.failed_folder()
+def moveFailedFolder(filepath, conf):
+    """Record (mode 3 / soft link) or move (failed_move enabled) a file whose processing failed.
+
+    failed_list.txt in the failed folder is loaded by movie_lists() so these paths are skipped on later scans.
+    """
+    failed_folder = conf.failed_folder()
+    if conf.main_mode() == 3 or conf.soft_link():  # file stays in place; only its path is recorded
+        with open(os.path.join(failed_folder, 'failed_list.txt'), 'a', encoding='utf-8') as m3f:
+            m3f.write(f'{filepath}\n')  # the with-block closes the file; no explicit close() needed
+        print('[-]Add to failed list file')
+    elif conf.failed_move():  # soft_link is necessarily false on this branch
         file_name = os.path.basename(filepath)
-        if conf.soft_link():
-            print('[-]Create symlink to Failed output folder')
-            os.symlink(filepath, os.path.join(failed_folder, file_name))
-        else:
-            print('[-]Move to Failed output folder')
-            shutil.move(filepath, os.path.join(failed_folder, file_name))
-    return
+        print('[-]Move to Failed output folder')
+        shutil.move(filepath, os.path.join(failed_folder, file_name))
 
 
 def get_info(json_data):  # 返回json里的数据
@@ -112,7 +114,7 @@ def download_file_with_filename(url, filename, path, conf: config.Config, filepa
                     'User-Agent': G_USER_AGENT}
                 r = requests.get(url, headers=headers, timeout=configProxy.timeout, proxies=proxies)
                 if r == '':
-                    print('[-]Movie Data not found!')
+                    print('[-]Movie Download Data not found!')
                     return
                 with open(os.path.join(path, filename), "wb") as code:
                     code.write(r.content)
@@ -124,7 +126,7 @@ def download_file_with_filename(url, filename, path, conf: config.Config, filepa
                     'User-Agent': G_USER_AGENT}
                 r = requests.get(url, timeout=configProxy.timeout, headers=headers)
                 if r == '':
-                    print('[-]Movie Data not found!')
+                    print('[-]Movie Download Data not found!')
                     return
                 with open(os.path.join(path, filename), "wb") as code:
                     code.write(r.content)
@@ -142,7 +144,7 @@ def download_file_with_filename(url, filename, path, conf: config.Config, filepa
             i += 1
             print('[-]Image Download :  Connect retry ' + str(i) + '/' + str(configProxy.retry))
     print('[-]Connect Failed! Please check your Proxy or Network!')
-    moveFailedFolder(filepath)
+    moveFailedFolder(filepath, conf)
     return
 
 def trailer_download(trailer, leak_word, c_word, number, path, filepath, conf: config.Config):
@@ -168,7 +170,7 @@ def extrafanart_download(data, path, conf: config.Config, filepath):
         jpg_filename = f'extrafanart-{j}.jpg'
         jpg_fullpath = os.path.join(path, jpg_filename)
         if download_file_with_filename(url, jpg_filename, path, conf, filepath) == 'failed':
-            moveFailedFolder(filepath)
+            moveFailedFolder(filepath, conf)
             return
         configProxy = conf.proxy()
         for i in range(configProxy.retry):
@@ -190,7 +192,7 @@ def image_download(cover, number, leak_word, c_word, path, conf: config.Config,
     filename = f"{number}{leak_word}{c_word}-fanart.jpg"
     full_filepath = os.path.join(path, filename)
     if download_file_with_filename(cover, filename, path, conf, filepath) == 'failed':
-        moveFailedFolder(filepath)
+        moveFailedFolder(filepath, conf)
         return
 
     configProxy = conf.proxy()
@@ -276,12 +278,12 @@ def print_files(path, leak_word, c_word, naming_rule, part, cn_sub, json_data, f
     except IOError as e:
         print("[-]Write Failed!")
         print(e)
-        moveFailedFolder(filepath)
+        moveFailedFolder(filepath, conf)
         return
     except Exception as e1:
         print(e1)
         print("[-]Write Failed!")
-        moveFailedFolder(filepath)
+        moveFailedFolder(filepath, conf)
         return
 
 
@@ -450,7 +452,7 @@ def paste_file_to_folder_mode2(filepath, path, multi_part, number, part, leak_wo
         print('[-]OS Error errno ' + oserr.errno)
         return
 
-def get_part(filepath):
+def get_part(filepath, conf):
     try:
         if re.search('-CD\d+', filepath):
             return re.findall('-CD\d+', filepath)[0]
@@ -458,7 +460,7 @@ def get_part(filepath):
             return re.findall('-cd\d+', filepath)[0]
     except:
         print("[-]failed!Please rename the filename again!")
-        moveFailedFolder(filepath)
+        moveFailedFolder(filepath, conf)
         return
 
 
@@ -496,7 +498,7 @@ def core_main(file_path, number_th, conf: config.Config):
 
     # Return if blank dict returned (data not found)
     if not json_data:
-        moveFailedFolder(filepath)
+        moveFailedFolder(filepath, conf)
         return
 
     if json_data["number"] != number:
@@ -511,7 +513,7 @@ def core_main(file_path, number_th, conf: config.Config):
     # =======================================================================判断-C,-CD后缀
     if '-CD' in filepath or '-cd' in filepath:
         multi_part = 1
-        part = get_part(filepath)
+        part = get_part(filepath, conf)
     if '-c.' in filepath or '-C.' in filepath or '中文' in filepath or '字幕' in filepath:
         cn_sub = '1'
         c_word = '-C'  # 中文字幕影片后缀