Make webcrawler clear

2021-07-29 10:28:25 +08:00
parent 2c41487a4e
commit 08df7383a5
2 changed files with 34 additions and 51 deletions
@@ -1,5 +1,3 @@
-import sys
-sys.path.append('..')
 import json
 import re
 from multiprocessing.pool import ThreadPool
@@ -34,7 +32,7 @@ def get_data_state(data: dict) -> bool:  # 元数据获取失败检测

    return True

-def get_data_from_json(file_number, filepath, conf: config.Config):  # 从JSON返回元数据
+def get_data_from_json(file_number, conf: config.Config):  # 从JSON返回元数据
    """
    iterate through all services and fetch the data
    """
@@ -134,20 +132,20 @@ def get_data_from_json(file_number, filepath, conf: config.Config):  # 从JSON
    series = json_data.get('series')
    year = json_data.get('year')

-    if json_data.get('cover_small') == None:
-        cover_small = ''
-    else:
+    if json_data.get('cover_small'):
        cover_small = json_data.get('cover_small')
-
-    if json_data.get('trailer') == None:
-        trailer = ''
    else:
+        cover_small = ''
+
+    if json_data.get('trailer'):
        trailer = json_data.get('trailer')
-
-    if json_data.get('extrafanart') == None:
-        extrafanart = ''
    else:
+        trailer = ''
+
+    if json_data.get('extrafanart'):
        extrafanart = json_data.get('extrafanart')
+    else:
+        extrafanart = ''

    imagecut = json_data.get('imagecut')
    tag = str(json_data.get('tag')).strip("[ ]").replace("'", '').replace(" ", '').split(',')  # 字符串转列表 @
@@ -214,25 +212,17 @@ def get_data_from_json(file_number, filepath, conf: config.Config):  # 从JSON
    studio = studio.replace('/',' ')
    # ===  替换Studio片假名 END

-    location_rule = eval(conf.location_rule())
-
-    if 'actor' in conf.location_rule() and len(actor) > 100:
-        print(conf.location_rule())
-        location_rule = eval(conf.location_rule().replace("actor","'多人作品'"))
-    maxlen = conf.max_title_len()
-    if 'title' in conf.location_rule() and len(title) > maxlen:
-        shorttitle = title[0:maxlen]
-        location_rule = location_rule.replace(title, shorttitle)
-
    # 返回处理后的json_data
    json_data['title'] = title
    json_data['actor'] = actor
    json_data['release'] = release
    json_data['cover_small'] = cover_small
    json_data['tag'] = tag
-    json_data['location_rule'] = location_rule
    json_data['year'] = year
    json_data['actor_list'] = actor_list
+    json_data['trailer'] = trailer
+    json_data['extrafanart'] = extrafanart
+
    if conf.is_transalte():
        translate_values = conf.transalte_values().split(",")
        for translate_value in translate_values:
@@ -257,27 +247,12 @@ def get_data_from_json(file_number, filepath, conf: config.Config):  # 从JSON
            else:
                json_data[translate_value] = translate(json_data[translate_value])

-    if conf.is_trailer():
-        if trailer:
-            json_data['trailer'] = trailer
-        else:
-            json_data['trailer'] = ''
-    else:
-        json_data['trailer'] = ''
-
-    if conf.is_extrafanart():
-        if extrafanart:
-            json_data['extrafanart'] = extrafanart
-        else:
-            json_data['extrafanart'] = ''
-    else:
-        json_data['extrafanart'] = ''
-
    naming_rule=""
    for i in conf.naming_rule().split("+"):
        if i not in json_data:
            naming_rule += i.strip("'").strip('"')
        else:
            naming_rule += json_data.get(i)
+
    json_data['naming_rule'] = naming_rule
    return json_data
@@ -58,12 +58,20 @@ def small_cover_check(path, number, cover_small, leak_word, c_word, conf: config
    print('[+]Image Downloaded! ' + path + '/' + number + leak_word + c_word + '-poster.jpg')


-def create_folder(success_folder, location_rule, json_data, conf: config.Config):  # 创建文件夹
+def create_folder(json_data, conf: config.Config):  # 创建文件夹
    title, studio, year, outline, runtime, director, actor_photo, release, number, cover, trailer, website, series, label = get_info(json_data)
-    if len(location_rule) > 240:  # 新建成功输出文件夹
-        path = success_folder + '/' + location_rule.replace("'actor'", "'manypeople'", 3).replace("actor","'manypeople'",3)  # path为影片+元数据所在目录
-    else:
-        path = success_folder + '/' + location_rule
+    success_folder = conf.success_folder()
+    actor = json_data.get('actor')
+    location_rule = eval(conf.location_rule(), json_data)
+    if 'actor' in conf.location_rule() and len(actor) > 100:
+        print(conf.location_rule())
+        location_rule = eval(conf.location_rule().replace("actor","'多人作品'"), json_data)
+    maxlen = conf.max_title_len()
+    if 'title' in conf.location_rule() and len(title) > maxlen:
+        shorttitle = title[0:maxlen]
+        location_rule = location_rule.replace(title, shorttitle)
+
+    path = success_folder + '/' + location_rule
    path = trimblank(path)
    if not os.path.exists(path):
        path = escape_path(path, conf.escape_literals())
@@ -474,7 +482,7 @@ def core_main(file_path, number_th, conf: config.Config):
    # 下面被注释的变量不需要
    #rootpath= os.getcwd
    number = number_th
-    json_data = get_data_from_json(number, filepath, conf)  # 定义番号
+    json_data = get_data_from_json(number, conf)  # 定义番号

    # Return if blank dict returned (data not found)
    if not json_data:
@@ -525,7 +533,7 @@ def core_main(file_path, number_th, conf: config.Config):
    #  3：不改变路径刮削
    if conf.main_mode() == 1:
        # 创建文件夹
-        path = create_folder(conf.success_folder(),  json_data.get('location_rule'), json_data, conf)
+        path = create_folder(json_data, conf)
        if multi_part == 1:
            number += part  # 这时number会被附加上CD1后缀

@@ -539,13 +547,13 @@ def core_main(file_path, number_th, conf: config.Config):
        if not multi_part or part.lower() == '-cd1':
            try:
                # 下载预告片
-                if json_data.get('trailer'):
+                if conf.is_trailer() and json_data.get('trailer'):
                    trailer_download(json_data.get('trailer'), leak_word, c_word, number, path, filepath, conf)
            except:
                pass
            try:
                # 下载剧照 data, path, conf: config.Config, filepath
-                if json_data.get('extrafanart'):
+                if conf.is_extrafanart() and json_data.get('extrafanart'):
                    extrafanart_download(json_data.get('extrafanart'), path, conf, filepath)
            except:
                pass
@@ -566,7 +574,7 @@ def core_main(file_path, number_th, conf: config.Config):

    elif conf.main_mode() == 2:
        # 创建文件夹
-        path = create_folder(conf.success_folder(), json_data.get('location_rule'), json_data, conf)
+        path = create_folder(json_data, conf)
        # 移动文件
        paste_file_to_folder_mode2(filepath, path, multi_part, number, part, leak_word, c_word, conf)
        poster_path = path + '/' + number + leak_word + c_word + '-poster.jpg'
@@ -589,11 +597,11 @@ def core_main(file_path, number_th, conf: config.Config):

        if not multi_part or part.lower() == '-cd1':
            # 下载预告片
-            if json_data.get('trailer'):
+            if conf.is_trailer() and json_data.get('trailer'):
                trailer_download(json_data.get('trailer'), leak_word, c_word, number, path, filepath, conf)

            # 下载剧照 data, path, conf: config.Config, filepath
-            if json_data.get('extrafanart'):
+            if conf.is_extrafanart() and json_data.get('extrafanart'):
                extrafanart_download(json_data.get('extrafanart'), path, conf, filepath)

        # 裁剪图