From 120407636655fe3c3695be07282e40409873fa1b Mon Sep 17 00:00:00 2001
From: Mathhew
Date: Mon, 9 Aug 2021 11:09:42 +0800
Subject: [PATCH] Check sources

---
Review notes (ignored by `git am`):
  * Prioritised sources are now moved to the front only when they are
    actually present in `sources`, so a missing entry no longer raises
    ValueError from `list.index`.
  * Sources that have no entry in `func_mapping` are reported and dropped
    before any lookup is attempted.
  * The removal loop logs `d`, the source being removed. Logging `s` would
    print the stale variable left over from the preceding loop, i.e. the
    last element of `sources` every time.

 WebCrawler/__init__.py | 32 ++++++++++++++++++++++----------
 1 file changed, 22 insertions(+), 10 deletions(-)

diff --git a/WebCrawler/__init__.py b/WebCrawler/__init__.py
index 47a0422..7579142 100644
--- a/WebCrawler/__init__.py
+++ b/WebCrawler/__init__.py
@@ -62,25 +62,37 @@ def get_data_from_json(file_number, conf: config.Config):  # 从JSON返回元数
     if "carib" in sources and (re.match(r"^\d{6}-\d{3}", file_number)
     ):
         sources.insert(0, sources.pop(sources.index("carib")))
-    elif "avsox" in sources and (re.match(r"^\d{5,}", file_number) or
-                                 "heyzo" in lo_file_number
-    ):
-        sources.insert(0, sources.pop(sources.index("avsox")))
-        sources.insert(1, sources.pop(sources.index("javdb")))
+    elif re.match(r"^\d{5,}", file_number) or "heyzo" in lo_file_number:
+        if "javdb" in sources:
+            sources.insert(0, sources.pop(sources.index("javdb")))
+        if "avsox" in sources:
+            sources.insert(0, sources.pop(sources.index("avsox")))
     elif "mgstage" in sources and (re.match(r"\d+\D+", file_number) or
                                    "siro" in lo_file_number
     ):
         sources.insert(0, sources.pop(sources.index("mgstage")))
-    elif "fc2" in sources and ("fc2" in lo_file_number
-    ):
-        sources.insert(0, sources.pop(sources.index("fc2club")))
-        sources.insert(1, sources.pop(sources.index("fc2")))
-        sources.insert(2, sources.pop(sources.index("javdb")))
+    elif "fc2" in lo_file_number:
+        if "javdb" in sources:
+            sources.insert(0, sources.pop(sources.index("javdb")))
+        if "fc2" in sources:
+            sources.insert(0, sources.pop(sources.index("fc2")))
+        if "fc2club" in sources:
+            sources.insert(0, sources.pop(sources.index("fc2club")))
     elif "dlsite" in sources and (
             "rj" in lo_file_number or "vj" in lo_file_number
     ):
         sources.insert(0, sources.pop(sources.index("dlsite")))

+    # check sources in func_mapping
+    todel = []
+    for s in sources:
+        if s not in func_mapping:
+            print('[!] Source Not Exist : ' + s)
+            todel.append(s)
+    for d in todel:
+        print('[!] Remove Source : ' + d)
+        sources.remove(d)
+
     json_data = {}

     if conf.multi_threading():