From d991fecb738f57797942a0df79262215ae089a31 Mon Sep 17 00:00:00 2001 From: yoshiko2 Date: Sun, 8 May 2022 03:26:48 +0800 Subject: [PATCH] fix order of sources --- WebCrawler/__init__.py | 38 ++++++++++++++++++-------------------- 1 file changed, 18 insertions(+), 20 deletions(-) diff --git a/WebCrawler/__init__.py b/WebCrawler/__init__.py index cf0c1b2..8c3516f 100644 --- a/WebCrawler/__init__.py +++ b/WebCrawler/__init__.py @@ -72,46 +72,44 @@ def get_data_from_json(file_number, oCC): conf = config.getInstance() # default fetch order list, from the beginning to the end sources = conf.sources().split(',') + def insert(sources,source): + if source in sources: + sources.insert(0, sources.pop(sources.index(source))) + return sources + if len(sources) <= len(func_mapping): # if the input file name matches certain rules, # move some web service to the beginning of the list lo_file_number = file_number.lower() if "carib" in sources and (re.match(r"^\d{6}-\d{3}", file_number) ): - sources.insert(0, sources.pop(sources.index("carib"))) + sources = insert(sources,"carib") + elif "item" in file_number: + sources = insert(sources,"getchu") elif re.match(r"^\d{5,}", file_number) or "heyzo" in lo_file_number: - if "javdb" in sources: - sources.insert(0, sources.pop(sources.index("javdb"))) if "avsox" in sources: - sources.insert(0, sources.pop(sources.index("avsox"))) - elif "mgstage" in sources and (re.match(r"\d+\D+", file_number) or - "siro" in lo_file_number - ): - sources.insert(0, sources.pop(sources.index("mgstage"))) + sources = insert(sources,"avsox") + elif "mgstage" in sources and \ + (re.match(r"\d+\D+", file_number) or "siro" in lo_file_number): + sources = insert(sources,"mgstage") elif "fc2" in lo_file_number: - if "javdb" in sources: - sources.insert(0, sources.pop(sources.index("javdb"))) if "fc2" in sources: - sources.insert(0, sources.pop(sources.index("fc2"))) - if "fc2club" in sources: - sources.insert(0, sources.pop(sources.index("fc2club"))) + sources = insert(sources,"fc2") elif "gcolle" in sources and (re.search("\d{6}", file_number)): - sources.insert(0, sources.pop(sources.index("gcolle"))) + sources = insert(sources,"gcolle") elif "dlsite" in sources and ( "rj" in lo_file_number or "vj" in lo_file_number ): - sources.insert(0, sources.pop(sources.index("dlsite"))) + sources = insert(sources,"dlsite") elif re.match(r"^[a-z0-9]{3,}$", lo_file_number): - if "javdb" in sources: - sources.insert(0, sources.pop(sources.index("javdb"))) if "xcity" in sources: - sources.insert(0, sources.pop(sources.index("xcity"))) + sources = insert(sources,"xcity") if "madou" in sources: - sources.insert(0, sources.pop(sources.index("madou"))) + sources = insert(sources,"madou") elif "madou" in sources and ( re.match(r"^[a-z0-9]{3,}-[0-9]{1,}$", lo_file_number) ): - sources.insert(0, sources.pop(sources.index("madou"))) + sources = insert(sources,"madou") # check sources in func_mapping todel = []