diff --git a/WebCrawler/__init__.py b/WebCrawler/__init__.py index cb15ef0..7546802 100644 --- a/WebCrawler/__init__.py +++ b/WebCrawler/__init__.py @@ -81,32 +81,32 @@ def get_data_from_json(file_number, oCC): # if the input file name matches certain rules, # move some web service to the beginning of the list lo_file_number = file_number.lower() - if "carib" in sources and (re.match(r"^\d{6}-\d{3}", file_number) + if "carib" in sources and (re.search(r"^\d{6}-\d{3}", file_number) ): sources = insert(sources,"carib") - elif "item" in file_number: + elif "item" in file_number or "GETCHU" in file_number.upper(): sources = insert(sources,"getchu") - elif "rj" in lo_file_number or "vj" in lo_file_number or re.match(r"[\u3040-\u309F\u30A0-\u30FF]+", file_number): + elif "rj" in lo_file_number or "vj" in lo_file_number or re.search(r"[\u3040-\u309F\u30A0-\u30FF]+", file_number): sources = insert(sources, "getchu") sources = insert(sources, "dlsite") - elif re.match(r"^\d{5,}", file_number) or "heyzo" in lo_file_number: + elif re.search(r"^\d{5,}", file_number) or "heyzo" in lo_file_number: if "avsox" in sources: sources = insert(sources,"avsox") elif "mgstage" in sources and \ - (re.match(r"\d+\D+", file_number) or "siro" in lo_file_number): + (re.search(r"\d+\D+", file_number) or "siro" in lo_file_number): sources = insert(sources,"mgstage") elif "fc2" in lo_file_number: if "fc2" in sources: sources = insert(sources,"fc2") elif "gcolle" in sources and (re.search("\d{6}", file_number)): sources = insert(sources,"gcolle") - elif re.match(r"^[a-z0-9]{3,}$", lo_file_number): + elif re.search(r"^[a-z0-9]{3,}$", lo_file_number): if "xcity" in sources: sources = insert(sources,"xcity") if "madou" in sources: sources = insert(sources,"madou") elif "madou" in sources and ( - re.match(r"^[a-z0-9]{3,}-[0-9]{1,}$", lo_file_number) + re.search(r"^[a-z0-9]{3,}-[0-9]{1,}$", lo_file_number) ): sources = insert(sources,"madou")