diff --git a/ADC_function.py b/ADC_function.py index 6cf90e4..77f00f7 100644 --- a/ADC_function.py +++ b/ADC_function.py @@ -26,7 +26,7 @@ def getXpathSingle(htmlcode, xpath): return result1 -G_USER_AGENT = r'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.54 Safari/537.36' +G_USER_AGENT = r'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36' def get_html(url, cookies: dict = None, ua: str = None, return_type: str = None, encoding: str = None): diff --git a/WebCrawler/__init__.py b/WebCrawler/__init__.py index 38d96d5..8d291db 100644 --- a/WebCrawler/__init__.py +++ b/WebCrawler/__init__.py @@ -68,7 +68,7 @@ def get_data_from_json(file_number, oCC): conf = config.getInstance() # default fetch order list, from the beginning to the end sources = conf.sources().split(',') - if not len(conf.sources()) > 80: + if len(sources) <= len(func_mapping): # if the input file name matches certain rules, # move some web service to the beginning of the list lo_file_number = file_number.lower() @@ -327,7 +327,8 @@ def get_data_from_json(file_number, oCC): if i not in json_data: naming_rule += i.strip("'").strip('"') else: - naming_rule += json_data.get(i) + item = json_data.get(i) + naming_rule += item if type(item) is not list else "&".join(item) json_data['naming_rule'] = naming_rule return json_data