From ec28814449f199df9af2c60d453523a7b7b53163 Mon Sep 17 00:00:00 2001
From: lededev
Date: Thu, 17 Feb 2022 17:18:41 +0800
Subject: [PATCH 1/2] update User-Agent

---
 ADC_function.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ADC_function.py b/ADC_function.py
index 1a64477..de36c73 100644
--- a/ADC_function.py
+++ b/ADC_function.py
@@ -25,7 +25,7 @@ def getXpathSingle(htmlcode, xpath):
     return result1
 
 
-G_USER_AGENT = r'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.54 Safari/537.36'
+G_USER_AGENT = r'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36'
 
 # 网页请求核心
 def get_html(url, cookies: dict = None, ua: str = None, return_type: str = None, encoding: str = None):

From 592005be01ebb599cf0af52f45379a756e079ee3 Mon Sep 17 00:00:00 2001
From: lededev
Date: Thu, 17 Feb 2022 17:19:47 +0800
Subject: [PATCH 2/2] =?UTF-8?q?=E4=B8=8D=E5=86=8D=E4=BB=A5=E5=AD=97?=
 =?UTF-8?q?=E7=AC=A6=E4=B8=B2=E9=95=BF=E5=BA=A6=E4=B8=BA=E9=99=90=E5=88=B6?=
 =?UTF-8?q?=E6=9D=A1=E4=BB=B6?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 WebCrawler/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/WebCrawler/__init__.py b/WebCrawler/__init__.py
index f690e0c..4ee92d6 100644
--- a/WebCrawler/__init__.py
+++ b/WebCrawler/__init__.py
@@ -67,7 +67,7 @@ def get_data_from_json(file_number, oCC): # 从JSON返回元数据
     conf = config.getInstance()
     # default fetch order list, from the beginning to the end
     sources = conf.sources().split(',')
-    if not len(conf.sources()) > 80:
+    if len(sources) <= len(func_mapping):
         # if the input file name matches certain rules,
         # move some web service to the beginning of the list
         lo_file_number = file_number.lower()