From ae15e0815ef3f1532dfb79d3b1aba7428f0bb19e Mon Sep 17 00:00:00 2001 From: jop6__ Date: Mon, 25 Apr 2022 13:19:22 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E6=A0=87=E7=AD=BE=E7=BF=BB?= =?UTF-8?q?=E8=AF=91bug?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit '''mapping_data.xpath('a[contains(@keyword, $name)]/@' + language, name=i)[0]'''中使用了contains匹配,会导致原标签如“内S”错误命中标签“体内SJ”,因为它们也构成包含关系,xpath匹配时在name两侧添加逗号可解决该问题。 --- WebCrawler/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/WebCrawler/__init__.py b/WebCrawler/__init__.py index 9e51623..baaf0df 100644 --- a/WebCrawler/__init__.py +++ b/WebCrawler/__init__.py @@ -283,8 +283,8 @@ def get_data_from_json(file_number, oCC): def convert_list(mapping_data,language,vars): total = [] for i in vars: - if len(mapping_data.xpath('a[contains(@keyword, $name)]/@' + language, name=i)) != 0: - i = mapping_data.xpath('a[contains(@keyword, $name)]/@' + language, name=i)[0] + if len(mapping_data.xpath('a[contains(@keyword, $name)]/@' + language, name=f",{i},")) != 0: + i = mapping_data.xpath('a[contains(@keyword, $name)]/@' + language, name=f",{i},")[0] total.append(i) return total def convert(mapping_data,language,vars):