From dd106453f76e19c40b31bf7559f607559c3f4bd0 Mon Sep 17 00:00:00 2001 From: lededev Date: Tue, 19 Oct 2021 00:03:51 +0800 Subject: [PATCH] =?UTF-8?q?=E5=AF=B9=E6=A0=87=E8=AE=B0=E4=B8=BA=E5=88=A0?= =?UTF-8?q?=E9=99=A4=E7=9A=84tag=E8=BF=9B=E8=A1=8C=E6=B8=85=E7=90=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- WebCrawler/__init__.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/WebCrawler/__init__.py b/WebCrawler/__init__.py index b6e7b2f..44f9094 100644 --- a/WebCrawler/__init__.py +++ b/WebCrawler/__init__.py @@ -178,6 +178,10 @@ def get_data_from_json(file_number): # 从JSON返回元数据 imagecut = json_data.get('imagecut') tag = str(json_data.get('tag')).strip("[ ]").replace("'", '').replace(" ", '').split(',') # 字符串转列表 @ + while 'XXXX' in tag: + tag.remove('XXXX') + while 'xxx' in tag: + tag.remove('xxx') actor = str(actor_list).strip("[ ]").replace("'", '').replace(" ", '') if title == '' or number == '': @@ -306,4 +310,5 @@ def special_characters_replacement(text) -> str: replace('"', '＂'). # U+FF02 FULLWIDTH QUOTATION MARK @ Basic Multilingual Plane replace('<', 'ᐸ'). # U+1438 CANADIAN SYLLABICS PA @ Basic Multilingual Plane replace('>', 'ᐳ'). # U+1433 CANADIAN SYLLABICS PO @ Basic Multilingual Plane - replace('|', 'ǀ')) # U+01C0 LATIN LETTER DENTAL CLICK @ Basic Multilingual Plane + replace('|', 'ǀ'). # U+01C0 LATIN LETTER DENTAL CLICK @ Basic Multilingual Plane + replace('&', '&'))