Move get_data_from_json to WebCrawler

This commit is contained in:
Mathhew
2021-07-28 16:12:08 +08:00
parent ae2c2bcf23
commit 2c41487a4e
3 changed files with 285 additions and 280 deletions

View File

@@ -13,19 +13,6 @@ import config
from urllib.parse import urljoin
def get_data_state(data: dict) -> bool:  # detect a failed metadata fetch
    """Return True when the metadata holds a usable title and number.

    The metadata is treated as a failed fetch (False) when:

    * ``data`` is not a mapping at all (for example ``None``),
    * the ``"title"`` or ``"number"`` key is missing, or
    * either value is ``None``, an empty string, or the literal
      string ``"null"``.

    Args:
        data: Mapping of metadata fields to check.

    Returns:
        True if both required fields are present and non-empty,
        otherwise False.
    """
    from collections.abc import Mapping

    # A non-mapping value would raise TypeError on the membership test
    # below; report it as a failed fetch instead of crashing.
    if not isinstance(data, Mapping):
        return False

    for key in ("title", "number"):
        if key not in data:
            return False
        value = data[key]
        # "null" is checked as a string on purpose: it is one of the
        # placeholder values this check has always rejected.
        if value is None or value == "" or value == "null":
            return False
    return True
def getXpathSingle(htmlcode, xpath):
html = etree.fromstring(htmlcode, etree.HTMLParser())
result1 = str(html.xpath(xpath)).strip(" ['']")