Move get_data_from_json to WebCrawler
This commit is contained in:
@@ -13,19 +13,6 @@ import config
|
||||
from urllib.parse import urljoin
|
||||
|
||||
|
||||
def get_data_state(data: dict) -> bool:
    """Detect whether a metadata fetch produced usable data.

    The metadata counts as valid only when both the ``"title"`` and
    ``"number"`` keys are present and neither value is ``None``, the empty
    string, or the literal string ``"null"``.

    Args:
        data: Metadata mapping to inspect.

    Returns:
        ``True`` if both required fields are present and populated,
        ``False`` otherwise.
    """
    for key in ("title", "number"):
        if key not in data:
            return False
        value = data[key]
        # The string "null" is treated the same as a missing value.
        if value is None or value == "" or value == "null":
            return False
    return True
|
||||
|
||||
|
||||
def getXpathSingle(htmlcode, xpath):
|
||||
html = etree.fromstring(htmlcode, etree.HTMLParser())
|
||||
result1 = str(html.xpath(xpath)).strip(" ['']")
|
||||
|
||||
Reference in New Issue
Block a user