28 lines
628 B
Python
28 lines
628 B
Python
from lxml import etree
|
|
|
|
class Crawler:
    """Thin convenience wrapper for running XPath queries on an HTML document."""

    def __init__(self, htmlcode):
        """Parse *htmlcode* with ``etree.HTML`` and keep the resulting tree."""
        self.html = etree.HTML(htmlcode)

    def getString(self, _xpath: str):
        """Return the first item of the XPath result, or ``""`` if there is none.

        An empty ``_xpath`` short-circuits to ``""`` without querying the tree.
        """
        if _xpath == "":
            return ""
        result = self.html.xpath(_xpath)
        try:
            return result[0]
        except (IndexError, TypeError):
            # IndexError: the result is empty.
            # TypeError: the result is not subscriptable (e.g. a number or
            # boolean produced by an XPath function call).
            return ""

    def getStrings(self, _xpath: str):
        """Return the XPath result for ``_xpath`` unchanged."""
        # Returning a value cannot raise, so no fallback branch is needed here.
        return self.html.xpath(_xpath)

    def getOutline(self, _xpath: str):
        """Return the XPath result joined with newlines, or ``""`` on failure.

        The fallback applies when the result cannot be joined as strings.
        """
        result = self.html.xpath(_xpath)
        try:
            return "\n".join(result)
        except TypeError:
            # Raised when the result is not iterable or holds non-string items.
            return ""