解决发布日期的问题

This commit is contained in:
Feng4
2020-12-20 21:10:43 +08:00
committed by GitHub
parent a1c7d644b1
commit 4c8665f633

View File

@@ -66,10 +66,17 @@ def getYear(getRelease):
result = '' result = ''
return result return result
def getRelease(a):
    """Return the release date found in the page HTML, or '' if absent.

    The date is the text of the first ``<span class="value">`` element that
    follows the ``<strong>日期:</strong>`` label.

    Args:
        a: HTML source of the page, as a string.

    Returns:
        The captured date text with surrounding whitespace removed, or an
        empty string when the label/value pair is not present.
    """
    # `.` does not match newlines here, so between the label and the value
    # span the pattern tolerates whitespace (including line breaks) followed
    # by content on a single line only.
    match = re.search(
        r'<strong>日期\:</strong>\s*?.*?<span class="value">(.*?)</span>', a
    )
    if match is None:
        return ''
    # Strip padding so callers get a clean value even if the markup pads it.
    return match.group(1).strip()
def getTag(a): def getTag(a):
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
try: try: