diff --git a/WebCrawler/mgstage.py b/WebCrawler/mgstage.py index 089bbf2..e98eb5a 100644 --- a/WebCrawler/mgstage.py +++ b/WebCrawler/mgstage.py @@ -95,6 +95,18 @@ def getSeries(a): result2 = str(html.xpath('//th[contains(text(),"シリーズ")]/../td/text()')).strip(" ['']").strip('\\n ').strip( '\\n') return str(result1 + result2).strip('+').replace("', '", '').replace('"', '') + +def getExtrafanart(htmlcode): # 获取剧照 + html_pather = re.compile(r'
\s*?\s*?
') + html = html_pather.search(htmlcode) + if html: + html = html.group() + extrafanart_pather = re.compile(r'