From 8ad49973428021722a163038b4b7a206a39d8bd2 Mon Sep 17 00:00:00 2001 From: lededev Date: Sun, 6 Mar 2022 20:39:59 +0800 Subject: [PATCH] madou.py:simp by regex --- WebCrawler/madou.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/WebCrawler/madou.py b/WebCrawler/madou.py index 0c03cf7..1e10481 100644 --- a/WebCrawler/madou.py +++ b/WebCrawler/madou.py @@ -19,8 +19,7 @@ def getTitle(html, number): # 获取标题 # MD0140-2 / 家有性事EP2 爱在身边-麻豆社 # MAD039 机灵可爱小叫花 强诱僧人迫犯色戒-麻豆社 browser_title = str(html.xpath("/html/head/title/text()")[0]) - browser_title = browser_title[browser_title.find(' '):].replace('/','').strip() - return browser_title[:browser_title.find('-麻豆社')].strip() + return str(re.findall(r'^.*?( / | )(.*)-麻豆社$', browser_title)[0][1]).strip() def getStudio(html): # 获取厂商 已修改