From 0bb2b2da3bbce6bb3732a9ce5b6852fbdd995669 Mon Sep 17 00:00:00 2001 From: Mathhew Date: Mon, 21 Mar 2022 11:44:33 +0800 Subject: [PATCH] fix: madou title --- WebCrawler/madou.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/WebCrawler/madou.py b/WebCrawler/madou.py index fffd08b..57be3ea 100644 --- a/WebCrawler/madou.py +++ b/WebCrawler/madou.py @@ -15,12 +15,13 @@ def getActorPhoto(html): return '' -def getTitle(html, number): # 获取标题 +def getTitle(html): # 获取标题 # MD0140-2 / 家有性事EP2 爱在身边-麻豆社 # MAD039 机灵可爱小叫花 强诱僧人迫犯色戒-麻豆社 + # MD0094/贫嘴贱舌中出大嫂/坏嫂嫂和小叔偷腥内射受孕-麻豆社 browser_title = str(html.xpath("/html/head/title/text()")[0]) - return str(re.findall(r'^.*?( / | )(.*)-麻豆社$', browser_title)[0][1]).strip() - + title = str(re.findall(r'^.*?( / | |/)(.*)-麻豆社$', browser_title)[0][1]).strip() + return title.replace('/', ' ') def getStudio(html): # 获取厂商 已修改 try: @@ -115,7 +116,7 @@ def main(number): actor = '' dic = { # 标题 - 'title': getTitle(html, number), + 'title': getTitle(html), # 制作商 'studio': studio, # 年份