From abd6503ae4e14ced402565424121290e38d2bc3a Mon Sep 17 00:00:00 2001 From: yoshiko2 Date: Fri, 30 Jun 2023 20:30:46 +0800 Subject: [PATCH] Add site msin #2 --- scrapinglib/msin.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/scrapinglib/msin.py b/scrapinglib/msin.py index 2df28d1..34acb10 100644 --- a/scrapinglib/msin.py +++ b/scrapinglib/msin.py @@ -11,6 +11,7 @@ class Msin(Parser): expr_number = '//div[@class="mv_fileName"]/text()' expr_title = '//div[@class="mv_title"]/text()' + expr_title_unsubscribe = '//div[@class="mv_title unsubscribe"]/text()' expr_studio = '//div[@class="mv_writer"]/text()' expr_director = '//div[@class="mv_writer"]/text()' expr_actor = '//div[@class="mv_writer"]/text()' @@ -35,10 +36,12 @@ class Msin(Parser): session = request_session(cookies=self.cookies, proxies=self.proxies, verify=self.verify) htmlcode = session.get(self.detailurl).text htmltree = etree.HTML(htmlcode) + # if the title is empty, fall back to the unsubscribe title + if super().getTitle(htmltree) == "": + self.expr_title = self.expr_title_unsubscribe # if tags are null, use genres if len(super().getTags(htmltree)) == 0: self.expr_tags = self.expr_genres - result = self.dictformat(htmltree) return result