From 5cefc85462dd449ea851fdad6cfd77f57b37afd9 Mon Sep 17 00:00:00 2001
From: yoshiko2
Date: Sat, 1 Jul 2023 02:20:51 +0800
Subject: [PATCH] Add site msin #3

---
 scrapinglib/msin.py | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/scrapinglib/msin.py b/scrapinglib/msin.py
index 34acb10..5476fb5 100644
--- a/scrapinglib/msin.py
+++ b/scrapinglib/msin.py
@@ -12,12 +12,12 @@ class Msin(Parser):
     expr_number = '//div[@class="mv_fileName"]/text()'
     expr_title = '//div[@class="mv_title"]/text()'
     expr_title_unsubscribe = '//div[@class="mv_title unsubscribe"]/text()'
-    expr_studio = '//div[@class="mv_writer"]/text()'
-    expr_director = '//div[@class="mv_writer"]/text()'
-    expr_actor = '//div[@class="mv_writer"]/text()'
-    expr_label = '//div[@class="mv_mfr"]/text()'
-    expr_series = '//div[@class="mv_mfr"]/text()'
-    expr_release = '//div[@class="mv_createDate"]/text()'
+    expr_studio = '//a[@class="mv_writer"]/text()'
+    expr_director = '//a[@class="mv_writer"]/text()'
+    expr_actor = '(//div[@class="performer_text"]/a/text())[1]'
+    expr_label = '//a[@class="mv_mfr"]/text()'
+    expr_series = '//a[@class="mv_mfr"]/text()'
+    expr_release = '//a[@class="mv_createDate"]/text()'
     expr_cover = '//div[@class="movie_top"]/img/@src'
     expr_tags = '//div[@class="mv_tag"]/label/text()'
     expr_genres = '//div[@class="mv_genre"]/label/text()'
@@ -45,6 +45,14 @@ class Msin(Parser):
         result = self.dictformat(htmltree)
         return result
 
+    def getActors(self, htmltree):
+        actors = super().getActors(htmltree)
+        i = 0
+        while i < len(actors):
+            actors[i] = actors[i].replace("(FC2動画)", "")
+            i = i + 1
+        return actors
+
     def getTags(self, htmltree) -> list:
         return super().getTags(htmltree)
 