diff --git a/config.ini b/config.ini
index 670578e..2afe5cf 100755
--- a/config.ini
+++ b/config.ini
@@ -58,7 +58,7 @@ image_naming_with_number = 0
 update_check = 1
 
 [priority]
-website = javbus,airav,jav321,fanza,xcity,mgstage,fc2,avsox,dlsite,carib,madou,getchu,javdb,gcolle,javday,javmenu,pcolle,caribpr
+website = javbus,airav,jav321,fanza,xcity,mgstage,fc2,avsox,dlsite,carib,madou,getchu,javdb,gcolle,javday,javmenu,pcolle,caribpr,msin
 
 [escape]
 literals = \()/
diff --git a/scrapinglib/api.py b/scrapinglib/api.py
index f10bd43..97ac350 100644
--- a/scrapinglib/api.py
+++ b/scrapinglib/api.py
@@ -33,7 +33,7 @@ class Scraping:
     """
     """
     adult_full_sources = ['javlibrary', 'javdb', 'javbus', 'airav', 'fanza', 'xcity', 'jav321',
-                          'mgstage', 'fc2', 'avsox', 'dlsite', 'carib', 'madou',
+                          'mgstage', 'fc2', 'avsox', 'dlsite', 'carib', 'madou', 'msin',
                           'getchu', 'gcolle', 'javday', 'pissplay', 'javmenu', 'pcolle', 'caribpr'
                           ]
 
@@ -222,6 +222,7 @@ class Scraping:
                 sources = insert(sources, "pcolle")
             elif "fc2" in lo_file_number:
                 if "fc2" in sources:
+                    sources = insert(sources, "msin")
                     sources = insert(sources, "fc2")
             elif "mgstage" in sources and \
                     (re.search(r"\d+\D+", file_number) or "siro" in lo_file_number):
diff --git a/scrapinglib/msin.py b/scrapinglib/msin.py
new file mode 100644
index 0000000..2df28d1
--- /dev/null
+++ b/scrapinglib/msin.py
@@ -0,0 +1,75 @@
+# -*- coding: utf-8 -*-
+"""Scraper for FC2 PPV entries listed on db.msin.jp."""
+
+import re
+from lxml import etree
+from .httprequest import request_session
+from .parser import Parser
+
+
+class Msin(Parser):
+    """Parser subclass that looks up FC2 numbers on db.msin.jp."""
+
+    source = 'msin'
+
+    expr_number = '//div[@class="mv_fileName"]/text()'
+    expr_title = '//div[@class="mv_title"]/text()'
+    # studio, director and actor all read the same "mv_writer" node.
+    expr_studio = '//div[@class="mv_writer"]/text()'
+    expr_director = '//div[@class="mv_writer"]/text()'
+    expr_actor = '//div[@class="mv_writer"]/text()'
+    # label and series both read the same "mv_mfr" node.
+    expr_label = '//div[@class="mv_mfr"]/text()'
+    expr_series = '//div[@class="mv_mfr"]/text()'
+    expr_release = '//div[@class="mv_createDate"]/text()'
+    expr_cover = '//div[@class="movie_top"]/img/@src'
+    expr_tags = '//div[@class="mv_tag"]/label/text()'
+    # Swapped in for expr_tags by search() when the page yields no tags.
+    expr_genres = '//div[@class="mv_genre"]/label/text()'
+
+    # Selectors below are disabled; kept as placeholders until confirmed.
+    # expr_outline = '//p[@class="fo-14"]/text()'
+    # expr_extrafanart = '//*[@class="item-nav"]/ul/li/a/img/@src'
+    # expr_extrafanart2 = '//*[@id="cart_quantity"]/table/tr[3]/td/div/a/img/@src'
+
+    def extraInit(self):
+        """Set ``imagecut`` to 4 for this source."""
+        # NOTE(review): the meaning of 4 is defined outside this file -- confirm.
+        self.imagecut = 4
+
+    def search(self, number: str):
+        """Query db.msin.jp for ``number`` and return the parsed metadata.
+
+        The number is lowercased and any ``fc2-ppv-`` / ``fc2-`` prefix is
+        removed; the remainder is searched for as ``fc2-ppv-<id>``.
+
+        Returns whatever ``self.dictformat`` builds from the fetched page.
+        """
+        self.number = number.lower().replace('fc2-ppv-', '').replace('fc2-', '')
+        self.cookies = {"age": "off"}
+        self.detailurl = 'https://db.msin.jp/search/movie?str=fc2-ppv-' + self.number
+        session = request_session(cookies=self.cookies, proxies=self.proxies, verify=self.verify)
+        htmltree = etree.HTML(session.get(self.detailurl).text)
+        # No tags on the page: read the genre labels as tags instead.
+        if not super().getTags(htmltree):
+            self.expr_tags = self.expr_genres
+        return self.dictformat(htmltree)
+
+    def getRelease(self, htmltree):
+        """Return the release date with 年/月/日 rewritten as ``-`` separators."""
+        release = super().getRelease(htmltree)
+        return release.replace('年', '-').replace('月', '-').replace('日', '')
+
+    def getCover(self, htmltree):
+        """Return the cover URL, or the first extra fanart if the URL contains ``.gif``."""
+        cover = super().getCover(htmltree)
+        if ".gif" in cover:
+            # NOTE(review): expr_extrafanart is commented out above, so this fallback relies on Parser's default -- confirm.
+            fanart = super().getExtrafanart(htmltree)
+            if fanart:
+                return fanart[0]
+        return cover
+
+    def getNum(self, htmltree):
+        """Return ``FC2-<id>`` built from the id stored by ``search``."""
+        return 'FC2-' + self.number