# -*- coding: utf-8 -*-

import re
import json

from .airav import Airav
from .carib import Carib
from .dlsite import Dlsite
from .fanza import Fanza
from .gcolle import Gcolle
from .getchu import Getchu
from .jav321 import Jav321
from .javdb import Javdb
from .mv91 import Mv91
from .fc2 import Fc2
from .madou import Madou
from .mgstage import Mgstage
from .javbus import Javbus
from .xcity import Xcity
from .avsox import Avsox
from .javlibrary import Javlibrary
from .tmdb import Tmdb
from .imdb import Imdb


def search(number, sources: str = None, proxies=None, verify=None, type='adult',
           specifiedSource=None, specifiedUrl=None,
           dbcookies=None, dbsite=None, morestoryline=False):
    """ Search metadata by number (adult) or by movie name (general).

    Convenience wrapper that creates a `Scraping` instance and forwards
    every argument to `Scraping.search`.

    :param number: number/name depends on type
    :param sources: sources string with `,` Eg: `avsox,javbus`
    :param type: `adult`, `general`
    :return: the dict decoded from the scraper's JSON answer, or None when
             no source produced any data
    """
    sc = Scraping()
    return sc.search(number, sources, proxies=proxies, verify=verify, type=type,
                     specifiedSource=specifiedSource, specifiedUrl=specifiedUrl,
                     dbcookies=dbcookies, dbsite=dbsite, morestoryline=morestoryline)


def getSupportedSources(tag='adult'):
    """ Return the supported source names joined with `,`.

    :param tag: `adult`, `general` (anything other than `adult` selects
                the general list)
    """
    if tag == 'adult':
        return ','.join(Scraping.adult_full_sources)
    return ','.join(Scraping.general_full_sources)


class Scraping():
    """ Dispatch a search to the configured scraper sources.

    Sources are tried in order until one returns data that passes
    `get_data_state`. Each scraper callable is invoked as
    `scrape(key, self)` and is expected to return either `404` or a JSON
    string (see `_searchSources`).
    """

    # Default order in which adult sources are tried.
    adult_full_sources = ['javlibrary', 'javdb', 'javbus', 'airav', 'fanza', 'xcity', 'jav321',
                          'mgstage', 'fc2', 'avsox', 'dlsite', 'carib', 'madou', 'mv91',
                          'getchu', 'gcolle'
                          ]
    # Source name -> bound `scrape` method of a scraper instance.
    adult_func_mapping = {
        'avsox': Avsox().scrape,
        'javbus': Javbus().scrape,
        'xcity': Xcity().scrape,
        'mgstage': Mgstage().scrape,
        'madou': Madou().scrape,
        'fc2': Fc2().scrape,
        'dlsite': Dlsite().scrape,
        'jav321': Jav321().scrape,
        'fanza': Fanza().scrape,
        'airav': Airav().scrape,
        'carib': Carib().scrape,
        'mv91': Mv91().scrape,
        'gcolle': Gcolle().scrape,
        'javdb': Javdb().scrape,
        'getchu': Getchu().scrape,
        'javlibrary': Javlibrary().scrape,
    }

    # Default order in which general (movie / TV) sources are tried.
    general_full_sources = ['tmdb', 'imdb']
    general_func_mapping = {
        'tmdb': Tmdb().scrape,
        'imdb': Imdb().scrape,
    }

    # Per-search settings; overwritten on every call to `search`.
    proxies = None
    verify = None
    specifiedSource = None
    specifiedUrl = None

    dbcookies = None
    dbsite = None
    # Use the storyline method to fetch a more detailed plot
    morestoryline = False

    def search(self, number, sources=None, proxies=None, verify=None, type='adult',
               specifiedSource=None, specifiedUrl=None,
               dbcookies=None, dbsite=None, morestoryline=False):
        """ Store the search settings on the instance and run the search.

        :param number: number/name depends on type
        :param sources: sources string with `,` Eg: `avsox,javbus`
        :param type: `adult` runs `searchAdult`, anything else `searchGeneral`
        :return: decoded metadata dict, or None when nothing was found
        """
        self.proxies = proxies
        self.verify = verify
        self.specifiedSource = specifiedSource
        self.specifiedUrl = specifiedUrl
        self.dbcookies = dbcookies
        self.dbsite = dbsite
        self.morestoryline = morestoryline
        if type == 'adult':
            return self.searchAdult(number, sources)
        return self.searchGeneral(number, sources)

    def searchGeneral(self, name, sources):
        """ Look up movie / TV series metadata (imdb, tmdb).

        :param name: movie or series name
        :param sources: sources string with `,`, or empty for the defaults
        :return: decoded metadata dict, or None when nothing was found
        """
        if self.specifiedSource:
            sources = [self.specifiedSource]
        else:
            sources = self.checkGeneralSources(sources, name)
        return self._searchSources(self.general_func_mapping, sources, name)

    def searchAdult(self, number, sources):
        """ Look up adult movie metadata by number.

        :param number: movie number
        :param sources: sources string with `,`, or empty for the defaults
        :return: decoded metadata dict, or None when nothing was found
        """
        if self.specifiedSource:
            sources = [self.specifiedSource]
        else:
            sources = self.checkAdultSources(sources, number)
        return self._searchSources(self.adult_func_mapping, sources, number)

    def _searchSources(self, func_mapping, sources, key):
        """ Try every source in order until one returns valid metadata.

        :param func_mapping: source name -> scraper callable
        :param sources: ordered list of source names to try
        :param key: number or name passed to the scraper
        :return: the last successfully decoded JSON data, or None when
                 nothing was decoded at all. NOTE: when no source passes
                 `get_data_state`, the last decoded (invalid) data is still
                 returned, so callers must validate the result themselves.
        """
        json_data = {}
        for source in sources:
            try:
                print('[+]select', source)
                data = func_mapping[source](key, self)
                if data == 404:
                    continue
                json_data = json.loads(data)
                # if any service return a valid return, break
                if self.get_data_state(json_data):
                    print(f"[+]Find movie [{key}] metadata on website '{source}'")
                    break
            except Exception:
                # Best effort: an unknown source, a failing scraper or a
                # malformed answer must not abort the whole search.
                # `Exception` (not a bare except) keeps Ctrl-C working.
                continue

        # Return if data not found in all sources
        if not json_data:
            print(f'[-]Movie Number [{key}] not found!')
            return None

        return json_data

    @staticmethod
    def _parseSources(c_sources, full_sources):
        """ Build a fresh, mutable list of source names.

        Always returns a new list so that later reordering or filtering
        never mutates the class-level default lists.
        """
        if not c_sources:
            return list(full_sources)
        # Tolerate spaces around names and stray commas: `avsox, javbus,`
        return [s.strip() for s in c_sources.split(',') if s.strip()]

    @staticmethod
    def _dropUnknownSources(sources, func_mapping):
        """ Report and remove the sources that have no scraper registered. """
        unknown = [s for s in sources if s not in func_mapping]
        for s in unknown:
            print('[!] Source Not Exist : ' + s)
        for s in unknown:
            print('[!] Remove Source : ' + s)
        return [s for s in sources if s in func_mapping]

    @staticmethod
    def _prioritize(sources, source):
        """ Move `source` to the front of `sources` in place, if present. """
        if source in sources:
            sources.insert(0, sources.pop(sources.index(source)))
        return sources

    def checkGeneralSources(self, c_sources, name):
        """ Resolve the list of general sources to query.

        :param c_sources: sources string with `,`, or empty for the defaults
        :param name: movie name (currently unused)
        :return: list of source names present in `general_func_mapping`
        """
        sources = self._parseSources(c_sources, self.general_full_sources)
        # check sources in func_mapping
        return self._dropUnknownSources(sources, self.general_func_mapping)

    def checkAdultSources(self, c_sources, file_number):
        """ Resolve and prioritise the list of adult sources to query.

        :param c_sources: sources string with `,`, or empty for the defaults
        :param file_number: movie number, used to guess the best source
        :return: list of source names present in `adult_func_mapping`, with
                 the sources matching the number's pattern moved to the front
        """
        sources = self._parseSources(c_sources, self.adult_full_sources)
        insert = self._prioritize

        if len(sources) <= len(self.adult_func_mapping):
            # if the input file name matches certain rules,
            # move some web service to the beginning of the list
            lo_file_number = file_number.lower()
            if "carib" in sources and re.search(r"^\d{6}-\d{3}", file_number):
                sources = insert(sources, "carib")
            elif "item" in file_number or "GETCHU" in file_number.upper():
                sources = insert(sources, "getchu")
            elif ("rj" in lo_file_number or "vj" in lo_file_number
                  or re.search(r"[\u3040-\u309F\u30A0-\u30FF]+", file_number)):
                # The pattern matches Hiragana / Katakana characters.
                sources = insert(sources, "getchu")
                sources = insert(sources, "dlsite")
            elif re.search(r"^\d{5,}", file_number) or "heyzo" in lo_file_number:
                if "avsox" in sources:
                    sources = insert(sources, "avsox")
            elif "mgstage" in sources and \
                    (re.search(r"\d+\D+", file_number) or "siro" in lo_file_number):
                sources = insert(sources, "mgstage")
            elif "fc2" in lo_file_number:
                if "fc2" in sources:
                    sources = insert(sources, "fc2")
            elif "gcolle" in sources and re.search(r"\d{6}", file_number):
                sources = insert(sources, "gcolle")
            elif re.search(r"^[a-z0-9]{3,}$", lo_file_number):
                if "xcity" in sources:
                    sources = insert(sources, "xcity")
                if "madou" in sources:
                    sources = insert(sources, "madou")
            elif "madou" in sources and (
                    re.search(r"^[a-z0-9]{3,}-[0-9]{1,}$", lo_file_number)
            ):
                sources = insert(sources, "madou")

        # check sources in func_mapping
        return self._dropUnknownSources(sources, self.adult_func_mapping)

    def get_data_state(self, data: dict) -> bool:
        """ Tell whether scraped metadata is usable.

        :param data: decoded metadata
        :return: False when `title` or `number` is missing, None, empty or
                 the string "null"; True otherwise
        """
        if "title" not in data or "number" not in data:
            return False
        if data["title"] is None or data["title"] == "" or data["title"] == "null":
            return False
        if data["number"] is None or data["number"] == "" or data["number"] == "null":
            return False
        return True