# -*- coding: utf-8 -*-

import re
from lxml import etree
from urllib.parse import urlparse, unquote
from .parser import Parser


class Madou(Parser):
    """Scraper for detail pages hosted on madou.club.

    Fetching (``getHtml``), XPath evaluation (``getTreeElement``) and result
    assembly (``dictformat``) are inherited from ``Parser``; this class only
    supplies the site-specific URL scheme, XPath expressions and number
    extraction.
    """

    source = 'madou'
    # NOTE(review): the meaning of these two flags is defined by the Parser
    # base class / its callers, which are not part of this file.
    imagecut = 0
    uncensored = True

    # XPath expressions evaluated against the parsed detail page.
    expr_url = '//a[@class="share-weixin"]/@data-url'
    expr_title = "/html/head/title/text()"
    expr_studio = '//a[@rel="category tag"]/text()'
    expr_tags = '/html/head/meta[@name="keywords"]/@content'

    def search(self, number):
        """Fetch and parse the detail page for *number*.

        Args:
            number: Identifier used to build the detail page URL when no
                explicit URL has been configured on the instance.

        Returns:
            ``404`` when ``getHtml`` reports 404 for the page, otherwise
            whatever ``dictformat`` returns for the parsed tree.
        """
        number = number.strip()
        self.number = number.lower()
        if self.specifiedUrl:
            self.detailurl = self.specifiedUrl
        else:
            # Use the whitespace-stripped identifier: stray blanks would
            # otherwise end up inside the URL path. Case is left untouched.
            self.detailurl = "https://madou.club/" + number + ".html"
        self.htmlcode = self.getHtml(self.detailurl)
        if self.htmlcode == 404:
            return 404
        htmltree = etree.fromstring(self.htmlcode, etree.HTMLParser())
        # From here on the page's own share link is used as the detail URL;
        # getNum() derives the number from it.
        self.detailurl = self.getTreeElement(htmltree, self.expr_url)
        return self.dictformat(htmltree)

    def getNum(self, htmltree):
        """Derive the upper-cased number from the path of ``self.detailurl``.

        Args:
            htmltree: Parsed page; unused here, kept for the Parser interface.

        Returns:
            The number with leading/trailing ``-`` removed, or ``''`` when it
            cannot be derived (best effort, never raises ``Exception``).
        """
        try:
            # Decode the percent-encoded URL path.
            filename = unquote(urlparse(self.detailurl).path)
            # Trim the file name: drop the first character and the last five
            # (presumably the leading "/" and the ".html" suffix).
            result = filename[1:-5].upper().strip()
            # Remove Chinese text: when the path is not exactly the requested
            # number, keep only what precedes the first non-ASCII run.
            if result != self.number.upper():
                result = re.split(r'[^\x00-\x7F]+', result, maxsplit=1)[0]
            # Strip superfluous separators.
            return result.strip('-')
        except Exception:
            # Deliberately best effort; unlike a bare ``except`` this lets
            # KeyboardInterrupt / SystemExit propagate.
            return ''

    # NOTE(review): the original file continues here with
    # ``def getTitle(self, htmltree):``. Its body lies outside the reviewed
    # excerpt and must be carried over unchanged from the original file.