import sys
sys.path.append('../')
from bs4 import BeautifulSoup # need install
from lxml import etree # need install
from pyquery import PyQuery as pq # need install
from ADC_function import *
import json
import re
from lib2to3.pgen2 import parse
from urllib.parse import urlparse, unquote
def getActorPhoto(html):
return ''
def getTitle(html): # 获取标题
#
MD0140-2 / 家有性事EP2 爱在身边-麻豆社
# MAD039 机灵可爱小叫花 强诱僧人迫犯色戒-麻豆社
# MD0094/贫嘴贱舌中出大嫂/坏嫂嫂和小叔偷腥内射受孕-麻豆社
browser_title = str(html.xpath("/html/head/title/text()")[0])
title = str(re.findall(r'^.*?( / | |/)(.*)-麻豆社$', browser_title)[0][1]).strip()
return title.replace('/', ' ')
def getStudio(html): # 获取厂商 已修改
try:
category = str(html.xpath('//a[@rel="category tag"]/text()')[0])
return category.strip()
except:
return '麻豆社'
def getYear(html): # 获取年份
return ''
def getCover(htmlcode): # 获取封面图片
try:
url = str(re.findall("shareimage : '(.*?)'", htmlcode)[0])
return url.strip()
except:
return ''
def getRelease(html): # 获取出版日期
return ''
def getRuntime(html): # 获取播放时长
return ''
def getUrl(html):
return str(html.xpath('//a[@class="share-weixin"]/@data-url')[0])
def getNum(url, number): # 获取番号
try:
# 解码url
filename = unquote(urlparse(url).path)
# 裁剪文件名
result = filename[1:-5].upper().strip()
print(result)
# 移除中文
if result.upper() != number.upper():
result = re.split(r'[^\x00-\x7F]+', result, 1)[0]
# 移除多余的符号
return result.strip('-')
except:
return ''
def getDirector(html): # 获取导演 已修改
return ''
def getOutline(html): # 获取概述
return ''
def getSerise(html): # 获取系列 已修改
return ''
def getTag(html, studio): # 获取标签
x = html.xpath('/html/head/meta[@name="keywords"]/@content')[0].split(',')
return [i.strip() for i in x if len(i.strip()) and studio not in i and '麻豆' not in i]
def getExtrafanart(html): # 获取剧照
return ''
def cutTags(tags):
actors = []
tags = []
for tag in tags:
actors.append(tag)
return actors,tags
def main(number):
try:
try:
number = number.lower().strip()
url = "https://madou.club/" + number + ".html"
htmlcode = get_html(url)
except:
print(number)
html = etree.fromstring(htmlcode, etree.HTMLParser())
url = getUrl(html)
studio = getStudio(html)
tags = getTag(html, studio)
#actor,tags = cutTags(tags) # 演员在tags中的位置不固定,放弃尝试获取
actor = ''
dic = {
# 标题
'title': getTitle(html),
# 制作商
'studio': studio,
# 年份
'year': getYear(html),
# 简介
'outline': getOutline(html),
#
'runtime': getRuntime(html),
# 导演
'director': getDirector(html),
# 演员
'actor': actor,
# 发售日
'release': getRelease(html),
# 番号
'number': getNum(url, number),
# 封面链接
'cover': getCover(htmlcode),
# 剧照获取
'extrafanart': getExtrafanart(html),
'imagecut': 1,
#
'tag': tags,
#
'label': getSerise(html),
# 作者图片
'website': url,
'source': 'madou.py',
# 使用
'series': getSerise(html)
}
js = json.dumps(dic, ensure_ascii=False, sort_keys=True,
indent=4, separators=(',', ':'), ) # .encode('UTF-8')
return js
except Exception as e:
if config.getInstance().debug():
print(e)
data = {
"title": "",
}
js = json.dumps(
data, ensure_ascii=False, sort_keys=True, indent=4, separators=(",", ":")
)
return js
if __name__ == '__main__':
print(main('MD0222'))
print(main('MD0140-2'))
print(main('MAD039'))
print(main('JDMY027'))