madou.py: fix tag extraction in getTag

This commit is contained in:
lededev
2022-03-06 21:03:00 +08:00
parent 8ad4997342
commit 6b7e518fbe

View File

@@ -81,13 +81,15 @@ def getSerise(html): # 获取系列 已修改
return '' return ''
def getTag(html): # 获取标签 def getTag(html, studio): # 获取标签
return html.xpath('//div[@class="article-tags"]/a/text()') x = html.xpath('/html/head/meta[@name="keywords"]/@content')[0].split(',')
return [i.strip() for i in x if len(i.strip()) and studio not in i and '麻豆' not in i]
def getExtrafanart(html): # 获取剧照 def getExtrafanart(html): # 获取剧照
return '' return ''
def cutTags(tags): def cutTags(tags):
actors = [] actors = []
tags = [] tags = []
@@ -107,13 +109,15 @@ def main(number):
html = etree.fromstring(htmlcode, etree.HTMLParser()) html = etree.fromstring(htmlcode, etree.HTMLParser())
url = getUrl(html) url = getUrl(html)
tags = getTag(html) studio = getStudio(html)
actor,tags = cutTags(tags); tags = getTag(html, studio)
#actor,tags = cutTags(tags) # 演员在tags中的位置不固定放弃尝试获取
actor = ''
dic = { dic = {
# 标题 # 标题
'title': getTitle(html, number), 'title': getTitle(html, number),
# 制作商 # 制作商
'studio': getStudio(html), 'studio': studio,
# 年份 # 年份
'year': getYear(html), 'year': getYear(html),
# 简介 # 简介
@@ -162,3 +166,5 @@ if __name__ == '__main__':
print(main('MD0222')) print(main('MD0222'))
print(main('MD0140-2')) print(main('MD0140-2'))
print(main('MAD039')) print(main('MAD039'))
print(main('JDMY027'))