madou.py:fix get tags
This commit is contained in:
@@ -81,13 +81,15 @@ def getSerise(html): # 获取系列 已修改
|
|||||||
return ''
|
return ''
|
||||||
|
|
||||||
|
|
||||||
def getTag(html): # 获取标签
|
def getTag(html, studio): # 获取标签
|
||||||
return html.xpath('//div[@class="article-tags"]/a/text()')
|
x = html.xpath('/html/head/meta[@name="keywords"]/@content')[0].split(',')
|
||||||
|
return [i.strip() for i in x if len(i.strip()) and studio not in i and '麻豆' not in i]
|
||||||
|
|
||||||
|
|
||||||
def getExtrafanart(html): # 获取剧照
|
def getExtrafanart(html): # 获取剧照
|
||||||
return ''
|
return ''
|
||||||
|
|
||||||
|
|
||||||
def cutTags(tags):
|
def cutTags(tags):
|
||||||
actors = []
|
actors = []
|
||||||
tags = []
|
tags = []
|
||||||
@@ -107,13 +109,15 @@ def main(number):
|
|||||||
|
|
||||||
html = etree.fromstring(htmlcode, etree.HTMLParser())
|
html = etree.fromstring(htmlcode, etree.HTMLParser())
|
||||||
url = getUrl(html)
|
url = getUrl(html)
|
||||||
tags = getTag(html)
|
studio = getStudio(html)
|
||||||
actor,tags = cutTags(tags);
|
tags = getTag(html, studio)
|
||||||
|
#actor,tags = cutTags(tags) # 演员在tags中的位置不固定,放弃尝试获取
|
||||||
|
actor = ''
|
||||||
dic = {
|
dic = {
|
||||||
# 标题
|
# 标题
|
||||||
'title': getTitle(html, number),
|
'title': getTitle(html, number),
|
||||||
# 制作商
|
# 制作商
|
||||||
'studio': getStudio(html),
|
'studio': studio,
|
||||||
# 年份
|
# 年份
|
||||||
'year': getYear(html),
|
'year': getYear(html),
|
||||||
# 简介
|
# 简介
|
||||||
@@ -162,3 +166,5 @@ if __name__ == '__main__':
|
|||||||
print(main('MD0222'))
|
print(main('MD0222'))
|
||||||
print(main('MD0140-2'))
|
print(main('MD0140-2'))
|
||||||
print(main('MAD039'))
|
print(main('MAD039'))
|
||||||
|
print(main('JDMY027'))
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user