0.6 更新:Javbus抓取的元数据支持标签功能,优化无码视频抓取

This commit is contained in:
wenead99
2019-06-02 23:10:01 +08:00
committed by GitHub
parent cfccd00367
commit 532c5bfbe3
4 changed files with 124 additions and 44 deletions

View File

@@ -31,6 +31,11 @@ def getOutline(htmlcode,number): #获取番号
html = etree.fromstring(a, etree.HTMLParser())
result = str(html.xpath('//*[@id="container"]/div[1]/div/article/section[4]/p/text()')).replace("\\n",'',10000).strip(" ['']").replace("'",'',10000)
return result
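# def getTag(htmlcode,number): # fetch tags -- disabled placeholder: the XPath below is the same one getOutline uses, so it does not select tags yet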
# a = ADC_function.get_html('http://adult.contents.fc2.com/article_search.php?id=' + str(number).lstrip("FC2-").lstrip("fc2-").lstrip("fc2_").lstrip("fc2-") + '&utm_source=aff_php&utm_medium=source_code&utm_campaign=from_aff_php')
# html = etree.fromstring(a, etree.HTMLParser())
# result = str(html.xpath('//*[@id="container"]/div[1]/div/article/section[4]/p/text()')).replace("\\n",'',10000).strip(" ['']").replace("'",'',10000)
# return result
def main(number):
str(number).lstrip("FC2-").lstrip("fc2-").lstrip("fc2_").lstrip("fc2-")
@@ -47,6 +52,7 @@ def main(number):
'number': number,
'cover': getCover(htmlcode,number),
'imagecut': 0,
'tag':" ",
}
js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'),)#.encode('UTF-8')
return js