@@ -100,6 +100,12 @@ def get_data_from_json(file_number, oCC):
|
|||||||
sources.insert(0, sources.pop(sources.index("javdb")))
|
sources.insert(0, sources.pop(sources.index("javdb")))
|
||||||
if "xcity" in sources:
|
if "xcity" in sources:
|
||||||
sources.insert(0, sources.pop(sources.index("xcity")))
|
sources.insert(0, sources.pop(sources.index("xcity")))
|
||||||
|
if "madou" in sources:
|
||||||
|
sources.insert(0, sources.pop(sources.index("madou")))
|
||||||
|
elif "madou" in sources and (
|
||||||
|
re.match(r"^[a-z0-9]{3,}-[0-9]{1,}$", lo_file_number)
|
||||||
|
):
|
||||||
|
sources.insert(0, sources.pop(sources.index("madou")))
|
||||||
|
|
||||||
# check sources in func_mapping
|
# check sources in func_mapping
|
||||||
todel = []
|
todel = []
|
||||||
|
|||||||
@@ -84,7 +84,7 @@ def getTrailer(htmlcode, number):
|
|||||||
def main(number):
|
def main(number):
|
||||||
try:
|
try:
|
||||||
number = number.replace('FC2-', '').replace('fc2-', '')
|
number = number.replace('FC2-', '').replace('fc2-', '')
|
||||||
htmlcode2 = ADC_function.get_html('https://adult.contents.fc2.com/article/' + number + '/')
|
htmlcode2 = ADC_function.get_html('https://adult.contents.fc2.com/article/' + number + '/', encoding='utf-8')
|
||||||
actor = getActor_fc2com(htmlcode2)
|
actor = getActor_fc2com(htmlcode2)
|
||||||
if not actor:
|
if not actor:
|
||||||
actor = '素人'
|
actor = '素人'
|
||||||
@@ -123,4 +123,5 @@ def main(number):
|
|||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
print(main('FC2-1787685'))
|
print(main('FC2-1787685'))
|
||||||
print(main('FC2-2086710'))
|
print(main('FC2-2086710'))
|
||||||
|
print(main('FC2-2182382'))
|
||||||
|
|
||||||
|
|||||||
@@ -1,3 +1,5 @@
|
|||||||
|
import sys
|
||||||
|
sys.path.append('../')
|
||||||
from bs4 import BeautifulSoup # need install
|
from bs4 import BeautifulSoup # need install
|
||||||
from lxml import etree # need install
|
from lxml import etree # need install
|
||||||
from pyquery import PyQuery as pq # need install
|
from pyquery import PyQuery as pq # need install
|
||||||
@@ -5,10 +7,8 @@ from ADC_function import *
|
|||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
from lib2to3.pgen2 import parse
|
from lib2to3.pgen2 import parse
|
||||||
import sys
|
|
||||||
|
|
||||||
from urllib.parse import urlparse, unquote
|
from urllib.parse import urlparse, unquote
|
||||||
sys.path.append('../')
|
|
||||||
|
|
||||||
|
|
||||||
def getActorPhoto(html):
|
def getActorPhoto(html):
|
||||||
@@ -16,12 +16,10 @@ def getActorPhoto(html):
|
|||||||
|
|
||||||
|
|
||||||
def getTitle(html, number): # 获取标题
|
def getTitle(html, number): # 获取标题
|
||||||
title = str(html.xpath('//h1[@class="article-title"]/text()')[0])
|
# <title>MD0140-2 / 家有性事EP2 爱在身边-麻豆社</title>
|
||||||
try:
|
# <title>MAD039 机灵可爱小叫花 强诱僧人迫犯色戒-麻豆社</title>
|
||||||
result = str(re.split(r'[/|/|-]', title)[1])
|
browser_title = str(html.xpath("/html/head/title/text()")[0])
|
||||||
return result.strip()
|
return str(re.findall(r'^.*?( / | )(.*)-麻豆社$', browser_title)[0][1]).strip()
|
||||||
except:
|
|
||||||
return title.replace(number.upper(), '').strip()
|
|
||||||
|
|
||||||
|
|
||||||
def getStudio(html): # 获取厂商 已修改
|
def getStudio(html): # 获取厂商 已修改
|
||||||
@@ -83,13 +81,15 @@ def getSerise(html): # 获取系列 已修改
|
|||||||
return ''
|
return ''
|
||||||
|
|
||||||
|
|
||||||
def getTag(html): # 获取标签
|
def getTag(html, studio): # 获取标签
|
||||||
return html.xpath('//div[@class="article-tags"]/a/text()')
|
x = html.xpath('/html/head/meta[@name="keywords"]/@content')[0].split(',')
|
||||||
|
return [i.strip() for i in x if len(i.strip()) and studio not in i and '麻豆' not in i]
|
||||||
|
|
||||||
|
|
||||||
def getExtrafanart(html): # 获取剧照
|
def getExtrafanart(html): # 获取剧照
|
||||||
return ''
|
return ''
|
||||||
|
|
||||||
|
|
||||||
def cutTags(tags):
|
def cutTags(tags):
|
||||||
actors = []
|
actors = []
|
||||||
tags = []
|
tags = []
|
||||||
@@ -109,13 +109,15 @@ def main(number):
|
|||||||
|
|
||||||
html = etree.fromstring(htmlcode, etree.HTMLParser())
|
html = etree.fromstring(htmlcode, etree.HTMLParser())
|
||||||
url = getUrl(html)
|
url = getUrl(html)
|
||||||
tags = getTag(html)
|
studio = getStudio(html)
|
||||||
actor,tags = cutTags(tags);
|
tags = getTag(html, studio)
|
||||||
|
#actor,tags = cutTags(tags) # 演员在tags中的位置不固定,放弃尝试获取
|
||||||
|
actor = ''
|
||||||
dic = {
|
dic = {
|
||||||
# 标题
|
# 标题
|
||||||
'title': getTitle(html, number),
|
'title': getTitle(html, number),
|
||||||
# 制作商
|
# 制作商
|
||||||
'studio': getStudio(html),
|
'studio': studio,
|
||||||
# 年份
|
# 年份
|
||||||
'year': getYear(html),
|
'year': getYear(html),
|
||||||
# 简介
|
# 简介
|
||||||
@@ -161,4 +163,8 @@ def main(number):
|
|||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
print(main('MD0094'))
|
print(main('MD0222'))
|
||||||
|
print(main('MD0140-2'))
|
||||||
|
print(main('MAD039'))
|
||||||
|
print(main('JDMY027'))
|
||||||
|
|
||||||
|
|||||||
@@ -41,7 +41,7 @@ max_title_len=50
|
|||||||
update_check=1
|
update_check=1
|
||||||
|
|
||||||
[priority]
|
[priority]
|
||||||
website=javbus,airav,fanza,xcity,javdb,mgstage,fc2,avsox,dlsite,carib,fc2club
|
website=javbus,airav,fanza,xcity,javdb,mgstage,fc2,avsox,dlsite,carib,fc2club,madou,mv91
|
||||||
|
|
||||||
[escape]
|
[escape]
|
||||||
literals=\()/
|
literals=\()/
|
||||||
@@ -116,4 +116,4 @@ sites=33,34
|
|||||||
|
|
||||||
; 人脸识别 hog:方向梯度直方图(不太准确,速度快) cnn:深度学习模型(准确,需要GPU/CUDA,速度慢)
|
; 人脸识别 hog:方向梯度直方图(不太准确,速度快) cnn:深度学习模型(准确,需要GPU/CUDA,速度慢)
|
||||||
[face]
|
[face]
|
||||||
locations_model=hog
|
locations_model=hog
|
||||||
|
|||||||
Reference in New Issue
Block a user