@@ -100,6 +100,12 @@ def get_data_from_json(file_number, oCC):
|
||||
sources.insert(0, sources.pop(sources.index("javdb")))
|
||||
if "xcity" in sources:
|
||||
sources.insert(0, sources.pop(sources.index("xcity")))
|
||||
if "madou" in sources:
|
||||
sources.insert(0, sources.pop(sources.index("madou")))
|
||||
elif "madou" in sources and (
|
||||
re.match(r"^[a-z0-9]{3,}-[0-9]{1,}$", lo_file_number)
|
||||
):
|
||||
sources.insert(0, sources.pop(sources.index("madou")))
|
||||
|
||||
# check sources in func_mapping
|
||||
todel = []
|
||||
|
||||
@@ -84,7 +84,7 @@ def getTrailer(htmlcode, number):
|
||||
def main(number):
|
||||
try:
|
||||
number = number.replace('FC2-', '').replace('fc2-', '')
|
||||
htmlcode2 = ADC_function.get_html('https://adult.contents.fc2.com/article/' + number + '/')
|
||||
htmlcode2 = ADC_function.get_html('https://adult.contents.fc2.com/article/' + number + '/', encoding='utf-8')
|
||||
actor = getActor_fc2com(htmlcode2)
|
||||
if not actor:
|
||||
actor = '素人'
|
||||
@@ -123,4 +123,5 @@ def main(number):
|
||||
if __name__ == '__main__':
|
||||
print(main('FC2-1787685'))
|
||||
print(main('FC2-2086710'))
|
||||
print(main('FC2-2182382'))
|
||||
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
import sys
|
||||
sys.path.append('../')
|
||||
from bs4 import BeautifulSoup # need install
|
||||
from lxml import etree # need install
|
||||
from pyquery import PyQuery as pq # need install
|
||||
@@ -5,10 +7,8 @@ from ADC_function import *
|
||||
import json
|
||||
import re
|
||||
from lib2to3.pgen2 import parse
|
||||
import sys
|
||||
|
||||
from urllib.parse import urlparse, unquote
|
||||
sys.path.append('../')
|
||||
|
||||
|
||||
def getActorPhoto(html):
|
||||
@@ -16,12 +16,10 @@ def getActorPhoto(html):
|
||||
|
||||
|
||||
def getTitle(html, number): # 获取标题
|
||||
title = str(html.xpath('//h1[@class="article-title"]/text()')[0])
|
||||
try:
|
||||
result = str(re.split(r'[/|/|-]', title)[1])
|
||||
return result.strip()
|
||||
except:
|
||||
return title.replace(number.upper(), '').strip()
|
||||
# <title>MD0140-2 / 家有性事EP2 爱在身边-麻豆社</title>
|
||||
# <title>MAD039 机灵可爱小叫花 强诱僧人迫犯色戒-麻豆社</title>
|
||||
browser_title = str(html.xpath("/html/head/title/text()")[0])
|
||||
return str(re.findall(r'^.*?( / | )(.*)-麻豆社$', browser_title)[0][1]).strip()
|
||||
|
||||
|
||||
def getStudio(html): # 获取厂商 已修改
|
||||
@@ -83,13 +81,15 @@ def getSerise(html): # 获取系列 已修改
|
||||
return ''
|
||||
|
||||
|
||||
def getTag(html): # 获取标签
|
||||
return html.xpath('//div[@class="article-tags"]/a/text()')
|
||||
def getTag(html, studio): # 获取标签
|
||||
x = html.xpath('/html/head/meta[@name="keywords"]/@content')[0].split(',')
|
||||
return [i.strip() for i in x if len(i.strip()) and studio not in i and '麻豆' not in i]
|
||||
|
||||
|
||||
def getExtrafanart(html): # 获取剧照
|
||||
return ''
|
||||
|
||||
|
||||
def cutTags(tags):
|
||||
actors = []
|
||||
tags = []
|
||||
@@ -109,13 +109,15 @@ def main(number):
|
||||
|
||||
html = etree.fromstring(htmlcode, etree.HTMLParser())
|
||||
url = getUrl(html)
|
||||
tags = getTag(html)
|
||||
actor,tags = cutTags(tags);
|
||||
studio = getStudio(html)
|
||||
tags = getTag(html, studio)
|
||||
#actor,tags = cutTags(tags) # 演员在tags中的位置不固定,放弃尝试获取
|
||||
actor = ''
|
||||
dic = {
|
||||
# 标题
|
||||
'title': getTitle(html, number),
|
||||
# 制作商
|
||||
'studio': getStudio(html),
|
||||
'studio': studio,
|
||||
# 年份
|
||||
'year': getYear(html),
|
||||
# 简介
|
||||
@@ -161,4 +163,8 @@ def main(number):
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
print(main('MD0094'))
|
||||
print(main('MD0222'))
|
||||
print(main('MD0140-2'))
|
||||
print(main('MAD039'))
|
||||
print(main('JDMY027'))
|
||||
|
||||
|
||||
@@ -41,7 +41,7 @@ max_title_len=50
|
||||
update_check=1
|
||||
|
||||
[priority]
|
||||
website=javbus,airav,fanza,xcity,javdb,mgstage,fc2,avsox,dlsite,carib,fc2club
|
||||
website=javbus,airav,fanza,xcity,javdb,mgstage,fc2,avsox,dlsite,carib,fc2club,madou,mv91
|
||||
|
||||
[escape]
|
||||
literals=\()/
|
||||
|
||||
Reference in New Issue
Block a user