Merge pull request #720 from lededev/md-1

Prioritize madou over javdb
This commit is contained in:
Yoshiko2
2022-03-15 16:55:53 +08:00
committed by GitHub
4 changed files with 30 additions and 17 deletions

View File

@@ -100,6 +100,12 @@ def get_data_from_json(file_number, oCC):
sources.insert(0, sources.pop(sources.index("javdb"))) sources.insert(0, sources.pop(sources.index("javdb")))
if "xcity" in sources: if "xcity" in sources:
sources.insert(0, sources.pop(sources.index("xcity"))) sources.insert(0, sources.pop(sources.index("xcity")))
if "madou" in sources:
sources.insert(0, sources.pop(sources.index("madou")))
elif "madou" in sources and (
re.match(r"^[a-z0-9]{3,}-[0-9]{1,}$", lo_file_number)
):
sources.insert(0, sources.pop(sources.index("madou")))
# check sources in func_mapping # check sources in func_mapping
todel = [] todel = []

View File

@@ -84,7 +84,7 @@ def getTrailer(htmlcode, number):
def main(number): def main(number):
try: try:
number = number.replace('FC2-', '').replace('fc2-', '') number = number.replace('FC2-', '').replace('fc2-', '')
htmlcode2 = ADC_function.get_html('https://adult.contents.fc2.com/article/' + number + '/') htmlcode2 = ADC_function.get_html('https://adult.contents.fc2.com/article/' + number + '/', encoding='utf-8')
actor = getActor_fc2com(htmlcode2) actor = getActor_fc2com(htmlcode2)
if not actor: if not actor:
actor = '素人' actor = '素人'
@@ -123,4 +123,5 @@ def main(number):
if __name__ == '__main__': if __name__ == '__main__':
print(main('FC2-1787685')) print(main('FC2-1787685'))
print(main('FC2-2086710')) print(main('FC2-2086710'))
print(main('FC2-2182382'))

View File

@@ -1,3 +1,5 @@
import sys
sys.path.append('../')
from bs4 import BeautifulSoup # need install from bs4 import BeautifulSoup # need install
from lxml import etree # need install from lxml import etree # need install
from pyquery import PyQuery as pq # need install from pyquery import PyQuery as pq # need install
@@ -5,10 +7,8 @@ from ADC_function import *
import json import json
import re import re
from lib2to3.pgen2 import parse from lib2to3.pgen2 import parse
import sys
from urllib.parse import urlparse, unquote from urllib.parse import urlparse, unquote
sys.path.append('../')
def getActorPhoto(html): def getActorPhoto(html):
@@ -16,12 +16,10 @@ def getActorPhoto(html):
def getTitle(html, number): # 获取标题 def getTitle(html, number): # 获取标题
title = str(html.xpath('//h1[@class="article-title"]/text()')[0]) # <title>MD0140-2 / 家有性事EP2 爱在身边-麻豆社</title>
try: # <title>MAD039 机灵可爱小叫花 强诱僧人迫犯色戒-麻豆社</title>
result = str(re.split(r'[/||-]', title)[1]) browser_title = str(html.xpath("/html/head/title/text()")[0])
return result.strip() return str(re.findall(r'^.*?( / | )(.*)-麻豆社$', browser_title)[0][1]).strip()
except:
return title.replace(number.upper(), '').strip()
def getStudio(html): # 获取厂商 已修改 def getStudio(html): # 获取厂商 已修改
@@ -83,13 +81,15 @@ def getSerise(html): # 获取系列 已修改
return '' return ''
def getTag(html): # 获取标签 def getTag(html, studio): # 获取标签
return html.xpath('//div[@class="article-tags"]/a/text()') x = html.xpath('/html/head/meta[@name="keywords"]/@content')[0].split(',')
return [i.strip() for i in x if len(i.strip()) and studio not in i and '麻豆' not in i]
def getExtrafanart(html): # 获取剧照 def getExtrafanart(html): # 获取剧照
return '' return ''
def cutTags(tags): def cutTags(tags):
actors = [] actors = []
tags = [] tags = []
@@ -109,13 +109,15 @@ def main(number):
html = etree.fromstring(htmlcode, etree.HTMLParser()) html = etree.fromstring(htmlcode, etree.HTMLParser())
url = getUrl(html) url = getUrl(html)
tags = getTag(html) studio = getStudio(html)
actor,tags = cutTags(tags); tags = getTag(html, studio)
#actor,tags = cutTags(tags) # 演员在tags中的位置不固定放弃尝试获取
actor = ''
dic = { dic = {
# 标题 # 标题
'title': getTitle(html, number), 'title': getTitle(html, number),
# 制作商 # 制作商
'studio': getStudio(html), 'studio': studio,
# 年份 # 年份
'year': getYear(html), 'year': getYear(html),
# 简介 # 简介
@@ -161,4 +163,8 @@ def main(number):
if __name__ == '__main__': if __name__ == '__main__':
print(main('MD0094')) print(main('MD0222'))
print(main('MD0140-2'))
print(main('MAD039'))
print(main('JDMY027'))

View File

@@ -41,7 +41,7 @@ max_title_len=50
update_check=1 update_check=1
[priority] [priority]
website=javbus,airav,fanza,xcity,javdb,mgstage,fc2,avsox,dlsite,carib,fc2club website=javbus,airav,fanza,xcity,javdb,mgstage,fc2,avsox,dlsite,carib,fc2club,madou,mv91
[escape] [escape]
literals=\()/ literals=\()/
@@ -116,4 +116,4 @@ sites=33,34
; 人脸识别 hog:方向梯度直方图(不太准确,速度快) cnn:深度学习模型(准确需要GPU/CUDA,速度慢) ; 人脸识别 hog:方向梯度直方图(不太准确,速度快) cnn:深度学习模型(准确需要GPU/CUDA,速度慢)
[face] [face]
locations_model=hog locations_model=hog