剧情简介:新增无码元数据站点,配置文件改为通用、有码、无码三种站点分列
This commit is contained in:
@@ -60,14 +60,9 @@ def get_year(lx: html.HtmlElement) -> str:
|
|||||||
|
|
||||||
def get_outline(lx: html.HtmlElement, number: str, title: str) -> str:
|
def get_outline(lx: html.HtmlElement, number: str, title: str) -> str:
|
||||||
o = lx.xpath("//div[@class='movie-info section']/p[@itemprop='description']/text()")[0].strip()
|
o = lx.xpath("//div[@class='movie-info section']/p[@itemprop='description']/text()")[0].strip()
|
||||||
|
g = getStoryline(number, title)
|
||||||
storyline_site = config.getInstance().storyline_site().split(',')
|
if len(g):
|
||||||
a = set(storyline_site) & {'airav', 'avno1'}
|
return g
|
||||||
if len(a):
|
|
||||||
site = [n for n in storyline_site if n in a]
|
|
||||||
g = getStoryline(number, title, site)
|
|
||||||
if len(g):
|
|
||||||
return g
|
|
||||||
return o
|
return o
|
||||||
|
|
||||||
def get_release(lx: html.HtmlElement) -> str:
|
def get_release(lx: html.HtmlElement) -> str:
|
||||||
|
|||||||
@@ -8,8 +8,9 @@ from multiprocessing import Pool
|
|||||||
from multiprocessing.dummy import Pool as ThreadPool
|
from multiprocessing.dummy import Pool as ThreadPool
|
||||||
from difflib import SequenceMatcher
|
from difflib import SequenceMatcher
|
||||||
from unicodedata import category
|
from unicodedata import category
|
||||||
|
from number_parser import is_uncensored
|
||||||
|
|
||||||
G_registered_storyline_site = {"airav", "avno1", "xcity", "amazon"}
|
G_registered_storyline_site = {"airav", "avno1", "xcity", "amazon", "58avgo"}
|
||||||
|
|
||||||
G_mode_txt = ('顺序执行','线程池','进程池')
|
G_mode_txt = ('顺序执行','线程池','进程池')
|
||||||
|
|
||||||
@@ -28,7 +29,16 @@ def getStoryline(number, title, sites: list=None):
|
|||||||
conf = config.getInstance()
|
conf = config.getInstance()
|
||||||
debug = conf.debug() or conf.storyline_show() == 2
|
debug = conf.debug() or conf.storyline_show() == 2
|
||||||
storyine_sites = conf.storyline_site().split(',') if sites is None else sites
|
storyine_sites = conf.storyline_site().split(',') if sites is None else sites
|
||||||
apply_sites = [ s for s in storyine_sites if s in G_registered_storyline_site]
|
if is_uncensored(number):
|
||||||
|
storyine_sites += conf.storyline_uncensored_site().split(',')
|
||||||
|
else:
|
||||||
|
storyine_sites += conf.storyline_censored_site().split(',')
|
||||||
|
r_dup = set()
|
||||||
|
apply_sites = []
|
||||||
|
for s in storyine_sites:
|
||||||
|
if s in G_registered_storyline_site and s not in r_dup:
|
||||||
|
apply_sites.append(s)
|
||||||
|
r_dup.add(s)
|
||||||
mp_args = ((site, number, title, debug) for site in apply_sites)
|
mp_args = ((site, number, title, debug) for site in apply_sites)
|
||||||
cores = min(len(apply_sites), os.cpu_count())
|
cores = min(len(apply_sites), os.cpu_count())
|
||||||
if cores == 0:
|
if cores == 0:
|
||||||
@@ -80,6 +90,8 @@ def _getStoryline_mp(site, number, title, debug):
|
|||||||
storyline = getStoryline_xcity(number, debug)
|
storyline = getStoryline_xcity(number, debug)
|
||||||
elif site == "amazon":
|
elif site == "amazon":
|
||||||
storyline = getStoryline_amazon(title, number, debug)
|
storyline = getStoryline_amazon(title, number, debug)
|
||||||
|
elif site == "58avgo":
|
||||||
|
storyline = getStoryline_58avgo(number, debug)
|
||||||
if not debug:
|
if not debug:
|
||||||
return storyline
|
return storyline
|
||||||
print("[!]MP 进程[{}]运行{:.3f}秒,结束于{}返回结果: {}".format(
|
print("[!]MP 进程[{}]运行{:.3f}秒,结束于{}返回结果: {}".format(
|
||||||
@@ -119,24 +131,63 @@ def getStoryline_airav(number, debug):
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def getStoryline_58avgo(number, debug):
    """Fetch the storyline text for ``number`` from 58avgo.com.

    Submits the site's search form, picks the first search result whose
    title contains ``number`` (case-insensitive), follows it to the detail
    page, checks that the bracketed number in the page title matches, and
    returns the description text.

    Args:
        number: Movie number to look up. When it starts with six digits
            followed by ``-``/``_`` and 2-3 more digits, only the first six
            characters are used as the search keyword.
        debug: When truthy, print the reason a lookup failed.

    Returns:
        The stripped storyline string, or ``''`` when the lookup fails for
        any reason (every ``Exception`` raised inside is caught here).
    """
    try:
        # Pick a random entry page so requests from a single IP look less
        # uniform in the site's httpd log.
        url = 'http://58avgo.com/cn/index.aspx' + secrets.choice([
            '', '?status=3', '?status=4', '?status=7', '?status=9', '?status=10', '?status=11', '?status=12',
            # The last entry previously lacked its leading '?', producing
            # the malformed path 'index.aspxstatus=1&Sort=dateproduce'.
            '?status=1&Sort=Playon', '?status=1&Sort=dateupload', '?status=1&Sort=dateproduce',
        ])
        kwd = number[:6] if re.match(r'\d{6}[\-_]\d{2,3}', number) else number
        result, browser = get_html_by_form(
            url,
            fields={'ctl00$TextBox_SearchKeyWord': kwd},
            return_type='browser')
        if not result.ok:
            raise ValueError(f"get_html_by_form('{url}','{number}') failed")
        if f'searchresults.aspx?Search={kwd}' not in browser.url:
            raise ValueError("number not found")

        # Match the number literally: it is an identifier, not a pattern, so
        # characters such as '.' must not act as regex metacharacters.
        number_re = re.compile(re.escape(number), re.I)
        candidates = browser.page.select(
            'div.resultcontent > ul > li.listItem > div.one-info-panel.one > a.ga_click')
        link = next(
            (a for a in candidates if number_re.search(a.h3.text.strip())),
            None)
        if link is None:
            raise ValueError("number not found")

        result = browser.follow_link(link)
        if not result.ok or 'playon.aspx' not in browser.url:
            raise ValueError("detail page not found")

        # The detail page title carries the number inside square brackets.
        title = browser.page.select('head > title')[0].text.strip()
        detail_number = str(re.findall(r'\[(.*?)]', title)[0])
        if not number_re.search(detail_number):
            raise ValueError(f"detail page number not match, got ->[{detail_number}]")
        return browser.page.select('#ContentPlaceHolder1_Label2')[0].text.strip()
    except Exception as e:
        # Deliberate best-effort: any failure means "no storyline from this
        # site", reported only when debugging.
        if debug:
            print(f"[-]MP getOutline_58avgo Error: {e}, number [{number}].")
    return ''
|
||||||
|
|
||||||
|
|
||||||
def getStoryline_avno1(number, debug): #获取剧情介绍 从avno1.cc取得
|
def getStoryline_avno1(number, debug): #获取剧情介绍 从avno1.cc取得
|
||||||
try:
|
try:
|
||||||
url = 'http://www.avno1.cc/cn/' + secrets.choice(['usercenter.php?item=' +
|
url = 'http://www.avno1.cc/cn/' + secrets.choice(['usercenter.php?item=' +
|
||||||
secrets.choice(['pay_support', 'qa', 'contact', 'guide-vpn']),
|
secrets.choice(['pay_support', 'qa', 'contact', 'guide-vpn']),
|
||||||
'?top=1&cat=hd', '?top=1', '?cat=hd', 'porn', '?cat=jp', '?cat=us', 'recommend_category.php'
|
'?top=1&cat=hd', '?top=1', '?cat=hd', 'porn', '?cat=jp', '?cat=us', 'recommend_category.php'
|
||||||
]) # 随机选一个,避免网站httpd日志中单个ip的请求太过单一
|
]) # 随机选一个,避免网站httpd日志中单个ip的请求太过单一
|
||||||
number_up = number.upper()
|
|
||||||
result, browser = get_html_by_form(url,
|
result, browser = get_html_by_form(url,
|
||||||
form_select='div.wrapper > div.header > div.search > form',
|
form_select='div.wrapper > div.header > div.search > form',
|
||||||
fields = {'kw' : number_up},
|
fields = {'kw' : number},
|
||||||
return_type = 'browser')
|
return_type = 'browser')
|
||||||
if not result.ok:
|
if not result.ok:
|
||||||
raise ValueError(f"get_html_by_form('{url}','{number_up}') failed")
|
raise ValueError(f"get_html_by_form('{url}','{number}') failed")
|
||||||
title = browser.page.select('div.type_movie > div > ul > li > div > a > h3')[0].text.strip()
|
s = browser.page.select('div.type_movie > div > ul > li > div')
|
||||||
page_number = title[title.rfind(' '):].upper()
|
for i in range(len(s)):
|
||||||
if not number_up in page_number:
|
title = s[i].a.h3.text.strip()
|
||||||
raise ValueError(f"page number ->[{page_number}] not match")
|
page_number = title[title.rfind(' '):].strip()
|
||||||
return browser.page.select('div.type_movie > div > ul > li:nth-child(1) > div')[0]['data-description'].strip()
|
if re.search(number, page_number, re.I):
|
||||||
|
return s[i]['data-description'].strip()
|
||||||
|
raise ValueError(f"page number ->[{page_number}] not match")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
if debug:
|
if debug:
|
||||||
print(f"[-]MP getOutline_avno1 Error: {e}, number [{number}].")
|
print(f"[-]MP getOutline_avno1 Error: {e}, number [{number}].")
|
||||||
|
|||||||
@@ -122,7 +122,7 @@ def getDirector(html):
|
|||||||
|
|
||||||
def getOutline(html, number, title):
|
def getOutline(html, number, title):
|
||||||
storyline_site = config.getInstance().storyline_site().split(',')
|
storyline_site = config.getInstance().storyline_site().split(',')
|
||||||
a = set(storyline_site) & {'airav', 'avno1'}
|
a = set(storyline_site) & {'airav', 'avno1'} # 只要中文的简介文字
|
||||||
if len(a):
|
if len(a):
|
||||||
site = [n for n in storyline_site if n in a]
|
site = [n for n in storyline_site if n in a]
|
||||||
g = getStoryline(number, title, site)
|
g = getStoryline(number, title, site)
|
||||||
|
|||||||
13
config.ini
13
config.ini
@@ -86,11 +86,16 @@ extrafanart_folder=extrafanart
|
|||||||
|
|
||||||
; 剧情简介
|
; 剧情简介
|
||||||
[storyline]
|
[storyline]
|
||||||
; website为javbus或javdb时,site为获取剧情简介信息的可选数据源站点列表。列表内站点同时并发查询,取值优先级
|
; website为javbus javdb avsox xcity carib时,site censored_site uncensored_site 为获取剧情简介信息的
|
||||||
; 从左到右,靠左站点没数据才会采用后面站点获得的。其中airav和avno1是中文剧情简介,xcity和amazon是日语的,由
|
; 可选数据源站点列表。列表内站点同时并发查询,取值优先级从左到右,靠左站点没数据才会采用后面站点获得的。
|
||||||
; 于amazon商城没有番号信息,选中对应DVD的准确率仅99.6%。如果列表为空则不查询,设置成不查询可大幅提高刮削速度。
|
; 其中airav avno1 58avgo是中文剧情简介,区别是airav只能查有码,avno1有码无码都能查,58avgo只能查无码或者
|
||||||
|
; 流出破解马赛克的影片(此功能没使用)。
|
||||||
|
; xcity和amazon是日语的,由于amazon商城没有番号信息,选中对应DVD的准确率仅99.6%。如果三个列表全部为空则不查询,
|
||||||
|
; 设置成不查询可大幅提高刮削速度。
|
||||||
; site=
|
; site=
|
||||||
site=airav,avno1,xcity,amazon
|
site=avno1
|
||||||
|
censored_site=airav,xcity,amazon
|
||||||
|
uncensored_site=58avgo
|
||||||
; 运行模式:0:顺序执行(最慢) 1:线程池(默认值) 2:进程池(启动开销比线程池大,并发站点越多越快)
|
; 运行模式:0:顺序执行(最慢) 1:线程池(默认值) 2:进程池(启动开销比线程池大,并发站点越多越快)
|
||||||
run_mode=1
|
run_mode=1
|
||||||
; show_result剧情简介调试信息 0关闭 1简略 2详细(详细部分不记入日志),剧情简介失效时可打开2查看原因
|
; show_result剧情简介调试信息 0关闭 1简略 2详细(详细部分不记入日志),剧情简介失效时可打开2查看原因
|
||||||
|
|||||||
18
config.py
18
config.py
@@ -243,7 +243,19 @@ class Config:
|
|||||||
try:
|
try:
|
||||||
return self.conf.get("storyline", "site")
|
return self.conf.get("storyline", "site")
|
||||||
except:
|
except:
|
||||||
return "airav,avno1,xcity,amazon"
|
return "avno1"
|
||||||
|
|
||||||
|
def storyline_censored_site(self) -> str:
    """Return the ``censored_site`` value from the ``[storyline]`` section.

    The value is a comma-separated list of storyline data-source sites
    (callers split it on ``','``).

    Returns:
        The configured string, or the default ``"airav,xcity,amazon"`` when
        the value cannot be read.
    """
    try:
        return self.conf.get("storyline", "censored_site")
    except Exception:
        # Fall back on any lookup error, but let KeyboardInterrupt and
        # SystemExit propagate (a bare ``except:`` would swallow them).
        return "airav,xcity,amazon"
|
||||||
|
|
||||||
|
def storyline_uncensored_site(self) -> str:
    """Return the ``uncensored_site`` value from the ``[storyline]`` section.

    The value is a comma-separated list of storyline data-source sites
    (callers split it on ``','``).

    Returns:
        The configured string, or the default ``"58avgo"`` when the value
        cannot be read.
    """
    try:
        return self.conf.get("storyline", "uncensored_site")
    except Exception:
        # Fall back on any lookup error, but let KeyboardInterrupt and
        # SystemExit propagate (a bare ``except:`` would swallow them).
        return "58avgo"
|
||||||
|
|
||||||
def storyline_show(self) -> int:
|
def storyline_show(self) -> int:
|
||||||
try:
|
try:
|
||||||
@@ -354,7 +366,9 @@ class Config:
|
|||||||
|
|
||||||
sec14 = "storyline"
|
sec14 = "storyline"
|
||||||
conf.add_section(sec14)
|
conf.add_section(sec14)
|
||||||
conf.set(sec14, "site", "airav,avno1,xcity,amazon")
|
conf.set(sec14, "site", "avno1")
|
||||||
|
conf.set(sec14, "censored_site", "airav,xcity,amazon")
|
||||||
|
conf.set(sec14, "uncensored_site", "58avgo")
|
||||||
conf.set(sec14, "show_result", 0)
|
conf.set(sec14, "show_result", 0)
|
||||||
conf.set(sec14, "run_mode", 1)
|
conf.set(sec14, "run_mode", 1)
|
||||||
|
|
||||||
|
|||||||
5
core.py
5
core.py
@@ -566,10 +566,7 @@ def core_main(file_path, number_th):
|
|||||||
c_word = '-C' # 中文字幕影片后缀
|
c_word = '-C' # 中文字幕影片后缀
|
||||||
|
|
||||||
# 判断是否无码
|
# 判断是否无码
|
||||||
if is_uncensored(number):
|
uncensored = 1 if is_uncensored(number) else 0
|
||||||
uncensored = 1
|
|
||||||
else:
|
|
||||||
uncensored = 0
|
|
||||||
|
|
||||||
|
|
||||||
if '流出' in filepath or 'uncensored' in filepath:
|
if '流出' in filepath or 'uncensored' in filepath:
|
||||||
|
|||||||
@@ -71,7 +71,7 @@ G_TAKE_NUM_RULES = {
|
|||||||
'10mu' : lambda x:str(re.search(r'\d{6}(-|_)\d{2}', x, re.I).group()).replace('-', '_'),
|
'10mu' : lambda x:str(re.search(r'\d{6}(-|_)\d{2}', x, re.I).group()).replace('-', '_'),
|
||||||
'x-art' : lambda x:str(re.search(r'x-art\.\d{2}\.\d{2}\.\d{2}', x, re.I).group()),
|
'x-art' : lambda x:str(re.search(r'x-art\.\d{2}\.\d{2}\.\d{2}', x, re.I).group()),
|
||||||
'xxx-av': lambda x:''.join(['xxx-av-', re.findall(r'xxx-av[^\d]*(\d{3,5})[^\d]*', x, re.I)[0]]),
|
'xxx-av': lambda x:''.join(['xxx-av-', re.findall(r'xxx-av[^\d]*(\d{3,5})[^\d]*', x, re.I)[0]]),
|
||||||
'heydouga': lambda x:'heydouga-' + '-'.join(re.findall(r'(\d{4})[-|_]{1}(\d{3,4})[^\d]*', x, re.I)[0])
|
'heydouga': lambda x:'heydouga-' + '-'.join(re.findall(r'(\d{4})[\-_](\d{3,4})[^\d]*', x, re.I)[0])
|
||||||
}
|
}
|
||||||
|
|
||||||
def get_number_by_dict(filename: str) -> str:
|
def get_number_by_dict(filename: str) -> str:
|
||||||
|
|||||||
Reference in New Issue
Block a user