improve javbus and javdb outline source

This commit is contained in:
lededev
2021-10-12 11:28:17 +08:00
parent 678a8f9bc8
commit f8dc05a38b
4 changed files with 20 additions and 8 deletions

View File

@@ -2,7 +2,7 @@ from os import replace
  import requests
  import hashlib
  from pathlib import Path
- #import secrets
+ import secrets
  import os.path
  import uuid
  import json

View File

@@ -93,8 +93,12 @@ def getOutline0(number): #获取剧情介绍 airav.wiki站点404函数暂时
  return ''
  def getOutline(number): #获取剧情介绍 从avno1.cc取得
  try:
+ url = 'http://www.avno1.cc/cn/' + secrets.choice(['usercenter.php?item=' +
+ secrets.choice(['pay_support', 'qa', 'contact', 'guide-vpn']),
+ '?top=1&cat=hd', '?top=1', '?cat=hd', 'porn', '?cat=jp', '?cat=us', 'recommend_category.php'
+ ]) # 随机选一个避免网站httpd日志中单个ip的请求太过单一
  number_up = number.upper()
- result, browser = get_html_by_form('http://www.avno1.cc/cn/usercenter.php?item=pay_support',
+ result, browser = get_html_by_form(url,
  form_select='div.wrapper > div.header > div.search > form',
  fields = {'kw' : number_up},
  return_type = 'browser')
@@ -107,6 +111,12 @@ def getOutline(number): #获取剧情介绍 从avno1.cc取得
  return browser.page.select('div.type_movie > div > ul > li:nth-child(1) > div')[0]['data-description'].strip()
  except:
  pass
+ from WebCrawler.xcity import open_by_browser, getOutline as xcity_getOutline
+ try:
+ detail_html, browser = open_by_browser(number_up)
+ return xcity_getOutline(detail_html)
+ except:
+ pass
  return ''
  def getSerise(htmlcode): #获取系列 已修改
  html = etree.fromstring(htmlcode, etree.HTMLParser())

View File

@@ -5,7 +5,6 @@ from lxml import etree
  import json
  from bs4 import BeautifulSoup
  from ADC_function import *
- import secrets
  # import sys
  # import io
  # sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)

View File

@@ -181,11 +181,10 @@ def getExtrafanart(htmlcode): # 获取剧照
  return s
  return ''
- def main(number):
+ def open_by_browser(number):
- try:
  xcity_number = number.replace('-','')
  query_result, browser = get_html_by_form(
- 'https://xcity.jp/about/',
+ 'https://xcity.jp/' + secrets.choice(['about/','sitemap/','policy/','law/','help/','main/']),
  fields = {'q' : xcity_number.lower()},
  return_type = 'browser')
  if not query_result or not query_result.ok:
@@ -193,12 +192,16 @@ def main(number):
  result = browser.follow_link(browser.links('avod\/detail')[0])
  if not result.ok:
  raise ValueError("xcity.py: detail page not found")
- detail_page = str(browser.page)
+ return str(browser.page), browser
+ def main(number):
+ try:
+ detail_page, browser = open_by_browser(number)
  url = browser.url
  newnum = getNum(detail_page).upper()
  number_up = number.upper()
  if newnum != number_up:
- if newnum == xcity_number.upper():
+ if newnum == number.replace('-','').upper():
  newnum = number_up
  else:
  raise ValueError("xcity.py: number not found")