deal with websites behind Cloudflare
@@ -14,6 +14,7 @@ from urllib.parse import urljoin
 import mechanicalsoup
 from requests.adapters import HTTPAdapter
 from urllib3.util.retry import Retry
+from cloudscraper import create_scraper
 
 
 def getXpathSingle(htmlcode, xpath):
@@ -25,7 +26,7 @@ def getXpathSingle(htmlcode, xpath):
 G_USER_AGENT = r'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.54 Safari/537.36'
 
 # core web-request helper
-def get_html(url, cookies: dict = None, ua: str = None, return_type: str = None):
+def get_html(url, cookies: dict = None, ua: str = None, return_type: str = None, encoding: str = None):
     verify = config.getInstance().cacert_file()
     configProxy = config.getInstance().proxy()
     errors = ""
@@ -41,13 +42,12 @@ def get_html(url, cookies: dict = None, ua: str = None, return_type: str = None)
         else:
             result = requests.get(str(url), headers=headers, timeout=configProxy.timeout, cookies=cookies)
 
-        result.encoding = "utf-8"
-
         if return_type == "object":
             return result
         elif return_type == "content":
             return result.content
         else:
+            result.encoding = encoding or "utf-8"
             return result.text
     except requests.exceptions.ProxyError:
         print("[-]Proxy error! Please check your Proxy")
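A note on the new encoding parameter: get_html() (and the other helpers below) still defaults to UTF-8, but a caller can now override the charset for sites that serve something else. A minimal caller sketch, assuming the helpers live in ADC_function as the Makefile hunk suggests; the URL is only a placeholder:

from ADC_function import get_html

# UTF-8 remains the default; pass encoding= for pages that declare
# another charset (here a hypothetical Shift_JIS page).
text = get_html('https://example.jp/title.html', encoding='shift_jis')
if text:
    print(text[:200])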
@@ -100,7 +100,7 @@ class TimeoutHTTPAdapter(HTTPAdapter):
 
 
 # with keep-alive feature
-def get_html_session(url:str = None, cookies: dict = None, ua: str = None, return_type: str = None):
+def get_html_session(url:str = None, cookies: dict = None, ua: str = None, return_type: str = None, encoding: str = None):
     configProxy = config.getInstance().proxy()
     session = requests.Session()
     if isinstance(cookies, dict) and len(cookies):
@@ -127,7 +127,7 @@ def get_html_session(url:str = None, cookies: dict = None, ua: str = None, retur
         elif return_type == "session":
             return result, session
         else:
-            result.encoding = "utf-8"
+            result.encoding = encoding or "utf-8"
             return result.text
     except requests.exceptions.ProxyError:
         print("[-]get_html_session() Proxy error! Please check your Proxy")
@@ -136,7 +136,7 @@ def get_html_session(url:str = None, cookies: dict = None, ua: str = None, retur
     return None
 
 
-def get_html_by_browser(url:str = None, cookies: dict = None, ua: str = None, return_type: str = None):
+def get_html_by_browser(url:str = None, cookies: dict = None, ua: str = None, return_type: str = None, encoding: str = None):
     configProxy = config.getInstance().proxy()
     s = requests.Session()
     if isinstance(cookies, dict) and len(cookies):
@@ -155,7 +155,7 @@ def get_html_by_browser(url:str = None, cookies: dict = None, ua: str = None, re
             return browser
         if not result.ok:
             return None
-        result.encoding = "utf-8"
+
         if return_type == "object":
             return result
         elif return_type == "content":
@@ -163,6 +163,7 @@ def get_html_by_browser(url:str = None, cookies: dict = None, ua: str = None, re
         elif return_type == "browser":
             return result, browser
         else:
+            result.encoding = encoding or "utf-8"
             return result.text
     except requests.exceptions.ProxyError:
         print("[-]get_html_by_browser() Proxy error! Please check your Proxy")
@@ -170,7 +171,8 @@ def get_html_by_browser(url:str = None, cookies: dict = None, ua: str = None, re
         print(f'[-]get_html_by_browser() Failed! {e}')
     return None
 
-def get_html_by_form(url, form_select: str = None, fields: dict = None, cookies: dict = None, ua: str = None, return_type: str = None):
+
+def get_html_by_form(url, form_select: str = None, fields: dict = None, cookies: dict = None, ua: str = None, return_type: str = None, encoding: str = None):
     configProxy = config.getInstance().proxy()
     s = requests.Session()
     if isinstance(cookies, dict) and len(cookies):
@@ -191,7 +193,7 @@ def get_html_by_form(url, form_select: str = None, fields: dict = None, cookies:
             for k, v in fields.items():
                 browser[k] = v
         response = browser.submit_selected()
-        response.encoding = "utf-8"
+
         if return_type == "object":
             return response
         elif return_type == "content":
@@ -199,6 +201,7 @@ def get_html_by_form(url, form_select: str = None, fields: dict = None, cookies:
         elif return_type == "browser":
             return response, browser
         else:
+            response.encoding = encoding or "utf-8"
             return response.text
     except requests.exceptions.ProxyError:
         print("[-]get_html_by_form() Proxy error! Please check your Proxy")
@@ -207,6 +210,40 @@ def get_html_by_form(url, form_select: str = None, fields: dict = None, cookies:
         return None
 
 
+def get_html_by_scraper(url:str = None, cookies: dict = None, ua: str = None, return_type: str = None, encoding: str = None):
+    configProxy = config.getInstance().proxy()
+    session = create_scraper(browser={'custom': ua or G_USER_AGENT,})
+    if isinstance(cookies, dict) and len(cookies):
+        requests.utils.add_dict_to_cookiejar(session.cookies, cookies)
+    retries = Retry(total=configProxy.retry, connect=configProxy.retry, backoff_factor=1, status_forcelist=[429, 500, 502, 503, 504])
+    session.mount("https://", TimeoutHTTPAdapter(max_retries=retries, timeout=configProxy.timeout))
+    session.mount("http://", TimeoutHTTPAdapter(max_retries=retries, timeout=configProxy.timeout))
+    if configProxy.enable:
+        session.verify = config.getInstance().cacert_file()
+        session.proxies = configProxy.proxies()
+    try:
+        if isinstance(url, str) and len(url):
+            result = session.get(str(url))
+        else:  # an empty url returns the reusable scraper object directly; no return_type needed
+            return session
+        if not result.ok:
+            return None
+        if return_type == "object":
+            return result
+        elif return_type == "content":
+            return result.content
+        elif return_type == "scraper":
+            return result, session
+        else:
+            result.encoding = encoding or "utf-8"
+            return result.text
+    except requests.exceptions.ProxyError:
+        print("[-]get_html_by_scraper() Proxy error! Please check your Proxy")
+    except Exception as e:
+        print(f"[-]get_html_by_scraper() failed. {e}")
+    return None
+
+
 # def get_javlib_cookie() -> [dict, str]:
 #     import cloudscraper
 #     switch, proxy, timeout, retry_count, proxytype = config.getInstance().proxy()
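get_html_by_scraper() mirrors get_html_session(), but builds the session with cloudscraper's create_scraper() so requests can pass Cloudflare's browser checks. A minimal usage sketch based on the signature above; the URLs are placeholders, and the None checks guard the failure paths where the helper returns None:

from ADC_function import get_html_by_scraper

# One-shot fetch: returns decoded text (UTF-8 unless encoding= is given),
# or None when the request fails.
html = get_html_by_scraper('https://example.com/search?q=ABC-123')

# Keep-alive reuse: return_type='scraper' hands back (response, session)
# so follow-up requests go through the same Cloudflare-cleared session.
fetched = get_html_by_scraper('https://example.com/search?q=ABC-123',
                              return_type='scraper')
if fetched is not None:
    res, scraper = fetched
    detail = scraper.get('https://example.com/v/abc').text  # placeholder path

# Called without a url, the helper returns the configured scraper itself.
scraper_only = get_html_by_scraper()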
@@ -701,31 +738,35 @@ def is_japanese(s) -> bool:
     return bool(re.search(r'[\u3040-\u309F\u30A0-\u30FF\uFF66-\uFF9F]', s, re.UNICODE))
 
 
+# Usage: python ./ADC_function.py https://cn.bing.com/
 if __name__ == "__main__":
     import sys, timeit
     from http.client import HTTPConnection
-    s = get_html_session()
     def benchmark(t, url):
         print(f"HTTP GET Benchmark times:{t} url:{url}")
         tm = timeit.timeit(f"_ = session1.get('{url}')",
                            "from __main__ import get_html_session;session1=get_html_session()",
                            number=t)
-        print(f'===={tm:2.5f}s get_html_session() Keep-Alive enable====')
+        print(f' *{tm:>10.5f}s get_html_session() Keep-Alive enable')
+        tm = timeit.timeit(f"_ = scraper1.get('{url}')",
+                           "from __main__ import get_html_by_scraper;scraper1=get_html_by_scraper()",
+                           number=t)
+        print(f' *{tm:>10.5f}s get_html_by_scraper() Keep-Alive enable')
         tm = timeit.timeit(f"_ = browser1.open('{url}')",
                            "from __main__ import get_html_by_browser;browser1=get_html_by_browser()",
                            number=t)
-        print(f'===={tm:2.5f}s get_html_by_browser() Keep-Alive enable====')
+        print(f' *{tm:>10.5f}s get_html_by_browser() Keep-Alive enable')
         tm = timeit.timeit(f"_ = get_html('{url}')",
                            "from __main__ import get_html",
                            number=t)
-        print(f'===={tm:2.5f}s get_html() ====')
+        print(f' *{tm:>10.5f}s get_html()')
     t = 100
     #url = "https://www.189.cn/"
     url = "http://www.chinaunicom.com"
     HTTPConnection.debuglevel = 1
+    s = get_html_session()
     _ = s.get(url)
     HTTPConnection.debuglevel = 0
-    # Usage: python ./ADC_function.py https://cn.bing.com/
     if len(sys.argv)>1:
         url = sys.argv[1]
     benchmark(t, url)
Makefile
@@ -17,6 +17,7 @@ make:
 
 	@echo "[+]Pyinstaller make"
 	pyinstaller --onefile AV_Data_Capture.py --hidden-import ADC_function.py --hidden-import core.py \
+		--add-data "`python3 -c 'import cloudscraper as _; print(_.__path__[0])' | tail -n 1`:cloudscraper" \
 		--add-data "`python3 -c 'import opencc as _; print(_.__path__[0])' | tail -n 1`:opencc" \
 		--add-data "Img:Img" \
 		--add-data "config.ini:." \
@@ -30,7 +30,7 @@ def getActor(html):
     return r
 
 def getaphoto(url, session):
-    html_page = session.get(url).text if isinstance(session, requests.Session) else get_html(url)
+    html_page = session.get(url).text if session is not None else get_html(url)
     img_prether = re.compile(r'<span class\=\"avatar\" style\=\"background\-image\: url\((.*?)\)')
     img_url = img_prether.findall(html_page)
     if img_url:
@@ -215,14 +215,21 @@ def main(number):
     if debug:
         print(f'[!]javdb:select site {javdb_site}')
     session = None
+    javdb_url = 'https://' + javdb_site + '.com/search?q=' + number + '&f=all'
     try:
-        javdb_url = 'https://' + javdb_site + '.com/search?q=' + number + '&f=all'
+        if debug:
+            raise  # force the get_html_by_scraper() branch
         res, session = get_html_session(javdb_url, cookies=javdb_cookies, return_type='session')
         if not res:
             raise
         query_result = res.text
     except:
-        query_result = get_html('https://javdb.com/search?q=' + number + '&f=all', cookies=javdb_cookies)
+        res, session = get_html_by_scraper(javdb_url, cookies=javdb_cookies, return_type='scraper')
+        if not res:
+            raise ValueError('page not found')
+        query_result = res.text
+    if session is None:
+        raise ValueError('page not found')
     html = etree.fromstring(query_result, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
     # javdb sometimes returns multiple results,
     # and the first element may not be the one we are looking for
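The search request above tries the plain requests session first and only switches to the cloudscraper-backed helper in the except branch. The same fallback pattern, reduced to a standalone sketch (fetch_search_page is a hypothetical wrapper, not part of the project; the function names and return shapes follow the helpers added above):

from ADC_function import get_html_session, get_html_by_scraper

def fetch_search_page(url, cookies=None):  # hypothetical wrapper
    try:
        # Cheap path: a normal keep-alive session.
        res, session = get_html_session(url, cookies=cookies, return_type='session')
        if not res:
            raise ValueError('empty response')
        return res.text, session
    except Exception:
        # Blocked (e.g. a Cloudflare challenge): retry through cloudscraper.
        fetched = get_html_by_scraper(url, cookies=cookies, return_type='scraper')
        if fetched is None:
            raise ValueError('page not found')
        res, scraper = fetched
        return res.text, scraper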
@@ -241,14 +248,12 @@ def main(number):
         raise ValueError("number not found")
     correct_url = urls[0]
     try:
-        if isinstance(session, requests.Session): # get faster benefit from http keep-alive
-            javdb_detail_url = urljoin(res.url, correct_url)
-            detail_page = session.get(javdb_detail_url).text
-        else:
-            javdb_detail_url = 'https://' + javdb_site + '.com' + correct_url
-            detail_page = get_html(javdb_detail_url, cookies=javdb_cookies)
+        # get faster benefit from http keep-alive
+        javdb_detail_url = urljoin(res.url, correct_url)
+        detail_page = session.get(javdb_detail_url).text
     except:
         detail_page = get_html('https://javdb.com' + correct_url, cookies=javdb_cookies)
+        session = None
 
     # etree.fromstring is expensive, so call it only once; its xpath is fast, faster than bs4 find/select, and can be used freely
     lx = etree.fromstring(detail_page, etree.HTMLParser())
@@ -309,7 +314,7 @@ def main(number):
 
 
     except Exception as e:
-        if config.getInstance().debug():
+        if debug:
             print(e)
         dic = {"title": ""}
         js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8')
@@ -324,12 +329,12 @@ if __name__ == "__main__":
     # print(main('BANK-022'))
     # print(main('070116-197'))
     # print(main('093021_539')) # no stills; studio: pacopacomama
-    # print(main('FC2-2278260'))
+    print(main('FC2-2278260'))
     # print(main('FC2-735670'))
     # print(main('FC2-1174949')) # not found
     print(main('MVSD-439'))
     # print(main('EHM0001')) # not found
-    # print(main('FC2-2314275'))
+    print(main('FC2-2314275'))
     # print(main('EBOD-646'))
     # print(main('LOVE-262'))
     print(main('ABP-890'))
@@ -59,7 +59,7 @@ def getStoryline(number, title, sites: list=None):
                 return value
         return ''
     # the debug output below is written to the log; output from the process pool is not, it only shows on stdout
-    s = f'[!]Storyline{G_mode_txt[run_mode]}模式运行{len(apply_sites)}个进程总用时(含启动开销){time.time() - start_time:.3f}秒,结束于{time.strftime("%H:%M:%S")}'
+    s = f'[!]Storyline{G_mode_txt[run_mode]}模式运行{len(apply_sites)}个任务共耗时(含启动开销){time.time() - start_time:.3f}秒,结束于{time.strftime("%H:%M:%S")}'
     first = True
     sel = ''
     for i, site in enumerate(apply_sites):
@@ -175,7 +175,7 @@ def getStoryline_airavwiki(number, debug):
 
     except Exception as e:
         if debug:
-            print(f"[-]MP def getStoryline_airavwiki Error: {e}, number [{number}].")
+            print(f"[-]MP getStoryline_airavwiki Error: {e}, number [{number}].")
         pass
     return ''
 
@@ -190,7 +190,7 @@ def getStoryline_58avgo(number, debug):
     result, browser = get_html_by_form(url,
                                        fields = {'ctl00$TextBox_SearchKeyWord' : kwd},
                                        return_type = 'browser')
-    if not result.ok:
+    if not result:
         raise ValueError(f"get_html_by_form('{url}','{number}') failed")
     if f'searchresults.aspx?Search={kwd}' not in browser.url:
         raise ValueError("number not found")
@@ -219,6 +219,29 @@ def getStoryline_58avgo(number, debug):
 
 
 def getStoryline_avno1(number, debug): # fetch the storyline from avno1.cc
+    try:
+        site = secrets.choice(['1768av.club','2nine.net','av999.tv','avno1.cc',
+                               'hotav.biz','iqq2.xyz','javhq.tv',
+                               'www.hdsex.cc','www.porn18.cc','www.xxx18.cc',])
+        url = f'http://{site}/cn/search.php?kw_type=key&kw={number}'
+        lx = fromstring(get_html_by_scraper(url))
+        descs = lx.xpath('//div[@class="type_movie"]/div/ul/li/div/@data-description')
+        titles = lx.xpath('//div[@class="type_movie"]/div/ul/li/div/a/h3/text()')
+        if not descs or not len(descs):
+            raise ValueError("number not found")
+        for i, title in enumerate(titles):
+            page_number = title[title.rfind(' '):].strip()
+            if re.search(number, page_number, re.I):
+                return descs[i].strip()
+        raise ValueError(f"page number ->[{page_number}] not match")
+    except Exception as e:
+        if debug:
+            print(f"[-]MP getStoryline_avno1 Error: {e}, number [{number}].")
+        pass
+    return ''
+
+
+def getStoryline_avno1OLD(number, debug): # fetch the storyline from avno1.cc
     try:
         url = 'http://www.avno1.cc/cn/' + secrets.choice(['usercenter.php?item=' +
             secrets.choice(['pay_support', 'qa', 'contact', 'guide-vpn']),
@@ -343,6 +366,8 @@ def amazon_select_one(a_titles, q_title, number, debug):
     if pos < 0:
         if category(char) == 'Nd':
             return -1
+        if re.match(r'[\u4e00|\u4e8c|\u4e09|\u56db|\u4e94|\u516d|\u4e03|\u516b|\u4e5d|\u5341]', char, re.U):
+            return -1
     ama_t = ama_t[:cloc]
     findlen = 0
     lastpos = -1
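The added check extends the existing category(char) == 'Nd' digit test: a CJK numeral at that position now also makes amazon_select_one() bail out with -1. A standalone look at what the character class matches (illustrative only; the pattern is copied from the diff and covers the numerals 一 through 十, the '|' separators inside [...] being redundant but harmless):

import re

pattern = r'[\u4e00|\u4e8c|\u4e09|\u56db|\u4e94|\u516d|\u4e03|\u516b|\u4e5d|\u5341]'
for char in ('三', '十', '7', '話'):
    # Matches only the Chinese numerals; ASCII digits are handled by
    # the unicodedata.category(char) == 'Nd' branch above it.
    print(char, bool(re.match(pattern, char, re.U)))
# -> 三 True, 十 True, 7 False, 話 False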