Files
AV_Data_Capture/ADC_function.py
2020-08-03 22:57:24 +08:00

110 lines
3.5 KiB
Python
Executable File
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import requests
from lxml import etree
import config
# Proxy schemes that get_proxy() uses verbatim as the URL scheme for both the
# "http" and "https" entries of the proxies mapping.
SUPPORT_PROXY_TYPE = ("http", "socks5", "socks5h")
def get_data_state(data: dict) -> bool:  # metadata fetch-failure check
    """Report whether scraped metadata carries a usable title and number.

    Args:
        data: Metadata mapping expected to hold "title" and "number" keys.

    Returns:
        False when either key is absent, or when either value is None,
        the empty string, or the literal string "null"; True otherwise.
    """
    required = ("title", "number")

    # Both keys must exist before any value is inspected.
    if not all(key in data for key in required):
        return False

    for key in required:
        value = data[key]
        if value is None or value == "" or value == "null":
            return False
    return True
def getXpathSingle(htmlcode, xpath):
    """Evaluate an XPath expression against HTML and return it as a string.

    The markup is parsed with lxml's HTML parser, the XPath result is
    converted with ``str()``, and any leading/trailing spaces, square
    brackets and single quotes are stripped from that text.

    Args:
        htmlcode: HTML markup to parse.
        xpath: XPath expression to evaluate on the parsed document.

    Returns:
        The stripped string form of the XPath result.
    """
    parser = etree.HTMLParser()
    document = etree.fromstring(htmlcode, parser)
    matches = document.xpath(xpath)
    # Strip the list-repr decoration (brackets/quotes) from both ends.
    return str(matches).strip(" ['']")
def get_proxy(proxy: str, proxytype: str = None) -> dict:
    """Build the proxies mapping for a request; defaults to an http proxy.

    Args:
        proxy: Proxy address without a scheme. A falsy value means
            "no proxy".
        proxytype: Optional scheme. When it is one of SUPPORT_PROXY_TYPE it
            is used for both the "http" and "https" entries.

    Returns:
        An empty dict when ``proxy`` is falsy; otherwise a dict with "http"
        and "https" keys. For an unsupported or missing ``proxytype`` the
        entries use the "http://" and "https://" schemes respectively.
    """
    if not proxy:
        return {}

    if proxytype in SUPPORT_PROXY_TYPE:
        address = proxytype + "://" + proxy
        return {"http": address, "https": address}

    return {"http": "http://" + proxy, "https": "https://" + proxy}
# Core web request helper
def get_html(url, cookies: dict = None, ua: str = None, return_type: str = None):
    """Fetch ``url`` with GET, retrying on connection problems.

    Proxy address, timeout, retry count and proxy type are read from
    ``config.Config().proxy()``.

    Args:
        url: Address to request; converted with ``str()`` before use.
        cookies: Optional cookies sent with the request.
        ua: Optional User-Agent header value; a default desktop Chrome
            string is used when omitted.
        return_type: Pass "object" to receive the response object itself;
            any other value returns the response text.

    Returns:
        The response object or its text (encoding forced to UTF-8),
        depending on ``return_type``.

    Raises:
        SystemExit: After every attempt failed and the user acknowledged
            the failure prompt.
    """
    import sys  # local import: only needed for the failure path below

    proxy, timeout, retry_count, proxytype = config.Config().proxy()
    proxies = get_proxy(proxy, proxytype)

    if ua is None:
        headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3100.0 Safari/537.36"}  # noqa
    else:
        headers = {"User-Agent": ua}

    for i in range(retry_count):
        try:
            # get_proxy() yields {} when no proxy is configured, so the
            # mapping can be passed unconditionally.
            result = requests.get(
                str(url),
                headers=headers,
                timeout=timeout,
                proxies=proxies,
                cookies=cookies,
            )
            result.encoding = "utf-8"
            if return_type == "object":
                return result
            return result.text
        except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
            # ConnectionError already covers ProxyError; Timeout is caught as
            # well so that a read timeout triggers a retry instead of an
            # unhandled traceback.
            print("[-]Connect retry {}/{}".format(i + 1, retry_count))

    print('[-]Connect Failed! Please check your Proxy or Network!')
    input("Press ENTER to exit!")
    # sys.exit() instead of the site-provided exit(), which is meant for
    # interactive sessions and is not guaranteed to exist.
    sys.exit()
def post_html(url: str, query: dict) -> requests.Response:
    """Send ``query`` to ``url`` with POST, retrying on connection problems.

    Proxy address, timeout, retry count and proxy type are read from
    ``config.Config().proxy()``.

    Args:
        url: Address to post to.
        query: Form data sent as the request body.

    Returns:
        The response of the first successful attempt.

    Raises:
        SystemExit: After every attempt failed and the user acknowledged
            the failure prompt.
    """
    import sys  # local import: only needed for the failure path below

    proxy, timeout, retry_count, proxytype = config.Config().proxy()
    proxies = get_proxy(proxy, proxytype)

    for i in range(retry_count):
        try:
            # Pass the configured timeout; without it the request can block
            # forever on an unresponsive server.
            return requests.post(url, data=query, proxies=proxies, timeout=timeout)
        except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
            # Same retry policy as get_html(): ConnectionError includes
            # ProxyError, and timeouts are retried as well.
            print("[-]Connect retry {}/{}".format(i+1, retry_count))

    print("[-]Connect Failed! Please check your Proxy or Network!")
    input("Press ENTER to exit!")
    # sys.exit() instead of the site-provided exit(), which is meant for
    # interactive sessions and is not guaranteed to exist.
    sys.exit()
def get_javlib_cookie() -> [dict, str]:
    """Obtain the Cloudflare cookie and matching User-Agent via cloudscraper.

    Proxy settings and the retry count are read from
    ``config.Config().proxy()``. The lookup is attempted up to
    ``retry_count`` times and stops at the first success.

    Returns:
        A ``(raw_cookie, user_agent)`` pair as produced by
        ``cloudscraper.get_cookie_string``. When every attempt fails the
        initial values ``({}, "")`` are returned.
    """
    import cloudscraper

    proxy, timeout, retry_count, proxytype = config.Config().proxy()
    proxies = get_proxy(proxy, proxytype)

    raw_cookie = {}
    user_agent = ""

    # Get __cfduid/cf_clearance and user-agent
    for i in range(retry_count):
        try:
            raw_cookie, user_agent = cloudscraper.get_cookie_string(
                "http://www.m45e.com/",
                proxies=proxies
            )
        except requests.exceptions.ProxyError:
            print("[-] ProxyError, retry {}/{}".format(i+1, retry_count))
        except cloudscraper.exceptions.CloudflareIUAMError:
            print("[-] IUAMError, retry {}/{}".format(i+1, retry_count))
        else:
            # Success: stop here instead of repeating the challenge request
            # for every remaining iteration.
            break

    return raw_cookie, user_agent