Fix get_html() & post_html()
This commit is contained in:
@@ -10,6 +10,7 @@ import config
|
|||||||
|
|
||||||
SUPPORT_PROXY_TYPE = ("http", "socks5", "socks5h")
|
SUPPORT_PROXY_TYPE = ("http", "socks5", "socks5h")
|
||||||
|
|
||||||
|
|
||||||
def get_data_state(data: dict) -> bool: # 元数据获取失败检测
|
def get_data_state(data: dict) -> bool: # 元数据获取失败检测
|
||||||
if "title" not in data or "number" not in data:
|
if "title" not in data or "number" not in data:
|
||||||
return False
|
return False
|
||||||
@@ -23,7 +24,7 @@ def get_data_state(data: dict) -> bool: # 元数据获取失败检测
|
|||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
def getXpathSingle(htmlcode,xpath):
|
def getXpathSingle(htmlcode, xpath):
|
||||||
html = etree.fromstring(htmlcode, etree.HTMLParser())
|
html = etree.fromstring(htmlcode, etree.HTMLParser())
|
||||||
result1 = str(html.xpath(xpath)).strip(" ['']")
|
result1 = str(html.xpath(xpath)).strip(" ['']")
|
||||||
return result1
|
return result1
|
||||||
@@ -45,19 +46,22 @@ def get_proxy(proxy: str, proxytype: str = None) -> dict:
|
|||||||
|
|
||||||
# 网页请求核心
|
# 网页请求核心
|
||||||
def get_html(url, cookies: dict = None, ua: str = None, return_type: str = None):
|
def get_html(url, cookies: dict = None, ua: str = None, return_type: str = None):
|
||||||
verify=config.Config().cacert_file()
|
verify = config.Config().cacert_file()
|
||||||
switch, proxy, timeout, retry_count, proxytype = config.Config().proxy()
|
switch, proxy, timeout, retry_count, proxytype = config.Config().proxy()
|
||||||
proxies = get_proxy(proxy, proxytype)
|
proxies = get_proxy(proxy, proxytype)
|
||||||
|
errors = ""
|
||||||
|
|
||||||
if ua is None:
|
if ua is None:
|
||||||
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3100.0 Safari/537.36"} # noqa
|
headers = {
|
||||||
|
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3100.0 Safari/537.36"} # noqa
|
||||||
else:
|
else:
|
||||||
headers = {"User-Agent": ua}
|
headers = {"User-Agent": ua}
|
||||||
|
|
||||||
for i in range(retry_count):
|
for i in range(retry_count):
|
||||||
try:
|
try:
|
||||||
if switch == '1' or switch == 1:
|
if switch == '1' or switch == 1:
|
||||||
result = requests.get(str(url), headers=headers, timeout=timeout, proxies=proxies, verify=verify, cookies=cookies)
|
result = requests.get(str(url), headers=headers, timeout=timeout, proxies=proxies, verify=verify,
|
||||||
|
cookies=cookies)
|
||||||
else:
|
else:
|
||||||
result = requests.get(str(url), headers=headers, timeout=timeout, cookies=cookies)
|
result = requests.get(str(url), headers=headers, timeout=timeout, cookies=cookies)
|
||||||
|
|
||||||
@@ -74,13 +78,15 @@ def get_html(url, cookies: dict = None, ua: str = None, return_type: str = None)
|
|||||||
return
|
return
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print("[-]Connect retry {}/{}".format(i + 1, retry_count))
|
print("[-]Connect retry {}/{}".format(i + 1, retry_count))
|
||||||
print("[-]" + str(e))
|
errors = str(e)
|
||||||
print('[-]Connect Failed! Please check your Proxy or Network!')
|
print('[-]Connect Failed! Please check your Proxy or Network!')
|
||||||
|
print("[-]" + errors)
|
||||||
|
|
||||||
|
|
||||||
def post_html(url: str, query: dict, headers: dict = None) -> requests.Response:
|
def post_html(url: str, query: dict, headers: dict = None) -> requests.Response:
|
||||||
switch, proxy, timeout, retry_count, proxytype = config.Config().proxy()
|
switch, proxy, timeout, retry_count, proxytype = config.Config().proxy()
|
||||||
proxies = get_proxy(proxy, proxytype)
|
proxies = get_proxy(proxy, proxytype)
|
||||||
|
errors = ""
|
||||||
headers_ua = {
|
headers_ua = {
|
||||||
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3100.0 Safari/537.36"}
|
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3100.0 Safari/537.36"}
|
||||||
if headers is None:
|
if headers is None:
|
||||||
@@ -95,9 +101,11 @@ def post_html(url: str, query: dict, headers: dict = None) -> requests.Response:
|
|||||||
else:
|
else:
|
||||||
result = requests.post(url, data=query, headers=headers, timeout=timeout)
|
result = requests.post(url, data=query, headers=headers, timeout=timeout)
|
||||||
return result
|
return result
|
||||||
except requests.exceptions.ProxyError:
|
except Exception as e:
|
||||||
print("[-]Connect retry {}/{}".format(i+1, retry_count))
|
print("[-]Connect retry {}/{}".format(i + 1, retry_count))
|
||||||
|
errors = str(e)
|
||||||
print("[-]Connect Failed! Please check your Proxy or Network!")
|
print("[-]Connect Failed! Please check your Proxy or Network!")
|
||||||
|
print("[-]" + errors)
|
||||||
|
|
||||||
|
|
||||||
def get_javlib_cookie() -> [dict, str]:
|
def get_javlib_cookie() -> [dict, str]:
|
||||||
@@ -121,12 +129,13 @@ def get_javlib_cookie() -> [dict, str]:
|
|||||||
"http://www.javlibrary.com/"
|
"http://www.javlibrary.com/"
|
||||||
)
|
)
|
||||||
except requests.exceptions.ProxyError:
|
except requests.exceptions.ProxyError:
|
||||||
print("[-] ProxyError, retry {}/{}".format(i+1, retry_count))
|
print("[-] ProxyError, retry {}/{}".format(i + 1, retry_count))
|
||||||
except cloudscraper.exceptions.CloudflareIUAMError:
|
except cloudscraper.exceptions.CloudflareIUAMError:
|
||||||
print("[-] IUAMError, retry {}/{}".format(i+1, retry_count))
|
print("[-] IUAMError, retry {}/{}".format(i + 1, retry_count))
|
||||||
|
|
||||||
return raw_cookie, user_agent
|
return raw_cookie, user_agent
|
||||||
|
|
||||||
|
|
||||||
def translateTag_to_sc(tag):
|
def translateTag_to_sc(tag):
|
||||||
tranlate_to_sc = config.Config().transalte_to_sc()
|
tranlate_to_sc = config.Config().transalte_to_sc()
|
||||||
if tranlate_to_sc:
|
if tranlate_to_sc:
|
||||||
@@ -445,13 +454,12 @@ def translateTag_to_sc(tag):
|
|||||||
'处男', 'ドキュメンタリー': '记录片', 'ドラッグ・媚薬': '药局', 'ドラマ': '电视剧', 'ニューハーフ': '变性人',
|
'处男', 'ドキュメンタリー': '记录片', 'ドラッグ・媚薬': '药局', 'ドラマ': '电视剧', 'ニューハーフ': '变性人',
|
||||||
'ニーソックス': '过膝袜', '妊婦': '孕妇', '寝取り・寝取られ': '睡下', 'HowTo': 'HowTo',
|
'ニーソックス': '过膝袜', '妊婦': '孕妇', '寝取り・寝取られ': '睡下', 'HowTo': 'HowTo',
|
||||||
|
|
||||||
|
# fc2
|
||||||
#fc2
|
|
||||||
'美人': '美女', 'ハメ撮り': '拍鸽子', 'フェチ': '恋物癖',
|
'美人': '美女', 'ハメ撮り': '拍鸽子', 'フェチ': '恋物癖',
|
||||||
'コスプレ・制服': 'COSPLAY制服', '自分撮り':'自拍', 'その他': '其他', 'OL・お姉さん': 'OL姐姐', 'ゲイ': '同性恋',
|
'コスプレ・制服': 'COSPLAY制服', '自分撮り': '自拍', 'その他': '其他', 'OL・お姉さん': 'OL姐姐', 'ゲイ': '同性恋',
|
||||||
'3P・乱交':'3P・乱交', '野外・露出': '野外露出', '海外': '国外', 'レズ': '女士', 'アニメ': '动画',
|
'3P・乱交': '3P・乱交', '野外・露出': '野外露出', '海外': '国外', 'レズ': '女士', 'アニメ': '动画',
|
||||||
'アダルト': '成人', 'アイドル': '空闲', '個人撮影': '个人摄影', '無修正': '无修正', 'コスプレ':'角色扮演',
|
'アダルト': '成人', 'アイドル': '空闲', '個人撮影': '个人摄影', '無修正': '无修正', 'コスプレ': '角色扮演',
|
||||||
'下着': '内衣', '水着': '游泳衣', 'パンチラ': '小册子', 'フェラ': '口交', 'モデル': '模型','中出し': '中出', '可愛い': '可爱',
|
'下着': '内衣', '水着': '游泳衣', 'パンチラ': '小册子', 'フェラ': '口交', 'モデル': '模型', '中出し': '中出', '可愛い': '可爱',
|
||||||
'オリジナル': '原始', '貧乳': '贫乳', 'オナニー': '自慰', 'パイパン': '菠萝', 'ロリ': '萝莉', '生ハメ': '第一人称'
|
'オリジナル': '原始', '貧乳': '贫乳', 'オナニー': '自慰', 'パイパン': '菠萝', 'ロリ': '萝莉', '生ハメ': '第一人称'
|
||||||
}
|
}
|
||||||
try:
|
try:
|
||||||
@@ -461,6 +469,7 @@ def translateTag_to_sc(tag):
|
|||||||
else:
|
else:
|
||||||
return tag
|
return tag
|
||||||
|
|
||||||
|
|
||||||
def translate(
|
def translate(
|
||||||
src: str,
|
src: str,
|
||||||
target_language: str = "zh_cn",
|
target_language: str = "zh_cn",
|
||||||
@@ -511,7 +520,7 @@ def translate(
|
|||||||
'X-ClientTraceId': str(uuid.uuid4())
|
'X-ClientTraceId': str(uuid.uuid4())
|
||||||
}
|
}
|
||||||
body = json.dumps([{'text': src}])
|
body = json.dumps([{'text': src}])
|
||||||
result = post_html(url=url,query=body,headers=headers)
|
result = post_html(url=url, query=body, headers=headers)
|
||||||
translate_list = [i["text"] for i in result.json()[0]["translations"]]
|
translate_list = [i["text"] for i in result.json()[0]["translations"]]
|
||||||
trans_result = trans_result.join(translate_list)
|
trans_result = trans_result.join(translate_list)
|
||||||
|
|
||||||
@@ -521,6 +530,7 @@ def translate(
|
|||||||
time.sleep(delay)
|
time.sleep(delay)
|
||||||
return trans_result
|
return trans_result
|
||||||
|
|
||||||
|
|
||||||
# ========================================================================是否为无码
|
# ========================================================================是否为无码
|
||||||
def is_uncensored(number):
|
def is_uncensored(number):
|
||||||
if re.match('^\d{4,}', number) or re.match('n\d{4}', number) or 'HEYZO' in number.upper():
|
if re.match('^\d{4,}', number) or re.match('n\d{4}', number) or 'HEYZO' in number.upper():
|
||||||
|
|||||||
Reference in New Issue
Block a user