From 1460e2962da56f51516cf1c03e5abc7c08ac8026 Mon Sep 17 00:00:00 2001
From: lededev <lededev@noreplay.github.com>
Date: Thu, 6 May 2021 02:07:53 +0800
Subject: [PATCH] carib.py: use proxy config settings

---
 WebCrawler/carib.py | 16 +++++-----------
 1 file changed, 5 insertions(+), 11 deletions(-)
 mode change 100644 => 100755 WebCrawler/carib.py

diff --git a/WebCrawler/carib.py b/WebCrawler/carib.py
old mode 100644
new mode 100755
index 2e5f9ec..6896683
--- a/WebCrawler/carib.py
+++ b/WebCrawler/carib.py
@@ -4,20 +4,14 @@ import json
 from bs4 import BeautifulSoup
 from lxml import html
 import re
-import urllib.request
-import socket
 from ADC_function import *
 
-def get_html(url):
-    socket.setdefaulttimeout(10)
-    papg = urllib.request.urlopen(url)
-    htm = papg.read()
-    htm = htm.decode("euc_jp")
-    return htm
-
 def main(number: str) -> json:
     try:
-        caribhtml = get_html('https://www.caribbeancom.com/moviepages/'+number+'/index.html')
+        caribbytes = get_html('https://www.caribbeancom.com/moviepages/'+number+'/index.html',
+                             return_type="content")
+
+        caribhtml = caribbytes.decode("euc_jp")
 
         soup = BeautifulSoup(caribhtml, "html.parser")
         lx = html.fromstring(str(soup))
@@ -47,7 +41,7 @@ def main(number: str) -> json:
         'source': 'carib.py',
         'series': '',
     }
-    js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), )  # .encode('UTF-8')
+    js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), )
     return js
 
 def get_title(lx: html.HtmlElement) -> str: