Merge pull request #478 from lededev/carib-enable-proxy

carib.py: use proxy config settings
Committed by Yoshiko2 via GitHub on 2021-05-08 00:42:42 +08:00

WebCrawler/carib.py · 16 lines changed · Normal file → Executable file

@@ -4,20 +4,14 @@ import json
 from bs4 import BeautifulSoup
 from lxml import html
 import re
-import urllib.request
-import socket
 from ADC_function import *
-def get_html(url):
-    socket.setdefaulttimeout(10)
-    papg = urllib.request.urlopen(url)
-    htm = papg.read()
-    htm = htm.decode("euc_jp")
-    return htm
 def main(number: str) -> json:
     try:
-        caribhtml = get_html('https://www.caribbeancom.com/moviepages/'+number+'/index.html')
+        caribbytes = get_html('https://www.caribbeancom.com/moviepages/'+number+'/index.html',
+                              return_type="content")
+        caribhtml = caribbytes.decode("euc_jp")
         soup = BeautifulSoup(caribhtml, "html.parser")
         lx = html.fromstring(str(soup))
@@ -47,7 +41,7 @@ def main(number: str) -> json:
         'source': 'carib.py',
         'series': '',
     }
-    js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), )  # .encode('UTF-8')
+    js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), )
     return js
 def get_title(lx: html.HtmlElement) -> str:
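
For orientation, below is a minimal sketch of the kind of proxy-aware fetch helper the new call relies on. It is an illustration only: the real get_html in ADC_function reads the proxy settings from the project's config, and its exact signature, config keys, and proxy values are assumptions here, not the project's code. Only the requests library's own API is used.

# Sketch (assumptions noted above): a proxy-aware fetch helper in the spirit
# of ADC_function.get_html; the real helper's signature may differ.
import requests

def get_html(url: str, return_type: str = "text", proxies: dict = None, timeout: int = 10):
    """Fetch url, optionally through a proxy; return decoded text or raw bytes."""
    # Assumed proxy mapping format, e.g.
    # {"http": "http://127.0.0.1:7890", "https": "http://127.0.0.1:7890"}
    resp = requests.get(url, proxies=proxies, timeout=timeout)
    if return_type == "content":
        return resp.content   # raw bytes; the caller chooses the encoding
    return resp.text          # text decoded by requests' charset detection

# Usage mirroring the new carib.py code: request raw bytes, then decode them
# as EUC-JP explicitly, because Caribbeancom pages are served in euc_jp.
number = "000000-000"   # placeholder id, for illustration only
caribbytes = get_html('https://www.caribbeancom.com/moviepages/' + number + '/index.html',
                      return_type="content")
caribhtml = caribbytes.decode("euc_jp")

Routing the request through the shared helper is what lets the project's proxy configuration take effect for carib.py, and asking for return_type="content" keeps the EUC-JP decoding explicit in the crawler instead of relying on automatic charset detection.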