Merge pull request #478 from lededev/carib-enable-proxy
carib.py: use proxy config settings
WebCrawler/carib.py | 16 | Normal file → Executable file
@@ -4,20 +4,14 @@ import json
 from bs4 import BeautifulSoup
 from lxml import html
 import re
-import urllib.request
-import socket
 from ADC_function import *
 
 
-def get_html(url):
-    socket.setdefaulttimeout(10)
-    papg = urllib.request.urlopen(url)
-    htm = papg.read()
-    htm = htm.decode("euc_jp")
-    return htm
 
 def main(number: str) -> json:
     try:
-        caribhtml = get_html('https://www.caribbeancom.com/moviepages/'+number+'/index.html')
+        caribbytes = get_html('https://www.caribbeancom.com/moviepages/'+number+'/index.html',
+                              return_type="content")
+        caribhtml = caribbytes.decode("euc_jp")
         soup = BeautifulSoup(caribhtml, "html.parser")
         lx = html.fromstring(str(soup))
@@ -47,7 +41,7 @@ def main(number: str) -> json:
         'source': 'carib.py',
         'series': '',
     }
-    js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), )  # .encode('UTF-8')
+    js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), )
     return js
 
 def get_title(lx: html.HtmlElement) -> str:
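
Note on the change: main() no longer fetches the page with its own urllib-based get_html; it now calls the shared get_html helper from ADC_function with return_type="content" and decodes the EUC-JP bytes itself. Below is a minimal sketch of how that fetch path fits together. The call signature and the decode step are taken from the diff above; the body of get_html shown here is purely illustrative, since the real helper lives in ADC_function and its requests/proxy details are assumptions based on the commit message, not the project's actual code.

    # Illustrative sketch only: the real get_html lives in ADC_function and its
    # internals are not shown in this diff. Assumed behaviour: it honours the
    # proxy configured for the project and, with return_type="content", returns
    # the raw response bytes so the caller can decode them itself.
    from typing import Optional, Union
    import requests

    def get_html(url: str, return_type: Optional[str] = None,
                 proxy: str = "") -> Union[str, bytes]:
        # Hypothetical proxy plumbing; the actual helper reads its proxy from the project config.
        proxies = {"http": proxy, "https": proxy} if proxy else None
        resp = requests.get(url, proxies=proxies, timeout=10)
        if return_type == "content":
            return resp.content          # raw bytes; the caller chooses the charset
        return resp.text

    # What carib.py does with the helper after this commit (taken from the diff):
    def fetch_carib_page(number: str) -> str:
        caribbytes = get_html('https://www.caribbeancom.com/moviepages/'+number+'/index.html',
                              return_type="content")
        return caribbytes.decode("euc_jp")   # Caribbeancom pages are EUC-JP, not UTF-8

Returning raw bytes for return_type="content" is what lets the crawler keep the site-specific EUC-JP decode while delegating the network layer, including any configured proxy, to the shared helper.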
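
For completeness, a small usage sketch of main() as declared in the diff. It assumes the snippet runs inside the repository so WebCrawler/carib.py and ADC_function are importable; the movie number is a placeholder, not an id taken from the source.

    import json
    from WebCrawler import carib   # assumes WebCrawler/ is importable as a package

    js = carib.main("000000-000")  # placeholder id, for illustration only
    data = json.loads(js)          # main() returns a JSON string (ensure_ascii=False, sort_keys=True, indent=4)
    print(data.get("title", ""))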