Merge pull request #1047 from popjdh/master

将GETCHU设置指定编码(euc-jp)
This commit is contained in:
Yoshiko2
2023-07-09 23:28:15 +08:00
committed by GitHub

View File

@@ -3,6 +3,8 @@
import re
import json
from urllib.parse import quote
from scrapinglib import httprequest
from .parser import Parser
@@ -64,6 +66,18 @@ class wwwGetchu(Parser):
return None
return detailurl.replace('../', 'http://www.getchu.com/')
def getHtml(self, url, type=None):
    """Fetch ``url`` through ``httprequest.get`` with the encoding forced to EUC-JP.

    The instance's cookies, proxies, extra headers and ``verify`` setting are
    forwarded to ``httprequest.get`` together with ``encoding='euc-jp'``.

    Args:
        url: Address of the page to request.
        type: Forwarded unchanged to ``httprequest.get`` as ``return_type``.

    Returns:
        The integer ``404`` when a text response contains one of the known
        "page not found" markers; otherwise the value returned by
        ``httprequest.get``.
    """
    resp = httprequest.get(
        url,
        cookies=self.cookies,
        proxies=self.proxies,
        extra_headers=self.extraheader,
        encoding='euc-jp',
        verify=self.verify,
        return_type=type,
    )
    # The markers below are text, so they can only be searched for in a text
    # response; `str in bytes` raises TypeError.
    # NOTE(review): presumably a non-default ``return_type`` makes
    # ``httprequest.get`` return bytes or a response object - confirm.
    if not isinstance(resp, str):
        return resp
    # Title/body fragments that identify an error page served in place of
    # the requested content.
    not_found_markers = (
        '<title>404 Page Not Found',
        '<title>未找到页面',
        '404 Not Found',
        '<title>404',
        '<title>お探しの商品が見つかりません',
    )
    if any(marker in resp for marker in not_found_markers):
        return 404
    return resp
def getNum(self, htmltree):
    """Build the ``GETCHU-<id>`` number from ``self.detailurl``.

    Args:
        htmltree: Unused by this implementation.

    Returns:
        ``'GETCHU-'`` followed by the first run of digits left in
        ``self.detailurl`` after the soft-page URL prefix is removed.

    Raises:
        IndexError: If no digits remain in the URL.
    """
    # Drop the fixed prefix so only the query value is searched for the id.
    remainder = self.detailurl.replace("http://www.getchu.com/soft.phtml?id=", "")
    # Raw string: '\d' in a normal literal is an invalid escape sequence.
    return 'GETCHU-' + re.findall(r'\d+', remainder)[0]