def search(keyword):
    """Search airav for *keyword* and return every matching record.

    Pages through ``/api/video/list`` until the number of records reported
    by the server (``count``) has been collected.

    Args:
        keyword: Text to search for (a number such as ``'ADN-188'`` or a
            name). It is percent-encoded before being put into the URL.

    Returns:
        A list with the ``result`` entries of every page, in server order.
        An empty list is returned when a response cannot be decoded or does
        not have the expected ``offset``/``count``/``result`` fields.
    """
    # Function-scope import: the module's import block is not visible here.
    from urllib.parse import quote

    # Example response (abridged):
    # {"offset": 0, "count": 4, "result": [
    #     {"vid": "99-07-15076", "slug": "Wrop6o", "name": "...", "url": "",
    #      "view": 98,
    #      "img_url": "https://wiki-img.airav.wiki/storage/big_pic/99-07-15076.jpg",
    #      "barcode": "_1pondo_012717_472"},
    #     ...
    # ], "status": "ok"}

    # Encode once; characters such as '&', '#' or spaces would otherwise
    # terminate or corrupt the ``search`` query parameter.
    query = quote(str(keyword), safe='')

    result = []
    page = 1
    while page > 0:
        search_result = get_html(
            host + '/api/video/list?lang=zh-TW&lng=jp&search=' + query
            + '&page=' + str(page)
        )

        try:
            json_data = json.loads(search_result)
            result_offset = int(json_data["offset"])
            result_count = int(json_data["count"])
            page_items = json_data["result"]
            result_size = len(page_items)
        except (TypeError, ValueError, KeyError):
            # TypeError: get_html returned a non-string (e.g. None);
            # ValueError: invalid JSON (JSONDecodeError is a subclass);
            # KeyError: the payload lacks one of the expected fields.
            print("[-]Json decoder error!")
            return []

        if result_count <= 0 or result_size <= 0:
            return result

        if result_offset < len(result):
            # The server handed back a page we already collected (e.g. it
            # ignored ``page``); stop instead of looping forever.
            break

        page_end = result_offset + result_size
        if result_count > page_end:
            # More records remain: keep this page and request the next one.
            result.extend(page_items)
            page += 1
        elif result_count == page_end:
            # This was the last page.
            result.extend(page_items)
            page = 0
        else:
            # Inconsistent counters (page runs past ``count``): stop.
            page = 0

    return result
"""Small logger with console colouring, size-based file rotation and an
in-memory buffer of the most recent lines (retrievable as JSON)."""

import os
import sys
import time
from datetime import datetime
import traceback
import threading
import json
import shutil
import types

CRITICAL = 50
FATAL = CRITICAL
ERROR = 40
WARNING = 30
WARN = WARNING
INFO = 20
DEBUG = 10
NOTSET = 0

# Level names accepted by Logger.setLevel, mapped to numeric thresholds.
_LEVEL_NAMES = {
    "DEBUG": DEBUG,
    "INFO": INFO,
    "WARN": WARN,
    "WARNING": WARNING,
    "ERROR": ERROR,
    "FATAL": FATAL,
    "CRITICAL": CRITICAL,
}


def _safe_format(fmt, args):
    """Return ``fmt % args``; fall back to the raw message if that fails.

    A log call must never raise just because the message contains a stray
    ``%`` or the argument count does not match the format string.
    """
    try:
        return fmt % args
    except (TypeError, ValueError, KeyError):
        if args:
            return '%s %r' % (fmt, args)
        return str(fmt)


class Logger():
    """Logger writing to stderr, optionally to a rotating file, and to an
    in-memory buffer keyed by a running line number."""

    def __init__(self, name, buffer_size=0, file_name=None, roll_num=1):
        """Create a logger.

        Args:
            name: Logger name; written into every file line.
            buffer_size: Number of recent lines kept in memory (0 disables).
            file_name: Optional log file path; opened in append mode.
            roll_num: Number of rotated files (``<file>.1`` ... ``<file>.N``).
        """
        self.err_color = '\033[0m'
        self.warn_color = '\033[0m'
        self.debug_color = '\033[0m'
        self.reset_color = '\033[0m'
        self.set_console_color = lambda color: sys.stderr.write(color)
        self.name = str(name)
        self.file_max_size = 1024 * 1024
        self.buffer_lock = threading.Lock()
        self.buffer = {}  # line number => line
        self.buffer_size = buffer_size
        self.last_no = 0
        self.min_level = NOTSET
        self.log_fd = None
        self.log_filename = None
        self.file_size = 0
        self.roll_num = roll_num
        if file_name:
            self.set_file(file_name)

    def _trim_buffer(self):
        """Drop the oldest buffered lines until at most buffer_size remain.

        The caller must hold ``buffer_lock``.
        """
        excess = len(self.buffer) - self.buffer_size
        if excess > 0:
            for key in sorted(self.buffer)[:excess]:
                del self.buffer[key]

    def _remember(self, line):
        """Append *line* to the buffer under the next line number.

        The caller must hold ``buffer_lock``.
        """
        self.last_no += 1
        self.buffer[self.last_no] = line
        if len(self.buffer) > self.buffer_size:
            # Line numbers are consecutive, so the entry leaving the window
            # has a known key; pop() tolerates it being absent.
            self.buffer.pop(self.last_no - self.buffer_size, None)

    def set_buffer(self, buffer_size):
        """Change the in-memory buffer capacity, discarding the oldest lines."""
        with self.buffer_lock:
            self.buffer_size = buffer_size
            self._trim_buffer()

    def setLevel(self, level):
        """Set the minimum level that is logged.

        Args:
            level: One of "DEBUG", "INFO", "WARN"/"WARNING", "ERROR",
                "FATAL"/"CRITICAL", or one of the numeric level constants.
                An unsupported value is reported and leaves the level as is.
        """
        if isinstance(level, int) and not isinstance(level, bool):
            self.min_level = level
            return
        try:
            self.min_level = _LEVEL_NAMES[level]
        except (KeyError, TypeError):
            print("log level not support:%s" % (level,))

    def set_color(self):
        """Enable coloured console output when stderr is a terminal.

        When stderr is not a terminal, colouring is turned off entirely.
        """
        self.err_color = None
        self.warn_color = None
        self.debug_color = None
        self.reset_color = None
        self.set_console_color = lambda x: None
        if hasattr(sys.stderr, 'isatty') and sys.stderr.isatty():
            if os.name == 'nt':
                self.err_color = 0x04
                self.warn_color = 0x06
                self.debug_color = 0x002
                self.reset_color = 0x07

                import ctypes
                SetConsoleTextAttribute = ctypes.windll.kernel32.SetConsoleTextAttribute
                GetStdHandle = ctypes.windll.kernel32.GetStdHandle
                # -11 is the handle id passed to GetStdHandle by the original code.
                self.set_console_color = lambda color: SetConsoleTextAttribute(GetStdHandle(-11), color)

            elif os.name == 'posix':
                self.err_color = '\033[31m'
                self.warn_color = '\033[33m'
                self.debug_color = '\033[32m'
                self.reset_color = '\033[0m'

                self.set_console_color = lambda color: sys.stderr.write(color)

    def set_file(self, file_name):
        """Start logging to *file_name* (append mode), rotating it first if
        it already exceeds ``file_max_size``."""
        if self.log_fd:
            # Do not leak the handle of a previously configured file.
            try:
                self.log_fd.close()
            except OSError:
                pass
            self.log_fd = None

        self.log_filename = file_name
        if os.path.isfile(file_name):
            self.file_size = os.path.getsize(file_name)
            if self.file_size > self.file_max_size:
                self.roll_log()
                self.file_size = 0
        else:
            self.file_size = 0

        self.log_fd = open(file_name, "a+")

    def roll_log(self):
        """Shift ``<file>.N-1`` to ``<file>.N`` and move the live file to
        ``<file>.1``."""
        for i in range(self.roll_num, 1, -1):
            new_name = "%s.%d" % (self.log_filename, i)
            old_name = "%s.%d" % (self.log_filename, i - 1)
            if not os.path.isfile(old_name):
                continue

            shutil.move(old_name, new_name)

        shutil.move(self.log_filename, self.log_filename + ".1")

    def log_console(self, level, console_color, fmt, *args, **kwargs):
        """Write one line to stderr, wrapped in the given console colour."""
        try:
            console_string = '[%s] %s\n' % (level, _safe_format(fmt, args))
            self.set_console_color(console_color)
            sys.stderr.write(console_string)
            self.set_console_color(self.reset_color)
        except Exception:
            # Console output is best-effort; never let it break the caller.
            pass

    def log_to_file(self, level, console_color, fmt, *args, **kwargs):
        """Write one line to the log file, if one is configured.

        Level ``'e'`` writes the message verbatim (used for tracebacks);
        every other level gets a timestamp, the logger name and the level.
        The file is rotated once it grows past ``file_max_size``.
        """
        if self.log_fd:
            if level == 'e':
                string = '%s' % _safe_format(fmt, args)
            else:
                time_str = datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f")[:23]
                string = '%s [%s] [%s] %s\n' % (time_str, self.name, level, _safe_format(fmt, args))

            self.log_fd.write(string)
            try:
                self.log_fd.flush()
            except Exception:
                pass

            self.file_size += len(string)
            if self.file_size > self.file_max_size:
                self.log_fd.close()
                self.log_fd = None
                self.roll_log()
                self.log_fd = open(self.log_filename, "w")
                self.file_size = 0

    def log(self, level, console_color, html_color, fmt, *args, **kwargs):
        """Emit one message to console, file and the in-memory buffer.

        Args:
            level: One-character level tag written into the line.
            console_color: Colour value handed to ``set_console_color``.
            html_color: Accepted for interface compatibility; unused here.
            fmt, *args: %-style message and its arguments.
        """
        message = _safe_format(fmt, args)
        with self.buffer_lock:
            try:
                self.log_console(level, console_color, fmt, *args, **kwargs)

                self.log_to_file(level, console_color, fmt, *args, **kwargs)

                if self.buffer_size:
                    self._remember('%s - [%s] %s\n' % (time.ctime()[4:-5], level, message))
            except Exception as e:
                # Record the failure itself so it is visible via the buffer API.
                string = '%s - [%s]LOG_EXCEPT: %s, Except:%s\n%s' % (
                    time.ctime()[4:-5], level, message, e, traceback.format_exc())
                self._remember(string)

    def debug(self, fmt, *args, **kwargs):
        """Log at DEBUG level."""
        if self.min_level > DEBUG:
            return
        self.log('-', self.debug_color, '21610b', fmt, *args, **kwargs)

    def info(self, fmt, *args, **kwargs):
        """Log at INFO level."""
        if self.min_level > INFO:
            return
        self.log('+', self.reset_color, '000000', fmt, *args, **kwargs)

    def warning(self, fmt, *args, **kwargs):
        """Log at WARNING level."""
        if self.min_level > WARN:
            return
        self.log('#', self.warn_color, 'FF8000', fmt, *args, **kwargs)

    def warn(self, fmt, *args, **kwargs):
        """Alias of :meth:`warning`."""
        self.warning(fmt, *args, **kwargs)

    def error(self, fmt, *args, **kwargs):
        """Log at ERROR level."""
        if self.min_level > ERROR:
            return
        self.log('!', self.err_color, 'FE2E2E', fmt, *args, **kwargs)

    def exception(self, fmt, *args, **kwargs):
        """Log at ERROR level and append the current traceback to the file."""
        self.error(fmt, *args, **kwargs)
        # Pass the traceback as an argument: it may itself contain '%'.
        self.log_to_file('e', self.err_color, '%s', traceback.format_exc())

    def critical(self, fmt, *args, **kwargs):
        """Log at CRITICAL level."""
        if self.min_level > CRITICAL:
            return
        self.log('!', self.err_color, 'D7DF01', fmt, *args, **kwargs)

    def tofile(self, fmt, *args, **kwargs):
        """Write a message to the log file only (no console, no buffer)."""
        self.log_to_file('@', self.warn_color, fmt, *args, **kwargs)

    #=================================================================
    def set_buffer_size(self, set_size):
        """Same as :meth:`set_buffer`; kept for existing callers."""
        self.set_buffer(set_size)

    def get_last_lines(self, max_lines):
        """Return the newest *max_lines* buffered lines as a JSON object
        mapping line number to text."""
        with self.buffer_lock:
            available = min(max(max_lines, 0), len(self.buffer))
            first_no = self.last_no - available + 1
            jd = {}
            for i in range(first_no, self.last_no + 1):
                if i in self.buffer:
                    jd[i] = self.unicode_line(self.buffer[i])
        return json.dumps(jd)

    def get_new_lines(self, from_no):
        """Return buffered lines numbered *from_no* or higher as a JSON
        object mapping line number to text."""
        with self.buffer_lock:
            jd = {}
            first_no = self.last_no - len(self.buffer) + 1
            if from_no < first_no:
                from_no = first_no

            for i in range(from_no, self.last_no + 1):
                if i in self.buffer:
                    jd[i] = self.unicode_line(self.buffer[i])
        return json.dumps(jd)

    def unicode_line(self, line):
        """Return *line* as ``str``, decoding bytes and ignoring bad input."""
        try:
            if isinstance(line, str):
                return line
            else:
                return str(line, errors='ignore')
        except Exception as e:
            print(("unicode err:%r" % e))
            print(("line can't decode:%s" % line))
            print(("Except stack:%s" % traceback.format_exc()))
            return ""


loggerDict = {}


def getLogger(name=None, buffer_size=0, file_name=None, roll_num=1):
    """Return the logger registered under *name*, creating it if needed.

    With ``name=None`` the first registered logger is returned, or a logger
    called ``"default"`` is created. A newly created logger also becomes the
    target of the module-level helper functions.

    Raises:
        TypeError: if *name* is neither ``str`` nor ``bytes``.
    """
    global default_log
    if isinstance(name, bytes):
        # Must happen before the str check, which would reject bytes.
        name = name.decode('utf-8', errors='replace')
    if name is None:
        name = next(iter(loggerDict), u"default")

    if not isinstance(name, str):
        raise TypeError('A logger name must be string or Unicode')

    if name in loggerDict:
        return loggerDict[name]

    logger_instance = Logger(name, buffer_size, file_name, roll_num)
    loggerDict[name] = logger_instance
    default_log = logger_instance
    return logger_instance


default_log = getLogger()


def debg(fmt, *args, **kwargs):
    """Log at DEBUG level on the default logger."""
    default_log.debug(fmt, *args, **kwargs)


def info(fmt, *args, **kwargs):
    """Log at INFO level on the default logger."""
    default_log.info(fmt, *args, **kwargs)


def warn(fmt, *args, **kwargs):
    """Log at WARNING level on the default logger."""
    default_log.warning(fmt, *args, **kwargs)


def erro(fmt, *args, **kwargs):
    """Log at ERROR level on the default logger."""
    default_log.error(fmt, *args, **kwargs)


def excp(fmt, *args, **kwargs):
    """Log an error plus the current traceback on the default logger."""
    default_log.exception(fmt, *args, **kwargs)


def crit(fmt, *args, **kwargs):
    """Log at CRITICAL level on the default logger."""
    default_log.critical(fmt, *args, **kwargs)


def tofile(fmt, *args, **kwargs):
    """Write a message to the default logger's file only."""
    default_log.tofile(fmt, *args, **kwargs)


if __name__ == '__main__':
    log_file = os.path.join(os.path.dirname(sys.argv[0]), "test.log")
    getLogger().set_file(log_file)
    debg("debug")
    info("info")
    warn("warning")
    erro("error")
    crit("critical")
    tofile("write to file only")

    try:
        1/0
    except Exception:
        excp("An error has occurred")