NOTE: this patch was reconstructed from whitespace-flattened text. Indentation
and blank context lines are inferred from the hunk line counts and Python
syntax; if a hunk is rejected, try `git apply --ignore-whitespace` or
`patch -l`. The number_parser.py additions were revised during review, so the
post-image blob id on its index line is stale.

diff --git a/ADC_function.py b/ADC_function.py
index 5b1d507..12fecce 100755
--- a/ADC_function.py
+++ b/ADC_function.py
@@ -566,17 +566,6 @@ f"https://{gsite}/translate_a/single?client=gtx&dt=t&dj=1&ie=UTF-8&sl=auto&tl={t
     return trans_result
 
 
-# ========================================================================是否为无码
-def is_uncensored(number):
-    if re.match('^\d{4,}', number) or re.match('n\d{4}', number) or 'HEYZO' in number.upper():
-        return True
-    configs = config.getInstance().get_uncensored()
-    prefix_list = str(configs).split(',')
-    for pre in prefix_list:
-        if pre.upper() in number.upper():
-            return True
-    return False
-
 # 从浏览器中导出网站登录验证信息的cookies,能够以会员方式打开游客无法访问到的页面
 # 示例: FC2-755670 url https://javdb9.com/v/vO8Mn
 # json 文件格式
diff --git a/config.ini b/config.ini
index 700fa95..b4d9fb4 100755
--- a/config.ini
+++ b/config.ini
@@ -65,8 +65,7 @@ switch=0
 
 ; 用来确定是否是无码
 [uncensored]
-uncensored_prefix=S2M,BT,LAF,SMD
-
+uncensored_prefix=S2M,BT,LAF,SMD,SMBD,SM3D2DBD,SKY-,SKYHD,CWP,CWDV,CWBD,CW3D2DBD,MKD,MKBD,MXBD,MK3D2DBD,MCB3DBD,MCBD,RHJ,RED
 
 [media]
 ; 影片后缀
diff --git a/core.py b/core.py
index ae73af8..d7066f4 100755
--- a/core.py
+++ b/core.py
@@ -12,7 +12,7 @@ from datetime import datetime
 
 from ADC_function import *
 from WebCrawler import get_data_from_json
-
+from number_parser import is_uncensored
 
 def escape_path(path, escape_literals: str):  # Remove escape literals
     backslash = '\\'
diff --git a/number_parser.py b/number_parser.py
index 616af85..212c2c0 100755
--- a/number_parser.py
+++ b/number_parser.py
@@ -1,6 +1,7 @@
 import os
 import re
 import sys
+import config
 
 G_spat = re.compile(
     "^22-sht\.me|-fhd|_fhd|^fhd_|^fhd-|-hd|_hd|^hd_|^hd-|-sd|_sd|-1080p|_1080p|-720p|_720p|^hhd800\.com@",
@@ -82,6 +83,50 @@ def get_number_by_dict(filename: str) -> str:
         pass
     return None
 
+class Cache_uncensored_conf:
+    """Holds the compiled regex built from the configured uncensored prefixes."""
+    prefix = None  # compiled pattern, or None until set() succeeds
+
+    def is_empty(self):
+        """Return True while no prefix pattern has been compiled."""
+        return self.prefix is None
+
+    def set(self, v: list):
+        """Compile the prefix strings in *v* into one case-insensitive regex.
+
+        Raises ValueError when *v* holds no non-blank prefix.
+        """
+        # Skip blank entries (e.g. from a trailing comma): an empty alternative
+        # would match every number. Escape so prefixes are taken literally.
+        items = [re.escape(i.strip()) for i in (v or []) if i and i.strip()]
+        if not items:
+            raise ValueError('input prefix list empty or None')
+        self.prefix = re.compile('|'.join(items), re.I)
+
+    def check(self, number):
+        """Return True if *number* starts with one of the cached prefixes.
+
+        Raises ValueError when set() has not been called yet.
+        """
+        if self.prefix is None:
+            raise ValueError('No init re compile')
+        return bool(self.prefix.match(number))
+
+G_cache_uncensored_conf = Cache_uncensored_conf()
+
+# ========================================================================Whether the number is uncensored
+def is_uncensored(number):
+    """Return True if *number* matches a built-in or configured uncensored pattern."""
+    if re.match(
+r'[\d-]{4,}|\d{6}_\d{2,3}|(cz|gedo|k|n|red-|se)\d{2,4}|heyzo.+|xxx-av-.+|heydouga-.+|x-art\.\d{2}\.\d{2}\.\d{2}',
+        number,
+        re.I
+    ):
+        return True
+    if G_cache_uncensored_conf.is_empty():
+        G_cache_uncensored_conf.set(config.getInstance().get_uncensored().split(','))
+    return G_cache_uncensored_conf.check(number)
+
 if __name__ == "__main__":
     # import doctest
     # doctest.testmod(raise_on_error=True)
@@ -164,7 +209,7 @@ if __name__ == "__main__":
         try:
             n = get_number(True, filename)
             if n:
-                print(f' [{n}] # {filename}')
+                print(' [{0}] {2}# {1}'.format(n, filename, '#无码' if is_uncensored(n) else ''))
             else:
                 print(f'[-]Number return None. # {filename}')
         except Exception as e: