Merge pull request #903 from hejianjun/master

修复一些脚本执行异常和添加mdbk和mdtm的规则,避免被麻豆番号污染
This commit is contained in:
Yoshiko2
2022-11-02 01:48:12 +08:00
committed by GitHub
2 changed files with 16 additions and 6 deletions

View File

@@ -58,7 +58,12 @@ def get_number(debug: bool, file_path: str) -> str:
filename = re.sub("[-_]cd\d{1,2}", "", filename, flags=re.IGNORECASE) filename = re.sub("[-_]cd\d{1,2}", "", filename, flags=re.IGNORECASE)
if not re.search("-|_", filename): # 去掉-CD1之后再无-的情况例如n1012-CD1.wmv if not re.search("-|_", filename): # 去掉-CD1之后再无-的情况例如n1012-CD1.wmv
return str(re.search(r'\w+', filename[:filename.find('.')], re.A).group()) return str(re.search(r'\w+', filename[:filename.find('.')], re.A).group())
file_number = str(re.search(r'\w+(-|_)\w+', filename, re.A).group()) file_number = os.path.splitext(filename)
filename = re.search(r'\w+(-|_)\w+', filename, re.A)
if filename:
file_number = str(filename.group())
else:
file_number = file_number[0]
file_number = re.sub("(-|_)c$", "", file_number, flags=re.IGNORECASE) file_number = re.sub("(-|_)c$", "", file_number, flags=re.IGNORECASE)
if re.search("\d+ch$", file_number, flags=re.I): if re.search("\d+ch$", file_number, flags=re.I):
file_number = file_number[:-2] file_number = file_number[:-2]
@@ -82,7 +87,7 @@ def get_number(debug: bool, file_path: str) -> str:
# modou提取number # modou提取number
def md(filename):
    """Build a normalized ``MD``-style number from *filename*.

    The first occurrence of ``md`` + up to two letters + an optional dash +
    at least two digits is located case-insensitively. The prefix is
    upper-cased with its dash removed, the digits are left-padded with zeros
    to four characters, and a trailing ``-ep<digits>`` or ``-<digits>``
    suffix (if any) is appended unchanged.

    Args:
        filename: File name (or any string) to search.

    Returns:
        The normalized number, e.g. ``"MDX0012-ep2"``.

    Raises:
        AttributeError: If *filename* contains no matching number
            (``re.search`` returns ``None``).
    """
    # The suffix alternatives require at least one digit. With ``\d*`` a bare
    # "-" (as in "md-0123-uncensored") was captured and glued onto the result.
    # When the suffix repeats, group 3 holds only the last repetition.
    m = re.search(r'(md[a-z]{0,2}-?)(\d{2,})(-ep\d+|-\d+)*', filename, re.I)
    prefix = m.group(1).replace("-", "").upper()
    return f'{prefix}{m.group(2).zfill(4)}{m.group(3) or ""}'
def mmz(filename): def mmz(filename):
@@ -103,9 +108,11 @@ def yk(filename):
def pm(filename):
    """Build a normalized ``PM``-style number from *filename*.

    The first occurrence of ``pm`` + an optional letter + an optional dash +
    at least two digits is located case-insensitively. The prefix is
    upper-cased with its dash removed, the digits are left-padded with zeros
    to three characters, and a trailing ``-ep<digits>`` suffix (if any) is
    appended unchanged.

    Args:
        filename: File name (or any string) to search.

    Returns:
        The normalized number, e.g. ``"PMC012-ep1"``.

    Raises:
        AttributeError: If *filename* contains no matching number
            (``re.search`` returns ``None``).
    """
    # ``-ep`` must be followed by at least one digit; with ``\d*`` an
    # unrelated word such as "-epilogue" contributed a stray "-ep" suffix.
    m = re.search(r'(pm[a-z]?-?)(\d{2,})(-ep\d+)*', filename, re.I)
    prefix = m.group(1).replace("-", "").upper()
    return f'{prefix}{m.group(2).zfill(3)}{m.group(3) or ""}'
def fsog(filename):
    """Build a normalized ``FSOG`` number from *filename*.

    The first occurrence of ``fsog`` + an optional dash + at least two digits
    is located case-insensitively. The prefix is upper-cased with its dash
    removed, the digits are left-padded with zeros to three characters, and a
    trailing ``-ep<digits>`` suffix (if any) is appended unchanged.

    Args:
        filename: File name (or any string) to search.

    Returns:
        The normalized number, e.g. ``"FSOG007-ep3"``.

    Raises:
        AttributeError: If *filename* contains no matching number
            (``re.search`` returns ``None``).
    """
    # ``-ep`` must be followed by at least one digit; with ``\d*`` an
    # unrelated word such as "-epic" contributed a stray "-ep" suffix.
    m = re.search(r'(fsog-?)(\d{2,})(-ep\d+)*', filename, re.I)
    prefix = m.group(1).replace("-", "").upper()
    return f'{prefix}{m.group(2).zfill(3)}{m.group(3) or ""}'
# 按javdb数据源的命名规范提取number # 按javdb数据源的命名规范提取number
G_TAKE_NUM_RULES = { G_TAKE_NUM_RULES = {
@@ -117,12 +124,15 @@ G_TAKE_NUM_RULES = {
'xxx-av': lambda x: ''.join(['xxx-av-', re.findall(r'xxx-av[^\d]*(\d{3,5})[^\d]*', x, re.I)[0]]), 'xxx-av': lambda x: ''.join(['xxx-av-', re.findall(r'xxx-av[^\d]*(\d{3,5})[^\d]*', x, re.I)[0]]),
'heydouga': lambda x: 'heydouga-' + '-'.join(re.findall(r'(\d{4})[\-_](\d{3,4})[^\d]*', x, re.I)[0]), 'heydouga': lambda x: 'heydouga-' + '-'.join(re.findall(r'(\d{4})[\-_](\d{3,4})[^\d]*', x, re.I)[0]),
'heyzo': lambda x: 'HEYZO-' + re.findall(r'heyzo[^\d]*(\d{4})', x, re.I)[0], 'heyzo': lambda x: 'HEYZO-' + re.findall(r'heyzo[^\d]*(\d{4})', x, re.I)[0],
'mdbk': lambda x: str(re.search(r'mdbk(-|_)(\d{4})', x, re.I).group()),
'mdtm': lambda x: str(re.search(r'mdtm(-|_)(\d{4})', x, re.I).group()),
r'\bmd[a-z]{0,2}-\d{2,}': md, r'\bmd[a-z]{0,2}-\d{2,}': md,
r'\bmmz-\d{2,}':mmz, r'\bmmz-\d{2,}':mmz,
r'\bmsd-\d{2,}':msd, r'\bmsd-\d{2,}':msd,
r'\bmky-[a-z]{2,2}-\d{2,}':mky, r'\bmky-[a-z]{2,2}-\d{2,}':mky,
r'\byk-\d{2,3}': yk, r'\byk-\d{2,3}': yk,
r'\bpm[a-z]?-?\d{2,}':pm r'\bpm[a-z]?-?\d{2,}':pm,
r'\bfsog-?\d{2,}':fsog
} }

View File

@@ -104,7 +104,7 @@ class Javdb(Parser):
correct_url = urls[self.queryid] correct_url = urls[self.queryid]
except: except:
# 为避免获得错误番号,只要精确对应的结果 # 为避免获得错误番号,只要精确对应的结果
if ids[0].upper() != number: if ids[0].upper() != number.upper():
raise ValueError("number not found in javdb") raise ValueError("number not found in javdb")
correct_url = urls[0] correct_url = urls[0]
return urljoin(resp.url, correct_url) return urljoin(resp.url, correct_url)