From 9e7f819edabbc42b3afd9e5cbb86c6a03f1213d9 Mon Sep 17 00:00:00 2001 From: hejianjun Date: Wed, 8 Jun 2022 20:35:27 +0800 Subject: [PATCH 1/3] =?UTF-8?q?=E9=BA=BB=E8=B1=86=E7=95=AA=E5=8F=B7?= =?UTF-8?q?=E5=A4=84=E7=90=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- number_parser.py | 37 ++++++++++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/number_parser.py b/number_parser.py index e3e7245..933998a 100755 --- a/number_parser.py +++ b/number_parser.py @@ -79,7 +79,33 @@ def get_number(debug: bool, file_path: str) -> str: if debug: print(f'[-]Number Parser exception: {e} [{file_path}]') return None + +# modou提取number +def md(filename): + m = re.search(r'(md[a-z]{0,2}-?)(\d{2,})(-ep\d*)*', filename, re.I) + return f'{m.group(1).replace("-","").upper()}{m.group(2).zfill(4)}{m.group(3) or ""}' +def mmz(filename): + m = re.search(r'(mmz-?)(\d{2,})(-ep\d*)*', filename, re.I) + return f'{m.group(1).replace("-","").upper()}{m.group(2).zfill(3)}{m.group(3) or ""}' + +def msd(filename): + m = re.search(r'(msd-?)(\d{2,})(-ep\d*)*', filename, re.I) + return f'{m.group(1).replace("-","").upper()}{m.group(2).zfill(3)}{m.group(3) or ""}' + +def mky(filename): + m = re.search(r'(mky-[a-z]{2,2}-?)(\d{2,})(-ep\d*)*', filename, re.I) + return f'{m.group(1).replace("-","").upper()}{m.group(2).zfill(3)}{m.group(3) or ""}' + +def yk(filename): + m = re.search(r'(yk-?)(\d{2,})(-ep\d*)*', filename, re.I) + return f'{m.group(1).replace("-","").upper()}{m.group(2).zfill(3)}{m.group(3) or ""}' + +def pm(filename): + m = re.search(r'(pm[a-z]?-?)(\d{2,})(-ep\d*)*', filename, re.I) + return f'{m.group(1).replace("-","").upper()}{m.group(2).zfill(3)}{m.group(3) or ""}' + + # 按javdb数据源的命名规范提取number G_TAKE_NUM_RULES = { @@ -90,7 +116,13 @@ G_TAKE_NUM_RULES = { 'x-art': lambda x: str(re.search(r'x-art\.\d{2}\.\d{2}\.\d{2}', x, re.I).group()), 'xxx-av': lambda x: ''.join(['xxx-av-', re.findall(r'xxx-av[^\d]*(\d{3,5})[^\d]*', x, re.I)[0]]), 'heydouga': lambda x: 'heydouga-' + '-'.join(re.findall(r'(\d{4})[\-_](\d{3,4})[^\d]*', x, re.I)[0]), - 'heyzo': lambda x: 'HEYZO-' + re.findall(r'heyzo[^\d]*(\d{4})', x, re.I)[0] + 'heyzo': lambda x: 'HEYZO-' + re.findall(r'heyzo[^\d]*(\d{4})', x, re.I)[0], + 'md[a-z]{0,2}-\d{2,}': md, + 'mmz-\d{2,}':mmz, + 'msd-\d{2,}':msd, + 'mky-[a-z]{2,2}-\d{2,}':mky, + 'yk-\d{2,3}': yk, + 'pm[a-z]?-?\d{2,}':pm } @@ -176,6 +208,9 @@ if __name__ == "__main__": "rctd-461CH-CD2.mp4", # ch后可加CDn "rctd-461-Cd3-C.mp4", # CDn后可加-C "rctd-461-C-cD4.mp4", # cD1 Cd1 cd1 CD1 最终生成.nfo时统一为大写CD1 + "MD-123.ts", + "MDSR-0001-ep2.ts", + "2953-麻豆传媒 MKY-NS-001护理长的盲目暴露-张芸熙.mp4" ) From 2b9e9a23d3036572bbd2aaa0e063f5ecd7c059c5 Mon Sep 17 00:00:00 2001 From: hejianjun Date: Wed, 8 Jun 2022 21:07:59 +0800 Subject: [PATCH 2/3] =?UTF-8?q?=E5=8C=B9=E9=85=8D=E5=8D=95=E8=AF=8D?= =?UTF-8?q?=E8=BE=B9=E7=95=8C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- number_parser.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/number_parser.py b/number_parser.py index 933998a..911bddd 100755 --- a/number_parser.py +++ b/number_parser.py @@ -117,12 +117,12 @@ G_TAKE_NUM_RULES = { 'xxx-av': lambda x: ''.join(['xxx-av-', re.findall(r'xxx-av[^\d]*(\d{3,5})[^\d]*', x, re.I)[0]]), 'heydouga': lambda x: 'heydouga-' + '-'.join(re.findall(r'(\d{4})[\-_](\d{3,4})[^\d]*', x, re.I)[0]), 'heyzo': lambda x: 'HEYZO-' + re.findall(r'heyzo[^\d]*(\d{4})', x, re.I)[0], - 'md[a-z]{0,2}-\d{2,}': md, - 'mmz-\d{2,}':mmz, - 'msd-\d{2,}':msd, - 'mky-[a-z]{2,2}-\d{2,}':mky, - 'yk-\d{2,3}': yk, - 'pm[a-z]?-?\d{2,}':pm + r'\bmd[a-z]{0,2}-\d{2,}': md, + r'\bmmz-\d{2,}':mmz, + r'\bmsd-\d{2,}':msd, + r'\bmky-[a-z]{2,2}-\d{2,}':mky, + r'\byk-\d{2,3}': yk, + r'\bpm[a-z]?-?\d{2,}':pm } From 4637f2b6e819ba8c9b1b7f8498c09808d3efb354 Mon Sep 17 00:00:00 2001 From: hejianjun Date: Fri, 10 Jun 2022 12:23:25 +0800 Subject: [PATCH 3/3] =?UTF-8?q?=E7=BC=96=E7=A0=81=E9=97=AE=E9=A2=98?= =?UTF-8?q?=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- number_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/number_parser.py b/number_parser.py index 911bddd..cbe94fa 100755 --- a/number_parser.py +++ b/number_parser.py @@ -210,7 +210,7 @@ if __name__ == "__main__": "rctd-461-C-cD4.mp4", # cD1 Cd1 cd1 CD1 最终生成.nfo时统一为大写CD1 "MD-123.ts", "MDSR-0001-ep2.ts", - "2953-麻豆传媒 MKY-NS-001护理长的盲目暴露-张芸熙.mp4" + "MKY-NS-001.mp4" )