From 2aaea3446c34595da8c70b4185f1d22a964131b2 Mon Sep 17 00:00:00 2001 From: hejianjun Date: Wed, 26 Oct 2022 23:07:23 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E4=B8=80=E4=BA=9B=E8=84=9A?= =?UTF-8?q?=E6=9C=AC=E6=89=A7=E8=A1=8C=E5=BC=82=E5=B8=B8=E5=92=8C=E6=B7=BB?= =?UTF-8?q?=E5=8A=A0mdbk=E5=92=8Cmdtm=E7=9A=84=E8=A7=84=E5=88=99=EF=BC=8C?= =?UTF-8?q?=E9=81=BF=E5=85=8D=E8=A2=AB=E9=BA=BB=E8=B1=86=E7=95=AA=E5=8F=B7?= =?UTF-8?q?=E6=B1=A1=E6=9F=93?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- number_parser.py | 20 +++++++++++++++----- scrapinglib/javdb.py | 2 +- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/number_parser.py b/number_parser.py index cbe94fa..d9f6f7d 100755 --- a/number_parser.py +++ b/number_parser.py @@ -58,7 +58,12 @@ def get_number(debug: bool, file_path: str) -> str: filename = re.sub("[-_]cd\d{1,2}", "", filename, flags=re.IGNORECASE) if not re.search("-|_", filename): # 去掉-CD1之后再无-的情况,例如n1012-CD1.wmv return str(re.search(r'\w+', filename[:filename.find('.')], re.A).group()) - file_number = str(re.search(r'\w+(-|_)\w+', filename, re.A).group()) + file_number = os.path.splitext(filename) + filename = re.search(r'\w+(-|_)\w+', filename, re.A) + if filename: + file_number = str(filename.group()) + else: + file_number = file_number[0] file_number = re.sub("(-|_)c$", "", file_number, flags=re.IGNORECASE) if re.search("\d+ch$", file_number, flags=re.I): file_number = file_number[:-2] @@ -82,7 +87,7 @@ def get_number(debug: bool, file_path: str) -> str: # modou提取number def md(filename): - m = re.search(r'(md[a-z]{0,2}-?)(\d{2,})(-ep\d*)*', filename, re.I) + m = re.search(r'(md[a-z]{0,2}-?)(\d{2,})(-ep\d*|-\d*)*', filename, re.I) return f'{m.group(1).replace("-","").upper()}{m.group(2).zfill(4)}{m.group(3) or ""}' def mmz(filename): @@ -103,9 +108,11 @@ def yk(filename): def pm(filename): m = re.search(r'(pm[a-z]?-?)(\d{2,})(-ep\d*)*', filename, re.I) - return f'{m.group(1).replace("-","").upper()}{m.group(2).zfill(3)}{m.group(3) or ""}' - + return f'{m.group(1).replace("-","").upper()}{m.group(2).zfill(3)}{m.group(3) or ""}' +def fsog(filename): + m = re.search(r'(fsog-?)(\d{2,})(-ep\d*)*', filename, re.I) + return f'{m.group(1).replace("-","").upper()}{m.group(2).zfill(3)}{m.group(3) or ""}' # 按javdb数据源的命名规范提取number G_TAKE_NUM_RULES = { @@ -117,12 +124,15 @@ G_TAKE_NUM_RULES = { 'xxx-av': lambda x: ''.join(['xxx-av-', re.findall(r'xxx-av[^\d]*(\d{3,5})[^\d]*', x, re.I)[0]]), 'heydouga': lambda x: 'heydouga-' + '-'.join(re.findall(r'(\d{4})[\-_](\d{3,4})[^\d]*', x, re.I)[0]), 'heyzo': lambda x: 'HEYZO-' + re.findall(r'heyzo[^\d]*(\d{4})', x, re.I)[0], + 'mdbk': lambda x: str(re.search(r'mdbk(-|_)(\d{4})', x, re.I).group()), + 'mdtm': lambda x: str(re.search(r'mdtm(-|_)(\d{4})', x, re.I).group()), r'\bmd[a-z]{0,2}-\d{2,}': md, r'\bmmz-\d{2,}':mmz, r'\bmsd-\d{2,}':msd, r'\bmky-[a-z]{2,2}-\d{2,}':mky, r'\byk-\d{2,3}': yk, - r'\bpm[a-z]?-?\d{2,}':pm + r'\bpm[a-z]?-?\d{2,}':pm, + r'\bfsog-?\d{2,}':fsog } diff --git a/scrapinglib/javdb.py b/scrapinglib/javdb.py index 65a57fb..3a0fb95 100644 --- a/scrapinglib/javdb.py +++ b/scrapinglib/javdb.py @@ -104,7 +104,7 @@ class Javdb(Parser): correct_url = urls[self.queryid] except: # 为避免获得错误番号,只要精确对应的结果 - if ids[0].upper() != number: + if ids[0].upper() != number.upper(): raise ValueError("number not found in javdb") correct_url = urls[0] return urljoin(resp.url, correct_url)