From e23a25b9b7005004ed8e71c6a08bbe6325725657 Mon Sep 17 00:00:00 2001 From: TachibanaKimika Date: Sun, 6 Aug 2023 15:45:12 +0800 Subject: [PATCH 1/4] =?UTF-8?q?feat:=20=E6=B7=BB=E5=8A=A0=E8=87=AA?= =?UTF-8?q?=E5=AE=9A=E4=B9=89=E6=AD=A3=E5=88=99=20&=20=E7=95=AA=E5=8F=B7?= =?UTF-8?q?=E5=A4=A7=E5=86=99=E8=BD=AC=E6=8D=A2=E9=85=8D=E7=BD=AE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- config.ini | 3 +++ config.py | 14 ++++++++++++++ number_parser.py | 5 +++++ scraper.py | 4 ++++ 4 files changed, 26 insertions(+) diff --git a/config.ini b/config.ini index 7aa6487..24aedfb 100755 --- a/config.ini +++ b/config.ini @@ -53,6 +53,9 @@ naming_rule = number+'-'+title max_title_len = 50 ; 刮削后图片是否命名为番号 image_naming_with_number = 0 +; 番号大写(仅在写入文件时变大写, 搜索时不影响) +number_uppercase = 0 +number_regexs = [update] update_check = 1 diff --git a/config.py b/config.py index 897df66..8f3966b 100644 --- a/config.py +++ b/config.py @@ -343,6 +343,18 @@ class Config: except: return False + def number_uppercase(self) -> bool: + try: + return self.conf.getboolean("Name_Rule", "number_uppercase") + except: + return False + + def number_regexs(self) -> list[str]: + try: + return self.conf.get("Name_Rule", "number_regexs").split() + except: + return [] + def update_check(self) -> bool: try: return self.conf.getboolean("update", "update_check") @@ -473,6 +485,8 @@ class Config: conf.set(sec4, "naming_rule", "number + '-' + title") conf.set(sec4, "max_title_len", "50") conf.set(sec4, "image_naming_with_number", "0") + conf.set(sec4, "number_uppercase", "0") + conf.set(sec4, "number_regexs", []) sec5 = "update" conf.add_section(sec5) diff --git a/number_parser.py b/number_parser.py index 6d19c5e..2461936 100755 --- a/number_parser.py +++ b/number_parser.py @@ -40,6 +40,11 @@ def get_number(debug: bool, file_path: str) -> str: filepath = os.path.basename(file_path) # debug True 和 False 两块代码块合并,原因是此模块及函数只涉及字符串计算,没有IO操作,debug on时输出导致异常信息即可 try: + # 先对自定义正则进行匹配 + if config.getInstance().number_regexs(): + for regex in config.getInstance().number_regexs(): + if re.search(regex, filepath): + return re.search(regex, filepath).group() file_number = get_number_by_dict(filepath) if file_number: return file_number diff --git a/scraper.py b/scraper.py index 06a64f8..0bf4cc1 100644 --- a/scraper.py +++ b/scraper.py @@ -120,6 +120,10 @@ def get_data_from_json( series = json_data.get('series') year = json_data.get('year') + + if conf.number_uppercase(): + number = number.upper() + if json_data.get('cover_small'): cover_small = json_data.get('cover_small') else: From b00c9a25875c5af4cb4ac8bd58926b66beab172c Mon Sep 17 00:00:00 2001 From: TachibanaKimika Date: Sun, 6 Aug 2023 16:00:06 +0800 Subject: [PATCH 2/4] =?UTF-8?q?fix:=20=E5=A4=A7=E5=B0=8F=E5=86=99=E8=BD=AC?= =?UTF-8?q?=E6=8D=A2=E5=86=99=E5=85=A5json=5Fdata?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- scraper.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/scraper.py b/scraper.py index 0bf4cc1..c5e0d82 100644 --- a/scraper.py +++ b/scraper.py @@ -120,10 +120,6 @@ def get_data_from_json( series = json_data.get('series') year = json_data.get('year') - - if conf.number_uppercase(): - number = number.upper() - if json_data.get('cover_small'): cover_small = json_data.get('cover_small') else: @@ -169,6 +165,10 @@ def get_data_from_json( cover_small = tmpArr[0].strip('\"').strip('\'') # ====================处理异常字符 END================== #\/:*?"<>| + # 处理大写 + if conf.number_uppercase(): + json_data['number'] = number.upper() + # 返回处理后的json_data json_data['title'] = title json_data['original_title'] = title From 35baf17160672c5bd3b1c76f04e2dce96e3d17ae Mon Sep 17 00:00:00 2001 From: TachibanaKimika Date: Sun, 6 Aug 2023 16:01:17 +0800 Subject: [PATCH 3/4] =?UTF-8?q?doc:=20=E6=B3=A8=E9=87=8A?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- config.ini | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/config.ini b/config.ini index 24aedfb..e5d6169 100755 --- a/config.ini +++ b/config.ini @@ -53,8 +53,10 @@ naming_rule = number+'-'+title max_title_len = 50 ; 刮削后图片是否命名为番号 image_naming_with_number = 0 -; 番号大写(仅在写入文件时变大写, 搜索时不影响) +; 番号大写 1 | 0, 仅在写入数据时会进行大写转换, 搜索刮削流程则不影响 number_uppercase = 0 +; 自定义正则表达式, 多个正则使用空格隔开, 第一个分组为提取的番号, 若自定义正则未能匹配到番号则使用默认规则 +; example: ([A-Za-z]{2,4}\-\d{3}) ([A-Za-z]{2,4}00\d{3}) number_regexs = [update] From 65e0ff665d6e6ccdc3c469ae38c0227872f76787 Mon Sep 17 00:00:00 2001 From: TachibanaKimika Date: Sun, 6 Aug 2023 16:14:50 +0800 Subject: [PATCH 4/4] =?UTF-8?q?fix:=20=E7=BB=9F=E4=B8=80=E6=95=B0=E6=8D=AE?= =?UTF-8?q?=E6=A0=BC=E5=BC=8F=20&=20=E5=A4=84=E7=90=86=E6=97=A0=E6=95=88?= =?UTF-8?q?=E6=AD=A3=E5=88=99?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- config.py | 8 ++++---- number_parser.py | 12 ++++++++---- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/config.py b/config.py index 8f3966b..e008de1 100644 --- a/config.py +++ b/config.py @@ -349,11 +349,11 @@ class Config: except: return False - def number_regexs(self) -> list[str]: + def number_regexs(self) -> str: try: - return self.conf.get("Name_Rule", "number_regexs").split() + return self.conf.get("Name_Rule", "number_regexs") except: - return [] + return "" def update_check(self) -> bool: try: @@ -486,7 +486,7 @@ class Config: conf.set(sec4, "max_title_len", "50") conf.set(sec4, "image_naming_with_number", "0") conf.set(sec4, "number_uppercase", "0") - conf.set(sec4, "number_regexs", []) + conf.set(sec4, "number_regexs", "") sec5 = "update" conf.add_section(sec5) diff --git a/number_parser.py b/number_parser.py index 2461936..d54d9a0 100755 --- a/number_parser.py +++ b/number_parser.py @@ -41,10 +41,14 @@ def get_number(debug: bool, file_path: str) -> str: # debug True 和 False 两块代码块合并,原因是此模块及函数只涉及字符串计算,没有IO操作,debug on时输出导致异常信息即可 try: # 先对自定义正则进行匹配 - if config.getInstance().number_regexs(): - for regex in config.getInstance().number_regexs(): - if re.search(regex, filepath): - return re.search(regex, filepath).group() + if config.getInstance().number_regexs().split().__len__() > 0: + for regex in config.getInstance().number_regexs().split(): + try: + if re.search(regex, filepath): + return re.search(regex, filepath).group() + except Exception as e: + print(f'[-]custom regex exception: {e} [{regex}]') + file_number = get_number_by_dict(filepath) if file_number: return file_number