From bb6ff56ce5e80fa0ce25b92951338350f061a9cd Mon Sep 17 00:00:00 2001 From: yoshiko2 Date: Thu, 26 May 2022 23:43:11 +0800 Subject: [PATCH 01/15] Fix `Mapping Table Download FAILED` --- ADC_function.py | 5 ++++- Movie_Data_Capture.py | 1 + 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/ADC_function.py b/ADC_function.py index bfa4bb8..16887a1 100644 --- a/ADC_function.py +++ b/ADC_function.py @@ -523,7 +523,10 @@ def download_one_file(args) -> str: """ (url, save_path, json_data) = args - filebytes = get_html(url, return_type='content', json_headers=json_data['headers']) + if json_data != None: + filebytes = get_html(url, return_type='content', json_headers=json_data['headers']) + else: + filebytes = get_html(url, return_type='content') if isinstance(filebytes, bytes) and len(filebytes): with save_path.open('wb') as fpbyte: if len(filebytes) == fpbyte.write(filebytes): diff --git a/Movie_Data_Capture.py b/Movie_Data_Capture.py index 2b1b37a..d9603ce 100644 --- a/Movie_Data_Capture.py +++ b/Movie_Data_Capture.py @@ -591,6 +591,7 @@ def main(args: tuple) -> Path: print("[!] " + "Mapping Table Download FAILED".center(47)) print("[!] " + "无法连接github".center(47)) print("[!] " + "请过几小时再试试".center(47)) + print("[!]", e) print("[-] " + "------ AUTO EXIT AFTER 30s !!! ------ ".center(47)) time.sleep(30) os._exit(-1) From 5ab121f996cd7a700b42b9b5e0849ba5f36c8f5b Mon Sep 17 00:00:00 2001 From: yoshiko2 Date: Fri, 27 May 2022 23:03:20 +0800 Subject: [PATCH 02/15] Fix image download failed --- ADC_function.py | 10 ++--- core.py | 106 +++++++++++++++++++++++------------------------- 2 files changed, 55 insertions(+), 61 deletions(-) diff --git a/ADC_function.py b/ADC_function.py index 16887a1..25c7fd2 100644 --- a/ADC_function.py +++ b/ADC_function.py @@ -522,9 +522,9 @@ def download_one_file(args) -> str: wrapped for map function """ - (url, save_path, json_data) = args - if json_data != None: - filebytes = get_html(url, return_type='content', json_headers=json_data['headers']) + (url, save_path, json_headers) = args + if json_headers != None: + filebytes = get_html(url, return_type='content', json_headers=json_headers['headers']) else: filebytes = get_html(url, return_type='content') if isinstance(filebytes, bytes) and len(filebytes): @@ -533,7 +533,7 @@ def download_one_file(args) -> str: return str(save_path) -def parallel_download_files(dn_list: typing.Iterable[typing.Sequence], parallel: int = 0, json_data=None): +def parallel_download_files(dn_list: typing.Iterable[typing.Sequence], parallel: int = 0, json_headers=None): """ download files in parallel 多线程下载文件 @@ -552,7 +552,7 @@ def parallel_download_files(dn_list: typing.Iterable[typing.Sequence], parallel: and fullpath and isinstance(fullpath, (str, Path)) and len(str(fullpath)): fullpath = Path(fullpath) fullpath.parent.mkdir(parents=True, exist_ok=True) - mp_args.append((url, fullpath, json_data)) + mp_args.append((url, fullpath, json_headers)) if not len(mp_args): return [] if not isinstance(parallel, int) or parallel not in range(1, 200): diff --git a/core.py b/core.py index 515ea4c..397b8b9 100644 --- a/core.py +++ b/core.py @@ -71,11 +71,14 @@ def get_info(json_data): # 返回json里的数据 return title, studio, year, outline, runtime, director, actor_photo, release, number, cover, trailer, website, series, label -def small_cover_check(path, filename, cover_small, movie_path, json_data=None): +def small_cover_check(path, filename, cover_small, movie_path, json_headers=None): full_filepath = Path(path) / filename if config.getInstance().download_only_missing_images() and not file_not_exist_or_empty(str(full_filepath)): return - download_file_with_filename(cover_small, filename, path, movie_path, json_data) + if json_headers != None: + download_file_with_filename(cover_small, filename, path, movie_path, json_headers['headers']) + else: + download_file_with_filename(cover_small, filename, path, movie_path) print('[+]Image Downloaded! ' + full_filepath.name) @@ -119,57 +122,36 @@ def download_file_with_filename(url, filename, path, filepath, json_headers=None for i in range(configProxy.retry): try: - if configProxy.enable: - if not os.path.exists(path): - try: - os.makedirs(path) - except: - print(f"[-]Fatal error! Can not make folder '{path}'") - os._exit(0) + if not os.path.exists(path): + try: + os.makedirs(path) + except: + print(f"[-]Fatal error! Can not make folder '{path}'") + os._exit(0) + headers = {'User-Agent': G_USER_AGENT} + if not json_headers == None: + if 'headers' in json_headers: + headers.update(json_headers) + if configProxy: proxies = configProxy.proxies() - headers = {'User-Agent': G_USER_AGENT} - if json_headers != None: - headers.update(json_headers) r = requests.get(url, headers=headers, timeout=configProxy.timeout, proxies=proxies) - if r == '': - print('[-]Movie Download Data not found!') - return - with open(os.path.join(path, filename), "wb") as code: - code.write(r.content) - return else: - if not os.path.exists(path): - try: - os.makedirs(path) - except: - print(f"[-]Fatal error! Can not make folder '{path}'") - os._exit(0) - headers = {'User-Agent': G_USER_AGENT} - if json_headers != None: - headers.update(json_headers) - r = requests.get(url, timeout=configProxy.timeout, headers=headers) - if r == '': - print('[-]Movie Download Data not found!') - return - with open(os.path.join(path, filename), "wb") as code: - code.write(r.content) + r = requests.get(url, headers=headers, timeout=configProxy.timeout) + if r == '': + print('[-]Movie Download Data not found!') return - except requests.exceptions.RequestException: - i += 1 - print('[-]Image Download : Connect retry ' + str(i) + '/' + str(configProxy.retry)) - except requests.exceptions.ConnectionError: - i += 1 - print('[-]Image Download : Connect retry ' + str(i) + '/' + str(configProxy.retry)) + with open(os.path.join(path, filename), "wb") as code: + code.write(r.content) + return except requests.exceptions.ProxyError: i += 1 - print('[-]Image Download : Connect retry ' + str(i) + '/' + str(configProxy.retry)) - except requests.exceptions.ConnectTimeout: - i += 1 - print('[-]Image Download : Connect retry ' + str(i) + '/' + str(configProxy.retry)) - except IOError: - print(f"[-]Create Directory '{path}' failed!") - moveFailedFolder(filepath) - return + print('[-]Image Download : Proxy error ' + str(i) + '/' + str(configProxy.retry)) + # except IOError: + # print(f"[-]Create Directory '{path}' failed!") + # moveFailedFolder(filepath) + # return + except Exception as e: + print('[-]Image Download :Error',e) print('[-]Connect Failed! Please check your Proxy or Network!') moveFailedFolder(filepath) return @@ -302,12 +284,12 @@ def image_ext(url): # 封面是否下载成功,否则移动到failed -def image_download(cover, fanart_path, thumb_path, path, filepath, json_data): +def image_download(cover, fanart_path, thumb_path, path, filepath, json_headers=None): full_filepath = os.path.join(path, fanart_path) if config.getInstance().download_only_missing_images() and not file_not_exist_or_empty(full_filepath): return - if "headers" in json_data: - if download_file_with_filename(cover, fanart_path, path, filepath, json_data['headers']) == 'failed': + if json_headers != None: + if download_file_with_filename(cover, fanart_path, path, filepath, json_headers['headers']) == 'failed': moveFailedFolder(filepath) return else: @@ -319,8 +301,8 @@ def image_download(cover, fanart_path, thumb_path, path, filepath, json_data): for i in range(configProxy.retry): if file_not_exist_or_empty(full_filepath): print('[!]Image Download Failed! Trying again. [{}/3]', i + 1) - if "headers" in json_data: - download_file_with_filename(cover, fanart_path, path, filepath, json_data['headers']) + if json_headers != None: + download_file_with_filename(cover, fanart_path, path, filepath, json_headers['headers']) else: download_file_with_filename(cover, fanart_path, path, filepath) continue @@ -863,10 +845,16 @@ def core_main(movie_path, number_th, oCC): # 检查小封面, 如果image cut为3,则下载小封面 if imagecut == 3: - small_cover_check(path, poster_path, json_data.get('cover_small'), movie_path, json_data) + if 'headers' in json_data: + small_cover_check(path, poster_path, json_data.get('cover_small'), movie_path, json_data['headers']) + else: + small_cover_check(path, poster_path, json_data.get('cover_small'), movie_path) # creatFolder会返回番号路径 - image_download( cover, fanart_path,thumb_path, path, movie_path, json_data) + if 'headers' in json_data: + image_download(cover, fanart_path, thumb_path, path, movie_path, json_data['headers']) + else: + image_download(cover, fanart_path, thumb_path, path, movie_path) if not multi_part or part.lower() == '-cd1': try: @@ -917,10 +905,16 @@ def core_main(movie_path, number_th, oCC): # 检查小封面, 如果image cut为3,则下载小封面 if imagecut == 3: - small_cover_check(path, poster_path, json_data.get('cover_small'), movie_path, json_data) + if 'headers' in json_data: + small_cover_check(path, poster_path, json_data.get('cover_small'), movie_path, json_data['headers']) + else: + small_cover_check(path, poster_path, json_data.get('cover_small'), movie_path) # creatFolder会返回番号路径 - image_download( cover, fanart_path, thumb_path, path, movie_path) + if 'headers' in json_data: + image_download(cover, fanart_path, thumb_path, path, movie_path, json_data['headers']) + else: + image_download(cover, fanart_path, thumb_path, path, movie_path) if not multi_part or part.lower() == '-cd1': try: From 99bc50bbbaed16589cda17a7a8e172eecef305a3 Mon Sep 17 00:00:00 2001 From: yoshiko2 Date: Fri, 27 May 2022 23:14:43 +0800 Subject: [PATCH 03/15] Fix extrafanart download failed --- core.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/core.py b/core.py index 397b8b9..d50e495 100644 --- a/core.py +++ b/core.py @@ -846,13 +846,13 @@ def core_main(movie_path, number_th, oCC): # 检查小封面, 如果image cut为3,则下载小封面 if imagecut == 3: if 'headers' in json_data: - small_cover_check(path, poster_path, json_data.get('cover_small'), movie_path, json_data['headers']) + small_cover_check(path, poster_path, json_data.get('cover_small'), movie_path, json_data) else: small_cover_check(path, poster_path, json_data.get('cover_small'), movie_path) # creatFolder会返回番号路径 if 'headers' in json_data: - image_download(cover, fanart_path, thumb_path, path, movie_path, json_data['headers']) + image_download(cover, fanart_path, thumb_path, path, movie_path, json_data) else: image_download(cover, fanart_path, thumb_path, path, movie_path) @@ -864,7 +864,10 @@ def core_main(movie_path, number_th, oCC): # 下载剧照 data, path, filepath if conf.is_extrafanart() and json_data.get('extrafanart'): - extrafanart_download(json_data.get('extrafanart'), path, number, movie_path, json_data) + if 'headers' in json_data: + extrafanart_download(json_data.get('extrafanart'), path, number, movie_path, json_data) + else: + extrafanart_download(json_data.get('extrafanart'), path, number, movie_path) # 下载演员头像 KODI .actors 目录位置 if conf.download_actor_photo_for_kodi(): @@ -906,13 +909,13 @@ def core_main(movie_path, number_th, oCC): # 检查小封面, 如果image cut为3,则下载小封面 if imagecut == 3: if 'headers' in json_data: - small_cover_check(path, poster_path, json_data.get('cover_small'), movie_path, json_data['headers']) + small_cover_check(path, poster_path, json_data.get('cover_small'), movie_path, json_data) else: small_cover_check(path, poster_path, json_data.get('cover_small'), movie_path) # creatFolder会返回番号路径 if 'headers' in json_data: - image_download(cover, fanart_path, thumb_path, path, movie_path, json_data['headers']) + image_download(cover, fanart_path, thumb_path, path, movie_path, json_data) else: image_download(cover, fanart_path, thumb_path, path, movie_path) @@ -924,7 +927,10 @@ def core_main(movie_path, number_th, oCC): # 下载剧照 data, path, filepath if conf.is_extrafanart() and json_data.get('extrafanart'): - extrafanart_download(json_data.get('extrafanart'), path, number, movie_path, json_data) + if 'headers' in json_data: + extrafanart_download(json_data.get('extrafanart'), path, number, movie_path, json_data) + else: + extrafanart_download(json_data.get('extrafanart'), path, number, movie_path) # 下载演员头像 KODI .actors 目录位置 if conf.download_actor_photo_for_kodi(): From 3c256d17e8c5efeac974504117bf78f04c8a1437 Mon Sep 17 00:00:00 2001 From: yoshiko2 Date: Fri, 27 May 2022 23:16:24 +0800 Subject: [PATCH 04/15] Update 6.2.2 --- Movie_Data_Capture.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Movie_Data_Capture.py b/Movie_Data_Capture.py index d9603ce..10888a1 100644 --- a/Movie_Data_Capture.py +++ b/Movie_Data_Capture.py @@ -678,7 +678,7 @@ def period(delta, pattern): if __name__ == '__main__': - version = '6.2.1' + version = '6.2.2' urllib3.disable_warnings() # Ignore http proxy warning app_start = time.time() From 6403fb0679facc41a39370e501e49b24fb19c2f8 Mon Sep 17 00:00:00 2001 From: yoshiko2 Date: Sat, 28 May 2022 01:05:29 +0800 Subject: [PATCH 05/15] Fix end in `jav321` --- WebCrawler/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/WebCrawler/__init__.py b/WebCrawler/__init__.py index 7546802..e6ec1e1 100644 --- a/WebCrawler/__init__.py +++ b/WebCrawler/__init__.py @@ -157,7 +157,7 @@ def get_data_from_json(file_number, oCC): print(f"[+]Find movie [{file_number}] metadata on website '{source}'") break except: - break + continue # Return if data not found in all sources if not json_data: From 9f6322494f6d964db8276a6291669a73b5aba9bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=BE=BD=E5=85=88=E7=94=9F?= <8655163+VergilGao@users.noreply.github.com> Date: Sat, 28 May 2022 13:38:46 +0800 Subject: [PATCH 06/15] fix download error --- core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core.py b/core.py index d50e495..d02c885 100644 --- a/core.py +++ b/core.py @@ -132,7 +132,7 @@ def download_file_with_filename(url, filename, path, filepath, json_headers=None if not json_headers == None: if 'headers' in json_headers: headers.update(json_headers) - if configProxy: + if configProxy.enable: proxies = configProxy.proxies() r = requests.get(url, headers=headers, timeout=configProxy.timeout, proxies=proxies) else: From e61c72b597e3005782573efeae6bfcd34812dcf9 Mon Sep 17 00:00:00 2001 From: yoshiko2 Date: Sat, 28 May 2022 16:10:22 +0800 Subject: [PATCH 07/15] Remove shit code --- ADC_function.py | 13 ++++--------- core.py | 14 +++----------- 2 files changed, 7 insertions(+), 20 deletions(-) diff --git a/ADC_function.py b/ADC_function.py index 25c7fd2..2219219 100644 --- a/ADC_function.py +++ b/ADC_function.py @@ -470,15 +470,12 @@ def download_file_with_filename(url: str, filename: str, path: str) -> None: except: print(f"[-]Fatal error! Can not make folder '{path}'") os._exit(0) - proxies = configProxy.proxies() - headers = { - 'User-Agent': G_USER_AGENT} - r = requests.get(url, headers=headers, timeout=configProxy.timeout, proxies=proxies) + r = get_html(url=url, return_type='content') if r == '': print('[-]Movie Download Data not found!') return with open(os.path.join(path, filename), "wb") as code: - code.write(r.content) + code.write(r) return else: if not os.path.exists(path): @@ -487,14 +484,12 @@ def download_file_with_filename(url: str, filename: str, path: str) -> None: except: print(f"[-]Fatal error! Can not make folder '{path}'") os._exit(0) - headers = { - 'User-Agent': G_USER_AGENT} - r = requests.get(url, timeout=configProxy.timeout, headers=headers) + r = get_html(url=url, return_type='content') if r == '': print('[-]Movie Download Data not found!') return with open(os.path.join(path, filename), "wb") as code: - code.write(r.content) + code.write(r) return except requests.exceptions.RequestException: i += 1 diff --git a/core.py b/core.py index d50e495..3b71b63 100644 --- a/core.py +++ b/core.py @@ -118,7 +118,7 @@ def create_folder(json_data): # 创建文件夹 # path = examle:photo , video.in the Project Folder! def download_file_with_filename(url, filename, path, filepath, json_headers=None): conf = config.getInstance() - configProxy = conf.proxy() + configProxy = conf.proxy('switch') for i in range(configProxy.retry): try: @@ -128,20 +128,12 @@ def download_file_with_filename(url, filename, path, filepath, json_headers=None except: print(f"[-]Fatal error! Can not make folder '{path}'") os._exit(0) - headers = {'User-Agent': G_USER_AGENT} - if not json_headers == None: - if 'headers' in json_headers: - headers.update(json_headers) - if configProxy: - proxies = configProxy.proxies() - r = requests.get(url, headers=headers, timeout=configProxy.timeout, proxies=proxies) - else: - r = requests.get(url, headers=headers, timeout=configProxy.timeout) + r = get_html(url=url,return_type='content',json_headers=json_headers) if r == '': print('[-]Movie Download Data not found!') return with open(os.path.join(path, filename), "wb") as code: - code.write(r.content) + code.write(r) return except requests.exceptions.ProxyError: i += 1 From 99cc99bb513790342d426d6ca432c41bddf38947 Mon Sep 17 00:00:00 2001 From: yoshiko2 Date: Sat, 28 May 2022 16:12:09 +0800 Subject: [PATCH 08/15] Remove shit code #2 --- core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core.py b/core.py index 3b71b63..18b59e2 100644 --- a/core.py +++ b/core.py @@ -118,7 +118,7 @@ def create_folder(json_data): # 创建文件夹 # path = examle:photo , video.in the Project Folder! def download_file_with_filename(url, filename, path, filepath, json_headers=None): conf = config.getInstance() - configProxy = conf.proxy('switch') + configProxy = conf.proxy() for i in range(configProxy.retry): try: From 5b550dccfddeb79fc357a1f72fd26462009a79cc Mon Sep 17 00:00:00 2001 From: yoshiko2 Date: Wed, 1 Jun 2022 22:20:28 +0800 Subject: [PATCH 09/15] Fix single actor in `fanza` --- WebCrawler/fanza.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/WebCrawler/fanza.py b/WebCrawler/fanza.py index bae3713..0489a62 100644 --- a/WebCrawler/fanza.py +++ b/WebCrawler/fanza.py @@ -123,13 +123,21 @@ def main(number): number_lo.replace('-', '') + 'so' == fanza_hinban ): out_num = number + + director = fanza_Crawler.getFanzaString('監督:') + if "anime" in chosen_url: + director = "" + actor = fanza_Crawler.getStrings("//td[contains(text(),'出演者')]/following-sibling::td/span/a/text()") + if "anime" in chosen_url: + actor = "" + data = { "title": fanza_Crawler.getString('//*[starts-with(@id, "title")]/text()').strip(), "studio": fanza_Crawler.getFanzaString('メーカー'), "outline": getOutline(html), "runtime": str(re.search(r'\d+',fanza_Crawler.getString("//td[contains(text(),'収録時間')]/following-sibling::td/text()")).group()).strip(" ['']"), - "director": fanza_Crawler.getFanzaString('監督:') if "anime" not in chosen_url else "", - "actor": fanza_Crawler.getString("//td[contains(text(),'出演者')]/following-sibling::td/span/a/text()").replace("', '", ",") if "anime" not in chosen_url else "", + "director": director, + "actor": actor, "release": getRelease(fanza_Crawler), "number": out_num, "cover": getCover(html, fanza_hinban), @@ -143,7 +151,7 @@ def main(number): "source": "fanza.py", "series": fanza_Crawler.getFanzaString('シリーズ:'), } - except: + except Exception as e: data = { "title": "", } @@ -185,6 +193,6 @@ def main_htmlcode(number): if __name__ == "__main__": # print(main("DV-1562")) # print(main("96fad1217")) - print(main("pred00251")) + print(main("AES-002")) print(main("MIAA-391")) print(main("OBA-326")) From 346163906b4fffe7b95d18e59e99093c7175cf6a Mon Sep 17 00:00:00 2001 From: yoshiko2 Date: Wed, 1 Jun 2022 22:21:16 +0800 Subject: [PATCH 10/15] Add output `series` in `` in nfo --- core.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/core.py b/core.py index 18b59e2..5637803 100644 --- a/core.py +++ b/core.py @@ -337,8 +337,10 @@ def print_files(path, leak_word, c_word, naming_rule, part, cn_sub, json_data, f print(" ", file=code) print(" JP-18+", file=code) print(" JP-18+", file=code) - print(" ", file=code) - print(" ", file=code) + try: + print(" " + series + "", file=code) + except: + print(" ", file=code) print(" " + studio + "", file=code) print(" " + year + "", file=code) print(" ", file=code) From 0a5685435f6beecce63ed9797eea97de54f2dd41 Mon Sep 17 00:00:00 2001 From: yoshiko2 Date: Sun, 5 Jun 2022 02:48:27 +0800 Subject: [PATCH 11/15] Add if series and label is `----` then return "" --- WebCrawler/fanza.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/WebCrawler/fanza.py b/WebCrawler/fanza.py index 0489a62..38a919a 100644 --- a/WebCrawler/fanza.py +++ b/WebCrawler/fanza.py @@ -130,6 +130,13 @@ def main(number): actor = fanza_Crawler.getStrings("//td[contains(text(),'出演者')]/following-sibling::td/span/a/text()") if "anime" in chosen_url: actor = "" + # ---- + series = fanza_Crawler.getFanzaString('シリーズ:') + if series == "----": + series = "" + label = fanza_Crawler.getFanzaString('レーベル') + if label == "----": + label = "" data = { "title": fanza_Crawler.getString('//*[starts-with(@id, "title")]/text()').strip(), @@ -144,12 +151,12 @@ def main(number): "imagecut": 1, "tag": fanza_Crawler.getFanzaStrings('ジャンル:'), "extrafanart": getExtrafanart(htmlcode), - "label": fanza_Crawler.getFanzaString('レーベル'), + "label": label, "year": re.findall('\d{4}',getRelease(fanza_Crawler))[0], # str(re.search('\d{4}',getRelease(a)).group()), "actor_photo": "", "website": chosen_url, "source": "fanza.py", - "series": fanza_Crawler.getFanzaString('シリーズ:'), + "series": series, } except Exception as e: data = { From de67c5d4cde30d18e18d7d6cd42d137cf4c640a4 Mon Sep 17 00:00:00 2001 From: yoshiko2 Date: Sun, 5 Jun 2022 21:23:37 +0800 Subject: [PATCH 12/15] Remove `series` in `` and `` in nfo file (`series` in ``) --- core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core.py b/core.py index 5637803..e698ab5 100644 --- a/core.py +++ b/core.py @@ -374,7 +374,7 @@ def print_files(path, leak_word, c_word, naming_rule, part, cn_sub, json_data, f try: for i in tag: print(" " + i + "", file=code) - print(" " + series + "", file=code) + # print(" " + series + "", file=code) except: aaaaa = '' if cn_sub == '1': @@ -388,7 +388,7 @@ def print_files(path, leak_word, c_word, naming_rule, part, cn_sub, json_data, f try: for i in tag: print(" " + i + "", file=code) - print(" " + series + "", file=code) + # print(" " + series + "", file=code) except: aaaaaaaa = '' print(" " + number + "", file=code) From 9e7f819edabbc42b3afd9e5cbb86c6a03f1213d9 Mon Sep 17 00:00:00 2001 From: hejianjun Date: Wed, 8 Jun 2022 20:35:27 +0800 Subject: [PATCH 13/15] =?UTF-8?q?=E9=BA=BB=E8=B1=86=E7=95=AA=E5=8F=B7?= =?UTF-8?q?=E5=A4=84=E7=90=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- number_parser.py | 37 ++++++++++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/number_parser.py b/number_parser.py index e3e7245..933998a 100755 --- a/number_parser.py +++ b/number_parser.py @@ -79,7 +79,33 @@ def get_number(debug: bool, file_path: str) -> str: if debug: print(f'[-]Number Parser exception: {e} [{file_path}]') return None + +# modou提取number +def md(filename): + m = re.search(r'(md[a-z]{0,2}-?)(\d{2,})(-ep\d*)*', filename, re.I) + return f'{m.group(1).replace("-","").upper()}{m.group(2).zfill(4)}{m.group(3) or ""}' +def mmz(filename): + m = re.search(r'(mmz-?)(\d{2,})(-ep\d*)*', filename, re.I) + return f'{m.group(1).replace("-","").upper()}{m.group(2).zfill(3)}{m.group(3) or ""}' + +def msd(filename): + m = re.search(r'(msd-?)(\d{2,})(-ep\d*)*', filename, re.I) + return f'{m.group(1).replace("-","").upper()}{m.group(2).zfill(3)}{m.group(3) or ""}' + +def mky(filename): + m = re.search(r'(mky-[a-z]{2,2}-?)(\d{2,})(-ep\d*)*', filename, re.I) + return f'{m.group(1).replace("-","").upper()}{m.group(2).zfill(3)}{m.group(3) or ""}' + +def yk(filename): + m = re.search(r'(yk-?)(\d{2,})(-ep\d*)*', filename, re.I) + return f'{m.group(1).replace("-","").upper()}{m.group(2).zfill(3)}{m.group(3) or ""}' + +def pm(filename): + m = re.search(r'(pm[a-z]?-?)(\d{2,})(-ep\d*)*', filename, re.I) + return f'{m.group(1).replace("-","").upper()}{m.group(2).zfill(3)}{m.group(3) or ""}' + + # 按javdb数据源的命名规范提取number G_TAKE_NUM_RULES = { @@ -90,7 +116,13 @@ G_TAKE_NUM_RULES = { 'x-art': lambda x: str(re.search(r'x-art\.\d{2}\.\d{2}\.\d{2}', x, re.I).group()), 'xxx-av': lambda x: ''.join(['xxx-av-', re.findall(r'xxx-av[^\d]*(\d{3,5})[^\d]*', x, re.I)[0]]), 'heydouga': lambda x: 'heydouga-' + '-'.join(re.findall(r'(\d{4})[\-_](\d{3,4})[^\d]*', x, re.I)[0]), - 'heyzo': lambda x: 'HEYZO-' + re.findall(r'heyzo[^\d]*(\d{4})', x, re.I)[0] + 'heyzo': lambda x: 'HEYZO-' + re.findall(r'heyzo[^\d]*(\d{4})', x, re.I)[0], + 'md[a-z]{0,2}-\d{2,}': md, + 'mmz-\d{2,}':mmz, + 'msd-\d{2,}':msd, + 'mky-[a-z]{2,2}-\d{2,}':mky, + 'yk-\d{2,3}': yk, + 'pm[a-z]?-?\d{2,}':pm } @@ -176,6 +208,9 @@ if __name__ == "__main__": "rctd-461CH-CD2.mp4", # ch后可加CDn "rctd-461-Cd3-C.mp4", # CDn后可加-C "rctd-461-C-cD4.mp4", # cD1 Cd1 cd1 CD1 最终生成.nfo时统一为大写CD1 + "MD-123.ts", + "MDSR-0001-ep2.ts", + "2953-麻豆传媒 MKY-NS-001护理长的盲目暴露-张芸熙.mp4" ) From 2b9e9a23d3036572bbd2aaa0e063f5ecd7c059c5 Mon Sep 17 00:00:00 2001 From: hejianjun Date: Wed, 8 Jun 2022 21:07:59 +0800 Subject: [PATCH 14/15] =?UTF-8?q?=E5=8C=B9=E9=85=8D=E5=8D=95=E8=AF=8D?= =?UTF-8?q?=E8=BE=B9=E7=95=8C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- number_parser.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/number_parser.py b/number_parser.py index 933998a..911bddd 100755 --- a/number_parser.py +++ b/number_parser.py @@ -117,12 +117,12 @@ G_TAKE_NUM_RULES = { 'xxx-av': lambda x: ''.join(['xxx-av-', re.findall(r'xxx-av[^\d]*(\d{3,5})[^\d]*', x, re.I)[0]]), 'heydouga': lambda x: 'heydouga-' + '-'.join(re.findall(r'(\d{4})[\-_](\d{3,4})[^\d]*', x, re.I)[0]), 'heyzo': lambda x: 'HEYZO-' + re.findall(r'heyzo[^\d]*(\d{4})', x, re.I)[0], - 'md[a-z]{0,2}-\d{2,}': md, - 'mmz-\d{2,}':mmz, - 'msd-\d{2,}':msd, - 'mky-[a-z]{2,2}-\d{2,}':mky, - 'yk-\d{2,3}': yk, - 'pm[a-z]?-?\d{2,}':pm + r'\bmd[a-z]{0,2}-\d{2,}': md, + r'\bmmz-\d{2,}':mmz, + r'\bmsd-\d{2,}':msd, + r'\bmky-[a-z]{2,2}-\d{2,}':mky, + r'\byk-\d{2,3}': yk, + r'\bpm[a-z]?-?\d{2,}':pm } From 4637f2b6e819ba8c9b1b7f8498c09808d3efb354 Mon Sep 17 00:00:00 2001 From: hejianjun Date: Fri, 10 Jun 2022 12:23:25 +0800 Subject: [PATCH 15/15] =?UTF-8?q?=E7=BC=96=E7=A0=81=E9=97=AE=E9=A2=98?= =?UTF-8?q?=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- number_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/number_parser.py b/number_parser.py index 911bddd..cbe94fa 100755 --- a/number_parser.py +++ b/number_parser.py @@ -210,7 +210,7 @@ if __name__ == "__main__": "rctd-461-C-cD4.mp4", # cD1 Cd1 cd1 CD1 最终生成.nfo时统一为大写CD1 "MD-123.ts", "MDSR-0001-ep2.ts", - "2953-麻豆传媒 MKY-NS-001护理长的盲目暴露-张芸熙.mp4" + "MKY-NS-001.mp4" )