From 96fd8d76821e4f3f834574829efdb1c10315728f Mon Sep 17 00:00:00 2001 From: lededev Date: Fri, 1 Apr 2022 05:07:10 +0800 Subject: [PATCH 01/35] number_parser.py:add ^4K_ and ^4K- filter --- number_parser.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/number_parser.py b/number_parser.py index 2abcf8a..38aa69a 100755 --- a/number_parser.py +++ b/number_parser.py @@ -5,8 +5,9 @@ import config import typing G_spat = re.compile( - "^22-sht\.me|-fhd|_fhd|^fhd_|^fhd-|-hd|_hd|^hd_|^hd-|-sd|_sd|-1080p|_1080p|-720p|_720p|" - "^\w+\.(cc|com)@|-uncensored|_uncensored|-leak|_leak|-4K|_4K", + "^\w+\.(cc|com)@|^22-sht\.me|" + "^(fhd|hd|sd|1080p|720p|4K)(-|_)|" + "(-|_)(fhd|hd|sd|1080p|720p|4K|uncensored|leak)", re.IGNORECASE) @@ -153,9 +154,10 @@ if __name__ == "__main__": "HeyDOuGa4236-1048 Ai Qiu - .mp4", # heydouga-4236-1048 命名规则来自javdb数据源 "pacopacomama-093021_539-FHD.mkv", # 新支持片商格式 093021_539 命名规则来自javdb数据源 "sbw99.cc@heyzo_hd_2636_full.mp4", - "hhd800.com@STARS-566.mp4", - "jav20s8.com@GIGL-677.mp4", - "sbw99.cc@iesp-653.mp4" + "hhd800.com@STARS-566-HD.mp4", + "jav20s8.com@GIGL-677_4K.mp4", + "sbw99.cc@iesp-653-4K.mp4", + "4K-ABP-358_C.mkv" ) From a3655e99c32732619b41d10d02868dbc795506ed Mon Sep 17 00:00:00 2001 From: lededev Date: Sat, 2 Apr 2022 00:28:00 +0800 Subject: [PATCH 02/35] number_parser.py:more domain suffixes --- number_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/number_parser.py b/number_parser.py index 38aa69a..3bebb08 100755 --- a/number_parser.py +++ b/number_parser.py @@ -5,7 +5,7 @@ import config import typing G_spat = re.compile( - "^\w+\.(cc|com)@|^22-sht\.me|" + "^\w+\.(cc|com|net|me|club|jp|tv|xyz|biz|wiki|info|tw|us|de)@|^22-sht\.me|" "^(fhd|hd|sd|1080p|720p|4K)(-|_)|" "(-|_)(fhd|hd|sd|1080p|720p|4K|uncensored|leak)", re.IGNORECASE) From ef82e73fac85ff2a69e76501b143891f19eb46c8 Mon Sep 17 00:00:00 2001 From: lededev Date: Sun, 3 Apr 2022 02:27:29 +0800 Subject: [PATCH 03/35] Add --rerun-delay -R option, rerun after delay --- Movie_Data_Capture.py | 111 ++++++++++++++++++++++++++++-------------- config.ini | 5 +- config.py | 39 ++++++++++----- 3 files changed, 106 insertions(+), 49 deletions(-) diff --git a/Movie_Data_Capture.py b/Movie_Data_Capture.py index 4a24948..c3c891e 100644 --- a/Movie_Data_Capture.py +++ b/Movie_Data_Capture.py @@ -62,6 +62,8 @@ def argparse_function(ver: str) -> typing.Tuple[str, str, str, str, bool]: help="Override nfo_skip_days value in config.") parser.add_argument("-c", "--stop-counter", dest='cnt', default='', nargs='?', help="Override stop_counter value in config.") + parser.add_argument("-R", "--rerun-delay", dest='delaytm', default='', nargs='?', + help="Delay (eg. 1h10m30s or 60 (second)) time and rerun, until all movies proceed. Note: stop_counter value in config or -c must none zero.") parser.add_argument("-i", "--ignore-failed-list", action="store_true", help="Ignore failed list '{}'".format( os.path.join(os.path.abspath(conf.failed_folder()), 'failed_list.txt'))) parser.add_argument("-a", "--auto-exit", action="store_true", @@ -92,6 +94,7 @@ is performed. It may help you correct wrong numbers before real job.""") config.G_conf_override["common:stop_counter"] = get_natural_number_or_none(args.cnt) config.G_conf_override["common:ignore_failed_list"] = get_bool_or_none(args.ignore_failed_list) config.G_conf_override["debug_mode:switch"] = get_bool_or_none(args.debug) + config.G_conf_override["common:rerun_delay"] = get_str_or_none(args.delaytm) return args.file, args.number, args.logdir, args.regexstr, args.zero_op @@ -250,29 +253,31 @@ def close_logfile(logdir: str): except: pass # 第三步,月合并到年 - if today.month < 4: - return - mons = [f for f in log_dir.glob(r'*.txt') if re.match(r'^mdc_\d{6}$', f.stem, re.A)] - if not mons or not len(mons): - return - mons.sort() - deadline_year = f'mdc_{today.year - 1}13' - year_merge = [f for f in mons if f.stem < deadline_year] - if not year_merge or not len(year_merge): - return - toyear = len('12.txt') # cut length mdc_2020|12.txt - for f in year_merge: - try: - year_file_name = str(f)[:-toyear] + '.txt' # mdc_2020.txt - with open(year_file_name, 'a', encoding='utf-8') as y: - y.write(f.read_text(encoding='utf-8')) - f.unlink(missing_ok=True) - except: - pass + for i in range(1): + if today.month < 4: + break + mons = [f for f in log_dir.glob(r'*.txt') if re.match(r'^mdc_\d{6}$', f.stem, re.A)] + if not mons or not len(mons): + break + mons.sort() + deadline_year = f'mdc_{today.year - 1}13' + year_merge = [f for f in mons if f.stem < deadline_year] + if not year_merge or not len(year_merge): + break + toyear = len('12.txt') # cut length mdc_2020|12.txt + for f in year_merge: + try: + year_file_name = str(f)[:-toyear] + '.txt' # mdc_2020.txt + with open(year_file_name, 'a', encoding='utf-8') as y: + y.write(f.read_text(encoding='utf-8')) + f.unlink(missing_ok=True) + except: + pass # 第四步,压缩年志 如果有压缩需求,请自行手工压缩,或者使用外部脚本来定时完成。推荐nongnu的lzip,对于 # 这种粒度的文本日志,压缩比是目前最好的。lzip -9的运行参数下,日志压缩比要高于xz -9,而且内存占用更少, # 多核利用率更高(plzip多线程版本),解压速度更快。压缩后的大小差不多是未压缩时的2.4%到3.7%左右, # 100MB的日志文件能缩小到3.7MB。 + return filepath def signal_handler(*args): @@ -472,18 +477,9 @@ def create_data_and_move_with_custom_number(file_path: str, custom_number, oCC): print('[!]', err) -def main(): - version = '6.0.3' - urllib3.disable_warnings() # Ignore http proxy warning - - # Read config.ini first, in argparse_function() need conf.failed_folder() - conf = config.Config("config.ini") - - # Parse command line args - single_file_path, custom_number, logdir, regexstr, zero_op = argparse_function(version) - - - +def main(args: tuple) -> Path: + (single_file_path, custom_number, logdir, regexstr, zero_op) = args + conf = config.getInstance() main_mode = conf.main_mode() folder_path = "" if main_mode not in (1, 2, 3): @@ -614,14 +610,55 @@ def main(): print("[+]All finished!!!") - close_logfile(logdir) + return close_logfile(logdir) + + +def 分析日志文件(logfile): + try: + if not (isinstance(logfile, Path) and logfile.is_file()): + raise FileNotFoundError('log file not found') + logtxt = logfile.read_text(encoding='utf-8') + 扫描电影数 = int(re.findall(r'\[\+]Find (.*) movies\.', logtxt)[0]) + 已处理 = int(re.findall(r'\[1/(.*?)] -', logtxt)[0]) + 完成数 = logtxt.count(r'[+]Wrote!') + return 扫描电影数, 已处理, 完成数 + except: + return None, None, None + + +if __name__ == '__main__': + version = '6.0.3' + multiprocessing.freeze_support() + urllib3.disable_warnings() # Ignore http proxy warning + + # Read config.ini first, in argparse_function() need conf.failed_folder() + conf = config.Config("config.ini") + + # Parse command line args + args = tuple(argparse_function(version)) + + 再运行延迟 = conf.rerun_delay() + if 再运行延迟 > 0 and conf.stop_counter() > 0: + while True: + try: + logfile = main(args) + (扫描电影数, 已处理, 完成数) = 分析结果元组 = tuple(分析日志文件(logfile)) + if all(isinstance(v, int) for v in 分析结果元组): + 剩余个数 = 扫描电影数 - 已处理 + print(f'All movies:{扫描电影数} processed:{已处理} successes:{完成数} remain:{剩余个数}') + if 剩余个数 == 0: + break + 下次运行 = datetime.now() + timedelta(seconds=再运行延迟) + print(f'Next run time: {下次运行.strftime("%H:%M:%S")}, rerun_delay={再运行延迟}, press Ctrl+C stop run.') + time.sleep(再运行延迟) + else: + break + except: + break + else: + main(args) if not conf.auto_exit(): input("Press enter key exit, you can check the error message before you exit...") sys.exit(0) - - -if __name__ == '__main__': - multiprocessing.freeze_support() - main() diff --git a/config.ini b/config.ini index f0e4456..011b8e5 100755 --- a/config.ini +++ b/config.ini @@ -20,7 +20,10 @@ del_empty_folder=1 nfo_skip_days=30 ; 处理完多少个视频文件后停止,0为处理所有视频文件 stop_counter=0 -; 以上两个参数配合使用可以以多次少量的方式刮削或整理数千个文件而不触发翻译或元数据站封禁 +; 再运行延迟时间,单位:h时m分s秒 举例: 1h30m45s(1小时30分45秒) 45(45秒) +; stop_counter不为零的条件下才有效,每处理stop_counter部影片后延迟rerun_delay秒再次运行 +rerun_delay=0 +; 以上三个参数配合使用可以以多次少量的方式刮削或整理数千个文件而不触发翻译或元数据站封禁 ignore_failed_list=0 download_only_missing_images=1 mapping_table_validity=7 diff --git a/config.py b/config.py index 63a12a8..1132a3e 100644 --- a/config.py +++ b/config.py @@ -16,6 +16,7 @@ G_conf_override = { "common:nfo_skip_days": None, "common:stop_counter": None, "common:ignore_failed_list": None, + "common:rerun_delay": None, "debug_mode:switch": None } @@ -103,9 +104,12 @@ class Config: return self.conf.getboolean(section, item) if G_conf_override[f"{section}:{item}"] is None else bool( G_conf_override[f"{section}:{item}"]) - def getint_override(self, section, item) -> int: - return self.conf.getint(section, item) if G_conf_override[f"{section}:{item}"] is None else int( - G_conf_override[f"{section}:{item}"]) + def getint_override(self, section, item, fallback=None) -> int: + if G_conf_override[f"{section}:{item}"] is not None: + return int(G_conf_override[f"{section}:{item}"]) + if fallback is not None: + return self.conf.getint(section, item, fallback=fallback) + return self.conf.getint(section, item) def get_override(self, section, item) -> str: return self.conf.get(section, item) if G_conf_override[f"{section}:{item}"] is None else str( @@ -151,16 +155,10 @@ class Config: return self.conf.getboolean("common", "del_empty_folder") def nfo_skip_days(self) -> int: - try: - return self.getint_override("common", "nfo_skip_days") - except: - return 30 + return self.getint_override("common", "nfo_skip_days", fallback=30) def stop_counter(self) -> int: - try: - return self.getint_override("common", "stop_counter") - except: - return 0 + return self.getint_override("common", "stop_counter", fallback=0) def ignore_failed_list(self) -> bool: return self.getboolean_override("common", "ignore_failed_list") @@ -171,6 +169,24 @@ class Config: def mapping_table_validity(self) -> int: return self.conf.getint("common", "mapping_table_validity") + def rerun_delay(self) -> int: + value = self.get_override("common", "rerun_delay") + if not (isinstance(value, str) and re.match(r'^[\dsmh]+$', value, re.I)): + return 0 # not match '1h30m45s' or '30' or '1s2m1h4s5m' + if value.isnumeric() and int(value) >= 0: + return int(value) + sec = 0 + sv = re.findall(r'(\d+)s', value, re.I) + mv = re.findall(r'(\d+)m', value, re.I) + hv = re.findall(r'(\d+)h', value, re.I) + for v in sv: + sec += int(v) + for v in mv: + sec += int(v) * 60 + for v in hv: + sec += int(v) * 3600 + return sec + def is_translate(self) -> bool: return self.conf.getboolean("translate", "switch") @@ -375,6 +391,7 @@ class Config: conf.set(sec1, "ignore_failed_list", 0) conf.set(sec1, "download_only_missing_images", 1) conf.set(sec1, "mapping_table_validity", 7) + conf.set(sec1, "rerun_delay", 0) sec2 = "proxy" conf.add_section(sec2) From 7ff701b5d739f2e3e5acf1bd06b25fa4c0b883ee Mon Sep 17 00:00:00 2001 From: lededev Date: Wed, 6 Apr 2022 01:39:36 +0800 Subject: [PATCH 04/35] show app total run time --- Movie_Data_Capture.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/Movie_Data_Capture.py b/Movie_Data_Capture.py index c3c891e..aa0c5ac 100644 --- a/Movie_Data_Capture.py +++ b/Movie_Data_Capture.py @@ -626,10 +626,18 @@ def 分析日志文件(logfile): return None, None, None +def period(delta, pattern): + d = {'d': delta.days} + d['h'], rem = divmod(delta.seconds, 3600) + d['m'], d['s'] = divmod(rem, 60) + return pattern.format(**d) + + if __name__ == '__main__': version = '6.0.3' multiprocessing.freeze_support() urllib3.disable_warnings() # Ignore http proxy warning + app_start = time.time() # Read config.ini first, in argparse_function() need conf.failed_folder() conf = config.Config("config.ini") @@ -645,7 +653,12 @@ if __name__ == '__main__': (扫描电影数, 已处理, 完成数) = 分析结果元组 = tuple(分析日志文件(logfile)) if all(isinstance(v, int) for v in 分析结果元组): 剩余个数 = 扫描电影数 - 已处理 - print(f'All movies:{扫描电影数} processed:{已处理} successes:{完成数} remain:{剩余个数}') + 总用时 = timedelta(seconds = time.time() - app_start) + print(f'All movies:{扫描电影数} processed:{已处理} successes:{完成数} remain:{剩余个数}' + + ' total time:{}'.format( + period(总用时, "{d} day {h:02}:{m:02}:{s:02}") if 总用时.days == 1 + else period(总用时, "{d} days {h:02}:{m:02}:{s:02}") if 总用时.days > 1 + else period(总用时, "{h:02}:{m:02}:{s:02}"))) if 剩余个数 == 0: break 下次运行 = datetime.now() + timedelta(seconds=再运行延迟) From a840f529086924939585c7169c2a9bbd55bbb367 Mon Sep 17 00:00:00 2001 From: lededev Date: Wed, 6 Apr 2022 04:18:50 +0800 Subject: [PATCH 05/35] storyline.py:sync current amazon website --- WebCrawler/storyline.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/WebCrawler/storyline.py b/WebCrawler/storyline.py index cf4c7cc..846e840 100644 --- a/WebCrawler/storyline.py +++ b/WebCrawler/storyline.py @@ -308,8 +308,8 @@ def getStoryline_amazon(q_title, number, debug): res = session.get(urljoin(res.url, lks[0])) cookie = None lx = fromstring(res.text) - titles = lx.xpath("//span[contains(@class,'a-color-base a-text-normal')]/text()") - urls = lx.xpath("//span[contains(@class,'a-color-base a-text-normal')]/../@href") + titles = lx.xpath("//span[contains(@class,'a-size-base-plus a-color-base a-text-normal')]/text()") + urls = lx.xpath("//span[contains(@class,'a-size-base-plus a-color-base a-text-normal')]/../@href") if not len(urls) or len(urls) != len(titles): raise ValueError("titles not found") idx = amazon_select_one(titles, q_title, number, debug) @@ -325,8 +325,9 @@ def getStoryline_amazon(q_title, number, debug): res = session.get(urljoin(res.url, lks[0])) cookie = None lx = fromstring(res.text) - div = lx.xpath('//*[@id="productDescription"]')[0] - ama_t = ' '.join([e.text.strip() for e in div if not re.search('Comment|h3', str(e.tag), re.I) and isinstance(e.text, str)]) + p1 = lx.xpath('//*[@id="productDescription"]/p[1]/span/text()') + p2 = lx.xpath('//*[@id="productDescription"]/p[2]/span/text()') + ama_t = ' '.join(p1) + ' '.join(p2) ama_t = re.sub(r'審査番号:\d+', '', ama_t).strip() if cookie is None: @@ -406,10 +407,10 @@ def amazon_select_one(a_titles, q_title, number, debug): # debug 模式下记录识别准确率日志 if ratio < 0.9: # 相似度[0.5, 0.9)的淘汰结果单独记录日志 - (Path.home() / '.avlogs/ratio0.5.txt').open('a', encoding='utf-8').write( - f' [{number}] Ratio:{ratio}\n{a_titles[sel]}\n{q_title}\n{save_t_}\n{que_t}\n') + with (Path.home() / '.mlogs/ratio0.5.txt').open('a', encoding='utf-8') as hrt: + hrt.write(f' [{number}] Ratio:{ratio}\n{a_titles[sel]}\n{q_title}\n{save_t_}\n{que_t}\n') return -1 # 被采信的结果日志 - (Path.home() / '.avlogs/ratio.txt').open('a', encoding='utf-8').write( - f' [{number}] Ratio:{ratio}\n{a_titles[sel]}\n{q_title}\n{save_t_}\n{que_t}\n') + with (Path.home() / '.mlogs/ratio.txt').open('a', encoding='utf-8') as hrt: + hrt.write(f' [{number}] Ratio:{ratio}\n{a_titles[sel]}\n{q_title}\n{save_t_}\n{que_t}\n') return sel From b251a127c85ec2773f727057c4f7246938540bae Mon Sep 17 00:00:00 2001 From: lededev Date: Wed, 6 Apr 2022 04:35:11 +0800 Subject: [PATCH 06/35] storyline:remove _inner(), expand args directly --- WebCrawler/storyline.py | 54 ++++++++++++++++++++--------------------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/WebCrawler/storyline.py b/WebCrawler/storyline.py index 846e840..20d047f 100644 --- a/WebCrawler/storyline.py +++ b/WebCrawler/storyline.py @@ -80,34 +80,34 @@ def getStoryline(number, title, sites: list=None): def getStoryline_mp(args): - def _inner(site, number, title, debug): - start_time = time.time() - storyline = None - if not isinstance(site, str): - return storyline - elif site == "airavwiki": - storyline = getStoryline_airavwiki(number, debug) - elif site == "airav": - storyline = getStoryline_airav(number, debug) - elif site == "avno1": - storyline = getStoryline_avno1(number, debug) - elif site == "xcity": - storyline = getStoryline_xcity(number, debug) - elif site == "amazon": - storyline = getStoryline_amazon(title, number, debug) - elif site == "58avgo": - storyline = getStoryline_58avgo(number, debug) - if not debug: - return storyline - # 进程池模式的子进程getStoryline_*()的print()不会写入日志中,线程池和顺序执行不受影响 - print("[!]MP 进程[{}]运行{:.3f}秒,结束于{}返回结果: {}".format( - site, - time.time() - start_time, - time.strftime("%H:%M:%S"), - storyline if isinstance(storyline, str) and len(storyline) else '[空]') - ) + (site, number, title, debug) = args + start_time = time.time() + storyline = None + if not isinstance(site, str): return storyline - return _inner(*args) + elif site == "airavwiki": + storyline = getStoryline_airavwiki(number, debug) + #storyline = getStoryline_airavwiki_super(number, debug) + elif site == "airav": + storyline = getStoryline_airav(number, debug) + elif site == "avno1": + storyline = getStoryline_avno1(number, debug) + elif site == "xcity": + storyline = getStoryline_xcity(number, debug) + elif site == "amazon": + storyline = getStoryline_amazon(title, number, debug) + elif site == "58avgo": + storyline = getStoryline_58avgo(number, debug) + if not debug: + return storyline + # 进程池模式的子进程getStoryline_*()的print()不会写入日志中,线程池和顺序执行不受影响 + print("[!]MP 进程[{}]运行{:.3f}秒,结束于{}返回结果: {}".format( + site, + time.time() - start_time, + time.strftime("%H:%M:%S"), + storyline if isinstance(storyline, str) and len(storyline) else '[空]') + ) + return storyline def getStoryline_airav(number, debug): From 580139c626b19ec6c433883fed6f502c4abcf4a9 Mon Sep 17 00:00:00 2001 From: lededev Date: Wed, 6 Apr 2022 12:08:51 +0800 Subject: [PATCH 07/35] try fix issue #751 --- ADC_function.py | 13 ++++++------- WebCrawler/__init__.py | 4 ++-- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/ADC_function.py b/ADC_function.py index e310dd5..993e5dc 100644 --- a/ADC_function.py +++ b/ADC_function.py @@ -381,7 +381,7 @@ def load_cookies(cookie_json_filename: str): break if not cookies_filename: return None, None - return json.load(open(cookies_filename)), cookies_filename + return json.loads(Path(cookies_filename).read_text(encoding='utf-8')), cookies_filename except: return None, None @@ -519,14 +519,13 @@ def download_one_file(args) -> str: wrapped for map function """ - def _inner(url: str, save_path: Path): - filebytes = get_html(url, return_type='content') - if isinstance(filebytes, bytes) and len(filebytes): - if len(filebytes) == save_path.open('wb').write(filebytes): + (url, save_path) = args + filebytes = get_html(url, return_type='content') + if isinstance(filebytes, bytes) and len(filebytes): + with save_path.open('wb') as fpbyte: + if len(filebytes) == fpbyte.write(filebytes): return str(save_path) - return _inner(*args) - def parallel_download_files(dn_list: typing.Iterable[typing.Sequence], parallel: int = 0): """ diff --git a/WebCrawler/__init__.py b/WebCrawler/__init__.py index 7f9cf19..d870ece 100644 --- a/WebCrawler/__init__.py +++ b/WebCrawler/__init__.py @@ -248,8 +248,8 @@ def get_data_from_json(file_number, oCC): if json_data[translate_value] == "": continue if translate_value == "title": - title_dict = json.load( - open(str(Path.home() / '.local' / 'share' / 'mdc' / 'c_number.json'), 'r', encoding="utf-8")) + title_dict = json.loads( + (Path.home() / '.local' / 'share' / 'mdc' / 'c_number.json').read_text(encoding="utf-8")) try: json_data[translate_value] = title_dict[number] continue From 6df4d8ff763c39c8ecbab071f18f5b5b6edae30e Mon Sep 17 00:00:00 2001 From: lededev Date: Thu, 7 Apr 2022 05:47:08 +0800 Subject: [PATCH 08/35] change description 'total time:' to 'Elapsed time ' --- Movie_Data_Capture.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Movie_Data_Capture.py b/Movie_Data_Capture.py index aa0c5ac..daa6b7a 100644 --- a/Movie_Data_Capture.py +++ b/Movie_Data_Capture.py @@ -655,10 +655,10 @@ if __name__ == '__main__': 剩余个数 = 扫描电影数 - 已处理 总用时 = timedelta(seconds = time.time() - app_start) print(f'All movies:{扫描电影数} processed:{已处理} successes:{完成数} remain:{剩余个数}' + - ' total time:{}'.format( - period(总用时, "{d} day {h:02}:{m:02}:{s:02}") if 总用时.days == 1 - else period(总用时, "{d} days {h:02}:{m:02}:{s:02}") if 总用时.days > 1 - else period(总用时, "{h:02}:{m:02}:{s:02}"))) + ' Elapsed time {}'.format( + period(总用时, "{d} day {h}:{m:02}:{s:02}") if 总用时.days == 1 + else period(总用时, "{d} days {h}:{m:02}:{s:02}") if 总用时.days > 1 + else period(总用时, "{h}:{m:02}:{s:02}"))) if 剩余个数 == 0: break 下次运行 = datetime.now() + timedelta(seconds=再运行延迟) From f728f3336308773f114c3af7d1b31384dd1a7531 Mon Sep 17 00:00:00 2001 From: lededev Date: Thu, 7 Apr 2022 12:38:27 +0800 Subject: [PATCH 09/35] UserAgent update to Chrome 100.0 --- ADC_function.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ADC_function.py b/ADC_function.py index 993e5dc..30a5127 100644 --- a/ADC_function.py +++ b/ADC_function.py @@ -26,7 +26,7 @@ def getXpathSingle(htmlcode, xpath): return result1 -G_USER_AGENT = r'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36' +G_USER_AGENT = r'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.75 Safari/537.36' def get_html(url, cookies: dict = None, ua: str = None, return_type: str = None, encoding: str = None): From cbde2e4a81448693f6da41f891b783427e87a917 Mon Sep 17 00:00:00 2001 From: lededev Date: Fri, 8 Apr 2022 12:57:16 +0800 Subject: [PATCH 10/35] =?UTF-8?q?=E6=9B=B4=E6=96=B0.nfo=E6=97=B6=E4=BF=9D?= =?UTF-8?q?=E7=95=99=E5=B7=B2=E6=9C=89=E7=9A=84=E7=94=A8=E6=88=B7=E8=87=AA?= =?UTF-8?q?=E5=AE=9A=E4=B9=89=E8=AF=84=E5=88=86=E6=A0=87?= =?UTF-8?q?=E7=AD=BE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- core.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/core.py b/core.py index 3a64c06..6fbb679 100644 --- a/core.py +++ b/core.py @@ -10,6 +10,7 @@ from PIL import Image from io import BytesIO from pathlib import Path from datetime import datetime +from lxml import etree from ADC_function import * from WebCrawler import get_data_from_json @@ -291,6 +292,12 @@ def print_files(path, leak_word, c_word, naming_rule, part, cn_sub, json_data, f print(f"[-]Fatal error! can not make folder '{path}'") sys.exit(0) + old_nfo = None + try: + if os.path.isfile(nfo_path): + old_nfo = etree.parse(nfo_path) + except: + pass # KODI内查看影片信息时找不到number,配置naming_rule=number+'#'+title虽可解决 # 但使得标题太长,放入时常为空的outline内会更适合,软件给outline留出的显示版面也较大 outline = f"{number}#{outline}" @@ -354,11 +361,17 @@ def print_files(path, leak_word, c_word, naming_rule, part, cn_sub, json_data, f print(" " + release + "", file=code) print(" " + release + "", file=code) print(" " + release + "", file=code) + if old_nfo: + try: + xur = old_nfo.xpath('//userrating/text()')[0] + if isinstance(xur, str) and re.match('\d+\.\d+|\d+', xur.strip()): + print(f" {xur.strip()}", file=code) + except: + pass try: f_rating = json_data['用户评分'] uc = json_data['评分人数'] - print(f""" {round(f_rating * 2.0)} - {round(f_rating * 2.0, 1)} + print(f""" {round(f_rating * 2.0, 1)} {round(f_rating * 20.0, 1)} From c20bf4cf57549e28690a51d51ca3a205a6f0572f Mon Sep 17 00:00:00 2001 From: lededev Date: Fri, 8 Apr 2022 14:17:50 +0800 Subject: [PATCH 11/35] fanza.py:resolve some [-]Movie number has changed --- WebCrawler/fanza.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/WebCrawler/fanza.py b/WebCrawler/fanza.py index 00d8988..e9b25ef 100644 --- a/WebCrawler/fanza.py +++ b/WebCrawler/fanza.py @@ -250,6 +250,9 @@ def main(number): # but the hinban on the page is test00012 # so get the hinban first, and then pass it to following functions fanza_hinban = getNum(htmlcode) + out_num = fanza_hinban + if re.sub('-|_', '', number.lower()) == fanza_hinban: + out_num = number data = { "title": getTitle(htmlcode).strip(), "studio": getStudio(htmlcode), @@ -258,7 +261,7 @@ def main(number): "director": getDirector(htmlcode) if "anime" not in chosen_url else "", "actor": getActor(htmlcode) if "anime" not in chosen_url else "", "release": getRelease(htmlcode), - "number": fanza_hinban, + "number": out_num, "cover": getCover(htmlcode, fanza_hinban), "imagecut": 1, "tag": getTag(htmlcode), @@ -315,3 +318,5 @@ if __name__ == "__main__": # print(main("DV-1562")) # print(main("96fad1217")) print(main("pred00251")) + print(main("MIAA-391")) + print(main("OBA-326")) From b048c0431098df54c366e33315951aee87b4860a Mon Sep 17 00:00:00 2001 From: lededev Date: Fri, 8 Apr 2022 16:11:48 +0800 Subject: [PATCH 12/35] =?UTF-8?q?=E4=BF=AE=E5=A4=8D-CD1=20-CD2=E5=8F=8Aima?= =?UTF-8?q?gecut=3D=3D3=E4=B8=8B=E8=BD=BD=E5=B0=8F=E5=B0=81=E9=9D=A2?= =?UTF-8?q?=E5=90=8E=E5=9B=A0=E6=B0=B4=E5=8D=B0=E6=96=87=E4=BB=B6=E4=B8=8D?= =?UTF-8?q?=E5=AD=98=E5=9C=A8=E8=80=8C=E5=A4=B1=E8=B4=A5=E7=9A=84=E9=97=AE?= =?UTF-8?q?=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- core.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/core.py b/core.py index 6fbb679..4aa3c78 100644 --- a/core.py +++ b/core.py @@ -70,10 +70,12 @@ def get_info(json_data): # 返回json里的数据 return title, studio, year, outline, runtime, director, actor_photo, release, number, cover, trailer, website, series, label -def small_cover_check(path, number, cover_small, leak_word, c_word, hack_word, filepath): - filename = f"{number}{leak_word}{c_word}{hack_word}-poster.jpg" - download_file_with_filename(cover_small, filename, path, filepath) - print('[+]Image Downloaded! ' + os.path.join(path, filename)) +def small_cover_check(path, filename, cover_small, movie_path): + full_filepath = os.path.join(path, filename) + if config.getInstance().download_only_missing_images() and not file_not_exist_or_empty(full_filepath): + return + download_file_with_filename(cover_small, filename, path, movie_path) + print('[+]Image Downloaded! ' + full_filepath) def create_folder(json_data): # 创建文件夹 @@ -256,7 +258,7 @@ def image_ext(url): return ".jpg" # 封面是否下载成功,否则移动到failed -def image_download(cover, fanart_path,thumb_path, path, filepath): +def image_download(cover, fanart_path, thumb_path, path, filepath): full_filepath = os.path.join(path, fanart_path) if config.getInstance().download_only_missing_images() and not file_not_exist_or_empty(full_filepath): return @@ -696,8 +698,7 @@ def core_main(file_path, number_th, oCC): # 检查小封面, 如果image cut为3,则下载小封面 if imagecut == 3: - small_cover_check(path, number, json_data.get('cover_small'), leak_word, c_word, hack_word, filepath) - + small_cover_check(path, poster_path, json_data.get('cover_small'), filepath) # creatFolder会返回番号路径 image_download( cover, fanart_path,thumb_path, path, filepath) @@ -718,7 +719,7 @@ def core_main(file_path, number_th, oCC): # 裁剪图 - cutImage(imagecut, path , fanart_path, poster_path) + cutImage(imagecut, path, fanart_path, poster_path) # 添加水印 if conf.is_watermark(): @@ -746,7 +747,7 @@ def core_main(file_path, number_th, oCC): # 检查小封面, 如果image cut为3,则下载小封面 if imagecut == 3: - small_cover_check(path, number, json_data.get('cover_small'), leak_word, c_word, hack_word, filepath) + small_cover_check(path, poster_path, json_data.get('cover_small'), filepath) # creatFolder会返回番号路径 image_download( cover, fanart_path,thumb_path, path, filepath) @@ -761,7 +762,7 @@ def core_main(file_path, number_th, oCC): extrafanart_download(json_data.get('extrafanart'), path, number, filepath) # 裁剪图 - cutImage(imagecut, path , fanart_path, poster_path) + cutImage(imagecut, path, fanart_path, poster_path) # 添加水印 if conf.is_watermark(): From de58647402ee03c8af28be94048260dc2613710f Mon Sep 17 00:00:00 2001 From: lededev Date: Sat, 9 Apr 2022 01:35:49 +0800 Subject: [PATCH 13/35] =?UTF-8?q?=E6=A8=A1=E5=BC=8F2=E6=94=AF=E6=8C=81?= =?UTF-8?q?=E7=A1=AC=E9=93=BE=E6=8E=A5=EF=BC=9B=E6=94=B9=E8=BF=9B=E5=AD=97?= =?UTF-8?q?=E5=B9=95=E5=A4=8D=E5=88=B6=E9=80=82=E5=BA=94=E6=9B=B4=E5=A4=9A?= =?UTF-8?q?=E6=96=87=E4=BB=B6=E5=90=8D=E7=BB=84=E5=90=88?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- core.py | 64 +++++++++++++++++++++++++++++++-------------------------- 1 file changed, 35 insertions(+), 29 deletions(-) diff --git a/core.py b/core.py index 4aa3c78..d512d01 100644 --- a/core.py +++ b/core.py @@ -494,7 +494,6 @@ def add_to_pic(pic_path, img_pic, size, count, mode): def paste_file_to_folder(filepath, path, number, leak_word, c_word, hack_word): # 文件路径,番号,后缀,要移动至的位置 filepath_obj = pathlib.Path(filepath) houzhui = filepath_obj.suffix - file_parent_origin_path = str(filepath_obj.parent) try: targetpath = os.path.join(path, f"{number}{leak_word}{c_word}{hack_word}{houzhui}") # 任何情况下都不要覆盖,以免遭遇数据源或者引擎错误导致所有文件得到同一个number,逐一 @@ -520,25 +519,19 @@ def paste_file_to_folder(filepath, path, number, leak_word, c_word, hack_word): filerelpath = os.path.relpath(filepath, path) os.symlink(filerelpath, targetpath) except: - os.symlink(filepath_obj.resolve(), targetpath) - sub_res = config.getInstance().sub_rule() + os.symlink(str(filepath_obj.resolve()), targetpath) - for subname in sub_res: - sub_filepath = str(filepath_obj.with_suffix(subname)) - if os.path.isfile(sub_filepath.replace(subname,".chs" + subname)): - sub_filepath = sub_filepath.replace(subname,".chs" + subname) - subname = ".chs" + subname - elif os.path.isfile(sub_filepath.replace(subname,".cht" + subname)): - sub_filepath = sub_filepath.replace(subname, ".cht" + subname) - subname = ".cht" + subname - if os.path.isfile(sub_filepath): + sub_res = [subext.lower() for subext in config.getInstance().sub_rule()] + for subfile in filepath_obj.parent.glob('**/*'): + if subfile.is_file() and subfile.suffix.lower() in sub_res: + sub_targetpath = Path(path) / f"{number}{leak_word}{c_word}{hack_word}{''.join(subfile.suffixes)}" if link_mode not in (1, 2): - shutil.move(sub_filepath, os.path.join(path, f"{number}{leak_word}{c_word}{hack_word}{subname}")) - print('[+]Sub moved!') + shutil.move(str(subfile), str(sub_targetpath)) + print(f"[+]Sub Moved! '{sub_targetpath.name}'") else: - shutil.copyfile(sub_filepath, os.path.join(path, f"{number}{leak_word}{c_word}{hack_word}{subname}")) - print('[+]Sub Copied!') - return True + shutil.copyfile(str(subfile), str(sub_targetpath)) + print(f"[+]Sub Copied! '{sub_targetpath.name}'") + return except FileExistsError as fee: print(f'[-]FileExistsError: {fee}') @@ -557,24 +550,37 @@ def paste_file_to_folder_mode2(filepath, path, multi_part, number, part, leak_wo number += part # 这时number会被附加上CD1后缀 filepath_obj = pathlib.Path(filepath) houzhui = filepath_obj.suffix - file_parent_origin_path = str(filepath_obj.parent) targetpath = os.path.join(path, f"{number}{part}{leak_word}{c_word}{hack_word}{houzhui}") if os.path.exists(targetpath): raise FileExistsError('File Exists on destination path, we will never overwriting.') try: - if config.getInstance().link_mode(): - os.symlink(filepath, targetpath) - else: + link_mode = config.getInstance().link_mode() + create_softlink = False + if link_mode not in (1, 2): shutil.move(filepath, targetpath) + elif link_mode == 2: + try: + os.link(filepath, targetpath, follow_symlinks=False) + except: + create_softlink = True + if link_mode == 1 or create_softlink: + try: + filerelpath = os.path.relpath(filepath, path) + os.symlink(filerelpath, targetpath) + except: + os.symlink(str(filepath_obj.resolve()), targetpath) - sub_res = config.getInstance().sub_rule() - for subname in sub_res: - sub_filepath = str(filepath_obj.with_suffix(subname)) - if os.path.isfile(sub_filepath): # 字幕移动 - shutil.move(sub_filepath, os.path.join(path, f"{number}{part}{leak_word}{c_word}{hack_word}{subname}")) - print('[+]Sub moved!') - print('[!]Success') - return True + sub_res = [subext.lower() for subext in config.getInstance().sub_rule()] + for subfile in filepath_obj.parent.glob('**/*'): + if subfile.is_file() and subfile.suffix.lower() in sub_res: + sub_targetpath = Path(path) / f"{number}{leak_word}{c_word}{hack_word}{''.join(subfile.suffixes)}" + if link_mode not in (1, 2): + shutil.move(str(subfile), str(sub_targetpath)) + print(f"[+]Sub Moved! '{sub_targetpath.name}'") + else: + shutil.copyfile(str(subfile), str(sub_targetpath)) + print(f"[+]Sub Copied! '{sub_targetpath.name}'") + return except FileExistsError as fee: print(f'[-]FileExistsError: {fee}') return From 69f52798c6d5a25ca58a1070e0559afe0e063530 Mon Sep 17 00:00:00 2001 From: lededev Date: Sat, 9 Apr 2022 20:50:09 +0800 Subject: [PATCH 14/35] =?UTF-8?q?=E5=AD=97=E5=B9=95=E5=90=8E=E7=BC=80?= =?UTF-8?q?=E5=8E=BB=E9=99=A4.txt=E4=BB=A5=E9=81=BF=E5=85=8D=E5=A4=8D?= =?UTF-8?q?=E5=88=B6=E5=B9=BF=E5=91=8A?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- config.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config.ini b/config.ini index 011b8e5..a25c685 100755 --- a/config.ini +++ b/config.ini @@ -79,7 +79,7 @@ uncensored_prefix=S2M,BT,LAF,SMD,SMBD,SM3D2DBD,SKY-,SKYHD,CWP,CWDV,CWBD,CW3D2DBD ; 影片后缀 media_type=.mp4,.avi,.rmvb,.wmv,.mov,.mkv,.flv,.ts,.webm,.iso,.mpg,.m4v ; 字幕后缀 -sub_type=.smi,.srt,.idx,.sub,.sup,.psb,.ssa,.ass,.txt,.usf,.xss,.ssf,.rt,.lrc,.sbv,.vtt,.ttml +sub_type=.smi,.srt,.idx,.sub,.sup,.psb,.ssa,.ass,.usf,.xss,.ssf,.rt,.lrc,.sbv,.vtt,.ttml ; 水印 [watermark] From 8ee1f212d29dfc66d27e999ff01fd5ac3ec24ef6 Mon Sep 17 00:00:00 2001 From: lededev Date: Sat, 9 Apr 2022 20:50:44 +0800 Subject: [PATCH 15/35] site 38,39 --- config.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config.ini b/config.ini index a25c685..25a19c8 100755 --- a/config.ini +++ b/config.ini @@ -117,7 +117,7 @@ mode=1 vars=outline,series,studio,tag,title [javdb] -sites=37,38 +sites=38,39 ; 人脸识别 hog:方向梯度直方图(不太准确,速度快) cnn:深度学习模型(准确,需要GPU/CUDA,速度慢) [face] From 9e9b799441a31acf62d48941ca898dfe06e36143 Mon Sep 17 00:00:00 2001 From: lededev Date: Sat, 9 Apr 2022 21:35:06 +0800 Subject: [PATCH 16/35] =?UTF-8?q?=E6=96=B0=E5=A2=9E=E5=A4=9A=E9=9B=86?= =?UTF-8?q?=E5=BD=B1=E7=89=87=E5=AD=97=E5=B9=95=E6=94=AF=E6=8C=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- core.py | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/core.py b/core.py index d512d01..ddf30e7 100644 --- a/core.py +++ b/core.py @@ -491,7 +491,7 @@ def add_to_pic(pic_path, img_pic, size, count, mode): # ========================结束================================= -def paste_file_to_folder(filepath, path, number, leak_word, c_word, hack_word): # 文件路径,番号,后缀,要移动至的位置 +def paste_file_to_folder(filepath, path, multi_part, number, part, leak_word, c_word, hack_word): # 文件路径,番号,后缀,要移动至的位置 filepath_obj = pathlib.Path(filepath) houzhui = filepath_obj.suffix try: @@ -524,6 +524,8 @@ def paste_file_to_folder(filepath, path, number, leak_word, c_word, hack_word): sub_res = [subext.lower() for subext in config.getInstance().sub_rule()] for subfile in filepath_obj.parent.glob('**/*'): if subfile.is_file() and subfile.suffix.lower() in sub_res: + if multi_part and part.lower() not in subfile.name.lower(): + continue sub_targetpath = Path(path) / f"{number}{leak_word}{c_word}{hack_word}{''.join(subfile.suffixes)}" if link_mode not in (1, 2): shutil.move(str(subfile), str(sub_targetpath)) @@ -573,6 +575,8 @@ def paste_file_to_folder_mode2(filepath, path, multi_part, number, part, leak_wo sub_res = [subext.lower() for subext in config.getInstance().sub_rule()] for subfile in filepath_obj.parent.glob('**/*'): if subfile.is_file() and subfile.suffix.lower() in sub_res: + if multi_part and part.lower() not in subfile.name.lower(): + continue sub_targetpath = Path(path) / f"{number}{leak_word}{c_word}{hack_word}{''.join(subfile.suffixes)}" if link_mode not in (1, 2): shutil.move(str(subfile), str(sub_targetpath)) @@ -592,18 +596,6 @@ def paste_file_to_folder_mode2(filepath, path, multi_part, number, part, leak_wo return -def get_part(filepath): - try: - if re.search('-CD\d+', filepath): - return re.findall('-CD\d+', filepath)[0] - if re.search('-cd\d+', filepath): - return re.findall('-cd\d+', filepath)[0] - except: - print("[-]failed!Please rename the filename again!") - moveFailedFolder(filepath) - return - - def debug_print(data: json): try: print("[+] ------- DEBUG INFO -------") @@ -657,9 +649,9 @@ def core_main(file_path, number_th, oCC): imagecut = json_data.get('imagecut') tag = json_data.get('tag') # =======================================================================判断-C,-CD后缀 - if '-CD' in filepath or '-cd' in filepath: + if re.search('-CD\d+', filepath, re.IGNORECASE): multi_part = 1 - part = get_part(filepath) + part = re.findall('-CD\d+', filepath, re.IGNORECASE)[0] if '-c.' in filepath or '-C.' in filepath or '中文' in filepath or '字幕' in filepath: cn_sub = '1' c_word = '-C' # 中文字幕影片后缀 @@ -732,7 +724,7 @@ def core_main(file_path, number_th, oCC): add_mark(os.path.join(path,poster_path), os.path.join(path,thumb_path), cn_sub, leak, uncensored, hack) # 移动电影 - paste_file_to_folder(filepath, path, number, leak_word, c_word, hack_word) + paste_file_to_folder(filepath, path, multi_part, number, part, leak_word, c_word, hack_word) # 最后输出.nfo元数据文件,以完成.nfo文件创建作为任务成功标志 print_files(path, leak_word, c_word, json_data.get('naming_rule'), part, cn_sub, json_data, filepath, tag, json_data.get('actor_list'), liuchu, uncensored, hack_word From 109cc3717b6200f1ef9451b13b212d6797fb076b Mon Sep 17 00:00:00 2001 From: lededev Date: Sun, 10 Apr 2022 01:26:47 +0800 Subject: [PATCH 17/35] strictly restrict to .nfo in order to exclude .nfo\w+ --- Movie_Data_Capture.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Movie_Data_Capture.py b/Movie_Data_Capture.py index daa6b7a..4737c71 100644 --- a/Movie_Data_Capture.py +++ b/Movie_Data_Capture.py @@ -365,7 +365,7 @@ def movie_lists(source_folder, regexstr: str) -> typing.List[str]: skip_numbers = set() success_folder = Path(conf.success_folder()).resolve() for f in success_folder.glob(r'**/*'): - if not re.match(r'\.nfo', f.suffix, re.IGNORECASE): + if not re.match(r'\.nfo$', f.suffix, re.IGNORECASE): continue if file_modification_days(f) > nfo_skip_days: continue From 3e3ff3cfb3f681466fab0c169601fa5711b54c88 Mon Sep 17 00:00:00 2001 From: lededev Date: Sun, 10 Apr 2022 03:12:03 +0800 Subject: [PATCH 18/35] =?UTF-8?q?=E4=BC=98=E5=8C=96=E5=AD=97=E5=B9=95?= =?UTF-8?q?=E6=A3=80=E6=B5=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- config.py | 9 +++++---- core.py | 4 ++-- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/config.py b/config.py index 1132a3e..7dfc702 100644 --- a/config.py +++ b/config.py @@ -3,6 +3,7 @@ import re import sys import configparser import time +import typing from pathlib import Path G_conf_override = { @@ -263,8 +264,8 @@ class Config: def media_type(self) -> str: return self.conf.get('media', 'media_type') - def sub_rule(self): - return self.conf.get('media', 'sub_type').split(',') + def sub_rule(self) -> typing.Set[str]: + return set(self.conf.get('media', 'sub_type').lower().split(',')) def naming_rule(self) -> str: return self.conf.get("Name_Rule", "naming_rule") @@ -445,9 +446,9 @@ class Config: sec11 = "media" conf.add_section(sec11) conf.set(sec11, "media_type", - ".mp4,.avi,.rmvb,.wmv,.mov,.mkv,.flv,.ts,.webm,.MP4,.AVI,.RMVB,.WMV,.MOV,.MKV,.FLV,.TS,.WEBM,iso,ISO") + ".mp4,.avi,.rmvb,.wmv,.mov,.mkv,.flv,.ts,.webm,iso") conf.set(sec11, "sub_type", - ".smi,.srt,.idx,.sub,.sup,.psb,.ssa,.ass,.txt,.usf,.xss,.ssf,.rt,.lrc,.sbv,.vtt,.ttml") + ".smi,.srt,.idx,.sub,.sup,.psb,.ssa,.ass,.usf,.xss,.ssf,.rt,.lrc,.sbv,.vtt,.ttml") sec12 = "watermark" conf.add_section(sec12) diff --git a/core.py b/core.py index ddf30e7..41d40e4 100644 --- a/core.py +++ b/core.py @@ -521,7 +521,7 @@ def paste_file_to_folder(filepath, path, multi_part, number, part, leak_word, c_ except: os.symlink(str(filepath_obj.resolve()), targetpath) - sub_res = [subext.lower() for subext in config.getInstance().sub_rule()] + sub_res = config.getInstance().sub_rule() for subfile in filepath_obj.parent.glob('**/*'): if subfile.is_file() and subfile.suffix.lower() in sub_res: if multi_part and part.lower() not in subfile.name.lower(): @@ -572,7 +572,7 @@ def paste_file_to_folder_mode2(filepath, path, multi_part, number, part, leak_wo except: os.symlink(str(filepath_obj.resolve()), targetpath) - sub_res = [subext.lower() for subext in config.getInstance().sub_rule()] + sub_res = config.getInstance().sub_rule() for subfile in filepath_obj.parent.glob('**/*'): if subfile.is_file() and subfile.suffix.lower() in sub_res: if multi_part and part.lower() not in subfile.name.lower(): From e951429ec03a7c05717b1fcd49c3762a9911ac8c Mon Sep 17 00:00:00 2001 From: lededev Date: Sun, 10 Apr 2022 05:39:57 +0800 Subject: [PATCH 19/35] =?UTF-8?q?=E5=87=8F=E5=B0=91=E6=88=90=E5=8A=9F?= =?UTF-8?q?=E4=BF=A1=E6=81=AF=E6=96=87=E4=BB=B6=E8=B7=AF=E5=BE=84=E5=88=B7?= =?UTF-8?q?=E5=B1=8F=EF=BC=8C=E4=BB=85=E5=86=99=E5=85=A5.nfo=E6=97=B6?= =?UTF-8?q?=E6=98=BE=E7=A4=BA=E5=AE=8C=E5=85=A8=E8=B7=AF=E5=BE=84?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ImageProcessing/__init__.py | 5 +++-- core.py | 20 ++++++++++---------- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/ImageProcessing/__init__.py b/ImageProcessing/__init__.py index 9fafc6e..dafbb55 100644 --- a/ImageProcessing/__init__.py +++ b/ImageProcessing/__init__.py @@ -2,6 +2,7 @@ import logging import os import config import importlib +from pathlib import Path from PIL import Image import shutil from ADC_function import file_not_exist_or_empty @@ -72,13 +73,13 @@ def cutImage(imagecut, path, fanart_path, poster_path): else: # 如果等于2/3 img2 = img img2.save(fullpath_poster) - print('[+]Image Cutted! ' + fullpath_poster) + print(f"[+]Image Cutted! {Path(fullpath_poster).name}") except Exception as e: print(e) print('[-]Cover cut failed!') elif imagecut == 0: # 复制封面 shutil.copyfile(fullpath_fanart, fullpath_poster) - print('[+]Image Copyed! ' + fullpath_poster) + print(f"[+]Image Copyed! {Path(fullpath_poster).name}") def face_center(filename, model): diff --git a/core.py b/core.py index 41d40e4..fff055d 100644 --- a/core.py +++ b/core.py @@ -71,11 +71,11 @@ def get_info(json_data): # 返回json里的数据 def small_cover_check(path, filename, cover_small, movie_path): - full_filepath = os.path.join(path, filename) - if config.getInstance().download_only_missing_images() and not file_not_exist_or_empty(full_filepath): + full_filepath = Path(path) / filename + if config.getInstance().download_only_missing_images() and not file_not_exist_or_empty(str(full_filepath)): return download_file_with_filename(cover_small, filename, path, movie_path) - print('[+]Image Downloaded! ' + full_filepath) + print('[+]Image Downloaded! ' + full_filepath.name) def create_folder(json_data): # 创建文件夹 @@ -216,7 +216,7 @@ def extrafanart_download_one_by_one(data, path, filepath): break if file_not_exist_or_empty(jpg_fullpath): return - print('[+]Image Downloaded!', jpg_fullpath) + print('[+]Image Downloaded!', Path(jpg_fullpath).name) j += 1 if conf.debug(): print(f'[!]Extrafanart download one by one mode runtime {time.perf_counter() - tm_start:.3f}s') @@ -247,7 +247,7 @@ def extrafanart_download_threadpool(url_list, save_dir, number): if failed: # 非致命错误,电影不移入失败文件夹,将来可以用模式3补齐 print(f"[-]Failed downloaded {failed}/{len(result)} extrafanart images for [{number}] to '{extrafanart_dir}', you may retry run mode 3 later.") else: - print(f"[+]Successfully downloaded {len(result)} extrafanart to '{extrafanart_dir}'") + print(f"[+]Successfully downloaded {len(result)} extrafanarts.") if conf.debug(): print(f'[!]Extrafanart download ThreadPool mode runtime {time.perf_counter() - tm_start:.3f}s') @@ -276,7 +276,7 @@ def image_download(cover, fanart_path, thumb_path, path, filepath): break if file_not_exist_or_empty(full_filepath): return - print('[+]Image Downloaded!', full_filepath) + print('[+]Image Downloaded!', Path(full_filepath).name) shutil.copyfile(full_filepath, os.path.join(path, thumb_path)) @@ -529,10 +529,10 @@ def paste_file_to_folder(filepath, path, multi_part, number, part, leak_word, c_ sub_targetpath = Path(path) / f"{number}{leak_word}{c_word}{hack_word}{''.join(subfile.suffixes)}" if link_mode not in (1, 2): shutil.move(str(subfile), str(sub_targetpath)) - print(f"[+]Sub Moved! '{sub_targetpath.name}'") + print(f"[+]Sub Moved! {sub_targetpath.name}") else: shutil.copyfile(str(subfile), str(sub_targetpath)) - print(f"[+]Sub Copied! '{sub_targetpath.name}'") + print(f"[+]Sub Copied! {sub_targetpath.name}") return except FileExistsError as fee: @@ -580,10 +580,10 @@ def paste_file_to_folder_mode2(filepath, path, multi_part, number, part, leak_wo sub_targetpath = Path(path) / f"{number}{leak_word}{c_word}{hack_word}{''.join(subfile.suffixes)}" if link_mode not in (1, 2): shutil.move(str(subfile), str(sub_targetpath)) - print(f"[+]Sub Moved! '{sub_targetpath.name}'") + print(f"[+]Sub Moved! {sub_targetpath.name}") else: shutil.copyfile(str(subfile), str(sub_targetpath)) - print(f"[+]Sub Copied! '{sub_targetpath.name}'") + print(f"[+]Sub Copied! {sub_targetpath.name}") return except FileExistsError as fee: print(f'[-]FileExistsError: {fee}') From 09c81d7f5975eea75b7dfe89596dfa9610c6e318 Mon Sep 17 00:00:00 2001 From: lededev Date: Sun, 10 Apr 2022 07:14:55 +0800 Subject: [PATCH 20/35] =?UTF-8?q?=E6=96=B0=E5=A2=9E=E9=80=89=E9=A1=B9:1.?= =?UTF-8?q?=E9=81=BF=E5=85=8D=E6=A8=A1=E5=BC=8F3=E8=B7=B3=E8=BF=87?= =?UTF-8?q?=E4=BA=BA=E8=84=B8=E8=AF=86=E5=88=AB=202.=E9=81=BF=E5=85=8D?= =?UTF-8?q?=E5=AF=B9=E6=9C=89=E7=A0=81=E5=B0=81=E9=9D=A2=E8=BF=9B=E8=A1=8C?= =?UTF-8?q?=E4=BA=BA=E8=84=B8=E8=AF=86=E5=88=AB?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ImageProcessing/__init__.py | 14 ++++++++++---- config.ini | 6 +++++- config.py | 22 ++++++++++------------ core.py | 4 ++-- 4 files changed, 27 insertions(+), 19 deletions(-) diff --git a/ImageProcessing/__init__.py b/ImageProcessing/__init__.py index dafbb55..f0b7fe5 100644 --- a/ImageProcessing/__init__.py +++ b/ImageProcessing/__init__.py @@ -55,18 +55,24 @@ def face_crop_height(filename, width, height): return (0, 0, width, cropHeight) -def cutImage(imagecut, path, fanart_path, poster_path): +def cutImage(imagecut, path, fanart_path, poster_path, skip_facerec=False): fullpath_fanart = os.path.join(path, fanart_path) fullpath_poster = os.path.join(path, poster_path) - if config.getInstance().download_only_missing_images() and not file_not_exist_or_empty(fullpath_poster): + if config.getInstance().face_aways_imagecut(): + imagecut = 1 + elif config.getInstance().download_only_missing_images() and not file_not_exist_or_empty(fullpath_poster): return if imagecut == 1: # 剪裁大封面 try: img = Image.open(fullpath_fanart) width, height = img.size if width/height > 2/3: # 如果宽度大于2 - # 以人像为中心切取 - img2 = img.crop(face_crop_width(fullpath_fanart, width, height)) + if skip_facerec: + # 有码封面默认靠右切 + img2 = img.crop((width - int(height/3) * 2, 0, width, height)) + else: + # 以人像为中心切取 + img2 = img.crop(face_crop_width(fullpath_fanart, width, height)) elif width/height < 2/3: # 如果高度大于3 # 从底部向上切割 img2 = img.crop(face_crop_height(fullpath_fanart, width, height)) diff --git a/config.ini b/config.ini index 25a19c8..6e40cc4 100755 --- a/config.ini +++ b/config.ini @@ -119,6 +119,10 @@ vars=outline,series,studio,tag,title [javdb] sites=38,39 -; 人脸识别 hog:方向梯度直方图(不太准确,速度快) cnn:深度学习模型(准确,需要GPU/CUDA,速度慢) +; 人脸识别 locations_model=hog:方向梯度直方图(不太准确,速度快) cnn:深度学习模型(准确,需要GPU/CUDA,速度慢) +; uncensored_only=0:对全部封面进行人脸识别 1:只识别无码封面,有码封面直接切右半部分 +; aways_imagecut=0:按各网站默认行为 1:总是裁剪封面,开启此项将无视[common]download_only_missing_images=1总是覆盖封面 [face] locations_model=hog +uncensored_only=1 +aways_imagecut=0 diff --git a/config.py b/config.py index 7dfc702..586ee8f 100644 --- a/config.py +++ b/config.py @@ -346,22 +346,20 @@ class Config: return 1 def cc_convert_vars(self) -> str: - try: - return self.conf.get("cc_convert", "vars") - except: - return "actor,director,label,outline,series,studio,tag,title" + return self.conf.get("cc_convert", "vars", + fallback="actor,director,label,outline,series,studio,tag,title") def javdb_sites(self) -> str: - try: - return self.conf.get("javdb", "sites") - except: - return "33,34" + return self.conf.get("javdb", "sites", fallback="38,39") def face_locations_model(self) -> str: - try: - return self.conf.get("face", "locations_model") - except: - return "hog" + return self.conf.get("face", "locations_model", fallback="hog") + + def face_uncensored_only(self) -> bool: + return self.conf.getboolean("face", "uncensored_only", fallback=True) + + def face_aways_imagecut(self) -> bool: + return self.conf.getboolean("face", "aways_imagecut", fallback=False) @staticmethod def _exit(sec: str) -> None: diff --git a/core.py b/core.py index fff055d..f8ef23c 100644 --- a/core.py +++ b/core.py @@ -717,7 +717,7 @@ def core_main(file_path, number_th, oCC): # 裁剪图 - cutImage(imagecut, path, fanart_path, poster_path) + cutImage(imagecut, path, fanart_path, poster_path, bool(conf.face_uncensored_only() and not uncensored)) # 添加水印 if conf.is_watermark(): @@ -760,7 +760,7 @@ def core_main(file_path, number_th, oCC): extrafanart_download(json_data.get('extrafanart'), path, number, filepath) # 裁剪图 - cutImage(imagecut, path, fanart_path, poster_path) + cutImage(imagecut, path, fanart_path, poster_path, bool(conf.face_uncensored_only() and not uncensored)) # 添加水印 if conf.is_watermark(): From 44dc26d13e30f55f27130e4c59799aa97281fbf0 Mon Sep 17 00:00:00 2001 From: lededev Date: Sun, 10 Apr 2022 10:09:40 +0800 Subject: [PATCH 21/35] =?UTF-8?q?=E4=BC=98=E5=8C=96=E5=B0=81=E9=9D=A2?= =?UTF-8?q?=E8=A3=81=E5=89=AA=E5=AE=BD=E9=AB=98=E6=AF=94?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ImageProcessing/__init__.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/ImageProcessing/__init__.py b/ImageProcessing/__init__.py index f0b7fe5..539623e 100644 --- a/ImageProcessing/__init__.py +++ b/ImageProcessing/__init__.py @@ -7,6 +7,8 @@ from PIL import Image import shutil from ADC_function import file_not_exist_or_empty +g_width_half_ratio = 2.12 + def face_crop_width(filename, width, height): # 新宽度是高度的2/3 cropWidthHalf = int(height/3) @@ -22,15 +24,15 @@ def face_crop_width(filename, width, height): # 越界处理 if cropLeft < 0: cropLeft = 0 - cropRight = cropWidthHalf*2 + cropRight = cropWidthHalf*g_width_half_ratio elif cropRight > width: - cropLeft = width-cropWidthHalf*2 + cropLeft = width-cropWidthHalf*g_width_half_ratio cropRight = width return (cropLeft, 0, cropRight, height) except: print('[-]Not found face! ' + filename) # 默认靠右切 - return (width-cropWidthHalf*2, 0, width, height) + return (width-cropWidthHalf * g_width_half_ratio, 0, width, height) def face_crop_height(filename, width, height): @@ -69,7 +71,7 @@ def cutImage(imagecut, path, fanart_path, poster_path, skip_facerec=False): if width/height > 2/3: # 如果宽度大于2 if skip_facerec: # 有码封面默认靠右切 - img2 = img.crop((width - int(height/3) * 2, 0, width, height)) + img2 = img.crop((width - int(height/3) * g_width_half_ratio, 0, width, height)) else: # 以人像为中心切取 img2 = img.crop(face_crop_width(fullpath_fanart, width, height)) From c54817aa01a16bce78e9c403ad5ed5711bbc41ae Mon Sep 17 00:00:00 2001 From: lededev Date: Sun, 10 Apr 2022 13:04:08 +0800 Subject: [PATCH 22/35] =?UTF-8?q?=E6=96=B0=E5=A2=9E=E4=B8=8D=E8=81=94?= =?UTF-8?q?=E7=BD=91=E6=89=B9=E9=87=8F=E5=B0=81=E9=9D=A2=E5=89=AA=E8=A3=81?= =?UTF-8?q?(=E4=BA=BA=E8=84=B8=E8=AF=86=E5=88=AB)=E5=92=8C=E6=89=93?= =?UTF-8?q?=E6=B0=B4=E5=8D=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Movie_Data_Capture.py | 45 ++++++++++++++++-------- config.py | 14 +++++--- core.py | 81 +++++++++++++++++++++++++++++++------------ 3 files changed, 98 insertions(+), 42 deletions(-) diff --git a/Movie_Data_Capture.py b/Movie_Data_Capture.py index 4737c71..a191ee2 100644 --- a/Movie_Data_Capture.py +++ b/Movie_Data_Capture.py @@ -18,7 +18,7 @@ from opencc import OpenCC import config from ADC_function import file_modification_days, get_html, parallel_download_files from number_parser import get_number -from core import core_main, moveFailedFolder +from core import core_main, core_main_no_net_op, moveFailedFolder def check_update(local_version): @@ -40,7 +40,7 @@ def check_update(local_version): print("[*]======================================================") -def argparse_function(ver: str) -> typing.Tuple[str, str, str, str, bool]: +def argparse_function(ver: str) -> typing.Tuple[str, str, str, str, bool, bool]: conf = config.getInstance() parser = argparse.ArgumentParser(epilog=f"Load Config file '{conf.ini_path}'.") parser.add_argument("file", default='', nargs='?', help="Single Movie file path.") @@ -70,6 +70,8 @@ def argparse_function(ver: str) -> typing.Tuple[str, str, str, str, bool]: help="Auto exit after program complete") parser.add_argument("-g", "--debug", action="store_true", help="Turn on debug mode to generate diagnostic log for issue report.") + parser.add_argument("-N", "--no-network-operation", action="store_true", + help="No network query, do not get metadata, for cover cropping purposes, only takes effect when main mode is 3.") parser.add_argument("-z", "--zero-operation", dest='zero_op', action="store_true", help="""Only show job list of files and numbers, and **NO** actual operation is performed. It may help you correct wrong numbers before real job.""") @@ -96,7 +98,14 @@ is performed. It may help you correct wrong numbers before real job.""") config.G_conf_override["debug_mode:switch"] = get_bool_or_none(args.debug) config.G_conf_override["common:rerun_delay"] = get_str_or_none(args.delaytm) - return args.file, args.number, args.logdir, args.regexstr, args.zero_op + no_net_op = False + if conf.main_mode() == 3: + no_net_op = args.no_network_operation + config.G_conf_override["common:stop_counter"] = 0 + config.G_conf_override["common:rerun_delay"] = '0s' + config.G_conf_override["face:aways_imagecut"] = True + + return args.file, args.number, args.logdir, args.regexstr, args.zero_op, no_net_op class OutLogger(object): @@ -416,38 +425,44 @@ def rm_empty_folder(path): pass -def create_data_and_move(file_path: str, zero_op, oCC): +def create_data_and_move(movie_path: str, zero_op: bool, no_net_op: bool, oCC): # Normalized number, eg: 111xxx-222.mp4 -> xxx-222.mp4 debug = config.getInstance().debug() - n_number = get_number(debug, os.path.basename(file_path)) - file_path = os.path.abspath(file_path) + n_number = get_number(debug, os.path.basename(movie_path)) + movie_path = os.path.abspath(movie_path) if debug is True: - print(f"[!] [{n_number}] As Number making data for '{file_path}'") + print(f"[!] [{n_number}] As Number making data for '{movie_path}'") if zero_op: return if n_number: - core_main(file_path, n_number, oCC) + if no_net_op: + core_main_no_net_op(movie_path, n_number) + else: + core_main(movie_path, n_number, oCC) else: print("[-] number empty ERROR") - moveFailedFolder(file_path) + moveFailedFolder(movie_path) print("[*]======================================================") else: try: - print(f"[!] [{n_number}] As Number making data for '{file_path}'") + print(f"[!] [{n_number}] As Number making data for '{movie_path}'") if zero_op: return if n_number: - core_main(file_path, n_number, oCC) + if no_net_op: + core_main_no_net_op(movie_path, n_number) + else: + core_main(movie_path, n_number, oCC) else: raise ValueError("number empty") print("[*]======================================================") except Exception as err: - print(f"[-] [{file_path}] ERROR:") + print(f"[-] [{movie_path}] ERROR:") print('[-]', err) try: - moveFailedFolder(file_path) + moveFailedFolder(movie_path) except Exception as err: print('[!]', err) @@ -478,7 +493,7 @@ def create_data_and_move_with_custom_number(file_path: str, custom_number, oCC): def main(args: tuple) -> Path: - (single_file_path, custom_number, logdir, regexstr, zero_op) = args + (single_file_path, custom_number, logdir, regexstr, zero_op, no_net_op) = args conf = config.getInstance() main_mode = conf.main_mode() folder_path = "" @@ -592,7 +607,7 @@ def main(args: tuple) -> Path: percentage = str(count / int(count_all) * 100)[:4] + '%' print('[!] {:>30}{:>21}'.format('- ' + percentage + ' [' + str(count) + '/' + count_all + '] -', time.strftime("%H:%M:%S"))) - create_data_and_move(movie_path, zero_op, oCC) + create_data_and_move(movie_path, zero_op, no_net_op, oCC) if count >= stop_count: print("[!]Stop counter triggered!") break diff --git a/config.py b/config.py index 586ee8f..e5bb1bf 100644 --- a/config.py +++ b/config.py @@ -18,7 +18,8 @@ G_conf_override = { "common:stop_counter": None, "common:ignore_failed_list": None, "common:rerun_delay": None, - "debug_mode:switch": None + "debug_mode:switch": None, + "face:aways_imagecut": None } @@ -101,9 +102,12 @@ class Config: # sys.exit(3) # #self.conf = self._default_config() - def getboolean_override(self, section, item) -> bool: - return self.conf.getboolean(section, item) if G_conf_override[f"{section}:{item}"] is None else bool( - G_conf_override[f"{section}:{item}"]) + def getboolean_override(self, section, item, fallback=None) -> bool: + if G_conf_override[f"{section}:{item}"] is not None: + return bool(G_conf_override[f"{section}:{item}"]) + if fallback is not None: + return self.conf.getboolean(section, item, fallback=fallback) + return self.conf.getboolean(section, item) def getint_override(self, section, item, fallback=None) -> int: if G_conf_override[f"{section}:{item}"] is not None: @@ -359,7 +363,7 @@ class Config: return self.conf.getboolean("face", "uncensored_only", fallback=True) def face_aways_imagecut(self) -> bool: - return self.conf.getboolean("face", "aways_imagecut", fallback=False) + return self.getboolean_override("face", "aways_imagecut", fallback=False) @staticmethod def _exit(sec: str) -> None: diff --git a/core.py b/core.py index f8ef23c..e898e01 100644 --- a/core.py +++ b/core.py @@ -615,7 +615,46 @@ def debug_print(data: json): pass -def core_main(file_path, number_th, oCC): +def core_main_no_net_op(movie_path, number): + conf = config.getInstance() + leak_word = '' + leak = 0 + c_word = '' + cn_sub = '' + hack = '' + hack_word = '' + ext = '.jpg' + imagecut = 1 + path = str(Path(movie_path).parent) + + if '-c.' in movie_path or '-C.' in movie_path or '中文' in movie_path or '字幕' in movie_path: + cn_sub = '1' + c_word = '-C' # 中文字幕影片后缀 + uncensored = 1 if is_uncensored(number) else 0 + if '流出' in movie_path or 'uncensored' in movie_path: + leak_word = '-流出' # 流出影片后缀 + leak = 1 + + if 'hack'.upper() in str(movie_path).upper() or '破解' in movie_path: + hack = 1 + hack_word = "-hack" + + fanart_path = f"{number}{leak_word}{c_word}{hack_word}-fanart{ext}" + poster_path = f"{number}{leak_word}{c_word}{hack_word}-poster{ext}" + thumb_path = f"{number}{leak_word}{c_word}{hack_word}-thumb{ext}" + full_fanart_path = os.path.join(path, fanart_path) + full_poster_path = os.path.join(path, poster_path) + full_thumb_path = os.path.join(path, thumb_path) + + if not all(os.path.isfile(f) for f in (full_fanart_path, full_thumb_path)): + return + + cutImage(imagecut, path, fanart_path, poster_path, bool(conf.face_uncensored_only() and not uncensored)) + if conf.is_watermark(): + add_mark(full_poster_path, full_thumb_path, cn_sub, leak, uncensored, hack) + + +def core_main(movie_path, number_th, oCC): conf = config.getInstance() # =======================================================================初始化所需变量 multi_part = 0 @@ -627,8 +666,6 @@ def core_main(file_path, number_th, oCC): hack = '' hack_word = '' - - filepath = file_path # 影片的路径 绝对路径 # 下面被注释的变量不需要 #rootpath= os.getcwd number = number_th @@ -636,7 +673,7 @@ def core_main(file_path, number_th, oCC): # Return if blank dict returned (data not found) if not json_data: - moveFailedFolder(filepath) + moveFailedFolder(movie_path) return if json_data["number"] != number: @@ -649,10 +686,10 @@ def core_main(file_path, number_th, oCC): imagecut = json_data.get('imagecut') tag = json_data.get('tag') # =======================================================================判断-C,-CD后缀 - if re.search('-CD\d+', filepath, re.IGNORECASE): + if re.search('-CD\d+', movie_path, re.IGNORECASE): multi_part = 1 - part = re.findall('-CD\d+', filepath, re.IGNORECASE)[0] - if '-c.' in filepath or '-C.' in filepath or '中文' in filepath or '字幕' in filepath: + part = re.findall('-CD\d+', movie_path, re.IGNORECASE)[0] + if '-c.' in movie_path or '-C.' in movie_path or '中文' in movie_path or '字幕' in movie_path: cn_sub = '1' c_word = '-C' # 中文字幕影片后缀 @@ -660,14 +697,14 @@ def core_main(file_path, number_th, oCC): uncensored = 1 if is_uncensored(number) else 0 - if '流出' in filepath or 'uncensored' in filepath: + if '流出' in movie_path or 'uncensored' in movie_path: liuchu = '流出' leak = 1 leak_word = '-流出' # 流出影片后缀 else: leak = 0 - if 'hack'.upper() in str(filepath).upper() or '破解' in filepath: + if 'hack'.upper() in str(movie_path).upper() or '破解' in movie_path: hack = 1 hack_word = "-hack" @@ -696,22 +733,22 @@ def core_main(file_path, number_th, oCC): # 检查小封面, 如果image cut为3,则下载小封面 if imagecut == 3: - small_cover_check(path, poster_path, json_data.get('cover_small'), filepath) + small_cover_check(path, poster_path, json_data.get('cover_small'), movie_path) # creatFolder会返回番号路径 - image_download( cover, fanart_path,thumb_path, path, filepath) + image_download( cover, fanart_path,thumb_path, path, movie_path) if not multi_part or part.lower() == '-cd1': try: # 下载预告片 if conf.is_trailer() and json_data.get('trailer'): - trailer_download(json_data.get('trailer'), leak_word, c_word, hack_word, number, path, filepath) + trailer_download(json_data.get('trailer'), leak_word, c_word, hack_word, number, path, movie_path) except: pass try: # 下载剧照 data, path, filepath if conf.is_extrafanart() and json_data.get('extrafanart'): - extrafanart_download(json_data.get('extrafanart'), path, number, filepath) + extrafanart_download(json_data.get('extrafanart'), path, number, movie_path) except: pass @@ -724,40 +761,40 @@ def core_main(file_path, number_th, oCC): add_mark(os.path.join(path,poster_path), os.path.join(path,thumb_path), cn_sub, leak, uncensored, hack) # 移动电影 - paste_file_to_folder(filepath, path, multi_part, number, part, leak_word, c_word, hack_word) + paste_file_to_folder(movie_path, path, multi_part, number, part, leak_word, c_word, hack_word) # 最后输出.nfo元数据文件,以完成.nfo文件创建作为任务成功标志 - print_files(path, leak_word, c_word, json_data.get('naming_rule'), part, cn_sub, json_data, filepath, tag, json_data.get('actor_list'), liuchu, uncensored, hack_word + print_files(path, leak_word, c_word, json_data.get('naming_rule'), part, cn_sub, json_data, movie_path, tag, json_data.get('actor_list'), liuchu, uncensored, hack_word ,fanart_path,poster_path,thumb_path) elif conf.main_mode() == 2: # 创建文件夹 path = create_folder(json_data) # 移动文件 - paste_file_to_folder_mode2(filepath, path, multi_part, number, part, leak_word, c_word, hack_word) + paste_file_to_folder_mode2(movie_path, path, multi_part, number, part, leak_word, c_word, hack_word) if conf.is_watermark(): add_mark(os.path.join(path,poster_path), os.path.join(path,thumb_path), cn_sub, leak, uncensored, hack) elif conf.main_mode() == 3: - path = str(Path(file_path).parent) + path = str(Path(movie_path).parent) if multi_part == 1: number += part # 这时number会被附加上CD1后缀 # 检查小封面, 如果image cut为3,则下载小封面 if imagecut == 3: - small_cover_check(path, poster_path, json_data.get('cover_small'), filepath) + small_cover_check(path, poster_path, json_data.get('cover_small'), movie_path) # creatFolder会返回番号路径 - image_download( cover, fanart_path,thumb_path, path, filepath) + image_download( cover, fanart_path, thumb_path, path, movie_path) if not multi_part or part.lower() == '-cd1': # 下载预告片 if conf.is_trailer() and json_data.get('trailer'): - trailer_download(json_data.get('trailer'), leak_word, c_word, hack_word, number, path, filepath) + trailer_download(json_data.get('trailer'), leak_word, c_word, hack_word, number, path, movie_path) # 下载剧照 data, path, filepath if conf.is_extrafanart() and json_data.get('extrafanart'): - extrafanart_download(json_data.get('extrafanart'), path, number, filepath) + extrafanart_download(json_data.get('extrafanart'), path, number, movie_path) # 裁剪图 cutImage(imagecut, path, fanart_path, poster_path, bool(conf.face_uncensored_only() and not uncensored)) @@ -767,5 +804,5 @@ def core_main(file_path, number_th, oCC): add_mark(os.path.join(path,poster_path), os.path.join(path,thumb_path), cn_sub, leak, uncensored, hack) # 最后输出.nfo元数据文件,以完成.nfo文件创建作为任务成功标志 - print_files(path, leak_word, c_word, json_data.get('naming_rule'), part, cn_sub, json_data, filepath, + print_files(path, leak_word, c_word, json_data.get('naming_rule'), part, cn_sub, json_data, movie_path, tag, json_data.get('actor_list'), liuchu, uncensored, hack_word,fanart_path,poster_path,thumb_path) From 8add9fe42454a94091942404063bd8cc5436f55a Mon Sep 17 00:00:00 2001 From: lededev Date: Sun, 10 Apr 2022 13:38:48 +0800 Subject: [PATCH 23/35] =?UTF-8?q?=E8=A3=81=E5=89=AA=E5=B0=81=E9=9D=A2?= =?UTF-8?q?=E5=AE=BD=E9=AB=98=E6=AF=94=E5=8F=AF=E9=85=8D=E7=BD=AE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ImageProcessing/__init__.py | 16 +++++++++------- Movie_Data_Capture.py | 7 ++++--- config.ini | 2 ++ config.py | 3 +++ 4 files changed, 18 insertions(+), 10 deletions(-) diff --git a/ImageProcessing/__init__.py b/ImageProcessing/__init__.py index 539623e..e582e55 100644 --- a/ImageProcessing/__init__.py +++ b/ImageProcessing/__init__.py @@ -7,9 +7,9 @@ from PIL import Image import shutil from ADC_function import file_not_exist_or_empty -g_width_half_ratio = 2.12 def face_crop_width(filename, width, height): + aspect_ratio = config.getInstance().face_aspect_ratio() # 新宽度是高度的2/3 cropWidthHalf = int(height/3) try: @@ -24,15 +24,15 @@ def face_crop_width(filename, width, height): # 越界处理 if cropLeft < 0: cropLeft = 0 - cropRight = cropWidthHalf*g_width_half_ratio + cropRight = cropWidthHalf * aspect_ratio elif cropRight > width: - cropLeft = width-cropWidthHalf*g_width_half_ratio + cropLeft = width - cropWidthHalf * aspect_ratio cropRight = width return (cropLeft, 0, cropRight, height) except: print('[-]Not found face! ' + filename) # 默认靠右切 - return (width-cropWidthHalf * g_width_half_ratio, 0, width, height) + return (width-cropWidthHalf * aspect_ratio, 0, width, height) def face_crop_height(filename, width, height): @@ -58,11 +58,13 @@ def face_crop_height(filename, width, height): def cutImage(imagecut, path, fanart_path, poster_path, skip_facerec=False): + conf = config.getInstance() fullpath_fanart = os.path.join(path, fanart_path) fullpath_poster = os.path.join(path, poster_path) - if config.getInstance().face_aways_imagecut(): + aspect_ratio = conf.face_aspect_ratio() + if conf.face_aways_imagecut(): imagecut = 1 - elif config.getInstance().download_only_missing_images() and not file_not_exist_or_empty(fullpath_poster): + elif conf.download_only_missing_images() and not file_not_exist_or_empty(fullpath_poster): return if imagecut == 1: # 剪裁大封面 try: @@ -71,7 +73,7 @@ def cutImage(imagecut, path, fanart_path, poster_path, skip_facerec=False): if width/height > 2/3: # 如果宽度大于2 if skip_facerec: # 有码封面默认靠右切 - img2 = img.crop((width - int(height/3) * g_width_half_ratio, 0, width, height)) + img2 = img.crop((width - int(height/3) * aspect_ratio, 0, width, height)) else: # 以人像为中心切取 img2 = img.crop(face_crop_width(fullpath_fanart, width, height)) diff --git a/Movie_Data_Capture.py b/Movie_Data_Capture.py index a191ee2..2c8a5a2 100644 --- a/Movie_Data_Capture.py +++ b/Movie_Data_Capture.py @@ -101,9 +101,10 @@ is performed. It may help you correct wrong numbers before real job.""") no_net_op = False if conf.main_mode() == 3: no_net_op = args.no_network_operation - config.G_conf_override["common:stop_counter"] = 0 - config.G_conf_override["common:rerun_delay"] = '0s' - config.G_conf_override["face:aways_imagecut"] = True + if no_net_op: + config.G_conf_override["common:stop_counter"] = 0 + config.G_conf_override["common:rerun_delay"] = '0s' + config.G_conf_override["face:aways_imagecut"] = True return args.file, args.number, args.logdir, args.regexstr, args.zero_op, no_net_op diff --git a/config.ini b/config.ini index 6e40cc4..d46b1ac 100755 --- a/config.ini +++ b/config.ini @@ -122,7 +122,9 @@ sites=38,39 ; 人脸识别 locations_model=hog:方向梯度直方图(不太准确,速度快) cnn:深度学习模型(准确,需要GPU/CUDA,速度慢) ; uncensored_only=0:对全部封面进行人脸识别 1:只识别无码封面,有码封面直接切右半部分 ; aways_imagecut=0:按各网站默认行为 1:总是裁剪封面,开启此项将无视[common]download_only_missing_images=1总是覆盖封面 +; 封面裁剪的宽高比可配置,公式为aspect_ratio/3。默认aspect_ratio=2.12: 适配大部分有码影片封面,前一版本默认为2/3即aspect_ratio=2 [face] locations_model=hog uncensored_only=1 aways_imagecut=0 +aspect_ratio=2.12 diff --git a/config.py b/config.py index e5bb1bf..4dc0a9f 100644 --- a/config.py +++ b/config.py @@ -365,6 +365,9 @@ class Config: def face_aways_imagecut(self) -> bool: return self.getboolean_override("face", "aways_imagecut", fallback=False) + def face_aspect_ratio(self) -> float: + return self.conf.getfloat("face", "aspect_ratio", fallback=2.12) + @staticmethod def _exit(sec: str) -> None: print("[-] Read config error! Please check the {} section in config.ini", sec) From 02692becfea0d8a34c007928fbc334c4eda393d3 Mon Sep 17 00:00:00 2001 From: lededev Date: Sun, 10 Apr 2022 14:48:25 +0800 Subject: [PATCH 24/35] =?UTF-8?q?=E6=9B=B4=E5=8F=AF=E9=9D=A0=E7=9A=84?= =?UTF-8?q?=E6=97=A0=E7=A0=81=E8=AF=86=E5=88=AB=E6=96=B9=E6=B3=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- WebCrawler/carib.py | 1 + WebCrawler/javbus.py | 5 +++++ WebCrawler/javdb.py | 7 +++++-- WebCrawler/madou.py | 3 ++- core.py | 19 +++++++++++++++---- 5 files changed, 28 insertions(+), 7 deletions(-) diff --git a/WebCrawler/carib.py b/WebCrawler/carib.py index 47aa0d7..4dac7ba 100755 --- a/WebCrawler/carib.py +++ b/WebCrawler/carib.py @@ -40,6 +40,7 @@ def main(number: str) -> json: 'website': f'{G_SITE}/moviepages/{number}/index.html', 'source': 'carib.py', 'series': get_series(lx), + '无码': True } js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) return js diff --git a/WebCrawler/javbus.py b/WebCrawler/javbus.py index 46493da..2a5a303 100644 --- a/WebCrawler/javbus.py +++ b/WebCrawler/javbus.py @@ -83,6 +83,9 @@ def getExtrafanart(htmlcode): # 获取剧照 if extrafanart_imgs: return [urljoin('https://www.javbus.com',img) for img in extrafanart_imgs] return '' +def getUncensored(html): + x = html.xpath('//*[@id="navbar"]/ul[1]/li[@class="active"]/a[contains(@href,"uncensored")]') + return bool(x) def main_uncensored(number): htmlcode = get_html('https://www.javbus.com/ja/' + number) @@ -109,6 +112,7 @@ def main_uncensored(number): 'website': 'https://www.javbus.com/ja/' + number, 'source': 'javbus.py', 'series': getSeriseJa(lx), + '无码': getUncensored(lx) } js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8') return js @@ -151,6 +155,7 @@ def main(number): 'website': 'https://www.javbus.com/' + number, 'source': 'javbus.py', 'series': getSerise(lx), + '无码': getUncensored(lx) } js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4,separators=(',', ':'), ) # .encode('UTF-8') return js diff --git a/WebCrawler/javdb.py b/WebCrawler/javdb.py index a0d8a38..2d21e29 100755 --- a/WebCrawler/javdb.py +++ b/WebCrawler/javdb.py @@ -179,6 +179,9 @@ def getUserRating(html): return float(v[0][0]), int(v[0][1]) except: return +def getUncensored(html): + x = html.xpath('//strong[contains(text(),"類別")]/../span/a[contains(@href,"/tags/uncensored?")]') + return bool(x) def main(number): # javdb更新后同一时间只能登录一个数字站,最新登录站会踢出旧的登录,因此按找到的第一个javdb*.json文件选择站点, @@ -300,7 +303,7 @@ def main(number): 'website': urljoin('https://javdb.com', correct_url), 'source': 'javdb.py', 'series': getSeries(lx), - + '无码': getUncensored(lx) } userrating = getUserRating(lx) if isinstance(userrating, tuple) and len(userrating) == 2: @@ -328,7 +331,7 @@ if __name__ == "__main__": # print(main('blacked.20.05.30')) # print(main('AGAV-042')) # print(main('BANK-022')) - # print(main('070116-197')) + print(main('070116-197')) # print(main('093021_539')) # 没有剧照 片商pacopacomama #print(main('FC2-2278260')) # print(main('FC2-735670')) diff --git a/WebCrawler/madou.py b/WebCrawler/madou.py index 6cf9132..eb1f365 100644 --- a/WebCrawler/madou.py +++ b/WebCrawler/madou.py @@ -146,7 +146,8 @@ def main(number): 'website': url, 'source': 'madou.py', # 使用 - 'series': getSerise(html) + 'series': getSerise(html), + '无码': True } js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8') diff --git a/core.py b/core.py index e898e01..ed0ad51 100644 --- a/core.py +++ b/core.py @@ -617,6 +617,7 @@ def debug_print(data: json): def core_main_no_net_op(movie_path, number): conf = config.getInstance() + part = '' leak_word = '' leak = 0 c_word = '' @@ -627,6 +628,8 @@ def core_main_no_net_op(movie_path, number): imagecut = 1 path = str(Path(movie_path).parent) + if re.search('-CD\d+', movie_path, re.IGNORECASE): + part = re.findall('-CD\d+', movie_path, re.IGNORECASE)[0] if '-c.' in movie_path or '-C.' in movie_path or '中文' in movie_path or '字幕' in movie_path: cn_sub = '1' c_word = '-C' # 中文字幕影片后缀 @@ -639,12 +642,19 @@ def core_main_no_net_op(movie_path, number): hack = 1 hack_word = "-hack" - fanart_path = f"{number}{leak_word}{c_word}{hack_word}-fanart{ext}" - poster_path = f"{number}{leak_word}{c_word}{hack_word}-poster{ext}" - thumb_path = f"{number}{leak_word}{c_word}{hack_word}-thumb{ext}" + prestr = f"{number}{leak_word}{c_word}{hack_word}" + fanart_path = f"{prestr}-fanart{ext}" + poster_path = f"{prestr}-poster{ext}" + thumb_path = f"{prestr}-thumb{ext}" full_fanart_path = os.path.join(path, fanart_path) full_poster_path = os.path.join(path, poster_path) full_thumb_path = os.path.join(path, thumb_path) + full_nfo = Path(path) / f"{prestr}{part}.nfo" + + if full_nfo.is_file(): + nfo = full_nfo.read_text(encoding='utf-8') + if nfo.find(r'无码'): + uncensored = 1 if not all(os.path.isfile(f) for f in (full_fanart_path, full_thumb_path)): return @@ -695,7 +705,8 @@ def core_main(movie_path, number_th, oCC): # 判断是否无码 uncensored = 1 if is_uncensored(number) else 0 - + if json_data.get('无码'): + uncensored = 1 if '流出' in movie_path or 'uncensored' in movie_path: liuchu = '流出' From e5bc900b40970a9c1f444cd615957ebf85b0b4ac Mon Sep 17 00:00:00 2001 From: lededev Date: Sun, 10 Apr 2022 15:29:46 +0800 Subject: [PATCH 25/35] =?UTF-8?q?=E5=B0=86=E6=AC=A7=E7=BE=8E=E5=85=A8?= =?UTF-8?q?=E9=83=A8=E5=BD=92=E7=B1=BB=E5=88=B0=E6=97=A0=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- WebCrawler/javbus.py | 24 +++++++++++++----------- WebCrawler/javdb.py | 4 +++- 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/WebCrawler/javbus.py b/WebCrawler/javbus.py index 2a5a303..5218016 100644 --- a/WebCrawler/javbus.py +++ b/WebCrawler/javbus.py @@ -88,7 +88,8 @@ def getUncensored(html): return bool(x) def main_uncensored(number): - htmlcode = get_html('https://www.javbus.com/ja/' + number) + w_number = number.replace('.', '-') + htmlcode = get_html('https://www.javbus.red/' + w_number) if "404 Page Not Found" in htmlcode: raise Exception('404 page not found') lx = etree.fromstring(htmlcode, etree.HTMLParser()) @@ -97,7 +98,7 @@ def main_uncensored(number): 'title': title, 'studio': getStudioJa(lx), 'year': getYear(lx), - 'outline': getOutline(number, title), + 'outline': getOutline(w_number, title), 'runtime': getRuntime(lx), 'director': getDirectorJa(lx), 'actor': getActor(lx), @@ -109,10 +110,10 @@ def main_uncensored(number): 'label': getSeriseJa(lx), 'imagecut': 0, # 'actor_photo': '', - 'website': 'https://www.javbus.com/ja/' + number, + 'website': 'https://www.javbus.red/' + w_number, 'source': 'javbus.py', 'series': getSeriseJa(lx), - '无码': getUncensored(lx) + '无码': True } js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8') return js @@ -174,12 +175,13 @@ def main(number): if __name__ == "__main__" : config.G_conf_override['debug_mode:switch'] = True - print(main('ABP-888')) - print(main('ABP-960')) - print(main('ADV-R0624')) # 404 - print(main('MMNT-010')) - print(main('ipx-292')) - print(main('CEMD-011')) - print(main('CJOD-278')) + # print(main('ABP-888')) + # print(main('ABP-960')) + # print(main('ADV-R0624')) # 404 + # print(main('MMNT-010')) + # print(main('ipx-292')) + # print(main('CEMD-011')) + # print(main('CJOD-278')) + print(main('BrazzersExxtra.21.02.01')) print(main('100221_001')) print(main('AVSW-061')) diff --git a/WebCrawler/javdb.py b/WebCrawler/javdb.py index 2d21e29..3dfff16 100755 --- a/WebCrawler/javdb.py +++ b/WebCrawler/javdb.py @@ -180,7 +180,8 @@ def getUserRating(html): except: return def getUncensored(html): - x = html.xpath('//strong[contains(text(),"類別")]/../span/a[contains(@href,"/tags/uncensored?")]') + x = html.xpath('//strong[contains(text(),"類別")]/../span/a[contains(@href,"/tags/uncensored?")' + ' or contains(@href,"/tags/western?")]') return bool(x) def main(number): @@ -342,3 +343,4 @@ if __name__ == "__main__": # print(main('EBOD-646')) # print(main('LOVE-262')) print(main('ABP-890')) + print(main('blacked.14.12.08')) From 9c1baef0b7bd83d7e7f7ced01bf6eb2be4181eb1 Mon Sep 17 00:00:00 2001 From: lededev <lededev@noreplay.github.com> Date: Mon, 11 Apr 2022 00:18:40 +0800 Subject: [PATCH 26/35] =?UTF-8?q?=E6=97=A0=E8=AE=BA=E6=9C=89=E7=A0=81?= =?UTF-8?q?=E6=97=A0=E7=A0=81=E5=9D=87=E4=BC=98=E5=85=88=E9=87=87=E4=BF=A1?= =?UTF-8?q?=E7=BD=91=E7=AB=99=E7=BB=93=E6=9E=9C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- core.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/core.py b/core.py index ed0ad51..29b2f58 100644 --- a/core.py +++ b/core.py @@ -705,8 +705,9 @@ def core_main(movie_path, number_th, oCC): # 判断是否无码 uncensored = 1 if is_uncensored(number) else 0 - if json_data.get('无码'): - uncensored = 1 + unce = json_data.get('无码') + if type(unce) is bool: + uncensored = 1 if unce else 0 if '流出' in movie_path or 'uncensored' in movie_path: liuchu = '流出' From dfcc012201762f1ace3639aa16511c8e1cec1929 Mon Sep 17 00:00:00 2001 From: lededev <lededev@noreplay.github.com> Date: Mon, 11 Apr 2022 06:05:11 +0800 Subject: [PATCH 27/35] =?UTF-8?q?.nfo=E6=96=87=E4=BB=B6=E4=B8=8D=E5=AD=98?= =?UTF-8?q?=E5=9C=A8=E6=97=B6=E4=B8=8D=E6=89=A7=E8=A1=8C-N=E6=93=8D?= =?UTF-8?q?=E4=BD=9C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- core.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/core.py b/core.py index 29b2f58..39f4276 100644 --- a/core.py +++ b/core.py @@ -655,6 +655,8 @@ def core_main_no_net_op(movie_path, number): nfo = full_nfo.read_text(encoding='utf-8') if nfo.find(r'<tag>无码</tag>'): uncensored = 1 + else: + return if not all(os.path.isfile(f) for f in (full_fanart_path, full_thumb_path)): return From a813bf462fa29313b5d92083c009eca72c50feb9 Mon Sep 17 00:00:00 2001 From: lededev <lededev@noreplay.github.com> Date: Mon, 11 Apr 2022 13:37:50 +0800 Subject: [PATCH 28/35] fanza.py:fix [-]Movie number has changed! [RED-164]->[red00164] --- WebCrawler/fanza.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/WebCrawler/fanza.py b/WebCrawler/fanza.py index e9b25ef..deeea1d 100644 --- a/WebCrawler/fanza.py +++ b/WebCrawler/fanza.py @@ -251,7 +251,8 @@ def main(number): # so get the hinban first, and then pass it to following functions fanza_hinban = getNum(htmlcode) out_num = fanza_hinban - if re.sub('-|_', '', number.lower()) == fanza_hinban: + number_lo = number.lower() + if re.sub('-|_', '', number_lo) == fanza_hinban or number_lo.replace('-', '00') == fanza_hinban: out_num = number data = { "title": getTitle(htmlcode).strip(), From e50e14764f52e15a7d52b7d49ebc1a8fa784e6a0 Mon Sep 17 00:00:00 2001 From: lededev <lededev@noreplay.github.com> Date: Mon, 11 Apr 2022 13:46:57 +0800 Subject: [PATCH 29/35] fanza.py:fix [-]Movie number has changed! [ATOM-067]->[atom067so] --- WebCrawler/fanza.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/WebCrawler/fanza.py b/WebCrawler/fanza.py index deeea1d..f09ad1e 100644 --- a/WebCrawler/fanza.py +++ b/WebCrawler/fanza.py @@ -252,7 +252,10 @@ def main(number): fanza_hinban = getNum(htmlcode) out_num = fanza_hinban number_lo = number.lower() - if re.sub('-|_', '', number_lo) == fanza_hinban or number_lo.replace('-', '00') == fanza_hinban: + if (re.sub('-|_', '', number_lo) == fanza_hinban or + number_lo.replace('-', '00') == fanza_hinban or + number_lo.replace('-', '') + 'so' == fanza_hinban + ): out_num = number data = { "title": getTitle(htmlcode).strip(), From db23ffae54539995fc187d045d75820ab2f1ef95 Mon Sep 17 00:00:00 2001 From: lededev <lededev@noreplay.github.com> Date: Mon, 11 Apr 2022 17:13:23 +0800 Subject: [PATCH 30/35] number_parser.py:fix n1012-CD1.wmv return null number --- number_parser.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/number_parser.py b/number_parser.py index 3bebb08..615dd69 100755 --- a/number_parser.py +++ b/number_parser.py @@ -48,6 +48,8 @@ def get_number(debug: bool, file_path: str) -> str: if 'fc2' in lower_check: filename = lower_check.replace('ppv', '').replace('--', '-').replace('_', '-').upper() filename = re.sub("(-|_)cd\d{1,2}", "", filename, flags=re.IGNORECASE) + if not re.search("-|_", filename): # 去掉-CD1之后再无-的情况,例如n1012-CD1.wmv + return str(re.search(r'\w+', filename[:filename.find('.')], re.A).group()) file_number = str(re.search(r'\w+(-|_)\w+', filename, re.A).group()) file_number = re.sub("(-|_)c$", "", file_number, flags=re.IGNORECASE) return file_number.upper() @@ -157,7 +159,9 @@ if __name__ == "__main__": "hhd800.com@STARS-566-HD.mp4", "jav20s8.com@GIGL-677_4K.mp4", "sbw99.cc@iesp-653-4K.mp4", - "4K-ABP-358_C.mkv" + "4K-ABP-358_C.mkv", + "n1012-CD1.wmv", + "東热n1012-CD2.wmv", ) From cc3e4d1edd5ee1f9676aa3c0b711b3a8ec988987 Mon Sep 17 00:00:00 2001 From: lededev <lededev@noreplay.github.com> Date: Mon, 11 Apr 2022 17:32:01 +0800 Subject: [PATCH 31/35] str.find() return -1 when fail --- core.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/core.py b/core.py index 39f4276..51dcc66 100644 --- a/core.py +++ b/core.py @@ -652,8 +652,7 @@ def core_main_no_net_op(movie_path, number): full_nfo = Path(path) / f"{prestr}{part}.nfo" if full_nfo.is_file(): - nfo = full_nfo.read_text(encoding='utf-8') - if nfo.find(r'<tag>无码</tag>'): + if full_nfo.read_text(encoding='utf-8').find(r'<tag>无码</tag>') >= 0: uncensored = 1 else: return From cdbccb3b143bcf731eb213e2a729c64f789e809b Mon Sep 17 00:00:00 2001 From: lededev <lededev@noreplay.github.com> Date: Mon, 11 Apr 2022 17:45:55 +0800 Subject: [PATCH 32/35] =?UTF-8?q?number=5Fparser.py=20=E6=B5=8B=E8=AF=95?= =?UTF-8?q?=E7=94=A8=E4=BE=8B=E5=9B=A0=E4=B8=BB=E6=9C=BA=E7=BC=96=E7=A0=81?= =?UTF-8?q?=E8=AE=BE=E7=BD=AE=EF=BC=8C=E6=9A=82=E4=B8=8D=E6=94=AF=E6=8C=81?= =?UTF-8?q?=E4=B8=AD=E6=96=87=E8=BE=93=E5=87=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- number_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/number_parser.py b/number_parser.py index 615dd69..4f09b29 100755 --- a/number_parser.py +++ b/number_parser.py @@ -161,7 +161,7 @@ if __name__ == "__main__": "sbw99.cc@iesp-653-4K.mp4", "4K-ABP-358_C.mkv", "n1012-CD1.wmv", - "東热n1012-CD2.wmv", + "[]n1012-CD2.wmv", ) From f342d42b86d805b7cb7f3874148293345f38cf2e Mon Sep 17 00:00:00 2001 From: lededev <lededev@noreplay.github.com> Date: Tue, 12 Apr 2022 02:13:36 +0800 Subject: [PATCH 33/35] =?UTF-8?q?=E9=99=A4=E6=96=87=E4=BB=B6=E5=90=8D?= =?UTF-8?q?=E8=A7=84=E5=88=99=E6=94=AF=E6=8C=81-C=E7=A1=AC=E5=AD=97?= =?UTF-8?q?=E5=B9=95=E5=A4=96=EF=BC=8C=E6=96=B0=E6=94=AF=E6=8C=81ch?= =?UTF-8?q?=E7=A1=AC=E5=AD=97=E5=B9=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- core.py | 10 ++++++---- number_parser.py | 14 ++++++++++---- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/core.py b/core.py index 51dcc66..0300b9e 100644 --- a/core.py +++ b/core.py @@ -629,8 +629,9 @@ def core_main_no_net_op(movie_path, number): path = str(Path(movie_path).parent) if re.search('-CD\d+', movie_path, re.IGNORECASE): - part = re.findall('-CD\d+', movie_path, re.IGNORECASE)[0] - if '-c.' in movie_path or '-C.' in movie_path or '中文' in movie_path or '字幕' in movie_path: + part = re.findall('-CD\d+', movie_path, re.IGNORECASE)[0].upper() + if re.search(r'-C(\.\w+$|-\w+)|\d+ch(\.\w+$|-\w+)', movie_path, + re.I) or '中文' in movie_path or '字幕' in movie_path: cn_sub = '1' c_word = '-C' # 中文字幕影片后缀 uncensored = 1 if is_uncensored(number) else 0 @@ -699,8 +700,9 @@ def core_main(movie_path, number_th, oCC): # =======================================================================判断-C,-CD后缀 if re.search('-CD\d+', movie_path, re.IGNORECASE): multi_part = 1 - part = re.findall('-CD\d+', movie_path, re.IGNORECASE)[0] - if '-c.' in movie_path or '-C.' in movie_path or '中文' in movie_path or '字幕' in movie_path: + part = re.findall('-CD\d+', movie_path, re.IGNORECASE)[0].upper() + if re.search(r'-C(\.\w+$|-\w+)|\d+ch(\.\w+$|-\w+)', movie_path, + re.I) or '中文' in movie_path or '字幕' in movie_path: cn_sub = '1' c_word = '-C' # 中文字幕影片后缀 diff --git a/number_parser.py b/number_parser.py index 4f09b29..54400ea 100755 --- a/number_parser.py +++ b/number_parser.py @@ -52,6 +52,8 @@ def get_number(debug: bool, file_path: str) -> str: return str(re.search(r'\w+', filename[:filename.find('.')], re.A).group()) file_number = str(re.search(r'\w+(-|_)\w+', filename, re.A).group()) file_number = re.sub("(-|_)c$", "", file_number, flags=re.IGNORECASE) + if re.search("\d+ch$", file_number, flags=re.I): + file_number = file_number[:-2] return file_number.upper() else: # 提取不含减号-的番号,FANZA CID # 欧美番号匹配规则 @@ -149,12 +151,12 @@ if __name__ == "__main__": "caribean-020317_001.nfo", # -号误命名为_号的 "257138_3xplanet_1Pondo_080521_001.mp4", "ADV-R0624-CD3.wmv", # 多碟影片 - "XXX-AV 22061-CD5.iso", # 新支持片商格式 xxx-av-22061 命名规则来自javdb数据源 + "XXX-AV 22061-CD5.iso", # 支持片商格式 xxx-av-22061 命名规则来自javdb数据源 "xxx-av 20589.mp4", - "Muramura-102114_145-HD.wmv", # 新支持片商格式 102114_145 命名规则来自javdb数据源 - "heydouga-4102-023-CD2.iso", # 新支持片商格式 heydouga-4102-023 命名规则来自javdb数据源 + "Muramura-102114_145-HD.wmv", # 支持片商格式 102114_145 命名规则来自javdb数据源 + "heydouga-4102-023-CD2.iso", # 支持片商格式 heydouga-4102-023 命名规则来自javdb数据源 "HeyDOuGa4236-1048 Ai Qiu - .mp4", # heydouga-4236-1048 命名规则来自javdb数据源 - "pacopacomama-093021_539-FHD.mkv", # 新支持片商格式 093021_539 命名规则来自javdb数据源 + "pacopacomama-093021_539-FHD.mkv", # 支持片商格式 093021_539 命名规则来自javdb数据源 "sbw99.cc@heyzo_hd_2636_full.mp4", "hhd800.com@STARS-566-HD.mp4", "jav20s8.com@GIGL-677_4K.mp4", @@ -162,6 +164,10 @@ if __name__ == "__main__": "4K-ABP-358_C.mkv", "n1012-CD1.wmv", "[]n1012-CD2.wmv", + "rctd-460ch.mp4", # 除支持-C硬字幕外,新支持ch硬字幕 + "rctd-461CH-CD2.mp4", # ch后可加CDn + "rctd-461-Cd3-C.mp4", # CDn后可加-C + "rctd-461-C-cD4.mp4", # cD1 Cd1 cd1 CD1 最终生成.nfo时统一为大写CD1 ) From 9a3b48140df40264259015cc927b7c48dd041995 Mon Sep 17 00:00:00 2001 From: lededev <lededev@noreplay.github.com> Date: Tue, 12 Apr 2022 02:28:31 +0800 Subject: [PATCH 34/35] =?UTF-8?q?number=5Fparser.py:=E4=B8=8Ecore.py?= =?UTF-8?q?=E5=8A=9F=E8=83=BD=E7=9B=B8=E5=AF=B9=E5=BA=94=EF=BC=8C=5FCD1?= =?UTF-8?q?=E4=B8=8B=E5=88=92=E7=BA=BF=E4=B8=8D=E5=86=8D=E6=94=AF=E6=8C=81?= =?UTF-8?q?=EF=BC=8C=E4=BB=85=E6=94=AF=E6=8C=81-CD1=E8=BF=9E=E5=AD=97?= =?UTF-8?q?=E7=AC=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- number_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/number_parser.py b/number_parser.py index 54400ea..3751cc0 100755 --- a/number_parser.py +++ b/number_parser.py @@ -47,7 +47,7 @@ def get_number(debug: bool, file_path: str) -> str: lower_check = filename.lower() if 'fc2' in lower_check: filename = lower_check.replace('ppv', '').replace('--', '-').replace('_', '-').upper() - filename = re.sub("(-|_)cd\d{1,2}", "", filename, flags=re.IGNORECASE) + filename = re.sub("-cd\d{1,2}", "", filename, flags=re.IGNORECASE) if not re.search("-|_", filename): # 去掉-CD1之后再无-的情况,例如n1012-CD1.wmv return str(re.search(r'\w+', filename[:filename.find('.')], re.A).group()) file_number = str(re.search(r'\w+(-|_)\w+', filename, re.A).group()) From 475f02fbe64edd4ab127445261d64faa0f370acf Mon Sep 17 00:00:00 2001 From: lededev <lededev@noreplay.github.com> Date: Tue, 12 Apr 2022 06:34:08 +0800 Subject: [PATCH 35/35] =?UTF-8?q?=E5=A4=84=E7=90=86format=E7=A9=BA?= =?UTF-8?q?=E6=A0=BC=E5=AF=B9=E9=BD=90=E5=86=85=E5=AE=B9=E5=8C=85=E5=90=AB?= =?UTF-8?q?=E4=B8=AD=E6=96=87=E7=9A=84=E6=83=85=E5=86=B5=EF=BC=8CDEBUG=20I?= =?UTF-8?q?NFO=E8=BE=93=E5=87=BA=E5=AF=B9=E9=BD=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ADC_function.py | 14 +++++++++++++- core.py | 2 +- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/ADC_function.py b/ADC_function.py index 30a5127..bcda6c9 100644 --- a/ADC_function.py +++ b/ADC_function.py @@ -18,6 +18,7 @@ from requests.adapters import HTTPAdapter from urllib3.util.retry import Retry from cloudscraper import create_scraper from concurrent.futures import ThreadPoolExecutor +from unicodedata import category def getXpathSingle(htmlcode, xpath): @@ -566,6 +567,7 @@ def delete_all_elements_in_list(string: str, lists: typing.Iterable[str]): new_lists.append(i) return new_lists + def delete_all_elements_in_str(string_delete: str, string: str): """ delete same string in given list @@ -573,4 +575,14 @@ def delete_all_elements_in_str(string_delete: str, string: str): for i in string: if i == string_delete: string = string.replace(i,"") - return string \ No newline at end of file + return string + + +def cnspace(v: str, n: int) -> int: + """ + print format空格填充对齐内容包含中文时的空格计算 + """ + cw = 0 + for c in v: + cw += 1 if category(c) in ('Lo',) else 0 + return n - cw diff --git a/core.py b/core.py index 0300b9e..6767996 100644 --- a/core.py +++ b/core.py @@ -608,7 +608,7 @@ def debug_print(data: json): if i == 'extrafanart': print('[+] -', "%-14s" % i, ':', len(v), 'links') continue - print('[+] -', "%-14s" % i, ':', v) + print(f'[+] - {i:<{cnspace(i,14)}} : {v}') print("[+] ------- DEBUG INFO -------") except: