From c94ef3cf4a4a972bb2a4b42849099e267b90abf3 Mon Sep 17 00:00:00 2001 From: lededev Date: Sun, 17 Apr 2022 23:36:41 +0800 Subject: [PATCH 1/6] =?UTF-8?q?=E6=9B=B4=E7=B2=BE=E7=A1=AE=E7=9A=84?= =?UTF-8?q?=E6=9C=89=E7=A0=81=E6=97=A0=E7=A0=81=E5=A4=84=E7=90=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- WebCrawler/carib.py | 2 +- WebCrawler/javbus.py | 8 ++++---- WebCrawler/javdb.py | 6 +++--- WebCrawler/storyline.py | 5 +++-- WebCrawler/xcity.py | 2 +- core.py | 4 +--- 6 files changed, 13 insertions(+), 14 deletions(-) diff --git a/WebCrawler/carib.py b/WebCrawler/carib.py index 4dac7ba..0d917a2 100755 --- a/WebCrawler/carib.py +++ b/WebCrawler/carib.py @@ -60,7 +60,7 @@ def get_year(lx: html.HtmlElement) -> str: def get_outline(lx: html.HtmlElement, number: str, title: str) -> str: o = lx.xpath("//div[@class='movie-info section']/p[@itemprop='description']/text()")[0].strip() - g = getStoryline(number, title) + g = getStoryline(number, title, 无码=True) if len(g): return g return o diff --git a/WebCrawler/javbus.py b/WebCrawler/javbus.py index d565652..bb2f986 100644 --- a/WebCrawler/javbus.py +++ b/WebCrawler/javbus.py @@ -60,10 +60,10 @@ def getCID(html): string = html.xpath("//a[contains(@class,'sample-box')][1]/@href")[0].replace('https://pics.dmm.co.jp/digital/video/','') result = re.sub('/.*?.jpg','',string) return result -def getOutline(number, title): #获取剧情介绍 多进程并发查询 +def getOutline(number, title, uncensored): #获取剧情介绍 多进程并发查询 if any(caller for caller in inspect.stack() if os.path.basename(caller.filename) == 'airav.py'): return '' # 从airav.py过来的调用不计算outline直接返回,避免重复抓取数据拖慢处理速度 - return getStoryline(number,title) + return getStoryline(number,title, 无码=uncensored) def getSeriseJa(html): x = html.xpath('//span[contains(text(),"シリーズ:")]/../a/text()') return str(x[0]) if len(x) else '' @@ -98,7 +98,7 @@ def main_uncensored(number): 'title': title, 'studio': getStudioJa(lx), 'year': getYear(lx), - 'outline': getOutline(w_number, title), + 'outline': getOutline(w_number, title, True), 'runtime': getRuntime(lx), 'director': getDirectorJa(lx), 'actor': getActor(lx), @@ -141,7 +141,7 @@ def main(number): 'title': title, 'studio': getStudio(lx), 'year': getYear(lx), - 'outline': getOutline(number, title), + 'outline': getOutline(number, title, getUncensored(lx)), 'runtime': getRuntime(lx), 'director': getDirector(lx), 'actor': getActor(lx), diff --git a/WebCrawler/javdb.py b/WebCrawler/javdb.py index c65a0dd..a622c35 100755 --- a/WebCrawler/javdb.py +++ b/WebCrawler/javdb.py @@ -166,8 +166,8 @@ def getDirector(html): result1 = str(html.xpath('//strong[contains(text(),"導演")]/../span/text()')).strip(" ['']") result2 = str(html.xpath('//strong[contains(text(),"導演")]/../span/a/text()')).strip(" ['']") return str(result1 + result2).strip('+').replace("', '", '').replace('"', '') -def getOutline(number, title): #获取剧情介绍 多进程并发查询 - return getStoryline(number,title) +def getOutline(number, title, uncensored): #获取剧情介绍 多进程并发查询 + return getStoryline(number, title, 无码=uncensored) def getSeries(html): result1 = str(html.xpath('//strong[contains(text(),"系列")]/../span/text()')).strip(" ['']") result2 = str(html.xpath('//strong[contains(text(),"系列")]/../span/a/text()')).strip(" ['']") @@ -287,7 +287,7 @@ def main(number): 'actor': getActor(lx), 'title': title, 'studio': getStudio(detail_page, lx), - 'outline': getOutline(number, title), + 'outline': getOutline(number, title, getUncensored(lx)), 'runtime': getRuntime(lx), 'director': getDirector(lx), 'release': getRelease(detail_page), diff --git a/WebCrawler/storyline.py b/WebCrawler/storyline.py index 20d047f..022853c 100644 --- a/WebCrawler/storyline.py +++ b/WebCrawler/storyline.py @@ -25,14 +25,15 @@ class noThread(object): # 获取剧情介绍 从列表中的站点同时查,取值优先级从前到后 -def getStoryline(number, title, sites: list=None): +def getStoryline(number, title, sites: list=None, 无码=None): start_time = time.time() conf = config.getInstance() if not conf.is_storyline(): return '' debug = conf.debug() or conf.storyline_show() == 2 storyine_sites = conf.storyline_site().split(',') if sites is None else sites - if is_uncensored(number): + unc = 无码 if isinstance(无码, bool) else is_uncensored(number) + if unc: storyine_sites += conf.storyline_uncensored_site().split(',') else: storyine_sites += conf.storyline_censored_site().split(',') diff --git a/WebCrawler/xcity.py b/WebCrawler/xcity.py index b6851ca..c117ca0 100644 --- a/WebCrawler/xcity.py +++ b/WebCrawler/xcity.py @@ -128,7 +128,7 @@ def getOutline(html, number, title): a = set(storyline_site) & {'airav', 'avno1'} # 只要中文的简介文字 if len(a): site = [n for n in storyline_site if n in a] - g = getStoryline(number, title, site) + g = getStoryline(number, title, site, 无码=False) if len(g): return g try: diff --git a/core.py b/core.py index a0ac1bd..b664b04 100644 --- a/core.py +++ b/core.py @@ -724,10 +724,8 @@ def core_main(movie_path, number_th, oCC): c_word = '-C' # 中文字幕影片后缀 # 判断是否无码 - uncensored = 1 if is_uncensored(number) else 0 unce = json_data.get('无码') - if type(unce) is bool: - uncensored = 1 if unce else 0 + uncensored = int(unce) if isinstance(unce, bool) else int(is_uncensored(number)) if '流出' in movie_path or 'uncensored' in movie_path.lower(): liuchu = '流出' From 5d00dd29e4f65fcca1a83917aedfa2c8b58b4e00 Mon Sep 17 00:00:00 2001 From: lededev Date: Sun, 17 Apr 2022 23:37:33 +0800 Subject: [PATCH 2/6] =?UTF-8?q?-C=E5=B0=8F=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Movie_Data_Capture.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Movie_Data_Capture.py b/Movie_Data_Capture.py index 85175db..c2ef207 100644 --- a/Movie_Data_Capture.py +++ b/Movie_Data_Capture.py @@ -77,7 +77,7 @@ def argparse_function(ver: str) -> typing.Tuple[str, str, str, str, bool, bool]: parser.add_argument("-D", "--download-images", dest='dnimg', action="store_true", help="Override [common]download_only_missing_images=0 force invoke image downloading.") parser.add_argument("-C", "--config-override", dest='cfgcmd', default='', nargs='?', - help="Common use config override. grammar: section:key=value[;section:key=value] eg. 'de:s=1' or 'debug_mode:switch=1' override[debug_mode]switch=1") + help="Common use config override. grammar: section:key=value[;[section:]key=value] eg. 'de:s=1' or 'debug_mode:switch=1' override[debug_mode]switch=1") parser.add_argument("-z", "--zero-operation", dest='zero_op', action="store_true", help="""Only show job list of files and numbers, and **NO** actual operation is performed. It may help you correct wrong numbers before real job.""") @@ -116,7 +116,7 @@ is performed. It may help you correct wrong numbers before real job.""") if conf.main_mode() == 3: no_net_op = args.no_network_operation if no_net_op: - conf.set_override("common:stop_counter=0;common:rerun_delay=0s;face:aways_imagecut=1") + conf.set_override("common:stop_counter=0;rerun_delay=0s;face:aways_imagecut=1") return args.file, args.number, args.logdir, args.regexstr, args.zero_op, no_net_op From 3224f8c1abf7b13c3f964861b87915af1efd283d Mon Sep 17 00:00:00 2001 From: lededev Date: Mon, 18 Apr 2022 01:07:58 +0800 Subject: [PATCH 3/6] =?UTF-8?q?=E5=8F=96=E6=B6=88storyline=E8=BF=9B?= =?UTF-8?q?=E7=A8=8B=E6=B1=A0=E6=A8=A1=E5=BC=8F=E4=BB=A5=E6=8F=90=E5=8D=87?= =?UTF-8?q?=E5=85=BC=E5=AE=B9=E6=80=A7?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Movie_Data_Capture.py | 1 - WebCrawler/storyline.py | 13 +++++-------- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/Movie_Data_Capture.py b/Movie_Data_Capture.py index c2ef207..672671d 100644 --- a/Movie_Data_Capture.py +++ b/Movie_Data_Capture.py @@ -668,7 +668,6 @@ def period(delta, pattern): if __name__ == '__main__': version = '6.0.3' - multiprocessing.freeze_support() urllib3.disable_warnings() # Ignore http proxy warning app_start = time.time() diff --git a/WebCrawler/storyline.py b/WebCrawler/storyline.py index 022853c..40117b5 100644 --- a/WebCrawler/storyline.py +++ b/WebCrawler/storyline.py @@ -5,7 +5,6 @@ import json import builtins from ADC_function import * from lxml.html import fromstring -from multiprocessing import Pool from multiprocessing.dummy import Pool as ThreadPool from difflib import SequenceMatcher from unicodedata import category @@ -13,7 +12,7 @@ from number_parser import is_uncensored G_registered_storyline_site = {"airavwiki", "airav", "avno1", "xcity", "amazon", "58avgo"} -G_mode_txt = ('顺序执行','线程池','进程池') +G_mode_txt = ('顺序执行','线程池') class noThread(object): def map(self, fn, param): @@ -50,9 +49,8 @@ def getStoryline(number, title, sites: list=None, 无码=None): cores = min(len(apply_sites), os.cpu_count()) if cores == 0: return '' - run_mode = conf.storyline_mode() - assert run_mode in (0,1,2) - with ThreadPool(cores) if run_mode == 1 else Pool(cores) if run_mode == 2 else noThread() as pool: + run_mode = 1 if conf.storyline_mode() > 0 else 0 + with ThreadPool(cores) if run_mode > 0 else noThread() as pool: results = pool.map(getStoryline_mp, mp_args) sel = '' if not debug and conf.storyline_show() == 0: @@ -63,7 +61,7 @@ def getStoryline(number, title, sites: list=None, 无码=None): if not len(sel): sel = value return sel - # 以下debug结果输出会写入日志,进程池中的则不会,只在标准输出中显示 + # 以下debug结果输出会写入日志 s = f'[!]Storyline{G_mode_txt[run_mode]}模式运行{len(apply_sites)}个任务共耗时(含启动开销){time.time() - start_time:.3f}秒,结束于{time.strftime("%H:%M:%S")}' sel_site = '' for site, desc in zip(apply_sites, results): @@ -101,8 +99,7 @@ def getStoryline_mp(args): storyline = getStoryline_58avgo(number, debug) if not debug: return storyline - # 进程池模式的子进程getStoryline_*()的print()不会写入日志中,线程池和顺序执行不受影响 - print("[!]MP 进程[{}]运行{:.3f}秒,结束于{}返回结果: {}".format( + print("[!]MP 线程[{}]运行{:.3f}秒,结束于{}返回结果: {}".format( site, time.time() - start_time, time.strftime("%H:%M:%S"), From 3dda5a94cfd8110924e5028acfa2531c1ee9261a Mon Sep 17 00:00:00 2001 From: lededev Date: Mon, 18 Apr 2022 01:27:46 +0800 Subject: [PATCH 4/6] =?UTF-8?q?=E5=BD=B1=E7=89=87=E5=88=AE=E5=89=8A?= =?UTF-8?q?=E6=97=B6Ctrl+C=E7=AB=8B=E5=88=BB=E9=80=80=E5=87=BA=EF=BC=8C?= =?UTF-8?q?=E8=80=8C=E4=B8=8D=E6=98=AF=E5=8A=A0=E5=85=A5=E5=A4=B1=E8=B4=A5?= =?UTF-8?q?=E5=88=97=E8=A1=A8=E5=B9=B6=E8=B7=B3=E5=88=B0=E4=B8=8B=E4=B8=80?= =?UTF-8?q?=E9=83=A8=E5=BD=B1=E7=89=87?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ADC_function.py | 4 ++-- Movie_Data_Capture.py | 6 +++--- config.py | 10 +++++----- core.py | 9 +++++---- 4 files changed, 15 insertions(+), 14 deletions(-) diff --git a/ADC_function.py b/ADC_function.py index 8165019..71d4fd9 100644 --- a/ADC_function.py +++ b/ADC_function.py @@ -467,7 +467,7 @@ def download_file_with_filename(url: str, filename: str, path: str) -> None: os.makedirs(path) except: print(f"[-]Fatal error! Can not make folder '{path}'") - sys.exit(0) + os._exit(0) proxies = configProxy.proxies() headers = { 'User-Agent': G_USER_AGENT} @@ -484,7 +484,7 @@ def download_file_with_filename(url: str, filename: str, path: str) -> None: os.makedirs(path) except: print(f"[-]Fatal error! Can not make folder '{path}'") - sys.exit(0) + os._exit(0) headers = { 'User-Agent': G_USER_AGENT} r = requests.get(url, timeout=configProxy.timeout, headers=headers) diff --git a/Movie_Data_Capture.py b/Movie_Data_Capture.py index 672671d..4173c29 100644 --- a/Movie_Data_Capture.py +++ b/Movie_Data_Capture.py @@ -304,7 +304,7 @@ def close_logfile(logdir: str): def signal_handler(*args): print('[!]Ctrl+C detected, Exit.') - sys.exit(9) + os._exit(9) def sigdebug_handler(*args): @@ -426,7 +426,7 @@ def create_failed_folder(failed_folder: str): os.makedirs(failed_folder) except: print(f"[-]Fatal error! Can not make folder '{failed_folder}'") - sys.exit(0) + os._exit(0) def rm_empty_folder(path): @@ -517,7 +517,7 @@ def main(args: tuple) -> Path: folder_path = "" if main_mode not in (1, 2, 3): print(f"[-]Main mode must be 1 or 2 or 3! You can run '{os.path.basename(sys.argv[0])} --help' for more help.") - sys.exit(4) + os._exit(4) signal.signal(signal.SIGINT, signal_handler) if sys.platform == 'win32': diff --git a/config.py b/config.py index 4b105b8..edfabe5 100644 --- a/config.py +++ b/config.py @@ -69,17 +69,17 @@ class Config: elif (Path(__file__).resolve().parent / 'config.ini').is_file(): res_path = Path(__file__).resolve().parent / 'config.ini' if res_path is None: - sys.exit(2) + os._exit(2) ins = input("Or, Do you want me create a config file for you? (Yes/No)[Y]:") if re.search('n', ins, re.I): - sys.exit(2) + os._exit(2) # 用户目录才确定具有写权限,因此选择 ~/mdc.ini 作为配置文件生成路径,而不是有可能并没有写权限的 # 当前目录。目前版本也不再鼓励使用当前路径放置配置文件了,只是作为多配置文件的切换技巧保留。 write_path = path_search_order[2] # Path.home() / "mdc.ini" write_path.write_text(res_path.read_text(encoding='utf-8'), encoding='utf-8') print("Config file '{}' created.".format(write_path.resolve())) input("Press Enter key exit...") - sys.exit(0) + os._exit(0) # self.conf = self._default_config() # try: # self.conf = configparser.ConfigParser() @@ -90,7 +90,7 @@ class Config: # except Exception as e: # print("[-]Config file not found! Use the default settings") # print("[-]",e) - # sys.exit(3) + # os._exit(3) # #self.conf = self._default_config() def set_override(self, option_cmd: str): @@ -110,7 +110,7 @@ class Config: """ def err_exit(str): print(str) - sys.exit(2) + os._exit(2) sections = self.conf.sections() sec_name = None diff --git a/core.py b/core.py index b664b04..7b493cf 100644 --- a/core.py +++ b/core.py @@ -1,5 +1,6 @@ import json import os.path +import os import pathlib import re import shutil @@ -104,7 +105,7 @@ def create_folder(json_data): # 创建文件夹 os.makedirs(path) except: print(f"[-]Fatal error! Can not make folder '{path}'") - sys.exit(0) + os._exit(0) return os.path.normpath(path) @@ -124,7 +125,7 @@ def download_file_with_filename(url, filename, path, filepath): os.makedirs(path) except: print(f"[-]Fatal error! Can not make folder '{path}'") - sys.exit(0) + os._exit(0) proxies = configProxy.proxies() headers = { 'User-Agent': G_USER_AGENT} @@ -141,7 +142,7 @@ def download_file_with_filename(url, filename, path, filepath): os.makedirs(path) except: print(f"[-]Fatal error! Can not make folder '{path}'") - sys.exit(0) + os._exit(0) headers = { 'User-Agent': G_USER_AGENT} r = requests.get(url, timeout=configProxy.timeout, headers=headers) @@ -292,7 +293,7 @@ def print_files(path, leak_word, c_word, naming_rule, part, cn_sub, json_data, f os.makedirs(path) except: print(f"[-]Fatal error! can not make folder '{path}'") - sys.exit(0) + os._exit(0) old_nfo = None try: From de58cc89d56df8e0b395e1507ae268f57df8a111 Mon Sep 17 00:00:00 2001 From: lededev Date: Mon, 18 Apr 2022 01:42:06 +0800 Subject: [PATCH 5/6] =?UTF-8?q?config.py:=E5=8F=96=E6=B6=88=E8=BF=9B?= =?UTF-8?q?=E7=A8=8B=E6=B1=A0=E6=A8=A1=E5=BC=8F[storyline]run=5Fmode=3D2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- config.py | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/config.py b/config.py index edfabe5..783c2fe 100644 --- a/config.py +++ b/config.py @@ -365,25 +365,15 @@ class Config: return "3:58avgo" def storyline_show(self) -> int: - try: - v = self.conf.getint("storyline", "show_result") - return v if v in (0, 1, 2) else 2 if v > 2 else 0 - except: - return 0 + v = self.conf.getint("storyline", "show_result", fallback=0) + return v if v in (0, 1, 2) else 2 if v > 2 else 0 def storyline_mode(self) -> int: - try: - v = self.conf.getint("storyline", "run_mode") - return v if v in (0, 1, 2) else 2 if v > 2 else 0 - except: - return 1 + return 1 if self.conf.getint("storyline", "run_mode", fallback=1) > 0 else 0 def cc_convert_mode(self) -> int: - try: - v = self.conf.getint("cc_convert", "mode") - return v if v in (0, 1, 2) else 2 if v > 2 else 0 - except: - return 1 + v = self.conf.getint("cc_convert", "mode", fallback=1) + return v if v in (0, 1, 2) else 2 if v > 2 else 0 def cc_convert_vars(self) -> str: return self.conf.get("cc_convert", "vars", From 679467700688a3bdff4db9c8ca9e48ce19da90a4 Mon Sep 17 00:00:00 2001 From: lededev Date: Mon, 18 Apr 2022 04:23:12 +0800 Subject: [PATCH 6/6] =?UTF-8?q?=E4=BF=AE=E5=A4=8Dcnn=E4=BA=BA=E8=84=B8?= =?UTF-8?q?=E6=A3=80=E6=B5=8B=EF=BC=8C=E5=87=86=E7=A1=AE=E7=8E=87=E9=AB=98?= =?UTF-8?q?=E4=BD=86=E9=80=9F=E5=BA=A6=E6=85=A2=E4=BA=8Ehog?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ImageProcessing/__init__.py | 6 +++++- ImageProcessing/cnn.py | 8 ++++++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/ImageProcessing/__init__.py b/ImageProcessing/__init__.py index e582e55..900ad55 100644 --- a/ImageProcessing/__init__.py +++ b/ImageProcessing/__init__.py @@ -1,3 +1,6 @@ +import sys +sys.path.append('../') + import logging import os import config @@ -104,4 +107,5 @@ def face_center(filename, model): return (0, 0) if __name__ == '__main__': - cutImage(1,'H:\\test\\','12.jpg','test.jpg') + cutImage(1,'z:/t/','p.jpg','o.jpg') + #cutImage(1,'H:\\test\\','12.jpg','test.jpg') diff --git a/ImageProcessing/cnn.py b/ImageProcessing/cnn.py index 4219c5d..2d190ed 100644 --- a/ImageProcessing/cnn.py +++ b/ImageProcessing/cnn.py @@ -1,4 +1,8 @@ -import hog +import sys +sys.path.append('../') + +from ImageProcessing.hog import face_center as hog_face_center + def face_center(filename, model): - return hog.face_center(filename, model) \ No newline at end of file + return hog_face_center(filename, model)