From 99ae5bf996fb6aac951459ef81127174d863c380 Mon Sep 17 00:00:00 2001 From: lededev Date: Fri, 22 Oct 2021 16:50:36 +0800 Subject: [PATCH 1/6] =?UTF-8?q?=E4=B8=89=E7=A7=8D=E8=B0=83=E7=94=A8?= =?UTF-8?q?=E6=8E=A5=E5=8F=A3=E4=BF=9D=E6=8C=81=E4=B8=80=E8=87=B4=E6=80=A7?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- WebCrawler/storyline.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/WebCrawler/storyline.py b/WebCrawler/storyline.py index 9b0a44c..cc14a3f 100644 --- a/WebCrawler/storyline.py +++ b/WebCrawler/storyline.py @@ -16,7 +16,7 @@ G_mode_txt = ('顺序执行','线程池','进程池') class noThread(object): def map(self, fn, param): - return builtins.map(fn, param) + return list(builtins.map(fn, param)) def __enter__(self): return self def __exit__(self, exc_type, exc_val, exc_tb): @@ -47,7 +47,6 @@ def getStoryline(number, title, sites: list=None): assert run_mode in (0,1,2) with ThreadPool(cores) if run_mode == 1 else Pool(cores) if run_mode == 2 else noThread() as pool: result = pool.map(getStoryline_mp, mp_args) - result = list(result) if run_mode == 0 else result if not debug and conf.storyline_show() == 0: for value in result: if isinstance(value, str) and len(value): From d3eef993de9cd986d363da0d787f6bcb30ec5357 Mon Sep 17 00:00:00 2001 From: lededev Date: Fri, 22 Oct 2021 17:19:33 +0800 Subject: [PATCH 2/6] =?UTF-8?q?storyline.py:=E5=89=A7=E6=83=85=E7=AE=80?= =?UTF-8?q?=E4=BB=8B=E7=AB=99=E7=82=B9=E5=90=8D=E7=A7=B0=E5=89=8D=E5=A2=9E?= =?UTF-8?q?=E5=8A=A0=E5=BA=8F=E5=8F=B7=EF=BC=8C=E6=95=B0=E5=AD=97=E5=B0=8F?= =?UTF-8?q?=E7=9A=84=E4=BC=98=E5=85=88?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- WebCrawler/storyline.py | 11 +++++++---- config.ini | 8 ++++---- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/WebCrawler/storyline.py b/WebCrawler/storyline.py index cc14a3f..3cc8583 100644 --- a/WebCrawler/storyline.py +++ b/WebCrawler/storyline.py @@ -34,11 +34,14 @@ def getStoryline(number, title, sites: list=None): else: storyine_sites += conf.storyline_censored_site().split(',') r_dup = set() - apply_sites = [] + sort_sites = [] for s in storyine_sites: - if s in G_registered_storyline_site and s not in r_dup: - apply_sites.append(s) - r_dup.add(s) + ns = re.sub(r'.*?:', '', s, re.A) + if ns in G_registered_storyline_site and ns not in r_dup: + sort_sites.append(s) + r_dup.add(ns) + sort_sites.sort() + apply_sites = [re.sub(r'.*?:', '', s, re.A) for s in sort_sites] mp_args = ((site, number, title, debug) for site in apply_sites) cores = min(len(apply_sites), os.cpu_count()) if cores == 0: diff --git a/config.ini b/config.ini index 9ff2bb8..24404c6 100755 --- a/config.ini +++ b/config.ini @@ -89,15 +89,15 @@ extrafanart_folder=extrafanart ; 剧情简介 [storyline] ; website为javbus javdb avsox xcity carib时,site censored_site uncensored_site 为获取剧情简介信息的 -; 可选数据源站点列表。列表内站点同时并发查询,取值优先级从左到右,靠左站点没数据才会采用后面站点获得的。 +; 可选数据源站点列表。列表内站点同时并发查询,取值优先级由冒号前的序号决定,从小到大,数字小的站点没数据才会采用后面站点获得的。 ; 其中airav avno1 58avgo是中文剧情简介,区别是airav只能查有码,avno1有码无码都能查,58avgo只能查无码或者 ; 流出破解马赛克的影片(此功能没使用)。 ; xcity和amazon是日语的,由于amazon商城没有番号信息,选中对应DVD的准确率仅99.6%。如果三个列表全部为空则不查询, ; 设置成不查询可大幅提高刮削速度。 ; site= -site=avno1 -censored_site=airav,xcity,amazon -uncensored_site=58avgo +site=3:avno1 +censored_site=1:airav,4:xcity,5:amazon +uncensored_site=2:58avgo ; 运行模式:0:顺序执行(最慢) 1:线程池(默认值) 2:进程池(启动开销比线程池大,并发站点越多越快) run_mode=1 ; show_result剧情简介调试信息 0关闭 1简略 2详细(详细部分不记入日志),剧情简介失效时可打开2查看原因 From 24ac95f365b7a5098d2e8448a313fb5a92319eee Mon Sep 17 00:00:00 2001 From: lededev Date: Sat, 23 Oct 2021 05:04:15 +0800 Subject: [PATCH 3/6] storyline.py: fix debug output --- WebCrawler/storyline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/WebCrawler/storyline.py b/WebCrawler/storyline.py index 3cc8583..4fcca0e 100644 --- a/WebCrawler/storyline.py +++ b/WebCrawler/storyline.py @@ -128,7 +128,7 @@ def getStoryline_airav(number, debug): return desc except Exception as e: if debug: - print(f"[-]MP getOutline_amazon Error: {e},number [{number}].") + print(f"[-]MP getStoryline_airav Error: {e},number [{number}].") pass return None From eaa9d51d009f6d825d7cbce61a9775a529ada644 Mon Sep 17 00:00:00 2001 From: lededev Date: Sat, 23 Oct 2021 05:51:43 +0800 Subject: [PATCH 4/6] clean up --- AV_Data_Capture.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/AV_Data_Capture.py b/AV_Data_Capture.py index 7483a8d..778cd03 100755 --- a/AV_Data_Capture.py +++ b/AV_Data_Capture.py @@ -35,7 +35,7 @@ def check_update(local_version): print("[*]======================================================") -def argparse_function(ver: str) -> typing.Tuple[str, str, bool]: +def argparse_function(ver: str) -> typing.Tuple[str, str, str, str, bool]: conf = config.getInstance() parser = argparse.ArgumentParser(epilog=f"Load Config file '{conf.ini_path}'.") parser.add_argument("file", default='', nargs='?', help="Single Movie file path.") @@ -61,7 +61,7 @@ def argparse_function(ver: str) -> typing.Tuple[str, str, bool]: help="""Only show job list of files and numbers, and **NO** actual operation is performed. It may help you correct wrong numbers before real job.""") parser.add_argument("-v", "--version", action="version", version=ver) - #ini_path + args = parser.parse_args() def get_natural_number_or_none(value): return int(value) if isinstance(value, str) and value.isnumeric() and int(value)>=0 else None From dbbfa722683e0a8f8dcb3358edf6af1fef5fe392 Mon Sep 17 00:00:00 2001 From: lededev Date: Sat, 23 Oct 2021 06:15:24 +0800 Subject: [PATCH 5/6] vscode debug program/cwd/args demo settings --- .vscode/launch.json | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index 978418a..16ae9dd 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -5,8 +5,26 @@ "name": "AV_Data_Capture", "type": "python", "request": "launch", + "console": "integratedTerminal", + "env": { + "PYTHONIOENCODING": "utf-8" + }, "program": "${workspaceFolder}/AV_Data_capture.py", - "console": "integratedTerminal" + "program1": "${workspaceFolder}/WebCrawler/javbus.py", + "program2": "${workspaceFolder}/WebCrawler/javdb.py", + "program3": "${workspaceFolder}/WebCrawler/xcity.py", + "program4": "${workspaceFolder}/number_parser.py", + "program5": "${workspaceFolder}/config.py", + "cwd0": "${fileDirname}", + "cwd1": "${workspaceFolder}/dist", + "cwd2": "${env:HOME}${env:USERPROFILE}/.avdc", + "args0": ["-a","-p","J:/Downloads","-o","J:/log"], + "args1": ["-g","-m","3","-c","1","-d","0"], + "args2": ["-igd0", "-m3", "-p", "J:/JAV_output", "-q", "121220_001"], + "args3": ["-agd0","-m3", "-q", ".*","-p","J:/#JAV_output3"], + "args4": ["-gic1", "-d0", "-m3", "-o", "avlog", "-p", "I:/JAV_output"], + "args5": ["-gic1", "-d0", "-m1", "-o", "avlog", "-p", "J:/Downloads"], + "args6": ["-z", "-o", "J:/log"] } ] -} \ No newline at end of file +} From 7bc667e3b9b24d7dadf852e2f5ef3d8dcaaada4f Mon Sep 17 00:00:00 2001 From: lededev Date: Sun, 24 Oct 2021 00:42:57 +0800 Subject: [PATCH 6/6] avsox.py: call translateTag_to_sc() --- WebCrawler/avsox.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/WebCrawler/avsox.py b/WebCrawler/avsox.py index e38a452..c6e6f00 100644 --- a/WebCrawler/avsox.py +++ b/WebCrawler/avsox.py @@ -58,7 +58,7 @@ def getCover_small(html): return result def getTag(html): x = html.xpath('/html/head/meta[@name="keywords"]/@content')[0].split(',') - return x[2:] if len(x) > 2 else [] + return [translateTag_to_sc(i.strip()) for i in x[2:]] if len(x) > 2 else [] def getSeries(html): try: result1 = str(html.xpath('//span[contains(text(),"系列:")]/../span[2]/text()')).strip(" ['']")