Add multi-threaded crawler

This commit is contained in:
yoshiko2
2021-04-29 09:58:11 +08:00
parent b4cb2e1f4e
commit cd8afb3353
2 changed files with 37 additions and 8 deletions

View File

@@ -6,11 +6,12 @@ soft_link=0
failed_move=0 failed_move=0
auto_exit=0 auto_exit=0
transalte_to_sc=0 transalte_to_sc=0
multi_threading=1
[proxy] [proxy]
;proxytype: http or socks5 or socks5h switch: 0 1 ;proxytype: http or socks5 or socks5h switch: 0 1
switch=0 switch=0
type=http type=socks5
proxy=127.0.0.1:1080 proxy=127.0.0.1:1080
timeout=5 timeout=5
retry=3 retry=3
@@ -32,7 +33,7 @@ literals=\()/
folders=failed,JAV_output folders=failed,JAV_output
[debug_mode] [debug_mode]
switch=0 switch=1
; 机器翻译 ; 机器翻译
[transalte] [transalte]

40
core.py
View File

@@ -7,6 +7,7 @@ import platform
from PIL import Image from PIL import Image
from io import BytesIO from io import BytesIO
from multiprocessing.pool import ThreadPool
from ADC_function import * from ADC_function import *
@@ -94,16 +95,43 @@ def get_data_from_json(file_number, filepath, conf: config.Config): # 从JSON
sources.insert(0, sources.pop(sources.index("dlsite"))) sources.insert(0, sources.pop(sources.index("dlsite")))
json_data = {} json_data = {}
for source in sources:
try: if conf.multi_threading():
pool = ThreadPool(processes=11)
MT = {
'avsox' : pool.apply_async(avsox.main, (file_number,)),
'fanza' : pool.apply_async(fanza.main, (file_number,)),
'fc2' : pool.apply_async(fc2.main, (file_number,)),
'jav321' : pool.apply_async(jav321.main, (file_number,)),
'javbus' : pool.apply_async(javbus.main, (file_number,)),
'javdb' : pool.apply_async(javbus.main, (file_number,)),
'mgstage' : pool.apply_async(mgstage.main, (file_number,)),
'xcity' : pool.apply_async(xcity.main, (file_number,)),
'javlib' : pool.apply_async(javlib.main, (file_number,)),
'dlsite' : pool.apply_async(dlsite.main, (file_number,)),
'airav' : pool.apply_async(airav.main, (file_number,)),
}
for source in sources:
if conf.debug() == True: if conf.debug() == True:
print('[+]select',source) print('[+]select', source)
json_data = json.loads(func_mapping[source](file_number)) json_data = json.loads(MT[source].get())
# if any service return a valid return, break # if any service return a valid return, break
if get_data_state(json_data): if get_data_state(json_data):
break break
except: pool.close()
break pool.terminate()
else:
for source in sources:
try:
if conf.debug() == True:
print('[+]select', source)
json_data = json.loads(func_mapping[source](file_number))
# if any service return a valid return, break
if get_data_state(json_data):
break
except:
break
# Return if data not found in all sources # Return if data not found in all sources
if not json_data: if not json_data: