update
This commit is contained in:
@@ -123,14 +123,15 @@ def train(cfg,envs):
|
|||||||
loss.backward()
|
loss.backward()
|
||||||
optimizer.step()
|
optimizer.step()
|
||||||
print('Finish training!')
|
print('Finish training!')
|
||||||
return test_rewards, test_ma_rewards
|
return {'rewards':test_rewards,'ma_rewards':test_ma_rewards}
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
cfg = get_args()
|
cfg = get_args()
|
||||||
envs = [make_envs(cfg.env_name) for i in range(cfg.n_envs)]
|
envs = [make_envs(cfg.env_name) for i in range(cfg.n_envs)]
|
||||||
envs = SubprocVecEnv(envs)
|
envs = SubprocVecEnv(envs)
|
||||||
# training
|
# training
|
||||||
rewards,ma_rewards = train(cfg,envs)
|
res_dic = train(cfg,envs)
|
||||||
make_dir(cfg.result_path,cfg.model_path)
|
make_dir(cfg.result_path,cfg.model_path)
|
||||||
save_args(cfg)
|
save_args(cfg)
|
||||||
save_results(rewards, ma_rewards, tag='train', path=cfg.result_path) # 保存结果
|
save_results(res_dic, tag='train',
|
||||||
plot_rewards(rewards, ma_rewards, cfg, tag="train") # 画出结果
|
path=cfg.result_path)
|
||||||
|
plot_rewards(res_dic['rewards'], res_dic['ma_rewards'], cfg, tag="train") # 画出结果
|
||||||
|
|||||||
@@ -5,7 +5,7 @@
|
|||||||
@Email: johnjim0816@gmail.com
|
@Email: johnjim0816@gmail.com
|
||||||
@Date: 2020-06-11 20:58:21
|
@Date: 2020-06-11 20:58:21
|
||||||
@LastEditor: John
|
@LastEditor: John
|
||||||
LastEditTime: 2022-07-21 00:05:41
|
LastEditTime: 2022-07-21 21:51:34
|
||||||
@Description:
|
@Description:
|
||||||
@Environment: python 3.7.7
|
@Environment: python 3.7.7
|
||||||
'''
|
'''
|
||||||
@@ -86,7 +86,7 @@ def train(cfg, env, agent):
|
|||||||
else:
|
else:
|
||||||
ma_rewards.append(ep_reward)
|
ma_rewards.append(ep_reward)
|
||||||
print('Finish training!')
|
print('Finish training!')
|
||||||
return rewards, ma_rewards
|
return {'rewards':rewards,'ma_rewards':ma_rewards}
|
||||||
|
|
||||||
def test(cfg, env, agent):
|
def test(cfg, env, agent):
|
||||||
print('Start testing')
|
print('Start testing')
|
||||||
@@ -111,21 +111,23 @@ def test(cfg, env, agent):
|
|||||||
ma_rewards.append(ep_reward)
|
ma_rewards.append(ep_reward)
|
||||||
print(f"Epside:{i_ep+1}/{cfg.test_eps}, Reward:{ep_reward:.1f}")
|
print(f"Epside:{i_ep+1}/{cfg.test_eps}, Reward:{ep_reward:.1f}")
|
||||||
print('Finish testing!')
|
print('Finish testing!')
|
||||||
return rewards, ma_rewards
|
return {'rewards':rewards,'ma_rewards':ma_rewards}
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
cfg = get_args()
|
cfg = get_args()
|
||||||
# training
|
# training
|
||||||
env,agent = env_agent_config(cfg,seed=1)
|
env,agent = env_agent_config(cfg,seed=1)
|
||||||
rewards, ma_rewards = train(cfg, env, agent)
|
res_dic = train(cfg, env, agent)
|
||||||
make_dir(cfg.result_path, cfg.model_path)
|
make_dir(cfg.result_path, cfg.model_path)
|
||||||
save_args(cfg)
|
save_args(cfg)
|
||||||
agent.save(path=cfg.model_path)
|
agent.save(path=cfg.model_path)
|
||||||
save_results(rewards, ma_rewards, tag='train', path=cfg.result_path)
|
save_results(res_dic, tag='train',
|
||||||
plot_rewards(rewards, ma_rewards, cfg, tag="train")
|
path=cfg.result_path)
|
||||||
|
plot_rewards(res_dic['rewards'], res_dic['ma_rewards'], cfg, tag="train")
|
||||||
# testing
|
# testing
|
||||||
env,agent = env_agent_config(cfg,seed=10)
|
env,agent = env_agent_config(cfg,seed=10)
|
||||||
agent.load(path=cfg.model_path)
|
agent.load(path=cfg.model_path)
|
||||||
rewards,ma_rewards = test(cfg,env,agent)
|
res_dic = test(cfg,env,agent)
|
||||||
save_results(rewards,ma_rewards,tag = 'test',path = cfg.result_path)
|
save_results(res_dic, tag='test',
|
||||||
plot_rewards(rewards, ma_rewards, cfg, tag="test")
|
path=cfg.result_path)
|
||||||
|
plot_rewards(res_dic['rewards'], res_dic['ma_rewards'], cfg, tag="test")
|
||||||
|
|
||||||
|
|||||||
@@ -10,7 +10,7 @@ import torch
|
|||||||
import datetime
|
import datetime
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import argparse
|
import argparse
|
||||||
from common.utils import save_results_1, make_dir
|
from common.utils import save_results, make_dir
|
||||||
from common.utils import plot_rewards,save_args
|
from common.utils import plot_rewards,save_args
|
||||||
from dqn import DQN
|
from dqn import DQN
|
||||||
|
|
||||||
@@ -95,8 +95,8 @@ def train(cfg, env, agent):
|
|||||||
|
|
||||||
|
|
||||||
def test(cfg, env, agent):
|
def test(cfg, env, agent):
|
||||||
print('开始测试!')
|
print('Start testing!')
|
||||||
print(f'环境:{cfg.env_name}, 算法:{cfg.algo_name}, 设备:{cfg.device}')
|
print(f'Env:{cfg.env_name}, A{cfg.algo_name}, 设备:{cfg.device}')
|
||||||
############# 由于测试不需要使用epsilon-greedy策略,所以相应的值设置为0 ###############
|
############# 由于测试不需要使用epsilon-greedy策略,所以相应的值设置为0 ###############
|
||||||
cfg.epsilon_start = 0.0 # e-greedy策略中初始epsilon
|
cfg.epsilon_start = 0.0 # e-greedy策略中初始epsilon
|
||||||
cfg.epsilon_end = 0.0 # e-greedy策略中的终止epsilon
|
cfg.epsilon_end = 0.0 # e-greedy策略中的终止epsilon
|
||||||
@@ -123,7 +123,7 @@ def test(cfg, env, agent):
|
|||||||
else:
|
else:
|
||||||
ma_rewards.append(ep_reward)
|
ma_rewards.append(ep_reward)
|
||||||
print(f'Episode:{i_ep+1}/{cfg.test_eps}, Reward:{ep_reward:.2f}, Step:{ep_step:.2f}')
|
print(f'Episode:{i_ep+1}/{cfg.test_eps}, Reward:{ep_reward:.2f}, Step:{ep_step:.2f}')
|
||||||
print('完成测试!')
|
print('Finish testing')
|
||||||
env.close()
|
env.close()
|
||||||
return {'rewards':rewards,'ma_rewards':ma_rewards,'steps':steps}
|
return {'rewards':rewards,'ma_rewards':ma_rewards,'steps':steps}
|
||||||
|
|
||||||
@@ -133,16 +133,16 @@ if __name__ == "__main__":
|
|||||||
# 训练
|
# 训练
|
||||||
env, agent = env_agent_config(cfg)
|
env, agent = env_agent_config(cfg)
|
||||||
res_dic = train(cfg, env, agent)
|
res_dic = train(cfg, env, agent)
|
||||||
make_dir(cfg.result_path, cfg.model_path) # 创建保存结果和模型路径的文件夹
|
make_dir(cfg.result_path, cfg.model_path)
|
||||||
save_args(cfg)
|
save_args(cfg) # save parameters
|
||||||
agent.save(path=cfg.model_path) # 保存模型
|
agent.save(path=cfg.model_path) # save model
|
||||||
save_results_1(res_dic, tag='train',
|
save_results(res_dic, tag='train',
|
||||||
path=cfg.result_path) # 保存结果
|
path=cfg.result_path)
|
||||||
plot_rewards(res_dic['rewards'], res_dic['ma_rewards'], cfg, tag="train") # 画出结果
|
plot_rewards(res_dic['rewards'], res_dic['ma_rewards'], cfg, tag="train")
|
||||||
# 测试
|
# 测试
|
||||||
env, agent = env_agent_config(cfg)
|
env, agent = env_agent_config(cfg)
|
||||||
agent.load(path=cfg.model_path) # 导入模型
|
agent.load(path=cfg.model_path) # 导入模型
|
||||||
res_dic = test(cfg, env, agent)
|
res_dic = test(cfg, env, agent)
|
||||||
save_results_1(res_dic, tag='test',
|
save_results(res_dic, tag='test',
|
||||||
path=cfg.result_path) # 保存结果
|
path=cfg.result_path) # 保存结果
|
||||||
plot_rewards(res_dic['rewards'], res_dic['ma_rewards'],cfg, tag="test") # 画出结果
|
plot_rewards(res_dic['rewards'], res_dic['ma_rewards'],cfg, tag="test") # 画出结果
|
||||||
|
|||||||
Binary file not shown.
@@ -1 +0,0 @@
|
|||||||
{"algo_name": "DoubleDQN", "env_name": "CartPole-v0", "train_eps": 200, "test_eps": 20, "gamma": 0.99, "epsilon_start": 0.95, "epsilon_end": 0.01, "epsilon_decay": 500, "lr": 0.0001, "memory_capacity": 100000, "batch_size": 64, "target_update": 2, "hidden_dim": 256, "device": "cuda", "result_path": "C:\\Users\\24438\\Desktop\\rl-tutorials\\codes\\DoubleDQN/outputs/CartPole-v0/20220721-000842/results/", "model_path": "C:\\Users\\24438\\Desktop\\rl-tutorials\\codes\\DoubleDQN/outputs/CartPole-v0/20220721-000842/models/", "save_fig": true}
|
|
||||||
Binary file not shown.
Binary file not shown.
|
Before Width: | Height: | Size: 38 KiB |
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
Before Width: | Height: | Size: 44 KiB |
Binary file not shown.
@@ -0,0 +1,19 @@
|
|||||||
|
{
|
||||||
|
"algo_name": "DoubleDQN",
|
||||||
|
"env_name": "CartPole-v0",
|
||||||
|
"train_eps": 200,
|
||||||
|
"test_eps": 20,
|
||||||
|
"gamma": 0.99,
|
||||||
|
"epsilon_start": 0.95,
|
||||||
|
"epsilon_end": 0.01,
|
||||||
|
"epsilon_decay": 500,
|
||||||
|
"lr": 0.0001,
|
||||||
|
"memory_capacity": 100000,
|
||||||
|
"batch_size": 64,
|
||||||
|
"target_update": 2,
|
||||||
|
"hidden_dim": 256,
|
||||||
|
"device": "cuda",
|
||||||
|
"result_path": "C:\\Users\\24438\\Desktop\\rl-tutorials\\codes\\DoubleDQN/outputs/CartPole-v0/20220721-215416/results/",
|
||||||
|
"model_path": "C:\\Users\\24438\\Desktop\\rl-tutorials\\codes\\DoubleDQN/outputs/CartPole-v0/20220721-215416/models/",
|
||||||
|
"save_fig": true
|
||||||
|
}
|
||||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
After Width: | Height: | Size: 44 KiB |
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
After Width: | Height: | Size: 44 KiB |
@@ -5,7 +5,7 @@ Author: JiangJi
|
|||||||
Email: johnjim0816@gmail.com
|
Email: johnjim0816@gmail.com
|
||||||
Date: 2021-11-07 18:10:37
|
Date: 2021-11-07 18:10:37
|
||||||
LastEditor: JiangJi
|
LastEditor: JiangJi
|
||||||
LastEditTime: 2022-07-21 00:08:38
|
LastEditTime: 2022-07-21 21:52:31
|
||||||
Description:
|
Description:
|
||||||
'''
|
'''
|
||||||
import sys,os
|
import sys,os
|
||||||
@@ -86,7 +86,7 @@ def train(cfg,env,agent):
|
|||||||
else:
|
else:
|
||||||
ma_rewards.append(ep_reward)
|
ma_rewards.append(ep_reward)
|
||||||
print('Finish training!')
|
print('Finish training!')
|
||||||
return rewards,ma_rewards
|
return {'rewards':rewards,'ma_rewards':ma_rewards}
|
||||||
|
|
||||||
def test(cfg,env,agent):
|
def test(cfg,env,agent):
|
||||||
print('Start testing')
|
print('Start testing')
|
||||||
@@ -115,22 +115,24 @@ def test(cfg,env,agent):
|
|||||||
ma_rewards.append(ep_reward)
|
ma_rewards.append(ep_reward)
|
||||||
print(f"Epside:{i_ep+1}/{cfg.test_eps}, Reward:{ep_reward:.1f}")
|
print(f"Epside:{i_ep+1}/{cfg.test_eps}, Reward:{ep_reward:.1f}")
|
||||||
print('Finish testing!')
|
print('Finish testing!')
|
||||||
return rewards,ma_rewards
|
return {'rewards':rewards,'ma_rewards':ma_rewards}
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
cfg = get_args()
|
cfg = get_args()
|
||||||
print(cfg.device)
|
print(cfg.device)
|
||||||
# training
|
# training
|
||||||
env,agent = env_agent_config(cfg,seed=1)
|
env,agent = env_agent_config(cfg,seed=1)
|
||||||
rewards, ma_rewards = train(cfg, env, agent)
|
res_dic = train(cfg, env, agent)
|
||||||
make_dir(cfg.result_path, cfg.model_path)
|
make_dir(cfg.result_path, cfg.model_path)
|
||||||
save_args(cfg)
|
save_args(cfg)
|
||||||
agent.save(path=cfg.model_path)
|
agent.save(path=cfg.model_path)
|
||||||
save_results(rewards, ma_rewards, tag='train', path=cfg.result_path)
|
save_results(res_dic, tag='train',
|
||||||
plot_rewards(rewards, ma_rewards, cfg, tag="train")
|
path=cfg.result_path)
|
||||||
|
plot_rewards(res_dic['rewards'], res_dic['ma_rewards'], cfg, tag="train")
|
||||||
# testing
|
# testing
|
||||||
env,agent = env_agent_config(cfg,seed=10)
|
env,agent = env_agent_config(cfg,seed=10)
|
||||||
agent.load(path=cfg.model_path)
|
agent.load(path=cfg.model_path)
|
||||||
rewards,ma_rewards = test(cfg,env,agent)
|
res_dic = test(cfg,env,agent)
|
||||||
save_results(rewards,ma_rewards,tag = 'test',path = cfg.result_path)
|
save_results(res_dic, tag='test',
|
||||||
plot_rewards(rewards, ma_rewards, cfg, tag="test")
|
path=cfg.result_path)
|
||||||
|
plot_rewards(res_dic['rewards'], res_dic['ma_rewards'], cfg, tag="test")
|
||||||
|
|||||||
@@ -5,56 +5,47 @@ Author: John
|
|||||||
Email: johnjim0816@gmail.com
|
Email: johnjim0816@gmail.com
|
||||||
Date: 2020-11-22 23:21:53
|
Date: 2020-11-22 23:21:53
|
||||||
LastEditor: John
|
LastEditor: John
|
||||||
LastEditTime: 2022-02-10 06:13:21
|
LastEditTime: 2022-07-21 21:44:00
|
||||||
Description:
|
Description:
|
||||||
Environment:
|
Environment:
|
||||||
'''
|
'''
|
||||||
import sys
|
import sys,os
|
||||||
import os
|
curr_path = os.path.dirname(os.path.abspath(__file__)) # current path
|
||||||
curr_path = os.path.dirname(os.path.abspath(__file__)) # 当前文件所在绝对路径
|
parent_path = os.path.dirname(curr_path) # parent path
|
||||||
parent_path = os.path.dirname(curr_path) # 父路径
|
sys.path.append(parent_path) # add to system path
|
||||||
sys.path.append(parent_path) # 添加路径到系统路径
|
|
||||||
|
|
||||||
import gym
|
import gym
|
||||||
import torch
|
import torch
|
||||||
import datetime
|
import datetime
|
||||||
|
import argparse
|
||||||
from itertools import count
|
from itertools import count
|
||||||
|
|
||||||
from pg import PolicyGradient
|
from pg import PolicyGradient
|
||||||
from common.utils import save_results, make_dir
|
from common.utils import save_results, make_dir
|
||||||
from common.utils import plot_rewards
|
from common.utils import plot_rewards
|
||||||
|
|
||||||
curr_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") # 获取当前时间
|
|
||||||
|
|
||||||
class Config:
|
def get_args():
|
||||||
'''超参数
|
""" Hyperparameters
|
||||||
'''
|
"""
|
||||||
|
curr_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") # Obtain current time
|
||||||
def __init__(self):
|
parser = argparse.ArgumentParser(description="hyperparameters")
|
||||||
################################## 环境超参数 ###################################
|
parser.add_argument('--algo_name',default='PolicyGradient',type=str,help="name of algorithm")
|
||||||
self.algo_name = "PolicyGradient" # 算法名称
|
parser.add_argument('--env_name',default='CartPole-v0',type=str,help="name of environment")
|
||||||
self.env_name = 'CartPole-v0' # 环境名称
|
parser.add_argument('--train_eps',default=300,type=int,help="episodes of training")
|
||||||
self.device = torch.device(
|
parser.add_argument('--test_eps',default=20,type=int,help="episodes of testing")
|
||||||
"cuda" if torch.cuda.is_available() else "cpu") # detect GPU
|
parser.add_argument('--gamma',default=0.99,type=float,help="discounted factor")
|
||||||
self.seed = 10 # 随机种子,置0则不设置随机种子
|
parser.add_argument('--lr',default=0.01,type=float,help="learning rate")
|
||||||
self.train_eps = 300 # 训练的回合数
|
parser.add_argument('--batch_size',default=8,type=int)
|
||||||
self.test_eps = 30 # 测试的回合数
|
parser.add_argument('--hidden_dim',default=36,type=int)
|
||||||
################################################################################
|
parser.add_argument('--device',default='cpu',type=str,help="cpu or cuda")
|
||||||
|
parser.add_argument('--result_path',default=curr_path + "/outputs/" + parser.parse_args().env_name + \
|
||||||
################################## 算法超参数 ###################################
|
'/' + curr_time + '/results/' )
|
||||||
self.batch_size = 8 # mini-batch SGD中的批量大小
|
parser.add_argument('--model_path',default=curr_path + "/outputs/" + parser.parse_args().env_name + \
|
||||||
self.lr = 0.01 # 学习率
|
'/' + curr_time + '/models/' ) # path to save models
|
||||||
self.gamma = 0.99 # 强化学习中的折扣因子
|
parser.add_argument('--save_fig',default=True,type=bool,help="if save figure or not")
|
||||||
self.hidden_dim = 36 # 网络隐藏层
|
args = parser.parse_args()
|
||||||
################################################################################
|
return args
|
||||||
|
|
||||||
################################# 保存结果相关参数 ################################
|
|
||||||
self.result_path = curr_path + "/outputs/" + self.env_name + \
|
|
||||||
'/' + curr_time + '/results/' # 保存结果的路径
|
|
||||||
self.model_path = curr_path + "/outputs/" + self.env_name + \
|
|
||||||
'/' + curr_time + '/models/' # 保存模型的路径
|
|
||||||
self.save = True # 是否保存图片
|
|
||||||
################################################################################
|
|
||||||
|
|
||||||
|
|
||||||
def env_agent_config(cfg,seed=1):
|
def env_agent_config(cfg,seed=1):
|
||||||
@@ -65,9 +56,9 @@ def env_agent_config(cfg,seed=1):
|
|||||||
return env,agent
|
return env,agent
|
||||||
|
|
||||||
def train(cfg,env,agent):
|
def train(cfg,env,agent):
|
||||||
print('开始训练!')
|
print('Start training!')
|
||||||
print(f'环境:{cfg.env_name}, 算法:{cfg.algo_name}, 设备:{cfg.device}')
|
print(f'Env:{cfg.env_name}, Algorithm:{cfg.algo_name}, Device:{cfg.device}')
|
||||||
state_pool = [] # 存放每batch_size个episode的state序列
|
state_pool = [] # temp states pool per several episodes
|
||||||
action_pool = []
|
action_pool = []
|
||||||
reward_pool = []
|
reward_pool = []
|
||||||
rewards = []
|
rewards = []
|
||||||
@@ -86,11 +77,11 @@ def train(cfg,env,agent):
|
|||||||
reward_pool.append(reward)
|
reward_pool.append(reward)
|
||||||
state = next_state
|
state = next_state
|
||||||
if done:
|
if done:
|
||||||
print('回合:{}/{}, 奖励:{}'.format(i_ep + 1, cfg.train_eps, ep_reward))
|
print(f'Episode:{i_ep+1}/{cfg.train_eps}, Reward:{ep_reward:.2f}')
|
||||||
break
|
break
|
||||||
if i_ep > 0 and i_ep % cfg.batch_size == 0:
|
if i_ep > 0 and i_ep % cfg.batch_size == 0:
|
||||||
agent.update(reward_pool,state_pool,action_pool)
|
agent.update(reward_pool,state_pool,action_pool)
|
||||||
state_pool = [] # 每个episode的state
|
state_pool = []
|
||||||
action_pool = []
|
action_pool = []
|
||||||
reward_pool = []
|
reward_pool = []
|
||||||
rewards.append(ep_reward)
|
rewards.append(ep_reward)
|
||||||
@@ -99,8 +90,8 @@ def train(cfg,env,agent):
|
|||||||
0.9*ma_rewards[-1]+0.1*ep_reward)
|
0.9*ma_rewards[-1]+0.1*ep_reward)
|
||||||
else:
|
else:
|
||||||
ma_rewards.append(ep_reward)
|
ma_rewards.append(ep_reward)
|
||||||
print('完成训练!')
|
print('Finish training!')
|
||||||
env.close()
|
env.close() # close environment
|
||||||
return rewards, ma_rewards
|
return rewards, ma_rewards
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -5,7 +5,7 @@ Author: John
|
|||||||
Email: johnjim0816@gmail.com
|
Email: johnjim0816@gmail.com
|
||||||
Date: 2021-03-12 16:02:24
|
Date: 2021-03-12 16:02:24
|
||||||
LastEditor: John
|
LastEditor: John
|
||||||
LastEditTime: 2022-07-20 23:53:34
|
LastEditTime: 2022-07-21 21:45:33
|
||||||
Description:
|
Description:
|
||||||
Environment:
|
Environment:
|
||||||
'''
|
'''
|
||||||
@@ -69,19 +69,19 @@ def plot_losses(losses, algo="DQN", save=True, path='./'):
|
|||||||
plt.savefig(path+"losses_curve")
|
plt.savefig(path+"losses_curve")
|
||||||
plt.show()
|
plt.show()
|
||||||
|
|
||||||
def save_results_1(dic, tag='train', path='./results'):
|
def save_results(dic, tag='train', path='./results'):
|
||||||
''' 保存奖励
|
''' 保存奖励
|
||||||
'''
|
'''
|
||||||
for key,value in dic.items():
|
for key,value in dic.items():
|
||||||
np.save(path+'{}_{}.npy'.format(tag,key),value)
|
np.save(path+'{}_{}.npy'.format(tag,key),value)
|
||||||
print('Results saved!')
|
print('Results saved!')
|
||||||
|
|
||||||
def save_results(rewards, ma_rewards, tag='train', path='./results'):
|
# def save_results(rewards, ma_rewards, tag='train', path='./results'):
|
||||||
''' 保存奖励
|
# ''' 保存奖励
|
||||||
'''
|
# '''
|
||||||
np.save(path+'{}_rewards.npy'.format(tag), rewards)
|
# np.save(path+'{}_rewards.npy'.format(tag), rewards)
|
||||||
np.save(path+'{}_ma_rewards.npy'.format(tag), ma_rewards)
|
# np.save(path+'{}_ma_rewards.npy'.format(tag), ma_rewards)
|
||||||
print('Result saved!')
|
# print('Result saved!')
|
||||||
|
|
||||||
|
|
||||||
def make_dir(*paths):
|
def make_dir(*paths):
|
||||||
|
|||||||
Reference in New Issue
Block a user