update rainbowdqn
@@ -21,8 +21,8 @@ class Actor(nn.Module):
         '''[summary]

         Args:
-            input_dim (int): input dimension; here it equals state_dim
-            output_dim (int): output dimension; here it equals action_dim
+            input_dim (int): input dimension; here it equals n_states
+            output_dim (int): output dimension; here it equals n_actions
             max_action (int): maximum value of the action
         '''
         super(Actor, self).__init__()
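The body of Actor is not part of this hunk; below is a minimal sketch of the network the docstring describes, assuming the common TD3 layout (two hidden layers and a tanh output scaled by max_action). Hidden sizes and layer names are illustrative, not taken from the commit.

import torch
import torch.nn as nn
import torch.nn.functional as F

class Actor(nn.Module):
    def __init__(self, input_dim, output_dim, max_action):
        # input_dim: state dimension (n_states), output_dim: action dimension (n_actions)
        super(Actor, self).__init__()
        self.l1 = nn.Linear(input_dim, 256)   # hidden sizes are assumptions
        self.l2 = nn.Linear(256, 256)
        self.l3 = nn.Linear(256, output_dim)
        self.max_action = max_action

    def forward(self, state):
        a = F.relu(self.l1(state))
        a = F.relu(self.l2(a))
        # tanh bounds the output to [-1, 1]; scaling by max_action maps it to the env's action range
        return self.max_action * torch.tanh(self.l3(a))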
@@ -14,13 +14,13 @@ import torch


 class ReplayBuffer(object):
-    def __init__(self, state_dim, action_dim, max_size=int(1e6)):
+    def __init__(self, n_states, n_actions, max_size=int(1e6)):
         self.max_size = max_size
         self.ptr = 0
         self.size = 0
-        self.state = np.zeros((max_size, state_dim))
-        self.action = np.zeros((max_size, action_dim))
-        self.next_state = np.zeros((max_size, state_dim))
+        self.state = np.zeros((max_size, n_states))
+        self.action = np.zeros((max_size, n_actions))
+        self.next_state = np.zeros((max_size, n_states))
         self.reward = np.zeros((max_size, 1))
         self.not_done = np.zeros((max_size, 1))
         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
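Only __init__ appears in this hunk; here is a minimal sketch of the buffer with add/sample methods under the same naming, assuming the standard ring-buffer layout used in TD3 reference code (the method names and uniform sampling are assumptions, not shown in the commit).

import numpy as np
import torch

class ReplayBuffer(object):
    # sketch only: __init__ mirrors the hunk above, add/sample are assumed
    def __init__(self, n_states, n_actions, max_size=int(1e6)):
        self.max_size, self.ptr, self.size = max_size, 0, 0
        self.state = np.zeros((max_size, n_states))
        self.action = np.zeros((max_size, n_actions))
        self.next_state = np.zeros((max_size, n_states))
        self.reward = np.zeros((max_size, 1))
        self.not_done = np.zeros((max_size, 1))
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    def add(self, state, action, next_state, reward, done):
        self.state[self.ptr] = state
        self.action[self.ptr] = action
        self.next_state[self.ptr] = next_state
        self.reward[self.ptr] = reward
        self.not_done[self.ptr] = 1. - done
        self.ptr = (self.ptr + 1) % self.max_size        # wrap around and overwrite oldest data
        self.size = min(self.size + 1, self.max_size)

    def sample(self, batch_size):
        idx = np.random.randint(0, self.size, size=batch_size)  # uniform random minibatch
        to_tensor = lambda x: torch.FloatTensor(x).to(self.device)
        return (to_tensor(self.state[idx]), to_tensor(self.action[idx]),
                to_tensor(self.next_state[idx]), to_tensor(self.reward[idx]),
                to_tensor(self.not_done[idx]))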
@@ -74,10 +74,10 @@ if __name__ == "__main__":
     env.seed(cfg.seed) # Set seeds
     torch.manual_seed(cfg.seed)
     np.random.seed(cfg.seed)
-    state_dim = env.observation_space.shape[0]
-    action_dim = env.action_space.shape[0]
+    n_states = env.observation_space.shape[0]
+    n_actions = env.action_space.shape[0]
     max_action = float(env.action_space.high[0])
-    td3= TD3(state_dim,action_dim,max_action,cfg)
+    td3= TD3(n_states,n_actions,max_action,cfg)
     cfg.model_path = './TD3/results/HalfCheetah-v2/20210416-130341/models/'
     td3.load(cfg.model_path)
     td3_rewards,td3_ma_rewards = eval(cfg.env,td3,cfg.seed)
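The eval function called on the last line of this hunk is defined elsewhere in the repo; below is a minimal sketch of what such a helper could look like, assuming the old Gym API used here (env.seed, four-tuple step) and a deterministic policy during evaluation. The episode count and the moving-average rule are assumptions.

import gym
import numpy as np

def eval(env_name, agent, seed, eval_episodes=10):
    # sketch of an evaluation helper matching the call above; details are assumed
    env = gym.make(env_name)
    env.seed(seed + 100)                 # offset the seed so eval episodes differ from training
    rewards, ma_rewards = [], []
    for _ in range(eval_episodes):
        state, done, ep_reward = env.reset(), False, 0.0
        while not done:
            action = agent.choose_action(np.array(state))   # deterministic policy, no exploration noise
            state, reward, done, _ = env.step(action)
            ep_reward += reward
        rewards.append(ep_reward)
        # exponential moving average of episode rewards
        ma_rewards.append(0.9 * ma_rewards[-1] + 0.1 * ep_reward if ma_rewards else ep_reward)
    return rewards, ma_rewards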
@@ -72,7 +72,7 @@ def train(cfg,env,agent):
         else:
             action = (
                 agent.choose_action(np.array(state))
-                + np.random.normal(0, max_action * cfg.expl_noise, size=action_dim)
+                + np.random.normal(0, max_action * cfg.expl_noise, size=n_actions)
             ).clip(-max_action, max_action)
         # Perform action
         next_state, reward, done, _ = env.step(action)
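The only change in this hunk is the size argument of the Gaussian exploration noise, which draws one noise sample per action dimension. A quick self-contained illustration of what that expression produces (all values below are made up, not taken from the commit):

import numpy as np

n_actions, max_action, expl_noise = 3, 2.0, 0.1    # illustrative values only

raw_action = np.array([1.9, -0.5, 0.0])            # stand-in for agent.choose_action(state)
noise = np.random.normal(0, max_action * expl_noise, size=n_actions)  # one sample per action dim
action = (raw_action + noise).clip(-max_action, max_action)  # keep the action inside the env bounds
print(action.shape)  # (3,)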
@@ -121,11 +121,11 @@ def train(cfg,env,agent):
         # else:
         #     action = (
         #             agent.choose_action(np.array(state))
-        #             + np.random.normal(0, max_action * cfg.expl_noise, size=action_dim)
+        #             + np.random.normal(0, max_action * cfg.expl_noise, size=n_actions)
         #         ).clip(-max_action, max_action)
         # # action = (
         # #         agent.choose_action(np.array(state))
-        # #         + np.random.normal(0, max_action * cfg.expl_noise, size=action_dim)
+        # #         + np.random.normal(0, max_action * cfg.expl_noise, size=n_actions)
         # #     ).clip(-max_action, max_action)
         # # Perform action
         # next_state, reward, done, _ = env.step(action)
@@ -157,10 +157,10 @@ if __name__ == "__main__":
     env.seed(cfg.seed) # Set seeds
     torch.manual_seed(cfg.seed)
     np.random.seed(cfg.seed)
-    state_dim = env.observation_space.shape[0]
-    action_dim = env.action_space.shape[0]
+    n_states = env.observation_space.shape[0]
+    n_actions = env.action_space.shape[0]
     max_action = float(env.action_space.high[0])
-    agent = TD3(state_dim,action_dim,max_action,cfg)
+    agent = TD3(n_states,n_actions,max_action,cfg)
     rewards,ma_rewards = train(cfg,env,agent)
     make_dir(cfg.result_path,cfg.model_path)
     agent.save(path=cfg.model_path)
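make_dir is a small utility defined elsewhere in the repo; a plausible sketch, assuming it simply creates each result/model directory if it does not already exist:

import os

def make_dir(*paths):
    # assumed behavior: create every directory passed in, ignoring ones that already exist
    for path in paths:
        os.makedirs(path, exist_ok=True)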
@@ -70,10 +70,10 @@ if __name__ == "__main__":
     env.seed(cfg.seed) # Set seeds
     torch.manual_seed(cfg.seed)
     np.random.seed(cfg.seed)
-    state_dim = env.observation_space.shape[0]
-    action_dim = env.action_space.shape[0]
+    n_states = env.observation_space.shape[0]
+    n_actions = env.action_space.shape[0]
     max_action = float(env.action_space.high[0])
-    td3= TD3(state_dim,action_dim,max_action,cfg)
+    td3= TD3(n_states,n_actions,max_action,cfg)
     cfg.model_path = './TD3/results/Pendulum-v0/20210428-092059/models/'
     cfg.result_path = './TD3/results/Pendulum-v0/20210428-092059/results/'
     td3.load(cfg.model_path)
@@ -79,7 +79,7 @@ def train(cfg,env,agent):
         else:
             action = (
                 agent.choose_action(np.array(state))
-                + np.random.normal(0, max_action * cfg.expl_noise, size=action_dim)
+                + np.random.normal(0, max_action * cfg.expl_noise, size=n_actions)
             ).clip(-max_action, max_action)
         # Perform action
         next_state, reward, done, _ = env.step(action)
@@ -109,10 +109,10 @@ if __name__ == "__main__":
     env.seed(1) # random seed
     torch.manual_seed(1)
     np.random.seed(1)
-    state_dim = env.observation_space.shape[0]
-    action_dim = env.action_space.shape[0]
+    n_states = env.observation_space.shape[0]
+    n_actions = env.action_space.shape[0]
     max_action = float(env.action_space.high[0])
-    agent = TD3(state_dim,action_dim,max_action,cfg)
+    agent = TD3(n_states,n_actions,max_action,cfg)
     rewards,ma_rewards = train(cfg,env,agent)
     make_dir(plot_cfg.result_path,plot_cfg.model_path)
     agent.save(path=plot_cfg.model_path)