update
This commit is contained in:
@@ -13,9 +13,9 @@ from A2C.model import ActorCritic
|
||||
import torch.optim as optim
|
||||
|
||||
class A2C:
|
||||
def __init__(self,n_states, n_actions, cfg):
|
||||
def __init__(self,state_dim, action_dim, cfg):
|
||||
self.gamma = 0.99
|
||||
self.model = ActorCritic(n_states, n_actions, hidden_dim=cfg.hidden_dim).to(cfg.device)
|
||||
self.model = ActorCritic(state_dim, action_dim, hidden_dim=cfg.hidden_dim).to(cfg.device)
|
||||
self.optimizer = optim.Adam(self.model.parameters(),lr=cfg.lr)
|
||||
def choose_action(self, state):
|
||||
dist, value = self.model(state)
|
||||
|
||||
@@ -95,8 +95,8 @@ if __name__ == "__main__":
|
||||
cfg = A2CConfig()
|
||||
env = gym.make('CartPole-v0')
|
||||
env.seed(1) # set random seed for env
|
||||
n_states = env.observation_space.shape[0]
|
||||
n_actions = env.action_space.n
|
||||
agent = A2C(n_states, n_actions, cfg)
|
||||
state_dim = env.observation_space.shape[0]
|
||||
action_dim = env.action_space.n
|
||||
agent = A2C(state_dim, action_dim, cfg)
|
||||
train(cfg,env,agent)
|
||||
|
||||
|
||||
@@ -13,18 +13,18 @@ import torch.nn as nn
|
||||
from torch.distributions import Categorical
|
||||
|
||||
class ActorCritic(nn.Module):
|
||||
def __init__(self, n_states, n_actions, hidden_dim=256):
|
||||
def __init__(self, state_dim, action_dim, hidden_dim=256):
|
||||
super(ActorCritic, self).__init__()
|
||||
self.critic = nn.Sequential(
|
||||
nn.Linear(n_states, hidden_dim),
|
||||
nn.Linear(state_dim, hidden_dim),
|
||||
nn.ReLU(),
|
||||
nn.Linear(hidden_dim, 1)
|
||||
)
|
||||
|
||||
self.actor = nn.Sequential(
|
||||
nn.Linear(n_states, hidden_dim),
|
||||
nn.Linear(state_dim, hidden_dim),
|
||||
nn.ReLU(),
|
||||
nn.Linear(hidden_dim, n_actions),
|
||||
nn.Linear(hidden_dim, action_dim),
|
||||
nn.Softmax(dim=1),
|
||||
)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user