This commit is contained in:
JohnJim0816
2021-03-28 11:18:52 +08:00
parent 2df8d965d2
commit 6e4d966e1f
56 changed files with 497 additions and 165 deletions

View File

@@ -13,9 +13,9 @@ from A2C.model import ActorCritic
import torch.optim as optim
class A2C:
def __init__(self,n_states, n_actions, cfg):
def __init__(self,state_dim, action_dim, cfg):
self.gamma = 0.99
self.model = ActorCritic(n_states, n_actions, hidden_dim=cfg.hidden_dim).to(cfg.device)
self.model = ActorCritic(state_dim, action_dim, hidden_dim=cfg.hidden_dim).to(cfg.device)
self.optimizer = optim.Adam(self.model.parameters(),lr=cfg.lr)
def choose_action(self, state):
dist, value = self.model(state)

View File

@@ -95,8 +95,8 @@ if __name__ == "__main__":
cfg = A2CConfig()
env = gym.make('CartPole-v0')
env.seed(1) # set random seed for env
n_states = env.observation_space.shape[0]
n_actions = env.action_space.n
agent = A2C(n_states, n_actions, cfg)
state_dim = env.observation_space.shape[0]
action_dim = env.action_space.n
agent = A2C(state_dim, action_dim, cfg)
train(cfg,env,agent)

View File

@@ -13,18 +13,18 @@ import torch.nn as nn
from torch.distributions import Categorical
class ActorCritic(nn.Module):
def __init__(self, n_states, n_actions, hidden_dim=256):
def __init__(self, state_dim, action_dim, hidden_dim=256):
super(ActorCritic, self).__init__()
self.critic = nn.Sequential(
nn.Linear(n_states, hidden_dim),
nn.Linear(state_dim, hidden_dim),
nn.ReLU(),
nn.Linear(hidden_dim, 1)
)
self.actor = nn.Sequential(
nn.Linear(n_states, hidden_dim),
nn.Linear(state_dim, hidden_dim),
nn.ReLU(),
nn.Linear(hidden_dim, n_actions),
nn.Linear(hidden_dim, action_dim),
nn.Softmax(dim=1),
)