update codes

This commit is contained in:
johnjim0816
2021-12-21 20:14:13 +08:00
parent 64c319cab4
commit 3b712e8815
71 changed files with 1097 additions and 1340 deletions

View File

@@ -10,12 +10,40 @@ Discription:
Environment:
'''
import torch.optim as optim
from A2C.model import ActorCritic
import torch.nn as nn
import torch.nn.functional as F
from torch.distributions import Categorical
class ActorCritic(nn.Module):
    ''' A2C network model made of an actor head and a critic head.

    Both heads are independent two-layer MLPs (Linear -> ReLU -> Linear)
    that read the same input tensor. The critic ends in a single linear
    unit; the actor ends in a softmax over ``output_dim`` entries.
    '''
    def __init__(self, input_dim, output_dim, hidden_dim):
        ''' Build the two heads.

        Args:
            input_dim: size of the last dimension of the input tensor.
            output_dim: number of entries produced by the actor head.
            hidden_dim: width of the hidden layer in each head.
        '''
        super().__init__()
        self.critic = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, 1)
        )
        self.actor = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, output_dim),
            # Normalize over the last axis: identical to dim=1 for
            # (batch, input_dim) input, but also valid for a single
            # un-batched (input_dim,) input, which dim=1 rejects.
            nn.Softmax(dim=-1),
        )

    def forward(self, x):
        ''' Run both heads on ``x``.

        Returns:
            A tuple ``(dist, value)`` where ``dist`` is a ``Categorical``
            built from the actor's softmax output and ``value`` is the
            critic's output (last dimension of size 1).
        '''
        value = self.critic(x)
        probs = self.actor(x)
        dist = Categorical(probs)
        return dist, value
class A2C:
def __init__(self,state_dim,action_dim,cfg) -> None:
    ''' A2C algorithm: build the actor-critic model and its optimizer.

    Args:
        state_dim: forwarded to ActorCritic as its input size.
        action_dim: forwarded to ActorCritic as its output size.
        cfg: configuration object; this method reads ``cfg.gamma``,
            ``cfg.device`` and ``cfg.hidden_size``.
    '''
    self.gamma = cfg.gamma
    self.device = cfg.device
    # Single model construction using this signature's own parameter names
    # (the stale n_states/n_actions variant referenced undefined names).
    self.model = ActorCritic(state_dim, action_dim, cfg.hidden_size).to(self.device)
    # Adam is created with the library's default hyperparameters.
    self.optimizer = optim.Adam(self.model.parameters())
def compute_returns(self,next_value, rewards, masks):