#!/usr/bin/env python
# coding=utf-8
'''
Author: John
Email: johnjim0816@gmail.com
Date: 2021-03-12 21:14:12
LastEditor: John
LastEditTime: 2021-03-13 13:48:35
Description: Small fully connected networks (MLP1, MLP2).
Environment:
'''
import torch
import torch.nn as nn
import torch.nn.functional as F


class MLP1(nn.Module):
    """Multi-layer perceptron that maps a state to a single probability.

    Input: a tensor whose last dimension is ``n_states``.
    Output: a tensor whose last dimension is 1, squashed by a sigmoid
    (annotated in the original code as "Prob of Left").

    Args:
        n_states: Number of input features (state dimension).
        hidden_dim: Width of both hidden layers.
    """

    def __init__(self, n_states, hidden_dim=36):
        super().__init__()
        # hidden_dim is the width of the hidden layers (the original note
        # mentions 24 and 36 as typical values); adjust it depending on the
        # state dimension and the number of actions.
        self.fc1 = nn.Linear(n_states, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        self.fc3 = nn.Linear(hidden_dim, 1)  # Prob of Left

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the sigmoid-activated output for input ``x``."""
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        # torch.sigmoid replaces F.sigmoid, which PyTorch deprecated and
        # which emits a warning on older releases.
        return torch.sigmoid(self.fc3(x))


class MLP2(nn.Module):
    """Fully connected Q-network.

    Args:
        n_states: Number of input features, i.e. the size of the
            environment state.
        n_actions: Number of outputs, one per action.
        hidden_dim: Width of both hidden layers.
    """

    def __init__(self, n_states, n_actions, hidden_dim=128):
        super().__init__()
        self.fc1 = nn.Linear(n_states, hidden_dim)  # input layer
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)  # hidden layer
        self.fc3 = nn.Linear(hidden_dim, n_actions)  # output layer

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the raw (un-activated) output of the last layer for ``x``."""
        # ReLU after the first two layers; the output layer stays linear.
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)