update codes
This commit is contained in:
@@ -136,12 +136,12 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"class DuelingNet(nn.Module):\n",
|
||||
" def __init__(self, n_states, n_actions,hidden_size=128):\n",
|
||||
" def __init__(self, state_dim, action_dim,hidden_size=128):\n",
|
||||
" super(DuelingNet, self).__init__()\n",
|
||||
" \n",
|
||||
" # 隐藏层\n",
|
||||
" self.hidden = nn.Sequential(\n",
|
||||
" nn.Linear(n_states, hidden_size),\n",
|
||||
" nn.Linear(state_dim, hidden_size),\n",
|
||||
" nn.ReLU()\n",
|
||||
" )\n",
|
||||
" \n",
|
||||
@@ -149,7 +149,7 @@
|
||||
" self.advantage = nn.Sequential(\n",
|
||||
" nn.Linear(hidden_size, hidden_size),\n",
|
||||
" nn.ReLU(),\n",
|
||||
" nn.Linear(hidden_size, n_actions)\n",
|
||||
" nn.Linear(hidden_size, action_dim)\n",
|
||||
" )\n",
|
||||
" \n",
|
||||
" # 价值函数\n",
|
||||
@@ -192,7 +192,7 @@
|
||||
],
|
||||
"source": [
|
||||
"class DuelingDQN:\n",
|
||||
" def __init__(self,n_states,n_actions,cfg) -> None:\n",
|
||||
" def __init__(self,state_dim,action_dim,cfg) -> None:\n",
|
||||
" self.batch_size = cfg.batch_size\n",
|
||||
" self.device = cfg.device\n",
|
||||
" self.loss_history = [] # 记录loss的变化\n",
|
||||
@@ -200,8 +200,8 @@
|
||||
" self.epsilon = lambda frame_idx: cfg.epsilon_end + \\\n",
|
||||
" (cfg.epsilon_start - cfg.epsilon_end) * \\\n",
|
||||
" math.exp(-1. * frame_idx / cfg.epsilon_decay)\n",
|
||||
" self.policy_net = DuelingNet(n_states, n_actions,hidden_dim=cfg.hidden_dim).to(self.device)\n",
|
||||
" self.target_net = DuelingNet(n_states, n_actions,hidden_dim=cfg.hidden_dim).to(self.device)\n",
|
||||
" self.policy_net = DuelingNet(state_dim, action_dim,hidden_dim=cfg.hidden_dim).to(self.device)\n",
|
||||
" self.target_net = DuelingNet(state_dim, action_dim,hidden_dim=cfg.hidden_dim).to(self.device)\n",
|
||||
" for target_param, param in zip(self.target_net.parameters(),self.policy_net.parameters()): # 复制参数到目标网络targe_net\n",
|
||||
" target_param.data.copy_(param.data)\n",
|
||||
" self.optimizer = optim.Adam(self.policy_net.parameters(), lr=cfg.lr) # 优化器\n",
|
||||
|
||||
Reference in New Issue
Block a user