update codes

This commit is contained in:
johnjim0816
2021-12-21 20:14:13 +08:00
parent 64c319cab4
commit 3b712e8815
71 changed files with 1097 additions and 1340 deletions

View File

@@ -136,12 +136,12 @@
"outputs": [],
"source": [
"class DuelingNet(nn.Module):\n",
" def __init__(self, n_states, n_actions,hidden_size=128):\n",
" def __init__(self, state_dim, action_dim,hidden_size=128):\n",
" super(DuelingNet, self).__init__()\n",
" \n",
" # 隐藏层\n",
" self.hidden = nn.Sequential(\n",
" nn.Linear(n_states, hidden_size),\n",
" nn.Linear(state_dim, hidden_size),\n",
" nn.ReLU()\n",
" )\n",
" \n",
@@ -149,7 +149,7 @@
" self.advantage = nn.Sequential(\n",
" nn.Linear(hidden_size, hidden_size),\n",
" nn.ReLU(),\n",
" nn.Linear(hidden_size, n_actions)\n",
" nn.Linear(hidden_size, action_dim)\n",
" )\n",
" \n",
" # 价值函数\n",
@@ -192,7 +192,7 @@
],
"source": [
"class DuelingDQN:\n",
" def __init__(self,n_states,n_actions,cfg) -> None:\n",
" def __init__(self,state_dim,action_dim,cfg) -> None:\n",
" self.batch_size = cfg.batch_size\n",
" self.device = cfg.device\n",
" self.loss_history = [] # 记录loss的变化\n",
@@ -200,8 +200,8 @@
" self.epsilon = lambda frame_idx: cfg.epsilon_end + \\\n",
" (cfg.epsilon_start - cfg.epsilon_end) * \\\n",
" math.exp(-1. * frame_idx / cfg.epsilon_decay)\n",
" self.policy_net = DuelingNet(n_states, n_actions,hidden_dim=cfg.hidden_dim).to(self.device)\n",
" self.target_net = DuelingNet(n_states, n_actions,hidden_dim=cfg.hidden_dim).to(self.device)\n",
" self.policy_net = DuelingNet(state_dim, action_dim,hidden_dim=cfg.hidden_dim).to(self.device)\n",
" self.target_net = DuelingNet(state_dim, action_dim,hidden_dim=cfg.hidden_dim).to(self.device)\n",
" for target_param, param in zip(self.target_net.parameters(),self.policy_net.parameters()): # 复制参数到目标网络target_net\n",
" target_param.data.copy_(param.data)\n",
" self.optimizer = optim.Adam(self.policy_net.parameters(), lr=cfg.lr) # 优化器\n",