johnjim0816
2021-09-16 15:35:40 +08:00
parent 5085040330
commit 34fcebc4b8
31 changed files with 434 additions and 137 deletions

View File

@@ -5,7 +5,7 @@
@Email: johnjim0816@gmail.com
@Date: 2020-06-10 15:27:16
@LastEditor: John
-LastEditTime: 2021-09-15 02:17:59
+LastEditTime: 2021-09-15 14:52:37
@Discription:
@Environment: python 3.7.7
'''
@@ -28,5 +28,9 @@ class ReplayBuffer:
        batch = random.sample(self.buffer, batch_size) # randomly sample a mini-batch of transitions
        state, action, reward, next_state, done = zip(*batch) # unpack into states, actions, etc.
        return state, action, reward, next_state, done
+    def __len__(self):
+        ''' Return the current number of stored transitions
+        '''
+        return len(self.buffer)
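For context, this hunk only shows the tail of the class. A minimal sketch of a replay buffer consistent with these lines is given below; the capacity/position bookkeeping in push is an assumption based on common DQN-style buffers and is not part of this diff.

import random

class ReplayBuffer:
    def __init__(self, capacity):
        self.capacity = capacity  # maximum number of transitions to keep (assumed)
        self.buffer = []          # underlying storage
        self.position = 0         # write index for circular overwrite (assumed)
    def push(self, state, action, reward, next_state, done):
        ''' Store one transition, overwriting the oldest once full (assumed implementation) '''
        if len(self.buffer) < self.capacity:
            self.buffer.append(None)
        self.buffer[self.position] = (state, action, reward, next_state, done)
        self.position = (self.position + 1) % self.capacity
    def sample(self, batch_size):
        batch = random.sample(self.buffer, batch_size) # randomly sample a mini-batch of transitions
        state, action, reward, next_state, done = zip(*batch) # unpack into states, actions, etc.
        return state, action, reward, next_state, done
    def __len__(self):
        ''' Return the current number of stored transitions '''
        return len(self.buffer)

The added __len__ lets callers write len(memory) directly, which is typically used to delay updates until enough transitions have been collected, e.g. if len(memory) > batch_size: agent.update().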

View File

@@ -5,7 +5,7 @@ Author: John
Email: johnjim0816@gmail.com
Date: 2021-03-12 21:14:12
LastEditor: John
LastEditTime: 2021-05-04 02:45:27
LastEditTime: 2021-09-15 13:21:03
Discription:
Environment:
'''
@@ -17,8 +17,8 @@ from torch.distributions import Categorical
class MLP(nn.Module):
    def __init__(self, input_dim,output_dim,hidden_dim=128):
        """ Initialize the Q network as a fully connected network
-            input_dim: number of input features, i.e. the number of environment states
-            output_dim: total number of output actions
+            input_dim: number of input features, i.e. the state dimension of the environment
+            output_dim: dimension of the output actions
        """
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim) # input layer
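The hunk cuts off after the first layer. A complete forward pass consistent with this constructor might look like the sketch below; the second hidden layer and the forward method are assumptions based on a typical three-layer MLP, not lines shown in this diff.

import torch.nn as nn
import torch.nn.functional as F

class MLP(nn.Module):
    def __init__(self, input_dim, output_dim, hidden_dim=128):
        """ Initialize the Q network as a fully connected network
            input_dim: number of input features, i.e. the state dimension of the environment
            output_dim: dimension of the output actions
        """
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)   # input layer
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)  # hidden layer (assumed)
        self.fc3 = nn.Linear(hidden_dim, output_dim)  # output layer (assumed)
    def forward(self, x):
        # map a batch of states to one value per action
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)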

View File

@@ -5,7 +5,7 @@ Author: John
Email: johnjim0816@gmail.com
Date: 2020-10-07 20:57:11
LastEditor: John
LastEditTime: 2021-09-11 21:35:00
LastEditTime: 2021-09-15 14:56:15
Discription:
Environment:
'''
@@ -29,7 +29,7 @@ def plot_rewards_cn(rewards,ma_rewards,tag="train",env='CartPole-v0',algo = "DQN
    ''' Plot learning curves with Chinese labels
    '''
    sns.set()
-    plt.title(u"{}环境下Q学习算法的学习曲线".format(env),fontproperties=chinese_font())
+    plt.title(u"{}环境下{}算法的学习曲线".format(env,algo),fontproperties=chinese_font())
    plt.xlabel(u'回合数',fontproperties=chinese_font())
    plt.plot(rewards)
    plt.plot(ma_rewards)
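Here ma_rewards is the smoothed (moving-average) reward curve plotted alongside the raw rewards. A sketch of how such a curve is commonly produced and passed in is shown below; smooth_rewards is a hypothetical helper, not part of this diff.

def smooth_rewards(rewards, alpha=0.9):
    ''' Exponential moving average of per-episode rewards (hypothetical helper) '''
    ma_rewards = []
    for r in rewards:
        if ma_rewards:
            ma_rewards.append(alpha * ma_rewards[-1] + (1 - alpha) * r)
        else:
            ma_rewards.append(r)
    return ma_rewards

# Example call, assuming the remaining parameters of plot_rewards_cn have defaults:
# plot_rewards_cn(rewards, smooth_rewards(rewards), tag="train", env='CartPole-v0', algo="DQN")

With the title now formatted from algo, the same plotting helper produces a correct title for any algorithm instead of hard-coding Q-learning.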