update

2021-09-15 10:32:52 +08:00
parent 95f3f4dd57
commit 5085040330
74 changed files with 431 additions and 433 deletions
--- a/codes/common/memory.py
+++ b/codes/common/memory.py
@@ -5,30 +5,28 @@
@Email: johnjim0816@gmail.com
@Date: 2020-06-10 15:27:16
@LastEditor: John
-LastEditTime: 2021-03-13 11:37:15
+LastEditTime: 2021-09-15 02:17:59
@Description: 
@Environment: python 3.7.7
 '''
 import random
-
 class ReplayBuffer:
-    
    def __init__(self, capacity):
-        self.capacity = capacity
-        self.buffer = []
-        self.position = 0
+        self.capacity = capacity # 经验回放的容量
+        self.buffer = [] # 缓冲区
+        self.position = 0 
    
    def push(self, state, action, reward, next_state, done):
+        ''' 缓冲区是一个队列，容量超出时去掉开始存入的转移(transition)
+        '''
        if len(self.buffer) < self.capacity:
            self.buffer.append(None)
        self.buffer[self.position] = (state, action, reward, next_state, done)
-        self.position = (self.position + 1) % self.capacity
+        self.position = (self.position + 1) % self.capacity 
    
    def sample(self, batch_size):
-        batch = random.sample(self.buffer, batch_size)
-        state, action, reward, next_state, done =  zip(*batch)
+        batch = random.sample(self.buffer, batch_size) # 随机采出小批量转移
+        state, action, reward, next_state, done =  zip(*batch) # 解压成状态，动作等
        return state, action, reward, next_state, done
-    
-    def __len__(self):
-        return len(self.buffer)
+

--- a/codes/common/plot.py
+++ b/codes/common/plot.py
@@ -5,12 +5,15 @@ Author: John
 Email: johnjim0816@gmail.com
 Date: 2020-10-07 20:57:11
 LastEditor: John
-LastEditTime: 2021-04-29 15:41:48
+LastEditTime: 2021-09-11 21:35:00
 Description: 
 Environment: 
 '''
 import matplotlib.pyplot as plt
 import seaborn as sns
+from matplotlib.font_manager import FontProperties
+def chinese_font():  
+    return FontProperties(fname='/System/Library/Fonts/STHeiti Light.ttc',size=15)  # 系统字体路径，此处是mac的
 def plot_rewards(rewards,ma_rewards,tag="train",env='CartPole-v0',algo = "DQN",save=True,path='./'):
    sns.set()
    plt.title("average learning curve of {} for {}".format(algo,env))
@@ -21,16 +24,20 @@ def plot_rewards(rewards,ma_rewards,tag="train",env='CartPole-v0',algo = "DQN",s
    if save:
        plt.savefig(path+"{}_rewards_curve".format(tag))
    plt.show()
-# def plot_rewards(dic,tag="train",env='CartPole-v0',algo = "DQN",save=True,path='./'):
-#     sns.set()
-#     plt.title("average learning curve of {} for {}".format(algo,env))
-#     plt.xlabel('epsiodes')
-#     for key, value in dic.items():
-#         plt.plot(value,label=key)
-#     plt.legend()
-#     if save:
-#         plt.savefig(path+algo+"_rewards_curve_{}".format(tag))
-#     plt.show()
+
+def plot_rewards_cn(rewards,ma_rewards,tag="train",env='CartPole-v0',algo = "DQN",save=True,path='./'):
+    ''' 中文画图
+    '''
+    sns.set()
+    plt.title(u"{}环境下Q学习算法的学习曲线".format(env),fontproperties=chinese_font())
+    plt.xlabel(u'回合数',fontproperties=chinese_font())
+    plt.plot(rewards)
+    plt.plot(ma_rewards)
+    plt.legend((u'奖励',u'滑动平均奖励',),loc="best",prop=chinese_font())
+    if save:
+        plt.savefig(path+f"{tag}_rewards_curve_cn")
+    plt.show()
+
 def plot_losses(losses,algo = "DQN",save=True,path='./'):
    sns.set()
    plt.title("loss curve of {}".format(algo))
--- a/codes/common/utils.py
+++ b/codes/common/utils.py
@@ -5,7 +5,7 @@ Author: John
 Email: johnjim0816@gmail.com
 Date: 2021-03-12 16:02:24
 LastEditor: John
-LastEditTime: 2021-05-04 19:58:31
+LastEditTime: 2021-09-11 21:48:49
 Description: 
 Environment: 
 '''
@@ -18,7 +18,7 @@ def save_results(rewards,ma_rewards,tag='train',path='./results'):
    '''
    np.save(path+'{}_rewards.npy'.format(tag), rewards)
    np.save(path+'{}_ma_rewards.npy'.format(tag), ma_rewards)
-    print('results saved!')
+    print('结果保存完毕！')

 def make_dir(*paths):
    for path in paths: