update
This commit is contained in:
@@ -5,30 +5,28 @@
|
||||
@Email: johnjim0816@gmail.com
|
||||
@Date: 2020-06-10 15:27:16
|
||||
@LastEditor: John
|
||||
LastEditTime: 2021-03-13 11:37:15
|
||||
LastEditTime: 2021-09-15 02:17:59
|
||||
@Description:
|
||||
@Environment: python 3.7.7
|
||||
'''
|
||||
import random
|
||||
|
||||
class ReplayBuffer:
    ''' Fixed-capacity experience replay buffer.

    Transitions are kept in a list that is used as a ring: once `capacity`
    transitions have been stored, each new one overwrites the oldest.
    '''

    def __init__(self, capacity):
        ''' Create an empty buffer.

        Args:
            capacity: maximum number of transitions held at once.
        '''
        self.capacity = capacity  # capacity of the experience replay
        self.buffer = []  # storage for transitions
        self.position = 0  # index of the slot the next push writes to

    def push(self, state, action, reward, next_state, done):
        ''' Store one transition, replacing the oldest one when the buffer is full.
        '''
        # Grow the list by one slot until it reaches capacity; afterwards
        # the write below reuses existing slots.
        if len(self.buffer) < self.capacity:
            self.buffer.append(None)
        self.buffer[self.position] = (state, action, reward, next_state, done)
        # Advance exactly once per push and wrap around at capacity.
        self.position = (self.position + 1) % self.capacity

    def sample(self, batch_size):
        ''' Draw `batch_size` distinct stored transitions uniformly at random.

        Returns:
            Five tuples (state, action, reward, next_state, done), each of
            length `batch_size`, aligned by transition.

        Raises:
            ValueError: from random.sample when `batch_size` exceeds the
                number of stored transitions.
        '''
        batch = random.sample(self.buffer, batch_size)  # random mini-batch of transitions
        # Transpose the list of transitions into per-field tuples.
        state, action, reward, next_state, done = zip(*batch)
        return state, action, reward, next_state, done

    def __len__(self):
        ''' Return the number of transitions currently stored.
        '''
        return len(self.buffer)
|
||||
|
||||
|
||||
|
||||
@@ -5,12 +5,15 @@ Author: John
|
||||
Email: johnjim0816@gmail.com
|
||||
Date: 2020-10-07 20:57:11
|
||||
LastEditor: John
|
||||
LastEditTime: 2021-04-29 15:41:48
|
||||
LastEditTime: 2021-09-11 21:35:00
|
||||
Description:
|
||||
Environment:
|
||||
'''
|
||||
import matplotlib.pyplot as plt
|
||||
import seaborn as sns
|
||||
from matplotlib.font_manager import FontProperties
|
||||
def chinese_font(fname='/System/Library/Fonts/STHeiti Light.ttc', size=15):
    ''' Build the font properties used to render Chinese text in plots.

    Args:
        fname: path to the font file. The default is the macOS system font
            path; pass a different file on other systems.
        size: font size passed to FontProperties.

    Returns:
        A FontProperties object for the given font file and size.
    '''
    return FontProperties(fname=fname, size=size)
|
||||
def plot_rewards(rewards,ma_rewards,tag="train",env='CartPole-v0',algo = "DQN",save=True,path='./'):
|
||||
sns.set()
|
||||
plt.title("average learning curve of {} for {}".format(algo,env))
|
||||
@@ -21,16 +24,20 @@ def plot_rewards(rewards,ma_rewards,tag="train",env='CartPole-v0',algo = "DQN",s
|
||||
if save:
|
||||
plt.savefig(path+"{}_rewards_curve".format(tag))
|
||||
plt.show()
|
||||
# def plot_rewards(dic,tag="train",env='CartPole-v0',algo = "DQN",save=True,path='./'):
|
||||
# sns.set()
|
||||
# plt.title("average learning curve of {} for {}".format(algo,env))
|
||||
# plt.xlabel('epsiodes')
|
||||
# for key, value in dic.items():
|
||||
# plt.plot(value,label=key)
|
||||
# plt.legend()
|
||||
# if save:
|
||||
# plt.savefig(path+algo+"_rewards_curve_{}".format(tag))
|
||||
# plt.show()
|
||||
|
||||
def plot_rewards_cn(rewards,ma_rewards,tag="train",env='CartPole-v0',algo = "DQN",save=True,path='./'):
    ''' Plot the reward curves with Chinese title, axis label and legend.

    Args:
        rewards: sequence of rewards to plot.
        ma_rewards: sequence of moving-average rewards to plot.
        tag: prefix of the saved file name.
        env: environment name shown in the title.
        algo: algorithm name shown in the title.
        save: when true, save the figure before showing it.
        path: prefix concatenated directly with the file name, so it must
            already end with a path separator.
    '''
    font = chinese_font()  # one font object shared by title, label and legend
    sns.set()
    # Use the algo argument in the title instead of a hard-coded algorithm name.
    plt.title(u"{}环境下{}算法的学习曲线".format(env, algo), fontproperties=font)
    plt.xlabel(u'回合数', fontproperties=font)
    plt.plot(rewards)
    plt.plot(ma_rewards)
    plt.legend((u'奖励', u'滑动平均奖励',), loc="best", prop=font)
    if save:
        plt.savefig(path + f"{tag}_rewards_curve_cn")
    plt.show()
|
||||
|
||||
def plot_losses(losses,algo = "DQN",save=True,path='./'):
|
||||
sns.set()
|
||||
plt.title("loss curve of {}".format(algo))
|
||||
|
||||
@@ -5,7 +5,7 @@ Author: John
|
||||
Email: johnjim0816@gmail.com
|
||||
Date: 2021-03-12 16:02:24
|
||||
LastEditor: John
|
||||
LastEditTime: 2021-05-04 19:58:31
|
||||
LastEditTime: 2021-09-11 21:48:49
|
||||
Description:
|
||||
Environment:
|
||||
'''
|
||||
@@ -18,7 +18,7 @@ def save_results(rewards,ma_rewards,tag='train',path='./results'):
|
||||
'''
|
||||
np.save(path+'{}_rewards.npy'.format(tag), rewards)
|
||||
np.save(path+'{}_ma_rewards.npy'.format(tag), ma_rewards)
|
||||
print('results saved!')
|
||||
print('结果保存完毕!')
|
||||
|
||||
def make_dir(*paths):
|
||||
for path in paths:
|
||||
|
||||
Reference in New Issue
Block a user