This commit is contained in:
johnjim0816
2021-09-15 10:32:52 +08:00
parent 95f3f4dd57
commit 5085040330
74 changed files with 431 additions and 433 deletions

View File

@@ -5,30 +5,28 @@
@Email: johnjim0816@gmail.com
@Date: 2020-06-10 15:27:16
@LastEditor: John
LastEditTime: 2021-03-13 11:37:15
LastEditTime: 2021-09-15 02:17:59
@Description:
@Environment: python 3.7.7
'''
import random
class ReplayBuffer:
    ''' Fixed-capacity store of transitions, overwritten in circular order.

    Transitions are kept as (state, action, reward, next_state, done) tuples.
    Once `capacity` transitions are stored, each new push overwrites the
    oldest one.
    '''
    def __init__(self, capacity):
        ''' Create an empty buffer.

        Args:
            capacity: maximum number of transitions kept at any time.
        '''
        self.capacity = capacity  # capacity of the experience replay
        self.buffer = []  # storage for the transitions
        self.position = 0  # index of the slot the next push writes to

    def push(self, state, action, reward, next_state, done):
        ''' Store one transition.

        The buffer behaves like a queue: once the capacity is exceeded,
        the transition that was stored first is dropped.
        '''
        # Grow the list by one placeholder slot until it reaches capacity;
        # afterwards the slot at `position` is simply overwritten.
        if len(self.buffer) < self.capacity:
            self.buffer.append(None)
        self.buffer[self.position] = (state, action, reward, next_state, done)
        # Advance exactly once per push and wrap around at capacity.
        self.position = (self.position + 1) % self.capacity

    def sample(self, batch_size):
        ''' Draw `batch_size` distinct stored transitions at random.

        Returns:
            Five tuples (state, action, reward, next_state, done), each of
            length `batch_size`.

        Raises:
            ValueError: raised by random.sample when `batch_size` exceeds
                the number of stored transitions.
        '''
        batch = random.sample(self.buffer, batch_size)  # random mini-batch of transitions
        # Transpose the list of transitions into per-field tuples.
        state, action, reward, next_state, done = zip(*batch)
        return state, action, reward, next_state, done

    def __len__(self):
        ''' Return the number of transitions currently stored.
        '''
        return len(self.buffer)

View File

@@ -5,12 +5,15 @@ Author: John
Email: johnjim0816@gmail.com
Date: 2020-10-07 20:57:11
LastEditor: John
LastEditTime: 2021-04-29 15:41:48
LastEditTime: 2021-09-11 21:35:00
Description:
Environment:
'''
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.font_manager import FontProperties
def chinese_font():
    ''' Return the FontProperties used to render Chinese text in the plots.
    '''
    # Path to a system font file; this location is the macOS one.
    font_path = '/System/Library/Fonts/STHeiti Light.ttc'
    return FontProperties(fname=font_path, size=15)
def plot_rewards(rewards,ma_rewards,tag="train",env='CartPole-v0',algo = "DQN",save=True,path='./'):
sns.set()
plt.title("average learning curve of {} for {}".format(algo,env))
@@ -21,16 +24,20 @@ def plot_rewards(rewards,ma_rewards,tag="train",env='CartPole-v0',algo = "DQN",s
if save:
plt.savefig(path+"{}_rewards_curve".format(tag))
plt.show()
# def plot_rewards(dic,tag="train",env='CartPole-v0',algo = "DQN",save=True,path='./'):
# sns.set()
# plt.title("average learning curve of {} for {}".format(algo,env))
# plt.xlabel('epsiodes')
# for key, value in dic.items():
# plt.plot(value,label=key)
# plt.legend()
# if save:
# plt.savefig(path+algo+"_rewards_curve_{}".format(tag))
# plt.show()
def plot_rewards_cn(rewards,ma_rewards,tag="train",env='CartPole-v0',algo = "DQN",save=True,path='./'):
    ''' Plot the reward curves with Chinese title, axis label and legend.

    Args:
        rewards: sequence plotted as the first curve (legend: reward).
        ma_rewards: sequence plotted as the second curve
            (legend: moving-average reward).
        tag: prefix of the saved figure's file name.
        env: environment name shown in the title.
        algo: algorithm name shown in the title.
        save: when true, the figure is written to disk before being shown.
        path: prefix joined to the file name by plain string concatenation,
            so it needs to end with a path separator.
    '''
    sns.set()
    # Build the font once and reuse it for every text element.
    font = chinese_font()
    # Title reads "learning curve of the <algo> algorithm in the <env>
    # environment"; the algorithm name comes from `algo` instead of being
    # hard-coded to Q-learning.
    plt.title("{}环境下{}算法的学习曲线".format(env, algo), fontproperties=font)
    plt.xlabel('回合数', fontproperties=font)  # x axis: number of episodes
    plt.plot(rewards)
    plt.plot(ma_rewards)
    # Legend entries follow plotting order: reward, moving-average reward.
    plt.legend(('奖励', '滑动平均奖励',), loc="best", prop=font)
    if save:
        plt.savefig(path+f"{tag}_rewards_curve_cn")
    plt.show()
def plot_losses(losses,algo = "DQN",save=True,path='./'):
sns.set()
plt.title("loss curve of {}".format(algo))

View File

@@ -5,7 +5,7 @@ Author: John
Email: johnjim0816@gmail.com
Date: 2021-03-12 16:02:24
LastEditor: John
LastEditTime: 2021-05-04 19:58:31
LastEditTime: 2021-09-11 21:48:49
Description:
Environment:
'''
@@ -18,7 +18,7 @@ def save_results(rewards,ma_rewards,tag='train',path='./results'):
'''
np.save(path+'{}_rewards.npy'.format(tag), rewards)
np.save(path+'{}_ma_rewards.npy'.format(tag), ma_rewards)
print('results saved!')
print('结果保存完毕!')
def make_dir(*paths):
for path in paths: