update
This commit is contained in:
@@ -5,7 +5,7 @@
|
||||
@Email: johnjim0816@gmail.com
|
||||
@Date: 2020-06-10 15:27:16
|
||||
@LastEditor: John
|
||||
LastEditTime: 2021-09-15 02:17:59
|
||||
LastEditTime: 2021-09-15 14:52:37
|
||||
@Description:
|
||||
@Environment: python 3.7.7
|
||||
'''
|
||||
@@ -28,5 +28,9 @@ class ReplayBuffer:
|
||||
batch = random.sample(self.buffer, batch_size) # 随机采出小批量转移
|
||||
state, action, reward, next_state, done = zip(*batch) # 解压成状态,动作等
|
||||
return state, action, reward, next_state, done
|
||||
|
||||
|
||||
def __len__(self):
|
||||
''' 返回当前存储的量
|
||||
'''
|
||||
return len(self.buffer)
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@ Author: John
|
||||
Email: johnjim0816@gmail.com
|
||||
Date: 2021-03-12 21:14:12
|
||||
LastEditor: John
|
||||
LastEditTime: 2021-05-04 02:45:27
|
||||
LastEditTime: 2021-09-15 13:21:03
|
||||
Description:
|
||||
Environment:
|
||||
'''
|
||||
@@ -17,8 +17,8 @@ from torch.distributions import Categorical
|
||||
class MLP(nn.Module):
|
||||
def __init__(self, input_dim,output_dim,hidden_dim=128):
|
||||
""" 初始化q网络,为全连接网络
|
||||
input_dim: 输入的feature即环境的state数目
|
||||
output_dim: 输出的action总个数
|
||||
input_dim: 输入的特征数即环境的状态数
|
||||
output_dim: 输出的动作维度
|
||||
"""
|
||||
super(MLP, self).__init__()
|
||||
self.fc1 = nn.Linear(input_dim, hidden_dim) # 输入层
|
||||
|
||||
@@ -5,7 +5,7 @@ Author: John
|
||||
Email: johnjim0816@gmail.com
|
||||
Date: 2020-10-07 20:57:11
|
||||
LastEditor: John
|
||||
LastEditTime: 2021-09-11 21:35:00
|
||||
LastEditTime: 2021-09-15 14:56:15
|
||||
Description:
|
||||
Environment:
|
||||
'''
|
||||
@@ -29,7 +29,7 @@ def plot_rewards_cn(rewards,ma_rewards,tag="train",env='CartPole-v0',algo = "DQN
|
||||
''' 中文画图
|
||||
'''
|
||||
sns.set()
|
||||
plt.title(u"{}环境下Q学习算法的学习曲线".format(env),fontproperties=chinese_font())
|
||||
plt.title(u"{}环境下{}算法的学习曲线".format(env,algo),fontproperties=chinese_font())
|
||||
plt.xlabel(u'回合数',fontproperties=chinese_font())
|
||||
plt.plot(rewards)
|
||||
plt.plot(ma_rewards)
|
||||
|
||||
Reference in New Issue
Block a user