johnjim0816
2021-09-16 15:35:40 +08:00
parent 5085040330
commit 34fcebc4b8
31 changed files with 434 additions and 137 deletions

View File

@@ -5,7 +5,7 @@
@Email: johnjim0816@gmail.com
@Date: 2020-06-10 15:27:16
@LastEditor: John
-LastEditTime: 2021-09-15 02:17:59
+LastEditTime: 2021-09-15 14:52:37
@Discription:
@Environment: python 3.7.7
'''
@@ -28,5 +28,9 @@ class ReplayBuffer:
        batch = random.sample(self.buffer, batch_size) # randomly sample a mini-batch of transitions
        state, action, reward, next_state, done = zip(*batch) # unpack into states, actions, etc.
        return state, action, reward, next_state, done
+    def __len__(self):
+        ''' Return the current number of stored transitions
+        '''
+        return len(self.buffer)
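For context, this hunk only shows the tail of the class. A minimal sketch of a replay buffer consistent with these lines is given below; the capacity/position bookkeeping in push is an assumption based on common DQN-style buffers and is not part of this diff.

import random

class ReplayBuffer:
    def __init__(self, capacity):
        self.capacity = capacity  # maximum number of transitions to keep (assumed)
        self.buffer = []          # underlying storage
        self.position = 0         # write index for circular overwrite (assumed)
    def push(self, state, action, reward, next_state, done):
        ''' Store one transition, overwriting the oldest once full (assumed implementation) '''
        if len(self.buffer) < self.capacity:
            self.buffer.append(None)
        self.buffer[self.position] = (state, action, reward, next_state, done)
        self.position = (self.position + 1) % self.capacity
    def sample(self, batch_size):
        batch = random.sample(self.buffer, batch_size) # randomly sample a mini-batch of transitions
        state, action, reward, next_state, done = zip(*batch) # unpack into states, actions, etc.
        return state, action, reward, next_state, done
    def __len__(self):
        ''' Return the current number of stored transitions '''
        return len(self.buffer)

The added __len__ lets callers write len(memory) directly, which is typically used to delay updates until enough transitions have been collected, e.g. if len(memory) > batch_size: agent.update().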

View File

@@ -5,7 +5,7 @@ Author: John
Email: johnjim0816@gmail.com
Date: 2021-03-12 21:14:12
LastEditor: John
LastEditTime: 2021-05-04 02:45:27
LastEditTime: 2021-09-15 13:21:03
Discription:
Environment:
'''
@@ -17,8 +17,8 @@ from torch.distributions import Categorical
class MLP(nn.Module):
    def __init__(self, input_dim,output_dim,hidden_dim=128):
        """ Initialize the Q network as a fully connected network
-            input_dim: number of input features, i.e. the number of environment states
-            output_dim: total number of output actions
+            input_dim: number of input features, i.e. the state dimension of the environment
+            output_dim: dimension of the output actions
        """
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim) # input layer
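The hunk cuts off after the first layer. A complete forward pass consistent with this constructor might look like the sketch below; the second hidden layer and the forward method are assumptions based on a typical three-layer MLP, not lines shown in this diff.

import torch.nn as nn
import torch.nn.functional as F

class MLP(nn.Module):
    def __init__(self, input_dim, output_dim, hidden_dim=128):
        """ Initialize the Q network as a fully connected network
            input_dim: number of input features, i.e. the state dimension of the environment
            output_dim: dimension of the output actions
        """
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)   # input layer
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)  # hidden layer (assumed)
        self.fc3 = nn.Linear(hidden_dim, output_dim)  # output layer (assumed)
    def forward(self, x):
        # map a batch of states to one value per action
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)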

View File

@@ -5,7 +5,7 @@ Author: John
Email: johnjim0816@gmail.com
Date: 2020-10-07 20:57:11
LastEditor: John
LastEditTime: 2021-09-11 21:35:00
LastEditTime: 2021-09-15 14:56:15
Discription:
Environment:
'''
@@ -29,7 +29,7 @@ def plot_rewards_cn(rewards,ma_rewards,tag="train",env='CartPole-v0',algo = "DQN
    ''' Plot learning curves with Chinese labels
    '''
    sns.set()
-    plt.title(u"{}环境下Q学习算法的学习曲线".format(env),fontproperties=chinese_font())
+    plt.title(u"{}环境下{}算法的学习曲线".format(env,algo),fontproperties=chinese_font())
    plt.xlabel(u'回合数',fontproperties=chinese_font())
    plt.plot(rewards)
    plt.plot(ma_rewards)
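Here ma_rewards is the smoothed (moving-average) reward curve plotted alongside the raw rewards. A sketch of how such a curve is commonly produced and passed in is shown below; smooth_rewards is a hypothetical helper, not part of this diff.

def smooth_rewards(rewards, alpha=0.9):
    ''' Exponential moving average of per-episode rewards (hypothetical helper) '''
    ma_rewards = []
    for r in rewards:
        if ma_rewards:
            ma_rewards.append(alpha * ma_rewards[-1] + (1 - alpha) * r)
        else:
            ma_rewards.append(r)
    return ma_rewards

# Example call, assuming the remaining parameters of plot_rewards_cn have defaults:
# plot_rewards_cn(rewards, smooth_rewards(rewards), tag="train", env='CartPole-v0', algo="DQN")

With the title now formatted from algo, the same plotting helper produces a correct title for any algorithm instead of hard-coding Q-learning.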