update DoubleDQN
This commit is contained in:
33
codes/DoubleDQN/README.md
Normal file
33
codes/DoubleDQN/README.md
Normal file
@@ -0,0 +1,33 @@
|
||||
## 思路
|
||||
|
||||
见[博客](https://blog.csdn.net/JohnJim0/article/details/111552545)
|
||||
|
||||
## 环境
|
||||
|
||||
python 3.7.9
|
||||
|
||||
pytorch 1.6.0
|
||||
|
||||
tensorboard 2.3.0
|
||||
|
||||
torchvision 0.7.0
|
||||
|
||||
## 使用
|
||||
|
||||
|
||||
train:
|
||||
|
||||
```bash
|
||||
python main.py
|
||||
```
|
||||
|
||||
eval:
|
||||
|
||||
```bash
|
||||
python main.py --train 0
|
||||
```
|
||||
可视化
|
||||
|
||||
```bash
|
||||
tensorboard --logdir logs
|
||||
```
|
||||
134
codes/DoubleDQN/agent.py
Normal file
134
codes/DoubleDQN/agent.py
Normal file
@@ -0,0 +1,134 @@
|
||||
#!/usr/bin/env python
|
||||
# coding=utf-8
|
||||
'''
|
||||
@Author: John
|
||||
@Email: johnjim0816@gmail.com
|
||||
@Date: 2020-06-12 00:50:49
|
||||
@LastEditor: John
|
||||
LastEditTime: 2020-12-22 14:44:46
|
||||
@Discription:
|
||||
@Environment: python 3.7.7
|
||||
'''
|
||||
'''off-policy
|
||||
'''
|
||||
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.optim as optim
|
||||
import torch.nn.functional as F
|
||||
import random
|
||||
import math
|
||||
import numpy as np
|
||||
from memory import ReplayBuffer
|
||||
from model import FCN
|
||||
class DQN:
    """Double DQN agent with an epsilon-greedy behaviour policy.

    The agent owns two networks of identical architecture: ``policy_net``,
    which is trained by ``update``, and ``target_net``, which is only ever
    overwritten with copies of ``policy_net`` weights (here at construction
    time; periodic syncing is left to the caller).
    """

    def __init__(self, n_states, n_actions, gamma=0.99, epsilon_start=0.9, epsilon_end=0.05, epsilon_decay=200, memory_capacity=10000, policy_lr=0.01, batch_size=128, device="cpu"):
        """Build the networks, the optimizer and the replay memory.

        Args:
            n_states: size of the state vector fed to the networks.
            n_actions: number of discrete actions.
            gamma: discount factor used in the TD target.
            epsilon_start: exploration rate before any action was chosen.
            epsilon_end: value the exploration rate decays towards.
            epsilon_decay: time constant (in chosen actions) of the decay.
            memory_capacity: capacity handed to ``ReplayBuffer``.
            policy_lr: learning rate of the Adam optimizer.
            batch_size: number of transitions sampled per ``update``.
            device: torch device the networks and batches live on.
        """
        self.actions_count = 0  # number of train-mode action selections so far
        self.n_actions = n_actions
        self.device = device
        self.gamma = gamma
        # Epsilon-greedy parameters; ``epsilon`` is recomputed on every
        # train-mode call to ``choose_action``.
        self.epsilon = 0
        self.epsilon_start = epsilon_start
        self.epsilon_end = epsilon_end
        self.epsilon_decay = epsilon_decay
        self.batch_size = batch_size
        self.policy_net = FCN(n_states, n_actions).to(self.device)
        self.target_net = FCN(n_states, n_actions).to(self.device)
        # The target network starts as an exact copy of the policy network.
        self.target_net.load_state_dict(self.policy_net.state_dict())
        self.target_net.eval()  # inference mode (affects BatchNorm/Dropout layers, if any)
        # Only the policy network's parameters are optimized.
        self.optimizer = optim.Adam(self.policy_net.parameters(), lr=policy_lr)
        self.loss = 0
        self.memory = ReplayBuffer(memory_capacity)

    def _greedy_action(self, net, state):
        """Return the index of the largest Q-value that ``net`` predicts for ``state``."""
        with torch.no_grad():
            # Add a leading batch dimension; the state is cast to float32
            # because that is the dtype the network weights use.
            state_tensor = torch.tensor(
                np.asarray(state), device=self.device, dtype=torch.float32).unsqueeze(0)
            # max(1) returns (values, indices) per row; the index is the action.
            return net(state_tensor).max(1)[1].item()

    def choose_action(self, state, train=True):
        """Select an action for ``state``.

        Args:
            state: a single observation (array-like of numbers).
            train: when True, act epsilon-greedily with ``policy_net`` and
                advance the epsilon schedule; when False, act greedily with
                ``target_net`` (the network ``load_model`` restores).

        Returns:
            The chosen action as a Python int.
        """
        if not train:
            # The tensor is created on self.device (inside the helper) so
            # evaluation also works when the networks live on a GPU.
            return self._greedy_action(self.target_net, state)
        # Exponential decay from epsilon_start towards epsilon_end.
        self.epsilon = self.epsilon_end + (self.epsilon_start - self.epsilon_end) * \
            math.exp(-1. * self.actions_count / self.epsilon_decay)
        self.actions_count += 1
        if random.random() > self.epsilon:
            return self._greedy_action(self.policy_net, state)
        return random.randrange(self.n_actions)

    def update(self):
        """Run one Double DQN optimization step on a sampled mini-batch.

        Does nothing until the replay memory holds at least ``batch_size``
        transitions. The resulting loss tensor is stored in ``self.loss``.
        """
        if len(self.memory) < self.batch_size:
            return
        # Randomly sample transitions from the replay memory.
        state_batch, action_batch, reward_batch, next_state_batch, done_batch = self.memory.sample(
            self.batch_size)
        # Stack the per-sample observations into one array before building
        # the tensor; the result has shape (batch, n_states).
        state_batch = torch.tensor(
            np.array(state_batch), device=self.device, dtype=torch.float)
        # Shape (batch, 1); int64 because it is used as a gather index.
        action_batch = torch.tensor(
            action_batch, device=self.device, dtype=torch.long).unsqueeze(1)
        reward_batch = torch.tensor(
            reward_batch, device=self.device, dtype=torch.float)  # shape (batch,)
        next_state_batch = torch.tensor(
            np.array(next_state_batch), device=self.device, dtype=torch.float)
        # Booleans -> 0./1. floats, shape (batch,), so the terminal mask
        # below is applied per sample.
        done_batch = torch.tensor(np.float32(done_batch), device=self.device)

        # Q(s_t, a_t) of the actions that were actually taken, shape (batch, 1).
        q_value = self.policy_net(state_batch).gather(dim=1, index=action_batch)

        # Double DQN target: the policy network picks the next action, the
        # target network evaluates it. (Nature DQN would instead take the
        # max over the target network's own Q-values.) The target is a
        # constant w.r.t. the optimization, hence no_grad.
        with torch.no_grad():
            next_actions = self.policy_net(next_state_batch).max(1)[1].unsqueeze(1)
            next_target_q_value = self.target_net(
                next_state_batch).gather(1, next_actions).squeeze(1)
            # For terminal transitions (done == 1) the target is the reward alone.
            q_target = reward_batch + self.gamma * next_target_q_value * (1 - done_batch)

        self.loss = nn.MSELoss()(q_value, q_target.unsqueeze(1))  # mean squared TD error
        self.optimizer.zero_grad()  # clear gradients left over from the previous step
        self.loss.backward()
        for param in self.policy_net.parameters():  # clamp gradients against blow-up
            if param.grad is not None:
                param.grad.data.clamp_(-1, 1)
        self.optimizer.step()

    def save_model(self,path):
        """Write the target network's state dict to ``path``."""
        torch.save(self.target_net.state_dict(), path)

    def load_model(self,path):
        """Load a checkpoint written by ``save_model`` from ``path``.

        The weights are mapped onto ``self.device`` so a checkpoint saved on
        a GPU can be loaded on a CPU-only machine, and are copied into both
        networks so that train-mode action selection also uses them.
        """
        state_dict = torch.load(path, map_location=self.device)
        self.target_net.load_state_dict(state_dict)
        self.policy_net.load_state_dict(state_dict)
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
126
codes/DoubleDQN/main.py
Normal file
126
codes/DoubleDQN/main.py
Normal file
@@ -0,0 +1,126 @@
|
||||
#!/usr/bin/env python
|
||||
# coding=utf-8
|
||||
'''
|
||||
@Author: John
|
||||
@Email: johnjim0816@gmail.com
|
||||
@Date: 2020-06-12 00:48:57
|
||||
@LastEditor: John
|
||||
LastEditTime: 2020-12-22 15:39:46
|
||||
@Discription:
|
||||
@Environment: python 3.7.7
|
||||
'''
|
||||
import gym
|
||||
import torch
|
||||
from torch.utils.tensorboard import SummaryWriter
|
||||
import os
|
||||
from agent import DQN
|
||||
from params import SEQUENCE,SAVED_MODEL_PATH,RESULT_PATH
|
||||
from params import get_args
|
||||
from utils import save_results
|
||||
|
||||
def train(cfg):
    """Train a Double DQN agent on CartPole-v0 and persist model and metrics.

    Per-episode reward, its exponential moving average and the episode
    length are logged to TensorBoard under ``logs/train/<SEQUENCE>`` next to
    this file. After training, the checkpoint is written to
    ``SAVED_MODEL_PATH`` and the metric arrays to ``RESULT_PATH``.

    Args:
        cfg: parsed arguments; this function reads gamma, epsilon_start,
            epsilon_end, epsilon_decay, policy_lr, memory_capacity,
            batch_size, train_eps, train_steps and target_update.
    """
    print('Start to train !')
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # use the GPU when available
    # NOTE(review): .unwrapped presumably strips gym's wrappers (e.g. the
    # built-in episode time limit) so only cfg.train_steps caps an episode —
    # confirm; the original author notes it is generally not needed here.
    env = gym.make('CartPole-v0').unwrapped
    env.seed(1)  # fix the environment's random seed
    n_states = env.observation_space.shape[0]
    n_actions = env.action_space.n
    agent = DQN(n_states=n_states, n_actions=n_actions, device=device, gamma=cfg.gamma, epsilon_start=cfg.epsilon_start,
                epsilon_end=cfg.epsilon_end, epsilon_decay=cfg.epsilon_decay, policy_lr=cfg.policy_lr, memory_capacity=cfg.memory_capacity, batch_size=cfg.batch_size)
    rewards = []
    moving_average_rewards = []
    ep_steps = []
    log_dir=os.path.split(os.path.abspath(__file__))[0]+"/logs/train/" + SEQUENCE
    writer = SummaryWriter(log_dir)
    for i_episode in range(1, cfg.train_eps+1):
        state = env.reset()
        ep_reward = 0
        # Defined up front so the summary below still works if
        # cfg.train_steps is 0 and the inner loop never runs.
        i_step = 0
        done = False
        for i_step in range(1, cfg.train_steps+1):
            action = agent.choose_action(state)  # epsilon-greedy action for the current state
            next_state, reward, done, _ = env.step(action)
            ep_reward += reward
            agent.memory.push(state, action, reward, next_state, done)  # store the transition
            state = next_state
            agent.update()  # one optimization step per environment step
            if done:
                break
        # Sync the target network with the policy network every
        # cfg.target_update episodes.
        if i_episode % cfg.target_update == 0:
            agent.target_net.load_state_dict(agent.policy_net.state_dict())
        print('Episode:', i_episode, ' Reward: %i' %
              int(ep_reward), 'n_steps:', i_step, 'done: ', done,' Explore: %.2f' % agent.epsilon)
        ep_steps.append(i_step)
        rewards.append(ep_reward)
        # Exponential moving average of the episode reward (weight 0.1 on
        # the newest episode), seeded with the first episode's reward.
        if i_episode == 1:
            moving_average_rewards.append(ep_reward)
        else:
            moving_average_rewards.append(
                0.9*moving_average_rewards[-1]+0.1*ep_reward)
        writer.add_scalars('rewards',{'raw':rewards[-1], 'moving_average': moving_average_rewards[-1]}, i_episode)
        writer.add_scalar('steps_of_each_episode',
                          ep_steps[-1], i_episode)
    writer.close()
    env.close()
    print('Complete training!')
    # Save the model. makedirs (rather than mkdir) because the path is
    # nested as saved_model/<SEQUENCE>/ and the parent may not exist yet.
    os.makedirs(SAVED_MODEL_PATH, exist_ok=True)
    agent.save_model(SAVED_MODEL_PATH+'checkpoint.pth')
    print('model saved!')
    # Store rewards and related results; the nested result/<SEQUENCE>/
    # folder is created here so the save cannot fail on a missing parent.
    os.makedirs(RESULT_PATH, exist_ok=True)
    save_results(rewards,moving_average_rewards,ep_steps,tag='train',result_path=RESULT_PATH)
|
||||
|
||||
|
||||
def eval(cfg, saved_model_path = SAVED_MODEL_PATH):
    """Evaluate a saved Double DQN agent on CartPole-v0.

    Loads ``checkpoint.pth`` from ``saved_model_path``, runs
    ``cfg.eval_eps`` greedy episodes of at most ``cfg.eval_steps`` steps,
    logs the metrics to TensorBoard under ``logs/eval/<SEQUENCE>`` next to
    this file and stores them in ``RESULT_PATH``.

    NOTE: the function name shadows the ``eval`` builtin inside this
    module; it is kept unchanged for existing callers.

    Args:
        cfg: parsed arguments; the agent hyper-parameters plus eval_eps and
            eval_steps are read.
        saved_model_path: directory that contains ``checkpoint.pth``.
    """
    print('start to eval !')
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # use the GPU when available
    # NOTE(review): .unwrapped presumably strips gym's wrappers (e.g. the
    # built-in episode time limit) so only cfg.eval_steps caps an episode —
    # confirm; the original author notes it is generally not needed here.
    env = gym.make('CartPole-v0').unwrapped
    env.seed(1)  # fix the environment's random seed
    n_states = env.observation_space.shape[0]
    n_actions = env.action_space.n
    agent = DQN(n_states=n_states, n_actions=n_actions, device=device, gamma=cfg.gamma, epsilon_start=cfg.epsilon_start,
                epsilon_end=cfg.epsilon_end, epsilon_decay=cfg.epsilon_decay, policy_lr=cfg.policy_lr, memory_capacity=cfg.memory_capacity, batch_size=cfg.batch_size)
    # os.path.join gives the same path whether or not the directory string
    # ends with a separator.
    agent.load_model(os.path.join(saved_model_path, 'checkpoint.pth'))
    rewards = []
    moving_average_rewards = []
    ep_steps = []
    log_dir=os.path.split(os.path.abspath(__file__))[0]+"/logs/eval/" + SEQUENCE
    writer = SummaryWriter(log_dir)
    for i_episode in range(1, cfg.eval_eps+1):
        state = env.reset()
        ep_reward = 0
        # Defined up front so the summary below still works if
        # cfg.eval_steps is 0 and the inner loop never runs.
        i_step = 0
        done = False
        for i_step in range(1, cfg.eval_steps+1):
            action = agent.choose_action(state,train=False)  # greedy action, no exploration
            next_state, reward, done, _ = env.step(action)
            ep_reward += reward
            state = next_state
            if done:
                break
        print('Episode:', i_episode, ' Reward: %i' %
              int(ep_reward), 'n_steps:', i_step, 'done: ', done)

        ep_steps.append(i_step)
        rewards.append(ep_reward)
        # Exponential moving average of the episode reward (weight 0.1 on
        # the newest episode), seeded with the first episode's reward.
        if i_episode == 1:
            moving_average_rewards.append(ep_reward)
        else:
            moving_average_rewards.append(
                0.9*moving_average_rewards[-1]+0.1*ep_reward)

        writer.add_scalars('rewards',{'raw':rewards[-1], 'moving_average': moving_average_rewards[-1]}, i_episode)
        writer.add_scalar('steps_of_each_episode',
                          ep_steps[-1], i_episode)
    writer.close()
    env.close()
    # Store rewards and related results; the nested result/<SEQUENCE>/
    # folder is created here so the save cannot fail on a missing parent
    # (e.g. in an eval-only run).
    os.makedirs(RESULT_PATH, exist_ok=True)
    save_results(rewards,moving_average_rewards,ep_steps,tag='eval',result_path=RESULT_PATH)
    print('Complete evaling!')
|
||||
|
||||
if __name__ == "__main__":
    cfg = get_args()
    if not cfg.train:
        # Eval-only run: use the checkpoint stored directly under the
        # saved_model/ folder next to this file.
        model_path = os.path.dirname(os.path.abspath(__file__)) + "/saved_model/"
        eval(cfg, saved_model_path=model_path)
    else:
        # Train first, then evaluate the checkpoint that train() just wrote
        # (eval's default model path).
        train(cfg)
        eval(cfg)
|
||||
41
codes/DoubleDQN/memory.py
Normal file
41
codes/DoubleDQN/memory.py
Normal file
@@ -0,0 +1,41 @@
|
||||
#!/usr/bin/env python
|
||||
# coding=utf-8
|
||||
'''
|
||||
@Author: John
|
||||
@Email: johnjim0816@gmail.com
|
||||
@Date: 2020-06-10 15:27:16
|
||||
@LastEditor: John
|
||||
LastEditTime: 2020-12-22 12:56:27
|
||||
@Discription:
|
||||
@Environment: python 3.7.7
|
||||
'''
|
||||
import random
|
||||
import numpy as np
|
||||
|
||||
class ReplayBuffer:
    """Fixed-capacity replay memory that overwrites its oldest entries."""

    def __init__(self, capacity):
        self.capacity = capacity  # maximum number of stored transitions
        self.buffer = []
        self.position = 0  # index the next transition is written to

    def push(self, state, action, reward, next_state, done):
        """Store one transition, wrapping around once the buffer is full."""
        transition = (state, action, reward, next_state, done)
        slot = self.position
        if len(self.buffer) < self.capacity:
            # Grow by one placeholder so the write position exists.
            self.buffer.append(None)
        self.buffer[slot] = transition
        self.position = (slot + 1) % self.capacity

    def sample(self, batch_size):
        """Draw ``batch_size`` distinct transitions at random.

        Returns five tuples (states, actions, rewards, next states, dones),
        each holding one field of the sampled transitions.
        """
        picked = random.sample(self.buffer, batch_size)
        states, actions, rewards, next_states, dones = zip(*picked)
        return states, actions, rewards, next_states, dones

    def __len__(self):
        """Return the number of transitions currently stored."""
        return len(self.buffer)
|
||||
|
||||
30
codes/DoubleDQN/model.py
Normal file
30
codes/DoubleDQN/model.py
Normal file
@@ -0,0 +1,30 @@
|
||||
#!/usr/bin/env python
|
||||
# coding=utf-8
|
||||
'''
|
||||
@Author: John
|
||||
@Email: johnjim0816@gmail.com
|
||||
@Date: 2020-06-12 00:47:02
|
||||
@LastEditor: John
|
||||
LastEditTime: 2020-08-19 16:55:54
|
||||
@Discription:
|
||||
@Environment: python 3.7.7
|
||||
'''
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
|
||||
class FCN(nn.Module):
    """Fully connected Q-network with two hidden layers of 128 units."""

    def __init__(self, n_states=4, n_actions=18):
        """Create the three linear layers.

        n_states: number of input features (size of the state vector)
        n_actions: number of outputs (one Q-value per action)
        """
        super().__init__()
        hidden_dim = 128
        self.fc1 = nn.Linear(n_states, hidden_dim)  # input layer
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)  # hidden layer
        self.fc3 = nn.Linear(hidden_dim, n_actions)  # output layer

    def forward(self, x):
        """Map a batch of states to Q-values; the output layer has no activation."""
        for layer in (self.fc1, self.fc2):
            x = F.relu(layer(x))
        return self.fc3(x)
|
||||
48
codes/DoubleDQN/params.py
Normal file
48
codes/DoubleDQN/params.py
Normal file
@@ -0,0 +1,48 @@
|
||||
#!/usr/bin/env python
|
||||
# coding=utf-8
|
||||
'''
|
||||
Author: John
|
||||
Email: johnjim0816@gmail.com
|
||||
Date: 2020-12-22 15:22:17
|
||||
LastEditor: John
|
||||
LastEditTime: 2020-12-22 15:26:09
|
||||
Discription:
|
||||
Environment:
|
||||
'''
|
||||
import datetime
|
||||
import os
|
||||
import argparse
|
||||
|
||||
ALGO_NAME = 'Double DQN'
|
||||
SEQUENCE = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
|
||||
SAVED_MODEL_PATH = os.path.split(os.path.abspath(__file__))[0]+"/saved_model/"+SEQUENCE+'/'
|
||||
RESULT_PATH = os.path.split(os.path.abspath(__file__))[0]+"/result/"+SEQUENCE+'/'
|
||||
|
||||
def get_args():
    """Parse the command-line options of the Double DQN scripts.

    Returns the ``argparse.Namespace`` produced from ``sys.argv``.
    """
    option_specs = (
        # 1 = train (then evaluate), 0 = evaluate only
        ("--train", {"default": 1, "type": int}),
        # discount factor of Q-learning
        ("--gamma", {"default": 0.99, "type": float}),
        # epsilon-greedy exploration schedule
        ("--epsilon_start", {"default": 0.95, "type": float}),
        ("--epsilon_end", {"default": 0.01, "type": float}),
        ("--epsilon_decay", {"default": 500, "type": float}),
        ("--policy_lr", {"default": 0.01, "type": float}),
        ("--memory_capacity", {"default": 1000, "type": int,
                               "help": "capacity of Replay Memory"}),
        ("--batch_size", {"default": 32, "type": int,
                          "help": "batch size of memory sampling"}),
        # number of training episodes
        ("--train_eps", {"default": 200, "type": int}),
        # maximum number of steps per training episode
        ("--train_steps", {"default": 200, "type": int}),
        # how often (in episodes) the target network is synced
        ("--target_update", {"default": 2, "type": int,
                             "help": "when(every default 2 eisodes) to update target net "}),
        # number of evaluation episodes
        ("--eval_eps", {"default": 100, "type": int}),
        # maximum number of steps per evaluation episode
        ("--eval_steps", {"default": 200, "type": int}),
    )
    parser = argparse.ArgumentParser()
    for flag, spec in option_specs:
        parser.add_argument(flag, **spec)
    return parser.parse_args()
|
||||
48
codes/DoubleDQN/plot.py
Normal file
48
codes/DoubleDQN/plot.py
Normal file
@@ -0,0 +1,48 @@
|
||||
#!/usr/bin/env python
|
||||
# coding=utf-8
|
||||
'''
|
||||
@Author: John
|
||||
@Email: johnjim0816@gmail.com
|
||||
@Date: 2020-06-11 16:30:09
|
||||
@LastEditor: John
|
||||
LastEditTime: 2020-12-22 15:24:31
|
||||
@Discription:
|
||||
@Environment: python 3.7.7
|
||||
'''
|
||||
import matplotlib.pyplot as plt
|
||||
import seaborn as sns
|
||||
import numpy as np
|
||||
import os
|
||||
from params import ALGO_NAME
|
||||
def plot(item,ylabel='rewards_train', save_fig = True):
    """Plot one per-episode metric curve with seaborn styling.

    Args:
        item: sequence of values, one per episode.
        ylabel: y-axis label; also used in the title and as the file name
            of the saved figure.
        save_fig: when True, save the figure as ``result/<ylabel>.png``
            next to this file before showing it.
    """
    sns.set()
    plt.figure()
    plt.plot(np.arange(len(item)), item)
    plt.title(ylabel+' of '+ALGO_NAME)
    plt.ylabel(ylabel)
    plt.xlabel('episodes')
    if save_fig:
        # Resolve the folder from the absolute module path:
        # os.path.dirname(__file__) can be an empty string when the script
        # is started as `python plot.py`, which would point the save path
        # at the filesystem root.
        result_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "result")
        os.makedirs(result_dir, exist_ok=True)
        plt.savefig(os.path.join(result_dir, ylabel + ".png"))
    plt.show()
|
||||
|
||||
|
||||
# plt.show()
|
||||
if __name__ == "__main__":
    # Plot the stored reward, moving-average reward and step curves, first
    # for the training run and then for the evaluation run.
    output_path = os.path.dirname(os.path.abspath(__file__)) + "/result/"
    for tag in ('train', 'eval'):
        rewards = np.load(output_path + "rewards_" + tag + ".npy")
        moving_average_rewards = np.load(output_path + "moving_average_rewards_" + tag + ".npy")
        steps = np.load(output_path + "steps_" + tag + ".npy")
        plot(rewards, ylabel='rewards_' + tag)
        plot(moving_average_rewards, ylabel='moving_average_rewards_' + tag)
        plot(steps, ylabel='steps_' + tag)
|
||||
Binary file not shown.
Binary file not shown.
BIN
codes/DoubleDQN/result/20201222-144524/rewards_eval.npy
Normal file
BIN
codes/DoubleDQN/result/20201222-144524/rewards_eval.npy
Normal file
Binary file not shown.
BIN
codes/DoubleDQN/result/20201222-144524/rewards_train.npy
Normal file
BIN
codes/DoubleDQN/result/20201222-144524/rewards_train.npy
Normal file
Binary file not shown.
BIN
codes/DoubleDQN/result/20201222-144524/steps_eval.npy
Normal file
BIN
codes/DoubleDQN/result/20201222-144524/steps_eval.npy
Normal file
Binary file not shown.
BIN
codes/DoubleDQN/result/20201222-144524/steps_train.npy
Normal file
BIN
codes/DoubleDQN/result/20201222-144524/steps_train.npy
Normal file
Binary file not shown.
Binary file not shown.
Binary file not shown.
BIN
codes/DoubleDQN/result/DQN20201015-215937/rewards_eval.npy
Normal file
BIN
codes/DoubleDQN/result/DQN20201015-215937/rewards_eval.npy
Normal file
Binary file not shown.
BIN
codes/DoubleDQN/result/DQN20201015-215937/rewards_train.npy
Normal file
BIN
codes/DoubleDQN/result/DQN20201015-215937/rewards_train.npy
Normal file
Binary file not shown.
BIN
codes/DoubleDQN/result/DQN20201015-215937/steps_eval.npy
Normal file
BIN
codes/DoubleDQN/result/DQN20201015-215937/steps_eval.npy
Normal file
Binary file not shown.
BIN
codes/DoubleDQN/result/DQN20201015-215937/steps_train.npy
Normal file
BIN
codes/DoubleDQN/result/DQN20201015-215937/steps_train.npy
Normal file
Binary file not shown.
BIN
codes/DoubleDQN/result/moving_average_rewards_eval.npy
Normal file
BIN
codes/DoubleDQN/result/moving_average_rewards_eval.npy
Normal file
Binary file not shown.
BIN
codes/DoubleDQN/result/moving_average_rewards_eval.png
Normal file
BIN
codes/DoubleDQN/result/moving_average_rewards_eval.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 28 KiB |
BIN
codes/DoubleDQN/result/moving_average_rewards_train.npy
Normal file
BIN
codes/DoubleDQN/result/moving_average_rewards_train.npy
Normal file
Binary file not shown.
BIN
codes/DoubleDQN/result/moving_average_rewards_train.png
Normal file
BIN
codes/DoubleDQN/result/moving_average_rewards_train.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 39 KiB |
BIN
codes/DoubleDQN/result/rewards_eval.npy
Normal file
BIN
codes/DoubleDQN/result/rewards_eval.npy
Normal file
Binary file not shown.
BIN
codes/DoubleDQN/result/rewards_eval.png
Normal file
BIN
codes/DoubleDQN/result/rewards_eval.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 23 KiB |
BIN
codes/DoubleDQN/result/rewards_train.npy
Normal file
BIN
codes/DoubleDQN/result/rewards_train.npy
Normal file
Binary file not shown.
BIN
codes/DoubleDQN/result/rewards_train.png
Normal file
BIN
codes/DoubleDQN/result/rewards_train.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 57 KiB |
BIN
codes/DoubleDQN/result/steps_eval.npy
Normal file
BIN
codes/DoubleDQN/result/steps_eval.npy
Normal file
Binary file not shown.
BIN
codes/DoubleDQN/result/steps_eval.png
Normal file
BIN
codes/DoubleDQN/result/steps_eval.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 22 KiB |
BIN
codes/DoubleDQN/result/steps_train.npy
Normal file
BIN
codes/DoubleDQN/result/steps_train.npy
Normal file
Binary file not shown.
BIN
codes/DoubleDQN/result/steps_train.png
Normal file
BIN
codes/DoubleDQN/result/steps_train.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 56 KiB |
BIN
codes/DoubleDQN/saved_model/20201222-144524/checkpoint.pth
Normal file
BIN
codes/DoubleDQN/saved_model/20201222-144524/checkpoint.pth
Normal file
Binary file not shown.
BIN
codes/DoubleDQN/saved_model/checkpoint.pth
Normal file
BIN
codes/DoubleDQN/saved_model/checkpoint.pth
Normal file
Binary file not shown.
21
codes/DoubleDQN/utils.py
Normal file
21
codes/DoubleDQN/utils.py
Normal file
@@ -0,0 +1,21 @@
|
||||
#!/usr/bin/env python
|
||||
# coding=utf-8
|
||||
'''
|
||||
Author: John
|
||||
Email: johnjim0816@gmail.com
|
||||
Date: 2020-10-15 21:28:00
|
||||
LastEditor: John
|
||||
LastEditTime: 2020-10-15 21:50:30
|
||||
Discription:
|
||||
Environment:
|
||||
'''
|
||||
import os
|
||||
import numpy as np
|
||||
|
||||
|
||||
def save_results(rewards,moving_average_rewards,ep_steps,tag='train',result_path='./result'):
    """Save the per-episode metrics of a run as ``.npy`` files.

    Writes ``rewards_<tag>.npy``, ``moving_average_rewards_<tag>.npy`` and
    ``steps_<tag>.npy`` into ``result_path``.

    Args:
        rewards: per-episode rewards.
        moving_average_rewards: smoothed per-episode rewards.
        ep_steps: per-episode step counts.
        tag: label embedded in the file names (e.g. 'train' or 'eval').
        result_path: target directory, with or without a trailing
            separator; it is created, including missing parents.
    """
    # makedirs instead of mkdir: callers pass nested paths such as
    # result/<timestamp>/ whose parent may not exist yet.
    os.makedirs(result_path, exist_ok=True)
    # os.path.join keeps the files inside the directory even when
    # result_path has no trailing separator (as in the default).
    np.save(os.path.join(result_path, 'rewards_'+tag+'.npy'), rewards)
    np.save(os.path.join(result_path, 'moving_average_rewards_'+tag+'.npy'), moving_average_rewards)
    np.save(os.path.join(result_path, 'steps_'+tag+'.npy'), ep_steps)
|
||||
Reference in New Issue
Block a user