commit 0f38e23baf (parent bab7f6fe8c)
Author: johnjim0816
Date: 2022-07-21 00:13:44 +08:00

34 changed files with 665 additions and 422 deletions

View File

@@ -5,10 +5,11 @@ Author: JiangJi
 Email: johnjim0816@gmail.com
 Date: 2021-05-03 22:16:08
 LastEditor: JiangJi
-LastEditTime: 2021-05-03 22:23:48
+LastEditTime: 2022-07-20 23:54:40
 Discription:
 Environment:
 '''
+import torch
 import torch.optim as optim
 import torch.nn as nn
 import torch.nn.functional as F
@@ -42,7 +43,7 @@ class A2C:
     '''
     def __init__(self,n_states,n_actions,cfg) -> None:
        self.gamma = cfg.gamma
-       self.device = cfg.device
+       self.device = torch.device(cfg.device)
        self.model = ActorCritic(n_states, n_actions, cfg.hidden_size).to(self.device)
        self.optimizer = optim.Adam(self.model.parameters())
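The pattern in this hunk (repeated in the DQN and DDPG hunks below) is that the agent now receives the device as a plain string such as "cpu" or "cuda" and wraps it in torch.device itself, so a config loaded back from params.json can be passed straight through. A minimal sketch, with a hypothetical DummyCfg standing in for the parsed argparse Namespace:

    import torch

    class DummyCfg:  # hypothetical stand-in for the parsed config
        device = "cuda" if torch.cuda.is_available() else "cpu"
        gamma = 0.99

    cfg = DummyCfg()
    device = torch.device(cfg.device)   # same call the updated agent classes make
    x = torch.zeros(4, device=device)   # tensors land on the selected device
    print(device, x.device)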

View File

@@ -0,0 +1,14 @@
{
"algo_name": "A2C",
"env_name": "CartPole-v0",
"n_envs": 8,
"max_steps": 20000,
"n_steps": 5,
"gamma": 0.99,
"lr": 0.001,
"hidden_dim": 256,
"deivce": "cpu",
"result_path": "C:\\Users\\24438\\Desktop\\rl-tutorials/outputs/CartPole-v0/20220713-221850/results/",
"model_path": "C:\\Users\\24438\\Desktop\\rl-tutorials/outputs/CartPole-v0/20220713-221850/models/",
"save_fig": true
}

View File

@@ -1,14 +0,0 @@
------------------ start ------------------
algo_name : A2C
env_name : CartPole-v0
n_envs : 8
max_steps : 30000
n_steps : 5
gamma : 0.99
lr : 0.001
hidden_dim : 256
result_path : c:\Users\24438\Desktop\rl-tutorials\codes\A2C/outputs/CartPole-v0/20220713-221850/results/
model_path : c:\Users\24438\Desktop\rl-tutorials\codes\A2C/outputs/CartPole-v0/20220713-221850/models/
save_fig : True
device : cuda
------------------- end -------------------

File diff suppressed because one or more lines are too long

View File

@@ -29,14 +29,13 @@ def get_args():
     parser.add_argument('--gamma',default=0.99,type=float,help="discounted factor")
     parser.add_argument('--lr',default=1e-3,type=float,help="learning rate")
     parser.add_argument('--hidden_dim',default=256,type=int)
+    parser.add_argument('--device',default='cpu',type=str,help="cpu or cuda")
     parser.add_argument('--result_path',default=curr_path + "/outputs/" + parser.parse_args().env_name + \
             '/' + curr_time + '/results/' )
     parser.add_argument('--model_path',default=curr_path + "/outputs/" + parser.parse_args().env_name + \
             '/' + curr_time + '/models/' ) # path to save models
     parser.add_argument('--save_fig',default=True,type=bool,help="if save figure or not")
     args = parser.parse_args()
-    args.device = torch.device(
-        "cuda" if torch.cuda.is_available() else "cpu") # check GPU
     return args
 def make_envs(env_name):
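Across the task scripts in this commit, the automatic GPU check at parse time is replaced by an explicit --device flag. A minimal sketch of the resulting flow; the CUDA availability fallback at the end is an added safety assumption, not something the diff contains:

    import argparse
    import torch

    def get_args():
        parser = argparse.ArgumentParser(description="hyperparameters")
        parser.add_argument('--device', default='cpu', type=str, help="cpu or cuda")
        args = parser.parse_args()
        # assumption, not in the diff: fall back to CPU if cuda was requested but is unavailable
        if args.device == 'cuda' and not torch.cuda.is_available():
            args.device = 'cpu'
        return args

    if __name__ == "__main__":
        print(get_args().device)  # e.g. python task0.py --device cuda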

View File

@@ -73,11 +73,11 @@ class Critic(nn.Module):
         return x
 class DDPG:
     def __init__(self, n_states, n_actions, cfg):
-        self.device = cfg.device
-        self.critic = Critic(n_states, n_actions, cfg.hidden_dim).to(cfg.device)
-        self.actor = Actor(n_states, n_actions, cfg.hidden_dim).to(cfg.device)
-        self.target_critic = Critic(n_states, n_actions, cfg.hidden_dim).to(cfg.device)
-        self.target_actor = Actor(n_states, n_actions, cfg.hidden_dim).to(cfg.device)
+        self.device = torch.device(cfg.device)
+        self.critic = Critic(n_states, n_actions, cfg.hidden_dim).to(self.device)
+        self.actor = Actor(n_states, n_actions, cfg.hidden_dim).to(self.device)
+        self.target_critic = Critic(n_states, n_actions, cfg.hidden_dim).to(self.device)
+        self.target_actor = Actor(n_states, n_actions, cfg.hidden_dim).to(self.device)
         # copy parameters to the target networks
         for target_param, param in zip(self.target_critic.parameters(), self.critic.parameters()):
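The loop this hunk ends on hard-copies the online weights into the target networks at construction time. A self-contained sketch of that pattern, plus the soft (Polyak) update that the soft_tau entry in the DDPG config suggests; both are written from general DDPG practice rather than taken from this diff:

    import torch.nn as nn

    critic = nn.Linear(4, 1)         # toy stand-ins for the real Critic/Actor networks
    target_critic = nn.Linear(4, 1)

    # hard copy once at construction, as in the hunk above
    for target_param, param in zip(target_critic.parameters(), critic.parameters()):
        target_param.data.copy_(param.data)

    # soft update applied during training; soft_tau matches the config value 0.01
    soft_tau = 0.01
    for target_param, param in zip(target_critic.parameters(), critic.parameters()):
        target_param.data.copy_(target_param.data * (1.0 - soft_tau) + param.data * soft_tau)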

View File

@@ -0,0 +1,18 @@
{
"algo_name": "DDPG",
"env_name": "Pendulum-v1",
"train_eps": 300,
"test_eps": 20,
"gamma": 0.99,
"critic_lr": 0.001,
"actor_lr": 0.0001,
"memory_capacity": 8000,
"batch_size": 128,
"target_update": 2,
"soft_tau": 0.01,
"hidden_dim": 256,
"deivce": "cpu",
"result_path": "C:\\Users\\24438\\Desktop\\rl-tutorials/outputs/DDPG/outputs/Pendulum-v1/20220713-225402/results//",
"model_path": "C:\\Users\\24438\\Desktop\\rl-tutorials/outputs/DDPG/outputs/Pendulum-v1/20220713-225402/models/",
"save_fig": true
}

View File

@@ -1,18 +0,0 @@
------------------ start ------------------
algo_name : DDPG
env_name : Pendulum-v1
train_eps : 300
test_eps : 20
gamma : 0.99
critic_lr : 0.001
actor_lr : 0.0001
memory_capacity : 8000
batch_size : 128
target_update : 2
soft_tau : 0.01
hidden_dim : 256
result_path : c:\Users\24438\Desktop\rl-tutorials\codes\DDPG/outputs/Pendulum-v1/20220713-225402/results/
model_path : c:\Users\24438\Desktop\rl-tutorials\codes\DDPG/outputs/Pendulum-v1/20220713-225402/models/
save_fig : True
device : cuda
------------------- end -------------------

View File

@@ -5,7 +5,7 @@
 @Email: johnjim0816@gmail.com
 @Date: 2020-06-11 20:58:21
 @LastEditor: John
-LastEditTime: 2022-07-13 22:53:11
+LastEditTime: 2022-07-21 00:05:41
 @Discription:
 @Environment: python 3.7.7
 '''
@@ -41,14 +41,13 @@ def get_args():
     parser.add_argument('--target_update',default=2,type=int)
     parser.add_argument('--soft_tau',default=1e-2,type=float)
     parser.add_argument('--hidden_dim',default=256,type=int)
+    parser.add_argument('--device',default='cpu',type=str,help="cpu or cuda")
     parser.add_argument('--result_path',default=curr_path + "/outputs/" + parser.parse_args().env_name + \
             '/' + curr_time + '/results/' )
     parser.add_argument('--model_path',default=curr_path + "/outputs/" + parser.parse_args().env_name + \
             '/' + curr_time + '/models/' ) # path to save models
     parser.add_argument('--save_fig',default=True,type=bool,help="if save figure or not")
     args = parser.parse_args()
-    args.device = torch.device(
-        "cuda" if torch.cuda.is_available() else "cpu") # check GPU
     return args
 def env_agent_config(cfg,seed=1):
@@ -122,11 +121,11 @@ if __name__ == "__main__":
     save_args(cfg)
     agent.save(path=cfg.model_path)
     save_results(rewards, ma_rewards, tag='train', path=cfg.result_path)
-    plot_rewards(rewards, ma_rewards, cfg, tag="train")  # 画出结果
+    plot_rewards(rewards, ma_rewards, cfg, tag="train")
     # testing
     env,agent = env_agent_config(cfg,seed=10)
     agent.load(path=cfg.model_path)
     rewards,ma_rewards = test(cfg,env,agent)
     save_results(rewards,ma_rewards,tag = 'test',path = cfg.result_path)
-    plot_rewards(rewards, ma_rewards, cfg, tag="test")  # 画出结果
+    plot_rewards(rewards, ma_rewards, cfg, tag="test")

View File

@@ -5,7 +5,7 @@
 @Email: johnjim0816@gmail.com
 @Date: 2020-06-12 00:50:49
 @LastEditor: John
-LastEditTime: 2022-07-13 00:08:18
+LastEditTime: 2022-07-20 23:57:16
 @Discription:
 @Environment: python 3.7.7
 '''
@@ -64,8 +64,8 @@ class ReplayBuffer:
 class DQN:
     def __init__(self, n_states,n_actions,cfg):
-        self.n_actions = n_actions  # 总的动作个数
-        self.device = cfg.device  # 设备cpu或gpu等
+        self.n_actions = n_actions
+        self.device = torch.device(cfg.device)  # cpu or cuda
         self.gamma = cfg.gamma  # reward discount factor
         # parameters of the e-greedy policy
         self.frame_idx = 0  # counter for epsilon decay

View File

@@ -0,0 +1,19 @@
{
"algo_name": "DQN",
"env_name": "CartPole-v0",
"train_eps": 200,
"test_eps": 20,
"gamma": 0.95,
"epsilon_start": 0.95,
"epsilon_end": 0.01,
"epsilon_decay": 500,
"lr": 0.0001,
"memory_capacity": 100000,
"batch_size": 64,
"target_update": 4,
"hidden_dim": 256,
"deivce": "cpu",
"result_path": "C:\\Users\\24438\\Desktop\\rl-tutorials/outputs/CartPole-v0/20220713-211653/results/",
"model_path": "C:\\Users\\24438\\Desktop\\rl-tutorials/outputs/CartPole-v0/20220713-211653/models/",
"save_fig": true
}

View File

@@ -1,19 +0,0 @@
------------------ start ------------------
algo_name : DQN
env_name : CartPole-v0
train_eps : 200
test_eps : 20
gamma : 0.95
epsilon_start : 0.95
epsilon_end : 0.01
epsilon_decay : 500
lr : 0.0001
memory_capacity : 100000
batch_size : 64
target_update : 4
hidden_dim : 256
result_path : C:\Users\24438\Desktop\rl-tutorials\codes\DQN/outputs/CartPole-v0/20220713-211653/results/
model_path : C:\Users\24438\Desktop\rl-tutorials\codes\DQN/outputs/CartPole-v0/20220713-211653/models/
save_fig : True
device : cuda
------------------- end -------------------

View File

@@ -1,12 +1,9 @@
-from lib2to3.pytree import type_repr
-import sys
-import os
-from parso import parse
+import sys,os
+curr_path = os.path.dirname(os.path.abspath(__file__))  # current path
+parent_path = os.path.dirname(curr_path)  # parent path
+sys.path.append(parent_path)  # add to system path
 import torch.nn as nn
 import torch.nn.functional as F
-curr_path = os.path.dirname(os.path.abspath(__file__))  # absolute path of the current file
-parent_path = os.path.dirname(curr_path)  # parent path
-sys.path.append(parent_path)  # add the path to the system path
 import gym
 import torch
@@ -35,14 +32,13 @@ def get_args():
     parser.add_argument('--batch_size',default=64,type=int)
     parser.add_argument('--target_update',default=4,type=int)
     parser.add_argument('--hidden_dim',default=256,type=int)
+    parser.add_argument('--device',default='cpu',type=str,help="cpu or cuda")
     parser.add_argument('--result_path',default=curr_path + "/outputs/" + parser.parse_args().env_name + \
             '/' + curr_time + '/results/' )
     parser.add_argument('--model_path',default=curr_path + "/outputs/" + parser.parse_args().env_name + \
             '/' + curr_time + '/models/' ) # path to save models
     parser.add_argument('--save_fig',default=True,type=bool,help="if save figure or not")
     args = parser.parse_args()
-    args.device = torch.device(
-        "cuda" if torch.cuda.is_available() else "cpu") # check GPU
     return args
 def env_agent_config(cfg,seed=1):

View File

@@ -5,7 +5,7 @@
 @Email: johnjim0816@gmail.com
 @Date: 2020-06-12 00:50:49
 @LastEditor: John
-LastEditTime: 2021-11-19 18:07:09
+LastEditTime: 2022-07-21 00:08:26
 @Discription:
 @Environment: python 3.7.7
 '''
@@ -65,7 +65,7 @@ class MLP(nn.Module):
 class DoubleDQN:
     def __init__(self, n_states, n_actions, cfg):
         self.n_actions = n_actions  # total number of actions
-        self.device = cfg.device  # device: cpu or cuda
+        self.device = torch.device(cfg.device)  # device: cpu or cuda
         self.gamma = cfg.gamma
         # parameters of the e-greedy policy
         self.actions_count = 0
@@ -88,8 +88,7 @@ class DoubleDQN:
         '''choose an action
         '''
         self.actions_count += 1
-        self.epsilon = self.epsilon_end + (self.epsilon_start - self.epsilon_end) * \
-            math.exp(-1. * self.actions_count / self.epsilon_decay)
+        self.epsilon = self.epsilon_end + (self.epsilon_start - self.epsilon_end) * math.exp(-1. * self.actions_count / self.epsilon_decay)
         if random.random() > self.epsilon:
             with torch.no_grad():
                 # convert the state to a tensor first (its elements are originally float64)
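The single-line rewrite above is the usual exponential epsilon schedule, epsilon = epsilon_end + (epsilon_start - epsilon_end) * exp(-t / epsilon_decay). A quick check of the values it produces, using the defaults from the DoubleDQN config in this commit:

    import math

    epsilon_start, epsilon_end, epsilon_decay = 0.95, 0.01, 500

    def epsilon(t):
        return epsilon_end + (epsilon_start - epsilon_end) * math.exp(-1. * t / epsilon_decay)

    for t in (0, 500, 2000):
        print(t, round(epsilon(t), 3))  # 0.95, then ~0.356, then ~0.027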

Binary file not shown (before: 37 KiB).

Binary file not shown (before: 39 KiB).

View File

@@ -0,0 +1 @@
{"algo_name": "DoubleDQN", "env_name": "CartPole-v0", "train_eps": 200, "test_eps": 20, "gamma": 0.99, "epsilon_start": 0.95, "epsilon_end": 0.01, "epsilon_decay": 500, "lr": 0.0001, "memory_capacity": 100000, "batch_size": 64, "target_update": 2, "hidden_dim": 256, "device": "cuda", "result_path": "C:\\Users\\24438\\Desktop\\rl-tutorials\\codes\\DoubleDQN/outputs/CartPole-v0/20220721-000842/results/", "model_path": "C:\\Users\\24438\\Desktop\\rl-tutorials\\codes\\DoubleDQN/outputs/CartPole-v0/20220721-000842/models/", "save_fig": true}

Binary file not shown (after: 38 KiB).

Binary file not shown (after: 44 KiB).

View File

@@ -5,55 +5,49 @@ Author: JiangJi
 Email: johnjim0816@gmail.com
 Date: 2021-11-07 18:10:37
 LastEditor: JiangJi
-LastEditTime: 2021-12-29 15:02:30
+LastEditTime: 2022-07-21 00:08:38
 Discription:
 '''
 import sys,os
-curr_path = os.path.dirname(os.path.abspath(__file__))  # 当前文件所在绝对路径
-parent_path = os.path.dirname(curr_path)  # 父路径
-sys.path.append(parent_path)  # 添加路径到系统路径
+curr_path = os.path.dirname(os.path.abspath(__file__))  # current path
+parent_path = os.path.dirname(curr_path)  # parent path
+sys.path.append(parent_path)  # add to system path
 import gym
 import torch
 import datetime
-from common.utils import save_results, make_dir
-from common.utils import plot_rewards
+import argparse
+from common.utils import save_results,make_dir
+from common.utils import plot_rewards,save_args
 from DoubleDQN.double_dqn import DoubleDQN
-curr_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")  # obtain current time
-class Config:
-    def __init__(self):
-        ################################## environment hyperparameters ###################################
-        self.algo_name = 'DoubleDQN'  # algorithm name
-        self.env_name = 'CartPole-v0'  # environment name
-        self.device = torch.device(
-            "cuda" if torch.cuda.is_available() else "cpu")  # check GPU
-        self.train_eps = 200  # number of training episodes
-        self.test_eps = 30  # number of testing episodes
-        ################################################################################
-        ################################## algorithm hyperparameters ###################################
-        self.gamma = 0.95  # discount factor
-        self.epsilon_start = 0.95  # initial epsilon of the e-greedy policy
-        self.epsilon_end = 0.01  # final epsilon of the e-greedy policy
-        self.epsilon_decay = 500  # decay rate of epsilon
-        self.lr = 0.0001  # learning rate
-        self.memory_capacity = 100000  # capacity of the replay buffer
-        self.batch_size = 64  # batch size for mini-batch SGD
-        self.target_update = 2  # update frequency of the target network
-        self.hidden_dim = 256  # hidden layer dimension
-        ################################################################################
-        ################################# parameters for saving results ##############################
-        self.result_path = curr_path + "/outputs/" + self.env_name + \
-            '/' + curr_time + '/results/'  # path to save results
-        self.model_path = curr_path + "/outputs/" + self.env_name + \
-            '/' + curr_time + '/models/'  # path to save models
-        self.save = True  # whether to save figures
-        ################################################################################
+def get_args():
+    """ Hyperparameters
+    """
+    curr_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")  # Obtain current time
+    parser = argparse.ArgumentParser(description="hyperparameters")
+    parser.add_argument('--algo_name',default='DoubleDQN',type=str,help="name of algorithm")
+    parser.add_argument('--env_name',default='CartPole-v0',type=str,help="name of environment")
+    parser.add_argument('--train_eps',default=200,type=int,help="episodes of training")
+    parser.add_argument('--test_eps',default=20,type=int,help="episodes of testing")
+    parser.add_argument('--gamma',default=0.99,type=float,help="discounted factor")
+    parser.add_argument('--epsilon_start',default=0.95,type=float,help="initial value of epsilon")
+    parser.add_argument('--epsilon_end',default=0.01,type=float,help="final value of epsilon")
+    parser.add_argument('--epsilon_decay',default=500,type=int,help="decay rate of epsilon")
+    parser.add_argument('--lr',default=0.0001,type=float,help="learning rate")
+    parser.add_argument('--memory_capacity',default=100000,type=int,help="memory capacity")
+    parser.add_argument('--batch_size',default=64,type=int)
+    parser.add_argument('--target_update',default=2,type=int)
+    parser.add_argument('--hidden_dim',default=256,type=int)
+    parser.add_argument('--device',default='cpu',type=str,help="cpu or cuda")
+    parser.add_argument('--result_path',default=curr_path + "/outputs/" + parser.parse_args().env_name + \
+            '/' + curr_time + '/results/' )
+    parser.add_argument('--model_path',default=curr_path + "/outputs/" + parser.parse_args().env_name + \
+            '/' + curr_time + '/models/' )  # path to save models
+    parser.add_argument('--save_fig',default=True,type=bool,help="if save figure or not")
+    args = parser.parse_args()
+    return args
 def env_agent_config(cfg,seed=1):
@@ -65,8 +59,8 @@ def env_agent_config(cfg,seed=1):
     return env,agent
 def train(cfg,env,agent):
-    print('开始训练!')
-    print(f'环境:{cfg.env_name}, 算法:{cfg.algo_name}, 设备:{cfg.device}')
+    print('Start training!')
+    print(f'Env:{cfg.env_name}, Algorithm:{cfg.algo_name}, Device:{cfg.device}')
     rewards = []  # record rewards for all episodes
     ma_rewards = []  # record moving-average rewards for all episodes
     for i_ep in range(cfg.train_eps):
@@ -84,20 +78,19 @@ def train(cfg,env,agent):
         if i_ep % cfg.target_update == 0:
             agent.target_net.load_state_dict(agent.policy_net.state_dict())
         if (i_ep+1)%10 == 0:
-            print(f'回合:{i_ep+1}/{cfg.train_eps},奖励:{ep_reward}')
+            print(f'Env:{i_ep+1}/{cfg.train_eps}, Reward:{ep_reward:.2f}')
         rewards.append(ep_reward)
         if ma_rewards:
             ma_rewards.append(
                 0.9*ma_rewards[-1]+0.1*ep_reward)
         else:
             ma_rewards.append(ep_reward)
-    print('完成训练!')
-    env.close()
+    print('Finish training!')
     return rewards,ma_rewards
 def test(cfg,env,agent):
-    print('开始测试!')
-    print(f'环境:{cfg.env_name}, 算法:{cfg.algo_name}, 设备:{cfg.device}')
+    print('Start testing')
+    print(f'Env:{cfg.env_name}, Algorithm:{cfg.algo_name}, Device:{cfg.device}')
     ############# testing does not need the epsilon-greedy policy, so the corresponding values are set to 0 ###############
     cfg.epsilon_start = 0.0  # initial epsilon of the e-greedy policy
     cfg.epsilon_end = 0.0  # final epsilon of the e-greedy policy
@@ -120,25 +113,24 @@ def test(cfg,env,agent):
             ma_rewards.append(ma_rewards[-1]*0.9+ep_reward*0.1)
         else:
             ma_rewards.append(ep_reward)
-        print(f"回合:{i_ep+1}/{cfg.test_eps},奖励:{ep_reward:.1f}")
-    print('完成测试!')
-    env.close()
+        print(f"Epside:{i_ep+1}/{cfg.test_eps}, Reward:{ep_reward:.1f}")
+    print('Finish testing!')
     return rewards,ma_rewards
 if __name__ == "__main__":
-    cfg = Config()
-    # 训练
-    env, agent = env_agent_config(cfg)
+    cfg = get_args()
+    print(cfg.device)
+    # training
+    env,agent = env_agent_config(cfg,seed=1)
     rewards, ma_rewards = train(cfg, env, agent)
-    make_dir(cfg.result_path, cfg.model_path)  # 创建保存结果和模型路径的文件夹
-    agent.save(path=cfg.model_path)  # 保存模型
-    save_results(rewards, ma_rewards, tag='train',
-                 path=cfg.result_path)  # 保存结果
-    plot_rewards(rewards, ma_rewards, cfg, tag="train")  # 画出结果
-    # 测试
-    env, agent = env_agent_config(cfg)
-    agent.load(path=cfg.model_path)  # 导入模型
-    rewards, ma_rewards = test(cfg, env, agent)
-    save_results(rewards, ma_rewards, tag='test',
-                 path=cfg.result_path)  # 保存结果
-    plot_rewards(rewards, ma_rewards, cfg, tag="test")  # 画出结果
+    make_dir(cfg.result_path, cfg.model_path)
+    save_args(cfg)
+    agent.save(path=cfg.model_path)
+    save_results(rewards, ma_rewards, tag='train', path=cfg.result_path)
+    plot_rewards(rewards, ma_rewards, cfg, tag="train")
+    # testing
+    env,agent = env_agent_config(cfg,seed=10)
+    agent.load(path=cfg.model_path)
+    rewards,ma_rewards = test(cfg,env,agent)
+    save_results(rewards,ma_rewards,tag = 'test',path = cfg.result_path)
+    plot_rewards(rewards, ma_rewards, cfg, tag="test")

View File

@@ -16,7 +16,7 @@ curr_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")  # obtain current time
 class Config:
     def __init__(self) -> None:
         ################################## environment hyperparameters ###################################
-        self.algo_name = "DQN"  # algorithm name
+        self.algo_name = "PPO"  # algorithm name
         self.env_name = 'CartPole-v0'  # environment name
         self.continuous = False  # whether the environment has a continuous action space
         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # check GPU

View File

@@ -5,7 +5,7 @@ Author: John
 Email: johnjim0816@gmail.com
 Date: 2021-03-12 16:02:24
 LastEditor: John
-LastEditTime: 2022-07-13 22:15:46
+LastEditTime: 2022-07-20 23:53:34
 Discription:
 Environment:
 '''
@@ -14,6 +14,7 @@ import numpy as np
 from pathlib import Path
 import matplotlib.pyplot as plt
 import seaborn as sns
+import json
 from matplotlib.font_manager import FontProperties  # import font module
@@ -101,11 +102,8 @@ def del_empty_dir(*paths):
 def save_args(args):
     # save parameters
-    argsDict = args.__dict__
-    with open(args.result_path+'params.txt', 'w') as f:
-        f.writelines('------------------ start ------------------' + '\n')
-        for eachArg, value in argsDict.items():
-            f.writelines(eachArg + ' : ' + str(value) + '\n')
-        f.writelines('------------------- end -------------------')
+    args_dict = vars(args)
+    with open(args.result_path+'params.json', 'w') as fp:
+        json.dump(args_dict, fp)
     print("Parameters saved!")

notebooks/A2C.ipynb (new file, 370 lines) — file diff suppressed because one or more lines are too long.

View File

@@ -0,0 +1,153 @@
# This code comes from OpenAI Baselines and is used for running environments in parallel processes
# https://github.com/openai/baselines/tree/master/baselines/common/vec_env
import numpy as np
from multiprocessing import Process, Pipe
def worker(remote, parent_remote, env_fn_wrapper):
parent_remote.close()
env = env_fn_wrapper.x()
while True:
cmd, data = remote.recv()
if cmd == 'step':
ob, reward, done, info = env.step(data)
if done:
ob = env.reset()
remote.send((ob, reward, done, info))
elif cmd == 'reset':
ob = env.reset()
remote.send(ob)
elif cmd == 'reset_task':
ob = env.reset_task()
remote.send(ob)
elif cmd == 'close':
remote.close()
break
elif cmd == 'get_spaces':
remote.send((env.observation_space, env.action_space))
else:
raise NotImplementedError
class VecEnv(object):
"""
An abstract asynchronous, vectorized environment.
"""
def __init__(self, num_envs, observation_space, action_space):
self.num_envs = num_envs
self.observation_space = observation_space
self.action_space = action_space
def reset(self):
"""
Reset all the environments and return an array of
observations, or a tuple of observation arrays.
If step_async is still doing work, that work will
be cancelled and step_wait() should not be called
until step_async() is invoked again.
"""
pass
def step_async(self, actions):
"""
Tell all the environments to start taking a step
with the given actions.
Call step_wait() to get the results of the step.
You should not call this if a step_async run is
already pending.
"""
pass
def step_wait(self):
"""
Wait for the step taken with step_async().
Returns (obs, rews, dones, infos):
- obs: an array of observations, or a tuple of
arrays of observations.
- rews: an array of rewards
- dones: an array of "episode done" booleans
- infos: a sequence of info objects
"""
pass
def close(self):
"""
Clean up the environments' resources.
"""
pass
def step(self, actions):
self.step_async(actions)
return self.step_wait()
class CloudpickleWrapper(object):
"""
Uses cloudpickle to serialize contents (otherwise multiprocessing tries to use pickle)
"""
def __init__(self, x):
self.x = x
def __getstate__(self):
import cloudpickle
return cloudpickle.dumps(self.x)
def __setstate__(self, ob):
import pickle
self.x = pickle.loads(ob)
class SubprocVecEnv(VecEnv):
def __init__(self, env_fns, spaces=None):
"""
envs: list of gym environments to run in subprocesses
"""
self.waiting = False
self.closed = False
nenvs = len(env_fns)
self.nenvs = nenvs
self.remotes, self.work_remotes = zip(*[Pipe() for _ in range(nenvs)])
self.ps = [Process(target=worker, args=(work_remote, remote, CloudpickleWrapper(env_fn)))
for (work_remote, remote, env_fn) in zip(self.work_remotes, self.remotes, env_fns)]
for p in self.ps:
p.daemon = True # if the main process crashes, we should not cause things to hang
p.start()
for remote in self.work_remotes:
remote.close()
self.remotes[0].send(('get_spaces', None))
observation_space, action_space = self.remotes[0].recv()
VecEnv.__init__(self, len(env_fns), observation_space, action_space)
def step_async(self, actions):
for remote, action in zip(self.remotes, actions):
remote.send(('step', action))
self.waiting = True
def step_wait(self):
results = [remote.recv() for remote in self.remotes]
self.waiting = False
obs, rews, dones, infos = zip(*results)
return np.stack(obs), np.stack(rews), np.stack(dones), infos
def reset(self):
for remote in self.remotes:
remote.send(('reset', None))
return np.stack([remote.recv() for remote in self.remotes])
def reset_task(self):
for remote in self.remotes:
remote.send(('reset_task', None))
return np.stack([remote.recv() for remote in self.remotes])
def close(self):
if self.closed:
return
if self.waiting:
for remote in self.remotes:
remote.recv()
for remote in self.remotes:
remote.send(('close', None))
for p in self.ps:
p.join()
self.closed = True
def __len__(self):
return self.nenvs
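A minimal usage sketch for SubprocVecEnv, assuming the classic gym API (reset returns only the observation, step returns four values) that this repo targets and that the script sits next to this file; the make_env thunks are illustrative and play the same role as make_envs in the A2C task script:

    import gym
    import numpy as np
    from multiprocessing_env import SubprocVecEnv  # assumption: run from the directory containing this file

    def make_env(env_name):
        def _thunk():
            return gym.make(env_name)
        return _thunk

    if __name__ == "__main__":  # guard required: worker processes re-import this module
        envs = SubprocVecEnv([make_env("CartPole-v0") for _ in range(8)])
        obs = envs.reset()                       # stacked observations, shape (8, 4)
        actions = np.array([envs.action_space.sample() for _ in range(8)])
        obs, rewards, dones, infos = envs.step(actions)
        print(obs.shape, rewards.shape)          # (8, 4) (8,)
        envs.close()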