update
This commit is contained in:
@@ -123,14 +123,15 @@ def train(cfg,envs):
|
||||
loss.backward()
|
||||
optimizer.step()
|
||||
print('Finish training!')
|
||||
return test_rewards, test_ma_rewards
|
||||
return {'rewards':test_rewards,'ma_rewards':test_ma_rewards}
|
||||
if __name__ == "__main__":
|
||||
cfg = get_args()
|
||||
envs = [make_envs(cfg.env_name) for i in range(cfg.n_envs)]
|
||||
envs = SubprocVecEnv(envs)
|
||||
# training
|
||||
rewards,ma_rewards = train(cfg,envs)
|
||||
res_dic = train(cfg,envs)
|
||||
make_dir(cfg.result_path,cfg.model_path)
|
||||
save_args(cfg)
|
||||
save_results(rewards, ma_rewards, tag='train', path=cfg.result_path) # 保存结果
|
||||
plot_rewards(rewards, ma_rewards, cfg, tag="train") # 画出结果
|
||||
save_results(res_dic, tag='train',
|
||||
path=cfg.result_path)
|
||||
plot_rewards(res_dic['rewards'], res_dic['ma_rewards'], cfg, tag="train") # 画出结果
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
@Email: johnjim0816@gmail.com
|
||||
@Date: 2020-06-11 20:58:21
|
||||
@LastEditor: John
|
||||
LastEditTime: 2022-07-21 00:05:41
|
||||
LastEditTime: 2022-07-21 21:51:34
|
||||
@Description:
|
||||
@Environment: python 3.7.7
|
||||
'''
|
||||
@@ -86,7 +86,7 @@ def train(cfg, env, agent):
|
||||
else:
|
||||
ma_rewards.append(ep_reward)
|
||||
print('Finish training!')
|
||||
return rewards, ma_rewards
|
||||
return {'rewards':rewards,'ma_rewards':ma_rewards}
|
||||
|
||||
def test(cfg, env, agent):
|
||||
print('Start testing')
|
||||
@@ -111,21 +111,23 @@ def test(cfg, env, agent):
|
||||
ma_rewards.append(ep_reward)
|
||||
print(f"Epside:{i_ep+1}/{cfg.test_eps}, Reward:{ep_reward:.1f}")
|
||||
print('Finish testing!')
|
||||
return rewards, ma_rewards
|
||||
return {'rewards':rewards,'ma_rewards':ma_rewards}
|
||||
if __name__ == "__main__":
|
||||
cfg = get_args()
|
||||
# training
|
||||
env,agent = env_agent_config(cfg,seed=1)
|
||||
rewards, ma_rewards = train(cfg, env, agent)
|
||||
res_dic = train(cfg, env, agent)
|
||||
make_dir(cfg.result_path, cfg.model_path)
|
||||
save_args(cfg)
|
||||
agent.save(path=cfg.model_path)
|
||||
save_results(rewards, ma_rewards, tag='train', path=cfg.result_path)
|
||||
plot_rewards(rewards, ma_rewards, cfg, tag="train")
|
||||
save_results(res_dic, tag='train',
|
||||
path=cfg.result_path)
|
||||
plot_rewards(res_dic['rewards'], res_dic['ma_rewards'], cfg, tag="train")
|
||||
# testing
|
||||
env,agent = env_agent_config(cfg,seed=10)
|
||||
agent.load(path=cfg.model_path)
|
||||
rewards,ma_rewards = test(cfg,env,agent)
|
||||
save_results(rewards,ma_rewards,tag = 'test',path = cfg.result_path)
|
||||
plot_rewards(rewards, ma_rewards, cfg, tag="test")
|
||||
res_dic = test(cfg,env,agent)
|
||||
save_results(res_dic, tag='test',
|
||||
path=cfg.result_path)
|
||||
plot_rewards(res_dic['rewards'], res_dic['ma_rewards'], cfg, tag="test")
|
||||
|
||||
|
||||
@@ -10,7 +10,7 @@ import torch
|
||||
import datetime
|
||||
import numpy as np
|
||||
import argparse
|
||||
from common.utils import save_results_1, make_dir
|
||||
from common.utils import save_results, make_dir
|
||||
from common.utils import plot_rewards,save_args
|
||||
from dqn import DQN
|
||||
|
||||
@@ -95,8 +95,8 @@ def train(cfg, env, agent):
|
||||
|
||||
|
||||
def test(cfg, env, agent):
|
||||
print('开始测试!')
|
||||
print(f'环境:{cfg.env_name}, 算法:{cfg.algo_name}, 设备:{cfg.device}')
|
||||
print('Start testing!')
|
||||
print(f'Env:{cfg.env_name}, A{cfg.algo_name}, 设备:{cfg.device}')
|
||||
############# 由于测试不需要使用epsilon-greedy策略,所以相应的值设置为0 ###############
|
||||
cfg.epsilon_start = 0.0 # e-greedy策略中初始epsilon
|
||||
cfg.epsilon_end = 0.0 # e-greedy策略中的终止epsilon
|
||||
@@ -123,7 +123,7 @@ def test(cfg, env, agent):
|
||||
else:
|
||||
ma_rewards.append(ep_reward)
|
||||
print(f'Episode:{i_ep+1}/{cfg.test_eps}, Reward:{ep_reward:.2f}, Step:{ep_step:.2f}')
|
||||
print('完成测试!')
|
||||
print('Finish testing')
|
||||
env.close()
|
||||
return {'rewards':rewards,'ma_rewards':ma_rewards,'steps':steps}
|
||||
|
||||
@@ -133,16 +133,16 @@ if __name__ == "__main__":
|
||||
# 训练
|
||||
env, agent = env_agent_config(cfg)
|
||||
res_dic = train(cfg, env, agent)
|
||||
make_dir(cfg.result_path, cfg.model_path) # 创建保存结果和模型路径的文件夹
|
||||
save_args(cfg)
|
||||
agent.save(path=cfg.model_path) # 保存模型
|
||||
save_results_1(res_dic, tag='train',
|
||||
path=cfg.result_path) # 保存结果
|
||||
plot_rewards(res_dic['rewards'], res_dic['ma_rewards'], cfg, tag="train") # 画出结果
|
||||
make_dir(cfg.result_path, cfg.model_path)
|
||||
save_args(cfg) # save parameters
|
||||
agent.save(path=cfg.model_path) # save model
|
||||
save_results(res_dic, tag='train',
|
||||
path=cfg.result_path)
|
||||
plot_rewards(res_dic['rewards'], res_dic['ma_rewards'], cfg, tag="train")
|
||||
# 测试
|
||||
env, agent = env_agent_config(cfg)
|
||||
agent.load(path=cfg.model_path) # 导入模型
|
||||
res_dic = test(cfg, env, agent)
|
||||
save_results_1(res_dic, tag='test',
|
||||
save_results(res_dic, tag='test',
|
||||
path=cfg.result_path) # 保存结果
|
||||
plot_rewards(res_dic['rewards'], res_dic['ma_rewards'],cfg, tag="test") # 画出结果
|
||||
|
||||
Binary file not shown.
@@ -1 +0,0 @@
|
||||
{"algo_name": "DoubleDQN", "env_name": "CartPole-v0", "train_eps": 200, "test_eps": 20, "gamma": 0.99, "epsilon_start": 0.95, "epsilon_end": 0.01, "epsilon_decay": 500, "lr": 0.0001, "memory_capacity": 100000, "batch_size": 64, "target_update": 2, "hidden_dim": 256, "device": "cuda", "result_path": "C:\\Users\\24438\\Desktop\\rl-tutorials\\codes\\DoubleDQN/outputs/CartPole-v0/20220721-000842/results/", "model_path": "C:\\Users\\24438\\Desktop\\rl-tutorials\\codes\\DoubleDQN/outputs/CartPole-v0/20220721-000842/models/", "save_fig": true}
|
||||
Binary file not shown.
Binary file not shown.
|
Before Width: | Height: | Size: 38 KiB |
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
Before Width: | Height: | Size: 44 KiB |
Binary file not shown.
@@ -0,0 +1,19 @@
|
||||
{
|
||||
"algo_name": "DoubleDQN",
|
||||
"env_name": "CartPole-v0",
|
||||
"train_eps": 200,
|
||||
"test_eps": 20,
|
||||
"gamma": 0.99,
|
||||
"epsilon_start": 0.95,
|
||||
"epsilon_end": 0.01,
|
||||
"epsilon_decay": 500,
|
||||
"lr": 0.0001,
|
||||
"memory_capacity": 100000,
|
||||
"batch_size": 64,
|
||||
"target_update": 2,
|
||||
"hidden_dim": 256,
|
||||
"device": "cuda",
|
||||
"result_path": "C:\\Users\\24438\\Desktop\\rl-tutorials\\codes\\DoubleDQN/outputs/CartPole-v0/20220721-215416/results/",
|
||||
"model_path": "C:\\Users\\24438\\Desktop\\rl-tutorials\\codes\\DoubleDQN/outputs/CartPole-v0/20220721-215416/models/",
|
||||
"save_fig": true
|
||||
}
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
After Width: | Height: | Size: 44 KiB |
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
After Width: | Height: | Size: 44 KiB |
@@ -5,7 +5,7 @@ Author: JiangJi
|
||||
Email: johnjim0816@gmail.com
|
||||
Date: 2021-11-07 18:10:37
|
||||
LastEditor: JiangJi
|
||||
LastEditTime: 2022-07-21 00:08:38
|
||||
LastEditTime: 2022-07-21 21:52:31
|
||||
Description:
|
||||
'''
|
||||
import sys,os
|
||||
@@ -86,7 +86,7 @@ def train(cfg,env,agent):
|
||||
else:
|
||||
ma_rewards.append(ep_reward)
|
||||
print('Finish training!')
|
||||
return rewards,ma_rewards
|
||||
return {'rewards':rewards,'ma_rewards':ma_rewards}
|
||||
|
||||
def test(cfg,env,agent):
|
||||
print('Start testing')
|
||||
@@ -115,22 +115,24 @@ def test(cfg,env,agent):
|
||||
ma_rewards.append(ep_reward)
|
||||
print(f"Epside:{i_ep+1}/{cfg.test_eps}, Reward:{ep_reward:.1f}")
|
||||
print('Finish testing!')
|
||||
return rewards,ma_rewards
|
||||
return {'rewards':rewards,'ma_rewards':ma_rewards}
|
||||
|
||||
if __name__ == "__main__":
|
||||
cfg = get_args()
|
||||
print(cfg.device)
|
||||
# training
|
||||
env,agent = env_agent_config(cfg,seed=1)
|
||||
rewards, ma_rewards = train(cfg, env, agent)
|
||||
res_dic = train(cfg, env, agent)
|
||||
make_dir(cfg.result_path, cfg.model_path)
|
||||
save_args(cfg)
|
||||
agent.save(path=cfg.model_path)
|
||||
save_results(rewards, ma_rewards, tag='train', path=cfg.result_path)
|
||||
plot_rewards(rewards, ma_rewards, cfg, tag="train")
|
||||
save_results(res_dic, tag='train',
|
||||
path=cfg.result_path)
|
||||
plot_rewards(res_dic['rewards'], res_dic['ma_rewards'], cfg, tag="train")
|
||||
# testing
|
||||
env,agent = env_agent_config(cfg,seed=10)
|
||||
agent.load(path=cfg.model_path)
|
||||
rewards,ma_rewards = test(cfg,env,agent)
|
||||
save_results(rewards,ma_rewards,tag = 'test',path = cfg.result_path)
|
||||
plot_rewards(rewards, ma_rewards, cfg, tag="test")
|
||||
res_dic = test(cfg,env,agent)
|
||||
save_results(res_dic, tag='test',
|
||||
path=cfg.result_path)
|
||||
plot_rewards(res_dic['rewards'], res_dic['ma_rewards'], cfg, tag="test")
|
||||
|
||||
@@ -5,56 +5,47 @@ Author: John
|
||||
Email: johnjim0816@gmail.com
|
||||
Date: 2020-11-22 23:21:53
|
||||
LastEditor: John
|
||||
LastEditTime: 2022-02-10 06:13:21
|
||||
LastEditTime: 2022-07-21 21:44:00
|
||||
Description:
|
||||
Environment:
|
||||
'''
|
||||
import sys
|
||||
import os
|
||||
curr_path = os.path.dirname(os.path.abspath(__file__)) # 当前文件所在绝对路径
|
||||
parent_path = os.path.dirname(curr_path) # 父路径
|
||||
sys.path.append(parent_path) # 添加路径到系统路径
|
||||
import sys,os
|
||||
curr_path = os.path.dirname(os.path.abspath(__file__)) # current path
|
||||
parent_path = os.path.dirname(curr_path) # parent path
|
||||
sys.path.append(parent_path) # add to system path
|
||||
|
||||
import gym
|
||||
import torch
|
||||
import datetime
|
||||
import argparse
|
||||
from itertools import count
|
||||
|
||||
from pg import PolicyGradient
|
||||
from common.utils import save_results, make_dir
|
||||
from common.utils import plot_rewards
|
||||
|
||||
curr_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") # 获取当前时间
|
||||
|
||||
class Config:
    """Hyperparameters for the PolicyGradient example."""

    def __init__(self):
        # ---------------- environment settings ----------------
        self.algo_name = "PolicyGradient"  # name of the algorithm
        self.env_name = 'CartPole-v0'  # name of the environment
        # Use the GPU when torch reports one, otherwise fall back to the CPU.
        device_name = "cuda" if torch.cuda.is_available() else "cpu"
        self.device = torch.device(device_name)
        self.seed = 10  # random seed (original note: 0 means no seed is set)
        self.train_eps = 300  # number of training episodes
        self.test_eps = 30  # number of testing episodes

        # ---------------- algorithm settings ----------------
        self.batch_size = 8  # batch size used for mini-batch SGD
        self.lr = 0.01  # learning rate
        self.gamma = 0.99  # discount factor
        self.hidden_dim = 36  # hidden layer size of the network

        # ---------------- output settings ----------------
        # Both paths share the same per-run root: <curr_path>/outputs/<env>/<time>
        run_root = f"{curr_path}/outputs/{self.env_name}/{curr_time}"
        self.result_path = f"{run_root}/results/"  # where results are saved
        self.model_path = f"{run_root}/models/"  # where models are saved
        self.save = True  # whether to save figures
|
||||
def get_args(argv=None):
    """Parse the hyperparameters of the PolicyGradient example.

    The parser is built completely before it is used. The default result and
    model paths depend on ``--env_name``, so they are filled in after parsing
    rather than by calling ``parse_args()`` on a half-built parser, which
    rejected ``--result_path``, ``--model_path`` and ``--save_fig`` as
    unrecognized arguments.

    Args:
        argv: Optional list of argument strings. ``None`` (the default) lets
            argparse read ``sys.argv[1:]``, which is what the original
            zero-argument call did.

    Returns:
        argparse.Namespace holding algo_name, env_name, train_eps, test_eps,
        gamma, lr, batch_size, hidden_dim, device, result_path, model_path
        and save_fig.
    """
    def _str2bool(text):
        """Convert a command-line string into a real boolean."""
        # argparse's type=bool would map every non-empty string, including
        # "False", to True, so the value is interpreted explicitly here.
        lowered = text.strip().lower()
        if lowered in ('true', 't', 'yes', 'y', '1', 'on'):
            return True
        if lowered in ('false', 'f', 'no', 'n', '0', 'off', ''):
            return False
        raise argparse.ArgumentTypeError(
            f"expected a boolean value, got {text!r}")

    parser = argparse.ArgumentParser(description="hyperparameters")
    parser.add_argument('--algo_name', default='PolicyGradient', type=str,
                        help="name of algorithm")
    parser.add_argument('--env_name', default='CartPole-v0', type=str,
                        help="name of environment")
    parser.add_argument('--train_eps', default=300, type=int,
                        help="episodes of training")
    parser.add_argument('--test_eps', default=20, type=int,
                        help="episodes of testing")
    parser.add_argument('--gamma', default=0.99, type=float,
                        help="discounted factor")
    parser.add_argument('--lr', default=0.01, type=float,
                        help="learning rate")
    parser.add_argument('--batch_size', default=8, type=int)
    parser.add_argument('--hidden_dim', default=36, type=int)
    parser.add_argument('--device', default='cpu', type=str,
                        help="cpu or cuda")
    # None marks "not given"; the real defaults are derived after parsing.
    parser.add_argument('--result_path', default=None, type=str,
                        help="path to save results")
    parser.add_argument('--model_path', default=None, type=str,
                        help="path to save models")
    parser.add_argument('--save_fig', default=True, type=_str2bool,
                        help="if save figure or not")
    args = parser.parse_args(argv)

    if args.result_path is None or args.model_path is None:
        # Obtain current time so every run gets its own output folder.
        curr_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
        run_root = curr_path + "/outputs/" + args.env_name + '/' + curr_time
        if args.result_path is None:
            args.result_path = run_root + '/results/'
        if args.model_path is None:
            args.model_path = run_root + '/models/'
    return args
|
||||
|
||||
|
||||
def env_agent_config(cfg,seed=1):
|
||||
@@ -65,9 +56,9 @@ def env_agent_config(cfg,seed=1):
|
||||
return env,agent
|
||||
|
||||
def train(cfg,env,agent):
|
||||
print('开始训练!')
|
||||
print(f'环境:{cfg.env_name}, 算法:{cfg.algo_name}, 设备:{cfg.device}')
|
||||
state_pool = [] # 存放每batch_size个episode的state序列
|
||||
print('Start training!')
|
||||
print(f'Env:{cfg.env_name}, Algorithm:{cfg.algo_name}, Device:{cfg.device}')
|
||||
state_pool = [] # temp states pool per several episodes
|
||||
action_pool = []
|
||||
reward_pool = []
|
||||
rewards = []
|
||||
@@ -86,11 +77,11 @@ def train(cfg,env,agent):
|
||||
reward_pool.append(reward)
|
||||
state = next_state
|
||||
if done:
|
||||
print('回合:{}/{}, 奖励:{}'.format(i_ep + 1, cfg.train_eps, ep_reward))
|
||||
print(f'Episode:{i_ep+1}/{cfg.train_eps}, Reward:{ep_reward:.2f}')
|
||||
break
|
||||
if i_ep > 0 and i_ep % cfg.batch_size == 0:
|
||||
agent.update(reward_pool,state_pool,action_pool)
|
||||
state_pool = [] # 每个episode的state
|
||||
state_pool = []
|
||||
action_pool = []
|
||||
reward_pool = []
|
||||
rewards.append(ep_reward)
|
||||
@@ -99,8 +90,8 @@ def train(cfg,env,agent):
|
||||
0.9*ma_rewards[-1]+0.1*ep_reward)
|
||||
else:
|
||||
ma_rewards.append(ep_reward)
|
||||
print('完成训练!')
|
||||
env.close()
|
||||
print('Finish training!')
|
||||
env.close() # close environment
|
||||
return rewards, ma_rewards
|
||||
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@ Author: John
|
||||
Email: johnjim0816@gmail.com
|
||||
Date: 2021-03-12 16:02:24
|
||||
LastEditor: John
|
||||
LastEditTime: 2022-07-20 23:53:34
|
||||
LastEditTime: 2022-07-21 21:45:33
|
||||
Description:
|
||||
Environment:
|
||||
'''
|
||||
@@ -69,19 +69,19 @@ def plot_losses(losses, algo="DQN", save=True, path='./'):
|
||||
plt.savefig(path+"losses_curve")
|
||||
plt.show()
|
||||
|
||||
def save_results_1(dic, tag='train', path='./results'):
|
||||
def save_results(dic, tag='train', path='./results'):
    """Save every entry of a results dictionary as its own ``.npy`` file.

    Each value is written to ``<path>/<tag>_<key>.npy``. The file name is
    joined to the directory with ``os.path.join``, so ``path`` works with or
    without a trailing separator; plain string concatenation turned the
    default ``'./results'`` into ``'./resultstrain_rewards.npy'``.

    Args:
        dic: Mapping from result name (e.g. ``'rewards'``) to the values to
            store; each value is handed to ``np.save`` unchanged.
        tag: Prefix of the file names, e.g. ``'train'`` or ``'test'``.
        path: Directory that receives the files; created if it is missing.
    """
    import os  # local import: the file's import block is not visible here

    if path:
        # np.save does not create missing directories.
        os.makedirs(path, exist_ok=True)
    for key, value in dic.items():
        np.save(os.path.join(path, '{}_{}.npy'.format(tag, key)), value)
    print('Results saved!')
|
||||
|
||||
def save_results(rewards, ma_rewards, tag='train', path='./results'):
|
||||
''' 保存奖励
|
||||
'''
|
||||
np.save(path+'{}_rewards.npy'.format(tag), rewards)
|
||||
np.save(path+'{}_ma_rewards.npy'.format(tag), ma_rewards)
|
||||
print('Result saved!')
|
||||
# def save_results(rewards, ma_rewards, tag='train', path='./results'):
|
||||
# ''' 保存奖励
|
||||
# '''
|
||||
# np.save(path+'{}_rewards.npy'.format(tag), rewards)
|
||||
# np.save(path+'{}_ma_rewards.npy'.format(tag), ma_rewards)
|
||||
# print('Result saved!')
|
||||
|
||||
|
||||
def make_dir(*paths):
|
||||
|
||||
Reference in New Issue
Block a user