Commit e9b3e92141 (parent 6b3121fcff)
Author: johnjim0816
Date: 2022-07-21 22:12:19 +08:00
21 changed files with 99 additions and 85 deletions


@@ -123,14 +123,15 @@ def train(cfg,envs):
         loss.backward()
         optimizer.step()
     print('Finish training')
-    return test_rewards, test_ma_rewards
+    return {'rewards':test_rewards,'ma_rewards':test_ma_rewards}
 if __name__ == "__main__":
     cfg = get_args()
     envs = [make_envs(cfg.env_name) for i in range(cfg.n_envs)]
     envs = SubprocVecEnv(envs)
     # training
-    rewards,ma_rewards = train(cfg,envs)
+    res_dic = train(cfg,envs)
     make_dir(cfg.result_path,cfg.model_path)
     save_args(cfg)
-    save_results(rewards, ma_rewards, tag='train', path=cfg.result_path) # save results
-    plot_rewards(rewards, ma_rewards, cfg, tag="train") # plot results
+    save_results(res_dic, tag='train',
+                 path=cfg.result_path)
+    plot_rewards(res_dic['rewards'], res_dic['ma_rewards'], cfg, tag="train") # plot results
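The change above is the pattern this commit applies across all of the task scripts: train() and test() return a single results dict instead of a (rewards, ma_rewards) tuple, and callers look results up by key. A minimal, self-contained sketch of that convention, with the training loop replaced by dummy episode returns (names mirror the diff; nothing here is the repo's actual agent code):

    import numpy as np

    def train(train_eps=5):
        rewards, ma_rewards = [], []  # per-episode returns and their moving average
        for ep_reward in np.random.rand(train_eps):  # stand-in for real episode returns
            rewards.append(ep_reward)
            if ma_rewards:
                ma_rewards.append(0.9 * ma_rewards[-1] + 0.1 * ep_reward)  # same smoothing the repo uses
            else:
                ma_rewards.append(ep_reward)
        return {'rewards': rewards, 'ma_rewards': ma_rewards}

    res_dic = train()
    print(res_dic['rewards'], res_dic['ma_rewards'])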


@@ -5,7 +5,7 @@
 @Email: johnjim0816@gmail.com
 @Date: 2020-06-11 20:58:21
 @LastEditor: John
-LastEditTime: 2022-07-21 00:05:41
+LastEditTime: 2022-07-21 21:51:34
 @Discription:
 @Environment: python 3.7.7
 '''
@@ -86,7 +86,7 @@ def train(cfg, env, agent):
         else:
             ma_rewards.append(ep_reward)
     print('Finish training!')
-    return rewards, ma_rewards
+    return {'rewards':rewards,'ma_rewards':ma_rewards}

 def test(cfg, env, agent):
     print('Start testing')
@@ -111,21 +111,23 @@ def test(cfg, env, agent):
             ma_rewards.append(ep_reward)
         print(f"Epside:{i_ep+1}/{cfg.test_eps}, Reward:{ep_reward:.1f}")
     print('Finish testing!')
-    return rewards, ma_rewards
+    return {'rewards':rewards,'ma_rewards':ma_rewards}
 if __name__ == "__main__":
     cfg = get_args()
     # training
     env,agent = env_agent_config(cfg,seed=1)
-    rewards, ma_rewards = train(cfg, env, agent)
+    res_dic = train(cfg, env, agent)
     make_dir(cfg.result_path, cfg.model_path)
     save_args(cfg)
     agent.save(path=cfg.model_path)
-    save_results(rewards, ma_rewards, tag='train', path=cfg.result_path)
-    plot_rewards(rewards, ma_rewards, cfg, tag="train")
+    save_results(res_dic, tag='train',
+                 path=cfg.result_path)
+    plot_rewards(res_dic['rewards'], res_dic['ma_rewards'], cfg, tag="train")
     # testing
     env,agent = env_agent_config(cfg,seed=10)
     agent.load(path=cfg.model_path)
-    rewards,ma_rewards = test(cfg,env,agent)
-    save_results(rewards,ma_rewards,tag = 'test',path = cfg.result_path)
-    plot_rewards(rewards, ma_rewards, cfg, tag="test")
+    res_dic = test(cfg,env,agent)
+    save_results(res_dic, tag='test',
+                 path=cfg.result_path)
+    plot_rewards(res_dic['rewards'], res_dic['ma_rewards'], cfg, tag="test")


@@ -10,7 +10,7 @@ import torch
 import datetime
 import numpy as np
 import argparse
-from common.utils import save_results_1, make_dir
+from common.utils import save_results, make_dir
 from common.utils import plot_rewards,save_args
 from dqn import DQN
@@ -95,8 +95,8 @@ def train(cfg, env, agent):
 def test(cfg, env, agent):
-    print('开始测试!')
-    print(f'环境:{cfg.env_name}, 算法:{cfg.algo_name}, 设备:{cfg.device}')
+    print('Start testing!')
+    print(f'Env:{cfg.env_name}, Algorithm:{cfg.algo_name}, Device:{cfg.device}')
     ############# testing does not need the epsilon-greedy strategy, so set the corresponding values to 0 ###############
     cfg.epsilon_start = 0.0 # initial epsilon of the e-greedy strategy
     cfg.epsilon_end = 0.0 # final epsilon of the e-greedy strategy
@@ -123,7 +123,7 @@ def test(cfg, env, agent):
             else:
                 ma_rewards.append(ep_reward)
         print(f'Episode{i_ep+1}/{cfg.test_eps}, Reward:{ep_reward:.2f}, Step:{ep_step:.2f}')
-    print('完成测试!')
+    print('Finish testing')
     env.close()
     return {'rewards':rewards,'ma_rewards':ma_rewards,'steps':steps}
@@ -133,16 +133,16 @@ if __name__ == "__main__":
     # training
     env, agent = env_agent_config(cfg)
     res_dic = train(cfg, env, agent)
-    make_dir(cfg.result_path, cfg.model_path) # create folders for results and models
-    save_args(cfg)
-    agent.save(path=cfg.model_path) # 保存模型
-    save_results_1(res_dic, tag='train',
-                   path=cfg.result_path) # save results
-    plot_rewards(res_dic['rewards'], res_dic['ma_rewards'], cfg, tag="train") # plot results
+    make_dir(cfg.result_path, cfg.model_path)
+    save_args(cfg) # save parameters
+    agent.save(path=cfg.model_path) # save model
+    save_results(res_dic, tag='train',
+                 path=cfg.result_path)
+    plot_rewards(res_dic['rewards'], res_dic['ma_rewards'], cfg, tag="train")
     # testing
     env, agent = env_agent_config(cfg)
     agent.load(path=cfg.model_path) # load model
     res_dic = test(cfg, env, agent)
-    save_results_1(res_dic, tag='test',
-                   path=cfg.result_path) # save results
+    save_results(res_dic, tag='test',
+                 path=cfg.result_path) # save results
     plot_rewards(res_dic['rewards'], res_dic['ma_rewards'],cfg, tag="test") # plot results
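A side benefit visible in this file: test() also returns a 'steps' key, and nothing downstream needs to change, because the dict-based save_results (see the common/utils.py hunk at the end) simply iterates whatever keys it is given. A tiny illustration with dummy data:

    # Dummy results dict with the extra 'steps' key this script produces.
    res_dic = {'rewards': [200.0, 195.0], 'ma_rewards': [200.0, 199.5], 'steps': [200, 195]}
    for key, value in res_dic.items():  # mirrors the loop inside save_results
        print('would save test_{}.npy with {} values'.format(key, len(value)))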


@@ -1 +0,0 @@
{"algo_name": "DoubleDQN", "env_name": "CartPole-v0", "train_eps": 200, "test_eps": 20, "gamma": 0.99, "epsilon_start": 0.95, "epsilon_end": 0.01, "epsilon_decay": 500, "lr": 0.0001, "memory_capacity": 100000, "batch_size": 64, "target_update": 2, "hidden_dim": 256, "device": "cuda", "result_path": "C:\\Users\\24438\\Desktop\\rl-tutorials\\codes\\DoubleDQN/outputs/CartPole-v0/20220721-000842/results/", "model_path": "C:\\Users\\24438\\Desktop\\rl-tutorials\\codes\\DoubleDQN/outputs/CartPole-v0/20220721-000842/models/", "save_fig": true}

[Two binary image files removed (38 KiB and 44 KiB); contents not shown.]


@@ -0,0 +1,19 @@
{
"algo_name": "DoubleDQN",
"env_name": "CartPole-v0",
"train_eps": 200,
"test_eps": 20,
"gamma": 0.99,
"epsilon_start": 0.95,
"epsilon_end": 0.01,
"epsilon_decay": 500,
"lr": 0.0001,
"memory_capacity": 100000,
"batch_size": 64,
"target_update": 2,
"hidden_dim": 256,
"device": "cuda",
"result_path": "C:\\Users\\24438\\Desktop\\rl-tutorials\\codes\\DoubleDQN/outputs/CartPole-v0/20220721-215416/results/",
"model_path": "C:\\Users\\24438\\Desktop\\rl-tutorials\\codes\\DoubleDQN/outputs/CartPole-v0/20220721-215416/models/",
"save_fig": true
}

[Two binary image files added (44 KiB each); contents not shown.]


@@ -5,7 +5,7 @@ Author: JiangJi
 Email: johnjim0816@gmail.com
 Date: 2021-11-07 18:10:37
 LastEditor: JiangJi
-LastEditTime: 2022-07-21 00:08:38
+LastEditTime: 2022-07-21 21:52:31
 Discription:
 '''
 import sys,os
@@ -86,7 +86,7 @@ def train(cfg,env,agent):
         else:
             ma_rewards.append(ep_reward)
     print('Finish training!')
-    return rewards,ma_rewards
+    return {'rewards':rewards,'ma_rewards':ma_rewards}
 def test(cfg,env,agent):
     print('Start testing')
@@ -115,22 +115,24 @@ def test(cfg,env,agent):
             ma_rewards.append(ep_reward)
         print(f"Epside:{i_ep+1}/{cfg.test_eps}, Reward:{ep_reward:.1f}")
     print('Finish testing!')
-    return rewards,ma_rewards
+    return {'rewards':rewards,'ma_rewards':ma_rewards}
 if __name__ == "__main__":
     cfg = get_args()
     print(cfg.device)
     # training
     env,agent = env_agent_config(cfg,seed=1)
-    rewards, ma_rewards = train(cfg, env, agent)
+    res_dic = train(cfg, env, agent)
     make_dir(cfg.result_path, cfg.model_path)
     save_args(cfg)
     agent.save(path=cfg.model_path)
-    save_results(rewards, ma_rewards, tag='train', path=cfg.result_path)
-    plot_rewards(rewards, ma_rewards, cfg, tag="train")
+    save_results(res_dic, tag='train',
+                 path=cfg.result_path)
+    plot_rewards(res_dic['rewards'], res_dic['ma_rewards'], cfg, tag="train")
     # testing
     env,agent = env_agent_config(cfg,seed=10)
     agent.load(path=cfg.model_path)
-    rewards,ma_rewards = test(cfg,env,agent)
-    save_results(rewards,ma_rewards,tag = 'test',path = cfg.result_path)
-    plot_rewards(rewards, ma_rewards, cfg, tag="test")
+    res_dic = test(cfg,env,agent)
+    save_results(res_dic, tag='test',
+                 path=cfg.result_path)
+    plot_rewards(res_dic['rewards'], res_dic['ma_rewards'], cfg, tag="test")


@@ -5,56 +5,47 @@ Author: John
 Email: johnjim0816@gmail.com
 Date: 2020-11-22 23:21:53
 LastEditor: John
-LastEditTime: 2022-02-10 06:13:21
+LastEditTime: 2022-07-21 21:44:00
 Discription:
 Environment:
 '''
-import sys
-import os
-curr_path = os.path.dirname(os.path.abspath(__file__)) # absolute path of the current file
-parent_path = os.path.dirname(curr_path) # parent path
-sys.path.append(parent_path) # add path to the system path
+import sys,os
+curr_path = os.path.dirname(os.path.abspath(__file__)) # current path
+parent_path = os.path.dirname(curr_path) # parent path
+sys.path.append(parent_path) # add to system path
 import gym
 import torch
 import datetime
+import argparse
 from itertools import count
 from pg import PolicyGradient
 from common.utils import save_results, make_dir
 from common.utils import plot_rewards
-curr_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") # obtain current time

-class Config:
-    '''Hyperparameters
-    '''
-    def __init__(self):
-        ################################## environment hyperparameters ###################################
-        self.algo_name = "PolicyGradient" # name of algorithm
-        self.env_name = 'CartPole-v0' # name of environment
-        self.device = torch.device(
-            "cuda" if torch.cuda.is_available() else "cpu") # check GPU
-        self.seed = 10 # random seed; 0 means no seed is set
-        self.train_eps = 300 # number of training episodes
-        self.test_eps = 30 # number of testing episodes
-        ################################################################################

-        ################################## algorithm hyperparameters ###################################
-        self.batch_size = 8 # batch size for mini-batch SGD
-        self.lr = 0.01 # learning rate
-        self.gamma = 0.99 # discount factor
-        self.hidden_dim = 36 # hidden layer size
-        ################################################################################
-        ################################# parameters for saving results ################################
-        self.result_path = curr_path + "/outputs/" + self.env_name + \
-            '/' + curr_time + '/results/' # path to save results
-        self.model_path = curr_path + "/outputs/" + self.env_name + \
-            '/' + curr_time + '/models/' # path to save models
-        self.save = True # whether to save figures
-        ################################################################################
+def get_args():
+    """ Hyperparameters
+    """
+    curr_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") # obtain current time
+    parser = argparse.ArgumentParser(description="hyperparameters")
+    parser.add_argument('--algo_name',default='PolicyGradient',type=str,help="name of algorithm")
+    parser.add_argument('--env_name',default='CartPole-v0',type=str,help="name of environment")
+    parser.add_argument('--train_eps',default=300,type=int,help="episodes of training")
+    parser.add_argument('--test_eps',default=20,type=int,help="episodes of testing")
+    parser.add_argument('--gamma',default=0.99,type=float,help="discounted factor")
+    parser.add_argument('--lr',default=0.01,type=float,help="learning rate")
+    parser.add_argument('--batch_size',default=8,type=int)
+    parser.add_argument('--hidden_dim',default=36,type=int)
+    parser.add_argument('--device',default='cpu',type=str,help="cpu or cuda")
+    parser.add_argument('--result_path',default=curr_path + "/outputs/" + parser.parse_args().env_name + \
+        '/' + curr_time + '/results/' )
+    parser.add_argument('--model_path',default=curr_path + "/outputs/" + parser.parse_args().env_name + \
+        '/' + curr_time + '/models/' ) # path to save models
+    parser.add_argument('--save_fig',default=True,type=bool,help="if save figure or not")
+    args = parser.parse_args()
+    return args

 def env_agent_config(cfg,seed=1):
@@ -65,9 +56,9 @@ def env_agent_config(cfg,seed=1):
     return env,agent
 def train(cfg,env,agent):
-    print('开始训练!')
-    print(f'环境:{cfg.env_name}, 算法:{cfg.algo_name}, 设备:{cfg.device}')
-    state_pool = [] # stores the state sequences of every batch_size episodes
+    print('Start training!')
+    print(f'Env:{cfg.env_name}, Algorithm:{cfg.algo_name}, Device:{cfg.device}')
+    state_pool = [] # temp states pool per several episodes
    action_pool = []
    reward_pool = []
    rewards = []
@@ -86,11 +77,11 @@ def train(cfg,env,agent):
             reward_pool.append(reward)
             state = next_state
             if done:
-                print('回合:{}/{}, 奖励:{}'.format(i_ep + 1, cfg.train_eps, ep_reward))
+                print(f'Episode{i_ep+1}/{cfg.train_eps}, Reward:{ep_reward:.2f}')
                 break
         if i_ep > 0 and i_ep % cfg.batch_size == 0:
             agent.update(reward_pool,state_pool,action_pool)
-            state_pool = [] # states of each episode
+            state_pool = []
             action_pool = []
             reward_pool = []
         rewards.append(ep_reward)
@@ -99,8 +90,8 @@ def train(cfg,env,agent):
                 0.9*ma_rewards[-1]+0.1*ep_reward)
         else:
             ma_rewards.append(ep_reward)
-    print('完成训练!')
-    env.close()
+    print('Finish training!')
+    env.close() # close environment
     return rewards, ma_rewards
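The larger rewrite above replaces the hard-coded Config class with an argparse-based get_args(), so hyperparameters can be overridden from the command line (e.g. --lr 0.001 --device cuda) without editing the file. A reduced, runnable sketch of the same idea; only a few of the arguments are reproduced, and the output paths are built once after parse_args() rather than by calling parse_args() inside the defaults as the diff does (a simplification, to avoid parsing the arguments twice):

    import argparse
    import datetime

    def get_args():
        """ Hyperparameters from the command line (reduced sketch). """
        curr_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
        parser = argparse.ArgumentParser(description="hyperparameters")
        parser.add_argument('--algo_name', default='PolicyGradient', type=str, help="name of algorithm")
        parser.add_argument('--env_name', default='CartPole-v0', type=str, help="name of environment")
        parser.add_argument('--lr', default=0.01, type=float, help="learning rate")
        parser.add_argument('--device', default='cpu', type=str, help="cpu or cuda")
        args = parser.parse_args()
        # Build output paths once, after parsing; the layout mirrors the diff but is illustrative.
        args.result_path = "./outputs/" + args.env_name + '/' + curr_time + '/results/'
        args.model_path = "./outputs/" + args.env_name + '/' + curr_time + '/models/'
        return args

    if __name__ == "__main__":
        cfg = get_args()
        print(cfg.algo_name, cfg.env_name, cfg.lr, cfg.device, cfg.result_path)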


@@ -5,7 +5,7 @@ Author: John
 Email: johnjim0816@gmail.com
 Date: 2021-03-12 16:02:24
 LastEditor: John
-LastEditTime: 2022-07-20 23:53:34
+LastEditTime: 2022-07-21 21:45:33
 Discription:
 Environment:
 '''
@@ -69,19 +69,19 @@ def plot_losses(losses, algo="DQN", save=True, path='./'):
     plt.savefig(path+"losses_curve")
     plt.show()

-def save_results_1(dic, tag='train', path='./results'):
+def save_results(dic, tag='train', path='./results'):
     ''' save rewards
     '''
     for key,value in dic.items():
         np.save(path+'{}_{}.npy'.format(tag,key),value)
     print('Results saved')

-def save_results(rewards, ma_rewards, tag='train', path='./results'):
-    ''' save rewards
-    '''
-    np.save(path+'{}_rewards.npy'.format(tag), rewards)
-    np.save(path+'{}_ma_rewards.npy'.format(tag), ma_rewards)
-    print('Result saved!')
+# def save_results(rewards, ma_rewards, tag='train', path='./results'):
+#     ''' save rewards
+#     '''
+#     np.save(path+'{}_rewards.npy'.format(tag), rewards)
+#     np.save(path+'{}_ma_rewards.npy'.format(tag), ma_rewards)
+#     print('Result saved!')
 def make_dir(*paths):
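This renamed save_results is what lets every script above pass its results dict straight through: each key is written to '{tag}_{key}.npy' under the given path. A small usage sketch (the directory here is illustrative; the real scripts pass cfg.result_path, which already ends with a slash):

    import os
    import numpy as np

    def save_results(dic, tag='train', path='./results/'):
        ''' save each entry of a results dict as <tag>_<key>.npy '''
        for key, value in dic.items():
            np.save(path + '{}_{}.npy'.format(tag, key), value)
        print('Results saved')

    os.makedirs('./results/', exist_ok=True)
    res_dic = {'rewards': [200.0, 195.0], 'ma_rewards': [200.0, 199.5]}
    save_results(res_dic, tag='test', path='./results/')
    # -> ./results/test_rewards.npy and ./results/test_ma_rewards.npy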