update DDPG

This commit is contained in:
JohnJim0816
2020-10-15 22:07:42 +08:00
parent cf9887f6d0
commit 0ff03c498e
37 changed files with 161 additions and 99 deletions

View File

@@ -5,63 +5,76 @@
@Email: johnjim0816@gmail.com
@Date: 2020-06-11 20:58:21
@LastEditor: John
LastEditTime: 2020-09-02 01:24:50
LastEditTime: 2020-10-15 21:23:39
@Discription:
@Environment: python 3.7.7
'''
from token import NUMBER
from typing import Sequence
import torch
import gym
from ddpg import DDPG
from agent import DDPG
from env import NormalizedActions
from noise import OUNoise
from plot import plot
import os
import numpy as np
import argparse
from torch.utils.tensorboard import SummaryWriter
import datetime
SEQUENCE = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
SAVED_MODEL_PATH = os.path.split(os.path.abspath(__file__))[0]+"/saved_model/"+SEQUENCE+'/'
RESULT_PATH = os.path.split(os.path.abspath(__file__))[0]+"/result/"+SEQUENCE+'/'
def get_args():
'''模型建立好之后只需要在这里调参
'''
parser = argparse.ArgumentParser()
parser.add_argument("--gamma", default=0.99, type=float) # q-learning中的gamma
parser.add_argument("--critic_lr", default=1e-3, type=float) # critic学习率
parser.add_argument("--train", default=1, type=int) # 1 表示训练0表示只进行eval
parser.add_argument("--gamma", default=0.99,
type=float) # q-learning中的gamma
parser.add_argument("--critic_lr", default=1e-3, type=float) # critic学习率
parser.add_argument("--actor_lr", default=1e-4, type=float)
parser.add_argument("--memory_capacity", default=10000, type=int,help="capacity of Replay Memory")
parser.add_argument("--batch_size", default=128, type=int,help="batch size of memory sampling")
parser.add_argument("--memory_capacity", default=10000,
type=int, help="capacity of Replay Memory")
parser.add_argument("--batch_size", default=128, type=int,
help="batch size of memory sampling")
parser.add_argument("--train_eps", default=200, type=int)
parser.add_argument("--train_steps", default=200, type=int)
parser.add_argument("--eval_eps", default=200, type=int) # 训练的最大episode数目
parser.add_argument("--eval_steps", default=200, type=int) # 训练每个episode的长度
parser.add_argument("--target_update", default=4, type=int,help="when(every default 10 eisodes) to update target net ")
parser.add_argument("--eval_eps", default=200, type=int) # 训练的最大episode数目
parser.add_argument("--eval_steps", default=200,
type=int) # 训练每个episode的长度
parser.add_argument("--target_update", default=4, type=int,
help="when(every default 10 eisodes) to update target net ")
config = parser.parse_args()
return config
def train():
cfg = get_args()
def train(cfg):
print('Start to train ! \n')
env = NormalizedActions(gym.make("Pendulum-v0"))
# 增加action噪声
ou_noise = OUNoise(env.action_space)
n_states = env.observation_space.shape[0]
n_actions = env.action_space.shape[0]
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
agent=DDPG(n_states,n_actions,device="cpu", critic_lr=1e-3,
actor_lr=1e-4, gamma=0.99, soft_tau=1e-2, memory_capacity=100000, batch_size=128)
rewards = []
n_states = env.observation_space.shape[0]
n_actions = env.action_space.shape[0]
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
agent = DDPG(n_states, n_actions, device="cpu", critic_lr=1e-3,
actor_lr=1e-4, gamma=0.99, soft_tau=1e-2, memory_capacity=100000, batch_size=128)
rewards = []
moving_average_rewards = []
ep_steps = []
for i_episode in range(1,cfg.train_eps+1):
state=env.reset()
log_dir=os.path.split(os.path.abspath(__file__))[0]+"/logs/train/" + SEQUENCE
writer = SummaryWriter(log_dir)
for i_episode in range(1, cfg.train_eps+1):
state = env.reset()
ou_noise.reset()
ep_reward = 0
for i_step in range(1,cfg.train_steps+1):
action = agent.select_action(state)
action = ou_noise.get_action(action, i_step) # 即paper中的random process
for i_step in range(1, cfg.train_steps+1):
action = agent.select_action(state)
action = ou_noise.get_action(
action, i_step) # 即paper中的random process
next_state, reward, done, _ = env.step(action)
ep_reward += reward
agent.memory.push(state, action, reward, next_state, done)
@@ -69,7 +82,8 @@ def train():
state = next_state
if done:
break
print('Episode:', i_episode, ' Reward: %i' % int(ep_reward),'n_steps:', i_step)
print('Episode:', i_episode, ' Reward: %i' %
int(ep_reward), 'n_steps:', i_step)
ep_steps.append(i_step)
rewards.append(ep_reward)
if i_episode == 1:
@@ -77,54 +91,43 @@ def train():
else:
moving_average_rewards.append(
0.9*moving_average_rewards[-1]+0.1*ep_reward)
print('Complete')
# 保存模型
import os
import numpy as np
save_path = os.path.dirname(__file__)+"/saved_model/"
if not os.path.exists(save_path):
os.mkdir(save_path)
agent.save_model(save_path+'checkpoint.pth')
# 存储reward等相关结果
output_path = os.path.dirname(__file__)+"/result/"
# 检测是否存在文件夹
if not os.path.exists(output_path):
os.mkdir(output_path)
np.save(output_path+"rewards.npy", rewards)
np.save(output_path+"moving_average_rewards.npy", moving_average_rewards)
np.save(output_path+"steps.npy", ep_steps)
plot(rewards)
plot(moving_average_rewards,ylabel="moving_average_rewards")
plot(ep_steps, ylabel="steps_of_each_episode")
def eval():
cfg = get_args()
env = NormalizedActions(gym.make("Pendulum-v0"))
# 增加action噪声
ou_noise = OUNoise(env.action_space)
n_states = env.observation_space.shape[0]
n_actions = env.action_space.shape[0]
agent=DDPG(n_states,n_actions, critic_lr=1e-3,
actor_lr=1e-4, gamma=0.99, soft_tau=1e-2, memory_capacity=100000, batch_size=128)
writer.add_scalars('rewards',{'raw':rewards[-1], 'moving_average': moving_average_rewards[-1]}, i_episode)
writer.add_scalar('steps_of_each_episode',
ep_steps[-1], i_episode)
writer.close()
print('Complete training')
''' 保存模型 '''
if not os.path.exists(SAVED_MODEL_PATH): # 检测是否存在文件夹
os.mkdir(SAVED_MODEL_PATH)
agent.save_model(SAVED_MODEL_PATH+'checkpoint.pth')
'''存储reward等相关结果'''
if not os.path.exists(RESULT_PATH): # 检测是否存在文件夹
os.mkdir(RESULT_PATH)
np.save(RESULT_PATH+'rewards_train.npy', rewards)
np.save(RESULT_PATH+'moving_average_rewards_train.npy', moving_average_rewards)
np.save(RESULT_PATH+'steps_train.npy', ep_steps)
import os
save_path = os.path.dirname(__file__)+"/saved_model/"
if not os.path.exists(save_path):
os.mkdir(save_path)
agent.load_model(save_path+'checkpoint.pth')
def eval(cfg, saved_model_path = SAVED_MODEL_PATH):
print('start to eval ! \n')
env = NormalizedActions(gym.make("Pendulum-v0"))
n_states = env.observation_space.shape[0]
n_actions = env.action_space.shape[0]
agent = DDPG(n_states, n_actions, critic_lr=1e-3,
actor_lr=1e-4, gamma=0.99, soft_tau=1e-2, memory_capacity=100000, batch_size=128)
agent.load_model(saved_model_path+'checkpoint.pth')
rewards = []
moving_average_rewards = []
ep_steps = []
log_dir=os.path.split(os.path.abspath(__file__))[0]+"/logs/eval/" + SEQUENCE
writer = SummaryWriter(log_dir)
for i_episode in range(1, cfg.eval_eps+1):
state = env.reset() # reset环境状态
state = env.reset() # reset环境状态
ep_reward = 0
for i_step in range(1, cfg.eval_steps+1):
action = agent.select_action(state) # 根据当前环境state选择action
next_state, reward, done, _ = env.step(action) # 更新环境参数
action = agent.select_action(state) # 根据当前环境state选择action
next_state, reward, done, _ = env.step(action) # 更新环境参数
ep_reward += reward
state = next_state # 跳转到下一个状态
state = next_state # 跳转到下一个状态
if done:
break
print('Episode:', i_episode, ' Reward: %i' %
@@ -137,11 +140,22 @@ def eval():
else:
moving_average_rewards.append(
0.9*moving_average_rewards[-1]+0.1*ep_reward)
plot(rewards,save_fig=False)
plot(moving_average_rewards, ylabel="moving_average_rewards",save_fig=False)
plot(ep_steps, ylabel="steps_of_each_episode",save_fig=False)
writer.add_scalars('rewards',{'raw':rewards[-1], 'moving_average': moving_average_rewards[-1]}, i_episode)
writer.add_scalar('steps_of_each_episode',
ep_steps[-1], i_episode)
writer.close()
'''存储reward等相关结果'''
if not os.path.exists(RESULT_PATH): # 检测是否存在文件夹
os.mkdir(RESULT_PATH)
np.save(RESULT_PATH+'rewards_eval.npy', rewards)
np.save(RESULT_PATH+'moving_average_rewards_eval.npy', moving_average_rewards)
np.save(RESULT_PATH+'steps_eval.npy', ep_steps)
if __name__ == "__main__":
# train()
eval()
cfg = get_args()
if cfg.train:
train(cfg)
eval(cfg)
else:
model_path = os.path.split(os.path.abspath(__file__))[0]+"/saved_model/"
eval(cfg,saved_model_path=model_path)