This commit is contained in:
JohnJim0816
2021-04-16 14:59:23 +08:00
parent 312b57fdff
commit e4690ac89f
71 changed files with 805 additions and 153 deletions

View File

@@ -5,12 +5,14 @@ Author: John
Email: johnjim0816@gmail.com
Date: 2021-03-22 16:18:10
LastEditor: John
LastEditTime: 2021-03-23 15:52:52
LastEditTime: 2021-04-11 01:24:41
Discription:
Environment:
'''
import sys,os
sys.path.append(os.getcwd()) # add current terminal path to sys.path
curr_path = os.path.dirname(__file__)
parent_path=os.path.dirname(curr_path)
sys.path.append(parent_path) # add current terminal path to sys.path
import gym
import numpy as np
import torch
@@ -33,15 +35,18 @@ if not os.path.exists(RESULT_PATH): # 检测是否存在文件夹
class PPOConfig:
    """Hyper-parameter container for training PPO on CartPole.

    All values are plain attributes read by the training loop and agent;
    construction has no side effects beyond querying CUDA availability.
    """
    def __init__(self) -> None:
        self.env = 'CartPole-v0'      # gym environment id
        self.algo = 'PPO'             # algorithm name (used for result paths/labels)
        self.batch_size = 5           # mini-batch size per PPO update
        self.gamma = 0.99             # reward discount factor
        self.n_epochs = 4             # optimization epochs per update
        self.lr = 0.0003              # legacy single learning rate (kept for backward compatibility)
        self.actor_lr = 0.0003        # actor network learning rate
        self.critic_lr = 0.0003       # critic network learning rate
        self.gae_lambda = 0.95        # lambda for Generalized Advantage Estimation
        self.policy_clip = 0.2        # PPO clipping epsilon
        self.hidden_dim = 256         # hidden layer width of the networks
        self.update_fre = 20          # frequency of agent update (environment steps)
        # The original assigned train_eps twice (250, then 300); the first
        # assignment was dead code, so only the effective value is kept.
        self.train_eps = 300          # max training episodes
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # check gpu
def train(cfg,env,agent):
@@ -70,7 +75,8 @@ def train(cfg,env,agent):
else:
ma_rewards.append(ep_reward)
avg_reward = np.mean(rewards[-100:])
if avg_reward > best_reward:
best_reward = avg_reward
agent.save(path=SAVED_MODEL_PATH)
print('Episode:{}/{}, Reward:{:.1f}, avg reward:{:.1f}, Done:{}'.format(i_episode+1,cfg.train_eps,ep_reward,avg_reward,done))
@@ -78,7 +84,7 @@ def train(cfg,env,agent):
if __name__ == '__main__':
cfg = PPOConfig()
env = gym.make(cfg.env)
env.seed(1)
state_dim=env.observation_space.shape[0]
action_dim=env.action_space.n