update
@@ -5,12 +5,14 @@ Author: John
 Email: johnjim0816@gmail.com
 Date: 2021-03-22 16:18:10
 LastEditor: John
-LastEditTime: 2021-03-23 15:52:52
+LastEditTime: 2021-04-11 01:24:41
 Discription:
 Environment:
 '''
 import sys,os
-sys.path.append(os.getcwd()) # add current terminal path to sys.path
+curr_path = os.path.dirname(__file__)
+parent_path = os.path.dirname(curr_path)
+sys.path.append(parent_path) # add parent path to sys.path
 import gym
 import numpy as np
 import torch
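
The import-path change above replaces `os.getcwd()`, which depends on wherever the terminal happens to be, with a path anchored to the script's own location, so the run works from any directory. A minimal sketch of the same idea using `pathlib` (an equivalent restatement, not code from this commit):

    import sys
    from pathlib import Path

    # Anchor to this file, not the caller's working directory,
    # then expose the parent folder so sibling packages import cleanly.
    parent_path = Path(__file__).resolve().parent.parent
    sys.path.append(str(parent_path))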
@@ -33,15 +35,18 @@ if not os.path.exists(RESULT_PATH): # check if the folder exists
 
 class PPOConfig:
     def __init__(self) -> None:
         self.env = 'CartPole-v0'
         self.algo = 'PPO'
         self.batch_size = 5
         self.gamma = 0.99
         self.n_epochs = 4
-        self.lr = 0.0003
+        self.actor_lr = 0.0003
+        self.critic_lr = 0.0003
         self.gae_lambda = 0.95
         self.policy_clip = 0.2
         self.hidden_dim = 256
         self.update_fre = 20 # frequency of agent update
-        self.train_eps = 250 # max training episodes
+        self.train_eps = 300 # max training episodes
         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") # check gpu
 
 def train(cfg,env,agent):
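
For context on where these hyperparameters land: `policy_clip`, `gamma`, and `gae_lambda` feed PPO's clipped surrogate objective inside the agent, which this diff does not show. A minimal PyTorch sketch of that loss, assuming `new_log_probs`, `old_log_probs`, and `advantages` are tensors computed elsewhere:

    import torch

    def clipped_surrogate_loss(new_log_probs, old_log_probs, advantages, policy_clip=0.2):
        # Probability ratio pi_new/pi_old, taken in log space for numerical stability
        ratio = torch.exp(new_log_probs - old_log_probs)
        surr1 = ratio * advantages
        surr2 = torch.clamp(ratio, 1 - policy_clip, 1 + policy_clip) * advantages
        # PPO maximizes the elementwise minimum, so the loss is its negation
        return -torch.min(surr1, surr2).mean()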
@@ -70,7 +75,8 @@ def train(cfg,env,agent):
         else:
             ma_rewards.append(ep_reward)
         avg_reward = np.mean(rewards[-100:])
         if avg_reward > best_reward:
             best_reward = avg_reward
             agent.save(path=SAVED_MODEL_PATH)
         print('Episode:{}/{}, Reward:{:.1f}, avg reward:{:.1f}, Done:{}'.format(i_episode+1,cfg.train_eps,ep_reward,avg_reward,done))
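
The saving logic in this hunk follows a standard best-checkpoint pattern: average the last 100 episode returns and write the model only when that average improves. A self-contained sketch of the pattern, with placeholder returns and the repo's `agent.save`/`SAVED_MODEL_PATH` names assumed:

    import numpy as np

    rewards, best_reward = [], float('-inf')   # episode returns, best average seen so far
    for ep_reward in [20.0, 80.0, 200.0]:      # placeholder returns; the real loop plays episodes
        rewards.append(ep_reward)
        avg_reward = np.mean(rewards[-100:])   # moving average over the last 100 episodes
        if avg_reward > best_reward:
            best_reward = avg_reward
            # agent.save(path=SAVED_MODEL_PATH)  # checkpoint only on improvement
            print(f'new best average reward: {best_reward:.1f}')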
@@ -78,7 +84,7 @@ def train(cfg,env,agent):
 
 if __name__ == '__main__':
     cfg = PPOConfig()
-    env = gym.make('CartPole-v0')
+    env = gym.make(cfg.env)
     env.seed(1)
     state_dim = env.observation_space.shape[0]
     action_dim = env.action_space.n
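
The `__main__` hunk swaps the hardcoded environment name for `cfg.env`, making the config the single source of truth. Note that `env.seed(1)` is the classic Gym API; Gym 0.26+ moved seeding into `env.reset(seed=...)`. A sketch of the full wiring, with `PPO` standing in for the repo's agent class (its real constructor may differ):

    import gym

    cfg = PPOConfig()
    env = gym.make(cfg.env)                     # environment name comes from the config
    env.seed(1)                                 # classic Gym API; gym>=0.26 uses env.reset(seed=1)
    state_dim = env.observation_space.shape[0]  # CartPole-v0: 4-dimensional observation
    action_dim = env.action_space.n             # CartPole-v0: 2 discrete actions
    agent = PPO(state_dim, action_dim, cfg)     # hypothetical constructor; match the repo's agent
    train(cfg, env, agent)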