update
@@ -31,7 +31,7 @@ class SarsaConfig:
         self.result_path = curr_path+"/outputs/" +self.env+'/'+curr_time+'/results/' # path to save results
         self.model_path = curr_path+"/outputs/" +self.env+'/'+curr_time+'/models/' # path to save models
         self.train_eps = 200
-        self.eval_eps = 50
+        self.test_eps = 50
         self.epsilon = 0.15 # epsilon: the probability of selecting a random action
         self.gamma = 0.9 # gamma: discount factor
         self.lr = 0.2 # learning rate: step-size parameter
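The renamed episode count sits alongside the three hyperparameters that drive the SARSA update itself. For orientation, here is a minimal sketch of how epsilon, gamma, and lr are typically consumed by a tabular epsilon-greedy SARSA agent; the class name, Q-table layout, and method signatures below are illustrative assumptions, not code from this commit:

import numpy as np
from collections import defaultdict

class SarsaSketch:
    # Hypothetical agent: only the roles of cfg.epsilon, cfg.gamma, and
    # cfg.lr mirror the config above; everything else is assumed.
    def __init__(self, n_actions, cfg):
        self.n_actions = n_actions
        self.epsilon = cfg.epsilon  # exploration probability
        self.gamma = cfg.gamma      # discount factor
        self.lr = cfg.lr            # step-size parameter
        self.Q = defaultdict(lambda: np.zeros(n_actions))  # Q-table

    def choose_action(self, state):
        # epsilon-greedy: explore with probability epsilon, else act greedily
        if np.random.uniform(0, 1) < self.epsilon:
            return np.random.randint(self.n_actions)
        return int(np.argmax(self.Q[state]))

    def update(self, state, action, reward, next_state, next_action, done):
        # On-policy TD target: bootstraps on the action actually taken next
        target = reward if done else reward + self.gamma * self.Q[next_state][next_action]
        self.Q[state][action] += self.lr * (target - self.Q[state][action])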
@@ -74,7 +74,7 @@ def train(cfg,env,agent):
 def eval(cfg,env,agent):
     rewards = []
     ma_rewards = []
-    for i_episode in range(cfg.eval_eps):
+    for i_episode in range(cfg.test_eps):
         # Print out which episode we're on, useful for debugging.
         # Generate an episode.
         # An episode is an array of (state, action, reward) tuples
@@ -94,7 +94,7 @@ def eval(cfg,env,agent):
         ma_rewards.append(ep_reward)
         rewards.append(ep_reward)
         if (i_episode+1)%10==0:
-            print("Episode:{}/{}: Reward:{}".format(i_episode+1, cfg.eval_eps,ep_reward))
+            print("Episode:{}/{}: Reward:{}".format(i_episode+1, cfg.test_eps,ep_reward))
     print('Complete evaling!')
     return rewards,ma_rewards
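One caveat on the context lines above: ma_rewards receives the raw ep_reward, so as shown it duplicates rewards. In similar training scripts the name usually denotes a smoothed moving-average curve; a hedged sketch of that bookkeeping (the helper name and the 0.9 weight are assumptions, not part of this commit):

def record_reward(ep_reward, rewards, ma_rewards, beta=0.9):
    # Append the raw reward, plus an exponentially smoothed moving average.
    rewards.append(ep_reward)
    if ma_rewards:
        ma_rewards.append(beta * ma_rewards[-1] + (1 - beta) * ep_reward)
    else:
        ma_rewards.append(ep_reward)

With that helper, the loop body would call record_reward(ep_reward, rewards, ma_rewards) once per episode in place of the two bare append calls.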