diff --git a/codes/Sarsa/outputs/CliffWalking-v0/20210506-171245/models/sarsa_model.pkl b/codes/Sarsa/outputs/CliffWalking-v0/20210506-171245/models/sarsa_model.pkl deleted file mode 100644 index ff25fd5..0000000 Binary files a/codes/Sarsa/outputs/CliffWalking-v0/20210506-171245/models/sarsa_model.pkl and /dev/null differ diff --git a/codes/Sarsa/outputs/CliffWalking-v0/20210506-171245/results/eval_ma_rewards.npy b/codes/Sarsa/outputs/CliffWalking-v0/20210506-171245/results/eval_ma_rewards.npy deleted file mode 100644 index d7d62e3..0000000 Binary files a/codes/Sarsa/outputs/CliffWalking-v0/20210506-171245/results/eval_ma_rewards.npy and /dev/null differ diff --git a/codes/Sarsa/outputs/CliffWalking-v0/20210506-171245/results/eval_rewards.npy b/codes/Sarsa/outputs/CliffWalking-v0/20210506-171245/results/eval_rewards.npy deleted file mode 100644 index de0a816..0000000 Binary files a/codes/Sarsa/outputs/CliffWalking-v0/20210506-171245/results/eval_rewards.npy and /dev/null differ diff --git a/codes/Sarsa/outputs/CliffWalking-v0/20210506-171245/results/eval_rewards_curve.png b/codes/Sarsa/outputs/CliffWalking-v0/20210506-171245/results/eval_rewards_curve.png deleted file mode 100644 index 3de2db7..0000000 Binary files a/codes/Sarsa/outputs/CliffWalking-v0/20210506-171245/results/eval_rewards_curve.png and /dev/null differ diff --git a/codes/Sarsa/outputs/CliffWalking-v0/20210506-171245/results/train_ma_rewards.npy b/codes/Sarsa/outputs/CliffWalking-v0/20210506-171245/results/train_ma_rewards.npy deleted file mode 100644 index 3f9bf83..0000000 Binary files a/codes/Sarsa/outputs/CliffWalking-v0/20210506-171245/results/train_ma_rewards.npy and /dev/null differ diff --git a/codes/Sarsa/outputs/CliffWalking-v0/20210506-171245/results/train_rewards.npy b/codes/Sarsa/outputs/CliffWalking-v0/20210506-171245/results/train_rewards.npy deleted file mode 100644 index e0fd7e9..0000000 Binary files a/codes/Sarsa/outputs/CliffWalking-v0/20210506-171245/results/train_rewards.npy and /dev/null differ diff --git a/codes/Sarsa/outputs/CliffWalking-v0/20210506-171245/results/train_rewards_curve.png b/codes/Sarsa/outputs/CliffWalking-v0/20210506-171245/results/train_rewards_curve.png deleted file mode 100644 index 0a8cd37..0000000 Binary files a/codes/Sarsa/outputs/CliffWalking-v0/20210506-171245/results/train_rewards_curve.png and /dev/null differ diff --git a/codes/Sarsa/outputs/RacetrackEnv/20210715-085005/models/sarsa_model.pkl b/codes/Sarsa/outputs/RacetrackEnv/20210715-085005/models/sarsa_model.pkl new file mode 100644 index 0000000..a80fa68 Binary files /dev/null and b/codes/Sarsa/outputs/RacetrackEnv/20210715-085005/models/sarsa_model.pkl differ diff --git a/codes/Sarsa/outputs/RacetrackEnv/20210715-085005/results/eval_ma_rewards.npy b/codes/Sarsa/outputs/RacetrackEnv/20210715-085005/results/eval_ma_rewards.npy new file mode 100644 index 0000000..91051de Binary files /dev/null and b/codes/Sarsa/outputs/RacetrackEnv/20210715-085005/results/eval_ma_rewards.npy differ diff --git a/codes/Sarsa/outputs/RacetrackEnv/20210715-085005/results/eval_rewards.npy b/codes/Sarsa/outputs/RacetrackEnv/20210715-085005/results/eval_rewards.npy new file mode 100644 index 0000000..176ea2e Binary files /dev/null and b/codes/Sarsa/outputs/RacetrackEnv/20210715-085005/results/eval_rewards.npy differ diff --git a/codes/Sarsa/outputs/RacetrackEnv/20210715-085005/results/eval_rewards_curve.png b/codes/Sarsa/outputs/RacetrackEnv/20210715-085005/results/eval_rewards_curve.png new file mode 100644 index 0000000..a72aede Binary files /dev/null and b/codes/Sarsa/outputs/RacetrackEnv/20210715-085005/results/eval_rewards_curve.png differ diff --git a/codes/Sarsa/outputs/RacetrackEnv/20210715-085005/results/train_ma_rewards.npy b/codes/Sarsa/outputs/RacetrackEnv/20210715-085005/results/train_ma_rewards.npy new file mode 100644 index 0000000..47c20c6 Binary files /dev/null and b/codes/Sarsa/outputs/RacetrackEnv/20210715-085005/results/train_ma_rewards.npy differ diff --git a/codes/Sarsa/outputs/RacetrackEnv/20210715-085005/results/train_rewards.npy b/codes/Sarsa/outputs/RacetrackEnv/20210715-085005/results/train_rewards.npy new file mode 100644 index 0000000..204a158 Binary files /dev/null and b/codes/Sarsa/outputs/RacetrackEnv/20210715-085005/results/train_rewards.npy differ diff --git a/codes/Sarsa/outputs/RacetrackEnv/20210715-085005/results/train_rewards_curve.png b/codes/Sarsa/outputs/RacetrackEnv/20210715-085005/results/train_rewards_curve.png new file mode 100644 index 0000000..22e9755 Binary files /dev/null and b/codes/Sarsa/outputs/RacetrackEnv/20210715-085005/results/train_rewards_curve.png differ diff --git a/codes/Sarsa/task0_train.py b/codes/Sarsa/task0_train.py index 7fad1ab..c8363bc 100644 --- a/codes/Sarsa/task0_train.py +++ b/codes/Sarsa/task0_train.py @@ -5,7 +5,7 @@ Author: John Email: johnjim0816@gmail.com Date: 2021-03-11 17:59:16 LastEditor: John -LastEditTime: 2021-07-14 17:27:40 +LastEditTime: 2021-07-15 08:49:31 Discription: Environment: ''' @@ -52,14 +52,16 @@ def train(cfg,env,agent): # An episode is an array of (state, action, reward) tuples state = env.reset() ep_reward = 0 + action = agent.choose_action(state) while True: # for t in range(cfg.n_steps): - action = agent.choose_action(state) + next_state, reward, done = env.step(action) ep_reward+=reward next_action = agent.choose_action(next_state) agent.update(state, action, reward, next_state, next_action,done) state = next_state + action = next_action if done: break if ma_rewards: