更新PPO,增加PER DQN

This commit is contained in:
johnjim0816
2022-11-14 21:35:28 +08:00
parent dc78698262
commit b8aec4c188
34 changed files with 1993 additions and 476 deletions

View File

@@ -0,0 +1,25 @@
# Run-level settings: which algorithm/environment to use, run mode,
# checkpoint loading, and episode/step limits.
general_cfg:
  algo_name: PER_DQN
  device: cpu
  env_name: CartPole-v1
  eval_eps: 10
  eval_per_episode: 5
  load_checkpoint: true
  # NOTE(review): presumably the name of the training-run directory whose
  # saved model is loaded when load_checkpoint is true -- confirm in the loader.
  load_path: Train_CartPole-v1_PER_DQN_20221113-162804
  max_steps: 200
  mode: test
  save_fig: true
  seed: 0
  show_fig: false
  test_eps: 10
  train_eps: 200
# Algorithm hyperparameters. Their exact semantics are defined by the code
# that consumes this file, which is not shown here.
algo_cfg:
  batch_size: 64
  buffer_size: 100000
  epsilon_decay: 500
  epsilon_end: 0.01
  epsilon_start: 0.95
  gamma: 0.95
  hidden_dim: 256
  lr: 0.0001
  target_update: 4

View File

@@ -0,0 +1,14 @@
2022-11-14 10:46:49 - r - INFO: - n_states: 4, n_actions: 2
2022-11-14 10:46:49 - r - INFO: - Start testing!
2022-11-14 10:46:49 - r - INFO: - Env: CartPole-v1, Algorithm: PER_DQN, Device: cpu
2022-11-14 10:46:49 - r - INFO: - Episode: 1/10, Reward: 200.000, Step: 200
2022-11-14 10:46:49 - r - INFO: - Episode: 2/10, Reward: 200.000, Step: 200
2022-11-14 10:46:49 - r - INFO: - Episode: 3/10, Reward: 200.000, Step: 200
2022-11-14 10:46:49 - r - INFO: - Episode: 4/10, Reward: 200.000, Step: 200
2022-11-14 10:46:49 - r - INFO: - Episode: 5/10, Reward: 200.000, Step: 200
2022-11-14 10:46:49 - r - INFO: - Episode: 6/10, Reward: 200.000, Step: 200
2022-11-14 10:46:49 - r - INFO: - Episode: 7/10, Reward: 200.000, Step: 200
2022-11-14 10:46:49 - r - INFO: - Episode: 8/10, Reward: 200.000, Step: 200
2022-11-14 10:46:49 - r - INFO: - Episode: 9/10, Reward: 200.000, Step: 200
2022-11-14 10:46:49 - r - INFO: - Episode: 10/10, Reward: 200.000, Step: 200
2022-11-14 10:46:49 - r - INFO: - Finish testing!

Binary file not shown.

After

Width:  |  Height:  |  Size: 25 KiB

View File

@@ -0,0 +1,11 @@
episodes,rewards,steps
0,200.0,200
1,200.0,200
2,200.0,200
3,200.0,200
4,200.0,200
5,200.0,200
6,200.0,200
7,200.0,200
8,200.0,200
9,200.0,200
1 episodes rewards steps
2 0 200.0 200
3 1 200.0 200
4 2 200.0 200
5 3 200.0 200
6 4 200.0 200
7 5 200.0 200
8 6 200.0 200
9 7 200.0 200
10 8 200.0 200
11 9 200.0 200