Update algorithm template

johnjim0816
2022-11-06 12:15:36 +08:00
parent 466a17707f
commit dc78698262
256 changed files with 17282 additions and 10229 deletions

View File

@@ -0,0 +1,25 @@
general_cfg:
algo_name: DQN
device: cuda
env_name: CartPole-v1
eval_eps: 10
eval_per_episode: 5
load_checkpoint: true
load_path: Train_CartPole-v1_DQN_20221031-001201
max_steps: 200
mode: test
save_fig: true
seed: 0
show_fig: false
test_eps: 10
train_eps: 100
algo_cfg:
batch_size: 64
buffer_size: 100000
epsilon_decay: 500
epsilon_end: 0.01
epsilon_start: 0.95
gamma: 0.95
hidden_dim: 256
lr: 0.0001
target_update: 4
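The file above is one of the per-run YAML snapshots: general_cfg holds run-level settings (env, mode, seed, episode counts) and algo_cfg holds the DQN hyperparameters. A minimal sketch of how such a file could be read and overlaid onto default config objects, assuming PyYAML is available; the helper name overlay and the file name config.yaml are illustrative, and the repo's real merging is done by merge_class_attrs in common.utils, which is not shown in this diff:

# Sketch: load a general_cfg/algo_cfg YAML and overlay it onto default config
# objects. Hypothetical helper; the repo's actual merge (merge_class_attrs)
# has a signature not shown in this commit.
import yaml

class GeneralConfig:                      # stand-in defaults
    def __init__(self):
        self.env_name = "CartPole-v1"
        self.mode = "train"

def overlay(cfg_obj, overrides: dict):
    """Set each YAML key as an attribute on the config object."""
    for k, v in (overrides or {}).items():
        setattr(cfg_obj, k, v)
    return cfg_obj

with open("config.yaml") as f:            # e.g. the snapshot shown above
    raw = yaml.safe_load(f)

general_cfg = overlay(GeneralConfig(), raw.get("general_cfg"))
print(general_cfg.mode, general_cfg.env_name)   # -> test CartPole-v1 for the file above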

View File

@@ -0,0 +1,14 @@
2022-10-31 00:13:43 - r - INFO: - n_states: 4, n_actions: 2
2022-10-31 00:13:44 - r - INFO: - Start testing!
2022-10-31 00:13:44 - r - INFO: - Env: CartPole-v1, Algorithm: DQN, Device: cuda
2022-10-31 00:13:45 - r - INFO: - Episode: 1/10, Reward: 200.0, Step: 200
2022-10-31 00:13:45 - r - INFO: - Episode: 2/10, Reward: 200.0, Step: 200
2022-10-31 00:13:45 - r - INFO: - Episode: 3/10, Reward: 200.0, Step: 200
2022-10-31 00:13:45 - r - INFO: - Episode: 4/10, Reward: 200.0, Step: 200
2022-10-31 00:13:45 - r - INFO: - Episode: 5/10, Reward: 200.0, Step: 200
2022-10-31 00:13:45 - r - INFO: - Episode: 6/10, Reward: 200.0, Step: 200
2022-10-31 00:13:45 - r - INFO: - Episode: 7/10, Reward: 200.0, Step: 200
2022-10-31 00:13:45 - r - INFO: - Episode: 8/10, Reward: 200.0, Step: 200
2022-10-31 00:13:45 - r - INFO: - Episode: 9/10, Reward: 200.0, Step: 200
2022-10-31 00:13:45 - r - INFO: - Episode: 10/10, Reward: 200.0, Step: 200
2022-10-31 00:13:45 - r - INFO: - Finish testing!

Binary file not shown (After: 25 KiB).

View File

@@ -0,0 +1,11 @@
episodes,rewards,steps
0,200.0,200
1,200.0,200
2,200.0,200
3,200.0,200
4,200.0,200
5,200.0,200
6,200.0,200
7,200.0,200
8,200.0,200
9,200.0,200
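The results files in this commit are plain episodes/rewards/steps CSVs like the one above. A minimal sketch of how such a file could be written and smoothed for plotting from the res_dic dictionary that task0.py returns, assuming pandas and matplotlib are available; the file names and the rolling window are illustrative, and the repo's own saving/plotting utilities are not part of this diff:

# Sketch: persist and plot an {'episodes': ..., 'rewards': ..., 'steps': ...}
# result dict as a CSV like the one above. File names here are illustrative.
import pandas as pd
import matplotlib.pyplot as plt

res_dic = {"episodes": range(10), "rewards": [200.0] * 10, "steps": [200] * 10}
df = pd.DataFrame(res_dic)
df.to_csv("results.csv", index=False)            # -> episodes,rewards,steps header

df["smooth"] = df["rewards"].rolling(5, min_periods=1).mean()  # optional smoothing
df.plot(x="episodes", y=["rewards", "smooth"])
plt.savefig("rewards_curve.png")                 # analogous to the saved figure files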

View File

@@ -0,0 +1,23 @@
general_cfg:
algo_name: DQN
device: cuda
env_name: Acrobot-v1
load_checkpoint: false
load_path: Train_CartPole-v1_DQN_20221026-054757
max_steps: 100000
mode: train
save_fig: true
seed: 1
show_fig: false
test_eps: 10
train_eps: 100
algo_cfg:
batch_size: 128
buffer_size: 200000
epsilon_decay: 500
epsilon_end: 0.01
epsilon_start: 0.95
gamma: 0.95
hidden_dim: 256
lr: 0.002
target_update: 4

View File

@@ -0,0 +1,104 @@
2022-10-26 09:46:45 - r - INFO: - n_states: 6, n_actions: 3
2022-10-26 09:46:48 - r - INFO: - Start training!
2022-10-26 09:46:48 - r - INFO: - Env: Acrobot-v1, Algorithm: DQN, Device: cuda
2022-10-26 09:46:50 - r - INFO: - Episode: 1/100, Reward: -861.00: Epislon: 0.178
2022-10-26 09:46:50 - r - INFO: - Episode: 2/100, Reward: -252.00: Epislon: 0.111
2022-10-26 09:46:50 - r - INFO: - Episode: 3/100, Reward: -196.00: Epislon: 0.078
2022-10-26 09:46:51 - r - INFO: - Episode: 4/100, Reward: -390.00: Epislon: 0.041
2022-10-26 09:46:52 - r - INFO: - Episode: 5/100, Reward: -371.00: Epislon: 0.025
2022-10-26 09:46:52 - r - INFO: - Episode: 6/100, Reward: -237.00: Epislon: 0.019
2022-10-26 09:46:52 - r - INFO: - Episode: 7/100, Reward: -227.00: Epislon: 0.016
2022-10-26 09:46:53 - r - INFO: - Episode: 8/100, Reward: -228.00: Epislon: 0.014
2022-10-26 09:46:53 - r - INFO: - Episode: 9/100, Reward: -305.00: Epislon: 0.012
2022-10-26 09:46:54 - r - INFO: - Episode: 10/100, Reward: -234.00: Epislon: 0.011
2022-10-26 09:46:54 - r - INFO: - Episode: 11/100, Reward: -204.00: Epislon: 0.011
2022-10-26 09:46:55 - r - INFO: - Episode: 12/100, Reward: -277.00: Epislon: 0.010
2022-10-26 09:46:55 - r - INFO: - Episode: 13/100, Reward: -148.00: Epislon: 0.010
2022-10-26 09:46:56 - r - INFO: - Episode: 14/100, Reward: -372.00: Epislon: 0.010
2022-10-26 09:46:56 - r - INFO: - Episode: 15/100, Reward: -273.00: Epislon: 0.010
2022-10-26 09:46:56 - r - INFO: - Episode: 16/100, Reward: -105.00: Epislon: 0.010
2022-10-26 09:46:56 - r - INFO: - Episode: 17/100, Reward: -79.00: Epislon: 0.010
2022-10-26 09:46:57 - r - INFO: - Episode: 18/100, Reward: -112.00: Epislon: 0.010
2022-10-26 09:46:57 - r - INFO: - Episode: 19/100, Reward: -276.00: Epislon: 0.010
2022-10-26 09:46:57 - r - INFO: - Episode: 20/100, Reward: -148.00: Epislon: 0.010
2022-10-26 09:46:58 - r - INFO: - Episode: 21/100, Reward: -201.00: Epislon: 0.010
2022-10-26 09:46:58 - r - INFO: - Episode: 22/100, Reward: -173.00: Epislon: 0.010
2022-10-26 09:46:58 - r - INFO: - Episode: 23/100, Reward: -226.00: Epislon: 0.010
2022-10-26 09:46:59 - r - INFO: - Episode: 24/100, Reward: -154.00: Epislon: 0.010
2022-10-26 09:46:59 - r - INFO: - Episode: 25/100, Reward: -269.00: Epislon: 0.010
2022-10-26 09:46:59 - r - INFO: - Episode: 26/100, Reward: -191.00: Epislon: 0.010
2022-10-26 09:47:00 - r - INFO: - Episode: 27/100, Reward: -177.00: Epislon: 0.010
2022-10-26 09:47:00 - r - INFO: - Episode: 28/100, Reward: -209.00: Epislon: 0.010
2022-10-26 09:47:00 - r - INFO: - Episode: 29/100, Reward: -116.00: Epislon: 0.010
2022-10-26 09:47:00 - r - INFO: - Episode: 30/100, Reward: -117.00: Epislon: 0.010
2022-10-26 09:47:01 - r - INFO: - Episode: 31/100, Reward: -121.00: Epislon: 0.010
2022-10-26 09:47:01 - r - INFO: - Episode: 32/100, Reward: -208.00: Epislon: 0.010
2022-10-26 09:47:01 - r - INFO: - Episode: 33/100, Reward: -147.00: Epislon: 0.010
2022-10-26 09:47:02 - r - INFO: - Episode: 34/100, Reward: -104.00: Epislon: 0.010
2022-10-26 09:47:02 - r - INFO: - Episode: 35/100, Reward: -161.00: Epislon: 0.010
2022-10-26 09:47:02 - r - INFO: - Episode: 36/100, Reward: -144.00: Epislon: 0.010
2022-10-26 09:47:02 - r - INFO: - Episode: 37/100, Reward: -131.00: Epislon: 0.010
2022-10-26 09:47:03 - r - INFO: - Episode: 38/100, Reward: -226.00: Epislon: 0.010
2022-10-26 09:47:03 - r - INFO: - Episode: 39/100, Reward: -117.00: Epislon: 0.010
2022-10-26 09:47:03 - r - INFO: - Episode: 40/100, Reward: -344.00: Epislon: 0.010
2022-10-26 09:47:04 - r - INFO: - Episode: 41/100, Reward: -123.00: Epislon: 0.010
2022-10-26 09:47:04 - r - INFO: - Episode: 42/100, Reward: -232.00: Epislon: 0.010
2022-10-26 09:47:04 - r - INFO: - Episode: 43/100, Reward: -190.00: Epislon: 0.010
2022-10-26 09:47:05 - r - INFO: - Episode: 44/100, Reward: -176.00: Epislon: 0.010
2022-10-26 09:47:05 - r - INFO: - Episode: 45/100, Reward: -139.00: Epislon: 0.010
2022-10-26 09:47:06 - r - INFO: - Episode: 46/100, Reward: -410.00: Epislon: 0.010
2022-10-26 09:47:06 - r - INFO: - Episode: 47/100, Reward: -115.00: Epislon: 0.010
2022-10-26 09:47:06 - r - INFO: - Episode: 48/100, Reward: -118.00: Epislon: 0.010
2022-10-26 09:47:06 - r - INFO: - Episode: 49/100, Reward: -113.00: Epislon: 0.010
2022-10-26 09:47:07 - r - INFO: - Episode: 50/100, Reward: -355.00: Epislon: 0.010
2022-10-26 09:47:07 - r - INFO: - Episode: 51/100, Reward: -110.00: Epislon: 0.010
2022-10-26 09:47:07 - r - INFO: - Episode: 52/100, Reward: -148.00: Epislon: 0.010
2022-10-26 09:47:08 - r - INFO: - Episode: 53/100, Reward: -135.00: Epislon: 0.010
2022-10-26 09:47:08 - r - INFO: - Episode: 54/100, Reward: -220.00: Epislon: 0.010
2022-10-26 09:47:08 - r - INFO: - Episode: 55/100, Reward: -157.00: Epislon: 0.010
2022-10-26 09:47:09 - r - INFO: - Episode: 56/100, Reward: -130.00: Epislon: 0.010
2022-10-26 09:47:09 - r - INFO: - Episode: 57/100, Reward: -150.00: Epislon: 0.010
2022-10-26 09:47:09 - r - INFO: - Episode: 58/100, Reward: -254.00: Epislon: 0.010
2022-10-26 09:47:10 - r - INFO: - Episode: 59/100, Reward: -148.00: Epislon: 0.010
2022-10-26 09:47:10 - r - INFO: - Episode: 60/100, Reward: -108.00: Epislon: 0.010
2022-10-26 09:47:10 - r - INFO: - Episode: 61/100, Reward: -152.00: Epislon: 0.010
2022-10-26 09:47:10 - r - INFO: - Episode: 62/100, Reward: -107.00: Epislon: 0.010
2022-10-26 09:47:10 - r - INFO: - Episode: 63/100, Reward: -110.00: Epislon: 0.010
2022-10-26 09:47:11 - r - INFO: - Episode: 64/100, Reward: -266.00: Epislon: 0.010
2022-10-26 09:47:11 - r - INFO: - Episode: 65/100, Reward: -344.00: Epislon: 0.010
2022-10-26 09:47:12 - r - INFO: - Episode: 66/100, Reward: -93.00: Epislon: 0.010
2022-10-26 09:47:12 - r - INFO: - Episode: 67/100, Reward: -113.00: Epislon: 0.010
2022-10-26 09:47:12 - r - INFO: - Episode: 68/100, Reward: -191.00: Epislon: 0.010
2022-10-26 09:47:12 - r - INFO: - Episode: 69/100, Reward: -102.00: Epislon: 0.010
2022-10-26 09:47:13 - r - INFO: - Episode: 70/100, Reward: -187.00: Epislon: 0.010
2022-10-26 09:47:13 - r - INFO: - Episode: 71/100, Reward: -158.00: Epislon: 0.010
2022-10-26 09:47:13 - r - INFO: - Episode: 72/100, Reward: -166.00: Epislon: 0.010
2022-10-26 09:47:14 - r - INFO: - Episode: 73/100, Reward: -202.00: Epislon: 0.010
2022-10-26 09:47:14 - r - INFO: - Episode: 74/100, Reward: -179.00: Epislon: 0.010
2022-10-26 09:47:14 - r - INFO: - Episode: 75/100, Reward: -150.00: Epislon: 0.010
2022-10-26 09:47:14 - r - INFO: - Episode: 76/100, Reward: -170.00: Epislon: 0.010
2022-10-26 09:47:15 - r - INFO: - Episode: 77/100, Reward: -149.00: Epislon: 0.010
2022-10-26 09:47:15 - r - INFO: - Episode: 78/100, Reward: -119.00: Epislon: 0.010
2022-10-26 09:47:15 - r - INFO: - Episode: 79/100, Reward: -115.00: Epislon: 0.010
2022-10-26 09:47:15 - r - INFO: - Episode: 80/100, Reward: -97.00: Epislon: 0.010
2022-10-26 09:47:16 - r - INFO: - Episode: 81/100, Reward: -153.00: Epislon: 0.010
2022-10-26 09:47:16 - r - INFO: - Episode: 82/100, Reward: -97.00: Epislon: 0.010
2022-10-26 09:47:16 - r - INFO: - Episode: 83/100, Reward: -211.00: Epislon: 0.010
2022-10-26 09:47:16 - r - INFO: - Episode: 84/100, Reward: -195.00: Epislon: 0.010
2022-10-26 09:47:17 - r - INFO: - Episode: 85/100, Reward: -125.00: Epislon: 0.010
2022-10-26 09:47:17 - r - INFO: - Episode: 86/100, Reward: -155.00: Epislon: 0.010
2022-10-26 09:47:17 - r - INFO: - Episode: 87/100, Reward: -151.00: Epislon: 0.010
2022-10-26 09:47:18 - r - INFO: - Episode: 88/100, Reward: -194.00: Epislon: 0.010
2022-10-26 09:47:18 - r - INFO: - Episode: 89/100, Reward: -188.00: Epislon: 0.010
2022-10-26 09:47:18 - r - INFO: - Episode: 90/100, Reward: -195.00: Epislon: 0.010
2022-10-26 09:47:19 - r - INFO: - Episode: 91/100, Reward: -141.00: Epislon: 0.010
2022-10-26 09:47:19 - r - INFO: - Episode: 92/100, Reward: -132.00: Epislon: 0.010
2022-10-26 09:47:19 - r - INFO: - Episode: 93/100, Reward: -127.00: Epislon: 0.010
2022-10-26 09:47:19 - r - INFO: - Episode: 94/100, Reward: -195.00: Epislon: 0.010
2022-10-26 09:47:20 - r - INFO: - Episode: 95/100, Reward: -152.00: Epislon: 0.010
2022-10-26 09:47:20 - r - INFO: - Episode: 96/100, Reward: -145.00: Epislon: 0.010
2022-10-26 09:47:20 - r - INFO: - Episode: 97/100, Reward: -123.00: Epislon: 0.010
2022-10-26 09:47:20 - r - INFO: - Episode: 98/100, Reward: -176.00: Epislon: 0.010
2022-10-26 09:47:21 - r - INFO: - Episode: 99/100, Reward: -180.00: Epislon: 0.010
2022-10-26 09:47:21 - r - INFO: - Episode: 100/100, Reward: -124.00: Epislon: 0.010
2022-10-26 09:47:21 - r - INFO: - Finish training!

Binary file not shown (After: 55 KiB).

View File

@@ -0,0 +1,101 @@
episodes,rewards,steps
0,-861.0,862
1,-252.0,253
2,-196.0,197
3,-390.0,391
4,-371.0,372
5,-237.0,238
6,-227.0,228
7,-228.0,229
8,-305.0,306
9,-234.0,235
10,-204.0,205
11,-277.0,278
12,-148.0,149
13,-372.0,373
14,-273.0,274
15,-105.0,106
16,-79.0,80
17,-112.0,113
18,-276.0,277
19,-148.0,149
20,-201.0,202
21,-173.0,174
22,-226.0,227
23,-154.0,155
24,-269.0,270
25,-191.0,192
26,-177.0,178
27,-209.0,210
28,-116.0,117
29,-117.0,118
30,-121.0,122
31,-208.0,209
32,-147.0,148
33,-104.0,105
34,-161.0,162
35,-144.0,145
36,-131.0,132
37,-226.0,227
38,-117.0,118
39,-344.0,345
40,-123.0,124
41,-232.0,233
42,-190.0,191
43,-176.0,177
44,-139.0,140
45,-410.0,411
46,-115.0,116
47,-118.0,119
48,-113.0,114
49,-355.0,356
50,-110.0,111
51,-148.0,149
52,-135.0,136
53,-220.0,221
54,-157.0,158
55,-130.0,131
56,-150.0,151
57,-254.0,255
58,-148.0,149
59,-108.0,109
60,-152.0,153
61,-107.0,108
62,-110.0,111
63,-266.0,267
64,-344.0,345
65,-93.0,94
66,-113.0,114
67,-191.0,192
68,-102.0,103
69,-187.0,188
70,-158.0,159
71,-166.0,167
72,-202.0,203
73,-179.0,180
74,-150.0,151
75,-170.0,171
76,-149.0,150
77,-119.0,120
78,-115.0,116
79,-97.0,98
80,-153.0,154
81,-97.0,98
82,-211.0,212
83,-195.0,196
84,-125.0,126
85,-155.0,156
86,-151.0,152
87,-194.0,195
88,-188.0,189
89,-195.0,196
90,-141.0,142
91,-132.0,133
92,-127.0,128
93,-195.0,196
94,-152.0,153
95,-145.0,146
96,-123.0,124
97,-176.0,177
98,-180.0,181
99,-124.0,125

View File

@@ -0,0 +1,25 @@
general_cfg:
algo_name: DQN
device: cuda
env_name: CartPole-v1
eval_eps: 10
eval_per_episode: 5
load_checkpoint: false
load_path: tasks
max_steps: 200
mode: train
save_fig: true
seed: 1
show_fig: false
test_eps: 10
train_eps: 100
algo_cfg:
batch_size: 64
buffer_size: 100000
epsilon_decay: 500
epsilon_end: 0.01
epsilon_start: 0.95
gamma: 0.95
hidden_dim: 256
lr: 0.0001
target_update: 800

View File

@@ -0,0 +1,116 @@
2022-10-31 00:12:01 - r - INFO: - n_states: 4, n_actions: 2
2022-10-31 00:12:01 - r - INFO: - Start training!
2022-10-31 00:12:01 - r - INFO: - Env: CartPole-v1, Algorithm: DQN, Device: cuda
2022-10-31 00:12:04 - r - INFO: - Episode: 1/100, Reward: 18.0, Step: 18
2022-10-31 00:12:04 - r - INFO: - Episode: 2/100, Reward: 35.0, Step: 35
2022-10-31 00:12:04 - r - INFO: - Episode: 3/100, Reward: 13.0, Step: 13
2022-10-31 00:12:04 - r - INFO: - Episode: 4/100, Reward: 32.0, Step: 32
2022-10-31 00:12:04 - r - INFO: - Episode: 5/100, Reward: 16.0, Step: 16
2022-10-31 00:12:04 - r - INFO: - Current episode 5 has the best eval reward: 15.30
2022-10-31 00:12:04 - r - INFO: - Episode: 6/100, Reward: 12.0, Step: 12
2022-10-31 00:12:04 - r - INFO: - Episode: 7/100, Reward: 13.0, Step: 13
2022-10-31 00:12:04 - r - INFO: - Episode: 8/100, Reward: 15.0, Step: 15
2022-10-31 00:12:04 - r - INFO: - Episode: 9/100, Reward: 11.0, Step: 11
2022-10-31 00:12:04 - r - INFO: - Episode: 10/100, Reward: 15.0, Step: 15
2022-10-31 00:12:04 - r - INFO: - Episode: 11/100, Reward: 9.0, Step: 9
2022-10-31 00:12:04 - r - INFO: - Episode: 12/100, Reward: 13.0, Step: 13
2022-10-31 00:12:04 - r - INFO: - Episode: 13/100, Reward: 13.0, Step: 13
2022-10-31 00:12:04 - r - INFO: - Episode: 14/100, Reward: 10.0, Step: 10
2022-10-31 00:12:04 - r - INFO: - Episode: 15/100, Reward: 9.0, Step: 9
2022-10-31 00:12:04 - r - INFO: - Episode: 16/100, Reward: 24.0, Step: 24
2022-10-31 00:12:04 - r - INFO: - Episode: 17/100, Reward: 8.0, Step: 8
2022-10-31 00:12:04 - r - INFO: - Episode: 18/100, Reward: 10.0, Step: 10
2022-10-31 00:12:04 - r - INFO: - Episode: 19/100, Reward: 11.0, Step: 11
2022-10-31 00:12:04 - r - INFO: - Episode: 20/100, Reward: 13.0, Step: 13
2022-10-31 00:12:04 - r - INFO: - Episode: 21/100, Reward: 12.0, Step: 12
2022-10-31 00:12:04 - r - INFO: - Episode: 22/100, Reward: 11.0, Step: 11
2022-10-31 00:12:04 - r - INFO: - Episode: 23/100, Reward: 9.0, Step: 9
2022-10-31 00:12:04 - r - INFO: - Episode: 24/100, Reward: 21.0, Step: 21
2022-10-31 00:12:05 - r - INFO: - Episode: 25/100, Reward: 14.0, Step: 14
2022-10-31 00:12:05 - r - INFO: - Episode: 26/100, Reward: 12.0, Step: 12
2022-10-31 00:12:05 - r - INFO: - Episode: 27/100, Reward: 9.0, Step: 9
2022-10-31 00:12:05 - r - INFO: - Episode: 28/100, Reward: 11.0, Step: 11
2022-10-31 00:12:05 - r - INFO: - Episode: 29/100, Reward: 12.0, Step: 12
2022-10-31 00:12:05 - r - INFO: - Episode: 30/100, Reward: 13.0, Step: 13
2022-10-31 00:12:05 - r - INFO: - Episode: 31/100, Reward: 10.0, Step: 10
2022-10-31 00:12:05 - r - INFO: - Episode: 32/100, Reward: 13.0, Step: 13
2022-10-31 00:12:05 - r - INFO: - Episode: 33/100, Reward: 18.0, Step: 18
2022-10-31 00:12:05 - r - INFO: - Episode: 34/100, Reward: 9.0, Step: 9
2022-10-31 00:12:05 - r - INFO: - Episode: 35/100, Reward: 10.0, Step: 10
2022-10-31 00:12:05 - r - INFO: - Episode: 36/100, Reward: 9.0, Step: 9
2022-10-31 00:12:05 - r - INFO: - Episode: 37/100, Reward: 10.0, Step: 10
2022-10-31 00:12:05 - r - INFO: - Episode: 38/100, Reward: 10.0, Step: 10
2022-10-31 00:12:05 - r - INFO: - Episode: 39/100, Reward: 10.0, Step: 10
2022-10-31 00:12:05 - r - INFO: - Episode: 40/100, Reward: 8.0, Step: 8
2022-10-31 00:12:06 - r - INFO: - Episode: 41/100, Reward: 9.0, Step: 9
2022-10-31 00:12:06 - r - INFO: - Episode: 42/100, Reward: 9.0, Step: 9
2022-10-31 00:12:06 - r - INFO: - Episode: 43/100, Reward: 20.0, Step: 20
2022-10-31 00:12:06 - r - INFO: - Episode: 44/100, Reward: 16.0, Step: 16
2022-10-31 00:12:06 - r - INFO: - Episode: 45/100, Reward: 17.0, Step: 17
2022-10-31 00:12:06 - r - INFO: - Current episode 45 has the best eval reward: 17.50
2022-10-31 00:12:06 - r - INFO: - Episode: 46/100, Reward: 17.0, Step: 17
2022-10-31 00:12:06 - r - INFO: - Episode: 47/100, Reward: 17.0, Step: 17
2022-10-31 00:12:06 - r - INFO: - Episode: 48/100, Reward: 18.0, Step: 18
2022-10-31 00:12:06 - r - INFO: - Episode: 49/100, Reward: 25.0, Step: 25
2022-10-31 00:12:06 - r - INFO: - Episode: 50/100, Reward: 31.0, Step: 31
2022-10-31 00:12:06 - r - INFO: - Current episode 50 has the best eval reward: 24.80
2022-10-31 00:12:06 - r - INFO: - Episode: 51/100, Reward: 22.0, Step: 22
2022-10-31 00:12:06 - r - INFO: - Episode: 52/100, Reward: 39.0, Step: 39
2022-10-31 00:12:06 - r - INFO: - Episode: 53/100, Reward: 36.0, Step: 36
2022-10-31 00:12:06 - r - INFO: - Episode: 54/100, Reward: 26.0, Step: 26
2022-10-31 00:12:07 - r - INFO: - Episode: 55/100, Reward: 33.0, Step: 33
2022-10-31 00:12:07 - r - INFO: - Current episode 55 has the best eval reward: 38.70
2022-10-31 00:12:07 - r - INFO: - Episode: 56/100, Reward: 56.0, Step: 56
2022-10-31 00:12:07 - r - INFO: - Episode: 57/100, Reward: 112.0, Step: 112
2022-10-31 00:12:07 - r - INFO: - Episode: 58/100, Reward: 101.0, Step: 101
2022-10-31 00:12:08 - r - INFO: - Episode: 59/100, Reward: 69.0, Step: 69
2022-10-31 00:12:08 - r - INFO: - Episode: 60/100, Reward: 75.0, Step: 75
2022-10-31 00:12:08 - r - INFO: - Episode: 61/100, Reward: 182.0, Step: 182
2022-10-31 00:12:09 - r - INFO: - Episode: 62/100, Reward: 52.0, Step: 52
2022-10-31 00:12:09 - r - INFO: - Episode: 63/100, Reward: 67.0, Step: 67
2022-10-31 00:12:09 - r - INFO: - Episode: 64/100, Reward: 53.0, Step: 53
2022-10-31 00:12:09 - r - INFO: - Episode: 65/100, Reward: 119.0, Step: 119
2022-10-31 00:12:10 - r - INFO: - Current episode 65 has the best eval reward: 171.90
2022-10-31 00:12:10 - r - INFO: - Episode: 66/100, Reward: 200.0, Step: 200
2022-10-31 00:12:10 - r - INFO: - Episode: 67/100, Reward: 74.0, Step: 74
2022-10-31 00:12:11 - r - INFO: - Episode: 68/100, Reward: 138.0, Step: 138
2022-10-31 00:12:11 - r - INFO: - Episode: 69/100, Reward: 149.0, Step: 149
2022-10-31 00:12:12 - r - INFO: - Episode: 70/100, Reward: 144.0, Step: 144
2022-10-31 00:12:12 - r - INFO: - Current episode 70 has the best eval reward: 173.70
2022-10-31 00:12:13 - r - INFO: - Episode: 71/100, Reward: 200.0, Step: 200
2022-10-31 00:12:13 - r - INFO: - Episode: 72/100, Reward: 198.0, Step: 198
2022-10-31 00:12:14 - r - INFO: - Episode: 73/100, Reward: 200.0, Step: 200
2022-10-31 00:12:14 - r - INFO: - Episode: 74/100, Reward: 200.0, Step: 200
2022-10-31 00:12:15 - r - INFO: - Episode: 75/100, Reward: 200.0, Step: 200
2022-10-31 00:12:16 - r - INFO: - Current episode 75 has the best eval reward: 200.00
2022-10-31 00:12:16 - r - INFO: - Episode: 76/100, Reward: 200.0, Step: 200
2022-10-31 00:12:17 - r - INFO: - Episode: 77/100, Reward: 200.0, Step: 200
2022-10-31 00:12:17 - r - INFO: - Episode: 78/100, Reward: 200.0, Step: 200
2022-10-31 00:12:18 - r - INFO: - Episode: 79/100, Reward: 200.0, Step: 200
2022-10-31 00:12:19 - r - INFO: - Episode: 80/100, Reward: 200.0, Step: 200
2022-10-31 00:12:19 - r - INFO: - Current episode 80 has the best eval reward: 200.00
2022-10-31 00:12:20 - r - INFO: - Episode: 81/100, Reward: 200.0, Step: 200
2022-10-31 00:12:20 - r - INFO: - Episode: 82/100, Reward: 200.0, Step: 200
2022-10-31 00:12:21 - r - INFO: - Episode: 83/100, Reward: 200.0, Step: 200
2022-10-31 00:12:21 - r - INFO: - Episode: 84/100, Reward: 200.0, Step: 200
2022-10-31 00:12:22 - r - INFO: - Episode: 85/100, Reward: 200.0, Step: 200
2022-10-31 00:12:23 - r - INFO: - Current episode 85 has the best eval reward: 200.00
2022-10-31 00:12:23 - r - INFO: - Episode: 86/100, Reward: 200.0, Step: 200
2022-10-31 00:12:24 - r - INFO: - Episode: 87/100, Reward: 200.0, Step: 200
2022-10-31 00:12:25 - r - INFO: - Episode: 88/100, Reward: 200.0, Step: 200
2022-10-31 00:12:25 - r - INFO: - Episode: 89/100, Reward: 200.0, Step: 200
2022-10-31 00:12:26 - r - INFO: - Episode: 90/100, Reward: 200.0, Step: 200
2022-10-31 00:12:27 - r - INFO: - Current episode 90 has the best eval reward: 200.00
2022-10-31 00:12:27 - r - INFO: - Episode: 91/100, Reward: 200.0, Step: 200
2022-10-31 00:12:28 - r - INFO: - Episode: 92/100, Reward: 200.0, Step: 200
2022-10-31 00:12:28 - r - INFO: - Episode: 93/100, Reward: 200.0, Step: 200
2022-10-31 00:12:29 - r - INFO: - Episode: 94/100, Reward: 200.0, Step: 200
2022-10-31 00:12:29 - r - INFO: - Episode: 95/100, Reward: 200.0, Step: 200
2022-10-31 00:12:30 - r - INFO: - Current episode 95 has the best eval reward: 200.00
2022-10-31 00:12:31 - r - INFO: - Episode: 96/100, Reward: 200.0, Step: 200
2022-10-31 00:12:31 - r - INFO: - Episode: 97/100, Reward: 200.0, Step: 200
2022-10-31 00:12:32 - r - INFO: - Episode: 98/100, Reward: 200.0, Step: 200
2022-10-31 00:12:32 - r - INFO: - Episode: 99/100, Reward: 200.0, Step: 200
2022-10-31 00:12:33 - r - INFO: - Episode: 100/100, Reward: 200.0, Step: 200
2022-10-31 00:12:33 - r - INFO: - Current episode 100 has the best eval reward: 200.00
2022-10-31 00:12:33 - r - INFO: - Finish training!

Binary file not shown (After: 43 KiB).

View File

@@ -0,0 +1,101 @@
episodes,rewards,steps
0,18.0,18
1,35.0,35
2,13.0,13
3,32.0,32
4,16.0,16
5,12.0,12
6,13.0,13
7,15.0,15
8,11.0,11
9,15.0,15
10,9.0,9
11,13.0,13
12,13.0,13
13,10.0,10
14,9.0,9
15,24.0,24
16,8.0,8
17,10.0,10
18,11.0,11
19,13.0,13
20,12.0,12
21,11.0,11
22,9.0,9
23,21.0,21
24,14.0,14
25,12.0,12
26,9.0,9
27,11.0,11
28,12.0,12
29,13.0,13
30,10.0,10
31,13.0,13
32,18.0,18
33,9.0,9
34,10.0,10
35,9.0,9
36,10.0,10
37,10.0,10
38,10.0,10
39,8.0,8
40,9.0,9
41,9.0,9
42,20.0,20
43,16.0,16
44,17.0,17
45,17.0,17
46,17.0,17
47,18.0,18
48,25.0,25
49,31.0,31
50,22.0,22
51,39.0,39
52,36.0,36
53,26.0,26
54,33.0,33
55,56.0,56
56,112.0,112
57,101.0,101
58,69.0,69
59,75.0,75
60,182.0,182
61,52.0,52
62,67.0,67
63,53.0,53
64,119.0,119
65,200.0,200
66,74.0,74
67,138.0,138
68,149.0,149
69,144.0,144
70,200.0,200
71,198.0,198
72,200.0,200
73,200.0,200
74,200.0,200
75,200.0,200
76,200.0,200
77,200.0,200
78,200.0,200
79,200.0,200
80,200.0,200
81,200.0,200
82,200.0,200
83,200.0,200
84,200.0,200
85,200.0,200
86,200.0,200
87,200.0,200
88,200.0,200
89,200.0,200
90,200.0,200
91,200.0,200
92,200.0,200
93,200.0,200
94,200.0,200
95,200.0,200
96,200.0,200
97,200.0,200
98,200.0,200
99,200.0,200

View File

@@ -0,0 +1,22 @@
general_cfg:
algo_name: DQN
device: cuda
env_name: Acrobot-v1
mode: test
load_checkpoint: true
load_path: Train_Acrobot-v1_DQN_20221026-094645
max_steps: 100000
save_fig: true
seed: 1
show_fig: false
test_eps: 10
train_eps: 100
algo_cfg:
batch_size: 128
buffer_size: 200000
epsilon_decay: 500
epsilon_end: 0.01
epsilon_start: 0.95
gamma: 0.95
lr: 0.002
target_update: 4

View File

@@ -0,0 +1,22 @@
general_cfg:
algo_name: DQN
device: cuda
env_name: Acrobot-v1
mode: train
load_checkpoint: false
load_path: Train_CartPole-v1_DQN_20221026-054757
max_steps: 100000
save_fig: true
seed: 1
show_fig: false
test_eps: 10
train_eps: 100
algo_cfg:
batch_size: 128
buffer_size: 200000
epsilon_decay: 500
epsilon_end: 0.01
epsilon_start: 0.95
gamma: 0.95
lr: 0.002
target_update: 4

View File

@@ -0,0 +1,22 @@
general_cfg:
algo_name: DQN
device: cuda
env_name: CartPole-v1
mode: test
load_checkpoint: true
load_path: Train_CartPole-v1_DQN_20221031-001201
max_steps: 200
save_fig: true
seed: 0
show_fig: false
test_eps: 10
train_eps: 100
algo_cfg:
batch_size: 64
buffer_size: 100000
epsilon_decay: 500
epsilon_end: 0.01
epsilon_start: 0.95
gamma: 0.95
lr: 0.0001
target_update: 4

View File

@@ -0,0 +1,22 @@
general_cfg:
algo_name: DQN
device: cuda
env_name: CartPole-v1
mode: train
load_checkpoint: false
load_path: Train_CartPole-v1_DQN_20221026-054757
max_steps: 200
save_fig: true
seed: 0
show_fig: false
test_eps: 10
train_eps: 200
algo_cfg:
batch_size: 64
buffer_size: 100000
epsilon_decay: 500
epsilon_end: 0.01
epsilon_start: 0.95
gamma: 0.95
lr: 0.0001
target_update: 4

View File

@@ -0,0 +1,38 @@
#!/usr/bin/env python
# coding=utf-8
'''
Author: JiangJi
Email: johnjim0816@gmail.com
Date: 2022-10-30 00:37:33
LastEditor: JiangJi
LastEditTime: 2022-10-31 00:11:57
Description: default parameters of DQN
'''
from common.config import GeneralConfig,AlgoConfig
class GeneralConfigDQN(GeneralConfig):
def __init__(self) -> None:
self.env_name = "CartPole-v1" # name of environment
self.algo_name = "DQN" # name of algorithm
self.mode = "train" # train or test
self.seed = 1 # random seed
self.device = "cuda" # device to use
self.train_eps = 100 # number of episodes for training
self.test_eps = 10 # number of episodes for testing
self.max_steps = 200 # max steps for each episode
self.load_checkpoint = False
self.load_path = "tasks" # path to load model
self.show_fig = False # show figure or not
self.save_fig = True # save figure or not
class AlgoConfigDQN(AlgoConfig):
def __init__(self) -> None:
# set epsilon_start=epsilon_end can obtain fixed epsilon=epsilon_end
self.epsilon_start = 0.95 # epsilon start value
self.epsilon_end = 0.01 # epsilon end value
self.epsilon_decay = 500 # epsilon decay rate
self.hidden_dim = 256 # hidden_dim for MLP
self.gamma = 0.95 # discount factor
self.lr = 0.0001 # learning rate
self.buffer_size = 100000 # size of replay buffer
self.batch_size = 64 # batch size
self.target_update = 800 # target network update frequency per steps
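The comment above notes that setting epsilon_start equal to epsilon_end pins epsilon to a constant. These three fields drive the exponential decay used in DQN.sample_action further down in this commit; a small worked sketch of the schedule with the default values:

# Sketch of the exponential epsilon schedule implied by epsilon_start/end/decay,
# matching the formula in DQN.sample_action below.
import math

epsilon_start, epsilon_end, epsilon_decay = 0.95, 0.01, 500

def epsilon_at(sample_count: int) -> float:
    return epsilon_end + (epsilon_start - epsilon_end) * math.exp(-sample_count / epsilon_decay)

print(round(epsilon_at(0), 3))      # 0.95  (mostly exploration at the start)
print(round(epsilon_at(500), 3))    # ~0.356
print(round(epsilon_at(2500), 3))   # ~0.016 (near epsilon_end, as seen in the training logs)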

View File

@@ -5,7 +5,7 @@
@Email: johnjim0816@gmail.com
@Date: 2020-06-12 00:50:49
@LastEditor: John
LastEditTime: 2022-08-29 23:30:08
LastEditTime: 2022-10-31 00:07:19
@Description:
@Environment: python 3.7.7
'''
@@ -22,27 +22,28 @@ import numpy as np
class DQN:
def __init__(self,model,memory,cfg):
self.n_actions = cfg['n_actions']
self.device = torch.device(cfg['device'])
self.gamma = cfg['gamma']
self.n_actions = cfg.n_actions
self.device = torch.device(cfg.device)
self.gamma = cfg.gamma
## e-greedy parameters
self.sample_count = 0 # sample count for epsilon decay
self.epsilon = cfg['epsilon_start']
self.epsilon = cfg.epsilon_start
self.sample_count = 0
self.epsilon_start = cfg['epsilon_start']
self.epsilon_end = cfg['epsilon_end']
self.epsilon_decay = cfg['epsilon_decay']
self.batch_size = cfg['batch_size']
self.epsilon_start = cfg.epsilon_start
self.epsilon_end = cfg.epsilon_end
self.epsilon_decay = cfg.epsilon_decay
self.batch_size = cfg.batch_size
self.target_update = cfg.target_update
self.policy_net = model.to(self.device)
self.target_net = model.to(self.device)
## copy parameters from policy net to target net
for target_param, param in zip(self.target_net.parameters(),self.policy_net.parameters()):
target_param.data.copy_(param.data)
# self.target_net.load_state_dict(self.policy_net.state_dict()) # or use this to copy parameters
self.optimizer = optim.Adam(self.policy_net.parameters(), lr=cfg['lr'])
self.optimizer = optim.Adam(self.policy_net.parameters(), lr=cfg.lr)
self.memory = memory
self.update_flag = False
def sample_action(self, state):
''' sample action with e-greedy policy
'''
@@ -58,6 +59,21 @@ class DQN:
else:
action = random.randrange(self.n_actions)
return action
# @torch.no_grad()
# def sample_action(self, state):
# ''' sample action with e-greedy policy
# '''
# self.sample_count += 1
# # epsilon must decay(linear,exponential and etc.) for balancing exploration and exploitation
# self.epsilon = self.epsilon_end + (self.epsilon_start - self.epsilon_end) * \
# math.exp(-1. * self.sample_count / self.epsilon_decay)
# if random.random() > self.epsilon:
# state = torch.tensor(state, device=self.device, dtype=torch.float32).unsqueeze(dim=0)
# q_values = self.policy_net(state)
# action = q_values.max(1)[1].item() # choose action corresponding to the maximum q value
# else:
# action = random.randrange(self.n_actions)
# return action
def predict_action(self,state):
''' predict action
'''
@@ -99,14 +115,16 @@ class DQN:
for param in self.policy_net.parameters():
param.grad.data.clamp_(-1, 1)
self.optimizer.step()
if self.sample_count % self.target_update == 0: # target net update, target_update means "C" in pseudocode
self.target_net.load_state_dict(self.policy_net.state_dict())
def save_model(self, path):
def save_model(self, fpath):
from pathlib import Path
# create path
Path(path).mkdir(parents=True, exist_ok=True)
torch.save(self.target_net.state_dict(), f"{path}/checkpoint.pt")
Path(fpath).mkdir(parents=True, exist_ok=True)
torch.save(self.target_net.state_dict(), f"{fpath}/checkpoint.pt")
def load_model(self, path):
self.target_net.load_state_dict(torch.load(f"{path}/checkpoint.pt"))
def load_model(self, fpath):
self.target_net.load_state_dict(torch.load(f"{fpath}/checkpoint.pt"))
for target_param, param in zip(self.target_net.parameters(), self.policy_net.parameters()):
param.data.copy_(target_param.data)
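Only fragments of DQN.update appear in this hunk (the gradient clamp, the optimizer step, and the hard target-net sync every target_update steps). A minimal sketch of the TD-target computation such an update step typically performs; the batch layout is an assumption, since the ReplayBuffer API from common.memories is not shown in this diff:

# Sketch (not the repo's exact update()): one DQN learning step with a target net.
# Assumes states/actions/rewards/next_states/dones are already torch tensors
# (dones as 0/1 floats).
import torch
import torch.nn as nn

def dqn_update_step(policy_net, target_net, optimizer, batch, gamma=0.95):
    states, actions, rewards, next_states, dones = batch
    q_values = policy_net(states).gather(1, actions.long().unsqueeze(1))   # Q(s, a)
    with torch.no_grad():
        next_q = target_net(next_states).max(1)[0].unsqueeze(1)           # max_a' Q_target(s', a')
        expected_q = rewards.unsqueeze(1) + gamma * next_q * (1 - dones.unsqueeze(1))
    loss = nn.MSELoss()(q_values, expected_q)
    optimizer.zero_grad()
    loss.backward()
    for p in policy_net.parameters():        # same gradient clamp as in the diff above
        p.grad.data.clamp_(-1, 1)
    optimizer.step()
    return loss.item()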

View File

@@ -1 +0,0 @@
{"algo_name": "DQN", "env_name": "Acrobot-v1", "train_eps": 100, "test_eps": 20, "gamma": 0.95, "epsilon_start": 0.95, "epsilon_end": 0.01, "epsilon_decay": 1500, "lr": 0.002, "memory_capacity": 200000, "batch_size": 128, "target_update": 4, "hidden_dim": 256, "device": "cuda", "seed": 10, "show_fig": false, "save_fig": true, "result_path": "C:\\Users\\jiangji\\Desktop\\rl-tutorials\\codes\\DQN/outputs/Acrobot-v1/20220824-124401/results", "model_path": "C:\\Users\\jiangji\\Desktop\\rl-tutorials\\codes\\DQN/outputs/Acrobot-v1/20220824-124401/models", "n_states": 6, "n_actions": 3}

Binary file not shown (Before: 51 KiB).

View File

@@ -1,21 +0,0 @@
episodes,rewards
0,-79.0
1,-113.0
2,-81.0
3,-132.0
4,-110.0
5,-114.0
6,-80.0
7,-101.0
8,-78.0
9,-91.0
10,-107.0
11,-87.0
12,-105.0
13,-91.0
14,-128.0
15,-132.0
16,-119.0
17,-77.0
18,-89.0
19,-134.0

Binary file not shown (Before: 54 KiB).

View File

@@ -1,101 +0,0 @@
episodes,rewards
0,-500.0
1,-500.0
2,-500.0
3,-370.0
4,-449.0
5,-500.0
6,-312.0
7,-374.0
8,-180.0
9,-154.0
10,-137.0
11,-185.0
12,-135.0
13,-302.0
14,-146.0
15,-137.0
16,-119.0
17,-149.0
18,-217.0
19,-191.0
20,-157.0
21,-166.0
22,-138.0
23,-135.0
24,-182.0
25,-130.0
26,-175.0
27,-222.0
28,-133.0
29,-108.0
30,-250.0
31,-119.0
32,-135.0
33,-148.0
34,-194.0
35,-194.0
36,-186.0
37,-131.0
38,-185.0
39,-79.0
40,-129.0
41,-271.0
42,-117.0
43,-159.0
44,-156.0
45,-117.0
46,-158.0
47,-153.0
48,-119.0
49,-164.0
50,-134.0
51,-231.0
52,-117.0
53,-119.0
54,-136.0
55,-173.0
56,-202.0
57,-133.0
58,-142.0
59,-169.0
60,-137.0
61,-123.0
62,-205.0
63,-107.0
64,-194.0
65,-150.0
66,-143.0
67,-218.0
68,-145.0
69,-90.0
70,-107.0
71,-169.0
72,-125.0
73,-142.0
74,-145.0
75,-94.0
76,-150.0
77,-134.0
78,-159.0
79,-137.0
80,-146.0
81,-191.0
82,-242.0
83,-117.0
84,-92.0
85,-193.0
86,-239.0
87,-173.0
88,-140.0
89,-157.0
90,-133.0
91,-148.0
92,-87.0
93,-398.0
94,-98.0
95,-121.0
96,-102.0
97,-120.0
98,-195.0
99,-219.0

View File

@@ -1,21 +0,0 @@
{
"algo_name": "DQN",
"env_name": "CartPole-v0",
"train_eps": 200,
"test_eps": 20,
"gamma": 0.95,
"epsilon_start": 0.95,
"epsilon_end": 0.01,
"epsilon_decay": 500,
"lr": 0.0001,
"memory_capacity": 100000,
"batch_size": 64,
"target_update": 4,
"hidden_dim": 256,
"device": "cpu",
"seed": 10,
"result_path": "C:\\Users\\jiangji\\Desktop\\rl-tutorials\\codes\\DQN/outputs/CartPole-v0/20220823-173936/results",
"model_path": "C:\\Users\\jiangji\\Desktop\\rl-tutorials\\codes\\DQN/outputs/CartPole-v0/20220823-173936/models",
"show_fig": false,
"save_fig": true
}

Binary file not shown (Before: 27 KiB).

View File

@@ -1,21 +0,0 @@
episodes,rewards
0,200.0
1,200.0
2,200.0
3,200.0
4,200.0
5,200.0
6,200.0
7,200.0
8,200.0
9,200.0
10,200.0
11,200.0
12,200.0
13,200.0
14,200.0
15,200.0
16,200.0
17,200.0
18,200.0
19,200.0

Binary file not shown (Before: 38 KiB).

View File

@@ -1,201 +0,0 @@
episodes,rewards
0,38.0
1,16.0
2,37.0
3,15.0
4,22.0
5,34.0
6,20.0
7,12.0
8,16.0
9,14.0
10,13.0
11,21.0
12,14.0
13,12.0
14,17.0
15,12.0
16,10.0
17,14.0
18,10.0
19,10.0
20,16.0
21,9.0
22,14.0
23,13.0
24,10.0
25,9.0
26,12.0
27,12.0
28,14.0
29,11.0
30,9.0
31,8.0
32,9.0
33,11.0
34,12.0
35,10.0
36,11.0
37,10.0
38,10.0
39,18.0
40,13.0
41,15.0
42,10.0
43,9.0
44,14.0
45,14.0
46,23.0
47,17.0
48,15.0
49,15.0
50,20.0
51,28.0
52,36.0
53,36.0
54,23.0
55,27.0
56,53.0
57,19.0
58,35.0
59,62.0
60,57.0
61,38.0
62,61.0
63,65.0
64,58.0
65,43.0
66,67.0
67,56.0
68,91.0
69,128.0
70,71.0
71,126.0
72,100.0
73,200.0
74,200.0
75,200.0
76,200.0
77,200.0
78,200.0
79,200.0
80,200.0
81,200.0
82,200.0
83,200.0
84,200.0
85,200.0
86,200.0
87,200.0
88,200.0
89,200.0
90,200.0
91,200.0
92,200.0
93,200.0
94,200.0
95,200.0
96,200.0
97,200.0
98,200.0
99,200.0
100,200.0
101,200.0
102,200.0
103,200.0
104,200.0
105,200.0
106,200.0
107,200.0
108,200.0
109,200.0
110,200.0
111,200.0
112,200.0
113,200.0
114,200.0
115,200.0
116,200.0
117,200.0
118,200.0
119,200.0
120,200.0
121,200.0
122,200.0
123,200.0
124,200.0
125,200.0
126,200.0
127,200.0
128,200.0
129,200.0
130,200.0
131,200.0
132,200.0
133,200.0
134,200.0
135,200.0
136,200.0
137,200.0
138,200.0
139,200.0
140,200.0
141,200.0
142,200.0
143,200.0
144,200.0
145,200.0
146,200.0
147,200.0
148,200.0
149,200.0
150,200.0
151,200.0
152,200.0
153,200.0
154,200.0
155,200.0
156,200.0
157,200.0
158,200.0
159,200.0
160,200.0
161,200.0
162,200.0
163,200.0
164,200.0
165,200.0
166,200.0
167,200.0
168,200.0
169,200.0
170,200.0
171,200.0
172,200.0
173,200.0
174,200.0
175,200.0
176,200.0
177,200.0
178,200.0
179,200.0
180,200.0
181,200.0
182,200.0
183,200.0
184,200.0
185,200.0
186,200.0
187,200.0
188,200.0
189,200.0
190,200.0
191,200.0
192,200.0
193,200.0
194,200.0
195,200.0
196,200.0
197,200.0
198,200.0
199,200.0

View File

@@ -1,24 +0,0 @@
{
"algo_name": "DQN",
"env_name": "CartPole-v1",
"train_eps": 2000,
"test_eps": 20,
"ep_max_steps": 100000,
"gamma": 0.99,
"epsilon_start": 0.95,
"epsilon_end": 0.01,
"epsilon_decay": 6000,
"lr": 1e-05,
"memory_capacity": 200000,
"batch_size": 64,
"target_update": 4,
"hidden_dim": 256,
"device": "cuda",
"seed": 10,
"show_fig": false,
"save_fig": true,
"result_path": "C:\\Users\\24438\\Desktop\\rl-tutorials\\codes\\DQN/outputs/CartPole-v1/20220828-214702/results",
"model_path": "C:\\Users\\24438\\Desktop\\rl-tutorials\\codes\\DQN/outputs/CartPole-v1/20220828-214702/models",
"n_states": 4,
"n_actions": 2
}

Binary file not shown (Before: 50 KiB).

View File

@@ -1,21 +0,0 @@
episodes,rewards,steps
0,371.0,371
1,446.0,446
2,300.0,300
3,500.0,500
4,313.0,313
5,500.0,500
6,341.0,341
7,489.0,489
8,304.0,304
9,358.0,358
10,278.0,278
11,500.0,500
12,500.0,500
13,500.0,500
14,500.0,500
15,476.0,476
16,308.0,308
17,394.0,394
18,500.0,500
19,500.0,500

Binary file not shown (Before: 50 KiB).

projects/codes/DQN/task0.py (new file, 138 lines)
View File

@@ -0,0 +1,138 @@
#!/usr/bin/env python
# coding=utf-8
'''
Author: JiangJi
Email: johnjim0816@gmail.com
Date: 2022-10-12 11:09:54
LastEditor: JiangJi
LastEditTime: 2022-10-31 00:13:31
Description: CartPole-v1,Acrobot-v1
'''
import sys,os
curr_path = os.path.dirname(os.path.abspath(__file__)) # current path
parent_path = os.path.dirname(curr_path) # parent path
sys.path.append(parent_path) # add to system path
import gym
from common.utils import all_seed,merge_class_attrs
from common.models import MLP
from common.memories import ReplayBuffer
from common.launcher import Launcher
from envs.register import register_env
from dqn import DQN
from config.config import GeneralConfigDQN,AlgoConfigDQN
class Main(Launcher):
def __init__(self) -> None:
super().__init__()
self.cfgs['general_cfg'] = merge_class_attrs(self.cfgs['general_cfg'],GeneralConfigDQN())
self.cfgs['algo_cfg'] = merge_class_attrs(self.cfgs['algo_cfg'],AlgoConfigDQN())
def env_agent_config(self,cfg,logger):
''' create env and agent
'''
register_env(cfg.env_name)
env = gym.make(cfg.env_name,new_step_api=True) # create env
if cfg.seed !=0: # set random seed
all_seed(env,seed=cfg.seed)
try: # state dimension
n_states = env.observation_space.n # print(hasattr(env.observation_space, 'n'))
except AttributeError:
n_states = env.observation_space.shape[0] # print(hasattr(env.observation_space, 'shape'))
n_actions = env.action_space.n # action dimension
logger.info(f"n_states: {n_states}, n_actions: {n_actions}") # print info
# update to cfg parameters
setattr(cfg, 'n_states', n_states)
setattr(cfg, 'n_actions', n_actions)
# cfg.update({"n_states":n_states,"n_actions":n_actions}) # update to cfg paramters
model = MLP(n_states,n_actions,hidden_dim=cfg.hidden_dim)
memory = ReplayBuffer(cfg.buffer_size) # replay buffer
agent = DQN(model,memory,cfg) # create agent
return env, agent
def train_one_episode(self, env, agent, cfg):
ep_reward = 0 # reward per episode
ep_step = 0
state = env.reset() # reset and obtain initial state
for _ in range(cfg.max_steps):
ep_step += 1
action = agent.sample_action(state) # sample action
next_state, reward, terminated, truncated , info = env.step(action) # update env and return transitions under new_step_api of OpenAI Gym
agent.memory.push(state, action, reward,
next_state, terminated) # save transitions
agent.update() # update agent
state = next_state # update next state for env
ep_reward += reward #
if terminated:
break
return agent,ep_reward,ep_step
def test_one_episode(self, env, agent, cfg):
ep_reward = 0 # reward per episode
ep_step = 0
state = env.reset() # reset and obtain initial state
for _ in range(cfg.max_steps):
ep_step += 1
action = agent.predict_action(state) # sample action
next_state, reward, terminated, truncated , info = env.step(action) # update env and return transitions under new_step_api of OpenAI Gym
state = next_state # update next state for env
ep_reward += reward #
if terminated:
break
return agent,ep_reward,ep_step
# def train(self,env, agent,cfg,logger):
# ''' Train
# '''
# logger.info("Start training!")
# logger.info(f"Env: {cfg.env_name}, Algorithm: {cfg.algo_name}, Device: {cfg.device}")
# rewards = [] # record rewards for all episodes
# steps = [] # record steps for all episodes
# for i_ep in range(cfg.train_eps):
# ep_reward = 0 # reward per episode
# ep_step = 0
# state = env.reset() # reset and obtain initial state
# for _ in range(cfg.max_steps):
# ep_step += 1
# action = agent.sample_action(state) # sample action
# next_state, reward, terminated, truncated , info = env.step(action) # update env and return transitions under new_step_api of OpenAI Gym
# agent.memory.push(state, action, reward,
# next_state, terminated) # save transitions
# state = next_state # update next state for env
# agent.update() # update agent
# ep_reward += reward #
# if terminated:
# break
# if (i_ep + 1) % cfg.target_update == 0: # target net update, target_update means "C" in pseudocode
# agent.target_net.load_state_dict(agent.policy_net.state_dict())
# steps.append(ep_step)
# rewards.append(ep_reward)
# logger.info(f'Episode: {i_ep+1}/{cfg.train_eps}, Reward: {ep_reward:.2f}: Epislon: {agent.epsilon:.3f}')
# logger.info("Finish training!")
# env.close()
# res_dic = {'episodes':range(len(rewards)),'rewards':rewards,'steps':steps}
# return res_dic
# def test(self,cfg, env, agent,logger):
# logger.info("Start testing!")
# logger.info(f"Env: {cfg.env_name}, Algorithm: {cfg.algo_name}, Device: {cfg.device}")
# rewards = [] # record rewards for all episodes
# steps = [] # record steps for all episodes
# for i_ep in range(cfg.test_eps):
# ep_reward = 0 # reward per episode
# ep_step = 0
# state = env.reset() # reset and obtain initial state
# for _ in range(cfg.max_steps):
# ep_step+=1
# action = agent.predict_action(state) # predict action
# next_state, reward, terminated, _, _ = env.step(action)
# state = next_state
# ep_reward += reward
# if terminated:
# break
# steps.append(ep_step)
# rewards.append(ep_reward)
# logger.info(f"Episode: {i_ep+1}/{cfg.test_eps}, Reward: {ep_reward:.2f}")
# logger.info("Finish testing!")
# env.close()
# return {'episodes':range(len(rewards)),'rewards':rewards,'steps':steps}
if __name__ == "__main__":
main = Main()
main.run()
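train_one_episode and test_one_episode above are callbacks consumed by the shared Launcher in common.launcher, which this commit does not show. A rough sketch of the loop the launcher presumably wraps around them; the function name, the eval/checkpoint policy, and the save path are assumptions inferred from the eval_per_episode and load_checkpoint settings and from the "best eval reward" lines in the training log:

# Rough sketch (assumption, not the repo's Launcher): the kind of loop that
# common.launcher presumably runs around the two callbacks defined above.
def run_training(launcher, env, agent, cfg, logger):
    best_eval_reward, rewards, steps = float("-inf"), [], []
    for i_ep in range(cfg.train_eps):
        agent, ep_reward, ep_step = launcher.train_one_episode(env, agent, cfg)
        rewards.append(ep_reward)
        steps.append(ep_step)
        logger.info(f"Episode: {i_ep + 1}/{cfg.train_eps}, Reward: {ep_reward}, Step: {ep_step}")
        if (i_ep + 1) % cfg.eval_per_episode == 0:   # periodic evaluation, per eval_per_episode
            eval_rewards = [launcher.test_one_episode(env, agent, cfg)[1] for _ in range(cfg.eval_eps)]
            mean_eval = sum(eval_rewards) / len(eval_rewards)
            if mean_eval >= best_eval_reward:        # keep the best checkpoint so far
                best_eval_reward = mean_eval
                agent.save_model(cfg.load_path)      # save path choice is an assumption
                logger.info(f"Current episode {i_ep + 1} has the best eval reward: {mean_eval:.2f}")
    return {"episodes": range(len(rewards)), "rewards": rewards, "steps": steps}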

View File

@@ -1,3 +1,13 @@
#!/usr/bin/env python
# coding=utf-8
'''
Author: JiangJi
Email: johnjim0816@gmail.com
Date: 2022-10-24 08:21:31
LastEditor: JiangJi
LastEditTime: 2022-10-26 09:50:49
Description: Not finished
'''
import sys,os
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE" # avoid "OMP: Error #15: Initializing libiomp5md.dll, but found libiomp5md.dll already initialized."
curr_path = os.path.dirname(os.path.abspath(__file__)) # current path
@@ -15,6 +25,73 @@ from common.memories import ReplayBuffer
from common.launcher import Launcher
from envs.register import register_env
from dqn import DQN
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as T
from PIL import Image
resize = T.Compose([T.ToPILImage(),
T.Resize(40, interpolation=Image.CUBIC),
T.ToTensor()])
# xvfb-run -s "-screen 0 640x480x24" python main1.py
def get_cart_location(env,screen_width):
world_width = env.x_threshold * 2
scale = screen_width / world_width
return int(env.state[0] * scale + screen_width / 2.0) # MIDDLE OF CART
def get_screen(env):
# Returned screen requested by gym is 400x600x3, but is sometimes larger
# such as 800x1200x3. Transpose it into torch order (CHW).
screen = env.render().transpose((2, 0, 1))
# Cart is in the lower half, so strip off the top and bottom of the screen
_, screen_height, screen_width = screen.shape
screen = screen[:, int(screen_height*0.4):int(screen_height * 0.8)]
view_width = int(screen_width * 0.6)
cart_location = get_cart_location(env,screen_width)
if cart_location < view_width // 2:
slice_range = slice(view_width)
elif cart_location > (screen_width - view_width // 2):
slice_range = slice(-view_width, None)
else:
slice_range = slice(cart_location - view_width // 2,
cart_location + view_width // 2)
# Strip off the edges, so that we have a square image centered on a cart
screen = screen[:, :, slice_range]
# Convert to float, rescale, convert to torch tensor
# (this doesn't require a copy)
screen = np.ascontiguousarray(screen, dtype=np.float32) / 255
screen = torch.from_numpy(screen)
# Resize, and add a batch dimension (BCHW)
return resize(screen)
class CNN(nn.Module):
def __init__(self, h, w, outputs):
super(CNN, self).__init__()
self.conv1 = nn.Conv2d(3, 16, kernel_size=5, stride=2)
self.bn1 = nn.BatchNorm2d(16)
self.conv2 = nn.Conv2d(16, 32, kernel_size=5, stride=2)
self.bn2 = nn.BatchNorm2d(32)
self.conv3 = nn.Conv2d(32, 32, kernel_size=5, stride=2)
self.bn3 = nn.BatchNorm2d(32)
# Number of Linear input connections depends on output of conv2d layers
# and therefore the input image size, so compute it.
def conv2d_size_out(size, kernel_size = 5, stride = 2):
return (size - (kernel_size - 1) - 1) // stride + 1
convw = conv2d_size_out(conv2d_size_out(conv2d_size_out(w)))
convh = conv2d_size_out(conv2d_size_out(conv2d_size_out(h)))
linear_input_size = convw * convh * 32
self.head = nn.Linear(linear_input_size, outputs)
# Called with either one element to determine next action, or a batch
# during optimization. Returns tensor([[left0exp,right0exp]...]).
def forward(self, x):
x = F.relu(self.bn1(self.conv1(x)))
x = F.relu(self.bn2(self.conv2(x)))
x = F.relu(self.bn3(self.conv3(x)))
return self.head(x.view(x.size(0), -1))
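# Editor's note (not part of the commit): with conv2d_size_out(size) = (size - 5) // 2 + 1,
# a hypothetical resized screen of height 40 and width 90 gives
# convh: 40 -> 18 -> 7 -> 2 and convw: 90 -> 43 -> 20 -> 8,
# so linear_input_size = 8 * 2 * 32 = 512 input features for self.head.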
class Main(Launcher):
def get_args(self):
""" hyperparameters
@@ -22,20 +99,20 @@ class Main(Launcher):
curr_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") # obtain current time
parser = argparse.ArgumentParser(description="hyperparameters")
parser.add_argument('--algo_name',default='DQN',type=str,help="name of algorithm")
parser.add_argument('--env_name',default='CartPole-v0',type=str,help="name of environment")
parser.add_argument('--train_eps',default=200,type=int,help="episodes of training")
parser.add_argument('--env_name',default='CartPole-v1',type=str,help="name of environment")
parser.add_argument('--train_eps',default=800,type=int,help="episodes of training")
parser.add_argument('--test_eps',default=20,type=int,help="episodes of testing")
parser.add_argument('--ep_max_steps',default = 100000,type=int,help="steps per episode, much larger value can simulate infinite steps")
parser.add_argument('--gamma',default=0.95,type=float,help="discounted factor")
parser.add_argument('--gamma',default=0.999,type=float,help="discounted factor")
parser.add_argument('--epsilon_start',default=0.95,type=float,help="initial value of epsilon")
parser.add_argument('--epsilon_end',default=0.01,type=float,help="final value of epsilon")
parser.add_argument('--epsilon_decay',default=500,type=int,help="decay rate of epsilon, the higher value, the slower decay")
parser.add_argument('--lr',default=0.0001,type=float,help="learning rate")
parser.add_argument('--memory_capacity',default=100000,type=int,help="memory capacity")
parser.add_argument('--batch_size',default=64,type=int)
parser.add_argument('--batch_size',default=128,type=int)
parser.add_argument('--target_update',default=4,type=int)
parser.add_argument('--hidden_dim',default=256,type=int)
parser.add_argument('--device',default='cpu',type=str,help="cpu or cuda")
parser.add_argument('--device',default='cuda',type=str,help="cpu or cuda")
parser.add_argument('--seed',default=10,type=int,help="seed")
parser.add_argument('--show_fig',default=False,type=bool,help="if show figure or not")
parser.add_argument('--save_fig',default=True,type=bool,help="if save figure or not")
@@ -48,11 +125,10 @@ class Main(Launcher):
args = {**vars(args)} # type(dict)
return args
def env_agent_config(cfg):
def env_agent_config(self,cfg):
''' create env and agent
'''
register_env(cfg['env_name'])
env = gym.make(cfg['env_name'])
env = gym.make('CartPole-v1', new_step_api=True, render_mode='single_rgb_array').unwrapped
if cfg['seed'] !=0: # set random seed
all_seed(env,seed=cfg["seed"])
try: # state dimension
@@ -62,12 +138,15 @@ class Main(Launcher):
n_actions = env.action_space.n # action dimension
print(f"n_states: {n_states}, n_actions: {n_actions}")
cfg.update({"n_states":n_states,"n_actions":n_actions}) # update to cfg paramters
model = MLP(n_states,n_actions,hidden_dim=cfg["hidden_dim"])
env.reset()
init_screen = get_screen(env)
_, screen_height, screen_width = init_screen.shape
model = CNN(screen_height, screen_width, n_actions)
memory = ReplayBuffer(cfg["memory_capacity"]) # replay buffer
agent = DQN(model,memory,cfg) # create agent
return env, agent
def train(cfg, env, agent):
def train(self,cfg, env, agent):
''' Train
'''
print("Start training!")
@@ -78,12 +157,18 @@ class Main(Launcher):
ep_reward = 0 # reward per episode
ep_step = 0
state = env.reset() # reset and obtain initial state
last_screen = get_screen(env)
current_screen = get_screen(env)
state = current_screen - last_screen
for _ in range(cfg['ep_max_steps']):
ep_step += 1
action = agent.sample_action(state) # sample action
next_state, reward, done, _ = env.step(action) # update env and return transitions
agent.memory.push(state, action, reward,
next_state, done) # save transitions
_, reward, done, _,_ = env.step(action) # update env and return transitions
last_screen = current_screen
current_screen = get_screen(env)
next_state = current_screen - last_screen
agent.memory.push(state.cpu().numpy(), action, reward,
next_state.cpu().numpy(), done) # save transitions
state = next_state # update next state for env
agent.update() # update agent
ep_reward += reward #
@@ -94,13 +179,13 @@ class Main(Launcher):
steps.append(ep_step)
rewards.append(ep_reward)
if (i_ep + 1) % 10 == 0:
print(f'Episode: {i_ep+1}/{cfg["train_eps"]}, Reward: {ep_reward:.2f}: Epislon: {agent.epsilon:.3f}')
print(f'Episode: {i_ep+1}/{cfg["train_eps"]}, Reward: {ep_reward:.2f}, step: {ep_step:d}, Epislon: {agent.epsilon:.3f}')
print("Finish training!")
env.close()
res_dic = {'episodes':range(len(rewards)),'rewards':rewards,'steps':steps}
return res_dic
def test(cfg, env, agent):
def test(self,cfg, env, agent):
print("Start testing!")
print(f"Env: {cfg['env_name']}, Algorithm: {cfg['algo_name']}, Device: {cfg['device']}")
rewards = [] # record rewards for all episodes
@@ -109,10 +194,16 @@ class Main(Launcher):
ep_reward = 0 # reward per episode
ep_step = 0
state = env.reset() # reset and obtain initial state
last_screen = get_screen(env)
current_screen = get_screen(env)
state = current_screen - last_screen
for _ in range(cfg['ep_max_steps']):
ep_step+=1
action = agent.predict_action(state) # predict action
next_state, reward, done, _ = env.step(action)
_, reward, done, _,_ = env.step(action)
last_screen = current_screen
current_screen = get_screen(env)
next_state = current_screen - last_screen
state = next_state
ep_reward += reward
if done: