hot update Double DQN

This commit is contained in:
johnjim0816
2022-08-30 16:29:57 +08:00
parent 0b0f7e857d
commit 764ba63d40
26 changed files with 803 additions and 365 deletions

View File

@@ -1 +0,0 @@
{"algo_name": "DoubleDQN", "env_name": "CartPole-v0", "train_eps": 200, "test_eps": 20, "gamma": 0.95, "epsilon_start": 0.95, "epsilon_end": 0.01, "epsilon_decay": 500, "lr": 0.0001, "memory_capacity": 100000, "batch_size": 64, "target_update": 4, "hidden_dim": 256, "device": "cpu", "result_path": "/root/Desktop/rl-tutorials/codes/DoubleDQN/outputs/CartPole-v0/20220803-104127/results/", "model_path": "/root/Desktop/rl-tutorials/codes/DoubleDQN/outputs/CartPole-v0/20220803-104127/models/", "save_fig": true}

Binary file not shown.

Before

Width:  |  Height:  |  Size: 34 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 43 KiB

View File

@@ -0,0 +1 @@
{"algo_name": "DoubleDQN", "env_name": "CartPole-v0", "train_eps": 200, "test_eps": 20, "ep_max_steps": 100000, "gamma": 0.95, "epsilon_start": 0.95, "epsilon_end": 0.01, "epsilon_decay": 500, "lr": 0.0001, "memory_capacity": 100000, "batch_size": 64, "target_update": 4, "hidden_dim": 256, "device": "cpu", "seed": 1, "show_fig": false, "save_fig": true, "result_path": "c:\\Users\\24438\\Desktop\\rl-tutorials\\codes\\DoubleDQN/outputs/CartPole-v0/20220829-233435/results/", "model_path": "c:\\Users\\24438\\Desktop\\rl-tutorials\\codes\\DoubleDQN/outputs/CartPole-v0/20220829-233435/models/", "n_states": 4, "n_actions": 2}

Binary file not shown.

After

Width:  |  Height:  |  Size: 53 KiB

View File

@@ -0,0 +1,21 @@
episodes,rewards,steps
0,145.0,0
1,166.0,0
2,171.0,0
3,200.0,0
4,139.0,0
5,200.0,0
6,200.0,0
7,141.0,0
8,200.0,0
9,187.0,0
10,166.0,0
11,172.0,0
12,121.0,0
13,200.0,0
14,200.0,0
15,149.0,0
16,128.0,0
17,200.0,0
18,178.0,0
19,185.0,0
1 episodes rewards steps
2 0 145.0 0
3 1 166.0 0
4 2 171.0 0
5 3 200.0 0
6 4 139.0 0
7 5 200.0 0
8 6 200.0 0
9 7 141.0 0
10 8 200.0 0
11 9 187.0 0
12 10 166.0 0
13 11 172.0 0
14 12 121.0 0
15 13 200.0 0
16 14 200.0 0
17 15 149.0 0
18 16 128.0 0
19 17 200.0 0
20 18 178.0 0
21 19 185.0 0

Binary file not shown.

After

Width:  |  Height:  |  Size: 65 KiB

View File

@@ -0,0 +1,201 @@
episodes,rewards,steps
0,19.0,0
1,16.0,0
2,17.0,0
3,11.0,0
4,10.0,0
5,27.0,0
6,16.0,0
7,9.0,0
8,20.0,0
9,21.0,0
10,15.0,0
11,10.0,0
12,14.0,0
13,37.0,0
14,12.0,0
15,10.0,0
16,27.0,0
17,33.0,0
18,19.0,0
19,13.0,0
20,26.0,0
21,15.0,0
22,29.0,0
23,11.0,0
24,20.0,0
25,23.0,0
26,23.0,0
27,26.0,0
28,17.0,0
29,33.0,0
30,16.0,0
31,48.0,0
32,48.0,0
33,69.0,0
34,58.0,0
35,24.0,0
36,18.0,0
37,28.0,0
38,12.0,0
39,12.0,0
40,18.0,0
41,12.0,0
42,13.0,0
43,21.0,0
44,30.0,0
45,32.0,0
46,22.0,0
47,18.0,0
48,12.0,0
49,12.0,0
50,20.0,0
51,32.0,0
52,15.0,0
53,100.0,0
54,26.0,0
55,25.0,0
56,18.0,0
57,15.0,0
58,35.0,0
59,12.0,0
60,65.0,0
61,27.0,0
62,29.0,0
63,22.0,0
64,83.0,0
65,24.0,0
66,28.0,0
67,15.0,0
68,43.0,0
69,13.0,0
70,22.0,0
71,46.0,0
72,14.0,0
73,32.0,0
74,44.0,0
75,53.0,0
76,31.0,0
77,51.0,0
78,61.0,0
79,30.0,0
80,36.0,0
81,30.0,0
82,48.0,0
83,26.0,0
84,27.0,0
85,43.0,0
86,20.0,0
87,87.0,0
88,71.0,0
89,43.0,0
90,57.0,0
91,40.0,0
92,37.0,0
93,43.0,0
94,31.0,0
95,45.0,0
96,47.0,0
97,52.0,0
98,48.0,0
99,98.0,0
100,49.0,0
101,98.0,0
102,68.0,0
103,70.0,0
104,74.0,0
105,73.0,0
106,127.0,0
107,92.0,0
108,70.0,0
109,97.0,0
110,66.0,0
111,112.0,0
112,138.0,0
113,81.0,0
114,74.0,0
115,153.0,0
116,113.0,0
117,88.0,0
118,138.0,0
119,200.0,0
120,84.0,0
121,123.0,0
122,158.0,0
123,171.0,0
124,137.0,0
125,143.0,0
126,170.0,0
127,127.0,0
128,118.0,0
129,200.0,0
130,189.0,0
131,149.0,0
132,137.0,0
133,115.0,0
134,153.0,0
135,136.0,0
136,140.0,0
137,169.0,0
138,187.0,0
139,200.0,0
140,196.0,0
141,200.0,0
142,200.0,0
143,137.0,0
144,200.0,0
145,185.0,0
146,200.0,0
147,164.0,0
148,200.0,0
149,143.0,0
150,143.0,0
151,112.0,0
152,192.0,0
153,200.0,0
154,144.0,0
155,188.0,0
156,200.0,0
157,133.0,0
158,200.0,0
159,143.0,0
160,158.0,0
161,161.0,0
162,169.0,0
163,176.0,0
164,200.0,0
165,149.0,0
166,156.0,0
167,200.0,0
168,200.0,0
169,200.0,0
170,134.0,0
171,171.0,0
172,200.0,0
173,200.0,0
174,200.0,0
175,194.0,0
176,200.0,0
177,138.0,0
178,159.0,0
179,187.0,0
180,200.0,0
181,192.0,0
182,200.0,0
183,200.0,0
184,200.0,0
185,173.0,0
186,200.0,0
187,178.0,0
188,176.0,0
189,196.0,0
190,200.0,0
191,195.0,0
192,158.0,0
193,156.0,0
194,200.0,0
195,200.0,0
196,200.0,0
197,200.0,0
198,193.0,0
199,200.0,0
1 episodes rewards steps
2 0 19.0 0
3 1 16.0 0
4 2 17.0 0
5 3 11.0 0
6 4 10.0 0
7 5 27.0 0
8 6 16.0 0
9 7 9.0 0
10 8 20.0 0
11 9 21.0 0
12 10 15.0 0
13 11 10.0 0
14 12 14.0 0
15 13 37.0 0
16 14 12.0 0
17 15 10.0 0
18 16 27.0 0
19 17 33.0 0
20 18 19.0 0
21 19 13.0 0
22 20 26.0 0
23 21 15.0 0
24 22 29.0 0
25 23 11.0 0
26 24 20.0 0
27 25 23.0 0
28 26 23.0 0
29 27 26.0 0
30 28 17.0 0
31 29 33.0 0
32 30 16.0 0
33 31 48.0 0
34 32 48.0 0
35 33 69.0 0
36 34 58.0 0
37 35 24.0 0
38 36 18.0 0
39 37 28.0 0
40 38 12.0 0
41 39 12.0 0
42 40 18.0 0
43 41 12.0 0
44 42 13.0 0
45 43 21.0 0
46 44 30.0 0
47 45 32.0 0
48 46 22.0 0
49 47 18.0 0
50 48 12.0 0
51 49 12.0 0
52 50 20.0 0
53 51 32.0 0
54 52 15.0 0
55 53 100.0 0
56 54 26.0 0
57 55 25.0 0
58 56 18.0 0
59 57 15.0 0
60 58 35.0 0
61 59 12.0 0
62 60 65.0 0
63 61 27.0 0
64 62 29.0 0
65 63 22.0 0
66 64 83.0 0
67 65 24.0 0
68 66 28.0 0
69 67 15.0 0
70 68 43.0 0
71 69 13.0 0
72 70 22.0 0
73 71 46.0 0
74 72 14.0 0
75 73 32.0 0
76 74 44.0 0
77 75 53.0 0
78 76 31.0 0
79 77 51.0 0
80 78 61.0 0
81 79 30.0 0
82 80 36.0 0
83 81 30.0 0
84 82 48.0 0
85 83 26.0 0
86 84 27.0 0
87 85 43.0 0
88 86 20.0 0
89 87 87.0 0
90 88 71.0 0
91 89 43.0 0
92 90 57.0 0
93 91 40.0 0
94 92 37.0 0
95 93 43.0 0
96 94 31.0 0
97 95 45.0 0
98 96 47.0 0
99 97 52.0 0
100 98 48.0 0
101 99 98.0 0
102 100 49.0 0
103 101 98.0 0
104 102 68.0 0
105 103 70.0 0
106 104 74.0 0
107 105 73.0 0
108 106 127.0 0
109 107 92.0 0
110 108 70.0 0
111 109 97.0 0
112 110 66.0 0
113 111 112.0 0
114 112 138.0 0
115 113 81.0 0
116 114 74.0 0
117 115 153.0 0
118 116 113.0 0
119 117 88.0 0
120 118 138.0 0
121 119 200.0 0
122 120 84.0 0
123 121 123.0 0
124 122 158.0 0
125 123 171.0 0
126 124 137.0 0
127 125 143.0 0
128 126 170.0 0
129 127 127.0 0
130 128 118.0 0
131 129 200.0 0
132 130 189.0 0
133 131 149.0 0
134 132 137.0 0
135 133 115.0 0
136 134 153.0 0
137 135 136.0 0
138 136 140.0 0
139 137 169.0 0
140 138 187.0 0
141 139 200.0 0
142 140 196.0 0
143 141 200.0 0
144 142 200.0 0
145 143 137.0 0
146 144 200.0 0
147 145 185.0 0
148 146 200.0 0
149 147 164.0 0
150 148 200.0 0
151 149 143.0 0
152 150 143.0 0
153 151 112.0 0
154 152 192.0 0
155 153 200.0 0
156 154 144.0 0
157 155 188.0 0
158 156 200.0 0
159 157 133.0 0
160 158 200.0 0
161 159 143.0 0
162 160 158.0 0
163 161 161.0 0
164 162 169.0 0
165 163 176.0 0
166 164 200.0 0
167 165 149.0 0
168 166 156.0 0
169 167 200.0 0
170 168 200.0 0
171 169 200.0 0
172 170 134.0 0
173 171 171.0 0
174 172 200.0 0
175 173 200.0 0
176 174 200.0 0
177 175 194.0 0
178 176 200.0 0
179 177 138.0 0
180 178 159.0 0
181 179 187.0 0
182 180 200.0 0
183 181 192.0 0
184 182 200.0 0
185 183 200.0 0
186 184 200.0 0
187 185 173.0 0
188 186 200.0 0
189 187 178.0 0
190 188 176.0 0
191 189 196.0 0
192 190 200.0 0
193 191 195.0 0
194 192 158.0 0
195 193 156.0 0
196 194 200.0 0
197 195 200.0 0
198 196 200.0 0
199 197 200.0 0
200 198 193.0 0
201 199 200.0 0

View File

@@ -0,0 +1 @@
{"algo_name": "DoubleDQN", "env_name": "CartPole-v0", "train_eps": 200, "test_eps": 20, "ep_max_steps": 100000, "gamma": 0.95, "epsilon_start": 0.95, "epsilon_end": 0.01, "epsilon_decay": 500, "lr": 0.0001, "memory_capacity": 100000, "batch_size": 64, "target_update": 4, "hidden_dim": 256, "device": "cuda", "seed": 1, "show_fig": false, "save_fig": true, "result_path": "C:\\Users\\24438\\Desktop\\rl-tutorials\\codes\\DoubleDQN/outputs/CartPole-v0/20220829-233635/results/", "model_path": "C:\\Users\\24438\\Desktop\\rl-tutorials\\codes\\DoubleDQN/outputs/CartPole-v0/20220829-233635/models/", "n_states": 4, "n_actions": 2}

Binary file not shown.

After

Width:  |  Height:  |  Size: 40 KiB

View File

@@ -0,0 +1,21 @@
episodes,rewards,steps
0,200.0,0
1,200.0,0
2,200.0,0
3,200.0,0
4,191.0,0
5,200.0,0
6,200.0,0
7,179.0,0
8,200.0,0
9,200.0,0
10,200.0,0
11,190.0,0
12,147.0,0
13,197.0,0
14,200.0,0
15,200.0,0
16,167.0,0
17,200.0,0
18,200.0,0
19,200.0,0
1 episodes rewards steps
2 0 200.0 0
3 1 200.0 0
4 2 200.0 0
5 3 200.0 0
6 4 191.0 0
7 5 200.0 0
8 6 200.0 0
9 7 179.0 0
10 8 200.0 0
11 9 200.0 0
12 10 200.0 0
13 11 190.0 0
14 12 147.0 0
15 13 197.0 0
16 14 200.0 0
17 15 200.0 0
18 16 167.0 0
19 17 200.0 0
20 18 200.0 0
21 19 200.0 0

Binary file not shown.

After

Width:  |  Height:  |  Size: 65 KiB

View File

@@ -0,0 +1,201 @@
episodes,rewards,steps
0,19.0,0
1,16.0,0
2,17.0,0
3,11.0,0
4,10.0,0
5,27.0,0
6,55.0,0
7,17.0,0
8,23.0,0
9,9.0,0
10,17.0,0
11,14.0,0
12,17.0,0
13,12.0,0
14,14.0,0
15,16.0,0
16,27.0,0
17,36.0,0
18,17.0,0
19,17.0,0
20,21.0,0
21,23.0,0
22,13.0,0
23,12.0,0
24,17.0,0
25,26.0,0
26,25.0,0
27,17.0,0
28,10.0,0
29,16.0,0
30,14.0,0
31,19.0,0
32,23.0,0
33,37.0,0
34,29.0,0
35,22.0,0
36,29.0,0
37,15.0,0
38,16.0,0
39,18.0,0
40,23.0,0
41,16.0,0
42,26.0,0
43,13.0,0
44,24.0,0
45,39.0,0
46,23.0,0
47,32.0,0
48,123.0,0
49,18.0,0
50,39.0,0
51,17.0,0
52,28.0,0
53,34.0,0
54,26.0,0
55,61.0,0
56,28.0,0
57,16.0,0
58,45.0,0
59,41.0,0
60,49.0,0
61,18.0,0
62,40.0,0
63,24.0,0
64,37.0,0
65,26.0,0
66,51.0,0
67,17.0,0
68,152.0,0
69,17.0,0
70,29.0,0
71,37.0,0
72,15.0,0
73,55.0,0
74,152.0,0
75,23.0,0
76,45.0,0
77,30.0,0
78,39.0,0
79,20.0,0
80,53.0,0
81,49.0,0
82,71.0,0
83,115.0,0
84,41.0,0
85,52.0,0
86,52.0,0
87,36.0,0
88,84.0,0
89,122.0,0
90,49.0,0
91,200.0,0
92,67.0,0
93,87.0,0
94,183.0,0
95,132.0,0
96,76.0,0
97,200.0,0
98,200.0,0
99,200.0,0
100,200.0,0
101,200.0,0
102,106.0,0
103,192.0,0
104,111.0,0
105,95.0,0
106,200.0,0
107,200.0,0
108,148.0,0
109,200.0,0
110,97.0,0
111,200.0,0
112,200.0,0
113,105.0,0
114,135.0,0
115,200.0,0
116,144.0,0
117,156.0,0
118,200.0,0
119,200.0,0
120,166.0,0
121,200.0,0
122,200.0,0
123,200.0,0
124,200.0,0
125,200.0,0
126,200.0,0
127,158.0,0
128,139.0,0
129,200.0,0
130,200.0,0
131,200.0,0
132,200.0,0
133,122.0,0
134,200.0,0
135,188.0,0
136,200.0,0
137,183.0,0
138,200.0,0
139,200.0,0
140,200.0,0
141,200.0,0
142,200.0,0
143,158.0,0
144,200.0,0
145,200.0,0
146,200.0,0
147,191.0,0
148,200.0,0
149,194.0,0
150,178.0,0
151,200.0,0
152,200.0,0
153,200.0,0
154,162.0,0
155,200.0,0
156,200.0,0
157,128.0,0
158,200.0,0
159,184.0,0
160,194.0,0
161,200.0,0
162,200.0,0
163,200.0,0
164,200.0,0
165,160.0,0
166,163.0,0
167,200.0,0
168,200.0,0
169,200.0,0
170,141.0,0
171,200.0,0
172,200.0,0
173,200.0,0
174,200.0,0
175,200.0,0
176,200.0,0
177,157.0,0
178,164.0,0
179,200.0,0
180,200.0,0
181,200.0,0
182,200.0,0
183,200.0,0
184,200.0,0
185,193.0,0
186,182.0,0
187,200.0,0
188,200.0,0
189,200.0,0
190,200.0,0
191,200.0,0
192,174.0,0
193,178.0,0
194,200.0,0
195,200.0,0
196,200.0,0
197,200.0,0
198,200.0,0
199,200.0,0
1 episodes rewards steps
2 0 19.0 0
3 1 16.0 0
4 2 17.0 0
5 3 11.0 0
6 4 10.0 0
7 5 27.0 0
8 6 55.0 0
9 7 17.0 0
10 8 23.0 0
11 9 9.0 0
12 10 17.0 0
13 11 14.0 0
14 12 17.0 0
15 13 12.0 0
16 14 14.0 0
17 15 16.0 0
18 16 27.0 0
19 17 36.0 0
20 18 17.0 0
21 19 17.0 0
22 20 21.0 0
23 21 23.0 0
24 22 13.0 0
25 23 12.0 0
26 24 17.0 0
27 25 26.0 0
28 26 25.0 0
29 27 17.0 0
30 28 10.0 0
31 29 16.0 0
32 30 14.0 0
33 31 19.0 0
34 32 23.0 0
35 33 37.0 0
36 34 29.0 0
37 35 22.0 0
38 36 29.0 0
39 37 15.0 0
40 38 16.0 0
41 39 18.0 0
42 40 23.0 0
43 41 16.0 0
44 42 26.0 0
45 43 13.0 0
46 44 24.0 0
47 45 39.0 0
48 46 23.0 0
49 47 32.0 0
50 48 123.0 0
51 49 18.0 0
52 50 39.0 0
53 51 17.0 0
54 52 28.0 0
55 53 34.0 0
56 54 26.0 0
57 55 61.0 0
58 56 28.0 0
59 57 16.0 0
60 58 45.0 0
61 59 41.0 0
62 60 49.0 0
63 61 18.0 0
64 62 40.0 0
65 63 24.0 0
66 64 37.0 0
67 65 26.0 0
68 66 51.0 0
69 67 17.0 0
70 68 152.0 0
71 69 17.0 0
72 70 29.0 0
73 71 37.0 0
74 72 15.0 0
75 73 55.0 0
76 74 152.0 0
77 75 23.0 0
78 76 45.0 0
79 77 30.0 0
80 78 39.0 0
81 79 20.0 0
82 80 53.0 0
83 81 49.0 0
84 82 71.0 0
85 83 115.0 0
86 84 41.0 0
87 85 52.0 0
88 86 52.0 0
89 87 36.0 0
90 88 84.0 0
91 89 122.0 0
92 90 49.0 0
93 91 200.0 0
94 92 67.0 0
95 93 87.0 0
96 94 183.0 0
97 95 132.0 0
98 96 76.0 0
99 97 200.0 0
100 98 200.0 0
101 99 200.0 0
102 100 200.0 0
103 101 200.0 0
104 102 106.0 0
105 103 192.0 0
106 104 111.0 0
107 105 95.0 0
108 106 200.0 0
109 107 200.0 0
110 108 148.0 0
111 109 200.0 0
112 110 97.0 0
113 111 200.0 0
114 112 200.0 0
115 113 105.0 0
116 114 135.0 0
117 115 200.0 0
118 116 144.0 0
119 117 156.0 0
120 118 200.0 0
121 119 200.0 0
122 120 166.0 0
123 121 200.0 0
124 122 200.0 0
125 123 200.0 0
126 124 200.0 0
127 125 200.0 0
128 126 200.0 0
129 127 158.0 0
130 128 139.0 0
131 129 200.0 0
132 130 200.0 0
133 131 200.0 0
134 132 200.0 0
135 133 122.0 0
136 134 200.0 0
137 135 188.0 0
138 136 200.0 0
139 137 183.0 0
140 138 200.0 0
141 139 200.0 0
142 140 200.0 0
143 141 200.0 0
144 142 200.0 0
145 143 158.0 0
146 144 200.0 0
147 145 200.0 0
148 146 200.0 0
149 147 191.0 0
150 148 200.0 0
151 149 194.0 0
152 150 178.0 0
153 151 200.0 0
154 152 200.0 0
155 153 200.0 0
156 154 162.0 0
157 155 200.0 0
158 156 200.0 0
159 157 128.0 0
160 158 200.0 0
161 159 184.0 0
162 160 194.0 0
163 161 200.0 0
164 162 200.0 0
165 163 200.0 0
166 164 200.0 0
167 165 160.0 0
168 166 163.0 0
169 167 200.0 0
170 168 200.0 0
171 169 200.0 0
172 170 141.0 0
173 171 200.0 0
174 172 200.0 0
175 173 200.0 0
176 174 200.0 0
177 175 200.0 0
178 176 200.0 0
179 177 157.0 0
180 178 164.0 0
181 179 200.0 0
182 180 200.0 0
183 181 200.0 0
184 182 200.0 0
185 183 200.0 0
186 184 200.0 0
187 185 193.0 0
188 186 182.0 0
189 187 200.0 0
190 188 200.0 0
191 189 200.0 0
192 190 200.0 0
193 191 200.0 0
194 192 174.0 0
195 193 178.0 0
196 194 200.0 0
197 195 200.0 0
198 196 200.0 0
199 197 200.0 0
200 198 200.0 0
201 199 200.0 0