hot update DQN

This commit is contained in:
johnjim0816
2022-08-24 12:49:16 +08:00
parent 07fb1d233e
commit 4f4658503e
24 changed files with 148 additions and 512 deletions

View File

@@ -0,0 +1 @@
{"algo_name": "DQN", "env_name": "Acrobot-v1", "train_eps": 100, "test_eps": 20, "gamma": 0.95, "epsilon_start": 0.95, "epsilon_end": 0.01, "epsilon_decay": 1500, "lr": 0.002, "memory_capacity": 200000, "batch_size": 128, "target_update": 4, "hidden_dim": 256, "device": "cuda", "seed": 10, "show_fig": false, "save_fig": true, "result_path": "C:\\Users\\jiangji\\Desktop\\rl-tutorials\\codes\\DQN/outputs/Acrobot-v1/20220824-124401/results", "model_path": "C:\\Users\\jiangji\\Desktop\\rl-tutorials\\codes\\DQN/outputs/Acrobot-v1/20220824-124401/models", "n_states": 6, "n_actions": 3}

Binary file not shown.

After

Width:  |  Height:  |  Size: 51 KiB

View File

@@ -0,0 +1,21 @@
episodes,rewards
0,-79.0
1,-113.0
2,-81.0
3,-132.0
4,-110.0
5,-114.0
6,-80.0
7,-101.0
8,-78.0
9,-91.0
10,-107.0
11,-87.0
12,-105.0
13,-91.0
14,-128.0
15,-132.0
16,-119.0
17,-77.0
18,-89.0
19,-134.0
1 episodes rewards
2 0 -79.0
3 1 -113.0
4 2 -81.0
5 3 -132.0
6 4 -110.0
7 5 -114.0
8 6 -80.0
9 7 -101.0
10 8 -78.0
11 9 -91.0
12 10 -107.0
13 11 -87.0
14 12 -105.0
15 13 -91.0
16 14 -128.0
17 15 -132.0
18 16 -119.0
19 17 -77.0
20 18 -89.0
21 19 -134.0

Binary file not shown.

After

Width:  |  Height:  |  Size: 54 KiB

View File

@@ -0,0 +1,101 @@
episodes,rewards
0,-500.0
1,-500.0
2,-500.0
3,-370.0
4,-449.0
5,-500.0
6,-312.0
7,-374.0
8,-180.0
9,-154.0
10,-137.0
11,-185.0
12,-135.0
13,-302.0
14,-146.0
15,-137.0
16,-119.0
17,-149.0
18,-217.0
19,-191.0
20,-157.0
21,-166.0
22,-138.0
23,-135.0
24,-182.0
25,-130.0
26,-175.0
27,-222.0
28,-133.0
29,-108.0
30,-250.0
31,-119.0
32,-135.0
33,-148.0
34,-194.0
35,-194.0
36,-186.0
37,-131.0
38,-185.0
39,-79.0
40,-129.0
41,-271.0
42,-117.0
43,-159.0
44,-156.0
45,-117.0
46,-158.0
47,-153.0
48,-119.0
49,-164.0
50,-134.0
51,-231.0
52,-117.0
53,-119.0
54,-136.0
55,-173.0
56,-202.0
57,-133.0
58,-142.0
59,-169.0
60,-137.0
61,-123.0
62,-205.0
63,-107.0
64,-194.0
65,-150.0
66,-143.0
67,-218.0
68,-145.0
69,-90.0
70,-107.0
71,-169.0
72,-125.0
73,-142.0
74,-145.0
75,-94.0
76,-150.0
77,-134.0
78,-159.0
79,-137.0
80,-146.0
81,-191.0
82,-242.0
83,-117.0
84,-92.0
85,-193.0
86,-239.0
87,-173.0
88,-140.0
89,-157.0
90,-133.0
91,-148.0
92,-87.0
93,-398.0
94,-98.0
95,-121.0
96,-102.0
97,-120.0
98,-195.0
99,-219.0
1 episodes rewards
2 0 -500.0
3 1 -500.0
4 2 -500.0
5 3 -370.0
6 4 -449.0
7 5 -500.0
8 6 -312.0
9 7 -374.0
10 8 -180.0
11 9 -154.0
12 10 -137.0
13 11 -185.0
14 12 -135.0
15 13 -302.0
16 14 -146.0
17 15 -137.0
18 16 -119.0
19 17 -149.0
20 18 -217.0
21 19 -191.0
22 20 -157.0
23 21 -166.0
24 22 -138.0
25 23 -135.0
26 24 -182.0
27 25 -130.0
28 26 -175.0
29 27 -222.0
30 28 -133.0
31 29 -108.0
32 30 -250.0
33 31 -119.0
34 32 -135.0
35 33 -148.0
36 34 -194.0
37 35 -194.0
38 36 -186.0
39 37 -131.0
40 38 -185.0
41 39 -79.0
42 40 -129.0
43 41 -271.0
44 42 -117.0
45 43 -159.0
46 44 -156.0
47 45 -117.0
48 46 -158.0
49 47 -153.0
50 48 -119.0
51 49 -164.0
52 50 -134.0
53 51 -231.0
54 52 -117.0
55 53 -119.0
56 54 -136.0
57 55 -173.0
58 56 -202.0
59 57 -133.0
60 58 -142.0
61 59 -169.0
62 60 -137.0
63 61 -123.0
64 62 -205.0
65 63 -107.0
66 64 -194.0
67 65 -150.0
68 66 -143.0
69 67 -218.0
70 68 -145.0
71 69 -90.0
72 70 -107.0
73 71 -169.0
74 72 -125.0
75 73 -142.0
76 74 -145.0
77 75 -94.0
78 76 -150.0
79 77 -134.0
80 78 -159.0
81 79 -137.0
82 80 -146.0
83 81 -191.0
84 82 -242.0
85 83 -117.0
86 84 -92.0
87 85 -193.0
88 86 -239.0
89 87 -173.0
90 88 -140.0
91 89 -157.0
92 90 -133.0
93 91 -148.0
94 92 -87.0
95 93 -398.0
96 94 -98.0
97 95 -121.0
98 96 -102.0
99 97 -120.0
100 98 -195.0
101 99 -219.0