更新算法模版
This commit is contained in:
@@ -0,0 +1,19 @@
|
||||
general_cfg:
|
||||
algo_name: FirstVisitMC
|
||||
device: cpu
|
||||
env_name: Racetrack-v0
|
||||
eval_eps: 10
|
||||
eval_per_episode: 5
|
||||
load_checkpoint: false
|
||||
load_path: tasks
|
||||
max_steps: 200
|
||||
mode: train
|
||||
save_fig: true
|
||||
seed: 1
|
||||
show_fig: false
|
||||
test_eps: 20
|
||||
train_eps: 200
|
||||
algo_cfg:
|
||||
epsilon: 0.15
|
||||
gamma: 0.9
|
||||
lr: 0.1
|
||||
@@ -0,0 +1,210 @@
|
||||
2022-11-06 01:05:04 - r - INFO: - n_states: 4, n_actions: 9
|
||||
2022-11-06 01:05:04 - r - INFO: - Start training!
|
||||
2022-11-06 01:05:04 - r - INFO: - Env: Racetrack-v0, Algorithm: FirstVisitMC, Device: cpu
|
||||
2022-11-06 01:05:40 - r - INFO: - Episode: 1/200, Reward: -760.000, Step: 200
|
||||
2022-11-06 01:05:58 - r - INFO: - Episode: 2/200, Reward: -560.000, Step: 200
|
||||
2022-11-06 01:05:59 - r - INFO: - Episode: 3/200, Reward: -156.000, Step: 66
|
||||
2022-11-06 01:06:17 - r - INFO: - Episode: 4/200, Reward: -500.000, Step: 200
|
||||
2022-11-06 01:06:38 - r - INFO: - Episode: 5/200, Reward: -600.000, Step: 200
|
||||
2022-11-06 01:06:38 - r - INFO: - Current episode 5 has the best eval reward: -208.000
|
||||
2022-11-06 01:06:52 - r - INFO: - Episode: 6/200, Reward: -350.000, Step: 200
|
||||
2022-11-06 01:07:07 - r - INFO: - Episode: 7/200, Reward: -430.000, Step: 200
|
||||
2022-11-06 01:07:10 - r - INFO: - Episode: 8/200, Reward: -206.000, Step: 96
|
||||
2022-11-06 01:07:31 - r - INFO: - Episode: 9/200, Reward: -460.000, Step: 200
|
||||
2022-11-06 01:07:45 - r - INFO: - Episode: 10/200, Reward: -410.000, Step: 200
|
||||
2022-11-06 01:07:45 - r - INFO: - Current episode 10 has the best eval reward: -204.000
|
||||
2022-11-06 01:07:58 - r - INFO: - Episode: 11/200, Reward: -400.000, Step: 200
|
||||
2022-11-06 01:08:08 - r - INFO: - Episode: 12/200, Reward: -380.000, Step: 200
|
||||
2022-11-06 01:08:09 - r - INFO: - Episode: 13/200, Reward: -155.000, Step: 75
|
||||
2022-11-06 01:08:24 - r - INFO: - Episode: 14/200, Reward: -400.000, Step: 200
|
||||
2022-11-06 01:08:37 - r - INFO: - Episode: 15/200, Reward: -350.000, Step: 200
|
||||
2022-11-06 01:08:37 - r - INFO: - Current episode 15 has the best eval reward: -203.000
|
||||
2022-11-06 01:08:51 - r - INFO: - Episode: 16/200, Reward: -400.000, Step: 200
|
||||
2022-11-06 01:09:05 - r - INFO: - Episode: 17/200, Reward: -360.000, Step: 200
|
||||
2022-11-06 01:09:23 - r - INFO: - Episode: 18/200, Reward: -420.000, Step: 200
|
||||
2022-11-06 01:09:37 - r - INFO: - Episode: 19/200, Reward: -430.000, Step: 200
|
||||
2022-11-06 01:09:48 - r - INFO: - Episode: 20/200, Reward: -360.000, Step: 200
|
||||
2022-11-06 01:09:48 - r - INFO: - Current episode 20 has the best eval reward: -187.300
|
||||
2022-11-06 01:10:08 - r - INFO: - Episode: 21/200, Reward: -420.000, Step: 200
|
||||
2022-11-06 01:10:19 - r - INFO: - Episode: 22/200, Reward: -390.000, Step: 200
|
||||
2022-11-06 01:10:19 - r - INFO: - Episode: 23/200, Reward: -59.000, Step: 49
|
||||
2022-11-06 01:10:33 - r - INFO: - Episode: 24/200, Reward: -390.000, Step: 200
|
||||
2022-11-06 01:10:33 - r - INFO: - Episode: 25/200, Reward: 2.000, Step: 8
|
||||
2022-11-06 01:10:36 - r - INFO: - Episode: 26/200, Reward: -217.000, Step: 117
|
||||
2022-11-06 01:10:43 - r - INFO: - Episode: 27/200, Reward: -287.000, Step: 167
|
||||
2022-11-06 01:10:47 - r - INFO: - Episode: 28/200, Reward: -248.000, Step: 118
|
||||
2022-11-06 01:11:04 - r - INFO: - Episode: 29/200, Reward: -370.000, Step: 200
|
||||
2022-11-06 01:11:19 - r - INFO: - Episode: 30/200, Reward: -390.000, Step: 200
|
||||
2022-11-06 01:11:32 - r - INFO: - Episode: 31/200, Reward: -370.000, Step: 200
|
||||
2022-11-06 01:11:39 - r - INFO: - Episode: 32/200, Reward: -360.000, Step: 200
|
||||
2022-11-06 01:11:57 - r - INFO: - Episode: 33/200, Reward: -420.000, Step: 200
|
||||
2022-11-06 01:12:16 - r - INFO: - Episode: 34/200, Reward: -430.000, Step: 200
|
||||
2022-11-06 01:12:34 - r - INFO: - Episode: 35/200, Reward: -430.000, Step: 200
|
||||
2022-11-06 01:12:55 - r - INFO: - Episode: 36/200, Reward: -430.000, Step: 200
|
||||
2022-11-06 01:13:09 - r - INFO: - Episode: 37/200, Reward: -380.000, Step: 200
|
||||
2022-11-06 01:13:27 - r - INFO: - Episode: 38/200, Reward: -420.000, Step: 200
|
||||
2022-11-06 01:13:40 - r - INFO: - Episode: 39/200, Reward: -350.000, Step: 200
|
||||
2022-11-06 01:13:55 - r - INFO: - Episode: 40/200, Reward: -370.000, Step: 200
|
||||
2022-11-06 01:14:09 - r - INFO: - Episode: 41/200, Reward: -400.000, Step: 200
|
||||
2022-11-06 01:14:26 - r - INFO: - Episode: 42/200, Reward: -410.000, Step: 200
|
||||
2022-11-06 01:14:40 - r - INFO: - Episode: 43/200, Reward: -360.000, Step: 200
|
||||
2022-11-06 01:14:40 - r - INFO: - Episode: 44/200, Reward: -16.000, Step: 16
|
||||
2022-11-06 01:14:40 - r - INFO: - Episode: 45/200, Reward: -23.000, Step: 13
|
||||
2022-11-06 01:14:52 - r - INFO: - Episode: 46/200, Reward: -390.000, Step: 200
|
||||
2022-11-06 01:15:08 - r - INFO: - Episode: 47/200, Reward: -390.000, Step: 200
|
||||
2022-11-06 01:15:09 - r - INFO: - Episode: 48/200, Reward: -109.000, Step: 79
|
||||
2022-11-06 01:15:22 - r - INFO: - Episode: 49/200, Reward: -300.000, Step: 200
|
||||
2022-11-06 01:15:39 - r - INFO: - Episode: 50/200, Reward: -370.000, Step: 200
|
||||
2022-11-06 01:15:55 - r - INFO: - Episode: 51/200, Reward: -460.000, Step: 200
|
||||
2022-11-06 01:16:11 - r - INFO: - Episode: 52/200, Reward: -350.000, Step: 200
|
||||
2022-11-06 01:16:23 - r - INFO: - Episode: 53/200, Reward: -320.000, Step: 200
|
||||
2022-11-06 01:16:32 - r - INFO: - Episode: 54/200, Reward: -310.000, Step: 200
|
||||
2022-11-06 01:16:47 - r - INFO: - Episode: 55/200, Reward: -390.000, Step: 200
|
||||
2022-11-06 01:17:01 - r - INFO: - Episode: 56/200, Reward: -370.000, Step: 200
|
||||
2022-11-06 01:17:19 - r - INFO: - Episode: 57/200, Reward: -390.000, Step: 200
|
||||
2022-11-06 01:17:34 - r - INFO: - Episode: 58/200, Reward: -350.000, Step: 200
|
||||
2022-11-06 01:17:35 - r - INFO: - Episode: 59/200, Reward: -123.000, Step: 73
|
||||
2022-11-06 01:17:39 - r - INFO: - Episode: 60/200, Reward: -204.000, Step: 124
|
||||
2022-11-06 01:17:40 - r - INFO: - Episode: 61/200, Reward: -39.000, Step: 29
|
||||
2022-11-06 01:17:41 - r - INFO: - Episode: 62/200, Reward: -155.000, Step: 85
|
||||
2022-11-06 01:17:42 - r - INFO: - Episode: 63/200, Reward: -108.000, Step: 58
|
||||
2022-11-06 01:17:49 - r - INFO: - Episode: 64/200, Reward: -249.000, Step: 169
|
||||
2022-11-06 01:17:51 - r - INFO: - Episode: 65/200, Reward: -170.000, Step: 100
|
||||
2022-11-06 01:17:51 - r - INFO: - Current episode 65 has the best eval reward: -181.800
|
||||
2022-11-06 01:17:51 - r - INFO: - Episode: 66/200, Reward: 1.000, Step: 9
|
||||
2022-11-06 01:17:51 - r - INFO: - Episode: 67/200, Reward: -23.000, Step: 23
|
||||
2022-11-06 01:17:52 - r - INFO: - Episode: 68/200, Reward: -104.000, Step: 74
|
||||
2022-11-06 01:17:56 - r - INFO: - Episode: 69/200, Reward: -223.000, Step: 123
|
||||
2022-11-06 01:18:11 - r - INFO: - Episode: 70/200, Reward: -350.000, Step: 200
|
||||
2022-11-06 01:18:13 - r - INFO: - Episode: 71/200, Reward: -124.000, Step: 104
|
||||
2022-11-06 01:18:13 - r - INFO: - Episode: 72/200, Reward: -20.000, Step: 20
|
||||
2022-11-06 01:18:26 - r - INFO: - Episode: 73/200, Reward: -360.000, Step: 200
|
||||
2022-11-06 01:18:26 - r - INFO: - Episode: 74/200, Reward: -67.000, Step: 37
|
||||
2022-11-06 01:18:40 - r - INFO: - Episode: 75/200, Reward: -360.000, Step: 200
|
||||
2022-11-06 01:18:41 - r - INFO: - Episode: 76/200, Reward: -71.000, Step: 41
|
||||
2022-11-06 01:18:41 - r - INFO: - Episode: 77/200, Reward: -23.000, Step: 23
|
||||
2022-11-06 01:18:41 - r - INFO: - Episode: 78/200, Reward: -41.000, Step: 21
|
||||
2022-11-06 01:18:41 - r - INFO: - Episode: 79/200, Reward: -1.000, Step: 11
|
||||
2022-11-06 01:18:50 - r - INFO: - Episode: 80/200, Reward: -270.000, Step: 200
|
||||
2022-11-06 01:18:50 - r - INFO: - Current episode 80 has the best eval reward: -163.100
|
||||
2022-11-06 01:19:02 - r - INFO: - Episode: 81/200, Reward: -330.000, Step: 200
|
||||
2022-11-06 01:19:10 - r - INFO: - Episode: 82/200, Reward: -290.000, Step: 200
|
||||
2022-11-06 01:19:11 - r - INFO: - Episode: 83/200, Reward: -2.000, Step: 12
|
||||
2022-11-06 01:19:25 - r - INFO: - Episode: 84/200, Reward: -300.000, Step: 200
|
||||
2022-11-06 01:19:37 - r - INFO: - Episode: 85/200, Reward: -380.000, Step: 200
|
||||
2022-11-06 01:19:37 - r - INFO: - Episode: 86/200, Reward: -47.000, Step: 37
|
||||
2022-11-06 01:19:53 - r - INFO: - Episode: 87/200, Reward: -350.000, Step: 200
|
||||
2022-11-06 01:20:04 - r - INFO: - Episode: 88/200, Reward: -308.000, Step: 188
|
||||
2022-11-06 01:20:21 - r - INFO: - Episode: 89/200, Reward: -370.000, Step: 200
|
||||
2022-11-06 01:20:27 - r - INFO: - Episode: 90/200, Reward: -214.000, Step: 154
|
||||
2022-11-06 01:20:43 - r - INFO: - Episode: 91/200, Reward: -290.000, Step: 200
|
||||
2022-11-06 01:21:00 - r - INFO: - Episode: 92/200, Reward: -370.000, Step: 200
|
||||
2022-11-06 01:21:01 - r - INFO: - Episode: 93/200, Reward: -32.000, Step: 22
|
||||
2022-11-06 01:21:21 - r - INFO: - Episode: 94/200, Reward: -400.000, Step: 200
|
||||
2022-11-06 01:21:25 - r - INFO: - Episode: 95/200, Reward: -217.000, Step: 127
|
||||
2022-11-06 01:21:41 - r - INFO: - Episode: 96/200, Reward: -330.000, Step: 200
|
||||
2022-11-06 01:21:55 - r - INFO: - Episode: 97/200, Reward: -380.000, Step: 200
|
||||
2022-11-06 01:22:16 - r - INFO: - Episode: 98/200, Reward: -320.000, Step: 200
|
||||
2022-11-06 01:22:32 - r - INFO: - Episode: 99/200, Reward: -300.000, Step: 200
|
||||
2022-11-06 01:22:46 - r - INFO: - Episode: 100/200, Reward: -350.000, Step: 200
|
||||
2022-11-06 01:23:00 - r - INFO: - Episode: 101/200, Reward: -400.000, Step: 200
|
||||
2022-11-06 01:23:11 - r - INFO: - Episode: 102/200, Reward: -330.000, Step: 200
|
||||
2022-11-06 01:23:29 - r - INFO: - Episode: 103/200, Reward: -360.000, Step: 200
|
||||
2022-11-06 01:23:45 - r - INFO: - Episode: 104/200, Reward: -380.000, Step: 200
|
||||
2022-11-06 01:24:06 - r - INFO: - Episode: 105/200, Reward: -400.000, Step: 200
|
||||
2022-11-06 01:24:16 - r - INFO: - Episode: 106/200, Reward: -290.000, Step: 200
|
||||
2022-11-06 01:24:19 - r - INFO: - Episode: 107/200, Reward: -203.000, Step: 103
|
||||
2022-11-06 01:24:19 - r - INFO: - Episode: 108/200, Reward: -74.000, Step: 54
|
||||
2022-11-06 01:24:36 - r - INFO: - Episode: 109/200, Reward: -330.000, Step: 200
|
||||
2022-11-06 01:24:54 - r - INFO: - Episode: 110/200, Reward: -380.000, Step: 200
|
||||
2022-11-06 01:25:03 - r - INFO: - Episode: 111/200, Reward: -263.000, Step: 173
|
||||
2022-11-06 01:25:20 - r - INFO: - Episode: 112/200, Reward: -290.000, Step: 200
|
||||
2022-11-06 01:25:34 - r - INFO: - Episode: 113/200, Reward: -340.000, Step: 200
|
||||
2022-11-06 01:25:34 - r - INFO: - Episode: 114/200, Reward: -86.000, Step: 66
|
||||
2022-11-06 01:25:50 - r - INFO: - Episode: 115/200, Reward: -340.000, Step: 200
|
||||
2022-11-06 01:25:52 - r - INFO: - Episode: 116/200, Reward: -160.000, Step: 110
|
||||
2022-11-06 01:26:07 - r - INFO: - Episode: 117/200, Reward: -340.000, Step: 200
|
||||
2022-11-06 01:26:15 - r - INFO: - Episode: 118/200, Reward: -320.000, Step: 200
|
||||
2022-11-06 01:26:29 - r - INFO: - Episode: 119/200, Reward: -320.000, Step: 200
|
||||
2022-11-06 01:26:43 - r - INFO: - Episode: 120/200, Reward: -360.000, Step: 200
|
||||
2022-11-06 01:26:56 - r - INFO: - Episode: 121/200, Reward: -330.000, Step: 200
|
||||
2022-11-06 01:27:09 - r - INFO: - Episode: 122/200, Reward: -350.000, Step: 200
|
||||
2022-11-06 01:27:25 - r - INFO: - Episode: 123/200, Reward: -300.000, Step: 200
|
||||
2022-11-06 01:27:38 - r - INFO: - Episode: 124/200, Reward: -320.000, Step: 200
|
||||
2022-11-06 01:27:39 - r - INFO: - Episode: 125/200, Reward: -70.000, Step: 40
|
||||
2022-11-06 01:27:39 - r - INFO: - Episode: 126/200, Reward: -59.000, Step: 39
|
||||
2022-11-06 01:27:55 - r - INFO: - Episode: 127/200, Reward: -340.000, Step: 200
|
||||
2022-11-06 01:27:56 - r - INFO: - Episode: 128/200, Reward: -87.000, Step: 77
|
||||
2022-11-06 01:28:13 - r - INFO: - Episode: 129/200, Reward: -330.000, Step: 200
|
||||
2022-11-06 01:28:22 - r - INFO: - Episode: 130/200, Reward: -260.000, Step: 200
|
||||
2022-11-06 01:28:38 - r - INFO: - Episode: 131/200, Reward: -290.000, Step: 200
|
||||
2022-11-06 01:28:57 - r - INFO: - Episode: 132/200, Reward: -330.000, Step: 200
|
||||
2022-11-06 01:29:07 - r - INFO: - Episode: 133/200, Reward: -340.000, Step: 200
|
||||
2022-11-06 01:29:08 - r - INFO: - Episode: 134/200, Reward: -78.000, Step: 48
|
||||
2022-11-06 01:29:23 - r - INFO: - Episode: 135/200, Reward: -390.000, Step: 200
|
||||
2022-11-06 01:29:33 - r - INFO: - Episode: 136/200, Reward: -320.000, Step: 200
|
||||
2022-11-06 01:29:51 - r - INFO: - Episode: 137/200, Reward: -360.000, Step: 200
|
||||
2022-11-06 01:30:06 - r - INFO: - Episode: 138/200, Reward: -340.000, Step: 200
|
||||
2022-11-06 01:30:10 - r - INFO: - Episode: 139/200, Reward: -185.000, Step: 115
|
||||
2022-11-06 01:30:26 - r - INFO: - Episode: 140/200, Reward: -340.000, Step: 200
|
||||
2022-11-06 01:30:43 - r - INFO: - Episode: 141/200, Reward: -250.000, Step: 200
|
||||
2022-11-06 01:30:57 - r - INFO: - Episode: 142/200, Reward: -347.000, Step: 197
|
||||
2022-11-06 01:31:11 - r - INFO: - Episode: 143/200, Reward: -320.000, Step: 200
|
||||
2022-11-06 01:31:25 - r - INFO: - Episode: 144/200, Reward: -330.000, Step: 200
|
||||
2022-11-06 01:31:37 - r - INFO: - Episode: 145/200, Reward: -270.000, Step: 200
|
||||
2022-11-06 01:31:55 - r - INFO: - Episode: 146/200, Reward: -380.000, Step: 200
|
||||
2022-11-06 01:32:10 - r - INFO: - Episode: 147/200, Reward: -320.000, Step: 200
|
||||
2022-11-06 01:32:27 - r - INFO: - Episode: 148/200, Reward: -340.000, Step: 200
|
||||
2022-11-06 01:32:38 - r - INFO: - Episode: 149/200, Reward: -310.000, Step: 200
|
||||
2022-11-06 01:32:57 - r - INFO: - Episode: 150/200, Reward: -290.000, Step: 200
|
||||
2022-11-06 01:33:10 - r - INFO: - Episode: 151/200, Reward: -380.000, Step: 200
|
||||
2022-11-06 01:33:21 - r - INFO: - Episode: 152/200, Reward: -281.000, Step: 181
|
||||
2022-11-06 01:33:21 - r - INFO: - Episode: 153/200, Reward: -30.000, Step: 30
|
||||
2022-11-06 01:33:33 - r - INFO: - Episode: 154/200, Reward: -280.000, Step: 200
|
||||
2022-11-06 01:33:45 - r - INFO: - Episode: 155/200, Reward: -300.000, Step: 200
|
||||
2022-11-06 01:33:59 - r - INFO: - Episode: 156/200, Reward: -300.000, Step: 200
|
||||
2022-11-06 01:34:10 - r - INFO: - Episode: 157/200, Reward: -300.000, Step: 200
|
||||
2022-11-06 01:34:28 - r - INFO: - Episode: 158/200, Reward: -370.000, Step: 200
|
||||
2022-11-06 01:34:45 - r - INFO: - Episode: 159/200, Reward: -320.000, Step: 200
|
||||
2022-11-06 01:34:52 - r - INFO: - Episode: 160/200, Reward: -250.000, Step: 200
|
||||
2022-11-06 01:35:04 - r - INFO: - Episode: 161/200, Reward: -370.000, Step: 200
|
||||
2022-11-06 01:35:16 - r - INFO: - Episode: 162/200, Reward: -290.000, Step: 200
|
||||
2022-11-06 01:35:31 - r - INFO: - Episode: 163/200, Reward: -320.000, Step: 200
|
||||
2022-11-06 01:35:41 - r - INFO: - Episode: 164/200, Reward: -290.000, Step: 200
|
||||
2022-11-06 01:35:41 - r - INFO: - Episode: 165/200, Reward: -44.000, Step: 44
|
||||
2022-11-06 01:35:53 - r - INFO: - Episode: 166/200, Reward: -216.000, Step: 196
|
||||
2022-11-06 01:36:06 - r - INFO: - Episode: 167/200, Reward: -340.000, Step: 200
|
||||
2022-11-06 01:36:23 - r - INFO: - Episode: 168/200, Reward: -360.000, Step: 200
|
||||
2022-11-06 01:36:38 - r - INFO: - Episode: 169/200, Reward: -310.000, Step: 200
|
||||
2022-11-06 01:36:51 - r - INFO: - Episode: 170/200, Reward: -320.000, Step: 200
|
||||
2022-11-06 01:37:08 - r - INFO: - Episode: 171/200, Reward: -280.000, Step: 200
|
||||
2022-11-06 01:37:17 - r - INFO: - Episode: 172/200, Reward: -290.000, Step: 200
|
||||
2022-11-06 01:37:33 - r - INFO: - Episode: 173/200, Reward: -280.000, Step: 200
|
||||
2022-11-06 01:37:45 - r - INFO: - Episode: 174/200, Reward: -300.000, Step: 200
|
||||
2022-11-06 01:38:02 - r - INFO: - Episode: 175/200, Reward: -350.000, Step: 200
|
||||
2022-11-06 01:38:17 - r - INFO: - Episode: 176/200, Reward: -320.000, Step: 200
|
||||
2022-11-06 01:38:31 - r - INFO: - Episode: 177/200, Reward: -320.000, Step: 200
|
||||
2022-11-06 01:38:47 - r - INFO: - Episode: 178/200, Reward: -320.000, Step: 200
|
||||
2022-11-06 01:39:03 - r - INFO: - Episode: 179/200, Reward: -300.000, Step: 200
|
||||
2022-11-06 01:39:04 - r - INFO: - Episode: 180/200, Reward: -117.000, Step: 87
|
||||
2022-11-06 01:39:06 - r - INFO: - Episode: 181/200, Reward: -158.000, Step: 88
|
||||
2022-11-06 01:39:23 - r - INFO: - Episode: 182/200, Reward: -300.000, Step: 200
|
||||
2022-11-06 01:39:34 - r - INFO: - Episode: 183/200, Reward: -290.000, Step: 200
|
||||
2022-11-06 01:39:51 - r - INFO: - Episode: 184/200, Reward: -350.000, Step: 200
|
||||
2022-11-06 01:40:09 - r - INFO: - Episode: 185/200, Reward: -310.000, Step: 200
|
||||
2022-11-06 01:40:10 - r - INFO: - Episode: 186/200, Reward: -58.000, Step: 38
|
||||
2022-11-06 01:40:26 - r - INFO: - Episode: 187/200, Reward: -290.000, Step: 200
|
||||
2022-11-06 01:40:42 - r - INFO: - Episode: 188/200, Reward: -310.000, Step: 200
|
||||
2022-11-06 01:40:57 - r - INFO: - Episode: 189/200, Reward: -350.000, Step: 200
|
||||
2022-11-06 01:41:12 - r - INFO: - Episode: 190/200, Reward: -300.000, Step: 200
|
||||
2022-11-06 01:41:32 - r - INFO: - Episode: 191/200, Reward: -380.000, Step: 200
|
||||
2022-11-06 01:41:37 - r - INFO: - Episode: 192/200, Reward: -230.000, Step: 200
|
||||
2022-11-06 01:41:37 - r - INFO: - Episode: 193/200, Reward: -26.000, Step: 26
|
||||
2022-11-06 01:41:56 - r - INFO: - Episode: 194/200, Reward: -340.000, Step: 200
|
||||
2022-11-06 01:42:09 - r - INFO: - Episode: 195/200, Reward: -280.000, Step: 200
|
||||
2022-11-06 01:42:10 - r - INFO: - Episode: 196/200, Reward: -106.000, Step: 66
|
||||
2022-11-06 01:42:10 - r - INFO: - Episode: 197/200, Reward: -7.000, Step: 17
|
||||
2022-11-06 01:42:20 - r - INFO: - Episode: 198/200, Reward: -248.000, Step: 178
|
||||
2022-11-06 01:42:22 - r - INFO: - Episode: 199/200, Reward: -161.000, Step: 101
|
||||
2022-11-06 01:42:22 - r - INFO: - Episode: 200/200, Reward: -3.000, Step: 13
|
||||
2022-11-06 01:42:22 - r - INFO: - Finish training!
|
||||
Binary file not shown.
Binary file not shown.
|
After Width: | Height: | Size: 72 KiB |
@@ -0,0 +1,201 @@
|
||||
episodes,rewards,steps
|
||||
0,-760,200
|
||||
1,-560,200
|
||||
2,-156,66
|
||||
3,-500,200
|
||||
4,-600,200
|
||||
5,-350,200
|
||||
6,-430,200
|
||||
7,-206,96
|
||||
8,-460,200
|
||||
9,-410,200
|
||||
10,-400,200
|
||||
11,-380,200
|
||||
12,-155,75
|
||||
13,-400,200
|
||||
14,-350,200
|
||||
15,-400,200
|
||||
16,-360,200
|
||||
17,-420,200
|
||||
18,-430,200
|
||||
19,-360,200
|
||||
20,-420,200
|
||||
21,-390,200
|
||||
22,-59,49
|
||||
23,-390,200
|
||||
24,2,8
|
||||
25,-217,117
|
||||
26,-287,167
|
||||
27,-248,118
|
||||
28,-370,200
|
||||
29,-390,200
|
||||
30,-370,200
|
||||
31,-360,200
|
||||
32,-420,200
|
||||
33,-430,200
|
||||
34,-430,200
|
||||
35,-430,200
|
||||
36,-380,200
|
||||
37,-420,200
|
||||
38,-350,200
|
||||
39,-370,200
|
||||
40,-400,200
|
||||
41,-410,200
|
||||
42,-360,200
|
||||
43,-16,16
|
||||
44,-23,13
|
||||
45,-390,200
|
||||
46,-390,200
|
||||
47,-109,79
|
||||
48,-300,200
|
||||
49,-370,200
|
||||
50,-460,200
|
||||
51,-350,200
|
||||
52,-320,200
|
||||
53,-310,200
|
||||
54,-390,200
|
||||
55,-370,200
|
||||
56,-390,200
|
||||
57,-350,200
|
||||
58,-123,73
|
||||
59,-204,124
|
||||
60,-39,29
|
||||
61,-155,85
|
||||
62,-108,58
|
||||
63,-249,169
|
||||
64,-170,100
|
||||
65,1,9
|
||||
66,-23,23
|
||||
67,-104,74
|
||||
68,-223,123
|
||||
69,-350,200
|
||||
70,-124,104
|
||||
71,-20,20
|
||||
72,-360,200
|
||||
73,-67,37
|
||||
74,-360,200
|
||||
75,-71,41
|
||||
76,-23,23
|
||||
77,-41,21
|
||||
78,-1,11
|
||||
79,-270,200
|
||||
80,-330,200
|
||||
81,-290,200
|
||||
82,-2,12
|
||||
83,-300,200
|
||||
84,-380,200
|
||||
85,-47,37
|
||||
86,-350,200
|
||||
87,-308,188
|
||||
88,-370,200
|
||||
89,-214,154
|
||||
90,-290,200
|
||||
91,-370,200
|
||||
92,-32,22
|
||||
93,-400,200
|
||||
94,-217,127
|
||||
95,-330,200
|
||||
96,-380,200
|
||||
97,-320,200
|
||||
98,-300,200
|
||||
99,-350,200
|
||||
100,-400,200
|
||||
101,-330,200
|
||||
102,-360,200
|
||||
103,-380,200
|
||||
104,-400,200
|
||||
105,-290,200
|
||||
106,-203,103
|
||||
107,-74,54
|
||||
108,-330,200
|
||||
109,-380,200
|
||||
110,-263,173
|
||||
111,-290,200
|
||||
112,-340,200
|
||||
113,-86,66
|
||||
114,-340,200
|
||||
115,-160,110
|
||||
116,-340,200
|
||||
117,-320,200
|
||||
118,-320,200
|
||||
119,-360,200
|
||||
120,-330,200
|
||||
121,-350,200
|
||||
122,-300,200
|
||||
123,-320,200
|
||||
124,-70,40
|
||||
125,-59,39
|
||||
126,-340,200
|
||||
127,-87,77
|
||||
128,-330,200
|
||||
129,-260,200
|
||||
130,-290,200
|
||||
131,-330,200
|
||||
132,-340,200
|
||||
133,-78,48
|
||||
134,-390,200
|
||||
135,-320,200
|
||||
136,-360,200
|
||||
137,-340,200
|
||||
138,-185,115
|
||||
139,-340,200
|
||||
140,-250,200
|
||||
141,-347,197
|
||||
142,-320,200
|
||||
143,-330,200
|
||||
144,-270,200
|
||||
145,-380,200
|
||||
146,-320,200
|
||||
147,-340,200
|
||||
148,-310,200
|
||||
149,-290,200
|
||||
150,-380,200
|
||||
151,-281,181
|
||||
152,-30,30
|
||||
153,-280,200
|
||||
154,-300,200
|
||||
155,-300,200
|
||||
156,-300,200
|
||||
157,-370,200
|
||||
158,-320,200
|
||||
159,-250,200
|
||||
160,-370,200
|
||||
161,-290,200
|
||||
162,-320,200
|
||||
163,-290,200
|
||||
164,-44,44
|
||||
165,-216,196
|
||||
166,-340,200
|
||||
167,-360,200
|
||||
168,-310,200
|
||||
169,-320,200
|
||||
170,-280,200
|
||||
171,-290,200
|
||||
172,-280,200
|
||||
173,-300,200
|
||||
174,-350,200
|
||||
175,-320,200
|
||||
176,-320,200
|
||||
177,-320,200
|
||||
178,-300,200
|
||||
179,-117,87
|
||||
180,-158,88
|
||||
181,-300,200
|
||||
182,-290,200
|
||||
183,-350,200
|
||||
184,-310,200
|
||||
185,-58,38
|
||||
186,-290,200
|
||||
187,-310,200
|
||||
188,-350,200
|
||||
189,-300,200
|
||||
190,-380,200
|
||||
191,-230,200
|
||||
192,-26,26
|
||||
193,-340,200
|
||||
194,-280,200
|
||||
195,-106,66
|
||||
196,-7,17
|
||||
197,-248,178
|
||||
198,-161,101
|
||||
199,-3,13
|
||||
|
@@ -5,7 +5,7 @@ Author: John
|
||||
Email: johnjim0816@gmail.com
|
||||
Date: 2021-03-12 16:14:34
|
||||
LastEditor: John
|
||||
LastEditTime: 2022-08-15 18:10:13
|
||||
LastEditTime: 2022-11-06 01:04:57
|
||||
Description:
|
||||
Environment:
|
||||
'''
|
||||
@@ -17,15 +17,16 @@ import dill
|
||||
class FisrtVisitMC:
|
||||
''' On-Policy First-Visit MC Control
|
||||
'''
|
||||
def __init__(self,n_actions,cfg):
|
||||
self.n_actions = n_actions
|
||||
def __init__(self,cfg):
|
||||
self.n_actions = cfg.n_actions
|
||||
self.epsilon = cfg.epsilon
|
||||
self.gamma = cfg.gamma
|
||||
self.Q_table = defaultdict(lambda: np.zeros(n_actions))
|
||||
self.Q_table = defaultdict(lambda: np.zeros(cfg.n_actions))
|
||||
self.returns_sum = defaultdict(float) # 保存return之和
|
||||
self.returns_count = defaultdict(float)
|
||||
|
||||
def sample(self,state):
|
||||
def sample_action(self,state):
|
||||
state = str(state)
|
||||
if state in self.Q_table.keys():
|
||||
best_action = np.argmax(self.Q_table[state])
|
||||
action_probs = np.ones(self.n_actions, dtype=float) * self.epsilon / self.n_actions
|
||||
@@ -34,7 +35,8 @@ class FisrtVisitMC:
|
||||
else:
|
||||
action = np.random.randint(0,self.n_actions)
|
||||
return action
|
||||
def predict(self,state):
|
||||
def predict_action(self,state):
|
||||
state = str(state)
|
||||
if state in self.Q_table.keys():
|
||||
best_action = np.argmax(self.Q_table[state])
|
||||
action_probs = np.ones(self.n_actions, dtype=float) * self.epsilon / self.n_actions
|
||||
@@ -46,19 +48,20 @@ class FisrtVisitMC:
|
||||
def update(self,one_ep_transition):
|
||||
# Find all (state, action) pairs we've visited in this one_ep_transition
|
||||
# We convert each state to a tuple so that we can use it as a dict key
|
||||
sa_in_episode = set([(tuple(x[0]), x[1]) for x in one_ep_transition])
|
||||
sa_in_episode = set([(str(x[0]), x[1]) for x in one_ep_transition])
|
||||
for state, action in sa_in_episode:
|
||||
sa_pair = (state, action)
|
||||
# Find the first occurence of the (state, action) pair in the one_ep_transition
|
||||
|
||||
first_occurence_idx = next(i for i,x in enumerate(one_ep_transition)
|
||||
if x[0] == state and x[1] == action)
|
||||
if str(x[0]) == state and x[1] == action)
|
||||
# Sum up all rewards since the first occurance
|
||||
G = sum([x[2]*(self.gamma**i) for i,x in enumerate(one_ep_transition[first_occurence_idx:])])
|
||||
# Calculate average return for this state over all sampled episodes
|
||||
self.returns_sum[sa_pair] += G
|
||||
self.returns_count[sa_pair] += 1.0
|
||||
self.Q_table[state][action] = self.returns_sum[sa_pair] / self.returns_count[sa_pair]
|
||||
def save(self,path=None):
|
||||
def save_model(self,path=None):
|
||||
'''把 Q表格 的数据保存到文件中
|
||||
'''
|
||||
from pathlib import Path
|
||||
@@ -69,7 +72,7 @@ class FisrtVisitMC:
|
||||
pickle_module=dill
|
||||
)
|
||||
|
||||
def load(self, path=None):
|
||||
def load_model(self, path=None):
    ''' Restore the Q table from the file named "Q_table" under `path`.

    `path` is used as a plain string prefix, so it has to be a string that
    already ends with a path separator.
    '''
    q_table_file = path+"Q_table"
    # NOTE(review): unpickling can run arbitrary code - only load files you trust
    self.Q_table = torch.load(f=q_table_file, pickle_module=dill)
|
||||
32
projects/codes/MonteCarlo/config/config.py
Normal file
32
projects/codes/MonteCarlo/config/config.py
Normal file
@@ -0,0 +1,32 @@
|
||||
#!/usr/bin/env python
|
||||
# coding=utf-8
|
||||
'''
|
||||
Author: JiangJi
|
||||
Email: johnjim0816@gmail.com
|
||||
Date: 2022-11-06 00:31:35
|
||||
LastEditor: JiangJi
|
||||
LastEditTime: 2022-11-06 00:45:44
|
||||
Description: parameters of MonteCarlo
|
||||
'''
|
||||
from common.config import GeneralConfig,AlgoConfig
|
||||
|
||||
class GeneralConfigMC(GeneralConfig):
    ''' General (algorithm-independent) settings for the Monte Carlo task.
    '''
    def __init__(self) -> None:
        # --- what to run ---
        self.env_name = "Racetrack-v0"  # environment id
        self.algo_name = "FirstVisitMC"  # algorithm label
        self.mode = "train"  # either "train" or "test"
        self.seed = 1  # seed for random number generators
        self.device = "cpu"  # compute device
        # --- episode budget ---
        self.train_eps = 200  # training episodes
        self.test_eps = 20  # testing episodes
        self.max_steps = 200  # step cap per episode
        # --- checkpoint handling ---
        self.load_checkpoint = False
        self.load_path = "tasks"  # where a saved model is loaded from
        # --- figures ---
        self.show_fig = False  # display figures
        self.save_fig = True  # write figures to disk
|
||||
|
||||
class AlgoConfigMC(AlgoConfig):
    ''' Algorithm hyperparameters for the Monte Carlo agent.
    '''
    def __init__(self) -> None:
        self.gamma = 0.90  # discount factor for future rewards
        self.epsilon = 0.15  # exploration rate of the epsilon-greedy policy
        # NOTE(review): no visible agent code reads lr - confirm it is needed
        self.lr = 0.1  # learning rate
|
||||
@@ -5,51 +5,82 @@ Author: John
|
||||
Email: johnjim0816@gmail.com
|
||||
Date: 2021-03-11 14:26:44
|
||||
LastEditor: John
|
||||
LastEditTime: 2022-08-15 18:12:13
|
||||
LastEditTime: 2022-11-06 00:44:56
|
||||
Description:
|
||||
Environment:
|
||||
'''
|
||||
import sys,os
|
||||
curr_path = os.path.dirname(os.path.abspath(__file__)) # 当前文件所在绝对路径
|
||||
parent_path = os.path.dirname(curr_path) # 父路径
|
||||
sys.path.append(parent_path) # 添加路径到系统路径
|
||||
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE" # avoid "OMP: Error #15: Initializing libiomp5md.dll, but found libiomp5md.dll already initialized."
|
||||
curr_path = os.path.dirname(os.path.abspath(__file__)) # current path
|
||||
parent_path = os.path.dirname(curr_path) # parent path
|
||||
sys.path.append(parent_path) # add path to system path
|
||||
|
||||
import datetime
|
||||
import argparse
|
||||
from common.utils import save_results,save_args,plot_rewards
|
||||
|
||||
import gym
|
||||
from envs.wrappers import CliffWalkingWapper
|
||||
from envs.register import register_env
|
||||
from common.utils import merge_class_attrs,all_seed
|
||||
from common.launcher import Launcher
|
||||
from MonteCarlo.agent import FisrtVisitMC
|
||||
from envs.racetrack import RacetrackEnv
|
||||
from MonteCarlo.config.config import GeneralConfigMC,AlgoConfigMC
|
||||
|
||||
|
||||
curr_time = datetime.datetime.now().strftime(
|
||||
"%Y%m%d-%H%M%S") # obtain current time
|
||||
|
||||
def get_args():
    """ Parse the command-line hyperparameters.

    Returns:
        argparse.Namespace with the parsed options. When `--result_path` or
        `--model_path` is not given, it defaults to
        `<curr_path>/outputs/<env_name>/<timestamp>/results/` or
        `.../models/` respectively.

    Raises:
        SystemExit: raised by argparse on invalid command-line input.
    """
    def _str2bool(text):
        # argparse's `type=bool` treats every non-empty string (including
        # "False") as True, so boolean flags are parsed explicitly instead.
        if isinstance(text, bool):
            return text
        lowered = text.strip().lower()
        if lowered in ('true', 't', 'yes', 'y', '1'):
            return True
        if lowered in ('false', 'f', 'no', 'n', '0'):
            return False
        raise argparse.ArgumentTypeError(f"expected a boolean value, got {text!r}")

    curr_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") # obtain current time
    parser = argparse.ArgumentParser(description="hyperparameters")
    parser.add_argument('--algo_name',default='First-Visit MC',type=str,help="name of algorithm")
    parser.add_argument('--env_name',default='Racetrack',type=str,help="name of environment")
    parser.add_argument('--train_eps',default=200,type=int,help="episodes of training")
    parser.add_argument('--test_eps',default=20,type=int,help="episodes of testing")
    parser.add_argument('--gamma',default=0.9,type=float,help="discounted factor")
    parser.add_argument('--epsilon',default=0.15,type=float,help="the probability to select a random action")
    parser.add_argument('--device',default='cpu',type=str,help="cpu or cuda")
    # The path defaults depend on env_name, so they are filled in after the
    # single parse below. Parsing while the parser is still being built would
    # reject every option that is registered later.
    parser.add_argument('--result_path',default=None,type=str,help="path to save results")
    parser.add_argument('--model_path',default=None,type=str,help="path to save models")
    parser.add_argument('--show_fig',default=False,type=_str2bool,help="if show figure or not")
    parser.add_argument('--save_fig',default=True,type=_str2bool,help="if save figure or not")
    args = parser.parse_args()
    output_root = curr_path + "/outputs/" + args.env_name + '/' + curr_time
    if args.result_path is None:
        args.result_path = output_root + '/results/'
    if args.model_path is None:
        args.model_path = output_root + '/models/'
    return args
|
||||
|
||||
def env_agent_config(cfg,seed=1):
    ''' Create the racetrack environment and the Monte Carlo agent.

    Args:
        cfg: configuration object; `n_actions` is written onto it before it
            is handed to the agent.
        seed: kept for interface compatibility; not used in this function.

    Returns:
        Tuple (env, agent).
    '''
    env = RacetrackEnv()
    n_actions = env.action_space.n
    # The agent constructor takes only cfg and reads the action count from it
    setattr(cfg, 'n_actions', n_actions)
    agent = FisrtVisitMC(cfg)
    return env,agent
|
||||
class Main(Launcher):
    ''' Launcher for the First-Visit Monte Carlo task.

    Merges the Monte Carlo specific configuration into the launcher's
    configuration and defines how the environment and agent are created and
    how a single training or testing episode is run.
    '''
    def __init__(self) -> None:
        super().__init__()
        self.cfgs['general_cfg'] = merge_class_attrs(self.cfgs['general_cfg'],GeneralConfigMC())
        self.cfgs['algo_cfg'] = merge_class_attrs(self.cfgs['algo_cfg'],AlgoConfigMC())

    def env_agent_config(self,cfg,logger):
        ''' Create the environment and the agent.

        Args:
            cfg: merged configuration; `n_states` and `n_actions` are written
                onto it before the agent is built.
            logger: logger used to report the state and action dimensions.

        Returns:
            Tuple (env, agent).
        '''
        register_env(cfg.env_name)
        env = gym.make(cfg.env_name,new_step_api=False)  # create env using the 4-tuple step API
        if cfg.env_name == 'CliffWalking-v0':
            env = CliffWalkingWapper(env)
        if cfg.seed !=0:  # a seed of 0 means "do not seed"
            all_seed(env,seed=cfg.seed)
        try:  # observation spaces that expose a size `n`
            n_states = env.observation_space.n
        except AttributeError:  # otherwise fall back to the first dimension of the space's shape
            n_states = env.observation_space.shape[0]
        n_actions = env.action_space.n  # action dimension
        logger.info(f"n_states: {n_states}, n_actions: {n_actions}")  # print info
        # expose the dimensions to the agent through cfg
        setattr(cfg, 'n_states', n_states)
        setattr(cfg, 'n_actions', n_actions)
        agent = FisrtVisitMC(cfg)
        return env,agent

    def train_one_episode(self, env, agent, cfg):
        ''' Run one training episode and update the agent from it.

        Returns:
            Tuple (agent, ep_reward, ep_step).
        '''
        ep_reward = 0  # reward per episode
        ep_step = 0
        state = env.reset()  # reset and obtain initial state
        one_ep_transition = []
        for _ in range(cfg.max_steps):
            ep_step += 1
            action = agent.sample_action(state)  # sample action
            next_state, reward, terminated, info = env.step(action)  # 4-tuple step result
            one_ep_transition.append((state, action, reward))  # save transitions
            state = next_state  # move on to the next state
            ep_reward += reward
            if terminated:
                break
        # Monte Carlo returns are only defined for a finished trajectory, so
        # the agent is updated once per episode. Updating inside the loop
        # would feed every partial prefix in as if it were a separate episode.
        agent.update(one_ep_transition)
        return agent,ep_reward,ep_step

    def test_one_episode(self, env, agent, cfg):
        ''' Run one evaluation episode without updating the agent.

        Returns:
            Tuple (agent, ep_reward, ep_step).
        '''
        ep_reward = 0  # reward per episode
        ep_step = 0
        state = env.reset()  # reset and obtain initial state
        for _ in range(cfg.max_steps):
            ep_step += 1
            action = agent.predict_action(state)  # choose action for evaluation
            next_state, reward, terminated, info = env.step(action)  # 4-tuple step result
            state = next_state  # move on to the next state
            ep_reward += reward
            if terminated:
                break
        return agent,ep_reward,ep_step
|
||||
|
||||
def train(cfg, env, agent):
|
||||
print("开始训练!")
|
||||
@@ -93,18 +124,5 @@ def test(cfg, env, agent):
|
||||
return {'rewards':rewards}
|
||||
|
||||
if __name__ == "__main__":
    cfg = get_args()
    # training
    env, agent = env_agent_config(cfg)
    res_dic = train(cfg, env, agent)
    save_args(cfg,path = cfg.result_path)  # save the arguments next to the results
    agent.save_model(path = cfg.model_path)  # save the model (the agent method is named save_model)
    save_results(res_dic, tag = 'train', path = cfg.result_path)
    plot_rewards(res_dic['rewards'], cfg, path = cfg.result_path,tag = "train")
    # testing
    env, agent = env_agent_config(cfg)  # optional: a fresh env avoids problems the env may have after training
    agent.load_model(path = cfg.model_path)  # load the model (the agent method is named load_model)
    res_dic = test(cfg, env, agent)
    save_results(res_dic, tag='test',
                 path = cfg.result_path)  # save the results
    plot_rewards(res_dic['rewards'], cfg, path = cfg.result_path,tag = "test")  # plot the results
    main = Main()
    main.run()
|
||||
Reference in New Issue
Block a user