更新算法模版

This commit is contained in:
johnjim0816
2022-11-06 12:15:36 +08:00
parent 466a17707f
commit dc78698262
256 changed files with 17282 additions and 10229 deletions

View File

@@ -0,0 +1,19 @@
# Saved run configuration for the First-Visit Monte Carlo task (Racetrack-v0).
general_cfg:                  # general (run-level) settings
  algo_name: FirstVisitMC     # name of algorithm
  device: cpu                 # device to use (cpu or cuda)
  env_name: Racetrack-v0      # name of environment
  eval_eps: 10                # episodes per evaluation run (presumably averaged into the eval reward — confirm)
  eval_per_episode: 5         # evaluate every N training episodes (matches the log cadence)
  load_checkpoint: false      # whether to load a saved checkpoint before running
  load_path: tasks            # path to load model from
  max_steps: 200              # max steps per episode
  mode: train                 # train or test
  save_fig: true              # save result figure or not
  seed: 1                     # random seed
  show_fig: false             # show result figure or not
  test_eps: 20                # number of episodes for testing
  train_eps: 200              # number of episodes for training
algo_cfg:                     # algorithm hyperparameters
  epsilon: 0.15               # epsilon-greedy exploration probability
  gamma: 0.9                  # discount factor
  lr: 0.1                     # learning rate (NOTE(review): not referenced by the MC agent in this commit — confirm)

View File

@@ -0,0 +1,210 @@
2022-11-06 01:05:04 - r - INFO: - n_states: 4, n_actions: 9
2022-11-06 01:05:04 - r - INFO: - Start training!
2022-11-06 01:05:04 - r - INFO: - Env: Racetrack-v0, Algorithm: FirstVisitMC, Device: cpu
2022-11-06 01:05:40 - r - INFO: - Episode: 1/200, Reward: -760.000, Step: 200
2022-11-06 01:05:58 - r - INFO: - Episode: 2/200, Reward: -560.000, Step: 200
2022-11-06 01:05:59 - r - INFO: - Episode: 3/200, Reward: -156.000, Step: 66
2022-11-06 01:06:17 - r - INFO: - Episode: 4/200, Reward: -500.000, Step: 200
2022-11-06 01:06:38 - r - INFO: - Episode: 5/200, Reward: -600.000, Step: 200
2022-11-06 01:06:38 - r - INFO: - Current episode 5 has the best eval reward: -208.000
2022-11-06 01:06:52 - r - INFO: - Episode: 6/200, Reward: -350.000, Step: 200
2022-11-06 01:07:07 - r - INFO: - Episode: 7/200, Reward: -430.000, Step: 200
2022-11-06 01:07:10 - r - INFO: - Episode: 8/200, Reward: -206.000, Step: 96
2022-11-06 01:07:31 - r - INFO: - Episode: 9/200, Reward: -460.000, Step: 200
2022-11-06 01:07:45 - r - INFO: - Episode: 10/200, Reward: -410.000, Step: 200
2022-11-06 01:07:45 - r - INFO: - Current episode 10 has the best eval reward: -204.000
2022-11-06 01:07:58 - r - INFO: - Episode: 11/200, Reward: -400.000, Step: 200
2022-11-06 01:08:08 - r - INFO: - Episode: 12/200, Reward: -380.000, Step: 200
2022-11-06 01:08:09 - r - INFO: - Episode: 13/200, Reward: -155.000, Step: 75
2022-11-06 01:08:24 - r - INFO: - Episode: 14/200, Reward: -400.000, Step: 200
2022-11-06 01:08:37 - r - INFO: - Episode: 15/200, Reward: -350.000, Step: 200
2022-11-06 01:08:37 - r - INFO: - Current episode 15 has the best eval reward: -203.000
2022-11-06 01:08:51 - r - INFO: - Episode: 16/200, Reward: -400.000, Step: 200
2022-11-06 01:09:05 - r - INFO: - Episode: 17/200, Reward: -360.000, Step: 200
2022-11-06 01:09:23 - r - INFO: - Episode: 18/200, Reward: -420.000, Step: 200
2022-11-06 01:09:37 - r - INFO: - Episode: 19/200, Reward: -430.000, Step: 200
2022-11-06 01:09:48 - r - INFO: - Episode: 20/200, Reward: -360.000, Step: 200
2022-11-06 01:09:48 - r - INFO: - Current episode 20 has the best eval reward: -187.300
2022-11-06 01:10:08 - r - INFO: - Episode: 21/200, Reward: -420.000, Step: 200
2022-11-06 01:10:19 - r - INFO: - Episode: 22/200, Reward: -390.000, Step: 200
2022-11-06 01:10:19 - r - INFO: - Episode: 23/200, Reward: -59.000, Step: 49
2022-11-06 01:10:33 - r - INFO: - Episode: 24/200, Reward: -390.000, Step: 200
2022-11-06 01:10:33 - r - INFO: - Episode: 25/200, Reward: 2.000, Step: 8
2022-11-06 01:10:36 - r - INFO: - Episode: 26/200, Reward: -217.000, Step: 117
2022-11-06 01:10:43 - r - INFO: - Episode: 27/200, Reward: -287.000, Step: 167
2022-11-06 01:10:47 - r - INFO: - Episode: 28/200, Reward: -248.000, Step: 118
2022-11-06 01:11:04 - r - INFO: - Episode: 29/200, Reward: -370.000, Step: 200
2022-11-06 01:11:19 - r - INFO: - Episode: 30/200, Reward: -390.000, Step: 200
2022-11-06 01:11:32 - r - INFO: - Episode: 31/200, Reward: -370.000, Step: 200
2022-11-06 01:11:39 - r - INFO: - Episode: 32/200, Reward: -360.000, Step: 200
2022-11-06 01:11:57 - r - INFO: - Episode: 33/200, Reward: -420.000, Step: 200
2022-11-06 01:12:16 - r - INFO: - Episode: 34/200, Reward: -430.000, Step: 200
2022-11-06 01:12:34 - r - INFO: - Episode: 35/200, Reward: -430.000, Step: 200
2022-11-06 01:12:55 - r - INFO: - Episode: 36/200, Reward: -430.000, Step: 200
2022-11-06 01:13:09 - r - INFO: - Episode: 37/200, Reward: -380.000, Step: 200
2022-11-06 01:13:27 - r - INFO: - Episode: 38/200, Reward: -420.000, Step: 200
2022-11-06 01:13:40 - r - INFO: - Episode: 39/200, Reward: -350.000, Step: 200
2022-11-06 01:13:55 - r - INFO: - Episode: 40/200, Reward: -370.000, Step: 200
2022-11-06 01:14:09 - r - INFO: - Episode: 41/200, Reward: -400.000, Step: 200
2022-11-06 01:14:26 - r - INFO: - Episode: 42/200, Reward: -410.000, Step: 200
2022-11-06 01:14:40 - r - INFO: - Episode: 43/200, Reward: -360.000, Step: 200
2022-11-06 01:14:40 - r - INFO: - Episode: 44/200, Reward: -16.000, Step: 16
2022-11-06 01:14:40 - r - INFO: - Episode: 45/200, Reward: -23.000, Step: 13
2022-11-06 01:14:52 - r - INFO: - Episode: 46/200, Reward: -390.000, Step: 200
2022-11-06 01:15:08 - r - INFO: - Episode: 47/200, Reward: -390.000, Step: 200
2022-11-06 01:15:09 - r - INFO: - Episode: 48/200, Reward: -109.000, Step: 79
2022-11-06 01:15:22 - r - INFO: - Episode: 49/200, Reward: -300.000, Step: 200
2022-11-06 01:15:39 - r - INFO: - Episode: 50/200, Reward: -370.000, Step: 200
2022-11-06 01:15:55 - r - INFO: - Episode: 51/200, Reward: -460.000, Step: 200
2022-11-06 01:16:11 - r - INFO: - Episode: 52/200, Reward: -350.000, Step: 200
2022-11-06 01:16:23 - r - INFO: - Episode: 53/200, Reward: -320.000, Step: 200
2022-11-06 01:16:32 - r - INFO: - Episode: 54/200, Reward: -310.000, Step: 200
2022-11-06 01:16:47 - r - INFO: - Episode: 55/200, Reward: -390.000, Step: 200
2022-11-06 01:17:01 - r - INFO: - Episode: 56/200, Reward: -370.000, Step: 200
2022-11-06 01:17:19 - r - INFO: - Episode: 57/200, Reward: -390.000, Step: 200
2022-11-06 01:17:34 - r - INFO: - Episode: 58/200, Reward: -350.000, Step: 200
2022-11-06 01:17:35 - r - INFO: - Episode: 59/200, Reward: -123.000, Step: 73
2022-11-06 01:17:39 - r - INFO: - Episode: 60/200, Reward: -204.000, Step: 124
2022-11-06 01:17:40 - r - INFO: - Episode: 61/200, Reward: -39.000, Step: 29
2022-11-06 01:17:41 - r - INFO: - Episode: 62/200, Reward: -155.000, Step: 85
2022-11-06 01:17:42 - r - INFO: - Episode: 63/200, Reward: -108.000, Step: 58
2022-11-06 01:17:49 - r - INFO: - Episode: 64/200, Reward: -249.000, Step: 169
2022-11-06 01:17:51 - r - INFO: - Episode: 65/200, Reward: -170.000, Step: 100
2022-11-06 01:17:51 - r - INFO: - Current episode 65 has the best eval reward: -181.800
2022-11-06 01:17:51 - r - INFO: - Episode: 66/200, Reward: 1.000, Step: 9
2022-11-06 01:17:51 - r - INFO: - Episode: 67/200, Reward: -23.000, Step: 23
2022-11-06 01:17:52 - r - INFO: - Episode: 68/200, Reward: -104.000, Step: 74
2022-11-06 01:17:56 - r - INFO: - Episode: 69/200, Reward: -223.000, Step: 123
2022-11-06 01:18:11 - r - INFO: - Episode: 70/200, Reward: -350.000, Step: 200
2022-11-06 01:18:13 - r - INFO: - Episode: 71/200, Reward: -124.000, Step: 104
2022-11-06 01:18:13 - r - INFO: - Episode: 72/200, Reward: -20.000, Step: 20
2022-11-06 01:18:26 - r - INFO: - Episode: 73/200, Reward: -360.000, Step: 200
2022-11-06 01:18:26 - r - INFO: - Episode: 74/200, Reward: -67.000, Step: 37
2022-11-06 01:18:40 - r - INFO: - Episode: 75/200, Reward: -360.000, Step: 200
2022-11-06 01:18:41 - r - INFO: - Episode: 76/200, Reward: -71.000, Step: 41
2022-11-06 01:18:41 - r - INFO: - Episode: 77/200, Reward: -23.000, Step: 23
2022-11-06 01:18:41 - r - INFO: - Episode: 78/200, Reward: -41.000, Step: 21
2022-11-06 01:18:41 - r - INFO: - Episode: 79/200, Reward: -1.000, Step: 11
2022-11-06 01:18:50 - r - INFO: - Episode: 80/200, Reward: -270.000, Step: 200
2022-11-06 01:18:50 - r - INFO: - Current episode 80 has the best eval reward: -163.100
2022-11-06 01:19:02 - r - INFO: - Episode: 81/200, Reward: -330.000, Step: 200
2022-11-06 01:19:10 - r - INFO: - Episode: 82/200, Reward: -290.000, Step: 200
2022-11-06 01:19:11 - r - INFO: - Episode: 83/200, Reward: -2.000, Step: 12
2022-11-06 01:19:25 - r - INFO: - Episode: 84/200, Reward: -300.000, Step: 200
2022-11-06 01:19:37 - r - INFO: - Episode: 85/200, Reward: -380.000, Step: 200
2022-11-06 01:19:37 - r - INFO: - Episode: 86/200, Reward: -47.000, Step: 37
2022-11-06 01:19:53 - r - INFO: - Episode: 87/200, Reward: -350.000, Step: 200
2022-11-06 01:20:04 - r - INFO: - Episode: 88/200, Reward: -308.000, Step: 188
2022-11-06 01:20:21 - r - INFO: - Episode: 89/200, Reward: -370.000, Step: 200
2022-11-06 01:20:27 - r - INFO: - Episode: 90/200, Reward: -214.000, Step: 154
2022-11-06 01:20:43 - r - INFO: - Episode: 91/200, Reward: -290.000, Step: 200
2022-11-06 01:21:00 - r - INFO: - Episode: 92/200, Reward: -370.000, Step: 200
2022-11-06 01:21:01 - r - INFO: - Episode: 93/200, Reward: -32.000, Step: 22
2022-11-06 01:21:21 - r - INFO: - Episode: 94/200, Reward: -400.000, Step: 200
2022-11-06 01:21:25 - r - INFO: - Episode: 95/200, Reward: -217.000, Step: 127
2022-11-06 01:21:41 - r - INFO: - Episode: 96/200, Reward: -330.000, Step: 200
2022-11-06 01:21:55 - r - INFO: - Episode: 97/200, Reward: -380.000, Step: 200
2022-11-06 01:22:16 - r - INFO: - Episode: 98/200, Reward: -320.000, Step: 200
2022-11-06 01:22:32 - r - INFO: - Episode: 99/200, Reward: -300.000, Step: 200
2022-11-06 01:22:46 - r - INFO: - Episode: 100/200, Reward: -350.000, Step: 200
2022-11-06 01:23:00 - r - INFO: - Episode: 101/200, Reward: -400.000, Step: 200
2022-11-06 01:23:11 - r - INFO: - Episode: 102/200, Reward: -330.000, Step: 200
2022-11-06 01:23:29 - r - INFO: - Episode: 103/200, Reward: -360.000, Step: 200
2022-11-06 01:23:45 - r - INFO: - Episode: 104/200, Reward: -380.000, Step: 200
2022-11-06 01:24:06 - r - INFO: - Episode: 105/200, Reward: -400.000, Step: 200
2022-11-06 01:24:16 - r - INFO: - Episode: 106/200, Reward: -290.000, Step: 200
2022-11-06 01:24:19 - r - INFO: - Episode: 107/200, Reward: -203.000, Step: 103
2022-11-06 01:24:19 - r - INFO: - Episode: 108/200, Reward: -74.000, Step: 54
2022-11-06 01:24:36 - r - INFO: - Episode: 109/200, Reward: -330.000, Step: 200
2022-11-06 01:24:54 - r - INFO: - Episode: 110/200, Reward: -380.000, Step: 200
2022-11-06 01:25:03 - r - INFO: - Episode: 111/200, Reward: -263.000, Step: 173
2022-11-06 01:25:20 - r - INFO: - Episode: 112/200, Reward: -290.000, Step: 200
2022-11-06 01:25:34 - r - INFO: - Episode: 113/200, Reward: -340.000, Step: 200
2022-11-06 01:25:34 - r - INFO: - Episode: 114/200, Reward: -86.000, Step: 66
2022-11-06 01:25:50 - r - INFO: - Episode: 115/200, Reward: -340.000, Step: 200
2022-11-06 01:25:52 - r - INFO: - Episode: 116/200, Reward: -160.000, Step: 110
2022-11-06 01:26:07 - r - INFO: - Episode: 117/200, Reward: -340.000, Step: 200
2022-11-06 01:26:15 - r - INFO: - Episode: 118/200, Reward: -320.000, Step: 200
2022-11-06 01:26:29 - r - INFO: - Episode: 119/200, Reward: -320.000, Step: 200
2022-11-06 01:26:43 - r - INFO: - Episode: 120/200, Reward: -360.000, Step: 200
2022-11-06 01:26:56 - r - INFO: - Episode: 121/200, Reward: -330.000, Step: 200
2022-11-06 01:27:09 - r - INFO: - Episode: 122/200, Reward: -350.000, Step: 200
2022-11-06 01:27:25 - r - INFO: - Episode: 123/200, Reward: -300.000, Step: 200
2022-11-06 01:27:38 - r - INFO: - Episode: 124/200, Reward: -320.000, Step: 200
2022-11-06 01:27:39 - r - INFO: - Episode: 125/200, Reward: -70.000, Step: 40
2022-11-06 01:27:39 - r - INFO: - Episode: 126/200, Reward: -59.000, Step: 39
2022-11-06 01:27:55 - r - INFO: - Episode: 127/200, Reward: -340.000, Step: 200
2022-11-06 01:27:56 - r - INFO: - Episode: 128/200, Reward: -87.000, Step: 77
2022-11-06 01:28:13 - r - INFO: - Episode: 129/200, Reward: -330.000, Step: 200
2022-11-06 01:28:22 - r - INFO: - Episode: 130/200, Reward: -260.000, Step: 200
2022-11-06 01:28:38 - r - INFO: - Episode: 131/200, Reward: -290.000, Step: 200
2022-11-06 01:28:57 - r - INFO: - Episode: 132/200, Reward: -330.000, Step: 200
2022-11-06 01:29:07 - r - INFO: - Episode: 133/200, Reward: -340.000, Step: 200
2022-11-06 01:29:08 - r - INFO: - Episode: 134/200, Reward: -78.000, Step: 48
2022-11-06 01:29:23 - r - INFO: - Episode: 135/200, Reward: -390.000, Step: 200
2022-11-06 01:29:33 - r - INFO: - Episode: 136/200, Reward: -320.000, Step: 200
2022-11-06 01:29:51 - r - INFO: - Episode: 137/200, Reward: -360.000, Step: 200
2022-11-06 01:30:06 - r - INFO: - Episode: 138/200, Reward: -340.000, Step: 200
2022-11-06 01:30:10 - r - INFO: - Episode: 139/200, Reward: -185.000, Step: 115
2022-11-06 01:30:26 - r - INFO: - Episode: 140/200, Reward: -340.000, Step: 200
2022-11-06 01:30:43 - r - INFO: - Episode: 141/200, Reward: -250.000, Step: 200
2022-11-06 01:30:57 - r - INFO: - Episode: 142/200, Reward: -347.000, Step: 197
2022-11-06 01:31:11 - r - INFO: - Episode: 143/200, Reward: -320.000, Step: 200
2022-11-06 01:31:25 - r - INFO: - Episode: 144/200, Reward: -330.000, Step: 200
2022-11-06 01:31:37 - r - INFO: - Episode: 145/200, Reward: -270.000, Step: 200
2022-11-06 01:31:55 - r - INFO: - Episode: 146/200, Reward: -380.000, Step: 200
2022-11-06 01:32:10 - r - INFO: - Episode: 147/200, Reward: -320.000, Step: 200
2022-11-06 01:32:27 - r - INFO: - Episode: 148/200, Reward: -340.000, Step: 200
2022-11-06 01:32:38 - r - INFO: - Episode: 149/200, Reward: -310.000, Step: 200
2022-11-06 01:32:57 - r - INFO: - Episode: 150/200, Reward: -290.000, Step: 200
2022-11-06 01:33:10 - r - INFO: - Episode: 151/200, Reward: -380.000, Step: 200
2022-11-06 01:33:21 - r - INFO: - Episode: 152/200, Reward: -281.000, Step: 181
2022-11-06 01:33:21 - r - INFO: - Episode: 153/200, Reward: -30.000, Step: 30
2022-11-06 01:33:33 - r - INFO: - Episode: 154/200, Reward: -280.000, Step: 200
2022-11-06 01:33:45 - r - INFO: - Episode: 155/200, Reward: -300.000, Step: 200
2022-11-06 01:33:59 - r - INFO: - Episode: 156/200, Reward: -300.000, Step: 200
2022-11-06 01:34:10 - r - INFO: - Episode: 157/200, Reward: -300.000, Step: 200
2022-11-06 01:34:28 - r - INFO: - Episode: 158/200, Reward: -370.000, Step: 200
2022-11-06 01:34:45 - r - INFO: - Episode: 159/200, Reward: -320.000, Step: 200
2022-11-06 01:34:52 - r - INFO: - Episode: 160/200, Reward: -250.000, Step: 200
2022-11-06 01:35:04 - r - INFO: - Episode: 161/200, Reward: -370.000, Step: 200
2022-11-06 01:35:16 - r - INFO: - Episode: 162/200, Reward: -290.000, Step: 200
2022-11-06 01:35:31 - r - INFO: - Episode: 163/200, Reward: -320.000, Step: 200
2022-11-06 01:35:41 - r - INFO: - Episode: 164/200, Reward: -290.000, Step: 200
2022-11-06 01:35:41 - r - INFO: - Episode: 165/200, Reward: -44.000, Step: 44
2022-11-06 01:35:53 - r - INFO: - Episode: 166/200, Reward: -216.000, Step: 196
2022-11-06 01:36:06 - r - INFO: - Episode: 167/200, Reward: -340.000, Step: 200
2022-11-06 01:36:23 - r - INFO: - Episode: 168/200, Reward: -360.000, Step: 200
2022-11-06 01:36:38 - r - INFO: - Episode: 169/200, Reward: -310.000, Step: 200
2022-11-06 01:36:51 - r - INFO: - Episode: 170/200, Reward: -320.000, Step: 200
2022-11-06 01:37:08 - r - INFO: - Episode: 171/200, Reward: -280.000, Step: 200
2022-11-06 01:37:17 - r - INFO: - Episode: 172/200, Reward: -290.000, Step: 200
2022-11-06 01:37:33 - r - INFO: - Episode: 173/200, Reward: -280.000, Step: 200
2022-11-06 01:37:45 - r - INFO: - Episode: 174/200, Reward: -300.000, Step: 200
2022-11-06 01:38:02 - r - INFO: - Episode: 175/200, Reward: -350.000, Step: 200
2022-11-06 01:38:17 - r - INFO: - Episode: 176/200, Reward: -320.000, Step: 200
2022-11-06 01:38:31 - r - INFO: - Episode: 177/200, Reward: -320.000, Step: 200
2022-11-06 01:38:47 - r - INFO: - Episode: 178/200, Reward: -320.000, Step: 200
2022-11-06 01:39:03 - r - INFO: - Episode: 179/200, Reward: -300.000, Step: 200
2022-11-06 01:39:04 - r - INFO: - Episode: 180/200, Reward: -117.000, Step: 87
2022-11-06 01:39:06 - r - INFO: - Episode: 181/200, Reward: -158.000, Step: 88
2022-11-06 01:39:23 - r - INFO: - Episode: 182/200, Reward: -300.000, Step: 200
2022-11-06 01:39:34 - r - INFO: - Episode: 183/200, Reward: -290.000, Step: 200
2022-11-06 01:39:51 - r - INFO: - Episode: 184/200, Reward: -350.000, Step: 200
2022-11-06 01:40:09 - r - INFO: - Episode: 185/200, Reward: -310.000, Step: 200
2022-11-06 01:40:10 - r - INFO: - Episode: 186/200, Reward: -58.000, Step: 38
2022-11-06 01:40:26 - r - INFO: - Episode: 187/200, Reward: -290.000, Step: 200
2022-11-06 01:40:42 - r - INFO: - Episode: 188/200, Reward: -310.000, Step: 200
2022-11-06 01:40:57 - r - INFO: - Episode: 189/200, Reward: -350.000, Step: 200
2022-11-06 01:41:12 - r - INFO: - Episode: 190/200, Reward: -300.000, Step: 200
2022-11-06 01:41:32 - r - INFO: - Episode: 191/200, Reward: -380.000, Step: 200
2022-11-06 01:41:37 - r - INFO: - Episode: 192/200, Reward: -230.000, Step: 200
2022-11-06 01:41:37 - r - INFO: - Episode: 193/200, Reward: -26.000, Step: 26
2022-11-06 01:41:56 - r - INFO: - Episode: 194/200, Reward: -340.000, Step: 200
2022-11-06 01:42:09 - r - INFO: - Episode: 195/200, Reward: -280.000, Step: 200
2022-11-06 01:42:10 - r - INFO: - Episode: 196/200, Reward: -106.000, Step: 66
2022-11-06 01:42:10 - r - INFO: - Episode: 197/200, Reward: -7.000, Step: 17
2022-11-06 01:42:20 - r - INFO: - Episode: 198/200, Reward: -248.000, Step: 178
2022-11-06 01:42:22 - r - INFO: - Episode: 199/200, Reward: -161.000, Step: 101
2022-11-06 01:42:22 - r - INFO: - Episode: 200/200, Reward: -3.000, Step: 13
2022-11-06 01:42:22 - r - INFO: - Finish training!

Binary file not shown.

After

Width:  |  Height:  |  Size: 72 KiB

View File

@@ -0,0 +1,201 @@
episodes,rewards,steps
0,-760,200
1,-560,200
2,-156,66
3,-500,200
4,-600,200
5,-350,200
6,-430,200
7,-206,96
8,-460,200
9,-410,200
10,-400,200
11,-380,200
12,-155,75
13,-400,200
14,-350,200
15,-400,200
16,-360,200
17,-420,200
18,-430,200
19,-360,200
20,-420,200
21,-390,200
22,-59,49
23,-390,200
24,2,8
25,-217,117
26,-287,167
27,-248,118
28,-370,200
29,-390,200
30,-370,200
31,-360,200
32,-420,200
33,-430,200
34,-430,200
35,-430,200
36,-380,200
37,-420,200
38,-350,200
39,-370,200
40,-400,200
41,-410,200
42,-360,200
43,-16,16
44,-23,13
45,-390,200
46,-390,200
47,-109,79
48,-300,200
49,-370,200
50,-460,200
51,-350,200
52,-320,200
53,-310,200
54,-390,200
55,-370,200
56,-390,200
57,-350,200
58,-123,73
59,-204,124
60,-39,29
61,-155,85
62,-108,58
63,-249,169
64,-170,100
65,1,9
66,-23,23
67,-104,74
68,-223,123
69,-350,200
70,-124,104
71,-20,20
72,-360,200
73,-67,37
74,-360,200
75,-71,41
76,-23,23
77,-41,21
78,-1,11
79,-270,200
80,-330,200
81,-290,200
82,-2,12
83,-300,200
84,-380,200
85,-47,37
86,-350,200
87,-308,188
88,-370,200
89,-214,154
90,-290,200
91,-370,200
92,-32,22
93,-400,200
94,-217,127
95,-330,200
96,-380,200
97,-320,200
98,-300,200
99,-350,200
100,-400,200
101,-330,200
102,-360,200
103,-380,200
104,-400,200
105,-290,200
106,-203,103
107,-74,54
108,-330,200
109,-380,200
110,-263,173
111,-290,200
112,-340,200
113,-86,66
114,-340,200
115,-160,110
116,-340,200
117,-320,200
118,-320,200
119,-360,200
120,-330,200
121,-350,200
122,-300,200
123,-320,200
124,-70,40
125,-59,39
126,-340,200
127,-87,77
128,-330,200
129,-260,200
130,-290,200
131,-330,200
132,-340,200
133,-78,48
134,-390,200
135,-320,200
136,-360,200
137,-340,200
138,-185,115
139,-340,200
140,-250,200
141,-347,197
142,-320,200
143,-330,200
144,-270,200
145,-380,200
146,-320,200
147,-340,200
148,-310,200
149,-290,200
150,-380,200
151,-281,181
152,-30,30
153,-280,200
154,-300,200
155,-300,200
156,-300,200
157,-370,200
158,-320,200
159,-250,200
160,-370,200
161,-290,200
162,-320,200
163,-290,200
164,-44,44
165,-216,196
166,-340,200
167,-360,200
168,-310,200
169,-320,200
170,-280,200
171,-290,200
172,-280,200
173,-300,200
174,-350,200
175,-320,200
176,-320,200
177,-320,200
178,-300,200
179,-117,87
180,-158,88
181,-300,200
182,-290,200
183,-350,200
184,-310,200
185,-58,38
186,-290,200
187,-310,200
188,-350,200
189,-300,200
190,-380,200
191,-230,200
192,-26,26
193,-340,200
194,-280,200
195,-106,66
196,-7,17
197,-248,178
198,-161,101
199,-3,13
1 episodes rewards steps
2 0 -760 200
3 1 -560 200
4 2 -156 66
5 3 -500 200
6 4 -600 200
7 5 -350 200
8 6 -430 200
9 7 -206 96
10 8 -460 200
11 9 -410 200
12 10 -400 200
13 11 -380 200
14 12 -155 75
15 13 -400 200
16 14 -350 200
17 15 -400 200
18 16 -360 200
19 17 -420 200
20 18 -430 200
21 19 -360 200
22 20 -420 200
23 21 -390 200
24 22 -59 49
25 23 -390 200
26 24 2 8
27 25 -217 117
28 26 -287 167
29 27 -248 118
30 28 -370 200
31 29 -390 200
32 30 -370 200
33 31 -360 200
34 32 -420 200
35 33 -430 200
36 34 -430 200
37 35 -430 200
38 36 -380 200
39 37 -420 200
40 38 -350 200
41 39 -370 200
42 40 -400 200
43 41 -410 200
44 42 -360 200
45 43 -16 16
46 44 -23 13
47 45 -390 200
48 46 -390 200
49 47 -109 79
50 48 -300 200
51 49 -370 200
52 50 -460 200
53 51 -350 200
54 52 -320 200
55 53 -310 200
56 54 -390 200
57 55 -370 200
58 56 -390 200
59 57 -350 200
60 58 -123 73
61 59 -204 124
62 60 -39 29
63 61 -155 85
64 62 -108 58
65 63 -249 169
66 64 -170 100
67 65 1 9
68 66 -23 23
69 67 -104 74
70 68 -223 123
71 69 -350 200
72 70 -124 104
73 71 -20 20
74 72 -360 200
75 73 -67 37
76 74 -360 200
77 75 -71 41
78 76 -23 23
79 77 -41 21
80 78 -1 11
81 79 -270 200
82 80 -330 200
83 81 -290 200
84 82 -2 12
85 83 -300 200
86 84 -380 200
87 85 -47 37
88 86 -350 200
89 87 -308 188
90 88 -370 200
91 89 -214 154
92 90 -290 200
93 91 -370 200
94 92 -32 22
95 93 -400 200
96 94 -217 127
97 95 -330 200
98 96 -380 200
99 97 -320 200
100 98 -300 200
101 99 -350 200
102 100 -400 200
103 101 -330 200
104 102 -360 200
105 103 -380 200
106 104 -400 200
107 105 -290 200
108 106 -203 103
109 107 -74 54
110 108 -330 200
111 109 -380 200
112 110 -263 173
113 111 -290 200
114 112 -340 200
115 113 -86 66
116 114 -340 200
117 115 -160 110
118 116 -340 200
119 117 -320 200
120 118 -320 200
121 119 -360 200
122 120 -330 200
123 121 -350 200
124 122 -300 200
125 123 -320 200
126 124 -70 40
127 125 -59 39
128 126 -340 200
129 127 -87 77
130 128 -330 200
131 129 -260 200
132 130 -290 200
133 131 -330 200
134 132 -340 200
135 133 -78 48
136 134 -390 200
137 135 -320 200
138 136 -360 200
139 137 -340 200
140 138 -185 115
141 139 -340 200
142 140 -250 200
143 141 -347 197
144 142 -320 200
145 143 -330 200
146 144 -270 200
147 145 -380 200
148 146 -320 200
149 147 -340 200
150 148 -310 200
151 149 -290 200
152 150 -380 200
153 151 -281 181
154 152 -30 30
155 153 -280 200
156 154 -300 200
157 155 -300 200
158 156 -300 200
159 157 -370 200
160 158 -320 200
161 159 -250 200
162 160 -370 200
163 161 -290 200
164 162 -320 200
165 163 -290 200
166 164 -44 44
167 165 -216 196
168 166 -340 200
169 167 -360 200
170 168 -310 200
171 169 -320 200
172 170 -280 200
173 171 -290 200
174 172 -280 200
175 173 -300 200
176 174 -350 200
177 175 -320 200
178 176 -320 200
179 177 -320 200
180 178 -300 200
181 179 -117 87
182 180 -158 88
183 181 -300 200
184 182 -290 200
185 183 -350 200
186 184 -310 200
187 185 -58 38
188 186 -290 200
189 187 -310 200
190 188 -350 200
191 189 -300 200
192 190 -380 200
193 191 -230 200
194 192 -26 26
195 193 -340 200
196 194 -280 200
197 195 -106 66
198 196 -7 17
199 197 -248 178
200 198 -161 101
201 199 -3 13

View File

@@ -5,7 +5,7 @@ Author: John
Email: johnjim0816@gmail.com
Date: 2021-03-12 16:14:34
LastEditor: John
LastEditTime: 2022-08-15 18:10:13
LastEditTime: 2022-11-06 01:04:57
Description:
Environment:
'''
@@ -17,15 +17,16 @@ import dill
class FisrtVisitMC:
''' On-Policy First-Visit MC Control
'''
def __init__(self,n_actions,cfg):
self.n_actions = n_actions
def __init__(self,cfg):
self.n_actions = cfg.n_actions
self.epsilon = cfg.epsilon
self.gamma = cfg.gamma
self.Q_table = defaultdict(lambda: np.zeros(n_actions))
self.Q_table = defaultdict(lambda: np.zeros(cfg.n_actions))
self.returns_sum = defaultdict(float) # 保存return之和
self.returns_count = defaultdict(float)
def sample(self,state):
def sample_action(self,state):
state = str(state)
if state in self.Q_table.keys():
best_action = np.argmax(self.Q_table[state])
action_probs = np.ones(self.n_actions, dtype=float) * self.epsilon / self.n_actions
@@ -34,7 +35,8 @@ class FisrtVisitMC:
else:
action = np.random.randint(0,self.n_actions)
return action
def predict(self,state):
def predict_action(self,state):
state = str(state)
if state in self.Q_table.keys():
best_action = np.argmax(self.Q_table[state])
action_probs = np.ones(self.n_actions, dtype=float) * self.epsilon / self.n_actions
@@ -46,19 +48,20 @@ class FisrtVisitMC:
def update(self,one_ep_transition):
# Find all (state, action) pairs we've visited in this one_ep_transition
# We convert each state to a tuple so that we can use it as a dict key
sa_in_episode = set([(tuple(x[0]), x[1]) for x in one_ep_transition])
sa_in_episode = set([(str(x[0]), x[1]) for x in one_ep_transition])
for state, action in sa_in_episode:
sa_pair = (state, action)
# Find the first occurrence of the (state, action) pair in the one_ep_transition
first_occurence_idx = next(i for i,x in enumerate(one_ep_transition)
if x[0] == state and x[1] == action)
if str(x[0]) == state and x[1] == action)
# Sum up all rewards since the first occurrence
G = sum([x[2]*(self.gamma**i) for i,x in enumerate(one_ep_transition[first_occurence_idx:])])
# Calculate average return for this state over all sampled episodes
self.returns_sum[sa_pair] += G
self.returns_count[sa_pair] += 1.0
self.Q_table[state][action] = self.returns_sum[sa_pair] / self.returns_count[sa_pair]
def save(self,path=None):
def save_model(self,path=None):
'''把 Q表格 的数据保存到文件中
'''
from pathlib import Path
@@ -69,7 +72,7 @@ class FisrtVisitMC:
pickle_module=dill
)
def load(self, path=None):
def load_model(self, path=None):
'''从文件中读取数据到 Q表格
'''
self.Q_table =torch.load(f=path+"Q_table",pickle_module=dill)

View File

@@ -0,0 +1,32 @@
#!/usr/bin/env python
# coding=utf-8
'''
Author: JiangJi
Email: johnjim0816@gmail.com
Date: 2022-11-06 00:31:35
LastEditor: JiangJi
LastEditTime: 2022-11-06 00:45:44
Description: parameters of MonteCarlo
'''
from common.config import GeneralConfig,AlgoConfig
class GeneralConfigMC(GeneralConfig):
    """Run-level configuration overrides for the First-Visit MC task.

    These defaults are merged over the base ``GeneralConfig`` attributes by
    the launcher (via ``merge_class_attrs``) before training starts.
    """
    def __init__(self) -> None:
        self.env_name = "Racetrack-v0" # name of environment
        self.algo_name = "FirstVisitMC" # name of algorithm
        self.mode = "train" # train or test
        self.seed = 1 # random seed (downstream code treats 0 as "do not seed")
        self.device = "cpu" # device to use
        self.train_eps = 200 # number of episodes for training
        self.test_eps = 20 # number of episodes for testing
        self.max_steps = 200 # max steps for each episode
        self.load_checkpoint = False # whether to load a saved checkpoint before running
        self.load_path = "tasks" # path to load model
        self.show_fig = False # show figure or not
        self.save_fig = True # save figure or not
class AlgoConfigMC(AlgoConfig):
    """Algorithm hyperparameters for First-Visit Monte Carlo control."""
    def __init__(self) -> None:
        self.gamma = 0.90 # discount factor
        self.epsilon = 0.15 # epsilon greedy (exploration probability)
        self.lr = 0.1 # learning rate (NOTE(review): not referenced by the MC agent in this change — confirm it is used)

View File

@@ -5,51 +5,82 @@ Author: John
Email: johnjim0816@gmail.com
Date: 2021-03-11 14:26:44
LastEditor: John
LastEditTime: 2022-08-15 18:12:13
LastEditTime: 2022-11-06 00:44:56
Description:
Environment:
'''
import sys,os
curr_path = os.path.dirname(os.path.abspath(__file__)) # 当前文件所在绝对路径
parent_path = os.path.dirname(curr_path) # 父路径
sys.path.append(parent_path) # 添加路径到系统路径
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE" # avoid "OMP: Error #15: Initializing libiomp5md.dll, but found libiomp5md.dll already initialized."
curr_path = os.path.dirname(os.path.abspath(__file__)) # current path
parent_path = os.path.dirname(curr_path) # parent path
sys.path.append(parent_path) # add path to system path
import datetime
import argparse
from common.utils import save_results,save_args,plot_rewards
import gym
from envs.wrappers import CliffWalkingWapper
from envs.register import register_env
from common.utils import merge_class_attrs,all_seed
from common.launcher import Launcher
from MonteCarlo.agent import FisrtVisitMC
from envs.racetrack import RacetrackEnv
from MonteCarlo.config.config import GeneralConfigMC,AlgoConfigMC
curr_time = datetime.datetime.now().strftime(
"%Y%m%d-%H%M%S") # obtain current time
def get_args():
""" 超参数
"""
curr_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") # 获取当前时间
parser = argparse.ArgumentParser(description="hyperparameters")
parser.add_argument('--algo_name',default='First-Visit MC',type=str,help="name of algorithm")
parser.add_argument('--env_name',default='Racetrack',type=str,help="name of environment")
parser.add_argument('--train_eps',default=200,type=int,help="episodes of training")
parser.add_argument('--test_eps',default=20,type=int,help="episodes of testing")
parser.add_argument('--gamma',default=0.9,type=float,help="discounted factor")
parser.add_argument('--epsilon',default=0.15,type=float,help="the probability to select a random action")
parser.add_argument('--device',default='cpu',type=str,help="cpu or cuda")
parser.add_argument('--result_path',default=curr_path + "/outputs/" + parser.parse_args().env_name + \
'/' + curr_time + '/results/' )
parser.add_argument('--model_path',default=curr_path + "/outputs/" + parser.parse_args().env_name + \
'/' + curr_time + '/models/' )
parser.add_argument('--show_fig',default=False,type=bool,help="if show figure or not")
parser.add_argument('--save_fig',default=True,type=bool,help="if save figure or not")
args = parser.parse_args()
return args
def env_agent_config(cfg,seed=1):
env = RacetrackEnv()
n_actions = env.action_space.n
agent = FisrtVisitMC(n_actions, cfg)
return env,agent
class Main(Launcher):
    """Entry point wiring the First-Visit MC agent to its environment.

    Subclasses the project ``Launcher`` (which drives the overall train/test
    loop) and supplies the env/agent factory plus the per-episode train and
    test rollouts.
    """
    def __init__(self) -> None:
        super().__init__()
        # Overlay the MC-specific defaults onto the base configs created by Launcher.
        self.cfgs['general_cfg'] = merge_class_attrs(self.cfgs['general_cfg'],GeneralConfigMC())
        self.cfgs['algo_cfg'] = merge_class_attrs(self.cfgs['algo_cfg'],AlgoConfigMC())
    def env_agent_config(self,cfg,logger):
        ''' Create the gym environment and the First-Visit MC agent.

        Also writes the discovered ``n_states``/``n_actions`` back onto ``cfg``
        so the agent constructor can read them.
        '''
        register_env(cfg.env_name)
        env = gym.make(cfg.env_name,new_step_api=False) # create env with the old 4-tuple step API
        if cfg.env_name == 'CliffWalking-v0':
            env = CliffWalkingWapper(env)
        if cfg.seed !=0: # set random seed; seed 0 means "do not seed"
            all_seed(env,seed=cfg.seed)
        try: # state dimension: discrete spaces expose .n, box spaces expose .shape
            n_states = env.observation_space.n # print(hasattr(env.observation_space, 'n'))
        except AttributeError:
            n_states = env.observation_space.shape[0] # print(hasattr(env.observation_space, 'shape'))
        n_actions = env.action_space.n # action dimension
        logger.info(f"n_states: {n_states}, n_actions: {n_actions}") # print info
        # write the env dimensions back onto cfg for the agent constructor
        setattr(cfg, 'n_states', n_states)
        setattr(cfg, 'n_actions', n_actions)
        agent = FisrtVisitMC(cfg)
        return env,agent
    def train_one_episode(self, env, agent, cfg):
        ''' Roll out one training episode and update the agent.

        Returns (agent, episode_reward, episode_steps).
        '''
        ep_reward = 0 # accumulated reward for this episode
        ep_step = 0
        state = env.reset() # reset and obtain initial state
        one_ep_transition = []
        for _ in range(cfg.max_steps):
            ep_step += 1
            action = agent.sample_action(state) # sample action (exploratory policy)
            next_state, reward, terminated, info = env.step(action) # old 4-tuple step API (new_step_api=False above)
            one_ep_transition.append((state, action, reward)) # save transitions
            agent.update(one_ep_transition) # update agent
            # NOTE(review): update() is called every step on the growing episode
            # prefix rather than once at episode end — confirm this is intended
            # for first-visit MC.
            state = next_state # update next state for env
            ep_reward += reward # accumulate episode return
            if terminated:
                break
        return agent,ep_reward,ep_step
    def test_one_episode(self, env, agent, cfg):
        ''' Roll out one evaluation episode without learning.

        Returns (agent, episode_reward, episode_steps).
        '''
        ep_reward = 0 # accumulated reward for this episode
        ep_step = 0
        state = env.reset() # reset and obtain initial state
        for _ in range(cfg.max_steps):
            ep_step += 1
            action = agent.predict_action(state) # predict action for evaluation
            next_state, reward, terminated, info = env.step(action) # old 4-tuple step API (new_step_api=False above)
            state = next_state # update next state for env
            ep_reward += reward # accumulate episode return
            if terminated:
                break
        return agent,ep_reward,ep_step
def train(cfg, env, agent):
print("开始训练!")
@@ -93,18 +124,5 @@ def test(cfg, env, agent):
return {'rewards':rewards}
if __name__ == "__main__":
cfg = get_args()
# 训练
env, agent = env_agent_config(cfg)
res_dic = train(cfg, env, agent)
save_args(cfg,path = cfg.result_path) # 保存参数到模型路径上
agent.save(path = cfg.model_path) # 保存模型
save_results(res_dic, tag = 'train', path = cfg.result_path)
plot_rewards(res_dic['rewards'], cfg, path = cfg.result_path,tag = "train")
# 测试
env, agent = env_agent_config(cfg) # 也可以不加,加这一行的是为了避免训练之后环境可能会出现问题,因此新建一个环境用于测试
agent.load(path = cfg.model_path) # 导入模型
res_dic = test(cfg, env, agent)
save_results(res_dic, tag='test',
path = cfg.result_path) # 保存结果
plot_rewards(res_dic['rewards'], cfg, path = cfg.result_path,tag = "test") # 画出结果
main = Main()
main.run()