hot update A2C

johnjim0816
2022-08-29 15:12:33 +08:00
parent 99a3c1afec
commit 0b0f7e857d
109 changed files with 8213 additions and 1658 deletions


@@ -24,6 +24,7 @@ def get_args():
     parser.add_argument('--env_name',default='CartPole-v0',type=str,help="name of environment")
     parser.add_argument('--train_eps',default=200,type=int,help="episodes of training")
     parser.add_argument('--test_eps',default=20,type=int,help="episodes of testing")
+    parser.add_argument('--ep_max_steps',default=100000,type=int,help="max steps per episode; a large value approximates an infinite horizon")
     parser.add_argument('--gamma',default=0.95,type=float,help="discount factor")
     parser.add_argument('--epsilon_start',default=0.95,type=float,help="initial value of epsilon")
     parser.add_argument('--epsilon_end',default=0.01,type=float,help="final value of epsilon")
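The new flag only takes effect because train() and test() below index it as cfg['ep_max_steps']; a minimal sketch of the assumed args-to-dict step (the repo's actual conversion is not shown in this hunk):

```python
# Minimal sketch, assuming the parsed argparse.Namespace is turned into a
# plain dict so that cfg['ep_max_steps'] works in train()/test().
def get_cfg():
    args = get_args()   # the parser defined above
    cfg = vars(args)    # Namespace -> dict: cfg['ep_max_steps'] == 100000 by default
    return cfg
```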
@@ -72,7 +73,7 @@ def train(cfg, env, agent):
         ep_reward = 0  # reward per episode
         ep_step = 0
         state = env.reset()  # reset and obtain initial state
-        while True:
+        for _ in range(cfg['ep_max_steps']):
             ep_step += 1
             action = agent.sample_action(state)  # sample action
             next_state, reward, done, _ = env.step(action)  # update env and return transitions
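The hunk is cut off before the loop's exit condition; a minimal sketch of how the bounded episode loop presumably reads after this change (the replay-buffer push, the update() call, and the `done` break are assumptions, not shown above):

```python
for _ in range(cfg['ep_max_steps']):    # hard cap replaces the unbounded `while True`
    ep_step += 1
    action = agent.sample_action(state)             # sample action
    next_state, reward, done, _ = env.step(action)  # update env and return transitions
    agent.memory.push(state, action, reward, next_state, done)  # assumed buffer API
    agent.update()                                  # assumed one learning step
    state = next_state
    ep_reward += reward
    if done:                                        # episode ends early on termination
        break
```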
@@ -91,7 +92,7 @@ def train(cfg, env, agent):
         print(f'Episode: {i_ep+1}/{cfg["train_eps"]}, Reward: {ep_reward:.2f}, Epsilon: {agent.epsilon:.3f}')
     print("Finish training!")
     env.close()
-    res_dic = {'episodes':range(len(rewards)),'rewards':rewards}
+    res_dic = {'episodes':range(len(rewards)),'rewards':rewards,'steps':steps}
     return res_dic
 def test(cfg, env, agent):
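The new 'steps' key implies per-episode step counts are now collected next to rewards; a runnable toy sketch of the assumed bookkeeping (dummy numbers stand in for the real env loop):

```python
rewards, steps = [], []
for i_ep in range(3):                  # cfg['train_eps'] in the real script
    ep_reward, ep_step = 0.0, 0
    for _ in range(100):               # cfg['ep_max_steps'] in the real script
        ep_step += 1
        ep_reward += 1.0
        if ep_step == 42:              # stand-in for the env's `done` signal
            break
    rewards.append(ep_reward)
    steps.append(ep_step)              # feeds the new 'steps' key
res_dic = {'episodes': range(len(rewards)), 'rewards': rewards, 'steps': steps}
print(res_dic['steps'])                # [42, 42, 42]
```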
@@ -103,7 +104,7 @@ def test(cfg, env, agent):
         ep_reward = 0  # reward per episode
         ep_step = 0
         state = env.reset()  # reset and obtain initial state
-        while True:
+        for _ in range(cfg['ep_max_steps']):
             ep_step += 1
             action = agent.predict_action(state)  # predict action
             next_state, reward, done, _ = env.step(action)
@@ -116,7 +117,7 @@ def test(cfg, env, agent):
print(f"Episode: {i_ep+1}/{cfg['test_eps']}Reward: {ep_reward:.2f}")
print("Finish testing!")
env.close()
return {'episodes':range(len(rewards)),'rewards':rewards}
return {'episodes':range(len(rewards)),'rewards':rewards,'steps':steps}
if __name__ == "__main__":
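The dict returned here lines up column-for-column with the results CSV added further down; a plausible save helper (hypothetical name, pandas assumed):

```python
import pandas as pd

def save_results(res_dic, path):
    """Hypothetical helper: writes {'episodes','rewards','steps'} to CSV,
    yielding exactly the episodes,rewards,steps layout shown below."""
    pd.DataFrame(res_dic).to_csv(path, index=False)
```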


@@ -0,0 +1 @@
{"algo_name": "DQN", "env_name": "CartPole-v1", "train_eps": 2000, "test_eps": 20, "ep_max_steps": 100000, "gamma": 0.99, "epsilon_start": 0.95, "epsilon_end": 0.01, "epsilon_decay": 6000, "lr": 1e-05, "memory_capacity": 200000, "batch_size": 64, "target_update": 4, "hidden_dim": 256, "device": "cuda", "seed": 10, "show_fig": false, "save_fig": true, "result_path": "C:\\Users\\24438\\Desktop\\rl-tutorials\\codes\\DQN/outputs/CartPole-v1/20220828-214702/results", "model_path": "C:\\Users\\24438\\Desktop\\rl-tutorials\\codes\\DQN/outputs/CartPole-v1/20220828-214702/models", "n_states": 4, "n_actions": 2}

Binary file not shown (new image, 50 KiB).


@@ -0,0 +1,21 @@
+episodes,rewards,steps
+0,371.0,371
+1,446.0,446
+2,300.0,300
+3,500.0,500
+4,313.0,313
+5,500.0,500
+6,341.0,341
+7,489.0,489
+8,304.0,304
+9,358.0,358
+10,278.0,278
+11,500.0,500
+12,500.0,500
+13,500.0,500
+14,500.0,500
+15,476.0,476
+16,308.0,308
+17,394.0,394
+18,500.0,500
+19,500.0,500
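To eyeball the new per-episode data, the CSV above can be plotted directly; a minimal sketch assuming pandas and matplotlib (the file path is illustrative):

```python
import pandas as pd
import matplotlib.pyplot as plt

df = pd.read_csv('results.csv')            # illustrative path to the CSV above
plt.plot(df['episodes'], df['rewards'])
plt.xlabel('episode')
plt.ylabel('reward')
plt.title('DQN on CartPole-v1, test episodes')
plt.show()
```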

Binary file not shown (new image, 50 KiB).