hot update A2C
@@ -24,6 +24,7 @@ def get_args():
     parser.add_argument('--env_name',default='CartPole-v0',type=str,help="name of environment")
     parser.add_argument('--train_eps',default=200,type=int,help="episodes of training")
     parser.add_argument('--test_eps',default=20,type=int,help="episodes of testing")
+    parser.add_argument('--ep_max_steps',default = 100000,type=int,help="steps per episode, much larger value can simulate infinite steps")
     parser.add_argument('--gamma',default=0.95,type=float,help="discounted factor")
     parser.add_argument('--epsilon_start',default=0.95,type=float,help="initial value of epsilon")
     parser.add_argument('--epsilon_end',default=0.01,type=float,help="final value of epsilon")
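The loops changed below index the parsed arguments as a dict (`cfg['ep_max_steps']`), so `get_args()` presumably converts the argparse namespace into a plain dict. A minimal sketch of that assumed glue, showing only the flags touched by this hunk:

```python
import argparse

def get_args():
    # sketch (assumed): only the arguments relevant to the bounded-episode change
    parser = argparse.ArgumentParser(description="hyperparameters")
    parser.add_argument('--env_name', default='CartPole-v0', type=str, help="name of environment")
    parser.add_argument('--train_eps', default=200, type=int, help="episodes of training")
    parser.add_argument('--test_eps', default=20, type=int, help="episodes of testing")
    parser.add_argument('--ep_max_steps', default=100000, type=int,
                        help="steps per episode; a large value approximates an infinite horizon")
    parser.add_argument('--gamma', default=0.95, type=float, help="discount factor")
    args = parser.parse_args()
    return vars(args)  # train()/test() index cfg like a dict, e.g. cfg['ep_max_steps']
```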
@@ -72,7 +73,7 @@ def train(cfg, env, agent):
         ep_reward = 0 # reward per episode
         ep_step = 0
         state = env.reset() # reset and obtain initial state
-        while True:
+        for _ in range(cfg['ep_max_steps']):
             ep_step += 1
             action = agent.sample_action(state) # sample action
             next_state, reward, done, _ = env.step(action) # update env and return transitions
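This is the core of the commit: the unbounded `while True:` rollout is capped at `cfg['ep_max_steps']`, so an episode that never reaches a terminal state still ends. A sketch of how the surrounding training loop plausibly looks with this change; the replay-buffer and update calls are assumptions, not part of the diff:

```python
def train(cfg, env, agent):
    rewards, steps = [], []                 # per-episode return and length
    for i_ep in range(cfg['train_eps']):
        ep_reward = 0                       # reward per episode
        ep_step = 0
        state = env.reset()                 # reset and obtain initial state
        for _ in range(cfg['ep_max_steps']):   # bounded rollout instead of `while True:`
            ep_step += 1
            action = agent.sample_action(state)             # sample action (exploration)
            next_state, reward, done, _ = env.step(action)  # update env and return transitions
            agent.memory.push(state, action, reward, next_state, done)  # assumed replay-buffer API
            agent.update()                                   # assumed one learning step
            state = next_state
            ep_reward += reward
            if done:
                break                        # a terminal state still ends the episode early
        rewards.append(ep_reward)
        steps.append(ep_step)                # ep_step feeds the new 'steps' key below
    env.close()
    return {'episodes': range(len(rewards)), 'rewards': rewards, 'steps': steps}
```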
@@ -91,7 +92,7 @@ def train(cfg, env, agent):
         print(f'Episode: {i_ep+1}/{cfg["train_eps"]}, Reward: {ep_reward:.2f}: Epislon: {agent.epsilon:.3f}')
     print("Finish training!")
     env.close()
-    res_dic = {'episodes':range(len(rewards)),'rewards':rewards}
+    res_dic = {'episodes':range(len(rewards)),'rewards':rewards,'steps':steps}
     return res_dic

 def test(cfg, env, agent):
@@ -103,7 +104,7 @@ def test(cfg, env, agent):
         ep_reward = 0 # reward per episode
         ep_step = 0
         state = env.reset() # reset and obtain initial state
-        while True:
+        for _ in range(cfg['ep_max_steps']):
             ep_step+=1
             action = agent.predict_action(state) # predict action
             next_state, reward, done, _ = env.step(action)
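Training explores with `agent.sample_action()` while testing acts greedily with `agent.predict_action()`; the epsilon fields printed above and stored in the saved config (`epsilon_start`, `epsilon_end`, `epsilon_decay`) suggest an epsilon-greedy DQN policy. A hedged sketch of that split, with all attribute and network names assumed rather than taken from the diff:

```python
import math
import random
import torch

class DQNAgentSketch:
    """Sketch of the action-selection half of the agent; names are assumed, not from the diff."""
    def __init__(self, cfg, policy_net):
        self.policy_net = policy_net
        self.device = cfg['device']
        self.n_actions = cfg['n_actions']
        self.epsilon_start, self.epsilon_end = cfg['epsilon_start'], cfg['epsilon_end']
        self.epsilon_decay = cfg['epsilon_decay']
        self.epsilon = cfg['epsilon_start']
        self.sample_count = 0

    def sample_action(self, state):
        # exploration: epsilon decays exponentially from epsilon_start toward epsilon_end (assumed schedule)
        self.sample_count += 1
        self.epsilon = self.epsilon_end + (self.epsilon_start - self.epsilon_end) * \
            math.exp(-1.0 * self.sample_count / self.epsilon_decay)
        if random.random() > self.epsilon:
            return self.predict_action(state)    # exploit current Q-estimates
        return random.randrange(self.n_actions)  # explore uniformly

    def predict_action(self, state):
        # evaluation: always take the greedy action from the Q-network
        with torch.no_grad():
            state = torch.tensor(state, dtype=torch.float32, device=self.device).unsqueeze(0)
            return self.policy_net(state).argmax(dim=1).item()
```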
@@ -116,7 +117,7 @@ def test(cfg, env, agent):
         print(f"Episode: {i_ep+1}/{cfg['test_eps']},Reward: {ep_reward:.2f}")
     print("Finish testing!")
     env.close()
-    return {'episodes':range(len(rewards)),'rewards':rewards}
+    return {'episodes':range(len(rewards)),'rewards':rewards,'steps':steps}


 if __name__ == "__main__":
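The last context line shows the script's entry point. A sketch of how the pieces above are presumably wired together; the `Agent` class name is hypothetical and the real script may build the env/agent through its own helper:

```python
import gym

if __name__ == "__main__":
    cfg = get_args()                    # parsed hyperparameters as a dict (see sketch above)
    env = gym.make(cfg['env_name'])     # CartPole-v0 by default
    agent = Agent(cfg)                  # hypothetical: whatever agent class the script defines
    train_res = train(cfg, env, agent)  # {'episodes': ..., 'rewards': ..., 'steps': ...}
    test_res = test(cfg, env, agent)
```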
Binary file not shown.
@@ -0,0 +1 @@
+{"algo_name": "DQN", "env_name": "CartPole-v1", "train_eps": 2000, "test_eps": 20, "ep_max_steps": 100000, "gamma": 0.99, "epsilon_start": 0.95, "epsilon_end": 0.01, "epsilon_decay": 6000, "lr": 1e-05, "memory_capacity": 200000, "batch_size": 64, "target_update": 4, "hidden_dim": 256, "device": "cuda", "seed": 10, "show_fig": false, "save_fig": true, "result_path": "C:\\Users\\24438\\Desktop\\rl-tutorials\\codes\\DQN/outputs/CartPole-v1/20220828-214702/results", "model_path": "C:\\Users\\24438\\Desktop\\rl-tutorials\\codes\\DQN/outputs/CartPole-v1/20220828-214702/models", "n_states": 4, "n_actions": 2}
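This new file records the exact hyperparameters of a finished run (a DQN/CartPole-v1 config, even though the commit message says A2C). If you want to re-run evaluation with the saved settings, a plausible loader looks like this; the file name and relative path are guesses, since the diff does not show them:

```python
import json

def load_saved_cfg(path):
    # load a saved params JSON (like the one above) back into the cfg dict used by train()/test()
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)

# hypothetical file name inside the run directory shown in result_path/model_path
cfg = load_saved_cfg("outputs/CartPole-v1/20220828-214702/params.json")
print(cfg["algo_name"], cfg["env_name"], cfg["ep_max_steps"])
```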
Binary file not shown. (image, 50 KiB)
@@ -0,0 +1,21 @@
+episodes,rewards,steps
+0,371.0,371
+1,446.0,446
+2,300.0,300
+3,500.0,500
+4,313.0,313
+5,500.0,500
+6,341.0,341
+7,489.0,489
+8,304.0,304
+9,358.0,358
+10,278.0,278
+11,500.0,500
+12,500.0,500
+13,500.0,500
+14,500.0,500
+15,476.0,476
+16,308.0,308
+17,394.0,394
+18,500.0,500
+19,500.0,500
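The columns of this new results file match the `res_dic` returned above (`episodes`, `rewards`, `steps`). It was presumably written with pandas; a sketch of that assumed save step, using the first two rows of the file as sample data:

```python
import pandas as pd

def save_results(res_dic, path):
    # res_dic = {'episodes': range(n), 'rewards': [...], 'steps': [...]}
    df = pd.DataFrame(res_dic)
    df.to_csv(path, index=False)  # yields a header line `episodes,rewards,steps` like the file above

# hypothetical path; the real result_path comes from the saved config
save_results({'episodes': range(2), 'rewards': [371.0, 446.0], 'steps': [371, 446]},
             "test_results.csv")
```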
Binary file not shown. (image, 50 KiB)
File diff suppressed because it is too large