diff --git a/codes/Q-learning/main.py b/codes/Q-learning/main.py index 9f85d2f..8d35b08 100644 --- a/codes/Q-learning/main.py +++ b/codes/Q-learning/main.py @@ -5,7 +5,7 @@ Author: John Email: johnjim0816@gmail.com Date: 2020-09-11 23:03:00 LastEditor: John -LastEditTime: 2020-11-24 19:56:23 +LastEditTime: 2021-01-05 09:41:34 Discription: Environment: ''' @@ -92,7 +92,7 @@ def train(cfg): plot(rewards) plot(MA_rewards,ylabel='moving_average_rewards_train') -def test(cfg): +def eval(cfg, saved_model_path = SAVED_MODEL_PATH): env = gym.make("CliffWalking-v0") # 0 up, 1 right, 2 down, 3 left env = CliffWalkingWapper(env) @@ -102,7 +102,7 @@ def test(cfg): learning_rate=cfg.policy_lr, gamma=cfg.gamma, epsilon_start=cfg.epsilon_start,epsilon_end=cfg.epsilon_end,epsilon_decay=cfg.epsilon_decay) - agent.load() # 导入保存的模型 + agent.load_model(saved_model_path+'checkpoint.npy') # 导入保存的模型 rewards = [] # 记录所有episode的reward MA_rewards = [] # 记录滑动平均的reward steps = []# 记录所有episode的steps diff --git a/codes/Q-learning/result/20210105-094149/moving_average_rewards_train.npy b/codes/Q-learning/result/20210105-094149/moving_average_rewards_train.npy new file mode 100644 index 0000000..f3df3ea Binary files /dev/null and b/codes/Q-learning/result/20210105-094149/moving_average_rewards_train.npy differ diff --git a/codes/Q-learning/result/20210105-094149/rewards_train.npy b/codes/Q-learning/result/20210105-094149/rewards_train.npy new file mode 100644 index 0000000..3db7133 Binary files /dev/null and b/codes/Q-learning/result/20210105-094149/rewards_train.npy differ diff --git a/codes/Q-learning/result/moving_average_rewards_train.png b/codes/Q-learning/result/moving_average_rewards_train.png index f72ef4d..6ba9b6a 100644 Binary files a/codes/Q-learning/result/moving_average_rewards_train.png and b/codes/Q-learning/result/moving_average_rewards_train.png differ diff --git a/codes/Q-learning/result/rewards.png b/codes/Q-learning/result/rewards.png index 3a1005f..9fe320e 100644 Binary files a/codes/Q-learning/result/rewards.png and b/codes/Q-learning/result/rewards.png differ diff --git a/codes/Q-learning/saved_model/20210105-094149/checkpoint.npy b/codes/Q-learning/saved_model/20210105-094149/checkpoint.npy new file mode 100644 index 0000000..0a49f57 Binary files /dev/null and b/codes/Q-learning/saved_model/20210105-094149/checkpoint.npy differ diff --git a/codes/Q-learning/saved_model/checkpoint.npy b/codes/Q-learning/saved_model/checkpoint.npy new file mode 100644 index 0000000..0a49f57 Binary files /dev/null and b/codes/Q-learning/saved_model/checkpoint.npy differ diff --git a/codes/dqn/main.py b/codes/dqn/main.py index 9c6d76a..b024320 100644 --- a/codes/dqn/main.py +++ b/codes/dqn/main.py @@ -5,7 +5,7 @@ @Email: johnjim0816@gmail.com @Date: 2020-06-12 00:48:57 @LastEditor: John -LastEditTime: 2020-11-23 11:58:17 +LastEditTime: 2021-01-05 09:41:02 @Discription: @Environment: python 3.7.7 '''