diff --git a/codes/QLearning/agent.py b/codes/QLearning/agent.py
index a66128f..4587c86 100644
--- a/codes/QLearning/agent.py
+++ b/codes/QLearning/agent.py
@@ -5,7 +5,7 @@ Author: John
 Email: johnjim0816@gmail.com
 Date: 2020-09-11 23:03:00
 LastEditor: John
-LastEditTime: 2021-09-15 13:18:37
+LastEditTime: 2021-09-19 23:05:45
 Discription: use defaultdict to define Q table
 Environment:
 '''
diff --git a/codes/QLearning/outputs/CliffWalking-v0/20210920-003309/models/Qleaning_model.pkl b/codes/QLearning/outputs/CliffWalking-v0/20210920-003309/models/Qleaning_model.pkl
new file mode 100644
index 0000000..4d6ba95
Binary files /dev/null and b/codes/QLearning/outputs/CliffWalking-v0/20210920-003309/models/Qleaning_model.pkl differ
diff --git a/codes/QLearning/outputs/CliffWalking-v0/20210920-003309/results/eval_ma_rewards.npy b/codes/QLearning/outputs/CliffWalking-v0/20210920-003309/results/eval_ma_rewards.npy
new file mode 100644
index 0000000..a67d064
Binary files /dev/null and b/codes/QLearning/outputs/CliffWalking-v0/20210920-003309/results/eval_ma_rewards.npy differ
diff --git a/codes/QLearning/outputs/CliffWalking-v0/20210920-003309/results/eval_rewards.npy b/codes/QLearning/outputs/CliffWalking-v0/20210920-003309/results/eval_rewards.npy
new file mode 100644
index 0000000..6de67e1
Binary files /dev/null and b/codes/QLearning/outputs/CliffWalking-v0/20210920-003309/results/eval_rewards.npy differ
diff --git a/codes/QLearning/outputs/CliffWalking-v0/20210920-003309/results/eval_rewards_curve_cn.png b/codes/QLearning/outputs/CliffWalking-v0/20210920-003309/results/eval_rewards_curve_cn.png
new file mode 100644
index 0000000..91ca06c
Binary files /dev/null and b/codes/QLearning/outputs/CliffWalking-v0/20210920-003309/results/eval_rewards_curve_cn.png differ
diff --git a/codes/QLearning/outputs/CliffWalking-v0/20210920-003309/results/train_ma_rewards.npy b/codes/QLearning/outputs/CliffWalking-v0/20210920-003309/results/train_ma_rewards.npy
new file mode 100644
index 0000000..7184c7b
Binary files /dev/null and b/codes/QLearning/outputs/CliffWalking-v0/20210920-003309/results/train_ma_rewards.npy differ
diff --git a/codes/QLearning/outputs/CliffWalking-v0/20210920-003309/results/train_rewards.npy b/codes/QLearning/outputs/CliffWalking-v0/20210920-003309/results/train_rewards.npy
new file mode 100644
index 0000000..f037a25
Binary files /dev/null and b/codes/QLearning/outputs/CliffWalking-v0/20210920-003309/results/train_rewards.npy differ
diff --git a/codes/QLearning/outputs/CliffWalking-v0/20210920-003309/results/train_rewards_curve_cn.png b/codes/QLearning/outputs/CliffWalking-v0/20210920-003309/results/train_rewards_curve_cn.png
new file mode 100644
index 0000000..9c0943a
Binary files /dev/null and b/codes/QLearning/outputs/CliffWalking-v0/20210920-003309/results/train_rewards_curve_cn.png differ
diff --git a/codes/QLearning/task0_train.py b/codes/QLearning/task0_train.py
index a9bc36d..7eba2de 100644
--- a/codes/QLearning/task0_train.py
+++ b/codes/QLearning/task0_train.py
@@ -5,7 +5,7 @@ Author: John
 Email: johnjim0816@gmail.com
 Date: 2020-09-11 23:03:00
 LastEditor: John
-LastEditTime: 2021-09-15 14:44:25
+LastEditTime: 2021-09-20 00:32:59
 Discription:
 Environment:
 '''
@@ -31,13 +31,13 @@ class QlearningConfig:
         self.env = 'CliffWalking-v0' # 环境名称
         self.result_path = curr_path+"/outputs/" +self.env+'/'+curr_time+'/results/' # 保存结果的路径
         self.model_path = curr_path+"/outputs/" +self.env+'/'+curr_time+'/models/' # 保存模型的路径
-        self.train_eps = 200 # 训练的回合数
+        self.train_eps = 400 # 训练的回合数
         self.eval_eps = 30 # 测试的回合数
         self.gamma = 0.9 # reward的衰减率
-        self.epsilon_start = 0.90 # e-greedy策略中初始epsilon
+        self.epsilon_start = 0.99 # e-greedy策略中初始epsilon
         self.epsilon_end = 0.01 # e-greedy策略中的终止epsilon
-        self.epsilon_decay = 200 # e-greedy策略中epsilon的衰减率
-        self.lr = 0.05 # 学习率
+        self.epsilon_decay = 300 # e-greedy策略中epsilon的衰减率
+        self.lr = 0.1 # 学习率
         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") # 检测GPU
@@ -111,8 +111,8 @@ if __name__ == "__main__":
     plot_rewards_cn(rewards,ma_rewards,tag="train",env=cfg.env,algo = cfg.algo,path=cfg.result_path)
     # 测试
-    # env,agent = env_agent_config(cfg,seed=10)
-    # agent.load(path=cfg.model_path) # 加载模型
+    env,agent = env_agent_config(cfg,seed=10)
+    agent.load(path=cfg.model_path) # 加载模型
     rewards,ma_rewards = eval(cfg,env,agent)
     save_results(rewards,ma_rewards,tag='eval',path=cfg.result_path)
     plot_rewards_cn(rewards,ma_rewards,tag="eval",env=cfg.env,algo = cfg.algo,path=cfg.result_path)
diff --git a/codes/common/plot.py b/codes/common/plot.py
index df78d9e..4f0f0b9 100644
--- a/codes/common/plot.py
+++ b/codes/common/plot.py
@@ -5,7 +5,7 @@ Author: John
 Email: johnjim0816@gmail.com
 Date: 2020-10-07 20:57:11
 LastEditor: John
-LastEditTime: 2021-09-15 14:56:15
+LastEditTime: 2021-09-19 23:00:36
 Discription:
 Environment:
 '''
@@ -29,6 +29,7 @@ def plot_rewards_cn(rewards,ma_rewards,tag="train",env='CartPole-v0',algo = "DQN
     ''' 中文画图 '''
     sns.set()
+    plt.figure()
     plt.title(u"{}环境下{}算法的学习曲线".format(env,algo),fontproperties=chinese_font())
     plt.xlabel(u'回合数',fontproperties=chinese_font())
     plt.plot(rewards)
@@ -36,7 +37,7 @@ def plot_rewards_cn(rewards,ma_rewards,tag="train",env='CartPole-v0',algo = "DQN
     plt.legend((u'奖励',u'滑动平均奖励',),loc="best",prop=chinese_font())
     if save:
         plt.savefig(path+f"{tag}_rewards_curve_cn")
-    plt.show()
+    # plt.show()

 def plot_losses(losses,algo = "DQN",save=True,path='./'):
     sns.set()
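For context on the `QlearningConfig` changes above (epsilon_start 0.90→0.99, epsilon_decay 200→300, lr 0.05→0.1, train_eps 200→400): these three epsilon parameters are typically combined into an exponentially decaying epsilon-greedy schedule. The actual decay formula lives in `agent.py`, which this diff does not show, so the snippet below is only a minimal sketch of that common schedule under assumed semantics; the `epsilon` helper and the example values in the comments are illustrative, not code from the repository.

```python
# Hypothetical sketch (not from this diff): an exponential epsilon-greedy schedule
# built from the values set in QlearningConfig.
import math

epsilon_start, epsilon_end, epsilon_decay = 0.99, 0.01, 300  # values after this change

def epsilon(sample_count: int) -> float:
    """Exploration rate after `sample_count` action selections (assumed schedule)."""
    return epsilon_end + (epsilon_start - epsilon_end) * math.exp(-sample_count / epsilon_decay)

# Under this assumed schedule:
#   epsilon(0)   ≈ 0.99  (almost fully random at the start)
#   epsilon(300) ≈ 0.37
#   epsilon(900) ≈ 0.06  (mostly greedy late in training)
```

If the agent uses a schedule of this shape, raising epsilon_decay from 200 to 300 keeps exploration high for more steps before the policy turns mostly greedy, which pairs naturally with doubling train_eps from 200 to 400.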