Merge branch 'master' of github.com:datawhalechina/easy-rl

Author: qiwang067
Date: 2021-09-21 11:06:35 +08:00
10 changed files with 11 additions and 10 deletions

(first changed file; filename not shown)

@@ -5,7 +5,7 @@ Author: John
 Email: johnjim0816@gmail.com
 Date: 2020-09-11 23:03:00
 LastEditor: John
-LastEditTime: 2021-09-15 13:18:37
+LastEditTime: 2021-09-19 23:05:45
 Discription: use defaultdict to define Q table
 Environment:
 '''

Binary image file added (31 KiB); content not shown.

Binary image file added (42 KiB); content not shown.

(next changed file; filename not shown)

@@ -5,7 +5,7 @@ Author: John
 Email: johnjim0816@gmail.com
 Date: 2020-09-11 23:03:00
 LastEditor: John
-LastEditTime: 2021-09-15 14:44:25
+LastEditTime: 2021-09-20 00:32:59
 Discription:
 Environment:
 '''
@@ -31,13 +31,13 @@ class QlearningConfig:
         self.env = 'CliffWalking-v0'  # environment name
         self.result_path = curr_path+"/outputs/" +self.env+'/'+curr_time+'/results/'  # path to save results
         self.model_path = curr_path+"/outputs/" +self.env+'/'+curr_time+'/models/'  # path to save models
-        self.train_eps = 200  # number of training episodes
+        self.train_eps = 400  # number of training episodes
         self.eval_eps = 30  # number of evaluation episodes
         self.gamma = 0.9  # discount factor for rewards
-        self.epsilon_start = 0.90  # initial epsilon for the e-greedy policy
+        self.epsilon_start = 0.99  # initial epsilon for the e-greedy policy
         self.epsilon_end = 0.01  # final epsilon for the e-greedy policy
-        self.epsilon_decay = 200  # epsilon decay rate for the e-greedy policy
+        self.epsilon_decay = 300  # epsilon decay rate for the e-greedy policy
-        self.lr = 0.05  # learning rate
+        self.lr = 0.1  # learning rate
         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # check for GPU
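Taken together, these changes give the agent longer training (400 episodes), a higher starting epsilon, a slower epsilon decay, and a larger learning rate. The agent code that consumes epsilon_start, epsilon_end, and epsilon_decay is not part of this diff, so the exact schedule is an assumption; with these parameter names, a minimal sketch of the usual exponential epsilon-greedy schedule looks like this:

import math

# Sketch only: the repo's agent is not shown in this diff, so this formula is an assumption.
def epsilon_at(sample_count, epsilon_start=0.99, epsilon_end=0.01, epsilon_decay=300):
    # exponential decay from epsilon_start toward epsilon_end as samples accumulate
    return epsilon_end + (epsilon_start - epsilon_end) * math.exp(-sample_count / epsilon_decay)

print(epsilon_at(0))     # ~0.99: almost fully random exploration at the start
print(epsilon_at(300))   # ~0.37: decayed after epsilon_decay samples
print(epsilon_at(3000))  # ~0.01: close to greedy late in training

Raising epsilon_decay from 200 to 300 therefore stretches exploration over more of the longer 400-episode run before the policy becomes mostly greedy.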
@@ -111,8 +111,8 @@ if __name__ == "__main__":
     plot_rewards_cn(rewards,ma_rewards,tag="train",env=cfg.env,algo = cfg.algo,path=cfg.result_path)
     # # evaluation
-    # env,agent = env_agent_config(cfg,seed=10)
-    # agent.load(path=cfg.model_path)  # load the model
+    env,agent = env_agent_config(cfg,seed=10)
+    agent.load(path=cfg.model_path)  # load the model
     rewards,ma_rewards = eval(cfg,env,agent)
     save_results(rewards,ma_rewards,tag='eval',path=cfg.result_path)
     plot_rewards_cn(rewards,ma_rewards,tag="eval",env=cfg.env,algo = cfg.algo,path=cfg.result_path)
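Uncommenting env_agent_config(cfg,seed=10) and agent.load(...) means evaluation now runs on a freshly seeded environment with the Q table that was saved during training, rather than reusing the training environment and agent state directly. The eval() function itself lives elsewhere in the script and is not shown in this diff; a hedged sketch of what such a loop typically does for a tabular agent (agent.predict is an assumed greedy-action method):

# Sketch only: names such as predict() are assumptions, not the repo's verified API.
def eval_sketch(cfg, env, agent):
    rewards, ma_rewards = [], []
    for ep in range(cfg.eval_eps):
        state = env.reset()
        ep_reward, done = 0, False
        while not done:
            action = agent.predict(state)           # greedy action, no exploration
            state, reward, done, _ = env.step(action)
            ep_reward += reward
        rewards.append(ep_reward)
        # smoothed (moving-average) reward, matching the ma_rewards used for plotting
        ma_rewards.append(0.9 * ma_rewards[-1] + 0.1 * ep_reward if ma_rewards else ep_reward)
    return rewards, ma_rewards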

(next changed file; filename not shown)

@@ -5,7 +5,7 @@ Author: John
 Email: johnjim0816@gmail.com
 Date: 2020-10-07 20:57:11
 LastEditor: John
-LastEditTime: 2021-09-15 14:56:15
+LastEditTime: 2021-09-19 23:00:36
 Discription:
 Environment:
 '''
@@ -29,6 +29,7 @@ def plot_rewards_cn(rewards,ma_rewards,tag="train",env='CartPole-v0',algo = "DQN
     ''' 中文画图
     '''
     sns.set()
+    plt.figure()
     plt.title(u"{}环境下{}算法的学习曲线".format(env,algo),fontproperties=chinese_font())
     plt.xlabel(u'回合数',fontproperties=chinese_font())
     plt.plot(rewards)
@@ -36,7 +37,7 @@ def plot_rewards_cn(rewards,ma_rewards,tag="train",env='CartPole-v0',algo = "DQN
     plt.legend((u'奖励',u'滑动平均奖励',),loc="best",prop=chinese_font())
     if save:
         plt.savefig(path+f"{tag}_rewards_curve_cn")
-    plt.show()
+    # plt.show()
 def plot_losses(losses,algo = "DQN",save=True,path='./'):
     sns.set()
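The added plt.figure() starts a fresh figure on every call, so the training and evaluation curves saved back-to-back in the main script no longer pile up on the same axes, and commenting out plt.show() keeps the run non-blocking, relying on plt.savefig alone. A minimal standalone illustration of this pattern (not the repo's code):

import matplotlib
matplotlib.use("Agg")            # non-interactive backend: savefig only, no window
import matplotlib.pyplot as plt

def save_curve(values, path):
    plt.figure()                 # fresh figure, so earlier curves are not overdrawn
    plt.plot(values)
    plt.savefig(path)
    # plt.show() omitted: the script keeps running instead of blocking on a window

save_curve([1, 2, 3], "train_curve.png")
save_curve([3, 2, 1], "eval_curve.png")   # saved on its own axes, not on top of the first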