update DQN

This commit is contained in:
JohnJim0816
2020-10-15 22:07:12 +08:00
parent 838088be41
commit cf9887f6d0
38 changed files with 212 additions and 70 deletions

View File

@@ -5,7 +5,7 @@
@Email: johnjim0816@gmail.com
@Date: 2020-06-11 16:30:09
@LastEditor: John
LastEditTime: 2020-10-07 20:57:22
LastEditTime: 2020-10-15 22:01:50
@Description:
@Environment: python 3.7.7
'''
@@ -14,19 +14,45 @@ import seaborn as sns
import numpy as np
import os
def plot(item, ylabel='rewards_train', save_fig=True):
    '''Plot a series against its index as a seaborn-styled line chart.

    Args:
        item: sequence of values; element i is drawn at x = i
            (the x axis is labelled 'episodes').
        ylabel: y-axis label; also used in the figure title and as the
            base name of the saved PNG.
        save_fig: when True, write the figure to result/<ylabel>.png in the
            directory containing this file before showing it.
    '''
    sns.set()
    plt.figure()
    plt.plot(np.arange(len(item)), item)
    plt.title(ylabel+' of DQN')
    plt.ylabel(ylabel)
    plt.xlabel('episodes')
    if save_fig:
        # Resolve __file__ to an absolute path first: with a relative
        # __file__ (e.g. `python plot.py`) dirname() is '' and the target
        # would become "/result/..." at the filesystem root.
        result_dir = os.path.join(
            os.path.dirname(os.path.abspath(__file__)), "result")
        # savefig does not create missing directories.
        os.makedirs(result_dir, exist_ok=True)
        plt.savefig(os.path.join(result_dir, ylabel+".png"))
    plt.show()
# def plot(item,ylabel='rewards'):
#
# df = pd.DataFrame(dict(time=np.arange(len(item)),value=item))
# g = sns.relplot(x="time", y="value", kind="line", data=df)
# # g.fig.autofmt_xdate()
# # sns.lineplot(time=time, data=item, color="r", condition="behavior_cloning")
# # # sns.tsplot(time=time, data=x2, color="b", condition="dagger")
# # plt.ylabel("Reward")
# # plt.xlabel("Iteration Number")
# # plt.title("Imitation Learning")
# plt.show()
if __name__ == "__main__":
    # Directory holding the saved .npy arrays, resolved from this file's
    # absolute location so it does not depend on the working directory.
    output_path = os.path.split(os.path.abspath(__file__))[0]+"/result/"
    # Load and plot the same three series for each tag; every array is
    # read from <output_path>/<series>_<tag>.npy.
    for tag in ('train', 'eval'):
        rewards = np.load(output_path+"rewards_"+tag+".npy")
        moving_average_rewards = np.load(
            output_path+"moving_average_rewards_"+tag+".npy")
        steps = np.load(output_path+"steps_"+tag+".npy")
        plot(rewards, ylabel='rewards_'+tag)
        plot(moving_average_rewards, ylabel='moving_average_rewards_'+tag)
        plot(steps, ylabel='steps_'+tag)