update DQN

2020-10-15 22:07:12 +08:00
parent 838088be41
commit cf9887f6d0
38 changed files with 212 additions and 70 deletions
--- a/codes/dqn/plot.py
+++ b/codes/dqn/plot.py
@@ -5,7 +5,7 @@
@Email: johnjim0816@gmail.com
@Date: 2020-06-11 16:30:09
@LastEditor: John
-LastEditTime: 2020-10-07 20:57:22
+LastEditTime: 2020-10-15 22:01:50
@Discription: 
@Environment: python 3.7.7
 '''
@@ -14,19 +14,45 @@ import seaborn as sns
 import numpy as np
 import os 

-def plot(item,ylabel='rewards'):
+def plot(item,ylabel='rewards_train', save_fig = True):
+    '''plot item against its index using seaborn styling
+    '''
    sns.set()
    plt.figure()
    plt.plot(np.arange(len(item)), item)
    plt.title(ylabel+' of DQN') 
    plt.ylabel(ylabel)
    plt.xlabel('episodes')
-    plt.savefig(os.path.dirname(__file__)+"/result/"+ylabel+".png")
+    if save_fig:
+        plt.savefig(os.path.dirname(__file__)+"/result/"+ylabel+".png")
    plt.show()
+
+# def plot(item,ylabel='rewards'):
+#     
+#     df = pd.DataFrame(dict(time=np.arange(len(item)),value=item))
+#     g = sns.relplot(x="time", y="value", kind="line", data=df)
+#     # g.fig.autofmt_xdate()
+#     # sns.lineplot(time=time, data=item, color="r", condition="behavior_cloning")
+#     # # sns.tsplot(time=time, data=x2, color="b", condition="dagger")
+#     # plt.ylabel("Reward")
+#     # plt.xlabel("Iteration Number")
+#     # plt.title("Imitation Learning")
+
+    # plt.show()
 if __name__ == "__main__":

-    output_path = os.path.dirname(__file__)+"/result/"
-    rewards=np.load(output_path+"rewards.npy", )
-    moving_average_rewards=np.load(output_path+"moving_average_rewards.npy",)
+    output_path = os.path.split(os.path.abspath(__file__))[0]+"/result/"
+    tag = 'train'
+    rewards=np.load(output_path+"rewards_"+tag+".npy", )
+    moving_average_rewards=np.load(output_path+"moving_average_rewards_"+tag+".npy",)
+    steps=np.load(output_path+"steps_"+tag+".npy")
    plot(rewards)
-    plot(moving_average_rewards,ylabel='moving_average_rewards')
+    plot(moving_average_rewards,ylabel='moving_average_rewards_'+tag)
+    plot(steps,ylabel='steps_'+tag)
+    tag = 'eval'
+    rewards=np.load(output_path+"rewards_"+tag+".npy", )
+    moving_average_rewards=np.load(output_path+"moving_average_rewards_"+tag+".npy",)
+    steps=np.load(output_path+"steps_"+tag+".npy")
+    plot(rewards,ylabel='rewards_'+tag)
+    plot(moving_average_rewards,ylabel='moving_average_rewards_'+tag)
+    plot(steps,ylabel='steps_'+tag)