update DDPG

2020-10-15 22:07:42 +08:00
parent cf9887f6d0
commit 0ff03c498e
37 changed files with 161 additions and 99 deletions
--- a/codes/ddpg/plot.py
+++ b/codes/ddpg/plot.py
@@ -5,17 +5,16 @@
@Email: johnjim0816@gmail.com
@Date: 2020-06-11 16:30:09
@LastEditor: John
-LastEditTime: 2020-09-02 01:20:03
+LastEditTime: 2020-10-15 21:32:05
@Description: 
@Environment: python 3.7.7
 '''
 import matplotlib.pyplot as plt
-import pandas as pd
-import seaborn as sns; 
+import seaborn as sns
 import numpy as np
 import os 

-def plot(item,ylabel='rewards',save_fig = True):
+def plot_results(item,ylabel='rewards_train', save_fig = True):
    '''plot the given series using seaborn
    '''
    sns.set()
@@ -24,25 +23,24 @@ def plot(item,ylabel='rewards',save_fig = True):
    plt.title(ylabel+' of DDPG') 
    plt.ylabel(ylabel)
    plt.xlabel('episodes')
-    plt.savefig(os.path.dirname(__file__)+"/result/"+ylabel+".png")
+    if save_fig:
+        plt.savefig(os.path.dirname(__file__)+"/result/"+ylabel+".png")
    plt.show()

-# def plot(item,ylabel='rewards'):
-#     
-#     df = pd.DataFrame(dict(time=np.arange(len(item)),value=item))
-#     g = sns.relplot(x="time", y="value", kind="line", data=df)
-#     # g.fig.autofmt_xdate()
-#     # sns.lineplot(time=time, data=item, color="r", condition="behavior_cloning")
-#     # # sns.tsplot(time=time, data=x2, color="b", condition="dagger")
-#     # plt.ylabel("Reward")
-#     # plt.xlabel("Iteration Number")
-#     # plt.title("Imitation Learning")
-
-    # plt.show()
 if __name__ == "__main__":

-    output_path = os.path.dirname(__file__)+"/result/"
-    rewards=np.load(output_path+"rewards.npy", )
-    moving_average_rewards=np.load(output_path+"moving_average_rewards.npy",)
-    plot(rewards)
-    plot(moving_average_rewards,ylabel='moving_average_rewards')
+    output_path = os.path.split(os.path.abspath(__file__))[0]+"/result/"
+    tag = 'train'
+    rewards=np.load(output_path+"rewards_"+tag+".npy", )
+    moving_average_rewards=np.load(output_path+"moving_average_rewards_"+tag+".npy",)
+    steps=np.load(output_path+"steps_"+tag+".npy")
+    plot_results(rewards)
+    plot_results(moving_average_rewards,ylabel='moving_average_rewards_'+tag)
+    plot_results(steps,ylabel='steps_'+tag)
+    tag = 'eval'
+    rewards=np.load(output_path+"rewards_"+tag+".npy", )
+    moving_average_rewards=np.load(output_path+"moving_average_rewards_"+tag+".npy",)
+    steps=np.load(output_path+"steps_"+tag+".npy")
+    plot_results(rewards,ylabel='rewards_'+tag)
+    plot_results(moving_average_rewards,ylabel='moving_average_rewards_'+tag)
+    plot_results(steps,ylabel='steps_'+tag)