add common

2021-03-13 11:33:41 +08:00
parent f1394feb65
commit 4df3169142
3 changed files with 71 additions and 0 deletions
@@ -0,0 +1,27 @@
+#!/usr/bin/env python
+# coding=utf-8
+'''
+Author: John
+Email: johnjim0816@gmail.com
+Date: 2021-03-12 21:14:12
+LastEditor: John
+LastEditTime: 2021-03-12 21:28:46
+Description: 
+Environment: 
+'''
+import torch.nn as nn
+import torch.nn.functional as F
class MLP(nn.Module):
    '''Fully connected network mapping a state vector to one sigmoid output.

    Two ReLU hidden layers of equal width feed a single output unit whose
    sigmoid is treated as the probability of the "left" action.

    Args:
        state_dim: number of input features per state.
        hidden_dim: width of both hidden layers. Defaults to 36, the value
            that used to be hard-coded; tune it to the size of the task.
    '''
    def __init__(self, state_dim, hidden_dim=36):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(state_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        self.fc3 = nn.Linear(hidden_dim, 1)  # single unit: prob of left

    def forward(self, x):
        '''Run the network on ``x`` and return values in [0, 1].

        Args:
            x: tensor whose last dimension has size ``state_dim``.

        Returns:
            Tensor with the same leading dimensions as ``x`` and a last
            dimension of size 1.
        '''
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        # Tensor.sigmoid() replaces F.sigmoid, which older PyTorch releases
        # flag as deprecated; the numerical result is the same.
        return self.fc3(x).sigmoid()
@@ -0,0 +1,23 @@
+#!/usr/bin/env python
+# coding=utf-8
+'''
+Author: John
+Email: johnjim0816@gmail.com
+Date: 2020-10-07 20:57:11
+LastEditor: John
+LastEditTime: 2021-03-13 11:31:49
+Description: 
+Environment: 
+'''
+import matplotlib.pyplot as plt
+import seaborn as sns
def plot_rewards(rewards,ma_rewards,tag="train",algo = "On-Policy First-Visit MC Control",path='./'):
    '''Plot raw and moving-average rewards, save the figure, then show it.

    Args:
        rewards: sequence of per-episode rewards.
        ma_rewards: sequence of moving-average rewards.
        tag: suffix for the output file name (e.g. "train" or "eval").
        algo: algorithm name shown in the plot title.
        path: directory in which ``rewards_curve_<tag>`` is saved.
    '''
    # Function-scope import: this module's top-level imports do not bring in os.
    import os

    sns.set()
    # Start a fresh figure so repeated calls do not draw on top of each other.
    plt.figure()
    plt.title("average learning curve of {}".format(algo))
    plt.xlabel('episodes')
    plt.plot(rewards, label='rewards')
    plt.plot(ma_rewards, label='moving average rewards')
    plt.legend()
    # os.path.join keeps the file name correct whether or not `path` ends
    # with a separator (plain concatenation produced e.g. "outrewards_curve_train").
    plt.savefig(os.path.join(path, "rewards_curve_{}".format(tag)))
    plt.show()
+   
@@ -0,0 +1,21 @@
+#!/usr/bin/env python
+# coding=utf-8
+'''
+Author: John
+Email: johnjim0816@gmail.com
+Date: 2021-03-12 16:02:24
+LastEditor: John
+LastEditTime: 2021-03-12 16:10:28
+Description: 
+Environment: 
+'''
+import os
+import numpy as np
+
+
def save_results(rewards,ma_rewards,tag='train',path='./results'):
    '''Save rewards and moving-average rewards as ``.npy`` files.

    Writes ``rewards_<tag>.npy`` and ``ma_rewards_<tag>.npy`` inside ``path``.

    Args:
        rewards: sequence of per-episode rewards.
        ma_rewards: sequence of moving-average rewards.
        tag: suffix identifying the run (e.g. "train" or "eval").
        path: output directory; it is created if it does not exist.
    '''
    # Create the target directory up front; an empty path means the current
    # directory, which needs no creation.
    if path:
        os.makedirs(path, exist_ok=True)
    # Join rather than concatenate: with the default './results' plain
    # concatenation wrote './resultsrewards_train.npy'.
    np.save(os.path.join(path, 'rewards_' + tag + '.npy'), rewards)
    np.save(os.path.join(path, 'ma_rewards_' + tag + '.npy'), ma_rewards)
    print('results saved!')