更新算法模版

2022-11-06 12:15:36 +08:00
parent 466a17707f
commit dc78698262
256 changed files with 17282 additions and 10229 deletions
--- a/projects/codes/QLearning/qlearning.py
+++ b/projects/codes/QLearning/qlearning.py
@@ -5,7 +5,7 @@ Author: John
 Email: johnjim0816@gmail.com
 Date: 2020-09-11 23:03:00
 LastEditor: John
-LastEditTime: 2022-08-24 10:31:04
+LastEditTime: 2022-10-30 01:38:26
 Discription: use defaultdict to define Q table
 Environment: 
 '''
@@ -16,14 +16,14 @@ from collections import defaultdict

 class QLearning(object):
    def __init__(self,cfg):
-        self.n_actions = cfg['n_actions'] 
-        self.lr = cfg['lr']  
-        self.gamma = cfg['gamma']  
-        self.epsilon = cfg['epsilon_start']
+        self.n_actions = cfg.n_actions 
+        self.lr = cfg.lr 
+        self.gamma = cfg.gamma    
+        self.epsilon = cfg.epsilon_start
        self.sample_count = 0  
-        self.epsilon_start = cfg['epsilon_start']
-        self.epsilon_end = cfg['epsilon_end']
-        self.epsilon_decay = cfg['epsilon_decay']
+        self.epsilon_start = cfg.epsilon_start
+        self.epsilon_end = cfg.epsilon_end
+        self.epsilon_decay = cfg.epsilon_decay
        self.Q_table  = defaultdict(lambda: np.zeros(self.n_actions)) # use nested dictionary to represent Q(s,a), here set all Q(s,a)=0 initially, not like pseudo code
    def sample_action(self, state):
        ''' sample action with e-greedy policy while training