更新算法模版
This commit is contained in:
@@ -5,7 +5,7 @@ Author: John
|
||||
Email: johnjim0816@gmail.com
|
||||
Date: 2020-09-11 23:03:00
|
||||
LastEditor: John
|
||||
LastEditTime: 2022-08-24 10:31:04
|
||||
LastEditTime: 2022-10-30 01:38:26
|
||||
Description: use defaultdict to define Q table
|
||||
Environment:
|
||||
'''
|
||||
@@ -16,14 +16,14 @@ from collections import defaultdict
|
||||
|
||||
class QLearning(object):
|
||||
def __init__(self,cfg):
|
||||
self.n_actions = cfg['n_actions']
|
||||
self.lr = cfg['lr']
|
||||
self.gamma = cfg['gamma']
|
||||
self.epsilon = cfg['epsilon_start']
|
||||
self.n_actions = cfg.n_actions
|
||||
self.lr = cfg.lr
|
||||
self.gamma = cfg.gamma
|
||||
self.epsilon = cfg.epsilon_start
|
||||
self.sample_count = 0
|
||||
self.epsilon_start = cfg['epsilon_start']
|
||||
self.epsilon_end = cfg['epsilon_end']
|
||||
self.epsilon_decay = cfg['epsilon_decay']
|
||||
self.epsilon_start = cfg.epsilon_start
|
||||
self.epsilon_end = cfg.epsilon_end
|
||||
self.epsilon_decay = cfg.epsilon_decay
|
||||
self.Q_table = defaultdict(lambda: np.zeros(self.n_actions)) # use nested dictionary to represent Q(s,a), here set all Q(s,a)=0 initially, not like pseudo code
|
||||
def sample_action(self, state):
|
||||
''' sample action with e-greedy policy while training
|
||||
|
||||
Reference in New Issue
Block a user