Update code (includes renaming `n_actions` to `action_dim` in `GridworldEnv`)

This commit is contained in:
johnjim0816
2021-12-28 18:46:52 +08:00
parent 41fb561d25
commit bd51b5a7ad
52 changed files with 305 additions and 292 deletions

View File

@@ -37,7 +37,7 @@ class GridworldEnv(discrete.DiscreteEnv):
self.shape = shape
nS = np.prod(shape)
-n_actions = 4
+action_dim = 4
MAX_Y = shape[0]
MAX_X = shape[1]
@@ -51,7 +51,7 @@ class GridworldEnv(discrete.DiscreteEnv):
y, x = it.multi_index
# P[s][a] = (prob, next_state, reward, is_done)
-P[s] = {a : [] for a in range(n_actions)}
+P[s] = {a : [] for a in range(action_dim)}
is_done = lambda s: s == 0 or s == (nS - 1)
reward = 0.0 if is_done(s) else -1.0
@@ -82,7 +82,7 @@ class GridworldEnv(discrete.DiscreteEnv):
# This should not be used in any model-free learning algorithm
self.P = P
-super(GridworldEnv, self).__init__(nS, n_actions, P, isd)
+super(GridworldEnv, self).__init__(nS, action_dim, P, isd)
def _render(self, mode='human', close=False):
""" Renders the current gridworld layout