hot update PG
@@ -72,84 +72,6 @@ class FrozenLakeWapper(gym.Wrapper):
         self.move_player(x_pos, y_pos)
-
-
-class CliffWalkingWapper(gym.Wrapper):
-    def __init__(self, env):
-        gym.Wrapper.__init__(self, env)
-        self.t = None
-        self.unit = 50
-        self.max_x = 12
-        self.max_y = 4
-
-    def draw_x_line(self, y, x0, x1, color='gray'):
-        assert x1 > x0
-        self.t.color(color)
-        self.t.setheading(0)
-        self.t.up()
-        self.t.goto(x0, y)
-        self.t.down()
-        self.t.forward(x1 - x0)
-
-    def draw_y_line(self, x, y0, y1, color='gray'):
-        assert y1 > y0
-        self.t.color(color)
-        self.t.setheading(90)
-        self.t.up()
-        self.t.goto(x, y0)
-        self.t.down()
-        self.t.forward(y1 - y0)
-
-    def draw_box(self, x, y, fillcolor='', line_color='gray'):
-        self.t.up()
-        self.t.goto(x * self.unit, y * self.unit)
-        self.t.color(line_color)
-        self.t.fillcolor(fillcolor)
-        self.t.setheading(90)
-        self.t.down()
-        self.t.begin_fill()
-        for i in range(4):
-            self.t.forward(self.unit)
-            self.t.right(90)
-        self.t.end_fill()
-
-    def move_player(self, x, y):
-        self.t.up()
-        self.t.setheading(90)
-        self.t.fillcolor('red')
-        self.t.goto((x + 0.5) * self.unit, (y + 0.5) * self.unit)
-
-    def render(self):
-        if self.t == None:
-            self.t = turtle.Turtle()
-            self.wn = turtle.Screen()
-            self.wn.setup(self.unit * self.max_x + 100,
-                          self.unit * self.max_y + 100)
-            self.wn.setworldcoordinates(0, 0, self.unit * self.max_x,
-                                        self.unit * self.max_y)
-            self.t.shape('circle')
-            self.t.width(2)
-            self.t.speed(0)
-            self.t.color('gray')
-            for _ in range(2):
-                self.t.forward(self.max_x * self.unit)
-                self.t.left(90)
-                self.t.forward(self.max_y * self.unit)
-                self.t.left(90)
-            for i in range(1, self.max_y):
-                self.draw_x_line(
-                    y=i * self.unit, x0=0, x1=self.max_x * self.unit)
-            for i in range(1, self.max_x):
-                self.draw_y_line(
-                    x=i * self.unit, y0=0, y1=self.max_y * self.unit)
-
-            for i in range(1, self.max_x - 1):
-                self.draw_box(i, 0, 'black')
-            self.draw_box(self.max_x - 1, 0, 'yellow')
-            self.t.shape('turtle')
-
-        x_pos = self.s % self.max_x
-        y_pos = self.max_y - 1 - int(self.s / self.max_x)
-        self.move_player(x_pos, y_pos)
 
 
 if __name__ == '__main__':
     # Environment 1: FrozenLake; whether the ice surface is slippery is configurable
@@ -1,10 +1,3 @@
-# Please do not make changes to this file - it will be overwritten with a clean
-# version when your work is marked.
-#
-# This file contains code for the racetrack environment that you will be using
-# as part of the second part of the CM50270: Reinforcement Learning coursework.
-
-import imp
 import time
 import random
 import numpy as np
@@ -12,23 +5,20 @@ import os
 import matplotlib.pyplot as plt
 import matplotlib.patheffects as pe
 from IPython.display import clear_output
-from gym.spaces import Discrete
+from gym.spaces import Discrete, Box
 from matplotlib import colors
+import gym
 
-class RacetrackEnv(object) :
+class RacetrackEnv(gym.Env) :
     """
     Class representing a race-track environment inspired by exercise 5.12 in Sutton & Barto 2018 (p.111).
     Please do not make changes to this class - it will be overwritten with a clean version when it comes to marking.
 
     The dynamics of this environment are detailed in this coursework exercise's jupyter notebook, although I have
     included rather verbose comments here for those of you who are interested in how the environment has been
-    implemented (though this should not impact your solution code).
     If you find any *bugs* with this code, please let me know immediately - thank you for finding them, sorry that I didn't!
     However, please do not suggest optimisations - some things have been purposely simplified for readability's sake.
+    implemented (though this should not impact your solution code).
     """
 
 
     ACTIONS_DICT = {
         0 : (1, -1), # Acc Vert., Brake Horiz.
         1 : (1, 0),  # Acc Vert., Hold Horiz.
@@ -61,18 +51,15 @@ class RacetrackEnv(object) :
         for x in range(self.track.shape[1]) :
             if (self.CELL_TYPES_DICT[self.track[y, x]] == "start") :
                 self.initial_states.append((y, x))
 
+        high = np.array([np.finfo(np.float32).max, np.finfo(np.float32).max, np.finfo(np.float32).max, np.finfo(np.float32).max])
+        self.observation_space = Box(low=-high, high=high, shape=(4,), dtype=np.float32)
+        self.action_space = Discrete(9)
         self.is_reset = False
 
         #print("Racetrack Environment File Loaded Successfully.")
         #print("Be sure to call .reset() before starting to initialise the environment and get an initial state!")
 
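Since the environment now exposes gym-style spaces, they can be sampled directly as a quick sanity check. A minimal sketch (not part of the commit), mirroring the Box/Discrete definitions above:

    import numpy as np
    from gym.spaces import Box, Discrete

    # Same 4-dim unbounded observation space and 9-action space as in __init__.
    high = np.array([np.finfo(np.float32).max] * 4)
    obs_space = Box(low=-high, high=high, shape=(4,), dtype=np.float32)
    act_space = Discrete(9)

    assert obs_space.contains(obs_space.sample())  # (y, x, y_vel, x_vel) as float32
    assert 0 <= act_space.sample() < 9             # one of the nine ACTIONS_DICT keys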
     def step(self, action : int) :
         """
         Takes a given action in the environment's current state, and returns a next state,
-        reward, and whether the next state is terminal or not.
+        reward, and whether the next state is done or not.
 
         Arguments:
             action {int} -- The action to take in the environment's current state. Should be an integer in the range [0-8].
@@ -86,7 +73,7 @@ class RacetrackEnv(object) :
         A tuple of:\n
         {(int, int, int, int)} -- The next state, a tuple of (y_pos, x_pos, y_velocity, x_velocity).\n
         {int} -- The reward earned by taking the given action in the current environment state.\n
-        {bool} -- Whether the environment's next state is terminal or not.\n
+        {bool} -- Whether the environment's next state is done or not.\n
 
         """
@@ -131,7 +118,7 @@ class RacetrackEnv(object) :
         new_position = (self.position[0] + self.velocity[0], self.position[1] + self.velocity[1])
 
         reward = 0
-        terminal = False
+        done = False
 
         # If position is out-of-bounds, return to start and set velocity components to zero.
         if (new_position[0] < 0 or new_position[1] < 0 or new_position[0] >= self.track.shape[0] or new_position[1] >= self.track.shape[1]) :
@@ -150,7 +137,7 @@ class RacetrackEnv(object) :
         elif (self.CELL_TYPES_DICT[self.track[new_position]] == "goal") :
             self.position = new_position
             reward += 10
-            terminal = True
+            done = True
         # If this gets reached, then the student has touched something they shouldn't have. Naughty!
         else :
             raise RuntimeError("You've met with a terrible fate, haven't you?\nDon't modify things you shouldn't!")
@@ -158,12 +145,12 @@ class RacetrackEnv(object) :
         # Penalise every timestep.
         reward -= 1
 
-        # Require a reset if the current state is terminal.
-        if (terminal) :
+        # Require a reset if the current state is done.
+        if (done) :
             self.is_reset = False
 
         # Return next state, reward, and whether the episode has ended.
-        return (self.position[0], self.position[1], self.velocity[0], self.velocity[1]), reward, terminal
+        return np.array([self.position[0], self.position[1], self.velocity[0], self.velocity[1]]), reward, done, {}
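With step() now returning the gym-standard (observation, reward, done, info) tuple, the environment fits a generic rollout loop. A minimal sketch, assuming racetrack.py and the register.py module added below are on the Python path:

    import gym
    from register import register_env  # added in this commit (see below)

    register_env('Racetrack-v0')
    env = gym.make('Racetrack-v0')     # entry_point='racetrack:RacetrackEnv'

    obs = env.reset()
    done, episode_return = False, 0
    while not done:
        obs, reward, done, info = env.step(env.action_space.sample())
        episode_return += reward
    print(episode_return)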
 
     def reset(self) :
@@ -184,10 +171,10 @@ class RacetrackEnv(object) :
 
         self.is_reset = True
 
-        return (self.position[0], self.position[1], self.velocity[0], self.velocity[1])
+        return np.array([self.position[0], self.position[1], self.velocity[0], self.velocity[1]])
 
 
-    def render(self, sleep_time : float = 0.1) :
+    def render(self, mode = 'human') :
         """
         Renders a pretty matplotlib plot representing the current state of the environment.
         Calling this method on subsequent timesteps will update the plot.
@@ -230,13 +217,9 @@ class RacetrackEnv(object) :
         # Draw everything.
         #fig.canvas.draw()
         #fig.canvas.flush_events()
 
         plt.show()
 
-        # Sleep if desired.
-        if (sleep_time > 0) :
-            time.sleep(sleep_time)
+        # time sleep
+        time.sleep(0.1)
     def get_actions(self) :
         """
@@ -244,18 +227,16 @@ class RacetrackEnv(object) :
         of integers in the range [0-8].
         """
         return [*self.ACTIONS_DICT]
 
+if __name__ == "__main__":
+    num_steps = 1000000
+    env = RacetrackEnv()
+    state = env.reset()
+    print(state)
+    for _ in range(num_steps) :
-# num_steps = 1000000
+        next_state, reward, done, _ = env.step(random.choice(env.get_actions()))
+        print(next_state)
+        env.render()
-# env = RacetrackEnv()
-# state = env.reset()
-# print(state)
-
-# for _ in range(num_steps) :
-
-#     next_state, reward, terminal = env.step(random.choice(env.get_actions()))
-#     print(next_state)
-#     env.render()
-
-#     if (terminal) :
-#         _ = env.reset()
+        if (done) :
+            _ = env.reset()
34  projects/codes/envs/register.py  Normal file
@@ -0,0 +1,34 @@
+from gym.envs.registration import register
+
+def register_env(env_name):
+    if env_name == 'Racetrack-v0':
+        register(
+            id='Racetrack-v0',
+            entry_point='racetrack:RacetrackEnv',
+            max_episode_steps=1000,
+            kwargs={}
+        )
+    elif env_name == 'FrozenLakeNoSlippery-v1':
+        register(
+            id='FrozenLakeNoSlippery-v1',
+            entry_point='gym.envs.toy_text.frozen_lake:FrozenLakeEnv',
+            kwargs={'map_name': "4x4", 'is_slippery': False},
+        )
+    else:
+        print("The env name must be wrong, or the environment does not need to be registered!")
+
+# if __name__ == "__main__":
+#     import random
+#     import gym
+#     env = gym.make('FrozenLakeNoSlippery-v1')
+#     num_steps = 1000000
+#     state = env.reset()
+#     n_actions = env.action_space.n
+#     print(state)
+#     for _ in range(num_steps) :
+#         next_state, reward, done, _ = env.step(random.choice(range(n_actions)))
+#         print(next_state)
+#         if (done) :
+#             _ = env.reset()
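A hypothetical way to exercise this hook (the commented block above sketches the same loop), again assuming register.py and racetrack.py are importable from the working directory:

    import gym
    from register import register_env

    register_env('FrozenLakeNoSlippery-v1')
    env = gym.make('FrozenLakeNoSlippery-v1')  # deterministic 4x4 FrozenLake
    print(env.observation_space, env.action_space)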
78  projects/codes/envs/wrappers.py  Normal file
@@ -0,0 +1,78 @@
+import gym
+import turtle  # needed by render(); the file as committed imports only gym
+
+class CliffWalkingWapper(gym.Wrapper):
+    def __init__(self, env):
+        gym.Wrapper.__init__(self, env)
+        self.t = None
+        self.unit = 50
+        self.max_x = 12
+        self.max_y = 4
+
+    def draw_x_line(self, y, x0, x1, color='gray'):
+        assert x1 > x0
+        self.t.color(color)
+        self.t.setheading(0)
+        self.t.up()
+        self.t.goto(x0, y)
+        self.t.down()
+        self.t.forward(x1 - x0)
+
+    def draw_y_line(self, x, y0, y1, color='gray'):
+        assert y1 > y0
+        self.t.color(color)
+        self.t.setheading(90)
+        self.t.up()
+        self.t.goto(x, y0)
+        self.t.down()
+        self.t.forward(y1 - y0)
+
+    def draw_box(self, x, y, fillcolor='', line_color='gray'):
+        self.t.up()
+        self.t.goto(x * self.unit, y * self.unit)
+        self.t.color(line_color)
+        self.t.fillcolor(fillcolor)
+        self.t.setheading(90)
+        self.t.down()
+        self.t.begin_fill()
+        for i in range(4):
+            self.t.forward(self.unit)
+            self.t.right(90)
+        self.t.end_fill()
+
+    def move_player(self, x, y):
+        self.t.up()
+        self.t.setheading(90)
+        self.t.fillcolor('red')
+        self.t.goto((x + 0.5) * self.unit, (y + 0.5) * self.unit)
+
+    def render(self):
+        if self.t is None:
+            self.t = turtle.Turtle()
+            self.wn = turtle.Screen()
+            self.wn.setup(self.unit * self.max_x + 100,
+                          self.unit * self.max_y + 100)
+            self.wn.setworldcoordinates(0, 0, self.unit * self.max_x,
+                                        self.unit * self.max_y)
+            self.t.shape('circle')
+            self.t.width(2)
+            self.t.speed(0)
+            self.t.color('gray')
+            for _ in range(2):
+                self.t.forward(self.max_x * self.unit)
+                self.t.left(90)
+                self.t.forward(self.max_y * self.unit)
+                self.t.left(90)
+            for i in range(1, self.max_y):
+                self.draw_x_line(
+                    y=i * self.unit, x0=0, x1=self.max_x * self.unit)
+            for i in range(1, self.max_x):
+                self.draw_y_line(
+                    x=i * self.unit, y0=0, y1=self.max_y * self.unit)
+
+            for i in range(1, self.max_x - 1):
+                self.draw_box(i, 0, 'black')
+            self.draw_box(self.max_x - 1, 0, 'yellow')
+            self.t.shape('turtle')
+
+        x_pos = self.s % self.max_x
+        y_pos = self.max_y - 1 - int(self.s / self.max_x)
+        self.move_player(x_pos, y_pos)
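A minimal usage sketch for the wrapper, assuming gym's toy-text CliffWalking-v0 (whose discrete state index `s` is what render() reads through the wrapped env):

    import gym
    from wrappers import CliffWalkingWapper

    env = CliffWalkingWapper(gym.make('CliffWalking-v0'))
    state = env.reset()
    env.render()                                  # turtle window: 12x4 grid, cliff in black, goal in yellow
    next_state, reward, done, info = env.step(0)  # action 0 = up in CliffWalking
    env.render()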