hot update PG
@@ -72,84 +72,6 @@ class FrozenLakeWapper(gym.Wrapper):
         self.move_player(x_pos, y_pos)
-
-
-class CliffWalkingWapper(gym.Wrapper):
-    def __init__(self, env):
-        gym.Wrapper.__init__(self, env)
-        self.t = None
-        self.unit = 50
-        self.max_x = 12
-        self.max_y = 4
-
-    def draw_x_line(self, y, x0, x1, color='gray'):
-        assert x1 > x0
-        self.t.color(color)
-        self.t.setheading(0)
-        self.t.up()
-        self.t.goto(x0, y)
-        self.t.down()
-        self.t.forward(x1 - x0)
-
-    def draw_y_line(self, x, y0, y1, color='gray'):
-        assert y1 > y0
-        self.t.color(color)
-        self.t.setheading(90)
-        self.t.up()
-        self.t.goto(x, y0)
-        self.t.down()
-        self.t.forward(y1 - y0)
-
-    def draw_box(self, x, y, fillcolor='', line_color='gray'):
-        self.t.up()
-        self.t.goto(x * self.unit, y * self.unit)
-        self.t.color(line_color)
-        self.t.fillcolor(fillcolor)
-        self.t.setheading(90)
-        self.t.down()
-        self.t.begin_fill()
-        for i in range(4):
-            self.t.forward(self.unit)
-            self.t.right(90)
-        self.t.end_fill()
-
-    def move_player(self, x, y):
-        self.t.up()
-        self.t.setheading(90)
-        self.t.fillcolor('red')
-        self.t.goto((x + 0.5) * self.unit, (y + 0.5) * self.unit)
-
-    def render(self):
-        if self.t == None:
-            self.t = turtle.Turtle()
-            self.wn = turtle.Screen()
-            self.wn.setup(self.unit * self.max_x + 100,
-                          self.unit * self.max_y + 100)
-            self.wn.setworldcoordinates(0, 0, self.unit * self.max_x,
-                                        self.unit * self.max_y)
-            self.t.shape('circle')
-            self.t.width(2)
-            self.t.speed(0)
-            self.t.color('gray')
-            for _ in range(2):
-                self.t.forward(self.max_x * self.unit)
-                self.t.left(90)
-                self.t.forward(self.max_y * self.unit)
-                self.t.left(90)
-            for i in range(1, self.max_y):
-                self.draw_x_line(
-                    y=i * self.unit, x0=0, x1=self.max_x * self.unit)
-            for i in range(1, self.max_x):
-                self.draw_y_line(
-                    x=i * self.unit, y0=0, y1=self.max_y * self.unit)
-
-            for i in range(1, self.max_x - 1):
-                self.draw_box(i, 0, 'black')
-            self.draw_box(self.max_x - 1, 0, 'yellow')
-            self.t.shape('turtle')
-
-        x_pos = self.s % self.max_x
-        y_pos = self.max_y - 1 - int(self.s / self.max_x)
-        self.move_player(x_pos, y_pos)
 
 
 if __name__ == '__main__':
     # Environment 1: FrozenLake; whether the ice surface is slippery is configurable
@@ -1,10 +1,3 @@
-# Please do not make changes to this file - it will be overwritten with a clean
-# version when your work is marked.
-#
-# This file contains code for the racetrack environment that you will be using
-# as part of the second part of the CM50270: Reinforcement Learning coursework.
-
-import imp
 import time
 import random
 import numpy as np
@@ -12,23 +5,20 @@ import os
 import matplotlib.pyplot as plt
 import matplotlib.patheffects as pe
 from IPython.display import clear_output
-from gym.spaces import Discrete
+from gym.spaces import Discrete, Box
 from matplotlib import colors
+import gym
 
-class RacetrackEnv(object) :
+class RacetrackEnv(gym.Env) :
     """
     Class representing a race-track environment inspired by exercise 5.12 in Sutton & Barto 2018 (p.111).
     Please do not make changes to this class - it will be overwritten with a clean version when it comes to marking.
 
     The dynamics of this environment are detailed in this coursework exercise's jupyter notebook, although I have
     included rather verbose comments here for those of you who are interested in how the environment has been
-    implemented (though this should not impact your solution code).
     If you find any *bugs* with this code, please let me know immediately - thank you for finding them, sorry that I didn't!
     However, please do not suggest optimisations - some things have been purposely simplified for readability's sake.
+    implemented (though this should not impact your solution code).
     """
 
 
     ACTIONS_DICT = {
         0 : (1, -1), # Acc Vert., Brake Horiz.
         1 : (1, 0),  # Acc Vert., Hold Horiz.
@@ -61,18 +51,15 @@ class RacetrackEnv(object) :
         for x in range(self.track.shape[1]) :
             if (self.CELL_TYPES_DICT[self.track[y, x]] == "start") :
                 self.initial_states.append((y, x))
 
+        high = np.array([np.finfo(np.float32).max, np.finfo(np.float32).max, np.finfo(np.float32).max, np.finfo(np.float32).max])
+        self.observation_space = Box(low=-high, high=high, shape=(4,), dtype=np.float32)
+        self.action_space = Discrete(9)
         self.is_reset = False
 
         #print("Racetrack Environment File Loaded Successfully.")
         #print("Be sure to call .reset() before starting to initialise the environment and get an initial state!")
 
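Since the environment now exposes gym-style spaces, they can be sampled directly as a quick sanity check. A minimal sketch (not part of the commit), mirroring the Box/Discrete definitions above:

    import numpy as np
    from gym.spaces import Box, Discrete

    # Same 4-dim unbounded observation space and 9-action space as in __init__.
    high = np.array([np.finfo(np.float32).max] * 4)
    obs_space = Box(low=-high, high=high, shape=(4,), dtype=np.float32)
    act_space = Discrete(9)

    assert obs_space.contains(obs_space.sample())  # (y, x, y_vel, x_vel) as float32
    assert 0 <= act_space.sample() < 9             # one of the nine ACTIONS_DICT keys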
     def step(self, action : int) :
         """
         Takes a given action in the environment's current state, and returns a next state,
-        reward, and whether the next state is terminal or not.
+        reward, and whether the next state is done or not.
 
         Arguments:
             action {int} -- The action to take in the environment's current state. Should be an integer in the range [0-8].
@@ -86,7 +73,7 @@ class RacetrackEnv(object) :
         A tuple of:\n
         {(int, int, int, int)} -- The next state, a tuple of (y_pos, x_pos, y_velocity, x_velocity).\n
         {int} -- The reward earned by taking the given action in the current environment state.\n
-        {bool} -- Whether the environment's next state is terminal or not.\n
+        {bool} -- Whether the environment's next state is done or not.\n
 
         """
@@ -131,7 +118,7 @@ class RacetrackEnv(object) :
         new_position = (self.position[0] + self.velocity[0], self.position[1] + self.velocity[1])
 
         reward = 0
-        terminal = False
+        done = False
 
         # If position is out-of-bounds, return to start and set velocity components to zero.
         if (new_position[0] < 0 or new_position[1] < 0 or new_position[0] >= self.track.shape[0] or new_position[1] >= self.track.shape[1]) :
@@ -150,7 +137,7 @@ class RacetrackEnv(object) :
         elif (self.CELL_TYPES_DICT[self.track[new_position]] == "goal") :
             self.position = new_position
             reward += 10
-            terminal = True
+            done = True
         # If this gets reached, then the student has touched something they shouldn't have. Naughty!
         else :
             raise RuntimeError("You've met with a terrible fate, haven't you?\nDon't modify things you shouldn't!")
@@ -158,12 +145,12 @@ class RacetrackEnv(object) :
         # Penalise every timestep.
         reward -= 1
 
-        # Require a reset if the current state is terminal.
-        if (terminal) :
+        # Require a reset if the current state is done.
+        if (done) :
             self.is_reset = False
 
         # Return next state, reward, and whether the episode has ended.
-        return (self.position[0], self.position[1], self.velocity[0], self.velocity[1]), reward, terminal
+        return np.array([self.position[0], self.position[1], self.velocity[0], self.velocity[1]]), reward, done, {}
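With step() now returning the gym-standard (observation, reward, done, info) tuple, the environment fits a generic rollout loop. A minimal sketch, assuming racetrack.py and the register.py module added below are on the Python path:

    import gym
    from register import register_env  # added in this commit (see below)

    register_env('Racetrack-v0')
    env = gym.make('Racetrack-v0')     # entry_point='racetrack:RacetrackEnv'

    obs = env.reset()
    done, episode_return = False, 0
    while not done:
        obs, reward, done, info = env.step(env.action_space.sample())
        episode_return += reward
    print(episode_return)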
 
     def reset(self) :
@@ -184,10 +171,10 @@ class RacetrackEnv(object) :
 
         self.is_reset = True
 
-        return (self.position[0], self.position[1], self.velocity[0], self.velocity[1])
+        return np.array([self.position[0], self.position[1], self.velocity[0], self.velocity[1]])
 
 
-    def render(self, sleep_time : float = 0.1) :
+    def render(self, mode = 'human') :
         """
         Renders a pretty matplotlib plot representing the current state of the environment.
         Calling this method on subsequent timesteps will update the plot.
@@ -230,13 +217,9 @@ class RacetrackEnv(object) :
         # Draw everything.
         #fig.canvas.draw()
         #fig.canvas.flush_events()
 
         plt.show()
 
-        # Sleep if desired.
-        if (sleep_time > 0) :
-            time.sleep(sleep_time)
+        # time sleep
+        time.sleep(0.1)
     def get_actions(self) :
         """
@@ -244,18 +227,16 @@ class RacetrackEnv(object) :
         of integers in the range [0-8].
         """
         return [*self.ACTIONS_DICT]
 
+if __name__ == "__main__":
+    num_steps = 1000000
+    env = RacetrackEnv()
+    state = env.reset()
+    print(state)
+    for _ in range(num_steps) :
-# num_steps = 1000000
+        next_state, reward, done, _ = env.step(random.choice(env.get_actions()))
+        print(next_state)
+        env.render()
-# env = RacetrackEnv()
-# state = env.reset()
-# print(state)
-
-# for _ in range(num_steps) :
-
-#     next_state, reward, terminal = env.step(random.choice(env.get_actions()))
-#     print(next_state)
-#     env.render()
-
-#     if (terminal) :
-#         _ = env.reset()
+        if (done) :
+            _ = env.reset()
34  projects/codes/envs/register.py  Normal file
@@ -0,0 +1,34 @@
+from gym.envs.registration import register
+
+def register_env(env_name):
+    if env_name == 'Racetrack-v0':
+        register(
+            id='Racetrack-v0',
+            entry_point='racetrack:RacetrackEnv',
+            max_episode_steps=1000,
+            kwargs={}
+        )
+    elif env_name == 'FrozenLakeNoSlippery-v1':
+        register(
+            id='FrozenLakeNoSlippery-v1',
+            entry_point='gym.envs.toy_text.frozen_lake:FrozenLakeEnv',
+            kwargs={'map_name': "4x4", 'is_slippery': False},
+        )
+    else:
+        print("The env name must be wrong, or the environment does not need to be registered!")
+
+# if __name__ == "__main__":
+#     import random
+#     import gym
+#     env = gym.make('FrozenLakeNoSlippery-v1')
+#     num_steps = 1000000
+#     state = env.reset()
+#     n_actions = env.action_space.n
+#     print(state)
+#     for _ in range(num_steps) :
+#         next_state, reward, done, _ = env.step(random.choice(range(n_actions)))
+#         print(next_state)
+#         if (done) :
+#             _ = env.reset()
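A hypothetical way to exercise this hook (the commented block above sketches the same loop), again assuming register.py and racetrack.py are importable from the working directory:

    import gym
    from register import register_env

    register_env('FrozenLakeNoSlippery-v1')
    env = gym.make('FrozenLakeNoSlippery-v1')  # deterministic 4x4 FrozenLake
    print(env.observation_space, env.action_space)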
78  projects/codes/envs/wrappers.py  Normal file
@@ -0,0 +1,78 @@
+import gym
+import turtle  # needed by render(); the file as committed imports only gym
+
+class CliffWalkingWapper(gym.Wrapper):
+    def __init__(self, env):
+        gym.Wrapper.__init__(self, env)
+        self.t = None
+        self.unit = 50
+        self.max_x = 12
+        self.max_y = 4
+
+    def draw_x_line(self, y, x0, x1, color='gray'):
+        assert x1 > x0
+        self.t.color(color)
+        self.t.setheading(0)
+        self.t.up()
+        self.t.goto(x0, y)
+        self.t.down()
+        self.t.forward(x1 - x0)
+
+    def draw_y_line(self, x, y0, y1, color='gray'):
+        assert y1 > y0
+        self.t.color(color)
+        self.t.setheading(90)
+        self.t.up()
+        self.t.goto(x, y0)
+        self.t.down()
+        self.t.forward(y1 - y0)
+
+    def draw_box(self, x, y, fillcolor='', line_color='gray'):
+        self.t.up()
+        self.t.goto(x * self.unit, y * self.unit)
+        self.t.color(line_color)
+        self.t.fillcolor(fillcolor)
+        self.t.setheading(90)
+        self.t.down()
+        self.t.begin_fill()
+        for i in range(4):
+            self.t.forward(self.unit)
+            self.t.right(90)
+        self.t.end_fill()
+
+    def move_player(self, x, y):
+        self.t.up()
+        self.t.setheading(90)
+        self.t.fillcolor('red')
+        self.t.goto((x + 0.5) * self.unit, (y + 0.5) * self.unit)
+
+    def render(self):
+        if self.t is None:
+            self.t = turtle.Turtle()
+            self.wn = turtle.Screen()
+            self.wn.setup(self.unit * self.max_x + 100,
+                          self.unit * self.max_y + 100)
+            self.wn.setworldcoordinates(0, 0, self.unit * self.max_x,
+                                        self.unit * self.max_y)
+            self.t.shape('circle')
+            self.t.width(2)
+            self.t.speed(0)
+            self.t.color('gray')
+            for _ in range(2):
+                self.t.forward(self.max_x * self.unit)
+                self.t.left(90)
+                self.t.forward(self.max_y * self.unit)
+                self.t.left(90)
+            for i in range(1, self.max_y):
+                self.draw_x_line(
+                    y=i * self.unit, x0=0, x1=self.max_x * self.unit)
+            for i in range(1, self.max_x):
+                self.draw_y_line(
+                    x=i * self.unit, y0=0, y1=self.max_y * self.unit)
+
+            for i in range(1, self.max_x - 1):
+                self.draw_box(i, 0, 'black')
+            self.draw_box(self.max_x - 1, 0, 'yellow')
+            self.t.shape('turtle')
+
+        x_pos = self.s % self.max_x
+        y_pos = self.max_y - 1 - int(self.s / self.max_x)
+        self.move_player(x_pos, y_pos)
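A minimal usage sketch for the wrapper, assuming gym's toy-text CliffWalking-v0 (whose discrete state index `s` is what render() reads through the wrapped env):

    import gym
    from wrappers import CliffWalkingWapper

    env = CliffWalkingWapper(gym.make('CliffWalking-v0'))
    state = env.reset()
    env.render()                                  # turtle window: 12x4 grid, cliff in black, goal in yellow
    next_state, reward, done, info = env.step(0)  # action 0 = up in CliffWalking
    env.render()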