hot update PG

This commit is contained in:
johnjim0816
2022-08-25 21:00:53 +08:00
parent 4f4658503e
commit 80f20c73be
34 changed files with 1391 additions and 1695 deletions

View File

@@ -72,84 +72,6 @@ class FrozenLakeWapper(gym.Wrapper):
self.move_player(x_pos, y_pos)
class CliffWalkingWapper(gym.Wrapper):
def __init__(self, env):
gym.Wrapper.__init__(self, env)
self.t = None
self.unit = 50
self.max_x = 12
self.max_y = 4
def draw_x_line(self, y, x0, x1, color='gray'):
assert x1 > x0
self.t.color(color)
self.t.setheading(0)
self.t.up()
self.t.goto(x0, y)
self.t.down()
self.t.forward(x1 - x0)
def draw_y_line(self, x, y0, y1, color='gray'):
assert y1 > y0
self.t.color(color)
self.t.setheading(90)
self.t.up()
self.t.goto(x, y0)
self.t.down()
self.t.forward(y1 - y0)
def draw_box(self, x, y, fillcolor='', line_color='gray'):
self.t.up()
self.t.goto(x * self.unit, y * self.unit)
self.t.color(line_color)
self.t.fillcolor(fillcolor)
self.t.setheading(90)
self.t.down()
self.t.begin_fill()
for i in range(4):
self.t.forward(self.unit)
self.t.right(90)
self.t.end_fill()
def move_player(self, x, y):
self.t.up()
self.t.setheading(90)
self.t.fillcolor('red')
self.t.goto((x + 0.5) * self.unit, (y + 0.5) * self.unit)
def render(self):
if self.t == None:
self.t = turtle.Turtle()
self.wn = turtle.Screen()
self.wn.setup(self.unit * self.max_x + 100,
self.unit * self.max_y + 100)
self.wn.setworldcoordinates(0, 0, self.unit * self.max_x,
self.unit * self.max_y)
self.t.shape('circle')
self.t.width(2)
self.t.speed(0)
self.t.color('gray')
for _ in range(2):
self.t.forward(self.max_x * self.unit)
self.t.left(90)
self.t.forward(self.max_y * self.unit)
self.t.left(90)
for i in range(1, self.max_y):
self.draw_x_line(
y=i * self.unit, x0=0, x1=self.max_x * self.unit)
for i in range(1, self.max_x):
self.draw_y_line(
x=i * self.unit, y0=0, y1=self.max_y * self.unit)
for i in range(1, self.max_x - 1):
self.draw_box(i, 0, 'black')
self.draw_box(self.max_x - 1, 0, 'yellow')
self.t.shape('turtle')
x_pos = self.s % self.max_x
y_pos = self.max_y - 1 - int(self.s / self.max_x)
self.move_player(x_pos, y_pos)
if __name__ == '__main__':
# Environment 1: FrozenLake, where you can configure whether the ice surface is slippery
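A minimal sketch of what the slippery flag controls, assuming gym's built-in FrozenLake-v1 id (older gym releases register it as FrozenLake-v0):

import gym

# Deterministic variant: each action moves the agent exactly as requested.
env = gym.make('FrozenLake-v1', map_name='4x4', is_slippery=False)
state = env.reset()

# Stochastic variant: the agent can slide to a perpendicular cell on the ice.
env = gym.make('FrozenLake-v1', map_name='4x4', is_slippery=True)
state = env.reset()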

View File

@@ -1,10 +1,3 @@
# Please do not make changes to this file - it will be overwritten with a clean
# version when your work is marked.
#
# This file contains code for the racetrack environment that you will be using
# as part of the second part of the CM50270: Reinforcement Learning coursework.
import imp
import time
import random
import numpy as np
@@ -12,23 +5,20 @@ import os
import matplotlib.pyplot as plt
import matplotlib.patheffects as pe
from IPython.display import clear_output
from gym.spaces import Discrete
from gym.spaces import Discrete, Box
from matplotlib import colors
import gym
class RacetrackEnv(object) :
class RacetrackEnv(gym.Env) :
"""
Class representing a race-track environment inspired by exercise 5.12 in Sutton & Barto 2018 (p.111).
Please do not make changes to this class - it will be overwritten with a clean version when it comes to marking.
The dynamics of this environment are detailed in this coursework exercise's jupyter notebook, although I have
included rather verbose comments here for those of you who are interested in how the environment has been
implemented (though this should not impact your solution code).
If you find any *bugs* with this code, please let me know immediately - thank you for finding them, sorry that I didn't!
However, please do not suggest optimisations - some things have been purposely simplified for readability's sake.
implemented (though this should not impact your solution code).
"""
ACTIONS_DICT = {
0 : (1, -1), # Acc Vert., Brake Horiz.
1 : (1, 0), # Acc Vert., Hold Horiz.
@@ -61,18 +51,15 @@ class RacetrackEnv(object) :
for x in range(self.track.shape[1]) :
if (self.CELL_TYPES_DICT[self.track[y, x]] == "start") :
self.initial_states.append((y, x))
high = np.array([np.finfo(np.float32).max, np.finfo(np.float32).max, np.finfo(np.float32).max, np.finfo(np.float32).max])
self.observation_space = Box(low=-high, high=high, shape=(4,), dtype=np.float32)
self.action_space = Discrete(9)
self.is_reset = False
#print("Racetrack Environment File Loaded Successfully.")
#print("Be sure to call .reset() before starting to initialise the environment and get an initial state!")
def step(self, action : int) :
"""
Takes a given action in the environment's current state, and returns a next state,
reward, and whether the next state is terminal or not.
reward, and whether the next state is done or not.
Arguments:
action {int} -- The action to take in the environment's current state. Should be an integer in the range [0-8].
@@ -86,7 +73,7 @@ class RacetrackEnv(object) :
A tuple of:\n
{(int, int, int, int)} -- The next state, a tuple of (y_pos, x_pos, y_velocity, x_velocity).\n
{int} -- The reward earned by taking the given action in the current environment state.\n
{bool} -- Whether the environment's next state is terminal or not.\n
{bool} -- Whether the environment's next state is done or not.\n
"""
@@ -131,7 +118,7 @@ class RacetrackEnv(object) :
new_position = (self.position[0] + self.velocity[0], self.position[1] + self.velocity[1])
reward = 0
terminal = False
done = False
# If position is out-of-bounds, return to start and set velocity components to zero.
if (new_position[0] < 0 or new_position[1] < 0 or new_position[0] >= self.track.shape[0] or new_position[1] >= self.track.shape[1]) :
@@ -150,7 +137,7 @@ class RacetrackEnv(object) :
elif (self.CELL_TYPES_DICT[self.track[new_position]] == "goal") :
self.position = new_position
reward += 10
terminal = True
done = True
# If this gets reached, then the student has touched something they shouldn't have. Naughty!
else :
raise RuntimeError("You've met with a terrible fate, haven't you?\nDon't modify things you shouldn't!")
@@ -158,12 +145,12 @@ class RacetrackEnv(object) :
# Penalise every timestep.
reward -= 1
# Require a reset if the current state is terminal.
if (terminal) :
# Require a reset if the current state is done.
if (done) :
self.is_reset = False
# Return next state, reward, and whether the episode has ended.
return (self.position[0], self.position[1], self.velocity[0], self.velocity[1]), reward, terminal
return np.array([self.position[0], self.position[1], self.velocity[0], self.velocity[1]]), reward, done, {}
def reset(self) :
@@ -184,10 +171,10 @@ class RacetrackEnv(object) :
self.is_reset = True
return (self.position[0], self.position[1], self.velocity[0], self.velocity[1])
return np.array([self.position[0], self.position[1], self.velocity[0], self.velocity[1]])
def render(self, sleep_time : float = 0.1) :
def render(self, mode='human') :
"""
Renders a pretty matplotlib plot representing the current state of the environment.
Calling this method on subsequent timesteps will update the plot.
@@ -230,13 +217,9 @@ class RacetrackEnv(object) :
# Draw everything.
#fig.canvas.draw()
#fig.canvas.flush_events()
plt.show()
# Sleep if desired.
if (sleep_time > 0) :
time.sleep(sleep_time)
# Pause briefly so each rendered frame stays visible.
time.sleep(0.1)
def get_actions(self) :
"""
@@ -244,18 +227,16 @@ class RacetrackEnv(object) :
of integers in the range [0-8].
"""
return [*self.ACTIONS_DICT]
if __name__ == "__main__":
    num_steps = 1000000
    env = RacetrackEnv()
    state = env.reset()
    print(state)
    for _ in range(num_steps) :
        next_state, reward, done, _ = env.step(random.choice(env.get_actions()))
        print(next_state)
        env.render()
        if (done) :
            _ = env.reset()
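With RacetrackEnv now following the Gym-style interface (np.array states, four-value step returns), a rollout helper only needs reset/step/get_actions. A minimal sketch under that assumption:

import random

def rollout(env, num_episodes=5):
    # Run a few random-policy episodes and collect the undiscounted returns.
    returns = []
    for _ in range(num_episodes):
        state = env.reset()
        done, ep_return = False, 0
        while not done:
            action = random.choice(env.get_actions())
            state, reward, done, _ = env.step(action)
            ep_return += reward
        returns.append(ep_return)
    return returns

print(rollout(RacetrackEnv()))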

View File

@@ -0,0 +1,34 @@
from gym.envs.registration import register
def register_env(env_name):
if env_name == 'Racetrack-v0':
register(
id='Racetrack-v0',
entry_point='racetrack:RacetrackEnv',
max_episode_steps=1000,
kwargs={}
)
elif env_name == 'FrozenLakeNoSlippery-v1':
register(
id='FrozenLakeNoSlippery-v1',
entry_point='gym.envs.toy_text.frozen_lake:FrozenLakeEnv',
kwargs={'map_name':"4x4",'is_slippery':False},
)
else:
print("The env name must be wrong or the environment donot need to register!")
# if __name__ == "__main__":
# import random
# import gym
# env = gym.make('FrozenLakeNoSlippery-v1')
# num_steps = 1000000
# state = env.reset()
# n_actions = env.action_space.n
# print(state)
# for _ in range(num_steps) :
# next_state, reward, done,_ = env.step(random.choice(range(n_actions)))
# print(next_state)
# if (done) :
# _ = env.reset()
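Once registered, the environment is created through gym.make, which also applies the 1000-step TimeLimit declared above. A usage sketch, assuming this module is saved as registration.py (a hypothetical name) and that racetrack.py is on the import path so the 'racetrack:RacetrackEnv' entry point resolves:

import gym
from registration import register_env  # hypothetical module name for this file

register_env('Racetrack-v0')
env = gym.make('Racetrack-v0')  # wrapped in a TimeLimit of 1000 steps
state = env.reset()
print(env.action_space)  # Discrete(9)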

View File

@@ -0,0 +1,78 @@
import turtle
import gym
class CliffWalkingWapper(gym.Wrapper):
def __init__(self, env):
gym.Wrapper.__init__(self, env)
self.t = None
self.unit = 50
self.max_x = 12
self.max_y = 4
def draw_x_line(self, y, x0, x1, color='gray'):
assert x1 > x0
self.t.color(color)
self.t.setheading(0)
self.t.up()
self.t.goto(x0, y)
self.t.down()
self.t.forward(x1 - x0)
def draw_y_line(self, x, y0, y1, color='gray'):
assert y1 > y0
self.t.color(color)
self.t.setheading(90)
self.t.up()
self.t.goto(x, y0)
self.t.down()
self.t.forward(y1 - y0)
def draw_box(self, x, y, fillcolor='', line_color='gray'):
self.t.up()
self.t.goto(x * self.unit, y * self.unit)
self.t.color(line_color)
self.t.fillcolor(fillcolor)
self.t.setheading(90)
self.t.down()
self.t.begin_fill()
for i in range(4):
self.t.forward(self.unit)
self.t.right(90)
self.t.end_fill()
def move_player(self, x, y):
self.t.up()
self.t.setheading(90)
self.t.fillcolor('red')
self.t.goto((x + 0.5) * self.unit, (y + 0.5) * self.unit)
def render(self):
if self.t is None:
self.t = turtle.Turtle()
self.wn = turtle.Screen()
self.wn.setup(self.unit * self.max_x + 100,
self.unit * self.max_y + 100)
self.wn.setworldcoordinates(0, 0, self.unit * self.max_x,
self.unit * self.max_y)
self.t.shape('circle')
self.t.width(2)
self.t.speed(0)
self.t.color('gray')
for _ in range(2):
self.t.forward(self.max_x * self.unit)
self.t.left(90)
self.t.forward(self.max_y * self.unit)
self.t.left(90)
for i in range(1, self.max_y):
self.draw_x_line(
y=i * self.unit, x0=0, x1=self.max_x * self.unit)
for i in range(1, self.max_x):
self.draw_y_line(
x=i * self.unit, y0=0, y1=self.max_y * self.unit)
for i in range(1, self.max_x - 1):
self.draw_box(i, 0, 'black')
self.draw_box(self.max_x - 1, 0, 'yellow')
self.t.shape('turtle')
x_pos = self.s % self.max_x
y_pos = self.max_y - 1 - int(self.s / self.max_x)
self.move_player(x_pos, y_pos)
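A usage sketch for this wrapper, assuming it targets gym's built-in CliffWalking-v0 (its 4x12 grid matches the max_x/max_y constants above, and its integer state attribute s is what render reads through the wrapper):

import gym

env = CliffWalkingWapper(gym.make('CliffWalking-v0'))
state = env.reset()
env.render()  # opens a turtle window; the yellow box marks the goal cell
next_state, reward, done, info = env.step(env.action_space.sample())
env.render()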