#!/usr/bin/env python
# coding=utf-8
'''
Author: John
Email: johnjim0816@gmail.com
Date: 2021-03-24 22:12:19
LastEditor: John
LastEditTime: 2021-03-26 17:12:43
Description: Small stochastic chain MDP with one-hot state observations.
Environment:
'''
import random

import numpy as np


class StochasticMDP:
    """Chain MDP whose "right" action only succeeds with some probability.

    States are numbered ``1 .. state_dim``. State 1 is terminal. Observations
    are one-hot vectors of length ``state_dim`` with the entry at index
    ``curr_state - 1`` set to 1.

    Actions:
        0 -- move one state to the left (always succeeds).
        1 -- move one state to the right with probability ``p_right``;
             otherwise (or when already in the right-most state) move one
             state to the left.
        Any other value leaves the state unchanged.

    Rewards are only paid on reaching state 1: ``REWARD_VISITED_END`` if the
    right-most state was visited during the episode, ``REWARD_DIRECT``
    otherwise. Every other transition yields 0.0.

    Randomness comes from the module-level ``random`` generator, so callers
    can make episodes reproducible with ``random.seed(...)``.

    Attributes:
        end: True once the right-most state has been visited this episode.
        curr_state: Current state number (1-based).
        action_dim: Number of actions (always 2).
        state_dim: Number of states, i.e. the observation length.
        p_right: Probability that action 1 actually moves right.
        start_state: State every episode starts in.
    """

    # Reward on termination after having visited the right-most state.
    REWARD_VISITED_END = 1.00
    # Reward on termination without having visited the right-most state.
    REWARD_DIRECT = 1.00 / 100.00

    def __init__(self, state_dim: int = 6, p_right: float = 0.5,
                 start_state: int = 2):
        """Create the environment and put it in its start state.

        Args:
            state_dim: Number of states in the chain (at least 2).
            p_right: Probability in [0, 1] that action 1 moves right.
            start_state: 1-based state each episode starts in; must lie in
                ``1 .. state_dim``.

        Raises:
            ValueError: If any argument is outside its valid range.
        """
        if state_dim < 2:
            raise ValueError(f"state_dim must be >= 2, got {state_dim}")
        if not 0.0 <= p_right <= 1.0:
            raise ValueError(f"p_right must be in [0, 1], got {p_right}")
        if not 1 <= start_state <= state_dim:
            raise ValueError(
                f"start_state must be in [1, {state_dim}], got {start_state}"
            )

        self.action_dim = 2
        self.state_dim = state_dim
        self.p_right = p_right
        self.start_state = start_state
        # reset() initialises ``end`` and ``curr_state``; its return value
        # (the initial observation) is not needed here.
        self.reset()

    def _one_hot(self) -> np.ndarray:
        """Return the one-hot observation for the current state."""
        state = np.zeros(self.state_dim)
        state[self.curr_state - 1] = 1.
        return state

    def reset(self) -> np.ndarray:
        """Start a new episode and return the initial one-hot observation."""
        self.curr_state = self.start_state
        # With the default configuration this is False; it is only True when
        # the episode is configured to start in the right-most state.
        self.end = self.curr_state == self.state_dim
        return self._one_hot()

    def step(self, action) -> tuple:
        """Apply ``action`` and return ``(state, reward, done, info)``.

        Once state 1 has been reached no further movement happens; calling
        ``step`` again returns the same terminal observation and reward.

        Args:
            action: 0 to move left, 1 to attempt a move right.

        Returns:
            A tuple ``(state, reward, done, info)`` where ``state`` is the
            one-hot observation, ``reward`` is a float, ``done`` is True
            exactly when the current state is 1, and ``info`` is an empty
            dict.
        """
        if self.curr_state != 1:
            if action == 1:
                # Draw first, then check the boundary, so one random number
                # is consumed per "right" action regardless of position.
                moved_right = (random.random() < self.p_right
                               and self.curr_state < self.state_dim)
                self.curr_state += 1 if moved_right else -1
            elif action == 0:
                self.curr_state -= 1

            if self.curr_state == self.state_dim:
                self.end = True

        state = self._one_hot()
        if self.curr_state != 1:
            return state, 0.0, False, {}

        reward = self.REWARD_VISITED_END if self.end else self.REWARD_DIRECT
        return state, reward, True, {}