update codes
This commit is contained in:
@@ -6,7 +6,7 @@
|
||||
|
||||
<img src="../../easy_rl_book/res/ch12/assets/pendulum_1.png" alt="image-20210915161550713" style="zoom:50%;" />
|
||||
|
||||
该环境的状态数有三个,设摆针竖直方向上的顺时针旋转角为$\theta$,$\theta$设在$[-\pi,\pi]$之间,则相应的状态为$[cos\theta,sin\theta,\dot{\theta}]$,即表示角度和角速度,我们的动作则是一个-2到2之间的力矩,它是一个连续量,因而该环境不能用离散动作的算法比如 DQN 来解决。关于奖励是根据相关的物理原理而计算出的等式,如下:
|
||||
该环境的状态维度有三个,设摆针竖直方向上的顺时针旋转角为$\theta$,$\theta$设在$[-\pi,\pi]$之间,则相应的状态为$[cos\theta,sin\theta,\dot{\theta}]$,即表示角度和角速度,我们的动作则是一个-2到2之间的力矩,它是一个连续量,因而该环境不能用离散动作的算法比如 DQN 来解决。关于奖励是根据相关的物理原理而计算出的等式,如下:
|
||||
$$
|
||||
-\left(\theta^{2}+0.1 * \hat{\theta}^{2}+0.001 * \text { action }^{2}\right)
|
||||
$$
|
||||
@@ -90,15 +90,15 @@ class OUNoise(object):
|
||||
self.max_sigma = max_sigma
|
||||
self.min_sigma = min_sigma
|
||||
self.decay_period = decay_period
|
||||
self.n_actions = action_space.shape[0]
|
||||
self.action_dim = action_space.shape[0]
|
||||
self.low = action_space.low
|
||||
self.high = action_space.high
|
||||
self.reset()
|
||||
def reset(self):
|
||||
self.obs = np.ones(self.n_actions) * self.mu
|
||||
self.obs = np.ones(self.action_dim) * self.mu
|
||||
def evolve_obs(self):
|
||||
x = self.obs
|
||||
dx = self.theta * (self.mu - x) + self.sigma * np.random.randn(self.n_actions)
|
||||
dx = self.theta * (self.mu - x) + self.sigma * np.random.randn(self.action_dim)
|
||||
self.obs = x + dx
|
||||
return self.obs
|
||||
def get_action(self, action, t=0):
|
||||
|
||||
Reference in New Issue
Block a user