update
This commit is contained in:
@@ -17,3 +17,18 @@ G_8=r_9+\gamma G_9=-1+0.6 \times(-2.176)=-2.3056 \approx-2.3
|
||||
\end{array}
|
||||
$$
|
||||
|
||||
* 149页,式(6.15) 改为
|
||||
|
||||
$$
\begin{aligned}
V^{\pi}(s) &\le Q^{\pi}(s,\pi'(s)) \\
&=E\left[r_{t}+V^{\pi}\left(s_{t+1}\right) | s_{t}=s, a_{t}=\pi^{\prime}\left(s_{t}\right)\right]\\
&\le E\left[r_{t}+Q^{\pi}\left(s_{t+1}, \pi^{\prime}\left(s_{t+1}\right)\right) | s_{t}=s, a_{t}=\pi^{\prime}\left(s_{t}\right)\right] \\
&=E\left[r_{t}+r_{t+1}+V^{\pi}\left(s_{t+2}\right) |s_{t}=s, a_{t}=\pi^{\prime}\left(s_{t}\right)\right] \\
& \le E\left[r_{t}+r_{t+1}+Q^{\pi}\left(s_{t+2},\pi'(s_{t+2})\right) | s_{t}=s, a_{t}=\pi^{\prime}\left(s_{t}\right)\right] \\
& = E\left[r_{t}+r_{t+1}+r_{t+2}+V^{\pi}\left(s_{t+3}\right) |s_{t}=s, a_{t}=\pi^{\prime}\left(s_{t}\right)\right] \\
& \le \cdots\\
& \le E\left[r_{t}+r_{t+1}+r_{t+2}+\cdots | s_{t}=s, a_{t}=\pi^{\prime}\left(s_{t}\right)\right] \\
& = V^{\pi'}(s)
\end{aligned}
$$
|
||||
|
||||
Reference in New Issue
Block a user