% TeX source -- 63 lines, 2.3 KiB
\documentclass[11pt]{ctexart}
|
|
\usepackage{ctex}
|
|
\usepackage{algorithm}
|
|
\usepackage{algorithmic}
|
|
\usepackage{amssymb}
|
|
\usepackage{amsmath}
|
|
|
|
|
|
\begin{document}
|
|
|
|
% DQN pseudo-code, typeset as a float whose caption label reads "DQN算法".
\begin{algorithm}
    % Caption trick: the float name carries the title, the counter is printed
    % as nothing, and the caption text itself is left empty.
    \floatname{algorithm}{{DQN算法}}
    \renewcommand{\thealgorithm}{} % suppress the algorithm number
    \caption{}
    % Chinese labels for \REQUIRE / \ENSURE (both are commented out below).
    \renewcommand{\algorithmicrequire}{\textbf{输入:}}
    \renewcommand{\algorithmicensure}{\textbf{输出:}}
    \begin{algorithmic}
        % \REQUIRE $n \geq 0 \vee x \neq 0$ % input
        % \ENSURE $y = x^n$ % output
        \STATE 初始化策略网络参数$\theta$ % initialization
        \STATE 复制参数到目标网络$\hat{Q} \leftarrow Q$
        \STATE 初始化经验回放$D$
        \FOR {回合数 = $1,M$}
            % The initial state is s_1: the step index t only exists inside the inner loop.
            \STATE 重置环境,获得初始状态$s_1$
            % Upper bound is T; t is the running step index used in the subscripts below.
            \FOR {时步 = $1,T$}
                % Only the symbol is math; "-greedy" is text (in math mode it is
                % typeset as a minus sign followed by italic variables).
                \STATE 根据$\varepsilon$-greedy策略采样动作$a_t$
                % The reward is r_t, matching the stored transition (s_t, a_t, r_t, s_{t+1}).
                \STATE 环境根据$a_t$反馈奖励$r_t$和下一个状态$s_{t+1}$
                \STATE 存储transition即$(s_t,a_t,r_t,s_{t+1})$到经验回放$D$中
                % The next state becomes the current state (not the other way round).
                \STATE 更新环境状态$s_t \leftarrow s_{t+1}$
                \STATE {\bfseries 更新策略:}
                \STATE 从$D$中采样一个batch的transition
                % The bootstrap target is evaluated with the target network \hat{Q};
                % the online network Q(.;\theta) only appears in the loss below.
                \STATE 计算实际的$Q$值,即$y_{j}= \begin{cases}r_{j} & \text{对于终止状态} s_{j+1} \\ r_{j}+\gamma \max_{a^{\prime}} \hat{Q}\left(s_{j+1}, a^{\prime}\right) & \text{对于非终止状态} s_{j+1}\end{cases}$
                \STATE 对损失 $\left(y_{j}-Q\left(s_{j}, a_{j} ; \theta\right)\right)^{2}$关于参数$\theta$做随机梯度下降
                \STATE 每$C$步复制参数$\hat{Q} \leftarrow Q$
            \ENDFOR
        \ENDFOR
    \end{algorithmic}
\end{algorithm}
|
|
|
|
\clearpage
|
|
|
|
% Soft Q-learning pseudo-code (still a draft: the update step is a placeholder),
% typeset as a float whose caption label reads "SoftQ算法".
\begin{algorithm}
    % Caption trick: the float name carries the title, the counter is printed
    % as nothing, and the caption text itself is left empty.
    \floatname{algorithm}{{SoftQ算法}}
    \renewcommand{\thealgorithm}{} % suppress the algorithm number
    \caption{}
    \begin{algorithmic}
        \STATE 初始化参数$\theta$和$\phi$ % initialization
        \STATE 复制参数$\bar{\theta} \leftarrow \theta, \bar{\phi} \leftarrow \phi$
        \STATE 初始化经验回放$D$
        \FOR {回合数 = $1,M$}
            % Upper bound is T; t is the running step index used in the subscripts below.
            \FOR {时步 = $1,T$}
                \STATE 根据$a_{t} \leftarrow f^{\phi}\left(\xi ; \mathbf{s}_{t}\right)$采样动作,其中$\xi \sim \mathcal{N}(\mathbf{0}, \boldsymbol{I})$
                % The reward is r_t, matching the stored transition (s_t, a_t, r_t, s_{t+1}).
                \STATE 环境根据$a_t$反馈奖励$r_t$和下一个状态$s_{t+1}$
                \STATE 存储transition即$(s_t,a_t,r_t,s_{t+1})$到经验回放$D$中
                % The next state becomes the current state (not the other way round).
                \STATE 更新环境状态$s_t \leftarrow s_{t+1}$
                % Placeholder ("to be completed"): the parameter-update steps are not written yet.
                \STATE 待完善
            \ENDFOR
        \ENDFOR
    \end{algorithmic}
\end{algorithm}
|
|
|
|
\end{document} |