diff --git a/projects/README.md b/projects/README.md index 276c7e5..bb6196a 100644 --- a/projects/README.md +++ b/projects/README.md @@ -22,16 +22,19 @@ 注:点击对应的名称会跳到[codes](./codes/)下对应的算法中,其他版本还请读者自行翻阅 -| 算法名称 | 参考文献 | 备注 | -| :-------------------------------------: | :----------------------------------------------------------: | :--: | -| [Policy Gradient](codes/PolicyGradient) | [Policy Gradient paper](https://proceedings.neurips.cc/paper/1999/file/464d828b85b0bed98e80ade0a5c43b0f-Paper.pdf) | | -| DQN-CNN | | 待更 | -| [DoubleDQN](codes/DoubleDQN) | [Double DQN Paper](https://arxiv.org/abs/1509.06461) | | -| [SoftQ](codes/SoftQ) | [Soft Q-learning paper](https://arxiv.org/abs/1702.08165) | | -| [SAC](codes/SAC) | [SAC paper](https://arxiv.org/pdf/1812.05905.pdf) | | -| [SAC-Discrete](codes/SAC) | [SAC-Discrete paper](https://arxiv.org/pdf/1910.07207.pdf) | | -| SAC-S | [SAC-S paper](https://arxiv.org/abs/1801.01290) | | -| DSAC | [DSAC paper](https://paperswithcode.com/paper/addressing-value-estimation-errors-in) | 待更 | +| 算法名称 | 参考文献 | 作者 | 备注 | +| :-------------------------------------: | :----------------------------------------------------------: | :--------------------------------------------------: | :--: | +| [Policy Gradient](codes/PolicyGradient) | [Policy Gradient paper](https://proceedings.neurips.cc/paper/1999/file/464d828b85b0bed98e80ade0a5c43b0f-Paper.pdf) | [johnjim0816](https://github.com/johnjim0816) | | +| [Monte Carlo](codes/MonteCarlo) | | [johnjim0816](https://github.com/johnjim0816) | | +| [DQN](codes/DQN) | | [johnjim0816](https://github.com/johnjim0816) | | +| DQN-CNN | | | 待更 | +| [PER_DQN](codes/PER_DQN) | [PER DQN Paper](https://arxiv.org/abs/1511.05952) | [wangzhongren](https://github.com/wangzhongren-code) | | +| [DoubleDQN](codes/DoubleDQN) | [Double DQN Paper](https://arxiv.org/abs/1509.06461) | [johnjim0816](https://github.com/johnjim0816) | | +| [SoftQ](codes/SoftQ) | [Soft Q-learning paper](https://arxiv.org/abs/1702.08165) | 
[johnjim0816](https://github.com/johnjim0816) | | +| [SAC](codes/SAC) | [SAC paper](https://arxiv.org/pdf/1812.05905.pdf) | | | +| [SAC-Discrete](codes/SAC) | [SAC-Discrete paper](https://arxiv.org/pdf/1910.07207.pdf) | | | +| SAC-S | [SAC-S paper](https://arxiv.org/abs/1801.01290) | | | +| DSAC | [DSAC paper](https://paperswithcode.com/paper/addressing-value-estimation-errors-in) | | 待更 | ## 3. 算法环境 diff --git a/projects/assets/pseudocodes/pseudocodes.pdf b/projects/assets/pseudocodes/pseudocodes.pdf index cfe734a..5232181 100644 Binary files a/projects/assets/pseudocodes/pseudocodes.pdf and b/projects/assets/pseudocodes/pseudocodes.pdf differ diff --git a/projects/assets/pseudocodes/pseudocodes.tex b/projects/assets/pseudocodes/pseudocodes.tex index 7af7feb..0033ae8 100644 --- a/projects/assets/pseudocodes/pseudocodes.tex +++ b/projects/assets/pseudocodes/pseudocodes.tex @@ -126,6 +126,46 @@ \footnotetext[3]{$\theta_i \leftarrow \theta_i - \lambda \nabla_{\theta_{i}} L_{i}\left(\theta_{i}\right)$} \footnotetext[4]{此处也可像原论文中放到小循环中改成每$C$步,但没有每$C$个回合稳定} \clearpage + + +\section{PER\_DQN算法} +\begin{algorithm}[H] % [H]固定位置 + \floatname{algorithm}{{PER\_DQN算法}\footnotemark[1]} + \renewcommand{\thealgorithm}{} % 去掉算法标号 + \caption{} + \renewcommand{\algorithmicrequire}{\textbf{输入:}} + \renewcommand{\algorithmicensure}{\textbf{输出:}} + \begin{algorithmic}[1] + % \REQUIRE $n \geq 0 \vee x \neq 0$ % 输入 + % \ENSURE $y = x^n$ % 输出 + \STATE 初始化策略网络参数$\theta$ % 初始化 + \STATE 复制参数到目标网络$\hat{Q} \leftarrow Q$ + \STATE 初始化经验回放$D$ + \FOR {回合数 = $1,M$} + \STATE 重置环境,获得初始状态$s_t$ + \FOR {时步 = $1,t$} + \STATE 根据$\varepsilon$-greedy策略采样动作$a_t$ + \STATE 环境根据$a_t$反馈奖励$r_t$和下一个状态$s_{t+1}$ + \STATE 存储transition即$(s_t,a_t,r_t,s_{t+1})$到经验回放$D$,并根据TD-error损失确定其优先级$p_t$ + \STATE 更新环境状态$s_t \leftarrow s_{t+1}$ + \STATE {\bfseries 更新策略:} + \STATE 按照经验回放中的优先级别,每个样本采样概率为$P(j)=p_j^\alpha / \sum_i p_i^\alpha$,从$D$中采样一个大小为batch的transition + \STATE 计算各个样本重要性采样权重 $w_j=(N \cdot P(j))^{-\beta} / \max _i 
w_i$ + \STATE 计算TD-error $\delta_j$ ; 并根据TD-error更新优先级$p_j$ + \STATE 计算实际的$Q$值,即$y_{j}$\footnotemark[2] + \STATE 根据重要性采样权重调整损失 $L(\theta)=w_j \cdot \left(y_{j}-Q\left(s_{j}, a_{j} ; \theta\right)\right)^{2}$,并将其关于参数$\theta$做随机梯度下降\footnotemark[3] + \ENDFOR + \STATE 每$C$个回合复制参数$\hat{Q}\leftarrow Q$\footnotemark[4] + \ENDFOR + \end{algorithmic} +\end{algorithm} +\footnotetext[1]{Prioritized Experience Replay} +\footnotetext[2]{$y_{i}= \begin{cases}r_{i} & \text {对于终止状态} s_{i+1} \\ r_{i}+\gamma \max _{a^{\prime}} Q\left(s_{i+1}, a^{\prime} ; \theta\right) & \text {对于非终止状态} s_{i+1}\end{cases}$} +\footnotetext[3]{$\theta_i \leftarrow \theta_i - \lambda \nabla_{\theta_{i}} L_{i}\left(\theta_{i}\right)$} +\footnotetext[4]{此处也可像原论文中放到小循环中改成每$C$步,但没有每$C$个回合稳定} +\clearpage + + \section{Policy Gradient算法} \begin{algorithm}[H] % [H]固定位置 \floatname{algorithm}{{REINFORCE算法:Monte-Carlo Policy Gradient}\footnotemark[1]} diff --git a/projects/codes/MonteCarlo/outputs/Racetrack/20220815-180742/models/Q_table b/projects/codes/MonteCarlo/outputs/Racetrack/20220815-180742/models/Q_table deleted file mode 100644 index e21a117..0000000 Binary files a/projects/codes/MonteCarlo/outputs/Racetrack/20220815-180742/models/Q_table and /dev/null differ diff --git a/projects/codes/MonteCarlo/outputs/Racetrack/20220815-180742/results/params.json b/projects/codes/MonteCarlo/outputs/Racetrack/20220815-180742/results/params.json deleted file mode 100644 index 6f75e32..0000000 --- a/projects/codes/MonteCarlo/outputs/Racetrack/20220815-180742/results/params.json +++ /dev/null @@ -1 +0,0 @@ -{"algo_name": "First-Visit MC", "env_name": "Racetrack", "train_eps": 200, "test_eps": 20, "gamma": 0.9, "epsilon": 0.15, "device": "cpu", "result_path": "/Users/jj/Desktop/rl-tutorials/codes/MonteCarlo/outputs/Racetrack/20220815-180742/results/", "model_path": "/Users/jj/Desktop/rl-tutorials/codes/MonteCarlo/outputs/Racetrack/20220815-180742/models/", "save_fig": true} \ No newline at end of file 
diff --git a/projects/codes/MonteCarlo/outputs/Racetrack/20220815-180742/results/test_rewards.npy b/projects/codes/MonteCarlo/outputs/Racetrack/20220815-180742/results/test_rewards.npy deleted file mode 100644 index c0de5ac..0000000 Binary files a/projects/codes/MonteCarlo/outputs/Racetrack/20220815-180742/results/test_rewards.npy and /dev/null differ diff --git a/projects/codes/MonteCarlo/outputs/Racetrack/20220815-180742/results/testing_curve.png b/projects/codes/MonteCarlo/outputs/Racetrack/20220815-180742/results/testing_curve.png deleted file mode 100644 index 3c9cda1..0000000 Binary files a/projects/codes/MonteCarlo/outputs/Racetrack/20220815-180742/results/testing_curve.png and /dev/null differ diff --git a/projects/codes/MonteCarlo/outputs/Racetrack/20220815-180742/results/train_rewards.npy b/projects/codes/MonteCarlo/outputs/Racetrack/20220815-180742/results/train_rewards.npy deleted file mode 100644 index 026a78d..0000000 Binary files a/projects/codes/MonteCarlo/outputs/Racetrack/20220815-180742/results/train_rewards.npy and /dev/null differ diff --git a/projects/codes/MonteCarlo/outputs/Racetrack/20220815-180742/results/training_curve.png b/projects/codes/MonteCarlo/outputs/Racetrack/20220815-180742/results/training_curve.png deleted file mode 100644 index 9e8c483..0000000 Binary files a/projects/codes/MonteCarlo/outputs/Racetrack/20220815-180742/results/training_curve.png and /dev/null differ diff --git a/projects/codes/MonteCarlo/task0.py b/projects/codes/MonteCarlo/task0.py index 4570967..75e52e1 100644 --- a/projects/codes/MonteCarlo/task0.py +++ b/projects/codes/MonteCarlo/task0.py @@ -5,7 +5,7 @@ Author: John Email: johnjim0816@gmail.com Date: 2021-03-11 14:26:44 LastEditor: John -LastEditTime: 2022-11-06 00:44:56 +LastEditTime: 2022-11-08 23:35:18 Discription: Environment: ''' @@ -24,9 +24,6 @@ from common.launcher import Launcher from MonteCarlo.agent import FisrtVisitMC from MonteCarlo.config.config import GeneralConfigMC,AlgoConfigMC - 
-curr_time = datetime.datetime.now().strftime( - "%Y%m%d-%H%M%S") # obtain current time class Main(Launcher): def __init__(self) -> None: super().__init__() diff --git a/projects/codes/PER_DQN/Test_CartPole-v1_PER_DQN_20221114-104649/config.yaml b/projects/codes/PER_DQN/Test_CartPole-v1_PER_DQN_20221114-104649/config.yaml new file mode 100644 index 0000000..39f8743 --- /dev/null +++ b/projects/codes/PER_DQN/Test_CartPole-v1_PER_DQN_20221114-104649/config.yaml @@ -0,0 +1,25 @@ +general_cfg: + algo_name: PER_DQN + device: cpu + env_name: CartPole-v1 + eval_eps: 10 + eval_per_episode: 5 + load_checkpoint: true + load_path: Train_CartPole-v1_PER_DQN_20221113-162804 + max_steps: 200 + mode: test + save_fig: true + seed: 0 + show_fig: false + test_eps: 10 + train_eps: 200 +algo_cfg: + batch_size: 64 + buffer_size: 100000 + epsilon_decay: 500 + epsilon_end: 0.01 + epsilon_start: 0.95 + gamma: 0.95 + hidden_dim: 256 + lr: 0.0001 + target_update: 4 diff --git a/projects/codes/PER_DQN/Test_CartPole-v1_PER_DQN_20221114-104649/logs/log.txt b/projects/codes/PER_DQN/Test_CartPole-v1_PER_DQN_20221114-104649/logs/log.txt new file mode 100644 index 0000000..9fe5454 --- /dev/null +++ b/projects/codes/PER_DQN/Test_CartPole-v1_PER_DQN_20221114-104649/logs/log.txt @@ -0,0 +1,14 @@ +2022-11-14 10:46:49 - r - INFO: - n_states: 4, n_actions: 2 +2022-11-14 10:46:49 - r - INFO: - Start testing! 
+2022-11-14 10:46:49 - r - INFO: - Env: CartPole-v1, Algorithm: PER_DQN, Device: cpu +2022-11-14 10:46:49 - r - INFO: - Episode: 1/10, Reward: 200.000, Step: 200 +2022-11-14 10:46:49 - r - INFO: - Episode: 2/10, Reward: 200.000, Step: 200 +2022-11-14 10:46:49 - r - INFO: - Episode: 3/10, Reward: 200.000, Step: 200 +2022-11-14 10:46:49 - r - INFO: - Episode: 4/10, Reward: 200.000, Step: 200 +2022-11-14 10:46:49 - r - INFO: - Episode: 5/10, Reward: 200.000, Step: 200 +2022-11-14 10:46:49 - r - INFO: - Episode: 6/10, Reward: 200.000, Step: 200 +2022-11-14 10:46:49 - r - INFO: - Episode: 7/10, Reward: 200.000, Step: 200 +2022-11-14 10:46:49 - r - INFO: - Episode: 8/10, Reward: 200.000, Step: 200 +2022-11-14 10:46:49 - r - INFO: - Episode: 9/10, Reward: 200.000, Step: 200 +2022-11-14 10:46:49 - r - INFO: - Episode: 10/10, Reward: 200.000, Step: 200 +2022-11-14 10:46:49 - r - INFO: - Finish testing! diff --git a/projects/codes/PER_DQN/Test_CartPole-v1_PER_DQN_20221114-104649/models/checkpoint.pt b/projects/codes/PER_DQN/Test_CartPole-v1_PER_DQN_20221114-104649/models/checkpoint.pt new file mode 100644 index 0000000..06d607b Binary files /dev/null and b/projects/codes/PER_DQN/Test_CartPole-v1_PER_DQN_20221114-104649/models/checkpoint.pt differ diff --git a/projects/codes/PER_DQN/Test_CartPole-v1_PER_DQN_20221114-104649/results/learning_curve.png b/projects/codes/PER_DQN/Test_CartPole-v1_PER_DQN_20221114-104649/results/learning_curve.png new file mode 100644 index 0000000..f1e8056 Binary files /dev/null and b/projects/codes/PER_DQN/Test_CartPole-v1_PER_DQN_20221114-104649/results/learning_curve.png differ diff --git a/projects/codes/PER_DQN/Test_CartPole-v1_PER_DQN_20221114-104649/results/res.csv b/projects/codes/PER_DQN/Test_CartPole-v1_PER_DQN_20221114-104649/results/res.csv new file mode 100644 index 0000000..cbbcf2e --- /dev/null +++ b/projects/codes/PER_DQN/Test_CartPole-v1_PER_DQN_20221114-104649/results/res.csv @@ -0,0 +1,11 @@ +episodes,rewards,steps +0,200.0,200 
+1,200.0,200 +2,200.0,200 +3,200.0,200 +4,200.0,200 +5,200.0,200 +6,200.0,200 +7,200.0,200 +8,200.0,200 +9,200.0,200 diff --git a/projects/codes/PER_DQN/Train_CartPole-v1_PER_DQN_20221113-162804/config.yaml b/projects/codes/PER_DQN/Train_CartPole-v1_PER_DQN_20221113-162804/config.yaml new file mode 100644 index 0000000..bd4f2bd --- /dev/null +++ b/projects/codes/PER_DQN/Train_CartPole-v1_PER_DQN_20221113-162804/config.yaml @@ -0,0 +1,25 @@ +general_cfg: + algo_name: PER_DQN + device: cuda + env_name: CartPole-v1 + eval_eps: 10 + eval_per_episode: 5 + load_checkpoint: false + load_path: tasks + max_steps: 200 + mode: train + save_fig: true + seed: 1 + show_fig: false + test_eps: 10 + train_eps: 200 +algo_cfg: + batch_size: 64 + buffer_size: 100000 + epsilon_decay: 500 + epsilon_end: 0.01 + epsilon_start: 0.95 + gamma: 0.95 + hidden_dim: 256 + lr: 0.0001 + target_update: 4 diff --git a/projects/codes/PER_DQN/Train_CartPole-v1_PER_DQN_20221113-162804/logs/log.txt b/projects/codes/PER_DQN/Train_CartPole-v1_PER_DQN_20221113-162804/logs/log.txt new file mode 100644 index 0000000..1cea48c --- /dev/null +++ b/projects/codes/PER_DQN/Train_CartPole-v1_PER_DQN_20221113-162804/logs/log.txt @@ -0,0 +1,224 @@ +2022-11-13 16:28:04 - r - INFO: - n_states: 4, n_actions: 2 +2022-11-13 16:28:19 - r - INFO: - Start training! 
+2022-11-13 16:28:19 - r - INFO: - Env: CartPole-v1, Algorithm: PER_DQN, Device: cuda +2022-11-13 16:28:23 - r - INFO: - Episode: 1/200, Reward: 18.000, Step: 18 +2022-11-13 16:28:24 - r - INFO: - Episode: 2/200, Reward: 35.000, Step: 35 +2022-11-13 16:28:24 - r - INFO: - Episode: 3/200, Reward: 13.000, Step: 13 +2022-11-13 16:28:24 - r - INFO: - Episode: 4/200, Reward: 20.000, Step: 20 +2022-11-13 16:28:24 - r - INFO: - Episode: 5/200, Reward: 24.000, Step: 24 +2022-11-13 16:28:24 - r - INFO: - Current episode 5 has the best eval reward: 9.100 +2022-11-13 16:28:24 - r - INFO: - Episode: 6/200, Reward: 10.000, Step: 10 +2022-11-13 16:28:24 - r - INFO: - Episode: 7/200, Reward: 20.000, Step: 20 +2022-11-13 16:28:24 - r - INFO: - Episode: 8/200, Reward: 19.000, Step: 19 +2022-11-13 16:28:25 - r - INFO: - Episode: 9/200, Reward: 30.000, Step: 30 +2022-11-13 16:28:25 - r - INFO: - Episode: 10/200, Reward: 10.000, Step: 10 +2022-11-13 16:28:25 - r - INFO: - Current episode 10 has the best eval reward: 9.200 +2022-11-13 16:28:25 - r - INFO: - Episode: 11/200, Reward: 16.000, Step: 16 +2022-11-13 16:28:25 - r - INFO: - Episode: 12/200, Reward: 16.000, Step: 16 +2022-11-13 16:28:25 - r - INFO: - Episode: 13/200, Reward: 12.000, Step: 12 +2022-11-13 16:28:25 - r - INFO: - Episode: 14/200, Reward: 28.000, Step: 28 +2022-11-13 16:28:25 - r - INFO: - Episode: 15/200, Reward: 22.000, Step: 22 +2022-11-13 16:28:25 - r - INFO: - Current episode 15 has the best eval reward: 9.300 +2022-11-13 16:28:25 - r - INFO: - Episode: 16/200, Reward: 14.000, Step: 14 +2022-11-13 16:28:25 - r - INFO: - Episode: 17/200, Reward: 9.000, Step: 9 +2022-11-13 16:28:26 - r - INFO: - Episode: 18/200, Reward: 13.000, Step: 13 +2022-11-13 16:28:26 - r - INFO: - Episode: 19/200, Reward: 19.000, Step: 19 +2022-11-13 16:28:26 - r - INFO: - Episode: 20/200, Reward: 10.000, Step: 10 +2022-11-13 16:28:26 - r - INFO: - Episode: 21/200, Reward: 10.000, Step: 10 +2022-11-13 16:28:26 - r - INFO: - Episode: 
22/200, Reward: 12.000, Step: 12 +2022-11-13 16:28:26 - r - INFO: - Episode: 23/200, Reward: 9.000, Step: 9 +2022-11-13 16:28:26 - r - INFO: - Episode: 24/200, Reward: 12.000, Step: 12 +2022-11-13 16:28:26 - r - INFO: - Episode: 25/200, Reward: 11.000, Step: 11 +2022-11-13 16:28:26 - r - INFO: - Current episode 25 has the best eval reward: 9.800 +2022-11-13 16:28:26 - r - INFO: - Episode: 26/200, Reward: 11.000, Step: 11 +2022-11-13 16:28:26 - r - INFO: - Episode: 27/200, Reward: 13.000, Step: 13 +2022-11-13 16:28:26 - r - INFO: - Episode: 28/200, Reward: 11.000, Step: 11 +2022-11-13 16:28:27 - r - INFO: - Episode: 29/200, Reward: 13.000, Step: 13 +2022-11-13 16:28:27 - r - INFO: - Episode: 30/200, Reward: 20.000, Step: 20 +2022-11-13 16:28:27 - r - INFO: - Current episode 30 has the best eval reward: 12.200 +2022-11-13 16:28:27 - r - INFO: - Episode: 31/200, Reward: 16.000, Step: 16 +2022-11-13 16:28:27 - r - INFO: - Episode: 32/200, Reward: 9.000, Step: 9 +2022-11-13 16:28:27 - r - INFO: - Episode: 33/200, Reward: 16.000, Step: 16 +2022-11-13 16:28:27 - r - INFO: - Episode: 34/200, Reward: 15.000, Step: 15 +2022-11-13 16:28:27 - r - INFO: - Episode: 35/200, Reward: 12.000, Step: 12 +2022-11-13 16:28:27 - r - INFO: - Current episode 35 has the best eval reward: 12.500 +2022-11-13 16:28:27 - r - INFO: - Episode: 36/200, Reward: 12.000, Step: 12 +2022-11-13 16:28:27 - r - INFO: - Episode: 37/200, Reward: 16.000, Step: 16 +2022-11-13 16:28:28 - r - INFO: - Episode: 38/200, Reward: 13.000, Step: 13 +2022-11-13 16:28:28 - r - INFO: - Episode: 39/200, Reward: 18.000, Step: 18 +2022-11-13 16:28:28 - r - INFO: - Episode: 40/200, Reward: 18.000, Step: 18 +2022-11-13 16:28:28 - r - INFO: - Current episode 40 has the best eval reward: 20.400 +2022-11-13 16:28:28 - r - INFO: - Episode: 41/200, Reward: 48.000, Step: 48 +2022-11-13 16:28:29 - r - INFO: - Episode: 42/200, Reward: 52.000, Step: 52 +2022-11-13 16:28:29 - r - INFO: - Episode: 43/200, Reward: 33.000, Step: 33 
+2022-11-13 16:28:29 - r - INFO: - Episode: 44/200, Reward: 15.000, Step: 15 +2022-11-13 16:28:29 - r - INFO: - Episode: 45/200, Reward: 18.000, Step: 18 +2022-11-13 16:28:29 - r - INFO: - Episode: 46/200, Reward: 22.000, Step: 22 +2022-11-13 16:28:29 - r - INFO: - Episode: 47/200, Reward: 19.000, Step: 19 +2022-11-13 16:28:30 - r - INFO: - Episode: 48/200, Reward: 19.000, Step: 19 +2022-11-13 16:28:30 - r - INFO: - Episode: 49/200, Reward: 11.000, Step: 11 +2022-11-13 16:28:30 - r - INFO: - Episode: 50/200, Reward: 9.000, Step: 9 +2022-11-13 16:28:30 - r - INFO: - Episode: 51/200, Reward: 10.000, Step: 10 +2022-11-13 16:28:30 - r - INFO: - Episode: 52/200, Reward: 10.000, Step: 10 +2022-11-13 16:28:30 - r - INFO: - Episode: 53/200, Reward: 10.000, Step: 10 +2022-11-13 16:28:30 - r - INFO: - Episode: 54/200, Reward: 10.000, Step: 10 +2022-11-13 16:28:30 - r - INFO: - Episode: 55/200, Reward: 9.000, Step: 9 +2022-11-13 16:28:30 - r - INFO: - Episode: 56/200, Reward: 17.000, Step: 17 +2022-11-13 16:28:31 - r - INFO: - Episode: 57/200, Reward: 75.000, Step: 75 +2022-11-13 16:28:31 - r - INFO: - Episode: 58/200, Reward: 28.000, Step: 28 +2022-11-13 16:28:31 - r - INFO: - Episode: 59/200, Reward: 30.000, Step: 30 +2022-11-13 16:28:32 - r - INFO: - Episode: 60/200, Reward: 54.000, Step: 54 +2022-11-13 16:28:32 - r - INFO: - Current episode 60 has the best eval reward: 34.600 +2022-11-13 16:28:32 - r - INFO: - Episode: 61/200, Reward: 22.000, Step: 22 +2022-11-13 16:28:32 - r - INFO: - Episode: 62/200, Reward: 28.000, Step: 28 +2022-11-13 16:28:32 - r - INFO: - Episode: 63/200, Reward: 26.000, Step: 26 +2022-11-13 16:28:33 - r - INFO: - Episode: 64/200, Reward: 32.000, Step: 32 +2022-11-13 16:28:33 - r - INFO: - Episode: 65/200, Reward: 30.000, Step: 30 +2022-11-13 16:28:33 - r - INFO: - Episode: 66/200, Reward: 29.000, Step: 29 +2022-11-13 16:28:34 - r - INFO: - Episode: 67/200, Reward: 28.000, Step: 28 +2022-11-13 16:28:34 - r - INFO: - Episode: 68/200, Reward: 38.000, 
Step: 38 +2022-11-13 16:28:34 - r - INFO: - Episode: 69/200, Reward: 28.000, Step: 28 +2022-11-13 16:28:34 - r - INFO: - Episode: 70/200, Reward: 22.000, Step: 22 +2022-11-13 16:28:34 - r - INFO: - Current episode 70 has the best eval reward: 36.700 +2022-11-13 16:28:35 - r - INFO: - Episode: 71/200, Reward: 40.000, Step: 40 +2022-11-13 16:28:35 - r - INFO: - Episode: 72/200, Reward: 27.000, Step: 27 +2022-11-13 16:28:35 - r - INFO: - Episode: 73/200, Reward: 24.000, Step: 24 +2022-11-13 16:28:35 - r - INFO: - Episode: 74/200, Reward: 47.000, Step: 47 +2022-11-13 16:28:36 - r - INFO: - Episode: 75/200, Reward: 127.000, Step: 127 +2022-11-13 16:28:37 - r - INFO: - Episode: 76/200, Reward: 48.000, Step: 48 +2022-11-13 16:28:37 - r - INFO: - Episode: 77/200, Reward: 27.000, Step: 27 +2022-11-13 16:28:37 - r - INFO: - Episode: 78/200, Reward: 65.000, Step: 65 +2022-11-13 16:28:38 - r - INFO: - Episode: 79/200, Reward: 75.000, Step: 75 +2022-11-13 16:28:38 - r - INFO: - Episode: 80/200, Reward: 47.000, Step: 47 +2022-11-13 16:28:38 - r - INFO: - Current episode 80 has the best eval reward: 37.200 +2022-11-13 16:28:39 - r - INFO: - Episode: 81/200, Reward: 34.000, Step: 34 +2022-11-13 16:28:39 - r - INFO: - Episode: 82/200, Reward: 38.000, Step: 38 +2022-11-13 16:28:39 - r - INFO: - Episode: 83/200, Reward: 24.000, Step: 24 +2022-11-13 16:28:39 - r - INFO: - Episode: 84/200, Reward: 47.000, Step: 47 +2022-11-13 16:28:40 - r - INFO: - Episode: 85/200, Reward: 35.000, Step: 35 +2022-11-13 16:28:40 - r - INFO: - Current episode 85 has the best eval reward: 66.900 +2022-11-13 16:28:41 - r - INFO: - Episode: 86/200, Reward: 103.000, Step: 103 +2022-11-13 16:28:41 - r - INFO: - Episode: 87/200, Reward: 64.000, Step: 64 +2022-11-13 16:28:42 - r - INFO: - Episode: 88/200, Reward: 59.000, Step: 59 +2022-11-13 16:28:43 - r - INFO: - Episode: 89/200, Reward: 200.000, Step: 200 +2022-11-13 16:28:44 - r - INFO: - Episode: 90/200, Reward: 200.000, Step: 200 +2022-11-13 16:28:46 - r - 
INFO: - Current episode 90 has the best eval reward: 200.000 +2022-11-13 16:28:47 - r - INFO: - Episode: 91/200, Reward: 200.000, Step: 200 +2022-11-13 16:28:48 - r - INFO: - Episode: 92/200, Reward: 200.000, Step: 200 +2022-11-13 16:28:50 - r - INFO: - Episode: 93/200, Reward: 200.000, Step: 200 +2022-11-13 16:28:51 - r - INFO: - Episode: 94/200, Reward: 200.000, Step: 200 +2022-11-13 16:28:52 - r - INFO: - Episode: 95/200, Reward: 200.000, Step: 200 +2022-11-13 16:28:54 - r - INFO: - Current episode 95 has the best eval reward: 200.000 +2022-11-13 16:28:55 - r - INFO: - Episode: 96/200, Reward: 200.000, Step: 200 +2022-11-13 16:28:56 - r - INFO: - Episode: 97/200, Reward: 200.000, Step: 200 +2022-11-13 16:28:58 - r - INFO: - Episode: 98/200, Reward: 200.000, Step: 200 +2022-11-13 16:28:59 - r - INFO: - Episode: 99/200, Reward: 200.000, Step: 200 +2022-11-13 16:29:00 - r - INFO: - Episode: 100/200, Reward: 200.000, Step: 200 +2022-11-13 16:29:02 - r - INFO: - Current episode 100 has the best eval reward: 200.000 +2022-11-13 16:29:04 - r - INFO: - Episode: 101/200, Reward: 200.000, Step: 200 +2022-11-13 16:29:05 - r - INFO: - Episode: 102/200, Reward: 200.000, Step: 200 +2022-11-13 16:29:06 - r - INFO: - Episode: 103/200, Reward: 200.000, Step: 200 +2022-11-13 16:29:07 - r - INFO: - Episode: 104/200, Reward: 200.000, Step: 200 +2022-11-13 16:29:09 - r - INFO: - Episode: 105/200, Reward: 200.000, Step: 200 +2022-11-13 16:29:10 - r - INFO: - Current episode 105 has the best eval reward: 200.000 +2022-11-13 16:29:11 - r - INFO: - Episode: 106/200, Reward: 200.000, Step: 200 +2022-11-13 16:29:13 - r - INFO: - Episode: 107/200, Reward: 200.000, Step: 200 +2022-11-13 16:29:14 - r - INFO: - Episode: 108/200, Reward: 200.000, Step: 200 +2022-11-13 16:29:16 - r - INFO: - Episode: 109/200, Reward: 200.000, Step: 200 +2022-11-13 16:29:17 - r - INFO: - Episode: 110/200, Reward: 200.000, Step: 200 +2022-11-13 16:29:20 - r - INFO: - Episode: 111/200, Reward: 200.000, Step: 200 
+2022-11-13 16:29:21 - r - INFO: - Episode: 112/200, Reward: 200.000, Step: 200 +2022-11-13 16:29:22 - r - INFO: - Episode: 113/200, Reward: 200.000, Step: 200 +2022-11-13 16:29:23 - r - INFO: - Episode: 114/200, Reward: 200.000, Step: 200 +2022-11-13 16:29:25 - r - INFO: - Episode: 115/200, Reward: 200.000, Step: 200 +2022-11-13 16:29:26 - r - INFO: - Current episode 115 has the best eval reward: 200.000 +2022-11-13 16:29:27 - r - INFO: - Episode: 116/200, Reward: 200.000, Step: 200 +2022-11-13 16:29:29 - r - INFO: - Episode: 117/200, Reward: 200.000, Step: 200 +2022-11-13 16:29:30 - r - INFO: - Episode: 118/200, Reward: 200.000, Step: 200 +2022-11-13 16:29:31 - r - INFO: - Episode: 119/200, Reward: 200.000, Step: 200 +2022-11-13 16:29:33 - r - INFO: - Episode: 120/200, Reward: 200.000, Step: 200 +2022-11-13 16:29:34 - r - INFO: - Current episode 120 has the best eval reward: 200.000 +2022-11-13 16:29:35 - r - INFO: - Episode: 121/200, Reward: 200.000, Step: 200 +2022-11-13 16:29:37 - r - INFO: - Episode: 122/200, Reward: 200.000, Step: 200 +2022-11-13 16:29:38 - r - INFO: - Episode: 123/200, Reward: 200.000, Step: 200 +2022-11-13 16:29:39 - r - INFO: - Episode: 124/200, Reward: 200.000, Step: 200 +2022-11-13 16:29:41 - r - INFO: - Episode: 125/200, Reward: 200.000, Step: 200 +2022-11-13 16:29:43 - r - INFO: - Episode: 126/200, Reward: 200.000, Step: 200 +2022-11-13 16:29:45 - r - INFO: - Episode: 127/200, Reward: 200.000, Step: 200 +2022-11-13 16:29:46 - r - INFO: - Episode: 128/200, Reward: 200.000, Step: 200 +2022-11-13 16:29:47 - r - INFO: - Episode: 129/200, Reward: 200.000, Step: 200 +2022-11-13 16:29:49 - r - INFO: - Episode: 130/200, Reward: 200.000, Step: 200 +2022-11-13 16:29:51 - r - INFO: - Episode: 131/200, Reward: 200.000, Step: 200 +2022-11-13 16:29:53 - r - INFO: - Episode: 132/200, Reward: 200.000, Step: 200 +2022-11-13 16:29:54 - r - INFO: - Episode: 133/200, Reward: 200.000, Step: 200 +2022-11-13 16:29:55 - r - INFO: - Episode: 134/200, Reward: 
200.000, Step: 200 +2022-11-13 16:29:57 - r - INFO: - Episode: 135/200, Reward: 200.000, Step: 200 +2022-11-13 16:29:59 - r - INFO: - Episode: 136/200, Reward: 200.000, Step: 200 +2022-11-13 16:30:01 - r - INFO: - Episode: 137/200, Reward: 185.000, Step: 185 +2022-11-13 16:30:02 - r - INFO: - Episode: 138/200, Reward: 193.000, Step: 193 +2022-11-13 16:30:03 - r - INFO: - Episode: 139/200, Reward: 192.000, Step: 192 +2022-11-13 16:30:04 - r - INFO: - Episode: 140/200, Reward: 200.000, Step: 200 +2022-11-13 16:30:07 - r - INFO: - Episode: 141/200, Reward: 200.000, Step: 200 +2022-11-13 16:30:08 - r - INFO: - Episode: 142/200, Reward: 200.000, Step: 200 +2022-11-13 16:30:10 - r - INFO: - Episode: 143/200, Reward: 200.000, Step: 200 +2022-11-13 16:30:11 - r - INFO: - Episode: 144/200, Reward: 191.000, Step: 191 +2022-11-13 16:30:12 - r - INFO: - Episode: 145/200, Reward: 200.000, Step: 200 +2022-11-13 16:30:15 - r - INFO: - Episode: 146/200, Reward: 184.000, Step: 184 +2022-11-13 16:30:17 - r - INFO: - Episode: 147/200, Reward: 198.000, Step: 198 +2022-11-13 16:30:18 - r - INFO: - Episode: 148/200, Reward: 200.000, Step: 200 +2022-11-13 16:30:19 - r - INFO: - Episode: 149/200, Reward: 200.000, Step: 200 +2022-11-13 16:30:21 - r - INFO: - Episode: 150/200, Reward: 192.000, Step: 192 +2022-11-13 16:30:23 - r - INFO: - Episode: 151/200, Reward: 186.000, Step: 186 +2022-11-13 16:30:25 - r - INFO: - Episode: 152/200, Reward: 200.000, Step: 200 +2022-11-13 16:30:26 - r - INFO: - Episode: 153/200, Reward: 194.000, Step: 194 +2022-11-13 16:30:27 - r - INFO: - Episode: 154/200, Reward: 199.000, Step: 199 +2022-11-13 16:30:29 - r - INFO: - Episode: 155/200, Reward: 183.000, Step: 183 +2022-11-13 16:30:32 - r - INFO: - Episode: 156/200, Reward: 173.000, Step: 173 +2022-11-13 16:30:33 - r - INFO: - Episode: 157/200, Reward: 197.000, Step: 197 +2022-11-13 16:30:34 - r - INFO: - Episode: 158/200, Reward: 200.000, Step: 200 +2022-11-13 16:30:36 - r - INFO: - Episode: 159/200, Reward: 
200.000, Step: 200 +2022-11-13 16:30:37 - r - INFO: - Episode: 160/200, Reward: 196.000, Step: 196 +2022-11-13 16:30:40 - r - INFO: - Episode: 161/200, Reward: 200.000, Step: 200 +2022-11-13 16:30:42 - r - INFO: - Episode: 162/200, Reward: 200.000, Step: 200 +2022-11-13 16:30:43 - r - INFO: - Episode: 163/200, Reward: 194.000, Step: 194 +2022-11-13 16:30:44 - r - INFO: - Episode: 164/200, Reward: 185.000, Step: 185 +2022-11-13 16:30:45 - r - INFO: - Episode: 165/200, Reward: 173.000, Step: 173 +2022-11-13 16:30:48 - r - INFO: - Episode: 166/200, Reward: 192.000, Step: 192 +2022-11-13 16:30:49 - r - INFO: - Episode: 167/200, Reward: 164.000, Step: 164 +2022-11-13 16:30:50 - r - INFO: - Episode: 168/200, Reward: 188.000, Step: 188 +2022-11-13 16:30:52 - r - INFO: - Episode: 169/200, Reward: 189.000, Step: 189 +2022-11-13 16:30:53 - r - INFO: - Episode: 170/200, Reward: 197.000, Step: 197 +2022-11-13 16:30:55 - r - INFO: - Episode: 171/200, Reward: 187.000, Step: 187 +2022-11-13 16:30:57 - r - INFO: - Episode: 172/200, Reward: 200.000, Step: 200 +2022-11-13 16:30:58 - r - INFO: - Episode: 173/200, Reward: 195.000, Step: 195 +2022-11-13 16:30:59 - r - INFO: - Episode: 174/200, Reward: 200.000, Step: 200 +2022-11-13 16:31:01 - r - INFO: - Episode: 175/200, Reward: 195.000, Step: 195 +2022-11-13 16:31:03 - r - INFO: - Episode: 176/200, Reward: 200.000, Step: 200 +2022-11-13 16:31:05 - r - INFO: - Episode: 177/200, Reward: 200.000, Step: 200 +2022-11-13 16:31:06 - r - INFO: - Episode: 178/200, Reward: 200.000, Step: 200 +2022-11-13 16:31:07 - r - INFO: - Episode: 179/200, Reward: 200.000, Step: 200 +2022-11-13 16:31:09 - r - INFO: - Episode: 180/200, Reward: 200.000, Step: 200 +2022-11-13 16:31:11 - r - INFO: - Episode: 181/200, Reward: 200.000, Step: 200 +2022-11-13 16:31:13 - r - INFO: - Episode: 182/200, Reward: 200.000, Step: 200 +2022-11-13 16:31:14 - r - INFO: - Episode: 183/200, Reward: 200.000, Step: 200 +2022-11-13 16:31:15 - r - INFO: - Episode: 184/200, Reward: 
200.000, Step: 200 +2022-11-13 16:31:17 - r - INFO: - Episode: 185/200, Reward: 173.000, Step: 173 +2022-11-13 16:31:19 - r - INFO: - Episode: 186/200, Reward: 200.000, Step: 200 +2022-11-13 16:31:21 - r - INFO: - Episode: 187/200, Reward: 200.000, Step: 200 +2022-11-13 16:31:22 - r - INFO: - Episode: 188/200, Reward: 200.000, Step: 200 +2022-11-13 16:31:23 - r - INFO: - Episode: 189/200, Reward: 200.000, Step: 200 +2022-11-13 16:31:24 - r - INFO: - Episode: 190/200, Reward: 200.000, Step: 200 +2022-11-13 16:31:26 - r - INFO: - Current episode 190 has the best eval reward: 200.000 +2022-11-13 16:31:27 - r - INFO: - Episode: 191/200, Reward: 200.000, Step: 200 +2022-11-13 16:31:29 - r - INFO: - Episode: 192/200, Reward: 200.000, Step: 200 +2022-11-13 16:31:30 - r - INFO: - Episode: 193/200, Reward: 200.000, Step: 200 +2022-11-13 16:31:31 - r - INFO: - Episode: 194/200, Reward: 200.000, Step: 200 +2022-11-13 16:31:33 - r - INFO: - Episode: 195/200, Reward: 200.000, Step: 200 +2022-11-13 16:31:34 - r - INFO: - Current episode 195 has the best eval reward: 200.000 +2022-11-13 16:31:35 - r - INFO: - Episode: 196/200, Reward: 200.000, Step: 200 +2022-11-13 16:31:37 - r - INFO: - Episode: 197/200, Reward: 200.000, Step: 200 +2022-11-13 16:31:38 - r - INFO: - Episode: 198/200, Reward: 200.000, Step: 200 +2022-11-13 16:31:39 - r - INFO: - Episode: 199/200, Reward: 200.000, Step: 200 +2022-11-13 16:31:40 - r - INFO: - Episode: 200/200, Reward: 200.000, Step: 200 +2022-11-13 16:31:42 - r - INFO: - Current episode 200 has the best eval reward: 200.000 +2022-11-13 16:31:42 - r - INFO: - Finish training! 
diff --git a/projects/codes/PER_DQN/Train_CartPole-v1_PER_DQN_20221113-162804/models/checkpoint.pt b/projects/codes/PER_DQN/Train_CartPole-v1_PER_DQN_20221113-162804/models/checkpoint.pt new file mode 100644 index 0000000..acaef5b Binary files /dev/null and b/projects/codes/PER_DQN/Train_CartPole-v1_PER_DQN_20221113-162804/models/checkpoint.pt differ diff --git a/projects/codes/PER_DQN/Train_CartPole-v1_PER_DQN_20221113-162804/results/learning_curve.png b/projects/codes/PER_DQN/Train_CartPole-v1_PER_DQN_20221113-162804/results/learning_curve.png new file mode 100644 index 0000000..6f666e3 Binary files /dev/null and b/projects/codes/PER_DQN/Train_CartPole-v1_PER_DQN_20221113-162804/results/learning_curve.png differ diff --git a/projects/codes/PER_DQN/Train_CartPole-v1_PER_DQN_20221113-162804/results/res.csv b/projects/codes/PER_DQN/Train_CartPole-v1_PER_DQN_20221113-162804/results/res.csv new file mode 100644 index 0000000..1c3339f --- /dev/null +++ b/projects/codes/PER_DQN/Train_CartPole-v1_PER_DQN_20221113-162804/results/res.csv @@ -0,0 +1,201 @@ +episodes,rewards,steps +0,18.0,18 +1,35.0,35 +2,13.0,13 +3,20.0,20 +4,24.0,24 +5,10.0,10 +6,20.0,20 +7,19.0,19 +8,30.0,30 +9,10.0,10 +10,16.0,16 +11,16.0,16 +12,12.0,12 +13,28.0,28 +14,22.0,22 +15,14.0,14 +16,9.0,9 +17,13.0,13 +18,19.0,19 +19,10.0,10 +20,10.0,10 +21,12.0,12 +22,9.0,9 +23,12.0,12 +24,11.0,11 +25,11.0,11 +26,13.0,13 +27,11.0,11 +28,13.0,13 +29,20.0,20 +30,16.0,16 +31,9.0,9 +32,16.0,16 +33,15.0,15 +34,12.0,12 +35,12.0,12 +36,16.0,16 +37,13.0,13 +38,18.0,18 +39,18.0,18 +40,48.0,48 +41,52.0,52 +42,33.0,33 +43,15.0,15 +44,18.0,18 +45,22.0,22 +46,19.0,19 +47,19.0,19 +48,11.0,11 +49,9.0,9 +50,10.0,10 +51,10.0,10 +52,10.0,10 +53,10.0,10 +54,9.0,9 +55,17.0,17 +56,75.0,75 +57,28.0,28 +58,30.0,30 +59,54.0,54 +60,22.0,22 +61,28.0,28 +62,26.0,26 +63,32.0,32 +64,30.0,30 +65,29.0,29 +66,28.0,28 +67,38.0,38 +68,28.0,28 +69,22.0,22 +70,40.0,40 +71,27.0,27 +72,24.0,24 +73,47.0,47 +74,127.0,127 +75,48.0,48 +76,27.0,27 
+77,65.0,65 +78,75.0,75 +79,47.0,47 +80,34.0,34 +81,38.0,38 +82,24.0,24 +83,47.0,47 +84,35.0,35 +85,103.0,103 +86,64.0,64 +87,59.0,59 +88,200.0,200 +89,200.0,200 +90,200.0,200 +91,200.0,200 +92,200.0,200 +93,200.0,200 +94,200.0,200 +95,200.0,200 +96,200.0,200 +97,200.0,200 +98,200.0,200 +99,200.0,200 +100,200.0,200 +101,200.0,200 +102,200.0,200 +103,200.0,200 +104,200.0,200 +105,200.0,200 +106,200.0,200 +107,200.0,200 +108,200.0,200 +109,200.0,200 +110,200.0,200 +111,200.0,200 +112,200.0,200 +113,200.0,200 +114,200.0,200 +115,200.0,200 +116,200.0,200 +117,200.0,200 +118,200.0,200 +119,200.0,200 +120,200.0,200 +121,200.0,200 +122,200.0,200 +123,200.0,200 +124,200.0,200 +125,200.0,200 +126,200.0,200 +127,200.0,200 +128,200.0,200 +129,200.0,200 +130,200.0,200 +131,200.0,200 +132,200.0,200 +133,200.0,200 +134,200.0,200 +135,200.0,200 +136,185.0,185 +137,193.0,193 +138,192.0,192 +139,200.0,200 +140,200.0,200 +141,200.0,200 +142,200.0,200 +143,191.0,191 +144,200.0,200 +145,184.0,184 +146,198.0,198 +147,200.0,200 +148,200.0,200 +149,192.0,192 +150,186.0,186 +151,200.0,200 +152,194.0,194 +153,199.0,199 +154,183.0,183 +155,173.0,173 +156,197.0,197 +157,200.0,200 +158,200.0,200 +159,196.0,196 +160,200.0,200 +161,200.0,200 +162,194.0,194 +163,185.0,185 +164,173.0,173 +165,192.0,192 +166,164.0,164 +167,188.0,188 +168,189.0,189 +169,197.0,197 +170,187.0,187 +171,200.0,200 +172,195.0,195 +173,200.0,200 +174,195.0,195 +175,200.0,200 +176,200.0,200 +177,200.0,200 +178,200.0,200 +179,200.0,200 +180,200.0,200 +181,200.0,200 +182,200.0,200 +183,200.0,200 +184,173.0,173 +185,200.0,200 +186,200.0,200 +187,200.0,200 +188,200.0,200 +189,200.0,200 +190,200.0,200 +191,200.0,200 +192,200.0,200 +193,200.0,200 +194,200.0,200 +195,200.0,200 +196,200.0,200 +197,200.0,200 +198,200.0,200 +199,200.0,200 diff --git a/projects/codes/PER_DQN/config/CartPole-v1_PER_DQN_Test.yaml b/projects/codes/PER_DQN/config/CartPole-v1_PER_DQN_Test.yaml new file mode 100644 index 0000000..a1db2ab --- /dev/null +++ 
b/projects/codes/PER_DQN/config/CartPole-v1_PER_DQN_Test.yaml @@ -0,0 +1,22 @@ +general_cfg: + algo_name: PER_DQN + device: cpu + env_name: CartPole-v1 + mode: test + load_checkpoint: true + load_path: Train_CartPole-v1_PER_DQN_20221113-162804 + max_steps: 200 + save_fig: true + seed: 0 + show_fig: false + test_eps: 10 + train_eps: 200 +algo_cfg: + batch_size: 64 + buffer_size: 100000 + epsilon_decay: 500 + epsilon_end: 0.01 + epsilon_start: 0.95 + gamma: 0.95 + lr: 0.0001 + target_update: 4 diff --git a/projects/codes/PER_DQN/config/CartPole-v1_PER_DQN_Train.yaml b/projects/codes/PER_DQN/config/CartPole-v1_PER_DQN_Train.yaml new file mode 100644 index 0000000..553622f --- /dev/null +++ b/projects/codes/PER_DQN/config/CartPole-v1_PER_DQN_Train.yaml @@ -0,0 +1,22 @@ +general_cfg: + algo_name: PER_DQN + device: cuda + env_name: CartPole-v1 + mode: train + load_checkpoint: false + load_path: Train_CartPole-v1_PER_DQN_20221026-054757 + max_steps: 200 + save_fig: true + seed: 0 + show_fig: false + test_eps: 10 + train_eps: 200 +algo_cfg: + batch_size: 64 + buffer_size: 100000 + epsilon_decay: 500 + epsilon_end: 0.01 + epsilon_start: 0.95 + gamma: 0.95 + lr: 0.0001 + target_update: 4 diff --git a/projects/codes/PER_DQN/config/config.py b/projects/codes/PER_DQN/config/config.py new file mode 100644 index 0000000..a92c7e0 --- /dev/null +++ b/projects/codes/PER_DQN/config/config.py @@ -0,0 +1,38 @@ +#!/usr/bin/env python +# coding=utf-8 +''' +Author: JiangJi +Email: johnjim0816@gmail.com +Date: 2022-10-30 00:37:33 +LastEditor: JiangJi +LastEditTime: 2022-10-30 01:19:08 +Discription: default parameters of DQN +''' +from common.config import GeneralConfig,AlgoConfig +class GeneralConfigDQN(GeneralConfig): + def __init__(self) -> None: + self.env_name = "CartPole-v1" # name of environment + self.algo_name = "PER_DQN" # name of algorithm + self.mode = "train" # train or test + self.seed = 1 # random seed + self.device = "cuda" # device to use + self.train_eps = 200 # number of 
episodes for training + self.test_eps = 10 # number of episodes for testing + self.max_steps = 200 # max steps for each episode + self.load_checkpoint = False + self.load_path = "tasks" # path to load model + self.show_fig = False # show figure or not + self.save_fig = True # save figure or not + +class AlgoConfigDQN(AlgoConfig): + def __init__(self) -> None: + # set epsilon_start=epsilon_end can obtain fixed epsilon=epsilon_end + self.epsilon_start = 0.95 # epsilon start value + self.epsilon_end = 0.01 # epsilon end value + self.epsilon_decay = 500 # epsilon decay rate + self.hidden_dim = 256 # hidden_dim for MLP + self.gamma = 0.95 # discount factor + self.lr = 0.0001 # learning rate + self.buffer_size = 100000 # size of replay buffer + self.batch_size = 64 # batch size + self.target_update = 4 # target network update frequency diff --git a/projects/codes/PER_DQN/per_dqn.py b/projects/codes/PER_DQN/per_dqn.py new file mode 100644 index 0000000..6fbf651 --- /dev/null +++ b/projects/codes/PER_DQN/per_dqn.py @@ -0,0 +1,139 @@ +#!/usr/bin/env python +# coding=utf-8 +''' +Author: DingLi +Email: wangzhongren@sjtu.edu.cn +Date: 2022-10-31 22:54:00 +LastEditor: DingLi +LastEditTime: 2022-11-14 10:43:18 +Discription: CartPole-v1 +''' + +''' +@Author: John +@Email: johnjim0816@gmail.com +@Date: 2020-06-12 00:50:49 +@LastEditor: John +LastEditTime: 2022-10-26 07:50:24 +@Discription: +@Environment: python 3.7.7 +''' +'''off-policy +''' + +import torch +import torch.nn as nn +import torch.optim as optim +import random +import math +import numpy as np + +class PER_DQN: + def __init__(self,model,memory,cfg): + + self.n_actions = cfg.n_actions + self.device = torch.device(cfg.device) + self.gamma = cfg.gamma + ## e-greedy parameters + self.sample_count = 0 # sample count for epsilon decay + self.epsilon = cfg.epsilon_start + self.sample_count = 0 + self.epsilon_start = cfg.epsilon_start + self.epsilon_end = cfg.epsilon_end + self.epsilon_decay = cfg.epsilon_decay + 
self.batch_size = cfg.batch_size + self.policy_net = model.to(self.device) + self.target_net = model.to(self.device) + ## copy parameters from policy net to target net + for target_param, param in zip(self.target_net.parameters(),self.policy_net.parameters()): + target_param.data.copy_(param.data) + # self.target_net.load_state_dict(self.policy_net.state_dict()) # or use this to copy parameters + self.optimizer = optim.Adam(self.policy_net.parameters(), lr=cfg.lr) + self.memory = memory + self.update_flag = False + + def sample_action(self, state): + ''' sample action with e-greedy policy + ''' + self.sample_count += 1 + # epsilon must decay(linear,exponential and etc.) for balancing exploration and exploitation + self.epsilon = self.epsilon_end + (self.epsilon_start - self.epsilon_end) * \ + math.exp(-1. * self.sample_count / self.epsilon_decay) + if random.random() > self.epsilon: + with torch.no_grad(): + state = torch.tensor(state, device=self.device, dtype=torch.float32).unsqueeze(dim=0) + q_values = self.policy_net(state) + action = q_values.max(1)[1].item() # choose action corresponding to the maximum q value + else: + action = random.randrange(self.n_actions) + return action + # @torch.no_grad() + # def sample_action(self, state): + # ''' sample action with e-greedy policy + # ''' + # self.sample_count += 1 + # # epsilon must decay(linear,exponential and etc.) for balancing exploration and exploitation + # self.epsilon = self.epsilon_end + (self.epsilon_start - self.epsilon_end) * \ + # math.exp(-1. 
* self.sample_count / self.epsilon_decay) + # if random.random() > self.epsilon: + # state = torch.tensor(state, device=self.device, dtype=torch.float32).unsqueeze(dim=0) + # q_values = self.policy_net(state) + # action = q_values.max(1)[1].item() # choose action corresponding to the maximum q value + # else: + # action = random.randrange(self.n_actions) + # return action + def predict_action(self,state): + ''' predict action + ''' + with torch.no_grad(): + state = torch.tensor(state, device=self.device, dtype=torch.float32).unsqueeze(dim=0) + q_values = self.policy_net(state) + action = q_values.max(1)[1].item() # choose action corresponding to the maximum q value + return action + def update(self): + if len(self.memory) < self.batch_size: # when transitions in memory donot meet a batch, not update + # print ("self.batch_size = ", self.batch_size) + return + else: + if not self.update_flag: + print("Begin to update!") + self.update_flag = True + # sample a batch of transitions from replay buffer + (state_batch, action_batch, reward_batch, next_state_batch, done_batch), idxs_batch, is_weights_batch = self.memory.sample( + self.batch_size) + state_batch = torch.tensor(np.array(state_batch), device=self.device, dtype=torch.float) # shape(batchsize,n_states) + action_batch = torch.tensor(action_batch, device=self.device).unsqueeze(1) # shape(batchsize,1) + reward_batch = torch.tensor(reward_batch, device=self.device, dtype=torch.float).unsqueeze(1) # shape(batchsize,1) + next_state_batch = torch.tensor(np.array(next_state_batch), device=self.device, dtype=torch.float) # shape(batchsize,n_states) + done_batch = torch.tensor(np.float32(done_batch), device=self.device).unsqueeze(1) # shape(batchsize,1) + q_value_batch = self.policy_net(state_batch).gather(dim=1, index=action_batch) # shape(batchsize,1),requires_grad=True + next_max_q_value_batch = self.target_net(next_state_batch).max(1)[0].detach().unsqueeze(1) + expected_q_value_batch = reward_batch + self.gamma * 
next_max_q_value_batch* (1-done_batch) + + loss = torch.mean(torch.pow((q_value_batch - expected_q_value_batch) * torch.from_numpy(is_weights_batch).cuda(), 2)) + # loss = nn.MSELoss()(q_value_batch, expected_q_value_batch) # shape same to + + abs_errors = np.sum(np.abs(q_value_batch.cpu().detach().numpy() - expected_q_value_batch.cpu().detach().numpy()), axis=1) + self.memory.batch_update(idxs_batch, abs_errors) + + # backpropagation + self.optimizer.zero_grad() + loss.backward() + # clip to avoid gradient explosion + for param in self.policy_net.parameters(): + param.grad.data.clamp_(-1, 1) + self.optimizer.step() + if self.sample_count % self.target_update == 0: # target net update, target_update means "C" in pseucodes + self.target_net.load_state_dict(self.policy_net.state_dict()) + + def save_model(self, fpath): + from pathlib import Path + # create path + Path(fpath).mkdir(parents=True, exist_ok=True) + torch.save(self.target_net.state_dict(), f"{fpath}/checkpoint.pt") + + def load_model(self, fpath): + checkpoint = torch.load(f"{fpath}/checkpoint.pt",map_location=self.device) + self.target_net.load_state_dict(checkpoint) + for target_param, param in zip(self.target_net.parameters(), self.policy_net.parameters()): + param.data.copy_(target_param.data) diff --git a/projects/codes/PER_DQN/task0.py b/projects/codes/PER_DQN/task0.py new file mode 100644 index 0000000..8b6247b --- /dev/null +++ b/projects/codes/PER_DQN/task0.py @@ -0,0 +1,104 @@ +#!/usr/bin/env python +# coding=utf-8 +''' +Author: DingLi +Email: wangzhongren@sjtu.edu.cn +Date: 2022-10-31 22:54:00 +LastEditor: DingLi +LastEditTime: 2022-11-14 10:45:11 +Discription: CartPole-v1 +''' + +''' +Author: JiangJi +Email: johnjim0816@gmail.com +Date: 2022-10-12 11:09:54 +LastEditor: JiangJi +LastEditTime: 2022-10-30 01:29:25 +Discription: CartPole-v1,Acrobot-v1 +''' +import sys,os +curr_path = os.path.dirname(os.path.abspath(__file__)) # current path +parent_path = os.path.dirname(curr_path) # parent path 
+sys.path.append(parent_path) # add to system path +import gym +import torch + +from common.utils import all_seed,merge_class_attrs +from common.models import MLP +from common.memories import ReplayBuffer, ReplayTree +from common.launcher import Launcher +from envs.register import register_env +from per_dqn import PER_DQN +from config.config import GeneralConfigDQN,AlgoConfigDQN +class Main(Launcher): + def __init__(self) -> None: + super().__init__() + self.cfgs['general_cfg'] = merge_class_attrs(self.cfgs['general_cfg'],GeneralConfigDQN()) + self.cfgs['algo_cfg'] = merge_class_attrs(self.cfgs['algo_cfg'],AlgoConfigDQN()) + def env_agent_config(self,cfg,logger): + ''' create env and agent + ''' + register_env(cfg.env_name) + env = gym.make(cfg.env_name,new_step_api=True) # create env + all_seed(env,seed=cfg.seed) # set random seed + try: # state dimension + n_states = env.observation_space.n # print(hasattr(env.observation_space, 'n')) + except AttributeError: + n_states = env.observation_space.shape[0] # print(hasattr(env.observation_space, 'shape')) + n_actions = env.action_space.n # action dimension + logger.info(f"n_states: {n_states}, n_actions: {n_actions}") # print info + # update to cfg paramters + setattr(cfg, 'n_states', n_states) + setattr(cfg, 'n_actions', n_actions) + # cfg.update({"n_states":n_states,"n_actions":n_actions}) # update to cfg paramters + model = MLP(n_states,n_actions,hidden_dim=cfg.hidden_dim) + memory = ReplayTree(cfg.buffer_size) # replay SumTree + agent = PER_DQN(model,memory,cfg) # create agent + return env, agent + + def train_one_episode(self,env, agent, cfg): + ''' train one episode + ''' + ep_step = 0 + state = env.reset() # reset and obtain initial state + for _ in range(cfg.max_steps): + ep_step += 1 + action = agent.sample_action(state) # sample action + next_state, reward, terminated, truncated , info = env.step(action) # update env and return transitions under new_step_api of OpenAI Gym + + policy_val = 
agent.policy_net(torch.tensor(state, device = cfg.device))[action] + target_val = agent.target_net(torch.tensor(next_state, device = cfg.device)) + + if terminated: + error = abs(policy_val - reward) + else: + error = abs(policy_val - reward - cfg.gamma * torch.max(target_val)) + agent.memory.push(error.cpu().detach().numpy(), (state, action, reward, + next_state, terminated)) # save transitions + state = next_state # update next state for env + agent.update() # update agent + ep_reward += reward # + if terminated: + break + return agent, ep_reward, ep_step + + def test_one_episode(self, env, agent, cfg): + ep_reward = 0 # reward per episode + ep_step = 0 + state = env.reset() # reset and obtain initial state + for _ in range(cfg.max_steps): + ep_step+=1 + action = agent.predict_action(state) # predict action + next_state, reward, terminated, _, _ = env.step(action) + state = next_state + ep_reward += reward + if terminated: + break + return agent, ep_reward, ep_step + + +if __name__ == "__main__": + main = Main() + main.run() + diff --git a/projects/codes/common/launcher.py b/projects/codes/common/launcher.py index 148d200..2c0793c 100644 --- a/projects/codes/common/launcher.py +++ b/projects/codes/common/launcher.py @@ -36,11 +36,11 @@ class Launcher: ep_reward = 0 ep_step = 0 return agent,ep_reward,ep_step - def test_one_episode(self,env, agent, cfg): + def test_one_episode(self, env, agent, cfg): ep_reward = 0 ep_step = 0 return agent,ep_reward,ep_step - def evaluate(self,env, agent, cfg): + def evaluate(self, env, agent, cfg): sum_eval_reward = 0 for _ in range(cfg.eval_eps): _,eval_ep_reward,_ = self.test_one_episode(env, agent, cfg) diff --git a/projects/codes/common/memories.py b/projects/codes/common/memories.py index 1317dd1..fd50ab9 100644 --- a/projects/codes/common/memories.py +++ b/projects/codes/common/memories.py @@ -10,6 +10,7 @@ LastEditTime: 2022-08-28 23:44:06 @Environment: python 3.7.7 ''' import random +import numpy as np from collections 
import deque class ReplayBuffer: def __init__(self, capacity): @@ -71,4 +72,136 @@ class PGReplay(ReplayBufferQue): ''' sample all the transitions ''' batch = list(self.buffer) - return zip(*batch) \ No newline at end of file + return zip(*batch) + +class SumTree: + '''SumTree for the per(Prioritized Experience Replay) DQN. + This SumTree code is a modified version and the original code is from: + https://github.com/MorvanZhou/Reinforcement-learning-with-tensorflow/blob/master/contents/5.2_Prioritized_Replay_DQN/RL_brain.py + ''' + def __init__(self, capacity: int): + self.capacity = capacity + self.data_pointer = 0 + self.n_entries = 0 + self.tree = np.zeros(2 * capacity - 1) + self.data = np.zeros(capacity, dtype = object) + + def update(self, tree_idx, p): + '''Update the sampling weight + ''' + change = p - self.tree[tree_idx] + self.tree[tree_idx] = p + + while tree_idx != 0: + tree_idx = (tree_idx - 1) // 2 + self.tree[tree_idx] += change + + def add(self, p, data): + '''Adding new data to the sumTree + ''' + tree_idx = self.data_pointer + self.capacity - 1 + self.data[self.data_pointer] = data + # print ("tree_idx=", tree_idx) + # print ("nonzero = ", np.count_nonzero(self.tree)) + self.update(tree_idx, p) + + self.data_pointer += 1 + if self.data_pointer >= self.capacity: + self.data_pointer = 0 + + if self.n_entries < self.capacity: + self.n_entries += 1 + + def get_leaf(self, v): + '''Sampling the data + ''' + parent_idx = 0 + while True: + cl_idx = 2 * parent_idx + 1 + cr_idx = cl_idx + 1 + if cl_idx >= len(self.tree): + leaf_idx = parent_idx + break + else: + if v <= self.tree[cl_idx] : + parent_idx = cl_idx + else: + v -= self.tree[cl_idx] + parent_idx = cr_idx + + data_idx = leaf_idx - self.capacity + 1 + return leaf_idx, self.tree[leaf_idx], self.data[data_idx] + + def total(self): + return int(self.tree[0]) + +class ReplayTree: + '''ReplayTree for the per(Prioritized Experience Replay) DQN. 
+ ''' + def __init__(self, capacity): + self.capacity = capacity # the capacity for memory replay + self.tree = SumTree(capacity) + self.abs_err_upper = 1. + + ## hyper parameter for calculating the importance sampling weight + self.beta_increment_per_sampling = 0.001 + self.alpha = 0.6 + self.beta = 0.4 + self.epsilon = 0.01 + self.abs_err_upper = 1. + + def __len__(self): + ''' return the num of storage + ''' + return self.tree.total() + + def push(self, error, sample): + '''Push the sample into the replay according to the importance sampling weight + ''' + p = (np.abs(error) + self.epsilon) ** self.alpha + self.tree.add(p, sample) + + + def sample(self, batch_size): + '''This is for sampling a batch data and the original code is from: + https://github.com/rlcode/per/blob/master/prioritized_memory.py + ''' + pri_segment = self.tree.total() / batch_size + + priorities = [] + batch = [] + idxs = [] + + is_weights = [] + + self.beta = np.min([1., self.beta + self.beta_increment_per_sampling]) + min_prob = np.min(self.tree.tree[-self.tree.capacity:]) / self.tree.total() + + for i in range(batch_size): + a = pri_segment * i + b = pri_segment * (i+1) + + s = random.uniform(a, b) + idx, p, data = self.tree.get_leaf(s) + + priorities.append(p) + batch.append(data) + idxs.append(idx) + prob = p / self.tree.total() + + sampling_probabilities = np.array(priorities) / self.tree.total() + is_weights = np.power(self.tree.n_entries * sampling_probabilities, -self.beta) + is_weights /= is_weights.max() + + return zip(*batch), idxs, is_weights + + def batch_update(self, tree_idx, abs_errors): + '''Update the importance sampling weight + ''' + abs_errors += self.epsilon + + clipped_errors = np.minimum(abs_errors, self.abs_err_upper) + ps = np.power(clipped_errors, self.alpha) + + for ti, p in zip(tree_idx, ps): + self.tree.update(ti, p) diff --git a/projects/codes/common/utils.py b/projects/codes/common/utils.py index 62c343d..212ec5f 100644 --- a/projects/codes/common/utils.py 
+++ b/projects/codes/common/utils.py @@ -5,7 +5,7 @@ Author: John Email: johnjim0816@gmail.com Date: 2021-03-12 16:02:24 LastEditor: John -LastEditTime: 2022-10-26 07:38:17 +LastEditTime: 2022-11-14 10:27:43 Discription: Environment: ''' @@ -179,6 +179,8 @@ def all_seed(env,seed = 1): import torch import numpy as np import random + if seed == 0: + return # print(f"seed = {seed}") env.seed(seed) # env config np.random.seed(seed) diff --git a/projects/notebooks/1.QLearning.ipynb b/projects/notebooks/1.QLearning.ipynb deleted file mode 100644 index 4116815..0000000 --- a/projects/notebooks/1.QLearning.ipynb +++ /dev/null @@ -1,454 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 1、定义算法\n", - "强化学习算法的模式都比较固定,一般包括sample(即训练时采样动作),predict(测试时预测动作),update(算法更新)以及保存模型和加载模型等几个方法,其中对于每种算法samle和update的方式是不相同,而其他方法就大同小异。" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "import math\n", - "import torch\n", - "from collections import defaultdict\n", - "\n", - "class QLearning(object):\n", - " def __init__(self,n_states,\n", - " n_actions,cfg):\n", - " self.n_actions = n_actions \n", - " self.lr = cfg.lr # 学习率\n", - " self.gamma = cfg.gamma \n", - " self.epsilon = cfg.epsilon_start\n", - " self.sample_count = 0 \n", - " self.epsilon_start = cfg.epsilon_start\n", - " self.epsilon_end = cfg.epsilon_end\n", - " self.epsilon_decay = cfg.epsilon_decay\n", - " self.Q_table = defaultdict(lambda: np.zeros(n_actions)) # 用嵌套字典存放状态->动作->状态-动作值(Q值)的映射,即Q表\n", - " def sample_action(self, state):\n", - " ''' 采样动作,训练时用\n", - " '''\n", - " self.sample_count += 1\n", - " self.epsilon = self.epsilon_end + (self.epsilon_start - self.epsilon_end) * \\\n", - " math.exp(-1. 
* self.sample_count / self.epsilon_decay) # epsilon是会递减的,这里选择指数递减\n", - " # e-greedy 策略\n", - " if np.random.uniform(0, 1) > self.epsilon:\n", - " action = np.argmax(self.Q_table[str(state)]) # 选择Q(s,a)最大对应的动作\n", - " else:\n", - " action = np.random.choice(self.n_actions) # 随机选择动作\n", - " return action\n", - " def predict_action(self,state):\n", - " ''' 预测或选择动作,测试时用\n", - " '''\n", - " action = np.argmax(self.Q_table[str(state)])\n", - " return action\n", - " def update(self, state, action, reward, next_state, done):\n", - " Q_predict = self.Q_table[str(state)][action] \n", - " if done: # 终止状态\n", - " Q_target = reward \n", - " else:\n", - " Q_target = reward + self.gamma * np.max(self.Q_table[str(next_state)]) \n", - " self.Q_table[str(state)][action] += self.lr * (Q_target - Q_predict)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 2、定义训练\n", - "强化学习算法的训练方式也比较固定,如下:\n", - "```python\n", - "for i_ep in range(train_eps): # 遍历每个回合\n", - " state = env.reset() # 重置环境,即开始新的回合\n", - " while True: # 对于一些比较复杂的游戏可以设置每回合最大的步长,例如while ep_step<100,就是每回合最大步长为100。\n", - " action = agent.sample(state) # 根据算法采样一个动作\n", - " next_state, reward, done, _ = env.step(action) # 与环境进行一次动作交互\n", - " agent.memory.push(state, action, reward, next_state, done) # 记录memory\n", - " agent.update(state, action, reward, next_state, done) # 算法更新\n", - " state = next_state # 更新状态\n", - " if done:\n", - " break\n", - "```\n", - "首先对于每个回合,回合开始时环境需要重置,好比我们每次开一把游戏需要从头再来一样。我们可以设置智能体在每回合数的最大步长,尤其是对于比较复杂的游戏,这样做的好处之一就是帮助智能体在训练中快速收敛,比如我们先验地知道最优解的大概步数,那么理论上智能体收敛时也应该是这个步数附近,设置最大步数可以方便智能体接近这个最优解。在每个回合中,智能体首先需要采样(sample),或者说采用探索策略例如常见的$\\varepsilon$-greedy策略或者UCB探索策略等等。采样的过程是将当前的状态state作为输入,智能体采样输出动作action。然后环境根据采样出来的动作反馈出下一个状态以及相应的reward等信息。接下来对于具有memory的智能体例如包含replay memory的DQN来说,需要将相应的transition(记住这个词,中文不好翻译,通常是状态、动作、奖励等信息)。紧接着就是智能体更新,对于深度强化学习此时一般从memory中随机采样一些transition进行更新,对于Q learning一般是采样上一次的transition。更新公式是比较关键的部分,但是也很通用,一般基于值的算法更新公式都是一个套路如下:\n", - "$$\n", - "y_{j}= 
\\begin{cases}r_{j} & \\text { for terminal } s_{t+1} \\\\ r_{j}+\\gamma \\max _{a^{\\prime}} Q\\left(s_{t+1}, a^{\\prime} ; \\theta\\right) & \\text { for non-terminal } s_{t+1}\\end{cases}\n", - "$$\n", - "智能体更新完之后,通常需要更新状态,即```state = next_state```,然后会检查是否完成了这一回合的游戏,即```done==True```,注意完成并不代表这回合成功,也有可能是失败的太离谱,等同学们有了自定义强化学习环境的经验就知道了(等你长大就知道了XD)。\n", - "如果需要记录奖励、损失等等的话可以再加上,如下方代码,实际项目中更多地使用tensorboard来记录相应的数据,甚至于笔者就在这些教学代码中使用过,但是看起来有些繁琐,容易给大家增加不必要的学习难度,因此学有余力以及需要在项目研究中做强化学习的可以去看看,也很简单。\n", - "此外稍微复杂一些的强化学习不是一次性写完代码就能收敛的,这时需要我们做一个调参侠。为了检查我们参数调得好不好,可以在终端print出奖励、损失以及epsilon等随着回合数的变化,这点说明一下强化学习的训练过程一般都是先探索然后收敛的,官方的话就是权衡exploration and exploitation。e-greedy策略的做法就是前期探索,然后逐渐减小探索率至慢慢收敛,也就是这个epsilon。这个值越大比如0.9就说明智能体90%的概率在随机探索,通常情况下会设置三个值,epsilon_start、epsilon_end以及epsilon_decay,即初始值、终止值和衰减率,其中初始值一般是0.95不变,终止值是0.01,也就是说即使在收敛阶段也让智能体保持很小概率的探索,这样做的原因就是智能体已经学出了一个不错的策略,但是保不齐还有更好的策略,好比我们知道要出人头地学历高比较重要,但是“人还是要有梦想的,万一实现了呢”,总是存在意外的可能,对吧。回归正题,比较关键的是epsilon_decay这个衰减率,这个epsilon衰减太快了学来的策略往往过拟合,好比一条只能选择一朵花的花道上,你早早选择了一朵看起来还可以的花,却错过了后面更多的好花。但是衰减的太慢会影响收敛的速度,好比你走过了花道的尽头也还没选出一朵花来,相比前者不如更甚。当然强化学习的调参相比于深度学习只能说是有过之无不及,比较复杂,不止epsilon这一个,这就需要同学们的耐心学习了。\n", - "强化学习测试的代码跟训练基本上是一样的,因此我放到同一个代码段里。相比于训练代码,测试代码主要有以下几点不同:1、测试模型的过程是不需要更新的,这个是不言而喻的;2、测试代码不需要采样(sample)动作,相比之代替的是预测(sample)动作,其区别就是采样动作时可能会使用各种策略例如$\\varepsilon$-greedy策略,而预测动作不需要,只需要根据训练时学习好的Q表或者网络模型代入状态得到动作即可;3、测试过程终端一般只需要看奖励,不需要看epislon等,反正它在测试中也是无意义的。" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [], - "source": [ - "def train(cfg,env,agent):\n", - " print('开始训练!')\n", - " print(f'环境:{cfg.env_name}, 算法:{cfg.algo_name}, 设备:{cfg.device}')\n", - " rewards = [] # 记录奖励\n", - " for i_ep in range(cfg.train_eps):\n", - " ep_reward = 0 # 记录每个回合的奖励\n", - " state = env.reset() # 重置环境,即开始新的回合\n", - " while True:\n", - " action = agent.sample_action(state) # 根据算法采样一个动作\n", - " next_state, reward, done, _ = env.step(action) # 与环境进行一次动作交互\n", - " agent.update(state, action, reward, next_state, done) # 
Q学习算法更新\n", - " state = next_state # 更新状态\n", - " ep_reward += reward\n", - " if done:\n", - " break\n", - " rewards.append(ep_reward)\n", - " if (i_ep+1)%20==0:\n", - " print(f\"回合:{i_ep+1}/{cfg.train_eps},奖励:{ep_reward:.1f},Epsilon:{agent.epsilon:.3f}\")\n", - " print('完成训练!')\n", - " return {\"rewards\":rewards}\n", - "def test(cfg,env,agent):\n", - " print('开始测试!')\n", - " print(f'环境:{cfg.env_name}, 算法:{cfg.algo_name}, 设备:{cfg.device}')\n", - " rewards = [] # 记录所有回合的奖励\n", - " for i_ep in range(cfg.test_eps):\n", - " ep_reward = 0 # 记录每个episode的reward\n", - " state = env.reset() # 重置环境, 重新开一局(即开始新的一个回合)\n", - " while True:\n", - " action = agent.predict_action(state) # 根据算法选择一个动作\n", - " next_state, reward, done, _ = env.step(action) # 与环境进行一个交互\n", - " state = next_state # 更新状态\n", - " ep_reward += reward\n", - " if done:\n", - " break\n", - " rewards.append(ep_reward)\n", - " print(f\"回合数:{i_ep+1}/{cfg.test_eps}, 奖励:{ep_reward:.1f}\")\n", - " print('完成测试!')\n", - " return {\"rewards\":rewards}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 3、定义环境\n", - "\n", - "OpenAI Gym中其实集成了很多强化学习环境,足够大家学习了,但是在做强化学习的应用中免不了要自己创建环境,比如在本项目中其实不太好找到Qlearning能学出来的环境,Qlearning实在是太弱了,需要足够简单的环境才行,因此本项目写了一个环境,大家感兴趣的话可以看一下,一般环境接口最关键的部分即使reset和step。" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": {}, - "outputs": [], - "source": [ - "import gym\n", - "import turtle\n", - "import numpy as np\n", - "\n", - "# turtle tutorial : https://docs.python.org/3.3/library/turtle.html\n", - "\n", - "class CliffWalkingWapper(gym.Wrapper):\n", - " def __init__(self, env):\n", - " gym.Wrapper.__init__(self, env)\n", - " self.t = None\n", - " self.unit = 50\n", - " self.max_x = 12\n", - " self.max_y = 4\n", - "\n", - " def draw_x_line(self, y, x0, x1, color='gray'):\n", - " assert x1 > x0\n", - " self.t.color(color)\n", - " self.t.setheading(0)\n", - " self.t.up()\n", - " self.t.goto(x0, y)\n", - " self.t.down()\n", - " self.t.forward(x1 - 
x0)\n", - "\n", - " def draw_y_line(self, x, y0, y1, color='gray'):\n", - " assert y1 > y0\n", - " self.t.color(color)\n", - " self.t.setheading(90)\n", - " self.t.up()\n", - " self.t.goto(x, y0)\n", - " self.t.down()\n", - " self.t.forward(y1 - y0)\n", - "\n", - " def draw_box(self, x, y, fillcolor='', line_color='gray'):\n", - " self.t.up()\n", - " self.t.goto(x * self.unit, y * self.unit)\n", - " self.t.color(line_color)\n", - " self.t.fillcolor(fillcolor)\n", - " self.t.setheading(90)\n", - " self.t.down()\n", - " self.t.begin_fill()\n", - " for i in range(4):\n", - " self.t.forward(self.unit)\n", - " self.t.right(90)\n", - " self.t.end_fill()\n", - "\n", - " def move_player(self, x, y):\n", - " self.t.up()\n", - " self.t.setheading(90)\n", - " self.t.fillcolor('red')\n", - " self.t.goto((x + 0.5) * self.unit, (y + 0.5) * self.unit)\n", - "\n", - " def render(self):\n", - " if self.t == None:\n", - " self.t = turtle.Turtle()\n", - " self.wn = turtle.Screen()\n", - " self.wn.setup(self.unit * self.max_x + 100,\n", - " self.unit * self.max_y + 100)\n", - " self.wn.setworldcoordinates(0, 0, self.unit * self.max_x,\n", - " self.unit * self.max_y)\n", - " self.t.shape('circle')\n", - " self.t.width(2)\n", - " self.t.speed(0)\n", - " self.t.color('gray')\n", - " for _ in range(2):\n", - " self.t.forward(self.max_x * self.unit)\n", - " self.t.left(90)\n", - " self.t.forward(self.max_y * self.unit)\n", - " self.t.left(90)\n", - " for i in range(1, self.max_y):\n", - " self.draw_x_line(\n", - " y=i * self.unit, x0=0, x1=self.max_x * self.unit)\n", - " for i in range(1, self.max_x):\n", - " self.draw_y_line(\n", - " x=i * self.unit, y0=0, y1=self.max_y * self.unit)\n", - "\n", - " for i in range(1, self.max_x - 1):\n", - " self.draw_box(i, 0, 'black')\n", - " self.draw_box(self.max_x - 1, 0, 'yellow')\n", - " self.t.shape('turtle')\n", - "\n", - " x_pos = self.s % self.max_x\n", - " y_pos = self.max_y - 1 - int(self.s / self.max_x)\n", - " self.move_player(x_pos, y_pos)" 
- ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": {}, - "outputs": [], - "source": [ - "import gym\n", - "def env_agent_config(cfg,seed=1):\n", - " '''创建环境和智能体\n", - " Args:\n", - " cfg ([type]): [description]\n", - " seed (int, optional): 随机种子. Defaults to 1.\n", - " Returns:\n", - " env [type]: 环境\n", - " agent : 智能体\n", - " ''' \n", - " env = gym.make(cfg.env_name) \n", - " env = CliffWalkingWapper(env)\n", - " env.seed(seed) # 设置随机种子\n", - " n_states = env.observation_space.n # 状态维度\n", - " n_actions = env.action_space.n # 动作维度\n", - " agent = QLearning(n_states,n_actions,cfg)\n", - " return env,agent" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 4、设置参数\n", - "\n", - "到这里所有qlearning模块就算完成了,下面需要设置一些参数,方便大家“炼丹”,其中默认的是笔者已经调好的~。另外为了定义了一个画图函数,用来描述奖励的变化。" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": {}, - "outputs": [], - "source": [ - "import datetime\n", - "import argparse\n", - "import matplotlib.pyplot as plt\n", - "import seaborn as sns\n", - "def get_args():\n", - " \"\"\" \n", - " \"\"\"\n", - " curr_time = datetime.datetime.now().strftime(\"%Y%m%d-%H%M%S\") # 获取当前时间\n", - " parser = argparse.ArgumentParser(description=\"hyperparameters\") \n", - " parser.add_argument('--algo_name',default='Q-learning',type=str,help=\"name of algorithm\")\n", - " parser.add_argument('--env_name',default='CliffWalking-v0',type=str,help=\"name of environment\")\n", - " parser.add_argument('--train_eps',default=400,type=int,help=\"episodes of training\") # 训练的回合数\n", - " parser.add_argument('--test_eps',default=20,type=int,help=\"episodes of testing\") # 测试的回合数\n", - " parser.add_argument('--gamma',default=0.90,type=float,help=\"discounted factor\") # 折扣因子\n", - " parser.add_argument('--epsilon_start',default=0.95,type=float,help=\"initial value of epsilon\") # e-greedy策略中初始epsilon\n", - " parser.add_argument('--epsilon_end',default=0.01,type=float,help=\"final value of epsilon\") # 
e-greedy策略中的终止epsilon\n", - " parser.add_argument('--epsilon_decay',default=300,type=int,help=\"decay rate of epsilon\") # e-greedy策略中epsilon的衰减率\n", - " parser.add_argument('--lr',default=0.1,type=float,help=\"learning rate\")\n", - " parser.add_argument('--device',default='cpu',type=str,help=\"cpu or cuda\") \n", - " args = parser.parse_args([]) \n", - " return args\n", - "curr_time = datetime.datetime.now().strftime(\"%Y%m%d-%H%M%S\") # 获取当前时间\n", - "\n", - "def smooth(data, weight=0.9): \n", - " '''用于平滑曲线,类似于Tensorboard中的smooth\n", - "\n", - " Args:\n", - " data (List):输入数据\n", - " weight (Float): 平滑权重,处于0-1之间,数值越高说明越平滑,一般取0.9\n", - "\n", - " Returns:\n", - " smoothed (List): 平滑后的数据\n", - " '''\n", - " last = data[0] # First value in the plot (first timestep)\n", - " smoothed = list()\n", - " for point in data:\n", - " smoothed_val = last * weight + (1 - weight) * point # 计算平滑值\n", - " smoothed.append(smoothed_val) \n", - " last = smoothed_val \n", - " return smoothed\n", - "\n", - "def plot_rewards(rewards,cfg, tag='train'):\n", - " sns.set()\n", - " plt.figure() # 创建一个图形实例,方便同时多画几个图\n", - " plt.title(\"learning curve on {} of {} for {}\".format(\n", - " cfg.device, cfg.algo_name, cfg.env_name))\n", - " plt.xlabel('epsiodes')\n", - " plt.plot(rewards, label='rewards')\n", - " plt.plot(smooth(rewards), label='smoothed')\n", - " plt.legend()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 5、我准备好了!\n", - "\n", - "到现在我们真的可以像海绵宝宝那样大声说出来“我准备好了!“,跟着注释来看下效果吧~。" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "开始训练!\n", - "环境:CliffWalking-v0, 算法:Q-learning, 设备:cpu\n", - "回合:20/400,奖励:-45.0,Epsilon:0.010\n", - "回合:40/400,奖励:-34.0,Epsilon:0.010\n", - "回合:60/400,奖励:-47.0,Epsilon:0.010\n", - "回合:80/400,奖励:-88.0,Epsilon:0.010\n", - "回合:100/400,奖励:-53.0,Epsilon:0.010\n", - "回合:120/400,奖励:-23.0,Epsilon:0.010\n", - 
"回合:140/400,奖励:-20.0,Epsilon:0.010\n", - "回合:160/400,奖励:-29.0,Epsilon:0.010\n", - "回合:180/400,奖励:-42.0,Epsilon:0.010\n", - "回合:200/400,奖励:-28.0,Epsilon:0.010\n", - "回合:220/400,奖励:-20.0,Epsilon:0.010\n", - "回合:240/400,奖励:-20.0,Epsilon:0.010\n", - "回合:260/400,奖励:-17.0,Epsilon:0.010\n", - "回合:280/400,奖励:-13.0,Epsilon:0.010\n", - "回合:300/400,奖励:-13.0,Epsilon:0.010\n", - "回合:320/400,奖励:-13.0,Epsilon:0.010\n", - "回合:340/400,奖励:-13.0,Epsilon:0.010\n", - "回合:360/400,奖励:-13.0,Epsilon:0.010\n", - "回合:380/400,奖励:-13.0,Epsilon:0.010\n", - "回合:400/400,奖励:-14.0,Epsilon:0.010\n", - "完成训练!\n", - "开始测试!\n", - "环境:CliffWalking-v0, 算法:Q-learning, 设备:cpu\n", - "回合数:1/20, 奖励:-13.0\n", - "回合数:2/20, 奖励:-13.0\n", - "回合数:3/20, 奖励:-13.0\n", - "回合数:4/20, 奖励:-13.0\n", - "回合数:5/20, 奖励:-13.0\n", - "回合数:6/20, 奖励:-13.0\n", - "回合数:7/20, 奖励:-13.0\n", - "回合数:8/20, 奖励:-13.0\n", - "回合数:9/20, 奖励:-13.0\n", - "回合数:10/20, 奖励:-13.0\n", - "回合数:11/20, 奖励:-13.0\n", - "回合数:12/20, 奖励:-13.0\n", - "回合数:13/20, 奖励:-13.0\n", - "回合数:14/20, 奖励:-13.0\n", - "回合数:15/20, 奖励:-13.0\n", - "回合数:16/20, 奖励:-13.0\n", - "回合数:17/20, 奖励:-13.0\n", - "回合数:18/20, 奖励:-13.0\n", - "回合数:19/20, 奖励:-13.0\n", - "回合数:20/20, 奖励:-13.0\n", - "完成测试!\n" - ] - }, - { - "data": { - "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAYUAAAEXCAYAAABCjVgAAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAAsTAAALEwEAmpwYAABOu0lEQVR4nO3dd3gU5drA4d/MtmyyIYVsIKH3zlEISg8gQkJoFhQLqCjYRT0HCSgWMCJ8qKAiqOcoHtQDigKKICoKKCACIogKQqS3VNKTbfP9seySQAJJCNnIPvd1cZHMzs4882Z3nnnLvKNomqYhhBBCAKqvAxBCCFFzSFIQQgjhJUlBCCGElyQFIYQQXpIUhBBCeElSEEII4SVJ4Tw2b97M4MGDq2Vfc+bMYdmyZdWyL1G23NxcRo4cSUJCAqtXrz7n9d27d/PAAw8wcOBAhg0bxi233MI333xT5vY+/fRT7r333ksZsteTTz7Jxo0bq2RbFyqHili6dCk333wzw4YNY9CgQUyZMoXs7GwAXnvtNaZOnQrA2LFj2bdvHwDPPPMM/fr145VXXmHevHn06dOHUaNGcdVVV+Fyubzb/uc//0n79u3Jzc31LnvuueeYOXNmmfEU/14nJibyn//855x1quv7WFBQwD//+U/i4+MZOHDgeT9L1UXv6wCE2/jx430dggD++OMP0tPT+frrr8957ffff+eee+5h+vTpxMbGApCcnMz48eNJSUnh1ltvre5wS0hKSqqybZ2vHCpi/vz5rF+/nrlz5xIREYHdbueFF17gvvvu48MPPyyx7ttvv+39efHixaxdu5a6detyzTXXMGvWLDp16kS3bt3Ys2cPbdq0weFw8OOPP3L11Vfz/fffEx8fD8CmTZu8iaayquv7+NprrxEYGMiqVas4duwYN910E+3bt6du3brVsv/SSFIoJ5vNxqxZs9iyZQtOp5O2bdvy1FNPYbFY+O6773jzzTex2WxkZGQwfPhwHn30UTZv3kxSUhKBgYHk5+czYcIE5s6dS4MGDdi7dy82m42nn36arl27kpiYSIsWLbj77rvp0KED48aNY8OGDaSkpDB69GjuvPNOnE4nM2fO5NtvvyU4OJiOHTuSnJzMwoULz4n3zTffZOnSpej1eho1asSLL77I119/zerVq3nzzTcB91Ws5/fExEROnTrF4cOH6dGjB0uWLGH16tVYrVYAbrrpJh588EG6detWZjkUZ7fbefHFF9m0aRM6nY6OHTsyadIkLBYL/fr147rrrmPTpk0cP36c+Ph4nnjiiXOOYf/+/Tz99NNkZGSgqir3338/gwYNol+/fvTv35+tW7eSk5PDXXfdxa233srmzZuZNm0aK1asADjn9+K++eYbXn/9dZxOJxaLxRvb5MmTOXnyJMOGDWPx4sUEBAR43zN79mzGjh3rTQgAzZo1Y+bMmdx5553ccMMNmEymMj9DOTk5JCUl8eeff2K32+nWrRtPPPEEer2eJUuWsHjxYux2O1lZWYwdO5Zbb72VTz/9lCVLllBQUIDFYuG6667j66+/RlVVDh48iMFgYMaMGbRs2ZJRo0Zx22230b59e+68805iY2PZsWMHWVlZPPbYYwwaNIiCggKeeeYZduzYQXBwMM2bNwfgxRdf9Mb5119/nVMOP/zwwznl1bFjR1577TV++eUXUlJSaNWqFbNmzfJuJz8/3/s5jIiIAMBgMPDEE0/w9ddfY7PZSpRPv379mDNnDtOnT0fTNMaOHUt4eDgnT57kySefZPz48fTs2ZPNmzfTpk0btm3bRqtWrYiLi+Pbb78lPj6ekydPkp6eTqdOncr8XpZl+vTp7N69mzfeeINp06ZV2fdx//79jBw5ku+//x6j0YjT6aRv37688847fPPNN94yi46OpmfPnqxatYq77rqrzDgvNWk+Kqe33noLnU7Hp59+ymeffUZkZCSzZs1C0zTeeecdXnzxRT799FMWL17MW2+9RUZGBgB79+7
lpZde4rPPPsNoNLJz507GjBnDsmXLuPHGG3n99dfP2ZfNZiMsLIxFixbx6quv8tJLL1FUVMTHH3/Mb7/9xooVK1i0aBGHDx8uNdY1a9Z4Y1mxYgX169fn/fffv+AxFhYW8sUXXzB58mSuvfZaPvvsM8B9NZyamkqvXr3KLIezzZs3j5SUFJYvX87y5ctxuVwlqvT5+fl8+OGHLFq0iPfff7/UY3n88ceJi4vjiy++4K233uLll1/2NhMUFhbyySefsHDhQl599VX27NlzwePzSE5O5plnnuG1117j888/55FHHuGBBx4gMjKS559/noYNG7J8+fISCQFg27ZtdOnS5ZzttW3bFkVRSE5OPu9+X3jhBdq1a8enn37KsmXLyMzM5N133yUvL4+PP/6Yt956i2XLlvHKK6/wf//3f9737du3j4ULF3pPNlu2bGHKlCmsWLGCTp06ldr8cfjwYXr27MmSJUv417/+5d3eG2+8gdPpZNWqVSxYsIDff//9nPc2bdq0RDkcPXq01PLy/C2OHj3K0qVLz/kc/PXXXwQEBNC4ceMSy81mM0OHDsVoNJZaTp4axHvvvcd7773n/YwNGjSI3r1789NPPwHw3Xff0adPH2JjY/n+++9xOp1s2rSJHj16oNPpzvu9LE7TNJ577jmOHj3K22+/TVBQUInXL/b72KRJE1q0aMG3334LwA8//EC9evVo3rw5x48fJyoqyrtunTp1OHHiRKnbqS5SUyintWvXkpOT422ztdvt1K5dG0VRmD9/PmvXrmXFihUkJyejaRoFBQUAREVFUa9ePe92oqOjadOmDeA+mSxdurTU/V1zzTUAtGvXDpvNRn5+PuvWrWPYsGHeq9Gbb7651FrCpk2biIuLIyQkBIBJkyYB7prB+XTu3Nn784gRI3juuee4++67+eSTT7j++utRVbXMcjjb+vXreeyxxzAYDACMGjWKBx988Jzjq1OnDrVr1yYrK4sGDRp4Xz916hS7d+9mxIgRgLsci7e33nrrrSiKQt26denVqxcbNmygXbt25z0+jx9//JGuXbt699etWzfCw8PZtWsXiqKUaxulcTqd53197dq1/PrrryxZsgRwJzaAoKAg5s+fz7p16zhw4AC7d+8mPz/f+75WrVqVqIm1a9fO27zQtm3bUpt4DAaDt0bTtm1bTp06BcC6deuYNGkSqqp6ax4XSqjnKy+AK664Ar3+3FOJqqol2v+rQq9evXjhhRdwuVx89913/Pvf/yYyMpLo6Gh27drFjz/+SJ8+fS74vSxuwYIFpKens2zZsjIT1cV8H8H9fVq6dClxcXF8+umn3s91abMMqapvr9UlKZSTy+Vi8uTJ3i9aXl4eRUVF5Ofnc91119G/f39iYmK44YYb+Oabb7x/7MDAwBLbKX71qShKqR8KwPtB85ykNE0754tX1odHp9OVOLllZ2eTnZ19zv7sdnuJ9xWPNSYmBofDwc6dO71XQucrh7OdfTJwuVwl9le8maW0cvAca/Hj+Ouvv4iOji7xumfbqqpe8Pg8SitzTdNwOBzeJFaaTp068dNPP9G+fXsAUlNTiYiIYM+ePdjtdlq2bMmTTz7pPVmOHDmyxHG6XC7mzJlDs2bNALx/kxMnTnDzzTdz00030blzZ+Li4vjuu++876vMZ8hgMHg/H8XLUK/Xl1i/PCeg85VXafF5NG/eHIfDwcGDB2nUqJF3eVFREQ899BDPP//8Bfd9tvDwcBo0aMBXX32FTqfzJqo+ffqwbds2fvrpJ5544okLfi+L69KlC506dWLSpEksXry41M9ARb6PJ0+eZNy4cd7lb731FnFxcUyfPp3k5GS2bNniba6LiooiNTXV20ybkpJC69atK1wuVUmaj8qpZ8+efPDBB9hsNlwuF1OmTOHll1/m4MGD5Obm8uijj9KvXz9++ukn7zpVLTY2ls8++wybzYbD4SizltG9e3e+/vprb/X+tddeY8GCBYSHh7N3716Kiop
wOBwlTjylGTFiBNOmTaNVq1bek3FZ5XC2Xr16sWjRIux2Oy6Xiw8++IAePXqU+1gtFgvt2rXzjgA5fvw4t9xyCzk5OQDe5ceOHWPDhg307t2b8PBwjh07Rnp6OpqmlTmSo2vXrmzYsMFb3ff0bfzjH/84b0yPP/4477zzDuvWrQPcV5jXX389EyZM4NFHH8VkMpGUlORtMrvllltKvL9nz54sWLAATdOw2Wzcf//9vP/+++zatYvw8HAeeOABevXq5f27XKjmURmxsbF88sknuFwuCgoKWLFixQVrR5UtL6PRyNixY5k8eTJpaWmAuynmhRdeoKCggDp16lTqGHr37s0bb7xBnz59vMv69OnD8uXLiYiIIDw8vELfy/bt23P77bcTHBxcanNuWcr6PtapU8f7GVi+fDl16tTBZDKRkJBAYmIiAwYMwGw2A+4ayOLFiwE4ceIE33//PX379q1UuVQVqSmU0wMPPMCMGTO47rrrcDqdtGnThsTERAIDA+nTpw/x8fHUqlWLhg0b0rx5cw4ePFhmVbSyrr/+evbv38/w4cMJDAykfv363g9XcbGxsezbt897UmrevDnTpk0jICCALl26EB8fj9Vq5eqrrz5v08Hw4cN5+eWXS5z0yyqHs91///3MmDGD4cOH43A46NixI1OmTKnQ8b700ks899xzLFy4EEVRSEpK8l5RHTlyhOuvv57CwkKeeuopmjZtCrivzm+44QasVmuJk0ZxzZs355lnnuGhhx7C6XQSEBDA/PnzCQ4OPm88bdq04d///jdz5szhhRdeQFVVgoKCCA8PZ8eOHRw7dsybPEvz5JNPkpSUxJAhQ7Db7XTv3p177rkHh8PBkiVLiIuLw2w207FjR++Jrarde++9TJ06lSFDhhAcHEzt2rXP6Ts5W2XLC+C+++7DbDZz9913A+5awlVXXcUbb7xR6WPo3bs3c+fOLfF56tChA2lpad4RYK1atarQ91JRFF544QWGDx9eYiDB+ZT3++gxYsQI3n//fZ599lnvsocffphnn32WhIQEnE4nEyZMoGHDhuUsiUtDkamz/z5++OEH0tPTGTZsGADPP/88JpOJCRMm+Diy6uUZpdKhQwdfh+K1bds2GjZs6E1aNdUXX3yBxWIhNjYWl8vFww8/TI8ePXw+nPbv6HL9Pkrz0d9IixYtWLZsGUOHDiUhIYHMzEzuu+8+X4clcHfS1/SEAO7P0Lx58xg2bBiDBw8mMjLS2+kpKuZy/T5KTUEIIYSX1BSEEEJ4SVIQQgjhJUlBCCGElyQFIYQQXpfFfQqZmXm4XBXvL69d20J6eu6FV6xmElfFSFwVU1Pjgpob2+UUl6oqhIUFlfn6ZZEUXC6tUknB896aSOKqGImrYmpqXFBzY/OXuKT5SAghhJckBSGEEF41Iil8/vnnDBo0iGuvvZYPPvjA1+EIIYTf8nmfwsmTJ3nllVf49NNPMRqNjBw5kquvvtr7RCghhBDVx+c1hY0bN9K1a1dCQ0MJDAxk4MCBfPnll74OSwgh/JLPawopKSklJhKLjIxk586dFdpG7dqWC69UBqv1wtP/+oLEVTF/x7g0TbuoJ71djOJxuVwaikKpsZQW4/nWL0tZx6ppGsVnX9M0jYgIC5rmHjpZfGSN5+1lzdamqmcegHO+9crLs3/PfqvyM1baiCFP/GWtp6pKqeVY1Z99nyeF0ubjq+gXJT09t1LDsqzWYFJTcyr8vkutvHFpmkahzYnZ5H6aVmpWIZGhZc/nrmka+45m0TS6FrrTT4nKyC4kI7uI5vVDSl2/+N/iQnEdTsnlj4OZXBtTv8T7zt7OkZRcTmTkE9M60rssO9/GkrXJ3NS3ORazwXtsGdmFhNcKwGzSs+6Xo9SzWmgWXcu7vbxCO0czCgkL1GMNNbPh1+MU2pxc07k+AOt+OUqjusE0rluL73ccIz27kHU7jjH59s6YTXq+3HyIQV0bsWRdMvuOnCI6IohOLa1c1cb9AJi
jaXkowJbdKeQXOogIDaB5vRDqhJn5dP1fGPU6+naqR0GRgyXrkunfuT6tG4bhcLo4lF5A87oWfv0rnf+u3kPXtnX4YedxhnRvTPcOUcz48Gc6NKnN3iOnCDIb6N+5PvlFDopsdnbuOU6LOibaNY3gg2/2YcvN4ubYRhxKLyI738Gf+47Qqq6JsCA9x09m4HC6UBUdDSICaGA1k1foxKlBoc3BwWNZmI0KuFw0rxfM5j3phF95DQaDkcycIr7ZdpimUbWICDUTGKCnXkQQLpfGik0HCQ7QUZSXQ+OoEJo0qsuGXSc4kZ5PiMVIs+ha/Hn4FC6XkxbhcCotDavZSXQtlfTMHELNCvl5+ZhUFwbFQdMWTVmf24QdyenoVY1GtU3k5eSiFmZRS80nRC3Aondg0TvAXkiwwYXmsKNXnBhwYtK50CsuFJcDHS50isv9P6eX63QUOnWgKCiKgtPpQgNUtNP/XCiK5v1d8SxXzvwMoKDhRGVbYC8+PxZJqJpPqJpPLTWfQMWGWbETqBYRqNgwKk70ihM9TvSKCwNnfvfs07P9PMXCV4FDOHGqEH3hKYKVAoLUIoKUIixKEYXW1qw9EUKhzUmnllZSTxVwOMV9D4Jep9KucRg7k9PR61UMOpVOraxMvOOqCp/DVFU574W0z2dJXbp0KVu3biUpKQmAuXPnomkaDz30ULm34S9JISvPxpvLd3FHfGvqhAWybU8Kc5fuYvLtnTmVW8Qby3Zx37B2dGxWm+9+Pkr/mProdSrLvt/Pjn1p9OgQxf/W7OWOuFZEhpoJqxXAtPe2UlDk4N9P9EVVFV7/9Ff+OpbFFS2sfL/jGK+O74XZpCflVAErfzzEkZQcbu7XnKAAA9/9fJR+nesRVdt9I8zkt37kRIb72cIdm9UmK8/GkO6Nee/L3YwZ1IYim4NVG/ZxKL2QYKWQBuFGBl0VTd3IML7afpLffnM/+H5g18YcTs3nr7+OEqLmU89ai/r1Ivhh+0FMioMgvZNW9WvRokUjvti0H3teNiF6O82iLew7cgqAxnWDURWFEyfScKGgoGFUnBhwYFCcGBQHmqZiUuw4dUZcTichivvE5EKhUAnAZgwlL7+QDUUt2W5rgooLi1Lo/SJH6HKIVLOx6GwEqnYMWhEBip0A1YlOAafLRba5HvNPXEmwWogOF1G6TCJ12dQyKQTYs7CohQSr7vLQK05MigMjdkq5aKxS7+f2YIutGaDRNtyGKfcYYWoeVkMBwVoOZsVOiJpPmJqHXnHh1BQ+yb8Kk8lEa0sWzuw09IqTuqYCLK4cVMr3pMG/7FZq6W2EKTnoyniPCwWnasKGAfRGXIoel6qnwK7gQEdAgAmXokNTVLTT/+fbNFIzczHgJDBAT2GRg1CLicAAPRoqKKdP+4qChgLK6bRw+n/P724K9hP7aG44UWZ8Dp0ZuxqAUzXgVPS40Ln/V/Sn/9fh8qYehfwiB81sfxCgOErdpqZBhsvCtKzrMCs2nJpKEQY6Ng2nqSWPg7t3s9PWkMDAAHp2iMLucNE0uhZD+rS4/JLCyZMnueWWW1iyZAlms5mRI0cybdo0OnbsWO5t+DIp5OTbKLI5iTjrCn3jruN8v+M4E2/r5F2WdqqAJ+Zv4snRnXnniz+4soWVG/s0K/G+QydziKpbC8PpP8uPv52gef0QIkLMfPjNn3yz9QgJ3RrRqmEoa7YeYUdyOs2ia9GsXghfbTlMWLCJzq2sfLP1CHfGt8Yaaub//re9xD46NqvtvuLQqTic7iua5vXDaBppYuP2AzTUp5PjCsCiFtE82kJGdgH5BTbCdIXUJhOjXiXNZcHgcF/pWIygx0mOTaG2moNFLaRIM2BQnDg0FbNix6zYCFDsqIqGU1PQKVX/sTvzETjdjAAUaQZAQ0PBqRgodKnYNT12dKhoFGl6TIoDk8lAnXrRHMnVcehYJoGqjTA17/TJP5c0pwWLWnjOl1pT9eS5jBRqRkLCQslz6knLdVLk1DB
rhTTVlX5iAShSA8nSAgkJD+NYngFzYABh4SGgDyAoOJiMQoWvftyP2aQS37sdW/dmcio7n7TMPOJi2xJcKwRDgAmz2cyn65NpXj+E1o2tHEzJIzjQiEnvbv6whgeBquNoWj7vrd7LbY6PidDl8putHi2DsjHYi30HjIG4zKFkFukJsUYSEB7J0Vw9tj0/0ECf4V7HYOZooRmbS0fzVk3R1YpAsdRGDQxDCbCAMRBFb8Sl6tAZTCSfLOTPo9k4t3xEC/0JAq31iGzUmK37c7FGhNGseUOUwDCUwBCs9eqQlllU4dYCTdP4+c9UWjUMw2I24HS5vLXhykh8/VvaO3YRYDIxbGAnwuvXJ8tmRDEFgd5U4fh+2ZvG8mVraGs8SrPmjejYoQVKYAhKQDBKgIVlHyyhf9FXHHWEEa3PJM0ZzO/2evQKT0XNTwdgWX5nUkI78vCVedgP/Ixaqw4Nb3q8ypOCz5uP6tSpw2OPPcbo0aOx2+3ceOONFUoI1W3bnhQ++m4fSWO7otepPPn2ZnIL7LyT2M+7Tn6hg3+v+AMAu8OJQa8D4I+DmQB8s/UIx9PzOZ5+kOBAAxt3neCmfs1p3TCUZ9/dAsA7if347UAGb33+Oy3rh9C9QxTfbD0CwBebDvLFpjOPavzrWLb3Q3oqp5CN25JppU/nj03p/GUy0D/gIKFqPqdcgbQ0nIAUjYG1ijAqDoyKk1pKAeSBekAjPuysA87BfY49/Wx2pyGIoiIbgQY7DoOePM0IOiMFdo0og40sNYxUZxCqVoTNZUDFhc4azp40O00b1+XPY/nYCnLp2LElUXXD2bQ7nf2HUtG7bNRv3ACXpvDH/jSCAlTuGN6FQp2FJd/uJSzARe+YJnzw3UEGdGvBiVOFrFm3g8jawTz90ADSc138diCLPYdP0ffKeuh1KoEmHWYXLP9hP7FXRFMvPBC7w0VBkYMTGflEhASQX+RgZ3I618bUx6DX0VrT+PqTX/njYCZFdifWABu3GL4lTzMRFV0PS9PmaCYLOU4T1nr1UCwRBJ5ORnqdSjjQ4HTRrd52hC3rVxCly+SkMwQnKneMvAZD7XqgDyBYVYk4vW4E56oHXNM0h+BAIyHBJq5pz+nPlAuDvuQJ76bhZx4D2qa0jQENGoYy5roQvnt/B0MDfybaXIi5UXt00a3RRTZFtUSgGN0XN8UbE8OzC3luq4lmhpM0at2GofFXUyfXjtPlIjCk7OZKT4TNGwZxKt/JG/ldAHjwmg40bGWl51WlvMdgQlFsZW6zLIqi0LnVmebIi0kIAAZzEN+mtqdRSDA3Nu1CgDWYnIu4gLQEGjjotHKwwMqd9Vujb1jysa1HzS05mf8jDlR+M8fQMn873Ux7UcLaQYdBZG78hIEBOzHatlO02YUa0Qh9g/YXdYxl8XlSABgyZAhDhgzxdRjlsnD1HrLz7cxf/ht9rogmt8AO4L0yycwp4p9zN3jXn/3xTgZ0aYCmQYHN/SD2rNwi7+vrdxzjeHo+H3z1Jzf3OzMM94GX1xER4n527r6j2RxOzTsnFh1OYsyHCNZy0dIVRkUeItSRVnIlOxAIdk3FoLjI1EeSWaCRq5nJcxrRq5DisOBCoUF0BEcyCgmo25S/kg/RuFljoiND2bInlUOp+Tz3UH90pmDGzlyDAScv3N+b+qdPCr/+lc7/fbKTqXdfTZDNwfPvbeOW/i1oEGmhZYNQmp0uH0t6His2HqBBz9YYDTquaeMOs6DIQYBRh6IoWPem0jAyGENIAAbgrlvPPLP24TvcZdTc5SIlX6V7+yh0QSEo+Tm0b1qb9k1rn1NONxUrV4NexaA3UivI/ZzecKC+9cxVk6IoPHJjR5wuFycyCvhkbTKv7osjKEDPa8N6e9cLLLb9sr5EtYKMbCxqCcCEW67EGhqA8Twn0dI0rHNuJ+LZCaEijHqVbwvb8UNhS65o24B7+7W74HuCzAZyNDO/2BrTqFZdFEUlLNhUof16yhsgONBQ4bi
rmyXAHaPFXDWnyGDzmWO2mM89ftVg4oWs4QBc06I+i35uQqFmYO49/XG5ND7+5ig3BW1in7EdVw+/CV1o2c8Cv1g1Iin8nRgNOsDOz3+m8vOfqd7lmdlFRISa2X0os8T6fxzM9NYQPFJOFXh/Pp6eT93wQE5k5LN0/V/e5YU2J0dS8wi1GDmVa6OgyMGYgU05vuNHik6lEBN4hDDXKQLVM1dVeaZ6pEVfA/oAAqObsvNAFllZebS9oh2vf/YnRhw8cXdf2oQEoNepHErJoW54IL/sSyPYbKRdk3Cu0jSWfr+f7TYdjSOa0LlbEzp2cccS3bAeqak5xHdtQvLRLMKLneA6NK3Nm//q462xvPaouy/Cw3PlFlU7iLFDzj0RFV/3yhYXfqylTlUZ0efS3cuiU1XqRQQRGOCOy/N/RdQKPHMitIYEEFHBhHApGA06NBSKMGIsZ3Ix6tXTTY0ugipRDlAyEfwtksLpE3dQKSfwSm0v8PxJwdOaABBey0SOZsZs0qHXqWiqxm/2+jxzagSdWlrpfgkTAkhSKOFUbhG5+XbqR7qvHH/87QQBJj1XNHfXx/MK7WVepe3an8Gew6fKdRWXkV1U4vcb+zTj9U9/5VBKLrVrmahlMbH/WDbgPtmmnzhORNYfdPx1Gf+wZUMgFFrq8UdWIxp3uYbpq7MwKXZm3hWH0XDmT3pt2zP7KFx+gEKMWEPN3hgb160FQNe2db3rKYpC/5j6nMzIp8+V9QD3B7ZJVK0S8ZameDtr8ZP835k3KZgqfnKoFXTmatpSQ06ExT+fxmInovNRFIUgs56sXFulT5LBgcZSf66pPMdZ2gm8MswmPaqi4NK0UpOiyXDm7+KphQWdrq0oioJRr2JzuDAaLv2tZZfHN7eKPDFvIw6n5u0feOvz3wF3+76maTw8+/sy3/vf1XvOu22dquAspTN84FUNaNs4jDA1l0xXEKEmmHB1Hhu/Ws9Ptmb0yvyROo4/IQh0Ya3JbjqaHw5qXBfXmd6nr76nNMjDbNKXSAhlKU/SqhVo5P7hl6a98u8m0FT5mkJwkPtLrdepmAzlOwFfasVPKoYKnGCCAgzupBBQuZNk8fKrTFlWN0sVJwVVUbCY9WTn20tNrJ6agqoohJxOmsX3bTTo3EnhIpoOy6vm/3UuIZdLI7/Q4f2QOpyem15Kjqt3OF04nRc3WubqtnXYuKvkSJQJI6+gVbiTglUv8mzoPvY7IrC6Cihal8eVRoXOpgM4CkwYOw1D3/Af6CKbEgjceFbrS3RE2XOjezx8fQfyCksfDifK5jkJVqr56HRNITjQ4LOb1M6mU1XvBUpFTjCeZqOgSraxq8WOX60hZXE+Vd18BGAJNJKTby+1Cc6TrA0GtdRaiudirry1u4vh10lh0dd7+N9Xe3h1fC90xQaGF9mdJa7sjp5u27+QmNaRxLSy8tF3+0o0EY0b0paOzWqXSAombIQdWU/e2tUA5GhmmujTyFJCibptKv+a9xMtdUep1z2e2JiWF32sV7a8cDu9ONeZ5qNK1BQCq/Zqs6oYDToKihyn+8fKx5McLZWsKfzdeJJfVf7tLGYDgQH6UkdGeU76Jr1aai3F87eqSO2usvw6KWw+fZJOzyqkyO70Lt+xL513V/3h/f2HX49775AF6P2PKBpH1eK/X7qbjJ4bcxU//5lKnyvrERJkZNn3+73rjh7Yiq7t3G32owe0JCr3NzJ3rqe14Ri63zV09doS0OsuZvxvN+E5ydRqfiVXNmrHKcNBvskPZ5zlTFu+qH4X09FsNunR65QalxRMRndSqMgoJs9J8mKunKNqB150jbu6VHXzEbj7CvIK7aW+5qkBGPQ6bxkXL2uTN2lITeGS8nT+5RXa2Xc0y7t83S9HsdnP3G25ZtsR7/BQgNohZnp1jCI710ZosIkGkRYaRJ4Z1uipCnZtW8fbWatpGt31v1G0+39EG6FQM6Be+yiBTdxtQQbTAXZlNqRPgHs/gQEGd/ujn1y
Z1VTe5qNK1BQURSE40FhiOGZN4LnqrEg/x8WUg8fz91xd6fdWtxb1Q+natg7NoqvuomxEn2YlLj6L85wzjAZ3/1ObRmG0bBDqfd3gfV2SwiXlycT5hQ627k7xjg748/CZBHF3Qhv+88Uf/HYgw7ssJMiITlUZ2rNJqdv1VvVOZ3fNaadg9RycR3aha/APpvzaDB0ukhq08b7HbNSV+D/oIq5QRdU509FcueR8V3xrQis4pv9S84x0qUhNoVfHKOqEmUudtK28akq/SnlYzAbGDb3wPRwVEV4roMzXPH8Lz/8TbrmyxOtnahLSfHRJeaqGR9PyOJKaR58r67F2+1FcxWb+iAwzExlq5o8D7nsNruvdlJ4do867XVOxTiFNc1H43ds4j+zC1PVmDO36k7nDPYqpxPDA04kk4KyTkNQUfKt2SAD1IoJoHFW5mShLu5nO1zyftYp0NNezWqhnrfxsxOL8PCf9smoCnr9VdQxJ9fnzFHzJU1M4eXoSt6ZR51YVgwIMNImu5R1OGtPKesHRE2dqClD49Vwcf/2E8aqbMHaMR9Gd/yTvrSmYpaZQE5hNeqbdczXNos+dRfbvqnj7tagZvM1HZSTqM4lcmo8uKU9SSM8uBNx3EnqakDwsZgP1ig35LM9NWZ4aQIusH3GkbsN09c0YOsad9z2eNOPZfpDp4ttwhSiNyejpU/Dra8IaxVtTKOOkX501Bb8+4+h17gL2JAWzSY/FrCc4yMjR03MNBZn1JeZ5CTBeOFMrisJVxn00T92IvmkXDB3jSrSnXtOpPmlZBaW+17P9Ds1qU2hzeGMUoqqYDFJTqGnO7lM4m9QUqoln1nDPPQVmk55OrSKJqh3Ixl9PcPBkDjq15ORfFxqxoRXmEp/2HmGWFLKDGhLd995zOthuG3DufQeedTxNUx2b1aZjs5rXHi3+/irTpyAureKjjyrzelXy66RwNrNRx+iBrQDo2SGKvNMzoBZPChcaQVH083LCHCkAnGhwLfV0FSviv8cobvF3Vp0nGFE+F2o+Mlzg9ark15+Ksx8vFGA8cwI3m/TeB+eEWso3pNB16gT2374l2dyRKZk3UhhW/lk82zcNB6BOmO9n0hSXN6M0H9U4xgs0H5mkplA9tGLX5YpSdoGXd8bPos2LQW9gZ3BPsrXcCsXS98p6dGppLXcCEqKyPE2gUlOoOQwX+JsYqvE+Bf/+VBSrKZiN+ou6ucZxfA+Og9sxXjGYIp17PHdFHnSqKIokBFEtTNXYaSnKx3iBCe+aRtWiaXQtQqrh7ng/rymcodedPyG0bxJ+3pFAtu2fo5hrYewwAOXoviqKUIiq16l1JCfSci/4mRfVx2zSc9u1LflHGYNLmtcP4anRMdUSi38nhWJZodBW+pwkHo/ffEWZrznTD+E8sgtjlxtQ9EbvPQeadBuLGqhtk9pYyzHrr6hexSfd9CW/bj7SimUFm8N1njXPz/bLSjAEYGzrfjjPtV0aEBSgp0MNnOJACCHOR2oKF8mVnYrjr80YOgxEMbnvfG5YJ5jXHu19gXcKIUTN4981hSpo3rHt+goUFWOHgVUQkRBC+JZfJwW0M3MOVertLheO5M3oG12JGhRWZWEJIYSv+HfzEYACDw5vT53wwAq/33lyL1pBNvqmXao8NiGE8AX/TgqahoJC51aRlXq/468toDOgb9CxiiMTQgjf8O/mI9x3MleGprlw7N+KvkEHFKNMTSGEuDz4dVLQtMonBdfJZLT8U+ibVM8NJUIIUR38PCloVLar2b5/K6h69I2uqNKYhBDCl/w8KVSupqBpGo79W9HVb4dirHgHtRBC1FT+nRSoXD1By0lFy02XDmYhxGXHv5OCVrkbFZzH9wCgi2pdxREJIYRv+XVSAFAqkRUcx3ejBASjhkVfgoiEEMJ3/DopVLaf2Xn8T3R1W17U8xeEEKIm8u+kgFbhnODKTUfLSUUX1eqSxCSEEL7k10mBSow+OtOfIElBCHH58euk4G4
9qlhWcKbuB70JNbzBpQlKCCF8yL+TgkurcE3BlXYQtXYDFNWvi04IcZny6zNbRZ+moGkunOmH0NVueEniEUIIX/PvpKBpFRpBpOWkgb0QNaLRJYxKCCF8p8qTwrJly+jZsyfDhg1j2LBhvPLKKwAcO3aM2267jbi4OO6//37y8vIAyM7OZty4ccTHx3PbbbeRmppa1SGVqaI1BWfqfgB0khSEEJepKk8Kv/76K4mJiSxfvpzly5fz2GOPAfDcc89x66238uWXX9K+fXveeOMNAGbPnk1MTAyrVq1ixIgRJCUlVXVIZavg6CPnyWTQGaWTWQhx2bokSWHZsmUMHTqUf/3rX2RlZWG329myZQsDB7qfY3z99dfz5ZdfArB27VqGDBkCwODBg1m/fj12u72qwypVRe9dc6bsQxfZBEXVXaqQhBDCp6r8yWtWq5Vx48bRsWNHXn75ZaZOncrEiROxWCzo9XrvOidPngQgJSUFq9XqDkavx2KxkJGRQZ06dcq9z9q1LZWKVdM0VJ2K1Rp84XWdDnLSDxHSZRC1y7H+xSpPTL4gcVWMxFVxNTU2f4mr0klh1apVTJ8+vcSypk2bsmDBAu/v99xzD/379+eJJ5445/3n6+BVKzjcMz09F5eroj0EbppLIzU154LrOTOPgtNBkbluuda/GFZr8CXfR2VIXBUjcVVcTY3tcopLVZXzXkhXOinEx8cTHx9fYllOTg4LFizgzjvvBNxX4nq9nvDwcHJzc3E6neh0OlJTU4mMdD8XOTIykrS0NOrWrYvD4SA3N5fQ0NDKhlUhFXmegivjKABqeP1LGJEQQvhWlfYpBAYG8u9//5sdO3YA8P7773PttddiMBiIiYlh5cqVgHuEUu/evQGIjY1l2bJlAKxcuZKYmBgMBkNVhlWmigxJdWUeAUVBDal7iaMSQgjfqdI+BZ1Ox+zZs3n22WcpLCykcePGzJw5E4BnnnmGxMRE5s2bR1RUFC+//DIA48ePJzExkYSEBIKDg5k1a1ZVhnReWgVanFwZR1Br1UHRGy9dQEII4WNV3tEcExPD0qVLz1ler149Fi5ceM7y0NBQ5s+fX9VhlItG+aa50DQNZ+p+dHVbXvqghBDCh/z8jubyDUnVctPR8jLR1WlxyWMSQghf8uuk4HbhtOA88ScAurqSFIQQlze/TgrujuYLr+eeLtsoI4+EEJc9/04K5VzPlXkUNaye3MkshLjs+XVSQAO1HFUFV4Y7KQghxOXOr5OCqxw9za7CHLSCLHThkhSEEJc/v04KlGP0kSvzGIDUFIQQfsGvk4IGF5znwpV1AgA1JOrSBySEED7m30lB0y5YU9CyToKqQ7HUrpaYhBDCl/w7KXDhCfFcWSdRg60oFZy5VQgh/o78+0xXjjGpruwUlJDyP9tBCCH+zvw6KbjnPiq7qqBpGq7sk6i1JCkIIfyDfyeFC4w+0gqywWFDrWWttpiEEMKX/DopwPn7FLS8DPc60skshPATfp0U3I/wLDsruHLTAVAlKQgh/IRfJwW4QE0h11NTCK+maIQQwrf8OilcqE/BlZsOOgOKqeyHXAshxOXEv5PCBea50PIyUCy1y/0cZyGE+Lvz76SggXLePoUMVGk6EkL4Eb9OCsD5awq56ShB0skshPAffp0Uzjf3keZyoOVnSU1BCOFX/DspUPboIy0vE9Bk5JEQwq/4dVJAo8xOZNfp4ahyj4IQwp/4dVJwna/56PSNa0qQ1BSEEP7Dr5MC57mh2ZXnqSlIUhBC+A+/TgoaWplDUrXcDDAFoRgCqjkqIYTwHf9OCuerKeSmSy1BCOF3/DopQNm3KWh5GdKfIITwO36dFM4395H7bmYZeSSE8C/+nRTcY1LPXW4vhKI8uUdBCOF3/DspaKCWUlXw3qMgzUdCCD/j50lBK325PHFNCOGn/DwplH5Hs3uKC1CDwqo7JCGE8Cm/TgplcRVkAaCYQ3wciRBCVC+/TgqappU6IZ6WnwWGABS
DqfqDEkIIH/LvpEDpQ1K1gmypJQgh/JJfJ4Wy5s7W8rNQAyUpCCH8j18nBffcR6UsL8hCMdeq9niEEMLX/DsplNF+5JLmIyGEn7ropDBnzhxee+017+/Z2dmMGzeO+Ph4brvtNlJTUwGw2WxMmDCB+Ph4rrvuOpKTkwF3Z++MGTOIi4tj0KBBbNu27WJDKjcNUM9qPtKcdvfdzNJ8JITwQ5VOCjk5OUyePJl33nmnxPLZs2cTExPDqlWrGDFiBElJSQAsXLgQs9nMqlWrmDx5MomJiQCsXr2a5ORkVq5cydy5c0lMTMThcFzEIZVfaTevaQXZANJ8JITwS5VOCmvWrKFx48bcddddJZavXbuWIUOGADB48GDWr1+P3W5n7dq1DB06FIAuXbqQmZnJsWPHWLduHYMGDUJVVZo0aUJ0dDTbt2+/iEMqv9ImxNMKcgBQzMHVEoMQQtQk+sq+cfjw4QAlmo4AUlJSsFqt7o3r9VgsFjIyMkosB7BarZw4cYKUlBQiIyPPWV4RtWtbKncQGphMBqzWMwkgP9tBPhAeVZcAq+8Sg9WH+z4fiatiJK6Kq6mx+UtcF0wKq1atYvr06SWWNW3alAULFpR7J6paeoVEVdVSm3DKWr8s6em5uFylz2N0PhoaNpuD1NQc7zL7iZMAZBXqyCm2vDpZrcElYqopJK6KkbgqrqbGdjnFparKeS+kL5gU4uPjiY+PL/cOIyMjSUtLo27dujgcDnJzcwkNDSUyMpLU1FQaNWoEQGpqKpGRkdSpU8fbGV18eXXQSpk5WyuUPgUhhP+q8iGpsbGxLFu2DICVK1cSExODwWAgNjaW5cuXA7B161ZMJhPR0dH07t2bzz//HKfTycGDBzlw4AAdOnSo6rDKTSvIAVUHBrPPYhBCCF+pdJ9CWcaPH09iYiIJCQkEBwcza9YsAEaNGsXTTz9NQkICRqORmTNnAhAXF8fOnTu9ndBJSUkEBARUdVilcs99VLKq4CrIQTHXKnX2VCGEuNxddFJ4+OGHS/weGhrK/Pnzz1nPZDIxY8aMc5YrisLEiROZOHHixYZSYaXdu6YVZqME1MwOJSGEuNT8/I7mc2dJ1QpzJCkIIfyWnycFzulp1k43HwkhhD/y+6RwbvOR1BSEEP7Lr5MClGw+0hw2sBdKTUEI4bf8Oimcfd+cVihTXAgh/Jt/JwUoMfTUM++RGiA1BSGEf/LrpMBZfQpn7maWmoIQwj/5dVLQzsoK3hlSpaNZCOGn/DspaKAUywoy75EQwt/5eVI4a/RRQQ6oejBUzzQbQghR0/h3UuDsPoUcFHOwzHskhPBb/p0UzsoKWlEeiinIZ/EIIYSv+XVSQNNK9ilIUhBC+Dm/TgoanFVTyEcxBvoqHCGE8Dn/Tgpn36dQlAdSUxBC+DG/TgrnzH1kk5qCEMK/+XVS0Iq1H2kup3syPKkpCCH8mH8nBUA9XVPQbPkAKCapKQgh/Jd/JwVNO/OQnaI8AKkpCCH8mp8nhTMdzVqR1BSEEMLvk4InK2iemoJRagpCCP/l10kBtGI1BXdSkCGpQgh/5tdJofgsqdLRLIQQ/p4U4Ezzka0AAMVo9lk8Qgjha36dFNxzH51mKwBFBzqjLyMSQgif8uukULKmkI9iNMu02UIIv+bfSUHTUBVPn0IBSNOREMLP+XlSKPazzHskhBB+nhQ4c0MztgLpZBZC+D3/TgpnDUmVpCCE8Hd+nRSKz3Ph7lOQ5iMhhH/z66RQ/BHNUlMQQgh/Twqns4KmucBWKElBCOH3/DopuOc+UsBe5P5Zmo+EEH7Or5OC53EKnnmP5D4FIYS/8/OkoJ1OCp55j6SmIITwb/6dFABQZDI8IYQ4zb+TgmdEqmfabEkKQgg/59dJASjRfCT3KQgh/J3+YjcwZ84cVFXl4YcfBmDLli089NBD1K1bF4C2bdsyffp0srOz+de//sXhw4cJDw9n9uzZWK1WbDYbTz75JLt27SIgIIBZs2bRrFm
ziw3rgrRiEx9pUlMQQgjgImoKOTk5TJ48mXfeeafE8l9//ZUxY8awfPlyli9fzvTp0wGYPXs2MTExrFq1ihEjRpCUlATAwoULMZvNrFq1ismTJ5OYmHgRh1N+npSgKIp0NAshxGmVTgpr1qyhcePG3HXXXSWW//rrr2zYsIHhw4dz3333cfz4cQDWrl3LkCFDABg8eDDr16/Hbrezdu1ahg4dCkCXLl3IzMzk2LFjlQ2r/E5nBXefQgEoKujlATtCCP9W6aQwfPhwxo0bh06nK7E8ODiY0aNHs2zZMmJjY3nssccASElJwWq1AqDX67FYLGRkZJRYDmC1Wjlx4kRlwyo3rVhW0Gz5IA/YEUKIC/cprFq1ytsE5NG0aVMWLFhQ6vpTp071/nzLLbfw0ksvkZOTU+q6qlp6TipreVlq17ZUaH0Ah9MFgMViwpTnQDMHYbUGV3g7l0pNiqU4iatiJK6Kq6mx+UtcF0wK8fHxxMfHl2tjLpeLN99885wahF6vJzIykrS0NOrWrYvD4SA3N5fQ0FAiIyNJTU2lUaNGAKSmphIZGVmhg0hPz8Xl0i68YjF2hzsp5OfZKMjOQtMFkJpaevKqblZrcI2JpTiJq2IkroqrqbFdTnGpqnLeC+kqHZKqqipff/01q1evBmDZsmX84x//wGw2Exsby7JlywBYuXIlMTExGAwGYmNjWb58OQBbt27FZDIRHR1dlWGV4fStawrygB0hhDjtooeknm3GjBlMmTKFuXPnEh4ezsyZMwEYP348iYmJJCQkEBwczKxZswAYNWoUTz/9NAkJCRiNRu/6l1rJR3EWoAZHVMt+hRCiJrvopOC5P8GjRYsWLFq06Jz1QkNDmT9//jnLTSYTM2bMuNgwKqzkkNR8mQxPCCHw5zuaiw1J1WwFKAZJCkII4bdJwTskFQ3s8oAdIYQAf04Kp3OCTnOC5gJjgG8DEkKIGsBvk4KHzlkEgGKQpCCEEH6bFDwT4hk0T1KQ5iMhhPDfpHD6f9Vlc/8gzUdCCOHHSeF0VtA7paYghBAefpsUPHSau6YgfQpCCOHHScHTp+CpKUjzkRBC+HNSOP2/ziXNR0II4eG3ScGTFXROaT4SQggPv00KZ9cUMJh8FosQQtQUfpsUPMOP9K4iMASgKP5bFEII4eG3Z0KXt/moSJqOhBDiNL9NCh6qyyZJQQghTvPbpOAZkqpzFsqzFIQQ4jS/TQoeOqfUFIQQwsNvk4J36myX9CkIIYSH/yaF04NSVWeRNB8JIcRpfpsUkNFHQghxDr2vA/AV79TZkhSE8Cmn00FmZioOh42UFBWXy+XrkM7xd41LrzcSFmZFpyv/qd6vk4IOJ6rmlOYjIXwoMzOVgIBAgoLqYjDocDhq3slXr1f/dnFpmkZeXjaZmalERESVe5t+23ykaRoBih2QeY+E8CWHw0ZQUC0URfF1KJcVRVEICqqFw2Gr0Pv8NimggcmTFKSmIIRPSUK4NCpTrn6bFDTw1hSQmoIQoga48cYhHD9+zKcx+G9SkOYjIYQ4h992NAOYFAcgzUdCiDN+/nkr8+a9itPpIioqCrM5kP37k3E6Xdx222j69buWYcPi+OijZQQGBnH//WPo0aM3t99+J998s5pfftnO/fc/xPTp00hNTSEtLZUrrriSp56ayvbt27zbbtq0GY888jhTp04hJeUkjRs3xWZzt//v27eXmTOTcDqdGI1GJk9+hgYNGlbL8fttUtA0CFBOd8BITUGIGuGHncdYt/3SNJ/07BhFjw7lG4Vz+PAhlixZwcKF7xIRYeXZZ6eRlZXNffeNoW3b9nTuHMP27T9z5ZWdOX78OL/88jO3334nP/64kWuuuZaNG3+gRYuWPP/8DOx2O7ffPoI9e3aX2LbFYuHll2fQsmVrZs16lV9++Zlvv/0agI8++pCRI2+nX7/+rFnzFb/99qskhUtNo1hNQZKCEKKYBg0aYbFY2Lr1J4qKClm58jM0DQoLC9m//y+
6devJtm0/oaoKAwbEs2bNVzgcDnbs+IUJEyZjMpn4/fddfPTRhxw4sJ+srCwKCvJLbBtg+/ZtPPvsCwBccUUnoqPrAdCtWw9efnkmmzdvpHv3XvTpc021HbvfJgU0jQCkT0GImqRnx2i6tq3r6zAwmdxPYnS5nEyZMo127dricLjIyEinVq0QcnJyWLToA3Q6PZ07d+HQoQOsWLGMpk2bYjKZWLJkEWvXfsvQoddx441XsX9/sndmZs+2wT06qPjNZzqdDoC+ffvTvn1HNmz4no8//h8//riBiROfqpZj9+OOZjCcrinIoziFEKXp1KkLy5YtASAtLY077riFkydPEBYWhslkYsOG9XTseAWdOnVhwYL/0L17LwC2bNnM0KHXM2BAPKCwd++fpd55HBNzFV99tQqAP/74jaNHjwDw9NOT+P333xg+/Abuuec+b9NTdfDfpIC7+UhTVBTVfytMQoiyjRkzlqKiIm69dQTjx9/HAw88Qr169QF3E4/FEkxgYCCdO3chLS2V7t17AnDTTbfy7rtvMWbMbbz88gzat+9Y6lDTu+++l6NHj3D77Tfx/vsLvM1Ho0bdxcKF7zJmzG3MnTubhx9+rNqOWdE8dZq/sfT0XFyuih3GoZM57Fj8Br2C9hN69/xLFFnlWK3BpKbm+DqMc0hcFSNxlc+JEwepW7cR8PecTsKXyhNX8fIFUFWF2rUtZa7vtzUFACMONJ3R12EIIUSN4bdJQdPAqDhwSVIQQggvv00K4E4KmmrwdRhCCFFj+G1ScLhc7qQgNQUhhPDy26RQUOTAiANFhqMKIYSX3yaFwiInRsWBapCaghBCeFQ6KWzbto0bbriBYcOGcccdd3D06FEAsrOzGTduHPHx8dx2222kpqYCYLPZmDBhAvHx8Vx33XUkJycD7tlKZ8yYQVxcHIMGDWLbtm1VcFgXll/kwKA4UY1yN7MQQnhUOilMmDCBpKQkli9fzpAhQ3j++ecBmD17NjExMaxatYoRI0aQlJQEwMKFCzGbzaxatYrJkyeTmJgIwOrVq0lOTmblypXMnTuXxMREHA5HFRza+RUUOTApDvRGaT4SQlSvY8eOMn36VMA9K+tDD42r9LZWrvycpKRnqyiySiYFm83G+PHjad26NQCtWrXi+PHjAKxdu5YhQ4YAMHjwYNavX4/dbmft2rUMHToUgC5dupCZmcmxY8dYt24dgwYNQlVVmjRpQnR0NNu3b6+KYzuvgiIHBhzopKYghKhmJ04c905pUdNUan4Ho9HIsGHDAHC5XLz++uv0798fgJSUFKxWq3vjej0Wi4WMjIwSywGsVisnTpwgJSWFyMjIc5ZfavlFDoyKE1U6moUQp6WknGTq1CkUFBSgqgrjx0/g2Wcn07//AH744Xt0Oh333vsgixa9z5Ejh3nwwUe55pprychI58UXp3Hy5Al0Oh3jxj1I167dKSwsZMaM59m3709UVWXkyNuJjx/MnDmzOHbsKC+9NIO+fa/h1KlT/Otfj3D06BEaNmzEtGkzMBqNrFq1go8//h8ul0arVq15/PGJmEwmvvzyC9577z9YLBbq1KmL2RxYZWVwwaSwatUqpk+fXmJZ06ZNWbBgATabzdvcc++995a5DVUtvUKiqiqlzbJR1vplOd8t22XR0DAoToJCggmzBlf4/ZeatQbGBBJXRUlcF5aSoqLXu7/zRbt/wLZ7/SXZj7F1b0yte553nZUrP6Nnz17cfvsdbNu2ld9+2wG4L1YXLVrCtGnP8MEHC5g79y127tzB7NmzGDhwIHPmzCIm5ipuvfV2jh49wr33juG99/7Hhx8uJDQ0lP/9bwmnTmUyZsxoWrduzeOPP8G///0mEydOYtu2raSknODll+dQt24U99xzB9u3byEqKpoVK5bx9tsLMJlMvPHGayxe/AFDhgxj3rzX+O9//0dISAj//OcjBAYGecvwbKqqVujvfcGkEB8fT3x8/DnL8/LyuP/++wkNDWXevHk
YDO6bwCIjI0lLS6Nu3bo4HA5yc3MJDQ0lMjKS1NRUGjVyz8GRmppKZGQkderU8XZGF19eEZWZ+yjnVC4A+TZw1KB5YKDmzU3jIXFVjMRVPi6Xq8T8PZdqOjaXS7vgPEGdOnXhySefYPfu3XTv3pPhw0fw8ceL6datBw6Hi8jIutSubQVUrNY6ZGdn43C42Lr1JyZMeBKHw0WdOtG0adOeX3/dydatW0hMnILD4cJiCaFnz95s3bqVZs2ao2nueJxOF82atSAyMgqXCxo2bExGRiZHjhzh8OHD3H33HQA4HHZatmzNL79sp337DoSEhKHXq1x7bTzbtm0p89hcLleJv/eF5j6q9PSgEyZMoFGjRkydOhVFUbzLY2NjWbZsGffddx8rV64kJiYGg8FAbGwsy5cvJyYmhq1bt2IymYiOjqZ379588sknDB48mCNHjnDgwAE6dOhQ2bDKzVFU4P5BL81HQtQUptY90TXv7rP9d+x4Be+//xEbN/7AmjVfsXLl5wDo9WdmPvA886C4cy9KNZxOJ5pW8kStaeB0njuQpvg2FUVB0zScThf9+vXn0UcnAJCfn4/T6WTbtp9K7K+0eC5GpTqaf//9d9asWcPPP//M8OHDGTZsGGPHjgVg/Pjx/PLLLyQkJPDhhx/y9NNPAzBq1ChsNhsJCQkkJSUxc+ZMAOLi4mjRogVDhw7lgQceICkpiYCAS9/567QVAqDo5T4FIYTbG2/MYfXqlcTHD+axxyby5597yvW+zp1jWLFiGQBHjx7h11930K5dRzp16sIXXywH4NSpU3z//VquvDIGnU6P0+k87zavvLIz69evJTMzA03TeOml6Xz00Yd07HgFv//+K6mpKbhcLu8jPKtKpWoKbdu2Zc+e0gsrNDSU+fPPnYraZDIxY8aMc5YrisLEiROZOHFiZUKpNKetEBSkpiCE8Lrhhpt57rmnWLlyBaqq8s9/JjJv3qsXfN+jj05g5swkVq78/PQ57SkiIiK46657eOmlGYwefTMul4vRo8fQqlVrsrJOkZubw7RpU0hIGFbqNlu0aMldd43lkUfuQ9M0WrRoxe2334nJZOLRRyfw6KMPYDabadSoSZWWgd8+T2HVG7Pood9F0IgkdGH1LlFklVPT2nw9JK6KkbjKR56nUHnyPIUq1EJ3jPQ6XWtcQhBCCF/y2+dQ1h81jah6VjIz830dihBC1Bh+W1MICAxCr6/aXnshhPi789ukIISoOS6Drs0aqTLlKklBCOFTer2RvLxsSQxVTNM08vKy0Vdw2L3f9ikIIWqGsDArmZmp5OaeQlVVXK6aN8rn7xqXXm8kLMxa5uulvudigxJCiIuh0+mJiIgCat5wWQ9/ikuaj4QQQnhJUhBCCOF1WTQfqapy4ZUuwXsvJYmrYiSuiqmpcUHNje1yietC618W01wIIYSoGtJ8JIQQwkuSghBCCC9JCkIIIbwkKQghhPCSpCCEEMJLkoIQQggvSQpCCCG8JCkIIYTwkqQghBDCyy+Twueff86gQYO49tpr+eCDD3way+jRo0lISGDYsGEMGzaMHTt2+DS+3NxcBg8ezJEjRwDYuHEjQ4YMYcCAAbzyyive9f744w9uuOEGBg4cyJNPPonD4ajWuCZNmsSAAQO85fb111+fN95L4fXXXychIYGEhARmzpx53v1Xd3mVFltNKLM5c+YwaNAgEhISePfdd8+7/+oss9Liqgnl5TFjxgwSExOBssvl2LFj3HbbbcTFxXH//feTl5dXuZ1pfubEiRNa3759tczMTC0vL08bMmSItnfvXp/E4nK5tB49emh2u71GxPfLL79ogwcP1tq1a6cdPnxYKygo0GJjY7VDhw5pdrtdGzNmjLZ27VpN0zQtISFB2759u6ZpmjZp0iTtgw8+qLa4NE3TBg8erJ08ebLEeueLt6pt2LBBu/nmm7WioiLNZrNpo0eP1j7//PMaUV6lxfbVV1/5vMw2b96sjRw5UrPb7VpBQYH
Wt29f7Y8//vB5mZUWV3Jyss/Ly2Pjxo3a1VdfrU2cOFHTtLLLZdy4cdqKFSs0TdO0119/XZs5c2al9ud3NYWNGzfStWtXQkNDCQwMZODAgXz55Zc+ieWvv/5CURTGjh3L0KFDef/9930a30cffcQzzzxDZGQkADt37qRRo0Y0aNAAvV7PkCFD+PLLLzl69CiFhYVcccUVAFx//fWXNMaz48rPz+fYsWNMmTKFIUOG8Oqrr+JyucqM91KwWq0kJiZiNBoxGAw0a9aMAwcO1IjyKi22Y8eO+bzMrrrqKv773/+i1+tJT0/H6XSSnZ3t8zIrLS6TyeTz8gI4deoUr7zyCvfddx9AmeVit9vZsmULAwcOLLG8Mi6LWVIrIiUlBav1zJOIIiMj2blzp09iyc7Oplu3bjz77LMUFhYyevRo4uPjfRZfUlJSid9LK6uTJ0+es9xqtXLy5Mlqiys9PZ2uXbsydepUAgMDuffee1myZAmBgYGlxnsptGjRwvvzgQMHWLlyJaNGjaoR5VVabB9++CE//fSTT8sMwGAw8Oqrr/LOO+8QFxdXYz5jZ8fldDp9/hkDePrpp3nsscc4fvw4cO530lMumZmZWCwW9Hp9ieWV4Xc1Ba2USWEVxTdT4l555ZXMnDmTwMBAwsPDufHGG3n11VfPWc9X8ZVVVr4uwwYNGjB37lxq166N2Wxm1KhRrFu3zidx7d27lzFjxjBx4kQaNmxY6v59VV7FY2vatGmNKbNHHnmETZs2cfz4cQ4cOFDq/n0d16ZNm3xeXh9//DFRUVF069bNu6w6vpN+V1OoU6cOW7du9f6ekpLibZaoblu3bsVut3v/6JqmUa9ePdLS0mpEfHXq1Ck1lrOXp6amVmuMe/bs4cCBA96qsqZp6PX6MuO9VLZt28YjjzzC5MmTSUhI4Keffqox5XV2bDWhzJKTk7HZbLRp0waz2cyAAQP48ssv0el05+y/OsustLhWrlxJaGioT8tr5cqVpKamMmzYMLKyssjPz0dRlFLLJTw8nNzcXJxOJzqd7qLKy+9qCt27d2fTpk1kZGRQUFDAV199Re/evX0SS05ODjNnzqSoqIjc3FyWLl3K//3f/9WY+P7xj3+wf/9+Dh48iNPpZMWKFfTu3Zt69ephMpnYtm0bAMuWLavWGDVN44UXXiArKwu73c7ixYu59tpry4z3Ujh+/DgPPvggs2bNIiEhAag55VVabDWhzI4cOcJTTz2FzWbDZrOxZs0aRo4c6fMyKy2uLl26+Ly83n33XVasWMHy5ct55JFH6NevH9OnTy+1XAwGAzExMaxcubLE8srwy5rCY489xujRo7Hb7dx444107NjRJ7H07duXHTt2MHz4cFwuF7feeiudO3euMfGZTCZefPFFHn74YYqKioiNjSUuLg6AWbNm8dRTT5GXl0fbtm0ZPXp0tcXVunVrxo0bxy233ILD4WDAgAEMHjwYoMx4q9p//vMfioqKePHFF73LRo4cWSPKq6zYfF1msbGx3s+7TqdjwIABJCQkEB4e7tMyKy2uhx56iLCwMJ+WV1nKKpdnnnmGxMRE5s2bR1RUFC+//HKlti9PXhNCCOHld81HQgghyiZJQQghhJckBSGEEF6SFIQQQnhJUhBCCOElSUGIKjB27Fj27dtXoffce++9fPrpp5coIiEqx+/uUxDiUnj77bd9HYIQVUKSgvBr3377LfPmzcNutxMQEMDEiRP54Ycf2Lt3L2lpaaSnp9O6dWuSkpKwWCx8+OGHLFq0CIPBgMlkYurUqTRv3px+/foxZ84cOnTowOLFi1m4cCGqqhIREcGUKVNo0qQJJ0+eJDExkZSUFKKjo0lPT/fGkZycTFJSEqdOncLpdDJq1ChuvPFG8vLymDRpEgcPHkRVVdq1a8fUqVNRVanki0ukUhNuC3EZ2L9/vzZ48GAtIyND0zRN+/PPP7UePXpoL774ota7d28tNTVVczqd2uOPP669+OKLmsPh0Nq
1a+edY3/p0qXaokWLNE3TtL59+2o7d+7UNm7cqPXv319LT0/XNE3TPvnkEy0+Pl5zuVzaAw88oL3yyiuapmnagQMHtCuuuEL75JNPNLvdrg0aNEjbtWuXpmmalp2drcXHx2vbt2/Xli5dqo0ZM0bTNE1zOBzak08+qR04cKA6i0n4GakpCL+1YcMGUlJSuPPOO73LFEXh0KFDxMXFERERAcCNN97ICy+8wMSJE4mLi2PkyJH06dOHHj16MGTIkBLb/P777xk0aBDh4eGAe177pKQkjhw5wsaNG5k4cSIAjRo14uqrrwbc01sfOnSIyZMne7dTWFjI77//Tq9evXjllVcYNWoU3bt354477qBRo0aXsliEn5OkIPyWy+WiW7duzJ4927vs+PHjLF68GJvNVmI9T3PNrFmz+PPPP9m4cSNvv/02S5YsYd68ed51tVJmjdE0DYfDcc4Ux565751OJ7Vq1WL58uXe19LS0ggODsZkMvH111+zefNmfvzxR+666y6eeuqpap9vR/gPaZgUfqtr165s2LCB5ORkANatW8fQoUMpKipizZo15OTk4HK5+Oijj+jbty8ZGRnExsYSGhrKnXfeyaOPPsqePXtKbLNnz56sXLmSjIwMAD755BNCQ0Np1KgRvXr1YvHixYD7ebqbN28GoEmTJphMJm9SOH78OIMHD2bXrl18+OGHTJo0iZ49ezJhwgR69uzJ3r17q6uIhB+SCfGEX1u1ahXz58/3zpc/efJkNm3axI8//ojT6SQzM5MuXbrw1FNPERAQwKJFi/jvf/9LQEAAOp2Oxx57jO7du5foaP7ggw9YtGgRLpeL8PBwnn76aVq0aEFGRgaTJk3i0KFD1K1bF4fDwXXXXcf111/P7t27vR3NDoeD0aNHc8stt5Cfn8/kyZPZs2cPZrOZ6OhokpKSCAkJ8XXRicuUJAUhzvLaa6+RmZnJ008/7etQhKh20nwkhBDCS2oKQgghvKSmIIQQwkuSghBCCC9JCkIIIbwkKQghhPCSpCCEEMJLkoIQQgiv/wexkHIMKlrueAAAAABJRU5ErkJggg==", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "# 获取参数\n", - "cfg = get_args() \n", - "# 训练\n", - "env, agent = env_agent_config(cfg)\n", - "res_dic = train(cfg, env, agent)\n", - " \n", - "plot_rewards(res_dic['rewards'], cfg, tag=\"train\") \n", - "# 测试\n", - "res_dic = test(cfg, env, agent)\n", - "plot_rewards(res_dic['rewards'], cfg, tag=\"test\") # 画出结果" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3.7.13 ('easyrl')", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.13" - }, - "orig_nbformat": 4, - "vscode": { - "interpreter": { - "hash": "8994a120d39b6e6a2ecc94b4007f5314b68aa69fc88a7f00edf21be39b41f49c" - } - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/projects/notebooks/MonteCarlo.ipynb b/projects/notebooks/MonteCarlo.ipynb new file mode 100644 index 0000000..c0613ce --- /dev/null +++ b/projects/notebooks/MonteCarlo.ipynb @@ -0,0 +1,480 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1、定义算法" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "from collections import defaultdict\n", + "import numpy as np\n", + "class FisrtVisitMC:\n", + " ''' On-Policy First-Visit MC Control\n", + " '''\n", + " def __init__(self,cfg):\n", + " self.n_actions = cfg.n_actions\n", + " self.epsilon = cfg.epsilon\n", + " self.gamma = cfg.gamma \n", + " self.Q_table = defaultdict(lambda: np.zeros(cfg.n_actions))\n", + " self.returns_sum = defaultdict(float) # 保存return之和\n", + " self.returns_count = defaultdict(float)\n", + " \n", + " def sample_action(self,state):\n", + " state = str(state)\n", + " if np.random.uniform(0, 1) > self.epsilon:\n", + " action = 
np.argmax(self.Q_table[str(state)]) # 选择Q(s,a)最大对应的动作\n", + " else:\n", + " action = np.random.choice(self.n_actions) # 随机选择动作\n", + " return action\n", + " # if state in self.Q_table.keys():\n", + " # best_action = np.argmax(self.Q_table[state])\n", + " # action_probs = np.ones(self.n_actions, dtype=float) * self.epsilon / self.n_actions\n", + " # action_probs[best_action] += (1.0 - self.epsilon)\n", + " # action = np.random.choice(np.arange(len(action_probs)), p=action_probs)\n", + " # else:\n", + " # action = np.random.randint(0,self.n_actions)\n", + " # return action\n", + " def predict_action(self,state):\n", + " state = str(state)\n", + " state = str(state)\n", + " if np.random.uniform(0, 1) > self.epsilon:\n", + " action = np.argmax(self.Q_table[str(state)]) # 选择Q(s,a)最大对应的动作\n", + " else:\n", + " action = np.random.choice(self.n_actions) # 随机选择动作\n", + " return action\n", + " # if state in self.Q_table.keys():\n", + " # best_action = np.argmax(self.Q_table[state])\n", + " # action_probs = np.ones(self.n_actions, dtype=float) * self.epsilon / self.n_actions\n", + " # action_probs[best_action] += (1.0 - self.epsilon)\n", + " # action = np.argmax(self.Q_table[state])\n", + " # else:\n", + " # action = np.random.randint(0,self.n_actions)\n", + " # return action\n", + " def update(self,one_ep_transition):\n", + " # Find all (state, action) pairs we've visited in this one_ep_transition\n", + " # We convert each state to a tuple so that we can use it as a dict key\n", + " sa_in_episode = set([(str(x[0]), x[1]) for x in one_ep_transition])\n", + " for state, action in sa_in_episode:\n", + " sa_pair = (state, action)\n", + " # Find the first occurence of the (state, action) pair in the one_ep_transition\n", + "\n", + " first_occurence_idx = next(i for i,x in enumerate(one_ep_transition)\n", + " if str(x[0]) == state and x[1] == action)\n", + " # Sum up all rewards since the first occurance\n", + " G = sum([x[2]*(self.gamma**i) for i,x in 
enumerate(one_ep_transition[first_occurence_idx:])])\n", + " # Calculate average return for this state over all sampled episodes\n", + " self.returns_sum[sa_pair] += G\n", + " self.returns_count[sa_pair] += 1.0\n", + " self.Q_table[state][action] = self.returns_sum[sa_pair] / self.returns_count[sa_pair]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2、定义训练" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [], + "source": [ + "def train(cfg,env,agent):\n", + " print('开始训练!')\n", + " print(f'环境:{cfg.env_name}, 算法:{cfg.algo_name}, 设备:{cfg.device}')\n", + " rewards = [] # 记录奖励\n", + " for i_ep in range(cfg.train_eps):\n", + " ep_reward = 0 # 记录每个回合的奖励\n", + " one_ep_transition = []\n", + " state = env.reset(seed=cfg.seed) # 重置环境,即开始新的回合\n", + " for _ in range(cfg.max_steps):\n", + " action = agent.sample_action(state) # 根据算法采样一个动作\n", + " next_state, reward, terminated, info = env.step(action) # 与环境进行一次动作交互\n", + " one_ep_transition.append((state, action, reward)) # 保存transitions\n", + " agent.update(one_ep_transition) # 更新智能体\n", + " state = next_state # 更新状态\n", + " ep_reward += reward \n", + " if terminated:\n", + " break\n", + " rewards.append(ep_reward)\n", + " if (i_ep+1)%10==0:\n", + " print(f\"回合:{i_ep+1}/{cfg.train_eps},奖励:{ep_reward:.1f}\")\n", + " print('完成训练!')\n", + " return {\"rewards\":rewards}\n", + "def test(cfg,env,agent):\n", + " print('开始测试!')\n", + " print(f'环境:{cfg.env_name}, 算法:{cfg.algo_name}, 设备:{cfg.device}')\n", + " rewards = [] # 记录所有回合的奖励\n", + " for i_ep in range(cfg.test_eps):\n", + " ep_reward = 0 # 记录每个episode的reward\n", + " state = env.reset(seed=cfg.seed) # 重置环境, 重新开一局(即开始新的一个回合)\n", + " for _ in range(cfg.max_steps):\n", + " action = agent.predict_action(state) # 根据算法选择一个动作\n", + " next_state, reward, terminated, info = env.step(action) # 与环境进行一个交互\n", + " state = next_state # 更新状态\n", + " ep_reward += reward\n", + " if terminated:\n", + " break\n", + " 
rewards.append(ep_reward)\n", + " print(f\"回合数:{i_ep+1}/{cfg.test_eps}, 奖励:{ep_reward:.1f}\")\n", + " print('完成测试!')\n", + " return {\"rewards\":rewards}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3、定义环境" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [], + "source": [ + "import gym\n", + "import turtle\n", + "import numpy as np\n", + "\n", + "# turtle tutorial : https://docs.python.org/3.3/library/turtle.html\n", + "\n", + "class CliffWalkingWapper(gym.Wrapper):\n", + " def __init__(self, env):\n", + " gym.Wrapper.__init__(self, env)\n", + " self.t = None\n", + " self.unit = 50\n", + " self.max_x = 12\n", + " self.max_y = 4\n", + "\n", + " def draw_x_line(self, y, x0, x1, color='gray'):\n", + " assert x1 > x0\n", + " self.t.color(color)\n", + " self.t.setheading(0)\n", + " self.t.up()\n", + " self.t.goto(x0, y)\n", + " self.t.down()\n", + " self.t.forward(x1 - x0)\n", + "\n", + " def draw_y_line(self, x, y0, y1, color='gray'):\n", + " assert y1 > y0\n", + " self.t.color(color)\n", + " self.t.setheading(90)\n", + " self.t.up()\n", + " self.t.goto(x, y0)\n", + " self.t.down()\n", + " self.t.forward(y1 - y0)\n", + "\n", + " def draw_box(self, x, y, fillcolor='', line_color='gray'):\n", + " self.t.up()\n", + " self.t.goto(x * self.unit, y * self.unit)\n", + " self.t.color(line_color)\n", + " self.t.fillcolor(fillcolor)\n", + " self.t.setheading(90)\n", + " self.t.down()\n", + " self.t.begin_fill()\n", + " for i in range(4):\n", + " self.t.forward(self.unit)\n", + " self.t.right(90)\n", + " self.t.end_fill()\n", + "\n", + " def move_player(self, x, y):\n", + " self.t.up()\n", + " self.t.setheading(90)\n", + " self.t.fillcolor('red')\n", + " self.t.goto((x + 0.5) * self.unit, (y + 0.5) * self.unit)\n", + "\n", + " def render(self):\n", + " if self.t == None:\n", + " self.t = turtle.Turtle()\n", + " self.wn = turtle.Screen()\n", + " self.wn.setup(self.unit * self.max_x + 100,\n", + " 
self.unit * self.max_y + 100)\n", + " self.wn.setworldcoordinates(0, 0, self.unit * self.max_x,\n", + " self.unit * self.max_y)\n", + " self.t.shape('circle')\n", + " self.t.width(2)\n", + " self.t.speed(0)\n", + " self.t.color('gray')\n", + " for _ in range(2):\n", + " self.t.forward(self.max_x * self.unit)\n", + " self.t.left(90)\n", + " self.t.forward(self.max_y * self.unit)\n", + " self.t.left(90)\n", + " for i in range(1, self.max_y):\n", + " self.draw_x_line(\n", + " y=i * self.unit, x0=0, x1=self.max_x * self.unit)\n", + " for i in range(1, self.max_x):\n", + " self.draw_y_line(\n", + " x=i * self.unit, y0=0, y1=self.max_y * self.unit)\n", + "\n", + " for i in range(1, self.max_x - 1):\n", + " self.draw_box(i, 0, 'black')\n", + " self.draw_box(self.max_x - 1, 0, 'yellow')\n", + " self.t.shape('turtle')\n", + "\n", + " x_pos = self.s % self.max_x\n", + " y_pos = self.max_y - 1 - int(self.s / self.max_x)\n", + " self.move_player(x_pos, y_pos)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [], + "source": [ + "import gym\n", + "import os\n", + "def all_seed(env,seed = 1):\n", + " ''' omnipotent seed for RL, attention the position of seed function, you'd better put it just following the env create function\n", + " Args:\n", + " env (_type_): \n", + " seed (int, optional): _description_. 
Defaults to 1.\n", + " '''\n", + " import torch\n", + " import numpy as np\n", + " import random\n", + " # print(f\"seed = {seed}\")\n", + " env.seed(seed) # env config\n", + " np.random.seed(seed)\n", + " random.seed(seed)\n", + " torch.manual_seed(seed) # config for CPU\n", + " torch.cuda.manual_seed(seed) # config for GPU\n", + " os.environ['PYTHONHASHSEED'] = str(seed) # config for python scripts\n", + " # config for cudnn\n", + " torch.backends.cudnn.deterministic = True\n", + " torch.backends.cudnn.benchmark = False\n", + " torch.backends.cudnn.enabled = False\n", + " \n", + "def env_agent_config(cfg):\n", + " '''创建环境和智能体\n", + " ''' \n", + " env = gym.make(cfg.env_name,new_step_api=True) # 创建环境\n", + " env = CliffWalkingWapper(env)\n", + " if cfg.seed !=0: # set random seed\n", + " all_seed(env,seed=cfg.seed) \n", + " try: # 状态维度\n", + " n_states = env.observation_space.n # print(hasattr(env.observation_space, 'n'))\n", + " except AttributeError:\n", + " n_states = env.observation_space.shape[0]\n", + " n_actions = env.action_space.n # 动作维度\n", + " setattr(cfg, 'n_states', n_states) # 将状态维度添加到配置参数中\n", + " setattr(cfg, 'n_actions', n_actions) # 将动作维度添加到配置参数中\n", + " agent = FisrtVisitMC(cfg)\n", + " return env,agent" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 4、设置参数" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [], + "source": [ + "import torch\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "class Config:\n", + " '''配置参数\n", + " '''\n", + " def __init__(self):\n", + " self.env_name = 'CliffWalking-v0' # 环境名称\n", + " self.algo_name = \"FirstVisitMC\" # 算法名称\n", + " self.train_eps = 400 # 训练回合数\n", + " self.test_eps = 20 # 测试回合数\n", + " self.max_steps = 200 # 每个回合最大步数\n", + " self.epsilon = 0.1 # 贪婪度\n", + " self.gamma = 0.9 # 折扣因子\n", + " self.lr = 0.5 # 学习率\n", + " self.seed = 1 # 随机种子\n", + " # if torch.cuda.is_available(): # 是否使用GPUs\n", + " # 
self.device = torch.device('cuda')\n", + " # else:\n", + " # self.device = torch.device('cpu')\n", + " self.device = torch.device('cpu')\n", + "def smooth(data, weight=0.9): \n", + " '''用于平滑曲线\n", + " '''\n", + " last = data[0] # First value in the plot (first timestep)\n", + " smoothed = list()\n", + " for point in data:\n", + " smoothed_val = last * weight + (1 - weight) * point # 计算平滑值\n", + " smoothed.append(smoothed_val) \n", + " last = smoothed_val \n", + " return smoothed\n", + "\n", + "def plot_rewards(rewards,title=\"learning curve\"):\n", + " sns.set()\n", + " plt.figure() # 创建一个图形实例,方便同时多画几个图\n", + " plt.title(f\"{title}\")\n", + " plt.xlim(0, len(rewards), 10) # 设置x轴的范围\n", + " plt.xlabel('epsiodes')\n", + " plt.plot(rewards, label='rewards')\n", + " plt.plot(smooth(rewards), label='smoothed')\n", + " plt.legend()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 5、我准备好了!" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "开始训练!\n", + "环境:CliffWalking-v0, 算法:FirstVisitMC, 设备:cpu\n", + "回合:10/400,奖励:-200.0\n", + "回合:20/400,奖励:-200.0\n", + "回合:30/400,奖励:-200.0\n", + "回合:40/400,奖励:-200.0\n", + "回合:50/400,奖励:-200.0\n", + "回合:60/400,奖励:-200.0\n", + "回合:70/400,奖励:-200.0\n", + "回合:80/400,奖励:-200.0\n", + "回合:90/400,奖励:-200.0\n", + "回合:100/400,奖励:-200.0\n", + "回合:110/400,奖励:-200.0\n", + "回合:120/400,奖励:-200.0\n", + "回合:130/400,奖励:-200.0\n", + "回合:140/400,奖励:-200.0\n", + "回合:150/400,奖励:-200.0\n", + "回合:160/400,奖励:-200.0\n", + "回合:170/400,奖励:-200.0\n", + "回合:180/400,奖励:-200.0\n", + "回合:190/400,奖励:-200.0\n", + "回合:200/400,奖励:-200.0\n", + "回合:210/400,奖励:-200.0\n", + "回合:220/400,奖励:-200.0\n", + "回合:230/400,奖励:-200.0\n", + "回合:240/400,奖励:-200.0\n", + "回合:250/400,奖励:-200.0\n", + "回合:260/400,奖励:-200.0\n", + "回合:270/400,奖励:-299.0\n", + "回合:280/400,奖励:-200.0\n", + "回合:290/400,奖励:-200.0\n", + "回合:300/400,奖励:-200.0\n", + 
"回合:310/400,奖励:-200.0\n", + "回合:320/400,奖励:-200.0\n", + "回合:330/400,奖励:-200.0\n", + "回合:340/400,奖励:-200.0\n", + "回合:350/400,奖励:-200.0\n", + "回合:360/400,奖励:-200.0\n", + "回合:370/400,奖励:-200.0\n", + "回合:380/400,奖励:-200.0\n", + "回合:390/400,奖励:-200.0\n", + "回合:400/400,奖励:-200.0\n", + "完成训练!\n", + "开始测试!\n", + "环境:CliffWalking-v0, 算法:FirstVisitMC, 设备:cpu\n", + "回合数:1/20, 奖励:-200.0\n", + "回合数:2/20, 奖励:-200.0\n", + "回合数:3/20, 奖励:-200.0\n", + "回合数:4/20, 奖励:-200.0\n", + "回合数:5/20, 奖励:-200.0\n", + "回合数:6/20, 奖励:-200.0\n", + "回合数:7/20, 奖励:-200.0\n", + "回合数:8/20, 奖励:-200.0\n", + "回合数:9/20, 奖励:-200.0\n", + "回合数:10/20, 奖励:-299.0\n", + "回合数:11/20, 奖励:-200.0\n", + "回合数:12/20, 奖励:-200.0\n", + "回合数:13/20, 奖励:-200.0\n", + "回合数:14/20, 奖励:-200.0\n", + "回合数:15/20, 奖励:-200.0\n", + "回合数:16/20, 奖励:-200.0\n", + "回合数:17/20, 奖励:-200.0\n", + "回合数:18/20, 奖励:-200.0\n", + "回合数:19/20, 奖励:-200.0\n", + "回合数:20/20, 奖励:-200.0\n", + "完成测试!\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# 获取参数\n", + "cfg = Config() \n", + "# 训练\n", + "env, agent = env_agent_config(cfg)\n", + "res_dic = train(cfg, env, agent)\n", + " \n", + "plot_rewards(res_dic['rewards'], title=f\"training curve on {cfg.device} of {cfg.algo_name} for {cfg.env_name}\") \n", + "# 测试\n", + "res_dic = test(cfg, env, agent)\n", + "plot_rewards(res_dic['rewards'], title=f\"testing curve on {cfg.device} of {cfg.algo_name} for {cfg.env_name}\") # 画出结果" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.7.12 ('easyrl')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.12" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "f5a9629e9f3b9957bf68a43815f911e93447d47b3d065b6a8a04975e44c504d9" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/projects/notebooks/Q-learning/Q-learning探索策略研究.ipynb b/projects/notebooks/Q-learning/Q-learning探索策略研究.ipynb new file mode 100644 index 0000000..40583fd --- /dev/null +++ b/projects/notebooks/Q-learning/Q-learning探索策略研究.ipynb @@ -0,0 +1,32 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Q learning with different exploration strategies\n", + "\n", + "Authors: [johnjim0816](https://github.com/johnjim0816)\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.7.13 ('easyrl')", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.7.13" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "8994a120d39b6e6a2ecc94b4007f5314b68aa69fc88a7f00edf21be39b41f49c" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git 
a/projects/notebooks/Q-learning/QLearning.ipynb b/projects/notebooks/Q-learning/QLearning.ipynb new file mode 100644 index 0000000..debb47e --- /dev/null +++ b/projects/notebooks/Q-learning/QLearning.ipynb @@ -0,0 +1,459 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1、定义算法\n", + "强化学习算法的模式都比较固定,一般包括sample(即训练时采样动作),predict(测试时预测动作),update(算法更新)以及保存模型和加载模型等几个方法,其中对于每种算法sample和update的方式各不相同,而其他方法就大同小异。" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import math\n", + "import torch\n", + "from collections import defaultdict\n", + "\n", + "class QLearning(object):\n", + " def __init__(self,n_states,\n", + " n_actions,cfg):\n", + " self.n_actions = n_actions \n", + " self.lr = cfg.lr # 学习率\n", + " self.gamma = cfg.gamma \n", + " self.epsilon = cfg.epsilon_start\n", + " self.sample_count = 0 \n", + " self.epsilon_start = cfg.epsilon_start\n", + " self.epsilon_end = cfg.epsilon_end\n", + " self.epsilon_decay = cfg.epsilon_decay\n", + " self.Q_table = defaultdict(lambda: np.zeros(n_actions)) # 用嵌套字典存放状态->动作->状态-动作值(Q值)的映射,即Q表\n", + " def sample_action(self, state):\n", + " ''' 采样动作,训练时用\n", + " '''\n", + " self.sample_count += 1\n", + " self.epsilon = self.epsilon_end + (self.epsilon_start - self.epsilon_end) * \\\n", + " math.exp(-1. 
* self.sample_count / self.epsilon_decay) # epsilon是会递减的,这里选择指数递减\n", + " # e-greedy 策略\n", + " if np.random.uniform(0, 1) > self.epsilon:\n", + " action = np.argmax(self.Q_table[str(state)]) # 选择Q(s,a)最大对应的动作\n", + " else:\n", + " action = np.random.choice(self.n_actions) # 随机选择动作\n", + " return action\n", + " def predict_action(self,state):\n", + " ''' 预测或选择动作,测试时用\n", + " '''\n", + " action = np.argmax(self.Q_table[str(state)])\n", + " return action\n", + " def update(self, state, action, reward, next_state, terminated):\n", + " Q_predict = self.Q_table[str(state)][action] \n", + " if terminated: # 终止状态\n", + " Q_target = reward \n", + " else:\n", + " Q_target = reward + self.gamma * np.max(self.Q_table[str(next_state)]) \n", + " self.Q_table[str(state)][action] += self.lr * (Q_target - Q_predict)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2、定义训练\n", + "强化学习算法的训练方式也比较固定,如下:\n", + "```python\n", + "for i_ep in range(train_eps): # 遍历每个回合\n", + " state = env.reset() # 重置环境,即开始新的回合\n", + " while True: # 对于一些比较复杂的游戏可以设置每回合最大的步长,例如while ep_step<100,就是每回合最大步长为100。\n", + " action = agent.sample(state) # 根据算法采样一个动作\n", + " next_state, reward, terminated, _ = env.step(action) # 与环境进行一次动作交互\n", + " agent.memory.push(state, action, reward, next_state, terminated) # 记录memory\n", + " agent.update(state, action, reward, next_state, terminated) # 算法更新\n", + " state = next_state # 更新状态\n", + " if terminated:\n", + " break\n", + "```\n", + "首先对于每个回合,回合开始时环境需要重置,好比我们每次开一把游戏需要从头再来一样。我们可以设置智能体在每回合的最大步长,尤其是对于比较复杂的游戏,这样做的好处之一就是帮助智能体在训练中快速收敛,比如我们先验地知道最优解的大概步数,那么理论上智能体收敛时也应该是这个步数附近,设置最大步数可以方便智能体接近这个最优解。在每个回合中,智能体首先需要采样(sample),或者说采用探索策略例如常见的$\\varepsilon$-greedy策略或者UCB探索策略等等。采样的过程是将当前的状态state作为输入,智能体采样输出动作action。然后环境根据采样出来的动作反馈出下一个状态以及相应的reward等信息。接下来对于具有memory的智能体例如包含replay memory的DQN来说,需要将相应的transition(记住这个词,中文不好翻译,通常是状态、动作、奖励等信息)存入memory。紧接着就是智能体更新,对于深度强化学习此时一般从memory中随机采样一些transition进行更新,对于Q 
learning一般是采样上一次的transition。更新公式是比较关键的部分,但是也很通用,一般基于值的算法更新公式都是一个套路如下:\n", + "$$\n", + "y_{j}= \\begin{cases}r_{j} & \\text { for terminal } s_{j+1} \\\\ r_{j}+\\gamma \\max _{a^{\\prime}} Q\\left(s_{j+1}, a^{\\prime} ; \\theta\\right) & \\text { for non-terminal } s_{j+1}\\end{cases}\n", + "$$\n", + "智能体更新完之后,通常需要更新状态,即```state = next_state```,然后会检查是否完成了这一回合的游戏,即```terminated==True```,注意完成并不代表这回合成功,也有可能是失败的太离谱,等同学们有了自定义强化学习环境的经验就知道了(等你长大就知道了XD)。\n", + "如果需要记录奖励、损失等等的话可以再加上,如下方代码,实际项目中更多地使用tensorboard来记录相应的数据,甚至于笔者就在这些教学代码中使用过,但是看起来有些繁琐,容易给大家增加不必要的学习难度,因此学有余力以及需要在项目研究中做强化学习的可以去看看,也很简单。\n", + "此外稍微复杂一些的强化学习不是一次性写完代码就能收敛的,这时需要我们做一个调参侠。为了检查我们参数调得好不好,可以在终端print出奖励、损失以及epsilon等随着回合数的变化,这点说明一下强化学习的训练过程一般都是先探索然后收敛的,官方的话就是权衡exploration and exploitation。e-greedy策略的做法就是前期探索,然后逐渐减小探索率至慢慢收敛,也就是这个epsilon。这个值越大比如0.9就说明智能体90%的概率在随机探索,通常情况下会设置三个值,epsilon_start、epsilon_end以及epsilon_decay,即初始值、终止值和衰减率,其中初始值一般是0.95不变,终止值是0.01,也就是说即使在收敛阶段也让智能体保持很小概率的探索,这样做的原因就是智能体已经学出了一个不错的策略,但是保不齐还有更好的策略,好比我们知道要出人头地学历高比较重要,但是“人还是要有梦想的,万一实现了呢”,总是存在意外的可能,对吧。回归正题,比较关键的是epsilon_decay这个衰减率,这个epsilon衰减太快了学来的策略往往过拟合,好比一条只能选择一朵花的花道上,你早早选择了一朵看起来还可以的花,却错过了后面更多的好花。但是衰减的太慢会影响收敛的速度,好比你走过了花道的尽头也还没选出一朵花来,相比前者不如更甚。当然强化学习的调参相比于深度学习只能说是有过之无不及,比较复杂,不止epsilon这一个,这就需要同学们的耐心学习了。\n", + "强化学习测试的代码跟训练基本上是一样的,因此我放到同一个代码段里。相比于训练代码,测试代码主要有以下几点不同:1、测试模型的过程是不需要更新的,这个是不言而喻的;2、测试代码不需要采样(sample)动作,取而代之的是预测(predict)动作,其区别就是采样动作时可能会使用各种策略例如$\\varepsilon$-greedy策略,而预测动作不需要,只需要根据训练时学习好的Q表或者网络模型代入状态得到动作即可;3、测试过程终端一般只需要看奖励,不需要看epsilon等,反正它在测试中也是无意义的。" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": {}, + "outputs": [], + "source": [ + "def train(cfg,env,agent):\n", + " print('开始训练!')\n", + " print(f'环境:{cfg.env_name}, 算法:{cfg.algo_name}, 设备:{cfg.device}')\n", + " rewards = [] # 记录奖励\n", + " for i_ep in range(cfg.train_eps):\n", + " ep_reward = 0 # 记录每个回合的奖励\n", + " state = env.reset(seed=cfg.seed) # 重置环境,即开始新的回合\n", + " while True:\n", + " action = agent.sample_action(state) # 根据算法采样一个动作\n", + " next_state, 
reward, terminated, info = env.step(action) # 与环境进行一次动作交互\n", + " agent.update(state, action, reward, next_state, terminated) # Q学习算法更新\n", + " state = next_state # 更新状态\n", + " ep_reward += reward\n", + " if terminated:\n", + " break\n", + " rewards.append(ep_reward)\n", + " if (i_ep+1)%20==0:\n", + " print(f\"回合:{i_ep+1}/{cfg.train_eps},奖励:{ep_reward:.1f},Epsilon:{agent.epsilon:.3f}\")\n", + " print('完成训练!')\n", + " return {\"rewards\":rewards}\n", + "def test(cfg,env,agent):\n", + " print('开始测试!')\n", + " print(f'环境:{cfg.env_name}, 算法:{cfg.algo_name}, 设备:{cfg.device}')\n", + " rewards = [] # 记录所有回合的奖励\n", + " for i_ep in range(cfg.test_eps):\n", + " ep_reward = 0 # 记录每个episode的reward\n", + " state = env.reset(seed=cfg.seed) # 重置环境, 重新开一局(即开始新的一个回合)\n", + " while True:\n", + " action = agent.predict_action(state) # 根据算法选择一个动作\n", + " next_state, reward, terminated, info = env.step(action) # 与环境进行一个交互\n", + " state = next_state # 更新状态\n", + " ep_reward += reward\n", + " if terminated:\n", + " break\n", + " rewards.append(ep_reward)\n", + " print(f\"回合数:{i_ep+1}/{cfg.test_eps}, 奖励:{ep_reward:.1f}\")\n", + " print('完成测试!')\n", + " return {\"rewards\":rewards}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3、定义环境\n", + "\n", + "OpenAI Gym中其实集成了很多强化学习环境,足够大家学习了,但是在做强化学习的应用中免不了要自己创建环境,比如在本项目中其实不太好找到Qlearning能学出来的环境,Qlearning实在是太弱了,需要足够简单的环境才行,因此本项目写了一个环境,大家感兴趣的话可以看一下,一般环境接口最关键的部分即是reset和step。" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": {}, + "outputs": [], + "source": [ + "import gym\n", + "import turtle\n", + "import numpy as np\n", + "\n", + "# turtle tutorial : https://docs.python.org/3.3/library/turtle.html\n", + "\n", + "class CliffWalkingWapper(gym.Wrapper):\n", + " def __init__(self, env):\n", + " gym.Wrapper.__init__(self, env)\n", + " self.t = None\n", + " self.unit = 50\n", + " self.max_x = 12\n", + " self.max_y = 4\n", + "\n", + " def draw_x_line(self, y, x0, x1, color='gray'):\n", + " assert x1 
> x0\n", + " self.t.color(color)\n", + " self.t.setheading(0)\n", + " self.t.up()\n", + " self.t.goto(x0, y)\n", + " self.t.down()\n", + " self.t.forward(x1 - x0)\n", + "\n", + " def draw_y_line(self, x, y0, y1, color='gray'):\n", + " assert y1 > y0\n", + " self.t.color(color)\n", + " self.t.setheading(90)\n", + " self.t.up()\n", + " self.t.goto(x, y0)\n", + " self.t.down()\n", + " self.t.forward(y1 - y0)\n", + "\n", + " def draw_box(self, x, y, fillcolor='', line_color='gray'):\n", + " self.t.up()\n", + " self.t.goto(x * self.unit, y * self.unit)\n", + " self.t.color(line_color)\n", + " self.t.fillcolor(fillcolor)\n", + " self.t.setheading(90)\n", + " self.t.down()\n", + " self.t.begin_fill()\n", + " for i in range(4):\n", + " self.t.forward(self.unit)\n", + " self.t.right(90)\n", + " self.t.end_fill()\n", + "\n", + " def move_player(self, x, y):\n", + " self.t.up()\n", + " self.t.setheading(90)\n", + " self.t.fillcolor('red')\n", + " self.t.goto((x + 0.5) * self.unit, (y + 0.5) * self.unit)\n", + "\n", + " def render(self):\n", + " if self.t == None:\n", + " self.t = turtle.Turtle()\n", + " self.wn = turtle.Screen()\n", + " self.wn.setup(self.unit * self.max_x + 100,\n", + " self.unit * self.max_y + 100)\n", + " self.wn.setworldcoordinates(0, 0, self.unit * self.max_x,\n", + " self.unit * self.max_y)\n", + " self.t.shape('circle')\n", + " self.t.width(2)\n", + " self.t.speed(0)\n", + " self.t.color('gray')\n", + " for _ in range(2):\n", + " self.t.forward(self.max_x * self.unit)\n", + " self.t.left(90)\n", + " self.t.forward(self.max_y * self.unit)\n", + " self.t.left(90)\n", + " for i in range(1, self.max_y):\n", + " self.draw_x_line(\n", + " y=i * self.unit, x0=0, x1=self.max_x * self.unit)\n", + " for i in range(1, self.max_x):\n", + " self.draw_y_line(\n", + " x=i * self.unit, y0=0, y1=self.max_y * self.unit)\n", + "\n", + " for i in range(1, self.max_x - 1):\n", + " self.draw_box(i, 0, 'black')\n", + " self.draw_box(self.max_x - 1, 0, 'yellow')\n", + " 
self.t.shape('turtle')\n", + "\n", + " x_pos = self.s % self.max_x\n", + " y_pos = self.max_y - 1 - int(self.s / self.max_x)\n", + " self.move_player(x_pos, y_pos)" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": {}, + "outputs": [], + "source": [ + "import gym\n", + "def env_agent_config(cfg,seed=1):\n", + " '''创建环境和智能体\n", + " ''' \n", + " env = gym.make(cfg.env_name,new_step_api=True) \n", + " env = CliffWalkingWapper(env)\n", + " n_states = env.observation_space.n # 状态维度\n", + " n_actions = env.action_space.n # 动作维度\n", + " agent = QLearning(n_states,n_actions,cfg)\n", + " return env,agent" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 4、设置参数\n", + "\n", + "到这里所有qlearning模块就算完成了,下面需要设置一些参数,方便大家“炼丹”,其中默认的是笔者已经调好的~。另外还定义了一个画图函数,用来描述奖励的变化。" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": {}, + "outputs": [], + "source": [ + "import datetime\n", + "import argparse\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "class Config:\n", + " '''配置参数\n", + " '''\n", + " def __init__(self):\n", + " self.env_name = 'CliffWalking-v0' # 环境名称\n", + " self.algo_name = 'Q-Learning' # 算法名称\n", + " self.train_eps = 400 # 训练回合数\n", + " self.test_eps = 20 # 测试回合数\n", + " self.max_steps = 200 # 每个回合最大步数\n", + " self.epsilon_start = 0.95 # e-greedy策略中epsilon的初始值\n", + " self.epsilon_end = 0.01 # e-greedy策略中epsilon的最终值\n", + " self.epsilon_decay = 300 # e-greedy策略中epsilon的衰减率\n", + " self.gamma = 0.9 # 折扣因子\n", + " self.lr = 0.1 # 学习率\n", + " self.seed = 1 # 随机种子\n", + " if torch.cuda.is_available(): # 是否使用GPUs\n", + " self.device = torch.device('cuda')\n", + " else:\n", + " self.device = torch.device('cpu')\n", + "\n", + "def smooth(data, weight=0.9): \n", + " '''用于平滑曲线\n", + " '''\n", + " last = data[0] # First value in the plot (first timestep)\n", + " smoothed = list()\n", + " for point in data:\n", + " smoothed_val = last * weight + (1 - weight) * point # 计算平滑值\n", + " 
smoothed.append(smoothed_val) \n", + " last = smoothed_val \n", + " return smoothed\n", + "\n", + "def plot_rewards(rewards,title=\"learning curve\"):\n", + " sns.set()\n", + " plt.figure() # 创建一个图形实例,方便同时多画几个图\n", + " plt.title(f\"{title}\")\n", + " plt.xlim(0, len(rewards), 10) # 设置x轴的范围\n", + " plt.xlabel('epsiodes')\n", + " plt.plot(rewards, label='rewards')\n", + " plt.plot(smooth(rewards), label='smoothed')\n", + " plt.legend()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 5、我准备好了!\n", + "\n", + "到现在我们真的可以像海绵宝宝那样大声说出来“我准备好了!”,跟着注释来看下效果吧~。" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "c:\\Users\\24438\\anaconda3\\envs\\easyrl\\lib\\site-packages\\gym\\core.py:318: DeprecationWarning: \u001b[33mWARN: Initializing wrapper in old step API which returns one bool instead of two. It is recommended to set `new_step_api=True` to use new step API. This will be the default behaviour in future.\u001b[0m\n", + " \"Initializing wrapper in old step API which returns one bool instead of two. It is recommended to set `new_step_api=True` to use new step API. 
This will be the default behaviour in future.\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "开始训练!\n", + "环境:CliffWalking-v0, 算法:Q-Learning, 设备:cuda\n", + "回合:20/400,奖励:-126.0,Epsilon:0.010\n", + "回合:40/400,奖励:-43.0,Epsilon:0.010\n", + "回合:60/400,奖励:-37.0,Epsilon:0.010\n", + "回合:80/400,奖励:-52.0,Epsilon:0.010\n", + "回合:100/400,奖励:-49.0,Epsilon:0.010\n", + "回合:120/400,奖励:-38.0,Epsilon:0.010\n", + "回合:140/400,奖励:-26.0,Epsilon:0.010\n", + "回合:160/400,奖励:-23.0,Epsilon:0.010\n", + "回合:180/400,奖励:-17.0,Epsilon:0.010\n", + "回合:200/400,奖励:-36.0,Epsilon:0.010\n", + "回合:220/400,奖励:-18.0,Epsilon:0.010\n", + "回合:240/400,奖励:-29.0,Epsilon:0.010\n", + "回合:260/400,奖励:-13.0,Epsilon:0.010\n", + "回合:280/400,奖励:-16.0,Epsilon:0.010\n", + "回合:300/400,奖励:-13.0,Epsilon:0.010\n", + "回合:320/400,奖励:-14.0,Epsilon:0.010\n", + "回合:340/400,奖励:-13.0,Epsilon:0.010\n", + "回合:360/400,奖励:-13.0,Epsilon:0.010\n", + "回合:380/400,奖励:-13.0,Epsilon:0.010\n", + "回合:400/400,奖励:-13.0,Epsilon:0.010\n", + "完成训练!\n", + "开始测试!\n", + "环境:CliffWalking-v0, 算法:Q-Learning, 设备:cuda\n", + "回合数:1/20, 奖励:-13.0\n", + "回合数:2/20, 奖励:-13.0\n", + "回合数:3/20, 奖励:-13.0\n", + "回合数:4/20, 奖励:-13.0\n", + "回合数:5/20, 奖励:-13.0\n", + "回合数:6/20, 奖励:-13.0\n", + "回合数:7/20, 奖励:-13.0\n", + "回合数:8/20, 奖励:-13.0\n", + "回合数:9/20, 奖励:-13.0\n", + "回合数:10/20, 奖励:-13.0\n", + "回合数:11/20, 奖励:-13.0\n", + "回合数:12/20, 奖励:-13.0\n", + "回合数:13/20, 奖励:-13.0\n", + "回合数:14/20, 奖励:-13.0\n", + "回合数:15/20, 奖励:-13.0\n", + "回合数:16/20, 奖励:-13.0\n", + "回合数:17/20, 奖励:-13.0\n", + "回合数:18/20, 奖励:-13.0\n", + "回合数:19/20, 奖励:-13.0\n", + "回合数:20/20, 奖励:-13.0\n", + "完成测试!\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "c:\\Users\\24438\\anaconda3\\envs\\easyrl\\lib\\site-packages\\seaborn\\rcmod.py:400: DeprecationWarning: distutils Version classes are deprecated. 
Use packaging.version instead.\n", + " if LooseVersion(mpl.__version__) >= \"3.0\":\n", + "c:\\Users\\24438\\anaconda3\\envs\\easyrl\\lib\\site-packages\\setuptools\\_distutils\\version.py:346: DeprecationWarning: distutils Version classes are deprecated. Use packaging.version instead.\n", + " other = LooseVersion(other)\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# 获取参数\n", + "cfg = Config() \n", + "# 训练\n", + "env, agent = env_agent_config(cfg)\n", + "res_dic = train(cfg, env, agent)\n", + " \n", + "plot_rewards(res_dic['rewards'], title=f\"training curve on {cfg.device} of {cfg.algo_name} for {cfg.env_name}\") \n", + "# 测试\n", + "res_dic = test(cfg, env, agent)\n", + "plot_rewards(res_dic['rewards'], title=f\"testing curve on {cfg.device} of {cfg.algo_name} for {cfg.env_name}\") # 画出结果" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.7.12 ('easyrl')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.12" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "f5a9629e9f3b9957bf68a43815f911e93447d47b3d065b6a8a04975e44c504d9" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/projects/notebooks/Value Iteration/README.md b/projects/notebooks/Value Iteration/README.md new file mode 100644 index 0000000..e69de29 diff --git a/projects/requirements.txt b/projects/requirements.txt index 7dbd44a..5cda89e 100644 --- a/projects/requirements.txt +++ b/projects/requirements.txt @@ -1,10 +1,11 @@ pyyaml==6.0 ipykernel==6.15.1 jupyter==1.0.0 -matplotlib==3.5.2 -seaborn==0.11.2 +matplotlib==3.5.3 +seaborn==0.12.1 dill==0.3.5.1 argparse==1.4.0 pandas==1.3.5 pyglet==1.5.26 -importlib-metadata<5.0 \ No newline at end of file +importlib-metadata<5.0 +setuptools==65.2.0 \ No newline at end of file