diff --git a/projects/PARL/DQN.ipynb b/projects/PARL/DQN.ipynb new file mode 100644 index 0000000..57b0746 --- /dev/null +++ b/projects/PARL/DQN.ipynb @@ -0,0 +1,318 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 定义模型\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "import paddle\n", + "import paddle.nn as nn\n", + "import paddle.nn.functional as F\n", + "import parl\n", + "\n", + "class CartpoleModel(parl.Model):\n", + " \"\"\" Linear network to solve Cartpole problem.\n", + " Args:\n", + " n_states (int): Dimension of observation space.\n", + " n_actions (int): Dimension of action space.\n", + " \"\"\"\n", + "\n", + " def __init__(self, n_states, n_actions):\n", + " super(CartpoleModel, self).__init__()\n", + " hid1_size = 128\n", + " hid2_size = 128\n", + " self.fc1 = nn.Linear(n_states, hid1_size)\n", + " self.fc2 = nn.Linear(hid1_size, hid2_size)\n", + " self.fc3 = nn.Linear(hid2_size, n_actions)\n", + "\n", + " def forward(self, obs):\n", + " h1 = F.relu(self.fc1(obs))\n", + " h2 = F.relu(self.fc2(h1))\n", + " Q = self.fc3(h2)\n", + " return Q" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "import parl\n", + "import paddle\n", + "import numpy as np\n", + "\n", + "\n", + "class CartpoleAgent(parl.Agent):\n", + " \"\"\"Agent of Cartpole env.\n", + " Args:\n", + " algorithm(parl.Algorithm): algorithm used to solve the problem.\n", + " \"\"\"\n", + "\n", + " def __init__(self, algorithm, n_actions, e_greed=0.1, e_greed_decrement=0):\n", + " super(CartpoleAgent, self).__init__(algorithm)\n", + " assert isinstance(n_actions, int)\n", + " self.n_actions = n_actions\n", + "\n", + " self.global_step = 0\n", + " self.update_target_steps = 200\n", + "\n", + " self.e_greed = e_greed\n", + " self.e_greed_decrement = e_greed_decrement\n", + "\n", + " def sample(self, obs):\n", + " \"\"\"Sample an action `for exploration` when given an observation\n", + " Args:\n", + " obs(np.float32): shape of (n_states,)\n", + " Returns:\n", + " act(int): action\n", + " \"\"\"\n", + " sample = np.random.random()\n", + " if sample < self.e_greed:\n", + " act = np.random.randint(self.n_actions)\n", + " else:\n", + " if np.random.random() < 0.01:\n", + " act = np.random.randint(self.n_actions)\n", + " else:\n", + " act = self.predict(obs)\n", + " self.e_greed = max(0.01, self.e_greed - self.e_greed_decrement)\n", + " return act\n", + "\n", + " def predict(self, obs):\n", + " \"\"\"Predict an action when given an observation\n", + " Args:\n", + " obs(np.float32): shape of (n_states,)\n", + " Returns:\n", + " act(int): action\n", + " \"\"\"\n", + " obs = paddle.to_tensor(obs, dtype='float32')\n", + " pred_q = self.alg.predict(obs)\n", + " act = pred_q.argmax().numpy()[0]\n", + " return act\n", + "\n", + " def learn(self, obs, act, reward, next_obs, terminal):\n", + " \"\"\"Update model with an episode data\n", + " Args:\n", + " obs(np.float32): shape of (batch_size, n_states)\n", + " act(np.int32): shape of (batch_size)\n", + " reward(np.float32): shape of (batch_size)\n", + " next_obs(np.float32): shape of (batch_size, n_states)\n", + " terminal(np.float32): shape of (batch_size)\n", + " Returns:\n", + " loss(float)\n", + " \"\"\"\n", + " if self.global_step % self.update_target_steps == 0:\n", + " self.alg.sync_target()\n", + " self.global_step += 1\n", + "\n", + " act = np.expand_dims(act, axis=-1)\n", + " reward = 
np.expand_dims(reward, axis=-1)\n", + " terminal = np.expand_dims(terminal, axis=-1)\n", + "\n", + " obs = paddle.to_tensor(obs, dtype='float32')\n", + " act = paddle.to_tensor(act, dtype='int32')\n", + " reward = paddle.to_tensor(reward, dtype='float32')\n", + " next_obs = paddle.to_tensor(next_obs, dtype='float32')\n", + " terminal = paddle.to_tensor(terminal, dtype='float32')\n", + " loss = self.alg.learn(obs, act, reward, next_obs, terminal)\n", + " return loss.numpy()[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import gym\n", + "import numpy as np\n", + "import parl\n", + "\n", + "from parl.utils import logger, ReplayMemory\n", + "from parl.algorithms import DQN\n", + "\n", + "LEARN_FREQ = 5 # training frequency\n", + "MEMORY_SIZE = 200000\n", + "MEMORY_WARMUP_SIZE = 200\n", + "BATCH_SIZE = 64\n", + "LEARNING_RATE = 0.0005\n", + "GAMMA = 0.99\n", + "\n", + "# train an episode\n", + "def run_train_episode(agent, env, rpm):\n", + " total_reward = 0\n", + " obs = env.reset()\n", + " step = 0\n", + " while True:\n", + " step += 1\n", + " action = agent.sample(obs)\n", + " next_obs, reward, done, _ = env.step(action)\n", + " rpm.append(obs, action, reward, next_obs, done)\n", + "\n", + " # train model\n", + " if (len(rpm) > MEMORY_WARMUP_SIZE) and (step % LEARN_FREQ == 0):\n", + " # s,a,r,s',done\n", + " (batch_obs, batch_action, batch_reward, batch_next_obs,\n", + " batch_done) = rpm.sample_batch(BATCH_SIZE)\n", + " train_loss = agent.learn(batch_obs, batch_action, batch_reward,\n", + " batch_next_obs, batch_done)\n", + "\n", + " total_reward += reward\n", + " obs = next_obs\n", + " if done:\n", + " break\n", + " return total_reward\n", + "\n", + "\n", + "# evaluate 5 episodes\n", + "def run_evaluate_episodes(agent, env, eval_episodes=5, render=False):\n", + " eval_reward = []\n", + " for i in range(eval_episodes):\n", + " obs = env.reset()\n", + " episode_reward = 0\n", + " while True:\n", + " action = agent.predict(obs)\n", + " obs, reward, done, _ = env.step(action)\n", + " episode_reward += reward\n", + " if render:\n", + " env.render()\n", + " if done:\n", + " break\n", + " eval_reward.append(episode_reward)\n", + " return np.mean(eval_reward)\n", + "\n", + "\n", + "def main(args):\n", + " env = gym.make('CartPole-v0')\n", + " n_states = env.observation_space.shape[0]\n", + " n_actions = env.action_space.n\n", + " logger.info('n_states {}, n_actions {}'.format(n_states, n_actions))\n", + "\n", + " # set action_shape = 0 while in discrete control environment\n", + " rpm = ReplayMemory(MEMORY_SIZE, n_states, 0)\n", + "\n", + " # build an agent\n", + " model = CartpoleModel(n_states=n_states, n_actions=n_actions)\n", + " alg = DQN(model, gamma=GAMMA, lr=LEARNING_RATE)\n", + " agent = CartpoleAgent(\n", + " alg, n_actions=n_actions, e_greed=0.1, e_greed_decrement=1e-6)\n", + "\n", + " # warmup memory\n", + " while len(rpm) < MEMORY_WARMUP_SIZE:\n", + " run_train_episode(agent, env, rpm)\n", + "\n", + " max_episode = args.max_episode\n", + "\n", + " # start training\n", + " episode = 0\n", + " while episode < max_episode:\n", + " # train part\n", + " for i in range(50):\n", + " total_reward = run_train_episode(agent, env, rpm)\n", + " episode += 1\n", + "\n", + " # test part\n", + " eval_reward = run_evaluate_episodes(agent, env, render=False)\n", + " logger.info('episode:{} e_greed:{} Test reward:{}'.format(\n", + " episode, agent.e_greed, eval_reward))\n", + "\n", + " # save the parameters to 
./model.ckpt\n", + " save_path = './model.ckpt'\n", + " agent.save(save_path)\n", + "\n", + " # save the model and parameters of policy network for inference\n", + " save_inference_path = './inference_model'\n", + " input_shapes = [[None, env.observation_space.shape[0]]]\n", + " input_dtypes = ['float32']\n", + " agent.save_inference_model(save_inference_path, input_shapes, input_dtypes)\n", + "\n", + "\n", + "\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[32m[08-01 21:48:19 MainThread @3996942455.py:64]\u001b[0m obs_dim 4, act_dim 2\n", + "\u001b[32m[08-01 21:48:19 MainThread @3996942455.py:92]\u001b[0m episode:50 e_greed:0.0988929999999989 Test reward:18.4\n", + "\u001b[32m[08-01 21:48:20 MainThread @3996942455.py:92]\u001b[0m episode:100 e_greed:0.09794799999999795 Test reward:9.6\n", + "\u001b[32m[08-01 21:48:20 MainThread @3996942455.py:92]\u001b[0m episode:150 e_greed:0.0973899999999974 Test reward:37.8\n", + "\u001b[32m[08-01 21:48:20 MainThread @3996942455.py:92]\u001b[0m episode:200 e_greed:0.09684299999999685 Test reward:8.8\n", + "\u001b[32m[08-01 21:48:20 MainThread @3996942455.py:92]\u001b[0m episode:250 e_greed:0.09635499999999636 Test reward:9.4\n", + "\u001b[32m[08-01 21:48:21 MainThread @3996942455.py:92]\u001b[0m episode:300 e_greed:0.09585299999999586 Test reward:9.2\n", + "\u001b[32m[08-01 21:48:21 MainThread @3996942455.py:92]\u001b[0m episode:350 e_greed:0.09535799999999536 Test reward:9.2\n", + "\u001b[32m[08-01 21:48:21 MainThread @3996942455.py:92]\u001b[0m episode:400 e_greed:0.09486399999999487 Test reward:10.0\n", + "\u001b[32m[08-01 21:48:21 MainThread @3996942455.py:92]\u001b[0m episode:450 e_greed:0.09435299999999436 Test reward:9.2\n", + "\u001b[32m[08-01 21:48:22 MainThread @3996942455.py:92]\u001b[0m episode:500 e_greed:0.09384899999999385 Test reward:9.4\n", + "\u001b[32m[08-01 21:48:22 MainThread @3996942455.py:92]\u001b[0m episode:550 e_greed:0.09302299999999303 Test reward:69.0\n", + "\u001b[32m[08-01 21:48:25 MainThread @3996942455.py:92]\u001b[0m episode:600 e_greed:0.08774199999998775 Test reward:141.2\n", + "\u001b[32m[08-01 21:48:30 MainThread @3996942455.py:92]\u001b[0m episode:650 e_greed:0.0791019999999791 Test reward:184.0\n", + "\u001b[32m[08-01 21:48:35 MainThread @3996942455.py:92]\u001b[0m episode:700 e_greed:0.07011299999997012 Test reward:182.0\n", + "\u001b[32m[08-01 21:48:40 MainThread @3996942455.py:92]\u001b[0m episode:750 e_greed:0.06089099999996089 Test reward:197.4\n", + "\u001b[32m[08-01 21:48:45 MainThread @3996942455.py:92]\u001b[0m episode:800 e_greed:0.05139199999995139 Test reward:183.4\n", + "\u001b[32m[08-01 21:48:50 MainThread @3996942455.py:92]\u001b[0m episode:850 e_greed:0.042255999999942256 Test reward:153.0\n", + "\u001b[32m[08-01 21:48:55 MainThread @3996942455.py:92]\u001b[0m episode:900 e_greed:0.033495999999933496 Test reward:192.6\n", + "\u001b[32m[08-01 21:49:00 MainThread @3996942455.py:92]\u001b[0m episode:950 e_greed:0.024318999999924318 Test reward:166.6\n", + "\u001b[32m[08-01 21:49:06 MainThread @3996942455.py:92]\u001b[0m episode:1000 e_greed:0.014873999999916176 Test reward:187.0\n" + ] + } + ], + "source": [ + "import argparse\n", + "parser = argparse.ArgumentParser()\n", + "parser.add_argument(\n", + " '--max_episode',\n", + " type=int,\n", + " default=1000,\n", + " help='stop condition: number of max episode')\n", + "args = parser.parse_args(args=[])\n", + "\n", + 
"main(args)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.7.12 ('rl_tutorials')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.12" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "4f613f1ab80ec98dc1b91d6e720de51301598a187317378e53e49b773c1123dd" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/projects/PARL/README.md b/projects/PARL/README.md new file mode 100644 index 0000000..a8ddbb7 --- /dev/null +++ b/projects/PARL/README.md @@ -0,0 +1,11 @@ +[PARL](https://github.com/PaddlePaddle/PARL)是一个高性能、灵活的强化学习框架,由百度AI Studio开发。 + +## 安装 + +1. 安装parl,参考[PARL Github](https://github.com/PaddlePaddle/PARL) +2. 安装paddlepaddle:```pip install paddlepaddle``` + +## 常见问题 + +```jupyter-client 7.3.1 requires pyzmq>=22.3, but you have pyzmq 18.1.1 which is incompatible.```: +```pip install -U pyzmq``` \ No newline at end of file diff --git a/projects/README.md b/projects/README.md index ee63dd4..2ad650f 100644 --- a/projects/README.md +++ b/projects/README.md @@ -1,49 +1,34 @@ ## 0、写在前面 -本项目用于学习RL基础算法,尽量做到: **注释详细**(经过很长时间的纠结,还是中文注释好了!!!),**结构清晰**。 +本项目用于学习RL基础算法,主要面向对象为RL初学者、需要结合RL的非专业学习者,尽量做到: **(中文)注释详细**,**结构清晰**。 -代码结构主要分为以下几个脚本: +注意本项目为实战内容,建议首先掌握相关算法的一些理论基础,再来享用本项目,理论教程参考本人参与编写的[蘑菇书](https://github.com/datawhalechina/easy-rl)。 +未来开发计划包括但不限于:多智能体算法、强化学习Python包以及强化学习图形化编程平台等等。 + +## 1、项目说明 + +项目内容主要包含以下几个部分: +* [Jupyter Notebook](./notebooks/):使用Notebook写的算法,有比较详细的实战引导,推荐新手食用 +* [codes](./assets/):这些是基于Python脚本写的算法,风格比较接近实际项目的写法,推荐有一定代码基础的人阅读,下面会说明其具体的一些架构 +* [parl](./PARL/):应业务需求,写了一些基于百度飞浆平台和```parl```模块的RL实例 +* [附件](./assets/):目前包含强化学习各算法的中文伪代码 + + +[codes](./assets/)结构主要分为以下几个脚本: * ```[algorithm_name].py```:即保存算法的脚本,例如```dqn.py```,每种算法都会有一定的基础模块,例如```Replay Buffer```、```MLP```(多层感知机)等等; * ```task.py```: 即保存任务的脚本,基本包括基于```argparse```模块的参数,训练以及测试函数等等; * ```utils.py```:该脚本用于保存诸如存储结果以及画图的软件,在实际项目或研究中,推荐大家使用```Tensorboard```来保存结果,然后使用诸如```matplotlib```以及```seabron```来进一步画图。 -## 运行环境 +## 2、运行环境 python 3.7、pytorch 1.6.0-1.9.0、gym 0.21.0 -或者在```README.md```目录下执行以下命令复现环境: +在项目根目录下执行以下命令复现环境: ```bash -conda env create -f environment.yaml +pip install -r requirements.txt ``` -## 使用说明 +## 3、使用说明 直接运行带有```train```的py文件或ipynb文件会进行训练默认的任务; 也可以运行带有```task```的py文件训练不同的任务 - -## 内容导航 - -| 算法名称 | 相关论文材料 | 环境 | 备注 | -| :--------------------------------------: | :----------------------------------------------------------: | ----------------------------------------- | :--------------------------------: | -| [On-Policy First-Visit MC](./MonteCarlo) | [medium blog](https://medium.com/analytics-vidhya/monte-carlo-methods-in-reinforcement-learning-part-1-on-policy-methods-1f004d59686a) | [Racetrack](./envs/racetrack_env.md) | | -| [Q-Learning](./QLearning) | [towardsdatascience blog](https://towardsdatascience.com/simple-reinforcement-learning-q-learning-fcddc4b6fe56),[q learning paper](https://ieeexplore.ieee.org/document/8836506) | [CliffWalking-v0](./envs/gym_info.md) | | -| [Sarsa](./Sarsa) | [geeksforgeeks blog](https://www.geeksforgeeks.org/sarsa-reinforcement-learning/) | [Racetrack](./envs/racetrack_env.md) | | -| [DQN](./DQN) | [DQN Paper](https://www.cs.toronto.edu/~vmnih/docs/dqn.pdf),[Nature DQN Paper](https://www.nature.com/articles/nature14236) | [CartPole-v0](./envs/gym_info.md) | | -| 
[DQN-cnn](./DQN_cnn) | [DQN Paper](https://www.cs.toronto.edu/~vmnih/docs/dqn.pdf) | [CartPole-v0](./envs/gym_info.md) | 与DQN相比使用了CNN而不是全链接网络 | -| [DoubleDQN](./DoubleDQN) | [DoubleDQN Paper](https://arxiv.org/abs/1509.06461) | [CartPole-v0](./envs/gym_info.md) | | -| [Hierarchical DQN](HierarchicalDQN) | [H-DQN Paper](https://arxiv.org/abs/1604.06057) | [CartPole-v0](./envs/gym_info.md) | | -| [PolicyGradient](./PolicyGradient) | [Lil'log](https://lilianweng.github.io/lil-log/2018/04/08/policy-gradient-algorithms.html) | [CartPole-v0](./envs/gym_info.md) | | -| [A2C](./A2C) | [A3C Paper](https://arxiv.org/abs/1602.01783) | [CartPole-v0](./envs/gym_info.md) | | -| [SAC](./SoftActorCritic) | [SAC Paper](https://arxiv.org/abs/1801.01290) | [Pendulum-v0](./envs/gym_info.md) | | -| [PPO](./PPO) | [PPO paper](https://arxiv.org/abs/1707.06347) | [CartPole-v0](./envs/gym_info.md) | | -| [DDPG](./DDPG) | [DDPG Paper](https://arxiv.org/abs/1509.02971) | [Pendulum-v0](./envs/gym_info.md) | | -| [TD3](./TD3) | [TD3 Paper](https://arxiv.org/abs/1802.09477) | [HalfCheetah-v2]((./envs/mujoco_info.md)) | | - - -## Refs - -[RL-Adventure-2](https://github.com/higgsfield/RL-Adventure-2) - -[RL-Adventure](https://github.com/higgsfield/RL-Adventure) - -[Google 开源项目风格指南——中文版](https://zh-google-styleguide.readthedocs.io/en/latest/google-python-styleguide/python_style_rules/#comments) \ No newline at end of file diff --git a/projects/assets/pseudocodes/pseudocodes.aux b/projects/assets/pseudocodes/pseudocodes.aux new file mode 100644 index 0000000..1373a7a --- /dev/null +++ b/projects/assets/pseudocodes/pseudocodes.aux @@ -0,0 +1,4 @@ +\relax +\@writefile{loa}{\contentsline {algorithm}{\numberline {}{\ignorespaces }}{1}{}\protected@file@percent } +\@writefile{loa}{\contentsline {algorithm}{\numberline {}{\ignorespaces }}{2}{}\protected@file@percent } +\gdef \@abspage@last{2} diff --git a/projects/assets/pseudocodes/pseudocodes.log b/projects/assets/pseudocodes/pseudocodes.log new file mode 100644 index 0000000..4a91f11 --- /dev/null +++ b/projects/assets/pseudocodes/pseudocodes.log @@ -0,0 +1,398 @@ +This is XeTeX, Version 3.141592653-2.6-0.999993 (TeX Live 2021) (preloaded format=xelatex 2021.8.22) 15 AUG 2022 15:05 +entering extended mode + restricted \write18 enabled. + file:line:error style messages enabled. + %&-line parsing enabled. 
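Both the PARL notebook above and the PyTorch material further below (`pseudocodes.tex`, the refactored `dqn.py`) revolve around the same Nature-DQN update: y_j = r_j for a terminal s_{j+1}, and y_j = r_j + γ·max_{a'} Q(s_{j+1}, a'; θ_target) otherwise, with the squared error (y_j − Q(s_j, a_j; θ))² minimised over a sampled batch. As a reference point, here is a minimal PyTorch sketch of that step; the function names (`make_target_net`, `dqn_loss`) and the assumed tensor shapes (batched float states, integer actions, float done flags) are illustrative and not part of the repository.

```python
import copy
import torch
import torch.nn as nn

def make_target_net(policy_net: nn.Module) -> nn.Module:
    """Create the target network as an independent copy of the policy network.

    It is then re-synchronised every C steps with
    target_net.load_state_dict(policy_net.state_dict()).
    """
    return copy.deepcopy(policy_net)

def dqn_loss(policy_net, target_net, states, actions, rewards, next_states, dones, gamma=0.99):
    """Compute the DQN loss for one sampled batch of transitions.

    Shapes (B = batch size): states/next_states (B, n_states), actions (B,),
    rewards (B,), dones (B,) with 1.0 marking terminal transitions.
    """
    q_sa = policy_net(states).gather(1, actions.long().unsqueeze(1))  # Q(s_t, a_t; theta)
    with torch.no_grad():
        next_q = target_net(next_states).max(1)[0]                    # max_a' Q(s_{t+1}, a'; theta_target)
    y = rewards + gamma * next_q * (1.0 - dones)                      # bootstrap only for non-terminal states
    return nn.MSELoss()(q_sa, y.unsqueeze(1))
```

A usage sketch would be `target_net = make_target_net(policy_net)` at construction time, then `loss = dqn_loss(...)`, `loss.backward()` and an optimiser step during training. Keeping the two networks as separate objects (rather than passing one `model` instance into both `policy_net` and `target_net`, as the refactored constructors below do) is what makes the periodic `load_state_dict` synchronisation meaningful.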
diff --git a/projects/assets/pseudocodes/pseudocodes.pdf b/projects/assets/pseudocodes/pseudocodes.pdf new file mode 100644 index 0000000..06c1da7 Binary files /dev/null and b/projects/assets/pseudocodes/pseudocodes.pdf differ diff --git a/projects/assets/pseudocodes/pseudocodes.synctex.gz b/projects/assets/pseudocodes/pseudocodes.synctex.gz new file mode 100644 index 0000000..8754760 Binary files /dev/null and b/projects/assets/pseudocodes/pseudocodes.synctex.gz differ diff --git a/projects/assets/pseudocodes/pseudocodes.tex b/projects/assets/pseudocodes/pseudocodes.tex new file mode 100644 index 0000000..4db2296 --- /dev/null +++ b/projects/assets/pseudocodes/pseudocodes.tex @@ -0,0 +1,63 @@ +\documentclass[11pt]{ctexart} +\usepackage{ctex} +\usepackage{algorithm} +\usepackage{algorithmic} +\usepackage{amssymb} +\usepackage{amsmath} + + +\begin{document} + +\begin{algorithm} + \floatname{algorithm}{{DQN算法}} + \renewcommand{\thealgorithm}{} % 去掉算法标号 + \caption{} + \renewcommand{\algorithmicrequire}{\textbf{输入:}} + \renewcommand{\algorithmicensure}{\textbf{输出:}} + \begin{algorithmic} + % \REQUIRE $n \geq 0 \vee x \neq 0$ % 输入 + % \ENSURE $y = x^n$ % 输出 + \STATE 初始化策略网络参数$\theta$ % 初始化 + \STATE 复制参数到目标网络$\hat{Q} \leftarrow Q$ + \STATE 初始化经验回放$D$ + \FOR {回合数 = $1,M$} + \STATE 重置环境,获得初始状态$s_t$ + \FOR {时步 = $1,t$} + \STATE 根据$\varepsilon-greedy$策略采样动作$a_t$ + \STATE 环境根据$a_t$反馈奖励$s_t$和下一个状态$s_{t+1}$ + \STATE 存储transition即$(s_t,a_t,r_t,s_{t+1})$到经验回放$D$中 + \STATE 更新环境状态$s_{t+1} \leftarrow s_t$ + \STATE {\bfseries 更新策略:} + \STATE 从$D$中采样一个batch的transition + \STATE 计算实际的$Q$值,即$y_{j}= \begin{cases}r_{j} & \text {对于终止状态} s_{j+1} \\ r_{j}+\gamma \max _{a^{\prime}} Q\left(s_{j+1}, a^{\prime} ; \theta\right) & \text {对于非终止状态} s_{j+1}\end{cases}$ + \STATE 对损失 $\left(y_{j}-Q\left(s_{j}, a_{j} ; \theta\right)\right)^{2}$关于参数$\theta$做随机梯度下降 + \STATE 每$C$步复制参数$\hat{Q} \leftarrow Q$ + \ENDFOR + \ENDFOR + \end{algorithmic} +\end{algorithm} + +\clearpage + +\begin{algorithm} + \floatname{algorithm}{{SoftQ算法}} + \renewcommand{\thealgorithm}{} % 去掉算法标号 + \caption{} + \begin{algorithmic} + \STATE 初始化参数$\theta$和$\phi$% 初始化 + \STATE 复制参数$\bar{\theta} \leftarrow \theta, \bar{\phi} \leftarrow \phi$ + \STATE 初始化经验回放$D$ + \FOR {回合数 = $1,M$} + \FOR {时步 = $1,t$} + \STATE 根据$a_{t} \leftarrow f^{\phi}\left(\xi ; \mathbf{s}_{t}\right)$采样动作,其中$\xi \sim \mathcal{N}(\mathbf{0}, \boldsymbol{I})$ + \STATE 环境根据$a_t$反馈奖励$s_t$和下一个状态$s_{t+1}$ + \STATE 存储transition即$(s_t,a_t,r_t,s_{t+1})$到经验回放$D$中 + \STATE 更新环境状态$s_{t+1} \leftarrow s_t$ + \STATE 待完善 + \ENDFOR + \ENDFOR + + \end{algorithmic} +\end{algorithm} + +\end{document} \ No newline at end of file diff --git a/projects/codes/DQN/dqn.py b/projects/codes/DQN/dqn.py index 5ce5e1e..2b28757 100644 --- a/projects/codes/DQN/dqn.py +++ b/projects/codes/DQN/dqn.py @@ -5,7 +5,7 @@ @Email: johnjim0816@gmail.com @Date: 2020-06-12 00:50:49 @LastEditor: John -LastEditTime: 2022-07-20 23:57:16 +LastEditTime: 2022-08-11 09:52:23 @Discription: @Environment: python 3.7.7 ''' @@ -14,77 +14,39 @@ LastEditTime: 2022-07-20 23:57:16 import torch import torch.nn as nn -import torch.nn.functional as F import torch.optim as optim import random import math import numpy as np -class MLP(nn.Module): - def __init__(self, n_states,n_actions,hidden_dim=128): - """ 初始化q网络,为全连接网络 - n_states: 输入的特征数即环境的状态维度 - n_actions: 输出的动作维度 - """ - super(MLP, self).__init__() - self.fc1 = nn.Linear(n_states, hidden_dim) # 输入层 - self.fc2 = nn.Linear(hidden_dim,hidden_dim) # 隐藏层 - self.fc3 = nn.Linear(hidden_dim, n_actions) # 
输出层 - - def forward(self, x): - # 各层对应的激活函数 - x = F.relu(self.fc1(x)) - x = F.relu(self.fc2(x)) - return self.fc3(x) - -class ReplayBuffer: - def __init__(self, capacity): - self.capacity = capacity # 经验回放的容量 - self.buffer = [] # 缓冲区 - self.position = 0 - - def push(self, state, action, reward, next_state, done): - ''' 缓冲区是一个队列,容量超出时去掉开始存入的转移(transition) - ''' - if len(self.buffer) < self.capacity: - self.buffer.append(None) - self.buffer[self.position] = (state, action, reward, next_state, done) - self.position = (self.position + 1) % self.capacity - - def sample(self, batch_size): - batch = random.sample(self.buffer, batch_size) # 随机采出小批量转移 - state, action, reward, next_state, done = zip(*batch) # 解压成状态,动作等 - return state, action, reward, next_state, done - - def __len__(self): - ''' 返回当前存储的量 - ''' - return len(self.buffer) - class DQN: - def __init__(self, n_states,n_actions,cfg): + def __init__(self,n_actions,model,memory,cfg): self.n_actions = n_actions self.device = torch.device(cfg.device) # cpu or cuda self.gamma = cfg.gamma # 奖励的折扣因子 # e-greedy策略相关参数 - self.frame_idx = 0 # 用于epsilon的衰减计数 - self.epsilon = lambda frame_idx: cfg.epsilon_end + \ - (cfg.epsilon_start - cfg.epsilon_end) * \ - math.exp(-1. * frame_idx / cfg.epsilon_decay) + self.sample_count = 0 # 用于epsilon的衰减计数 + self.epsilon = cfg.epsilon_start + self.sample_count = 0 + self.epsilon_start = cfg.epsilon_start + self.epsilon_end = cfg.epsilon_end + self.epsilon_decay = cfg.epsilon_decay self.batch_size = cfg.batch_size - self.policy_net = MLP(n_states,n_actions).to(self.device) - self.target_net = MLP(n_states,n_actions).to(self.device) + self.policy_net = model.to(self.device) + self.target_net = model.to(self.device) for target_param, param in zip(self.target_net.parameters(),self.policy_net.parameters()): # 复制参数到目标网路targe_net target_param.data.copy_(param.data) self.optimizer = optim.Adam(self.policy_net.parameters(), lr=cfg.lr) # 优化器 - self.memory = ReplayBuffer(cfg.memory_capacity) # 经验回放 + self.memory = memory # 经验回放 - def choose_action(self, state): + def sample(self, state): ''' 选择动作 ''' - self.frame_idx += 1 - if random.random() > self.epsilon(self.frame_idx): + self.sample_count += 1 + self.epsilon = self.epsilon_end + (self.epsilon_start - self.epsilon_end) * \ + math.exp(-1. 
* self.sample_count / self.epsilon_decay) # epsilon是会递减的,这里选择指数递减 + if random.random() > self.epsilon: with torch.no_grad(): state = torch.tensor(state, device=self.device, dtype=torch.float32).unsqueeze(dim=0) q_values = self.policy_net(state) @@ -92,11 +54,16 @@ class DQN: else: action = random.randrange(self.n_actions) return action + def predict(self,state): + with torch.no_grad(): + state = torch.tensor(state, device=self.device, dtype=torch.float32).unsqueeze(dim=0) + q_values = self.policy_net(state) + action = q_values.max(1)[1].item() # 选择Q值最大的动作 + return action def update(self): if len(self.memory) < self.batch_size: # 当memory中不满足一个批量时,不更新策略 return # 从经验回放中(replay memory)中随机采样一个批量的转移(transition) - # print('updating') state_batch, action_batch, reward_batch, next_state_batch, done_batch = self.memory.sample( self.batch_size) @@ -118,9 +85,11 @@ class DQN: self.optimizer.step() def save(self, path): - torch.save(self.target_net.state_dict(), path+'dqn_checkpoint.pth') + from pathlib import Path + Path(path).mkdir(parents=True, exist_ok=True) + torch.save(self.target_net.state_dict(), path+'checkpoint.pth') def load(self, path): - self.target_net.load_state_dict(torch.load(path+'dqn_checkpoint.pth')) + self.target_net.load_state_dict(torch.load(path+'checkpoint.pth')) for target_param, param in zip(self.target_net.parameters(), self.policy_net.parameters()): param.data.copy_(target_param.data) diff --git a/projects/codes/DQN/dqn_cnn.py b/projects/codes/DQN/dqn_cnn.py deleted file mode 100644 index 4c086b2..0000000 --- a/projects/codes/DQN/dqn_cnn.py +++ /dev/null @@ -1,134 +0,0 @@ -import torch -import torch.nn as nn -import torch.optim as optim -import torch.autograd as autograd -import random -import math -class CNN(nn.Module): - def __init__(self, input_dim, output_dim): - super(CNN, self).__init__() - - self.input_dim = input_dim - self.output_dim = output_dim - - self.features = nn.Sequential( - nn.Conv2d(input_dim[0], 32, kernel_size=8, stride=4), - nn.ReLU(), - nn.Conv2d(32, 64, kernel_size=4, stride=2), - nn.ReLU(), - nn.Conv2d(64, 64, kernel_size=3, stride=1), - nn.ReLU() - ) - - self.fc = nn.Sequential( - nn.Linear(self.feature_size(), 512), - nn.ReLU(), - nn.Linear(512, self.output_dim) - ) - - def forward(self, x): - x = self.features(x) - x = x.view(x.size(0), -1) - x = self.fc(x) - return x - - def feature_size(self): - return self.features(autograd.Variable(torch.zeros(1, *self.input_dim))).view(1, -1).size(1) - - - def act(self, state, epsilon): - if random.random() > epsilon: - state = Variable(torch.FloatTensor(np.float32(state)).unsqueeze(0), volatile=True) - q_value = self.forward(state) - action = q_value.max(1)[1].data[0] - else: - action = random.randrange(env.action_space.n) - return action - -class ReplayBuffer: - def __init__(self, capacity): - self.capacity = capacity # 经验回放的容量 - self.buffer = [] # 缓冲区 - self.position = 0 - - def push(self, state, action, reward, next_state, done): - ''' 缓冲区是一个队列,容量超出时去掉开始存入的转移(transition) - ''' - if len(self.buffer) < self.capacity: - self.buffer.append(None) - self.buffer[self.position] = (state, action, reward, next_state, done) - self.position = (self.position + 1) % self.capacity - - def sample(self, batch_size): - batch = random.sample(self.buffer, batch_size) # 随机采出小批量转移 - state, action, reward, next_state, done = zip(*batch) # 解压成状态,动作等 - return state, action, reward, next_state, done - - def __len__(self): - ''' 返回当前存储的量 - ''' - return len(self.buffer) - -class DQN: - def __init__(self, n_states, n_actions, cfg): 
- - self.n_actions = n_actions # 总的动作个数 - self.device = cfg.device # 设备,cpu或gpu等 - self.gamma = cfg.gamma # 奖励的折扣因子 - # e-greedy策略相关参数 - self.frame_idx = 0 # 用于epsilon的衰减计数 - self.epsilon = lambda frame_idx: cfg.epsilon_end + \ - (cfg.epsilon_start - cfg.epsilon_end) * \ - math.exp(-1. * frame_idx / cfg.epsilon_decay) - self.batch_size = cfg.batch_size - self.policy_net = CNN(n_states, n_actions).to(self.device) - self.target_net = CNN(n_states, n_actions).to(self.device) - for target_param, param in zip(self.target_net.parameters(),self.policy_net.parameters()): # 复制参数到目标网路targe_net - target_param.data.copy_(param.data) - self.optimizer = optim.Adam(self.policy_net.parameters(), lr=cfg.lr) # 优化器 - self.memory = ReplayBuffer(cfg.memory_capacity) # 经验回放 - - def choose_action(self, state): - ''' 选择动作 - ''' - self.frame_idx += 1 - if random.random() > self.epsilon(self.frame_idx): - with torch.no_grad(): - print(type(state)) - state = torch.tensor([state], device=self.device, dtype=torch.float32) - q_values = self.policy_net(state) - action = q_values.max(1)[1].item() # 选择Q值最大的动作 - else: - action = random.randrange(self.n_actions) - return action - def update(self): - if len(self.memory) < self.batch_size: # 当memory中不满足一个批量时,不更新策略 - return - # 从经验回放中(replay memory)中随机采样一个批量的转移(transition) - state_batch, action_batch, reward_batch, next_state_batch, done_batch = self.memory.sample( - self.batch_size) - # 转为张量 - state_batch = torch.tensor(state_batch, device=self.device, dtype=torch.float) - action_batch = torch.tensor(action_batch, device=self.device).unsqueeze(1) - reward_batch = torch.tensor(reward_batch, device=self.device, dtype=torch.float) - next_state_batch = torch.tensor(next_state_batch, device=self.device, dtype=torch.float) - done_batch = torch.tensor(np.float32(done_batch), device=self.device) - q_values = self.policy_net(state_batch).gather(dim=1, index=action_batch) # 计算当前状态(s_t,a)对应的Q(s_t, a) - next_q_values = self.target_net(next_state_batch).max(1)[0].detach() # 计算下一时刻的状态(s_t_,a)对应的Q值 - # 计算期望的Q值,对于终止状态,此时done_batch[0]=1, 对应的expected_q_value等于reward - expected_q_values = reward_batch + self.gamma * next_q_values * (1-done_batch) - loss = nn.MSELoss()(q_values, expected_q_values.unsqueeze(1)) # 计算均方根损失 - # 优化更新模型 - self.optimizer.zero_grad() - loss.backward() - for param in self.policy_net.parameters(): # clip防止梯度爆炸 - param.grad.data.clamp_(-1, 1) - self.optimizer.step() - - def save(self, path): - torch.save(self.target_net.state_dict(), path+'dqn_checkpoint.pth') - - def load(self, path): - self.target_net.load_state_dict(torch.load(path+'dqn_checkpoint.pth')) - for target_param, param in zip(self.target_net.parameters(), self.policy_net.parameters()): - param.data.copy_(target_param.data) \ No newline at end of file diff --git a/projects/codes/DQN/dqn_cnn2.py b/projects/codes/DQN/dqn_cnn2.py deleted file mode 100644 index 67b7fd8..0000000 --- a/projects/codes/DQN/dqn_cnn2.py +++ /dev/null @@ -1,142 +0,0 @@ -import torch -import torch.nn as nn -import torch.optim as optim -import torch.autograd as autograd -import random -import math -import numpy as np -class CNN(nn.Module): - def __init__(self, n_frames, n_actions): - super(CNN,self).__init__() - self.n_frames = n_frames - self.n_actions = n_actions - - # Layers - self.conv1 = nn.Conv2d( - in_channels=n_frames, - out_channels=16, - kernel_size=8, - stride=4, - padding=2 - ) - self.conv2 = nn.Conv2d( - in_channels=16, - out_channels=32, - kernel_size=4, - stride=2, - padding=1 - ) - self.fc1 = nn.Linear( - in_features=3200, 
- out_features=256, - ) - self.fc2 = nn.Linear( - in_features=256, - out_features=n_actions, - ) - - # Activation Functions - self.relu = nn.ReLU() - - def flatten(self, x): - batch_size = x.size()[0] - x = x.view(batch_size, -1) - return x - - def forward(self, x): - - # Forward pass - x = self.relu(self.conv1(x)) # In: (80, 80, 4) Out: (20, 20, 16) - x = self.relu(self.conv2(x)) # In: (20, 20, 16) Out: (10, 10, 32) - x = self.flatten(x) # In: (10, 10, 32) Out: (3200,) - x = self.relu(self.fc1(x)) # In: (3200,) Out: (256,) - x = self.fc2(x) # In: (256,) Out: (4,) - - return x - -class ReplayBuffer: - def __init__(self, capacity): - self.capacity = capacity # 经验回放的容量 - self.buffer = [] # 缓冲区 - self.position = 0 - - def push(self, state, action, reward, next_state, done): - ''' 缓冲区是一个队列,容量超出时去掉开始存入的转移(transition) - ''' - if len(self.buffer) < self.capacity: - self.buffer.append(None) - self.buffer[self.position] = (state, action, reward, next_state, done) - self.position = (self.position + 1) % self.capacity - - def sample(self, batch_size): - batch = random.sample(self.buffer, batch_size) # 随机采出小批量转移 - state, action, reward, next_state, done = zip(*batch) # 解压成状态,动作等 - return state, action, reward, next_state, done - - def __len__(self): - ''' 返回当前存储的量 - ''' - return len(self.buffer) - -class DQN: - def __init__(self, n_states, n_actions, cfg): - - self.n_actions = n_actions # 总的动作个数 - self.device = cfg.device # 设备,cpu或gpu等 - self.gamma = cfg.gamma # 奖励的折扣因子 - # e-greedy策略相关参数 - self.frame_idx = 0 # 用于epsilon的衰减计数 - self.epsilon = lambda frame_idx: cfg.epsilon_end + \ - (cfg.epsilon_start - cfg.epsilon_end) * \ - math.exp(-1. * frame_idx / cfg.epsilon_decay) - self.batch_size = cfg.batch_size - self.policy_net = CNN(n_states, n_actions).to(self.device) - self.target_net = CNN(n_states, n_actions).to(self.device) - for target_param, param in zip(self.target_net.parameters(),self.policy_net.parameters()): # 复制参数到目标网路targe_net - target_param.data.copy_(param.data) - self.optimizer = optim.Adam(self.policy_net.parameters(), lr=cfg.lr) # 优化器 - self.memory = ReplayBuffer(cfg.memory_capacity) # 经验回放 - - def choose_action(self, state): - ''' 选择动作 - ''' - self.frame_idx += 1 - if random.random() > self.epsilon(self.frame_idx): - with torch.no_grad(): - state = torch.tensor([state], device=self.device, dtype=torch.float32) - q_values = self.policy_net(state) - action = q_values.max(1)[1].item() # 选择Q值最大的动作 - else: - action = random.randrange(self.n_actions) - return action - def update(self): - if len(self.memory) < self.batch_size: # 当memory中不满足一个批量时,不更新策略 - return - # 从经验回放中(replay memory)中随机采样一个批量的转移(transition) - state_batch, action_batch, reward_batch, next_state_batch, done_batch = self.memory.sample( - self.batch_size) - # 转为张量 - state_batch = torch.tensor(state_batch, device=self.device, dtype=torch.float) - action_batch = torch.tensor(action_batch, device=self.device).unsqueeze(1) - reward_batch = torch.tensor(reward_batch, device=self.device, dtype=torch.float) - next_state_batch = torch.tensor(next_state_batch, device=self.device, dtype=torch.float) - done_batch = torch.tensor(np.float32(done_batch), device=self.device) - q_values = self.policy_net(state_batch).gather(dim=1, index=action_batch) # 计算当前状态(s_t,a)对应的Q(s_t, a) - next_q_values = self.target_net(next_state_batch).max(1)[0].detach() # 计算下一时刻的状态(s_t_,a)对应的Q值 - # 计算期望的Q值,对于终止状态,此时done_batch[0]=1, 对应的expected_q_value等于reward - expected_q_values = reward_batch + self.gamma * next_q_values * (1-done_batch) - loss = 
nn.MSELoss()(q_values, expected_q_values.unsqueeze(1)) # 计算均方根损失 - # 优化更新模型 - self.optimizer.zero_grad() - loss.backward() - for param in self.policy_net.parameters(): # clip防止梯度爆炸 - param.grad.data.clamp_(-1, 1) - self.optimizer.step() - - def save(self, path): - torch.save(self.target_net.state_dict(), path+'dqn_checkpoint.pth') - - def load(self, path): - self.target_net.load_state_dict(torch.load(path+'dqn_checkpoint.pth')) - for target_param, param in zip(self.target_net.parameters(), self.policy_net.parameters()): - param.data.copy_(target_param.data) \ No newline at end of file diff --git a/projects/codes/DQN/outputs/CartPole-v0/20220713-211653/models/dqn_checkpoint.pth b/projects/codes/DQN/outputs/CartPole-v0/20220713-211653/models/dqn_checkpoint.pth deleted file mode 100644 index 237eedd..0000000 Binary files a/projects/codes/DQN/outputs/CartPole-v0/20220713-211653/models/dqn_checkpoint.pth and /dev/null differ diff --git a/projects/codes/DQN/outputs/CartPole-v0/20220713-211653/results/params.json b/projects/codes/DQN/outputs/CartPole-v0/20220713-211653/results/params.json deleted file mode 100644 index 3dfcdd4..0000000 --- a/projects/codes/DQN/outputs/CartPole-v0/20220713-211653/results/params.json +++ /dev/null @@ -1,19 +0,0 @@ -{ - "algo_name": "DQN", - "env_name": "CartPole-v0", - "train_eps": 200, - "test_eps": 20, - "gamma": 0.95, - "epsilon_start": 0.95, - "epsilon_end": 0.01, - "epsilon_decay": 500, - "lr": 0.0001, - "memory_capacity": 100000, - "batch_size": 64, - "target_update": 4, - "hidden_dim": 256, - "deivce": "cpu", - "result_path": "C:\\Users\\24438\\Desktop\\rl-tutorials/outputs/CartPole-v0/20220713-211653/results/", - "model_path": "C:\\Users\\24438\\Desktop\\rl-tutorials/outputs/CartPole-v0/20220713-211653/models/", - "save_fig": true -} \ No newline at end of file diff --git a/projects/codes/DQN/outputs/CartPole-v0/20220713-211653/results/test_rewards_curve.png b/projects/codes/DQN/outputs/CartPole-v0/20220713-211653/results/test_rewards_curve.png deleted file mode 100644 index 76f8a18..0000000 Binary files a/projects/codes/DQN/outputs/CartPole-v0/20220713-211653/results/test_rewards_curve.png and /dev/null differ diff --git a/projects/codes/DQN/outputs/CartPole-v0/20220713-211653/results/train_ma_rewards.npy b/projects/codes/DQN/outputs/CartPole-v0/20220713-211653/results/train_ma_rewards.npy deleted file mode 100644 index 017fcb6..0000000 Binary files a/projects/codes/DQN/outputs/CartPole-v0/20220713-211653/results/train_ma_rewards.npy and /dev/null differ diff --git a/projects/codes/DQN/outputs/CartPole-v0/20220713-211653/results/train_rewards.npy b/projects/codes/DQN/outputs/CartPole-v0/20220713-211653/results/train_rewards.npy deleted file mode 100644 index 877f53a..0000000 Binary files a/projects/codes/DQN/outputs/CartPole-v0/20220713-211653/results/train_rewards.npy and /dev/null differ diff --git a/projects/codes/DQN/outputs/CartPole-v0/20220713-211653/results/train_rewards_curve.png b/projects/codes/DQN/outputs/CartPole-v0/20220713-211653/results/train_rewards_curve.png deleted file mode 100644 index 8b2aa59..0000000 Binary files a/projects/codes/DQN/outputs/CartPole-v0/20220713-211653/results/train_rewards_curve.png and /dev/null differ diff --git a/projects/codes/DQN/outputs/CartPole-v0/20220713-211653/results/train_steps.npy b/projects/codes/DQN/outputs/CartPole-v0/20220713-211653/results/train_steps.npy deleted file mode 100644 index 76fb505..0000000 Binary files a/projects/codes/DQN/outputs/CartPole-v0/20220713-211653/results/train_steps.npy and 
/dev/null differ diff --git a/projects/codes/DQN/outputs/CartPole-v0/20220815-185119/models/checkpoint.pth b/projects/codes/DQN/outputs/CartPole-v0/20220815-185119/models/checkpoint.pth new file mode 100644 index 0000000..8172745 Binary files /dev/null and b/projects/codes/DQN/outputs/CartPole-v0/20220815-185119/models/checkpoint.pth differ diff --git a/projects/codes/DQN/outputs/CartPole-v0/20220815-185119/results/params.json b/projects/codes/DQN/outputs/CartPole-v0/20220815-185119/results/params.json new file mode 100644 index 0000000..7749c42 --- /dev/null +++ b/projects/codes/DQN/outputs/CartPole-v0/20220815-185119/results/params.json @@ -0,0 +1 @@ +{"algo_name": "DQN", "env_name": "CartPole-v0", "train_eps": 200, "test_eps": 20, "gamma": 0.95, "epsilon_start": 0.95, "epsilon_end": 0.01, "epsilon_decay": 500, "lr": 0.0001, "memory_capacity": 100000, "batch_size": 64, "target_update": 4, "hidden_dim": 256, "device": "cpu", "result_path": "/Users/jj/Desktop/rl-tutorials/codes/DQN/outputs/CartPole-v0/20220815-185119/results/", "model_path": "/Users/jj/Desktop/rl-tutorials/codes/DQN/outputs/CartPole-v0/20220815-185119/models/", "show_fig": false, "save_fig": true} \ No newline at end of file diff --git a/projects/codes/DQN/outputs/CartPole-v0/20220713-211653/results/test_rewards.npy b/projects/codes/DQN/outputs/CartPole-v0/20220815-185119/results/test_rewards.npy similarity index 100% rename from projects/codes/DQN/outputs/CartPole-v0/20220713-211653/results/test_rewards.npy rename to projects/codes/DQN/outputs/CartPole-v0/20220815-185119/results/test_rewards.npy diff --git a/projects/codes/DQN/outputs/CartPole-v0/20220815-185119/results/testing_curve.png b/projects/codes/DQN/outputs/CartPole-v0/20220815-185119/results/testing_curve.png new file mode 100644 index 0000000..43ceb6f Binary files /dev/null and b/projects/codes/DQN/outputs/CartPole-v0/20220815-185119/results/testing_curve.png differ diff --git a/projects/codes/DQN/outputs/CartPole-v0/20220815-185119/results/train_rewards.npy b/projects/codes/DQN/outputs/CartPole-v0/20220815-185119/results/train_rewards.npy new file mode 100644 index 0000000..b96ce50 Binary files /dev/null and b/projects/codes/DQN/outputs/CartPole-v0/20220815-185119/results/train_rewards.npy differ diff --git a/projects/codes/DQN/outputs/CartPole-v0/20220815-185119/results/training_curve.png b/projects/codes/DQN/outputs/CartPole-v0/20220815-185119/results/training_curve.png new file mode 100644 index 0000000..3e09a74 Binary files /dev/null and b/projects/codes/DQN/outputs/CartPole-v0/20220815-185119/results/training_curve.png differ diff --git a/projects/codes/DQN/task0.py b/projects/codes/DQN/task0.py index 04344aa..8985103 100644 --- a/projects/codes/DQN/task0.py +++ b/projects/codes/DQN/task0.py @@ -1,23 +1,23 @@ import sys,os -curr_path = os.path.dirname(os.path.abspath(__file__)) # current path -parent_path = os.path.dirname(curr_path) # parent path -sys.path.append(parent_path) # add to system path -import torch.nn as nn -import torch.nn.functional as F +curr_path = os.path.dirname(os.path.abspath(__file__)) # 当前文件所在绝对路径 +parent_path = os.path.dirname(curr_path) # 父路径 +sys.path.append(parent_path) # 添加路径到系统路径 import gym import torch import datetime import numpy as np import argparse -from common.utils import save_results, make_dir +from common.utils import save_results from common.utils import plot_rewards,save_args +from common.models import MLP +from common.memories import ReplayBuffer from dqn import DQN def get_args(): - """ Hyperparameters + """ 超参数 
""" - curr_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") # Obtain current time + curr_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") # 获取当前时间 parser = argparse.ArgumentParser(description="hyperparameters") parser.add_argument('--algo_name',default='DQN',type=str,help="name of algorithm") parser.add_argument('--env_name',default='CartPole-v0',type=str,help="name of environment") @@ -36,7 +36,8 @@ def get_args(): parser.add_argument('--result_path',default=curr_path + "/outputs/" + parser.parse_args().env_name + \ '/' + curr_time + '/results/' ) parser.add_argument('--model_path',default=curr_path + "/outputs/" + parser.parse_args().env_name + \ - '/' + curr_time + '/models/' ) # path to save models + '/' + curr_time + '/models/' ) + parser.add_argument('--show_fig',default=False,type=bool,help="if show figure or not") parser.add_argument('--save_fig',default=True,type=bool,help="if save figure or not") args = parser.parse_args() return args @@ -47,8 +48,10 @@ def env_agent_config(cfg,seed=1): env = gym.make(cfg.env_name) # 创建环境 n_states = env.observation_space.shape[0] # 状态维度 n_actions = env.action_space.n # 动作维度 - print(f"n states: {n_states}, n actions: {n_actions}") - agent = DQN(n_states,n_actions, cfg) # 创建智能体 + print(f"状态数:{n_states},动作数:{n_actions}") + model = MLP(n_states,n_actions,hidden_dim=cfg.hidden_dim) + memory = ReplayBuffer(cfg.memory_capacity) # 经验回放 + agent = DQN(n_actions,model,memory,cfg) # 创建智能体 if seed !=0: # 设置随机种子 torch.manual_seed(seed) env.seed(seed) @@ -56,12 +59,11 @@ def env_agent_config(cfg,seed=1): return env, agent def train(cfg, env, agent): - ''' Training + ''' 训练 ''' - print('Start training!') - print(f'Env:{cfg.env_name}, A{cfg.algo_name}, 设备:{cfg.device}') + print("开始训练!") + print(f"回合:{cfg.env_name}, 算法:{cfg.algo_name}, 设备:{cfg.device}") rewards = [] # 记录所有回合的奖励 - ma_rewards = [] # 记录所有回合的滑动平均奖励 steps = [] for i_ep in range(cfg.train_eps): ep_reward = 0 # 记录一回合内的奖励 @@ -69,7 +71,7 @@ def train(cfg, env, agent): state = env.reset() # 重置环境,返回初始状态 while True: ep_step += 1 - action = agent.choose_action(state) # 选择动作 + action = agent.sample(state) # 选择动作 next_state, reward, done, _ = env.step(action) # 更新环境,返回transition agent.memory.push(state, action, reward, next_state, done) # 保存transition @@ -82,27 +84,17 @@ def train(cfg, env, agent): agent.target_net.load_state_dict(agent.policy_net.state_dict()) steps.append(ep_step) rewards.append(ep_reward) - if ma_rewards: - ma_rewards.append(0.9 * ma_rewards[-1] + 0.1 * ep_reward) - else: - ma_rewards.append(ep_reward) - if (i_ep + 1) % 1 == 0: - print(f'Episode:{i_ep+1}/{cfg.train_eps}, Reward:{ep_reward:.2f}, Step:{ep_step:.2f} Epislon:{agent.epsilon(agent.frame_idx):.3f}') - print('Finish training!') + if (i_ep + 1) % 10 == 0: + print(f'回合:{i_ep+1}/{cfg.train_eps},奖励:{ep_reward:.2f},Epislon:{agent.epsilon:.3f}') + print("完成训练!") env.close() - res_dic = {'rewards':rewards,'ma_rewards':ma_rewards,'steps':steps} + res_dic = {'rewards':rewards} return res_dic - def test(cfg, env, agent): - print('Start testing!') - print(f'Env:{cfg.env_name}, A{cfg.algo_name}, 设备:{cfg.device}') - ############# 由于测试不需要使用epsilon-greedy策略,所以相应的值设置为0 ############### - cfg.epsilon_start = 0.0 # e-greedy策略中初始epsilon - cfg.epsilon_end = 0.0 # e-greedy策略中的终止epsilon - ################################################################################ + print("开始测试!") + print(f"回合:{cfg.env_name}, 算法:{cfg.algo_name}, 设备:{cfg.device}") rewards = [] # 记录所有回合的奖励 - ma_rewards = [] # 记录所有回合的滑动平均奖励 steps = [] for i_ep in 
range(cfg.test_eps): ep_reward = 0 # 记录一回合内的奖励 @@ -110,7 +102,7 @@ def test(cfg, env, agent): state = env.reset() # 重置环境,返回初始状态 while True: ep_step+=1 - action = agent.choose_action(state) # 选择动作 + action = agent.predict(state) # 选择动作 next_state, reward, done, _ = env.step(action) # 更新环境,返回transition state = next_state # 更新下一个状态 ep_reward += reward # 累加奖励 @@ -118,14 +110,10 @@ def test(cfg, env, agent): break steps.append(ep_step) rewards.append(ep_reward) - if ma_rewards: - ma_rewards.append(ma_rewards[-1] * 0.9 + ep_reward * 0.1) - else: - ma_rewards.append(ep_reward) - print(f'Episode:{i_ep+1}/{cfg.test_eps}, Reward:{ep_reward:.2f}, Step:{ep_step:.2f}') - print('Finish testing') + print(f'回合:{i_ep+1}/{cfg.test_eps},奖励:{ep_reward:.2f}') + print("完成测试") env.close() - return {'rewards':rewards,'ma_rewards':ma_rewards,'steps':steps} + return {'rewards':rewards} if __name__ == "__main__": @@ -133,16 +121,14 @@ if __name__ == "__main__": # 训练 env, agent = env_agent_config(cfg) res_dic = train(cfg, env, agent) - make_dir(cfg.result_path, cfg.model_path) - save_args(cfg) # save parameters - agent.save(path=cfg.model_path) # save model - save_results(res_dic, tag='train', - path=cfg.result_path) - plot_rewards(res_dic['rewards'], res_dic['ma_rewards'], cfg, tag="train") + save_args(cfg,path = cfg.result_path) # 保存参数到模型路径上 + agent.save(path = cfg.model_path) # 保存模型 + save_results(res_dic, tag = 'train', path = cfg.result_path) + plot_rewards(res_dic['rewards'], cfg, path = cfg.result_path,tag = "train") # 测试 - env, agent = env_agent_config(cfg) - agent.load(path=cfg.model_path) # 导入模型 + env, agent = env_agent_config(cfg) # 也可以不加,加这一行的是为了避免训练之后环境可能会出现问题,因此新建一个环境用于测试 + agent.load(path = cfg.model_path) # 导入模型 res_dic = test(cfg, env, agent) save_results(res_dic, tag='test', - path=cfg.result_path) # 保存结果 - plot_rewards(res_dic['rewards'], res_dic['ma_rewards'],cfg, tag="test") # 画出结果 + path = cfg.result_path) # 保存结果 + plot_rewards(res_dic['rewards'], cfg, path = cfg.result_path,tag = "test") # 画出结果 diff --git a/projects/codes/DoubleDQN/double_dqn.py b/projects/codes/DoubleDQN/double_dqn.py index 78642ea..0488705 100644 --- a/projects/codes/DoubleDQN/double_dqn.py +++ b/projects/codes/DoubleDQN/double_dqn.py @@ -63,18 +63,18 @@ class MLP(nn.Module): return self.fc3(x) class DoubleDQN: - def __init__(self, n_states, n_actions, cfg): + def __init__(self, n_states, n_actions, model, memory, cfg): self.n_actions = n_actions # 总的动作个数 self.device = torch.device(cfg.device) # 设备,cpu或gpu等 self.gamma = cfg.gamma # e-greedy策略相关参数 - self.actions_count = 0 + self.sample_count = 0 self.epsilon_start = cfg.epsilon_start self.epsilon_end = cfg.epsilon_end self.epsilon_decay = cfg.epsilon_decay self.batch_size = cfg.batch_size - self.policy_net = MLP(n_states, n_actions,hidden_dim=cfg.hidden_dim).to(self.device) - self.target_net = MLP(n_states, n_actions,hidden_dim=cfg.hidden_dim).to(self.device) + self.policy_net = model.to(self.device) + self.target_net = model.to(self.device) # target_net copy from policy_net for target_param, param in zip(self.target_net.parameters(), self.policy_net.parameters()): target_param.data.copy_(param.data) @@ -82,13 +82,13 @@ class DoubleDQN: # 可查parameters()与state_dict()的区别,前者require_grad=True self.optimizer = optim.Adam(self.policy_net.parameters(), lr=cfg.lr) self.loss = 0 - self.memory = ReplayBuffer(cfg.memory_capacity) + self.memory = memory - def choose_action(self, state): + def sample(self, state): '''选择动作 ''' - self.actions_count += 1 - self.epsilon = self.epsilon_end + 
(self.epsilon_start - self.epsilon_end) * math.exp(-1. * self.actions_count / self.epsilon_decay) + self.sample_count += 1 + self.epsilon = self.epsilon_end + (self.epsilon_start - self.epsilon_end) * math.exp(-1. * self.sample_count / self.epsilon_decay) if random.random() > self.epsilon: with torch.no_grad(): # 先转为张量便于丢给神经网络,state元素数据原本为float64 @@ -104,9 +104,16 @@ class DoubleDQN: else: action = random.randrange(self.n_actions) return action + def predict(self, state): + '''选择动作 + ''' + with torch.no_grad(): + state = torch.tensor([state], device=self.device, dtype=torch.float32) + q_value = self.policy_net(state) + action = q_value.max(1)[1].item() + return action def update(self): - - if len(self.memory) < self.batch_size: + if len(self.memory) < self.batch_size: # 只有memory满了才会更新 return # 从memory中随机采样transition state_batch, action_batch, reward_batch, next_state_batch, done_batch = self.memory.sample( @@ -150,7 +157,7 @@ class DoubleDQN: for param in self.policy_net.parameters(): # clip防止梯度爆炸 param.grad.data.clamp_(-1, 1) self.optimizer.step() # 更新模型 - + def save(self,path): torch.save(self.target_net.state_dict(), path+'checkpoint.pth') diff --git a/projects/codes/DoubleDQN/outputs/CartPole-v0/20220721-215416/models/checkpoint.pth b/projects/codes/DoubleDQN/outputs/CartPole-v0/20220721-215416/models/checkpoint.pth deleted file mode 100644 index 2818144..0000000 Binary files a/projects/codes/DoubleDQN/outputs/CartPole-v0/20220721-215416/models/checkpoint.pth and /dev/null differ diff --git a/projects/codes/DoubleDQN/outputs/CartPole-v0/20220721-215416/results/params.json b/projects/codes/DoubleDQN/outputs/CartPole-v0/20220721-215416/results/params.json deleted file mode 100644 index abc1877..0000000 --- a/projects/codes/DoubleDQN/outputs/CartPole-v0/20220721-215416/results/params.json +++ /dev/null @@ -1,19 +0,0 @@ -{ - "algo_name": "DoubleDQN", - "env_name": "CartPole-v0", - "train_eps": 200, - "test_eps": 20, - "gamma": 0.99, - "epsilon_start": 0.95, - "epsilon_end": 0.01, - "epsilon_decay": 500, - "lr": 0.0001, - "memory_capacity": 100000, - "batch_size": 64, - "target_update": 2, - "hidden_dim": 256, - "device": "cuda", - "result_path": "C:\\Users\\24438\\Desktop\\rl-tutorials\\codes\\DoubleDQN/outputs/CartPole-v0/20220721-215416/results/", - "model_path": "C:\\Users\\24438\\Desktop\\rl-tutorials\\codes\\DoubleDQN/outputs/CartPole-v0/20220721-215416/models/", - "save_fig": true -} \ No newline at end of file diff --git a/projects/codes/DoubleDQN/outputs/CartPole-v0/20220721-215416/results/test_ma_rewards.npy b/projects/codes/DoubleDQN/outputs/CartPole-v0/20220721-215416/results/test_ma_rewards.npy deleted file mode 100644 index da15b7f..0000000 Binary files a/projects/codes/DoubleDQN/outputs/CartPole-v0/20220721-215416/results/test_ma_rewards.npy and /dev/null differ diff --git a/projects/codes/DoubleDQN/outputs/CartPole-v0/20220721-215416/results/test_rewards.npy b/projects/codes/DoubleDQN/outputs/CartPole-v0/20220721-215416/results/test_rewards.npy deleted file mode 100644 index ce7e7be..0000000 Binary files a/projects/codes/DoubleDQN/outputs/CartPole-v0/20220721-215416/results/test_rewards.npy and /dev/null differ diff --git a/projects/codes/DoubleDQN/outputs/CartPole-v0/20220721-215416/results/test_rewards_curve.png b/projects/codes/DoubleDQN/outputs/CartPole-v0/20220721-215416/results/test_rewards_curve.png deleted file mode 100644 index 9123a84..0000000 Binary files a/projects/codes/DoubleDQN/outputs/CartPole-v0/20220721-215416/results/test_rewards_curve.png and /dev/null 
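The sample() methods introduced by this refactor (DoubleDQN here, QLearning and Sarsa later in the diff) all anneal epsilon with the same exponential schedule. Below is a small sketch of that schedule using the default constants from the diff (epsilon_start=0.95, epsilon_end=0.01, epsilon_decay=500); the helper name and the printed step counts are illustrative only.

```python
# Hedged sketch of the exponential epsilon-decay schedule used in sample().
import math

def epsilon_at(sample_count, eps_start=0.95, eps_end=0.01, eps_decay=500):
    """Epsilon decays from eps_start toward eps_end as sample_count grows."""
    return eps_end + (eps_start - eps_end) * math.exp(-sample_count / eps_decay)

for step in (0, 100, 500, 2000):
    print(step, round(epsilon_at(step), 3))  # 0.95 -> ~0.78 -> ~0.36 -> ~0.03
```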
differ diff --git a/projects/codes/DoubleDQN/outputs/CartPole-v0/20220721-215416/results/train_ma_rewards.npy b/projects/codes/DoubleDQN/outputs/CartPole-v0/20220721-215416/results/train_ma_rewards.npy deleted file mode 100644 index b44206b..0000000 Binary files a/projects/codes/DoubleDQN/outputs/CartPole-v0/20220721-215416/results/train_ma_rewards.npy and /dev/null differ diff --git a/projects/codes/DoubleDQN/outputs/CartPole-v0/20220721-215416/results/train_rewards_curve.png b/projects/codes/DoubleDQN/outputs/CartPole-v0/20220721-215416/results/train_rewards_curve.png deleted file mode 100644 index d07d996..0000000 Binary files a/projects/codes/DoubleDQN/outputs/CartPole-v0/20220721-215416/results/train_rewards_curve.png and /dev/null differ diff --git a/projects/codes/DoubleDQN/outputs/CartPole-v0/20220803-104127/models/checkpoint.pth b/projects/codes/DoubleDQN/outputs/CartPole-v0/20220803-104127/models/checkpoint.pth new file mode 100644 index 0000000..2d4c362 Binary files /dev/null and b/projects/codes/DoubleDQN/outputs/CartPole-v0/20220803-104127/models/checkpoint.pth differ diff --git a/projects/codes/DoubleDQN/outputs/CartPole-v0/20220803-104127/results/params.json b/projects/codes/DoubleDQN/outputs/CartPole-v0/20220803-104127/results/params.json new file mode 100644 index 0000000..6f83ede --- /dev/null +++ b/projects/codes/DoubleDQN/outputs/CartPole-v0/20220803-104127/results/params.json @@ -0,0 +1 @@ +{"algo_name": "DoubleDQN", "env_name": "CartPole-v0", "train_eps": 200, "test_eps": 20, "gamma": 0.95, "epsilon_start": 0.95, "epsilon_end": 0.01, "epsilon_decay": 500, "lr": 0.0001, "memory_capacity": 100000, "batch_size": 64, "target_update": 4, "hidden_dim": 256, "device": "cpu", "result_path": "/root/Desktop/rl-tutorials/codes/DoubleDQN/outputs/CartPole-v0/20220803-104127/results/", "model_path": "/root/Desktop/rl-tutorials/codes/DoubleDQN/outputs/CartPole-v0/20220803-104127/models/", "save_fig": true} \ No newline at end of file diff --git a/projects/codes/DQN/outputs/CartPole-v0/20220713-211653/results/test_ma_rewards.npy b/projects/codes/DoubleDQN/outputs/CartPole-v0/20220803-104127/results/test_rewards.npy similarity index 77% rename from projects/codes/DQN/outputs/CartPole-v0/20220713-211653/results/test_ma_rewards.npy rename to projects/codes/DoubleDQN/outputs/CartPole-v0/20220803-104127/results/test_rewards.npy index 14bca8b..c215808 100644 Binary files a/projects/codes/DQN/outputs/CartPole-v0/20220713-211653/results/test_ma_rewards.npy and b/projects/codes/DoubleDQN/outputs/CartPole-v0/20220803-104127/results/test_rewards.npy differ diff --git a/projects/codes/DoubleDQN/outputs/CartPole-v0/20220803-104127/results/test_rewards_curve.png b/projects/codes/DoubleDQN/outputs/CartPole-v0/20220803-104127/results/test_rewards_curve.png new file mode 100644 index 0000000..7b66b67 Binary files /dev/null and b/projects/codes/DoubleDQN/outputs/CartPole-v0/20220803-104127/results/test_rewards_curve.png differ diff --git a/projects/codes/DoubleDQN/outputs/CartPole-v0/20220721-215416/results/train_rewards.npy b/projects/codes/DoubleDQN/outputs/CartPole-v0/20220803-104127/results/train_rewards.npy similarity index 51% rename from projects/codes/DoubleDQN/outputs/CartPole-v0/20220721-215416/results/train_rewards.npy rename to projects/codes/DoubleDQN/outputs/CartPole-v0/20220803-104127/results/train_rewards.npy index d9b5730..654d71d 100644 Binary files a/projects/codes/DoubleDQN/outputs/CartPole-v0/20220721-215416/results/train_rewards.npy and 
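The double_dqn.py hunk above only shows the refactored constructor and the sample/predict split; the defining difference from vanilla DQN lives in the target computation, which is not fully visible in this diff. The sketch below is therefore not the repository's update() code, and all tensor names are invented; it only illustrates the idea that Double DQN selects the next action with the policy network but evaluates it with the target network, which reduces the over-estimation bias of the max operator.

```python
# Hedged sketch contrasting the vanilla DQN and Double DQN targets.
import torch
import torch.nn as nn

gamma = 0.95
policy_net = nn.Linear(4, 2)
target_net = nn.Linear(4, 2)
next_states = torch.randn(8, 4)
rewards = torch.randn(8)
dones = torch.zeros(8)

with torch.no_grad():
    # Vanilla DQN: the target network both selects and evaluates the next action.
    dqn_next = target_net(next_states).max(1)[0]
    # Double DQN: the policy network selects, the target network evaluates.
    best_actions = policy_net(next_states).argmax(1, keepdim=True)
    ddqn_next = target_net(next_states).gather(1, best_actions).squeeze(1)

dqn_target = rewards + gamma * dqn_next * (1 - dones)
ddqn_target = rewards + gamma * ddqn_next * (1 - dones)
```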
b/projects/codes/DoubleDQN/outputs/CartPole-v0/20220803-104127/results/train_rewards.npy differ diff --git a/projects/codes/DoubleDQN/outputs/CartPole-v0/20220803-104127/results/train_rewards_curve.png b/projects/codes/DoubleDQN/outputs/CartPole-v0/20220803-104127/results/train_rewards_curve.png new file mode 100644 index 0000000..dbf66d7 Binary files /dev/null and b/projects/codes/DoubleDQN/outputs/CartPole-v0/20220803-104127/results/train_rewards_curve.png differ diff --git a/projects/codes/DoubleDQN/task0.py b/projects/codes/DoubleDQN/task0.py index 66dfcd9..7451d24 100644 --- a/projects/codes/DoubleDQN/task0.py +++ b/projects/codes/DoubleDQN/task0.py @@ -20,31 +20,33 @@ import argparse from common.utils import save_results,make_dir from common.utils import plot_rewards,save_args +from common.models import MLP +from common.memories import ReplayBuffer from DoubleDQN.double_dqn import DoubleDQN def get_args(): - """ Hyperparameters + """ 超参数 """ - curr_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") # Obtain current time + curr_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") # 获取当前时间 parser = argparse.ArgumentParser(description="hyperparameters") parser.add_argument('--algo_name',default='DoubleDQN',type=str,help="name of algorithm") parser.add_argument('--env_name',default='CartPole-v0',type=str,help="name of environment") parser.add_argument('--train_eps',default=200,type=int,help="episodes of training") parser.add_argument('--test_eps',default=20,type=int,help="episodes of testing") - parser.add_argument('--gamma',default=0.99,type=float,help="discounted factor") + parser.add_argument('--gamma',default=0.95,type=float,help="discounted factor") parser.add_argument('--epsilon_start',default=0.95,type=float,help="initial value of epsilon") parser.add_argument('--epsilon_end',default=0.01,type=float,help="final value of epsilon") parser.add_argument('--epsilon_decay',default=500,type=int,help="decay rate of epsilon") parser.add_argument('--lr',default=0.0001,type=float,help="learning rate") parser.add_argument('--memory_capacity',default=100000,type=int,help="memory capacity") parser.add_argument('--batch_size',default=64,type=int) - parser.add_argument('--target_update',default=2,type=int) + parser.add_argument('--target_update',default=4,type=int) parser.add_argument('--hidden_dim',default=256,type=int) parser.add_argument('--device',default='cpu',type=str,help="cpu or cuda") parser.add_argument('--result_path',default=curr_path + "/outputs/" + parser.parse_args().env_name + \ '/' + curr_time + '/results/' ) parser.add_argument('--model_path',default=curr_path + "/outputs/" + parser.parse_args().env_name + \ - '/' + curr_time + '/models/' ) # path to save models + '/' + curr_time + '/models/' ) # 保存模型的路径 parser.add_argument('--save_fig',default=True,type=bool,help="if save figure or not") args = parser.parse_args() return args @@ -55,19 +57,20 @@ def env_agent_config(cfg,seed=1): env.seed(seed) n_states = env.observation_space.shape[0] n_actions = env.action_space.n - agent = DoubleDQN(n_states,n_actions,cfg) + model = MLP(n_states, n_actions,hidden_dim=cfg.hidden_dim) + memory = ReplayBuffer(cfg.memory_capacity) + agent = DoubleDQN(n_states,n_actions,model,memory,cfg) return env,agent def train(cfg,env,agent): - print('Start training!') - print(f'Env:{cfg.env_name}, Algorithm:{cfg.algo_name}, Device:{cfg.device}') + print("开始训练!") + print(f"回合:{cfg.env_name}, 算法:{cfg.algo_name}, 设备:{cfg.device}") rewards = [] # 记录所有回合的奖励 - ma_rewards = [] # 记录所有回合的滑动平均奖励 for i_ep in 
range(cfg.train_eps): ep_reward = 0 # 记录一回合内的奖励 state = env.reset() # 重置环境,返回初始状态 while True: - action = agent.choose_action(state) + action = agent.sample(state) next_state, reward, done, _ = env.step(action) ep_reward += reward agent.memory.push(state, action, reward, next_state, done) @@ -78,61 +81,45 @@ def train(cfg,env,agent): if i_ep % cfg.target_update == 0: agent.target_net.load_state_dict(agent.policy_net.state_dict()) if (i_ep+1)%10 == 0: - print(f'Env:{i_ep+1}/{cfg.train_eps}, Reward:{ep_reward:.2f}') - rewards.append(ep_reward) - if ma_rewards: - ma_rewards.append( - 0.9*ma_rewards[-1]+0.1*ep_reward) - else: - ma_rewards.append(ep_reward) - print('Finish training!') - return {'rewards':rewards,'ma_rewards':ma_rewards} + print(f'回合:{i_ep+1}/{cfg.train_eps},奖励:{ep_reward:.2f},Epislon:{agent.epsilon:.3f}') + rewards.append(ep_reward) + print("完成训练!") + return {'rewards':rewards} def test(cfg,env,agent): - print('Start testing') - print(f'Env:{cfg.env_name}, Algorithm:{cfg.algo_name}, Device:{cfg.device}') - ############# 由于测试不需要使用epsilon-greedy策略,所以相应的值设置为0 ############### - cfg.epsilon_start = 0.0 # e-greedy策略中初始epsilon - cfg.epsilon_end = 0.0 # e-greedy策略中的终止epsilon - ################################################################################ + print("开始测试!") + print(f"回合:{cfg.env_name}, 算法:{cfg.algo_name}, 设备:{cfg.device}") rewards = [] # 记录所有回合的奖励 - ma_rewards = [] # 记录所有回合的滑动平均奖励 - for i_ep in range(cfg.test_eps): state = env.reset() ep_reward = 0 while True: - action = agent.choose_action(state) + action = agent.predict(state) next_state, reward, done, _ = env.step(action) state = next_state ep_reward += reward if done: break rewards.append(ep_reward) - if ma_rewards: - ma_rewards.append(ma_rewards[-1]*0.9+ep_reward*0.1) - else: - ma_rewards.append(ep_reward) - print(f"Epside:{i_ep+1}/{cfg.test_eps}, Reward:{ep_reward:.1f}") - print('Finish testing!') - return {'rewards':rewards,'ma_rewards':ma_rewards} + print(f'回合:{i_ep+1}/{cfg.test_eps},奖励:{ep_reward:.2f}') + print("完成测试!") + return {'rewards':rewards} if __name__ == "__main__": cfg = get_args() - print(cfg.device) - # training - env,agent = env_agent_config(cfg,seed=1) + # 训练 + env, agent = env_agent_config(cfg,seed=1) res_dic = train(cfg, env, agent) - make_dir(cfg.result_path, cfg.model_path) - save_args(cfg) - agent.save(path=cfg.model_path) + make_dir(cfg.result_path, cfg.model_path) + save_args(cfg) # 保存参数 + agent.save(path=cfg.model_path) # 保存模型 save_results(res_dic, tag='train', path=cfg.result_path) - plot_rewards(res_dic['rewards'], res_dic['ma_rewards'], cfg, tag="train") - # testing - env,agent = env_agent_config(cfg,seed=10) - agent.load(path=cfg.model_path) - res_dic = test(cfg,env,agent) + plot_rewards(res_dic['rewards'], cfg, tag="train") + # 测试 + env, agent = env_agent_config(cfg,seed=1) + agent.load(path=cfg.model_path) # 导入模型 + res_dic = test(cfg, env, agent) save_results(res_dic, tag='test', - path=cfg.result_path) - plot_rewards(res_dic['rewards'], res_dic['ma_rewards'], cfg, tag="test") + path=cfg.result_path) # 保存结果 + plot_rewards(res_dic['rewards'], cfg, tag="test") # 画出结果 diff --git a/projects/codes/MonteCarlo/agent.py b/projects/codes/MonteCarlo/agent.py index bfe6940..fe2a287 100644 --- a/projects/codes/MonteCarlo/agent.py +++ b/projects/codes/MonteCarlo/agent.py @@ -5,7 +5,7 @@ Author: John Email: johnjim0816@gmail.com Date: 2021-03-12 16:14:34 LastEditor: John -LastEditTime: 2021-05-05 16:58:39 +LastEditTime: 2022-08-15 18:10:13 Discription: Environment: ''' @@ -22,11 +22,10 @@ class 
FisrtVisitMC: self.epsilon = cfg.epsilon self.gamma = cfg.gamma self.Q_table = defaultdict(lambda: np.zeros(n_actions)) - self.returns_sum = defaultdict(float) # sum of returns + self.returns_sum = defaultdict(float) # 保存return之和 self.returns_count = defaultdict(float) - def choose_action(self,state): - ''' e-greed policy ''' + def sample(self,state): if state in self.Q_table.keys(): best_action = np.argmax(self.Q_table[state]) action_probs = np.ones(self.n_actions, dtype=float) * self.epsilon / self.n_actions @@ -35,6 +34,15 @@ class FisrtVisitMC: else: action = np.random.randint(0,self.n_actions) return action + def predict(self,state): + if state in self.Q_table.keys(): + best_action = np.argmax(self.Q_table[state]) + action_probs = np.ones(self.n_actions, dtype=float) * self.epsilon / self.n_actions + action_probs[best_action] += (1.0 - self.epsilon) + action = np.argmax(self.Q_table[state]) + else: + action = np.random.randint(0,self.n_actions) + return action def update(self,one_ep_transition): # Find all (state, action) pairs we've visited in this one_ep_transition # We convert each state to a tuple so that we can use it as a dict key @@ -50,16 +58,18 @@ class FisrtVisitMC: self.returns_sum[sa_pair] += G self.returns_count[sa_pair] += 1.0 self.Q_table[state][action] = self.returns_sum[sa_pair] / self.returns_count[sa_pair] - def save(self,path): + def save(self,path=None): '''把 Q表格 的数据保存到文件中 ''' + from pathlib import Path + Path(path).mkdir(parents=True, exist_ok=True) torch.save( obj=self.Q_table, f=path+"Q_table", pickle_module=dill ) - def load(self, path): + def load(self, path=None): '''从文件中读取数据到 Q表格 ''' self.Q_table =torch.load(f=path+"Q_table",pickle_module=dill) \ No newline at end of file diff --git a/projects/codes/MonteCarlo/outputs/Racetrack/20210505-165945/models/Q_table b/projects/codes/MonteCarlo/outputs/Racetrack/20210505-165945/models/Q_table deleted file mode 100644 index 6205ee5..0000000 Binary files a/projects/codes/MonteCarlo/outputs/Racetrack/20210505-165945/models/Q_table and /dev/null differ diff --git a/projects/codes/MonteCarlo/outputs/Racetrack/20210505-165945/results/eval_ma_rewards.npy b/projects/codes/MonteCarlo/outputs/Racetrack/20210505-165945/results/eval_ma_rewards.npy deleted file mode 100644 index 5cc42f1..0000000 Binary files a/projects/codes/MonteCarlo/outputs/Racetrack/20210505-165945/results/eval_ma_rewards.npy and /dev/null differ diff --git a/projects/codes/MonteCarlo/outputs/Racetrack/20210505-165945/results/eval_rewards.npy b/projects/codes/MonteCarlo/outputs/Racetrack/20210505-165945/results/eval_rewards.npy deleted file mode 100644 index 19bb2f1..0000000 Binary files a/projects/codes/MonteCarlo/outputs/Racetrack/20210505-165945/results/eval_rewards.npy and /dev/null differ diff --git a/projects/codes/MonteCarlo/outputs/Racetrack/20210505-165945/results/eval_rewards_curve.png b/projects/codes/MonteCarlo/outputs/Racetrack/20210505-165945/results/eval_rewards_curve.png deleted file mode 100644 index 0738ac5..0000000 Binary files a/projects/codes/MonteCarlo/outputs/Racetrack/20210505-165945/results/eval_rewards_curve.png and /dev/null differ diff --git a/projects/codes/MonteCarlo/outputs/Racetrack/20210505-165945/results/train_ma_rewards.npy b/projects/codes/MonteCarlo/outputs/Racetrack/20210505-165945/results/train_ma_rewards.npy deleted file mode 100644 index f52b398..0000000 Binary files a/projects/codes/MonteCarlo/outputs/Racetrack/20210505-165945/results/train_ma_rewards.npy and /dev/null differ diff --git 
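For the FisrtVisitMC.update() logic referenced above (accumulating returns_sum and returns_count per state-action pair and averaging them into the Q table), here is a compact, runnable sketch under invented episode data; it mirrors the first-visit idea but is not a copy of agent.py.

```python
# Hedged sketch of a first-visit Monte Carlo update; episode data is made up.
from collections import defaultdict
import numpy as np

gamma = 0.9
n_actions = 2
Q_table = defaultdict(lambda: np.zeros(n_actions))
returns_sum = defaultdict(float)
returns_count = defaultdict(float)

# one_ep_transition: list of (state, action, reward) collected in one episode.
one_ep_transition = [(0, 1, 0.0), (1, 0, 0.0), (0, 1, 1.0)]

sa_pairs = set((s, a) for s, a, _ in one_ep_transition)
for state, action in sa_pairs:
    # Index of the FIRST visit of this (state, action) pair in the episode.
    first_idx = next(i for i, (s, a, _) in enumerate(one_ep_transition)
                     if s == state and a == action)
    # Discounted return from the first visit to the end of the episode.
    G = sum(r * gamma ** t
            for t, (_, _, r) in enumerate(one_ep_transition[first_idx:]))
    returns_sum[(state, action)] += G
    returns_count[(state, action)] += 1.0
    # The Q estimate is the running average of observed first-visit returns.
    Q_table[state][action] = returns_sum[(state, action)] / returns_count[(state, action)]
```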
a/projects/codes/MonteCarlo/outputs/Racetrack/20210505-165945/results/train_rewards.npy b/projects/codes/MonteCarlo/outputs/Racetrack/20210505-165945/results/train_rewards.npy deleted file mode 100644 index fe83b3c..0000000 Binary files a/projects/codes/MonteCarlo/outputs/Racetrack/20210505-165945/results/train_rewards.npy and /dev/null differ diff --git a/projects/codes/MonteCarlo/outputs/Racetrack/20210505-165945/results/train_rewards_curve.png b/projects/codes/MonteCarlo/outputs/Racetrack/20210505-165945/results/train_rewards_curve.png deleted file mode 100644 index b10b7ab..0000000 Binary files a/projects/codes/MonteCarlo/outputs/Racetrack/20210505-165945/results/train_rewards_curve.png and /dev/null differ diff --git a/projects/codes/MonteCarlo/outputs/Racetrack/20220815-180742/models/Q_table b/projects/codes/MonteCarlo/outputs/Racetrack/20220815-180742/models/Q_table new file mode 100644 index 0000000..e21a117 Binary files /dev/null and b/projects/codes/MonteCarlo/outputs/Racetrack/20220815-180742/models/Q_table differ diff --git a/projects/codes/MonteCarlo/outputs/Racetrack/20220815-180742/results/params.json b/projects/codes/MonteCarlo/outputs/Racetrack/20220815-180742/results/params.json new file mode 100644 index 0000000..6f75e32 --- /dev/null +++ b/projects/codes/MonteCarlo/outputs/Racetrack/20220815-180742/results/params.json @@ -0,0 +1 @@ +{"algo_name": "First-Visit MC", "env_name": "Racetrack", "train_eps": 200, "test_eps": 20, "gamma": 0.9, "epsilon": 0.15, "device": "cpu", "result_path": "/Users/jj/Desktop/rl-tutorials/codes/MonteCarlo/outputs/Racetrack/20220815-180742/results/", "model_path": "/Users/jj/Desktop/rl-tutorials/codes/MonteCarlo/outputs/Racetrack/20220815-180742/models/", "save_fig": true} \ No newline at end of file diff --git a/projects/codes/MonteCarlo/outputs/Racetrack/20220815-180742/results/test_rewards.npy b/projects/codes/MonteCarlo/outputs/Racetrack/20220815-180742/results/test_rewards.npy new file mode 100644 index 0000000..c0de5ac Binary files /dev/null and b/projects/codes/MonteCarlo/outputs/Racetrack/20220815-180742/results/test_rewards.npy differ diff --git a/projects/codes/MonteCarlo/outputs/Racetrack/20220815-180742/results/testing_curve.png b/projects/codes/MonteCarlo/outputs/Racetrack/20220815-180742/results/testing_curve.png new file mode 100644 index 0000000..3c9cda1 Binary files /dev/null and b/projects/codes/MonteCarlo/outputs/Racetrack/20220815-180742/results/testing_curve.png differ diff --git a/projects/codes/MonteCarlo/outputs/Racetrack/20220815-180742/results/train_rewards.npy b/projects/codes/MonteCarlo/outputs/Racetrack/20220815-180742/results/train_rewards.npy new file mode 100644 index 0000000..026a78d Binary files /dev/null and b/projects/codes/MonteCarlo/outputs/Racetrack/20220815-180742/results/train_rewards.npy differ diff --git a/projects/codes/MonteCarlo/outputs/Racetrack/20220815-180742/results/training_curve.png b/projects/codes/MonteCarlo/outputs/Racetrack/20220815-180742/results/training_curve.png new file mode 100644 index 0000000..9e8c483 Binary files /dev/null and b/projects/codes/MonteCarlo/outputs/Racetrack/20220815-180742/results/training_curve.png differ diff --git a/projects/codes/MonteCarlo/task0.py b/projects/codes/MonteCarlo/task0.py new file mode 100644 index 0000000..fbddf0a --- /dev/null +++ b/projects/codes/MonteCarlo/task0.py @@ -0,0 +1,110 @@ +#!/usr/bin/env python +# coding=utf-8 +''' +Author: John +Email: johnjim0816@gmail.com +Date: 2021-03-11 14:26:44 +LastEditor: John +LastEditTime: 2022-08-15 
18:12:13 +Discription: +Environment: +''' +import sys,os +curr_path = os.path.dirname(os.path.abspath(__file__)) # 当前文件所在绝对路径 +parent_path = os.path.dirname(curr_path) # 父路径 +sys.path.append(parent_path) # 添加路径到系统路径 + +import datetime +import argparse +from common.utils import save_results,save_args,plot_rewards + +from MonteCarlo.agent import FisrtVisitMC +from envs.racetrack_env import RacetrackEnv + +curr_time = datetime.datetime.now().strftime( + "%Y%m%d-%H%M%S") # obtain current time + +def get_args(): + """ 超参数 + """ + curr_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") # 获取当前时间 + parser = argparse.ArgumentParser(description="hyperparameters") + parser.add_argument('--algo_name',default='First-Visit MC',type=str,help="name of algorithm") + parser.add_argument('--env_name',default='Racetrack',type=str,help="name of environment") + parser.add_argument('--train_eps',default=200,type=int,help="episodes of training") + parser.add_argument('--test_eps',default=20,type=int,help="episodes of testing") + parser.add_argument('--gamma',default=0.9,type=float,help="discounted factor") + parser.add_argument('--epsilon',default=0.15,type=float,help="the probability to select a random action") + parser.add_argument('--device',default='cpu',type=str,help="cpu or cuda") + parser.add_argument('--result_path',default=curr_path + "/outputs/" + parser.parse_args().env_name + \ + '/' + curr_time + '/results/' ) + parser.add_argument('--model_path',default=curr_path + "/outputs/" + parser.parse_args().env_name + \ + '/' + curr_time + '/models/' ) + parser.add_argument('--show_fig',default=False,type=bool,help="if show figure or not") + parser.add_argument('--save_fig',default=True,type=bool,help="if save figure or not") + args = parser.parse_args() + return args + +def env_agent_config(cfg,seed=1): + env = RacetrackEnv() + n_actions = env.action_space.n + agent = FisrtVisitMC(n_actions, cfg) + return env,agent + +def train(cfg, env, agent): + print("开始训练!") + print(f"环境:{cfg.env_name},算法:{cfg.algo_name},设备:{cfg.device}") + rewards = [] + for i_ep in range(cfg.train_eps): + state = env.reset() + ep_reward = 0 + one_ep_transition = [] + while True: + action = agent.sample(state) + next_state, reward, done = env.step(action) + ep_reward += reward + one_ep_transition.append((state, action, reward)) + state = next_state + if done: + break + rewards.append(ep_reward) + agent.update(one_ep_transition) + if (i_ep+1) % 10 == 0: + print(f"Episode:{i_ep+1}/{cfg.train_eps}: Reward:{ep_reward}") + print("完成训练") + return {'rewards':rewards} + +def test(cfg, env, agent): + print("开始测试!") + print(f"环境:{cfg.env_name}, 算法:{cfg.algo_name}, 设备:{cfg.device}") + rewards = [] + for i_ep in range(cfg.test_eps): + state = env.reset() + ep_reward = 0 + while True: + action = agent.predict(state) + next_state, reward, done = env.step(action) + ep_reward += reward + state = next_state + if done: + break + rewards.append(ep_reward) + print(f'回合:{i_ep+1}/{cfg.test_eps},奖励:{ep_reward:.2f}') + return {'rewards':rewards} + +if __name__ == "__main__": + cfg = get_args() + # 训练 + env, agent = env_agent_config(cfg) + res_dic = train(cfg, env, agent) + save_args(cfg,path = cfg.result_path) # 保存参数到模型路径上 + agent.save(path = cfg.model_path) # 保存模型 + save_results(res_dic, tag = 'train', path = cfg.result_path) + plot_rewards(res_dic['rewards'], cfg, path = cfg.result_path,tag = "train") + # 测试 + env, agent = env_agent_config(cfg) # 也可以不加,加这一行的是为了避免训练之后环境可能会出现问题,因此新建一个环境用于测试 + agent.load(path = cfg.model_path) # 导入模型 + res_dic = 
test(cfg, env, agent) + save_results(res_dic, tag='test', + path = cfg.result_path) # 保存结果 + plot_rewards(res_dic['rewards'], cfg, path = cfg.result_path,tag = "test") # 画出结果 diff --git a/projects/codes/MonteCarlo/task0_train.py b/projects/codes/MonteCarlo/task0_train.py deleted file mode 100644 index 51858f8..0000000 --- a/projects/codes/MonteCarlo/task0_train.py +++ /dev/null @@ -1,118 +0,0 @@ -#!/usr/bin/env python -# coding=utf-8 -''' -Author: John -Email: johnjim0816@gmail.com -Date: 2021-03-11 14:26:44 -LastEditor: John -LastEditTime: 2021-05-05 17:27:50 -Discription: -Environment: -''' - -import sys,os -curr_path = os.path.dirname(__file__) -parent_path = os.path.dirname(curr_path) -sys.path.append(parent_path) # add current terminal path to sys.path - -import torch -import datetime - -from common.utils import save_results,make_dir -from common.plot import plot_rewards -from MonteCarlo.agent import FisrtVisitMC -from envs.racetrack_env import RacetrackEnv - -curr_time = datetime.datetime.now().strftime( - "%Y%m%d-%H%M%S") # obtain current time - -class MCConfig: - def __init__(self): - self.algo = "MC" # name of algo - self.env = 'Racetrack' - self.result_path = curr_path+"/outputs/" + self.env + \ - '/'+curr_time+'/results/' # path to save results - self.model_path = curr_path+"/outputs/" + self.env + \ - '/'+curr_time+'/models/' # path to save models - # epsilon: The probability to select a random action . - self.epsilon = 0.15 - self.gamma = 0.9 # gamma: Gamma discount factor. - self.train_eps = 200 - self.device = torch.device( - "cuda" if torch.cuda.is_available() else "cpu") # check gpu - -def env_agent_config(cfg,seed=1): - env = RacetrackEnv() - n_actions = 9 - agent = FisrtVisitMC(n_actions, cfg) - return env,agent - -def train(cfg, env, agent): - print('Start to eval !') - print(f'Env:{cfg.env}, Algorithm:{cfg.algo}, Device:{cfg.device}') - rewards = [] - ma_rewards = [] # moving average rewards - for i_ep in range(cfg.train_eps): - state = env.reset() - ep_reward = 0 - one_ep_transition = [] - while True: - action = agent.choose_action(state) - next_state, reward, done = env.step(action) - ep_reward += reward - one_ep_transition.append((state, action, reward)) - state = next_state - if done: - break - rewards.append(ep_reward) - if ma_rewards: - ma_rewards.append(ma_rewards[-1]*0.9+ep_reward*0.1) - else: - ma_rewards.append(ep_reward) - agent.update(one_ep_transition) - if (i_ep+1) % 10 == 0: - print(f"Episode:{i_ep+1}/{cfg.train_eps}: Reward:{ep_reward}") - print('Complete training!') - return rewards, ma_rewards - -def eval(cfg, env, agent): - print('Start to eval !') - print(f'Env:{cfg.env}, Algorithm:{cfg.algo}, Device:{cfg.device}') - rewards = [] - ma_rewards = [] # moving average rewards - for i_ep in range(cfg.train_eps): - state = env.reset() - ep_reward = 0 - while True: - action = agent.choose_action(state) - next_state, reward, done = env.step(action) - ep_reward += reward - state = next_state - if done: - break - rewards.append(ep_reward) - if ma_rewards: - ma_rewards.append(ma_rewards[-1]*0.9+ep_reward*0.1) - else: - ma_rewards.append(ep_reward) - if (i_ep+1) % 10 == 0: - print(f"Episode:{i_ep+1}/{cfg.train_eps}: Reward:{ep_reward}") - return rewards, ma_rewards - -if __name__ == "__main__": - cfg = MCConfig() - - # train - env,agent = env_agent_config(cfg,seed=1) - rewards, ma_rewards = train(cfg, env, agent) - make_dir(cfg.result_path, cfg.model_path) - agent.save(path=cfg.model_path) - save_results(rewards, ma_rewards, tag='train', path=cfg.result_path) - 
plot_rewards(rewards, ma_rewards, tag="train", - algo=cfg.algo, path=cfg.result_path) - # eval - env,agent = env_agent_config(cfg,seed=10) - agent.load(path=cfg.model_path) - rewards,ma_rewards = eval(cfg,env,agent) - save_results(rewards,ma_rewards,tag='eval',path=cfg.result_path) - plot_rewards(rewards,ma_rewards,tag="eval",env=cfg.env,algo = cfg.algo,path=cfg.result_path) diff --git a/projects/codes/QLearning/outputs/CliffWalking-v0/20220210-005501/models/Qleaning_model.pkl b/projects/codes/QLearning/outputs/CliffWalking-v0/20220210-005501/models/Qleaning_model.pkl deleted file mode 100644 index 9053e52..0000000 Binary files a/projects/codes/QLearning/outputs/CliffWalking-v0/20220210-005501/models/Qleaning_model.pkl and /dev/null differ diff --git a/projects/codes/QLearning/outputs/CliffWalking-v0/20220210-005501/results/test_ma_rewards.npy b/projects/codes/QLearning/outputs/CliffWalking-v0/20220210-005501/results/test_ma_rewards.npy deleted file mode 100644 index a67d064..0000000 Binary files a/projects/codes/QLearning/outputs/CliffWalking-v0/20220210-005501/results/test_ma_rewards.npy and /dev/null differ diff --git a/projects/codes/QLearning/outputs/CliffWalking-v0/20220210-005501/results/test_rewards_curve.png b/projects/codes/QLearning/outputs/CliffWalking-v0/20220210-005501/results/test_rewards_curve.png deleted file mode 100644 index f7cee1b..0000000 Binary files a/projects/codes/QLearning/outputs/CliffWalking-v0/20220210-005501/results/test_rewards_curve.png and /dev/null differ diff --git a/projects/codes/QLearning/outputs/CliffWalking-v0/20220210-005501/results/train_ma_rewards.npy b/projects/codes/QLearning/outputs/CliffWalking-v0/20220210-005501/results/train_ma_rewards.npy deleted file mode 100644 index 5050935..0000000 Binary files a/projects/codes/QLearning/outputs/CliffWalking-v0/20220210-005501/results/train_ma_rewards.npy and /dev/null differ diff --git a/projects/codes/QLearning/outputs/CliffWalking-v0/20220210-005501/results/train_rewards.npy b/projects/codes/QLearning/outputs/CliffWalking-v0/20220210-005501/results/train_rewards.npy deleted file mode 100644 index 12c27d8..0000000 Binary files a/projects/codes/QLearning/outputs/CliffWalking-v0/20220210-005501/results/train_rewards.npy and /dev/null differ diff --git a/projects/codes/QLearning/outputs/CliffWalking-v0/20220210-005501/results/train_rewards_curve.png b/projects/codes/QLearning/outputs/CliffWalking-v0/20220210-005501/results/train_rewards_curve.png deleted file mode 100644 index b7d33a6..0000000 Binary files a/projects/codes/QLearning/outputs/CliffWalking-v0/20220210-005501/results/train_rewards_curve.png and /dev/null differ diff --git a/projects/codes/QLearning/outputs/CliffWalking-v0/20220802-163256/models/Qleaning_model.pkl b/projects/codes/QLearning/outputs/CliffWalking-v0/20220802-163256/models/Qleaning_model.pkl new file mode 100644 index 0000000..b28eedd Binary files /dev/null and b/projects/codes/QLearning/outputs/CliffWalking-v0/20220802-163256/models/Qleaning_model.pkl differ diff --git a/projects/codes/QLearning/outputs/CliffWalking-v0/20220802-163256/results/params.json b/projects/codes/QLearning/outputs/CliffWalking-v0/20220802-163256/results/params.json new file mode 100644 index 0000000..232925a --- /dev/null +++ b/projects/codes/QLearning/outputs/CliffWalking-v0/20220802-163256/results/params.json @@ -0,0 +1,15 @@ +{ + "algo_name": "Q-learning", + "env_name": "CliffWalking-v0", + "train_eps": 400, + "test_eps": 20, + "gamma": 0.9, + "epsilon_start": 0.95, + "epsilon_end": 0.01, + 
"epsilon_decay": 300, + "lr": 0.1, + "device": "cpu", + "result_path": "/root/Desktop/rl-tutorials/codes/QLearning/outputs/CliffWalking-v0/20220802-163256/results/", + "model_path": "/root/Desktop/rl-tutorials/codes/QLearning/outputs/CliffWalking-v0/20220802-163256/models/", + "save_fig": true +} \ No newline at end of file diff --git a/projects/codes/QLearning/outputs/CliffWalking-v0/20220210-005501/results/test_rewards.npy b/projects/codes/QLearning/outputs/CliffWalking-v0/20220802-163256/results/test_rewards.npy similarity index 52% rename from projects/codes/QLearning/outputs/CliffWalking-v0/20220210-005501/results/test_rewards.npy rename to projects/codes/QLearning/outputs/CliffWalking-v0/20220802-163256/results/test_rewards.npy index 6de67e1..8aeb5dd 100644 Binary files a/projects/codes/QLearning/outputs/CliffWalking-v0/20220210-005501/results/test_rewards.npy and b/projects/codes/QLearning/outputs/CliffWalking-v0/20220802-163256/results/test_rewards.npy differ diff --git a/projects/codes/QLearning/outputs/CliffWalking-v0/20220802-163256/results/test_rewards_curve.png b/projects/codes/QLearning/outputs/CliffWalking-v0/20220802-163256/results/test_rewards_curve.png new file mode 100644 index 0000000..72171e3 Binary files /dev/null and b/projects/codes/QLearning/outputs/CliffWalking-v0/20220802-163256/results/test_rewards_curve.png differ diff --git a/projects/codes/QLearning/outputs/CliffWalking-v0/20220802-163256/results/train_rewards.npy b/projects/codes/QLearning/outputs/CliffWalking-v0/20220802-163256/results/train_rewards.npy new file mode 100644 index 0000000..0f18270 Binary files /dev/null and b/projects/codes/QLearning/outputs/CliffWalking-v0/20220802-163256/results/train_rewards.npy differ diff --git a/projects/codes/QLearning/outputs/CliffWalking-v0/20220802-163256/results/train_rewards_curve.png b/projects/codes/QLearning/outputs/CliffWalking-v0/20220802-163256/results/train_rewards_curve.png new file mode 100644 index 0000000..92992c3 Binary files /dev/null and b/projects/codes/QLearning/outputs/CliffWalking-v0/20220802-163256/results/train_rewards_curve.png differ diff --git a/projects/codes/QLearning/qlearning.py b/projects/codes/QLearning/qlearning.py index be57831..e97a594 100644 --- a/projects/codes/QLearning/qlearning.py +++ b/projects/codes/QLearning/qlearning.py @@ -15,18 +15,20 @@ import torch from collections import defaultdict class QLearning(object): - def __init__(self,n_states, + def __init__(self, n_actions,cfg): self.n_actions = n_actions self.lr = cfg.lr # 学习率 self.gamma = cfg.gamma - self.epsilon = 0 + self.epsilon = cfg.epsilon_start self.sample_count = 0 self.epsilon_start = cfg.epsilon_start self.epsilon_end = cfg.epsilon_end self.epsilon_decay = cfg.epsilon_decay self.Q_table = defaultdict(lambda: np.zeros(n_actions)) # 用嵌套字典存放状态->动作->状态-动作值(Q值)的映射,即Q表 - def choose_action(self, state): + def sample(self, state): + ''' 采样动作,训练时用 + ''' self.sample_count += 1 self.epsilon = self.epsilon_end + (self.epsilon_start - self.epsilon_end) * \ math.exp(-1. 
* self.sample_count / self.epsilon_decay) # epsilon是会递减的,这里选择指数递减 @@ -37,6 +39,8 @@ class QLearning(object): action = np.random.choice(self.n_actions) # 随机选择动作 return action def predict(self,state): + ''' 预测或选择动作,测试时用 + ''' action = np.argmax(self.Q_table[str(state)]) return action def update(self, state, action, reward, next_state, done): diff --git a/projects/codes/QLearning/task0.py b/projects/codes/QLearning/task0.py index 98e620e..15100d5 100644 --- a/projects/codes/QLearning/task0.py +++ b/projects/codes/QLearning/task0.py @@ -5,7 +5,7 @@ Author: John Email: johnjim0816@gmail.com Date: 2020-09-11 23:03:00 LastEditor: John -LastEditTime: 2022-06-21 19:36:05 +LastEditTime: 2022-08-10 11:25:56 Discription: Environment: ''' @@ -18,54 +18,45 @@ sys.path.append(parent_path) # 添加路径到系统路径 import gym import torch import datetime - -from env.gridworld_env import CliffWalkingWapper +import argparse +from envs.gridworld_env import CliffWalkingWapper from qlearning import QLearning -from common.utils import plot_rewards +from common.utils import plot_rewards,save_args from common.utils import save_results,make_dir +def get_args(): + """ + """ + curr_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") # 获取当前时间 + parser = argparse.ArgumentParser(description="hyperparameters") + parser.add_argument('--algo_name',default='Q-learning',type=str,help="name of algorithm") + parser.add_argument('--env_name',default='CliffWalking-v0',type=str,help="name of environment") + parser.add_argument('--train_eps',default=400,type=int,help="episodes of training") # 训练的回合数 + parser.add_argument('--test_eps',default=20,type=int,help="episodes of testing") # 测试的回合数 + parser.add_argument('--gamma',default=0.90,type=float,help="discounted factor") # 折扣因子 + parser.add_argument('--epsilon_start',default=0.95,type=float,help="initial value of epsilon") # e-greedy策略中初始epsilon + parser.add_argument('--epsilon_end',default=0.01,type=float,help="final value of epsilon") # e-greedy策略中的终止epsilon + parser.add_argument('--epsilon_decay',default=300,type=int,help="decay rate of epsilon") # e-greedy策略中epsilon的衰减率 + parser.add_argument('--lr',default=0.1,type=float,help="learning rate") + parser.add_argument('--device',default='cpu',type=str,help="cpu or cuda") + parser.add_argument('--result_path',default=curr_path + "/outputs/" + parser.parse_args().env_name + \ + '/' + curr_time + '/results/',type=str ) + parser.add_argument('--model_path',default=curr_path + "/outputs/" + parser.parse_args().env_name + \ + '/' + curr_time + '/models/',type=str,help="path to save models") + parser.add_argument('--save_fig',default=True,type=bool,help="if save figure or not") + args = parser.parse_args() + return args curr_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") # 获取当前时间 -class Config: - '''超参数 - ''' - - def __init__(self): - ################################## 环境超参数 ################################### - self.algo_name = 'Q-learning' # 算法名称 - self.env_name = 'CliffWalking-v0' # 环境名称 - self.device = torch.device( - "cuda" if torch.cuda.is_available() else "cpu") # 检测GPUgjgjlkhfsf风刀霜的撒发十 - self.seed = 10 # 随机种子,置0则不设置随机种子 - self.train_eps = 400 # 训练的回合数 - self.test_eps = 30 # 测试的回合数 - ################################################################################ - - ################################## 算法超参数 ################################### - self.gamma = 0.90 # 强化学习中的折扣因子 - self.epsilon_start = 0.95 # e-greedy策略中初始epsilon - self.epsilon_end = 0.01 # e-greedy策略中的终止epsilon - self.epsilon_decay = 300 # e-greedy策略中epsilon的衰减率 - 
self.lr = 0.1 # 学习率 - ################################################################################ - - ################################# 保存结果相关参数 ################################ - self.result_path = curr_path + "/outputs/" + self.env_name + \ - '/' + curr_time + '/results/' # 保存结果的路径 - self.model_path = curr_path + "/outputs/" + self.env_name + \ - '/' + curr_time + '/models/' # 保存模型的路径 - self.save = True # 是否保存图片 - ################################################################################ - + def train(cfg,env,agent): print('开始训练!') print(f'环境:{cfg.env_name}, 算法:{cfg.algo_name}, 设备:{cfg.device}') rewards = [] # 记录奖励 - ma_rewards = [] # 记录滑动平均奖励 for i_ep in range(cfg.train_eps): ep_reward = 0 # 记录每个回合的奖励 state = env.reset() # 重置环境,即开始新的回合 while True: - action = agent.choose_action(state) # 根据算法选择一个动作 + action = agent.sample(state) # 根据算法采样一个动作 next_state, reward, done, _ = env.step(action) # 与环境进行一次动作交互 agent.update(state, action, reward, next_state, done) # Q学习算法更新 state = next_state # 更新状态 @@ -73,19 +64,14 @@ def train(cfg,env,agent): if done: break rewards.append(ep_reward) - if ma_rewards: - ma_rewards.append(ma_rewards[-1]*0.9+ep_reward*0.1) - else: - ma_rewards.append(ep_reward) - print("回合数:{}/{},奖励{:.1f}".format(i_ep+1, cfg.train_eps,ep_reward)) + print(f"回合:{i_ep+1}/{cfg.train_eps},奖励:{ep_reward:.1f},Epsilon:{agent.epsilon}") print('完成训练!') - return rewards,ma_rewards + return {"rewards":rewards} def test(cfg,env,agent): print('开始测试!') print(f'环境:{cfg.env_name}, 算法:{cfg.algo_name}, 设备:{cfg.device}') rewards = [] # 记录所有回合的奖励 - ma_rewards = [] # 滑动平均的奖励 for i_ep in range(cfg.test_eps): ep_reward = 0 # 记录每个episode的reward state = env.reset() # 重置环境, 重新开一局(即开始新的一个回合) @@ -97,13 +83,9 @@ def test(cfg,env,agent): if done: break rewards.append(ep_reward) - if ma_rewards: - ma_rewards.append(ma_rewards[-1]*0.9+ep_reward*0.1) - else: - ma_rewards.append(ep_reward) print(f"回合数:{i_ep+1}/{cfg.test_eps}, 奖励:{ep_reward:.1f}") print('完成测试!') - return rewards,ma_rewards + return {"rewards":rewards} def env_agent_config(cfg,seed=1): '''创建环境和智能体 @@ -119,23 +101,27 @@ def env_agent_config(cfg,seed=1): env.seed(seed) # 设置随机种子 n_states = env.observation_space.n # 状态维度 n_actions = env.action_space.n # 动作维度 - agent = QLearning(n_states,n_actions,cfg) + print(f"状态数:{n_states},动作数:{n_actions}") + agent = QLearning(n_actions,cfg) return env,agent if __name__ == "__main__": - cfg = Config() + cfg = get_args() # 训练 - env, agent = env_agent_config(cfg, seed=1) - rewards, ma_rewards = train(cfg, env, agent) - make_dir(cfg.result_path, cfg.model_path) # 创建保存结果和模型路径的文件夹 - agent.save(path=cfg.model_path) # 保存模型 - save_results(rewards, ma_rewards, tag='train', - path=cfg.result_path) # 保存结果 - plot_rewards(rewards, ma_rewards, cfg, tag="train") # 画出结果 + env, agent = env_agent_config(cfg) + res_dic = train(cfg, env, agent) + make_dir(cfg.result_path, cfg.model_path) + save_args(cfg) # save parameters + agent.save(path=cfg.model_path) # save model + save_results(res_dic, tag='train', + path=cfg.result_path) + plot_rewards(res_dic['rewards'], cfg, tag="train") # 测试 - env, agent = env_agent_config(cfg, seed=10) + env, agent = env_agent_config(cfg) agent.load(path=cfg.model_path) # 导入模型 - rewards, ma_rewards = test(cfg, env, agent) - save_results(rewards, ma_rewards, tag='test', path=cfg.result_path) # 保存结果 - plot_rewards(rewards, ma_rewards, cfg, tag="test") # 画出结果 + res_dic = test(cfg, env, agent) + save_results(res_dic, tag='test', + path=cfg.result_path) # 保存结果 + plot_rewards(res_dic['rewards'], cfg, 
tag="test") # 画出结果 + diff --git a/projects/codes/Sarsa/outputs/CliffWalking-v0/20220429-202317/models/sarsa_model.pkl b/projects/codes/Sarsa/outputs/CliffWalking-v0/20220429-202317/models/sarsa_model.pkl deleted file mode 100644 index 71c5339..0000000 Binary files a/projects/codes/Sarsa/outputs/CliffWalking-v0/20220429-202317/models/sarsa_model.pkl and /dev/null differ diff --git a/projects/codes/Sarsa/outputs/CliffWalking-v0/20220429-202317/results/test_ma_rewards.npy b/projects/codes/Sarsa/outputs/CliffWalking-v0/20220429-202317/results/test_ma_rewards.npy deleted file mode 100644 index 980eabe..0000000 Binary files a/projects/codes/Sarsa/outputs/CliffWalking-v0/20220429-202317/results/test_ma_rewards.npy and /dev/null differ diff --git a/projects/codes/Sarsa/outputs/CliffWalking-v0/20220429-202317/results/test_rewards.npy b/projects/codes/Sarsa/outputs/CliffWalking-v0/20220429-202317/results/test_rewards.npy deleted file mode 100644 index 5c08614..0000000 Binary files a/projects/codes/Sarsa/outputs/CliffWalking-v0/20220429-202317/results/test_rewards.npy and /dev/null differ diff --git a/projects/codes/Sarsa/outputs/CliffWalking-v0/20220429-202317/results/test_rewards_curve.png b/projects/codes/Sarsa/outputs/CliffWalking-v0/20220429-202317/results/test_rewards_curve.png deleted file mode 100644 index b53212b..0000000 Binary files a/projects/codes/Sarsa/outputs/CliffWalking-v0/20220429-202317/results/test_rewards_curve.png and /dev/null differ diff --git a/projects/codes/Sarsa/outputs/CliffWalking-v0/20220429-202317/results/train_ma_rewards.npy b/projects/codes/Sarsa/outputs/CliffWalking-v0/20220429-202317/results/train_ma_rewards.npy deleted file mode 100644 index d12b47a..0000000 Binary files a/projects/codes/Sarsa/outputs/CliffWalking-v0/20220429-202317/results/train_ma_rewards.npy and /dev/null differ diff --git a/projects/codes/Sarsa/outputs/CliffWalking-v0/20220429-202317/results/train_rewards.npy b/projects/codes/Sarsa/outputs/CliffWalking-v0/20220429-202317/results/train_rewards.npy deleted file mode 100644 index 5da3ce1..0000000 Binary files a/projects/codes/Sarsa/outputs/CliffWalking-v0/20220429-202317/results/train_rewards.npy and /dev/null differ diff --git a/projects/codes/Sarsa/outputs/CliffWalking-v0/20220429-202317/results/train_rewards_curve.png b/projects/codes/Sarsa/outputs/CliffWalking-v0/20220429-202317/results/train_rewards_curve.png deleted file mode 100644 index d18775f..0000000 Binary files a/projects/codes/Sarsa/outputs/CliffWalking-v0/20220429-202317/results/train_rewards_curve.png and /dev/null differ diff --git a/projects/codes/Sarsa/outputs/CliffWalking-v0/20220803-142740/models/sarsa_model.pkl b/projects/codes/Sarsa/outputs/CliffWalking-v0/20220803-142740/models/sarsa_model.pkl new file mode 100644 index 0000000..1c8f133 Binary files /dev/null and b/projects/codes/Sarsa/outputs/CliffWalking-v0/20220803-142740/models/sarsa_model.pkl differ diff --git a/projects/codes/Sarsa/outputs/CliffWalking-v0/20220803-142740/results/params.json b/projects/codes/Sarsa/outputs/CliffWalking-v0/20220803-142740/results/params.json new file mode 100644 index 0000000..8492e8e --- /dev/null +++ b/projects/codes/Sarsa/outputs/CliffWalking-v0/20220803-142740/results/params.json @@ -0,0 +1 @@ +{"algo_name": "Sarsa", "env_name": "CliffWalking-v0", "train_eps": 300, "test_eps": 20, "ep_max_steps": 200, "gamma": 0.99, "epsilon_start": 0.9, "epsilon_end": 0.01, "epsilon_decay": 200, "lr": 0.2, "device": "cpu", "result_path": 
"/Users/jj/Desktop/rl-tutorials/codes/Sarsa/outputs/CliffWalking-v0/20220803-142740/results/", "model_path": "/Users/jj/Desktop/rl-tutorials/codes/Sarsa/outputs/CliffWalking-v0/20220803-142740/models/", "save_fig": true} \ No newline at end of file diff --git a/projects/codes/Sarsa/outputs/CliffWalking-v0/20220803-142740/results/test_rewards.npy b/projects/codes/Sarsa/outputs/CliffWalking-v0/20220803-142740/results/test_rewards.npy new file mode 100644 index 0000000..ef51f5e Binary files /dev/null and b/projects/codes/Sarsa/outputs/CliffWalking-v0/20220803-142740/results/test_rewards.npy differ diff --git a/projects/codes/Sarsa/outputs/CliffWalking-v0/20220803-142740/results/test_rewards_curve.png b/projects/codes/Sarsa/outputs/CliffWalking-v0/20220803-142740/results/test_rewards_curve.png new file mode 100644 index 0000000..5b97ea1 Binary files /dev/null and b/projects/codes/Sarsa/outputs/CliffWalking-v0/20220803-142740/results/test_rewards_curve.png differ diff --git a/projects/codes/Sarsa/outputs/CliffWalking-v0/20220803-142740/results/train_rewards.npy b/projects/codes/Sarsa/outputs/CliffWalking-v0/20220803-142740/results/train_rewards.npy new file mode 100644 index 0000000..c7ad308 Binary files /dev/null and b/projects/codes/Sarsa/outputs/CliffWalking-v0/20220803-142740/results/train_rewards.npy differ diff --git a/projects/codes/Sarsa/outputs/CliffWalking-v0/20220803-142740/results/train_rewards_curve.png b/projects/codes/Sarsa/outputs/CliffWalking-v0/20220803-142740/results/train_rewards_curve.png new file mode 100644 index 0000000..111f028 Binary files /dev/null and b/projects/codes/Sarsa/outputs/CliffWalking-v0/20220803-142740/results/train_rewards_curve.png differ diff --git a/projects/codes/Sarsa/outputs/CliffWalking-v0/20220804-223029/models/sarsa_model.pkl b/projects/codes/Sarsa/outputs/CliffWalking-v0/20220804-223029/models/sarsa_model.pkl new file mode 100644 index 0000000..f1d9dcf Binary files /dev/null and b/projects/codes/Sarsa/outputs/CliffWalking-v0/20220804-223029/models/sarsa_model.pkl differ diff --git a/projects/codes/Sarsa/outputs/CliffWalking-v0/20220804-223029/results/params.json b/projects/codes/Sarsa/outputs/CliffWalking-v0/20220804-223029/results/params.json new file mode 100644 index 0000000..517bb98 --- /dev/null +++ b/projects/codes/Sarsa/outputs/CliffWalking-v0/20220804-223029/results/params.json @@ -0,0 +1,15 @@ +{ + "algo_name": "Sarsa", + "env_name": "CliffWalking-v0", + "train_eps": 400, + "test_eps": 20, + "gamma": 0.9, + "epsilon_start": 0.95, + "epsilon_end": 0.01, + "epsilon_decay": 300, + "lr": 0.1, + "device": "cpu", + "result_path": "c:\\Users\\24438\\Desktop\\rl-tutorials\\codes\\Sarsa/outputs/CliffWalking-v0/20220804-223029/results/", + "model_path": "c:\\Users\\24438\\Desktop\\rl-tutorials\\codes\\Sarsa/outputs/CliffWalking-v0/20220804-223029/models/", + "save_fig": true +} \ No newline at end of file diff --git a/projects/codes/DQN/outputs/CartPole-v0/20220713-211653/results/test_steps.npy b/projects/codes/Sarsa/outputs/CliffWalking-v0/20220804-223029/results/test_rewards.npy similarity index 61% rename from projects/codes/DQN/outputs/CartPole-v0/20220713-211653/results/test_steps.npy rename to projects/codes/Sarsa/outputs/CliffWalking-v0/20220804-223029/results/test_rewards.npy index db9c3fd..1b35004 100644 Binary files a/projects/codes/DQN/outputs/CartPole-v0/20220713-211653/results/test_steps.npy and b/projects/codes/Sarsa/outputs/CliffWalking-v0/20220804-223029/results/test_rewards.npy differ diff --git 
a/projects/codes/Sarsa/outputs/CliffWalking-v0/20220804-223029/results/test_rewards_curve.png b/projects/codes/Sarsa/outputs/CliffWalking-v0/20220804-223029/results/test_rewards_curve.png new file mode 100644 index 0000000..d600435 Binary files /dev/null and b/projects/codes/Sarsa/outputs/CliffWalking-v0/20220804-223029/results/test_rewards_curve.png differ diff --git a/projects/codes/Sarsa/outputs/CliffWalking-v0/20220804-223029/results/train_rewards.npy b/projects/codes/Sarsa/outputs/CliffWalking-v0/20220804-223029/results/train_rewards.npy new file mode 100644 index 0000000..f9979cc Binary files /dev/null and b/projects/codes/Sarsa/outputs/CliffWalking-v0/20220804-223029/results/train_rewards.npy differ diff --git a/projects/codes/Sarsa/outputs/CliffWalking-v0/20220804-223029/results/train_rewards_curve.png b/projects/codes/Sarsa/outputs/CliffWalking-v0/20220804-223029/results/train_rewards_curve.png new file mode 100644 index 0000000..9ffa9bf Binary files /dev/null and b/projects/codes/Sarsa/outputs/CliffWalking-v0/20220804-223029/results/train_rewards_curve.png differ diff --git a/projects/codes/Sarsa/sarsa.py b/projects/codes/Sarsa/sarsa.py index 477ab14..79b5997 100644 --- a/projects/codes/Sarsa/sarsa.py +++ b/projects/codes/Sarsa/sarsa.py @@ -5,7 +5,7 @@ Author: John Email: johnjim0816@gmail.com Date: 2021-03-12 16:58:16 LastEditor: John -LastEditTime: 2022-04-29 20:12:57 +LastEditTime: 2022-08-04 22:22:16 Discription: Environment: ''' @@ -15,7 +15,7 @@ import torch import math class Sarsa(object): def __init__(self, - n_actions,cfg,): + n_actions,cfg): self.n_actions = n_actions self.lr = cfg.lr self.gamma = cfg.gamma @@ -24,7 +24,7 @@ class Sarsa(object): self.epsilon_end = cfg.epsilon_end self.epsilon_decay = cfg.epsilon_decay self.Q = defaultdict(lambda: np.zeros(n_actions)) # Q table - def choose_action(self, state): + def sample(self, state): self.sample_count += 1 self.epsilon = self.epsilon_end + (self.epsilon_start - self.epsilon_end) * \ math.exp(-1. 
* self.sample_count / self.epsilon_decay) # The probability to select a random action, is is log decayed @@ -33,14 +33,14 @@ class Sarsa(object): action_probs[best_action] += (1.0 - self.epsilon) action = np.random.choice(np.arange(len(action_probs)), p=action_probs) return action - def predict_action(self,state): + def predict(self,state): return np.argmax(self.Q[state]) def update(self, state, action, reward, next_state, next_action,done): Q_predict = self.Q[state][action] if done: - Q_target = reward # terminal state + Q_target = reward # 终止状态 else: - Q_target = reward + self.gamma * self.Q[next_state][next_action] + Q_target = reward + self.gamma * self.Q[next_state][next_action] # 与Q learning不同,Sarsa是拿下一步动作对应的Q值去更新 self.Q[state][action] += self.lr * (Q_target - Q_predict) def save(self,path): '''把 Q表格 的数据保存到文件中 diff --git a/projects/codes/Sarsa/task0.py b/projects/codes/Sarsa/task0.py index d60969f..fb84222 100644 --- a/projects/codes/Sarsa/task0.py +++ b/projects/codes/Sarsa/task0.py @@ -5,115 +5,114 @@ Author: John Email: johnjim0816@gmail.com Date: 2021-03-11 17:59:16 LastEditor: John -LastEditTime: 2022-04-29 20:18:13 +LastEditTime: 2022-08-04 22:28:51 Discription: Environment: ''' import sys,os -curr_path = os.path.dirname(os.path.abspath(__file__)) # current path of file -parent_path = os.path.dirname(curr_path) -sys.path.append(parent_path) # add current terminal path to sys.path +curr_path = os.path.dirname(os.path.abspath(__file__)) # 当前文件所在绝对路径 +parent_path = os.path.dirname(curr_path) # 父路径 +sys.path.append(parent_path) # 添加路径到系统路径 import datetime -import torch +import argparse from envs.racetrack_env import RacetrackEnv from Sarsa.sarsa import Sarsa -from common.utils import save_results,make_dir,plot_rewards +from common.utils import save_results,make_dir,plot_rewards,save_args -curr_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") # obtain current time +def get_args(): + """ 超参数 + """ + curr_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") # 获取当前时间 + parser = argparse.ArgumentParser(description="hyperparameters") + parser.add_argument('--algo_name',default='Sarsa',type=str,help="name of algorithm") + parser.add_argument('--env_name',default='CliffWalking-v0',type=str,help="name of environment") + parser.add_argument('--train_eps',default=300,type=int,help="episodes of training") # 训练的回合数 + parser.add_argument('--test_eps',default=20,type=int,help="episodes of testing") # 测试的回合数 + parser.add_argument('--ep_max_steps',default=200,type=int) # 每回合最大的部署 + parser.add_argument('--gamma',default=0.99,type=float,help="discounted factor") # 折扣因子 + parser.add_argument('--epsilon_start',default=0.90,type=float,help="initial value of epsilon") # e-greedy策略中初始epsilon + parser.add_argument('--epsilon_end',default=0.01,type=float,help="final value of epsilon") # e-greedy策略中的终止epsilon + parser.add_argument('--epsilon_decay',default=200,type=int,help="decay rate of epsilon") # e-greedy策略中epsilon的衰减率 + parser.add_argument('--lr',default=0.2,type=float,help="learning rate") + parser.add_argument('--device',default='cpu',type=str,help="cpu or cuda") + parser.add_argument('--result_path',default=curr_path + "/outputs/" + parser.parse_args().env_name + \ + '/' + curr_time + '/results/' ) + parser.add_argument('--model_path',default=curr_path + "/outputs/" + parser.parse_args().env_name + \ + '/' + curr_time + '/models/' ) # path to save models + parser.add_argument('--save_fig',default=True,type=bool,help="if save figure or not") + args = parser.parse_args() + return args 
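The renamed `sample`/`predict` methods above keep the usual split between an exploratory ε-greedy policy and a greedy one, and the comment on the update line is the heart of the change: Sarsa bootstraps from the Q value of the action that will actually be taken next, whereas Q-learning bootstraps from the greedy action. A minimal standalone sketch of the two targets, assuming a plain `defaultdict` Q table like the one in `sarsa.py` (illustration only, not part of the diff; the helper names are mine):

```python
import numpy as np
from collections import defaultdict

gamma = 0.9
Q = defaultdict(lambda: np.zeros(4))  # stand-in Q table: state -> per-action values

def sarsa_target(reward, next_state, next_action, done):
    # on-policy: bootstrap from the action the agent actually takes next
    return reward if done else reward + gamma * Q[next_state][next_action]

def q_learning_target(reward, next_state, done):
    # off-policy: bootstrap from the greedy (max) next action
    return reward if done else reward + gamma * np.max(Q[next_state])

# either target is then applied the same way:
# Q[state][action] += lr * (Q_target - Q[state][action])
```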
-class Config: - ''' parameters for Sarsa - ''' - def __init__(self): - self.algo_name = 'Qlearning' - self.env_name = 'CliffWalking-v0' # 0 up, 1 right, 2 down, 3 left - self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") # check GPU - self.result_path = curr_path+"/outputs/" +self.env_name+'/'+curr_time+'/results/' # path to save results - self.model_path = curr_path+"/outputs/" +self.env_name+'/'+curr_time+'/models/' # path to save models - self.train_eps = 300 # training episodes - self.test_eps = 20 # testing episodes - self.n_steps = 200 # maximum steps per episode - self.epsilon_start = 0.90 # start value of epsilon - self.epsilon_end = 0.01 # end value of epsilon - self.epsilon_decay = 200 # decay rate of epsilon - self.gamma = 0.99 # gamma: Gamma discount factor. - self.lr = 0.2 # learning rate: step size parameter - self.save = True # if save figures def env_agent_config(cfg,seed=1): env = RacetrackEnv() - n_states = 9 # number of actions - agent = Sarsa(n_states,cfg) + n_actions = 9 # 动作数 + agent = Sarsa(n_actions,cfg) return env,agent def train(cfg,env,agent): - rewards = [] - ma_rewards = [] + print('开始训练!') + print(f'环境:{cfg.env_name}, 算法:{cfg.algo_name}, 设备:{cfg.device}') + rewards = [] # 记录奖励 for i_ep in range(cfg.train_eps): state = env.reset() - action = agent.choose_action(state) + action = agent.sample(state) ep_reward = 0 # while True: - for _ in range(cfg.n_steps): + for _ in range(cfg.ep_max_steps): next_state, reward, done = env.step(action) ep_reward+=reward - next_action = agent.choose_action(next_state) + next_action = agent.sample(next_state) agent.update(state, action, reward, next_state, next_action,done) state = next_state action = next_action if done: break - if ma_rewards: - ma_rewards.append(ma_rewards[-1]*0.9+ep_reward*0.1) - else: - ma_rewards.append(ep_reward) rewards.append(ep_reward) if (i_ep+1)%2==0: - print(f"Episode:{i_ep+1}, Reward:{ep_reward}, Epsilon:{agent.epsilon}") - return rewards,ma_rewards + print(f"回合:{i_ep+1}/{cfg.train_eps},奖励:{ep_reward:.1f},Epsilon:{agent.epsilon}") + print('完成训练!') + return {"rewards":rewards} def test(cfg,env,agent): + print('开始测试!') + print(f'环境:{cfg.env_name}, 算法:{cfg.algo_name}, 设备:{cfg.device}') rewards = [] - ma_rewards = [] for i_ep in range(cfg.test_eps): - # Print out which episode we're on, useful for debugging. - # Generate an episode. 
- # An episode is an array of (state, action, reward) tuples state = env.reset() ep_reward = 0 - while True: - # for _ in range(cfg.n_steps): - action = agent.predict_action(state) + # while True: + for _ in range(cfg.ep_max_steps): + action = agent.predict(state) next_state, reward, done = env.step(action) ep_reward+=reward state = next_state if done: break - if ma_rewards: - ma_rewards.append(ma_rewards[-1]*0.9+ep_reward*0.1) - else: - ma_rewards.append(ep_reward) rewards.append(ep_reward) - if (i_ep+1)%1==0: - print("Episode:{}/{}: Reward:{}".format(i_ep+1, cfg.test_eps,ep_reward)) - print('Complete testing!') - return rewards,ma_rewards + print(f"回合数:{i_ep+1}/{cfg.test_eps}, 奖励:{ep_reward:.1f}") + print('完成测试!') + return {"rewards":rewards} if __name__ == "__main__": - cfg = Config() - env,agent = env_agent_config(cfg,seed=1) - rewards,ma_rewards = train(cfg,env,agent) - make_dir(cfg.result_path,cfg.model_path) - agent.save(path=cfg.model_path) - save_results(rewards,ma_rewards,tag='train',path=cfg.result_path) - plot_rewards(rewards, ma_rewards, cfg, tag="train") - - env,agent = env_agent_config(cfg,seed=10) - agent.load(path=cfg.model_path) - rewards,ma_rewards = test(cfg,env,agent) - save_results(rewards,ma_rewards,tag='test',path=cfg.result_path) - plot_rewards(rewards, ma_rewards, cfg, tag="test") + cfg = get_args() + # 训练 + env, agent = env_agent_config(cfg) + res_dic = train(cfg, env, agent) + make_dir(cfg.result_path, cfg.model_path) + save_args(cfg) # save parameters + agent.save(path=cfg.model_path) # save model + save_results(res_dic, tag='train', + path=cfg.result_path) + plot_rewards(res_dic['rewards'], cfg, tag="train") + # 测试 + env, agent = env_agent_config(cfg) + agent.load(path=cfg.model_path) # 导入模型 + res_dic = test(cfg, env, agent) + save_results(res_dic, tag='test', + path=cfg.result_path) # 保存结果 + plot_rewards(res_dic['rewards'], cfg, tag="test") # 画出结果 diff --git a/projects/codes/Sarsa/task1.py b/projects/codes/Sarsa/task1.py new file mode 100644 index 0000000..3fe8fb9 --- /dev/null +++ b/projects/codes/Sarsa/task1.py @@ -0,0 +1,131 @@ +#!/usr/bin/env python +# coding=utf-8 +''' +Author: John +Email: johnjim0816@gmail.com +Date: 2020-09-11 23:03:00 +LastEditor: John +LastEditTime: 2022-08-04 22:44:00 +Discription: +Environment: +''' +import sys +import os +curr_path = os.path.dirname(os.path.abspath(__file__)) # 当前文件所在绝对路径 +parent_path = os.path.dirname(curr_path) # 父路径 +sys.path.append(parent_path) # 添加路径到系统路径 + +import gym +import torch +import datetime +import argparse +from envs.gridworld_env import CliffWalkingWapper +from Sarsa.sarsa import Sarsa +from common.utils import plot_rewards,save_args +from common.utils import save_results,make_dir + + +def get_args(): + """ + """ + curr_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") # 获取当前时间 + parser = argparse.ArgumentParser(description="hyperparameters") + parser.add_argument('--algo_name',default='Sarsa',type=str,help="name of algorithm") + parser.add_argument('--env_name',default='CliffWalking-v0',type=str,help="name of environment") + parser.add_argument('--train_eps',default=400,type=int,help="episodes of training") # 训练的回合数 + parser.add_argument('--test_eps',default=20,type=int,help="episodes of testing") # 测试的回合数 + parser.add_argument('--gamma',default=0.90,type=float,help="discounted factor") # 折扣因子 + parser.add_argument('--epsilon_start',default=0.95,type=float,help="initial value of epsilon") # e-greedy策略中初始epsilon + parser.add_argument('--epsilon_end',default=0.01,type=float,help="final value 
of epsilon") # e-greedy策略中的终止epsilon + parser.add_argument('--epsilon_decay',default=300,type=int,help="decay rate of epsilon") # e-greedy策略中epsilon的衰减率 + parser.add_argument('--lr',default=0.1,type=float,help="learning rate") + parser.add_argument('--device',default='cpu',type=str,help="cpu or cuda") + parser.add_argument('--result_path',default=curr_path + "/outputs/" + parser.parse_args().env_name + \ + '/' + curr_time + '/results/' ) + parser.add_argument('--model_path',default=curr_path + "/outputs/" + parser.parse_args().env_name + \ + '/' + curr_time + '/models/' ) # path to save models + parser.add_argument('--save_fig',default=True,type=bool,help="if save figure or not") + args = parser.parse_args([]) + return args +curr_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") # 获取当前时间 + +def train(cfg,env,agent): + print('开始训练!') + print(f'环境:{cfg.env_name}, 算法:{cfg.algo_name}, 设备:{cfg.device}') + rewards = [] # 记录奖励 + for i_ep in range(cfg.train_eps): + ep_reward = 0 # 记录每个回合的奖励 + state = env.reset() # 重置环境,即开始新的回合 + action = agent.sample(state) + while True: + action = agent.sample(state) # 根据算法采样一个动作 + next_state, reward, done, _ = env.step(action) # 与环境进行一次动作交互 + next_action = agent.sample(next_state) + agent.update(state, action, reward, next_state, next_action,done) # 算法更新 + state = next_state # 更新状态 + action = next_action + ep_reward += reward + if done: + break + rewards.append(ep_reward) + print(f"回合:{i_ep+1}/{cfg.train_eps},奖励:{ep_reward:.1f},Epsilon:{agent.epsilon}") + print('完成训练!') + return {"rewards":rewards} + +def test(cfg,env,agent): + print('开始测试!') + print(f'环境:{cfg.env_name}, 算法:{cfg.algo_name}, 设备:{cfg.device}') + rewards = [] # 记录所有回合的奖励 + for i_ep in range(cfg.test_eps): + ep_reward = 0 # 记录每个episode的reward + state = env.reset() # 重置环境, 重新开一局(即开始新的一个回合) + while True: + action = agent.predict(state) # 根据算法选择一个动作 + next_state, reward, done, _ = env.step(action) # 与环境进行一个交互 + state = next_state # 更新状态 + ep_reward += reward + if done: + break + rewards.append(ep_reward) + print(f"回合数:{i_ep+1}/{cfg.test_eps}, 奖励:{ep_reward:.1f}") + print('完成测试!') + return {"rewards":rewards} + +def env_agent_config(cfg,seed=1): + '''创建环境和智能体 + Args: + cfg ([type]): [description] + seed (int, optional): 随机种子. Defaults to 1. 
+ Returns: + env [type]: 环境 + agent : 智能体 + ''' + env = gym.make(cfg.env_name) + env = CliffWalkingWapper(env) + env.seed(seed) # 设置随机种子 + n_states = env.observation_space.n # 状态维度 + n_actions = env.action_space.n # 动作维度 + print(f"状态数:{n_states},动作数:{n_actions}") + agent = Sarsa(n_actions,cfg) + return env,agent +if __name__ == "__main__": + cfg = get_args() + # 训练 + env, agent = env_agent_config(cfg) + res_dic = train(cfg, env, agent) + make_dir(cfg.result_path, cfg.model_path) + save_args(cfg) # save parameters + agent.save(path=cfg.model_path) # save model + save_results(res_dic, tag='train', + path=cfg.result_path) + plot_rewards(res_dic['rewards'], cfg, tag="train") + # 测试 + env, agent = env_agent_config(cfg) + agent.load(path=cfg.model_path) # 导入模型 + res_dic = test(cfg, env, agent) + save_results(res_dic, tag='test', + path=cfg.result_path) # 保存结果 + plot_rewards(res_dic['rewards'], cfg, tag="test") # 画出结果 + + + diff --git a/projects/codes/common/memory.py b/projects/codes/common/memories.py similarity index 100% rename from projects/codes/common/memory.py rename to projects/codes/common/memories.py diff --git a/projects/codes/common/model.py b/projects/codes/common/models.py similarity index 100% rename from projects/codes/common/model.py rename to projects/codes/common/models.py diff --git a/projects/codes/common/utils.py b/projects/codes/common/utils.py index 9cc625e..dd21163 100644 --- a/projects/codes/common/utils.py +++ b/projects/codes/common/utils.py @@ -5,7 +5,7 @@ Author: John Email: johnjim0816@gmail.com Date: 2021-03-12 16:02:24 LastEditor: John -LastEditTime: 2022-07-31 23:18:04 +LastEditTime: 2022-08-15 18:11:27 Discription: Environment: ''' @@ -42,21 +42,36 @@ def plot_rewards_cn(rewards, ma_rewards, cfg, tag='train'): if cfg.save: plt.savefig(cfg.result_path+f"{tag}_rewards_curve_cn") # plt.show() +def smooth(data, weight=0.9): + '''用于平滑曲线,类似于Tensorboard中的smooth + Args: + data (List):输入数据 + weight (Float): 平滑权重,处于0-1之间,数值越高说明越平滑,一般取0.9 -def plot_rewards(rewards, ma_rewards, cfg, tag='train'): + Returns: + smoothed (List): 平滑后的数据 + ''' + last = data[0] # First value in the plot (first timestep) + smoothed = list() + for point in data: + smoothed_val = last * weight + (1 - weight) * point # 计算平滑值 + smoothed.append(smoothed_val) + last = smoothed_val + return smoothed + +def plot_rewards(rewards,cfg,path=None,tag='train'): sns.set() plt.figure() # 创建一个图形实例,方便同时多画几个图 - plt.title("learning curve on {} of {} for {}".format( - cfg.device, cfg.algo_name, cfg.env_name)) + plt.title(f"{tag}ing curve on {cfg.device} of {cfg.algo_name} for {cfg.env_name}") plt.xlabel('epsiodes') plt.plot(rewards, label='rewards') - plt.plot(ma_rewards, label='ma rewards') + plt.plot(smooth(rewards), label='smoothed') plt.legend() if cfg.save_fig: - plt.savefig(cfg.result_path+"{}_rewards_curve".format(tag)) - plt.show() - + plt.savefig(f"{path}/{tag}ing_curve.png") + if cfg.show_fig: + plt.show() def plot_losses(losses, algo="DQN", save=True, path='./'): sns.set() @@ -69,19 +84,13 @@ def plot_losses(losses, algo="DQN", save=True, path='./'): plt.savefig(path+"losses_curve") plt.show() -def save_results(dic, tag='train', path='./results'): +def save_results(dic, tag='train', path = None): ''' 保存奖励 ''' + Path(path).mkdir(parents=True, exist_ok=True) for key,value in dic.items(): np.save(path+'{}_{}.npy'.format(tag,key),value) print('Results saved!') - -# def save_results(rewards, ma_rewards, tag='train', path='./results'): -# ''' 保存奖励 -# ''' -# np.save(path+'{}_rewards.npy'.format(tag), rewards) -# 
np.save(path+'{}_ma_rewards.npy'.format(tag), ma_rewards) -# print('Result saved!') def make_dir(*paths): @@ -100,27 +109,10 @@ def del_empty_dir(*paths): if not os.listdir(os.path.join(path, dir)): os.removedirs(os.path.join(path, dir)) -def save_args(args): - # save parameters - args_dict = vars(args) - with open(args.result_path+'params.json', 'w') as fp: +def save_args(args,path=None): + # 保存参数 + args_dict = vars(args) + Path(path).mkdir(parents=True, exist_ok=True) + with open(f"{path}/params.json", 'w') as fp: json.dump(args_dict, fp) - print("Parameters saved!") -def smooth(data, weight=0.9): - '''_summary_ - - Args: - data (List):输入数据 - weight (Float): 平滑权重,处于0-1之间,数值越高说明越平滑,一般取0.9 - - Returns: - smoothed (List): 平滑后的数据 - ''' - last = data[0] # First value in the plot (first timestep) - smoothed = list() - for point in data: - smoothed_val = last * weight + (1 - weight) * point # 计算平滑值 - smoothed.append(smoothed_val) - last = smoothed_val - - return smoothed \ No newline at end of file + print("参数已保存!") diff --git a/projects/codes/QLearning/env/gridworld_env.py b/projects/codes/envs/gridworld_env.py similarity index 90% rename from projects/codes/QLearning/env/gridworld_env.py rename to projects/codes/envs/gridworld_env.py index 31d968f..ae3f871 100644 --- a/projects/codes/QLearning/env/gridworld_env.py +++ b/projects/codes/envs/gridworld_env.py @@ -1,26 +1,9 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# -*- coding: utf-8 -*- - import gym import turtle import numpy as np # turtle tutorial : https://docs.python.org/3.3/library/turtle.html - def GridWorld(gridmap=None, is_slippery=False): if gridmap is None: gridmap = ['SFFF', 'FHFH', 'FFFH', 'HFFG'] diff --git a/projects/codes/envs/racetrack_env.py b/projects/codes/envs/racetrack_env.py index d6684f5..e3a7413 100644 --- a/projects/codes/envs/racetrack_env.py +++ b/projects/codes/envs/racetrack_env.py @@ -4,6 +4,7 @@ # This file contains code for the racetrack environment that you will be using # as part of the second part of the CM50270: Reinforcement Learning coursework. 
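The `smooth()` helper moved to the top of `common/utils.py` above replaces the old moving-average reward curve with a Tensorboard-style exponential moving average. A small usage sketch (illustration only, not part of the diff) showing what the smoothed values look like numerically:

```python
def smooth(data, weight=0.9):
    # exponential moving average, same formula as in common/utils.py
    last = data[0]
    smoothed = []
    for point in data:
        last = last * weight + (1 - weight) * point
        smoothed.append(last)
    return smoothed

print(smooth([0, 10, 10, 10], weight=0.9))
# approximately [0, 1.0, 1.9, 2.71] -- the smoothed curve trails the raw rewards,
# which is what flattens out the noise in the plotted learning curve
```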
+import imp import time import random import numpy as np @@ -11,7 +12,7 @@ import os import matplotlib.pyplot as plt import matplotlib.patheffects as pe from IPython.display import clear_output - +from gym.spaces import Discrete from matplotlib import colors class RacetrackEnv(object) : @@ -61,7 +62,7 @@ class RacetrackEnv(object) : if (self.CELL_TYPES_DICT[self.track[y, x]] == "start") : self.initial_states.append((y, x)) - + self.action_space = Discrete(9) self.is_reset = False #print("Racetrack Environment File Loaded Successfully.") diff --git a/projects/codes/envs/snake/checkpoint.npy b/projects/codes/envs/snake/checkpoint.npy deleted file mode 100644 index 591d49e..0000000 Binary files a/projects/codes/envs/snake/checkpoint.npy and /dev/null differ diff --git a/projects/codes/envs/snake/checkpoint1.npy b/projects/codes/envs/snake/checkpoint1.npy deleted file mode 100644 index 84b54ca..0000000 Binary files a/projects/codes/envs/snake/checkpoint1.npy and /dev/null differ diff --git a/projects/codes/envs/snake/checkpoint2.npy b/projects/codes/envs/snake/checkpoint2.npy deleted file mode 100644 index 4614eb7..0000000 Binary files a/projects/codes/envs/snake/checkpoint2.npy and /dev/null differ diff --git a/projects/codes/envs/snake/checkpoint3.npy b/projects/codes/envs/snake/checkpoint3.npy deleted file mode 100644 index 8737b4c..0000000 Binary files a/projects/codes/envs/snake/checkpoint3.npy and /dev/null differ diff --git a/projects/codes/envs/snake/q_agent.npy b/projects/codes/envs/snake/q_agent.npy deleted file mode 100644 index 75ef415..0000000 Binary files a/projects/codes/envs/snake/q_agent.npy and /dev/null differ diff --git a/projects/environment.yaml b/projects/environment.yaml deleted file mode 100644 index 6915f5f..0000000 --- a/projects/environment.yaml +++ /dev/null @@ -1,124 +0,0 @@ -name: rl_tutorials -channels: - - https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/pytorch - - https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/msys2 - - https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/msys2/ - - https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge - - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free/ - - defaults -dependencies: - - backcall=0.2.0=pyh9f0ad1d_0 - - backports=1.0=py_2 - - backports.functools_lru_cache=1.6.4=pyhd8ed1ab_0 - - blas=2.114=mkl - - blas-devel=3.9.0=14_win64_mkl - - brotlipy=0.7.0=py37hcc03f2d_1004 - - ca-certificates=2022.5.18.1=h5b45459_0 - - certifi=2022.5.18.1=py37h03978a9_0 - - cffi=1.15.0=py37hd8e9650_0 - - charset-normalizer=2.0.12=pyhd8ed1ab_0 - - colorama=0.4.4=pyh9f0ad1d_0 - - cryptography=37.0.1=py37h21b164f_0 - - cudatoolkit=11.3.1=h59b6b97_2 - - debugpy=1.6.0=py37hf2a7229_0 - - decorator=5.1.1=pyhd8ed1ab_0 - - entrypoints=0.4=pyhd8ed1ab_0 - - freetype=2.10.4=h546665d_1 - - idna=3.3=pyhd8ed1ab_0 - - intel-openmp=2022.1.0=h57928b3_3787 - - ipykernel=6.13.0=py37h90c5f73_0 - - jedi=0.18.1=py37h03978a9_1 - - jpeg=9e=h8ffe710_1 - - jupyter_client=7.3.1=pyhd8ed1ab_0 - - jupyter_core=4.10.0=py37h03978a9_0 - - lcms2=2.12=h2a16943_0 - - lerc=3.0=h0e60522_0 - - libblas=3.9.0=14_win64_mkl - - libcblas=3.9.0=14_win64_mkl - - libdeflate=1.10=h8ffe710_0 - - liblapack=3.9.0=14_win64_mkl - - liblapacke=3.9.0=14_win64_mkl - - libpng=1.6.37=h1d00b33_2 - - libsodium=1.0.18=h8d14728_1 - - libtiff=4.3.0=hc4061b1_4 - - libuv=1.43.0=h8ffe710_0 - - libwebp=1.2.2=h57928b3_0 - - libwebp-base=1.2.2=h8ffe710_1 - - libxcb=1.13=hcd874cb_1004 - - libzlib=1.2.11=h8ffe710_1014 - - lz4-c=1.9.3=h8ffe710_1 - - m2w64-gcc-libgfortran=5.3.0=6 - - 
m2w64-gcc-libs=5.3.0=7 - - m2w64-gcc-libs-core=5.3.0=7 - - m2w64-gmp=6.1.0=2 - - m2w64-libwinpthread-git=5.0.0.4634.697f757=2 - - matplotlib-inline=0.1.3=pyhd8ed1ab_0 - - mkl=2022.0.0=h0e2418a_796 - - mkl-devel=2022.0.0=h57928b3_797 - - mkl-include=2022.0.0=h0e2418a_796 - - msys2-conda-epoch=20160418=1 - - nest-asyncio=1.5.5=pyhd8ed1ab_0 - - numpy=1.21.6=py37h2830a78_0 - - openjpeg=2.4.0=hb211442_1 - - openssl=3.0.3=h8ffe710_0 - - packaging=21.3=pyhd8ed1ab_0 - - parso=0.8.3=pyhd8ed1ab_0 - - pickleshare=0.7.5=py_1003 - - pillow=9.1.1=py37h8675073_0 - - pip=22.1.1=pyhd8ed1ab_0 - - prompt-toolkit=3.0.29=pyha770c72_0 - - psutil=5.9.1=py37hcc03f2d_0 - - pthread-stubs=0.4=hcd874cb_1001 - - pycparser=2.21=pyhd8ed1ab_0 - - pyopenssl=22.0.0=pyhd8ed1ab_0 - - pysocks=1.7.1=py37h03978a9_5 - - python=3.7.12=h900ac77_100_cpython - - python-dateutil=2.8.2=pyhd8ed1ab_0 - - python_abi=3.7=2_cp37m - - pytorch=1.11.0=py3.7_cuda11.3_cudnn8_0 - - pytorch-mutex=1.0=cuda - - pywin32=303=py37hcc03f2d_0 - - pyzmq=23.0.0=py37hcce574b_0 - - requests=2.27.1=pyhd8ed1ab_0 - - setuptools=62.3.2=py37h03978a9_0 - - six=1.16.0=pyh6c4a22f_0 - - sqlite=3.38.5=h8ffe710_0 - - tbb=2021.5.0=h2d74725_1 - - tk=8.6.12=h8ffe710_0 - - torchaudio=0.11.0=py37_cu113 - - torchvision=0.12.0=py37_cu113 - - tornado=6.1=py37hcc03f2d_3 - - typing_extensions=4.2.0=pyha770c72_1 - - ucrt=10.0.20348.0=h57928b3_0 - - urllib3=1.26.9=pyhd8ed1ab_0 - - vc=14.2=hb210afc_6 - - vs2015_runtime=14.29.30037=h902a5da_6 - - wcwidth=0.2.5=pyh9f0ad1d_2 - - wheel=0.37.1=pyhd8ed1ab_0 - - win_inet_pton=1.1.0=py37h03978a9_4 - - xorg-libxau=1.0.9=hcd874cb_0 - - xorg-libxdmcp=1.1.3=hcd874cb_0 - - xz=5.2.5=h62dcd97_1 - - zeromq=4.3.4=h0e60522_1 - - zlib=1.2.11=h8ffe710_1014 - - zstd=1.5.2=h6255e5f_1 - - pip: - - cloudpickle==2.1.0 - - cycler==0.11.0 - - dill==0.3.4 - - easydict==1.9 - - fonttools==4.33.2 - - gym==0.21.0 - - importlib-metadata==4.12.0 - - ipython==7.32.0 - - kiwisolver==1.4.2 - - matplotlib==3.5.1 - - pandas==1.3.5 - - pygments==2.11.2 - - pyparsing==3.0.8 - - pytz==2022.1 - - scipy==1.7.3 - - seaborn==0.11.2 - - traitlets==5.1.1 - - zipp==3.8.1 -prefix: C:\Users\24438\anaconda3\envs\rl_tutorials diff --git a/projects/notebooks/1.QLearning.ipynb b/projects/notebooks/1.QLearning.ipynb new file mode 100644 index 0000000..c5cc3f7 --- /dev/null +++ b/projects/notebooks/1.QLearning.ipynb @@ -0,0 +1,922 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1、定义算法\n", + "强化学习算法的模式都比较固定,一般包括sample(即训练时采样动作),predict(测试时预测动作),update(算法更新)以及保存模型和加载模型等几个方法,其中对于每种算法samle和update的方式是不相同,而其他方法就大同小异。" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import math\n", + "import torch\n", + "from collections import defaultdict\n", + "\n", + "class QLearning(object):\n", + " def __init__(self,n_states,\n", + " n_actions,cfg):\n", + " self.n_actions = n_actions \n", + " self.lr = cfg.lr # 学习率\n", + " self.gamma = cfg.gamma \n", + " self.epsilon = cfg.epsilon_start\n", + " self.sample_count = 0 \n", + " self.epsilon_start = cfg.epsilon_start\n", + " self.epsilon_end = cfg.epsilon_end\n", + " self.epsilon_decay = cfg.epsilon_decay\n", + " self.Q_table = defaultdict(lambda: np.zeros(n_actions)) # 用嵌套字典存放状态->动作->状态-动作值(Q值)的映射,即Q表\n", + " def sample(self, state):\n", + " ''' 采样动作,训练时用\n", + " '''\n", + " self.sample_count += 1\n", + " self.epsilon = self.epsilon_end + (self.epsilon_start - self.epsilon_end) * \\\n", + " math.exp(-1. 
* self.sample_count / self.epsilon_decay) # epsilon是会递减的,这里选择指数递减\n", + " # e-greedy 策略\n", + " if np.random.uniform(0, 1) > self.epsilon:\n", + " action = np.argmax(self.Q_table[str(state)]) # 选择Q(s,a)最大对应的动作\n", + " else:\n", + " action = np.random.choice(self.n_actions) # 随机选择动作\n", + " return action\n", + " def predict(self,state):\n", + " ''' 预测或选择动作,测试时用\n", + " '''\n", + " action = np.argmax(self.Q_table[str(state)])\n", + " return action\n", + " def update(self, state, action, reward, next_state, done):\n", + " Q_predict = self.Q_table[str(state)][action] \n", + " if done: # 终止状态\n", + " Q_target = reward \n", + " else:\n", + " Q_target = reward + self.gamma * np.max(self.Q_table[str(next_state)]) \n", + " self.Q_table[str(state)][action] += self.lr * (Q_target - Q_predict)\n", + " def save(self,path):\n", + " import dill\n", + " torch.save(\n", + " obj=self.Q_table,\n", + " f=path+\"Qleaning_model.pkl\",\n", + " pickle_module=dill\n", + " )\n", + " print(\"保存模型成功!\")\n", + " def load(self, path):\n", + " import dill\n", + " self.Q_table =torch.load(f=path+'Qleaning_model.pkl',pickle_module=dill)\n", + " print(\"加载模型成功!\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2、定义训练\n", + "强化学习算法的训练方式也比较固定,如下:\n", + "```python\n", + "for i_ep in range(train_eps): # 遍历每个回合\n", + " state = env.reset() # 重置环境,即开始新的回合\n", + " while True: # 对于一些比较复杂的游戏可以设置每回合最大的步长,例如while ep_step<100,就是每回合最大步长为100。\n", + " action = agent.sample(state) # 根据算法采样一个动作\n", + " next_state, reward, done, _ = env.step(action) # 与环境进行一次动作交互\n", + " agent.memory.push(state, action, reward, next_state, done) # 记录memory\n", + " agent.update(state, action, reward, next_state, done) # 算法更新\n", + " state = next_state # 更新状态\n", + " if done:\n", + " break\n", + "```\n", + "首先对于每个回合,回合开始时环境需要重置,好比我们每次开一把游戏需要从头再来一样。我们可以设置智能体在每回合数的最大步长,尤其是对于比较复杂的游戏,这样做的好处之一就是帮助智能体在训练中快速收敛,比如我们先验地知道最优解的大概步数,那么理论上智能体收敛时也应该是这个步数附近,设置最大步数可以方便智能体接近这个最优解。在每个回合中,智能体首先需要采样(sample),或者说采用探索策略例如常见的$\\varepsilon$-greedy策略或者UCB探索策略等等。采样的过程是将当前的状态state作为输入,智能体采样输出动作action。然后环境根据采样出来的动作反馈出下一个状态以及相应的reward等信息。接下来对于具有memory的智能体例如包含replay memory的DQN来说,需要将相应的transition(记住这个词,中文不好翻译,通常是状态、动作、奖励等信息)。紧接着就是智能体更新,对于深度强化学习此时一般从memory中随机采样一些transition进行更新,对于Q learning一般是采样上一次的transition。更新公式是比较关键的部分,但是也很通用,一般基于值的算法更新公式都是一个套路如下:\n", + "$$\n", + "y_{j}= \\begin{cases}r_{j} & \\text { for terminal } s_{t+1} \\\\ r_{j}+\\gamma \\max _{a^{\\prime}} Q\\left(s_{t+1}, a^{\\prime} ; \\theta\\right) & \\text { for non-terminal } s_{t+1}\\end{cases}\n", + "$$\n", + "智能体更新完之后,通常需要更新状态,即```state = next_state```,然后会检查是否完成了这一回合的游戏,即```done==True```,注意完成并不代表这回合成功,也有可能是失败的太离谱,等同学们有了自定义强化学习环境的经验就知道了(等你长大就知道了XD)。\n", + "如果需要记录奖励、损失等等的话可以再加上,如下方代码,实际项目中更多地使用tensorboard来记录相应的数据,甚至于笔者就在这些教学代码中使用过,但是看起来有些繁琐,容易给大家增加不必要的学习难度,因此学有余力以及需要在项目研究中做强化学习的可以去看看,也很简单。\n", + "此外稍微复杂一些的强化学习不是一次性写完代码就能收敛的,这时需要我们做一个调参侠。为了检查我们参数调得好不好,可以在终端print出奖励、损失以及epsilon等随着回合数的变化,这点说明一下强化学习的训练过程一般都是先探索然后收敛的,官方的话就是权衡exploration and exploitation。e-greedy策略的做法就是前期探索,然后逐渐减小探索率至慢慢收敛,也就是这个epsilon。这个值越大比如0.9就说明智能体90%的概率在随机探索,通常情况下会设置三个值,epsilon_start、epsilon_end以及epsilon_decay,即初始值、终止值和衰减率,其中初始值一般是0.95不变,终止值是0.01,也就是说即使在收敛阶段也让智能体保持很小概率的探索,这样做的原因就是智能体已经学出了一个不错的策略,但是保不齐还有更好的策略,好比我们知道要出人头地学历高比较重要,但是“人还是要有梦想的,万一实现了呢”,总是存在意外的可能,对吧。回归正题,比较关键的是epsilon_decay这个衰减率,这个epsilon衰减太快了学来的策略往往过拟合,好比一条只能选择一朵花的花道上,你早早选择了一朵看起来还可以的花,却错过了后面更多的好花。但是衰减的太慢会影响收敛的速度,好比你走过了花道的尽头也还没选出一朵花来,相比前者不如更甚。当然强化学习的调参相比于深度学习只能说是有过之无不及,比较复杂,不止epsilon这一个,这就需要同学们的耐心学习了。\n", + 
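To make the ε-decay discussion above concrete: with the notebook's defaults (epsilon_start=0.95, epsilon_end=0.01, epsilon_decay=300), the exponential schedule used in `QLearning.sample` falls off roughly as follows (standalone sketch, illustration only, not part of the diff):

```python
import math

epsilon_start, epsilon_end, epsilon_decay = 0.95, 0.01, 300

def epsilon_at(sample_count):
    # same exponential schedule as in QLearning.sample
    return epsilon_end + (epsilon_start - epsilon_end) * math.exp(-sample_count / epsilon_decay)

for t in (0, 100, 300, 1000, 3000):
    print(t, round(epsilon_at(t), 3))
# approximately: 0 -> 0.95, 100 -> 0.68, 300 -> 0.36, 1000 -> 0.04, 3000 -> 0.01
```

A larger epsilon_decay keeps the agent exploring longer; a smaller one converges faster but risks locking in a suboptimal policy, which is exactly the trade-off described above.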
"强化学习测试的代码跟训练基本上是一样的,因此我放到同一个代码段里。相比于训练代码,测试代码主要有以下几点不同:1、测试模型的过程是不需要更新的,这个是不言而喻的;2、测试代码不需要采样(sample)动作,相比之代替的是预测(sample)动作,其区别就是采样动作时可能会使用各种策略例如$\\varepsilon$-greedy策略,而预测动作不需要,只需要根据训练时学习好的Q表或者网络模型代入状态得到动作即可;3、测试过程终端一般只需要看奖励,不需要看epislon等,反正它在测试中也是无意义的。" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "def train(cfg,env,agent):\n", + " print('开始训练!')\n", + " print(f'环境:{cfg.env_name}, 算法:{cfg.algo_name}, 设备:{cfg.device}')\n", + " rewards = [] # 记录奖励\n", + " for i_ep in range(cfg.train_eps):\n", + " ep_reward = 0 # 记录每个回合的奖励\n", + " state = env.reset() # 重置环境,即开始新的回合\n", + " while True:\n", + " action = agent.sample(state) # 根据算法采样一个动作\n", + " next_state, reward, done, _ = env.step(action) # 与环境进行一次动作交互\n", + " agent.update(state, action, reward, next_state, done) # Q学习算法更新\n", + " state = next_state # 更新状态\n", + " ep_reward += reward\n", + " if done:\n", + " break\n", + " rewards.append(ep_reward)\n", + " print(f\"回合:{i_ep+1}/{cfg.train_eps},奖励:{ep_reward:.1f},Epsilon:{agent.epsilon}\")\n", + " print('完成训练!')\n", + " return {\"rewards\":rewards}\n", + "def test(cfg,env,agent):\n", + " print('开始测试!')\n", + " print(f'环境:{cfg.env_name}, 算法:{cfg.algo_name}, 设备:{cfg.device}')\n", + " rewards = [] # 记录所有回合的奖励\n", + " for i_ep in range(cfg.test_eps):\n", + " ep_reward = 0 # 记录每个episode的reward\n", + " state = env.reset() # 重置环境, 重新开一局(即开始新的一个回合)\n", + " while True:\n", + " action = agent.predict(state) # 根据算法选择一个动作\n", + " next_state, reward, done, _ = env.step(action) # 与环境进行一个交互\n", + " state = next_state # 更新状态\n", + " ep_reward += reward\n", + " if done:\n", + " break\n", + " rewards.append(ep_reward)\n", + " print(f\"回合数:{i_ep+1}/{cfg.test_eps}, 奖励:{ep_reward:.1f}\")\n", + " print('完成测试!')\n", + " return {\"rewards\":rewards}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3、定义环境\n", + "\n", + "OpenAI Gym中其实集成了很多强化学习环境,足够大家学习了,但是在做强化学习的应用中免不了要自己创建环境,比如在本项目中其实不太好找到Qlearning能学出来的环境,Qlearning实在是太弱了,需要足够简单的环境才行,因此本项目写了一个环境,大家感兴趣的话可以看一下,一般环境接口最关键的部分即使reset和step。" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "import gym\n", + "import turtle\n", + "import numpy as np\n", + "\n", + "# turtle tutorial : https://docs.python.org/3.3/library/turtle.html\n", + "\n", + "def GridWorld(gridmap=None, is_slippery=False):\n", + " if gridmap is None:\n", + " gridmap = ['SFFF', 'FHFH', 'FFFH', 'HFFG']\n", + " env = gym.make(\"FrozenLake-v0\", desc=gridmap, is_slippery=False)\n", + " env = FrozenLakeWapper(env)\n", + " return env\n", + "\n", + "\n", + "class FrozenLakeWapper(gym.Wrapper):\n", + " def __init__(self, env):\n", + " gym.Wrapper.__init__(self, env)\n", + " self.max_y = env.desc.shape[0]\n", + " self.max_x = env.desc.shape[1]\n", + " self.t = None\n", + " self.unit = 50\n", + "\n", + " def draw_box(self, x, y, fillcolor='', line_color='gray'):\n", + " self.t.up()\n", + " self.t.goto(x * self.unit, y * self.unit)\n", + " self.t.color(line_color)\n", + " self.t.fillcolor(fillcolor)\n", + " self.t.setheading(90)\n", + " self.t.down()\n", + " self.t.begin_fill()\n", + " for _ in range(4):\n", + " self.t.forward(self.unit)\n", + " self.t.right(90)\n", + " self.t.end_fill()\n", + "\n", + " def move_player(self, x, y):\n", + " self.t.up()\n", + " self.t.setheading(90)\n", + " self.t.fillcolor('red')\n", + " self.t.goto((x + 0.5) * self.unit, (y + 0.5) * self.unit)\n", + "\n", + " def render(self):\n", + " if self.t == None:\n", + " self.t = 
turtle.Turtle()\n", + " self.wn = turtle.Screen()\n", + " self.wn.setup(self.unit * self.max_x + 100,\n", + " self.unit * self.max_y + 100)\n", + " self.wn.setworldcoordinates(0, 0, self.unit * self.max_x,\n", + " self.unit * self.max_y)\n", + " self.t.shape('circle')\n", + " self.t.width(2)\n", + " self.t.speed(0)\n", + " self.t.color('gray')\n", + " for i in range(self.desc.shape[0]):\n", + " for j in range(self.desc.shape[1]):\n", + " x = j\n", + " y = self.max_y - 1 - i\n", + " if self.desc[i][j] == b'S': # Start\n", + " self.draw_box(x, y, 'white')\n", + " elif self.desc[i][j] == b'F': # Frozen ice\n", + " self.draw_box(x, y, 'white')\n", + " elif self.desc[i][j] == b'G': # Goal\n", + " self.draw_box(x, y, 'yellow')\n", + " elif self.desc[i][j] == b'H': # Hole\n", + " self.draw_box(x, y, 'black')\n", + " else:\n", + " self.draw_box(x, y, 'white')\n", + " self.t.shape('turtle')\n", + "\n", + " x_pos = self.s % self.max_x\n", + " y_pos = self.max_y - 1 - int(self.s / self.max_x)\n", + " self.move_player(x_pos, y_pos)\n", + "\n", + "\n", + "class CliffWalkingWapper(gym.Wrapper):\n", + " def __init__(self, env):\n", + " gym.Wrapper.__init__(self, env)\n", + " self.t = None\n", + " self.unit = 50\n", + " self.max_x = 12\n", + " self.max_y = 4\n", + "\n", + " def draw_x_line(self, y, x0, x1, color='gray'):\n", + " assert x1 > x0\n", + " self.t.color(color)\n", + " self.t.setheading(0)\n", + " self.t.up()\n", + " self.t.goto(x0, y)\n", + " self.t.down()\n", + " self.t.forward(x1 - x0)\n", + "\n", + " def draw_y_line(self, x, y0, y1, color='gray'):\n", + " assert y1 > y0\n", + " self.t.color(color)\n", + " self.t.setheading(90)\n", + " self.t.up()\n", + " self.t.goto(x, y0)\n", + " self.t.down()\n", + " self.t.forward(y1 - y0)\n", + "\n", + " def draw_box(self, x, y, fillcolor='', line_color='gray'):\n", + " self.t.up()\n", + " self.t.goto(x * self.unit, y * self.unit)\n", + " self.t.color(line_color)\n", + " self.t.fillcolor(fillcolor)\n", + " self.t.setheading(90)\n", + " self.t.down()\n", + " self.t.begin_fill()\n", + " for i in range(4):\n", + " self.t.forward(self.unit)\n", + " self.t.right(90)\n", + " self.t.end_fill()\n", + "\n", + " def move_player(self, x, y):\n", + " self.t.up()\n", + " self.t.setheading(90)\n", + " self.t.fillcolor('red')\n", + " self.t.goto((x + 0.5) * self.unit, (y + 0.5) * self.unit)\n", + "\n", + " def render(self):\n", + " if self.t == None:\n", + " self.t = turtle.Turtle()\n", + " self.wn = turtle.Screen()\n", + " self.wn.setup(self.unit * self.max_x + 100,\n", + " self.unit * self.max_y + 100)\n", + " self.wn.setworldcoordinates(0, 0, self.unit * self.max_x,\n", + " self.unit * self.max_y)\n", + " self.t.shape('circle')\n", + " self.t.width(2)\n", + " self.t.speed(0)\n", + " self.t.color('gray')\n", + " for _ in range(2):\n", + " self.t.forward(self.max_x * self.unit)\n", + " self.t.left(90)\n", + " self.t.forward(self.max_y * self.unit)\n", + " self.t.left(90)\n", + " for i in range(1, self.max_y):\n", + " self.draw_x_line(\n", + " y=i * self.unit, x0=0, x1=self.max_x * self.unit)\n", + " for i in range(1, self.max_x):\n", + " self.draw_y_line(\n", + " x=i * self.unit, y0=0, y1=self.max_y * self.unit)\n", + "\n", + " for i in range(1, self.max_x - 1):\n", + " self.draw_box(i, 0, 'black')\n", + " self.draw_box(self.max_x - 1, 0, 'yellow')\n", + " self.t.shape('turtle')\n", + "\n", + " x_pos = self.s % self.max_x\n", + " y_pos = self.max_y - 1 - int(self.s / self.max_x)\n", + " self.move_player(x_pos, y_pos)" + ] + }, + { + "cell_type": "code", + 
"execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "import gym\n", + "def env_agent_config(cfg,seed=1):\n", + " '''创建环境和智能体\n", + " Args:\n", + " cfg ([type]): [description]\n", + " seed (int, optional): 随机种子. Defaults to 1.\n", + " Returns:\n", + " env [type]: 环境\n", + " agent : 智能体\n", + " ''' \n", + " env = gym.make(cfg.env_name) \n", + " env = CliffWalkingWapper(env)\n", + " env.seed(seed) # 设置随机种子\n", + " n_states = env.observation_space.n # 状态维度\n", + " n_actions = env.action_space.n # 动作维度\n", + " agent = QLearning(n_states,n_actions,cfg)\n", + " return env,agent" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 4、设置参数\n", + "\n", + "到这里所有qlearning模块就算完成了,下面需要设置一些参数,方便大家“炼丹”,其中默认的是笔者已经调好的~。另外为了定义了一个画图函数,用来描述奖励的变化。" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "import datetime\n", + "import argparse\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "def get_args():\n", + " \"\"\" \n", + " \"\"\"\n", + " curr_time = datetime.datetime.now().strftime(\"%Y%m%d-%H%M%S\") # 获取当前时间\n", + " parser = argparse.ArgumentParser(description=\"hyperparameters\") \n", + " parser.add_argument('--algo_name',default='Q-learning',type=str,help=\"name of algorithm\")\n", + " parser.add_argument('--env_name',default='CliffWalking-v0',type=str,help=\"name of environment\")\n", + " parser.add_argument('--train_eps',default=400,type=int,help=\"episodes of training\") # 训练的回合数\n", + " parser.add_argument('--test_eps',default=20,type=int,help=\"episodes of testing\") # 测试的回合数\n", + " parser.add_argument('--gamma',default=0.90,type=float,help=\"discounted factor\") # 折扣因子\n", + " parser.add_argument('--epsilon_start',default=0.95,type=float,help=\"initial value of epsilon\") # e-greedy策略中初始epsilon\n", + " parser.add_argument('--epsilon_end',default=0.01,type=float,help=\"final value of epsilon\") # e-greedy策略中的终止epsilon\n", + " parser.add_argument('--epsilon_decay',default=300,type=int,help=\"decay rate of epsilon\") # e-greedy策略中epsilon的衰减率\n", + " parser.add_argument('--lr',default=0.1,type=float,help=\"learning rate\")\n", + " parser.add_argument('--device',default='cpu',type=str,help=\"cpu or cuda\") \n", + " args = parser.parse_args([]) \n", + " return args\n", + "curr_time = datetime.datetime.now().strftime(\"%Y%m%d-%H%M%S\") # 获取当前时间\n", + "\n", + "def smooth(data, weight=0.9): \n", + " '''用于平滑曲线,类似于Tensorboard中的smooth\n", + "\n", + " Args:\n", + " data (List):输入数据\n", + " weight (Float): 平滑权重,处于0-1之间,数值越高说明越平滑,一般取0.9\n", + "\n", + " Returns:\n", + " smoothed (List): 平滑后的数据\n", + " '''\n", + " last = data[0] # First value in the plot (first timestep)\n", + " smoothed = list()\n", + " for point in data:\n", + " smoothed_val = last * weight + (1 - weight) * point # 计算平滑值\n", + " smoothed.append(smoothed_val) \n", + " last = smoothed_val \n", + " return smoothed\n", + "\n", + "def plot_rewards(rewards,cfg, tag='train'):\n", + " sns.set()\n", + " plt.figure() # 创建一个图形实例,方便同时多画几个图\n", + " plt.title(\"learning curve on {} of {} for {}\".format(\n", + " cfg.device, cfg.algo_name, cfg.env_name))\n", + " plt.xlabel('epsiodes')\n", + " plt.plot(rewards, label='rewards')\n", + " plt.plot(smooth(rewards), label='smoothed')\n", + " plt.legend()\n", + " plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 5、我准备好了!\n", + "\n", + "到现在我们真的可以像海绵宝宝那样大声说出来“我准备好了!“,跟着注释来看下效果吧~。" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + 
"metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "开始训练!\n", + "环境:CliffWalking-v0, 算法:Q-learning, 设备:cpu\n", + "回合:1/400,奖励:-1668.0,Epsilon:0.3771901652370099\n", + "回合:2/400,奖励:-2328.0,Epsilon:0.03210668110464856\n", + "回合:3/400,奖励:-152.0,Epsilon:0.02331928797825333\n", + "回合:4/400,奖励:-296.0,Epsilon:0.014965661602689185\n", + "回合:5/400,奖励:-168.0,Epsilon:0.012836430915462094\n", + "回合:6/400,奖励:-149.0,Epsilon:0.011726126490407173\n", + "回合:7/400,奖励:-274.0,Epsilon:0.010963239247691907\n", + "回合:8/400,奖励:-127.0,Epsilon:0.010630787152305933\n", + "回合:9/400,奖励:-356.0,Epsilon:0.010267816440118822\n", + "回合:10/400,奖励:-105.0,Epsilon:0.0101887270555826\n", + "回合:11/400,奖励:-162.0,Epsilon:0.01010998036181645\n", + "回合:12/400,奖励:-124.0,Epsilon:0.010072745604688937\n", + "回合:13/400,奖励:-125.0,Epsilon:0.010047956858279448\n", + "回合:14/400,奖励:-69.0,Epsilon:0.010038103335373512\n", + "回合:15/400,奖励:-146.0,Epsilon:0.010023421049147612\n", + "回合:16/400,奖励:-99.0,Epsilon:0.010016837948094095\n", + "回合:17/400,奖励:-102.0,Epsilon:0.010011984751749595\n", + "回合:18/400,奖励:-114.0,Epsilon:0.010008195909220538\n", + "回合:19/400,奖励:-95.0,Epsilon:0.010005971322860786\n", + "回合:20/400,奖励:-50.0,Epsilon:0.010005054615675078\n", + "回合:21/400,奖励:-179.0,Epsilon:0.010002783294099886\n", + "回合:22/400,奖励:-51.0,Epsilon:0.010002348167306314\n", + "回合:23/400,奖励:-53.0,Epsilon:0.010001967902958245\n", + "回合:24/400,奖励:-126.0,Epsilon:0.01000129300438042\n", + "回合:25/400,奖励:-105.0,Epsilon:0.010000911164786836\n", + "回合:26/400,奖励:-55.0,Epsilon:0.010000758536131584\n", + "回合:27/400,奖励:-112.0,Epsilon:0.010000522203364875\n", + "回合:28/400,奖励:-81.0,Epsilon:0.01000039863934062\n", + "回合:29/400,奖励:-187.0,Epsilon:0.010000297294659517\n", + "回合:30/400,奖励:-176.0,Epsilon:0.01000022999489198\n", + "回合:31/400,奖励:-71.0,Epsilon:0.010000181524464132\n", + "回合:32/400,奖励:-77.0,Epsilon:0.010000140432053464\n", + "回合:33/400,奖励:-82.0,Epsilon:0.010000106846201706\n", + "回合:34/400,奖励:-95.0,Epsilon:0.010000077845318887\n", + "回合:35/400,奖励:-53.0,Epsilon:0.010000065238977184\n", + "回合:36/400,奖励:-30.0,Epsilon:0.010000059030667672\n", + "回合:37/400,奖励:-122.0,Epsilon:0.010000039306520976\n", + "回合:38/400,奖励:-37.0,Epsilon:0.010000034745744355\n", + "回合:39/400,奖励:-100.0,Epsilon:0.01000002489641374\n", + "回合:40/400,奖励:-201.0,Epsilon:0.010000017720528442\n", + "回合:41/400,奖励:-62.0,Epsilon:0.010000014411941012\n", + "回合:42/400,奖励:-61.0,Epsilon:0.010000011760233133\n", + "回合:43/400,奖励:-57.0,Epsilon:0.010000009725232207\n", + "回合:44/400,奖励:-73.0,Epsilon:0.0100000076246806\n", + "回合:45/400,奖励:-39.0,Epsilon:0.010000006695197199\n", + "回合:46/400,奖励:-71.0,Epsilon:0.010000005284213373\n", + "回合:47/400,奖励:-77.0,Epsilon:0.010000004088005098\n", + "回合:48/400,奖励:-53.0,Epsilon:0.010000003425989836\n", + "回合:49/400,奖励:-88.0,Epsilon:0.010000002555012459\n", + "回合:50/400,奖励:-65.0,Epsilon:0.01000000205729175\n", + "回合:51/400,奖励:-41.0,Epsilon:0.010000001794495218\n", + "回合:52/400,奖励:-67.0,Epsilon:0.010000001435323749\n", + "回合:53/400,奖励:-38.0,Epsilon:0.010000001264559407\n", + "回合:54/400,奖励:-50.0,Epsilon:0.010000001070426428\n", + "回合:55/400,奖励:-35.0,Epsilon:0.010000000952552966\n", + "回合:56/400,奖励:-74.0,Epsilon:0.010000000744325952\n", + "回合:57/400,奖励:-75.0,Epsilon:0.010000000579681634\n", + "回合:58/400,奖励:-31.0,Epsilon:0.010000000522772152\n", + "回合:59/400,奖励:-38.0,Epsilon:0.010000000460576537\n", + "回合:60/400,奖励:-51.0,Epsilon:0.01000000038857222\n", + "回合:61/400,奖励:-64.0,Epsilon:0.010000000313922366\n", + 
"回合:62/400,奖励:-78.0,Epsilon:0.010000000242050338\n", + "回合:63/400,奖励:-41.0,Epsilon:0.010000000211131054\n", + "回合:64/400,奖励:-62.0,Epsilon:0.010000000171710922\n", + "回合:65/400,奖励:-58.0,Epsilon:0.010000000141525377\n", + "回合:66/400,奖励:-34.0,Epsilon:0.010000000126361357\n", + "回合:67/400,奖励:-52.0,Epsilon:0.010000000106251867\n", + "回合:68/400,奖励:-28.0,Epsilon:0.010000000096783744\n", + "回合:69/400,奖励:-57.0,Epsilon:0.010000000080036202\n", + "回合:70/400,奖励:-39.0,Epsilon:0.010000000070279423\n", + "回合:71/400,奖励:-55.0,Epsilon:0.01000000005850696\n", + "回合:72/400,奖励:-33.0,Epsilon:0.010000000052412531\n", + "回合:73/400,奖励:-62.0,Epsilon:0.010000000042626625\n", + "回合:74/400,奖励:-56.0,Epsilon:0.010000000035368174\n", + "回合:75/400,奖励:-34.0,Epsilon:0.01000000003157858\n", + "回合:76/400,奖励:-37.0,Epsilon:0.010000000027914485\n", + "回合:77/400,奖励:-149.0,Epsilon:0.0100000000236291\n", + "回合:78/400,奖励:-46.0,Epsilon:0.010000000020270076\n", + "回合:79/400,奖励:-28.0,Epsilon:0.010000000018463805\n", + "回合:80/400,奖励:-37.0,Epsilon:0.010000000016321432\n", + "回合:81/400,奖励:-64.0,Epsilon:0.01000000001318587\n", + "回合:82/400,奖励:-52.0,Epsilon:0.010000000011087433\n", + "回合:83/400,奖励:-22.0,Epsilon:0.010000000010303453\n", + "回合:84/400,奖励:-32.0,Epsilon:0.010000000009261004\n", + "回合:85/400,奖励:-74.0,Epsilon:0.010000000007236559\n", + "回合:86/400,奖励:-33.0,Epsilon:0.010000000006482756\n", + "回合:87/400,奖励:-39.0,Epsilon:0.010000000005692478\n", + "回合:88/400,奖励:-40.0,Epsilon:0.010000000004981906\n", + "回合:89/400,奖励:-33.0,Epsilon:0.010000000004462961\n", + "回合:90/400,奖励:-47.0,Epsilon:0.010000000003815783\n", + "回合:91/400,奖励:-45.0,Epsilon:0.010000000003284274\n", + "回合:92/400,奖励:-28.0,Epsilon:0.010000000002991612\n", + "回合:93/400,奖励:-45.0,Epsilon:0.010000000002574904\n", + "回合:94/400,奖励:-56.0,Epsilon:0.010000000002136451\n", + "回合:95/400,奖励:-31.0,Epsilon:0.010000000001926707\n", + "回合:96/400,奖励:-38.0,Epsilon:0.010000000001697481\n", + "回合:97/400,奖励:-50.0,Epsilon:0.010000000001436887\n", + "回合:98/400,奖励:-41.0,Epsilon:0.010000000001253341\n", + "回合:99/400,奖励:-41.0,Epsilon:0.01000000000109324\n", + "回合:100/400,奖励:-13.0,Epsilon:0.010000000001046878\n", + "回合:101/400,奖励:-45.0,Epsilon:0.010000000000901057\n", + "回合:102/400,奖励:-19.0,Epsilon:0.01000000000084576\n", + "回合:103/400,奖励:-44.0,Epsilon:0.010000000000730383\n", + "回合:104/400,奖励:-23.0,Epsilon:0.010000000000676478\n", + "回合:105/400,奖励:-40.0,Epsilon:0.010000000000592037\n", + "回合:106/400,奖励:-52.0,Epsilon:0.010000000000497817\n", + "回合:107/400,奖励:-38.0,Epsilon:0.010000000000438592\n", + "回合:108/400,奖励:-24.0,Epsilon:0.01000000000040487\n", + "回合:109/400,奖励:-32.0,Epsilon:0.010000000000363909\n", + "回合:110/400,奖励:-38.0,Epsilon:0.010000000000320614\n", + "回合:111/400,奖励:-52.0,Epsilon:0.01000000000026959\n", + "回合:112/400,奖励:-22.0,Epsilon:0.010000000000250527\n", + "回合:113/400,奖励:-38.0,Epsilon:0.010000000000220721\n", + "回合:114/400,奖励:-33.0,Epsilon:0.01000000000019773\n", + "回合:115/400,奖励:-29.0,Epsilon:0.010000000000179511\n", + "回合:116/400,奖励:-56.0,Epsilon:0.010000000000148944\n", + "回合:117/400,奖励:-20.0,Epsilon:0.010000000000139338\n", + "回合:118/400,奖励:-31.0,Epsilon:0.010000000000125658\n", + "回合:119/400,奖励:-33.0,Epsilon:0.01000000000011257\n", + "回合:120/400,奖励:-39.0,Epsilon:0.010000000000098846\n", + "回合:121/400,奖励:-26.0,Epsilon:0.010000000000090641\n", + "回合:122/400,奖励:-31.0,Epsilon:0.010000000000081742\n", + "回合:123/400,奖励:-40.0,Epsilon:0.010000000000071538\n", + "回合:124/400,奖励:-33.0,Epsilon:0.010000000000064086\n", + "回合:125/400,奖励:-46.0,Epsilon:0.010000000000054977\n", + 
"回合:126/400,奖励:-28.0,Epsilon:0.010000000000050078\n", + "回合:127/400,奖励:-23.0,Epsilon:0.010000000000046382\n", + "回合:128/400,奖励:-30.0,Epsilon:0.010000000000041968\n", + "回合:129/400,奖励:-24.0,Epsilon:0.010000000000038742\n", + "回合:130/400,奖励:-36.0,Epsilon:0.01000000000003436\n", + "回合:131/400,奖励:-28.0,Epsilon:0.010000000000031298\n", + "回合:132/400,奖励:-28.0,Epsilon:0.01000000000002851\n", + "回合:133/400,奖励:-35.0,Epsilon:0.01000000000002537\n", + "回合:134/400,奖励:-27.0,Epsilon:0.010000000000023187\n", + "回合:135/400,奖励:-30.0,Epsilon:0.01000000000002098\n", + "回合:136/400,奖励:-35.0,Epsilon:0.01000000000001867\n", + "回合:137/400,奖励:-31.0,Epsilon:0.010000000000016837\n", + "回合:138/400,奖励:-27.0,Epsilon:0.010000000000015387\n", + "回合:139/400,奖励:-48.0,Epsilon:0.010000000000013113\n", + "回合:140/400,奖励:-23.0,Epsilon:0.010000000000012145\n", + "回合:141/400,奖励:-29.0,Epsilon:0.010000000000011026\n", + "回合:142/400,奖励:-21.0,Epsilon:0.01000000000001028\n", + "回合:143/400,奖励:-22.0,Epsilon:0.010000000000009553\n", + "回合:144/400,奖励:-42.0,Epsilon:0.010000000000008306\n", + "回合:145/400,奖励:-21.0,Epsilon:0.010000000000007744\n", + "回合:146/400,奖励:-141.0,Epsilon:0.010000000000006733\n", + "回合:147/400,奖励:-43.0,Epsilon:0.010000000000005834\n", + "回合:148/400,奖励:-44.0,Epsilon:0.010000000000005038\n", + "回合:149/400,奖励:-18.0,Epsilon:0.010000000000004745\n", + "回合:150/400,奖励:-23.0,Epsilon:0.010000000000004394\n", + "回合:151/400,奖励:-24.0,Epsilon:0.010000000000004056\n", + "回合:152/400,奖励:-30.0,Epsilon:0.010000000000003671\n", + "回合:153/400,奖励:-27.0,Epsilon:0.010000000000003355\n", + "回合:154/400,奖励:-15.0,Epsilon:0.01000000000000319\n", + "回合:155/400,奖励:-19.0,Epsilon:0.010000000000002994\n", + "回合:156/400,奖励:-50.0,Epsilon:0.010000000000002535\n", + "回合:157/400,奖励:-22.0,Epsilon:0.010000000000002356\n", + "回合:158/400,奖励:-28.0,Epsilon:0.010000000000002146\n", + "回合:159/400,奖励:-27.0,Epsilon:0.010000000000001962\n", + "回合:160/400,奖励:-13.0,Epsilon:0.010000000000001879\n", + "回合:161/400,奖励:-33.0,Epsilon:0.010000000000001683\n", + "回合:162/400,奖励:-24.0,Epsilon:0.010000000000001553\n", + "回合:163/400,奖励:-30.0,Epsilon:0.010000000000001405\n", + "回合:164/400,奖励:-19.0,Epsilon:0.010000000000001319\n", + "回合:165/400,奖励:-22.0,Epsilon:0.010000000000001227\n", + "回合:166/400,奖励:-32.0,Epsilon:0.010000000000001102\n", + "回合:167/400,奖励:-35.0,Epsilon:0.01000000000000098\n", + "回合:168/400,奖励:-32.0,Epsilon:0.010000000000000881\n", + "回合:169/400,奖励:-21.0,Epsilon:0.010000000000000822\n", + "回合:170/400,奖励:-27.0,Epsilon:0.010000000000000751\n", + "回合:171/400,奖励:-22.0,Epsilon:0.010000000000000698\n", + "回合:172/400,奖励:-22.0,Epsilon:0.010000000000000649\n", + "回合:173/400,奖励:-34.0,Epsilon:0.01000000000000058\n", + "回合:174/400,奖励:-22.0,Epsilon:0.010000000000000538\n", + "回合:175/400,奖励:-27.0,Epsilon:0.010000000000000491\n", + "回合:176/400,奖励:-13.0,Epsilon:0.01000000000000047\n", + "回合:177/400,奖励:-29.0,Epsilon:0.010000000000000427\n", + "回合:178/400,奖励:-20.0,Epsilon:0.010000000000000401\n", + "回合:179/400,奖励:-22.0,Epsilon:0.010000000000000371\n", + "回合:180/400,奖励:-33.0,Epsilon:0.010000000000000333\n", + "回合:181/400,奖励:-20.0,Epsilon:0.010000000000000312\n", + "回合:182/400,奖励:-26.0,Epsilon:0.010000000000000286\n", + "回合:183/400,奖励:-22.0,Epsilon:0.010000000000000266\n", + "回合:184/400,奖励:-29.0,Epsilon:0.010000000000000241\n", + "回合:185/400,奖励:-25.0,Epsilon:0.010000000000000222\n", + "回合:186/400,奖励:-16.0,Epsilon:0.01000000000000021\n", + "回合:187/400,奖励:-28.0,Epsilon:0.010000000000000191\n", + "回合:188/400,奖励:-23.0,Epsilon:0.010000000000000177\n", + 
"回合:189/400,奖励:-31.0,Epsilon:0.01000000000000016\n", + "回合:190/400,奖励:-17.0,Epsilon:0.010000000000000151\n", + "回合:191/400,奖励:-22.0,Epsilon:0.01000000000000014\n", + "回合:192/400,奖励:-18.0,Epsilon:0.010000000000000132\n", + "回合:193/400,奖励:-34.0,Epsilon:0.010000000000000118\n", + "回合:194/400,奖励:-32.0,Epsilon:0.010000000000000106\n", + "回合:195/400,奖励:-14.0,Epsilon:0.0100000000000001\n", + "回合:196/400,奖励:-23.0,Epsilon:0.010000000000000094\n", + "回合:197/400,奖励:-23.0,Epsilon:0.010000000000000087\n", + "回合:198/400,奖励:-28.0,Epsilon:0.01000000000000008\n", + "回合:199/400,奖励:-24.0,Epsilon:0.010000000000000073\n", + "回合:200/400,奖励:-21.0,Epsilon:0.010000000000000068\n", + "回合:201/400,奖励:-15.0,Epsilon:0.010000000000000064\n", + "回合:202/400,奖励:-16.0,Epsilon:0.010000000000000061\n", + "回合:203/400,奖励:-22.0,Epsilon:0.010000000000000057\n", + "回合:204/400,奖励:-28.0,Epsilon:0.010000000000000052\n", + "回合:205/400,奖励:-25.0,Epsilon:0.010000000000000049\n", + "回合:206/400,奖励:-16.0,Epsilon:0.010000000000000045\n", + "回合:207/400,奖励:-13.0,Epsilon:0.010000000000000044\n", + "回合:208/400,奖励:-31.0,Epsilon:0.01000000000000004\n", + "回合:209/400,奖励:-25.0,Epsilon:0.010000000000000037\n", + "回合:210/400,奖励:-21.0,Epsilon:0.010000000000000033\n", + "回合:211/400,奖励:-26.0,Epsilon:0.010000000000000031\n", + "回合:212/400,奖励:-13.0,Epsilon:0.01000000000000003\n", + "回合:213/400,奖励:-15.0,Epsilon:0.010000000000000028\n", + "回合:214/400,奖励:-23.0,Epsilon:0.010000000000000026\n", + "回合:215/400,奖励:-23.0,Epsilon:0.010000000000000024\n", + "回合:216/400,奖励:-13.0,Epsilon:0.010000000000000023\n", + "回合:217/400,奖励:-21.0,Epsilon:0.010000000000000021\n", + "回合:218/400,奖励:-28.0,Epsilon:0.01000000000000002\n", + "回合:219/400,奖励:-24.0,Epsilon:0.010000000000000018\n", + "回合:220/400,奖励:-20.0,Epsilon:0.010000000000000018\n", + "回合:221/400,奖励:-13.0,Epsilon:0.010000000000000016\n", + "回合:222/400,奖励:-15.0,Epsilon:0.010000000000000016\n", + "回合:223/400,奖励:-27.0,Epsilon:0.010000000000000014\n", + "回合:224/400,奖励:-18.0,Epsilon:0.010000000000000014\n", + "回合:225/400,奖励:-20.0,Epsilon:0.010000000000000012\n", + "回合:226/400,奖励:-27.0,Epsilon:0.010000000000000012\n", + "回合:227/400,奖励:-18.0,Epsilon:0.01000000000000001\n", + "回合:228/400,奖励:-15.0,Epsilon:0.01000000000000001\n", + "回合:229/400,奖励:-19.0,Epsilon:0.010000000000000009\n", + "回合:230/400,奖励:-20.0,Epsilon:0.010000000000000009\n", + "回合:231/400,奖励:-13.0,Epsilon:0.010000000000000009\n", + "回合:232/400,奖励:-28.0,Epsilon:0.010000000000000007\n", + "回合:233/400,奖励:-38.0,Epsilon:0.010000000000000007\n", + "回合:234/400,奖励:-17.0,Epsilon:0.010000000000000007\n", + "回合:235/400,奖励:-22.0,Epsilon:0.010000000000000005\n", + "回合:236/400,奖励:-13.0,Epsilon:0.010000000000000005\n", + "回合:237/400,奖励:-20.0,Epsilon:0.010000000000000005\n", + "回合:238/400,奖励:-18.0,Epsilon:0.010000000000000005\n", + "回合:239/400,奖励:-14.0,Epsilon:0.010000000000000005\n", + "回合:240/400,奖励:-13.0,Epsilon:0.010000000000000005\n", + "回合:241/400,奖励:-28.0,Epsilon:0.010000000000000004\n", + "回合:242/400,奖励:-13.0,Epsilon:0.010000000000000004\n", + "回合:243/400,奖励:-23.0,Epsilon:0.010000000000000004\n", + "回合:244/400,奖励:-17.0,Epsilon:0.010000000000000004\n", + "回合:245/400,奖励:-14.0,Epsilon:0.010000000000000004\n", + "回合:246/400,奖励:-22.0,Epsilon:0.010000000000000004\n", + "回合:247/400,奖励:-15.0,Epsilon:0.010000000000000004\n", + "回合:248/400,奖励:-19.0,Epsilon:0.010000000000000004\n", + "回合:249/400,奖励:-17.0,Epsilon:0.010000000000000004\n", + "回合:250/400,奖励:-27.0,Epsilon:0.010000000000000002\n", + "回合:251/400,奖励:-21.0,Epsilon:0.010000000000000002\n", + 
"回合:252/400,奖励:-23.0,Epsilon:0.010000000000000002\n", + "回合:253/400,奖励:-15.0,Epsilon:0.010000000000000002\n", + "回合:254/400,奖励:-15.0,Epsilon:0.010000000000000002\n", + "回合:255/400,奖励:-13.0,Epsilon:0.010000000000000002\n", + "回合:256/400,奖励:-15.0,Epsilon:0.010000000000000002\n", + "回合:257/400,奖励:-13.0,Epsilon:0.010000000000000002\n", + "回合:258/400,奖励:-28.0,Epsilon:0.010000000000000002\n", + "回合:259/400,奖励:-13.0,Epsilon:0.010000000000000002\n", + "回合:260/400,奖励:-13.0,Epsilon:0.010000000000000002\n", + "回合:261/400,奖励:-23.0,Epsilon:0.010000000000000002\n", + "回合:262/400,奖励:-13.0,Epsilon:0.010000000000000002\n", + "回合:263/400,奖励:-24.0,Epsilon:0.010000000000000002\n", + "回合:264/400,奖励:-17.0,Epsilon:0.010000000000000002\n", + "回合:265/400,奖励:-19.0,Epsilon:0.010000000000000002\n", + "回合:266/400,奖励:-13.0,Epsilon:0.010000000000000002\n", + "回合:267/400,奖励:-25.0,Epsilon:0.010000000000000002\n", + "回合:268/400,奖励:-15.0,Epsilon:0.01\n", + "回合:269/400,奖励:-15.0,Epsilon:0.01\n", + "回合:270/400,奖励:-21.0,Epsilon:0.01\n", + "回合:271/400,奖励:-13.0,Epsilon:0.01\n", + "回合:272/400,奖励:-13.0,Epsilon:0.01\n", + "回合:273/400,奖励:-22.0,Epsilon:0.01\n", + "回合:274/400,奖励:-15.0,Epsilon:0.01\n", + "回合:275/400,奖励:-13.0,Epsilon:0.01\n", + "回合:276/400,奖励:-19.0,Epsilon:0.01\n", + "回合:277/400,奖励:-13.0,Epsilon:0.01\n", + "回合:278/400,奖励:-18.0,Epsilon:0.01\n", + "回合:279/400,奖励:-14.0,Epsilon:0.01\n", + "回合:280/400,奖励:-126.0,Epsilon:0.01\n", + "回合:281/400,奖励:-15.0,Epsilon:0.01\n", + "回合:282/400,奖励:-19.0,Epsilon:0.01\n", + "回合:283/400,奖励:-13.0,Epsilon:0.01\n", + "回合:284/400,奖励:-25.0,Epsilon:0.01\n", + "回合:285/400,奖励:-13.0,Epsilon:0.01\n", + "回合:286/400,奖励:-119.0,Epsilon:0.01\n", + "回合:287/400,奖励:-15.0,Epsilon:0.01\n", + "回合:288/400,奖励:-15.0,Epsilon:0.01\n", + "回合:289/400,奖励:-14.0,Epsilon:0.01\n", + "回合:290/400,奖励:-13.0,Epsilon:0.01\n", + "回合:291/400,奖励:-13.0,Epsilon:0.01\n", + "回合:292/400,奖励:-15.0,Epsilon:0.01\n", + "回合:293/400,奖励:-33.0,Epsilon:0.01\n", + "回合:294/400,奖励:-19.0,Epsilon:0.01\n", + "回合:295/400,奖励:-13.0,Epsilon:0.01\n", + "回合:296/400,奖励:-15.0,Epsilon:0.01\n", + "回合:297/400,奖励:-13.0,Epsilon:0.01\n", + "回合:298/400,奖励:-132.0,Epsilon:0.01\n", + "回合:299/400,奖励:-13.0,Epsilon:0.01\n", + "回合:300/400,奖励:-13.0,Epsilon:0.01\n", + "回合:301/400,奖励:-13.0,Epsilon:0.01\n", + "回合:302/400,奖励:-14.0,Epsilon:0.01\n", + "回合:303/400,奖励:-15.0,Epsilon:0.01\n", + "回合:304/400,奖励:-13.0,Epsilon:0.01\n", + "回合:305/400,奖励:-13.0,Epsilon:0.01\n", + "回合:306/400,奖励:-13.0,Epsilon:0.01\n", + "回合:307/400,奖励:-13.0,Epsilon:0.01\n", + "回合:308/400,奖励:-13.0,Epsilon:0.01\n", + "回合:309/400,奖励:-13.0,Epsilon:0.01\n", + "回合:310/400,奖励:-13.0,Epsilon:0.01\n", + "回合:311/400,奖励:-15.0,Epsilon:0.01\n", + "回合:312/400,奖励:-13.0,Epsilon:0.01\n", + "回合:313/400,奖励:-13.0,Epsilon:0.01\n", + "回合:314/400,奖励:-13.0,Epsilon:0.01\n", + "回合:315/400,奖励:-15.0,Epsilon:0.01\n", + "回合:316/400,奖励:-14.0,Epsilon:0.01\n", + "回合:317/400,奖励:-13.0,Epsilon:0.01\n", + "回合:318/400,奖励:-13.0,Epsilon:0.01\n", + "回合:319/400,奖励:-13.0,Epsilon:0.01\n", + "回合:320/400,奖励:-21.0,Epsilon:0.01\n", + "回合:321/400,奖励:-19.0,Epsilon:0.01\n", + "回合:322/400,奖励:-13.0,Epsilon:0.01\n", + "回合:323/400,奖励:-13.0,Epsilon:0.01\n", + "回合:324/400,奖励:-13.0,Epsilon:0.01\n", + "回合:325/400,奖励:-13.0,Epsilon:0.01\n", + "回合:326/400,奖励:-14.0,Epsilon:0.01\n", + "回合:327/400,奖励:-15.0,Epsilon:0.01\n", + "回合:328/400,奖励:-13.0,Epsilon:0.01\n", + "回合:329/400,奖励:-13.0,Epsilon:0.01\n", + "回合:330/400,奖励:-13.0,Epsilon:0.01\n", + "回合:331/400,奖励:-13.0,Epsilon:0.01\n", + "回合:332/400,奖励:-13.0,Epsilon:0.01\n", + "回合:333/400,奖励:-14.0,Epsilon:0.01\n", + 
"回合:334/400,奖励:-13.0,Epsilon:0.01\n", + "回合:335/400,奖励:-113.0,Epsilon:0.01\n", + "回合:336/400,奖励:-13.0,Epsilon:0.01\n", + "回合:337/400,奖励:-13.0,Epsilon:0.01\n", + "回合:338/400,奖励:-13.0,Epsilon:0.01\n", + "回合:339/400,奖励:-13.0,Epsilon:0.01\n", + "回合:340/400,奖励:-13.0,Epsilon:0.01\n", + "回合:341/400,奖励:-15.0,Epsilon:0.01\n", + "回合:342/400,奖励:-23.0,Epsilon:0.01\n", + "回合:343/400,奖励:-13.0,Epsilon:0.01\n", + "回合:344/400,奖励:-13.0,Epsilon:0.01\n", + "回合:345/400,奖励:-13.0,Epsilon:0.01\n", + "回合:346/400,奖励:-13.0,Epsilon:0.01\n", + "回合:347/400,奖励:-13.0,Epsilon:0.01\n", + "回合:348/400,奖励:-13.0,Epsilon:0.01\n", + "回合:349/400,奖励:-13.0,Epsilon:0.01\n", + "回合:350/400,奖励:-13.0,Epsilon:0.01\n", + "回合:351/400,奖励:-13.0,Epsilon:0.01\n", + "回合:352/400,奖励:-13.0,Epsilon:0.01\n", + "回合:353/400,奖励:-13.0,Epsilon:0.01\n", + "回合:354/400,奖励:-13.0,Epsilon:0.01\n", + "回合:355/400,奖励:-13.0,Epsilon:0.01\n", + "回合:356/400,奖励:-13.0,Epsilon:0.01\n", + "回合:357/400,奖励:-13.0,Epsilon:0.01\n", + "回合:358/400,奖励:-13.0,Epsilon:0.01\n", + "回合:359/400,奖励:-13.0,Epsilon:0.01\n", + "回合:360/400,奖励:-13.0,Epsilon:0.01\n", + "回合:361/400,奖励:-13.0,Epsilon:0.01\n", + "回合:362/400,奖励:-13.0,Epsilon:0.01\n", + "回合:363/400,奖励:-13.0,Epsilon:0.01\n", + "回合:364/400,奖励:-13.0,Epsilon:0.01\n", + "回合:365/400,奖励:-13.0,Epsilon:0.01\n", + "回合:366/400,奖励:-13.0,Epsilon:0.01\n", + "回合:367/400,奖励:-13.0,Epsilon:0.01\n", + "回合:368/400,奖励:-13.0,Epsilon:0.01\n", + "回合:369/400,奖励:-13.0,Epsilon:0.01\n", + "回合:370/400,奖励:-13.0,Epsilon:0.01\n", + "回合:371/400,奖励:-13.0,Epsilon:0.01\n", + "回合:372/400,奖励:-14.0,Epsilon:0.01\n", + "回合:373/400,奖励:-13.0,Epsilon:0.01\n", + "回合:374/400,奖励:-15.0,Epsilon:0.01\n", + "回合:375/400,奖励:-13.0,Epsilon:0.01\n", + "回合:376/400,奖励:-13.0,Epsilon:0.01\n", + "回合:377/400,奖励:-13.0,Epsilon:0.01\n", + "回合:378/400,奖励:-13.0,Epsilon:0.01\n", + "回合:379/400,奖励:-13.0,Epsilon:0.01\n", + "回合:380/400,奖励:-117.0,Epsilon:0.01\n", + "回合:381/400,奖励:-13.0,Epsilon:0.01\n", + "回合:382/400,奖励:-13.0,Epsilon:0.01\n", + "回合:383/400,奖励:-13.0,Epsilon:0.01\n", + "回合:384/400,奖励:-13.0,Epsilon:0.01\n", + "回合:385/400,奖励:-13.0,Epsilon:0.01\n", + "回合:386/400,奖励:-13.0,Epsilon:0.01\n", + "回合:387/400,奖励:-13.0,Epsilon:0.01\n", + "回合:388/400,奖励:-13.0,Epsilon:0.01\n", + "回合:389/400,奖励:-13.0,Epsilon:0.01\n", + "回合:390/400,奖励:-13.0,Epsilon:0.01\n", + "回合:391/400,奖励:-13.0,Epsilon:0.01\n", + "回合:392/400,奖励:-13.0,Epsilon:0.01\n", + "回合:393/400,奖励:-13.0,Epsilon:0.01\n", + "回合:394/400,奖励:-13.0,Epsilon:0.01\n", + "回合:395/400,奖励:-13.0,Epsilon:0.01\n", + "回合:396/400,奖励:-13.0,Epsilon:0.01\n", + "回合:397/400,奖励:-13.0,Epsilon:0.01\n", + "回合:398/400,奖励:-15.0,Epsilon:0.01\n", + "回合:399/400,奖励:-13.0,Epsilon:0.01\n", + "回合:400/400,奖励:-13.0,Epsilon:0.01\n", + "完成训练!\n" + ] + }, + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAYsAAAEcCAYAAAA2g5hwAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAAsTAAALEwEAmpwYAAA/CElEQVR4nO3deZgcVbn48W9Vd8++ZjIzySSZ7DkQQiAhYd9BEdn1KvsiXlkEkStcwSsKKCCKKILyE0UB2QTEHdkVkS0hYYdwAtn3zExm37u7fn+c6pmeTvV0z9ILmffzPHnSU1Vd9XZ1db11ljplOY6DEEIIMRg70wEIIYTIfpIshBBCJCTJQgghREKSLIQQQiQkyUIIIURCkiyEEEIk5M90AOmilFoL/LfW+rk0b/cQ4G6ttUrndsXoUErlA48ChwLPaK2/MErrPQ9zPB48Gusb4rafBH6vtb4vBeu+GLgOKASmaq0bRnsbMdu7DpiltT5LKVULfACUaq1DSqlq4DFgAfAr4Ergt8DJwEda631HMY57gY1a62uUUocDD2itJ3ss94k9H4yZZJEpWuv/AJ+4A0P0+S+gGqjQWge9FlBKzQVuBg7DlNZfB/5Pa/1a2qIcAq31salYr1IqAPwE2F9r/fYorvcM4BvAbkAr8BZwo9b6pejltNbrgaKoSRcA9UCJ1tpxT9SfAiZrrduVUhr4rtb6EXc7BwEvAafFTHsKKI/3/Q9FJs8HSqn/Aa4CCoA/ABdrrbuTfb9UQ42QUsqX6RhGalf4DCk0FVg5SKKYCbwMvAtMB2qAPwPPKqVG7co1WUqpTF4AVgN5wPtDfaNSylJK7XQ+Ukp9A7gNuMldfy1wJ3BSEqudCnygtXai/l6rtW53/34RU2KMOBT40GPaq6ORKDJJKXUMcDVwFGY/zACuH8o6xmTJwj0ovwl8BSgDngcu0lrvcOc/BhwC5ANvYzLw++68e4FOzA4/DDhJKXU38HPgHHf6U8C5Wuuu2CKpWx3muaw7/5vA/wAO8F3g18BsrfXHHp9jHHArcIwb67+11id7VXEopZzIejw+w/fcq45JWuuQu/wpwPVa6/mJ9pdHXF/BXMGMw1ypXaS13hwVx8XAFUAl8CBwadQPOno9Pnc9XwaqgJXAyVrrDe56vg5cDpQA9wBXaa3D0VUT7nqmAWuAgNePXim1O/D/gL2BTcC3tNZ/VUpdD3wLsJRSJwNf11r/Jubt12FOJt+Omna7u84fAkd47SOPGHYD7gD2AeqA72itH3XnHQfcAMwEmoHfaK2vi/ls/w1cC6xVSv3W/fs1d981AV/VWj/pvucFzDF5d+RYGWTZ6cB9mKqcJYDGVPOcFRP/HOBN988mpdRSrfWRSqkDgZ8BczDf39e11q9ExfEycDiwENgT+DhqnaXA94Avaa3/GLW5v7n/YvdhZF8EgLuBMwFHKXU55nj7GRBQSrVhfjcvYo7riEMw39kVMdNedNcf97wwGKXUZcBFwKeBWaTgfKCU2g/4C3F+w8C5mOMmch77Pua3d3Wi+CPGasnia5h6y8MwV4KNwC+i5j8JzMacoN7A7NRoZwA3AsWYkyHAF4HPYK4u5wPnDbJ9z2WVUp/BFLePxhxUhyf4HPdjipR7uLH+NMHy8T7Dz4B24MiY+Q+5rxPtrz5KqSOBH2A+40RgHfD7mMWOBxZjPvsXMcnOyzeA04HPYhLC+UBH1PxTgEWYE81J7vwhcatO/gY8g9mHXwMeVEoprfW1mCvaR7TWRR6JAky1xmMe0x8FDlFK5SURQyHwLGZ/VwGnAXe61VtgvptzMIn6OOBiN3lFOwzYnf59uR/mxD4e+BHwG6WUFSeEwZZ9CFgKVGAS49leK9Bar8QchwBlbqIYBzwB3O6+/yfAE0qpiqi3no2pLirGHCvRDsCUVP4UJ+64tNbnYX63P3K/u7swJ+xX3b+vxSSBPZRS49wLokXAI0BZ1LSD3OUg8XlhJ0qp72J+34dprTfGWWzE5wOt9RIG/w3vgUlwEW8D1THfxaDGZMkCc9BcGvny3CvR9Uqps7XWQa31byMLuvMalVKlWutmd/JftNYvu6+7lFIAt0ddPf8Nc5UaT7xlvwjcE5X9r8NcHe1EKTUROBZTl97oTv53Up/e+zM8jDkxP6uUKsacoK905w+6v2LWeybwW631G+6y38Lsv2la67XuMjdrrZswV6D/cj//Ux4x/jfwTa21dv+OrQf/oVu62aGUus2N/+4h7AOA/TH13DdrrcPAP5VSf3fXdV0S7x8PbPGYvgXwYUpXmxOs43hM9cg97t9vKqUeB76AuTJ8IWrZd9zv6jBMdVfEdZHqFfd4XKe1/rX7932YqptqYKvH9j2XVUrlYJL6UVrrHuAlpdRfE3yWaMdhGpLvd/9+2L3KPgG415127yBX5xVAfaqqgLTW65RS6zGlhfVurJ1KqZejpuVgSlQkcV6IZimlfgLsCxwRZ5mIEZ8PXIP9hoswpdKIyOtiIKlOCGM1WUwF/qSUCkdNC2F+IFsxV9xfwFSTRJYZT/8O3uCxzugfYQfmCjyeeMvWAMui5nltJ2IKsCMqUQxV7LofAl5RpjfL54A3tNaRK724+wtTbROtBnPVBYDWuk0p1QBMAta6k2M/f3SjZLQpwKokP8M6Bt/n8dQAG9xEEb2uSUm+vx5Tgoo1EVN10OA2rD4ZWbfWeo+YZacC+ymlmqKm+TElx0gVw83APMzJK5edSzOx32ffPtZad7gJJN5+jrfseMwxFl2a24D5XpJRw86lhdh9O9gx3gCMV0r5U9hmEGm3WA/8x532UtS0pVrrbrdKNNF5IVoZpsR0aoJEAcM4H6j+nl8AaK2LGPw33IYpnUdEXrcmiK3PWE0WG4Dzo66s+yilzsZUaRyNObmVYqpdoovwqRqqdwsQ3d1usB/lBmCcUqrMvUqP1o6pngJAKTXB4/0DPoPW+gOl1DpMaSW6+BrZluf+8rAZc/KLbLsQc4UYm1SSsQFTT/9enPlT6G9MraX/Cn7A5we8Pn90vFOUUnZUwqjF1K8n4znMCeSemOlfBF5ze5v8h/gnajCf899a60/Fmf8Qpl77WLcd7DbMSSpaKo7JLZhjrCAqYSSbKCDmWHDVMrAUOVjcrwLdmCrQPwxhu0PxInAhJolFvsP/YOr419FfBXUGic8L0RqBs4BHlVKnJPnbiRX3fODR8yvRb/h9YC9M9Sju6216CF2bx2qy+CVwo1LqXLcoWgkcqLX+C6ZY1o25qinA1Fmny6PAb5VS92MO1O/EW1BrvUWZ/vJ3KqUuwVw5HKC1fhFTXbOHUmpvTO+O65Lc/kOYRuP9GVjcHWx/xXoYU93wELACs/+WRFVBDcXdwPeVUh9gGj73BDZFHeD/q5RagvnRfB1TJw6ma+VV7tVXM6aROp4lmKu5byqlbsXUUZ+AqX5JxvXA60qpGzGNpr2YOucvYaqXkvF34Gb3QiXSvrM30Ka1XoE5Jne4iWJfzIngmSTXPWzud70MuE4pdQ2m8f0EPBqX4/gHcIcyXV8fBT4PzMV83mS23+zW+f
9CKRXEfOZezAn7CK31NwddQXJexBzfUzHVntDfs20GcJc7bcjnBa31C0qpM4E/KqVO0FovHWJsSZ8PosT7Df8OuFcp9SAmiV9Df1VgUsZqA/fPgL8CzyilWjE9QfZz5/0O88VswhTz0tZX3u2BcjvwL8zJMbLteH2hz8b8eD4EtmN6BkUaG7+Huer9iP5G+EQideH/1FrXR00fbH/FfobnMAf145gro5mYBtvh+AnmB/MM0AL8BtMTJeIvwHJMcnjCnY/W+llMQ+U77vy4Jye3Lv4EzNVYPaa+/hyt9YfJBKi1/gg4GHOlthbTm+j7wCk6yRtAtdatmJ4yp2F+yFsxvXJy3UW+iumx1orpEfOo13pS5ExMQ3MDpkfWI8Q/Hgdwk/rxmN5FDZieR8fHHFuJ1nErppH3GkwvsQ3ApQxsrxk297dSB2yNlNDdEuZSTFXNK+6iwzovuMfi+cDflFILhxjbUM8HEOc3rLV+CtN54V+Y6rV1mN5zSbPk4UfZS5nul+8BuSmss/1EUlFdgTMdSzSl1GTMj/raOL2nPtGUUo8AH7q9iUQaZfp8MFarobKW2zf6H5ii7g+Bv0mi+OTQWm9USh2Luf+mSGvdlumYRkIptRjYgbl/4dOYevubMxrUGJJN5wNJFtnnQkxdYgjTFfarGY1GDJnW+l1MvfeuYALwR0wnhY2YG9HeHPwtYhRlzflAqqGEEEIkNFYbuIUQQgzBrloNlYvp+rgFU3wTQgiRmA9zQ+nrxPS62lWTxWL678YUQggxNIcQ0+V+V00WWwAaG9sJh4feJlNRUURDQ/Z1YpG4hi5bY5O4hkbiGprhxmXbFuXlheAx3tmumixCAOGwM6xkEXlvNpK4hi5bY5O4hkbiGpoRxrVT9b00cAshhEhIkoUQQoiEsroaSpmnb92HuSGoATNmz0eZjUoIIcaebC9Z/BL4hdZ6DubJbHclWF4IIUQKZG2yUEpVYR6X+bA76WFgoTs8thBCiDTK5mqoKZhnF4QAtNYhpdRmd3pdRiMTn3iO42BZ8Z5bM/LlR9NIth373uGsy3EcHMC2rEHfH3Yc7GFuK96ysdPDHsMT2ZbVNz2y/eiYvdYTmR8tMjd6euT9XtsFt8flCIdMio5/sGmJeMU/mrI5WYxYRcVgDycbXGVl8ShGMnpSGVdjSxclRbn47OR+4B1dvVx48/OccthMaiqLmDdzPAW5fmzbIhR2+tbT0NxJYX6AvBxzuC1bsY3dp42jMD+w0zpDYYeOrl4K8gLYFnFPNqGwQ29viA3bW7Eti7LiXBwHxpfl896qejZsb+PAPScSCjs88Jxp5jrvuLk4wPqtLdz5h3f49vn7Ul1ewHOvr2fezPFMm1hCd2+INZubKS/Oo6W9m8lVxaze1MzPHnmT4w6azvjSfObOGMeHaxuZXlNCXo6fVZuaeOWdLZz5md1obe/hyZc/piw3zN671fDR1i6Wv7eRs46eQW1VPss/3Mq/lq7Fbzscf+BUSvN9PPzke3S2t3HRqftR39LDa+9s5MM19ey7exUdnV1s2tbMqUfNxnJCPPXyappbO9l7VgU2ISaOy2fNpkaaWzoYX5LDus1N7Lf3NNpCAd5d10ZjUxuzawrZVt9CV1c31WV5dHR0UV7op6OzG7/lUF7opzDfz6Sa8XzYEmLN+nqCXe3k0ouPEHmFRTQ1t1NZEqCrq4fS0kK6e8O0tndTmOsjHA4TDIbo7e0lx2eR43PwWQ7hcJiADT0FlXxUdTSrN7cS7unACvYwc/ZUPtrYTFtHD+1dQQiHOHSviYwv8fPKkg8pooOy2pksX2PulcoN+MjLsehpaaLU7qDU7qDM7qDQ6qYkJ4wV6iaHIEWBED7COKEgFg5lhX7CwSA9Pb34LBOX33JwwmEswtg42Jb5v9lfwZOhA+lsbaHE7qTU6qSiEOjthlAPAYLkWCFyrCABK0gO5rWfcN86zL8wthX1GgfbcrCiTuOR19FHd2RaNwFeL/k0b2wKUWJ1UGJ3Umh1k2f1kmv1kuf+y7WC+KwQPhz8hPDbDj7C9OAnfN71TJyS7JOBk5O1Awm61VArgQq3VOHDNHLP1lonKllMA9Y0NLQNq69xZWUxdXVJP5p22JZ9uJ2q8nxqq5NLANFxdfeEyM3x8ebKOqbXlFBWlDtg2cbWbhpaulj6wTZOO3o24bDD1oYOJlcVUd/cSWmhWf7Rf37M4t2rqBlfyGU/+w/TJxZz6efmA1CQ66e+pYvyolwefm4lJxw0jcbWbgrzAkyuKuK1D7byq79+MGC7n148heMPnMb197zO9JoSzvvMblx624tMrizkiAU11DV28NTrG6kucDjvmFk8vWQdUyrzOWiPKnoI8PCzmsYdTRRY3UwqsRhfAD4rDKEg5UV+yoryWLO1jZADdTvaKLS6ybd66cHH+vAE/ufyM7jyzldobO3GRwg1Lgyt2yi322kN55Fn9VJkdzHebsNnhbFwKHDXUV1RRHNHkJaOID4rjI8wAdvBckKU2h0ECLo/egb88KNZOASs/sd5Bx0bvxX2XDZbhB3zaXxW/2cKY9Fr5dIdgjyrlxA+ehwfDhZ+twt+5PRnWTbBMPj9PrqCYPt8hLAJhc2V9yR/I42hAgrsHnItM7r2Bz01rA1WMr+ii7JgPQW9O7Bj9umOUCFLemYyv7yT8lAdeb0t2Oy8L4OWn7CdQ8jOpdvxE7b9WLaP3hA0dQSxbZuSkgJz6nagN2zh9/uxfT5zercsmtt7mN2zwvO7Cjo2jh0g7AsQsgKE7QAhO4ewFYBADr1hG8cyqQHL6nvtWJb7v42DBZbt7rcIkyo6ekLkBnz4LIvuYJiihg+o8TftFEfIChC0cwj5cgnZuW4MPsKWD8fy0RM22/MVlrDP6RfS2jmkwwAwN+W5F9nTMQ/z6o82W5MFgFLqBeBurfUDSqmzgC9rrY9I4q3TyPJk0dLRw+W3m7vp83N9/ODCAygpyAFMkffef3zIwfMnMmdKGQDbGzt4f30zem0DK9Y10trRy1VnLOCHD5nRom+5+EDyc/08vXQ9h+1dw5V3vtK3rdqqIna0dtPW2ctln5/P7Y+/Q0Gun2P3r+Xxf68G4POHzeh7nZfjo6snRH6Ohb+3g5kVNm1NjcyszmPtNnNVN3/GOFasbWCSXU+R3U2pe5WXYwXx+2yCIXOydbAIY1FodeO3wvQ4PgByrNEbsqvH8ZFjhQg5FptK9mJ7fTNzCpooDjXFPal3kUdX2AYsfPlFNPf46O4xSSLXb5GXl0tXEJo7QwQCAfJKy2nosBhXksdHm1ooyAswviyfprYeivMD5AZs6pq7aO8KMmfGBEK+PJZ/sIkiu4f5u02mvddma1M3tRNLmVZTTkFRPv96YzNt3SEW7jaRZR+3sL2ukXnTyugJWSyeO4GN9Z04lk1FeSHPvL6JynGFLJ47kbBj88+3NrPv3Bpe/6ieWZPHsceMSrqCDlsbu7jtoaXkWz2ce/RUZk6pwPHlEAgEWLmplbdWN7Hv3AmMH1eEZfsIhi3KS/JZ+sFW1m9pYt/dxlE1rojcvHwsy
+Ld1Q2883EDpx41i9WbW8gN+AiGw8ysKWXNlhZaO3qYN6OCto5eSgpz2NLQTmVZPj7bwrIsXnx7M6v++SdmB7YxsXYK+eXjee2NVXwq/z3CDvhKq/CVT4KyGp5avo1AToBPHzqP59/YxOLmZwgQwiqtpmjyTHpzyrEKzT/b/d/KK8ayfXGPjeb2Hory/fjswZtn//nGRl54/lUm+Ro5/MC5zJozFaugjC4nwJYd3cycVOr5vtE+VzS39/Dtnz/P3MAmaqdUc+wR87EKShN+ztGK65OcLHbDdJ0txzwA/RyttU7irdNIc7Lo6Q1x55/fY/bkUlZvbuGIBZOYOqGYzfXtqNrynZZ/eul6Hvln/0PeLjllHvuoKjq6gqzc0MTtj79DbsDHJafMIxR2WLpiO6++v3XAOvaeNZ63PjZPTtxjWjm7TxvHH15YRVF+gK7OLip9LUz27aDYLcaOs9vpcvyU2p0ErBB/61hIdV4X1eHtBLGp8bcwya6n1cmj0m6lyE789MxucmgO5bIjXESPlUdb0FQ11VYXUVpcwHsfb8OyHCoqq8gryOXjNdsBOOLgeazY2EZxYR6ObbNMNxDsamdKdSmH7zcbJ5DPii09zJpeRV1LiJLifIKORUtbN2+trOPldzdx1md2Z+G8abz03nZ+/9TbnF/0byb6mgg6NuXT5pBTWUvppKm0WyX8/rUG9EcbOOmouSycNx3Hn09Hd5AityqsNxjiw/VNlBflMrnKVF+Gww7bGjuYWFE44DO3dvRQlB9IWMe+bmsrpUU5O5X6YOdjbDTbRFasa6S+qZND9qoZ8ntH++T35so67vijebTHZZ+fz96zx/OTR99i3ZpNlJQW8/2LDu1btrM7SMBv4/fZ3P+05pU31xLG4udXHkXNxLKUXsAt+3A7d/75PQC+ffY+cZNDrNHeX6FwmAt+9AIOppR+2lGzh7WeVCSLrG6zcJ+D7Pms50zr6gkSDDl9J5uVG5p4Z1UD76xqAKCjK0hXT4h121r55RWHkRMYeFUQWS6ipaOXWx95iw/XNRJyE1x3b4ifPPo2ADn+/iujfXevYtmKrbz98Xb2zNvGibWNbNu8lVBjHpcVt5Br9VKd10Ig6uo95FjsCBeRa/XSZRdQFG7jitJ/9M3zWQ5dvmLWh8Zj9XYSrNmLnMmTaA3lsqM7wKQp1dz/7GoIB/n8Zxfx6gfbOXLhZMaVVTAuN4/a7h7A4r3VDSxZsY0jj9udgN/Hu0vX09zRw6cOn4XjOPz4h/9izxkVnLjPXuy7T//nn3skbN3RQUVJHgH3s+5Va+YVVvUvVwVMmVrDhMkTWTBvApZtccheNdQ1d/KLV8xJ+ZunL6B2qknQxZXFdNW1ctzRteSUjmf+/BlYfhsL+r47gIDfx54zKgZ8J7Zt7ZQoAIrdEqCX6BP+1AnJty+NZuP57lPLYerOFyiZUFTQv4+L3dclBTm0OvlUFRYMWDY/t/90VJgfoJsAOQGbgD/5K+rhKinM8Xydbj7bpjA/QFtnb0bj8JLVySKbffc3S6lv7uK3Vx8JmBNdtIaWLuqbuwBYtamZ2VPKeOxfq3hnVT1fPHIWa7a0DFj+o41NvL9mR9zt9QTDVJfnMtdazeftJZw5LqqA1ZKPnVeIP7iDFn8x/oIKrMkLyJs0g66SqTh5xRSXlLB+TSPvb21h/szx3Pa7Z1GBLUzfYw/yauawbGU9XzlhLm89s5KX393Kjw8/kNySPHKB8e5mLvjyHvT2hsnN8XHi5Nq+zVeOK6CuziSmBXMqWTCnv3fzMfv2L2dZFnd+49C4VQITxhV4To+VG/Bx8PyJA6YV5fWflCaO3/kEX1aUyxeOmJXU+sXoiU7IkcQRnTQSva/IoxNEKpRmSbKIbL+ts7dvP2ULSRbDFEkEkeqDdVsHFvkaW/urcG75/VsctXAy67a1sq2xkzseN8XyCeMK+pLMms0meVx88jwcxyHgt3lnVQMrPlzLuFAd+xVvZZ/AKpzeLsLt1TTWHsmqLa1Uz5jN7gcfQXdbkBt+t5zTjpzF/ntM6Nt29OE2f2YF82dW0NrRw7ZwGdu6y5hWOZv95tWw3zxTZXHkwsmUF+cxriRvp89sWxa5OSO7yov0iBptA65g03SCEYlFl8KK883rSJIYrIRWlO93/0/PdxlJELk5PnIDqS/JDCZy/A62fzJBksUQBENhHMfBjupaes8/PmT25FI+2tQ8YNlQTFvJ26vqsW2LfVQlKzc00drRy4I543nytfUAbGs0XRcK8vzMnVpOcNUSVOhFQgUrAAfHsinc/TBCE/fCV7sXRZZFbdT6x5cG+OmlByVVnVGYF101MPCAnD6xhOkTS5LZHVkl+qRiJ9n1V6ReQZ4fyzIXGvm55iQcSewlhfETQZGbWNKVLPJyfAT8NqVZcIIuLhyYVLOFJIshuPa3S9ne2Mn3vrxv37SX3t3CS+8OHPq9tCiH5raeAdNqxhfy0UbTgPr9L+/Hu6sbKC3M6UsWEcU9dXQ+fR+h9W9jlVSRs/BEfDW74auopWryhEEbrZKt944+mQ72g/0k8bpnQ2SebVkU5Qew3d5RkGzJIr3VUJZlUVKQk/EqKICSvmq67DqmJVkkaUtDO1saTJXRfU8N7JA1fWIxXT0hxhXn8v7aRqZWF/NOm2nAPnqfyTy3fCMt7T10docoLjQH5EF7TkSvbwQc5vi3EsKiytdC2YsPEwrkkLvfqQTmH4NlpXZElmz4cYyGdJ1UxNAVF+QMuLO7JIkr50g1VDovAmZPKR3QdpEp40ry8NlWXwkjW0iySMIbK+v4udv9D0zPp2jXnLOIUNjhnn+sAKC2uph3VjVw9D6TOeNTc2hu7+HtVaaLa/TVwvRxNl+b9BazOvvXTfVcCo+6ELsgua57I5VtRd3hkmSRvWoqCgaUZqdOKObMT81h79nj476nr2SRl77v9YIT9kjbtgZzxIJJ7D61PONtJ7EkWcTRGwzh99ls3dHBkg+27TT/jssP4Wu3mcd8W5aF32f1tQWUF+Vw5zcO7fuySwpy6Ok1d4ZGit7htgZ6/ngds7paaa7ZnzdWtdAULuCMz16E7Uvf15I3wgbrbBHpdjmxIrkeVSJ9Ljhx4EnYtiyO2mfyoO8pyAvwhcNnDuhZN1bk5/qzst1QkoWHru4gF/743xy7Xy1PLlnvuUxhXoAjF04acPNOpMhckBcY0OunOKpdoKQgBycUpPO5O3FCveR/9kq2M5k/v/cGAOekKVEsnFPJGyvrMjY43mizLYtvnbWQ6iS734r08fuGV5V67P5TRzkSMRKSLDz0BE0pIF6i2HuWKT6f9Wk1YHpBXqSedeBuHdB9MNeh85nbCW9fRd7Rl+CfPI+SpmEM4jJCl5wyL2WjU2bK7MllmQ5BiF2WJAsPg/W83Hf3Kr583FzPeZH61cKYetZIO4WFQ+GyewlteJfcg88lMGOxOz/97QaWZbFrlCmEEOmQtQ8/yqTBrrijh6OItefMCo7dr5YpVQOHRi8rNsNQnD55Pc6Gt8g94HRy5vaPhzjSG92EECLVpGThYbCxFeMl
CjA9OLyGlJhebnPdId2Ur3gZ/9QFBOZ9ajTCFEKItJFk4WGwkXhjBwRMJNzeSMfj36W8qxW7rIbcw873bFS+4tS9pfunECJrSbJIIPbxhoOVLGI5jkPXf+7F6e0m/4Rv4ZswJ27voz2mjxtxrEIIkSrSZuEhumBRVZ4/YF7OEJJFcPVSQuvfJnfx5/FPVLtMN1UhxNgjycJD9KPcd04WyVVDOY5Dz5t/wy6fLG0UQohPPEkWXqJKFpVlA5NFstVQoY3vEd6xkZz5x2AleKSjEEJkOzmLeYhu3o4daC8nkHiXOU6Y7tcfxyqqwD9r/1GOTggh0k+ShYfo3lB5Ob4B49gk84jH4JplhOvXkrvoc1g+6eEkhPjkk2ThIbqBOzfg48xPzSHSNp1MA3fPm3/HLp+Ef9YBKYpQCCHSS5KFh0iyKMoPcID7iNLI4BiJ7rMIt2wn3LCegDpU2iqEELsMOZt5iPSG+q/DZ/Y1aCdbsgiuWQ6Af/rC1AUohBBpJsnCi1uy8LorIlFvqN61y7ErarGLx944/EKIXZckCw99TRZR2aKvZDFINVS4o4nwtlX4p++TstiEECITJFl4iPSGGjiIt3kdGORBLr0rXwIc/NMXpTA6IYRIPxkbahDRo3NcdeYCXnlva9z7LJxQL73vPoNv8jx85ZPSFKEQQqSHJAsPXoPOzqwpZWZN6c4zXKFtH+N0thCIek6FEELsKqQaykOkN9RQxv0Lbf4QLAv/xN1SFJUQQmSOJAsvfb2hks8Woc0rsMdPw8otTFFQQgiROZIsPHj1hhp0+WA3oe2r8EmpQgixi5Jk4aG/N1RyQls/gnAIf83uqQtKCCEySJKFh74G7iSzhWmvsPFNmJ2ymIQQIpMkWQwi2TaL4OYV2JXTsXLyEy8shBCfQJIsPPRVQyWRK5yeTsJ1a6QKSgixS0vpfRZKqXuBo4F6d9JjWusb3XnVwP3ANKATuEBrvSTRvHTwuM0irtDWj8AJ45NkIYTYhaWjZHGz1npv99+NUdN/ALyotZ4DXAI8oJSykpiXepGus0kULULbV4Fl4auemeKghBAiczJZDfVF4JcAWuuXgG5gURLzUm4o7duh+rXYZTVYgbxUhiSEEBmVjuE+vqGUuhBYBXxLa71CKVUBWFrr+qjl1gNTlFKr480DXh/KhisqioYVcNvmZgBKS/OprCyOu5zjOKxvWEfBjL0HXW40pWs7Q5WtcUH2xiZxDY3ENTSjHdeIkoVS6g2gNs7sauDbwBatdVgpdQ7wlFJqxki2ORQNDW2Ew0NpgTAiXWdbWrqoq2uNu1y4vZFQexO9xZMGXW60VFYWp2U7Q5WtcUH2xiZxDY3ENTTDjcu2rbgX2SNKFlrrRI+D2xS17O+UUj8FJmut1ymlUEqNjypB1AIbtNYN8eaNJNahSLY3VKhuDQC+8dNSHJEQQmRWStsslFKTol4fA4ToTyCPARe58w4G8oHlScxLuWTbLML1a8GysMfHK1wJIcSuIdVtFve53WDDQAtwotY66M67GtPL6VxM99iztdbhJOalXpLZIlS3FrtsEpY/N+UhCSFEJqU0WWitjx5k3lbMPRhDmpcOfUOUD5ItHMchXL8W35T56QpLCCEyRu7g9pDM2FBOeyNOZ4u0VwghxgRJFoMYrBYqVO82bldOS0ssQgiRSZIsPCTTGypctxYsG7tCGreFELs+SRYe+u/MiJ8tQg3rzZ3b/px0hCSEEBklycJL39hQ8RcJN2/DLpuQnniEECLDJFl4cPqewR1nfjiI01KHXSrJQggxNkiy8OAweLZwWurBCWGXTUxfUEIIkUGSLDz0lyy8s0W4eSsAdml1ukISQoiMkmQxmDgli/5kIdVQQoixQZKFh76us3Hmh5u3YuUWYeUNbwh0IYT4pJFk4SHRDdzhpq1Y0hNKCDGGSLLw0pct4rdZSBWUEGIskWThIdIbyvbIFU5vF05HkyQLIcSYIsnCgzPIYOjh5m2A9IQSQowtkiw89A1R7lENFW7ZDoBdUpXWmIQQIpMkWXhwBnlst9NqnvRql1SmKRohhMg8SRYeBmvfDrfWQW4hVk5BWmMSQohMkmThZZA7uMOtddjFUqoQQowtkiw8DDY2VLilDrt4fHoDEkKIDJNk4SHeqLOOE8ZprZfGbSHEmCPJYjAx2cJpb4JwEEtKFkKIMUaShYf+saEGZotwax2AtFkIIcYcSRYe4vWGkm6zQoixSpKFlzj3WZgb8iysooq0hiOEEJkmycJDXzVUTMki3FqPVViO5QtkICohhMgcSRYe+ocoH5gtnLZ66TYrhBiTJFl4iDfcR7i9EauwPL3BCCFEFpBk4WnnaijHcXAkWQghxihJFh48Sxbd7RDqxS6QZCGEGHskWXjo7zrbX7QIdzSaaUWSLIQQY48kCy+R4T6iq6HaTbKQkoUQYiySZOEh3HcHd9Q0N1lIm4UQYiySZOHF4xZup20HYGEVlGUiIiGEyCj/SFeglDoL+CYwF7hca/3zqHkFwD3APkAQuFJr/feRzEuHvseqRk0Lt2zHKhqH5RvxLhNCiE+c0ShZvAWcBjzkMe9KoEVrPQs4AbhbKVU0wnkp53i0WYRb62RociHEmDXiZKG1fk9r/QEQ9ph9KnCXu9xHwDLg2BHOSzmvnrNOy3YZQFAIMWalus2iFlgX9fd6YMoI56Ve39hQpmjh9HTidLZgFUvJQggxNiWsgFdKvYE5eXup1lqHRjek0VNRMbyaK2d9k3n/uEIqK4vo3tZAG1A2uZaiyuLRC3AYKjO8/XiyNS7I3tgkrqGRuIZmtONKmCy01gtHsP71wFSgzv27FvjXCOclraGhjXA4zkBPg4i0WexobCeAQ3DjBgDawgV01rUOeX2jpbKymLoMbj+ebI0Lsjc2iWtoJK6hGW5ctm3FvchOdTXUY8CFAEqp2cBi4KkRzkuDgb2hwh3N5m/pNiuEGKNGnCyUUqcrpTYCXwC+r5TaqJSa686+BShTSn0M/B24QGvdOsJ5KefE3GfhRJJFfkm6QhBCiKwy4psGtNYPAw/HmdeOSSKjNi8d+rrORv7ubIacAix/TqZCEkKIjJI7uD0NrIZyOpqxC0ozF44QQmSYJAsP/dVQ7t+dLVj5kiyEEGOXJAsPsY9VDXc0Y0nJQggxhkmy8BA73IfT2SwlCyHEmCbJwlP/vRlObxf0dknJQggxpkmy8NBfsrBwOlsAsKXbrBBiDJNk4SH6Nou+eyykZCGEGMMkWXhwop6UF+6M3JAnyUIIMXZJsvAQfQe3lCyEEEKShbeokoXT2QyWhZUnbRZCiLFLkoWHvr5QbpuFlVeMZcuuEkKMXXIG9BA9NpTckCeEEJIsPDmRsaHcrrNSBSWEGOskWXiJel6S092OlZedT8ISQoh0kWThYcB9Ft1tWLmFGY1HCCEyTZKFh76us+EwdHdg5UmyEEKMbZIsPLnZorcTcLByvZ9JK4QQY4UkCw99vaF62s3/Ug0lhBjjJFl46KuG6naTRZ6ULIQQY5skC09utuhpA6RkIYQQkiw89JcsOgCkzUIIMeZ
JsvDQd5tFj1RDCSEESLLwFBminO42wIKcgozGI4QQmSbJYjDd7ZBbIIMICiHGPDkLeojuDSWN20IIIcnCU6QaypFkIYQQgCQLT9EN3NK4LYQQkiw8RaqhnC4ZRFAIIUCShbcB1VBSshBCCEkWHhzAIgw9HVKyEEIIJFl4chwosHoAuSFPCCFAkoUnB6c/WUjJQgghJFl4cqDI7gZkXCghhABJFp4coMByk4VUQwkhBP6RrkApdRbwTWAucLnW+udR8+4Fjgbq3UmPaa1vdOdVA/cD04BO4AKt9ZJE89LBcRwKbamGEkKIiNEoWbwFnAY8FGf+zVrrvd1/N0ZN/wHwotZ6DnAJ8IBSykpiXloU2lKyEEKIiBEnC631e1rrD4DwEN/6ReCX7jpeArqBRUnMSznTG6obM+Jsfro2K4QQWSsdbRbfUEq9q5T6s1JqdwClVAVgaa3ro5ZbD0wZbF4aYgXcaiir24w4a0mzjhBCJGyzUEq9AdTGmV2ttQ4N8vZvA1u01mGl1DnAU0qpGcOIc1gqKoZfhVRg9xAoLKGysngUIxq5bIsnIlvjguyNTeIaGolraEY7roTJQmu9cLgr11pvinr9O6XUT4HJWut1SimUUuOjShC1wAatdUO8eUPdfkNDG+Gwk3jBGJFqqJA/n7q61iG/P1UqK4uzKp6IbI0Lsjc2iWtoJK6hGW5ctm3FvchOaR2LUmpS1OtjgBAQSSCPARe58w4G8oHlScxLuUjXWbnHQgghjNHoOns6cAtQDpyklLoa+LTb6H2f2w02DLQAJ2qtg+5br8b0cjoX0z32bK11OIl5Kec4jpsspNusEELAKCQLrfXDwMNx5h09yPu2Yu7BGNK8dCmwuqXbrBBCuKSrj5dwiHyrV6qhhBDCJcnCgy/YCYCVW5DhSIQQIjtIsvDgC7t3b+dIshBCCJBk4ckX7DIvcvIyG4gQQmQJSRYe/CG3ZBGQoT6EEAIkWXiSaighhBhIkoUHX8hUQ1lSDSWEEIAkC0+RkgVSshBCCECSxU7WvfcWk7e8AIAVkJKFEEKAJIud1L+/hFK7k5BjYflzMh2OEEJkBUkWMTr9ZQD4rKGPViuEELsqSRYxOnPKMx2CEEJkHUkWMbpyx2U6BCGEyDqSLGL05pRmOgQhhMg6kixi2b5MRyCEEFlnxM+z2NXYFtzSfBx5BQX8X6aDEUKILCHJIobPttgYqqDUkW6zQggRIdVQMWzbAsCRnrNCCNFHkkWM/mQh2UIIISIkWcSwLSlZCCFELEkWMSIli3BYsoUQQkRIsojRV7JAkoUQQkRIsojh6ytZZDgQIYTIIpIsYkgDtxBC7EySRYxINVRYkoUQQvSRZBHDdveI5AohhOgnySKG9IYSQoidSbKI0d8bSgghRIQkixiRZCGEEKKfJIsYka6zQggh+kmyiGFJshBCiJ1IsoghJQshhNiZJIsY0mYhhBA7k2QRw5aShRBC7GTET8pTSv0COAroBtqAr2utl7nzqoH7gWlAJ3CB1nrJSOalmuQKIYTY2WiULJ4E9tRa7wX8AHgkat4PgBe11nOAS4AHlFLWCOellM+WwpYQQsQacclCa/33qD9fBSYrpWytdRj4IqZ0gNb6JaVUN7AIeH0E81LKklwhRNo4jkNbWzOdnW2Ew6G4y23fbhPOwqGgP6lx+f05lJdX4vMlnwJGnCxiXAo8obUOK6UqAEtrXR81fz0wRSm1ejjzGGKyqKgoGvIHaO7qP2ArK4uH/P5Uy8aYIHvjguyNTeKCdevW4Thhqqom4vP5saSDSco5jkNrazMdHY3MmDEj6fclTBZKqTeA2jizq7XWIXe504AzgEOT3nqKNTS0DXmMp+bmjr7XdXWtox3SiFRWFmddTJC9cUH2xiZxGS0tbVRXTwZsQiGHeAPt+P02wWD2XcF/UuPKzy9m27bGnb5r27biXmQnTBZa64WJllFKnQLcCByltd7mvq9BKYVSanxUKaEW2DDceYniGA3SG0qIdHKwpO437YZTghvxt6SUOh74CXCM1nptzOzHgIvc5Q4G8oHlI5yXUnKfhRAiW9x443U8/vgjiRdMg9Fos7gH6AH+oJSKTDtKa90AXI3pyXQupgvs2W7DNyOYl1JSshBCRASDQfz+0W7azfy2hmM0ekNVDjJvK3D0aM5LNUkWQoxtBx+8iC996Su8+urL7LffAZxxxtncccdPWbXqI3p6eliwYBFf+9r/sGnTBr797W9y//2PEgwGOe64ozj33C9zxhnn8Pzzz/Kf/7zAddfdyMMPP8Dzzz9DKBQkJyeXK6+8mtmzlee2Tj7589xww7U0NNQzYcJE7Kiu/H/5yx959NGHCARycJww3/vezUydOi1t+yV701iGSK4QInNefncLL72zZafpljXyp1cePH8iB+05Mallc3Nzufvu3wFw883fZ++9F3L11d8hHA5z/fXX8MQTf+XEE0+hvb2d+vp6tm7dzPTpM1m27HXOOOMcli9fyqJFiwH4zGeO4/TTzwLg9deXcMstP+BXv7rXc1vf/vb/stdeCzj//AvYtGkj5513BvvtdwAAd975Mx588HHGjx9PT09P2rvsSrKIITflCSGOPfb4vtcvvfQiK1a8z+9//yAAXV1dVFVVA7Bo0WKWL1/Kli2bOemkz/Hgg7+jt7eXZcuWctZZ5wGg9Qruv/8eWlqasW2bDRvWx93WG28s5/LL/xeASZMm9yUcgIULF3Pjjddy0EGHcMABBzNp0uSUfPZ4JFnEkJKFEJlz0J7eV//p7qKan18Q9ZfDTTf92PPkvM8+i1m+/HU2b97Ed7/7fd566w2ee+5pHAdqaibR29vLd75zFT//+a9Rajfq6+s4+eRjB9lWfDfddAsrVrzP8uXLuOyyi7jyym9xwAEHjeRjDolcRseQNgshRLSDDjqUBx64j1DI3LDb1NTE5s2bAFi8eF+WLHmV1tZWqqqqWbRoX37zm7v6SgQ9Pd2EQqG+ksgf//jYoNvaZ59FPPHEXwHYvHkTy5aZ+5CDwSCbN29i7tx5nH32eey77/589JFOyeeNR0oWMSRZCCGiff3rV3Dnnbdz3nmnY1kWgUAOl112BTU1k6iqqqagoID58/cGTElj27atLFy4CIDCwiK+/OUL+cpXzqGkpJQjjjgqwbau5IYbruW5555m4sQaFizYB4BwOMyNN15HW1srlmVTXV3NRRddmtLPHctyRtpqlJ2mAWuGcwd3Z3eQS376IgC/vfrI0Y9sBOSu36HL1tgkLmPr1nVMmDA14XKf1DulMyWZuLz2fdQd3NOBtQPmjW6In3xSshBCiJ1Jsoghj1UVQoidSbKIIcN9CCHEziRZxJBcIYQQO5NkEUPG0xdCiJ1JshBCCJGQJAshhBAJSbIQQogM+81v7qK3t7fv79F6jsWll17Ayy//Z8TrAUkWQgiRcffc8+sBySIbyXAfQgjh6urq4oYbrmXt2tX4fH5qa6dyyin/xc9+ditz5+7B+++/i9/v55prvsc99/yaNWtWUVVVzY033kJ+fj4dHR3cdpsZ8A/M8ORnnn
kuABs3buCWW26iqakRn8/HBRdcwv77H8itt/4QgIsvPh/LsrnjjrsAWL16FZdddhHbt29jjz325JprrseyLNrb2zyfr+Hz+VizZjU33XQ9XV2dzJgxk56enlHbN5IshBBZo3fly/TqF3eablkWIx2aKKAOJTBn8FFalyx5lY6Odh54wAz419LSwscfr2Tt2tVcc811XHXVNdx66w+54oqvcddd91BTM5HLL7+U5557mhNOOJl7772bcDjM7373CB0d7Vx44fnMmDGLAw44iOuvv4aTTjqF448/mTVrVnPppV/hgQf+wBVXXMWf/vQY/+///ZaCgv4RaFevXsVtt92Jbdt86UtnsmzZEhYv3p877vhp3OdrfP/73+ULXziNE044kbfeepuvfvXLI9pn0SRZCCGEa9as2axdu4Zbb/0hCxbsw4EHHgxAbe3UvqfbKaXYtm1L30iySu3Oxo0bAFi2bClf//qVWJZFYWERRx/9aZYtW8pee+3Nxx+v5LOfPRGA6dNnMGuW4v333+Xggw/1jOWQQw4nNze3b5ubNm1k8eL4z9dob29jzZpVHHPMZwGYN29PZsyYNWr7RpKFECJrBOYc5Hn1n64B+yZNmswDDzzKsmWv89prL/OrX/2Cyy//X3JycvuWsW0fOTk5UX/bfcOXj6bc3Oht+KK24f18jfb2tlGPIZo0cAshhGv79m3Yto9DDz2cyy67gqamRlpaWpJ+/6JF+/LEE3/BcRw6Otp5/vlnWLx4PwoKCpk1aw5PPvl3ANauXcOqVSvZY489ASgoKEz6ZB/v+RqFhUXMmDGLZ599CoAPPniP1as/HsrHH5SULIQQwrVq1cf88pc/ByAcDnHWWecxfvz4pN9/3nn/zU9/+iPOOedUAI455rPsv/+BAFx77Q3ccstNPProQ/h8Pq655nuUl5cDcNppZ3LZZReRm5vX18Adz2DP17jmmuu56abrefDB+5g+fSa77TZ3OLvBkzzPwsP5N/8TkOdZJCtb44LsjU3iMuR5Fqkhz7NIo6qy/EyHIIQQWUOqoTz84ebj2dGQ2sYiIYT4JJGShYfcgA+/T3aNEEJEyBlRCJFBFo6TfXX+u7rhtFVLshBCZExOTh5NTfUEg70jvkNbJMdxHNrbW/D7cxIvHEXaLIQQGVNeXklbWzM7dmwjHI5/Y5tt24TD2VcC+aTG5ffnUF5eOaR1SrIQQmSMZVkUF5dRXFw26HLS1XhoUhGXVEMJIYRISJKFEEKIhHbVaigfmLsRh2sk700liWvosjU2iWtoJK6hGU5cUe/xxc7bVYf7OBgYnWcJCiHE2HMI8FL0hF01WeQCi4EtwOiPHSyEELsmHzAReB3ojp6xqyYLIYQQo0gauIUQQiQkyUIIIURCkiyEEEIkJMlCCCFEQpIshBBCJCTJQgghREKSLIQQQiS0qw73MSxKqTnAfUAF0ACco7X+KEOxrAW63H8AV2mtn1ZK7Q/cBeRjHqh+ltZ6ewrj+DHweWAasKfW+j13etx9lY79OEhca/HYb+68lO87pVQFcD8wE+gBPgIu1FrXDbb9VMeWIC4HeBeIjGl9ttb6Xfd9JwC3YM4Vy4Evaa07Risudxt/Bqa7228Dvqa1fivTx1iC2NaSwePM3c61wHW4x3+qjy8pWQz0S+AXWus5wC8wOzeT/ktrvbf772mllA08AFzixvgicHOKY/gzcCiwLmb6YPsqHfsxXlwQs98A0rjvHOBHWmultd4TWAXcPNj20xSbZ1xR8w+M2meRRFEE/Bo4QWs9C2gFrhzluADO1VrvpbVeAPwY+K07PdPH2GCxQQaPM6XUQmB/3OM/HceXJAuXUqoKWAg87E56GFiolBraE0JSax+gS2sdGbPll8AXU7lBrfVLWusN0dMG21fp2o9ecSWQln2ntd6htX4hatJrwNQE2095bIPENZhjgWVRV+y/BE4dzbjc2Jqj/iwFwtlwjMWLLcFbUv5dKqVyMQny4iS3OyoxSbLoNwXYpLUOAbj/b3anZ8qDSql3lFJ3KqXKgFqirqS11vWArZQal+a4BttX2bAfY/cbZGDfuVd0FwN/TbD9tMYWE1fEC0qpt5RSP3BPRsTGBawnRd+jUupupdR64EbgXLLoGPOILSJTx9n3gAe01mujpqX8+JJkkb0O0VrvhRkQ0QJ+nuF4Pimyab/dgannzrbvLjauWq31Iky13lzgO+kOSGv931rrWuD/MG0kWSNObBk5zpRSBwCLgDvTsb1okiz6bQAmKaV8AO7/Ne70tItUsWituzEHxkGYK7u+qgOl1HggrLXekebwBttXGd2PcfYbpHnfuQ3ws4FTtdbhBNtPW2wecUXvsxbgbuLsM8wVakq/R631/cARwEay7BiLxKaUqsjgcXYYsDuwxm1knww8DcwaZLujEpMkC5fbM+At4HR30unAm1rrunTHopQqVEqVuq8t4DQ3tuVAvlLqYHfRi4DH0h3fYPsqk/txkP0Gadx3SqmbMPXEJ7snk0TbT0tsXnEppcqVUvnuaz/wX/Tvs6eAxUqp2VFxPTrKMRUppaZE/X0CsAPI+DE2SGxdmTrOtNY3a61rtNbTtNbTMEn1GEyJJ6XHlwxRHkUptRumO1450IjpjqczEMcM4HHM2PI+4APgMq31FqXUgZieH3n0d4HblsJYbgc+B0wA6oEGrfUeg+2rdOxHr7iAE4iz39z3pHzfKaX2AN4DVgKd7uQ1WutTBtt+qmOLFxfwI3e7DhAAXgEu11q3ue87yV3GB7wJnKe1bh/FuKqBvwCFmGfP7ACu1Fq/kQXHmGdsQBMZPs6iYlwLHK9N19mUHl+SLIQQQiQk1VBCCCESkmQhhBAiIUkWQgghEpJkIYQQIiFJFkIIIRKSZCFECiml/k8pdfcw33uvUuqG0Y5JiOGQIcqFSCGt9U2ZjkGI0SAlCyGEEAlJyUKIKEqpGsxAe4diBtv7qdb6dqXUdcA8zJ28n8U8POhLWuu33fddBVwGlGBGQP2q1vp5932ztNZnucudCPwAmIQZIuJirfUKd94C4DeYsZv+gbmrOjq244EbMA98+gC4SGv9zmDbH929I8YyKVkI4XKH7v4b8DbmZH4UcLlS6hh3kZMwY+qMAx4C/qyUCiilFHApsFhrXYwZq2etx/rnYJ69cDlQiUkIf1NK5SilcjAPdLrfXf9jmCcBRt67APPgnQsxT4e7C/irUio32e0LMRJSshCi32KgUmv9Pffv1UqpX2MGilsHLNda/wFAKfUT4ArM08q2ALnAXKVUXcxzBqKdCjyhtX7WXcePga8DB2IeqhMAbtNaO8AflFLfiHrvBcBdWusl7t/3KaX+z93+piS3L8SwSclCiH5TgRqlVFPkH+YZBtXu/L4hsN3hvTcCNVrrjzGlheuA7Uqp37vVWbFqGPgQmrC7zknuvE1uooiIfvDQVOCKmNimDHH7QgyblCyE6LcBM0Ls7NgZbttD9HDVNuZZApsBtNYPAQ8ppUowVUQ/BM6OWc1mYM+odVjuOjdh2icmKaWsqIRRi3lWdiS2G7XWN3oFnuT2h
Rg2SRZC9FsKtLqNxbcDPZgHzeS78/dRSn0O8zjSy4Bu4DW3zWAS8DLQhRkC3Oex/keBq5VSRwEvYqqgujHDggMEgcuUUndihlvfF/iXO+/XwJ+UUs+5cRYAh7vrqUly+0IMm1RDCeFyn+V8PLA35lkP9Zgnx5W6i/wF0+7QiLlq/5zWuhfTXnCzu/xWoAr4lsf6NXAWprdVPSYhnKC17tFa92Cez3Ee5rkJpwJ/jHrvMuArmMd3NgIfu8uS7PaFGAl5noUQSYjtAivEWCMlCyGEEAlJshBCCJGQVEMJIYRISEoWQgghEpJkIYQQIiFJFkIIIRKSZCGEECIhSRZCCCESkmQhhBAiof8PwOWbZ1y9wrIAAAAASUVORK5CYII=", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "开始测试!\n", + "环境:CliffWalking-v0, 算法:Q-learning, 设备:cpu\n", + "回合数:1/20, 奖励:-13.0\n", + "回合数:2/20, 奖励:-13.0\n", + "回合数:3/20, 奖励:-13.0\n", + "回合数:4/20, 奖励:-13.0\n", + "回合数:5/20, 奖励:-13.0\n", + "回合数:6/20, 奖励:-13.0\n", + "回合数:7/20, 奖励:-13.0\n", + "回合数:8/20, 奖励:-13.0\n", + "回合数:9/20, 奖励:-13.0\n", + "回合数:10/20, 奖励:-13.0\n", + "回合数:11/20, 奖励:-13.0\n", + "回合数:12/20, 奖励:-13.0\n", + "回合数:13/20, 奖励:-13.0\n", + "回合数:14/20, 奖励:-13.0\n", + "回合数:15/20, 奖励:-13.0\n", + "回合数:16/20, 奖励:-13.0\n", + "回合数:17/20, 奖励:-13.0\n", + "回合数:18/20, 奖励:-13.0\n", + "回合数:19/20, 奖励:-13.0\n", + "回合数:20/20, 奖励:-13.0\n", + "完成测试!\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYgAAAEcCAYAAADdtCNzAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAAsTAAALEwEAmpwYAAA1KElEQVR4nO3de1wU5f4H8M/uAl4JKQEBT5oiBJoCgqgoClqAgFxMjxiaWnS0hPRoR7x0CjWvFSmVhnfDe4kGoqVk3o+K92NoQlnIVREERNmVfX5/eNyfyHAdLpaf9+vl67W788wz3x2enc/OzDqjEEIIEBERPUbZ1AUQEdGTiQFBRESSGBBERCSJAUFERJIYEEREJIkBQUREkv7SAeHh4YFjx441+nKTk5Ph6enZ6Mul+nHv3j1MmDABPXv2RHh4eL31u2PHDgQHB9dbf7Xx5ptvIi4urkH63rRpE/r27QsHBwfk5+c3yDIeFR0djWnTpgEAMjMz4eDggLKyMgDAzZs38dprr8HBwQELFy6EEAIzZsyAs7MzXn311XqtIyIiAlFRUQCAEydOwM3NTbLdn3l78JcOiKbi5OSE77//vqnLoDrau3cvbt68iRMnTmDZsmWSbVJTU3Uh4uDggDFjxuDcuXONW2gtrFq1CoGBgfXer0ajwcKFC7FmzRqcPXsWxsbG9dJvfHw8goKC4ODggH79+uHNN99EcnJyhXYWFhY4e/YsVCoVAGDr1q0wNjbGmTNnEBERgdOnT+Po0aM4ePAgvvnmG3h6eiIxMVE3/+nTp2FjY1PhNQcHB9y/f79e3ktTbg/WrVsHV1dXODo6YsaMGVCr1bWanwFRBw+/rfyZ/RXeQ0PJzMxEx44doaenJzn9jz/+QHBwMGxsbJCUlITDhw9j8ODBGDduHC5cuNDI1aLeNmR1kZeXh9LSUlhZWdV6XiEEtFpthdfXrl2L+fPnY8KECTh69CgOHDiAUaNGISkpqdo+MzMz0blzZygUCgBARkYGLC0t0bJlSwCAs7MzTp06pWufnJyMTp06VXjNwcGh0r//n8Xhw4cRExODdevW4cCBA7h+/XqlX3gq89QEhFarRUxMDAYPHgwXFxe8++67KCgo0E0PDw+Hq6srevbsiddeew1Xr17VTYuIiMAHH3yA0NBQ2Nvb48SJE/Dw8MDq1avh5+eHnj17YvLkySgtLQVQcXezqrYAsHLlSvTr1w/9+vXD9u3bYWNjg99//13yfRQUFGDGjBno168fnJ2d8fbbbwOQPnzxaD+Pv4fVq1fD1dW1XFDs27cPfn5+NVpfj9u2bRtefvll9OrVCxMmTEBOTk65OjZv3oxXXnkFTk5OiIyMRGX/gb+srAwrVqzA4MGD4eDggKCgIGRlZen62bBhAwYNGgQXFxcsWrRIt4F59LADAFy/fh02NjaVbjzT0tIwevRoODk5wcfHR7fxWbZsGb788kvs2bMHDg4O2L59e4V5o6OjYW9vjylTpqBNmzZo3bo1xowZg6FDh2LJkiWVriOpGsaNG4devXpV+Gb7008/ISAgAI6OjhgwYACio6MrvLft27dj4MCBeP3113V//0WLFsHZ2RkeHh44ePCgbp7Ro0fr3kt1bdPT03WHacaOHYvIyMhy6/ah3377DV5eXgAebHjHjBkDADhz5gyGDRuGnj17YtiwYThz5ky5OqKiojBy5Ej06NED6enp5fosKirCsmXL8O9//xuvvPIKWrZsCX19fXh4eGD69OkVanj07xwREYGdO3di9erVcHBwwJYtWzB79mycO3cODg4OWLZsGZycnMrtiSQnJyM0NLTCa05OTgCq3i5UZcOGDRgyZAiys7MbbHtw/vz5Kj/DO3fuxKuvvoouXbrAyMgIb7/9du0PM4q/MHd3d3H06FEhhBDr1q0Tw4cPF1lZWaK0tFS8//77YsqUKbq227dvF0VFRaK0tFTMmzdPDB06VDdt+vTpwtHRUSQnJ4uysjJx79494e7uLoYNGyays7NFfn6+8PLyEps2bRJCCPGf//xH9O/fv1wdlbU9ePCg6Nu3r/jll19ESUmJmDp1qrC2thbXrl2TfE+hoaHi3XffFQUFBUKtVosTJ04IIYT49ttvxciRI8u1fbQfqfcwaNAgceTIEV37sLAw8dVXX9VofT3q2LFjolevXuK///2vKC0tFXPmzBGjRo0qV8dbb70lbt++LTIyMoSLi4s4ePCgZF8rV64Uvr6+Ii0tTWi1WpGSkiJu3bql6yckJETk5+eLjIwM8corr4ht27YJIYRYtmyZmDp1qq6f9PR0YW1tLTQaTYVlqNVqMXjwYLF8+XJRWloqjh07Juzt7UVaWppkX4/r27ev+Oabbyq8fvz4cWFrayvu3bsnOd+jf6M7d+4INzc38c033wiNRiMuXbokevXqJa5evSqEeDCGLl++LMrKykRKSoro06eP2LdvX7n39t5774k7d+6Iu3fvim+//VbY2dmJrVu3ivv374uNGzcKV1dXodVqhRBChISE6NZVdW1HjBghFi5cKEpLS8WpU6eEg4NDpevj8fWcn58vnJycRFxcnNBoNCI+Pl44OTnp/oYhISFiwIAB4pdffhEajUao1epy/R08eFDY2tpK/t0eevTv8/jyp0+fLj799FPJdS6EENevXxc2NjYiPz9flJWVid69e4u7d+8KNzc33WuOjo7i5MmTQojqtwsPl/XoZz46OloEBASIvLy8CtOEqN/tQVWfYT8/P7F
7927dtLy8PGFtba37W9TEU7MHsWXLFkyZMgXt2rWDgYEBJk2ahO+//173DfPVV19F69atYWBggLCwMFy+fBlFRUW6+QcNGoSePXtCqVSiWbNmAB58GzIzM0ObNm3g7u6OlJSUSpdfWds9e/YgKCgIXbp0QYsWLRAWFlZpH7m5uTh06BAiIyNhZGQEfX199OrVq8br4PH34OPjg4SEBABAcXExDh06BB8fnxqtr0fFx8dj2LBh6Nq1KwwMDPDPf/4T586dw/Xr13VtQkND8cwzz8DCwgIuLi64fPmyZI3bt2/Hu+++i06dOkGhUODFF18sd1w7NDQUbdq0gYWFBcaMGaOrvzbOnz+PkpISvPXWWzAwMECfPn3g7u6O3bt312j+/Px8mJiYVHjdxMQEZWVlVe5pPfTTTz/B0tISw4YNg56eHuzs7ODp6Ym9e/cCAFxcXGBjYwOlUokXX3wRPj4+OHnyZLk+wsLC0LJlSzRv3hzAg+PxI0aMgEqlQmBgIG7cuIGbN29KLr+ytpmZmbh48SLCw8NhYGAAJycneHh41Gi9PHxfHTp0QEBAAPT09ODr64tOnTrhwIEDujaBgYHo0qUL9PT0oK+vX27+goICGBsbN9jhHUtLS1hYWCA5ORmXL19Ghw4d0Lx5czg6Oupe02g06NGjB4DqtwuPEkJgwYIFOHr0KDZs2IBnn3220jrqY3sAoMrPcElJCVq3bq1ra2hoCAC4c+dODdcW8Oc+yFYLmZmZeOedd6BU/n8mKpVK5OXloW3btoiKisLevXtx69YtXZv8/HzdSjU3N6/Q56MbiRYtWiA3N7fS5VfWNjc3F926ddNNk1rOQ9nZ2TAyMoKRkVF1b1fS4337+flh5MiRiIyMxL59+2BnZwdLS0sAVa8vMzOzcv3k5uaia9euuuetWrVCmzZtkJOTg/bt2wOo+P4rG6TZ2dl4/vnna/QeLC0tq1znlcnNzUW7du3KvTcLC4tyh8WqYmxsjBs3blR4/caNG1AoFDA2NtYdunjY9+Phk5GRgQsXLugOZQAPDq8NHToUwIMQ+/jjj3H16lVoNBqo1Wrd4ZyH2rVrV+5527ZtdY9btGgB4MFGQkplbfPz82FkZKR7DXiwzh8e5qtObm4uLCwsyr32+Lqtaoy3adMG+fn5uH//foOFxMPDTObm5rr137NnT91r3bt3h4GBAcrKyqrdLjyqqKgI27ZtQ1RUlOT0R9Vle5CZmanb+APA2bNnq/wMt2zZEsXFxbr2Dx+3atWqZisKT1FAtGvXDvPnz0fPnj0rTNu5cyeSkpKwdu1atG/fHkVFRXB2dq70OHl9MjU1LffhqeqD2K5dO9y+fRuFhYV45plnyk1r0aIF7t27p3sutQF7nJWVFSwsLHDo0CEkJCTA19e33LIqW19S7yEjI0P3vKSkBAUFBRWCpCbatWuHP/74A9bW1pLTs7Ky0KVLFwAPPjCmpqYAKr7/yr45P6w3OzsbWq1W96HPyspCx44da1Rjnz59sHfvXgwbNqzc63v27IG9vb3um/fZs2cr7cPc3BzOzs5Yu3at5PSpU6ciJCQEq1atQrNmzfDRRx9V+AnpwxOx9cnExAS3b9/G3bt3dSFR03AAHqzbzMzMcq9lZWWhf//+uudV1e3g4AADAwPs37+/QiDWF2dnZ2zZsgWWlpYICgoC8CA04uLiYGlpqQuN+Pj4Wm0XnnnmGSxZsgSTJ0/G559/XqPPzuOq2h48/MXWo6r6DHfp0gVXrlzBkCFDAACXL19G27Zta/VLs6fmEFNwcDA+++wz3Ybs1q1b2L9/P4AHu1wGBgYwNjbG3bt38emnnzZaXV5eXtixYwfS0tJw9+5dfPnll5W2NTU1hZubGyIjI3H79m1oNBrdry9efPFFXL16FSkpKSgtLS13UrMqvr6+WL9+PU6dOlXuA1nV+pLqY8eOHUhJSYFarcann36K7t276/YeamP48OFYunQprl27BiEELl++XG7DuHr1aty+fRtZWVm6E4EAYGtri1OnTiEzMxNFRUX46quvKl1G9+7d0bx5c6xatQoajQYnTpzAjz/+qOurOpMmTcLZs2cRFRWFgoICFBcX4+uvv8aOHTtq/P8mBg4ciGvXrmHnzp3QaDTQaDS4cOEC0tLSADwYk0ZGRmjWrBkuXLhQp0NpdWFpaYlu3bohOjoaarUaZ8+eLXd4qDoDBgzAtWvXEB8fj/v37yMxMRGpqakYOHBgjeY3NDREeHg45syZg/379+Pu3bvQaDQ4ePAgFi9eXMd3VZ6TkxNSUlJw6tQpODo6AgCsra1x/fp1nDhxAs7OzgDqtl1wcXHBxx9/jLCwsDr9oq0224OHKvsM+/v745tvvkFqaioKCwuxfPnyWv/U+akJiDFjxsDDwwPjx4+Hg4MDRowYofsDBgQEwMLCAv3794ePjw/s7e0bra4BAwZg9OjRGDNmDF5++WXdsU8DAwPJ9osXL4aenh68vb3Rt29frF+/HgDwwgsv4J133sHYsWPxyiuv1Pjbi6+vL06dOoXevXuXO2Za1fp6XN++ffHuu+8iLCwM/fr1Q3p6uu4/ENXWuHHj4O3tjfHjx8PR0RGzZs0q9wuPQYMGISgoCAEBARg4cKDuPz+5urpiyJAhGDp0KIKCguDu7l7pMgwMDLBixQocOnQIvXv3RmRkJBYvXozOnTvXqMaOHTti06ZNuHz5Mjw8PODs7IylS5fi888/R9++fWvUR+vWrbF69WokJiaif//+6NevHz7++GPd79Q/+OADLFu2DA4ODvjiiy/g7e1do37rw8cff4xz587BxcUFn332GYYMGVLpeHycsbExVqxYgbVr18LFxQWrVq3CihUrqjwe/7jx48cjIiICX375Jfr06YOBAwdi48aNGDx4cF3fUjkvvPACnn32WbRt21a3J65UKtG9e3cUFxfDwcEBQN23C66urrqf6V66dKlWtdV2ewBU/hl2c3PDm2++iTFjxmDgwIGwtLSs9X/8VIjGOI5CNZaWlgZfX19cvHjxT/877PpmY2ODH374AR06dGjqUsrJzs7GiBEjEBYWhuHDhzd1OfVu8uTJ6NSpU73+r3KqmabeHjw1exBPsn379kGtVuP27dtYsmQJ3N3dGQ5/Iu3atcPKlStx48aNWv1C5El14cIF/PHHH9BqtTh06BCSkpLq7ds7Ve9J2h5wK/QE2LJlCyIiIqBSqeDs7IwPPvigqUuiWrKxsYGNjU1Tl1Evbt68ibCwMBQUFKBdu3b48MMPYWdn19RlPTWepO0BDzEREZEkHmIiIiJJDAgiIpLEgCAiIkl/qZPU+fl3oNXW7ZTKc8+1Rl5ecfUNmwjrk4f1ycP65HlS61MqFTA2rvzSG3+pgNBqRZ0D4uH8TzLWJw/rk4f1yfOk1yeFh5iIiEgSA4KIiCT9pQ4xEdGTTwiB/PwbUKvvAaifwy65uUrJ25c+KZq6PpVKD61bt0GLFjW/1DfAgCCiRlZcfBsKhQJmZu2hUNTPQQw9PSXu339yA6Ip6xNCQKNRo6DgwS0AahMSPMRERI3q7t
1iGBq2qbdwoKopFAoYGDRDmzYmKC4uqNW8/AsRUaPSasugUvHgRWPT1zdAWVnFWwZXhQFBRI2uIe6GR1WryzpnQBARPSE++uhDfPvt1qYuQ4cBQUT0P/fv1+4QzJ9lWXXFA4FE9FTr188J48aF4vjxo3Bx6YNRo0YjOjoKaWlXoVar4eDghLCwKcjISMfMmf9CbOw23L9/Hz4+g/D6629g1KgxSErah8OHf8KHH36EzZtjkZT0A8rK7sPAoBmmTYuAra2t5LICAoZh3rwPkJd3E+3amUOp/P/v7Lt27cC2bZugr28AIbSYM2chOnTo2KjrhgFBRE3m6MUsHLmQJbsfhQJ4/M42/bqbw/Ul8xrN36xZM6xatQEAsHDhXNjbOyIi4n1otVpERs7G7t3fYejQQJSU3MHNmzeRnZ2JF17ojOTkUxg1agxOnz4JJydnAICXlw+Cg0MAAKdOncCSJQuwZs0GyWXNmvUeevRwwPjxbyEj4zrGjh0FF5c+AIAvv1yKjRu/Rdu2baFWq5vk/1HUS0Ds2rULq1atQlpaGmbOnImQkBDdtMjISBw/fhwGBgZo2bIlZs2ahZdeeqnSvm7dugVfX184OTlh2bJl9VEeEVGVvL19dY+PHDmElJRL2LJlIwDg3r17MDU1AwA4Ojrh9OmTyMrKhL9/EDZu3ACNRoPk5JMICRkLALhyJQVff70WhYW3oVQqkZ7+R6XLOnPmNCZPfg8AYGnZXhcyD5bljI8++gCurv3Rp08/WFq2b5D3XpV6CQhbW1tERUUhJiamwjQ3NzfMnDkT+vr6OHDgAKZMmYL9+/dX2teHH36IAQMG/CXu7UtEVXN9qebf8qsi9z+itWjR8pFnAvPnfyy5Qe7Z0xmnT59CZmYG/v3vuTh37gz27/8eQgAWFpbQaDR4//3p+PzzlbCxeRE3b95AQIB3Fcuq3Pz5S5CScgmnTycjPHwCpk2bgT59XOv8HuuiXk5SW1tbw8rKqtzxs4fc3d2hr68PALC3t0d2dnalu0rfffcd2rZtC2dnZ8npREQNzdXVDbGx61FWVgYAKCgoQGZmBoAHAXHixHEUFRXB1NQMTk69sHr1V7pv/mp1KcrKynR7HDt2bK9yWT17OmH37u8AAJmZGUhOPgXgwQnszMwM2Nl1w+jRY9GrV29cvXqlQd5vVRr1HMTGjRsxcOBAySDJycnBunXr8PXXX+P777+vU//PPddaVn0mJoay5m9orE8e1idPfdWXm6uEnl79/4BSTp96ev9f0z//+R4+/3wpxo0bBYVCAX19fUyePA3PP/83WFiYo1WrVrC3t4eenhK9erlgzpz34ezcC3p6ShgZPYPQ0AkIDR0DIyMjeHgMrlBf+WX9C3PmvI+QkO9hYWEJR8eeUCoVUCqB+fM/RHFx8f8uS2KGSZPCZa83pVJZq7+jQojHT+1UFBgYiMzMTMlpx44dg0qlAgBERESgW7du5c5BPLR7924sW7YMGzduRNu2bStMf+uttzBu3Dj06dMHO3bswE8//VTrcxB5ecV1vua6iYkhbtwoqtO8jYH1ycP65KnP+rKzf0e7dh3qpa+HeC2mmnl83SuViiq/WNdoDyIuLk5WUfv27UNUVBTWrVsnGQ4AcO7cOcyaNQsAcOfOHZSWliI0NBQrV66UtWwiIqqbBj/EdODAASxYsABr165F+/aVn4U/efKk7nFd9yCIiKj+1MuBwISEBLi5uWHv3r1YunQp3NzckJqaCgCYMWMGNBoNwsPD4e/vD39/f+Tn5wMAZs2ahaSkpPoogYiI6lmNzkH8WfAcRNNhffI8TfXxHETTqe05CF6LiYiIJDEgiIhIEgOCiIgkMSCIiJrA6tVfQaPR6J7X170gJk16C0ePHpbdD8CAICJqEmvXriwXEE8iXu6biJqM5pej0Fw5JLsfhUKBx3+QqW/jBn3r6i9ud+/ePcyb9wGuXfsVKpUenn++AwIDX8XSpZ/Azq4rLl26CD09PcyePQdr167Eb7+lwdTUDB99tAQtWrRASUkJPvvswYX1gAeX+37ttdcBANevp2PJkvkoKMiHSqXCW2+9g969++KTTxYBACZOHA+FQono6K8AAL/+mobw8AnIzc1B164vYfbsSCgUCty5Uyx5jwqVSoXffvsV8+dH4u7du+jcuTPUarXs9fkQ9yCI6Kl24sRxlJTcQWzsdqxfvxnvvTcTAHDt2q8IChqODRu2omvX7pg6NQxhYVMQG7sdSqUS+/c/uGbcunWroNVqsWHDVqxYsQZ79uzG8eNHAQCRkbPx8sue2LhxG95/fy7mzn0f+fn5mDp1OgBg+fI1WLduEwwNH1wf6ddf07BkyVJ8/fU2XLlyGcnJJwAA0dFRsLd3xMqVG7B27Sbk59/SXeRv7tx/IyhoOGJjt2H48FG4fPnnels33IMgoiajb+1ao2/51ZHz/wysrLrg2rXf8Mkni+Dg0BN9+/YDADz/fAd06WIDALCxsUFOTpbuKq02Nra4fj0dAJCcfBLvvjsNCoUCrVq1xuDBryA5+SR69LBHauovGDJkKADghRc6wcrKBpcuXUS/fm6StfTvPxDNmjXTLTMj4zqcnSu/R8WdO8X47bc0eHoOAQB06/YSOnWyqtN6kMKAIKKnmqVle8TGbkNy8in85z9HERPzBSZPfg8GBs10bZRKFQwMDB55rtRdDrw+NWv26DJUjyxD+h4Vd+4U13sNj+IhJiJ6quXm5kCpVMHNbSDCw6eioCAfhYWFNZ7fyakXdu/eBSEESkruICnpBzg7u6Bly1awsrLGnj0JAIBr135DWtov6Nr1wR01W7ZsVeMNfGX3qGjVqjU6dbLCvn17AQA///xf/Ppram3efpW4B0FET7W0tFSsWPE5AECrLUNIyNhKrzotZezYNxEVtRhjxvwdAODpOQS9e/cFAHzwwTwsWTIf27ZtgkqlwuzZc2BsbAwAGDnyNYSHT0CzZs11J6kr8+67U/Hll8swdmzw/+5RYYDw8KmwsLDE7NmRmD8/ErGx69CpkxVefNGuLqtBEq/F9D9P07VwGgLrk+dpqo/XYmo6vBYTERHVCwYEERFJYkAQUaP7Cx3Z/tOoyzpnQBBRo3rw8837TV3GU0ejUUOlqt3vkhgQRNSoWrRojaKiAgjR9CdtnwZCCKjVpSgouIHWrdvUal7+zJWIGlXr1kbIz7+BnJzrAOrnUJNSqYRW++QGTlPXp1LpwdDQGC1atKrVfAwIImpUCoUCzz5rWq99Pk0/E25MPMRERESSGBBERCRJdkDs2rULfn5+sLOzQ2xsbLlpkZGR8PLywtChQzFy5EhcvHix0n6OHz+OoKAg+Pj4wMfHB5cvX5ZbGhERySD7HIStrS2ioqIQExNTYZqbmxtmzpwJfX19HDhwAFOmTMH+/fsrtMvJycGsWbOwatUqdOrUCffu3cP9+/wZHBFRU5IdENbW1gAenKV/nLu7u+6xvb09srOzodVqK7TdtGkT/P390alTJwBA8+bN5ZZFREQyNdqvmDZu3IiBAwdKBklqaiosLS0xZswYFBYWwsXFB
VOnTi13/XUiImpc1QZEYGAgMjMzJacdO3YMKpWq2oXs3r0b8fHx2Lhxo+T0srIynDlzBmvXrkWzZs0wbdo0xMTEYNKkSdX2/aiqrkpYEyYmhrLmb2isTx7WJw/rk+dJr09KtQERFxcnawH79u1DVFQU1q1bV+k11i0sLNCtWzfdfVm9vLywa9euWi+Ll/tuOqxPHtYnD+urmya93PeBAwewYMECrF69Gu3bt6+0na+vL06cOAG1Wg0hBI4cOYIXX3yxIUsjIqJqyA6IhIQEuLm5Ye/evVi6dCnc3NyQmvrglnczZsyARqNBeHg4/P394e/vj/z8fADArFmzkJSUBABwdHRE//79ERAQgKFDh6KsrAz/+Mc/5JZGREQy8I5y//Ok7gI+xPrkYX3ysD55ntT6eEc5IiKqEwYEERFJYkAQEZEkBgQREUliQBARkSQGBBERSWJAEBGRJAYEERFJYkAQEZEkBgQREUliQBARkSQGBBERSWJAEBGRJAYEERFJYkAQEZEkBgQREUliQBARkSQGBBERSWJAEBGRJAYEERFJYkAQEZEkBgQREUmSHRC7du2Cn58f7OzsEBsbW25aZGQkvLy8MHToUIwcORIXL16U7OPu3buYOnUqfH194ePjg8mTJ6O4uFhuaUREJIPsgLC1tUVUVBR8fX0rTHNzc0N8fDy+++47/OMf/8CUKVMk+9i6dSs0Gg3i4+ORkJAArVaLzZs3yy2NiIhk0JPbgbW1NQBAqayYNe7u7rrH9vb2yM7OhlarrdBWoVDg3r170Gg0AICSkhK0a9dObmlERCSD7ICoqY0bN2LgwIGSQTJy5EicO3cOrq6uAIB+/frBz8+v1st47rnWsmo0MTGUNX9DY33ysD55WJ88T3p9UqoNiMDAQGRmZkpOO3bsGFQqVbUL2b17N+Lj47Fx48ZK+wGAI0eOAACmTp2K1atX44033qi270fl5RVDqxW1muchExND3LhRVKd5GwPrk4f1ycP65HlS61MqFVV+sa42IOLi4mQVsG/fPkRFRWHdunVo27atZJstW7bA398fzZo1AwAMGTIEO3furHVAEBFR/WnQn7keOHAACxYswOrVq9G+fftK27Vv3x5HjhyBEAJarRaHDx9Gly5dGrI0IiKqhuyASEhIgJubG/bu3YulS5fCzc0NqampAIAZM2ZAo9EgPDwc/v7+8Pf3R35+PgBg1qxZSEpKAgC88847KCwshK+vL/z8/KBWqzFx4kS5pRERkQwKIUTdDto/gXgOoumwPnlYnzysr26qOwfB/0lNRESSGBBERCSJAUFERJIYEEREJIkBQUREkhgQREQkiQFBRESSGBBERCSJAUFERJIYEEREJIkBQUREkhgQREQkiQFBRESSGBBERCSJAUFERJIYEEREJIkBQUREkhgQREQkiQFBRESSGBBERCSJAUFERJJkB8SuXbvg5+cHOzs7xMbGlpu2fPly+Pn5ISAgAP7+/khMTKy0n23btuHll1/G4MGDMWfOHGi1WrmlERGRDHpyO7C1tUVUVBRiYmIqTAsJCcHEiRMBADk5OfD29oarqyuMjIzKtUtPT8fnn3+OnTt3ok2bNggNDcV3332HgIAAueUREVEdyd6DsLa2hpWVFZTKil0ZGhrqHpeUlEChUEjuGXz//fcYPHgwnn32WSiVSgwfPrzKvQ0iImp4svcgqrN582asX78e2dnZmD9/PoyNjSu0ycrKgoWFhe65hYUFsrKyGro0nUs/7obeteMQWtFoy6ytK0oF65OB9cnD+uRp6PrKOvVFVw+feu+32oAIDAxEZmam5LRjx45BpVJVOX9wcDCCg4Nx5coVTJs2DX369JEMifrw3HOt6zRfi+b60ABQKBX1W1A9Y33ysD55WJ88DVlfi+b6MDExrL5hLVUbEHFxcfWyIBsbG5iamuLkyZPw9PQsN83c3LxcCGVmZsLc3LzWy8jLK4a2Dindqe8rMPEfhhs3imo9b2MxMTFkfTKwPnlYnzyNUV9d+lcqFVV+sW7Qn7mmpqbqHqenpyMlJQVWVlYV2nl6emL//v24desWtFottm/fDm9v74YsjYiIqiH7HERCQgIWL16MwsJCJCUlISYmBmvWrIGVlRWio6ORmpoKPT09qFQqzJ49G507dwYALF26FKampggODsbf/vY3vP322xgxYgQAwNXVFUOHDpVbGhERyaAQQjy5Z3Zqqa6HmADuosrF+uRhffKwvrpp0kNMRET058WAICIiSQwIIiKSxIAgIiJJDAgiIpLEgCAiIkkMCCIiksSAICIiSQwIIiKSxIAgIiJJDAgiIpLEgCAiIkkMCCIiksSAICIiSQwIIiKSxIAgIiJJDAgiIpLEgCAiIkkMCCIiksSAICIiSQwIIiKSJDsgdu3aBT8/P9jZ2SE2NrbctOXLl8PPzw8BAQHw9/dHYmKiZB/79+9HUFAQfH194ePjgzVr1sgti4iIZNKT24GtrS2ioqIQExNTYVpISAgmTpwIAMjJyYG3tzdcXV1hZGRUrp2JiQmWL18OMzMzFBUVISgoCN27d4eTk5Pc8oiIqI5kB4S1tTUAQKmsuDNiaGioe1xSUgKFQgGtVluhXY8ePcrN07lzZ2RkZDAgiIiaUIOfg9i8eTO8vLwQGBiIuXPnwtjYuMr2aWlpOHfuHHr37t3QpRERURUUQghRVYPAwEBkZmZKTjt27BhUKhUAICIiAt26dUNISIhk2ytXrmDatGnYsGFDpSGRm5uL0aNHY/LkyfD29q7N+yAionpW7SGmuLi4elmQjY0NTE1NcfLkSXh6elaYnpeXh3HjxuHNN9+sczjk5RVDq60y7yplYmKIGzeK6jRvY2B98rA+eVifPE9qfUqlAs8917ry6Q258NTUVN3j9PR0pKSkwMrKqkK7/Px8jBs3Dq+99hqGDx/ekCUREVENyT5JnZCQgMWLF6OwsBBJSUmIiYnBmjVrYGVlhejoaKSmpkJPTw8qlQqzZ89G586dAQBLly6FqakpgoODERMTg2vXrmHr1q3YunUrAGDMmDEYNmyY3PKIiKiOqj0H8WfCQ0xNh/XJw/rkYX1106SHmIiI6M+LAUFERJIYEEREJIkBQUREkhgQREQkiQFBRESSGBBERCSJAUFERJIYEEREJIkBQUREkhgQREQkiQFBRESSGBBERCSJAUFERJIYEEREJIkBQUREkhgQREQkiQFBRESSGBBERCSJAUFERJIYEEREJIkBQUREkmQHxK5du+Dn5wc7OzvExsaWm7Z8+XL4+fkhICAA/v7+SExMrLKv0tJS+Pj4ICgoSG5ZREQkk57cDmxtbREVFYWYmJgK00JCQjBx4kQAQE5ODry9veHq6gojIyPJvqKiotCjRw9cvnxZbllERCST7D0Ia2trWFlZQams2JWhoaHucUlJCRQKBbRarWQ/ycnJuHbtGvz9/eWWRERE9UD2HkR1Nm/ejPXr1yM7Oxvz58+HsbFxhTYlJSWYP38+li9fjmvXrtV5Wc8911pGpYCJiWH1jZoQ65OH9cnD+uR50uuTUm1ABAYG
IjMzU3LasWPHoFKpqpw/ODgYwcHBuHLlCqZNm4Y+ffpUCInFixdj1KhRMDMzkxUQeXnF0GpFneY1MTHEjRtFdV52Q2N98rA+eVifPE9qfUqlosov1tUGRFxcXL0UYmNjA1NTU5w8eRKenp7lpp0+fRqHDh3Cl19+idLSUty+fRt+fn6Ij4+vl2UTEVHtNeghptTUVFhZWQEA0tPTkZKSonv+qEeD4MSJE1i0aBF27NjRkKUREVE1ZAdEQkICFi9ejMLCQiQlJSEmJgZr1qyBlZUVoqOjkZqaCj09PahUKsyePRudO3cGACxduhSmpqYIDg6W/SaIiKj+KYQQdTto/wTiOYimw/rkYX3ysL66qe4cBP8nNRERSWJAEBGRJAYEERFJYkAQEZEkBgQREUliQBARkSQGBBERSWJAEBGRJAYEERFJYkAQEZEkBgQREUliQBARkSQGBBERSWJAEBGRJAYEERFJYkAQEZEkBgQREUliQBARkSQGBBERSWJAEBGRJAYEERFJkh0Qu3btgp+fH+zs7BAbG1tu2vLly+Hn54eAgAD4+/sjMTGx0n5SUlLw2muvYciQIRgyZAgOHjwotzQiIpJBT24Htra2iIqKQkxMTIVpISEhmDhxIgAgJycH3t7ecHV1hZGRUbl2JSUlmDRpEj755BPY29vj/v37KCoqklsaERHJIDsgrK2tAQBKZcWdEUNDQ93jkpISKBQKaLXaCu0SEhLQs2dP2NvbPyhKTw/GxsZySyMiIhlkB0R1Nm/ejPXr1yM7Oxvz58+X3PCnpqZCT08PoaGhyM3NRdeuXTF9+vQKexpERNR4FEIIUVWDwMBAZGZmSk47duwYVCoVACAiIgLdunVDSEiIZNsrV65g2rRp2LBhQ4WQmDdvHn788Uds2bIFbdu2xYIFC1BcXIwFCxbU5T0REVE9qHYPIi4url4WZGNjA1NTU5w8eRKenp7lppmbm8PFxQWmpqYAAD8/P8ycObPWy8jLK4ZWW2XeVcrExBA3bjy55z1YnzysTx7WJ8+TWp9SqcBzz7WufHpDLjw1NVX3OD09HSkpKbCysqrQztvbGxcuXEBxcTEA4NChQ7CxsWnI0oiIqBqyz0EkJCRg8eLFKCwsRFJSEmJiYrBmzRpYWVkhOjpad35BpVJh9uzZ6Ny5MwBg6dKlMDU1RXBwMCwsLBAaGoqRI0dCoVCgffv2mDt3ruw3R0REdVftOYg/Ex5iajqsTx7WJw/rq5smPcRERER/XgwIIiKSxIAgIiJJDAgiIpLEgCAiIkkMCCIiksSAICIiSQwIIiKSxIAgIiJJDAgiIpLEgCAiIkkNfsOgxqRUKpp0/obG+uRhffKwPnmexPqqq+kvdbE+IiKqPzzEREREkhgQREQkiQFBRESSGBBERCSJAUFERJIYEEREJIkBQUREkhgQREQkiQFBRESS/lKX2qjOb7/9hoiICBQUFKBNmzZYtGgROnbsWK5NWVkZ5s2bh8OHD0OhUOCtt97C8OHDG7y2/Px8/Otf/8Iff/wBAwMDdOjQAXPmzMGzzz5brl1ERASOHTsGY2NjAICXlxcmTpzY4PUBgIeHBwwMDNCsWTMAwLRp09C/f/9ybe7evYsZM2bg0qVLUKlUmD59Otzd3Ru8tuvXr+Odd97RPS8qKkJxcTFOnjxZrl10dDQ2bdoEU1NTAICjoyM++OCDBqlp0aJF+P7775GRkYH4+HhYW1sDqNk4BBp+LErVV9NxCDT8WKxs/dVkHAINPxal6qvpOAQadyzWmXiKjB49WuzcuVMIIcTOnTvF6NGjK7SJi4sT48ePF2VlZSIvL0/0799fpKenN3ht+fn54j//+Y/u+cKFC8WMGTMqtJs+fbr4+uuvG7weKe7u7uLKlStVtomOjhazZs0SQgjx22+/ib59+4ri4uLGKK+cefPmicjIyAqvL1u2TCxcuLBRajh16pTIzMyssN5qMg6FaPixKFVfTcehEA0/FitbfzUZh0I0/FisrL5HVTYOhWjcsVhXT80hpry8PPz888/w9fUFAPj6+uLnn3/GrVu3yrVLTEzE8OHDoVQq8eyzz2Lw4MHYu3dvg9fXpk0buLi46J7b29sjMzOzwZdb3/bs2YO///3vAICOHTuiW7duOHToUKPWoFarER8fj2HDhjXqch/n5OQEc3Pzcq/VdBwCDT8Wpep7ksahVH210dBjsbr6npRxKMdTExBZWVkwMzODSqUCAKhUKpiamiIrK6tCOwsLC91zc3NzZGdnN2qtWq0WmzdvhoeHh+T0tWvXws/PD2+//TbS0tIatbZp06bBz88PH374IQoLCytMz8zMhKWlpe55U6y/H3/8EWZmZujatavk9N27d8PPzw/jx4/H2bNnG7W2mo7Dh22bcixWNw6BphuL1Y1DoOnHYnXjEGjasVgTT01A/JnMnTsXLVu2REhISIVpU6ZMwb59+xAfH49XXnkFb775JsrKyhqlro0bN+K7777Dt99+CyEE5syZ0yjLra1vv/220m9tI0eORFJSEuLj4/HGG2/g7bffRn5+fiNX+OdQ1TgEmm4s/hXGIfDnGItPTUCYm5sjJydHN4DLysqQm5tbYRfR3Ny83C51VlYW2rVr12h1Llq0CL///js+++wzKJUV/zxmZma61wMCAlBSUtJo34oerisDAwOMGjUKZ86cqdDGwsICGRkZuueNvf5ycnJw6tQp+Pn5SU43MTGBvr4+AMDV1RXm5ua4evVqo9VX03H4sG1TjcXqxiHQdGOxJuMQaNqxWN04BJp+LNbEUxMQzz33HGxtbZGQkAAASEhIgK2tbYVfZ3h5eWH79u3QarW4desW9u/fD09Pz0ap8dNPP8V///tffPHFFzAwMJBsk5OTo3t8+PBhKJVKmJmZNXhtJSUlKCoqAgAIIZCYmAhbW9sK7by8vLB161YAwLVr13Dx4kXJX5g0lLi4OAwYMED3y5rHPbr+UlJSkJGRgRdeeKGxyqvxOASabizWZBwCTTMWazoOgaYdi9WNQ6Dpx2JNPFU3DEpLS0NERAQKCwvxzDPPYNGiRejUqRNCQ0MRHh6Ol156CWVlZZgzZw6OHj0KAAgNDdWd6GpIV69eha+vLzp27IjmzZsDANq3b48vvvgC/v7+iImJgZmZGcaOHYu8vDwoFAq0bt0a//rXv2Bvb9/g9aWnpyMsLAxlZWXQarXo3LkzZs+eDVNT03L1lZSUICIiAikpKVAqlXjvvfcwePDgBq/vIU9PT8yaNQtubm661x79+06fPh2XLl2CUqmEvr4+wsPDMWDAgAapZd68efjhhx9w8+ZNGBsbo02bNti9e3el4/DxWht6LErV99lnn1U6DgE06liUqm/FihWVjsPH62vosVjZ3xeQHodA043FunqqAoKIiGruqTnEREREtcOAICIiSQwIIiKSxIAgIiJJDAgiIpLEgCCqZytWrMCsWbPqNG9ERASioqLquSKiunmqLvdN1BgmTJjQ1CUQ1QvuQRARkSQGBD31cnJyEBYWht69e8PDwwMbNmwA8OCGLuHh4Zg8eTIcHBw
QGBiIy5cv6+aLiYlB//794eDgAE9PTxw/flw337Rp03TtkpKS4OPjAycnJ4wePbrcVU9//vlnBAYGwsHBAZMnT0ZpaWm52g4cOAB/f384OTlh5MiRNVo+Ub1pultREDW9srIyERgYKKKjo0Vpaan4448/hIeHhzh06JBYtmyZsLOzE3v27BFqtVqsWrVKuLu7C7VaLdLS0oSbm5vIzs4WQgiRnp4ufv/9dyHEgxvBTJ06VQghxK+//ip69Oghjhw5ItRqtYiJiRGDBw8WpaWlorS0VAwcOFCsXbtWqNVqsWfPHmFnZyc+/fRTIYQQly5dEr179xbnzp0T9+/fFzt27BDu7u6itLS0yuUT1RfuQdBT7eLFi7h16xYmTZoEAwMD/O1vf8OIESOQmJgIAOjatSu8vLygr6+PcePGQa1W4/z581CpVFCr1UhLS4NGo0H79u3x/PPPV+g/MTERAwYMgKurK/T19fHGG2/g3r17OHv2LM6fPw+NRoPXX38d+vr68PLywksvvaSbd+vWrfj73/+OHj16QKVSITAwEPr6+jh37lyNl08kB09S01MtIyMDubm5cHJy0r1WVlYGJycnWFhYlLs89MOrlT5sP3PmTERHRyM1NRX9+vVDREREhauZ5ubmlrvpj1Kp1F3yW6VSwczMDAqFQjf90baZmZnYuXMnYmNjda9pNBrk5uaiV69eNVo+kRzcg6Cnmrm5Odq3b4/k5GTdv7Nnz2LlypUAUO7+BlqtFjk5Oborh/r5+WHz5s04cOAAFAoFPv744wr9m5qalrungxBCd1c5ExMT5OTkQDxyvcxH25qbm2PChAnlajt//rzudqU1WT6RHAwIeqp1794drVq1QkxMDO7du4eysjL88ssvuHDhAgDg0qVL+OGHH3D//n2sX78eBgYG6NGjB3799VccP34carUaBgYGaNasmeSNdby9vXHw4EEcP34cGo0Ga9asgYGBARwcHGBvbw89PT1s2LABGo0GP/zwAy5evKibd/jw4diyZQvOnz8PIQRKSkrw008/obi4uMbLJ5KDh5joqaZSqbBixQosWrQIgwYNglqtxgsvvIDJkycDAAYNGoTExERMnz4dHTp0QHR0NPT19aFWq/HJJ58gLS0N+vr6cHBwkLz1ZadOnbBkyRLMnTsXOTk5sLW1xYoVK3Q34omOjsb777+Pzz77DAMGDMDLL7+sm/ell17C3LlzMWfOHPz+++9o3rw5HB0d4eTkVOPlE8nB+0EQVSI6Ohq///47D93QU4v7pEREJIkBQUREkniIiYiIJHEPgoiIJDEgiIhIEgOCiIgkMSCIiEgSA4KIiCQxIIiISNL/AQrd+XrWqhsMAAAAAElFTkSuQmCC", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# 获取参数\n", + "cfg = get_args() \n", + "# 训练\n", + "env, agent = env_agent_config(cfg)\n", + "res_dic = train(cfg, env, agent)\n", + " \n", + "plot_rewards(res_dic['rewards'], cfg, tag=\"train\") \n", + "# 测试\n", + "res_dic = test(cfg, env, agent)\n", + "plot_rewards(res_dic['rewards'], cfg, tag=\"test\") # 画出结果" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.7.13 ('easyrl')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.13" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "8994a120d39b6e6a2ecc94b4007f5314b68aa69fc88a7f00edf21be39b41f49c" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/projects/notebooks/2.Sarsa.ipynb b/projects/notebooks/2.Sarsa.ipynb new file mode 100644 index 0000000..493cb59 --- /dev/null +++ b/projects/notebooks/2.Sarsa.ipynb @@ -0,0 +1,896 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1、定义算法\n", + "\n", + "在阅读该教程之前,请先阅读Q learning教程。Sarsa算法跟Q learning算法基本模式相同,但是根本的区别在于,Sarsa是先做出动作然后拿这个做的动作去更新,而Q learning是假定下一步最大奖励对应的动作拿去更新,然后再使用$\\varepsilon$-greedy策略,也就是说Sarsa是on-policy的,而Q learning是off-policy的。如下方代码所示,只有在更新的地方Sarsa与Q learning有着细微的区别。" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "from collections import defaultdict\n", + "import torch\n", + "import math\n", + "class Sarsa(object):\n", + " def __init__(self,\n", + " n_actions,cfg):\n", + " self.n_actions = n_actions \n", + " self.lr = cfg.lr \n", + " self.gamma = cfg.gamma \n", + " self.sample_count = 0 \n", + " self.epsilon_start = cfg.epsilon_start\n", + " self.epsilon_end = cfg.epsilon_end\n", + " self.epsilon_decay = cfg.epsilon_decay \n", + " self.Q = defaultdict(lambda: np.zeros(n_actions)) # Q table\n", + " def sample(self, state):\n", + " self.sample_count += 1\n", + " self.epsilon = self.epsilon_end + (self.epsilon_start - self.epsilon_end) * \\\n", + " math.exp(-1. 
* self.sample_count / self.epsilon_decay) # The probability to select a random action, is is log decayed\n", + " best_action = np.argmax(self.Q[state])\n", + " action_probs = np.ones(self.n_actions, dtype=float) * self.epsilon / self.n_actions\n", + " action_probs[best_action] += (1.0 - self.epsilon)\n", + " action = np.random.choice(np.arange(len(action_probs)), p=action_probs) \n", + " return action\n", + " def predict(self,state):\n", + " return np.argmax(self.Q[state])\n", + " def update(self, state, action, reward, next_state, next_action,done):\n", + " Q_predict = self.Q[state][action]\n", + " if done:\n", + " Q_target = reward # 终止状态\n", + " else:\n", + " Q_target = reward + self.gamma * self.Q[next_state][next_action] # 与Q learning不同,Sarsa是拿下一步动作对应的Q值去更新\n", + " self.Q[state][action] += self.lr * (Q_target - Q_predict) \n", + " def save(self,path):\n", + " '''把 Q表格 的数据保存到文件中\n", + " '''\n", + " import dill\n", + " torch.save(\n", + " obj=self.Q,\n", + " f=path+\"sarsa_model.pkl\",\n", + " pickle_module=dill\n", + " )\n", + " def load(self, path):\n", + " '''从文件中读取数据到 Q表格\n", + " '''\n", + " import dill\n", + " self.Q =torch.load(f=path+'sarsa_model.pkl',pickle_module=dill)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2、定义训练\n", + "\n", + "同样地,跟Q learning差别也不大" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "def train(cfg,env,agent):\n", + " print('开始训练!')\n", + " print(f'环境:{cfg.env_name}, 算法:{cfg.algo_name}, 设备:{cfg.device}')\n", + " rewards = [] # 记录奖励\n", + " for i_ep in range(cfg.train_eps):\n", + " ep_reward = 0 # 记录每个回合的奖励\n", + " state = env.reset() # 重置环境,即开始新的回合\n", + " action = agent.sample(state)\n", + " while True:\n", + " action = agent.sample(state) # 根据算法采样一个动作\n", + " next_state, reward, done, _ = env.step(action) # 与环境进行一次动作交互\n", + " next_action = agent.sample(next_state)\n", + " agent.update(state, action, reward, next_state, next_action,done) # 算法更新\n", + " state = next_state # 更新状态\n", + " action = next_action\n", + " ep_reward += reward\n", + " if done:\n", + " break\n", + " rewards.append(ep_reward)\n", + " print(f\"回合:{i_ep+1}/{cfg.train_eps},奖励:{ep_reward:.1f},Epsilon:{agent.epsilon}\")\n", + " print('完成训练!')\n", + " return {\"rewards\":rewards}\n", + " \n", + "def test(cfg,env,agent):\n", + " print('开始测试!')\n", + " print(f'环境:{cfg.env_name}, 算法:{cfg.algo_name}, 设备:{cfg.device}')\n", + " rewards = [] # 记录所有回合的奖励\n", + " for i_ep in range(cfg.test_eps):\n", + " ep_reward = 0 # 记录每个episode的reward\n", + " state = env.reset() # 重置环境, 重新开一局(即开始新的一个回合)\n", + " while True:\n", + " action = agent.predict(state) # 根据算法选择一个动作\n", + " next_state, reward, done, _ = env.step(action) # 与环境进行一个交互\n", + " state = next_state # 更新状态\n", + " ep_reward += reward\n", + " if done:\n", + " break\n", + " rewards.append(ep_reward)\n", + " print(f\"回合数:{i_ep+1}/{cfg.test_eps}, 奖励:{ep_reward:.1f}\")\n", + " print('完成测试!')\n", + " return {\"rewards\":rewards}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3、定义环境\n", + "为了具体看看Q learning和Sarsa的不同,笔者决定跟Q learning使用相同的环境\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "import gym\n", + "import turtle\n", + "import numpy as np\n", + "\n", + "# turtle tutorial : https://docs.python.org/3.3/library/turtle.html\n", + "\n", + "def GridWorld(gridmap=None, is_slippery=False):\n", + " if gridmap is None:\n", + " gridmap = ['SFFF', 'FHFH', 'FFFH', 
'HFFG']\n", + " env = gym.make(\"FrozenLake-v0\", desc=gridmap, is_slippery=False)\n", + " env = FrozenLakeWapper(env)\n", + " return env\n", + "\n", + "\n", + "class FrozenLakeWapper(gym.Wrapper):\n", + " def __init__(self, env):\n", + " gym.Wrapper.__init__(self, env)\n", + " self.max_y = env.desc.shape[0]\n", + " self.max_x = env.desc.shape[1]\n", + " self.t = None\n", + " self.unit = 50\n", + "\n", + " def draw_box(self, x, y, fillcolor='', line_color='gray'):\n", + " self.t.up()\n", + " self.t.goto(x * self.unit, y * self.unit)\n", + " self.t.color(line_color)\n", + " self.t.fillcolor(fillcolor)\n", + " self.t.setheading(90)\n", + " self.t.down()\n", + " self.t.begin_fill()\n", + " for _ in range(4):\n", + " self.t.forward(self.unit)\n", + " self.t.right(90)\n", + " self.t.end_fill()\n", + "\n", + " def move_player(self, x, y):\n", + " self.t.up()\n", + " self.t.setheading(90)\n", + " self.t.fillcolor('red')\n", + " self.t.goto((x + 0.5) * self.unit, (y + 0.5) * self.unit)\n", + "\n", + " def render(self):\n", + " if self.t == None:\n", + " self.t = turtle.Turtle()\n", + " self.wn = turtle.Screen()\n", + " self.wn.setup(self.unit * self.max_x + 100,\n", + " self.unit * self.max_y + 100)\n", + " self.wn.setworldcoordinates(0, 0, self.unit * self.max_x,\n", + " self.unit * self.max_y)\n", + " self.t.shape('circle')\n", + " self.t.width(2)\n", + " self.t.speed(0)\n", + " self.t.color('gray')\n", + " for i in range(self.desc.shape[0]):\n", + " for j in range(self.desc.shape[1]):\n", + " x = j\n", + " y = self.max_y - 1 - i\n", + " if self.desc[i][j] == b'S': # Start\n", + " self.draw_box(x, y, 'white')\n", + " elif self.desc[i][j] == b'F': # Frozen ice\n", + " self.draw_box(x, y, 'white')\n", + " elif self.desc[i][j] == b'G': # Goal\n", + " self.draw_box(x, y, 'yellow')\n", + " elif self.desc[i][j] == b'H': # Hole\n", + " self.draw_box(x, y, 'black')\n", + " else:\n", + " self.draw_box(x, y, 'white')\n", + " self.t.shape('turtle')\n", + "\n", + " x_pos = self.s % self.max_x\n", + " y_pos = self.max_y - 1 - int(self.s / self.max_x)\n", + " self.move_player(x_pos, y_pos)\n", + "\n", + "\n", + "class CliffWalkingWapper(gym.Wrapper):\n", + " def __init__(self, env):\n", + " gym.Wrapper.__init__(self, env)\n", + " self.t = None\n", + " self.unit = 50\n", + " self.max_x = 12\n", + " self.max_y = 4\n", + "\n", + " def draw_x_line(self, y, x0, x1, color='gray'):\n", + " assert x1 > x0\n", + " self.t.color(color)\n", + " self.t.setheading(0)\n", + " self.t.up()\n", + " self.t.goto(x0, y)\n", + " self.t.down()\n", + " self.t.forward(x1 - x0)\n", + "\n", + " def draw_y_line(self, x, y0, y1, color='gray'):\n", + " assert y1 > y0\n", + " self.t.color(color)\n", + " self.t.setheading(90)\n", + " self.t.up()\n", + " self.t.goto(x, y0)\n", + " self.t.down()\n", + " self.t.forward(y1 - y0)\n", + "\n", + " def draw_box(self, x, y, fillcolor='', line_color='gray'):\n", + " self.t.up()\n", + " self.t.goto(x * self.unit, y * self.unit)\n", + " self.t.color(line_color)\n", + " self.t.fillcolor(fillcolor)\n", + " self.t.setheading(90)\n", + " self.t.down()\n", + " self.t.begin_fill()\n", + " for i in range(4):\n", + " self.t.forward(self.unit)\n", + " self.t.right(90)\n", + " self.t.end_fill()\n", + "\n", + " def move_player(self, x, y):\n", + " self.t.up()\n", + " self.t.setheading(90)\n", + " self.t.fillcolor('red')\n", + " self.t.goto((x + 0.5) * self.unit, (y + 0.5) * self.unit)\n", + "\n", + " def render(self):\n", + " if self.t == None:\n", + " self.t = turtle.Turtle()\n", + " self.wn = 
turtle.Screen()\n", + " self.wn.setup(self.unit * self.max_x + 100,\n", + " self.unit * self.max_y + 100)\n", + " self.wn.setworldcoordinates(0, 0, self.unit * self.max_x,\n", + " self.unit * self.max_y)\n", + " self.t.shape('circle')\n", + " self.t.width(2)\n", + " self.t.speed(0)\n", + " self.t.color('gray')\n", + " for _ in range(2):\n", + " self.t.forward(self.max_x * self.unit)\n", + " self.t.left(90)\n", + " self.t.forward(self.max_y * self.unit)\n", + " self.t.left(90)\n", + " for i in range(1, self.max_y):\n", + " self.draw_x_line(\n", + " y=i * self.unit, x0=0, x1=self.max_x * self.unit)\n", + " for i in range(1, self.max_x):\n", + " self.draw_y_line(\n", + " x=i * self.unit, y0=0, y1=self.max_y * self.unit)\n", + "\n", + " for i in range(1, self.max_x - 1):\n", + " self.draw_box(i, 0, 'black')\n", + " self.draw_box(self.max_x - 1, 0, 'yellow')\n", + " self.t.shape('turtle')\n", + "\n", + " x_pos = self.s % self.max_x\n", + " y_pos = self.max_y - 1 - int(self.s / self.max_x)\n", + " self.move_player(x_pos, y_pos)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "def env_agent_config(cfg,seed=1):\n", + " '''创建环境和智能体\n", + " Args:\n", + " cfg ([type]): [description]\n", + " seed (int, optional): 随机种子. Defaults to 1.\n", + " Returns:\n", + " env [type]: 环境\n", + " agent : 智能体\n", + " ''' \n", + " env = gym.make(cfg.env_name) \n", + " env = CliffWalkingWapper(env)\n", + " env.seed(seed) # 设置随机种子\n", + " n_states = env.observation_space.n # 状态维度\n", + " n_actions = env.action_space.n # 动作维度\n", + " print(f\"状态数:{n_states},动作数:{n_actions}\")\n", + " agent = Sarsa(n_actions,cfg)\n", + " return env,agent" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 4、设置参数\n", + "同样的参数也是一样" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "import datetime\n", + "import argparse\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "def get_args():\n", + " \"\"\" \n", + " \"\"\"\n", + " curr_time = datetime.datetime.now().strftime(\"%Y%m%d-%H%M%S\") # 获取当前时间\n", + " parser = argparse.ArgumentParser(description=\"hyperparameters\") \n", + " parser.add_argument('--algo_name',default='Sarsa',type=str,help=\"name of algorithm\")\n", + " parser.add_argument('--env_name',default='CliffWalking-v0',type=str,help=\"name of environment\")\n", + " parser.add_argument('--train_eps',default=400,type=int,help=\"episodes of training\") # 训练的回合数\n", + " parser.add_argument('--test_eps',default=20,type=int,help=\"episodes of testing\") # 测试的回合数\n", + " parser.add_argument('--gamma',default=0.90,type=float,help=\"discounted factor\") # 折扣因子\n", + " parser.add_argument('--epsilon_start',default=0.95,type=float,help=\"initial value of epsilon\") # e-greedy策略中初始epsilon\n", + " parser.add_argument('--epsilon_end',default=0.01,type=float,help=\"final value of epsilon\") # e-greedy策略中的终止epsilon\n", + " parser.add_argument('--epsilon_decay',default=300,type=int,help=\"decay rate of epsilon\") # e-greedy策略中epsilon的衰减率\n", + " parser.add_argument('--lr',default=0.1,type=float,help=\"learning rate\")\n", + " parser.add_argument('--device',default='cpu',type=str,help=\"cpu or cuda\") \n", + " args = parser.parse_args([]) \n", + " return args\n", + "\n", + "def smooth(data, weight=0.9): \n", + " '''用于平滑曲线,类似于Tensorboard中的smooth\n", + "\n", + " Args:\n", + " data (List):输入数据\n", + " weight (Float): 平滑权重,处于0-1之间,数值越高说明越平滑,一般取0.9\n", + "\n", + " Returns:\n", + " 
smoothed (List): 平滑后的数据\n", + " '''\n", + " last = data[0] # First value in the plot (first timestep)\n", + " smoothed = list()\n", + " for point in data:\n", + " smoothed_val = last * weight + (1 - weight) * point # 计算平滑值\n", + " smoothed.append(smoothed_val) \n", + " last = smoothed_val \n", + " return smoothed\n", + "\n", + "def plot_rewards(rewards,cfg, tag='train'):\n", + " sns.set()\n", + " plt.figure() # 创建一个图形实例,方便同时多画几个图\n", + " plt.title(f\"{tag}ing curve on {cfg.device} of {cfg.algo_name} for {cfg.env_name}\")\n", + " plt.xlabel('epsiodes')\n", + " plt.plot(rewards, label='rewards')\n", + " plt.plot(smooth(rewards), label='smoothed')\n", + " plt.legend()\n", + " plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 5、我准备好了!\n", + "仔细看,会发现Sarsa收敛得快一些,但是收敛之会低些,Q learning会相反,至于为什么请同学们自行思考哟~" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "状态数:48,动作数:4\n", + "开始训练!\n", + "环境:CliffWalking-v0, 算法:Sarsa, 设备:cpu\n", + "回合:1/400,奖励:-1524.0,Epsilon:0.2029722781251147\n", + "回合:2/400,奖励:-1294.0,Epsilon:0.011808588201828951\n", + "回合:3/400,奖励:-192.0,Epsilon:0.01050118158853445\n", + "回合:4/400,奖励:-346.0,Epsilon:0.010049747911736582\n", + "回合:5/400,奖励:-252.0,Epsilon:0.010009240861841986\n", + "回合:6/400,奖励:-168.0,Epsilon:0.010003005072880926\n", + "回合:7/400,奖励:-393.0,Epsilon:0.01000042188120369\n", + "回合:8/400,奖励:-169.0,Epsilon:0.010000136281659052\n", + "回合:9/400,奖励:-97.0,Epsilon:0.010000071145264558\n", + "回合:10/400,奖励:-134.0,Epsilon:0.010000029022085234\n", + "回合:11/400,奖励:-124.0,Epsilon:0.010000012655059554\n", + "回合:12/400,奖励:-74.0,Epsilon:0.010000007701309915\n", + "回合:13/400,奖励:-135.0,Epsilon:0.010000003120699265\n", + "回合:14/400,奖励:-84.0,Epsilon:0.010000001776639691\n", + "回合:15/400,奖励:-101.0,Epsilon:0.010000000903081117\n", + "回合:16/400,奖励:-111.0,Epsilon:0.010000000429438717\n", + "回合:17/400,奖励:-114.0,Epsilon:0.010000000200165738\n", + "回合:18/400,奖励:-114.0,Epsilon:0.010000000093299278\n", + "回合:19/400,奖励:-82.0,Epsilon:0.010000000053829002\n", + "回合:20/400,奖励:-85.0,Epsilon:0.01000000003044167\n", + "回合:21/400,奖励:-108.0,Epsilon:0.010000000014768242\n", + "回合:22/400,奖励:-66.0,Epsilon:0.010000000009479634\n", + "回合:23/400,奖励:-74.0,Epsilon:0.010000000005768887\n", + "回合:24/400,奖励:-114.0,Epsilon:0.010000000002688936\n", + "回合:25/400,奖励:-98.0,Epsilon:0.010000000001394421\n", + "回合:26/400,奖励:-94.0,Epsilon:0.010000000000742658\n", + "回合:27/400,奖励:-58.0,Epsilon:0.010000000000502822\n", + "回合:28/400,奖励:-100.0,Epsilon:0.010000000000257298\n", + "回合:29/400,奖励:-208.0,Epsilon:0.010000000000123995\n", + "回合:30/400,奖励:-184.0,Epsilon:0.010000000000070121\n", + "回合:31/400,奖励:-62.0,Epsilon:0.010000000000046227\n", + "回合:32/400,奖励:-117.0,Epsilon:0.01000000000002112\n", + "回合:33/400,奖励:-47.0,Epsilon:0.010000000000015387\n", + "回合:34/400,奖励:-54.0,Epsilon:0.0100000000000107\n", + "回合:35/400,奖励:-120.0,Epsilon:0.010000000000004792\n", + "回合:36/400,奖励:-75.0,Epsilon:0.010000000000002897\n", + "回合:37/400,奖励:-62.0,Epsilon:0.01000000000000191\n", + "回合:38/400,奖励:-70.0,Epsilon:0.010000000000001194\n", + "回合:39/400,奖励:-67.0,Epsilon:0.010000000000000762\n", + "回合:40/400,奖励:-87.0,Epsilon:0.010000000000000425\n", + "回合:41/400,奖励:-92.0,Epsilon:0.01000000000000023\n", + "回合:42/400,奖励:-79.0,Epsilon:0.010000000000000136\n", + "回合:43/400,奖励:-49.0,Epsilon:0.010000000000000097\n", + "回合:44/400,奖励:-103.0,Epsilon:0.010000000000000049\n", + 
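A note on the Epsilon column in the log above: the in-code comment inside `Sarsa.sample()` describes the schedule as "log decayed", but the formula is an exponential decay from `epsilon_start` toward `epsilon_end`, driven by the total number of sampled actions (`sample_count`), not by episodes. Because the first CliffWalking episodes take hundreds of steps, `sample_count` passes `epsilon_decay=300` almost immediately, which is why the printed epsilon collapses to roughly 0.01 within the first couple of episodes. The sketch below is a standalone illustration, not part of the notebook: the helper name `epsilon_at` and the chosen sample counts are mine; the default arguments mirror `get_args()` above.

```python
import math

def epsilon_at(sample_count, epsilon_start=0.95, epsilon_end=0.01, epsilon_decay=300):
    """Same schedule as Sarsa.sample(): exponential decay toward epsilon_end."""
    return epsilon_end + (epsilon_start - epsilon_end) * math.exp(-sample_count / epsilon_decay)

for n in (0, 300, 1000, 3000):
    print(f"sample_count={n:5d} -> epsilon={epsilon_at(n):.4f}")
# sample_count=    0 -> epsilon=0.9500
# sample_count=  300 -> epsilon=0.3558   (one decay constant)
# sample_count= 1000 -> epsilon=0.0435
# sample_count= 3000 -> epsilon=0.0100   (effectively epsilon_end)
```

In other words, exploration is essentially finished after the warm-up episodes; the remaining 0.01 floor is what still produces the occasional much larger negative returns later in the log (episodes around -125 or -133), when a residual random action steps into the cliff.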
"回合:45/400,奖励:-40.0,Epsilon:0.010000000000000037\n", + "回合:46/400,奖励:-214.0,Epsilon:0.010000000000000018\n", + "回合:47/400,奖励:-83.0,Epsilon:0.01000000000000001\n", + "回合:48/400,奖励:-62.0,Epsilon:0.010000000000000007\n", + "回合:49/400,奖励:-37.0,Epsilon:0.010000000000000005\n", + "回合:50/400,奖励:-73.0,Epsilon:0.010000000000000004\n", + "回合:51/400,奖励:-66.0,Epsilon:0.010000000000000002\n", + "回合:52/400,奖励:-48.0,Epsilon:0.010000000000000002\n", + "回合:53/400,奖励:-96.0,Epsilon:0.01\n", + "回合:54/400,奖励:-189.0,Epsilon:0.01\n", + "回合:55/400,奖励:-42.0,Epsilon:0.01\n", + "回合:56/400,奖励:-46.0,Epsilon:0.01\n", + "回合:57/400,奖励:-85.0,Epsilon:0.01\n", + "回合:58/400,奖励:-52.0,Epsilon:0.01\n", + "回合:59/400,奖励:-86.0,Epsilon:0.01\n", + "回合:60/400,奖励:-41.0,Epsilon:0.01\n", + "回合:61/400,奖励:-51.0,Epsilon:0.01\n", + "回合:62/400,奖励:-59.0,Epsilon:0.01\n", + "回合:63/400,奖励:-145.0,Epsilon:0.01\n", + "回合:64/400,奖励:-76.0,Epsilon:0.01\n", + "回合:65/400,奖励:-43.0,Epsilon:0.01\n", + "回合:66/400,奖励:-49.0,Epsilon:0.01\n", + "回合:67/400,奖励:-36.0,Epsilon:0.01\n", + "回合:68/400,奖励:-41.0,Epsilon:0.01\n", + "回合:69/400,奖励:-69.0,Epsilon:0.01\n", + "回合:70/400,奖励:-38.0,Epsilon:0.01\n", + "回合:71/400,奖励:-63.0,Epsilon:0.01\n", + "回合:72/400,奖励:-46.0,Epsilon:0.01\n", + "回合:73/400,奖励:-30.0,Epsilon:0.01\n", + "回合:74/400,奖励:-45.0,Epsilon:0.01\n", + "回合:75/400,奖励:-38.0,Epsilon:0.01\n", + "回合:76/400,奖励:-88.0,Epsilon:0.01\n", + "回合:77/400,奖励:-19.0,Epsilon:0.01\n", + "回合:78/400,奖励:-40.0,Epsilon:0.01\n", + "回合:79/400,奖励:-62.0,Epsilon:0.01\n", + "回合:80/400,奖励:-25.0,Epsilon:0.01\n", + "回合:81/400,奖励:-54.0,Epsilon:0.01\n", + "回合:82/400,奖励:-41.0,Epsilon:0.01\n", + "回合:83/400,奖励:-57.0,Epsilon:0.01\n", + "回合:84/400,奖励:-52.0,Epsilon:0.01\n", + "回合:85/400,奖励:-42.0,Epsilon:0.01\n", + "回合:86/400,奖励:-51.0,Epsilon:0.01\n", + "回合:87/400,奖励:-53.0,Epsilon:0.01\n", + "回合:88/400,奖励:-42.0,Epsilon:0.01\n", + "回合:89/400,奖励:-53.0,Epsilon:0.01\n", + "回合:90/400,奖励:-31.0,Epsilon:0.01\n", + "回合:91/400,奖励:-75.0,Epsilon:0.01\n", + "回合:92/400,奖励:-148.0,Epsilon:0.01\n", + "回合:93/400,奖励:-41.0,Epsilon:0.01\n", + "回合:94/400,奖励:-47.0,Epsilon:0.01\n", + "回合:95/400,奖励:-184.0,Epsilon:0.01\n", + "回合:96/400,奖励:-34.0,Epsilon:0.01\n", + "回合:97/400,奖励:-45.0,Epsilon:0.01\n", + "回合:98/400,奖励:-52.0,Epsilon:0.01\n", + "回合:99/400,奖励:-44.0,Epsilon:0.01\n", + "回合:100/400,奖励:-49.0,Epsilon:0.01\n", + "回合:101/400,奖励:-30.0,Epsilon:0.01\n", + "回合:102/400,奖励:-49.0,Epsilon:0.01\n", + "回合:103/400,奖励:-23.0,Epsilon:0.01\n", + "回合:104/400,奖励:-37.0,Epsilon:0.01\n", + "回合:105/400,奖励:-37.0,Epsilon:0.01\n", + "回合:106/400,奖励:-44.0,Epsilon:0.01\n", + "回合:107/400,奖励:-40.0,Epsilon:0.01\n", + "回合:108/400,奖励:-28.0,Epsilon:0.01\n", + "回合:109/400,奖励:-50.0,Epsilon:0.01\n", + "回合:110/400,奖励:-46.0,Epsilon:0.01\n", + "回合:111/400,奖励:-28.0,Epsilon:0.01\n", + "回合:112/400,奖励:-35.0,Epsilon:0.01\n", + "回合:113/400,奖励:-35.0,Epsilon:0.01\n", + "回合:114/400,奖励:-45.0,Epsilon:0.01\n", + "回合:115/400,奖励:-38.0,Epsilon:0.01\n", + "回合:116/400,奖励:-39.0,Epsilon:0.01\n", + "回合:117/400,奖励:-27.0,Epsilon:0.01\n", + "回合:118/400,奖励:-49.0,Epsilon:0.01\n", + "回合:119/400,奖励:-27.0,Epsilon:0.01\n", + "回合:120/400,奖励:-25.0,Epsilon:0.01\n", + "回合:121/400,奖励:-50.0,Epsilon:0.01\n", + "回合:122/400,奖励:-41.0,Epsilon:0.01\n", + "回合:123/400,奖励:-22.0,Epsilon:0.01\n", + "回合:124/400,奖励:-38.0,Epsilon:0.01\n", + "回合:125/400,奖励:-125.0,Epsilon:0.01\n", + "回合:126/400,奖励:-25.0,Epsilon:0.01\n", + "回合:127/400,奖励:-40.0,Epsilon:0.01\n", + "回合:128/400,奖励:-33.0,Epsilon:0.01\n", + "回合:129/400,奖励:-56.0,Epsilon:0.01\n", + "回合:130/400,奖励:-32.0,Epsilon:0.01\n", + 
"回合:131/400,奖励:-21.0,Epsilon:0.01\n", + "回合:132/400,奖励:-33.0,Epsilon:0.01\n", + "回合:133/400,奖励:-23.0,Epsilon:0.01\n", + "回合:134/400,奖励:-33.0,Epsilon:0.01\n", + "回合:135/400,奖励:-34.0,Epsilon:0.01\n", + "回合:136/400,奖励:-33.0,Epsilon:0.01\n", + "回合:137/400,奖励:-21.0,Epsilon:0.01\n", + "回合:138/400,奖励:-40.0,Epsilon:0.01\n", + "回合:139/400,奖励:-23.0,Epsilon:0.01\n", + "回合:140/400,奖励:-31.0,Epsilon:0.01\n", + "回合:141/400,奖励:-31.0,Epsilon:0.01\n", + "回合:142/400,奖励:-26.0,Epsilon:0.01\n", + "回合:143/400,奖励:-26.0,Epsilon:0.01\n", + "回合:144/400,奖励:-32.0,Epsilon:0.01\n", + "回合:145/400,奖励:-27.0,Epsilon:0.01\n", + "回合:146/400,奖励:-33.0,Epsilon:0.01\n", + "回合:147/400,奖励:-35.0,Epsilon:0.01\n", + "回合:148/400,奖励:-21.0,Epsilon:0.01\n", + "回合:149/400,奖励:-23.0,Epsilon:0.01\n", + "回合:150/400,奖励:-33.0,Epsilon:0.01\n", + "回合:151/400,奖励:-25.0,Epsilon:0.01\n", + "回合:152/400,奖励:-41.0,Epsilon:0.01\n", + "回合:153/400,奖励:-31.0,Epsilon:0.01\n", + "回合:154/400,奖励:-28.0,Epsilon:0.01\n", + "回合:155/400,奖励:-133.0,Epsilon:0.01\n", + "回合:156/400,奖励:-22.0,Epsilon:0.01\n", + "回合:157/400,奖励:-21.0,Epsilon:0.01\n", + "回合:158/400,奖励:-33.0,Epsilon:0.01\n", + "回合:159/400,奖励:-33.0,Epsilon:0.01\n", + "回合:160/400,奖励:-24.0,Epsilon:0.01\n", + "回合:161/400,奖励:-34.0,Epsilon:0.01\n", + "回合:162/400,奖励:-20.0,Epsilon:0.01\n", + "回合:163/400,奖励:-21.0,Epsilon:0.01\n", + "回合:164/400,奖励:-126.0,Epsilon:0.01\n", + "回合:165/400,奖励:-36.0,Epsilon:0.01\n", + "回合:166/400,奖励:-18.0,Epsilon:0.01\n", + "回合:167/400,奖励:-35.0,Epsilon:0.01\n", + "回合:168/400,奖励:-26.0,Epsilon:0.01\n", + "回合:169/400,奖励:-24.0,Epsilon:0.01\n", + "回合:170/400,奖励:-33.0,Epsilon:0.01\n", + "回合:171/400,奖励:-17.0,Epsilon:0.01\n", + "回合:172/400,奖励:-23.0,Epsilon:0.01\n", + "回合:173/400,奖励:-26.0,Epsilon:0.01\n", + "回合:174/400,奖励:-23.0,Epsilon:0.01\n", + "回合:175/400,奖励:-21.0,Epsilon:0.01\n", + "回合:176/400,奖励:-35.0,Epsilon:0.01\n", + "回合:177/400,奖励:-26.0,Epsilon:0.01\n", + "回合:178/400,奖励:-17.0,Epsilon:0.01\n", + "回合:179/400,奖励:-20.0,Epsilon:0.01\n", + "回合:180/400,奖励:-28.0,Epsilon:0.01\n", + "回合:181/400,奖励:-34.0,Epsilon:0.01\n", + "回合:182/400,奖励:-27.0,Epsilon:0.01\n", + "回合:183/400,奖励:-22.0,Epsilon:0.01\n", + "回合:184/400,奖励:-24.0,Epsilon:0.01\n", + "回合:185/400,奖励:-26.0,Epsilon:0.01\n", + "回合:186/400,奖励:-20.0,Epsilon:0.01\n", + "回合:187/400,奖励:-30.0,Epsilon:0.01\n", + "回合:188/400,奖励:-28.0,Epsilon:0.01\n", + "回合:189/400,奖励:-15.0,Epsilon:0.01\n", + "回合:190/400,奖励:-30.0,Epsilon:0.01\n", + "回合:191/400,奖励:-29.0,Epsilon:0.01\n", + "回合:192/400,奖励:-22.0,Epsilon:0.01\n", + "回合:193/400,奖励:-25.0,Epsilon:0.01\n", + "回合:194/400,奖励:-21.0,Epsilon:0.01\n", + "回合:195/400,奖励:-19.0,Epsilon:0.01\n", + "回合:196/400,奖励:-23.0,Epsilon:0.01\n", + "回合:197/400,奖励:-21.0,Epsilon:0.01\n", + "回合:198/400,奖励:-32.0,Epsilon:0.01\n", + "回合:199/400,奖励:-30.0,Epsilon:0.01\n", + "回合:200/400,奖励:-22.0,Epsilon:0.01\n", + "回合:201/400,奖励:-20.0,Epsilon:0.01\n", + "回合:202/400,奖励:-27.0,Epsilon:0.01\n", + "回合:203/400,奖励:-21.0,Epsilon:0.01\n", + "回合:204/400,奖励:-26.0,Epsilon:0.01\n", + "回合:205/400,奖励:-19.0,Epsilon:0.01\n", + "回合:206/400,奖励:-17.0,Epsilon:0.01\n", + "回合:207/400,奖励:-31.0,Epsilon:0.01\n", + "回合:208/400,奖励:-18.0,Epsilon:0.01\n", + "回合:209/400,奖励:-24.0,Epsilon:0.01\n", + "回合:210/400,奖励:-17.0,Epsilon:0.01\n", + "回合:211/400,奖励:-26.0,Epsilon:0.01\n", + "回合:212/400,奖励:-27.0,Epsilon:0.01\n", + "回合:213/400,奖励:-33.0,Epsilon:0.01\n", + "回合:214/400,奖励:-16.0,Epsilon:0.01\n", + "回合:215/400,奖励:-32.0,Epsilon:0.01\n", + "回合:216/400,奖励:-19.0,Epsilon:0.01\n", + "回合:217/400,奖励:-20.0,Epsilon:0.01\n", + "回合:218/400,奖励:-15.0,Epsilon:0.01\n", + 
"回合:219/400,奖励:-119.0,Epsilon:0.01\n", + "回合:220/400,奖励:-26.0,Epsilon:0.01\n", + "回合:221/400,奖励:-26.0,Epsilon:0.01\n", + "回合:222/400,奖励:-22.0,Epsilon:0.01\n", + "回合:223/400,奖励:-22.0,Epsilon:0.01\n", + "回合:224/400,奖励:-15.0,Epsilon:0.01\n", + "回合:225/400,奖励:-24.0,Epsilon:0.01\n", + "回合:226/400,奖励:-15.0,Epsilon:0.01\n", + "回合:227/400,奖励:-31.0,Epsilon:0.01\n", + "回合:228/400,奖励:-24.0,Epsilon:0.01\n", + "回合:229/400,奖励:-20.0,Epsilon:0.01\n", + "回合:230/400,奖励:-20.0,Epsilon:0.01\n", + "回合:231/400,奖励:-22.0,Epsilon:0.01\n", + "回合:232/400,奖励:-15.0,Epsilon:0.01\n", + "回合:233/400,奖励:-19.0,Epsilon:0.01\n", + "回合:234/400,奖励:-21.0,Epsilon:0.01\n", + "回合:235/400,奖励:-27.0,Epsilon:0.01\n", + "回合:236/400,奖励:-15.0,Epsilon:0.01\n", + "回合:237/400,奖励:-25.0,Epsilon:0.01\n", + "回合:238/400,奖励:-22.0,Epsilon:0.01\n", + "回合:239/400,奖励:-16.0,Epsilon:0.01\n", + "回合:240/400,奖励:-18.0,Epsilon:0.01\n", + "回合:241/400,奖励:-13.0,Epsilon:0.01\n", + "回合:242/400,奖励:-13.0,Epsilon:0.01\n", + "回合:243/400,奖励:-13.0,Epsilon:0.01\n", + "回合:244/400,奖励:-23.0,Epsilon:0.01\n", + "回合:245/400,奖励:-29.0,Epsilon:0.01\n", + "回合:246/400,奖励:-26.0,Epsilon:0.01\n", + "回合:247/400,奖励:-19.0,Epsilon:0.01\n", + "回合:248/400,奖励:-21.0,Epsilon:0.01\n", + "回合:249/400,奖励:-17.0,Epsilon:0.01\n", + "回合:250/400,奖励:-17.0,Epsilon:0.01\n", + "回合:251/400,奖励:-15.0,Epsilon:0.01\n", + "回合:252/400,奖励:-20.0,Epsilon:0.01\n", + "回合:253/400,奖励:-23.0,Epsilon:0.01\n", + "回合:254/400,奖励:-19.0,Epsilon:0.01\n", + "回合:255/400,奖励:-21.0,Epsilon:0.01\n", + "回合:256/400,奖励:-19.0,Epsilon:0.01\n", + "回合:257/400,奖励:-17.0,Epsilon:0.01\n", + "回合:258/400,奖励:-17.0,Epsilon:0.01\n", + "回合:259/400,奖励:-15.0,Epsilon:0.01\n", + "回合:260/400,奖励:-21.0,Epsilon:0.01\n", + "回合:261/400,奖励:-17.0,Epsilon:0.01\n", + "回合:262/400,奖励:-19.0,Epsilon:0.01\n", + "回合:263/400,奖励:-19.0,Epsilon:0.01\n", + "回合:264/400,奖励:-15.0,Epsilon:0.01\n", + "回合:265/400,奖励:-19.0,Epsilon:0.01\n", + "回合:266/400,奖励:-17.0,Epsilon:0.01\n", + "回合:267/400,奖励:-15.0,Epsilon:0.01\n", + "回合:268/400,奖励:-19.0,Epsilon:0.01\n", + "回合:269/400,奖励:-27.0,Epsilon:0.01\n", + "回合:270/400,奖励:-15.0,Epsilon:0.01\n", + "回合:271/400,奖励:-17.0,Epsilon:0.01\n", + "回合:272/400,奖励:-17.0,Epsilon:0.01\n", + "回合:273/400,奖励:-25.0,Epsilon:0.01\n", + "回合:274/400,奖励:-19.0,Epsilon:0.01\n", + "回合:275/400,奖励:-22.0,Epsilon:0.01\n", + "回合:276/400,奖励:-23.0,Epsilon:0.01\n", + "回合:277/400,奖励:-18.0,Epsilon:0.01\n", + "回合:278/400,奖励:-23.0,Epsilon:0.01\n", + "回合:279/400,奖励:-21.0,Epsilon:0.01\n", + "回合:280/400,奖励:-21.0,Epsilon:0.01\n", + "回合:281/400,奖励:-21.0,Epsilon:0.01\n", + "回合:282/400,奖励:-19.0,Epsilon:0.01\n", + "回合:283/400,奖励:-18.0,Epsilon:0.01\n", + "回合:284/400,奖励:-15.0,Epsilon:0.01\n", + "回合:285/400,奖励:-19.0,Epsilon:0.01\n", + "回合:286/400,奖励:-19.0,Epsilon:0.01\n", + "回合:287/400,奖励:-21.0,Epsilon:0.01\n", + "回合:288/400,奖励:-15.0,Epsilon:0.01\n", + "回合:289/400,奖励:-32.0,Epsilon:0.01\n", + "回合:290/400,奖励:-18.0,Epsilon:0.01\n", + "回合:291/400,奖励:-17.0,Epsilon:0.01\n", + "回合:292/400,奖励:-15.0,Epsilon:0.01\n", + "回合:293/400,奖励:-24.0,Epsilon:0.01\n", + "回合:294/400,奖励:-22.0,Epsilon:0.01\n", + "回合:295/400,奖励:-31.0,Epsilon:0.01\n", + "回合:296/400,奖励:-17.0,Epsilon:0.01\n", + "回合:297/400,奖励:-19.0,Epsilon:0.01\n", + "回合:298/400,奖励:-19.0,Epsilon:0.01\n", + "回合:299/400,奖励:-20.0,Epsilon:0.01\n", + "回合:300/400,奖励:-21.0,Epsilon:0.01\n", + "回合:301/400,奖励:-26.0,Epsilon:0.01\n", + "回合:302/400,奖励:-20.0,Epsilon:0.01\n", + "回合:303/400,奖励:-16.0,Epsilon:0.01\n", + "回合:304/400,奖励:-20.0,Epsilon:0.01\n", + "回合:305/400,奖励:-21.0,Epsilon:0.01\n", + "回合:306/400,奖励:-16.0,Epsilon:0.01\n", + 
"回合:307/400,奖励:-19.0,Epsilon:0.01\n", + "回合:308/400,奖励:-24.0,Epsilon:0.01\n", + "回合:309/400,奖励:-20.0,Epsilon:0.01\n", + "回合:310/400,奖励:-17.0,Epsilon:0.01\n", + "回合:311/400,奖励:-16.0,Epsilon:0.01\n", + "回合:312/400,奖励:-25.0,Epsilon:0.01\n", + "回合:313/400,奖励:-16.0,Epsilon:0.01\n", + "回合:314/400,奖励:-19.0,Epsilon:0.01\n", + "回合:315/400,奖励:-19.0,Epsilon:0.01\n", + "回合:316/400,奖励:-27.0,Epsilon:0.01\n", + "回合:317/400,奖励:-15.0,Epsilon:0.01\n", + "回合:318/400,奖励:-15.0,Epsilon:0.01\n", + "回合:319/400,奖励:-15.0,Epsilon:0.01\n", + "回合:320/400,奖励:-19.0,Epsilon:0.01\n", + "回合:321/400,奖励:-23.0,Epsilon:0.01\n", + "回合:322/400,奖励:-24.0,Epsilon:0.01\n", + "回合:323/400,奖励:-15.0,Epsilon:0.01\n", + "回合:324/400,奖励:-20.0,Epsilon:0.01\n", + "回合:325/400,奖励:-18.0,Epsilon:0.01\n", + "回合:326/400,奖励:-19.0,Epsilon:0.01\n", + "回合:327/400,奖励:-19.0,Epsilon:0.01\n", + "回合:328/400,奖励:-26.0,Epsilon:0.01\n", + "回合:329/400,奖励:-16.0,Epsilon:0.01\n", + "回合:330/400,奖励:-18.0,Epsilon:0.01\n", + "回合:331/400,奖励:-15.0,Epsilon:0.01\n", + "回合:332/400,奖励:-15.0,Epsilon:0.01\n", + "回合:333/400,奖励:-17.0,Epsilon:0.01\n", + "回合:334/400,奖励:-17.0,Epsilon:0.01\n", + "回合:335/400,奖励:-16.0,Epsilon:0.01\n", + "回合:336/400,奖励:-24.0,Epsilon:0.01\n", + "回合:337/400,奖励:-15.0,Epsilon:0.01\n", + "回合:338/400,奖励:-18.0,Epsilon:0.01\n", + "回合:339/400,奖励:-16.0,Epsilon:0.01\n", + "回合:340/400,奖励:-15.0,Epsilon:0.01\n", + "回合:341/400,奖励:-18.0,Epsilon:0.01\n", + "回合:342/400,奖励:-15.0,Epsilon:0.01\n", + "回合:343/400,奖励:-20.0,Epsilon:0.01\n", + "回合:344/400,奖励:-18.0,Epsilon:0.01\n", + "回合:345/400,奖励:-17.0,Epsilon:0.01\n", + "回合:346/400,奖励:-19.0,Epsilon:0.01\n", + "回合:347/400,奖励:-15.0,Epsilon:0.01\n", + "回合:348/400,奖励:-15.0,Epsilon:0.01\n", + "回合:349/400,奖励:-15.0,Epsilon:0.01\n", + "回合:350/400,奖励:-18.0,Epsilon:0.01\n", + "回合:351/400,奖励:-16.0,Epsilon:0.01\n", + "回合:352/400,奖励:-16.0,Epsilon:0.01\n", + "回合:353/400,奖励:-15.0,Epsilon:0.01\n", + "回合:354/400,奖励:-20.0,Epsilon:0.01\n", + "回合:355/400,奖励:-15.0,Epsilon:0.01\n", + "回合:356/400,奖励:-17.0,Epsilon:0.01\n", + "回合:357/400,奖励:-15.0,Epsilon:0.01\n", + "回合:358/400,奖励:-17.0,Epsilon:0.01\n", + "回合:359/400,奖励:-15.0,Epsilon:0.01\n", + "回合:360/400,奖励:-16.0,Epsilon:0.01\n", + "回合:361/400,奖励:-15.0,Epsilon:0.01\n", + "回合:362/400,奖励:-18.0,Epsilon:0.01\n", + "回合:363/400,奖励:-17.0,Epsilon:0.01\n", + "回合:364/400,奖励:-22.0,Epsilon:0.01\n", + "回合:365/400,奖励:-15.0,Epsilon:0.01\n", + "回合:366/400,奖励:-15.0,Epsilon:0.01\n", + "回合:367/400,奖励:-15.0,Epsilon:0.01\n", + "回合:368/400,奖励:-16.0,Epsilon:0.01\n", + "回合:369/400,奖励:-16.0,Epsilon:0.01\n", + "回合:370/400,奖励:-15.0,Epsilon:0.01\n", + "回合:371/400,奖励:-20.0,Epsilon:0.01\n", + "回合:372/400,奖励:-15.0,Epsilon:0.01\n", + "回合:373/400,奖励:-15.0,Epsilon:0.01\n", + "回合:374/400,奖励:-15.0,Epsilon:0.01\n", + "回合:375/400,奖励:-16.0,Epsilon:0.01\n", + "回合:376/400,奖励:-15.0,Epsilon:0.01\n", + "回合:377/400,奖励:-15.0,Epsilon:0.01\n", + "回合:378/400,奖励:-17.0,Epsilon:0.01\n", + "回合:379/400,奖励:-20.0,Epsilon:0.01\n", + "回合:380/400,奖励:-17.0,Epsilon:0.01\n", + "回合:381/400,奖励:-15.0,Epsilon:0.01\n", + "回合:382/400,奖励:-15.0,Epsilon:0.01\n", + "回合:383/400,奖励:-15.0,Epsilon:0.01\n", + "回合:384/400,奖励:-15.0,Epsilon:0.01\n", + "回合:385/400,奖励:-16.0,Epsilon:0.01\n", + "回合:386/400,奖励:-15.0,Epsilon:0.01\n", + "回合:387/400,奖励:-18.0,Epsilon:0.01\n", + "回合:388/400,奖励:-15.0,Epsilon:0.01\n", + "回合:389/400,奖励:-15.0,Epsilon:0.01\n", + "回合:390/400,奖励:-15.0,Epsilon:0.01\n", + "回合:391/400,奖励:-16.0,Epsilon:0.01\n", + "回合:392/400,奖励:-18.0,Epsilon:0.01\n", + "回合:393/400,奖励:-15.0,Epsilon:0.01\n", + "回合:394/400,奖励:-15.0,Epsilon:0.01\n", + 
"回合:395/400,奖励:-15.0,Epsilon:0.01\n", + "回合:396/400,奖励:-20.0,Epsilon:0.01\n", + "回合:397/400,奖励:-15.0,Epsilon:0.01\n", + "回合:398/400,奖励:-15.0,Epsilon:0.01\n", + "回合:399/400,奖励:-15.0,Epsilon:0.01\n", + "回合:400/400,奖励:-15.0,Epsilon:0.01\n", + "完成训练!\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAEXCAYAAABCjVgAAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAABUmElEQVR4nO3dd3hUZdrA4d+Zmk4KSSCUANIFZGlSFAQREkJVWFGK6C64rgXd/VgiKrIoYlhXxe5aV0Sxg7hEUBQUKQKiqBQxktBSJr1n2vv9MWRIyASSmAbz3NflZfLmnDPPHGbOc956NKWUQgghhAB0TR2AEEKI5kOSghBCCDdJCkIIIdwkKQghhHCTpCCEEMJNkoIQQgg3SQoN6JZbbiE7O7tW+/z444/cdddd591u0qRJ5Ofn1zU0UU+2bdvGyJEjue666ygtLa30t6SkJObNm8eECROYMGECM2fOZM+ePY0eY2FhIdOnTycuLo6NGzf+rmN99NFHXH/99UyaNIlx48bxwAMPuD+HTz/9NEuXLgVg7ty5/PrrrwA8+OCDjBo1iieeeILnn3+eq666ilmzZjFo0CCcTqf72H//+9/p1asXhYWF7rJ//vOfrFixotp4du3axfjx4wGIj4/nlVdeqbLNypUrWbt27e963zVRUlLC3//+d2JjYxk7diyff/55g79mg1CiwXTt2lVlZWU1dRiiAcXHx6tnn33W49/GjRunNm3a5P7922+/Vf369VM5OTmNFN2Z1x09evTvPs7zzz+vbrjhBmWxWJRSSlmtVrVkyRJ1ww03KKWUeuqpp9Q///nPKvt169ZNpaamKqWUGjVqlNq9e7dyOBxq0KBB6sCBA0oppWw2mxo6dKi65ZZb1IYNG9z7jh07Vu3atavamHbu3Kni4uKUUkotXLhQvfzyy7/7fdZVQkKCuv/++5VSSp08eVINGzbM/b4vJIamTkoXq3vvvReAm266if/85z/MmDGDPn36cPjwYf72t79hMBh48cUXsVqtZGdnM3nyZO6++2527drFQw89xCeffEJ8fDwBAQEcPnyYtLQ0OnXqxOOPP46/vz/dunVjx44dbNmyhc8++wydTkdKSgpGo5GEhAS6du1KSkoKixYtIi8vj/DwcJRSTJw4kWuvvbZSrBaLhQcffJDffvsNnU7H9OnTmT17NrNmzWLGjBnExMQAVPq9V69eXH311Rw6dIipU6eyZ88eXnzxRcB1hzxnzhy2bNlCcnIyy5YtIzc3F4fDwaxZs5g6dWqV83XkyBGWLl1Kbm4umqZxyy23MHnyZHbt2sUTTzxBu3btOHLkCFarlcWLFzN48OAqx3j//fd57bXX0Ol0hISEkJCQwLFjx1ixYgWRkZEcP34cHx8fHn30US655BLi4+Pp0qULf/rTnwCq/F7OZrPx6KOPsmPHDvR6PX369OHee+9lzZo1bN68GbPZTEFBAQsXLqxyXouLi92/Dxw4kCeffBK9Xg/ACy+8wOeff05ZWRklJSUsXLiQa665hqeffprvv/+ejIwMunXrxm233cZ9992H1WpFKcXUqVOZMWMGmZmZLF68mKysLCwWC23atOHJJ58kLCzM/Zq//fYbixYtIj09nUmTJvHOO++wbds2nnnmGRwOBwEBAdx777306dOnyus+9thj7uMUFxfz4osv8tFHH9GyZUsAjEYj//jHP/jss8+wWq2V3vuoUaNYuXIly5cvRynF3LlzCQ0NJT09nfvuu4/58+dzxRVXsGvXLnr06MHevXvp1q0bMTExfPHFF8TGxpKenk5WVhb9+vXjyy+/9Ph9qc7y5cs5dOgQzz33HA899JD737V3797MmzePb775hoyMDGbPns2cOXNwOBysWLGCL774gsDAQPr06UNSUhKrVq2qdNyjR48yffp0vv76a0wmEw6Hg5EjR/Lqq6/y+eefu89ZVFQUV1xxBYmJidx8883VxtksNXFSuqhVrCmMHDlSPfPMM0oppZxOp5o5c6Y6evSoUkqptLQ01aNHD5WVlVXlzuf6669XZWVlymq1qsmTJ6v333+/0rE/+OAD1b9/f/cdydKlS9U//vEPpZRSf/zjH9Xq1auVUkr9+uuv6rLLLlMffPBBlThvv/12lZCQoJRSKj8/X8XFxank5GQ1c+ZMlZiY6N6u4u9du3ZVH330kVJKqYKCAtW/f3+VkZGhlFJqxYoV6vHHH1c2m02NGzdO/fTTT+5jx8bGqn379lV6fZvNpq6++mq1ceNG9/m48sor1Xfffad27typevTo4b6jfOWVV9SMGTOqvIeDBw+qyy+/XJ06dUoppdRrr72mHnjgAbVz507VvXt3tXv3bqWUUm+99ZaaMmWK+/xWvLOs7k5z5cqV6o477lBWq1U5HA4VHx+vHnjggXPuo5RS69evVwMGDFDDhg1Td911l1q1apW7lnDixAk1a9YsVVJSopRS6pNPPlHjx49XSrnuuMeOHatsNptSSql7771Xvfjii0oppTIyMtTdd9+tHA6Hev31193lTqdT/fnPf1avvPJKlTgqfqZ+/fVXNXToUHXs2DGllFLbt29Xw4YNUwUFBVVet6Iff/xRDR482OP7LFexpjBy5Ei1f/9+pVTV70F5+dq1a9Vtt92mlFJq+fLl6r///a9KT09Xl19+ubLb7eqjjz5S8+fPr9X35aWXXlJLlixRt99+uyorK6vyb9S1a1e1atUq93vq1auXKi0tVW+//baaMWOGKi0tVWVlZeqWW25RM2fO9Pg+Z8yY4f4ebNmyRU2fPl0ppVSvXr3c3wGllHr88cfVI488cs5z1hxJn0IjGjBgAACapvHCCy/w888/88wzz/Doo4+ilKKkpKTKPldeeSUmkwmj0UjXrl3Jy8urss2ll15Kq1atAOjZsyd5eXnk5eWxf/9+pk2bBsAll1zi8e4aYPv27Vx//fUABAYG8sknnxAdHV3j9xMQEMDYsWP5+OOPcTgcfPzxx0ydOpXk5GSOHTvGokWLmDRpEjNnzqS0tJQDBw5UOk5ycjJlZWWMGTMGgMjISMaMGcPXX38NuO66evToUen9nW3Hjh1cccUVtG7dGoA5c+a427e7d+/ujvW6667j4MGD5OTknPf9lfvqq6+YPn06RqMRnU7HrFmz3LGdy/jx49m2bRsrVqygU6dOfPDBB8TFxXHixAnatGlDQkIC69ev57HHHmPNmjUUFRW59+3bty8Gg6sif8011/Dyyy9zxx13sGnTJu6//350Oh033XQT/fr1
47XXXmPJkiUcOXKkUs3Ek507dzJ48GDatWsHwJAhQwgNDeWnn36q8roV6XS6Su3/9eHKK69k7969OJ1OvvzyS0aOHElERARRUVH89NNP7Ny5k6uuuqpW35fXX3+dNWvWcOedd2IymTy+7tVXXw24vjdWq5Xi4mK2bt3KpEmTMJvNmEwm9/fBk2nTpvHRRx8B8OGHH7q/Y8rDikE63YV3ib3wIr6A+fn5Aa6q+JQpU/j555/p2bMn//jHPzAYDB4/VD4+Pu6fNU2r8TblTRQVty8vO5vBYEDTNPfvx48fd3f2VdzfZrN5fD/g+qKsXbuWr7/+ms6dO9OuXTscDgdBQUGsW7fO/d+7777LddddV+k4ni42SinsdnuNz4Fer6/0HkpLS0lKSvL4vsvPz9nHOvv9VRef0+msdttySUlJPPbYY5jNZoYOHcr8+fP56KOP6Nq1Kxs3buTnn39m+vTpFBYWMmzYMP785z9X2r/iuR05ciQbN24kNjaWgwcPMmHCBI4dO8a//vUvVq5cSUhICNdffz3Dhg3zeG7Ofu+eysrPdcXXrahz587Y7XZSUlIqlZeVlTF37lzS09PP+bqehIaG0q5dOzZt2oRer3cnqquuuoq9e/fy7bffMnz48Fp9XwYOHMiiRYu49957q/03MpvNAO7Pi1KqSiIsv5iXN7uV/5eenk5MTAw//PADSUlJ7N69m9jYWABat26NxWJxHyMjI8N9s3YhkaTQgPR6vfvLVlFKSgqFhYXcfffdjBo1im+//Rar1Vqvd2IBAQH069ePDz/8EHBd6Hfs2FHpwlluyJAhfPDBBwAUFBRw0003kZycXOkO8tixYxw+fLja1+vbty8Azz77rPvOqWPHjpjNZtatWwdAamoq48ePdx+zXMeOHTEajWzatAlwfRE3btzI0KFDa/x+L7/8cnbs2EFGRgYAa9as4V//+hcAhw4d4tChQwC888479OvXj6CgIEJCQtyxZGdnVzsy6Morr2TNmjXYbDacTierV69m2LBh54ynZcuWvPvuu3z66afustzcXDIzM+nZsye7d++mV69e3HzzzQwaNIjNmzfjcDg8Huvvf/87GzZsIC4ujgcffJCAgABSU1PZtm0bN910E5MnTyYsLIzt27dXe4xygwcP5ptvvuH48eOAq4aVmprKZZddds79TCYTc+fOZdGiRWRmZgJgtVp55JFHKCkpITIy8pz7V2f48OE899xzXHXVVe6yq666inXr1tGyZUtCQ0Nr9X3p1asXM2fOJDAwkGeeeabGcYwYMYKPP/4Yq9WK3W531wQiIyMr3dRERkZiNpuJi4sjPj6eMWPG4OvrC7hqIO+88w4AaWlpfP3114wcObJO56UpSUdzA7rmmmu48cYbee655yqVd+vWjauuuorY2FiCgoJo3749nTt3JiUlpdoqb10kJCRw33338dZbbxEZGUnbtm0r3XWXW7x4MUuWLGHChAkopbj11lvp1asXt912G/Hx8WzdupVOnTq5m2CqM23aNJ577jlGjx4NuC4kzz33HMuWLePll1/Gbrczf/58+vfvX2k/o9HIc889x8MPP8zTTz+Nw+Hg9ttvZ/DgwezatatG77Vbt24sWLDAfccdHh7OI488QnJyMi1btuTJJ5/k5MmThIaGuoc4zpo1i//7v/9j7NixtG3blkGDBnk89m233UZCQgKTJ0/GbrfTp08fHnjggXPG06JFC/773//y73//mxUrVuDr64vJZOJPf/oTQ4YMoUuXLmzatIlx48ZhNBoZMmQIeXl5lYZjlvvrX//KfffdxzvvvINer2f06NEMGjSI22+/nRUrVvDcc8+h1+vp168fx44dO2dcnTt35sEHH+SOO+7A4XDg4+PDCy+8QGBg4HnP8V/+8hd8fX3dHfFlZWUMGjSoyue7NoYPH86zzz5b6Xz27t2bzMxMbrzxRqD23xdN03jkkUeYPHkyI0aMqFEc1157LUePHmXy5Mn4+fnRtm1b98Xek2nTpvHmm2+yZMkSd9mdd97JkiVLiIuLw+FwsGDBAtq3b1/DM9F8aOp89U1xwXr++ecZM2YMl1xyCQUFBUycOJGXXnqJzp07N3VojabiaC4hqrNt2zaysrKYNGkSAA8//DBms5kFCxY0cWSNT2oKF7EOHTpwzz33oNPpcDgczJ0716sSghA11aVLF1555RVeeeUVHA4H3bt3r1QL8CZSUxBCCOEmHc1CCCHcJCkIIYRwk6QghBDCTZKCEEIIt4ti9FFOThFOZ+37y8PCAsjKqjouvKlJXLUjcdVec41N4qqdusal02mEhPh7/NtFkRScTlWnpFC+b3MkcdWOxFV7zTU2iat26jsuaT4SQgjh1mySwvr16xk3bhzXXHMNq1evbupwhBDCKzWL5qP09HSeeOIJPvzwQ0wmE9OnT+fyyy+X2bdCCNHImkVNYfv27QwePJjg4GD8/PwYO3ZspdUlhRBCNI5mkRQyMjIIDw93/x4REVGn9dmFEEL8Ps2i+cjT8kue1v2vTlhYQJ1fOzz8/EsGNwWJq3YkrjOUUpR/pXS66r9Hcs5qx1viahZJITIystIDTjIyMoiIiKjx/llZhXUalhUeHojFUlDr/RpafcZVWGLDqNdhNnl+6lptXKjny2Z3oNfp0Ok0UtIKCA/2wc/HiFMp9hzKoGeHUAJ8je7tD6bk4HA6ubRDaKWbE6UUTqU4kVHE5u9OMHtsNwx6V2U7p6AMP7Oh0nkOCvYjP9f1eMzDx3LILbTSNtwfnU7jWHohg3pEkF9s48vvTjCibxt+O5XHnsMWZo/thq/Z9dX8fM9xCktspOeUMHXEJQT5Gym1OtiflMVvqflMGtaR4xmFHDqWw6h+bdmwM4VDx3LIyswhwmxl1rhedIqOJKPASW6xncPJmeRk5xIzqA1+ZgNOp8YJSwEn0vNJzyqgQ4Q/vkYNX5PGT0kWOkT4kZZVSFSoD74mjZRTuUS08KFrxwiUwYy/vy/JllI6tQ1DZzRSZnVw9FQePydl0LqFno6RfvyUlElUdFsKrRq/HMuhW7sgLr80iiMZNnILSggPNBDRwoSf2UBGfim7fzzFwO4t2fVzKpd2CCEq1IfdB9IoKLESFmhCOZ10ah3I4ZRsfj2Ri8mgcUWvSEICTHy59zjtI/0J8NET4KOntMxG8qk88orKwOkkItgHq81BoJ8RS24JZoNGcUkZAT569CjCg82UllrJLyihpNSKSQ8BPjpQTmxWG74+RspsTux2B0a95nqokXIS7G/C16wnI6cE2+m/6XU6lFKYjDqKSqzoAL0OHA4nLfyN+Pn7kpZTht1uR4cTo04DnPgYdZSWWtFQ+Jj0KKUosznR6zR8THqKSl0P7nIEtCLu9rvq9J3U6bRqb6abxSqp6enp3HDDDbz//vv4+voyffp0HnroIfr06VOj/SUpVG/Ja9/SNjyA60ZcgiW3hK7tgut8rJYtA3j+ve8Z1COS6FauuxOllPuxlmfX7vKLreg0jZ9+y6L3JWF8sfcEBr2OK/q05lhGIZd2CAXg3S9+JSW9gHYRAYz
q1wZfs4GEt/ZxWecwdJrG5Cs7cuhYLkF+JkICzfiZDbz75a/8+FsWU67sROvIQNqEuB6IsvNAGu9+8SuDe7biij6tOZCczdqvj6KAf9zwB/75+m6iWwUyvE9rjqYWsO3HVIL8TfTrGs7+pEzGDmrP258fARSTh7Thisva8d4Xh2mht5Kfk4UBRU6RFX1pPkP7RnP0VA6O0iJKCwvw09kIMkO/Ppew+5dscnLyMWl2939mzYZNGfDTyvDRbISHBVJkhfR8G3alx0ezYdAc+F55EwVOH77Ye4JTmYUEaKVE6PMJCzJhKy3FZC8iUFdCkK6EAK0UDYW/zgpAoFZCiL4IX63qoygdSkOvNfnX3c2mdBi1+n3uc0NyKA0N0GmumpgTDaVpOJWGQsN1ZjV0Gq7fFTgr1NgUGk40QMPhVBhwoNcUStO5/+ZQGg6n66mNStOwOVzfMaNBh92hTj86VIcGFPu24op7Hrk4kwK4hqS++OKL2Gw2pk6dyty5c2u8rySFytZsPkLnNi0Y0D2Cvz6+Fb1Ow2zSk51fxn8WXOW+u63os93H6dy2BR1bBwGuCTGWvBIiQ1zP7N2flMVX+1P57rDrcZevxo/iaGo+j6zay70z+/PmpsO0DvNjxjVd8fMxsmn3cdZsPkLLFj5k5pUSO7g9iTtdTwXr1zWc736xMCe2O5EhviS8tc8dR/vIAMJb+LL3Fwt6HARoZbT0sWG0uy6OgbpSWgVAUVEJEfp8DJrDdaH19yW/2MYObSC/5Jc/XU7hr5XRRp9NuL4AZ6eh7DiY5f6bCTvtDNlcYs7GXxURpBUTpCsmWF9CC10JRqo+SvV8nEpDd9aF16E07JqREqceA06KlZkyZUCPE6PmwIADo+bArhkJpIij9nBynf5EmoqJ1OdhcpZ6fi2DD/kOM1a7kyKnD0aDRlhkBAFhERiCwjleoOfzb4/io9kI9VV0bu1Pu6iWYPThwMkiCgpKCPLVExbsR+uWgTjQOHS8gIiwAOxOCAv2p6jMSXiIPx9uS+GXkwXMiulBZl4ZX+w+SvtQI3ochAcaKC4uIdRPj04HEaEBhIcGkF0MWUV2fj6aQ1luBv5mHWMHRfP5dycpKS4hUCulc4dw/P39yC60cSAll1YtA+jRIYz0nBIiQgM4ciKP3CIbg3q25teTeZzILGFonyg+33uSolIHN8f1xGg0sO3HNH5LLaBnxzAiQwMwmwzs+SWTg8fyuPGabrRvFYRer8ep4FhGIZ/uOs6EYR2ICPXHYDBgc8LJrFL0ej1+/maMBgMhQX7YlYbNqdG+XRiHkzJJzymm5+mbmTP/5ors/FK++uEU3dqFcGnHM3+35JaQlVdK9+iQSvtk5JaQllXEpR1D0evOfB+VUpRaHe6aot3hxGZ34ms2kFNQxsnMQnp1DHNvX9drxQWRFH6PCzUpOJ2Kbw+mM6hnJAXFNp5f+xN/jutBjy4RWCwFWG0O7A4nfj6upo307GJMRj0hga4Hj9sdTnb8nMb/tqdwU0w3tv+cxhW9W7svspf3jGTXgcod9nNiu3N5j9PP09XAoNc4kVHEP1/fja/ZwLK5l1NUYiMlvYCXPznIXVP70LdzSxJWf8fh47nu43Rp24JSq4PjGYVEtwokJa0ADSchRiu9o4NIOppKuD4fM3aiDRaKlZkAXRlBWgmflfbiN3skJmzYMBClz6anbyadzZmYrPkE6EoJ0Erx03l+8LpTgRMdNr9w0gscGHFg1OxE6As4bg+lVPMhXMslQCvFUOFO9EdrWyyOICL1eXTyycHXWXzmoAYzJboAThQZaNkqkrBWrfj6cCH5hcVccVlbQsJbUqb58V1SDgbNSUBoOEdPZNK/Ryvat40Ekx+a2Y9Pvz3Bzn2/MqZ/G665sjuHUvIJbuFHwOlEmVdUxvWjumCzO/jmxzRWf/YLHVoFotdpzBzbje1v/YcxPj9SoAUQ2rot+uBIdCFtSbMFsONQFrFDOhEQ2hLNLwjNYMbucHLSUkS7yADXPehZtbV9Ryxk55cx/LLWGA1nmrZq+9m3O5ykZRfTNjzA/dk9V39FRXmFZTzy5l7mxHSnx+kL6tc/nOLn5GxunXipO+b8Iisd2oeSfY5lG5xKodM0yqwOcgvLiAz1q3ZbpRRFpfZKTYN11dTXiupIUqhGc0wKOQVl7DmUwaj+bSrdCVT8+2e7j/Ppt8eYO6EnqVnFfLI9mZjL2zNyYHsMSpG4K4XP95xwX5hvefQLAP6z4CoA7nl6m7t9saaM2BnQoxUpqbmYnCX07hDK9p9OEanPI09rQct2HfghKYs/dGnJviOuB7QH+RnJLz5zgR7WM5STvxwmXJdPZ2M6GooQXRGXmLPQO6teyEsxY1RWrBgw4sCgOSl0upJExSYNzT8Eu38EDqM/Vr0/244UMmpoD4LCwtB8gtD5BqL5BpFRoMgrstK1fQh2h5M3Nv3Cjh9TGWw8xB/9d5GjBRPaqSf4tkD5BFFkDOPglkQGmI/iQI8jIALfVh3RWrRCH9YWY6uuaD4B2B1ODh/LpWeHEDRNw+5wkl9kJTSo6nOta6Imny+7w+muuWXnl/J/z20HFFf1bcPsmO51et36iq0pSFy10xBJoVl0NF8M7A5XR1D5Xc9XP5xi3bajWHJLuPGarlW2//uz37h/ttoc5OS7mggKS2wsfGYb7SMD8DtdhVy96TCfbE92b79p93H6XBLmTgjld+rlLmkTRJiWT376SXw0Gz2NJwnRFdFOn4WvzgbpuAYj64ATMDrYtZ9SsL5gHIGaP/uPpAOuO8v8YhtmrHQ1pjEq8Fc6ZaRCkAMAm96XfKsOvW8APl1GkGYPIrvYSe9ubdh4sJQfk/MZOaIf//nkINNGdOKHPd9zrf5LMg2RBES25WhyGn4Rbbl6YiyaX3ClO91pMZ7PdWQYRIa5FvMy6HX87cb+pKXncddK2J/TnuiObfjb1X3d2xvK7Lz9vzy+Ku1Ou86XcMuUAR6Pa9DrKlX9DXpdnRNCTVVsyitvMgANXx/5aoqmIZ+8OnKermDpNA2H08m8f20h9vL2TBvZmW9+TCU5NR+A745YGD+0A9t/SuPq/m0xGnRYbY5Kx/I1G/j1lGv7bftTAUjPKaFzmxYAZOWXkZVf5t7+o69+o/zSuWB6X0qtDp7+cD+t9bm00ufyRy0Jv+JTcHqkWjG+lDj0HNF3pkQfSGa+lehWgYS0bMmxzGJCg3z5NqmIq/W7idM+ZWKIk91lnVhf3I8/93PQsjQF36xDaNZi8AnC2HUMhtZd0YIicfhHkJtZTHTrQDRNowPQ4XRsY9s7ucrmwM/HQGgLX7q0bcHXP6bxaPYkLokKYvaQ7jx58FvmD+yDzr9ym2ttGA06fEwGfM0GCop98TFV/libTXoc6ElxhNPRt+7DlxtaxZFLfmb5aoqmIZ+8Onry3R8oLLFx/00DyMgpASBx1zGmjezMK/876N4uv8jGmi+OsPPndN798leuHd6pSkdVdn4Z6dnFlcqC/U1VkgdA7OXtSdx1jO9+saDXaVzSwkrJ7g9Z3OIAYfrTbbG6lpiG3M
h/PjuOj2ajX+wUNuw6zj1/vIwAXyMZuSWEt/BB0zTKGyjKfkxl9acmxvj9hAkrA82/MdD8GxwFzbcF+vZ9CL88hgJzazSD2R2PHugUFeTxHBkNOowG151w+agn/9N3wL5mA+0iAnjh7yMwGX//cFkAX5OBgmIbvmcNv9Vpro72UqvD3T/THOkq1JKac5zi4iZJoRpf/XCK1xMPebxoKaX46Wg2ADt/TnPfmep1rlpDRXaHk50/n+ns/WzPcfzP6vj6/ogFgP5dw9n7i4WQQDM5BWVV5hZE6nIZnP8zvYN+4530wUwNyaDsw9XoND1l/u3Iancp7S7tg65lRzSdjn3/c/VBzOwYRv/uke7jRAT7Vnm/4cG+nHCE8WrBCIK0Yu4M2sivzjaMvnEWuuAoNE3DNzyQwt/Zrlp+sStvKqmvhADgY3Yd6+yaArj+beBMUmrupKYgmop88qrxwdYkAN787Bcu7xHJpR1DKSmz8/AbeypdVI+mFtDC3wSAXu8aFeHJ/Kl9yCuy8nriIRJ3plT62y8n8ggOMPGn8T2IyWxPwYlf2PBVCql5bWjnV0obx3H6m47SyZCBPtNAsMHK31okggJDh8GYB19Pt3M0v/jUYOJacOCZu/985ceyvCm08DcxNqTNefetjYo1hfpWngw8vd/ycQgXSlu93wUSp7j4yCevGg6H6yqybX8q2/an8mr8KFLSCkjNKiY160xTT3pOMUUlrhE3VpuT3EKr+2+BfkYKTo/aCQ/2pcPpCV+ZeaV0btuCX0/kubdtHeaPMe84UQc/xZ60izuD4LuyaPqYT2LATh6B7Ld3YuD1f+Wf//mKvqYUAjv0ZPzV11T7HsprHDVZMiTQw7A9k7H+l8bycyeF+qshlCsfIekp4ZQPsvO/QJplpKYgmop88s7y9Af7KSyxYT+rGSgjt4TjGZXHT7cND+Cn37JPjw93jd45YTmzTWSoHwXFrgt/yxY+mIx67p52GV9+d4LYwdGseedT4ny/5+2iIfRyplH80SYw+qB6xZDy/R76mVPINkXRJmYOARGdaXt62nymCuKz0t6MD40+53tZ+qdBlJTVbMiqp7vr+mzaKVfeJObroYnn9ypPfp5rCq6kcKHcgV8ocYqLj3zyzlI+Nl9/1sSc/21PRlH57r9Xp1BOWAppFebHjdd05d9rvietQi0ioMJdafkFts8lYfRu50dR4pPcE3QYgAeDP4Ii0Lfthe/o29H8Ann16wD6m3/Dp9M1RLc6M6TVtaSE6+dAP9M534u/j7HGd8aa5prCrziT4EyGhribP31ea77eYY2VH9rHQy2k/JxdKHfgF0qc4uIjn7xqOCpMhusRHcLXp4eK9uwQQr+u4RxIznH3JfTuFEbw6Z9zC88MHT27GUM5ndiPfEPZno9QxbnYu45m349H6W/6jeTAy+gz5q9oBhP+vgbylB9flPZiotlMdYLOkxRqy2DQYbM7aeFvIrfQirkBmo/Kk0JDTpn01NFcPrnxgulovkDiFBcf+eTVwJ/H9+SbH1P59WQeV/VtQ98uLRnVry0FxVbSs4uZOKwjpVZXM03FPoVBPSLY8XMaAKqsiOLEf+PM+A1deCd8rv4LhlZdWfv9Vt7PHcjwzpdwmcF1ka+YTM7VhBPoV7/t4wa9hs0OLQLM5BZaPa6R9HuV3803xEPQyysfBg/LL5xpPmrefQrtIwM4ll5YaUkKIRqTJIUKyjzMCwAIDjAxfmiHKuWBfqYqSxHknK4pPPTny2nT0p//m96XsEADJZuewpl5DJ+R8zB0HuJu/9Yb9BSWmjBXuPhX7Bg2Gaq/MNd7TUGvAxwE+5tIgQZp4ukRHcLH3yRXWSCsXpTXQjz8qV8X13Dfhujgrk8LbvgDORUmKgrR2CQpVFBQfOYuv3zkDtTsgT/lnZvlzUflbcI9O4RS+vV/saUexmfUrRg7D/G4/9lzElxzHlQj1xRcCahFwOkmqwZo4unWPoTn/zaiXp7vcLa+nVvy89Fsj/Mw5k3sSV6h1eM6VM1JbfqBhGgIkhQqKO9AnjK8E6P7tyWvyFrj0Tu608tT551uPjIbXQ/HsP30GbaDX2LqG+cxIajTzSjmsy7+er0rKZxdXlFAAzQfwZlk01DN/g2REABG9WvDoB4RHjvgjQY9LT0kCyFEZc37tqmRlSeFHu1D8DUbaBXq536+QE1UXF7Bx6SnbMdblO14C337vpgGXOdxn/Km9bMv/obTd7Se5gosvPEPxA5uX+93vVf2iQIgrMXpReAusAV0NU0774gsIcS5SU2hgsKS00+vquMduGvUi9XVD1BowfbzZgxdr8Bn+By0ai7g5R2uVZqPTt+1e2o+6tY+hG7t679NPm5INKMHtOWX4665FRdWShBC1AdJCrhmu36w9Td3f0Bdk0J5J6bZpMe67xPQ6TAPvA5NV/1pVpQ3H1VOGuXt++ZGHIWiaRo+JoN7jsYFVlEQQtQDSQpAqdXBhp0p7sE2dV2Xp3x8fLQxG9sv32DsOfK8S0KXT5yu0qegK68pNEELXwOMOhJCXBjq/Yqzd+9errvuOiZNmsRNN93EyZMnAcjPz2fevHnExsYyY8YMLBbXyqBWq5UFCxYQGxvLlClTSEpKqu+Qzquo1NWXoHB1ttZktJEnvmYDBhxM129C8w/G1G/iefcpHz9/dvNReU2hpo88rE/lr3gRPJRPCFFL9Z4UFixYwLJly1i3bh0TJkzg4YcfBuDJJ59kwIABJCYmMm3aNJYtWwbAqlWr8PX1JTExkUWLFhEfH1/fIVWrtMxO4q4UikrOjDCq64Qt5XTQxXGEiX57CaQInxF/Qud7/k5qZzWjjyJCXCNlzl5uozFcEtWC6MhA/jiqc6O/thCiadVrUrBarcyfP5/u3V0Turp160Zqqmt5iC1btjBhwgQAxo8fz1dffYXNZmPLli1MnOi6ox44cCA5OTmcOnWqPsOq1tubDvPel0l88d0Jd1ldk4Lt580MzfmYET6HyPdtiz6qR432c9cUzkoKcyf0ZO74nrQ+/djJxmQ26Xnw5oF0aFXzkVdCiItDvfYpmEwmJk2aBIDT6eSZZ55h9OjRAGRkZBAeHu56UYOBgIAAsrOzK5UDhIeHk5aWRlRUVI1ft7oHUJ9P+ZPN8io8lN5k1BMeHlir4zhtZRz7/mMshPFW/gDmTRtPRETNLqjlLTRtWrcgoMJwyg7tQunQLrSavZpObc9NY5G4aq+5xiZx1U59x1XnpJCYmMjy5csrlXXq1InXX38dq9VKfHw8drudW2+9tdpj6KoZplldeXWysgrrtJZO+XDPjAqPwtRpYKnl08Vsv3yDs6SQgktvIHmblZBAv1ofoyC/hJIi1+in8PDAWu/fGCSu2mmucUHzjU3iqp26xqXTadXeTNc5KcTGxhIbG1ulvKioiNtuu43g4GCef/55jEbX8M6IiAgyMzNp1aoVdrudwsJCgoODiYiIwGKxEB3tejaAxWIhIiKirmHVivH0yJ68ojNrzdSl+ch2+Cu0oEj6DB3Ky8Nq1wcQP6Mfuw6kN8jic0IIUVsN0tEcH
R3NypUrMZnONIeMGDGCtWvXArBhwwYGDBiA0WhkxIgRrFu3DoA9e/ZgNptr1XT0e5Q/L6Ck7MxCeLW9ODvz0nCkHsbY7co6jVrq2i6YWWO71Xo/IYRoCPXap3DgwAE2b95M586dmTx5MuCqIbz00kvMnz+f+Ph44uLiCAwM5LHHHgNg1qxZLF68mLi4OEwmEytWrKjPkM7J02zh8vV/asp2eBtoGsauw+orLCGEaDL1mhR69uzJ4cOHPf4tODiYF154oUq52WwmISGhPsOoMU8TwwznWKr6bMrpwPbLNvTt+px3kpoQQlwIvLoh29OzCoy1aD5yHP8RVZyLsfvw+gxLCCGajFcnBU/9B/paNB/ZDn+F5huEof1l9RmWEEI0Ga9OCp4Gsda0pqDKirCn/IChy9BzLngnhBAXEu9OCh6yQk1HH9lTD4FyYIj+Qz1HJYQQTcerk4KnukJNRx85Th4EvQl9xCX1HZQQQjQZr04K5ctWV1TTmoLj1EH0rbui6aXpSAhx8fDqpFCxplA+76wmQ1KdxXk4c07WeNE7IYS4UHh1UqjYpxB0ejE6Qw3WXXKkHnJtK0lBCHGR8eqkUHENvSD/00nBcP4+BcfJg2D0RdcyuqFCE0KIJuHVSaFi81F5UqjJkFT7qYPoW3dD0zXe85OFEKIxeHVSqNh81OJ0UtCfJyk4C7NQ+ekY2kjTkRDi4uPlSeFMVmhRw5qCI9W1tpO+dfeGC0wIIZqIdyeFCj8Hlnc0n2eegsOSDHoTutC2DReYEEI0Ee9OCqezQkig2b1i6vnmKTizUtCFtZP+BCHERcnLk4IrKzx480B3MjhXUlDKiSMzBb2MOhJCXKS8PCm4/q/TNHdfwrmGpKr8DLCVom/ZoRGiE0KIxuflSeFMr0L5ktnnmrzmsCQDyPwEIcRFq8GSwoEDB+jVq5f7d6vVyoIFC4iNjWXKlCkkJSUBrgtzQkICMTExjBs3jr179zZUSFWUpwRNo0JN4RxJITMFdAZ0IW0aITohhGh8DbKaW0lJCUuXLsVms7nLVq1aha+vL4mJiezevZv4+Hjee+89Nm7cSFJSEhs2bCAlJYV58+aRmJiIwdDwC82VVxQ0NHcyONeQVHcnsyyCJ4S4SDVITeHRRx9lzpw5lcq2bNnCxIkTARg4cCA5OTmcOnWKrVu3Mm7cOHQ6HR07diQqKop9+/Y1RFhVlDcfaRoEB5gBaBFgqnZbR2YK+jBpOhJCXLzq/ZZ38+bNlJaWEhMTU6k8IyOD8PBw9+/h4eGkpaWRkZFBRERElfLaCAsLqFOsSqUD0LJlAO3bhvDGkrGEBPp43NaWm05hWREtOnYjKDywTq9XG+GN8Bp1IXHVTnONC5pvbBJX7dR3XHVOComJiSxfvrxSWadOnSgsLOT111+v0TF0Ol2lzt6K5bWRlVWI0+np4Zrn49onO6uIIpNr3oGl1OZxS9tvPwNQbI6kzFJQh9equfDwQCwN/Bp1IXHVTnONC5pvbBJX7dQ1Lp1Oq/Zmus5JITY2ltjY2Epl7733Hi+++CIzZsxwl02aNInVq1cTERGBxWIhOtrV/GKxWIiIiCAyMhKLxeLevry8MbjzSA0etubMTAFNLzOZhRAXtXrtU5g2bRqff/4569atY926dQCsW7eOgIAARowY4S7bs2cPZrOZqKgohg8fzvr163E4HKSkpJCcnEzv3r3rM6xqufsUarCtI/s4uuDWaAbPfQ5CCHExaLRhNLNmzWLx4sXExcVhMplYsWIFADExMezfv9/dCb1s2TJ8fDy36zcUTTt/WnDmpsqkNSHERa9Bk8Lhw4fdP5vNZhISEqpso2kaCxcuZOHChQ0ZikfOCqOPzkXZrah8C7rOQxohKiGEaDpePaOZGvZNO/PSACWT1oQQFz2vTgrOCmsfnXO7nFOu7UJaN3RIQgjRpLw6KVSY0nxOztxToGnoWrRq+JiEEKIJeXVSqOmIVGfOKbSgCDS9saFDEkKIJuXVSeFMR/N5mo9yU9EHRzVGSEII0aS8Oimgzl9LUE4Hzrw0dMHSnyCEuPh5dVJQnL+WoAos4HSgC5GaghDi4ufdSUGp885RcBZkAqAFhp97QyGEuAh4eVI4/zblSUEXENbA0QghRNPz8qSgzt98VJgFmg7NP6SRohJCiKbj5Unh/EtcOAsy0fxD0HT6xglKCCGakHcnBWow+qgwC11gy8YIRwghmpx3J4UaNB85C7PQpD9BCOElvDwpcM6qgnI6UEU50skshPAa3p0UUOjOlRSKckA50aT5SAjhJbw7KSg4V1VBhqMKIbyNlycFdc6OZlWYBYAuQGoKQgjvUO9JISMjg3nz5jF58mSmT5/OiRMnAMjPz2fevHnExsYyY8YMLBYLAFarlQULFhAbG8uUKVNISkqq75Cqd54hqc7C07OZA0IbKSAhhGha9Z4U/vGPfzBy5EjWrl3LpEmTeOyxxwB48sknGTBgAImJiUybNo1ly5YBsGrVKnx9fUlMTGTRokXEx8fXd0jVcp5n9JEqyELzDUIzmBotJiGEaEr1mhSys7M5dOgQ06dPB+C6667j7rvvBmDLli1MmDABgPHjx/PVV19hs9nYsmULEydOBGDgwIHk5ORw6tSp+gyrWudb5cI1HFWajoQQ3qNek8Lx48eJiorikUceYeLEidx1110Yja4H02RkZBAe7lpUzmAwEBAQQHZ2dqVygPDwcNLS0uozrOopzjn6yFmYiS5QOpmFEN7DUNcdExMTWb58eaWy6OhoDhw4wJ133sl9993He++9R3x8PKtWrfJ4DJ3Oc06qrrw6YWEBtdq+nFMpdHod4eGBVf6mlJPCwmz8e1xOmIe/NzRPMTUHElftNNe4oPnGJnHVTn3HVeekEBsbS2xsbKWyY8eOMWXKFEaOHAm4mokefvhhACIiIsjMzKRVq1bY7XYKCwsJDg4mIiICi8VCdHQ0ABaLhYiIiFrFkpVViNNZgyVPPVBOhcVSUKXcWZyLctgo1QV5/HtDCg8PbPTXrAmJq3aaa1zQfGOTuGqnrnHpdFq1N9P12nzUvn17IiMj2bp1KwBffvkll156KQAjRoxg7dq1AGzYsIEBAwZgNBoZMWIE69atA2DPnj2YzWaiohrngTbnWhDPPRxVmo+EEF6kzjWF6jzzzDM8+OCD/Otf/yIgIIBHH30UgPnz5xMfH09cXByBgYHuUUmzZs1i8eLFxMXFYTKZWLFiRX2HVK1zrX3kLHAlBeloFkJ4k3pPCp06dfLYhxAcHMwLL7xQpdxsNpOQkFDfYdTIuR6yc2bimsxREEJ4D6+e0exU1a995CzJA70JTH6NG5QQQjQhr04KLp6zgirOQ/Nrcd6ltYUQ4mLi1UnB1adQzd9K8tH8WjRuQEII0cS8PCmcY/RRcR4636DGDUgIIZqYJIXqmo9K8tB8paYghPAu3p0U8Nx8pJx2VGmBNB8JIbyOdycFhcf2I1XimiEoNQUhhLfx8qTg+SE7qiQPAM1P+hSEEN7Fu5MCnjuaVXE+ADqpKQghvIx3J4VqlrlQpa6koMnoIyGE
l/HypOB56poqkaQghPBOXp0UwHPzkbMk37XEhcHc+AEJIUQT8uqk4HoGg6fRR/lovoGyxIUQwut4dVIAz4/jdCUF6WQWQngfr04Kzmo6FVRJAZpv83z0nhBCNCSvTgrVLXOhSmTdIyGEd/LqpABVO5qVUqdrCpIUhBDep96TwokTJ5gxYwaTJk1i1qxZnDx5EgCr1cqCBQuIjY1lypQpJCUlAa6LcEJCAjExMYwbN469e/fWd0jVcnpaOttaDMohSUEI4ZXqPSmsXLmSuLg41q1bx5gxY3jiiScAWLVqFb6+viQmJrJo0SLi4+MB2LhxI0lJSWzYsIFnn32W+Ph47HZ7fYflmYKzOxWc5UtcSFIQQnihek8KTqeTwsJCAEpKSvDx8QFgy5YtTJw4EYCBAweSk5PDqVOn2Lp1K+PGjUOn09GxY0eioqLYt29ffYflkfLwOE73Yng+khSEEN7HUN8HnD9/PtOnT2fVqlXYbDbeeecdADIyMggPD3dvFx4eTlpaGhkZGURERFQpbwzKwzQFmc0shPBmdU4KiYmJLF++vFJZp06dKCsrY+nSpYwePZqNGzdyxx138PHHH3s8hk6nQynlsbw2wsICarV9OYXCZDQQHn5m+GleipVSoGXbKAwBTTcstWJMzYnEVTvNNS5ovrFJXLVT33HVOSnExsYSGxtbqSw7O5vY2FhGjx4NwNixY3nwwQfJyckhIiICi8VCdHQ0ABaLhYiICCIjI7FYLO5jlJfXRlZW4enZybWjFNjtDiyWAndZmSUD0MguAq2koPqdG1B4eGClmJoLiat2mmtc0Hxjk7hqp65x6XRatTfT9dqnEBISgtlsZs+ePQDs3bsXf39/QkNDGTFiBOvWrQNgz549mM1moqKiGD58OOvXr8fhcJCSkkJycjK9e/euz7Cq5WnumirJR/MJQNPpGyUGIYRoTuq1T0HTNJ555hkeeughSktL8ff35+mnnwZg1qxZLF68mLi4OEwmEytWrAAgJiaG/fv3uzuhly1b5u6cbmgKVWWiQvm6R0II4Y3qvaO5T58+vPfee1XKzWYzCQkJVco1TWPhwoUsXLiwvkM5L6Wqrn2kSgvQfCQpCCG8k1fPaPbUya3KitDMdeu4FkKIC52XJwWqLI+tyorQfPybKCIhhGhaXp4UKi9zoZRClRVKTUEI4bW8Oylw1ugjhxUcdjBLTUEI4Z28OilwVvORKi0CQJOkIITwUl6dFM5+yI4qc63ZJElBCOGtvDopuJqPKtQUyk7XFHykT0EI4Z28OilwdkdzaXlNQZKCEMI7eXVScJ61zIW7piDNR0IIL+XVSaFKR7O7+UiSghDCO3l1UlCc9TjOsiLQG0BvarKYhBCiKXl3UjhrlYvyiWtnz3IWQghv4eVJQaE7a56C9CcIIbyZVycF51lTml01BUkKQgjv5dVJAVSV0UeSFIQQ3syrk8LZq6S6VkiVOQpCCO/l5Unh7MlrRbIYnhDCq/3upLBy5Ur3IzcB8vPzmTdvHrGxscyYMQOLxQKA1WplwYIFxMbGMmXKFJKSkgDXhTkhIYGYmBjGjRvH3r17f29INVZx9JGyW8FhldnMQgivVuekUFBQwKJFi3j11VcrlT/55JMMGDCAxMREpk2bxrJlywBYtWoVvr6+JCYmsmjRIuLj4wHYuHEjSUlJbNiwgWeffZb4+HjsdvvveEs1p8A9+khmMwshxO9ICps3b6ZDhw7cfPPNlcq3bNnChAkTABg/fjxfffUVNpuNLVu2MHHiRAAGDhxITk4Op06dYuvWrYwbNw6dTkfHjh2Jiopi3759v+Mt1ZyqsEqqzGYWQojfkRQmT57MvHnz0Ov1lcozMjIIDw8HwGAwEBAQQHZ2dqVygPDwcNLS0sjIyCAiIqJKeWOouHK2LIYnhBBgON8GiYmJLF++vFJZp06deP3112v8Ijqd59yj0+lcd+s13L46YWF1u5ArwNfXRHh4IIWZdkqA0NaRmMMD63S8+hTeDGLwROKqneYaFzTf2CSu2qnvuM6bFGJjY4mNja3xASMiIsjMzKRVq1bY7XYKCwsJDg4mIiICi8VCdHQ0ABaLhYiICCIjI92d0RXLayMrqxCns2pyOR+lFGWlNiyWAqzp6QDklurRWQpqfaz6FB4eiKWJY/BE4qqd5hoXNN/YJK7aqWtcOp1W7c10vQ9JHTFiBGvXrgVgw4YNDBgwAKPRyIgRI1i3bh0Ae/bswWw2ExUVxfDhw1m/fj0Oh4OUlBSSk5Pp3bt3fYflkaowo1mV5AMamk/zvBsQQojGcN6aQm3Nnz+f+Ph44uLiCAwM5LHHHgNg1qxZLF68mLi4OEwmEytWrAAgJiaG/fv3uzuhly1bho+PT32H5ZlS7slrqrQAzScATac/z05CCHHx+t1J4c4776z0e3BwMC+88EKV7cxmMwkJCVXKNU1j4cKFLFy48PeGUmsVH7KjSvLRfKWWIITwbl49oxkq1BRK8tF8gpo4HiGEaFpenRScCvcyF86SfDRfSQpCCO/m1UkBaT4SQohKvDopqNPNR8ppB2uxNB8JIbyeVycFpxPQQFlLANDMfk0bkBBCNDGvTgruh+yUJwWTb5NGI4QQTc2rk0L5Q3bKawpIUhBCeDkvTwqumoKyFgOgmaT5SAjh3bw7KUDlPgWj1BSEEN7Nu5OCOv2QHelTEEIIwOuTgmtlVelTEEIIF+9OCrhmNJ/pU5CkIITwbt6dFJzqzOgjvQFNb2zqkIQQokl5d1Lg9DIXthIZeSSEEHh7Uqg4T0GajoQQwtuTgjrdpyA1BSGEAG9PCuX/txZLJ7MQQlAPSWHlypU8/fTT7t+TkpK48cYbmTRpEtdffz0HDx4EwGq1smDBAmJjY5kyZQpJSUmA6249ISGBmJgYxo0bx969e39vSDVX/jhOa6lMXBNCCH5HUigoKGDRokW8+uqrlcrvv/9+5s6dy7p167j77rvdj9lctWoVvr6+JCYmsmjRIuLj4wHYuHEjSUlJbNiwgWeffZb4+HjsdvvveEs1V/44TmUtlj4FIYTgdySFzZs306FDB26++eZK5dOmTWP48OEAdOvWjdTUVAC2bNnCxIkTARg4cCA5OTmcOnWKrVu3Mm7cOHQ6HR07diQqKop9+/bVNaxaO9OnIElBCCHqnBQmT57MvHnz0Ov1lcqvvfZad9lTTz3F6NGjAcjIyCA8PNy9XXh4OGlpaWRkZBAREVGlvKGVz2Z2DUktlY5mIYQADOfbIDExkeXLl1cq69SpE6+//nq1+yilWLFiBT/88ANvvPFGtdvpdDr3xfns8toICwuo1fYADqfrdYN8ARQBIcEEhzefx3GGN6NYKpK4aqe5xgXNNzaJq3bqO67zJoXY2FhiY2NrfEC73c7ChQtJT0/njTfeIDDQFXBERAQWi4Xo6GgALBYLERERREZGYrFY3PuXl9dGVlYhTmfV5HIuDqcTAGthPgBFNh02S0GtjtFQwsMDsTSTWCqSuGqnucYFzTc2iat26hqXTqdVezNd70NSExISKCws5NVXX3UnBIARI0awbt0
6APbs2YPZbCYqKorhw4ezfv16HA4HKSkpJCcn07t37/oOq4ryCorRWQbIukdCCAE1qCnURnZ2NqtXr6Zt27ZMmzbNXb5u3TpmzZrF4sWLiYuLw2QysWLFCgBiYmLYv3+/uxN62bJl+Pj41GdYHpUnBYOjPClIn4IQQvzupHDnnXe6fw4NDeXAgQMetzObzSQkJFQp1zSNhQsXuoeuNh5XVjCo00nB2PCJSAghmjuvndHsPKv5CKkpCCGE9yYF3M1HpYD0KQghBHhxUlDlzUdO6VMQQohy3psUTtcU9I4y0HRgMDVtQEII0Qx4fVIwOMvA5OtaGE8IIbyc1yYF9+gjR6n0JwghxGlemxTKRx/pnWWSFIQQ4jSvTQrlay4ZHGXyLAUhhDitXmc0X0jsjvLRR6VgCmriaITwXg6HnZwcC3a7lYwMHc7T65I1JxdqXAaDiZCQcPT6ml/qvTgpuE6k3lEmw1GFaEI5ORZ8fPzw92+F0ajHbm9+F1+DQXfBxaWUoqgon5wcCy1btq7xMb22+ahyUpDmIyGait1uxd8/SEYA1jNN0/D3D8Jut9ZqPy9OCgpQ6GX0kRBNThJCw6jLefXipODEiANNOeT5zEKIZmHq1Amkpp5q0hi8Oin4aDZAlrgQQohyXtzRrCokBakpCCFcvvtuD88//xQOh5PWrVvj6+vH0aNJOBxOZsyYzahR1zBpUgzvvrsWPz9/brvtFoYNG87MmXP4/PONfP/9Pm677Q6WL38IiyWDzEwLffv+gfvvX8q+fXvdx+7U6RLuuutvLF36ABkZ6XTo0Amr1dX+/+uvR1ixYhkOhwOTycSiRQ/Srl37Rnn/XpwUnPhqrn8AmacgRPOwbf8ptu5rmOaTK/q0Zljvmo3COX78GO+//wmrVr1Gy5bhLFnyEHl5+fzlL7fQs2cv+vcfwL593/GHP/QnNTWV77//jpkz57Bz53auvvoatm/fRpcuXXn44QRsNhszZ07j8OFDlY4dEBDA448n0LVrdx577Cm+//47vvjiMwDeffctpk+fyahRo9m8eRM///yjJIWG5mo+Ot0rLzUFIUQF7dpFExAQwJ4931JWVsqGDR+jFJSWlnL06G8MGXIFe/d+i06nMWZMLJs3b8Jut/PDD9+zYMEizGYzBw78xLvvvkVy8lHy8vIoKSmudGyAffv2smTJIwD07duPqKg2AAwZMozHH1/Brl3bGTr0Sq666upGe+9enBSk+UiI5uaKPlEM7tmqqcPAbDYD4HQ6eOCBh7j00p7Y7U6ys7MICmpBQUEBa9asRq830L//QI4dS+aTT9bSqVMnzGYz77+/hi1bvmDixClMnTqIo0eT3KsolB8bXKODKk4+0+v1AIwcOZpevfrwzTdf8957b7Nz5zcsXHh/o7z3393RvHLlSp5++ukq5WlpaQwaNIgTJ04ArokUCQkJxMTEMG7cOPbu3eve9tVXXyUmJoaxY8eyadOm3xtSjdgdTkyaHZBHcQohPOvXbyBr174PQGZmJjfddAPp6WmEhIRgNpv55puv6NOnL/36DeT1119h6NArAdi9excTJ17LmDGxgMaRI794nHk8YMAgNm1KBODgwZ85edJ1vVy8+F4OHPiZyZOv489//ou76akx1DkpFBQUsGjRIl599dUqf3M6ndx3333YbDZ32caNG0lKSmLDhg08++yzxMfHY7fb2b9/Px9//DHr1q3jrbfeYsWKFeTm5tY1rBqz252YTycFeZaCEMKTW26ZS1lZGTfeOI358//CX/96F23atAVcTTwBAYH4+fnRv/9AMjMtDB16BQB//OONvPbaf7jllhk8/ngCvXr18TjU9E9/upWTJ08wc+YfefPN193NR7Nm3cyqVa9xyy0zePbZJ7nzznsa7T3Xuflo8+bNdOjQgZtvvrnK315++WWGDh3K0aNH3WVbt25l3Lhx6HQ6OnbsSFRUFPv27WPXrl1cc801mM1mzGYzgwYNYsuWLUyePLmuodWI3anO1BQM5vNsLYTwFv36DaBfvwEA+PsHsHjxQx6Xk5gz58/MmfNnAC65pDPbtu1x/61//4G8/faH1R6/nL9/AI888i+P27388hu/633UVZ2TQvlF++ymo59++oldu3bx0ksvsXr1and5RkYGERER7t/Dw8NJS0sjIyOD3r17VymvjbCwgFrH7+NjxIwrKYS3DkPT6Wt9jIYUHh7Y1CF4JHHVTnONC5pPbBkZOgyGM40WFX9uTi7UuHQ6Xa3+rc+bFBITE1m+fHmlsk6dOvH6669X2bakpISlS5fy5JNPotNVDrS8k+XsYKsrr42srEKczqrHOZfc/BJMmg30JjKzimu1b0MLDw/EYilo6jCqkLhqp7nGBc0rNqfT6b4LvxAXnmtKNYnL6XRW+bfW6bRqb6bPmxRiY2OJjY2tUYB79uwhMzOT2267DXDVDubNm8czzzxDZGQkFovFva3FYiEiIsJjeceOHWv0er+Hu0/BKE1HQghRrl7rQ1deeSVffPEF69atY926dURERPCf//yHTp06MXz4cNavX4/D4SAlJYXk5GR69+7N8OHD2bRpEyUlJWRnZ7Nz506GDBlSn2F5ZHe4+hSkP0EIIc5otHkKMTEx7N+/n4kTJwKwbNkyfHx86NOnDxMnTmTq1KnY7XbuuusuIiMjGzweu8OJj86BJjUFIYRw05SnRv0LTF36FN7+/Ahdf3md7lFm/CcvbqDI6qY5tfdWJHHVTnONC5pXbGlpKbRqFQ1c2G33TaEmcVU8v+XO1afQPLvTG4Hd4cSsk+YjIYSoyKuTgkmzy2xmIUSjO3XqJMuXLwVcq7Lecce8Oh9rw4b1LFu2pJ4i8/KkYNZsIDUFIUQjS0tLdS9p0dx49YJ4JuxoRlniQgjhkpGRztKlD1BSUoJOpzF//gKWLFnE6NFj2Lbta/R6Pbfeejtr1rzJiRPHuf32u7n66mvIzs7i0UcfIj09Db1ez7x5tzN48FBKS0tJSHiYX3/9BZ1Ox/TpM4mNHc/KlY9x6tRJ/v3vBEaOvJrc3Fz+7//u4uTJE7RvH81DDyVgMplITPyE9957G6dT0a1bd/72t4WYzWY+/fR//Pe/rxAQEEBkZCt8fevvQWFenBScGLGBQZqPhGguyg5to+zg1gY5trHbcIxdh51zm08+WcfQoVdw442z+e67Pezf/z3gWmnhzTff5ZFH/smbb77OU0+9wI8//sBTT/2bq6++hiee+Bf9+g1g+vSZnDx5gr/+9c+89tpq1qx5kxYtWrBq1bvk5uYyd+5NdOnSjfnz/49XX/0Pf//7Qr77bg/p6WmsWPEErVq15tZb57Bnz7e0atWa9evX8vzzr2I2m3nhhWd4++1VjB8/ieeff4rXXnuL0NAQ/va3u+o1KXhv85HdiRG7DEkVQrgNGDCIt99+kyVL7iMz08J11/0RcC1+BxAZ2Yq+ffthMBho1ao1BQWuEVzffbeb8eMnA9CmTVt69uzFgQM/sXfvHuLiJgEQHBzMlVcOZ9++vVVet3PnLkRFtUGn0xEd3ZG8vFz27dvDiRPHuf
XWm5kz50a2bdvKsWMp/PjjD/Tq1YfQ0DAMBsPplVjrj9fWFJTDhg4lfQpCNCPm7leg7zy0yV6/T5++vPnmu2zfvo3NmzexYcN6AAwGo3ub8mceVFR1SLzC4XCgVOXhokqBw2Gvsn/FY2qahlIKh8PJqFGjufvuBQAUFxfjcDjYu/fbSq/nKZ7fw2trCia7K8NrZv8mjkQI0Vw899xKNm7cQGzseO65ZyG//HK4Rvv17z+ATz5ZC8DJkyf48ccfuPTSPvTrN5D//W8dALm5uXz99Rb+8IcB6PUGHA7HOY/5hz/056uvtpCTk41Sin//eznvvvsWffr05cCBH7FYMnA6ne5HeNYXr60ptLafBEDfqnMTRyKEaC6uu+56/vnP+9mw4RN0Oh1//3s8zz//1Hn3u/vuBaxYsYwNG9ajaRoLF95Py5YtufnmP/Pvfycwe/b1OJ1OZs++hW7dupOXl0thYQEPPfSAu3npbF26dOXmm+dy111/QSlFly7dmDlzDmazmbvvXsDdd/8VX19foqPrd604r53RvPOl5VyiHaPln55F05pXhak5zTatSOKqneYaFzSv2GRGc93JjOZ6FKgrJSuwW7NLCEII0ZS8tvmoy8xFhLcMJK/A2tShCCFEs+G1t8kmsxmTj4w8EkKIirw2KQghmo+LoGuzWarLeZWkIIRoUgaDiaKifEkM9UwpRVFRPgZD7Zby8do+BSFE8xASEk5OjoXCwlx0Oh1OZ/Mb5XOhxmUwmAgJCa/VMSUpCCGalF5voGXL1kDzGipbkTfFJc1HQggh3CQpCCGEcLsomo90Oq1J9m1IElftSFy111xjk7hqpy5xnWufi2KZCyGEEPVDmo+EEEK4SVIQQgjhJklBCCGEmyQFIYQQbpIUhBBCuElSEEII4SZJQQghhJskBSGEEG6SFIQQQrh5ZVJYv34948aN45prrmH16tVNGsvs2bOJi4tj0qRJTJo0iR9++KFJ4yssLGT8+PGcOHECgO3btzNhwgTGjBnDE0884d7u4MGDXHfddYwdO5b77rsPu93eqHHde++9jBkzxn3ePvvss3PG21CeeeYZ4uLiiIuLY8WKFeeMoTHPmae4msM5W7lyJePGjSMuLo7XXnvtnK/fmOfLU1zN4XyVS0hIID4+Hqj+vJw6dYoZM2YQExPDbbfdRlFRUd1eTHmZtLQ0NXLkSJWTk6OKiorUhAkT1JEjR5okFqfTqYYNG6ZsNluziO/7779X48ePV5deeqk6fvy4KikpUSNGjFDHjh1TNptN3XLLLWrLli1KKaXi4uLUvn37lFJK3XvvvWr16tWNFpdSSo0fP16lp6dX2u5c8TaEb775Rl1//fWqrKxMWa1WNXv2bLV+/fomP2ee4tq0aVOTn7Ndu3ap6dOnK5vNpkpKStTIkSPVwYMHm/x8eYorKSmpyc9Xue3bt6vLL79cLVy4UClV/XmZN2+e+uSTT5RSSj3zzDNqxYoVdXo9r6spbN++ncGDBxMcHIyfnx9jx47l008/bZJYfvvtNzRNY+7cuUycOJE333yzSeN79913efDBB4mIiABg//79REdH065dOwwGAxMmTODTTz/l5MmTlJaW0rdvXwCuvfbaBo3x7LiKi4s5deoUDzzwABMmTOCpp57C6XRWG29DCQ8PJz4+HpPJhNFo5JJLLiE5ObnJz5mnuE6dOtXk52zQoEG88cYbGAwGsrKycDgc5OfnN/n58hSX2Wxu8vMFkJubyxNPPMFf/vIXgGrPi81mY/fu3YwdO7ZSeV1cFKuk1kZGRgbh4WeeRBQREcH+/fubJJb8/HyGDBnCkiVLKC0tZfbs2cTGxjZZfMuWLav0u6dzlZ6eXqU8PDyc9PT0RosrKyuLwYMHs3TpUvz8/Lj11lt5//338fPz8xhvQ+nSpYv75+TkZDZs2MCsWbOa/Jx5iuutt97i22+/bfJzZjQaeeqpp3j11VeJiYlpNp+xs+NyOBzN4jO2ePFi7rnnHlJTU4Gq38ny85KTk0NAQAAGg6FSeV14XU1BeVgUVtOaZkncP/zhD6xYsQI/Pz9CQ0OZOnUqTz31VJXtmiq+6s5VU5/Ddu3a8eyzzxIWFoavry+zZs1i69atTRbXkSNHuOWWW1i4cCHt27f3GENTxFYxrk6dOjWbc3bXXXexY8cOUlNTSU5O9vj6TR3Xjh07mvx8vffee7Ru3ZohQ4a4yxrjO+l1NYXIyEj27Nnj/j0jI8PdLNHY9uzZg81mc/+jK6Vo06YNmZmZzSK+yMhIj7GcXW6xWBo1xsOHD5OcnOyuKiulMBgM1cbbkPbu3ctdd93FokWLiIuL49tvv20W5+zsuJrDOUtKSsJqtdKjRw98fX0ZM2YMn376KXq9vsrrN+b58hTXhg0bCA4ObtLztWHDBiwWC5MmTSIvL4/i4mI0TfN4XkJDQyksLMThcKDX63/X+fK6msLQoUPZsWMH2dnZlJSUsGnTJoYPH94ksRQUFLBixQrKysooLCzko48+4l//+lezie+yyy7j6NGjpKSk4HA4+OSTTxg+fDht2rTBbDazd+9eANauXduoMSqleOSRR8jLy8Nms/HOO+9wzTXXVBtvQ0lNTeX222/nscceIy4uDmge58xTXM3hnJ04cYL7778fq9WK1Wpl8+bNTJ8+vcnPl6e4Bg4c2OTn67XXXuOTTz5h3bp13HXXXYwaNYrly5d7PC9Go5EBAwawYcOGSuV14ZU1hXvuuYfZs2djs9mYOnUqffr0aZJYRo4cyQ8//MDkyZNxOp3ceOON9O/fv9nEZzabefTRR7nzzjspKytjxIgRxMTEAPDYY49x//33U1RURM+ePZk9e3ajxdW9e3fmzZvHDTfcgN1uZ8yYMYwfPx6g2ngbwiuvvEJZWRmPPvqou2z69OlNfs6qi6upz9mIESPcn3e9Xs+YMWOIi4sjNDS0Sc+Xp7juuOMOQkJCmvwz5kl15+XBBx8kPj6e559/ntatW/P444/X6fjy5DUhhBBuXtd8JIQQonqSFIQQQrhJUhBCCOEmSUEIIYSbJAUhhBBukhSEqCdz587l119/rdU+t956Kx9++GEDRSRE7XndPAUhGspLL73U1CEI8btJUhBe74svvuD555/HZrPh4+PDwoUL2bZtG0eOHCEzM5OsrCy6d+/OsmXLCAgI4K233mLNmjUYjUbMZjNLly6lc+fOjBo1ipUrV9K7d2/eeecdVq1ahU6no2XLljzwwAN07NiR9PR04uPjycjIICoqiqysLHccSUlJLFu2jNzcXBwOB7NmzWLq1KkUFRVx7733kpKSgk6n49JLL2Xp0qXodFLRFw2gTgtuC3GROHr0qBo/frzKzs5WSin1yy+/qGHDhqlHH31UDR8+XFksFuVwONTf/vY39eijjyq73a4uvfRS9zr7H330kVqzZo1SSqmRI0eq/fv3q+3bt6vRo0errKwspZRSH3zwgYqNjVVOp1P99a9/VU888YRSSqnk5GTVt29f9cEHHyibzabGjRunfvrpJ6WUUvn5+So2Nlbt27dPffTRR+qWW25RS
illt9vVfffdp5KTkxvzNAkvIjUF4dW++eYbMjIymDNnjrtM0zSOHTtGTEwMLVu2BGDq1Kk88sgjLFy4kJiYGKZPn85VV13FsGHDmDBhQqVjfv3114wbN47Q0FDAtbb9smXLOHHiBNu3b2fhwoUAREdHc/nllwOuJa6PHTvGokWL3McpLS3lwIEDXHnllTzxxBPMmjWLoUOHctNNNxEdHd2Qp0V4MUkKwqs5nU6GDBnCk08+6S5LTU3lnXfewWq1VtquvLnmscce45dffmH79u289NJLvP/++zz//PPubZWHlWOUUtjt9irLHJevf+9wOAgKCmLdunXuv2VmZhIYGIjZbOazzz5j165d7Ny5k5tvvpn777+/0dfcEd5BGiWFVxs8eDDffPMNSUlJAGzdupWJEydSVlbG5s2bKSgowOl08u677zJy5Eiys7MZMWIEwcHBzJkzh7vvvpvDhw9XOuYVV1zBhg0byM7OBuCDDz4gODiY6OhorrzySt555x3A9UzdXbt2AdCxY0fMZrM7KaSmpjJ+/Hh++ukn3nrrLe69916uuOIKFixYwBVXXMGRI0ca6xQJLyML4gmvl5iYyAsvvOBeM3/RokXs2LGDnTt34nA4yMnJYeDAgdx///34+PiwZs0a3njjDXx8fNDr9dxzzz0MHTq0Ukfz6tWrWbNmDU6nk9DQUBYvXkyXLl3Izs7m3nvv5dixY7Rq1Qq73c6UKVO49tprOXTokLuj2W63M3v2bG644QaKi4tZtGgRhw8fxtfXl6ioKJYtW0aLFi2a+tSJi5AkBSE8ePrpp8nJyWHx4sVNHYoQjUqaj4QQQrhJTUEIIYSb1BSEEEK4SVIQQgjhJklBCCGEmyQFIYQQbpIUhBBCuElSEEII4fb/sUajzpoUYJcAAAAASUVORK5CYII=", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "开始测试!\n", + "环境:CliffWalking-v0, 算法:Sarsa, 设备:cpu\n", + "回合数:1/20, 奖励:-15.0\n", + "回合数:2/20, 奖励:-15.0\n", + "回合数:3/20, 奖励:-15.0\n", + "回合数:4/20, 奖励:-15.0\n", + "回合数:5/20, 奖励:-15.0\n", + "回合数:6/20, 奖励:-15.0\n", + "回合数:7/20, 奖励:-15.0\n", + "回合数:8/20, 奖励:-15.0\n", + "回合数:9/20, 奖励:-15.0\n", + "回合数:10/20, 奖励:-15.0\n", + "回合数:11/20, 奖励:-15.0\n", + "回合数:12/20, 奖励:-15.0\n", + "回合数:13/20, 奖励:-15.0\n", + "回合数:14/20, 奖励:-15.0\n", + "回合数:15/20, 奖励:-15.0\n", + "回合数:16/20, 奖励:-15.0\n", + "回合数:17/20, 奖励:-15.0\n", + "回合数:18/20, 奖励:-15.0\n", + "回合数:19/20, 奖励:-15.0\n", + "回合数:20/20, 奖励:-15.0\n", + "完成测试!\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYIAAAEXCAYAAACgUUN5AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAA4dUlEQVR4nO3deVxU9f7H8dcMKKKQpeKSt1CuW2qp1x0hwgUdFsXSm5WokKJmlnotxSUVJZIsJbRyuUrumguYiku4homYpdii5m6iCLhggMDM9/eHP+c6ggozCtR8no+Hj4dzzvme85nvzPA+53tmztEopRRCCCGslra0CxBCCFG6JAiEEMLKSRAIIYSVkyAQQggrJ0EghBBWToJACCGsnATBYxIUFERGRobZ7SdMmMDRo0cBGD9+PPv27XtUpQkzpaSk4OvrS/fu3fnxxx9N5t28eZMJEybg5+dH9+7d8ff35+uvvy6VOidNmkTHjh2ZOXOmRes5dOgQb775Jj169MDX15fg4GCOHz8OQGJiIr6+vgBERkYSExMDwLp163jppZd48803+e677/D09MTf358mTZrw66+/Gte9atUqGjZsyN69e43T4uLi6NWr1wNratiwIRkZGaxbt47BgwcXmB8fH8+0adMset5F9eWXX9KtWze6dOlCVFQUf+lv4ivxWDRo0EClp6eb3d7T01MdOXLkEVYkLLV+/XrVv3//QudNnjxZhYWFKYPBoJRS6tKlS8rDw0Pt3bu3BCu8rWHDhiolJcWidRw4cEB5eHio5ORk47TY2FjVpk0blZ6ervbv3698fHwKtAsICFAxMTFKKaXGjh2r5syZo5RSatiwYWrRokXG5QYPHqwGDx6sJk+ebJw2ceJEFRUV9cC67nyu1q5dq4KDgy15ihbZtWuX6tGjh/rzzz9VTk6OeuONN9SmTZtKrR5L2ZZ2EP0dhYSEANC/f3/mzZuHVqslNDSUlJQU8vLy8PHxYciQIeTn5zN16lQOHTpEuXLl+Mc//kF4eDjz5s0jNTWV0aNHExERwYwZM3jjjTdo2rQpAwYMwMPDg8OHD3P9+nVGjhyJt7c32dnZTJo0icOHD+Po6Ei9evUA+Oijj0xqy8/P5+OPP2bXrl3Y2NjQokULJk2axNy5c7l69SoffPABAFFRUcbHAQEBVK5cmVOnTvHqq6/y+eefs3fvXsqXL49er8fT05OFCxdSo0YNwsLCOH78OHl5ebRv3573338fW1vTt1lmZiZTpkzht99+Q6PR4O7uzqhRo7C1teX5558nODiYhIQEUlNT6devHwMGDCjQx4cPH2batGlkZ2dTrlw53n//fdq3b0/jxo3p378/iYmJZGVlMWrUKLy8vFi3bh1bt25l7ty5AAUe323VqlUsWbIErVZLtWrVmDhxIpcvX2bWrFlkZmYSEBDAkiVLTNpcuXKFqlWrkpeXR/ny5alRowZRUVE8+eSTAOzcuZO5c+eSm5tLRkYG/v7+jBgxgsTERMLCwqhYsSJZWVksW7aM8ePHc/bsWbRaLU2aNCE0NBSADz/8kMOHD/Pnn3+ilGLatGm0bNnSpI7XX38dpRSDBg1i0qRJVK5cmdDQUK5du4ZGoyEoKAh/f/8C212zZg3ly5c3ruezzz7jrbfeomnTpsZp3bt3x87ODr1eb7LNsWPHUr9+fS5fvkxycjIXLlzgypUrxMfHY2dnR2ZmJi+++CK7du1iwIAB5OTkcOTIERYvXszAgQOZNGkSAPv37+eTTz7h9OnThIaGkpWVRWpqKo0aNWLWrFnY2dkVeK0AtmzZwowZM5g3bx4//fST8XUNCAigefPmHDp0iJSUFFq2bMn06dPRarWsW7eOefPmUaFCBdq1a8fixYv55ZdfTNZrMBjw9PRk9uzZPP/88wCMHDmS1q1b88svv+Dr60vFihUBePnll9mwYQPe3t6F1ljmlXYS/V3dfUQQEBCg4uPjlVJK5eTkqICAALVp0yaVlJSkunXrZtyLjIiIUD/88INSyvSIoG/fviouLk6dP39eNWjQQO3YsUMppdSWLVvUSy+9pJRSasaMGWrUqFFKr9erzMxM5efnp8aMGVOgrq+++kq98cYbKjs7W+n1evXuu++q9evXq88++0xNmTLFuNzdj/v27atCQkKM89544w0VFxenlLq9Z9SnTx+l1O09wMWLFyullMrPz1ejR49W8+bNK1DD+++/r6ZOnaoMBoO6deuWCgoKUnPnzjX225IlS5RSSiUnJ6umTZuqnJwck/a5ubmqQ4cOaufOncblfH19lV6vVw0aNFBffPGFUkqpX3/9VbVs2bLQPcj77VHu27dPde7c2fjarV27Vul0OmUwGB64F/rrr78qLy8v1aJFCxUUFKRmz56tTp06pZRSymAwqL59+6rTp08rpW4fLTz33HPGPetGjRqpCxcuKKVuH3UEBQUZ+3D8+PHqzJkz6tChQ2r48OFKr9crpZSaO3euGjx4cKG13Hnv5eXlqU6dOqmtW7cat+vu7q4OHTpUYLv3at68uTpx4kSh85RSJkcEY8aMUQsWLFBK/e+9eu/0lJQU1aZNG6XX61V8fLwaNmyYUkqprl27qp9//ln98ccfytXVVRkMBvXRRx8Zjypyc3OVr6+v2rJli8lzu/NabNiwQfn4+KiLFy8aX687r1Hfvn3VO++8Y/xMuLm5qe+//16dOHFCtW/f3njUFBUVpRo0aFDo84yMjDR+Dq5du6batGmjbty4oYKCgtTGjRuNyyUkJCh/f//79ldZJ0cEj1lWV
hZJSUlcv36dyMhI47TffvsNNzc3bGxs6N27N25ubnTt2pUXXnjhgesrV64cHh4eADRu3Jhr164BsHv3bkJCQtBqtTg4ONCzZ0+OHTtWoP2+ffvo0aMHFSpUAGDWrFnA7SOAB2nVqpXx/71792b9+vV069aNdevW0bt3bwB27dpFcnIya9asASAnJ6fQde3Zs4cVK1ag0WgoX748ffr04auvviI4OBiATp06AdCkSRNyc3PJysoy2Rs8fvw4Wq2Wl156CYCmTZvyzTffGOf37dsXgEaNGtGgQQOSkpIe+NzutnfvXry9valSpQpwe08vLCyMCxcuPLBdo0aN2LJlCz///DNJSUkkJCTw5ZdfEhkZSceOHfnyyy/ZtWsXGzdu5OTJkyilyM7OBqBWrVrUrl0bgJYtWzJz5kwCAgJwdXWlf//+ODs74+zsTOXKlVm5ciXnz58nMTGRSpUqPbCmM2fOcOvWLby8vACoUaMGXl5e7N27l7Zt25ps915arRaDwVDkfnuYmjVrUr16dY4dO8bOnTuNr52npyffffcd1apV48UXX0Sj0fDee++RkJDA/PnzOXPmDKmpqWRlZRVYZ3JyMnv37mXcuHHUqlWr0O16enoaPxPOzs5cv36d3377jQ4dOlCzZk3g9vvlfu//V155hV69ejF27Fg2btyIp6cnjo6OhZ4P0Gr/uqdcJQgeM4PBgFKKlStXYm9vD0BGRgZ2dnZUqlSJ2NhYDh06xP79+xkxYsR9h0LuKFeunPENp9FojNNtbW1N3pz3e1PeO0yTlpaGwWBAo9GYtM/LyzNZ7s4hMEC3bt0IDw/n5MmTJCUlGYefDAYDkZGR/POf/wTgxo0bJjXe3Sf3Ps7Pzzc+vvNH/07bez90NjY2BdZ7/PhxXFxcjPPvXved5R/0/O4o7AOulDKp7175+flMmTKF//znPzRt2pSmTZsSGBjI559/zqpVq2jXrh09e/akc+fOtGrVildeeYVvv/3WuK27+/aZZ55h+/btJCYmsn//fgIDA5kwYQIVKlQgLCyMwMBAOnXqhIuLCxs2bLhvTXee+4Oey93bvVfz5s05fPgwDRo0MJk+ZcoUunTpYtLHReXu7s6BAwfYvXs37777LgAeHh5ER0fzxBNPGHcARo0ahV6vR6fT8dJLL5GSklLo6+Lo6Mgnn3zCiBEjeOmll/jHP/5RYJk7OzyA8T1gY2Njsr67n8ugQYNITU0F4J133qFTp040btyYXbt2sW7dOsaNGwfcDu8rV64Y212+fNkYLH9Ff90IK+NsbGzIz8/HwcGB5s2bs2jRIuD2H8fXXnuN+Ph4du7cyYABA2jRogXDhw/H39+f3377zaR9UXl4eLB27VoMBgPZ2dls3Lix0D/C7du3Z+PGjeTm5mIwGJg8eTKbNm3iqaee4ueff0YpRVZWFt999919t2VnZ4ePjw9jx47Fy8vLGHBubm5ER0ejlCI3N5ehQ4eydOnSAu3d3NxYtmyZcbnVq1fj6upa5Ofq4uKCRqMhISEBgJ9//pn+/fsb//Dd+QbLzz//zOnTp2ndujVVqlThxIkT3Lp1i/z8fHbu3Fnout3c3Ni8ebPxG19r167lySefxNnZ+b712NracubMGT7//HNjwOTn53P+/HkaN27M2bNnuXnzJiNGjKBjx44cOHDA2P/3Wr58OSEhIbi5ufHee+/h5ubGiRMnSEhIwNPTk9dff53nn3+eb7/9tsBY/b3q1q1LuXLl2LZtG3D7j9XWrVuL1NdDhw5l9uzZxm+uwf/Oq9wbDkX14osvsnbtWqpXr061atWA20eax48f58cff6RDhw4AfPfddwwbNgxvb280Gg2HDx8u9LnWqVOH9u3bExAQwJgxY4p8BOPm5sb333/P5cuXAUy+3TV//nxiY2OJjY01BtO///1v5s+fT05OjvGcTKdOndiwYQNZWVnk5uaybt06OnfubFa/lAVyRPCYdOnShddff53PP/+cGTNmMHXqVPz8/MjNzTV+BVGv17Nnzx7jSafKlSszdepUADp37szIkSOL/FW4wYMHExoaip+fH46OjlStWtVkb+iOPn368Mcff/Dyyy+jlKJNmzYEBASQnZ3N3r178fLyokaNGrRo0eKBX4fr3bs3S5cuZfLkycZp48ePJywsDD8/P/Ly8nB1dWXgwIEF2k6YMIFp06YZl3N3d2fIkCFFep4A5cuXJyoqig8//JCIiAjKlStHVFSU8WTnoUOHWL16NQaDgZkzZ1K5cmU6dOhA69at0el0ODk50bZt20KHzjp06MCAAQOMwVKlShXmzp370MP+yMhIPv74Y7p27Yq9vT1KKTp37sywYcOMw1g6nY4nnniCZ599lnr16nH27FmTE7QA/v7+HDhwAG9vb+zt7Xn66afp168faWlpjB49Gj8/P2xsbGjVqhXbtm3DYDDct7Zy5crx+eefM23aNKKiotDr9QwbNox27dqRmJj4wOfTqlUrpk2bRlhYGFlZWeTl5fHss8+yePFiqlWrxsmTJx/YvjAtW7bkwoULBAUFGafd+YLAtWvXcHBwAG6fkB02bBiVK1fG3t6e1q1bc+7cufuud8iQIezYsYMFCxYYA+ZB6tatS0hICG+++Sbly5fnueeeM+7MFKZjx45MmTKFQYMGmUw7fvw4vXv3Ji8vj06dOuHv71+EXiibNOpBn3bxl7Fp0yYcHBzw8PDAYDAwfPhwOnTowOuvv17apZWohg0b8v333xvH+IW41/nz54mNjeWtt95Cq9Wybds25s+fX2q/+ygL5Ijgb6J+/fp88MEHfPrpp+Tl5dG2bVvjSVwhxP/UrFmT1NRU49GVo6MjH374YWmXVarkiEAIIaycnCwWQggrJ0EghBBWToJACCGsnMVBEBkZWeiv8i5dukSbNm0K/UWmXq9n0qRJ+Pr64uPjQ3R0tKVlCCGEMJPZ3xrKzMwkPDycTZs2FfiuuMFgYPz48ff99ea6deu4du0aGzZsICcnh169etG6dWuaNGlS5O1fvfonBkPxz3NXrepAevrNYrcrKVKfZaQ+y0h9linL9Wm1Gp56qvDLkpgdBPHx8dSpU4fAwMAC8xYsWICrqyunT58utG39+vVp3rw5Wq2WihUr8swzz5CSklKsIDAYlFlBcKdtWSb1WUbqs4zUZ5myXl9hzA6CO7+iu3dY6OjRoyQmJjJ//nyWLVtWaNvmzZsb/3/o0CGOHDlCREREsbZftapDsZa/m5OTo9ltS4LUZxmpzzJSn2XKen2FeWgQxMXFER4ebjLNxcWl0HH97OxsQkNDmTVrVpGuxHfgwAFGjRrFjBkzqFy5ctGrBtLTb5qVvE5Ojly5klnsdiVF6rOM1GcZqc8yZbk+rVZz3x3ohwaBTqdDp9MVaUMHDx4kLS2NoUOHApCamkpwcDCzZ882Xhnyjm3btjF58mRmzpxJ27Zti7R+IcTfg1KKq1evkJubAxR9hy419dFeHvtRK/36NJQvX4GnnnIq9KKT9/NILzHh7u7Ojh07jI87duzIvHnzClwe9siRI0yePJmFCxfSqFGj
R1mCEOIv4ObN62g0GmrU+AcaTdG/vGhrqyU/v+wGQWnXp5SBa9fSuHnzOo6OTxa5XYn9jiA5Odl49b4vvvgCvV7PmDFj6NGjBz169CA+Pr6kShFClLLs7Js4Oj5ZrBAQD6fRaHF0fIrs7OJ9c+kve60hOUdQOqQ+y0h9t126dJYaNZ4t1vAFlP4e98OUhfqUUly+fI6aNU3vofGgcwQSx0KIUlHcEBBFY06/ShAIIUQp6tXLj5SUi6VagwSBEEJYObkxjRDC6h06dJAvvvgMvd5ArVq1sLevyKlTJzEYDLzxRj86duxCjx7dWL06hooVKzF0aBAdOrxI374D+Pbbrfz0048MHfo206dP4/Lly6SlXaF58xZMmBDKjz/+YFy3i8s/eeedUYSGTiQ19TJ16riQm5sLwO+/nyAiIgy9Xk/58uUZN24SzzzzbIk8fwkCIUSpS0hO4bsjKQ9dTqOB4n69xe2FWnR4vtZDlzt//hxr1mxkyZJFVKvmxIQJU/jzz5sMGRJE48ZNadmyFT/+eIgWLVqSkpLCTz8dom/fAezfv49Onbqwb9931K/fgNDQj8jLy6Nv394cO/abybodHBz49NPpNGjQiBkzPuOnnw6xY8d2AFavXk6fPn3p2LEz8fHb+PnnZAkCIYQoSc8844yDgwMHDx7g1q0cNm3aAEBOTg6nT5+ifXs3fvjhAFqtBi8vHfHx28jPz+fw4Z94771x2NnZcezYL6xevZwzZ05z/fp1srOzTNYN8OOPPzB58u1bYzZv/i+efro2AO3bd+DTTyNITNyHq6s7L73UqcSeuwSBEKLUdXi+aHvtj/PrmXZ2dgAYDHomTpxKw4a3f+yakZHOE09UJjMzk5Url2FjY0vLlq05d+4MGzfG4OLigp2dHWvWrGT37h34+fWkV682nD59kjvfzr+zbrj9rZ67f31sY2MDgKdnZ5o2fYGEhL18/fUK9u9PYMyYCY/lud5LThYLIcRd/vWv1sTErAEgLS2N/v1f4/LlSzz11FPY2dmRkLCHF15ozr/+1Zro6P/i6uoOQFJSIv7+r+DlpQM0nDhxvNDLTbRq1YZt2+IA+PXXn/njj9v3bPnggxB++eVn/P1fYeDAIcZhpZIgQSCEEHcJChrErVu3CAj4N+++O4S33nqH2rVvXyanffsOODg4UrFiRVq2bE1a2hVcXd0A+Pe/X+e//51HUNAbfPrpdJo2faHQr4W++eZg/vjjAn37/pulS6ONQ0MBAYEsWbKIoKA3mDNnFsOHjyyx5yy/LC5jpD7LSH2WKclfFt/7y9eiKAu/3H2QslJfYf0rvywWQghxXxIEQghh5SQIhBDCykkQCCGElZMgEEIIKydBIIQQVk6CQAghrJzFQRAZGUlUVFSB6ZcuXaJNmzZcuHDhge3feeedQtsLIcTfzcWLfxAeHgrcvuLp228Hm72uzZu/ISxs8iOpy+wgyMzMZNy4cSxcuLDAPIPBwPjx48nLy3vgOtasWUNiYqK5JQghxF/KpUspxktKlCVmX3QuPj6eOnXqEBgYWGDeggULcHV15fTp0/dtf/bsWdavX0+fPn3MLUEI8TeRdzyBvGN7HrqcRqOhuBdDKNfwRco16PDAZVJTLxMaOpHs7Gy0Wg3vvvsekyePo2PH25eXtrGxYfDgYaxcuZQLF84zbNgIOnXqQkZGOh99NJXLly9hY2PD0KFv07p1e3Jycpg+fRq//34crVZLnz590el8iYycwcWLf/DJJ9Px9OzEtWvXGD36Hf744wLPPuvM1KnTKV++PHFxG/n66xUYDIqGDRsxatQY7Ozs2LJlE1999V8qVXKgZs2a2NtXLFZf3I/ZRwT+/v4EBwcbr5x3x9GjR0lMTCw0IO7Iz89nwoQJTJkyBVtbuQCqEKJ0bdwYi6urG//97xKGDn2HI0d+AqBaNSeWLl1Nw4aNWLo0mk8/nc3EiaEsXboIgJkzP+Zf/2rFV1+tZOrU6YSFTSEjI52FC+dSuXJllixZTWTklyxcOJ/ffz/Bu++OpmHD5/jPf8YAcPnyJUaNGsOyZWvIyEjn4MEDnDp1km++ieGLLxYSHb2cp56qwooVS0hLu8IXX3zGnDnz+fLLhWRlZT2y5//Qv8JxcXGEh4ebTHNxcSE6OrrAstnZ2YSGhjJr1iy02vtnTFRUFF26dKFevXrFr/j/3e+aGUXh5ORodtuSIPVZRuqzTEnUl5qqxdb2f38jbBu7Y9/Y/bFv937atm3H2LGj+f3347i6uvHqq31Yt241bm5u2NpqqVWrFtWrV6dChfLUrl2bzMxMbG21HDp0kPHjJ2Jrq8XZ+VmaNHme33775f+nT8LWVku1alXw8PDg8OFD1KtXH41Gg62tFhsbLfXrN+DZZ58BoG5dFzIzr3P58kUuXDjPkCG3d6bz8vJo2LARv/ySzPPPN6N6dScAdDpvkpKSTPrxDq1WW6zX8aFBoNPp0Ol0RVrZwYMHSUtLY+jQoQCkpqYSHBzM7NmzcXFxMS63detWypcvz9q1a0lLSwPA3t6egQMHFrlwuehc6ZD6LCP13WYwGMy6ONvjuqhbkyYvsHTpavbt+47t27eycePtm9JoNDbk5xswGBQaze1t6/W3t397uoH8fGWsSSlFXl6e8fndma7X356u1xtQShnXo9Vq72oLer2BvDw9HTt2ZsSI9wDIyspCr9fzww8H0Ovv7jetcV33MhgMBV7HErvonLu7Ozt27CA2NpbY2FiqV6/OvHnzTEIAYMuWLWzYsIHY2Fj69OlDnz59ihUCQgjxKH3+eSRbt25Gp/Nl5MgxHD9+rEjtWrZsxcaNMQD88ccFjhz5iSZNXuBf/2rNpk2xAFy7do29e3fRokUrbGxs0ev1D1xnixYt2bNnF1evZqCU4pNPwlm9ejkvvNCcX35J5sqVVAwGg/EWl49CiQ3QJycn89lnnzF//vyS2qQQQhTJK6+8ypQpE9i8eSNarZb//GcsX3zx2UPbjRjxHhERYWze/A0ajYZx4z6gWrVqBAYO5JNPptOv36sYDAb69QuiYcNGXL9+jZs3M5k6dSI+Pj0KXWf9+g0IDBzEO+8MQSlF/foN6dt3AHZ2dowY8R4jRrxFhQr21KlT95E9f7kfQRkj9VlG6rOM3I/AMmWlPrkfgRBCiGKRIBBCCCsnQSCEKBV/0VHpMs+cfpUgEEKUOK3WBr0+v7TL+FvS6/PRam0evuBdJAiEECXO3t6BzMxrKFX6J1b/TpQykJl5FXv74v3gVq7vIIQocQ4Olbl69QqXL18Aij6UodVqMRjKbniUfn0aypevgIND5WK1kiAQQpQ4jUZDlSrVi91Ovn77eMjQkBBCWDkJAiGEsHISBEIIYeUkCIQQwspJEAghhJWTIBBCCCsnQSCEEFZOgkAIIaycBIEQQlg5CQIhhLByFgdBZGQkUVFRBaZfunSJNm3acOHChULbrV69mp49e9K1a1e5faUQQpQis4MgMzOTcePGsXDhwgLzDAYD48ePJy8vr9C2Bw8eZOHChSxevJh169bx9ddf8/vvv5tbihBCCAu
... [base64-encoded PNG output omitted: reward-curve plot rendered by this cell] ...",
+      "text/plain": [
+       "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# 获取参数\n", + "cfg = get_args() \n", + "# 训练\n", + "env, agent = env_agent_config(cfg)\n", + "res_dic = train(cfg, env, agent)\n", + " \n", + "plot_rewards(res_dic['rewards'], cfg, tag=\"train\") \n", + "# 测试\n", + "res_dic = test(cfg, env, agent)\n", + "plot_rewards(res_dic['rewards'], cfg, tag=\"test\") # 画出结果" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.7.12 ('rl_tutorials')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.12" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "4f613f1ab80ec98dc1b91d6e720de51301598a187317378e53e49b773c1123dd" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/projects/notebooks/3.DQN.ipynb b/projects/notebooks/3.DQN.ipynb new file mode 100644 index 0000000..9fed7b6 --- /dev/null +++ b/projects/notebooks/3.DQN.ipynb @@ -0,0 +1,277 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1、分析伪代码\n", + "\n", + "目前DQN算法基本遵循[Nature DQN](https://www.nature.com/articles/nature14236)的伪代码步骤,如下:\n", + "\n", + "
(figure: Nature DQN pseudocode; embedded image tag removed)\n",
+    "
" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1、定义算法\n", + "\n", + "教程中提到相比于Q learning,DQN本质上是为了适应更为复杂的环境,并且经过不断的改良迭代,到了Nature DQN(即Volodymyr Mnih发表的Nature论文)这里才算是基本完善。DQN主要改动的点有三个:\n", + "* 使用深度神经网络替代原来的Q表:这个很容易理解原因\n", + "* 使用了经验回放(Replay Buffer):这个好处有很多,一个是使用一堆历史数据去训练,比之前用一次就扔掉好多了,大大提高样本效率,另外一个是面试常提到的,减少样本之间的相关性,原则上获取经验跟学习阶段是分开的,原来时序的训练数据有可能是不稳定的,打乱之后再学习有助于提高训练的稳定性,跟深度学习中划分训练测试集时打乱样本是一个道理。\n", + "* 使用了两个网络:即策略网络和目标网络,每隔若干步才把每步更新的策略网络参数复制给目标网络,这样做也是为了训练的稳定,避免Q值的估计发散。想象一下,如果当前有个transition(这个Q learning中提过的,一定要记住!!!)样本导致对Q值进行了较差的过估计,如果接下来从经验回放中提取到的样本正好连续几个都这样的,很有可能导致Q值的发散(它的青春小鸟一去不回来了)。再打个比方,我们玩RPG或者闯关类游戏,有些人为了破纪录经常Save和Load,只要我出了错,我不满意我就加载之前的存档,假设不允许加载呢,就像DQN算法一样训练过程中会退不了,这时候是不是搞两个档,一个档每帧都存一下,另外一个档打了不错的结果再存,也就是若干个间隔再存一下,到最后用间隔若干步数再存的档一般都比每帧都存的档好些呢。当然你也可以再搞更多个档,也就是DQN增加多个目标网络,但是对于DQN则没有多大必要,多几个网络效果不见得会好很多。" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 1.1、定义模型\n", + "\n", + "前面说了DQN的模型不再是Q表,而是一个深度神经网络,这里我只用了一个三层的全连接网络(FCN),这种网络也叫多层感知机(MLP),至于怎么用Torch写网络这里就不多说明了,以下仅供参考。" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "class MLP(nn.Module):\n", + " def __init__(self, n_states,n_actions,hidden_dim=128):\n", + " \"\"\" 初始化q网络,为全连接网络\n", + " n_states: 输入的特征数即环境的状态维度\n", + " n_actions: 输出的动作维度\n", + " \"\"\"\n", + " super(MLP, self).__init__()\n", + " self.fc1 = nn.Linear(n_states, hidden_dim) # 输入层\n", + " self.fc2 = nn.Linear(hidden_dim,hidden_dim) # 隐藏层\n", + " self.fc3 = nn.Linear(hidden_dim, n_actions) # 输出层\n", + " \n", + " def forward(self, x):\n", + " # 各层对应的激活函数\n", + " x = F.relu(self.fc1(x)) \n", + " x = F.relu(self.fc2(x))\n", + " return self.fc3(x)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 1.2、定义经验回放\n", + "\n", + "经验回放首先是具有一定容量的,只有存储一定的transition网络才会更新,否则就退回到了之前的逐步更新了。另外写经验回放的时候一般需要包涵两个功能或方法,一个是push,即将一个transition样本按顺序放到经验回放中,如果满了就把最开始放进去的样本挤掉,因此如果大家学过数据结构的话推荐用队列来写,虽然这里不是。另外一个是sample,很简单就是随机采样出一个或者若干个(具体多少就是batch_size了)样本供DQN网络更新。功能讲清楚了,大家可以按照自己的想法用代码来实现,可以肯定地说,我这里不是最高效的,毕竟这还是青涩时期写出的代码。" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "class ReplayBuffer:\n", + " def __init__(self, capacity):\n", + " self.capacity = capacity # 经验回放的容量\n", + " self.buffer = [] # 缓冲区\n", + " self.position = 0 \n", + " \n", + " def push(self, state, action, reward, next_state, done):\n", + " ''' 缓冲区是一个队列,容量超出时去掉开始存入的转移(transition)\n", + " '''\n", + " if len(self.buffer) < self.capacity:\n", + " self.buffer.append(None)\n", + " self.buffer[self.position] = (state, action, reward, next_state, done)\n", + " self.position = (self.position + 1) % self.capacity \n", + " \n", + " def sample(self, batch_size):\n", + " batch = random.sample(self.buffer, batch_size) # 随机采出小批量转移\n", + " state, action, reward, next_state, done = zip(*batch) # 解压成状态,动作等\n", + " return state, action, reward, next_state, done\n", + " \n", + " def __len__(self):\n", + " ''' 返回当前存储的量\n", + " '''\n", + " return len(self.buffer)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 1.3、真--定义算法\n", + "\n", + "到了高级一点的算法,定义算法就比较麻烦,要先定义一些子模块。可以看到,其实去掉子模块的话,DQN跟Q learning的算法结构没啥区别,当然因为神经网络一般需要Torch或者Tensorflow来写,因此推荐大家先去学一学这些工具,比如\"eat_pytorch_in_20_days\"。\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "class DQN:\n", + " def __init__(self,n_actions,model,memory,cfg):\n", + "\n", + " self.n_actions = n_actions 
\n", + " self.device = torch.device(cfg.device) # cpu or cuda\n", + " self.gamma = cfg.gamma # 奖励的折扣因子\n", + " # e-greedy策略相关参数\n", + " self.sample_count = 0 # 用于epsilon的衰减计数\n", + " self.epsilon = lambda sample_count: cfg.epsilon_end + \\\n", + " (cfg.epsilon_start - cfg.epsilon_end) * \\\n", + " math.exp(-1. * sample_count / cfg.epsilon_decay)\n", + " self.batch_size = cfg.batch_size\n", + " self.policy_net = model.to(self.device)\n", + " self.target_net = model.to(self.device)\n", + " for target_param, param in zip(self.target_net.parameters(),self.policy_net.parameters()): # 复制参数到目标网路targe_net\n", + " target_param.data.copy_(param.data)\n", + " self.optimizer = optim.Adam(self.policy_net.parameters(), lr=cfg.lr) # 优化器\n", + " self.memory = memory # 经验回放\n", + "\n", + " def sample(self, state):\n", + " ''' 选择动作\n", + " '''\n", + " self.sample_count += 1\n", + " if random.random() > self.epsilon(self.sample_count):\n", + " with torch.no_grad():\n", + " state = torch.tensor(state, device=self.device, dtype=torch.float32).unsqueeze(dim=0)\n", + " q_values = self.policy_net(state)\n", + " action = q_values.max(1)[1].item() # 选择Q值最大的动作\n", + " else:\n", + " action = random.randrange(self.n_actions)\n", + " return action\n", + " def predict(self,state):\n", + " with torch.no_grad():\n", + " state = torch.tensor(state, device=self.device, dtype=torch.float32).unsqueeze(dim=0)\n", + " q_values = self.policy_net(state)\n", + " action = q_values.max(1)[1].item() # 选择Q值最大的动作\n", + " return action\n", + " def update(self):\n", + " if len(self.memory) < self.batch_size: # 当memory中不满足一个批量时,不更新策略\n", + " return\n", + " # 从经验回放中(replay memory)中随机采样一个批量的转移(transition)\n", + " \n", + " state_batch, action_batch, reward_batch, next_state_batch, done_batch = self.memory.sample(\n", + " self.batch_size)\n", + " state_batch = torch.tensor(np.array(state_batch), device=self.device, dtype=torch.float)\n", + " action_batch = torch.tensor(action_batch, device=self.device).unsqueeze(1) \n", + " reward_batch = torch.tensor(reward_batch, device=self.device, dtype=torch.float) \n", + " next_state_batch = torch.tensor(np.array(next_state_batch), device=self.device, dtype=torch.float)\n", + " done_batch = torch.tensor(np.float32(done_batch), device=self.device)\n", + " q_values = self.policy_net(state_batch).gather(dim=1, index=action_batch) # 计算当前状态(s_t,a)对应的Q(s_t, a)\n", + " next_q_values = self.target_net(next_state_batch).max(1)[0].detach() # 计算下一时刻的状态(s_t_,a)对应的Q值\n", + " # 计算期望的Q值,对于终止状态,此时done_batch[0]=1, 对应的expected_q_value等于reward\n", + " expected_q_values = reward_batch + self.gamma * next_q_values * (1-done_batch)\n", + " loss = nn.MSELoss()(q_values, expected_q_values.unsqueeze(1)) # 计算均方根损失\n", + " # 优化更新模型\n", + " self.optimizer.zero_grad() \n", + " loss.backward()\n", + " for param in self.policy_net.parameters(): # clip防止梯度爆炸\n", + " param.grad.data.clamp_(-1, 1)\n", + " self.optimizer.step() \n", + "\n", + " def save(self, path):\n", + " torch.save(self.target_net.state_dict(), path+'checkpoint.pth')\n", + "\n", + " def load(self, path):\n", + " self.target_net.load_state_dict(torch.load(path+'checkpoint.pth'))\n", + " for target_param, param in zip(self.target_net.parameters(), self.policy_net.parameters()):\n", + " param.data.copy_(target_param.data)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2、定义训练" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def train(cfg, env, agent):\n", + " ''' 训练\n", + " 
'''\n", + " print(\"开始训练!\")\n", + " print(f\"回合:{cfg.env_name}, 算法:{cfg.algo_name}, 设备:{cfg.device}\")\n", + " rewards = [] # 记录所有回合的奖励\n", + " steps = []\n", + " for i_ep in range(cfg.train_eps):\n", + " ep_reward = 0 # 记录一回合内的奖励\n", + " ep_step = 0\n", + " state = env.reset() # 重置环境,返回初始状态\n", + " while True:\n", + " ep_step += 1\n", + " action = agent.sample(state) # 选择动作\n", + " next_state, reward, done, _ = env.step(action) # 更新环境,返回transition\n", + " agent.memory.push(state, action, reward,\n", + " next_state, done) # 保存transition\n", + " state = next_state # 更新下一个状态\n", + " agent.update() # 更新智能体\n", + " ep_reward += reward # 累加奖励\n", + " if done:\n", + " break\n", + " if (i_ep + 1) % cfg.target_update == 0: # 智能体目标网络更新\n", + " agent.target_net.load_state_dict(agent.policy_net.state_dict())\n", + " steps.append(ep_step)\n", + " rewards.append(ep_reward)\n", + " if (i_ep + 1) % 10 == 0:\n", + " print(f'回合:{i_ep+1}/{cfg.train_eps},奖励:{ep_reward:.2f},Epislon:{agent.epsilon(agent.frame_idx):.3f}')\n", + " print(\"完成训练!\")\n", + " env.close()\n", + " res_dic = {'rewards':rewards}\n", + " return res_dic\n", + "\n", + "def test(cfg, env, agent):\n", + " print(\"开始测试!\")\n", + " print(f\"回合:{cfg.env_name}, 算法:{cfg.algo_name}, 设备:{cfg.device}\")\n", + " rewards = [] # 记录所有回合的奖励\n", + " steps = []\n", + " for i_ep in range(cfg.test_eps):\n", + " ep_reward = 0 # 记录一回合内的奖励\n", + " ep_step = 0\n", + " state = env.reset() # 重置环境,返回初始状态\n", + " while True:\n", + " ep_step+=1\n", + " action = agent.predict(state) # 选择动作\n", + " next_state, reward, done, _ = env.step(action) # 更新环境,返回transition\n", + " state = next_state # 更新下一个状态\n", + " ep_reward += reward # 累加奖励\n", + " if done:\n", + " break\n", + " steps.append(ep_step)\n", + " rewards.append(ep_reward)\n", + " print(f'回合:{i_ep+1}/{cfg.train_eps},奖励:{ep_reward:.2f}')\n", + " print(\"完成测试\")\n", + " env.close()\n", + " return {'rewards':rewards}" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.7.13 ('easyrl')", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.7.13" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "8994a120d39b6e6a2ecc94b4007f5314b68aa69fc88a7f00edf21be39b41f49c" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/projects/notebooks/DQN.ipynb b/projects/notebooks/DQN.ipynb deleted file mode 100644 index fba9f1f..0000000 --- a/projects/notebooks/DQN.ipynb +++ /dev/null @@ -1,36 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 1、分析伪代码\n", - "\n", - "目前DQN算法基本遵循[Nature DQN](https://www.nature.com/articles/nature14236)的伪代码步骤,如下:\n", - "\n", - "
(figure: Nature DQN pseudocode; embedded image tag removed)\n",
-    "
" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "vscode": { - "languageId": "plaintext" - } - }, - "outputs": [], - "source": [] - } - ], - "metadata": { - "language_info": { - "name": "python" - }, - "orig_nbformat": 4 - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/projects/notebooks/QLearning.ipynb b/projects/notebooks/QLearning.ipynb deleted file mode 100644 index a610084..0000000 --- a/projects/notebooks/QLearning.ipynb +++ /dev/null @@ -1,19 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "language_info": { - "name": "python" - }, - "orig_nbformat": 4 - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/projects/requirements.txt b/projects/requirements.txt new file mode 100644 index 0000000..42e65c6 --- /dev/null +++ b/projects/requirements.txt @@ -0,0 +1,10 @@ +gym==0.21.0 +torch==1.9.0 +torchvision==0.10.0 +torchaudio==0.9.0 +ipykernel==6.15.1 +jupyter==1.0.0 +matplotlib==3.5.2 +seaborn==0.11.2 +dill==0.3.5.1 +argparse==1.4.0 \ No newline at end of file