diff --git a/projects/README.md b/projects/README.md index fcee9df..276c7e5 100644 --- a/projects/README.md +++ b/projects/README.md @@ -1,4 +1,4 @@ -## 0、写在前面 +## 0. 写在前面 本项目用于学习RL基础算法,主要面向对象为RL初学者、需要结合RL的非专业学习者,尽量做到: **注释详细**,**结构清晰**。 @@ -6,7 +6,7 @@ 未来开发计划包括但不限于:多智能体算法、强化学习Python包以及强化学习图形化编程平台等等。 -## 1、项目说明 +## 1. 项目说明 项目内容主要包含以下几个部分: * [Jupyter Notebook](./notebooks/):使用Notebook写的算法,有比较详细的实战引导,推荐新手食用 @@ -18,7 +18,7 @@ * ```[algorithm_name].py```:即保存算法的脚本,例如```dqn.py```,每种算法都会有一定的基础模块,例如```Replay Buffer```、```MLP```(多层感知机)等等; * ```task.py```: 即保存任务的脚本,基本包括基于```argparse```模块的参数,训练以及测试函数等等,其中训练函数即```train```遵循伪代码而设计,想读懂代码可从该函数入手; * ```utils.py```:该脚本用于保存诸如存储结果以及画图的软件,在实际项目或研究中,推荐大家使用```Tensorboard```来保存结果,然后使用诸如```matplotlib```以及```seabron```来进一步画图。 -## 2、算法列表 +## 2. 算法列表 注:点击对应的名称会跳到[codes](./codes/)下对应的算法中,其他版本还请读者自行翻阅 @@ -26,26 +26,27 @@ | :-------------------------------------: | :----------------------------------------------------------: | :--: | | [Policy Gradient](codes/PolicyGradient) | [Policy Gradient paper](https://proceedings.neurips.cc/paper/1999/file/464d828b85b0bed98e80ade0a5c43b0f-Paper.pdf) | | | DQN-CNN | | 待更 | +| [DoubleDQN](codes/DoubleDQN) | [Double DQN Paper](https://arxiv.org/abs/1509.06461) | | | [SoftQ](codes/SoftQ) | [Soft Q-learning paper](https://arxiv.org/abs/1702.08165) | | | [SAC](codes/SAC) | [SAC paper](https://arxiv.org/pdf/1812.05905.pdf) | | | [SAC-Discrete](codes/SAC) | [SAC-Discrete paper](https://arxiv.org/pdf/1910.07207.pdf) | | | SAC-S | [SAC-S paper](https://arxiv.org/abs/1801.01290) | | | DSAC | [DSAC paper](https://paperswithcode.com/paper/addressing-value-estimation-errors-in) | 待更 | -## 3、算法环境 +## 3. 算法环境 算法环境说明请跳转[env](./codes/envs/README.md) -## 4、运行环境 +## 4. 运行环境 -主要依赖:Python 3.7、PyTorch 1.10.0、Gym 0.21.0。 +主要依赖:Python 3.7、PyTorch 1.10.0、Gym 0.25.2。 -### 4.1、创建Conda环境 +### 4.1. 创建Conda环境 ```bash conda create -n easyrl python=3.7 conda activate easyrl # 激活环境 ``` -### 4.2、安装Torch +### 4.2. 
安装Torch 安装CPU版本: ```bash @@ -63,30 +64,49 @@ conda install pytorch==1.10.0 torchvision==0.11.0 torchaudio==0.10.0 cudatoolkit ```bash pip install torch==1.10.0+cu113 torchvision==0.11.0+cu113 torchaudio==0.10.0 --extra-index-url https://download.pytorch.org/whl/cu113 ``` -### 4.3、安装其他依赖 - -项目根目录下执行: -```bash -pip install -r requirements.txt -``` -### 4.4、检验CUDA版本Torch安装 +### 4.3. 检验CUDA版本Torch安装 CPU版本Torch请忽略此步,执行如下Python脚本,如果返回True说明CUDA版本安装成功: ```python import torch print(torch.cuda.is_available()) ``` +### 4.4. 安装Gym -## 5、使用说明 +```bash +pip install gym==0.25.2 +``` +如需安装Atari环境,则需另外安装 -对于[codes](./codes/): -* 运行带有```main.py```脚本 -* 执行[scripts](codes\scripts)下对应的Bash脚本,例如```sh codes/scripts/DQN_task0.sh```,推荐创建名为"easyrl"的conda环境,否则需要更改sh脚本相关信息。对于Windows系统,建议安装Git(不要更改默认安装路径,否则VS Code可能不会显示Git Bash)然后使用git bash终端,而非PowerShell或者cmd终端! +```bash +pip install gym[atari,accept-rom-license]==0.25.2 +``` + +### 4.5. 安装其他依赖 + +项目根目录下执行: +```bash +pip install -r requirements.txt +``` + +## 5. 使用说明 + +对于[codes](./codes/),`cd`到对应的算法目录下,例如`DQN`: + +```bash +python task0.py +``` + +或者加载配置文件: + +```bash +python task0.py --yaml configs/CartPole-v1_DQN_Train.yaml +``` 对于[Jupyter Notebook](./notebooks/): * 直接运行对应的ipynb文件就行 -## 6、友情说明 +## 6. 
友情说明 推荐使用VS Code做项目,入门可参考[VSCode上手指南](https://blog.csdn.net/JohnJim0/article/details/126366454) \ No newline at end of file diff --git a/projects/assets/pseudocodes/pseudocodes.pdf b/projects/assets/pseudocodes/pseudocodes.pdf index b34cabc..cfe734a 100644 Binary files a/projects/assets/pseudocodes/pseudocodes.pdf and b/projects/assets/pseudocodes/pseudocodes.pdf differ diff --git a/projects/assets/pseudocodes/pseudocodes.tex b/projects/assets/pseudocodes/pseudocodes.tex index 929453a..7af7feb 100644 --- a/projects/assets/pseudocodes/pseudocodes.tex +++ b/projects/assets/pseudocodes/pseudocodes.tex @@ -38,13 +38,14 @@ \clearpage \section{模版备用} \begin{algorithm}[H] % [H]固定位置 - \floatname{algorithm}{{算法}} + \floatname{algorithm}{{算法}\footnotemark[1]} \renewcommand{\thealgorithm}{} % 去掉算法标号 \caption{} \begin{algorithmic}[1] % [1]显示步数 \STATE 测试 \end{algorithmic} \end{algorithm} +\footnotetext[1]{脚注} \clearpage \section{Q learning算法} \begin{algorithm}[H] % [H]固定位置 @@ -55,7 +56,7 @@ \STATE 初始化Q表$Q(s,a)$为任意值,但其中$Q(s_{terminal},)=0$,即终止状态对应的Q值为0 \FOR {回合数 = $1,M$} \STATE 重置环境,获得初始状态$s_1$ - \FOR {时步 = $1,t$} + \FOR {时步 = $1,T$} \STATE 根据$\varepsilon-greedy$策略采样动作$a_t$ \STATE 环境根据$a_t$反馈奖励$r_t$和下一个状态$s_{t+1}$ \STATE {\bfseries 更新策略:} @@ -134,7 +135,7 @@ \STATE 初始化策略参数$\boldsymbol{\theta} \in \mathbb{R}^{d^{\prime}}($ e.g., to $\mathbf{0})$ \FOR {回合数 = $1,M$} \STATE 根据策略$\pi(\cdot \mid \cdot, \boldsymbol{\theta})$采样一个(或几个)回合的transition - \FOR {时步 = $1,t$} + \FOR {时步 = $0,1,2,...,T-1$} \STATE 计算回报$G \leftarrow \sum_{k=t+1}^{T} \gamma^{k-t-1} R_{k}$ \STATE 更新策略$\boldsymbol{\theta} \leftarrow {\boldsymbol{\theta}+\alpha \gamma^{t}} G \nabla \ln \pi\left(A_{t} \mid S_{t}, \boldsymbol{\theta}\right)$ \ENDFOR @@ -164,6 +165,65 @@ \end{algorithm} \footnotetext[1]{这里结合TD error的特性按照从$t+1$到$1$计算法Advantage更方便} +\clearpage + +\section{PPO-Clip算法} +\begin{algorithm}[H] % [H]固定位置 + \floatname{algorithm}{{PPO-Clip算法}\footnotemark[1]\footnotemark[2]} + \renewcommand{\thealgorithm}{} % 去掉算法标号 
+ \caption{} + \begin{algorithmic}[1] % [1]显示步数 + \STATE 初始化策略网络(Actor)参数$\theta$和价值网络(Critic)参数$\phi$ + \STATE 初始化Clip参数$\epsilon$ + \STATE 初始化epoch数量$K$ + \STATE 初始化经验回放$D$ + \STATE 初始化总时步数$c=0$ + \FOR {回合数 = $1,2,\cdots,M$} + \STATE 重置环境,获得初始状态$s_0$ + \FOR {时步 $t = 1,2,\cdots,T$} + \STATE 计数总时步$c \leftarrow c+1$ + \STATE 根据策略$\pi_{\theta}$选择$a_t$ + \STATE 环境根据$a_t$反馈奖励$r_t$和下一个状态$s_{t+1}$ + \STATE 存储$(s_t,a_t,r_t,s_{t+1})$到经验回放$D$中 + \IF{$c$被$C$整除\footnotemark[3]} + \FOR {$k= 1,2,\cdots,K$} + \STATE 测试 + \ENDFOR + \STATE 清空经验回放$D$ + \ENDIF + \ENDFOR + \ENDFOR + \end{algorithmic} +\end{algorithm} +\footnotetext[1]{Proximal Policy Optimization Algorithms} +\footnotetext[2]{https://spinningup.openai.com/en/latest/algorithms/ppo.html} +\footnotetext[3]{\bfseries 即每$C$个时步更新策略} +\clearpage +\section{DDPG算法} +\begin{algorithm}[H] % [H]固定位置 + \floatname{algorithm}{{DDPG算法}\footnotemark[1]} + \renewcommand{\thealgorithm}{} % 去掉算法标号 + \caption{} + \begin{algorithmic}[1] % [1]显示步数 + \STATE 初始化critic网络$Q\left(s, a \mid \theta^Q\right)$和actor网络$\mu(s|\theta^{\mu})$的参数$\theta^Q$和$\theta^{\mu}$ + \STATE 初始化对应的目标网络参数,即$\theta^{Q^{\prime}} \leftarrow \theta^Q, \theta^{\mu^{\prime}} \leftarrow \theta^\mu$ + \STATE 初始化经验回放$R$ + \FOR {回合数 = $1,M$} + \STATE 选择动作$a_t=\mu\left(s_t \mid \theta^\mu\right)+\mathcal{N}_t$,$\mathcal{N}_t$为探索噪声 + \STATE 环境根据$a_t$反馈奖励$r_t$和下一个状态$s_{t+1}$ + \STATE 存储transition$(s_t,a_t,r_t,s_{t+1})$到经验回放$R$中 + \STATE 更新环境状态$s_t \leftarrow s_{t+1}$ + \STATE {\bfseries 更新策略:} + \STATE 从$R$中取出一个随机批量的$(s_i,a_i,r_i,s_{i+1})$ + \STATE 求得$y_i=r_i+\gamma Q^{\prime}\left(s_{i+1}, \mu^{\prime}\left(s_{i+1} \mid \theta^{\mu^{\prime}}\right) \mid \theta^{Q^{\prime}}\right)$ + \STATE 更新critic参数,其损失为:$L=\frac{1}{N} \sum_i\left(y_i-Q\left(s_i, a_i \mid \theta^Q\right)\right)^2$ + \STATE 更新actor参数:$\left.\left.\nabla_{\theta^\mu} J \approx \frac{1}{N} \sum_i \nabla_a Q\left(s, a \mid \theta^Q\right)\right|_{s=s_i, a=\mu\left(s_i\right)} \nabla_{\theta^\mu} \mu\left(s \mid 
\theta^\mu\right)\right|_{s_i}$ + \STATE 软更新目标网络:$\theta^{Q^{\prime}} \leftarrow \tau \theta^Q+(1-\tau) \theta^{Q^{\prime}}$, + $\theta^{\mu^{\prime}} \leftarrow \tau \theta^\mu+(1-\tau) \theta^{\mu^{\prime}}$ + \ENDFOR + \end{algorithmic} +\end{algorithm} +\footnotetext[1]{Continuous control with deep reinforcement learning} \clearpage \section{SoftQ算法} \begin{algorithm}[H] diff --git a/projects/codes/A2C/README.md b/projects/codes/A2C/README.md new file mode 100644 index 0000000..5252838 --- /dev/null +++ b/projects/codes/A2C/README.md @@ -0,0 +1,7 @@ +## 脚本描述 + +* `task0.py`:离散动作任务 + +* `task1.py`:离散动作任务,与`task0.py`唯一的区别就是Actor的激活函数是tanh而不是relu,在`CartPole-v1`上效果更好 + +* `task2.py`:连续动作任务,#TODO待调试 \ No newline at end of file diff --git a/projects/codes/A2C/Test_CartPole-v1_A2C_20221030-212553/config.yaml b/projects/codes/A2C/Test_CartPole-v1_A2C_20221030-212553/config.yaml new file mode 100644 index 0000000..865f5bb --- /dev/null +++ b/projects/codes/A2C/Test_CartPole-v1_A2C_20221030-212553/config.yaml @@ -0,0 +1,24 @@ +general_cfg: + algo_name: A2C + device: cuda + env_name: CartPole-v1 + eval_eps: 10 + load_checkpoint: true + load_path: Train_CartPole-v1_A2C_20221030-211435 + max_steps: 200 + mode: test + save_fig: true + seed: 1 + show_fig: false + test_eps: 20 + train_eps: 1000 +algo_cfg: + actor_hidden_dim: 256 + actor_lr: 0.0003 + batch_size: 64 + buffer_size: 100000 + critic_hidden_dim: 256 + critic_lr: 0.001 + gamma: 0.99 + hidden_dim: 256 + target_update: 4 diff --git a/projects/codes/A2C/Test_CartPole-v1_A2C_20221030-212553/logs/log.txt b/projects/codes/A2C/Test_CartPole-v1_A2C_20221030-212553/logs/log.txt new file mode 100644 index 0000000..0ecfa0a --- /dev/null +++ b/projects/codes/A2C/Test_CartPole-v1_A2C_20221030-212553/logs/log.txt @@ -0,0 +1,23 @@ +2022-10-30 21:25:53 - r - INFO: - n_states: 4, n_actions: 2 +2022-10-30 21:25:55 - r - INFO: - Start testing! 
+2022-10-30 21:25:55 - r - INFO: - Env: CartPole-v1, Algorithm: A2C, Device: cuda +2022-10-30 21:25:56 - r - INFO: - Episode: 1/20, Reward: 200.0, Step: 200 +2022-10-30 21:25:56 - r - INFO: - Episode: 2/20, Reward: 200.0, Step: 200 +2022-10-30 21:25:56 - r - INFO: - Episode: 3/20, Reward: 200.0, Step: 200 +2022-10-30 21:25:56 - r - INFO: - Episode: 4/20, Reward: 200.0, Step: 200 +2022-10-30 21:25:56 - r - INFO: - Episode: 5/20, Reward: 200.0, Step: 200 +2022-10-30 21:25:56 - r - INFO: - Episode: 6/20, Reward: 200.0, Step: 200 +2022-10-30 21:25:56 - r - INFO: - Episode: 7/20, Reward: 200.0, Step: 200 +2022-10-30 21:25:56 - r - INFO: - Episode: 8/20, Reward: 200.0, Step: 200 +2022-10-30 21:25:56 - r - INFO: - Episode: 9/20, Reward: 200.0, Step: 200 +2022-10-30 21:25:56 - r - INFO: - Episode: 10/20, Reward: 200.0, Step: 200 +2022-10-30 21:25:57 - r - INFO: - Episode: 11/20, Reward: 200.0, Step: 200 +2022-10-30 21:25:57 - r - INFO: - Episode: 12/20, Reward: 190.0, Step: 190 +2022-10-30 21:25:57 - r - INFO: - Episode: 13/20, Reward: 200.0, Step: 200 +2022-10-30 21:25:57 - r - INFO: - Episode: 14/20, Reward: 200.0, Step: 200 +2022-10-30 21:25:57 - r - INFO: - Episode: 15/20, Reward: 96.0, Step: 96 +2022-10-30 21:25:57 - r - INFO: - Episode: 16/20, Reward: 200.0, Step: 200 +2022-10-30 21:25:57 - r - INFO: - Episode: 17/20, Reward: 200.0, Step: 200 +2022-10-30 21:25:57 - r - INFO: - Episode: 18/20, Reward: 200.0, Step: 200 +2022-10-30 21:25:57 - r - INFO: - Episode: 19/20, Reward: 112.0, Step: 112 +2022-10-30 21:25:57 - r - INFO: - Episode: 20/20, Reward: 200.0, Step: 200 diff --git a/projects/codes/A2C/Test_CartPole-v1_A2C_20221030-212553/models/actor_checkpoint.pt b/projects/codes/A2C/Test_CartPole-v1_A2C_20221030-212553/models/actor_checkpoint.pt new file mode 100644 index 0000000..89d0854 Binary files /dev/null and b/projects/codes/A2C/Test_CartPole-v1_A2C_20221030-212553/models/actor_checkpoint.pt differ diff --git 
a/projects/codes/A2C/Test_CartPole-v1_A2C_20221030-212553/models/critic_checkpoint.pt b/projects/codes/A2C/Test_CartPole-v1_A2C_20221030-212553/models/critic_checkpoint.pt new file mode 100644 index 0000000..720f388 Binary files /dev/null and b/projects/codes/A2C/Test_CartPole-v1_A2C_20221030-212553/models/critic_checkpoint.pt differ diff --git a/projects/codes/A2C/Test_CartPole-v1_A2C_20221030-212553/results/learning_curve.png b/projects/codes/A2C/Test_CartPole-v1_A2C_20221030-212553/results/learning_curve.png new file mode 100644 index 0000000..bfee34b Binary files /dev/null and b/projects/codes/A2C/Test_CartPole-v1_A2C_20221030-212553/results/learning_curve.png differ diff --git a/projects/codes/A2C/Test_CartPole-v1_A2C_20221030-212553/results/res.csv b/projects/codes/A2C/Test_CartPole-v1_A2C_20221030-212553/results/res.csv new file mode 100644 index 0000000..ce0e7d1 --- /dev/null +++ b/projects/codes/A2C/Test_CartPole-v1_A2C_20221030-212553/results/res.csv @@ -0,0 +1,21 @@ +episodes,rewards,steps +0,200.0,200 +1,200.0,200 +2,200.0,200 +3,200.0,200 +4,200.0,200 +5,200.0,200 +6,200.0,200 +7,200.0,200 +8,200.0,200 +9,200.0,200 +10,200.0,200 +11,190.0,190 +12,200.0,200 +13,200.0,200 +14,96.0,96 +15,200.0,200 +16,200.0,200 +17,200.0,200 +18,112.0,112 +19,200.0,200 diff --git a/projects/codes/A2C/Test_CartPole-v1_A2C_20221031-233316/config.yaml b/projects/codes/A2C/Test_CartPole-v1_A2C_20221031-233316/config.yaml new file mode 100644 index 0000000..709a1e3 --- /dev/null +++ b/projects/codes/A2C/Test_CartPole-v1_A2C_20221031-233316/config.yaml @@ -0,0 +1,25 @@ +general_cfg: + algo_name: A2C + device: cuda + env_name: CartPole-v1 + eval_eps: 10 + eval_per_episode: 5 + load_checkpoint: true + load_path: Train_CartPole-v1_A2C_20221031-232138 + max_steps: 200 + mode: test + save_fig: true + seed: 1 + show_fig: false + test_eps: 20 + train_eps: 1000 +algo_cfg: + actor_hidden_dim: 256 + actor_lr: 0.0003 + batch_size: 64 + buffer_size: 100000 + critic_hidden_dim: 256 + 
critic_lr: 0.001 + gamma: 0.99 + hidden_dim: 256 + target_update: 4 diff --git a/projects/codes/A2C/Test_CartPole-v1_A2C_20221031-233316/logs/log.txt b/projects/codes/A2C/Test_CartPole-v1_A2C_20221031-233316/logs/log.txt new file mode 100644 index 0000000..d84edb2 --- /dev/null +++ b/projects/codes/A2C/Test_CartPole-v1_A2C_20221031-233316/logs/log.txt @@ -0,0 +1,28 @@ +2022-10-31 23:33:16 - r - INFO: - n_states: 4, n_actions: 2 +2022-10-31 23:33:16 - r - INFO: - Actor model name: ActorSoftmaxTanh +2022-10-31 23:33:16 - r - INFO: - Critic model name: Critic +2022-10-31 23:33:16 - r - INFO: - ACMemory memory name: PGReplay +2022-10-31 23:33:16 - r - INFO: - agent name: A2C +2022-10-31 23:33:17 - r - INFO: - Start testing! +2022-10-31 23:33:17 - r - INFO: - Env: CartPole-v1, Algorithm: A2C, Device: cuda +2022-10-31 23:33:18 - r - INFO: - Episode: 1/20, Reward: 200.0, Step: 200 +2022-10-31 23:33:18 - r - INFO: - Episode: 2/20, Reward: 200.0, Step: 200 +2022-10-31 23:33:18 - r - INFO: - Episode: 3/20, Reward: 186.0, Step: 186 +2022-10-31 23:33:18 - r - INFO: - Episode: 4/20, Reward: 200.0, Step: 200 +2022-10-31 23:33:18 - r - INFO: - Episode: 5/20, Reward: 200.0, Step: 200 +2022-10-31 23:33:19 - r - INFO: - Episode: 6/20, Reward: 200.0, Step: 200 +2022-10-31 23:33:19 - r - INFO: - Episode: 7/20, Reward: 200.0, Step: 200 +2022-10-31 23:33:19 - r - INFO: - Episode: 8/20, Reward: 200.0, Step: 200 +2022-10-31 23:33:19 - r - INFO: - Episode: 9/20, Reward: 200.0, Step: 200 +2022-10-31 23:33:19 - r - INFO: - Episode: 10/20, Reward: 200.0, Step: 200 +2022-10-31 23:33:19 - r - INFO: - Episode: 11/20, Reward: 200.0, Step: 200 +2022-10-31 23:33:19 - r - INFO: - Episode: 12/20, Reward: 200.0, Step: 200 +2022-10-31 23:33:19 - r - INFO: - Episode: 13/20, Reward: 200.0, Step: 200 +2022-10-31 23:33:19 - r - INFO: - Episode: 14/20, Reward: 200.0, Step: 200 +2022-10-31 23:33:19 - r - INFO: - Episode: 15/20, Reward: 200.0, Step: 200 +2022-10-31 23:33:19 - r - INFO: - Episode: 16/20, 
Reward: 200.0, Step: 200 +2022-10-31 23:33:19 - r - INFO: - Episode: 17/20, Reward: 200.0, Step: 200 +2022-10-31 23:33:19 - r - INFO: - Episode: 18/20, Reward: 200.0, Step: 200 +2022-10-31 23:33:19 - r - INFO: - Episode: 19/20, Reward: 200.0, Step: 200 +2022-10-31 23:33:20 - r - INFO: - Episode: 20/20, Reward: 200.0, Step: 200 +2022-10-31 23:33:20 - r - INFO: - Finish testing! diff --git a/projects/codes/A2C/Test_CartPole-v1_A2C_20221031-233316/models/actor_checkpoint.pt b/projects/codes/A2C/Test_CartPole-v1_A2C_20221031-233316/models/actor_checkpoint.pt new file mode 100644 index 0000000..05bd7b6 Binary files /dev/null and b/projects/codes/A2C/Test_CartPole-v1_A2C_20221031-233316/models/actor_checkpoint.pt differ diff --git a/projects/codes/A2C/Test_CartPole-v1_A2C_20221031-233316/models/critic_checkpoint.pt b/projects/codes/A2C/Test_CartPole-v1_A2C_20221031-233316/models/critic_checkpoint.pt new file mode 100644 index 0000000..720f388 Binary files /dev/null and b/projects/codes/A2C/Test_CartPole-v1_A2C_20221031-233316/models/critic_checkpoint.pt differ diff --git a/projects/codes/A2C/Test_CartPole-v1_A2C_20221031-233316/results/learning_curve.png b/projects/codes/A2C/Test_CartPole-v1_A2C_20221031-233316/results/learning_curve.png new file mode 100644 index 0000000..33274af Binary files /dev/null and b/projects/codes/A2C/Test_CartPole-v1_A2C_20221031-233316/results/learning_curve.png differ diff --git a/projects/codes/A2C/outputs/CartPole-v0/20220829-135818/results/testing_results.csv b/projects/codes/A2C/Test_CartPole-v1_A2C_20221031-233316/results/res.csv similarity index 54% rename from projects/codes/A2C/outputs/CartPole-v0/20220829-135818/results/testing_results.csv rename to projects/codes/A2C/Test_CartPole-v1_A2C_20221031-233316/results/res.csv index 221744d..571b1e6 100644 --- a/projects/codes/A2C/outputs/CartPole-v0/20220829-135818/results/testing_results.csv +++ b/projects/codes/A2C/Test_CartPole-v1_A2C_20221031-233316/results/res.csv @@ -1,21 +1,21 @@ 
episodes,rewards,steps 0,200.0,200 1,200.0,200 -2,93.0,93 -3,155.0,155 -4,116.0,116 +2,186.0,186 +3,200.0,200 +4,200.0,200 5,200.0,200 -6,190.0,190 -7,176.0,176 +6,200.0,200 +7,200.0,200 8,200.0,200 9,200.0,200 10,200.0,200 -11,179.0,179 +11,200.0,200 12,200.0,200 -13,185.0,185 -14,191.0,191 +13,200.0,200 +14,200.0,200 15,200.0,200 16,200.0,200 -17,124.0,124 +17,200.0,200 18,200.0,200 -19,172.0,172 +19,200.0,200 diff --git a/projects/codes/A2C/Train_CartPole-v1_A2C_20221030-211435/config.yaml b/projects/codes/A2C/Train_CartPole-v1_A2C_20221030-211435/config.yaml new file mode 100644 index 0000000..7dde5b7 --- /dev/null +++ b/projects/codes/A2C/Train_CartPole-v1_A2C_20221030-211435/config.yaml @@ -0,0 +1,23 @@ +general_cfg: + algo_name: A2C + device: cuda + env_name: CartPole-v1 + eval_eps: 10 + load_checkpoint: false + load_path: tasks + max_steps: 200 + mode: train + save_fig: true + seed: 1 + show_fig: false + test_eps: 20 + train_eps: 1000 +algo_cfg: + actor_hidden_dim: 256 + actor_lr: 0.0003 + batch_size: 64 + buffer_size: 100000 + critic_hidden_dim: 256 + critic_lr: 0.001 + gamma: 0.99 + hidden_dim: 256 diff --git a/projects/codes/A2C/Train_CartPole-v1_A2C_20221030-211435/logs/log.txt b/projects/codes/A2C/Train_CartPole-v1_A2C_20221030-211435/logs/log.txt new file mode 100644 index 0000000..b13b335 --- /dev/null +++ b/projects/codes/A2C/Train_CartPole-v1_A2C_20221030-211435/logs/log.txt @@ -0,0 +1,1066 @@ +2022-10-30 21:14:35 - r - INFO: - n_states: 4, n_actions: 2 +2022-10-30 21:14:35 - r - INFO: - Start training! 
+2022-10-30 21:14:35 - r - INFO: - Env: CartPole-v1, Algorithm: A2C, Device: cuda +2022-10-30 21:14:37 - r - INFO: - Episode: 1/1000, Reward: 25.0, Step: 25 +2022-10-30 21:14:38 - r - INFO: - Current episode 1 has the best eval reward: 29.2 +2022-10-30 21:14:38 - r - INFO: - Episode: 2/1000, Reward: 13.0, Step: 13 +2022-10-30 21:14:38 - r - INFO: - Episode: 3/1000, Reward: 58.0, Step: 58 +2022-10-30 21:14:38 - r - INFO: - Episode: 4/1000, Reward: 10.0, Step: 10 +2022-10-30 21:14:38 - r - INFO: - Episode: 5/1000, Reward: 39.0, Step: 39 +2022-10-30 21:14:38 - r - INFO: - Episode: 6/1000, Reward: 39.0, Step: 39 +2022-10-30 21:14:38 - r - INFO: - Episode: 7/1000, Reward: 25.0, Step: 25 +2022-10-30 21:14:39 - r - INFO: - Episode: 8/1000, Reward: 22.0, Step: 22 +2022-10-30 21:14:39 - r - INFO: - Episode: 9/1000, Reward: 21.0, Step: 21 +2022-10-30 21:14:39 - r - INFO: - Episode: 10/1000, Reward: 27.0, Step: 27 +2022-10-30 21:14:39 - r - INFO: - Episode: 11/1000, Reward: 35.0, Step: 35 +2022-10-30 21:14:40 - r - INFO: - Episode: 12/1000, Reward: 26.0, Step: 26 +2022-10-30 21:14:40 - r - INFO: - Episode: 13/1000, Reward: 38.0, Step: 38 +2022-10-30 21:14:40 - r - INFO: - Episode: 14/1000, Reward: 29.0, Step: 29 +2022-10-30 21:14:40 - r - INFO: - Episode: 15/1000, Reward: 50.0, Step: 50 +2022-10-30 21:14:40 - r - INFO: - Episode: 16/1000, Reward: 20.0, Step: 20 +2022-10-30 21:14:40 - r - INFO: - Episode: 17/1000, Reward: 52.0, Step: 52 +2022-10-30 21:14:41 - r - INFO: - Current episode 17 has the best eval reward: 32.9 +2022-10-30 21:14:41 - r - INFO: - Episode: 18/1000, Reward: 12.0, Step: 12 +2022-10-30 21:14:41 - r - INFO: - Episode: 19/1000, Reward: 20.0, Step: 20 +2022-10-30 21:14:41 - r - INFO: - Episode: 20/1000, Reward: 38.0, Step: 38 +2022-10-30 21:14:41 - r - INFO: - Current episode 20 has the best eval reward: 38.9 +2022-10-30 21:14:41 - r - INFO: - Episode: 21/1000, Reward: 22.0, Step: 22 +2022-10-30 21:14:41 - r - INFO: - Episode: 22/1000, Reward: 36.0, Step: 36 
+2022-10-30 21:14:42 - r - INFO: - Episode: 23/1000, Reward: 20.0, Step: 20 +2022-10-30 21:14:42 - r - INFO: - Episode: 24/1000, Reward: 35.0, Step: 35 +2022-10-30 21:14:42 - r - INFO: - Episode: 25/1000, Reward: 90.0, Step: 90 +2022-10-30 21:14:42 - r - INFO: - Episode: 26/1000, Reward: 29.0, Step: 29 +2022-10-30 21:14:42 - r - INFO: - Episode: 27/1000, Reward: 16.0, Step: 16 +2022-10-30 21:14:43 - r - INFO: - Episode: 28/1000, Reward: 25.0, Step: 25 +2022-10-30 21:14:43 - r - INFO: - Episode: 29/1000, Reward: 46.0, Step: 46 +2022-10-30 21:14:43 - r - INFO: - Episode: 30/1000, Reward: 33.0, Step: 33 +2022-10-30 21:14:43 - r - INFO: - Episode: 31/1000, Reward: 11.0, Step: 11 +2022-10-30 21:14:43 - r - INFO: - Episode: 32/1000, Reward: 27.0, Step: 27 +2022-10-30 21:14:44 - r - INFO: - Episode: 33/1000, Reward: 32.0, Step: 32 +2022-10-30 21:14:44 - r - INFO: - Current episode 33 has the best eval reward: 39.2 +2022-10-30 21:14:44 - r - INFO: - Episode: 34/1000, Reward: 21.0, Step: 21 +2022-10-30 21:14:44 - r - INFO: - Episode: 35/1000, Reward: 11.0, Step: 11 +2022-10-30 21:14:44 - r - INFO: - Episode: 36/1000, Reward: 21.0, Step: 21 +2022-10-30 21:14:44 - r - INFO: - Episode: 37/1000, Reward: 51.0, Step: 51 +2022-10-30 21:14:44 - r - INFO: - Episode: 38/1000, Reward: 29.0, Step: 29 +2022-10-30 21:14:45 - r - INFO: - Current episode 38 has the best eval reward: 41.7 +2022-10-30 21:14:45 - r - INFO: - Episode: 39/1000, Reward: 50.0, Step: 50 +2022-10-30 21:14:45 - r - INFO: - Current episode 39 has the best eval reward: 48.5 +2022-10-30 21:14:45 - r - INFO: - Episode: 40/1000, Reward: 19.0, Step: 19 +2022-10-30 21:14:45 - r - INFO: - Episode: 41/1000, Reward: 41.0, Step: 41 +2022-10-30 21:14:45 - r - INFO: - Episode: 42/1000, Reward: 28.0, Step: 28 +2022-10-30 21:14:46 - r - INFO: - Episode: 43/1000, Reward: 71.0, Step: 71 +2022-10-30 21:14:46 - r - INFO: - Episode: 44/1000, Reward: 45.0, Step: 45 +2022-10-30 21:14:46 - r - INFO: - Episode: 45/1000, Reward: 42.0, Step: 
42 +2022-10-30 21:14:46 - r - INFO: - Current episode 45 has the best eval reward: 49.6 +2022-10-30 21:14:46 - r - INFO: - Episode: 46/1000, Reward: 39.0, Step: 39 +2022-10-30 21:14:47 - r - INFO: - Episode: 47/1000, Reward: 21.0, Step: 21 +2022-10-30 21:14:47 - r - INFO: - Episode: 48/1000, Reward: 14.0, Step: 14 +2022-10-30 21:14:47 - r - INFO: - Episode: 49/1000, Reward: 23.0, Step: 23 +2022-10-30 21:14:47 - r - INFO: - Episode: 50/1000, Reward: 21.0, Step: 21 +2022-10-30 21:14:47 - r - INFO: - Episode: 51/1000, Reward: 34.0, Step: 34 +2022-10-30 21:14:48 - r - INFO: - Episode: 52/1000, Reward: 14.0, Step: 14 +2022-10-30 21:14:48 - r - INFO: - Episode: 53/1000, Reward: 41.0, Step: 41 +2022-10-30 21:14:48 - r - INFO: - Episode: 54/1000, Reward: 99.0, Step: 99 +2022-10-30 21:14:48 - r - INFO: - Episode: 55/1000, Reward: 21.0, Step: 21 +2022-10-30 21:14:49 - r - INFO: - Episode: 56/1000, Reward: 52.0, Step: 52 +2022-10-30 21:14:49 - r - INFO: - Episode: 57/1000, Reward: 34.0, Step: 34 +2022-10-30 21:14:49 - r - INFO: - Episode: 58/1000, Reward: 73.0, Step: 73 +2022-10-30 21:14:49 - r - INFO: - Episode: 59/1000, Reward: 21.0, Step: 21 +2022-10-30 21:14:49 - r - INFO: - Episode: 60/1000, Reward: 27.0, Step: 27 +2022-10-30 21:14:50 - r - INFO: - Episode: 61/1000, Reward: 51.0, Step: 51 +2022-10-30 21:14:50 - r - INFO: - Episode: 62/1000, Reward: 46.0, Step: 46 +2022-10-30 21:14:50 - r - INFO: - Episode: 63/1000, Reward: 21.0, Step: 21 +2022-10-30 21:14:50 - r - INFO: - Episode: 64/1000, Reward: 20.0, Step: 20 +2022-10-30 21:14:51 - r - INFO: - Episode: 65/1000, Reward: 44.0, Step: 44 +2022-10-30 21:14:51 - r - INFO: - Episode: 66/1000, Reward: 16.0, Step: 16 +2022-10-30 21:14:51 - r - INFO: - Episode: 67/1000, Reward: 39.0, Step: 39 +2022-10-30 21:14:51 - r - INFO: - Episode: 68/1000, Reward: 30.0, Step: 30 +2022-10-30 21:14:51 - r - INFO: - Episode: 69/1000, Reward: 37.0, Step: 37 +2022-10-30 21:14:52 - r - INFO: - Episode: 70/1000, Reward: 20.0, Step: 20 +2022-10-30 
21:14:52 - r - INFO: - Episode: 71/1000, Reward: 21.0, Step: 21 +2022-10-30 21:14:52 - r - INFO: - Episode: 72/1000, Reward: 13.0, Step: 13 +2022-10-30 21:14:52 - r - INFO: - Episode: 73/1000, Reward: 65.0, Step: 65 +2022-10-30 21:14:53 - r - INFO: - Episode: 74/1000, Reward: 45.0, Step: 45 +2022-10-30 21:14:53 - r - INFO: - Episode: 75/1000, Reward: 45.0, Step: 45 +2022-10-30 21:14:53 - r - INFO: - Episode: 76/1000, Reward: 46.0, Step: 46 +2022-10-30 21:14:53 - r - INFO: - Episode: 77/1000, Reward: 13.0, Step: 13 +2022-10-30 21:14:53 - r - INFO: - Episode: 78/1000, Reward: 33.0, Step: 33 +2022-10-30 21:14:54 - r - INFO: - Episode: 79/1000, Reward: 30.0, Step: 30 +2022-10-30 21:14:54 - r - INFO: - Episode: 80/1000, Reward: 52.0, Step: 52 +2022-10-30 21:14:54 - r - INFO: - Episode: 81/1000, Reward: 27.0, Step: 27 +2022-10-30 21:14:54 - r - INFO: - Episode: 82/1000, Reward: 30.0, Step: 30 +2022-10-30 21:14:55 - r - INFO: - Episode: 83/1000, Reward: 47.0, Step: 47 +2022-10-30 21:14:55 - r - INFO: - Episode: 84/1000, Reward: 56.0, Step: 56 +2022-10-30 21:14:55 - r - INFO: - Episode: 85/1000, Reward: 19.0, Step: 19 +2022-10-30 21:14:55 - r - INFO: - Episode: 86/1000, Reward: 33.0, Step: 33 +2022-10-30 21:14:56 - r - INFO: - Episode: 87/1000, Reward: 25.0, Step: 25 +2022-10-30 21:14:56 - r - INFO: - Episode: 88/1000, Reward: 41.0, Step: 41 +2022-10-30 21:14:56 - r - INFO: - Episode: 89/1000, Reward: 20.0, Step: 20 +2022-10-30 21:14:56 - r - INFO: - Episode: 90/1000, Reward: 58.0, Step: 58 +2022-10-30 21:14:56 - r - INFO: - Episode: 91/1000, Reward: 35.0, Step: 35 +2022-10-30 21:14:57 - r - INFO: - Episode: 92/1000, Reward: 23.0, Step: 23 +2022-10-30 21:14:57 - r - INFO: - Episode: 93/1000, Reward: 12.0, Step: 12 +2022-10-30 21:14:57 - r - INFO: - Episode: 94/1000, Reward: 20.0, Step: 20 +2022-10-30 21:14:57 - r - INFO: - Episode: 95/1000, Reward: 10.0, Step: 10 +2022-10-30 21:14:57 - r - INFO: - Episode: 96/1000, Reward: 49.0, Step: 49 +2022-10-30 21:14:58 - r - INFO: - 
Episode: 97/1000, Reward: 29.0, Step: 29 +2022-10-30 21:14:58 - r - INFO: - Episode: 98/1000, Reward: 35.0, Step: 35 +2022-10-30 21:14:58 - r - INFO: - Episode: 99/1000, Reward: 36.0, Step: 36 +2022-10-30 21:14:58 - r - INFO: - Current episode 99 has the best eval reward: 53.4 +2022-10-30 21:14:58 - r - INFO: - Episode: 100/1000, Reward: 36.0, Step: 36 +2022-10-30 21:14:59 - r - INFO: - Episode: 101/1000, Reward: 16.0, Step: 16 +2022-10-30 21:14:59 - r - INFO: - Episode: 102/1000, Reward: 36.0, Step: 36 +2022-10-30 21:14:59 - r - INFO: - Current episode 102 has the best eval reward: 70.3 +2022-10-30 21:14:59 - r - INFO: - Episode: 103/1000, Reward: 30.0, Step: 30 +2022-10-30 21:15:00 - r - INFO: - Episode: 104/1000, Reward: 76.0, Step: 76 +2022-10-30 21:15:00 - r - INFO: - Episode: 105/1000, Reward: 52.0, Step: 52 +2022-10-30 21:15:00 - r - INFO: - Episode: 106/1000, Reward: 39.0, Step: 39 +2022-10-30 21:15:00 - r - INFO: - Episode: 107/1000, Reward: 52.0, Step: 52 +2022-10-30 21:15:01 - r - INFO: - Episode: 108/1000, Reward: 69.0, Step: 69 +2022-10-30 21:15:01 - r - INFO: - Episode: 109/1000, Reward: 27.0, Step: 27 +2022-10-30 21:15:01 - r - INFO: - Episode: 110/1000, Reward: 14.0, Step: 14 +2022-10-30 21:15:01 - r - INFO: - Episode: 111/1000, Reward: 28.0, Step: 28 +2022-10-30 21:15:01 - r - INFO: - Episode: 112/1000, Reward: 12.0, Step: 12 +2022-10-30 21:15:02 - r - INFO: - Episode: 113/1000, Reward: 26.0, Step: 26 +2022-10-30 21:15:03 - r - INFO: - Episode: 114/1000, Reward: 50.0, Step: 50 +2022-10-30 21:15:03 - r - INFO: - Episode: 115/1000, Reward: 25.0, Step: 25 +2022-10-30 21:15:03 - r - INFO: - Episode: 116/1000, Reward: 53.0, Step: 53 +2022-10-30 21:15:03 - r - INFO: - Episode: 117/1000, Reward: 19.0, Step: 19 +2022-10-30 21:15:04 - r - INFO: - Episode: 118/1000, Reward: 33.0, Step: 33 +2022-10-30 21:15:04 - r - INFO: - Episode: 119/1000, Reward: 34.0, Step: 34 +2022-10-30 21:15:04 - r - INFO: - Episode: 120/1000, Reward: 41.0, Step: 41 +2022-10-30 
21:15:04 - r - INFO: - Episode: 121/1000, Reward: 25.0, Step: 25 +2022-10-30 21:15:05 - r - INFO: - Episode: 122/1000, Reward: 18.0, Step: 18 +2022-10-30 21:15:05 - r - INFO: - Episode: 123/1000, Reward: 114.0, Step: 114 +2022-10-30 21:15:05 - r - INFO: - Episode: 124/1000, Reward: 25.0, Step: 25 +2022-10-30 21:15:05 - r - INFO: - Episode: 125/1000, Reward: 46.0, Step: 46 +2022-10-30 21:15:06 - r - INFO: - Episode: 126/1000, Reward: 22.0, Step: 22 +2022-10-30 21:15:06 - r - INFO: - Episode: 127/1000, Reward: 71.0, Step: 71 +2022-10-30 21:15:06 - r - INFO: - Episode: 128/1000, Reward: 30.0, Step: 30 +2022-10-30 21:15:07 - r - INFO: - Episode: 129/1000, Reward: 130.0, Step: 130 +2022-10-30 21:15:07 - r - INFO: - Episode: 130/1000, Reward: 65.0, Step: 65 +2022-10-30 21:15:07 - r - INFO: - Episode: 131/1000, Reward: 55.0, Step: 55 +2022-10-30 21:15:07 - r - INFO: - Episode: 132/1000, Reward: 37.0, Step: 37 +2022-10-30 21:15:08 - r - INFO: - Episode: 133/1000, Reward: 46.0, Step: 46 +2022-10-30 21:15:08 - r - INFO: - Episode: 134/1000, Reward: 65.0, Step: 65 +2022-10-30 21:15:08 - r - INFO: - Episode: 135/1000, Reward: 31.0, Step: 31 +2022-10-30 21:15:08 - r - INFO: - Episode: 136/1000, Reward: 33.0, Step: 33 +2022-10-30 21:15:09 - r - INFO: - Episode: 137/1000, Reward: 39.0, Step: 39 +2022-10-30 21:15:09 - r - INFO: - Episode: 138/1000, Reward: 73.0, Step: 73 +2022-10-30 21:15:09 - r - INFO: - Episode: 139/1000, Reward: 78.0, Step: 78 +2022-10-30 21:15:10 - r - INFO: - Episode: 140/1000, Reward: 36.0, Step: 36 +2022-10-30 21:15:10 - r - INFO: - Episode: 141/1000, Reward: 56.0, Step: 56 +2022-10-30 21:15:10 - r - INFO: - Episode: 142/1000, Reward: 12.0, Step: 12 +2022-10-30 21:15:10 - r - INFO: - Episode: 143/1000, Reward: 36.0, Step: 36 +2022-10-30 21:15:11 - r - INFO: - Episode: 144/1000, Reward: 13.0, Step: 13 +2022-10-30 21:15:11 - r - INFO: - Episode: 145/1000, Reward: 85.0, Step: 85 +2022-10-30 21:15:11 - r - INFO: - Episode: 146/1000, Reward: 34.0, Step: 34 
+2022-10-30 21:15:11 - r - INFO: - Episode: 147/1000, Reward: 16.0, Step: 16 +2022-10-30 21:15:12 - r - INFO: - Episode: 148/1000, Reward: 68.0, Step: 68 +2022-10-30 21:15:12 - r - INFO: - Episode: 149/1000, Reward: 94.0, Step: 94 +2022-10-30 21:15:12 - r - INFO: - Episode: 150/1000, Reward: 17.0, Step: 17 +2022-10-30 21:15:13 - r - INFO: - Episode: 151/1000, Reward: 64.0, Step: 64 +2022-10-30 21:15:13 - r - INFO: - Episode: 152/1000, Reward: 33.0, Step: 33 +2022-10-30 21:15:13 - r - INFO: - Episode: 153/1000, Reward: 63.0, Step: 63 +2022-10-30 21:15:13 - r - INFO: - Episode: 154/1000, Reward: 39.0, Step: 39 +2022-10-30 21:15:14 - r - INFO: - Episode: 155/1000, Reward: 72.0, Step: 72 +2022-10-30 21:15:14 - r - INFO: - Episode: 156/1000, Reward: 39.0, Step: 39 +2022-10-30 21:15:14 - r - INFO: - Episode: 157/1000, Reward: 37.0, Step: 37 +2022-10-30 21:15:14 - r - INFO: - Episode: 158/1000, Reward: 18.0, Step: 18 +2022-10-30 21:15:15 - r - INFO: - Episode: 159/1000, Reward: 55.0, Step: 55 +2022-10-30 21:15:15 - r - INFO: - Episode: 160/1000, Reward: 21.0, Step: 21 +2022-10-30 21:15:15 - r - INFO: - Episode: 161/1000, Reward: 54.0, Step: 54 +2022-10-30 21:15:15 - r - INFO: - Episode: 162/1000, Reward: 46.0, Step: 46 +2022-10-30 21:15:16 - r - INFO: - Episode: 163/1000, Reward: 21.0, Step: 21 +2022-10-30 21:15:16 - r - INFO: - Episode: 164/1000, Reward: 26.0, Step: 26 +2022-10-30 21:15:16 - r - INFO: - Episode: 165/1000, Reward: 70.0, Step: 70 +2022-10-30 21:15:17 - r - INFO: - Episode: 166/1000, Reward: 20.0, Step: 20 +2022-10-30 21:15:17 - r - INFO: - Episode: 167/1000, Reward: 41.0, Step: 41 +2022-10-30 21:15:17 - r - INFO: - Episode: 168/1000, Reward: 77.0, Step: 77 +2022-10-30 21:15:17 - r - INFO: - Episode: 169/1000, Reward: 13.0, Step: 13 +2022-10-30 21:15:18 - r - INFO: - Episode: 170/1000, Reward: 66.0, Step: 66 +2022-10-30 21:15:18 - r - INFO: - Episode: 171/1000, Reward: 72.0, Step: 72 +2022-10-30 21:15:18 - r - INFO: - Episode: 172/1000, Reward: 28.0, Step: 
28 +2022-10-30 21:15:19 - r - INFO: - Episode: 173/1000, Reward: 68.0, Step: 68 +2022-10-30 21:15:19 - r - INFO: - Episode: 174/1000, Reward: 124.0, Step: 124 +2022-10-30 21:15:19 - r - INFO: - Episode: 175/1000, Reward: 41.0, Step: 41 +2022-10-30 21:15:20 - r - INFO: - Episode: 176/1000, Reward: 54.0, Step: 54 +2022-10-30 21:15:20 - r - INFO: - Episode: 177/1000, Reward: 33.0, Step: 33 +2022-10-30 21:15:20 - r - INFO: - Episode: 178/1000, Reward: 92.0, Step: 92 +2022-10-30 21:15:20 - r - INFO: - Episode: 179/1000, Reward: 23.0, Step: 23 +2022-10-30 21:15:21 - r - INFO: - Episode: 180/1000, Reward: 76.0, Step: 76 +2022-10-30 21:15:21 - r - INFO: - Episode: 181/1000, Reward: 47.0, Step: 47 +2022-10-30 21:15:22 - r - INFO: - Episode: 182/1000, Reward: 89.0, Step: 89 +2022-10-30 21:15:22 - r - INFO: - Episode: 183/1000, Reward: 84.0, Step: 84 +2022-10-30 21:15:22 - r - INFO: - Episode: 184/1000, Reward: 75.0, Step: 75 +2022-10-30 21:15:23 - r - INFO: - Episode: 185/1000, Reward: 64.0, Step: 64 +2022-10-30 21:15:23 - r - INFO: - Episode: 186/1000, Reward: 35.0, Step: 35 +2022-10-30 21:15:23 - r - INFO: - Episode: 187/1000, Reward: 44.0, Step: 44 +2022-10-30 21:15:24 - r - INFO: - Episode: 188/1000, Reward: 46.0, Step: 46 +2022-10-30 21:15:24 - r - INFO: - Episode: 189/1000, Reward: 67.0, Step: 67 +2022-10-30 21:15:25 - r - INFO: - Episode: 190/1000, Reward: 82.0, Step: 82 +2022-10-30 21:15:25 - r - INFO: - Episode: 191/1000, Reward: 55.0, Step: 55 +2022-10-30 21:15:25 - r - INFO: - Episode: 192/1000, Reward: 26.0, Step: 26 +2022-10-30 21:15:26 - r - INFO: - Episode: 193/1000, Reward: 116.0, Step: 116 +2022-10-30 21:15:26 - r - INFO: - Episode: 194/1000, Reward: 116.0, Step: 116 +2022-10-30 21:15:26 - r - INFO: - Episode: 195/1000, Reward: 119.0, Step: 119 +2022-10-30 21:15:27 - r - INFO: - Episode: 196/1000, Reward: 50.0, Step: 50 +2022-10-30 21:15:27 - r - INFO: - Episode: 197/1000, Reward: 43.0, Step: 43 +2022-10-30 21:15:27 - r - INFO: - Episode: 198/1000, Reward: 
47.0, Step: 47 +2022-10-30 21:15:28 - r - INFO: - Episode: 199/1000, Reward: 71.0, Step: 71 +2022-10-30 21:15:28 - r - INFO: - Episode: 200/1000, Reward: 53.0, Step: 53 +2022-10-30 21:15:28 - r - INFO: - Current episode 200 has the best eval reward: 86.0 +2022-10-30 21:15:29 - r - INFO: - Episode: 201/1000, Reward: 137.0, Step: 137 +2022-10-30 21:15:29 - r - INFO: - Episode: 202/1000, Reward: 82.0, Step: 82 +2022-10-30 21:15:30 - r - INFO: - Episode: 203/1000, Reward: 120.0, Step: 120 +2022-10-30 21:15:30 - r - INFO: - Current episode 203 has the best eval reward: 92.8 +2022-10-30 21:15:30 - r - INFO: - Episode: 204/1000, Reward: 69.0, Step: 69 +2022-10-30 21:15:31 - r - INFO: - Episode: 205/1000, Reward: 55.0, Step: 55 +2022-10-30 21:15:31 - r - INFO: - Episode: 206/1000, Reward: 62.0, Step: 62 +2022-10-30 21:15:31 - r - INFO: - Episode: 207/1000, Reward: 64.0, Step: 64 +2022-10-30 21:15:32 - r - INFO: - Episode: 208/1000, Reward: 49.0, Step: 49 +2022-10-30 21:15:32 - r - INFO: - Episode: 209/1000, Reward: 32.0, Step: 32 +2022-10-30 21:15:33 - r - INFO: - Episode: 210/1000, Reward: 42.0, Step: 42 +2022-10-30 21:15:33 - r - INFO: - Episode: 211/1000, Reward: 50.0, Step: 50 +2022-10-30 21:15:33 - r - INFO: - Episode: 212/1000, Reward: 93.0, Step: 93 +2022-10-30 21:15:34 - r - INFO: - Episode: 213/1000, Reward: 60.0, Step: 60 +2022-10-30 21:15:34 - r - INFO: - Episode: 214/1000, Reward: 54.0, Step: 54 +2022-10-30 21:15:35 - r - INFO: - Episode: 215/1000, Reward: 68.0, Step: 68 +2022-10-30 21:15:35 - r - INFO: - Episode: 216/1000, Reward: 84.0, Step: 84 +2022-10-30 21:15:35 - r - INFO: - Current episode 216 has the best eval reward: 94.6 +2022-10-30 21:15:36 - r - INFO: - Episode: 217/1000, Reward: 55.0, Step: 55 +2022-10-30 21:15:36 - r - INFO: - Episode: 218/1000, Reward: 70.0, Step: 70 +2022-10-30 21:15:37 - r - INFO: - Episode: 219/1000, Reward: 115.0, Step: 115 +2022-10-30 21:15:37 - r - INFO: - Episode: 220/1000, Reward: 149.0, Step: 149 +2022-10-30 21:15:38 - r 
- INFO: - Episode: 221/1000, Reward: 68.0, Step: 68 +2022-10-30 21:15:38 - r - INFO: - Episode: 222/1000, Reward: 50.0, Step: 50 +2022-10-30 21:15:38 - r - INFO: - Current episode 222 has the best eval reward: 95.5 +2022-10-30 21:15:39 - r - INFO: - Episode: 223/1000, Reward: 56.0, Step: 56 +2022-10-30 21:15:39 - r - INFO: - Episode: 224/1000, Reward: 61.0, Step: 61 +2022-10-30 21:15:39 - r - INFO: - Episode: 225/1000, Reward: 117.0, Step: 117 +2022-10-30 21:15:40 - r - INFO: - Episode: 226/1000, Reward: 66.0, Step: 66 +2022-10-30 21:15:41 - r - INFO: - Episode: 227/1000, Reward: 127.0, Step: 127 +2022-10-30 21:15:41 - r - INFO: - Episode: 228/1000, Reward: 66.0, Step: 66 +2022-10-30 21:15:42 - r - INFO: - Episode: 229/1000, Reward: 48.0, Step: 48 +2022-10-30 21:15:42 - r - INFO: - Episode: 230/1000, Reward: 36.0, Step: 36 +2022-10-30 21:15:42 - r - INFO: - Episode: 231/1000, Reward: 79.0, Step: 79 +2022-10-30 21:15:43 - r - INFO: - Episode: 232/1000, Reward: 49.0, Step: 49 +2022-10-30 21:15:43 - r - INFO: - Episode: 233/1000, Reward: 55.0, Step: 55 +2022-10-30 21:15:43 - r - INFO: - Episode: 234/1000, Reward: 41.0, Step: 41 +2022-10-30 21:15:43 - r - INFO: - Episode: 235/1000, Reward: 20.0, Step: 20 +2022-10-30 21:15:44 - r - INFO: - Episode: 236/1000, Reward: 40.0, Step: 40 +2022-10-30 21:15:44 - r - INFO: - Episode: 237/1000, Reward: 120.0, Step: 120 +2022-10-30 21:15:44 - r - INFO: - Episode: 238/1000, Reward: 27.0, Step: 27 +2022-10-30 21:15:45 - r - INFO: - Episode: 239/1000, Reward: 51.0, Step: 51 +2022-10-30 21:15:45 - r - INFO: - Episode: 240/1000, Reward: 35.0, Step: 35 +2022-10-30 21:15:45 - r - INFO: - Episode: 241/1000, Reward: 43.0, Step: 43 +2022-10-30 21:15:46 - r - INFO: - Episode: 242/1000, Reward: 54.0, Step: 54 +2022-10-30 21:15:46 - r - INFO: - Episode: 243/1000, Reward: 52.0, Step: 52 +2022-10-30 21:15:46 - r - INFO: - Episode: 244/1000, Reward: 47.0, Step: 47 +2022-10-30 21:15:46 - r - INFO: - Episode: 245/1000, Reward: 63.0, Step: 63 
+2022-10-30 21:15:47 - r - INFO: - Episode: 246/1000, Reward: 29.0, Step: 29 +2022-10-30 21:15:47 - r - INFO: - Episode: 247/1000, Reward: 36.0, Step: 36 +2022-10-30 21:15:47 - r - INFO: - Episode: 248/1000, Reward: 58.0, Step: 58 +2022-10-30 21:15:48 - r - INFO: - Episode: 249/1000, Reward: 63.0, Step: 63 +2022-10-30 21:15:48 - r - INFO: - Episode: 250/1000, Reward: 49.0, Step: 49 +2022-10-30 21:15:48 - r - INFO: - Episode: 251/1000, Reward: 70.0, Step: 70 +2022-10-30 21:15:49 - r - INFO: - Episode: 252/1000, Reward: 114.0, Step: 114 +2022-10-30 21:15:49 - r - INFO: - Episode: 253/1000, Reward: 62.0, Step: 62 +2022-10-30 21:15:50 - r - INFO: - Episode: 254/1000, Reward: 73.0, Step: 73 +2022-10-30 21:15:50 - r - INFO: - Current episode 254 has the best eval reward: 96.7 +2022-10-30 21:15:50 - r - INFO: - Episode: 255/1000, Reward: 62.0, Step: 62 +2022-10-30 21:15:51 - r - INFO: - Episode: 256/1000, Reward: 61.0, Step: 61 +2022-10-30 21:15:51 - r - INFO: - Episode: 257/1000, Reward: 115.0, Step: 115 +2022-10-30 21:15:52 - r - INFO: - Episode: 258/1000, Reward: 50.0, Step: 50 +2022-10-30 21:15:52 - r - INFO: - Episode: 259/1000, Reward: 128.0, Step: 128 +2022-10-30 21:15:53 - r - INFO: - Current episode 259 has the best eval reward: 104.8 +2022-10-30 21:15:53 - r - INFO: - Episode: 260/1000, Reward: 200.0, Step: 200 +2022-10-30 21:15:53 - r - INFO: - Episode: 261/1000, Reward: 75.0, Step: 75 +2022-10-30 21:15:54 - r - INFO: - Episode: 262/1000, Reward: 64.0, Step: 64 +2022-10-30 21:15:54 - r - INFO: - Episode: 263/1000, Reward: 33.0, Step: 33 +2022-10-30 21:15:55 - r - INFO: - Episode: 264/1000, Reward: 90.0, Step: 90 +2022-10-30 21:15:55 - r - INFO: - Current episode 264 has the best eval reward: 107.6 +2022-10-30 21:15:56 - r - INFO: - Episode: 265/1000, Reward: 117.0, Step: 117 +2022-10-30 21:15:56 - r - INFO: - Current episode 265 has the best eval reward: 119.4 +2022-10-30 21:15:56 - r - INFO: - Episode: 266/1000, Reward: 60.0, Step: 60 +2022-10-30 21:15:57 - r 
- INFO: - Episode: 267/1000, Reward: 177.0, Step: 177 +2022-10-30 21:15:57 - r - INFO: - Episode: 268/1000, Reward: 39.0, Step: 39 +2022-10-30 21:15:58 - r - INFO: - Episode: 269/1000, Reward: 40.0, Step: 40 +2022-10-30 21:15:58 - r - INFO: - Episode: 270/1000, Reward: 109.0, Step: 109 +2022-10-30 21:15:59 - r - INFO: - Episode: 271/1000, Reward: 100.0, Step: 100 +2022-10-30 21:16:00 - r - INFO: - Episode: 272/1000, Reward: 99.0, Step: 99 +2022-10-30 21:16:00 - r - INFO: - Episode: 273/1000, Reward: 136.0, Step: 136 +2022-10-30 21:16:01 - r - INFO: - Episode: 274/1000, Reward: 62.0, Step: 62 +2022-10-30 21:16:01 - r - INFO: - Episode: 275/1000, Reward: 100.0, Step: 100 +2022-10-30 21:16:02 - r - INFO: - Current episode 275 has the best eval reward: 120.1 +2022-10-30 21:16:02 - r - INFO: - Episode: 276/1000, Reward: 73.0, Step: 73 +2022-10-30 21:16:03 - r - INFO: - Episode: 277/1000, Reward: 166.0, Step: 166 +2022-10-30 21:16:03 - r - INFO: - Episode: 278/1000, Reward: 74.0, Step: 74 +2022-10-30 21:16:04 - r - INFO: - Current episode 278 has the best eval reward: 121.8 +2022-10-30 21:16:04 - r - INFO: - Episode: 279/1000, Reward: 126.0, Step: 126 +2022-10-30 21:16:05 - r - INFO: - Episode: 280/1000, Reward: 111.0, Step: 111 +2022-10-30 21:16:06 - r - INFO: - Episode: 281/1000, Reward: 198.0, Step: 198 +2022-10-30 21:16:07 - r - INFO: - Episode: 282/1000, Reward: 106.0, Step: 106 +2022-10-30 21:16:07 - r - INFO: - Episode: 283/1000, Reward: 80.0, Step: 80 +2022-10-30 21:16:08 - r - INFO: - Episode: 284/1000, Reward: 74.0, Step: 74 +2022-10-30 21:16:08 - r - INFO: - Episode: 285/1000, Reward: 114.0, Step: 114 +2022-10-30 21:16:09 - r - INFO: - Episode: 286/1000, Reward: 69.0, Step: 69 +2022-10-30 21:16:09 - r - INFO: - Episode: 287/1000, Reward: 98.0, Step: 98 +2022-10-30 21:16:10 - r - INFO: - Episode: 288/1000, Reward: 63.0, Step: 63 +2022-10-30 21:16:10 - r - INFO: - Episode: 289/1000, Reward: 61.0, Step: 61 +2022-10-30 21:16:11 - r - INFO: - Episode: 290/1000, 
Reward: 49.0, Step: 49 +2022-10-30 21:16:11 - r - INFO: - Episode: 291/1000, Reward: 89.0, Step: 89 +2022-10-30 21:16:12 - r - INFO: - Episode: 292/1000, Reward: 114.0, Step: 114 +2022-10-30 21:16:13 - r - INFO: - Episode: 293/1000, Reward: 103.0, Step: 103 +2022-10-30 21:16:13 - r - INFO: - Episode: 294/1000, Reward: 103.0, Step: 103 +2022-10-30 21:16:14 - r - INFO: - Episode: 295/1000, Reward: 93.0, Step: 93 +2022-10-30 21:16:14 - r - INFO: - Episode: 296/1000, Reward: 137.0, Step: 137 +2022-10-30 21:16:15 - r - INFO: - Episode: 297/1000, Reward: 97.0, Step: 97 +2022-10-30 21:16:16 - r - INFO: - Episode: 298/1000, Reward: 124.0, Step: 124 +2022-10-30 21:16:16 - r - INFO: - Episode: 299/1000, Reward: 147.0, Step: 147 +2022-10-30 21:16:17 - r - INFO: - Episode: 300/1000, Reward: 125.0, Step: 125 +2022-10-30 21:16:18 - r - INFO: - Episode: 301/1000, Reward: 105.0, Step: 105 +2022-10-30 21:16:18 - r - INFO: - Current episode 301 has the best eval reward: 148.8 +2022-10-30 21:16:18 - r - INFO: - Episode: 302/1000, Reward: 113.0, Step: 113 +2022-10-30 21:16:19 - r - INFO: - Current episode 302 has the best eval reward: 150.8 +2022-10-30 21:16:19 - r - INFO: - Episode: 303/1000, Reward: 120.0, Step: 120 +2022-10-30 21:16:20 - r - INFO: - Episode: 304/1000, Reward: 159.0, Step: 159 +2022-10-30 21:16:21 - r - INFO: - Episode: 305/1000, Reward: 190.0, Step: 190 +2022-10-30 21:16:22 - r - INFO: - Current episode 305 has the best eval reward: 183.4 +2022-10-30 21:16:22 - r - INFO: - Episode: 306/1000, Reward: 119.0, Step: 119 +2022-10-30 21:16:23 - r - INFO: - Episode: 307/1000, Reward: 200.0, Step: 200 +2022-10-30 21:16:24 - r - INFO: - Episode: 308/1000, Reward: 148.0, Step: 148 +2022-10-30 21:16:25 - r - INFO: - Episode: 309/1000, Reward: 200.0, Step: 200 +2022-10-30 21:16:26 - r - INFO: - Episode: 310/1000, Reward: 79.0, Step: 79 +2022-10-30 21:16:27 - r - INFO: - Episode: 311/1000, Reward: 115.0, Step: 115 +2022-10-30 21:16:28 - r - INFO: - Episode: 312/1000, Reward: 
147.0, Step: 147 +2022-10-30 21:16:29 - r - INFO: - Episode: 313/1000, Reward: 112.0, Step: 112 +2022-10-30 21:16:29 - r - INFO: - Episode: 314/1000, Reward: 125.0, Step: 125 +2022-10-30 21:16:30 - r - INFO: - Episode: 315/1000, Reward: 184.0, Step: 184 +2022-10-30 21:16:31 - r - INFO: - Episode: 316/1000, Reward: 193.0, Step: 193 +2022-10-30 21:16:32 - r - INFO: - Episode: 317/1000, Reward: 117.0, Step: 117 +2022-10-30 21:16:33 - r - INFO: - Episode: 318/1000, Reward: 153.0, Step: 153 +2022-10-30 21:16:34 - r - INFO: - Episode: 319/1000, Reward: 125.0, Step: 125 +2022-10-30 21:16:35 - r - INFO: - Episode: 320/1000, Reward: 184.0, Step: 184 +2022-10-30 21:16:36 - r - INFO: - Episode: 321/1000, Reward: 173.0, Step: 173 +2022-10-30 21:16:36 - r - INFO: - Episode: 322/1000, Reward: 117.0, Step: 117 +2022-10-30 21:16:37 - r - INFO: - Episode: 323/1000, Reward: 47.0, Step: 47 +2022-10-30 21:16:38 - r - INFO: - Episode: 324/1000, Reward: 107.0, Step: 107 +2022-10-30 21:16:38 - r - INFO: - Episode: 325/1000, Reward: 104.0, Step: 104 +2022-10-30 21:16:39 - r - INFO: - Episode: 326/1000, Reward: 114.0, Step: 114 +2022-10-30 21:16:39 - r - INFO: - Episode: 327/1000, Reward: 90.0, Step: 90 +2022-10-30 21:16:40 - r - INFO: - Episode: 328/1000, Reward: 112.0, Step: 112 +2022-10-30 21:16:41 - r - INFO: - Episode: 329/1000, Reward: 70.0, Step: 70 +2022-10-30 21:16:41 - r - INFO: - Episode: 330/1000, Reward: 74.0, Step: 74 +2022-10-30 21:16:42 - r - INFO: - Episode: 331/1000, Reward: 159.0, Step: 159 +2022-10-30 21:16:42 - r - INFO: - Episode: 332/1000, Reward: 39.0, Step: 39 +2022-10-30 21:16:43 - r - INFO: - Episode: 333/1000, Reward: 129.0, Step: 129 +2022-10-30 21:16:44 - r - INFO: - Episode: 334/1000, Reward: 50.0, Step: 50 +2022-10-30 21:16:44 - r - INFO: - Episode: 335/1000, Reward: 74.0, Step: 74 +2022-10-30 21:16:44 - r - INFO: - Episode: 336/1000, Reward: 31.0, Step: 31 +2022-10-30 21:16:45 - r - INFO: - Episode: 337/1000, Reward: 57.0, Step: 57 +2022-10-30 21:16:45 - r 
- INFO: - Episode: 338/1000, Reward: 71.0, Step: 71 +2022-10-30 21:16:46 - r - INFO: - Episode: 339/1000, Reward: 43.0, Step: 43 +2022-10-30 21:16:46 - r - INFO: - Episode: 340/1000, Reward: 41.0, Step: 41 +2022-10-30 21:16:46 - r - INFO: - Episode: 341/1000, Reward: 64.0, Step: 64 +2022-10-30 21:16:47 - r - INFO: - Episode: 342/1000, Reward: 38.0, Step: 38 +2022-10-30 21:16:47 - r - INFO: - Episode: 343/1000, Reward: 45.0, Step: 45 +2022-10-30 21:16:48 - r - INFO: - Episode: 344/1000, Reward: 120.0, Step: 120 +2022-10-30 21:16:48 - r - INFO: - Episode: 345/1000, Reward: 40.0, Step: 40 +2022-10-30 21:16:48 - r - INFO: - Episode: 346/1000, Reward: 46.0, Step: 46 +2022-10-30 21:16:48 - r - INFO: - Episode: 347/1000, Reward: 57.0, Step: 57 +2022-10-30 21:16:49 - r - INFO: - Episode: 348/1000, Reward: 29.0, Step: 29 +2022-10-30 21:16:49 - r - INFO: - Episode: 349/1000, Reward: 29.0, Step: 29 +2022-10-30 21:16:49 - r - INFO: - Episode: 350/1000, Reward: 50.0, Step: 50 +2022-10-30 21:16:50 - r - INFO: - Episode: 351/1000, Reward: 38.0, Step: 38 +2022-10-30 21:16:50 - r - INFO: - Episode: 352/1000, Reward: 51.0, Step: 51 +2022-10-30 21:16:50 - r - INFO: - Episode: 353/1000, Reward: 49.0, Step: 49 +2022-10-30 21:16:50 - r - INFO: - Episode: 354/1000, Reward: 30.0, Step: 30 +2022-10-30 21:16:51 - r - INFO: - Episode: 355/1000, Reward: 40.0, Step: 40 +2022-10-30 21:16:51 - r - INFO: - Episode: 356/1000, Reward: 45.0, Step: 45 +2022-10-30 21:16:51 - r - INFO: - Episode: 357/1000, Reward: 68.0, Step: 68 +2022-10-30 21:16:52 - r - INFO: - Episode: 358/1000, Reward: 27.0, Step: 27 +2022-10-30 21:16:52 - r - INFO: - Episode: 359/1000, Reward: 18.0, Step: 18 +2022-10-30 21:16:52 - r - INFO: - Episode: 360/1000, Reward: 26.0, Step: 26 +2022-10-30 21:16:52 - r - INFO: - Episode: 361/1000, Reward: 15.0, Step: 15 +2022-10-30 21:16:52 - r - INFO: - Episode: 362/1000, Reward: 65.0, Step: 65 +2022-10-30 21:16:53 - r - INFO: - Episode: 363/1000, Reward: 38.0, Step: 38 +2022-10-30 21:16:53 
- r - INFO: - Episode: 364/1000, Reward: 41.0, Step: 41 +2022-10-30 21:16:53 - r - INFO: - Episode: 365/1000, Reward: 61.0, Step: 61 +2022-10-30 21:16:54 - r - INFO: - Episode: 366/1000, Reward: 113.0, Step: 113 +2022-10-30 21:16:54 - r - INFO: - Episode: 367/1000, Reward: 39.0, Step: 39 +2022-10-30 21:16:54 - r - INFO: - Episode: 368/1000, Reward: 60.0, Step: 60 +2022-10-30 21:16:55 - r - INFO: - Episode: 369/1000, Reward: 134.0, Step: 134 +2022-10-30 21:16:56 - r - INFO: - Episode: 370/1000, Reward: 122.0, Step: 122 +2022-10-30 21:16:56 - r - INFO: - Episode: 371/1000, Reward: 34.0, Step: 34 +2022-10-30 21:16:57 - r - INFO: - Episode: 372/1000, Reward: 129.0, Step: 129 +2022-10-30 21:16:57 - r - INFO: - Episode: 373/1000, Reward: 40.0, Step: 40 +2022-10-30 21:16:58 - r - INFO: - Episode: 374/1000, Reward: 128.0, Step: 128 +2022-10-30 21:16:59 - r - INFO: - Episode: 375/1000, Reward: 200.0, Step: 200 +2022-10-30 21:17:00 - r - INFO: - Episode: 376/1000, Reward: 108.0, Step: 108 +2022-10-30 21:17:01 - r - INFO: - Episode: 377/1000, Reward: 108.0, Step: 108 +2022-10-30 21:17:02 - r - INFO: - Episode: 378/1000, Reward: 151.0, Step: 151 +2022-10-30 21:17:03 - r - INFO: - Episode: 379/1000, Reward: 79.0, Step: 79 +2022-10-30 21:17:03 - r - INFO: - Episode: 380/1000, Reward: 105.0, Step: 105 +2022-10-30 21:17:04 - r - INFO: - Episode: 381/1000, Reward: 87.0, Step: 87 +2022-10-30 21:17:05 - r - INFO: - Episode: 382/1000, Reward: 94.0, Step: 94 +2022-10-30 21:17:06 - r - INFO: - Episode: 383/1000, Reward: 112.0, Step: 112 +2022-10-30 21:17:07 - r - INFO: - Episode: 384/1000, Reward: 200.0, Step: 200 +2022-10-30 21:17:08 - r - INFO: - Episode: 385/1000, Reward: 184.0, Step: 184 +2022-10-30 21:17:08 - r - INFO: - Episode: 386/1000, Reward: 124.0, Step: 124 +2022-10-30 21:17:09 - r - INFO: - Episode: 387/1000, Reward: 200.0, Step: 200 +2022-10-30 21:17:11 - r - INFO: - Episode: 388/1000, Reward: 200.0, Step: 200 +2022-10-30 21:17:12 - r - INFO: - Episode: 389/1000, Reward: 
109.0, Step: 109 +2022-10-30 21:17:12 - r - INFO: - Episode: 390/1000, Reward: 88.0, Step: 88 +2022-10-30 21:17:13 - r - INFO: - Episode: 391/1000, Reward: 104.0, Step: 104 +2022-10-30 21:17:14 - r - INFO: - Episode: 392/1000, Reward: 200.0, Step: 200 +2022-10-30 21:17:15 - r - INFO: - Episode: 393/1000, Reward: 84.0, Step: 84 +2022-10-30 21:17:16 - r - INFO: - Episode: 394/1000, Reward: 187.0, Step: 187 +2022-10-30 21:17:17 - r - INFO: - Episode: 395/1000, Reward: 182.0, Step: 182 +2022-10-30 21:17:18 - r - INFO: - Episode: 396/1000, Reward: 148.0, Step: 148 +2022-10-30 21:17:19 - r - INFO: - Episode: 397/1000, Reward: 86.0, Step: 86 +2022-10-30 21:17:20 - r - INFO: - Episode: 398/1000, Reward: 200.0, Step: 200 +2022-10-30 21:17:21 - r - INFO: - Episode: 399/1000, Reward: 199.0, Step: 199 +2022-10-30 21:17:22 - r - INFO: - Episode: 400/1000, Reward: 200.0, Step: 200 +2022-10-30 21:17:23 - r - INFO: - Episode: 401/1000, Reward: 92.0, Step: 92 +2022-10-30 21:17:23 - r - INFO: - Episode: 402/1000, Reward: 112.0, Step: 112 +2022-10-30 21:17:24 - r - INFO: - Episode: 403/1000, Reward: 86.0, Step: 86 +2022-10-30 21:17:25 - r - INFO: - Episode: 404/1000, Reward: 114.0, Step: 114 +2022-10-30 21:17:26 - r - INFO: - Episode: 405/1000, Reward: 90.0, Step: 90 +2022-10-30 21:17:26 - r - INFO: - Episode: 406/1000, Reward: 101.0, Step: 101 +2022-10-30 21:17:27 - r - INFO: - Episode: 407/1000, Reward: 111.0, Step: 111 +2022-10-30 21:17:28 - r - INFO: - Episode: 408/1000, Reward: 107.0, Step: 107 +2022-10-30 21:17:28 - r - INFO: - Episode: 409/1000, Reward: 120.0, Step: 120 +2022-10-30 21:17:29 - r - INFO: - Episode: 410/1000, Reward: 114.0, Step: 114 +2022-10-30 21:17:30 - r - INFO: - Episode: 411/1000, Reward: 97.0, Step: 97 +2022-10-30 21:17:30 - r - INFO: - Episode: 412/1000, Reward: 95.0, Step: 95 +2022-10-30 21:17:31 - r - INFO: - Episode: 413/1000, Reward: 126.0, Step: 126 +2022-10-30 21:17:32 - r - INFO: - Episode: 414/1000, Reward: 111.0, Step: 111 +2022-10-30 21:17:33 - 
r - INFO: - Episode: 415/1000, Reward: 120.0, Step: 120 +2022-10-30 21:17:33 - r - INFO: - Episode: 416/1000, Reward: 178.0, Step: 178 +2022-10-30 21:17:34 - r - INFO: - Episode: 417/1000, Reward: 97.0, Step: 97 +2022-10-30 21:17:35 - r - INFO: - Episode: 418/1000, Reward: 144.0, Step: 144 +2022-10-30 21:17:36 - r - INFO: - Episode: 419/1000, Reward: 200.0, Step: 200 +2022-10-30 21:17:36 - r - INFO: - Episode: 420/1000, Reward: 190.0, Step: 190 +2022-10-30 21:17:37 - r - INFO: - Episode: 421/1000, Reward: 29.0, Step: 29 +2022-10-30 21:17:38 - r - INFO: - Episode: 422/1000, Reward: 200.0, Step: 200 +2022-10-30 21:17:38 - r - INFO: - Episode: 423/1000, Reward: 116.0, Step: 116 +2022-10-30 21:17:39 - r - INFO: - Episode: 424/1000, Reward: 200.0, Step: 200 +2022-10-30 21:17:40 - r - INFO: - Episode: 425/1000, Reward: 107.0, Step: 107 +2022-10-30 21:17:41 - r - INFO: - Episode: 426/1000, Reward: 128.0, Step: 128 +2022-10-30 21:17:41 - r - INFO: - Episode: 427/1000, Reward: 164.0, Step: 164 +2022-10-30 21:17:42 - r - INFO: - Episode: 428/1000, Reward: 30.0, Step: 30 +2022-10-30 21:17:42 - r - INFO: - Episode: 429/1000, Reward: 122.0, Step: 122 +2022-10-30 21:17:43 - r - INFO: - Episode: 430/1000, Reward: 110.0, Step: 110 +2022-10-30 21:17:44 - r - INFO: - Episode: 431/1000, Reward: 105.0, Step: 105 +2022-10-30 21:17:44 - r - INFO: - Episode: 432/1000, Reward: 137.0, Step: 137 +2022-10-30 21:17:45 - r - INFO: - Episode: 433/1000, Reward: 110.0, Step: 110 +2022-10-30 21:17:45 - r - INFO: - Episode: 434/1000, Reward: 111.0, Step: 111 +2022-10-30 21:17:46 - r - INFO: - Episode: 435/1000, Reward: 33.0, Step: 33 +2022-10-30 21:17:46 - r - INFO: - Episode: 436/1000, Reward: 100.0, Step: 100 +2022-10-30 21:17:47 - r - INFO: - Episode: 437/1000, Reward: 131.0, Step: 131 +2022-10-30 21:17:48 - r - INFO: - Episode: 438/1000, Reward: 99.0, Step: 99 +2022-10-30 21:17:48 - r - INFO: - Episode: 439/1000, Reward: 118.0, Step: 118 +2022-10-30 21:17:49 - r - INFO: - Episode: 440/1000, 
Reward: 98.0, Step: 98 +2022-10-30 21:17:49 - r - INFO: - Episode: 441/1000, Reward: 119.0, Step: 119 +2022-10-30 21:17:50 - r - INFO: - Episode: 442/1000, Reward: 41.0, Step: 41 +2022-10-30 21:17:50 - r - INFO: - Episode: 443/1000, Reward: 107.0, Step: 107 +2022-10-30 21:17:51 - r - INFO: - Episode: 444/1000, Reward: 41.0, Step: 41 +2022-10-30 21:17:52 - r - INFO: - Episode: 445/1000, Reward: 113.0, Step: 113 +2022-10-30 21:17:52 - r - INFO: - Episode: 446/1000, Reward: 113.0, Step: 113 +2022-10-30 21:17:53 - r - INFO: - Episode: 447/1000, Reward: 117.0, Step: 117 +2022-10-30 21:17:54 - r - INFO: - Episode: 448/1000, Reward: 140.0, Step: 140 +2022-10-30 21:17:54 - r - INFO: - Episode: 449/1000, Reward: 133.0, Step: 133 +2022-10-30 21:17:55 - r - INFO: - Episode: 450/1000, Reward: 108.0, Step: 108 +2022-10-30 21:17:56 - r - INFO: - Episode: 451/1000, Reward: 117.0, Step: 117 +2022-10-30 21:17:57 - r - INFO: - Episode: 452/1000, Reward: 40.0, Step: 40 +2022-10-30 21:17:57 - r - INFO: - Episode: 453/1000, Reward: 108.0, Step: 108 +2022-10-30 21:17:58 - r - INFO: - Episode: 454/1000, Reward: 140.0, Step: 140 +2022-10-30 21:17:59 - r - INFO: - Episode: 455/1000, Reward: 133.0, Step: 133 +2022-10-30 21:18:00 - r - INFO: - Episode: 456/1000, Reward: 115.0, Step: 115 +2022-10-30 21:18:00 - r - INFO: - Episode: 457/1000, Reward: 30.0, Step: 30 +2022-10-30 21:18:01 - r - INFO: - Episode: 458/1000, Reward: 119.0, Step: 119 +2022-10-30 21:18:02 - r - INFO: - Episode: 459/1000, Reward: 160.0, Step: 160 +2022-10-30 21:18:02 - r - INFO: - Episode: 460/1000, Reward: 125.0, Step: 125 +2022-10-30 21:18:03 - r - INFO: - Episode: 461/1000, Reward: 161.0, Step: 161 +2022-10-30 21:18:04 - r - INFO: - Episode: 462/1000, Reward: 139.0, Step: 139 +2022-10-30 21:18:05 - r - INFO: - Episode: 463/1000, Reward: 190.0, Step: 190 +2022-10-30 21:18:06 - r - INFO: - Episode: 464/1000, Reward: 149.0, Step: 149 +2022-10-30 21:18:07 - r - INFO: - Episode: 465/1000, Reward: 173.0, Step: 173 
+2022-10-30 21:18:08 - r - INFO: - Current episode 465 has the best eval reward: 187.6 +2022-10-30 21:18:08 - r - INFO: - Episode: 466/1000, Reward: 165.0, Step: 165 +2022-10-30 21:18:09 - r - INFO: - Episode: 467/1000, Reward: 82.0, Step: 82 +2022-10-30 21:18:10 - r - INFO: - Episode: 468/1000, Reward: 197.0, Step: 197 +2022-10-30 21:18:11 - r - INFO: - Current episode 468 has the best eval reward: 195.0 +2022-10-30 21:18:12 - r - INFO: - Episode: 469/1000, Reward: 200.0, Step: 200 +2022-10-30 21:18:13 - r - INFO: - Episode: 470/1000, Reward: 200.0, Step: 200 +2022-10-30 21:18:14 - r - INFO: - Current episode 470 has the best eval reward: 199.4 +2022-10-30 21:18:14 - r - INFO: - Episode: 471/1000, Reward: 200.0, Step: 200 +2022-10-30 21:18:16 - r - INFO: - Episode: 472/1000, Reward: 182.0, Step: 182 +2022-10-30 21:18:17 - r - INFO: - Episode: 473/1000, Reward: 118.0, Step: 118 +2022-10-30 21:18:18 - r - INFO: - Episode: 474/1000, Reward: 200.0, Step: 200 +2022-10-30 21:18:19 - r - INFO: - Episode: 475/1000, Reward: 200.0, Step: 200 +2022-10-30 21:18:20 - r - INFO: - Episode: 476/1000, Reward: 93.0, Step: 93 +2022-10-30 21:18:21 - r - INFO: - Episode: 477/1000, Reward: 200.0, Step: 200 +2022-10-30 21:18:23 - r - INFO: - Episode: 478/1000, Reward: 200.0, Step: 200 +2022-10-30 21:18:24 - r - INFO: - Episode: 479/1000, Reward: 167.0, Step: 167 +2022-10-30 21:18:25 - r - INFO: - Episode: 480/1000, Reward: 200.0, Step: 200 +2022-10-30 21:18:26 - r - INFO: - Episode: 481/1000, Reward: 200.0, Step: 200 +2022-10-30 21:18:27 - r - INFO: - Episode: 482/1000, Reward: 200.0, Step: 200 +2022-10-30 21:18:28 - r - INFO: - Episode: 483/1000, Reward: 190.0, Step: 190 +2022-10-30 21:18:29 - r - INFO: - Episode: 484/1000, Reward: 86.0, Step: 86 +2022-10-30 21:18:30 - r - INFO: - Episode: 485/1000, Reward: 166.0, Step: 166 +2022-10-30 21:18:31 - r - INFO: - Episode: 486/1000, Reward: 200.0, Step: 200 +2022-10-30 21:18:32 - r - INFO: - Episode: 487/1000, Reward: 200.0, Step: 200 
+2022-10-30 21:18:33 - r - INFO: - Episode: 488/1000, Reward: 172.0, Step: 172 +2022-10-30 21:18:34 - r - INFO: - Episode: 489/1000, Reward: 200.0, Step: 200 +2022-10-30 21:18:35 - r - INFO: - Episode: 490/1000, Reward: 102.0, Step: 102 +2022-10-30 21:18:36 - r - INFO: - Episode: 491/1000, Reward: 194.0, Step: 194 +2022-10-30 21:18:37 - r - INFO: - Episode: 492/1000, Reward: 200.0, Step: 200 +2022-10-30 21:18:38 - r - INFO: - Episode: 493/1000, Reward: 179.0, Step: 179 +2022-10-30 21:18:39 - r - INFO: - Episode: 494/1000, Reward: 187.0, Step: 187 +2022-10-30 21:18:40 - r - INFO: - Episode: 495/1000, Reward: 200.0, Step: 200 +2022-10-30 21:18:41 - r - INFO: - Episode: 496/1000, Reward: 89.0, Step: 89 +2022-10-30 21:18:41 - r - INFO: - Episode: 497/1000, Reward: 169.0, Step: 169 +2022-10-30 21:18:42 - r - INFO: - Episode: 498/1000, Reward: 28.0, Step: 28 +2022-10-30 21:18:43 - r - INFO: - Episode: 499/1000, Reward: 160.0, Step: 160 +2022-10-30 21:18:44 - r - INFO: - Episode: 500/1000, Reward: 140.0, Step: 140 +2022-10-30 21:18:44 - r - INFO: - Episode: 501/1000, Reward: 37.0, Step: 37 +2022-10-30 21:18:45 - r - INFO: - Episode: 502/1000, Reward: 32.0, Step: 32 +2022-10-30 21:18:45 - r - INFO: - Episode: 503/1000, Reward: 129.0, Step: 129 +2022-10-30 21:18:46 - r - INFO: - Episode: 504/1000, Reward: 22.0, Step: 22 +2022-10-30 21:18:46 - r - INFO: - Episode: 505/1000, Reward: 124.0, Step: 124 +2022-10-30 21:18:46 - r - INFO: - Episode: 506/1000, Reward: 24.0, Step: 24 +2022-10-30 21:18:47 - r - INFO: - Episode: 507/1000, Reward: 115.0, Step: 115 +2022-10-30 21:18:47 - r - INFO: - Episode: 508/1000, Reward: 24.0, Step: 24 +2022-10-30 21:18:48 - r - INFO: - Episode: 509/1000, Reward: 38.0, Step: 38 +2022-10-30 21:18:49 - r - INFO: - Episode: 510/1000, Reward: 24.0, Step: 24 +2022-10-30 21:18:49 - r - INFO: - Episode: 511/1000, Reward: 23.0, Step: 23 +2022-10-30 21:18:49 - r - INFO: - Episode: 512/1000, Reward: 125.0, Step: 125 +2022-10-30 21:18:49 - r - INFO: - Episode: 
513/1000, Reward: 22.0, Step: 22 +2022-10-30 21:18:50 - r - INFO: - Episode: 514/1000, Reward: 24.0, Step: 24 +2022-10-30 21:18:50 - r - INFO: - Episode: 515/1000, Reward: 20.0, Step: 20 +2022-10-30 21:18:50 - r - INFO: - Episode: 516/1000, Reward: 25.0, Step: 25 +2022-10-30 21:18:50 - r - INFO: - Episode: 517/1000, Reward: 31.0, Step: 31 +2022-10-30 21:18:50 - r - INFO: - Episode: 518/1000, Reward: 23.0, Step: 23 +2022-10-30 21:18:51 - r - INFO: - Episode: 519/1000, Reward: 30.0, Step: 30 +2022-10-30 21:18:51 - r - INFO: - Episode: 520/1000, Reward: 101.0, Step: 101 +2022-10-30 21:18:51 - r - INFO: - Episode: 521/1000, Reward: 25.0, Step: 25 +2022-10-30 21:18:52 - r - INFO: - Episode: 522/1000, Reward: 22.0, Step: 22 +2022-10-30 21:18:52 - r - INFO: - Episode: 523/1000, Reward: 20.0, Step: 20 +2022-10-30 21:18:52 - r - INFO: - Episode: 524/1000, Reward: 16.0, Step: 16 +2022-10-30 21:18:53 - r - INFO: - Episode: 525/1000, Reward: 104.0, Step: 104 +2022-10-30 21:18:53 - r - INFO: - Episode: 526/1000, Reward: 17.0, Step: 17 +2022-10-30 21:18:53 - r - INFO: - Episode: 527/1000, Reward: 108.0, Step: 108 +2022-10-30 21:18:53 - r - INFO: - Episode: 528/1000, Reward: 121.0, Step: 121 +2022-10-30 21:18:54 - r - INFO: - Episode: 529/1000, Reward: 29.0, Step: 29 +2022-10-30 21:18:54 - r - INFO: - Episode: 530/1000, Reward: 29.0, Step: 29 +2022-10-30 21:18:54 - r - INFO: - Episode: 531/1000, Reward: 43.0, Step: 43 +2022-10-30 21:18:55 - r - INFO: - Episode: 532/1000, Reward: 105.0, Step: 105 +2022-10-30 21:18:55 - r - INFO: - Episode: 533/1000, Reward: 130.0, Step: 130 +2022-10-30 21:18:55 - r - INFO: - Episode: 534/1000, Reward: 30.0, Step: 30 +2022-10-30 21:18:56 - r - INFO: - Episode: 535/1000, Reward: 31.0, Step: 31 +2022-10-30 21:18:56 - r - INFO: - Episode: 536/1000, Reward: 30.0, Step: 30 +2022-10-30 21:18:56 - r - INFO: - Episode: 537/1000, Reward: 37.0, Step: 37 +2022-10-30 21:18:57 - r - INFO: - Episode: 538/1000, Reward: 115.0, Step: 115 +2022-10-30 21:18:58 - r - 
INFO: - Episode: 539/1000, Reward: 110.0, Step: 110 +2022-10-30 21:18:58 - r - INFO: - Episode: 540/1000, Reward: 112.0, Step: 112 +2022-10-30 21:18:59 - r - INFO: - Episode: 541/1000, Reward: 33.0, Step: 33 +2022-10-30 21:18:59 - r - INFO: - Episode: 542/1000, Reward: 120.0, Step: 120 +2022-10-30 21:19:00 - r - INFO: - Episode: 543/1000, Reward: 109.0, Step: 109 +2022-10-30 21:19:01 - r - INFO: - Episode: 544/1000, Reward: 122.0, Step: 122 +2022-10-30 21:19:01 - r - INFO: - Episode: 545/1000, Reward: 115.0, Step: 115 +2022-10-30 21:19:02 - r - INFO: - Episode: 546/1000, Reward: 34.0, Step: 34 +2022-10-30 21:19:02 - r - INFO: - Episode: 547/1000, Reward: 28.0, Step: 28 +2022-10-30 21:19:03 - r - INFO: - Episode: 548/1000, Reward: 29.0, Step: 29 +2022-10-30 21:19:03 - r - INFO: - Episode: 549/1000, Reward: 113.0, Step: 113 +2022-10-30 21:19:04 - r - INFO: - Episode: 550/1000, Reward: 100.0, Step: 100 +2022-10-30 21:19:04 - r - INFO: - Episode: 551/1000, Reward: 26.0, Step: 26 +2022-10-30 21:19:04 - r - INFO: - Episode: 552/1000, Reward: 24.0, Step: 24 +2022-10-30 21:19:05 - r - INFO: - Episode: 553/1000, Reward: 26.0, Step: 26 +2022-10-30 21:19:05 - r - INFO: - Episode: 554/1000, Reward: 102.0, Step: 102 +2022-10-30 21:19:05 - r - INFO: - Episode: 555/1000, Reward: 18.0, Step: 18 +2022-10-30 21:19:06 - r - INFO: - Episode: 556/1000, Reward: 107.0, Step: 107 +2022-10-30 21:19:06 - r - INFO: - Episode: 557/1000, Reward: 27.0, Step: 27 +2022-10-30 21:19:06 - r - INFO: - Episode: 558/1000, Reward: 87.0, Step: 87 +2022-10-30 21:19:07 - r - INFO: - Episode: 559/1000, Reward: 29.0, Step: 29 +2022-10-30 21:19:07 - r - INFO: - Episode: 560/1000, Reward: 31.0, Step: 31 +2022-10-30 21:19:07 - r - INFO: - Episode: 561/1000, Reward: 112.0, Step: 112 +2022-10-30 21:19:08 - r - INFO: - Episode: 562/1000, Reward: 112.0, Step: 112 +2022-10-30 21:19:09 - r - INFO: - Episode: 563/1000, Reward: 108.0, Step: 108 +2022-10-30 21:19:09 - r - INFO: - Episode: 564/1000, Reward: 98.0, Step: 
98 +2022-10-30 21:19:10 - r - INFO: - Episode: 565/1000, Reward: 104.0, Step: 104 +2022-10-30 21:19:10 - r - INFO: - Episode: 566/1000, Reward: 116.0, Step: 116 +2022-10-30 21:19:11 - r - INFO: - Episode: 567/1000, Reward: 123.0, Step: 123 +2022-10-30 21:19:12 - r - INFO: - Episode: 568/1000, Reward: 105.0, Step: 105 +2022-10-30 21:19:12 - r - INFO: - Episode: 569/1000, Reward: 133.0, Step: 133 +2022-10-30 21:19:13 - r - INFO: - Episode: 570/1000, Reward: 116.0, Step: 116 +2022-10-30 21:19:14 - r - INFO: - Episode: 571/1000, Reward: 128.0, Step: 128 +2022-10-30 21:19:15 - r - INFO: - Episode: 572/1000, Reward: 130.0, Step: 130 +2022-10-30 21:19:15 - r - INFO: - Episode: 573/1000, Reward: 113.0, Step: 113 +2022-10-30 21:19:16 - r - INFO: - Episode: 574/1000, Reward: 143.0, Step: 143 +2022-10-30 21:19:17 - r - INFO: - Episode: 575/1000, Reward: 145.0, Step: 145 +2022-10-30 21:19:18 - r - INFO: - Episode: 576/1000, Reward: 159.0, Step: 159 +2022-10-30 21:19:19 - r - INFO: - Episode: 577/1000, Reward: 150.0, Step: 150 +2022-10-30 21:19:19 - r - INFO: - Episode: 578/1000, Reward: 130.0, Step: 130 +2022-10-30 21:19:20 - r - INFO: - Episode: 579/1000, Reward: 145.0, Step: 145 +2022-10-30 21:19:21 - r - INFO: - Episode: 580/1000, Reward: 173.0, Step: 173 +2022-10-30 21:19:22 - r - INFO: - Episode: 581/1000, Reward: 154.0, Step: 154 +2022-10-30 21:19:23 - r - INFO: - Episode: 582/1000, Reward: 131.0, Step: 131 +2022-10-30 21:19:24 - r - INFO: - Episode: 583/1000, Reward: 163.0, Step: 163 +2022-10-30 21:19:25 - r - INFO: - Episode: 584/1000, Reward: 160.0, Step: 160 +2022-10-30 21:19:26 - r - INFO: - Episode: 585/1000, Reward: 181.0, Step: 181 +2022-10-30 21:19:27 - r - INFO: - Episode: 586/1000, Reward: 161.0, Step: 161 +2022-10-30 21:19:28 - r - INFO: - Episode: 587/1000, Reward: 169.0, Step: 169 +2022-10-30 21:19:29 - r - INFO: - Episode: 588/1000, Reward: 150.0, Step: 150 +2022-10-30 21:19:30 - r - INFO: - Episode: 589/1000, Reward: 176.0, Step: 176 +2022-10-30 21:19:31 
- r - INFO: - Episode: 590/1000, Reward: 157.0, Step: 157 +2022-10-30 21:19:32 - r - INFO: - Episode: 591/1000, Reward: 167.0, Step: 167 +2022-10-30 21:19:33 - r - INFO: - Episode: 592/1000, Reward: 168.0, Step: 168 +2022-10-30 21:19:34 - r - INFO: - Episode: 593/1000, Reward: 135.0, Step: 135 +2022-10-30 21:19:35 - r - INFO: - Episode: 594/1000, Reward: 157.0, Step: 157 +2022-10-30 21:19:35 - r - INFO: - Episode: 595/1000, Reward: 138.0, Step: 138 +2022-10-30 21:19:36 - r - INFO: - Episode: 596/1000, Reward: 139.0, Step: 139 +2022-10-30 21:19:37 - r - INFO: - Episode: 597/1000, Reward: 146.0, Step: 146 +2022-10-30 21:19:38 - r - INFO: - Episode: 598/1000, Reward: 121.0, Step: 121 +2022-10-30 21:19:39 - r - INFO: - Episode: 599/1000, Reward: 140.0, Step: 140 +2022-10-30 21:19:40 - r - INFO: - Episode: 600/1000, Reward: 124.0, Step: 124 +2022-10-30 21:19:41 - r - INFO: - Episode: 601/1000, Reward: 124.0, Step: 124 +2022-10-30 21:19:42 - r - INFO: - Episode: 602/1000, Reward: 115.0, Step: 115 +2022-10-30 21:19:42 - r - INFO: - Episode: 603/1000, Reward: 129.0, Step: 129 +2022-10-30 21:19:43 - r - INFO: - Episode: 604/1000, Reward: 107.0, Step: 107 +2022-10-30 21:19:44 - r - INFO: - Episode: 605/1000, Reward: 118.0, Step: 118 +2022-10-30 21:19:44 - r - INFO: - Episode: 606/1000, Reward: 108.0, Step: 108 +2022-10-30 21:19:45 - r - INFO: - Episode: 607/1000, Reward: 102.0, Step: 102 +2022-10-30 21:19:46 - r - INFO: - Episode: 608/1000, Reward: 105.0, Step: 105 +2022-10-30 21:19:46 - r - INFO: - Episode: 609/1000, Reward: 103.0, Step: 103 +2022-10-30 21:19:47 - r - INFO: - Episode: 610/1000, Reward: 96.0, Step: 96 +2022-10-30 21:19:47 - r - INFO: - Episode: 611/1000, Reward: 116.0, Step: 116 +2022-10-30 21:19:48 - r - INFO: - Episode: 612/1000, Reward: 51.0, Step: 51 +2022-10-30 21:19:48 - r - INFO: - Episode: 613/1000, Reward: 100.0, Step: 100 +2022-10-30 21:19:49 - r - INFO: - Episode: 614/1000, Reward: 121.0, Step: 121 +2022-10-30 21:19:50 - r - INFO: - Episode: 
615/1000, Reward: 109.0, Step: 109 +2022-10-30 21:19:50 - r - INFO: - Episode: 616/1000, Reward: 85.0, Step: 85 +2022-10-30 21:19:51 - r - INFO: - Episode: 617/1000, Reward: 111.0, Step: 111 +2022-10-30 21:19:52 - r - INFO: - Episode: 618/1000, Reward: 91.0, Step: 91 +2022-10-30 21:19:52 - r - INFO: - Episode: 619/1000, Reward: 127.0, Step: 127 +2022-10-30 21:19:53 - r - INFO: - Episode: 620/1000, Reward: 117.0, Step: 117 +2022-10-30 21:19:53 - r - INFO: - Episode: 621/1000, Reward: 104.0, Step: 104 +2022-10-30 21:19:54 - r - INFO: - Episode: 622/1000, Reward: 119.0, Step: 119 +2022-10-30 21:19:55 - r - INFO: - Episode: 623/1000, Reward: 111.0, Step: 111 +2022-10-30 21:19:56 - r - INFO: - Episode: 624/1000, Reward: 132.0, Step: 132 +2022-10-30 21:19:56 - r - INFO: - Episode: 625/1000, Reward: 130.0, Step: 130 +2022-10-30 21:19:57 - r - INFO: - Episode: 626/1000, Reward: 140.0, Step: 140 +2022-10-30 21:19:58 - r - INFO: - Episode: 627/1000, Reward: 95.0, Step: 95 +2022-10-30 21:19:58 - r - INFO: - Episode: 628/1000, Reward: 106.0, Step: 106 +2022-10-30 21:19:59 - r - INFO: - Episode: 629/1000, Reward: 120.0, Step: 120 +2022-10-30 21:20:00 - r - INFO: - Episode: 630/1000, Reward: 111.0, Step: 111 +2022-10-30 21:20:00 - r - INFO: - Episode: 631/1000, Reward: 114.0, Step: 114 +2022-10-30 21:20:01 - r - INFO: - Episode: 632/1000, Reward: 126.0, Step: 126 +2022-10-30 21:20:02 - r - INFO: - Episode: 633/1000, Reward: 100.0, Step: 100 +2022-10-30 21:20:03 - r - INFO: - Episode: 634/1000, Reward: 111.0, Step: 111 +2022-10-30 21:20:03 - r - INFO: - Episode: 635/1000, Reward: 104.0, Step: 104 +2022-10-30 21:20:04 - r - INFO: - Episode: 636/1000, Reward: 103.0, Step: 103 +2022-10-30 21:20:04 - r - INFO: - Episode: 637/1000, Reward: 111.0, Step: 111 +2022-10-30 21:20:05 - r - INFO: - Episode: 638/1000, Reward: 110.0, Step: 110 +2022-10-30 21:20:06 - r - INFO: - Episode: 639/1000, Reward: 131.0, Step: 131 +2022-10-30 21:20:06 - r - INFO: - Episode: 640/1000, Reward: 90.0, Step: 
90 +2022-10-30 21:20:07 - r - INFO: - Episode: 641/1000, Reward: 97.0, Step: 97 +2022-10-30 21:20:08 - r - INFO: - Episode: 642/1000, Reward: 104.0, Step: 104 +2022-10-30 21:20:09 - r - INFO: - Episode: 643/1000, Reward: 91.0, Step: 91 +2022-10-30 21:20:09 - r - INFO: - Episode: 644/1000, Reward: 97.0, Step: 97 +2022-10-30 21:20:10 - r - INFO: - Episode: 645/1000, Reward: 109.0, Step: 109 +2022-10-30 21:20:10 - r - INFO: - Episode: 646/1000, Reward: 112.0, Step: 112 +2022-10-30 21:20:11 - r - INFO: - Episode: 647/1000, Reward: 97.0, Step: 97 +2022-10-30 21:20:11 - r - INFO: - Episode: 648/1000, Reward: 32.0, Step: 32 +2022-10-30 21:20:12 - r - INFO: - Episode: 649/1000, Reward: 94.0, Step: 94 +2022-10-30 21:20:13 - r - INFO: - Episode: 650/1000, Reward: 107.0, Step: 107 +2022-10-30 21:20:13 - r - INFO: - Episode: 651/1000, Reward: 61.0, Step: 61 +2022-10-30 21:20:14 - r - INFO: - Episode: 652/1000, Reward: 97.0, Step: 97 +2022-10-30 21:20:14 - r - INFO: - Episode: 653/1000, Reward: 99.0, Step: 99 +2022-10-30 21:20:15 - r - INFO: - Episode: 654/1000, Reward: 76.0, Step: 76 +2022-10-30 21:20:15 - r - INFO: - Episode: 655/1000, Reward: 38.0, Step: 38 +2022-10-30 21:20:15 - r - INFO: - Episode: 656/1000, Reward: 96.0, Step: 96 +2022-10-30 21:20:16 - r - INFO: - Episode: 657/1000, Reward: 96.0, Step: 96 +2022-10-30 21:20:16 - r - INFO: - Episode: 658/1000, Reward: 65.0, Step: 65 +2022-10-30 21:20:17 - r - INFO: - Episode: 659/1000, Reward: 45.0, Step: 45 +2022-10-30 21:20:17 - r - INFO: - Episode: 660/1000, Reward: 91.0, Step: 91 +2022-10-30 21:20:18 - r - INFO: - Episode: 661/1000, Reward: 78.0, Step: 78 +2022-10-30 21:20:18 - r - INFO: - Episode: 662/1000, Reward: 90.0, Step: 90 +2022-10-30 21:20:19 - r - INFO: - Episode: 663/1000, Reward: 92.0, Step: 92 +2022-10-30 21:20:19 - r - INFO: - Episode: 664/1000, Reward: 94.0, Step: 94 +2022-10-30 21:20:20 - r - INFO: - Episode: 665/1000, Reward: 101.0, Step: 101 +2022-10-30 21:20:20 - r - INFO: - Episode: 666/1000, Reward: 
111.0, Step: 111 +2022-10-30 21:20:21 - r - INFO: - Episode: 667/1000, Reward: 109.0, Step: 109 +2022-10-30 21:20:22 - r - INFO: - Episode: 668/1000, Reward: 99.0, Step: 99 +2022-10-30 21:20:22 - r - INFO: - Episode: 669/1000, Reward: 115.0, Step: 115 +2022-10-30 21:20:23 - r - INFO: - Episode: 670/1000, Reward: 112.0, Step: 112 +2022-10-30 21:20:23 - r - INFO: - Episode: 671/1000, Reward: 113.0, Step: 113 +2022-10-30 21:20:24 - r - INFO: - Episode: 672/1000, Reward: 110.0, Step: 110 +2022-10-30 21:20:25 - r - INFO: - Episode: 673/1000, Reward: 108.0, Step: 108 +2022-10-30 21:20:26 - r - INFO: - Episode: 674/1000, Reward: 112.0, Step: 112 +2022-10-30 21:20:26 - r - INFO: - Episode: 675/1000, Reward: 125.0, Step: 125 +2022-10-30 21:20:27 - r - INFO: - Episode: 676/1000, Reward: 122.0, Step: 122 +2022-10-30 21:20:28 - r - INFO: - Episode: 677/1000, Reward: 114.0, Step: 114 +2022-10-30 21:20:28 - r - INFO: - Episode: 678/1000, Reward: 127.0, Step: 127 +2022-10-30 21:20:29 - r - INFO: - Episode: 679/1000, Reward: 125.0, Step: 125 +2022-10-30 21:20:30 - r - INFO: - Episode: 680/1000, Reward: 112.0, Step: 112 +2022-10-30 21:20:30 - r - INFO: - Episode: 681/1000, Reward: 111.0, Step: 111 +2022-10-30 21:20:31 - r - INFO: - Episode: 682/1000, Reward: 124.0, Step: 124 +2022-10-30 21:20:32 - r - INFO: - Episode: 683/1000, Reward: 113.0, Step: 113 +2022-10-30 21:20:33 - r - INFO: - Episode: 684/1000, Reward: 103.0, Step: 103 +2022-10-30 21:20:33 - r - INFO: - Episode: 685/1000, Reward: 119.0, Step: 119 +2022-10-30 21:20:34 - r - INFO: - Episode: 686/1000, Reward: 120.0, Step: 120 +2022-10-30 21:20:35 - r - INFO: - Episode: 687/1000, Reward: 95.0, Step: 95 +2022-10-30 21:20:35 - r - INFO: - Episode: 688/1000, Reward: 100.0, Step: 100 +2022-10-30 21:20:36 - r - INFO: - Episode: 689/1000, Reward: 29.0, Step: 29 +2022-10-30 21:20:36 - r - INFO: - Episode: 690/1000, Reward: 119.0, Step: 119 +2022-10-30 21:20:37 - r - INFO: - Episode: 691/1000, Reward: 107.0, Step: 107 +2022-10-30 
21:20:38 - r - INFO: - Episode: 692/1000, Reward: 117.0, Step: 117 +2022-10-30 21:20:38 - r - INFO: - Episode: 693/1000, Reward: 78.0, Step: 78 +2022-10-30 21:20:38 - r - INFO: - Episode: 694/1000, Reward: 35.0, Step: 35 +2022-10-30 21:20:39 - r - INFO: - Episode: 695/1000, Reward: 101.0, Step: 101 +2022-10-30 21:20:40 - r - INFO: - Episode: 696/1000, Reward: 98.0, Step: 98 +2022-10-30 21:20:40 - r - INFO: - Episode: 697/1000, Reward: 94.0, Step: 94 +2022-10-30 21:20:41 - r - INFO: - Episode: 698/1000, Reward: 102.0, Step: 102 +2022-10-30 21:20:41 - r - INFO: - Episode: 699/1000, Reward: 90.0, Step: 90 +2022-10-30 21:20:42 - r - INFO: - Episode: 700/1000, Reward: 86.0, Step: 86 +2022-10-30 21:20:42 - r - INFO: - Episode: 701/1000, Reward: 81.0, Step: 81 +2022-10-30 21:20:43 - r - INFO: - Episode: 702/1000, Reward: 105.0, Step: 105 +2022-10-30 21:20:43 - r - INFO: - Episode: 703/1000, Reward: 72.0, Step: 72 +2022-10-30 21:20:44 - r - INFO: - Episode: 704/1000, Reward: 100.0, Step: 100 +2022-10-30 21:20:44 - r - INFO: - Episode: 705/1000, Reward: 96.0, Step: 96 +2022-10-30 21:20:45 - r - INFO: - Episode: 706/1000, Reward: 111.0, Step: 111 +2022-10-30 21:20:45 - r - INFO: - Episode: 707/1000, Reward: 27.0, Step: 27 +2022-10-30 21:20:46 - r - INFO: - Episode: 708/1000, Reward: 107.0, Step: 107 +2022-10-30 21:20:47 - r - INFO: - Episode: 709/1000, Reward: 87.0, Step: 87 +2022-10-30 21:20:47 - r - INFO: - Episode: 710/1000, Reward: 114.0, Step: 114 +2022-10-30 21:20:48 - r - INFO: - Episode: 711/1000, Reward: 111.0, Step: 111 +2022-10-30 21:20:48 - r - INFO: - Episode: 712/1000, Reward: 88.0, Step: 88 +2022-10-30 21:20:49 - r - INFO: - Episode: 713/1000, Reward: 112.0, Step: 112 +2022-10-30 21:20:50 - r - INFO: - Episode: 714/1000, Reward: 108.0, Step: 108 +2022-10-30 21:20:50 - r - INFO: - Episode: 715/1000, Reward: 108.0, Step: 108 +2022-10-30 21:20:51 - r - INFO: - Episode: 716/1000, Reward: 103.0, Step: 103 +2022-10-30 21:20:52 - r - INFO: - Episode: 717/1000, 
Reward: 120.0, Step: 120 +2022-10-30 21:20:52 - r - INFO: - Episode: 718/1000, Reward: 116.0, Step: 116 +2022-10-30 21:20:53 - r - INFO: - Episode: 719/1000, Reward: 112.0, Step: 112 +2022-10-30 21:20:54 - r - INFO: - Episode: 720/1000, Reward: 99.0, Step: 99 +2022-10-30 21:20:54 - r - INFO: - Episode: 721/1000, Reward: 118.0, Step: 118 +2022-10-30 21:20:55 - r - INFO: - Episode: 722/1000, Reward: 114.0, Step: 114 +2022-10-30 21:20:56 - r - INFO: - Episode: 723/1000, Reward: 104.0, Step: 104 +2022-10-30 21:20:56 - r - INFO: - Episode: 724/1000, Reward: 99.0, Step: 99 +2022-10-30 21:20:57 - r - INFO: - Episode: 725/1000, Reward: 102.0, Step: 102 +2022-10-30 21:20:57 - r - INFO: - Episode: 726/1000, Reward: 106.0, Step: 106 +2022-10-30 21:20:58 - r - INFO: - Episode: 727/1000, Reward: 31.0, Step: 31 +2022-10-30 21:20:58 - r - INFO: - Episode: 728/1000, Reward: 91.0, Step: 91 +2022-10-30 21:20:59 - r - INFO: - Episode: 729/1000, Reward: 32.0, Step: 32 +2022-10-30 21:20:59 - r - INFO: - Episode: 730/1000, Reward: 96.0, Step: 96 +2022-10-30 21:20:59 - r - INFO: - Episode: 731/1000, Reward: 20.0, Step: 20 +2022-10-30 21:21:00 - r - INFO: - Episode: 732/1000, Reward: 33.0, Step: 33 +2022-10-30 21:21:00 - r - INFO: - Episode: 733/1000, Reward: 23.0, Step: 23 +2022-10-30 21:21:00 - r - INFO: - Episode: 734/1000, Reward: 80.0, Step: 80 +2022-10-30 21:21:01 - r - INFO: - Episode: 735/1000, Reward: 35.0, Step: 35 +2022-10-30 21:21:01 - r - INFO: - Episode: 736/1000, Reward: 88.0, Step: 88 +2022-10-30 21:21:01 - r - INFO: - Episode: 737/1000, Reward: 28.0, Step: 28 +2022-10-30 21:21:01 - r - INFO: - Episode: 738/1000, Reward: 26.0, Step: 26 +2022-10-30 21:21:02 - r - INFO: - Episode: 739/1000, Reward: 70.0, Step: 70 +2022-10-30 21:21:02 - r - INFO: - Episode: 740/1000, Reward: 86.0, Step: 86 +2022-10-30 21:21:02 - r - INFO: - Episode: 741/1000, Reward: 28.0, Step: 28 +2022-10-30 21:21:02 - r - INFO: - Episode: 742/1000, Reward: 39.0, Step: 39 +2022-10-30 21:21:03 - r - INFO: - 
Episode: 743/1000, Reward: 65.0, Step: 65 +2022-10-30 21:21:03 - r - INFO: - Episode: 744/1000, Reward: 52.0, Step: 52 +2022-10-30 21:21:03 - r - INFO: - Episode: 745/1000, Reward: 43.0, Step: 43 +2022-10-30 21:21:04 - r - INFO: - Episode: 746/1000, Reward: 97.0, Step: 97 +2022-10-30 21:21:04 - r - INFO: - Episode: 747/1000, Reward: 27.0, Step: 27 +2022-10-30 21:21:05 - r - INFO: - Episode: 748/1000, Reward: 89.0, Step: 89 +2022-10-30 21:21:05 - r - INFO: - Episode: 749/1000, Reward: 34.0, Step: 34 +2022-10-30 21:21:05 - r - INFO: - Episode: 750/1000, Reward: 35.0, Step: 35 +2022-10-30 21:21:06 - r - INFO: - Episode: 751/1000, Reward: 28.0, Step: 28 +2022-10-30 21:21:06 - r - INFO: - Episode: 752/1000, Reward: 96.0, Step: 96 +2022-10-30 21:21:07 - r - INFO: - Episode: 753/1000, Reward: 97.0, Step: 97 +2022-10-30 21:21:07 - r - INFO: - Episode: 754/1000, Reward: 108.0, Step: 108 +2022-10-30 21:21:08 - r - INFO: - Episode: 755/1000, Reward: 45.0, Step: 45 +2022-10-30 21:21:09 - r - INFO: - Episode: 756/1000, Reward: 103.0, Step: 103 +2022-10-30 21:21:10 - r - INFO: - Episode: 757/1000, Reward: 97.0, Step: 97 +2022-10-30 21:21:10 - r - INFO: - Episode: 758/1000, Reward: 114.0, Step: 114 +2022-10-30 21:21:11 - r - INFO: - Episode: 759/1000, Reward: 103.0, Step: 103 +2022-10-30 21:21:12 - r - INFO: - Episode: 760/1000, Reward: 116.0, Step: 116 +2022-10-30 21:21:12 - r - INFO: - Episode: 761/1000, Reward: 127.0, Step: 127 +2022-10-30 21:21:13 - r - INFO: - Episode: 762/1000, Reward: 122.0, Step: 122 +2022-10-30 21:21:14 - r - INFO: - Episode: 763/1000, Reward: 112.0, Step: 112 +2022-10-30 21:21:14 - r - INFO: - Episode: 764/1000, Reward: 112.0, Step: 112 +2022-10-30 21:21:15 - r - INFO: - Episode: 765/1000, Reward: 120.0, Step: 120 +2022-10-30 21:21:16 - r - INFO: - Episode: 766/1000, Reward: 129.0, Step: 129 +2022-10-30 21:21:17 - r - INFO: - Episode: 767/1000, Reward: 127.0, Step: 127 +2022-10-30 21:21:18 - r - INFO: - Episode: 768/1000, Reward: 125.0, Step: 125 
+2022-10-30 21:21:19 - r - INFO: - Episode: 769/1000, Reward: 124.0, Step: 124 +2022-10-30 21:21:20 - r - INFO: - Episode: 770/1000, Reward: 126.0, Step: 126 +2022-10-30 21:21:20 - r - INFO: - Episode: 771/1000, Reward: 129.0, Step: 129 +2022-10-30 21:21:21 - r - INFO: - Episode: 772/1000, Reward: 129.0, Step: 129 +2022-10-30 21:21:22 - r - INFO: - Episode: 773/1000, Reward: 43.0, Step: 43 +2022-10-30 21:21:22 - r - INFO: - Episode: 774/1000, Reward: 121.0, Step: 121 +2022-10-30 21:21:23 - r - INFO: - Episode: 775/1000, Reward: 40.0, Step: 40 +2022-10-30 21:21:24 - r - INFO: - Episode: 776/1000, Reward: 116.0, Step: 116 +2022-10-30 21:21:24 - r - INFO: - Episode: 777/1000, Reward: 117.0, Step: 117 +2022-10-30 21:21:25 - r - INFO: - Episode: 778/1000, Reward: 113.0, Step: 113 +2022-10-30 21:21:26 - r - INFO: - Episode: 779/1000, Reward: 117.0, Step: 117 +2022-10-30 21:21:26 - r - INFO: - Episode: 780/1000, Reward: 108.0, Step: 108 +2022-10-30 21:21:27 - r - INFO: - Episode: 781/1000, Reward: 108.0, Step: 108 +2022-10-30 21:21:28 - r - INFO: - Episode: 782/1000, Reward: 119.0, Step: 119 +2022-10-30 21:21:28 - r - INFO: - Episode: 783/1000, Reward: 109.0, Step: 109 +2022-10-30 21:21:29 - r - INFO: - Episode: 784/1000, Reward: 116.0, Step: 116 +2022-10-30 21:21:29 - r - INFO: - Episode: 785/1000, Reward: 114.0, Step: 114 +2022-10-30 21:21:30 - r - INFO: - Episode: 786/1000, Reward: 45.0, Step: 45 +2022-10-30 21:21:31 - r - INFO: - Episode: 787/1000, Reward: 116.0, Step: 116 +2022-10-30 21:21:31 - r - INFO: - Episode: 788/1000, Reward: 116.0, Step: 116 +2022-10-30 21:21:32 - r - INFO: - Episode: 789/1000, Reward: 110.0, Step: 110 +2022-10-30 21:21:32 - r - INFO: - Episode: 790/1000, Reward: 105.0, Step: 105 +2022-10-30 21:21:33 - r - INFO: - Episode: 791/1000, Reward: 110.0, Step: 110 +2022-10-30 21:21:34 - r - INFO: - Episode: 792/1000, Reward: 112.0, Step: 112 +2022-10-30 21:21:34 - r - INFO: - Episode: 793/1000, Reward: 104.0, Step: 104 +2022-10-30 21:21:35 - r - 
INFO: - Episode: 794/1000, Reward: 120.0, Step: 120 +2022-10-30 21:21:36 - r - INFO: - Episode: 795/1000, Reward: 110.0, Step: 110 +2022-10-30 21:21:36 - r - INFO: - Episode: 796/1000, Reward: 113.0, Step: 113 +2022-10-30 21:21:37 - r - INFO: - Episode: 797/1000, Reward: 33.0, Step: 33 +2022-10-30 21:21:37 - r - INFO: - Episode: 798/1000, Reward: 111.0, Step: 111 +2022-10-30 21:21:38 - r - INFO: - Episode: 799/1000, Reward: 31.0, Step: 31 +2022-10-30 21:21:38 - r - INFO: - Episode: 800/1000, Reward: 139.0, Step: 139 +2022-10-30 21:21:39 - r - INFO: - Episode: 801/1000, Reward: 110.0, Step: 110 +2022-10-30 21:21:40 - r - INFO: - Episode: 802/1000, Reward: 124.0, Step: 124 +2022-10-30 21:21:41 - r - INFO: - Episode: 803/1000, Reward: 120.0, Step: 120 +2022-10-30 21:21:41 - r - INFO: - Episode: 804/1000, Reward: 112.0, Step: 112 +2022-10-30 21:21:42 - r - INFO: - Episode: 805/1000, Reward: 116.0, Step: 116 +2022-10-30 21:21:43 - r - INFO: - Episode: 806/1000, Reward: 105.0, Step: 105 +2022-10-30 21:21:43 - r - INFO: - Episode: 807/1000, Reward: 125.0, Step: 125 +2022-10-30 21:21:44 - r - INFO: - Episode: 808/1000, Reward: 103.0, Step: 103 +2022-10-30 21:21:45 - r - INFO: - Episode: 809/1000, Reward: 122.0, Step: 122 +2022-10-30 21:21:45 - r - INFO: - Episode: 810/1000, Reward: 109.0, Step: 109 +2022-10-30 21:21:46 - r - INFO: - Episode: 811/1000, Reward: 118.0, Step: 118 +2022-10-30 21:21:47 - r - INFO: - Episode: 812/1000, Reward: 124.0, Step: 124 +2022-10-30 21:21:48 - r - INFO: - Episode: 813/1000, Reward: 115.0, Step: 115 +2022-10-30 21:21:48 - r - INFO: - Episode: 814/1000, Reward: 26.0, Step: 26 +2022-10-30 21:21:49 - r - INFO: - Episode: 815/1000, Reward: 118.0, Step: 118 +2022-10-30 21:21:49 - r - INFO: - Episode: 816/1000, Reward: 118.0, Step: 118 +2022-10-30 21:21:50 - r - INFO: - Episode: 817/1000, Reward: 31.0, Step: 31 +2022-10-30 21:21:50 - r - INFO: - Episode: 818/1000, Reward: 99.0, Step: 99 +2022-10-30 21:21:51 - r - INFO: - Episode: 819/1000, Reward: 
122.0, Step: 122 +2022-10-30 21:21:52 - r - INFO: - Episode: 820/1000, Reward: 102.0, Step: 102 +2022-10-30 21:21:52 - r - INFO: - Episode: 821/1000, Reward: 111.0, Step: 111 +2022-10-30 21:21:53 - r - INFO: - Episode: 822/1000, Reward: 110.0, Step: 110 +2022-10-30 21:21:54 - r - INFO: - Episode: 823/1000, Reward: 113.0, Step: 113 +2022-10-30 21:21:54 - r - INFO: - Episode: 824/1000, Reward: 117.0, Step: 117 +2022-10-30 21:21:55 - r - INFO: - Episode: 825/1000, Reward: 113.0, Step: 113 +2022-10-30 21:21:56 - r - INFO: - Episode: 826/1000, Reward: 109.0, Step: 109 +2022-10-30 21:21:57 - r - INFO: - Episode: 827/1000, Reward: 122.0, Step: 122 +2022-10-30 21:21:57 - r - INFO: - Episode: 828/1000, Reward: 117.0, Step: 117 +2022-10-30 21:21:58 - r - INFO: - Episode: 829/1000, Reward: 127.0, Step: 127 +2022-10-30 21:21:59 - r - INFO: - Episode: 830/1000, Reward: 113.0, Step: 113 +2022-10-30 21:21:59 - r - INFO: - Episode: 831/1000, Reward: 118.0, Step: 118 +2022-10-30 21:22:00 - r - INFO: - Episode: 832/1000, Reward: 107.0, Step: 107 +2022-10-30 21:22:01 - r - INFO: - Episode: 833/1000, Reward: 108.0, Step: 108 +2022-10-30 21:22:01 - r - INFO: - Episode: 834/1000, Reward: 103.0, Step: 103 +2022-10-30 21:22:02 - r - INFO: - Episode: 835/1000, Reward: 126.0, Step: 126 +2022-10-30 21:22:03 - r - INFO: - Episode: 836/1000, Reward: 131.0, Step: 131 +2022-10-30 21:22:03 - r - INFO: - Episode: 837/1000, Reward: 106.0, Step: 106 +2022-10-30 21:22:04 - r - INFO: - Episode: 838/1000, Reward: 116.0, Step: 116 +2022-10-30 21:22:05 - r - INFO: - Episode: 839/1000, Reward: 24.0, Step: 24 +2022-10-30 21:22:05 - r - INFO: - Episode: 840/1000, Reward: 107.0, Step: 107 +2022-10-30 21:22:06 - r - INFO: - Episode: 841/1000, Reward: 124.0, Step: 124 +2022-10-30 21:22:07 - r - INFO: - Episode: 842/1000, Reward: 125.0, Step: 125 +2022-10-30 21:22:07 - r - INFO: - Episode: 843/1000, Reward: 110.0, Step: 110 +2022-10-30 21:22:08 - r - INFO: - Episode: 844/1000, Reward: 112.0, Step: 112 
+2022-10-30 21:22:09 - r - INFO: - Episode: 845/1000, Reward: 105.0, Step: 105 +2022-10-30 21:22:09 - r - INFO: - Episode: 846/1000, Reward: 104.0, Step: 104 +2022-10-30 21:22:10 - r - INFO: - Episode: 847/1000, Reward: 134.0, Step: 134 +2022-10-30 21:22:11 - r - INFO: - Episode: 848/1000, Reward: 107.0, Step: 107 +2022-10-30 21:22:11 - r - INFO: - Episode: 849/1000, Reward: 128.0, Step: 128 +2022-10-30 21:22:12 - r - INFO: - Episode: 850/1000, Reward: 113.0, Step: 113 +2022-10-30 21:22:13 - r - INFO: - Episode: 851/1000, Reward: 138.0, Step: 138 +2022-10-30 21:22:14 - r - INFO: - Episode: 852/1000, Reward: 118.0, Step: 118 +2022-10-30 21:22:14 - r - INFO: - Episode: 853/1000, Reward: 142.0, Step: 142 +2022-10-30 21:22:15 - r - INFO: - Episode: 854/1000, Reward: 118.0, Step: 118 +2022-10-30 21:22:16 - r - INFO: - Episode: 855/1000, Reward: 122.0, Step: 122 +2022-10-30 21:22:16 - r - INFO: - Episode: 856/1000, Reward: 130.0, Step: 130 +2022-10-30 21:22:17 - r - INFO: - Episode: 857/1000, Reward: 126.0, Step: 126 +2022-10-30 21:22:18 - r - INFO: - Episode: 858/1000, Reward: 111.0, Step: 111 +2022-10-30 21:22:19 - r - INFO: - Episode: 859/1000, Reward: 114.0, Step: 114 +2022-10-30 21:22:19 - r - INFO: - Episode: 860/1000, Reward: 128.0, Step: 128 +2022-10-30 21:22:20 - r - INFO: - Episode: 861/1000, Reward: 126.0, Step: 126 +2022-10-30 21:22:21 - r - INFO: - Episode: 862/1000, Reward: 143.0, Step: 143 +2022-10-30 21:22:22 - r - INFO: - Episode: 863/1000, Reward: 132.0, Step: 132 +2022-10-30 21:22:22 - r - INFO: - Episode: 864/1000, Reward: 123.0, Step: 123 +2022-10-30 21:22:23 - r - INFO: - Episode: 865/1000, Reward: 111.0, Step: 111 +2022-10-30 21:22:24 - r - INFO: - Episode: 866/1000, Reward: 129.0, Step: 129 +2022-10-30 21:22:25 - r - INFO: - Episode: 867/1000, Reward: 121.0, Step: 121 +2022-10-30 21:22:25 - r - INFO: - Episode: 868/1000, Reward: 114.0, Step: 114 +2022-10-30 21:22:26 - r - INFO: - Episode: 869/1000, Reward: 110.0, Step: 110 +2022-10-30 21:22:27 - r 
- INFO: - Episode: 870/1000, Reward: 118.0, Step: 118 +2022-10-30 21:22:27 - r - INFO: - Episode: 871/1000, Reward: 120.0, Step: 120 +2022-10-30 21:22:28 - r - INFO: - Episode: 872/1000, Reward: 109.0, Step: 109 +2022-10-30 21:22:29 - r - INFO: - Episode: 873/1000, Reward: 106.0, Step: 106 +2022-10-30 21:22:29 - r - INFO: - Episode: 874/1000, Reward: 118.0, Step: 118 +2022-10-30 21:22:30 - r - INFO: - Episode: 875/1000, Reward: 104.0, Step: 104 +2022-10-30 21:22:30 - r - INFO: - Episode: 876/1000, Reward: 98.0, Step: 98 +2022-10-30 21:22:31 - r - INFO: - Episode: 877/1000, Reward: 115.0, Step: 115 +2022-10-30 21:22:31 - r - INFO: - Episode: 878/1000, Reward: 34.0, Step: 34 +2022-10-30 21:22:32 - r - INFO: - Episode: 879/1000, Reward: 96.0, Step: 96 +2022-10-30 21:22:33 - r - INFO: - Episode: 880/1000, Reward: 108.0, Step: 108 +2022-10-30 21:22:33 - r - INFO: - Episode: 881/1000, Reward: 105.0, Step: 105 +2022-10-30 21:22:34 - r - INFO: - Episode: 882/1000, Reward: 33.0, Step: 33 +2022-10-30 21:22:34 - r - INFO: - Episode: 883/1000, Reward: 105.0, Step: 105 +2022-10-30 21:22:35 - r - INFO: - Episode: 884/1000, Reward: 111.0, Step: 111 +2022-10-30 21:22:35 - r - INFO: - Episode: 885/1000, Reward: 112.0, Step: 112 +2022-10-30 21:22:36 - r - INFO: - Episode: 886/1000, Reward: 101.0, Step: 101 +2022-10-30 21:22:36 - r - INFO: - Episode: 887/1000, Reward: 25.0, Step: 25 +2022-10-30 21:22:37 - r - INFO: - Episode: 888/1000, Reward: 35.0, Step: 35 +2022-10-30 21:22:37 - r - INFO: - Episode: 889/1000, Reward: 99.0, Step: 99 +2022-10-30 21:22:38 - r - INFO: - Episode: 890/1000, Reward: 105.0, Step: 105 +2022-10-30 21:22:38 - r - INFO: - Episode: 891/1000, Reward: 36.0, Step: 36 +2022-10-30 21:22:38 - r - INFO: - Episode: 892/1000, Reward: 92.0, Step: 92 +2022-10-30 21:22:39 - r - INFO: - Episode: 893/1000, Reward: 104.0, Step: 104 +2022-10-30 21:22:39 - r - INFO: - Episode: 894/1000, Reward: 111.0, Step: 111 +2022-10-30 21:22:40 - r - INFO: - Episode: 895/1000, Reward: 
106.0, Step: 106 +2022-10-30 21:22:41 - r - INFO: - Episode: 896/1000, Reward: 109.0, Step: 109 +2022-10-30 21:22:41 - r - INFO: - Episode: 897/1000, Reward: 108.0, Step: 108 +2022-10-30 21:22:42 - r - INFO: - Episode: 898/1000, Reward: 101.0, Step: 101 +2022-10-30 21:22:42 - r - INFO: - Episode: 899/1000, Reward: 100.0, Step: 100 +2022-10-30 21:22:43 - r - INFO: - Episode: 900/1000, Reward: 33.0, Step: 33 +2022-10-30 21:22:44 - r - INFO: - Episode: 901/1000, Reward: 119.0, Step: 119 +2022-10-30 21:22:44 - r - INFO: - Episode: 902/1000, Reward: 112.0, Step: 112 +2022-10-30 21:22:45 - r - INFO: - Episode: 903/1000, Reward: 112.0, Step: 112 +2022-10-30 21:22:45 - r - INFO: - Episode: 904/1000, Reward: 126.0, Step: 126 +2022-10-30 21:22:46 - r - INFO: - Episode: 905/1000, Reward: 123.0, Step: 123 +2022-10-30 21:22:47 - r - INFO: - Episode: 906/1000, Reward: 125.0, Step: 125 +2022-10-30 21:22:47 - r - INFO: - Episode: 907/1000, Reward: 107.0, Step: 107 +2022-10-30 21:22:48 - r - INFO: - Episode: 908/1000, Reward: 128.0, Step: 128 +2022-10-30 21:22:49 - r - INFO: - Episode: 909/1000, Reward: 119.0, Step: 119 +2022-10-30 21:22:50 - r - INFO: - Episode: 910/1000, Reward: 142.0, Step: 142 +2022-10-30 21:22:50 - r - INFO: - Episode: 911/1000, Reward: 117.0, Step: 117 +2022-10-30 21:22:51 - r - INFO: - Episode: 912/1000, Reward: 125.0, Step: 125 +2022-10-30 21:22:52 - r - INFO: - Episode: 913/1000, Reward: 141.0, Step: 141 +2022-10-30 21:22:53 - r - INFO: - Episode: 914/1000, Reward: 134.0, Step: 134 +2022-10-30 21:22:53 - r - INFO: - Episode: 915/1000, Reward: 131.0, Step: 131 +2022-10-30 21:22:54 - r - INFO: - Episode: 916/1000, Reward: 131.0, Step: 131 +2022-10-30 21:22:55 - r - INFO: - Episode: 917/1000, Reward: 140.0, Step: 140 +2022-10-30 21:22:56 - r - INFO: - Episode: 918/1000, Reward: 115.0, Step: 115 +2022-10-30 21:22:56 - r - INFO: - Episode: 919/1000, Reward: 142.0, Step: 142 +2022-10-30 21:22:57 - r - INFO: - Episode: 920/1000, Reward: 142.0, Step: 142 
+2022-10-30 21:22:58 - r - INFO: - Episode: 921/1000, Reward: 128.0, Step: 128 +2022-10-30 21:22:59 - r - INFO: - Episode: 922/1000, Reward: 139.0, Step: 139 +2022-10-30 21:23:00 - r - INFO: - Episode: 923/1000, Reward: 133.0, Step: 133 +2022-10-30 21:23:01 - r - INFO: - Episode: 924/1000, Reward: 129.0, Step: 129 +2022-10-30 21:23:01 - r - INFO: - Episode: 925/1000, Reward: 124.0, Step: 124 +2022-10-30 21:23:02 - r - INFO: - Episode: 926/1000, Reward: 131.0, Step: 131 +2022-10-30 21:23:03 - r - INFO: - Episode: 927/1000, Reward: 125.0, Step: 125 +2022-10-30 21:23:04 - r - INFO: - Episode: 928/1000, Reward: 146.0, Step: 146 +2022-10-30 21:23:04 - r - INFO: - Episode: 929/1000, Reward: 118.0, Step: 118 +2022-10-30 21:23:05 - r - INFO: - Episode: 930/1000, Reward: 126.0, Step: 126 +2022-10-30 21:23:06 - r - INFO: - Episode: 931/1000, Reward: 134.0, Step: 134 +2022-10-30 21:23:07 - r - INFO: - Episode: 932/1000, Reward: 155.0, Step: 155 +2022-10-30 21:23:07 - r - INFO: - Episode: 933/1000, Reward: 134.0, Step: 134 +2022-10-30 21:23:08 - r - INFO: - Episode: 934/1000, Reward: 136.0, Step: 136 +2022-10-30 21:23:09 - r - INFO: - Episode: 935/1000, Reward: 146.0, Step: 146 +2022-10-30 21:23:10 - r - INFO: - Episode: 936/1000, Reward: 150.0, Step: 150 +2022-10-30 21:23:11 - r - INFO: - Episode: 937/1000, Reward: 167.0, Step: 167 +2022-10-30 21:23:12 - r - INFO: - Episode: 938/1000, Reward: 135.0, Step: 135 +2022-10-30 21:23:13 - r - INFO: - Episode: 939/1000, Reward: 197.0, Step: 197 +2022-10-30 21:23:14 - r - INFO: - Episode: 940/1000, Reward: 190.0, Step: 190 +2022-10-30 21:23:15 - r - INFO: - Episode: 941/1000, Reward: 170.0, Step: 170 +2022-10-30 21:23:16 - r - INFO: - Episode: 942/1000, Reward: 179.0, Step: 179 +2022-10-30 21:23:17 - r - INFO: - Episode: 943/1000, Reward: 192.0, Step: 192 +2022-10-30 21:23:18 - r - INFO: - Episode: 944/1000, Reward: 200.0, Step: 200 +2022-10-30 21:23:19 - r - INFO: - Current episode 944 has the best eval reward: 199.5 +2022-10-30 
21:23:20 - r - INFO: - Episode: 945/1000, Reward: 200.0, Step: 200 +2022-10-30 21:23:21 - r - INFO: - Episode: 946/1000, Reward: 200.0, Step: 200 +2022-10-30 21:23:22 - r - INFO: - Episode: 947/1000, Reward: 200.0, Step: 200 +2022-10-30 21:23:23 - r - INFO: - Current episode 947 has the best eval reward: 200.0 +2022-10-30 21:23:23 - r - INFO: - Episode: 948/1000, Reward: 200.0, Step: 200 +2022-10-30 21:23:25 - r - INFO: - Episode: 949/1000, Reward: 200.0, Step: 200 +2022-10-30 21:23:25 - r - INFO: - Current episode 949 has the best eval reward: 200.0 +2022-10-30 21:23:26 - r - INFO: - Episode: 950/1000, Reward: 200.0, Step: 200 +2022-10-30 21:23:26 - r - INFO: - Current episode 950 has the best eval reward: 200.0 +2022-10-30 21:23:27 - r - INFO: - Episode: 951/1000, Reward: 200.0, Step: 200 +2022-10-30 21:23:28 - r - INFO: - Current episode 951 has the best eval reward: 200.0 +2022-10-30 21:23:28 - r - INFO: - Episode: 952/1000, Reward: 200.0, Step: 200 +2022-10-30 21:23:29 - r - INFO: - Current episode 952 has the best eval reward: 200.0 +2022-10-30 21:23:29 - r - INFO: - Episode: 953/1000, Reward: 200.0, Step: 200 +2022-10-30 21:23:30 - r - INFO: - Current episode 953 has the best eval reward: 200.0 +2022-10-30 21:23:31 - r - INFO: - Episode: 954/1000, Reward: 200.0, Step: 200 +2022-10-30 21:23:31 - r - INFO: - Current episode 954 has the best eval reward: 200.0 +2022-10-30 21:23:32 - r - INFO: - Episode: 955/1000, Reward: 200.0, Step: 200 +2022-10-30 21:23:33 - r - INFO: - Current episode 955 has the best eval reward: 200.0 +2022-10-30 21:23:33 - r - INFO: - Episode: 956/1000, Reward: 200.0, Step: 200 +2022-10-30 21:23:34 - r - INFO: - Current episode 956 has the best eval reward: 200.0 +2022-10-30 21:23:34 - r - INFO: - Episode: 957/1000, Reward: 200.0, Step: 200 +2022-10-30 21:23:35 - r - INFO: - Current episode 957 has the best eval reward: 200.0 +2022-10-30 21:23:36 - r - INFO: - Episode: 958/1000, Reward: 200.0, Step: 200 +2022-10-30 21:23:36 - r - INFO: - 
Current episode 958 has the best eval reward: 200.0 +2022-10-30 21:23:37 - r - INFO: - Episode: 959/1000, Reward: 200.0, Step: 200 +2022-10-30 21:23:37 - r - INFO: - Current episode 959 has the best eval reward: 200.0 +2022-10-30 21:23:38 - r - INFO: - Episode: 960/1000, Reward: 200.0, Step: 200 +2022-10-30 21:23:39 - r - INFO: - Episode: 961/1000, Reward: 200.0, Step: 200 +2022-10-30 21:23:40 - r - INFO: - Current episode 961 has the best eval reward: 200.0 +2022-10-30 21:23:40 - r - INFO: - Episode: 962/1000, Reward: 200.0, Step: 200 +2022-10-30 21:23:41 - r - INFO: - Current episode 962 has the best eval reward: 200.0 +2022-10-30 21:23:42 - r - INFO: - Episode: 963/1000, Reward: 200.0, Step: 200 +2022-10-30 21:23:42 - r - INFO: - Current episode 963 has the best eval reward: 200.0 +2022-10-30 21:23:43 - r - INFO: - Episode: 964/1000, Reward: 200.0, Step: 200 +2022-10-30 21:23:43 - r - INFO: - Current episode 964 has the best eval reward: 200.0 +2022-10-30 21:23:44 - r - INFO: - Episode: 965/1000, Reward: 200.0, Step: 200 +2022-10-30 21:23:45 - r - INFO: - Episode: 966/1000, Reward: 200.0, Step: 200 +2022-10-30 21:23:46 - r - INFO: - Current episode 966 has the best eval reward: 200.0 +2022-10-30 21:23:46 - r - INFO: - Episode: 967/1000, Reward: 200.0, Step: 200 +2022-10-30 21:23:47 - r - INFO: - Current episode 967 has the best eval reward: 200.0 +2022-10-30 21:23:48 - r - INFO: - Episode: 968/1000, Reward: 200.0, Step: 200 +2022-10-30 21:23:48 - r - INFO: - Current episode 968 has the best eval reward: 200.0 +2022-10-30 21:23:49 - r - INFO: - Episode: 969/1000, Reward: 200.0, Step: 200 +2022-10-30 21:23:50 - r - INFO: - Current episode 969 has the best eval reward: 200.0 +2022-10-30 21:23:50 - r - INFO: - Episode: 970/1000, Reward: 200.0, Step: 200 +2022-10-30 21:23:51 - r - INFO: - Episode: 971/1000, Reward: 200.0, Step: 200 +2022-10-30 21:23:52 - r - INFO: - Current episode 971 has the best eval reward: 200.0 +2022-10-30 21:23:52 - r - INFO: - Episode: 
972/1000, Reward: 200.0, Step: 200 +2022-10-30 21:23:53 - r - INFO: - Current episode 972 has the best eval reward: 200.0 +2022-10-30 21:23:54 - r - INFO: - Episode: 973/1000, Reward: 200.0, Step: 200 +2022-10-30 21:23:54 - r - INFO: - Current episode 973 has the best eval reward: 200.0 +2022-10-30 21:23:55 - r - INFO: - Episode: 974/1000, Reward: 200.0, Step: 200 +2022-10-30 21:23:55 - r - INFO: - Current episode 974 has the best eval reward: 200.0 +2022-10-30 21:23:56 - r - INFO: - Episode: 975/1000, Reward: 200.0, Step: 200 +2022-10-30 21:23:57 - r - INFO: - Current episode 975 has the best eval reward: 200.0 +2022-10-30 21:23:57 - r - INFO: - Episode: 976/1000, Reward: 200.0, Step: 200 +2022-10-30 21:23:58 - r - INFO: - Current episode 976 has the best eval reward: 200.0 +2022-10-30 21:23:58 - r - INFO: - Episode: 977/1000, Reward: 200.0, Step: 200 +2022-10-30 21:23:59 - r - INFO: - Current episode 977 has the best eval reward: 200.0 +2022-10-30 21:24:00 - r - INFO: - Episode: 978/1000, Reward: 200.0, Step: 200 +2022-10-30 21:24:01 - r - INFO: - Episode: 979/1000, Reward: 200.0, Step: 200 +2022-10-30 21:24:01 - r - INFO: - Current episode 979 has the best eval reward: 200.0 +2022-10-30 21:24:02 - r - INFO: - Episode: 980/1000, Reward: 200.0, Step: 200 +2022-10-30 21:24:03 - r - INFO: - Current episode 980 has the best eval reward: 200.0 +2022-10-30 21:24:03 - r - INFO: - Episode: 981/1000, Reward: 200.0, Step: 200 +2022-10-30 21:24:04 - r - INFO: - Episode: 982/1000, Reward: 200.0, Step: 200 +2022-10-30 21:24:05 - r - INFO: - Episode: 983/1000, Reward: 200.0, Step: 200 +2022-10-30 21:24:06 - r - INFO: - Current episode 983 has the best eval reward: 200.0 +2022-10-30 21:24:07 - r - INFO: - Episode: 984/1000, Reward: 200.0, Step: 200 +2022-10-30 21:24:08 - r - INFO: - Episode: 985/1000, Reward: 200.0, Step: 200 +2022-10-30 21:24:09 - r - INFO: - Episode: 986/1000, Reward: 200.0, Step: 200 +2022-10-30 21:24:10 - r - INFO: - Episode: 987/1000, Reward: 200.0, Step: 
200 +2022-10-30 21:24:11 - r - INFO: - Current episode 987 has the best eval reward: 200.0 +2022-10-30 21:24:12 - r - INFO: - Episode: 988/1000, Reward: 200.0, Step: 200 +2022-10-30 21:24:12 - r - INFO: - Current episode 988 has the best eval reward: 200.0 +2022-10-30 21:24:13 - r - INFO: - Episode: 989/1000, Reward: 200.0, Step: 200 +2022-10-30 21:24:14 - r - INFO: - Episode: 990/1000, Reward: 200.0, Step: 200 +2022-10-30 21:24:15 - r - INFO: - Episode: 991/1000, Reward: 200.0, Step: 200 +2022-10-30 21:24:16 - r - INFO: - Current episode 991 has the best eval reward: 200.0 +2022-10-30 21:24:16 - r - INFO: - Episode: 992/1000, Reward: 198.0, Step: 198 +2022-10-30 21:24:17 - r - INFO: - Current episode 992 has the best eval reward: 200.0 +2022-10-30 21:24:18 - r - INFO: - Episode: 993/1000, Reward: 200.0, Step: 200 +2022-10-30 21:24:18 - r - INFO: - Current episode 993 has the best eval reward: 200.0 +2022-10-30 21:24:19 - r - INFO: - Episode: 994/1000, Reward: 200.0, Step: 200 +2022-10-30 21:24:19 - r - INFO: - Current episode 994 has the best eval reward: 200.0 +2022-10-30 21:24:20 - r - INFO: - Episode: 995/1000, Reward: 200.0, Step: 200 +2022-10-30 21:24:21 - r - INFO: - Episode: 996/1000, Reward: 200.0, Step: 200 +2022-10-30 21:24:22 - r - INFO: - Episode: 997/1000, Reward: 200.0, Step: 200 +2022-10-30 21:24:23 - r - INFO: - Episode: 998/1000, Reward: 200.0, Step: 200 +2022-10-30 21:24:24 - r - INFO: - Current episode 998 has the best eval reward: 200.0 +2022-10-30 21:24:25 - r - INFO: - Episode: 999/1000, Reward: 200.0, Step: 200 +2022-10-30 21:24:26 - r - INFO: - Episode: 1000/1000, Reward: 200.0, Step: 200 diff --git a/projects/codes/A2C/Train_CartPole-v1_A2C_20221030-211435/models/actor_checkpoint.pt b/projects/codes/A2C/Train_CartPole-v1_A2C_20221030-211435/models/actor_checkpoint.pt new file mode 100644 index 0000000..89d0854 Binary files /dev/null and b/projects/codes/A2C/Train_CartPole-v1_A2C_20221030-211435/models/actor_checkpoint.pt differ diff --git 
a/projects/codes/A2C/Train_CartPole-v1_A2C_20221030-211435/models/critic_checkpoint.pt b/projects/codes/A2C/Train_CartPole-v1_A2C_20221030-211435/models/critic_checkpoint.pt new file mode 100644 index 0000000..720f388 Binary files /dev/null and b/projects/codes/A2C/Train_CartPole-v1_A2C_20221030-211435/models/critic_checkpoint.pt differ diff --git a/projects/codes/A2C/Train_CartPole-v1_A2C_20221030-211435/results/learning_curve.png b/projects/codes/A2C/Train_CartPole-v1_A2C_20221030-211435/results/learning_curve.png new file mode 100644 index 0000000..8bbfcde Binary files /dev/null and b/projects/codes/A2C/Train_CartPole-v1_A2C_20221030-211435/results/learning_curve.png differ diff --git a/projects/codes/A2C/Train_CartPole-v1_A2C_20221030-211435/results/res.csv b/projects/codes/A2C/Train_CartPole-v1_A2C_20221030-211435/results/res.csv new file mode 100644 index 0000000..6f853b9 --- /dev/null +++ b/projects/codes/A2C/Train_CartPole-v1_A2C_20221030-211435/results/res.csv @@ -0,0 +1,1001 @@ +episodes,rewards,steps +0,25.0,25 +1,13.0,13 +2,58.0,58 +3,10.0,10 +4,39.0,39 +5,39.0,39 +6,25.0,25 +7,22.0,22 +8,21.0,21 +9,27.0,27 +10,35.0,35 +11,26.0,26 +12,38.0,38 +13,29.0,29 +14,50.0,50 +15,20.0,20 +16,52.0,52 +17,12.0,12 +18,20.0,20 +19,38.0,38 +20,22.0,22 +21,36.0,36 +22,20.0,20 +23,35.0,35 +24,90.0,90 +25,29.0,29 +26,16.0,16 +27,25.0,25 +28,46.0,46 +29,33.0,33 +30,11.0,11 +31,27.0,27 +32,32.0,32 +33,21.0,21 +34,11.0,11 +35,21.0,21 +36,51.0,51 +37,29.0,29 +38,50.0,50 +39,19.0,19 +40,41.0,41 +41,28.0,28 +42,71.0,71 +43,45.0,45 +44,42.0,42 +45,39.0,39 +46,21.0,21 +47,14.0,14 +48,23.0,23 +49,21.0,21 +50,34.0,34 +51,14.0,14 +52,41.0,41 +53,99.0,99 +54,21.0,21 +55,52.0,52 +56,34.0,34 +57,73.0,73 +58,21.0,21 +59,27.0,27 +60,51.0,51 +61,46.0,46 +62,21.0,21 +63,20.0,20 +64,44.0,44 +65,16.0,16 +66,39.0,39 +67,30.0,30 +68,37.0,37 +69,20.0,20 +70,21.0,21 +71,13.0,13 +72,65.0,65 +73,45.0,45 +74,45.0,45 +75,46.0,46 +76,13.0,13 +77,33.0,33 +78,30.0,30 +79,52.0,52 +80,27.0,27 
+81,30.0,30 +82,47.0,47 +83,56.0,56 +84,19.0,19 +85,33.0,33 +86,25.0,25 +87,41.0,41 +88,20.0,20 +89,58.0,58 +90,35.0,35 +91,23.0,23 +92,12.0,12 +93,20.0,20 +94,10.0,10 +95,49.0,49 +96,29.0,29 +97,35.0,35 +98,36.0,36 +99,36.0,36 +100,16.0,16 +101,36.0,36 +102,30.0,30 +103,76.0,76 +104,52.0,52 +105,39.0,39 +106,52.0,52 +107,69.0,69 +108,27.0,27 +109,14.0,14 +110,28.0,28 +111,12.0,12 +112,26.0,26 +113,50.0,50 +114,25.0,25 +115,53.0,53 +116,19.0,19 +117,33.0,33 +118,34.0,34 +119,41.0,41 +120,25.0,25 +121,18.0,18 +122,114.0,114 +123,25.0,25 +124,46.0,46 +125,22.0,22 +126,71.0,71 +127,30.0,30 +128,130.0,130 +129,65.0,65 +130,55.0,55 +131,37.0,37 +132,46.0,46 +133,65.0,65 +134,31.0,31 +135,33.0,33 +136,39.0,39 +137,73.0,73 +138,78.0,78 +139,36.0,36 +140,56.0,56 +141,12.0,12 +142,36.0,36 +143,13.0,13 +144,85.0,85 +145,34.0,34 +146,16.0,16 +147,68.0,68 +148,94.0,94 +149,17.0,17 +150,64.0,64 +151,33.0,33 +152,63.0,63 +153,39.0,39 +154,72.0,72 +155,39.0,39 +156,37.0,37 +157,18.0,18 +158,55.0,55 +159,21.0,21 +160,54.0,54 +161,46.0,46 +162,21.0,21 +163,26.0,26 +164,70.0,70 +165,20.0,20 +166,41.0,41 +167,77.0,77 +168,13.0,13 +169,66.0,66 +170,72.0,72 +171,28.0,28 +172,68.0,68 +173,124.0,124 +174,41.0,41 +175,54.0,54 +176,33.0,33 +177,92.0,92 +178,23.0,23 +179,76.0,76 +180,47.0,47 +181,89.0,89 +182,84.0,84 +183,75.0,75 +184,64.0,64 +185,35.0,35 +186,44.0,44 +187,46.0,46 +188,67.0,67 +189,82.0,82 +190,55.0,55 +191,26.0,26 +192,116.0,116 +193,116.0,116 +194,119.0,119 +195,50.0,50 +196,43.0,43 +197,47.0,47 +198,71.0,71 +199,53.0,53 +200,137.0,137 +201,82.0,82 +202,120.0,120 +203,69.0,69 +204,55.0,55 +205,62.0,62 +206,64.0,64 +207,49.0,49 +208,32.0,32 +209,42.0,42 +210,50.0,50 +211,93.0,93 +212,60.0,60 +213,54.0,54 +214,68.0,68 +215,84.0,84 +216,55.0,55 +217,70.0,70 +218,115.0,115 +219,149.0,149 +220,68.0,68 +221,50.0,50 +222,56.0,56 +223,61.0,61 +224,117.0,117 +225,66.0,66 +226,127.0,127 +227,66.0,66 +228,48.0,48 +229,36.0,36 +230,79.0,79 +231,49.0,49 +232,55.0,55 +233,41.0,41 
+234,20.0,20 +235,40.0,40 +236,120.0,120 +237,27.0,27 +238,51.0,51 +239,35.0,35 +240,43.0,43 +241,54.0,54 +242,52.0,52 +243,47.0,47 +244,63.0,63 +245,29.0,29 +246,36.0,36 +247,58.0,58 +248,63.0,63 +249,49.0,49 +250,70.0,70 +251,114.0,114 +252,62.0,62 +253,73.0,73 +254,62.0,62 +255,61.0,61 +256,115.0,115 +257,50.0,50 +258,128.0,128 +259,200.0,200 +260,75.0,75 +261,64.0,64 +262,33.0,33 +263,90.0,90 +264,117.0,117 +265,60.0,60 +266,177.0,177 +267,39.0,39 +268,40.0,40 +269,109.0,109 +270,100.0,100 +271,99.0,99 +272,136.0,136 +273,62.0,62 +274,100.0,100 +275,73.0,73 +276,166.0,166 +277,74.0,74 +278,126.0,126 +279,111.0,111 +280,198.0,198 +281,106.0,106 +282,80.0,80 +283,74.0,74 +284,114.0,114 +285,69.0,69 +286,98.0,98 +287,63.0,63 +288,61.0,61 +289,49.0,49 +290,89.0,89 +291,114.0,114 +292,103.0,103 +293,103.0,103 +294,93.0,93 +295,137.0,137 +296,97.0,97 +297,124.0,124 +298,147.0,147 +299,125.0,125 +300,105.0,105 +301,113.0,113 +302,120.0,120 +303,159.0,159 +304,190.0,190 +305,119.0,119 +306,200.0,200 +307,148.0,148 +308,200.0,200 +309,79.0,79 +310,115.0,115 +311,147.0,147 +312,112.0,112 +313,125.0,125 +314,184.0,184 +315,193.0,193 +316,117.0,117 +317,153.0,153 +318,125.0,125 +319,184.0,184 +320,173.0,173 +321,117.0,117 +322,47.0,47 +323,107.0,107 +324,104.0,104 +325,114.0,114 +326,90.0,90 +327,112.0,112 +328,70.0,70 +329,74.0,74 +330,159.0,159 +331,39.0,39 +332,129.0,129 +333,50.0,50 +334,74.0,74 +335,31.0,31 +336,57.0,57 +337,71.0,71 +338,43.0,43 +339,41.0,41 +340,64.0,64 +341,38.0,38 +342,45.0,45 +343,120.0,120 +344,40.0,40 +345,46.0,46 +346,57.0,57 +347,29.0,29 +348,29.0,29 +349,50.0,50 +350,38.0,38 +351,51.0,51 +352,49.0,49 +353,30.0,30 +354,40.0,40 +355,45.0,45 +356,68.0,68 +357,27.0,27 +358,18.0,18 +359,26.0,26 +360,15.0,15 +361,65.0,65 +362,38.0,38 +363,41.0,41 +364,61.0,61 +365,113.0,113 +366,39.0,39 +367,60.0,60 +368,134.0,134 +369,122.0,122 +370,34.0,34 +371,129.0,129 +372,40.0,40 +373,128.0,128 +374,200.0,200 +375,108.0,108 +376,108.0,108 +377,151.0,151 
+378,79.0,79 +379,105.0,105 +380,87.0,87 +381,94.0,94 +382,112.0,112 +383,200.0,200 +384,184.0,184 +385,124.0,124 +386,200.0,200 +387,200.0,200 +388,109.0,109 +389,88.0,88 +390,104.0,104 +391,200.0,200 +392,84.0,84 +393,187.0,187 +394,182.0,182 +395,148.0,148 +396,86.0,86 +397,200.0,200 +398,199.0,199 +399,200.0,200 +400,92.0,92 +401,112.0,112 +402,86.0,86 +403,114.0,114 +404,90.0,90 +405,101.0,101 +406,111.0,111 +407,107.0,107 +408,120.0,120 +409,114.0,114 +410,97.0,97 +411,95.0,95 +412,126.0,126 +413,111.0,111 +414,120.0,120 +415,178.0,178 +416,97.0,97 +417,144.0,144 +418,200.0,200 +419,190.0,190 +420,29.0,29 +421,200.0,200 +422,116.0,116 +423,200.0,200 +424,107.0,107 +425,128.0,128 +426,164.0,164 +427,30.0,30 +428,122.0,122 +429,110.0,110 +430,105.0,105 +431,137.0,137 +432,110.0,110 +433,111.0,111 +434,33.0,33 +435,100.0,100 +436,131.0,131 +437,99.0,99 +438,118.0,118 +439,98.0,98 +440,119.0,119 +441,41.0,41 +442,107.0,107 +443,41.0,41 +444,113.0,113 +445,113.0,113 +446,117.0,117 +447,140.0,140 +448,133.0,133 +449,108.0,108 +450,117.0,117 +451,40.0,40 +452,108.0,108 +453,140.0,140 +454,133.0,133 +455,115.0,115 +456,30.0,30 +457,119.0,119 +458,160.0,160 +459,125.0,125 +460,161.0,161 +461,139.0,139 +462,190.0,190 +463,149.0,149 +464,173.0,173 +465,165.0,165 +466,82.0,82 +467,197.0,197 +468,200.0,200 +469,200.0,200 +470,200.0,200 +471,182.0,182 +472,118.0,118 +473,200.0,200 +474,200.0,200 +475,93.0,93 +476,200.0,200 +477,200.0,200 +478,167.0,167 +479,200.0,200 +480,200.0,200 +481,200.0,200 +482,190.0,190 +483,86.0,86 +484,166.0,166 +485,200.0,200 +486,200.0,200 +487,172.0,172 +488,200.0,200 +489,102.0,102 +490,194.0,194 +491,200.0,200 +492,179.0,179 +493,187.0,187 +494,200.0,200 +495,89.0,89 +496,169.0,169 +497,28.0,28 +498,160.0,160 +499,140.0,140 +500,37.0,37 +501,32.0,32 +502,129.0,129 +503,22.0,22 +504,124.0,124 +505,24.0,24 +506,115.0,115 +507,24.0,24 +508,38.0,38 +509,24.0,24 +510,23.0,23 +511,125.0,125 +512,22.0,22 +513,24.0,24 +514,20.0,20 +515,25.0,25 
+516,31.0,31 +517,23.0,23 +518,30.0,30 +519,101.0,101 +520,25.0,25 +521,22.0,22 +522,20.0,20 +523,16.0,16 +524,104.0,104 +525,17.0,17 +526,108.0,108 +527,121.0,121 +528,29.0,29 +529,29.0,29 +530,43.0,43 +531,105.0,105 +532,130.0,130 +533,30.0,30 +534,31.0,31 +535,30.0,30 +536,37.0,37 +537,115.0,115 +538,110.0,110 +539,112.0,112 +540,33.0,33 +541,120.0,120 +542,109.0,109 +543,122.0,122 +544,115.0,115 +545,34.0,34 +546,28.0,28 +547,29.0,29 +548,113.0,113 +549,100.0,100 +550,26.0,26 +551,24.0,24 +552,26.0,26 +553,102.0,102 +554,18.0,18 +555,107.0,107 +556,27.0,27 +557,87.0,87 +558,29.0,29 +559,31.0,31 +560,112.0,112 +561,112.0,112 +562,108.0,108 +563,98.0,98 +564,104.0,104 +565,116.0,116 +566,123.0,123 +567,105.0,105 +568,133.0,133 +569,116.0,116 +570,128.0,128 +571,130.0,130 +572,113.0,113 +573,143.0,143 +574,145.0,145 +575,159.0,159 +576,150.0,150 +577,130.0,130 +578,145.0,145 +579,173.0,173 +580,154.0,154 +581,131.0,131 +582,163.0,163 +583,160.0,160 +584,181.0,181 +585,161.0,161 +586,169.0,169 +587,150.0,150 +588,176.0,176 +589,157.0,157 +590,167.0,167 +591,168.0,168 +592,135.0,135 +593,157.0,157 +594,138.0,138 +595,139.0,139 +596,146.0,146 +597,121.0,121 +598,140.0,140 +599,124.0,124 +600,124.0,124 +601,115.0,115 +602,129.0,129 +603,107.0,107 +604,118.0,118 +605,108.0,108 +606,102.0,102 +607,105.0,105 +608,103.0,103 +609,96.0,96 +610,116.0,116 +611,51.0,51 +612,100.0,100 +613,121.0,121 +614,109.0,109 +615,85.0,85 +616,111.0,111 +617,91.0,91 +618,127.0,127 +619,117.0,117 +620,104.0,104 +621,119.0,119 +622,111.0,111 +623,132.0,132 +624,130.0,130 +625,140.0,140 +626,95.0,95 +627,106.0,106 +628,120.0,120 +629,111.0,111 +630,114.0,114 +631,126.0,126 +632,100.0,100 +633,111.0,111 +634,104.0,104 +635,103.0,103 +636,111.0,111 +637,110.0,110 +638,131.0,131 +639,90.0,90 +640,97.0,97 +641,104.0,104 +642,91.0,91 +643,97.0,97 +644,109.0,109 +645,112.0,112 +646,97.0,97 +647,32.0,32 +648,94.0,94 +649,107.0,107 +650,61.0,61 +651,97.0,97 +652,99.0,99 +653,76.0,76 +654,38.0,38 
+655,96.0,96 +656,96.0,96 +657,65.0,65 +658,45.0,45 +659,91.0,91 +660,78.0,78 +661,90.0,90 +662,92.0,92 +663,94.0,94 +664,101.0,101 +665,111.0,111 +666,109.0,109 +667,99.0,99 +668,115.0,115 +669,112.0,112 +670,113.0,113 +671,110.0,110 +672,108.0,108 +673,112.0,112 +674,125.0,125 +675,122.0,122 +676,114.0,114 +677,127.0,127 +678,125.0,125 +679,112.0,112 +680,111.0,111 +681,124.0,124 +682,113.0,113 +683,103.0,103 +684,119.0,119 +685,120.0,120 +686,95.0,95 +687,100.0,100 +688,29.0,29 +689,119.0,119 +690,107.0,107 +691,117.0,117 +692,78.0,78 +693,35.0,35 +694,101.0,101 +695,98.0,98 +696,94.0,94 +697,102.0,102 +698,90.0,90 +699,86.0,86 +700,81.0,81 +701,105.0,105 +702,72.0,72 +703,100.0,100 +704,96.0,96 +705,111.0,111 +706,27.0,27 +707,107.0,107 +708,87.0,87 +709,114.0,114 +710,111.0,111 +711,88.0,88 +712,112.0,112 +713,108.0,108 +714,108.0,108 +715,103.0,103 +716,120.0,120 +717,116.0,116 +718,112.0,112 +719,99.0,99 +720,118.0,118 +721,114.0,114 +722,104.0,104 +723,99.0,99 +724,102.0,102 +725,106.0,106 +726,31.0,31 +727,91.0,91 +728,32.0,32 +729,96.0,96 +730,20.0,20 +731,33.0,33 +732,23.0,23 +733,80.0,80 +734,35.0,35 +735,88.0,88 +736,28.0,28 +737,26.0,26 +738,70.0,70 +739,86.0,86 +740,28.0,28 +741,39.0,39 +742,65.0,65 +743,52.0,52 +744,43.0,43 +745,97.0,97 +746,27.0,27 +747,89.0,89 +748,34.0,34 +749,35.0,35 +750,28.0,28 +751,96.0,96 +752,97.0,97 +753,108.0,108 +754,45.0,45 +755,103.0,103 +756,97.0,97 +757,114.0,114 +758,103.0,103 +759,116.0,116 +760,127.0,127 +761,122.0,122 +762,112.0,112 +763,112.0,112 +764,120.0,120 +765,129.0,129 +766,127.0,127 +767,125.0,125 +768,124.0,124 +769,126.0,126 +770,129.0,129 +771,129.0,129 +772,43.0,43 +773,121.0,121 +774,40.0,40 +775,116.0,116 +776,117.0,117 +777,113.0,113 +778,117.0,117 +779,108.0,108 +780,108.0,108 +781,119.0,119 +782,109.0,109 +783,116.0,116 +784,114.0,114 +785,45.0,45 +786,116.0,116 +787,116.0,116 +788,110.0,110 +789,105.0,105 +790,110.0,110 +791,112.0,112 +792,104.0,104 +793,120.0,120 +794,110.0,110 +795,113.0,113 
+796,33.0,33 +797,111.0,111 +798,31.0,31 +799,139.0,139 +800,110.0,110 +801,124.0,124 +802,120.0,120 +803,112.0,112 +804,116.0,116 +805,105.0,105 +806,125.0,125 +807,103.0,103 +808,122.0,122 +809,109.0,109 +810,118.0,118 +811,124.0,124 +812,115.0,115 +813,26.0,26 +814,118.0,118 +815,118.0,118 +816,31.0,31 +817,99.0,99 +818,122.0,122 +819,102.0,102 +820,111.0,111 +821,110.0,110 +822,113.0,113 +823,117.0,117 +824,113.0,113 +825,109.0,109 +826,122.0,122 +827,117.0,117 +828,127.0,127 +829,113.0,113 +830,118.0,118 +831,107.0,107 +832,108.0,108 +833,103.0,103 +834,126.0,126 +835,131.0,131 +836,106.0,106 +837,116.0,116 +838,24.0,24 +839,107.0,107 +840,124.0,124 +841,125.0,125 +842,110.0,110 +843,112.0,112 +844,105.0,105 +845,104.0,104 +846,134.0,134 +847,107.0,107 +848,128.0,128 +849,113.0,113 +850,138.0,138 +851,118.0,118 +852,142.0,142 +853,118.0,118 +854,122.0,122 +855,130.0,130 +856,126.0,126 +857,111.0,111 +858,114.0,114 +859,128.0,128 +860,126.0,126 +861,143.0,143 +862,132.0,132 +863,123.0,123 +864,111.0,111 +865,129.0,129 +866,121.0,121 +867,114.0,114 +868,110.0,110 +869,118.0,118 +870,120.0,120 +871,109.0,109 +872,106.0,106 +873,118.0,118 +874,104.0,104 +875,98.0,98 +876,115.0,115 +877,34.0,34 +878,96.0,96 +879,108.0,108 +880,105.0,105 +881,33.0,33 +882,105.0,105 +883,111.0,111 +884,112.0,112 +885,101.0,101 +886,25.0,25 +887,35.0,35 +888,99.0,99 +889,105.0,105 +890,36.0,36 +891,92.0,92 +892,104.0,104 +893,111.0,111 +894,106.0,106 +895,109.0,109 +896,108.0,108 +897,101.0,101 +898,100.0,100 +899,33.0,33 +900,119.0,119 +901,112.0,112 +902,112.0,112 +903,126.0,126 +904,123.0,123 +905,125.0,125 +906,107.0,107 +907,128.0,128 +908,119.0,119 +909,142.0,142 +910,117.0,117 +911,125.0,125 +912,141.0,141 +913,134.0,134 +914,131.0,131 +915,131.0,131 +916,140.0,140 +917,115.0,115 +918,142.0,142 +919,142.0,142 +920,128.0,128 +921,139.0,139 +922,133.0,133 +923,129.0,129 +924,124.0,124 +925,131.0,131 +926,125.0,125 +927,146.0,146 +928,118.0,118 +929,126.0,126 +930,134.0,134 
+931,155.0,155 +932,134.0,134 +933,136.0,136 +934,146.0,146 +935,150.0,150 +936,167.0,167 +937,135.0,135 +938,197.0,197 +939,190.0,190 +940,170.0,170 +941,179.0,179 +942,192.0,192 +943,200.0,200 +944,200.0,200 +945,200.0,200 +946,200.0,200 +947,200.0,200 +948,200.0,200 +949,200.0,200 +950,200.0,200 +951,200.0,200 +952,200.0,200 +953,200.0,200 +954,200.0,200 +955,200.0,200 +956,200.0,200 +957,200.0,200 +958,200.0,200 +959,200.0,200 +960,200.0,200 +961,200.0,200 +962,200.0,200 +963,200.0,200 +964,200.0,200 +965,200.0,200 +966,200.0,200 +967,200.0,200 +968,200.0,200 +969,200.0,200 +970,200.0,200 +971,200.0,200 +972,200.0,200 +973,200.0,200 +974,200.0,200 +975,200.0,200 +976,200.0,200 +977,200.0,200 +978,200.0,200 +979,200.0,200 +980,200.0,200 +981,200.0,200 +982,200.0,200 +983,200.0,200 +984,200.0,200 +985,200.0,200 +986,200.0,200 +987,200.0,200 +988,200.0,200 +989,200.0,200 +990,200.0,200 +991,198.0,198 +992,200.0,200 +993,200.0,200 +994,200.0,200 +995,200.0,200 +996,200.0,200 +997,200.0,200 +998,200.0,200 +999,200.0,200 diff --git a/projects/codes/A2C/Train_CartPole-v1_A2C_20221031-232138/config.yaml b/projects/codes/A2C/Train_CartPole-v1_A2C_20221031-232138/config.yaml new file mode 100644 index 0000000..49d9701 --- /dev/null +++ b/projects/codes/A2C/Train_CartPole-v1_A2C_20221031-232138/config.yaml @@ -0,0 +1,24 @@ +general_cfg: + algo_name: A2C + device: cuda + env_name: CartPole-v1 + eval_eps: 10 + eval_per_episode: 5 + load_checkpoint: false + load_path: tasks + max_steps: 200 + mode: train + save_fig: true + seed: 1 + show_fig: false + test_eps: 20 + train_eps: 1000 +algo_cfg: + actor_hidden_dim: 256 + actor_lr: 0.0003 + batch_size: 64 + buffer_size: 100000 + critic_hidden_dim: 256 + critic_lr: 0.001 + gamma: 0.99 + hidden_dim: 256 diff --git a/projects/codes/A2C/Train_CartPole-v1_A2C_20221031-232138/logs/log.txt b/projects/codes/A2C/Train_CartPole-v1_A2C_20221031-232138/logs/log.txt new file mode 100644 index 0000000..18436c8 --- /dev/null +++ 
b/projects/codes/A2C/Train_CartPole-v1_A2C_20221031-232138/logs/log.txt @@ -0,0 +1,1086 @@ +2022-10-31 23:21:38 - r - INFO: - n_states: 4, n_actions: 2 +2022-10-31 23:21:38 - r - INFO: - Actor model name: ActorSoftmaxTanh +2022-10-31 23:21:38 - r - INFO: - Critic model name: Critic +2022-10-31 23:21:38 - r - INFO: - ACMemory memory name: PGReplay +2022-10-31 23:21:38 - r - INFO: - agent name: A2C +2022-10-31 23:21:38 - r - INFO: - Start training! +2022-10-31 23:21:38 - r - INFO: - Env: CartPole-v1, Algorithm: A2C, Device: cuda +2022-10-31 23:21:40 - r - INFO: - Episode: 1/1000, Reward: 25.0, Step: 25 +2022-10-31 23:21:40 - r - INFO: - Episode: 2/1000, Reward: 11.0, Step: 11 +2022-10-31 23:21:41 - r - INFO: - Episode: 3/1000, Reward: 32.0, Step: 32 +2022-10-31 23:21:41 - r - INFO: - Episode: 4/1000, Reward: 11.0, Step: 11 +2022-10-31 23:21:41 - r - INFO: - Episode: 5/1000, Reward: 14.0, Step: 14 +2022-10-31 23:21:41 - r - INFO: - Current episode 5 has the best eval reward: 19.90 +2022-10-31 23:21:41 - r - INFO: - Episode: 6/1000, Reward: 11.0, Step: 11 +2022-10-31 23:21:41 - r - INFO: - Episode: 7/1000, Reward: 23.0, Step: 23 +2022-10-31 23:21:41 - r - INFO: - Episode: 8/1000, Reward: 27.0, Step: 27 +2022-10-31 23:21:41 - r - INFO: - Episode: 9/1000, Reward: 10.0, Step: 10 +2022-10-31 23:21:41 - r - INFO: - Episode: 10/1000, Reward: 21.0, Step: 21 +2022-10-31 23:21:41 - r - INFO: - Episode: 11/1000, Reward: 15.0, Step: 15 +2022-10-31 23:21:41 - r - INFO: - Episode: 12/1000, Reward: 26.0, Step: 26 +2022-10-31 23:21:41 - r - INFO: - Episode: 13/1000, Reward: 22.0, Step: 22 +2022-10-31 23:21:41 - r - INFO: - Episode: 14/1000, Reward: 14.0, Step: 14 +2022-10-31 23:21:41 - r - INFO: - Episode: 15/1000, Reward: 14.0, Step: 14 +2022-10-31 23:21:41 - r - INFO: - Episode: 16/1000, Reward: 21.0, Step: 21 +2022-10-31 23:21:41 - r - INFO: - Episode: 17/1000, Reward: 10.0, Step: 10 +2022-10-31 23:21:42 - r - INFO: - Episode: 18/1000, Reward: 19.0, Step: 19 +2022-10-31 23:21:42 - 
r - INFO: - Episode: 19/1000, Reward: 18.0, Step: 18 +2022-10-31 23:21:42 - r - INFO: - Episode: 20/1000, Reward: 26.0, Step: 26 +2022-10-31 23:21:42 - r - INFO: - Current episode 20 has the best eval reward: 21.50 +2022-10-31 23:21:42 - r - INFO: - Episode: 21/1000, Reward: 29.0, Step: 29 +2022-10-31 23:21:42 - r - INFO: - Episode: 22/1000, Reward: 40.0, Step: 40 +2022-10-31 23:21:42 - r - INFO: - Episode: 23/1000, Reward: 35.0, Step: 35 +2022-10-31 23:21:42 - r - INFO: - Episode: 24/1000, Reward: 33.0, Step: 33 +2022-10-31 23:21:42 - r - INFO: - Episode: 25/1000, Reward: 47.0, Step: 47 +2022-10-31 23:21:43 - r - INFO: - Current episode 25 has the best eval reward: 31.90 +2022-10-31 23:21:43 - r - INFO: - Episode: 26/1000, Reward: 50.0, Step: 50 +2022-10-31 23:21:43 - r - INFO: - Episode: 27/1000, Reward: 21.0, Step: 21 +2022-10-31 23:21:43 - r - INFO: - Episode: 28/1000, Reward: 30.0, Step: 30 +2022-10-31 23:21:43 - r - INFO: - Episode: 29/1000, Reward: 26.0, Step: 26 +2022-10-31 23:21:43 - r - INFO: - Episode: 30/1000, Reward: 40.0, Step: 40 +2022-10-31 23:21:43 - r - INFO: - Current episode 30 has the best eval reward: 56.70 +2022-10-31 23:21:43 - r - INFO: - Episode: 31/1000, Reward: 31.0, Step: 31 +2022-10-31 23:21:43 - r - INFO: - Episode: 32/1000, Reward: 54.0, Step: 54 +2022-10-31 23:21:43 - r - INFO: - Episode: 33/1000, Reward: 59.0, Step: 59 +2022-10-31 23:21:44 - r - INFO: - Episode: 34/1000, Reward: 50.0, Step: 50 +2022-10-31 23:21:44 - r - INFO: - Episode: 35/1000, Reward: 26.0, Step: 26 +2022-10-31 23:21:44 - r - INFO: - Episode: 36/1000, Reward: 34.0, Step: 34 +2022-10-31 23:21:44 - r - INFO: - Episode: 37/1000, Reward: 25.0, Step: 25 +2022-10-31 23:21:44 - r - INFO: - Episode: 38/1000, Reward: 166.0, Step: 166 +2022-10-31 23:21:44 - r - INFO: - Episode: 39/1000, Reward: 35.0, Step: 35 +2022-10-31 23:21:44 - r - INFO: - Episode: 40/1000, Reward: 25.0, Step: 25 +2022-10-31 23:21:45 - r - INFO: - Episode: 41/1000, Reward: 110.0, Step: 110 +2022-10-31 
23:21:45 - r - INFO: - Episode: 42/1000, Reward: 22.0, Step: 22 +2022-10-31 23:21:45 - r - INFO: - Episode: 43/1000, Reward: 57.0, Step: 57 +2022-10-31 23:21:45 - r - INFO: - Episode: 44/1000, Reward: 45.0, Step: 45 +2022-10-31 23:21:45 - r - INFO: - Episode: 45/1000, Reward: 35.0, Step: 35 +2022-10-31 23:21:45 - r - INFO: - Episode: 46/1000, Reward: 45.0, Step: 45 +2022-10-31 23:21:45 - r - INFO: - Episode: 47/1000, Reward: 51.0, Step: 51 +2022-10-31 23:21:46 - r - INFO: - Episode: 48/1000, Reward: 32.0, Step: 32 +2022-10-31 23:21:46 - r - INFO: - Episode: 49/1000, Reward: 67.0, Step: 67 +2022-10-31 23:21:46 - r - INFO: - Episode: 50/1000, Reward: 46.0, Step: 46 +2022-10-31 23:21:46 - r - INFO: - Episode: 51/1000, Reward: 61.0, Step: 61 +2022-10-31 23:21:46 - r - INFO: - Episode: 52/1000, Reward: 49.0, Step: 49 +2022-10-31 23:21:46 - r - INFO: - Episode: 53/1000, Reward: 47.0, Step: 47 +2022-10-31 23:21:46 - r - INFO: - Episode: 54/1000, Reward: 37.0, Step: 37 +2022-10-31 23:21:46 - r - INFO: - Episode: 55/1000, Reward: 32.0, Step: 32 +2022-10-31 23:21:47 - r - INFO: - Current episode 55 has the best eval reward: 85.50 +2022-10-31 23:21:47 - r - INFO: - Episode: 56/1000, Reward: 31.0, Step: 31 +2022-10-31 23:21:47 - r - INFO: - Episode: 57/1000, Reward: 33.0, Step: 33 +2022-10-31 23:21:47 - r - INFO: - Episode: 58/1000, Reward: 93.0, Step: 93 +2022-10-31 23:21:47 - r - INFO: - Episode: 59/1000, Reward: 60.0, Step: 60 +2022-10-31 23:21:48 - r - INFO: - Episode: 60/1000, Reward: 128.0, Step: 128 +2022-10-31 23:21:48 - r - INFO: - Episode: 61/1000, Reward: 200.0, Step: 200 +2022-10-31 23:21:48 - r - INFO: - Episode: 62/1000, Reward: 47.0, Step: 47 +2022-10-31 23:21:48 - r - INFO: - Episode: 63/1000, Reward: 47.0, Step: 47 +2022-10-31 23:21:49 - r - INFO: - Episode: 64/1000, Reward: 63.0, Step: 63 +2022-10-31 23:21:49 - r - INFO: - Episode: 65/1000, Reward: 68.0, Step: 68 +2022-10-31 23:21:49 - r - INFO: - Episode: 66/1000, Reward: 45.0, Step: 45 +2022-10-31 23:21:49 
- r - INFO: - Episode: 67/1000, Reward: 101.0, Step: 101 +2022-10-31 23:21:49 - r - INFO: - Episode: 68/1000, Reward: 47.0, Step: 47 +2022-10-31 23:21:49 - r - INFO: - Episode: 69/1000, Reward: 49.0, Step: 49 +2022-10-31 23:21:50 - r - INFO: - Episode: 70/1000, Reward: 54.0, Step: 54 +2022-10-31 23:21:50 - r - INFO: - Episode: 71/1000, Reward: 42.0, Step: 42 +2022-10-31 23:21:50 - r - INFO: - Episode: 72/1000, Reward: 77.0, Step: 77 +2022-10-31 23:21:50 - r - INFO: - Episode: 73/1000, Reward: 67.0, Step: 67 +2022-10-31 23:21:50 - r - INFO: - Episode: 74/1000, Reward: 41.0, Step: 41 +2022-10-31 23:21:51 - r - INFO: - Episode: 75/1000, Reward: 89.0, Step: 89 +2022-10-31 23:21:51 - r - INFO: - Episode: 76/1000, Reward: 51.0, Step: 51 +2022-10-31 23:21:51 - r - INFO: - Episode: 77/1000, Reward: 54.0, Step: 54 +2022-10-31 23:21:51 - r - INFO: - Episode: 78/1000, Reward: 37.0, Step: 37 +2022-10-31 23:21:51 - r - INFO: - Episode: 79/1000, Reward: 49.0, Step: 49 +2022-10-31 23:21:51 - r - INFO: - Episode: 80/1000, Reward: 46.0, Step: 46 +2022-10-31 23:21:52 - r - INFO: - Episode: 81/1000, Reward: 31.0, Step: 31 +2022-10-31 23:21:52 - r - INFO: - Episode: 82/1000, Reward: 43.0, Step: 43 +2022-10-31 23:21:52 - r - INFO: - Episode: 83/1000, Reward: 60.0, Step: 60 +2022-10-31 23:21:52 - r - INFO: - Episode: 84/1000, Reward: 41.0, Step: 41 +2022-10-31 23:21:52 - r - INFO: - Episode: 85/1000, Reward: 40.0, Step: 40 +2022-10-31 23:21:52 - r - INFO: - Episode: 86/1000, Reward: 28.0, Step: 28 +2022-10-31 23:21:52 - r - INFO: - Episode: 87/1000, Reward: 50.0, Step: 50 +2022-10-31 23:21:53 - r - INFO: - Episode: 88/1000, Reward: 159.0, Step: 159 +2022-10-31 23:21:53 - r - INFO: - Episode: 89/1000, Reward: 30.0, Step: 30 +2022-10-31 23:21:53 - r - INFO: - Episode: 90/1000, Reward: 34.0, Step: 34 +2022-10-31 23:21:53 - r - INFO: - Episode: 91/1000, Reward: 70.0, Step: 70 +2022-10-31 23:21:53 - r - INFO: - Episode: 92/1000, Reward: 22.0, Step: 22 +2022-10-31 23:21:53 - r - INFO: - 
Episode: 93/1000, Reward: 39.0, Step: 39 +2022-10-31 23:21:53 - r - INFO: - Episode: 94/1000, Reward: 50.0, Step: 50 +2022-10-31 23:21:53 - r - INFO: - Episode: 95/1000, Reward: 40.0, Step: 40 +2022-10-31 23:21:54 - r - INFO: - Episode: 96/1000, Reward: 37.0, Step: 37 +2022-10-31 23:21:54 - r - INFO: - Episode: 97/1000, Reward: 121.0, Step: 121 +2022-10-31 23:21:54 - r - INFO: - Episode: 98/1000, Reward: 26.0, Step: 26 +2022-10-31 23:21:54 - r - INFO: - Episode: 99/1000, Reward: 40.0, Step: 40 +2022-10-31 23:21:54 - r - INFO: - Episode: 100/1000, Reward: 30.0, Step: 30 +2022-10-31 23:21:55 - r - INFO: - Episode: 101/1000, Reward: 35.0, Step: 35 +2022-10-31 23:21:55 - r - INFO: - Episode: 102/1000, Reward: 40.0, Step: 40 +2022-10-31 23:21:55 - r - INFO: - Episode: 103/1000, Reward: 28.0, Step: 28 +2022-10-31 23:21:55 - r - INFO: - Episode: 104/1000, Reward: 29.0, Step: 29 +2022-10-31 23:21:55 - r - INFO: - Episode: 105/1000, Reward: 42.0, Step: 42 +2022-10-31 23:21:55 - r - INFO: - Episode: 106/1000, Reward: 54.0, Step: 54 +2022-10-31 23:21:55 - r - INFO: - Episode: 107/1000, Reward: 25.0, Step: 25 +2022-10-31 23:21:55 - r - INFO: - Episode: 108/1000, Reward: 47.0, Step: 47 +2022-10-31 23:21:55 - r - INFO: - Episode: 109/1000, Reward: 32.0, Step: 32 +2022-10-31 23:21:55 - r - INFO: - Episode: 110/1000, Reward: 50.0, Step: 50 +2022-10-31 23:21:56 - r - INFO: - Episode: 111/1000, Reward: 30.0, Step: 30 +2022-10-31 23:21:56 - r - INFO: - Episode: 112/1000, Reward: 58.0, Step: 58 +2022-10-31 23:21:56 - r - INFO: - Episode: 113/1000, Reward: 32.0, Step: 32 +2022-10-31 23:21:56 - r - INFO: - Episode: 114/1000, Reward: 43.0, Step: 43 +2022-10-31 23:21:56 - r - INFO: - Episode: 115/1000, Reward: 57.0, Step: 57 +2022-10-31 23:21:56 - r - INFO: - Episode: 116/1000, Reward: 20.0, Step: 20 +2022-10-31 23:21:57 - r - INFO: - Episode: 117/1000, Reward: 48.0, Step: 48 +2022-10-31 23:21:57 - r - INFO: - Episode: 118/1000, Reward: 45.0, Step: 45 +2022-10-31 23:21:57 - r - INFO: - 
Episode: 119/1000, Reward: 47.0, Step: 47 +2022-10-31 23:21:57 - r - INFO: - Episode: 120/1000, Reward: 69.0, Step: 69 +2022-10-31 23:21:57 - r - INFO: - Episode: 121/1000, Reward: 34.0, Step: 34 +2022-10-31 23:21:57 - r - INFO: - Episode: 122/1000, Reward: 22.0, Step: 22 +2022-10-31 23:21:57 - r - INFO: - Episode: 123/1000, Reward: 22.0, Step: 22 +2022-10-31 23:21:57 - r - INFO: - Episode: 124/1000, Reward: 38.0, Step: 38 +2022-10-31 23:21:57 - r - INFO: - Episode: 125/1000, Reward: 36.0, Step: 36 +2022-10-31 23:21:58 - r - INFO: - Episode: 126/1000, Reward: 41.0, Step: 41 +2022-10-31 23:21:58 - r - INFO: - Episode: 127/1000, Reward: 28.0, Step: 28 +2022-10-31 23:21:58 - r - INFO: - Episode: 128/1000, Reward: 35.0, Step: 35 +2022-10-31 23:21:58 - r - INFO: - Episode: 129/1000, Reward: 48.0, Step: 48 +2022-10-31 23:21:58 - r - INFO: - Episode: 130/1000, Reward: 51.0, Step: 51 +2022-10-31 23:21:58 - r - INFO: - Episode: 131/1000, Reward: 51.0, Step: 51 +2022-10-31 23:21:58 - r - INFO: - Episode: 132/1000, Reward: 36.0, Step: 36 +2022-10-31 23:21:59 - r - INFO: - Episode: 133/1000, Reward: 45.0, Step: 45 +2022-10-31 23:21:59 - r - INFO: - Episode: 134/1000, Reward: 27.0, Step: 27 +2022-10-31 23:21:59 - r - INFO: - Episode: 135/1000, Reward: 40.0, Step: 40 +2022-10-31 23:21:59 - r - INFO: - Episode: 136/1000, Reward: 43.0, Step: 43 +2022-10-31 23:21:59 - r - INFO: - Episode: 137/1000, Reward: 64.0, Step: 64 +2022-10-31 23:21:59 - r - INFO: - Episode: 138/1000, Reward: 43.0, Step: 43 +2022-10-31 23:21:59 - r - INFO: - Episode: 139/1000, Reward: 37.0, Step: 37 +2022-10-31 23:21:59 - r - INFO: - Episode: 140/1000, Reward: 38.0, Step: 38 +2022-10-31 23:22:00 - r - INFO: - Episode: 141/1000, Reward: 69.0, Step: 69 +2022-10-31 23:22:00 - r - INFO: - Episode: 142/1000, Reward: 36.0, Step: 36 +2022-10-31 23:22:00 - r - INFO: - Episode: 143/1000, Reward: 28.0, Step: 28 +2022-10-31 23:22:00 - r - INFO: - Episode: 144/1000, Reward: 58.0, Step: 58 +2022-10-31 23:22:00 - r - INFO: 
- Episode: 145/1000, Reward: 43.0, Step: 43 +2022-10-31 23:22:00 - r - INFO: - Episode: 146/1000, Reward: 50.0, Step: 50 +2022-10-31 23:22:01 - r - INFO: - Episode: 147/1000, Reward: 30.0, Step: 30 +2022-10-31 23:22:01 - r - INFO: - Episode: 148/1000, Reward: 42.0, Step: 42 +2022-10-31 23:22:01 - r - INFO: - Episode: 149/1000, Reward: 42.0, Step: 42 +2022-10-31 23:22:01 - r - INFO: - Episode: 150/1000, Reward: 35.0, Step: 35 +2022-10-31 23:22:01 - r - INFO: - Episode: 151/1000, Reward: 67.0, Step: 67 +2022-10-31 23:22:01 - r - INFO: - Episode: 152/1000, Reward: 45.0, Step: 45 +2022-10-31 23:22:01 - r - INFO: - Episode: 153/1000, Reward: 28.0, Step: 28 +2022-10-31 23:22:01 - r - INFO: - Episode: 154/1000, Reward: 59.0, Step: 59 +2022-10-31 23:22:02 - r - INFO: - Episode: 155/1000, Reward: 64.0, Step: 64 +2022-10-31 23:22:02 - r - INFO: - Episode: 156/1000, Reward: 67.0, Step: 67 +2022-10-31 23:22:02 - r - INFO: - Episode: 157/1000, Reward: 41.0, Step: 41 +2022-10-31 23:22:02 - r - INFO: - Episode: 158/1000, Reward: 81.0, Step: 81 +2022-10-31 23:22:02 - r - INFO: - Episode: 159/1000, Reward: 76.0, Step: 76 +2022-10-31 23:22:02 - r - INFO: - Episode: 160/1000, Reward: 91.0, Step: 91 +2022-10-31 23:22:03 - r - INFO: - Episode: 161/1000, Reward: 119.0, Step: 119 +2022-10-31 23:22:03 - r - INFO: - Episode: 162/1000, Reward: 47.0, Step: 47 +2022-10-31 23:22:03 - r - INFO: - Episode: 163/1000, Reward: 64.0, Step: 64 +2022-10-31 23:22:03 - r - INFO: - Episode: 164/1000, Reward: 178.0, Step: 178 +2022-10-31 23:22:04 - r - INFO: - Episode: 165/1000, Reward: 97.0, Step: 97 +2022-10-31 23:22:04 - r - INFO: - Current episode 165 has the best eval reward: 104.10 +2022-10-31 23:22:04 - r - INFO: - Episode: 166/1000, Reward: 181.0, Step: 181 +2022-10-31 23:22:05 - r - INFO: - Episode: 167/1000, Reward: 166.0, Step: 166 +2022-10-31 23:22:05 - r - INFO: - Episode: 168/1000, Reward: 79.0, Step: 79 +2022-10-31 23:22:05 - r - INFO: - Episode: 169/1000, Reward: 141.0, Step: 141 
+2022-10-31 23:22:06 - r - INFO: - Episode: 170/1000, Reward: 119.0, Step: 119 +2022-10-31 23:22:06 - r - INFO: - Current episode 170 has the best eval reward: 119.50 +2022-10-31 23:22:06 - r - INFO: - Episode: 171/1000, Reward: 81.0, Step: 81 +2022-10-31 23:22:06 - r - INFO: - Episode: 172/1000, Reward: 124.0, Step: 124 +2022-10-31 23:22:07 - r - INFO: - Episode: 173/1000, Reward: 150.0, Step: 150 +2022-10-31 23:22:07 - r - INFO: - Episode: 174/1000, Reward: 98.0, Step: 98 +2022-10-31 23:22:07 - r - INFO: - Episode: 175/1000, Reward: 164.0, Step: 164 +2022-10-31 23:22:08 - r - INFO: - Current episode 175 has the best eval reward: 132.00 +2022-10-31 23:22:08 - r - INFO: - Episode: 176/1000, Reward: 200.0, Step: 200 +2022-10-31 23:22:09 - r - INFO: - Episode: 177/1000, Reward: 115.0, Step: 115 +2022-10-31 23:22:09 - r - INFO: - Episode: 178/1000, Reward: 116.0, Step: 116 +2022-10-31 23:22:09 - r - INFO: - Episode: 179/1000, Reward: 160.0, Step: 160 +2022-10-31 23:22:09 - r - INFO: - Episode: 180/1000, Reward: 103.0, Step: 103 +2022-10-31 23:22:10 - r - INFO: - Current episode 180 has the best eval reward: 134.00 +2022-10-31 23:22:10 - r - INFO: - Episode: 181/1000, Reward: 181.0, Step: 181 +2022-10-31 23:22:11 - r - INFO: - Episode: 182/1000, Reward: 185.0, Step: 185 +2022-10-31 23:22:11 - r - INFO: - Episode: 183/1000, Reward: 93.0, Step: 93 +2022-10-31 23:22:11 - r - INFO: - Episode: 184/1000, Reward: 110.0, Step: 110 +2022-10-31 23:22:12 - r - INFO: - Episode: 185/1000, Reward: 200.0, Step: 200 +2022-10-31 23:22:12 - r - INFO: - Current episode 185 has the best eval reward: 155.50 +2022-10-31 23:22:13 - r - INFO: - Episode: 186/1000, Reward: 141.0, Step: 141 +2022-10-31 23:22:13 - r - INFO: - Episode: 187/1000, Reward: 150.0, Step: 150 +2022-10-31 23:22:13 - r - INFO: - Episode: 188/1000, Reward: 121.0, Step: 121 +2022-10-31 23:22:13 - r - INFO: - Episode: 189/1000, Reward: 110.0, Step: 110 +2022-10-31 23:22:14 - r - INFO: - Episode: 190/1000, Reward: 115.0, 
Step: 115 +2022-10-31 23:22:14 - r - INFO: - Episode: 191/1000, Reward: 114.0, Step: 114 +2022-10-31 23:22:14 - r - INFO: - Episode: 192/1000, Reward: 45.0, Step: 45 +2022-10-31 23:22:15 - r - INFO: - Episode: 193/1000, Reward: 125.0, Step: 125 +2022-10-31 23:22:15 - r - INFO: - Episode: 194/1000, Reward: 142.0, Step: 142 +2022-10-31 23:22:15 - r - INFO: - Episode: 195/1000, Reward: 54.0, Step: 54 +2022-10-31 23:22:16 - r - INFO: - Episode: 196/1000, Reward: 62.0, Step: 62 +2022-10-31 23:22:16 - r - INFO: - Episode: 197/1000, Reward: 122.0, Step: 122 +2022-10-31 23:22:16 - r - INFO: - Episode: 198/1000, Reward: 58.0, Step: 58 +2022-10-31 23:22:16 - r - INFO: - Episode: 199/1000, Reward: 88.0, Step: 88 +2022-10-31 23:22:16 - r - INFO: - Episode: 200/1000, Reward: 141.0, Step: 141 +2022-10-31 23:22:17 - r - INFO: - Episode: 201/1000, Reward: 113.0, Step: 113 +2022-10-31 23:22:18 - r - INFO: - Episode: 202/1000, Reward: 200.0, Step: 200 +2022-10-31 23:22:18 - r - INFO: - Episode: 203/1000, Reward: 136.0, Step: 136 +2022-10-31 23:22:18 - r - INFO: - Episode: 204/1000, Reward: 114.0, Step: 114 +2022-10-31 23:22:18 - r - INFO: - Episode: 205/1000, Reward: 102.0, Step: 102 +2022-10-31 23:22:19 - r - INFO: - Episode: 206/1000, Reward: 176.0, Step: 176 +2022-10-31 23:22:20 - r - INFO: - Episode: 207/1000, Reward: 150.0, Step: 150 +2022-10-31 23:22:20 - r - INFO: - Episode: 208/1000, Reward: 105.0, Step: 105 +2022-10-31 23:22:20 - r - INFO: - Episode: 209/1000, Reward: 200.0, Step: 200 +2022-10-31 23:22:21 - r - INFO: - Episode: 210/1000, Reward: 200.0, Step: 200 +2022-10-31 23:22:21 - r - INFO: - Episode: 211/1000, Reward: 167.0, Step: 167 +2022-10-31 23:22:22 - r - INFO: - Episode: 212/1000, Reward: 104.0, Step: 104 +2022-10-31 23:22:22 - r - INFO: - Episode: 213/1000, Reward: 124.0, Step: 124 +2022-10-31 23:22:22 - r - INFO: - Episode: 214/1000, Reward: 96.0, Step: 96 +2022-10-31 23:22:23 - r - INFO: - Episode: 215/1000, Reward: 200.0, Step: 200 +2022-10-31 23:22:24 - r - 
INFO: - Episode: 216/1000, Reward: 199.0, Step: 199 +2022-10-31 23:22:24 - r - INFO: - Episode: 217/1000, Reward: 200.0, Step: 200 +2022-10-31 23:22:24 - r - INFO: - Episode: 218/1000, Reward: 132.0, Step: 132 +2022-10-31 23:22:25 - r - INFO: - Episode: 219/1000, Reward: 188.0, Step: 188 +2022-10-31 23:22:25 - r - INFO: - Episode: 220/1000, Reward: 132.0, Step: 132 +2022-10-31 23:22:26 - r - INFO: - Episode: 221/1000, Reward: 151.0, Step: 151 +2022-10-31 23:22:26 - r - INFO: - Episode: 222/1000, Reward: 125.0, Step: 125 +2022-10-31 23:22:26 - r - INFO: - Episode: 223/1000, Reward: 42.0, Step: 42 +2022-10-31 23:22:27 - r - INFO: - Episode: 224/1000, Reward: 200.0, Step: 200 +2022-10-31 23:22:27 - r - INFO: - Episode: 225/1000, Reward: 159.0, Step: 159 +2022-10-31 23:22:28 - r - INFO: - Episode: 226/1000, Reward: 171.0, Step: 171 +2022-10-31 23:22:28 - r - INFO: - Episode: 227/1000, Reward: 122.0, Step: 122 +2022-10-31 23:22:29 - r - INFO: - Episode: 228/1000, Reward: 189.0, Step: 189 +2022-10-31 23:22:29 - r - INFO: - Episode: 229/1000, Reward: 129.0, Step: 129 +2022-10-31 23:22:29 - r - INFO: - Episode: 230/1000, Reward: 106.0, Step: 106 +2022-10-31 23:22:30 - r - INFO: - Episode: 231/1000, Reward: 107.0, Step: 107 +2022-10-31 23:22:30 - r - INFO: - Episode: 232/1000, Reward: 200.0, Step: 200 +2022-10-31 23:22:31 - r - INFO: - Episode: 233/1000, Reward: 200.0, Step: 200 +2022-10-31 23:22:31 - r - INFO: - Episode: 234/1000, Reward: 200.0, Step: 200 +2022-10-31 23:22:32 - r - INFO: - Episode: 235/1000, Reward: 200.0, Step: 200 +2022-10-31 23:22:32 - r - INFO: - Current episode 235 has the best eval reward: 169.70 +2022-10-31 23:22:33 - r - INFO: - Episode: 236/1000, Reward: 158.0, Step: 158 +2022-10-31 23:22:33 - r - INFO: - Episode: 237/1000, Reward: 200.0, Step: 200 +2022-10-31 23:22:33 - r - INFO: - Episode: 238/1000, Reward: 192.0, Step: 192 +2022-10-31 23:22:34 - r - INFO: - Episode: 239/1000, Reward: 179.0, Step: 179 +2022-10-31 23:22:34 - r - INFO: - Episode: 
240/1000, Reward: 102.0, Step: 102 +2022-10-31 23:22:35 - r - INFO: - Episode: 241/1000, Reward: 125.0, Step: 125 +2022-10-31 23:22:35 - r - INFO: - Episode: 242/1000, Reward: 138.0, Step: 138 +2022-10-31 23:22:36 - r - INFO: - Episode: 243/1000, Reward: 189.0, Step: 189 +2022-10-31 23:22:36 - r - INFO: - Episode: 244/1000, Reward: 41.0, Step: 41 +2022-10-31 23:22:36 - r - INFO: - Episode: 245/1000, Reward: 97.0, Step: 97 +2022-10-31 23:22:36 - r - INFO: - Episode: 246/1000, Reward: 49.0, Step: 49 +2022-10-31 23:22:37 - r - INFO: - Episode: 247/1000, Reward: 86.0, Step: 86 +2022-10-31 23:22:37 - r - INFO: - Episode: 248/1000, Reward: 121.0, Step: 121 +2022-10-31 23:22:37 - r - INFO: - Episode: 249/1000, Reward: 117.0, Step: 117 +2022-10-31 23:22:37 - r - INFO: - Episode: 250/1000, Reward: 43.0, Step: 43 +2022-10-31 23:22:38 - r - INFO: - Episode: 251/1000, Reward: 72.0, Step: 72 +2022-10-31 23:22:38 - r - INFO: - Episode: 252/1000, Reward: 34.0, Step: 34 +2022-10-31 23:22:38 - r - INFO: - Episode: 253/1000, Reward: 83.0, Step: 83 +2022-10-31 23:22:38 - r - INFO: - Episode: 254/1000, Reward: 83.0, Step: 83 +2022-10-31 23:22:38 - r - INFO: - Episode: 255/1000, Reward: 38.0, Step: 38 +2022-10-31 23:22:38 - r - INFO: - Episode: 256/1000, Reward: 34.0, Step: 34 +2022-10-31 23:22:39 - r - INFO: - Episode: 257/1000, Reward: 99.0, Step: 99 +2022-10-31 23:22:39 - r - INFO: - Episode: 258/1000, Reward: 45.0, Step: 45 +2022-10-31 23:22:39 - r - INFO: - Episode: 259/1000, Reward: 47.0, Step: 47 +2022-10-31 23:22:39 - r - INFO: - Episode: 260/1000, Reward: 44.0, Step: 44 +2022-10-31 23:22:39 - r - INFO: - Episode: 261/1000, Reward: 26.0, Step: 26 +2022-10-31 23:22:39 - r - INFO: - Episode: 262/1000, Reward: 37.0, Step: 37 +2022-10-31 23:22:39 - r - INFO: - Episode: 263/1000, Reward: 26.0, Step: 26 +2022-10-31 23:22:39 - r - INFO: - Episode: 264/1000, Reward: 43.0, Step: 43 +2022-10-31 23:22:40 - r - INFO: - Episode: 265/1000, Reward: 27.0, Step: 27 +2022-10-31 23:22:40 - r - 
INFO: - Episode: 266/1000, Reward: 24.0, Step: 24 +2022-10-31 23:22:40 - r - INFO: - Episode: 267/1000, Reward: 42.0, Step: 42 +2022-10-31 23:22:40 - r - INFO: - Episode: 268/1000, Reward: 86.0, Step: 86 +2022-10-31 23:22:40 - r - INFO: - Episode: 269/1000, Reward: 23.0, Step: 23 +2022-10-31 23:22:40 - r - INFO: - Episode: 270/1000, Reward: 32.0, Step: 32 +2022-10-31 23:22:40 - r - INFO: - Episode: 271/1000, Reward: 57.0, Step: 57 +2022-10-31 23:22:40 - r - INFO: - Episode: 272/1000, Reward: 25.0, Step: 25 +2022-10-31 23:22:41 - r - INFO: - Episode: 273/1000, Reward: 98.0, Step: 98 +2022-10-31 23:22:41 - r - INFO: - Episode: 274/1000, Reward: 29.0, Step: 29 +2022-10-31 23:22:41 - r - INFO: - Episode: 275/1000, Reward: 25.0, Step: 25 +2022-10-31 23:22:41 - r - INFO: - Episode: 276/1000, Reward: 29.0, Step: 29 +2022-10-31 23:22:41 - r - INFO: - Episode: 277/1000, Reward: 39.0, Step: 39 +2022-10-31 23:22:41 - r - INFO: - Episode: 278/1000, Reward: 20.0, Step: 20 +2022-10-31 23:22:41 - r - INFO: - Episode: 279/1000, Reward: 92.0, Step: 92 +2022-10-31 23:22:41 - r - INFO: - Episode: 280/1000, Reward: 28.0, Step: 28 +2022-10-31 23:22:42 - r - INFO: - Episode: 281/1000, Reward: 78.0, Step: 78 +2022-10-31 23:22:42 - r - INFO: - Episode: 282/1000, Reward: 25.0, Step: 25 +2022-10-31 23:22:42 - r - INFO: - Episode: 283/1000, Reward: 31.0, Step: 31 +2022-10-31 23:22:42 - r - INFO: - Episode: 284/1000, Reward: 88.0, Step: 88 +2022-10-31 23:22:42 - r - INFO: - Episode: 285/1000, Reward: 85.0, Step: 85 +2022-10-31 23:22:43 - r - INFO: - Episode: 286/1000, Reward: 37.0, Step: 37 +2022-10-31 23:22:43 - r - INFO: - Episode: 287/1000, Reward: 26.0, Step: 26 +2022-10-31 23:22:43 - r - INFO: - Episode: 288/1000, Reward: 19.0, Step: 19 +2022-10-31 23:22:43 - r - INFO: - Episode: 289/1000, Reward: 40.0, Step: 40 +2022-10-31 23:22:43 - r - INFO: - Episode: 290/1000, Reward: 27.0, Step: 27 +2022-10-31 23:22:43 - r - INFO: - Episode: 291/1000, Reward: 17.0, Step: 17 +2022-10-31 23:22:43 - r 
- INFO: - Episode: 292/1000, Reward: 27.0, Step: 27 +2022-10-31 23:22:43 - r - INFO: - Episode: 293/1000, Reward: 26.0, Step: 26 +2022-10-31 23:22:43 - r - INFO: - Episode: 294/1000, Reward: 82.0, Step: 82 +2022-10-31 23:22:43 - r - INFO: - Episode: 295/1000, Reward: 36.0, Step: 36 +2022-10-31 23:22:44 - r - INFO: - Episode: 296/1000, Reward: 24.0, Step: 24 +2022-10-31 23:22:44 - r - INFO: - Episode: 297/1000, Reward: 30.0, Step: 30 +2022-10-31 23:22:44 - r - INFO: - Episode: 298/1000, Reward: 20.0, Step: 20 +2022-10-31 23:22:44 - r - INFO: - Episode: 299/1000, Reward: 34.0, Step: 34 +2022-10-31 23:22:44 - r - INFO: - Episode: 300/1000, Reward: 30.0, Step: 30 +2022-10-31 23:22:44 - r - INFO: - Episode: 301/1000, Reward: 23.0, Step: 23 +2022-10-31 23:22:44 - r - INFO: - Episode: 302/1000, Reward: 36.0, Step: 36 +2022-10-31 23:22:44 - r - INFO: - Episode: 303/1000, Reward: 29.0, Step: 29 +2022-10-31 23:22:44 - r - INFO: - Episode: 304/1000, Reward: 34.0, Step: 34 +2022-10-31 23:22:44 - r - INFO: - Episode: 305/1000, Reward: 25.0, Step: 25 +2022-10-31 23:22:45 - r - INFO: - Episode: 306/1000, Reward: 42.0, Step: 42 +2022-10-31 23:22:45 - r - INFO: - Episode: 307/1000, Reward: 88.0, Step: 88 +2022-10-31 23:22:45 - r - INFO: - Episode: 308/1000, Reward: 26.0, Step: 26 +2022-10-31 23:22:45 - r - INFO: - Episode: 309/1000, Reward: 85.0, Step: 85 +2022-10-31 23:22:45 - r - INFO: - Episode: 310/1000, Reward: 89.0, Step: 89 +2022-10-31 23:22:46 - r - INFO: - Episode: 311/1000, Reward: 48.0, Step: 48 +2022-10-31 23:22:46 - r - INFO: - Episode: 312/1000, Reward: 83.0, Step: 83 +2022-10-31 23:22:46 - r - INFO: - Episode: 313/1000, Reward: 109.0, Step: 109 +2022-10-31 23:22:46 - r - INFO: - Episode: 314/1000, Reward: 42.0, Step: 42 +2022-10-31 23:22:46 - r - INFO: - Episode: 315/1000, Reward: 93.0, Step: 93 +2022-10-31 23:22:47 - r - INFO: - Episode: 316/1000, Reward: 85.0, Step: 85 +2022-10-31 23:22:47 - r - INFO: - Episode: 317/1000, Reward: 100.0, Step: 100 +2022-10-31 
23:22:47 - r - INFO: - Episode: 318/1000, Reward: 106.0, Step: 106 +2022-10-31 23:22:47 - r - INFO: - Episode: 319/1000, Reward: 28.0, Step: 28 +2022-10-31 23:22:48 - r - INFO: - Episode: 320/1000, Reward: 108.0, Step: 108 +2022-10-31 23:22:48 - r - INFO: - Episode: 321/1000, Reward: 112.0, Step: 112 +2022-10-31 23:22:48 - r - INFO: - Episode: 322/1000, Reward: 88.0, Step: 88 +2022-10-31 23:22:49 - r - INFO: - Episode: 323/1000, Reward: 108.0, Step: 108 +2022-10-31 23:22:49 - r - INFO: - Episode: 324/1000, Reward: 108.0, Step: 108 +2022-10-31 23:22:49 - r - INFO: - Episode: 325/1000, Reward: 90.0, Step: 90 +2022-10-31 23:22:50 - r - INFO: - Episode: 326/1000, Reward: 112.0, Step: 112 +2022-10-31 23:22:50 - r - INFO: - Episode: 327/1000, Reward: 113.0, Step: 113 +2022-10-31 23:22:50 - r - INFO: - Episode: 328/1000, Reward: 94.0, Step: 94 +2022-10-31 23:22:50 - r - INFO: - Episode: 329/1000, Reward: 99.0, Step: 99 +2022-10-31 23:22:51 - r - INFO: - Episode: 330/1000, Reward: 45.0, Step: 45 +2022-10-31 23:22:51 - r - INFO: - Episode: 331/1000, Reward: 121.0, Step: 121 +2022-10-31 23:22:51 - r - INFO: - Episode: 332/1000, Reward: 102.0, Step: 102 +2022-10-31 23:22:52 - r - INFO: - Episode: 333/1000, Reward: 111.0, Step: 111 +2022-10-31 23:22:52 - r - INFO: - Episode: 334/1000, Reward: 54.0, Step: 54 +2022-10-31 23:22:52 - r - INFO: - Episode: 335/1000, Reward: 198.0, Step: 198 +2022-10-31 23:22:53 - r - INFO: - Episode: 336/1000, Reward: 83.0, Step: 83 +2022-10-31 23:22:53 - r - INFO: - Episode: 337/1000, Reward: 107.0, Step: 107 +2022-10-31 23:22:53 - r - INFO: - Episode: 338/1000, Reward: 101.0, Step: 101 +2022-10-31 23:22:54 - r - INFO: - Episode: 339/1000, Reward: 129.0, Step: 129 +2022-10-31 23:22:54 - r - INFO: - Episode: 340/1000, Reward: 88.0, Step: 88 +2022-10-31 23:22:54 - r - INFO: - Episode: 341/1000, Reward: 86.0, Step: 86 +2022-10-31 23:22:55 - r - INFO: - Episode: 342/1000, Reward: 199.0, Step: 199 +2022-10-31 23:22:55 - r - INFO: - Episode: 343/1000, 
Reward: 95.0, Step: 95 +2022-10-31 23:22:55 - r - INFO: - Episode: 344/1000, Reward: 103.0, Step: 103 +2022-10-31 23:22:56 - r - INFO: - Episode: 345/1000, Reward: 100.0, Step: 100 +2022-10-31 23:22:56 - r - INFO: - Episode: 346/1000, Reward: 89.0, Step: 89 +2022-10-31 23:22:56 - r - INFO: - Episode: 347/1000, Reward: 87.0, Step: 87 +2022-10-31 23:22:57 - r - INFO: - Episode: 348/1000, Reward: 110.0, Step: 110 +2022-10-31 23:22:57 - r - INFO: - Episode: 349/1000, Reward: 127.0, Step: 127 +2022-10-31 23:22:57 - r - INFO: - Episode: 350/1000, Reward: 97.0, Step: 97 +2022-10-31 23:22:57 - r - INFO: - Episode: 351/1000, Reward: 34.0, Step: 34 +2022-10-31 23:22:58 - r - INFO: - Episode: 352/1000, Reward: 123.0, Step: 123 +2022-10-31 23:22:58 - r - INFO: - Episode: 353/1000, Reward: 49.0, Step: 49 +2022-10-31 23:22:58 - r - INFO: - Episode: 354/1000, Reward: 96.0, Step: 96 +2022-10-31 23:22:58 - r - INFO: - Episode: 355/1000, Reward: 90.0, Step: 90 +2022-10-31 23:22:59 - r - INFO: - Episode: 356/1000, Reward: 110.0, Step: 110 +2022-10-31 23:22:59 - r - INFO: - Episode: 357/1000, Reward: 93.0, Step: 93 +2022-10-31 23:22:59 - r - INFO: - Episode: 358/1000, Reward: 102.0, Step: 102 +2022-10-31 23:23:00 - r - INFO: - Episode: 359/1000, Reward: 128.0, Step: 128 +2022-10-31 23:23:00 - r - INFO: - Episode: 360/1000, Reward: 125.0, Step: 125 +2022-10-31 23:23:01 - r - INFO: - Episode: 361/1000, Reward: 92.0, Step: 92 +2022-10-31 23:23:01 - r - INFO: - Episode: 362/1000, Reward: 109.0, Step: 109 +2022-10-31 23:23:01 - r - INFO: - Episode: 363/1000, Reward: 114.0, Step: 114 +2022-10-31 23:23:01 - r - INFO: - Episode: 364/1000, Reward: 111.0, Step: 111 +2022-10-31 23:23:02 - r - INFO: - Episode: 365/1000, Reward: 38.0, Step: 38 +2022-10-31 23:23:02 - r - INFO: - Episode: 366/1000, Reward: 55.0, Step: 55 +2022-10-31 23:23:02 - r - INFO: - Episode: 367/1000, Reward: 106.0, Step: 106 +2022-10-31 23:23:02 - r - INFO: - Episode: 368/1000, Reward: 115.0, Step: 115 +2022-10-31 23:23:03 - 
r - INFO: - Episode: 369/1000, Reward: 103.0, Step: 103 +2022-10-31 23:23:03 - r - INFO: - Episode: 370/1000, Reward: 50.0, Step: 50 +2022-10-31 23:23:03 - r - INFO: - Episode: 371/1000, Reward: 110.0, Step: 110 +2022-10-31 23:23:04 - r - INFO: - Episode: 372/1000, Reward: 102.0, Step: 102 +2022-10-31 23:23:04 - r - INFO: - Episode: 373/1000, Reward: 110.0, Step: 110 +2022-10-31 23:23:04 - r - INFO: - Episode: 374/1000, Reward: 29.0, Step: 29 +2022-10-31 23:23:04 - r - INFO: - Episode: 375/1000, Reward: 35.0, Step: 35 +2022-10-31 23:23:04 - r - INFO: - Episode: 376/1000, Reward: 42.0, Step: 42 +2022-10-31 23:23:05 - r - INFO: - Episode: 377/1000, Reward: 62.0, Step: 62 +2022-10-31 23:23:05 - r - INFO: - Episode: 378/1000, Reward: 119.0, Step: 119 +2022-10-31 23:23:05 - r - INFO: - Episode: 379/1000, Reward: 33.0, Step: 33 +2022-10-31 23:23:05 - r - INFO: - Episode: 380/1000, Reward: 31.0, Step: 31 +2022-10-31 23:23:05 - r - INFO: - Episode: 381/1000, Reward: 97.0, Step: 97 +2022-10-31 23:23:06 - r - INFO: - Episode: 382/1000, Reward: 192.0, Step: 192 +2022-10-31 23:23:06 - r - INFO: - Episode: 383/1000, Reward: 179.0, Step: 179 +2022-10-31 23:23:07 - r - INFO: - Episode: 384/1000, Reward: 89.0, Step: 89 +2022-10-31 23:23:07 - r - INFO: - Episode: 385/1000, Reward: 32.0, Step: 32 +2022-10-31 23:23:07 - r - INFO: - Episode: 386/1000, Reward: 33.0, Step: 33 +2022-10-31 23:23:07 - r - INFO: - Episode: 387/1000, Reward: 52.0, Step: 52 +2022-10-31 23:23:07 - r - INFO: - Episode: 388/1000, Reward: 31.0, Step: 31 +2022-10-31 23:23:07 - r - INFO: - Episode: 389/1000, Reward: 22.0, Step: 22 +2022-10-31 23:23:08 - r - INFO: - Episode: 390/1000, Reward: 118.0, Step: 118 +2022-10-31 23:23:08 - r - INFO: - Episode: 391/1000, Reward: 24.0, Step: 24 +2022-10-31 23:23:08 - r - INFO: - Episode: 392/1000, Reward: 115.0, Step: 115 +2022-10-31 23:23:08 - r - INFO: - Episode: 393/1000, Reward: 20.0, Step: 20 +2022-10-31 23:23:08 - r - INFO: - Episode: 394/1000, Reward: 33.0, Step: 33 
+2022-10-31 23:23:08 - r - INFO: - Episode: 395/1000, Reward: 40.0, Step: 40 +2022-10-31 23:23:08 - r - INFO: - Episode: 396/1000, Reward: 27.0, Step: 27 +2022-10-31 23:23:08 - r - INFO: - Episode: 397/1000, Reward: 26.0, Step: 26 +2022-10-31 23:23:09 - r - INFO: - Episode: 398/1000, Reward: 24.0, Step: 24 +2022-10-31 23:23:09 - r - INFO: - Episode: 399/1000, Reward: 19.0, Step: 19 +2022-10-31 23:23:09 - r - INFO: - Episode: 400/1000, Reward: 22.0, Step: 22 +2022-10-31 23:23:09 - r - INFO: - Episode: 401/1000, Reward: 24.0, Step: 24 +2022-10-31 23:23:09 - r - INFO: - Episode: 402/1000, Reward: 18.0, Step: 18 +2022-10-31 23:23:09 - r - INFO: - Episode: 403/1000, Reward: 23.0, Step: 23 +2022-10-31 23:23:09 - r - INFO: - Episode: 404/1000, Reward: 27.0, Step: 27 +2022-10-31 23:23:09 - r - INFO: - Episode: 405/1000, Reward: 20.0, Step: 20 +2022-10-31 23:23:09 - r - INFO: - Episode: 406/1000, Reward: 27.0, Step: 27 +2022-10-31 23:23:09 - r - INFO: - Episode: 407/1000, Reward: 17.0, Step: 17 +2022-10-31 23:23:09 - r - INFO: - Episode: 408/1000, Reward: 27.0, Step: 27 +2022-10-31 23:23:09 - r - INFO: - Episode: 409/1000, Reward: 25.0, Step: 25 +2022-10-31 23:23:09 - r - INFO: - Episode: 410/1000, Reward: 25.0, Step: 25 +2022-10-31 23:23:09 - r - INFO: - Episode: 411/1000, Reward: 24.0, Step: 24 +2022-10-31 23:23:10 - r - INFO: - Episode: 412/1000, Reward: 24.0, Step: 24 +2022-10-31 23:23:10 - r - INFO: - Episode: 413/1000, Reward: 18.0, Step: 18 +2022-10-31 23:23:10 - r - INFO: - Episode: 414/1000, Reward: 20.0, Step: 20 +2022-10-31 23:23:10 - r - INFO: - Episode: 415/1000, Reward: 27.0, Step: 27 +2022-10-31 23:23:10 - r - INFO: - Episode: 416/1000, Reward: 28.0, Step: 28 +2022-10-31 23:23:10 - r - INFO: - Episode: 417/1000, Reward: 30.0, Step: 30 +2022-10-31 23:23:10 - r - INFO: - Episode: 418/1000, Reward: 28.0, Step: 28 +2022-10-31 23:23:10 - r - INFO: - Episode: 419/1000, Reward: 33.0, Step: 33 +2022-10-31 23:23:10 - r - INFO: - Episode: 420/1000, Reward: 24.0, Step: 
24 +2022-10-31 23:23:10 - r - INFO: - Episode: 421/1000, Reward: 96.0, Step: 96 +2022-10-31 23:23:11 - r - INFO: - Episode: 422/1000, Reward: 26.0, Step: 26 +2022-10-31 23:23:11 - r - INFO: - Episode: 423/1000, Reward: 29.0, Step: 29 +2022-10-31 23:23:11 - r - INFO: - Episode: 424/1000, Reward: 25.0, Step: 25 +2022-10-31 23:23:11 - r - INFO: - Episode: 425/1000, Reward: 38.0, Step: 38 +2022-10-31 23:23:11 - r - INFO: - Episode: 426/1000, Reward: 33.0, Step: 33 +2022-10-31 23:23:11 - r - INFO: - Episode: 427/1000, Reward: 23.0, Step: 23 +2022-10-31 23:23:11 - r - INFO: - Episode: 428/1000, Reward: 39.0, Step: 39 +2022-10-31 23:23:11 - r - INFO: - Episode: 429/1000, Reward: 28.0, Step: 28 +2022-10-31 23:23:11 - r - INFO: - Episode: 430/1000, Reward: 97.0, Step: 97 +2022-10-31 23:23:12 - r - INFO: - Episode: 431/1000, Reward: 30.0, Step: 30 +2022-10-31 23:23:12 - r - INFO: - Episode: 432/1000, Reward: 29.0, Step: 29 +2022-10-31 23:23:12 - r - INFO: - Episode: 433/1000, Reward: 103.0, Step: 103 +2022-10-31 23:23:12 - r - INFO: - Episode: 434/1000, Reward: 36.0, Step: 36 +2022-10-31 23:23:12 - r - INFO: - Episode: 435/1000, Reward: 32.0, Step: 32 +2022-10-31 23:23:12 - r - INFO: - Episode: 436/1000, Reward: 41.0, Step: 41 +2022-10-31 23:23:13 - r - INFO: - Episode: 437/1000, Reward: 111.0, Step: 111 +2022-10-31 23:23:13 - r - INFO: - Episode: 438/1000, Reward: 48.0, Step: 48 +2022-10-31 23:23:13 - r - INFO: - Episode: 439/1000, Reward: 24.0, Step: 24 +2022-10-31 23:23:13 - r - INFO: - Episode: 440/1000, Reward: 49.0, Step: 49 +2022-10-31 23:23:14 - r - INFO: - Episode: 441/1000, Reward: 116.0, Step: 116 +2022-10-31 23:23:14 - r - INFO: - Episode: 442/1000, Reward: 118.0, Step: 118 +2022-10-31 23:23:14 - r - INFO: - Episode: 443/1000, Reward: 94.0, Step: 94 +2022-10-31 23:23:14 - r - INFO: - Episode: 444/1000, Reward: 132.0, Step: 132 +2022-10-31 23:23:14 - r - INFO: - Episode: 445/1000, Reward: 41.0, Step: 41 +2022-10-31 23:23:15 - r - INFO: - Episode: 446/1000, Reward: 
105.0, Step: 105 +2022-10-31 23:23:15 - r - INFO: - Episode: 447/1000, Reward: 116.0, Step: 116 +2022-10-31 23:23:16 - r - INFO: - Episode: 448/1000, Reward: 136.0, Step: 136 +2022-10-31 23:23:16 - r - INFO: - Episode: 449/1000, Reward: 137.0, Step: 137 +2022-10-31 23:23:16 - r - INFO: - Episode: 450/1000, Reward: 45.0, Step: 45 +2022-10-31 23:23:17 - r - INFO: - Episode: 451/1000, Reward: 157.0, Step: 157 +2022-10-31 23:23:17 - r - INFO: - Episode: 452/1000, Reward: 116.0, Step: 116 +2022-10-31 23:23:17 - r - INFO: - Episode: 453/1000, Reward: 125.0, Step: 125 +2022-10-31 23:23:18 - r - INFO: - Episode: 454/1000, Reward: 120.0, Step: 120 +2022-10-31 23:23:18 - r - INFO: - Episode: 455/1000, Reward: 150.0, Step: 150 +2022-10-31 23:23:19 - r - INFO: - Episode: 456/1000, Reward: 114.0, Step: 114 +2022-10-31 23:23:19 - r - INFO: - Episode: 457/1000, Reward: 44.0, Step: 44 +2022-10-31 23:23:19 - r - INFO: - Episode: 458/1000, Reward: 138.0, Step: 138 +2022-10-31 23:23:19 - r - INFO: - Episode: 459/1000, Reward: 133.0, Step: 133 +2022-10-31 23:23:20 - r - INFO: - Episode: 460/1000, Reward: 141.0, Step: 141 +2022-10-31 23:23:20 - r - INFO: - Episode: 461/1000, Reward: 124.0, Step: 124 +2022-10-31 23:23:21 - r - INFO: - Episode: 462/1000, Reward: 143.0, Step: 143 +2022-10-31 23:23:21 - r - INFO: - Episode: 463/1000, Reward: 123.0, Step: 123 +2022-10-31 23:23:21 - r - INFO: - Episode: 464/1000, Reward: 134.0, Step: 134 +2022-10-31 23:23:22 - r - INFO: - Episode: 465/1000, Reward: 152.0, Step: 152 +2022-10-31 23:23:23 - r - INFO: - Episode: 466/1000, Reward: 140.0, Step: 140 +2022-10-31 23:23:23 - r - INFO: - Episode: 467/1000, Reward: 200.0, Step: 200 +2022-10-31 23:23:23 - r - INFO: - Episode: 468/1000, Reward: 168.0, Step: 168 +2022-10-31 23:23:24 - r - INFO: - Episode: 469/1000, Reward: 200.0, Step: 200 +2022-10-31 23:23:24 - r - INFO: - Episode: 470/1000, Reward: 200.0, Step: 200 +2022-10-31 23:23:25 - r - INFO: - Current episode 470 has the best eval reward: 199.80 
+2022-10-31 23:23:25 - r - INFO: - Episode: 471/1000, Reward: 200.0, Step: 200 +2022-10-31 23:23:26 - r - INFO: - Episode: 472/1000, Reward: 200.0, Step: 200 +2022-10-31 23:23:26 - r - INFO: - Episode: 473/1000, Reward: 200.0, Step: 200 +2022-10-31 23:23:27 - r - INFO: - Episode: 474/1000, Reward: 200.0, Step: 200 +2022-10-31 23:23:27 - r - INFO: - Episode: 475/1000, Reward: 200.0, Step: 200 +2022-10-31 23:23:28 - r - INFO: - Current episode 475 has the best eval reward: 200.00 +2022-10-31 23:23:28 - r - INFO: - Episode: 476/1000, Reward: 200.0, Step: 200 +2022-10-31 23:23:29 - r - INFO: - Episode: 477/1000, Reward: 200.0, Step: 200 +2022-10-31 23:23:29 - r - INFO: - Episode: 478/1000, Reward: 200.0, Step: 200 +2022-10-31 23:23:30 - r - INFO: - Episode: 479/1000, Reward: 200.0, Step: 200 +2022-10-31 23:23:30 - r - INFO: - Episode: 480/1000, Reward: 200.0, Step: 200 +2022-10-31 23:23:31 - r - INFO: - Current episode 480 has the best eval reward: 200.00 +2022-10-31 23:23:31 - r - INFO: - Episode: 481/1000, Reward: 200.0, Step: 200 +2022-10-31 23:23:32 - r - INFO: - Episode: 482/1000, Reward: 200.0, Step: 200 +2022-10-31 23:23:32 - r - INFO: - Episode: 483/1000, Reward: 200.0, Step: 200 +2022-10-31 23:23:33 - r - INFO: - Episode: 484/1000, Reward: 200.0, Step: 200 +2022-10-31 23:23:33 - r - INFO: - Episode: 485/1000, Reward: 200.0, Step: 200 +2022-10-31 23:23:34 - r - INFO: - Current episode 485 has the best eval reward: 200.00 +2022-10-31 23:23:34 - r - INFO: - Episode: 486/1000, Reward: 200.0, Step: 200 +2022-10-31 23:23:35 - r - INFO: - Episode: 487/1000, Reward: 200.0, Step: 200 +2022-10-31 23:23:35 - r - INFO: - Episode: 488/1000, Reward: 200.0, Step: 200 +2022-10-31 23:23:36 - r - INFO: - Episode: 489/1000, Reward: 200.0, Step: 200 +2022-10-31 23:23:36 - r - INFO: - Episode: 490/1000, Reward: 200.0, Step: 200 +2022-10-31 23:23:37 - r - INFO: - Current episode 490 has the best eval reward: 200.00 +2022-10-31 23:23:37 - r - INFO: - Episode: 491/1000, Reward: 
200.0, Step: 200 +2022-10-31 23:23:38 - r - INFO: - Episode: 492/1000, Reward: 200.0, Step: 200 +2022-10-31 23:23:38 - r - INFO: - Episode: 493/1000, Reward: 200.0, Step: 200 +2022-10-31 23:23:38 - r - INFO: - Episode: 494/1000, Reward: 200.0, Step: 200 +2022-10-31 23:23:39 - r - INFO: - Episode: 495/1000, Reward: 200.0, Step: 200 +2022-10-31 23:23:40 - r - INFO: - Current episode 495 has the best eval reward: 200.00 +2022-10-31 23:23:40 - r - INFO: - Episode: 496/1000, Reward: 169.0, Step: 169 +2022-10-31 23:23:40 - r - INFO: - Episode: 497/1000, Reward: 200.0, Step: 200 +2022-10-31 23:23:41 - r - INFO: - Episode: 498/1000, Reward: 200.0, Step: 200 +2022-10-31 23:23:41 - r - INFO: - Episode: 499/1000, Reward: 200.0, Step: 200 +2022-10-31 23:23:42 - r - INFO: - Episode: 500/1000, Reward: 200.0, Step: 200 +2022-10-31 23:23:42 - r - INFO: - Current episode 500 has the best eval reward: 200.00 +2022-10-31 23:23:43 - r - INFO: - Episode: 501/1000, Reward: 200.0, Step: 200 +2022-10-31 23:23:43 - r - INFO: - Episode: 502/1000, Reward: 200.0, Step: 200 +2022-10-31 23:23:44 - r - INFO: - Episode: 503/1000, Reward: 200.0, Step: 200 +2022-10-31 23:23:44 - r - INFO: - Episode: 504/1000, Reward: 200.0, Step: 200 +2022-10-31 23:23:45 - r - INFO: - Episode: 505/1000, Reward: 200.0, Step: 200 +2022-10-31 23:23:45 - r - INFO: - Current episode 505 has the best eval reward: 200.00 +2022-10-31 23:23:46 - r - INFO: - Episode: 506/1000, Reward: 200.0, Step: 200 +2022-10-31 23:23:46 - r - INFO: - Episode: 507/1000, Reward: 200.0, Step: 200 +2022-10-31 23:23:47 - r - INFO: - Episode: 508/1000, Reward: 200.0, Step: 200 +2022-10-31 23:23:47 - r - INFO: - Episode: 509/1000, Reward: 200.0, Step: 200 +2022-10-31 23:23:48 - r - INFO: - Episode: 510/1000, Reward: 200.0, Step: 200 +2022-10-31 23:23:49 - r - INFO: - Episode: 511/1000, Reward: 200.0, Step: 200 +2022-10-31 23:23:49 - r - INFO: - Episode: 512/1000, Reward: 200.0, Step: 200 +2022-10-31 23:23:50 - r - INFO: - Episode: 513/1000, 
Reward: 200.0, Step: 200 +2022-10-31 23:23:50 - r - INFO: - Episode: 514/1000, Reward: 200.0, Step: 200 +2022-10-31 23:23:51 - r - INFO: - Episode: 515/1000, Reward: 200.0, Step: 200 +2022-10-31 23:23:52 - r - INFO: - Episode: 516/1000, Reward: 200.0, Step: 200 +2022-10-31 23:23:52 - r - INFO: - Episode: 517/1000, Reward: 200.0, Step: 200 +2022-10-31 23:23:53 - r - INFO: - Episode: 518/1000, Reward: 200.0, Step: 200 +2022-10-31 23:23:53 - r - INFO: - Episode: 519/1000, Reward: 200.0, Step: 200 +2022-10-31 23:23:54 - r - INFO: - Episode: 520/1000, Reward: 200.0, Step: 200 +2022-10-31 23:23:55 - r - INFO: - Current episode 520 has the best eval reward: 200.00 +2022-10-31 23:23:55 - r - INFO: - Episode: 521/1000, Reward: 200.0, Step: 200 +2022-10-31 23:23:55 - r - INFO: - Episode: 522/1000, Reward: 200.0, Step: 200 +2022-10-31 23:23:56 - r - INFO: - Episode: 523/1000, Reward: 200.0, Step: 200 +2022-10-31 23:23:56 - r - INFO: - Episode: 524/1000, Reward: 200.0, Step: 200 +2022-10-31 23:23:57 - r - INFO: - Episode: 525/1000, Reward: 200.0, Step: 200 +2022-10-31 23:23:58 - r - INFO: - Current episode 525 has the best eval reward: 200.00 +2022-10-31 23:23:58 - r - INFO: - Episode: 526/1000, Reward: 200.0, Step: 200 +2022-10-31 23:23:59 - r - INFO: - Episode: 527/1000, Reward: 200.0, Step: 200 +2022-10-31 23:23:59 - r - INFO: - Episode: 528/1000, Reward: 200.0, Step: 200 +2022-10-31 23:23:59 - r - INFO: - Episode: 529/1000, Reward: 186.0, Step: 186 +2022-10-31 23:24:00 - r - INFO: - Episode: 530/1000, Reward: 200.0, Step: 200 +2022-10-31 23:24:00 - r - INFO: - Current episode 530 has the best eval reward: 200.00 +2022-10-31 23:24:01 - r - INFO: - Episode: 531/1000, Reward: 200.0, Step: 200 +2022-10-31 23:24:01 - r - INFO: - Episode: 532/1000, Reward: 200.0, Step: 200 +2022-10-31 23:24:02 - r - INFO: - Episode: 533/1000, Reward: 200.0, Step: 200 +2022-10-31 23:24:02 - r - INFO: - Episode: 534/1000, Reward: 200.0, Step: 200 +2022-10-31 23:24:03 - r - INFO: - Episode: 
535/1000, Reward: 200.0, Step: 200 +2022-10-31 23:24:04 - r - INFO: - Current episode 535 has the best eval reward: 200.00 +2022-10-31 23:24:04 - r - INFO: - Episode: 536/1000, Reward: 200.0, Step: 200 +2022-10-31 23:24:04 - r - INFO: - Episode: 537/1000, Reward: 200.0, Step: 200 +2022-10-31 23:24:05 - r - INFO: - Episode: 538/1000, Reward: 200.0, Step: 200 +2022-10-31 23:24:05 - r - INFO: - Episode: 539/1000, Reward: 200.0, Step: 200 +2022-10-31 23:24:06 - r - INFO: - Episode: 540/1000, Reward: 200.0, Step: 200 +2022-10-31 23:24:07 - r - INFO: - Current episode 540 has the best eval reward: 200.00 +2022-10-31 23:24:07 - r - INFO: - Episode: 541/1000, Reward: 200.0, Step: 200 +2022-10-31 23:24:08 - r - INFO: - Episode: 542/1000, Reward: 200.0, Step: 200 +2022-10-31 23:24:08 - r - INFO: - Episode: 543/1000, Reward: 200.0, Step: 200 +2022-10-31 23:24:08 - r - INFO: - Episode: 544/1000, Reward: 84.0, Step: 84 +2022-10-31 23:24:09 - r - INFO: - Episode: 545/1000, Reward: 200.0, Step: 200 +2022-10-31 23:24:09 - r - INFO: - Current episode 545 has the best eval reward: 200.00 +2022-10-31 23:24:10 - r - INFO: - Episode: 546/1000, Reward: 200.0, Step: 200 +2022-10-31 23:24:10 - r - INFO: - Episode: 547/1000, Reward: 200.0, Step: 200 +2022-10-31 23:24:11 - r - INFO: - Episode: 548/1000, Reward: 200.0, Step: 200 +2022-10-31 23:24:11 - r - INFO: - Episode: 549/1000, Reward: 200.0, Step: 200 +2022-10-31 23:24:12 - r - INFO: - Episode: 550/1000, Reward: 200.0, Step: 200 +2022-10-31 23:24:13 - r - INFO: - Episode: 551/1000, Reward: 200.0, Step: 200 +2022-10-31 23:24:13 - r - INFO: - Episode: 552/1000, Reward: 200.0, Step: 200 +2022-10-31 23:24:14 - r - INFO: - Episode: 553/1000, Reward: 200.0, Step: 200 +2022-10-31 23:24:14 - r - INFO: - Episode: 554/1000, Reward: 200.0, Step: 200 +2022-10-31 23:24:15 - r - INFO: - Episode: 555/1000, Reward: 200.0, Step: 200 +2022-10-31 23:24:16 - r - INFO: - Episode: 556/1000, Reward: 200.0, Step: 200 +2022-10-31 23:24:16 - r - INFO: - Episode: 
557/1000, Reward: 200.0, Step: 200 +2022-10-31 23:24:17 - r - INFO: - Episode: 558/1000, Reward: 200.0, Step: 200 +2022-10-31 23:24:17 - r - INFO: - Episode: 559/1000, Reward: 200.0, Step: 200 +2022-10-31 23:24:17 - r - INFO: - Episode: 560/1000, Reward: 200.0, Step: 200 +2022-10-31 23:24:18 - r - INFO: - Current episode 560 has the best eval reward: 200.00 +2022-10-31 23:24:19 - r - INFO: - Episode: 561/1000, Reward: 200.0, Step: 200 +2022-10-31 23:24:19 - r - INFO: - Episode: 562/1000, Reward: 200.0, Step: 200 +2022-10-31 23:24:20 - r - INFO: - Episode: 563/1000, Reward: 200.0, Step: 200 +2022-10-31 23:24:20 - r - INFO: - Episode: 564/1000, Reward: 200.0, Step: 200 +2022-10-31 23:24:20 - r - INFO: - Episode: 565/1000, Reward: 200.0, Step: 200 +2022-10-31 23:24:21 - r - INFO: - Current episode 565 has the best eval reward: 200.00 +2022-10-31 23:24:21 - r - INFO: - Episode: 566/1000, Reward: 200.0, Step: 200 +2022-10-31 23:24:22 - r - INFO: - Episode: 567/1000, Reward: 200.0, Step: 200 +2022-10-31 23:24:22 - r - INFO: - Episode: 568/1000, Reward: 200.0, Step: 200 +2022-10-31 23:24:23 - r - INFO: - Episode: 569/1000, Reward: 200.0, Step: 200 +2022-10-31 23:24:23 - r - INFO: - Episode: 570/1000, Reward: 200.0, Step: 200 +2022-10-31 23:24:24 - r - INFO: - Current episode 570 has the best eval reward: 200.00 +2022-10-31 23:24:24 - r - INFO: - Episode: 571/1000, Reward: 200.0, Step: 200 +2022-10-31 23:24:25 - r - INFO: - Episode: 572/1000, Reward: 200.0, Step: 200 +2022-10-31 23:24:25 - r - INFO: - Episode: 573/1000, Reward: 200.0, Step: 200 +2022-10-31 23:24:26 - r - INFO: - Episode: 574/1000, Reward: 200.0, Step: 200 +2022-10-31 23:24:26 - r - INFO: - Episode: 575/1000, Reward: 200.0, Step: 200 +2022-10-31 23:24:28 - r - INFO: - Episode: 576/1000, Reward: 200.0, Step: 200 +2022-10-31 23:24:28 - r - INFO: - Episode: 577/1000, Reward: 200.0, Step: 200 +2022-10-31 23:24:28 - r - INFO: - Episode: 578/1000, Reward: 200.0, Step: 200 +2022-10-31 23:24:29 - r - INFO: - 
Episode: 579/1000, Reward: 200.0, Step: 200 +2022-10-31 23:24:29 - r - INFO: - Episode: 580/1000, Reward: 200.0, Step: 200 +2022-10-31 23:24:30 - r - INFO: - Current episode 580 has the best eval reward: 200.00 +2022-10-31 23:24:31 - r - INFO: - Episode: 581/1000, Reward: 200.0, Step: 200 +2022-10-31 23:24:31 - r - INFO: - Episode: 582/1000, Reward: 200.0, Step: 200 +2022-10-31 23:24:32 - r - INFO: - Episode: 583/1000, Reward: 200.0, Step: 200 +2022-10-31 23:24:32 - r - INFO: - Episode: 584/1000, Reward: 200.0, Step: 200 +2022-10-31 23:24:33 - r - INFO: - Episode: 585/1000, Reward: 199.0, Step: 199 +2022-10-31 23:24:34 - r - INFO: - Episode: 586/1000, Reward: 200.0, Step: 200 +2022-10-31 23:24:34 - r - INFO: - Episode: 587/1000, Reward: 200.0, Step: 200 +2022-10-31 23:24:35 - r - INFO: - Episode: 588/1000, Reward: 178.0, Step: 178 +2022-10-31 23:24:35 - r - INFO: - Episode: 589/1000, Reward: 200.0, Step: 200 +2022-10-31 23:24:36 - r - INFO: - Episode: 590/1000, Reward: 188.0, Step: 188 +2022-10-31 23:24:36 - r - INFO: - Episode: 591/1000, Reward: 156.0, Step: 156 +2022-10-31 23:24:37 - r - INFO: - Episode: 592/1000, Reward: 165.0, Step: 165 +2022-10-31 23:24:37 - r - INFO: - Episode: 593/1000, Reward: 131.0, Step: 131 +2022-10-31 23:24:37 - r - INFO: - Episode: 594/1000, Reward: 157.0, Step: 157 +2022-10-31 23:24:38 - r - INFO: - Episode: 595/1000, Reward: 170.0, Step: 170 +2022-10-31 23:24:39 - r - INFO: - Episode: 596/1000, Reward: 123.0, Step: 123 +2022-10-31 23:24:39 - r - INFO: - Episode: 597/1000, Reward: 109.0, Step: 109 +2022-10-31 23:24:39 - r - INFO: - Episode: 598/1000, Reward: 124.0, Step: 124 +2022-10-31 23:24:39 - r - INFO: - Episode: 599/1000, Reward: 113.0, Step: 113 +2022-10-31 23:24:39 - r - INFO: - Episode: 600/1000, Reward: 38.0, Step: 38 +2022-10-31 23:24:40 - r - INFO: - Episode: 601/1000, Reward: 107.0, Step: 107 +2022-10-31 23:24:40 - r - INFO: - Episode: 602/1000, Reward: 115.0, Step: 115 +2022-10-31 23:24:41 - r - INFO: - Episode: 
603/1000, Reward: 101.0, Step: 101 +2022-10-31 23:24:41 - r - INFO: - Episode: 604/1000, Reward: 113.0, Step: 113 +2022-10-31 23:24:41 - r - INFO: - Episode: 605/1000, Reward: 100.0, Step: 100 +2022-10-31 23:24:42 - r - INFO: - Episode: 606/1000, Reward: 109.0, Step: 109 +2022-10-31 23:24:42 - r - INFO: - Episode: 607/1000, Reward: 119.0, Step: 119 +2022-10-31 23:24:42 - r - INFO: - Episode: 608/1000, Reward: 117.0, Step: 117 +2022-10-31 23:24:43 - r - INFO: - Episode: 609/1000, Reward: 108.0, Step: 108 +2022-10-31 23:24:43 - r - INFO: - Episode: 610/1000, Reward: 101.0, Step: 101 +2022-10-31 23:24:43 - r - INFO: - Episode: 611/1000, Reward: 110.0, Step: 110 +2022-10-31 23:24:44 - r - INFO: - Episode: 612/1000, Reward: 59.0, Step: 59 +2022-10-31 23:24:44 - r - INFO: - Episode: 613/1000, Reward: 112.0, Step: 112 +2022-10-31 23:24:44 - r - INFO: - Episode: 614/1000, Reward: 104.0, Step: 104 +2022-10-31 23:24:44 - r - INFO: - Episode: 615/1000, Reward: 45.0, Step: 45 +2022-10-31 23:24:44 - r - INFO: - Episode: 616/1000, Reward: 29.0, Step: 29 +2022-10-31 23:24:44 - r - INFO: - Episode: 617/1000, Reward: 42.0, Step: 42 +2022-10-31 23:24:45 - r - INFO: - Episode: 618/1000, Reward: 74.0, Step: 74 +2022-10-31 23:24:45 - r - INFO: - Episode: 619/1000, Reward: 79.0, Step: 79 +2022-10-31 23:24:45 - r - INFO: - Episode: 620/1000, Reward: 50.0, Step: 50 +2022-10-31 23:24:45 - r - INFO: - Episode: 621/1000, Reward: 30.0, Step: 30 +2022-10-31 23:24:45 - r - INFO: - Episode: 622/1000, Reward: 43.0, Step: 43 +2022-10-31 23:24:46 - r - INFO: - Episode: 623/1000, Reward: 77.0, Step: 77 +2022-10-31 23:24:46 - r - INFO: - Episode: 624/1000, Reward: 36.0, Step: 36 +2022-10-31 23:24:46 - r - INFO: - Episode: 625/1000, Reward: 61.0, Step: 61 +2022-10-31 23:24:46 - r - INFO: - Episode: 626/1000, Reward: 36.0, Step: 36 +2022-10-31 23:24:46 - r - INFO: - Episode: 627/1000, Reward: 30.0, Step: 30 +2022-10-31 23:24:46 - r - INFO: - Episode: 628/1000, Reward: 43.0, Step: 43 +2022-10-31 
23:24:46 - r - INFO: - Episode: 629/1000, Reward: 27.0, Step: 27 +2022-10-31 23:24:46 - r - INFO: - Episode: 630/1000, Reward: 88.0, Step: 88 +2022-10-31 23:24:47 - r - INFO: - Episode: 631/1000, Reward: 42.0, Step: 42 +2022-10-31 23:24:47 - r - INFO: - Episode: 632/1000, Reward: 40.0, Step: 40 +2022-10-31 23:24:47 - r - INFO: - Episode: 633/1000, Reward: 59.0, Step: 59 +2022-10-31 23:24:47 - r - INFO: - Episode: 634/1000, Reward: 81.0, Step: 81 +2022-10-31 23:24:47 - r - INFO: - Episode: 635/1000, Reward: 85.0, Step: 85 +2022-10-31 23:24:48 - r - INFO: - Episode: 636/1000, Reward: 55.0, Step: 55 +2022-10-31 23:24:48 - r - INFO: - Episode: 637/1000, Reward: 40.0, Step: 40 +2022-10-31 23:24:48 - r - INFO: - Episode: 638/1000, Reward: 99.0, Step: 99 +2022-10-31 23:24:48 - r - INFO: - Episode: 639/1000, Reward: 104.0, Step: 104 +2022-10-31 23:24:49 - r - INFO: - Episode: 640/1000, Reward: 117.0, Step: 117 +2022-10-31 23:24:49 - r - INFO: - Episode: 641/1000, Reward: 112.0, Step: 112 +2022-10-31 23:24:49 - r - INFO: - Episode: 642/1000, Reward: 43.0, Step: 43 +2022-10-31 23:24:50 - r - INFO: - Episode: 643/1000, Reward: 96.0, Step: 96 +2022-10-31 23:24:50 - r - INFO: - Episode: 644/1000, Reward: 105.0, Step: 105 +2022-10-31 23:24:50 - r - INFO: - Episode: 645/1000, Reward: 115.0, Step: 115 +2022-10-31 23:24:51 - r - INFO: - Episode: 646/1000, Reward: 99.0, Step: 99 +2022-10-31 23:24:51 - r - INFO: - Episode: 647/1000, Reward: 123.0, Step: 123 +2022-10-31 23:24:51 - r - INFO: - Episode: 648/1000, Reward: 123.0, Step: 123 +2022-10-31 23:24:51 - r - INFO: - Episode: 649/1000, Reward: 40.0, Step: 40 +2022-10-31 23:24:51 - r - INFO: - Episode: 650/1000, Reward: 100.0, Step: 100 +2022-10-31 23:24:52 - r - INFO: - Episode: 651/1000, Reward: 124.0, Step: 124 +2022-10-31 23:24:52 - r - INFO: - Episode: 652/1000, Reward: 106.0, Step: 106 +2022-10-31 23:24:53 - r - INFO: - Episode: 653/1000, Reward: 122.0, Step: 122 +2022-10-31 23:24:53 - r - INFO: - Episode: 654/1000, Reward: 
127.0, Step: 127 +2022-10-31 23:24:53 - r - INFO: - Episode: 655/1000, Reward: 121.0, Step: 121 +2022-10-31 23:24:54 - r - INFO: - Episode: 656/1000, Reward: 121.0, Step: 121 +2022-10-31 23:24:54 - r - INFO: - Episode: 657/1000, Reward: 125.0, Step: 125 +2022-10-31 23:24:54 - r - INFO: - Episode: 658/1000, Reward: 127.0, Step: 127 +2022-10-31 23:24:55 - r - INFO: - Episode: 659/1000, Reward: 132.0, Step: 132 +2022-10-31 23:24:55 - r - INFO: - Episode: 660/1000, Reward: 142.0, Step: 142 +2022-10-31 23:24:56 - r - INFO: - Episode: 661/1000, Reward: 134.0, Step: 134 +2022-10-31 23:24:56 - r - INFO: - Episode: 662/1000, Reward: 147.0, Step: 147 +2022-10-31 23:24:57 - r - INFO: - Episode: 663/1000, Reward: 175.0, Step: 175 +2022-10-31 23:24:57 - r - INFO: - Episode: 664/1000, Reward: 180.0, Step: 180 +2022-10-31 23:24:57 - r - INFO: - Episode: 665/1000, Reward: 183.0, Step: 183 +2022-10-31 23:24:58 - r - INFO: - Episode: 666/1000, Reward: 167.0, Step: 167 +2022-10-31 23:24:59 - r - INFO: - Episode: 667/1000, Reward: 179.0, Step: 179 +2022-10-31 23:24:59 - r - INFO: - Episode: 668/1000, Reward: 173.0, Step: 173 +2022-10-31 23:25:00 - r - INFO: - Episode: 669/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:00 - r - INFO: - Episode: 670/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:01 - r - INFO: - Episode: 671/1000, Reward: 184.0, Step: 184 +2022-10-31 23:25:02 - r - INFO: - Episode: 672/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:02 - r - INFO: - Episode: 673/1000, Reward: 193.0, Step: 193 +2022-10-31 23:25:03 - r - INFO: - Episode: 674/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:03 - r - INFO: - Episode: 675/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:04 - r - INFO: - Current episode 675 has the best eval reward: 200.00 +2022-10-31 23:25:04 - r - INFO: - Episode: 676/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:05 - r - INFO: - Episode: 677/1000, Reward: 199.0, Step: 199 +2022-10-31 23:25:05 - r - INFO: - Episode: 678/1000, Reward: 200.0, Step: 
200 +2022-10-31 23:25:06 - r - INFO: - Episode: 679/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:06 - r - INFO: - Episode: 680/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:07 - r - INFO: - Current episode 680 has the best eval reward: 200.00 +2022-10-31 23:25:08 - r - INFO: - Episode: 681/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:08 - r - INFO: - Episode: 682/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:09 - r - INFO: - Episode: 683/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:09 - r - INFO: - Episode: 684/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:10 - r - INFO: - Episode: 685/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:10 - r - INFO: - Current episode 685 has the best eval reward: 200.00 +2022-10-31 23:25:11 - r - INFO: - Episode: 686/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:11 - r - INFO: - Episode: 687/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:12 - r - INFO: - Episode: 688/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:12 - r - INFO: - Episode: 689/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:13 - r - INFO: - Episode: 690/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:14 - r - INFO: - Episode: 691/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:14 - r - INFO: - Episode: 692/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:15 - r - INFO: - Episode: 693/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:15 - r - INFO: - Episode: 694/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:15 - r - INFO: - Episode: 695/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:17 - r - INFO: - Episode: 696/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:17 - r - INFO: - Episode: 697/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:17 - r - INFO: - Episode: 698/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:18 - r - INFO: - Episode: 699/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:18 - r - INFO: - Episode: 700/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:19 - r - INFO: - Episode: 701/1000, Reward: 200.0, Step: 200 
+2022-10-31 23:25:20 - r - INFO: - Episode: 702/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:20 - r - INFO: - Episode: 703/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:21 - r - INFO: - Episode: 704/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:21 - r - INFO: - Episode: 705/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:22 - r - INFO: - Current episode 705 has the best eval reward: 200.00 +2022-10-31 23:25:22 - r - INFO: - Episode: 706/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:23 - r - INFO: - Episode: 707/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:23 - r - INFO: - Episode: 708/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:24 - r - INFO: - Episode: 709/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:24 - r - INFO: - Episode: 710/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:25 - r - INFO: - Current episode 710 has the best eval reward: 200.00 +2022-10-31 23:25:26 - r - INFO: - Episode: 711/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:26 - r - INFO: - Episode: 712/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:26 - r - INFO: - Episode: 713/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:27 - r - INFO: - Episode: 714/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:27 - r - INFO: - Episode: 715/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:28 - r - INFO: - Current episode 715 has the best eval reward: 200.00 +2022-10-31 23:25:28 - r - INFO: - Episode: 716/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:29 - r - INFO: - Episode: 717/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:29 - r - INFO: - Episode: 718/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:30 - r - INFO: - Episode: 719/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:30 - r - INFO: - Episode: 720/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:31 - r - INFO: - Current episode 720 has the best eval reward: 200.00 +2022-10-31 23:25:31 - r - INFO: - Episode: 721/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:32 - r - INFO: - Episode: 722/1000, Reward: 
200.0, Step: 200 +2022-10-31 23:25:32 - r - INFO: - Episode: 723/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:33 - r - INFO: - Episode: 724/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:33 - r - INFO: - Episode: 725/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:34 - r - INFO: - Current episode 725 has the best eval reward: 200.00 +2022-10-31 23:25:34 - r - INFO: - Episode: 726/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:35 - r - INFO: - Episode: 727/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:35 - r - INFO: - Episode: 728/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:35 - r - INFO: - Episode: 729/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:36 - r - INFO: - Episode: 730/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:37 - r - INFO: - Current episode 730 has the best eval reward: 200.00 +2022-10-31 23:25:37 - r - INFO: - Episode: 731/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:37 - r - INFO: - Episode: 732/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:38 - r - INFO: - Episode: 733/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:38 - r - INFO: - Episode: 734/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:39 - r - INFO: - Episode: 735/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:39 - r - INFO: - Current episode 735 has the best eval reward: 200.00 +2022-10-31 23:25:40 - r - INFO: - Episode: 736/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:40 - r - INFO: - Episode: 737/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:41 - r - INFO: - Episode: 738/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:41 - r - INFO: - Episode: 739/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:42 - r - INFO: - Episode: 740/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:42 - r - INFO: - Current episode 740 has the best eval reward: 200.00 +2022-10-31 23:25:43 - r - INFO: - Episode: 741/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:43 - r - INFO: - Episode: 742/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:44 - r - INFO: - Episode: 
743/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:44 - r - INFO: - Episode: 744/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:44 - r - INFO: - Episode: 745/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:45 - r - INFO: - Current episode 745 has the best eval reward: 200.00 +2022-10-31 23:25:46 - r - INFO: - Episode: 746/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:46 - r - INFO: - Episode: 747/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:46 - r - INFO: - Episode: 748/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:47 - r - INFO: - Episode: 749/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:47 - r - INFO: - Episode: 750/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:48 - r - INFO: - Current episode 750 has the best eval reward: 200.00 +2022-10-31 23:25:48 - r - INFO: - Episode: 751/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:49 - r - INFO: - Episode: 752/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:49 - r - INFO: - Episode: 753/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:50 - r - INFO: - Episode: 754/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:50 - r - INFO: - Episode: 755/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:51 - r - INFO: - Current episode 755 has the best eval reward: 200.00 +2022-10-31 23:25:51 - r - INFO: - Episode: 756/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:52 - r - INFO: - Episode: 757/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:52 - r - INFO: - Episode: 758/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:53 - r - INFO: - Episode: 759/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:53 - r - INFO: - Episode: 760/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:54 - r - INFO: - Current episode 760 has the best eval reward: 200.00 +2022-10-31 23:25:54 - r - INFO: - Episode: 761/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:55 - r - INFO: - Episode: 762/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:55 - r - INFO: - Episode: 763/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:55 - r - INFO: 
- Episode: 764/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:56 - r - INFO: - Episode: 765/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:57 - r - INFO: - Current episode 765 has the best eval reward: 200.00 +2022-10-31 23:25:57 - r - INFO: - Episode: 766/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:57 - r - INFO: - Episode: 767/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:58 - r - INFO: - Episode: 768/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:58 - r - INFO: - Episode: 769/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:59 - r - INFO: - Episode: 770/1000, Reward: 200.0, Step: 200 +2022-10-31 23:25:59 - r - INFO: - Current episode 770 has the best eval reward: 200.00 +2022-10-31 23:26:00 - r - INFO: - Episode: 771/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:00 - r - INFO: - Episode: 772/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:01 - r - INFO: - Episode: 773/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:01 - r - INFO: - Episode: 774/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:02 - r - INFO: - Episode: 775/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:02 - r - INFO: - Current episode 775 has the best eval reward: 200.00 +2022-10-31 23:26:03 - r - INFO: - Episode: 776/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:03 - r - INFO: - Episode: 777/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:04 - r - INFO: - Episode: 778/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:04 - r - INFO: - Episode: 779/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:04 - r - INFO: - Episode: 780/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:06 - r - INFO: - Episode: 781/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:06 - r - INFO: - Episode: 782/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:06 - r - INFO: - Episode: 783/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:07 - r - INFO: - Episode: 784/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:07 - r - INFO: - Episode: 785/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:08 - r - 
INFO: - Episode: 786/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:09 - r - INFO: - Episode: 787/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:09 - r - INFO: - Episode: 788/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:10 - r - INFO: - Episode: 789/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:10 - r - INFO: - Episode: 790/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:11 - r - INFO: - Current episode 790 has the best eval reward: 200.00 +2022-10-31 23:26:11 - r - INFO: - Episode: 791/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:12 - r - INFO: - Episode: 792/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:12 - r - INFO: - Episode: 793/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:12 - r - INFO: - Episode: 794/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:13 - r - INFO: - Episode: 795/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:14 - r - INFO: - Current episode 795 has the best eval reward: 200.00 +2022-10-31 23:26:14 - r - INFO: - Episode: 796/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:14 - r - INFO: - Episode: 797/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:15 - r - INFO: - Episode: 798/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:15 - r - INFO: - Episode: 799/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:16 - r - INFO: - Episode: 800/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:16 - r - INFO: - Current episode 800 has the best eval reward: 200.00 +2022-10-31 23:26:17 - r - INFO: - Episode: 801/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:17 - r - INFO: - Episode: 802/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:18 - r - INFO: - Episode: 803/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:18 - r - INFO: - Episode: 804/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:19 - r - INFO: - Episode: 805/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:19 - r - INFO: - Current episode 805 has the best eval reward: 200.00 +2022-10-31 23:26:20 - r - INFO: - Episode: 806/1000, Reward: 200.0, Step: 200 +2022-10-31 
23:26:20 - r - INFO: - Episode: 807/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:21 - r - INFO: - Episode: 808/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:21 - r - INFO: - Episode: 809/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:21 - r - INFO: - Episode: 810/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:22 - r - INFO: - Current episode 810 has the best eval reward: 200.00 +2022-10-31 23:26:23 - r - INFO: - Episode: 811/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:23 - r - INFO: - Episode: 812/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:23 - r - INFO: - Episode: 813/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:24 - r - INFO: - Episode: 814/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:24 - r - INFO: - Episode: 815/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:25 - r - INFO: - Current episode 815 has the best eval reward: 200.00 +2022-10-31 23:26:25 - r - INFO: - Episode: 816/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:26 - r - INFO: - Episode: 817/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:26 - r - INFO: - Episode: 818/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:27 - r - INFO: - Episode: 819/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:27 - r - INFO: - Episode: 820/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:28 - r - INFO: - Current episode 820 has the best eval reward: 200.00 +2022-10-31 23:26:28 - r - INFO: - Episode: 821/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:29 - r - INFO: - Episode: 822/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:29 - r - INFO: - Episode: 823/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:30 - r - INFO: - Episode: 824/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:30 - r - INFO: - Episode: 825/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:31 - r - INFO: - Current episode 825 has the best eval reward: 200.00 +2022-10-31 23:26:31 - r - INFO: - Episode: 826/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:32 - r - INFO: - Episode: 827/1000, Reward: 200.0, Step: 200 
+2022-10-31 23:26:32 - r - INFO: - Episode: 828/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:32 - r - INFO: - Episode: 829/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:33 - r - INFO: - Episode: 830/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:34 - r - INFO: - Current episode 830 has the best eval reward: 200.00 +2022-10-31 23:26:34 - r - INFO: - Episode: 831/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:34 - r - INFO: - Episode: 832/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:35 - r - INFO: - Episode: 833/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:35 - r - INFO: - Episode: 834/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:36 - r - INFO: - Episode: 835/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:36 - r - INFO: - Current episode 835 has the best eval reward: 200.00 +2022-10-31 23:26:37 - r - INFO: - Episode: 836/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:37 - r - INFO: - Episode: 837/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:38 - r - INFO: - Episode: 838/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:38 - r - INFO: - Episode: 839/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:38 - r - INFO: - Episode: 840/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:39 - r - INFO: - Current episode 840 has the best eval reward: 200.00 +2022-10-31 23:26:40 - r - INFO: - Episode: 841/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:40 - r - INFO: - Episode: 842/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:40 - r - INFO: - Episode: 843/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:41 - r - INFO: - Episode: 844/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:41 - r - INFO: - Episode: 845/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:42 - r - INFO: - Current episode 845 has the best eval reward: 200.00 +2022-10-31 23:26:42 - r - INFO: - Episode: 846/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:43 - r - INFO: - Episode: 847/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:43 - r - INFO: - Episode: 848/1000, Reward: 
200.0, Step: 200 +2022-10-31 23:26:44 - r - INFO: - Episode: 849/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:44 - r - INFO: - Episode: 850/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:45 - r - INFO: - Current episode 850 has the best eval reward: 200.00 +2022-10-31 23:26:45 - r - INFO: - Episode: 851/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:46 - r - INFO: - Episode: 852/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:46 - r - INFO: - Episode: 853/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:47 - r - INFO: - Episode: 854/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:47 - r - INFO: - Episode: 855/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:48 - r - INFO: - Current episode 855 has the best eval reward: 200.00 +2022-10-31 23:26:48 - r - INFO: - Episode: 856/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:49 - r - INFO: - Episode: 857/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:49 - r - INFO: - Episode: 858/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:49 - r - INFO: - Episode: 859/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:50 - r - INFO: - Episode: 860/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:51 - r - INFO: - Current episode 860 has the best eval reward: 200.00 +2022-10-31 23:26:51 - r - INFO: - Episode: 861/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:51 - r - INFO: - Episode: 862/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:52 - r - INFO: - Episode: 863/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:52 - r - INFO: - Episode: 864/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:53 - r - INFO: - Episode: 865/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:53 - r - INFO: - Current episode 865 has the best eval reward: 200.00 +2022-10-31 23:26:54 - r - INFO: - Episode: 866/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:54 - r - INFO: - Episode: 867/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:55 - r - INFO: - Episode: 868/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:55 - r - INFO: - Episode: 
869/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:56 - r - INFO: - Episode: 870/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:56 - r - INFO: - Current episode 870 has the best eval reward: 200.00 +2022-10-31 23:26:57 - r - INFO: - Episode: 871/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:57 - r - INFO: - Episode: 872/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:58 - r - INFO: - Episode: 873/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:58 - r - INFO: - Episode: 874/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:59 - r - INFO: - Episode: 875/1000, Reward: 200.0, Step: 200 +2022-10-31 23:26:59 - r - INFO: - Current episode 875 has the best eval reward: 200.00 +2022-10-31 23:27:00 - r - INFO: - Episode: 876/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:00 - r - INFO: - Episode: 877/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:01 - r - INFO: - Episode: 878/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:01 - r - INFO: - Episode: 879/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:01 - r - INFO: - Episode: 880/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:02 - r - INFO: - Current episode 880 has the best eval reward: 200.00 +2022-10-31 23:27:03 - r - INFO: - Episode: 881/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:03 - r - INFO: - Episode: 882/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:03 - r - INFO: - Episode: 883/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:04 - r - INFO: - Episode: 884/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:04 - r - INFO: - Episode: 885/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:05 - r - INFO: - Current episode 885 has the best eval reward: 200.00 +2022-10-31 23:27:05 - r - INFO: - Episode: 886/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:06 - r - INFO: - Episode: 887/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:06 - r - INFO: - Episode: 888/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:07 - r - INFO: - Episode: 889/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:07 - r - INFO: 
- Episode: 890/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:08 - r - INFO: - Current episode 890 has the best eval reward: 200.00 +2022-10-31 23:27:08 - r - INFO: - Episode: 891/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:09 - r - INFO: - Episode: 892/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:09 - r - INFO: - Episode: 893/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:10 - r - INFO: - Episode: 894/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:10 - r - INFO: - Episode: 895/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:11 - r - INFO: - Current episode 895 has the best eval reward: 200.00 +2022-10-31 23:27:11 - r - INFO: - Episode: 896/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:12 - r - INFO: - Episode: 897/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:12 - r - INFO: - Episode: 898/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:12 - r - INFO: - Episode: 899/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:13 - r - INFO: - Episode: 900/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:14 - r - INFO: - Current episode 900 has the best eval reward: 200.00 +2022-10-31 23:27:14 - r - INFO: - Episode: 901/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:15 - r - INFO: - Episode: 902/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:15 - r - INFO: - Episode: 903/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:16 - r - INFO: - Episode: 904/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:16 - r - INFO: - Episode: 905/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:17 - r - INFO: - Current episode 905 has the best eval reward: 200.00 +2022-10-31 23:27:17 - r - INFO: - Episode: 906/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:18 - r - INFO: - Episode: 907/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:18 - r - INFO: - Episode: 908/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:19 - r - INFO: - Episode: 909/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:19 - r - INFO: - Episode: 910/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:20 
- r - INFO: - Current episode 910 has the best eval reward: 200.00 +2022-10-31 23:27:20 - r - INFO: - Episode: 911/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:21 - r - INFO: - Episode: 912/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:21 - r - INFO: - Episode: 913/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:21 - r - INFO: - Episode: 914/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:22 - r - INFO: - Episode: 915/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:23 - r - INFO: - Current episode 915 has the best eval reward: 200.00 +2022-10-31 23:27:23 - r - INFO: - Episode: 916/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:23 - r - INFO: - Episode: 917/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:24 - r - INFO: - Episode: 918/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:24 - r - INFO: - Episode: 919/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:25 - r - INFO: - Episode: 920/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:25 - r - INFO: - Current episode 920 has the best eval reward: 200.00 +2022-10-31 23:27:26 - r - INFO: - Episode: 921/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:26 - r - INFO: - Episode: 922/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:27 - r - INFO: - Episode: 923/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:27 - r - INFO: - Episode: 924/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:28 - r - INFO: - Episode: 925/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:29 - r - INFO: - Episode: 926/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:29 - r - INFO: - Episode: 927/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:30 - r - INFO: - Episode: 928/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:30 - r - INFO: - Episode: 929/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:30 - r - INFO: - Episode: 930/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:31 - r - INFO: - Episode: 931/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:32 - r - INFO: - Episode: 932/1000, Reward: 200.0, Step: 200 +2022-10-31 
23:27:32 - r - INFO: - Episode: 933/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:33 - r - INFO: - Episode: 934/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:33 - r - INFO: - Episode: 935/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:34 - r - INFO: - Episode: 936/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:35 - r - INFO: - Episode: 937/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:35 - r - INFO: - Episode: 938/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:36 - r - INFO: - Episode: 939/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:36 - r - INFO: - Episode: 940/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:37 - r - INFO: - Episode: 941/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:38 - r - INFO: - Episode: 942/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:38 - r - INFO: - Episode: 943/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:38 - r - INFO: - Episode: 944/1000, Reward: 153.0, Step: 153 +2022-10-31 23:27:39 - r - INFO: - Episode: 945/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:40 - r - INFO: - Episode: 946/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:40 - r - INFO: - Episode: 947/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:41 - r - INFO: - Episode: 948/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:41 - r - INFO: - Episode: 949/1000, Reward: 150.0, Step: 150 +2022-10-31 23:27:41 - r - INFO: - Episode: 950/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:43 - r - INFO: - Episode: 951/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:43 - r - INFO: - Episode: 952/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:43 - r - INFO: - Episode: 953/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:44 - r - INFO: - Episode: 954/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:44 - r - INFO: - Episode: 955/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:45 - r - INFO: - Episode: 956/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:46 - r - INFO: - Episode: 957/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:46 - r - INFO: - 
Episode: 958/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:47 - r - INFO: - Episode: 959/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:47 - r - INFO: - Episode: 960/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:48 - r - INFO: - Current episode 960 has the best eval reward: 200.00 +2022-10-31 23:27:48 - r - INFO: - Episode: 961/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:49 - r - INFO: - Episode: 962/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:49 - r - INFO: - Episode: 963/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:49 - r - INFO: - Episode: 964/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:50 - r - INFO: - Episode: 965/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:51 - r - INFO: - Current episode 965 has the best eval reward: 200.00 +2022-10-31 23:27:51 - r - INFO: - Episode: 966/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:51 - r - INFO: - Episode: 967/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:52 - r - INFO: - Episode: 968/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:52 - r - INFO: - Episode: 969/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:53 - r - INFO: - Episode: 970/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:53 - r - INFO: - Current episode 970 has the best eval reward: 200.00 +2022-10-31 23:27:54 - r - INFO: - Episode: 971/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:54 - r - INFO: - Episode: 972/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:55 - r - INFO: - Episode: 973/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:55 - r - INFO: - Episode: 974/1000, Reward: 161.0, Step: 161 +2022-10-31 23:27:55 - r - INFO: - Episode: 975/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:57 - r - INFO: - Episode: 976/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:57 - r - INFO: - Episode: 977/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:57 - r - INFO: - Episode: 978/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:58 - r - INFO: - Episode: 979/1000, Reward: 200.0, Step: 200 +2022-10-31 23:27:58 - r - INFO: 
- Episode: 980/1000, Reward: 200.0, Step: 200 +2022-10-31 23:28:00 - r - INFO: - Episode: 981/1000, Reward: 200.0, Step: 200 +2022-10-31 23:28:00 - r - INFO: - Episode: 982/1000, Reward: 200.0, Step: 200 +2022-10-31 23:28:01 - r - INFO: - Episode: 983/1000, Reward: 200.0, Step: 200 +2022-10-31 23:28:01 - r - INFO: - Episode: 984/1000, Reward: 111.0, Step: 111 +2022-10-31 23:28:01 - r - INFO: - Episode: 985/1000, Reward: 200.0, Step: 200 +2022-10-31 23:28:03 - r - INFO: - Episode: 986/1000, Reward: 200.0, Step: 200 +2022-10-31 23:28:03 - r - INFO: - Episode: 987/1000, Reward: 200.0, Step: 200 +2022-10-31 23:28:04 - r - INFO: - Episode: 988/1000, Reward: 200.0, Step: 200 +2022-10-31 23:28:04 - r - INFO: - Episode: 989/1000, Reward: 200.0, Step: 200 +2022-10-31 23:28:04 - r - INFO: - Episode: 990/1000, Reward: 200.0, Step: 200 +2022-10-31 23:28:05 - r - INFO: - Current episode 990 has the best eval reward: 200.00 +2022-10-31 23:28:06 - r - INFO: - Episode: 991/1000, Reward: 200.0, Step: 200 +2022-10-31 23:28:06 - r - INFO: - Episode: 992/1000, Reward: 200.0, Step: 200 +2022-10-31 23:28:07 - r - INFO: - Episode: 993/1000, Reward: 200.0, Step: 200 +2022-10-31 23:28:07 - r - INFO: - Episode: 994/1000, Reward: 200.0, Step: 200 +2022-10-31 23:28:07 - r - INFO: - Episode: 995/1000, Reward: 200.0, Step: 200 +2022-10-31 23:28:09 - r - INFO: - Episode: 996/1000, Reward: 200.0, Step: 200 +2022-10-31 23:28:09 - r - INFO: - Episode: 997/1000, Reward: 200.0, Step: 200 +2022-10-31 23:28:09 - r - INFO: - Episode: 998/1000, Reward: 200.0, Step: 200 +2022-10-31 23:28:10 - r - INFO: - Episode: 999/1000, Reward: 154.0, Step: 154 +2022-10-31 23:28:10 - r - INFO: - Episode: 1000/1000, Reward: 200.0, Step: 200 +2022-10-31 23:28:11 - r - INFO: - Finish training! 
diff --git a/projects/codes/A2C/Train_CartPole-v1_A2C_20221031-232138/models/actor_checkpoint.pt b/projects/codes/A2C/Train_CartPole-v1_A2C_20221031-232138/models/actor_checkpoint.pt new file mode 100644 index 0000000..05bd7b6 Binary files /dev/null and b/projects/codes/A2C/Train_CartPole-v1_A2C_20221031-232138/models/actor_checkpoint.pt differ diff --git a/projects/codes/A2C/Train_CartPole-v1_A2C_20221031-232138/models/critic_checkpoint.pt b/projects/codes/A2C/Train_CartPole-v1_A2C_20221031-232138/models/critic_checkpoint.pt new file mode 100644 index 0000000..720f388 Binary files /dev/null and b/projects/codes/A2C/Train_CartPole-v1_A2C_20221031-232138/models/critic_checkpoint.pt differ diff --git a/projects/codes/A2C/Train_CartPole-v1_A2C_20221031-232138/results/learning_curve.png b/projects/codes/A2C/Train_CartPole-v1_A2C_20221031-232138/results/learning_curve.png new file mode 100644 index 0000000..841a786 Binary files /dev/null and b/projects/codes/A2C/Train_CartPole-v1_A2C_20221031-232138/results/learning_curve.png differ diff --git a/projects/codes/A2C/Train_CartPole-v1_A2C_20221031-232138/results/res.csv b/projects/codes/A2C/Train_CartPole-v1_A2C_20221031-232138/results/res.csv new file mode 100644 index 0000000..ee82c68 --- /dev/null +++ b/projects/codes/A2C/Train_CartPole-v1_A2C_20221031-232138/results/res.csv @@ -0,0 +1,1001 @@ +episodes,rewards,steps +0,25.0,25 +1,11.0,11 +2,32.0,32 +3,11.0,11 +4,14.0,14 +5,11.0,11 +6,23.0,23 +7,27.0,27 +8,10.0,10 +9,21.0,21 +10,15.0,15 +11,26.0,26 +12,22.0,22 +13,14.0,14 +14,14.0,14 +15,21.0,21 +16,10.0,10 +17,19.0,19 +18,18.0,18 +19,26.0,26 +20,29.0,29 +21,40.0,40 +22,35.0,35 +23,33.0,33 +24,47.0,47 +25,50.0,50 +26,21.0,21 +27,30.0,30 +28,26.0,26 +29,40.0,40 +30,31.0,31 +31,54.0,54 +32,59.0,59 +33,50.0,50 +34,26.0,26 +35,34.0,34 +36,25.0,25 +37,166.0,166 +38,35.0,35 +39,25.0,25 +40,110.0,110 +41,22.0,22 +42,57.0,57 +43,45.0,45 +44,35.0,35 +45,45.0,45 +46,51.0,51 +47,32.0,32 +48,67.0,67 +49,46.0,46 +50,61.0,61 
+51,49.0,49 +52,47.0,47 +53,37.0,37 +54,32.0,32 +55,31.0,31 +56,33.0,33 +57,93.0,93 +58,60.0,60 +59,128.0,128 +60,200.0,200 +61,47.0,47 +62,47.0,47 +63,63.0,63 +64,68.0,68 +65,45.0,45 +66,101.0,101 +67,47.0,47 +68,49.0,49 +69,54.0,54 +70,42.0,42 +71,77.0,77 +72,67.0,67 +73,41.0,41 +74,89.0,89 +75,51.0,51 +76,54.0,54 +77,37.0,37 +78,49.0,49 +79,46.0,46 +80,31.0,31 +81,43.0,43 +82,60.0,60 +83,41.0,41 +84,40.0,40 +85,28.0,28 +86,50.0,50 +87,159.0,159 +88,30.0,30 +89,34.0,34 +90,70.0,70 +91,22.0,22 +92,39.0,39 +93,50.0,50 +94,40.0,40 +95,37.0,37 +96,121.0,121 +97,26.0,26 +98,40.0,40 +99,30.0,30 +100,35.0,35 +101,40.0,40 +102,28.0,28 +103,29.0,29 +104,42.0,42 +105,54.0,54 +106,25.0,25 +107,47.0,47 +108,32.0,32 +109,50.0,50 +110,30.0,30 +111,58.0,58 +112,32.0,32 +113,43.0,43 +114,57.0,57 +115,20.0,20 +116,48.0,48 +117,45.0,45 +118,47.0,47 +119,69.0,69 +120,34.0,34 +121,22.0,22 +122,22.0,22 +123,38.0,38 +124,36.0,36 +125,41.0,41 +126,28.0,28 +127,35.0,35 +128,48.0,48 +129,51.0,51 +130,51.0,51 +131,36.0,36 +132,45.0,45 +133,27.0,27 +134,40.0,40 +135,43.0,43 +136,64.0,64 +137,43.0,43 +138,37.0,37 +139,38.0,38 +140,69.0,69 +141,36.0,36 +142,28.0,28 +143,58.0,58 +144,43.0,43 +145,50.0,50 +146,30.0,30 +147,42.0,42 +148,42.0,42 +149,35.0,35 +150,67.0,67 +151,45.0,45 +152,28.0,28 +153,59.0,59 +154,64.0,64 +155,67.0,67 +156,41.0,41 +157,81.0,81 +158,76.0,76 +159,91.0,91 +160,119.0,119 +161,47.0,47 +162,64.0,64 +163,178.0,178 +164,97.0,97 +165,181.0,181 +166,166.0,166 +167,79.0,79 +168,141.0,141 +169,119.0,119 +170,81.0,81 +171,124.0,124 +172,150.0,150 +173,98.0,98 +174,164.0,164 +175,200.0,200 +176,115.0,115 +177,116.0,116 +178,160.0,160 +179,103.0,103 +180,181.0,181 +181,185.0,185 +182,93.0,93 +183,110.0,110 +184,200.0,200 +185,141.0,141 +186,150.0,150 +187,121.0,121 +188,110.0,110 +189,115.0,115 +190,114.0,114 +191,45.0,45 +192,125.0,125 +193,142.0,142 +194,54.0,54 +195,62.0,62 +196,122.0,122 +197,58.0,58 +198,88.0,88 +199,141.0,141 +200,113.0,113 +201,200.0,200 +202,136.0,136 
+203,114.0,114 +204,102.0,102 +205,176.0,176 +206,150.0,150 +207,105.0,105 +208,200.0,200 +209,200.0,200 +210,167.0,167 +211,104.0,104 +212,124.0,124 +213,96.0,96 +214,200.0,200 +215,199.0,199 +216,200.0,200 +217,132.0,132 +218,188.0,188 +219,132.0,132 +220,151.0,151 +221,125.0,125 +222,42.0,42 +223,200.0,200 +224,159.0,159 +225,171.0,171 +226,122.0,122 +227,189.0,189 +228,129.0,129 +229,106.0,106 +230,107.0,107 +231,200.0,200 +232,200.0,200 +233,200.0,200 +234,200.0,200 +235,158.0,158 +236,200.0,200 +237,192.0,192 +238,179.0,179 +239,102.0,102 +240,125.0,125 +241,138.0,138 +242,189.0,189 +243,41.0,41 +244,97.0,97 +245,49.0,49 +246,86.0,86 +247,121.0,121 +248,117.0,117 +249,43.0,43 +250,72.0,72 +251,34.0,34 +252,83.0,83 +253,83.0,83 +254,38.0,38 +255,34.0,34 +256,99.0,99 +257,45.0,45 +258,47.0,47 +259,44.0,44 +260,26.0,26 +261,37.0,37 +262,26.0,26 +263,43.0,43 +264,27.0,27 +265,24.0,24 +266,42.0,42 +267,86.0,86 +268,23.0,23 +269,32.0,32 +270,57.0,57 +271,25.0,25 +272,98.0,98 +273,29.0,29 +274,25.0,25 +275,29.0,29 +276,39.0,39 +277,20.0,20 +278,92.0,92 +279,28.0,28 +280,78.0,78 +281,25.0,25 +282,31.0,31 +283,88.0,88 +284,85.0,85 +285,37.0,37 +286,26.0,26 +287,19.0,19 +288,40.0,40 +289,27.0,27 +290,17.0,17 +291,27.0,27 +292,26.0,26 +293,82.0,82 +294,36.0,36 +295,24.0,24 +296,30.0,30 +297,20.0,20 +298,34.0,34 +299,30.0,30 +300,23.0,23 +301,36.0,36 +302,29.0,29 +303,34.0,34 +304,25.0,25 +305,42.0,42 +306,88.0,88 +307,26.0,26 +308,85.0,85 +309,89.0,89 +310,48.0,48 +311,83.0,83 +312,109.0,109 +313,42.0,42 +314,93.0,93 +315,85.0,85 +316,100.0,100 +317,106.0,106 +318,28.0,28 +319,108.0,108 +320,112.0,112 +321,88.0,88 +322,108.0,108 +323,108.0,108 +324,90.0,90 +325,112.0,112 +326,113.0,113 +327,94.0,94 +328,99.0,99 +329,45.0,45 +330,121.0,121 +331,102.0,102 +332,111.0,111 +333,54.0,54 +334,198.0,198 +335,83.0,83 +336,107.0,107 +337,101.0,101 +338,129.0,129 +339,88.0,88 +340,86.0,86 +341,199.0,199 +342,95.0,95 +343,103.0,103 +344,100.0,100 +345,89.0,89 +346,87.0,87 
+347,110.0,110 +348,127.0,127 +349,97.0,97 +350,34.0,34 +351,123.0,123 +352,49.0,49 +353,96.0,96 +354,90.0,90 +355,110.0,110 +356,93.0,93 +357,102.0,102 +358,128.0,128 +359,125.0,125 +360,92.0,92 +361,109.0,109 +362,114.0,114 +363,111.0,111 +364,38.0,38 +365,55.0,55 +366,106.0,106 +367,115.0,115 +368,103.0,103 +369,50.0,50 +370,110.0,110 +371,102.0,102 +372,110.0,110 +373,29.0,29 +374,35.0,35 +375,42.0,42 +376,62.0,62 +377,119.0,119 +378,33.0,33 +379,31.0,31 +380,97.0,97 +381,192.0,192 +382,179.0,179 +383,89.0,89 +384,32.0,32 +385,33.0,33 +386,52.0,52 +387,31.0,31 +388,22.0,22 +389,118.0,118 +390,24.0,24 +391,115.0,115 +392,20.0,20 +393,33.0,33 +394,40.0,40 +395,27.0,27 +396,26.0,26 +397,24.0,24 +398,19.0,19 +399,22.0,22 +400,24.0,24 +401,18.0,18 +402,23.0,23 +403,27.0,27 +404,20.0,20 +405,27.0,27 +406,17.0,17 +407,27.0,27 +408,25.0,25 +409,25.0,25 +410,24.0,24 +411,24.0,24 +412,18.0,18 +413,20.0,20 +414,27.0,27 +415,28.0,28 +416,30.0,30 +417,28.0,28 +418,33.0,33 +419,24.0,24 +420,96.0,96 +421,26.0,26 +422,29.0,29 +423,25.0,25 +424,38.0,38 +425,33.0,33 +426,23.0,23 +427,39.0,39 +428,28.0,28 +429,97.0,97 +430,30.0,30 +431,29.0,29 +432,103.0,103 +433,36.0,36 +434,32.0,32 +435,41.0,41 +436,111.0,111 +437,48.0,48 +438,24.0,24 +439,49.0,49 +440,116.0,116 +441,118.0,118 +442,94.0,94 +443,132.0,132 +444,41.0,41 +445,105.0,105 +446,116.0,116 +447,136.0,136 +448,137.0,137 +449,45.0,45 +450,157.0,157 +451,116.0,116 +452,125.0,125 +453,120.0,120 +454,150.0,150 +455,114.0,114 +456,44.0,44 +457,138.0,138 +458,133.0,133 +459,141.0,141 +460,124.0,124 +461,143.0,143 +462,123.0,123 +463,134.0,134 +464,152.0,152 +465,140.0,140 +466,200.0,200 +467,168.0,168 +468,200.0,200 +469,200.0,200 +470,200.0,200 +471,200.0,200 +472,200.0,200 +473,200.0,200 +474,200.0,200 +475,200.0,200 +476,200.0,200 +477,200.0,200 +478,200.0,200 +479,200.0,200 +480,200.0,200 +481,200.0,200 +482,200.0,200 +483,200.0,200 +484,200.0,200 +485,200.0,200 +486,200.0,200 +487,200.0,200 +488,200.0,200 +489,200.0,200 
+490,200.0,200 +491,200.0,200 +492,200.0,200 +493,200.0,200 +494,200.0,200 +495,169.0,169 +496,200.0,200 +497,200.0,200 +498,200.0,200 +499,200.0,200 +500,200.0,200 +501,200.0,200 +502,200.0,200 +503,200.0,200 +504,200.0,200 +505,200.0,200 +506,200.0,200 +507,200.0,200 +508,200.0,200 +509,200.0,200 +510,200.0,200 +511,200.0,200 +512,200.0,200 +513,200.0,200 +514,200.0,200 +515,200.0,200 +516,200.0,200 +517,200.0,200 +518,200.0,200 +519,200.0,200 +520,200.0,200 +521,200.0,200 +522,200.0,200 +523,200.0,200 +524,200.0,200 +525,200.0,200 +526,200.0,200 +527,200.0,200 +528,186.0,186 +529,200.0,200 +530,200.0,200 +531,200.0,200 +532,200.0,200 +533,200.0,200 +534,200.0,200 +535,200.0,200 +536,200.0,200 +537,200.0,200 +538,200.0,200 +539,200.0,200 +540,200.0,200 +541,200.0,200 +542,200.0,200 +543,84.0,84 +544,200.0,200 +545,200.0,200 +546,200.0,200 +547,200.0,200 +548,200.0,200 +549,200.0,200 +550,200.0,200 +551,200.0,200 +552,200.0,200 +553,200.0,200 +554,200.0,200 +555,200.0,200 +556,200.0,200 +557,200.0,200 +558,200.0,200 +559,200.0,200 +560,200.0,200 +561,200.0,200 +562,200.0,200 +563,200.0,200 +564,200.0,200 +565,200.0,200 +566,200.0,200 +567,200.0,200 +568,200.0,200 +569,200.0,200 +570,200.0,200 +571,200.0,200 +572,200.0,200 +573,200.0,200 +574,200.0,200 +575,200.0,200 +576,200.0,200 +577,200.0,200 +578,200.0,200 +579,200.0,200 +580,200.0,200 +581,200.0,200 +582,200.0,200 +583,200.0,200 +584,199.0,199 +585,200.0,200 +586,200.0,200 +587,178.0,178 +588,200.0,200 +589,188.0,188 +590,156.0,156 +591,165.0,165 +592,131.0,131 +593,157.0,157 +594,170.0,170 +595,123.0,123 +596,109.0,109 +597,124.0,124 +598,113.0,113 +599,38.0,38 +600,107.0,107 +601,115.0,115 +602,101.0,101 +603,113.0,113 +604,100.0,100 +605,109.0,109 +606,119.0,119 +607,117.0,117 +608,108.0,108 +609,101.0,101 +610,110.0,110 +611,59.0,59 +612,112.0,112 +613,104.0,104 +614,45.0,45 +615,29.0,29 +616,42.0,42 +617,74.0,74 +618,79.0,79 +619,50.0,50 +620,30.0,30 +621,43.0,43 +622,77.0,77 +623,36.0,36 +624,61.0,61 
+625,36.0,36 +626,30.0,30 +627,43.0,43 +628,27.0,27 +629,88.0,88 +630,42.0,42 +631,40.0,40 +632,59.0,59 +633,81.0,81 +634,85.0,85 +635,55.0,55 +636,40.0,40 +637,99.0,99 +638,104.0,104 +639,117.0,117 +640,112.0,112 +641,43.0,43 +642,96.0,96 +643,105.0,105 +644,115.0,115 +645,99.0,99 +646,123.0,123 +647,123.0,123 +648,40.0,40 +649,100.0,100 +650,124.0,124 +651,106.0,106 +652,122.0,122 +653,127.0,127 +654,121.0,121 +655,121.0,121 +656,125.0,125 +657,127.0,127 +658,132.0,132 +659,142.0,142 +660,134.0,134 +661,147.0,147 +662,175.0,175 +663,180.0,180 +664,183.0,183 +665,167.0,167 +666,179.0,179 +667,173.0,173 +668,200.0,200 +669,200.0,200 +670,184.0,184 +671,200.0,200 +672,193.0,193 +673,200.0,200 +674,200.0,200 +675,200.0,200 +676,199.0,199 +677,200.0,200 +678,200.0,200 +679,200.0,200 +680,200.0,200 +681,200.0,200 +682,200.0,200 +683,200.0,200 +684,200.0,200 +685,200.0,200 +686,200.0,200 +687,200.0,200 +688,200.0,200 +689,200.0,200 +690,200.0,200 +691,200.0,200 +692,200.0,200 +693,200.0,200 +694,200.0,200 +695,200.0,200 +696,200.0,200 +697,200.0,200 +698,200.0,200 +699,200.0,200 +700,200.0,200 +701,200.0,200 +702,200.0,200 +703,200.0,200 +704,200.0,200 +705,200.0,200 +706,200.0,200 +707,200.0,200 +708,200.0,200 +709,200.0,200 +710,200.0,200 +711,200.0,200 +712,200.0,200 +713,200.0,200 +714,200.0,200 +715,200.0,200 +716,200.0,200 +717,200.0,200 +718,200.0,200 +719,200.0,200 +720,200.0,200 +721,200.0,200 +722,200.0,200 +723,200.0,200 +724,200.0,200 +725,200.0,200 +726,200.0,200 +727,200.0,200 +728,200.0,200 +729,200.0,200 +730,200.0,200 +731,200.0,200 +732,200.0,200 +733,200.0,200 +734,200.0,200 +735,200.0,200 +736,200.0,200 +737,200.0,200 +738,200.0,200 +739,200.0,200 +740,200.0,200 +741,200.0,200 +742,200.0,200 +743,200.0,200 +744,200.0,200 +745,200.0,200 +746,200.0,200 +747,200.0,200 +748,200.0,200 +749,200.0,200 +750,200.0,200 +751,200.0,200 +752,200.0,200 +753,200.0,200 +754,200.0,200 +755,200.0,200 +756,200.0,200 +757,200.0,200 +758,200.0,200 +759,200.0,200 
+760,200.0,200 +761,200.0,200 +762,200.0,200 +763,200.0,200 +764,200.0,200 +765,200.0,200 +766,200.0,200 +767,200.0,200 +768,200.0,200 +769,200.0,200 +770,200.0,200 +771,200.0,200 +772,200.0,200 +773,200.0,200 +774,200.0,200 +775,200.0,200 +776,200.0,200 +777,200.0,200 +778,200.0,200 +779,200.0,200 +780,200.0,200 +781,200.0,200 +782,200.0,200 +783,200.0,200 +784,200.0,200 +785,200.0,200 +786,200.0,200 +787,200.0,200 +788,200.0,200 +789,200.0,200 +790,200.0,200 +791,200.0,200 +792,200.0,200 +793,200.0,200 +794,200.0,200 +795,200.0,200 +796,200.0,200 +797,200.0,200 +798,200.0,200 +799,200.0,200 +800,200.0,200 +801,200.0,200 +802,200.0,200 +803,200.0,200 +804,200.0,200 +805,200.0,200 +806,200.0,200 +807,200.0,200 +808,200.0,200 +809,200.0,200 +810,200.0,200 +811,200.0,200 +812,200.0,200 +813,200.0,200 +814,200.0,200 +815,200.0,200 +816,200.0,200 +817,200.0,200 +818,200.0,200 +819,200.0,200 +820,200.0,200 +821,200.0,200 +822,200.0,200 +823,200.0,200 +824,200.0,200 +825,200.0,200 +826,200.0,200 +827,200.0,200 +828,200.0,200 +829,200.0,200 +830,200.0,200 +831,200.0,200 +832,200.0,200 +833,200.0,200 +834,200.0,200 +835,200.0,200 +836,200.0,200 +837,200.0,200 +838,200.0,200 +839,200.0,200 +840,200.0,200 +841,200.0,200 +842,200.0,200 +843,200.0,200 +844,200.0,200 +845,200.0,200 +846,200.0,200 +847,200.0,200 +848,200.0,200 +849,200.0,200 +850,200.0,200 +851,200.0,200 +852,200.0,200 +853,200.0,200 +854,200.0,200 +855,200.0,200 +856,200.0,200 +857,200.0,200 +858,200.0,200 +859,200.0,200 +860,200.0,200 +861,200.0,200 +862,200.0,200 +863,200.0,200 +864,200.0,200 +865,200.0,200 +866,200.0,200 +867,200.0,200 +868,200.0,200 +869,200.0,200 +870,200.0,200 +871,200.0,200 +872,200.0,200 +873,200.0,200 +874,200.0,200 +875,200.0,200 +876,200.0,200 +877,200.0,200 +878,200.0,200 +879,200.0,200 +880,200.0,200 +881,200.0,200 +882,200.0,200 +883,200.0,200 +884,200.0,200 +885,200.0,200 +886,200.0,200 +887,200.0,200 +888,200.0,200 +889,200.0,200 +890,200.0,200 +891,200.0,200 +892,200.0,200 
+893,200.0,200 +894,200.0,200 +895,200.0,200 +896,200.0,200 +897,200.0,200 +898,200.0,200 +899,200.0,200 +900,200.0,200 +901,200.0,200 +902,200.0,200 +903,200.0,200 +904,200.0,200 +905,200.0,200 +906,200.0,200 +907,200.0,200 +908,200.0,200 +909,200.0,200 +910,200.0,200 +911,200.0,200 +912,200.0,200 +913,200.0,200 +914,200.0,200 +915,200.0,200 +916,200.0,200 +917,200.0,200 +918,200.0,200 +919,200.0,200 +920,200.0,200 +921,200.0,200 +922,200.0,200 +923,200.0,200 +924,200.0,200 +925,200.0,200 +926,200.0,200 +927,200.0,200 +928,200.0,200 +929,200.0,200 +930,200.0,200 +931,200.0,200 +932,200.0,200 +933,200.0,200 +934,200.0,200 +935,200.0,200 +936,200.0,200 +937,200.0,200 +938,200.0,200 +939,200.0,200 +940,200.0,200 +941,200.0,200 +942,200.0,200 +943,153.0,153 +944,200.0,200 +945,200.0,200 +946,200.0,200 +947,200.0,200 +948,150.0,150 +949,200.0,200 +950,200.0,200 +951,200.0,200 +952,200.0,200 +953,200.0,200 +954,200.0,200 +955,200.0,200 +956,200.0,200 +957,200.0,200 +958,200.0,200 +959,200.0,200 +960,200.0,200 +961,200.0,200 +962,200.0,200 +963,200.0,200 +964,200.0,200 +965,200.0,200 +966,200.0,200 +967,200.0,200 +968,200.0,200 +969,200.0,200 +970,200.0,200 +971,200.0,200 +972,200.0,200 +973,161.0,161 +974,200.0,200 +975,200.0,200 +976,200.0,200 +977,200.0,200 +978,200.0,200 +979,200.0,200 +980,200.0,200 +981,200.0,200 +982,200.0,200 +983,111.0,111 +984,200.0,200 +985,200.0,200 +986,200.0,200 +987,200.0,200 +988,200.0,200 +989,200.0,200 +990,200.0,200 +991,200.0,200 +992,200.0,200 +993,200.0,200 +994,200.0,200 +995,200.0,200 +996,200.0,200 +997,200.0,200 +998,154.0,154 +999,200.0,200 diff --git a/projects/codes/A2C/a2c.py b/projects/codes/A2C/a2c.py index c1a88a5..f822451 100644 --- a/projects/codes/A2C/a2c.py +++ b/projects/codes/A2C/a2c.py @@ -1,34 +1,79 @@ +#!/usr/bin/env python +# coding=utf-8 +''' +Author: JiangJi +Email: johnjim0816@gmail.com +Date: 2022-08-16 23:05:25 +LastEditor: JiangJi +LastEditTime: 2022-11-01 00:33:49 +Discription: +''' import torch import 
numpy as np - +from torch.distributions import Categorical,Normal class A2C: def __init__(self,models,memories,cfg): - self.n_actions = cfg['n_actions'] - self.gamma = cfg['gamma'] - self.device = torch.device(cfg['device']) + self.n_actions = cfg.n_actions + self.gamma = cfg.gamma + self.device = torch.device(cfg.device) + self.continuous = cfg.continuous + if hasattr(cfg,'action_bound'): + self.action_bound = cfg.action_bound self.memory = memories['ACMemory'] self.actor = models['Actor'].to(self.device) self.critic = models['Critic'].to(self.device) - self.actor_optim = torch.optim.Adam(self.actor.parameters(), lr=cfg['actor_lr']) - self.critic_optim = torch.optim.Adam(self.critic.parameters(), lr=cfg['critic_lr']) + self.actor_optim = torch.optim.Adam(self.actor.parameters(), lr=cfg.actor_lr) + self.critic_optim = torch.optim.Adam(self.critic.parameters(), lr=cfg.critic_lr) def sample_action(self,state): - state = torch.tensor(state, device=self.device, dtype=torch.float32).unsqueeze(dim=0) - dist = self.actor(state) - value = self.critic(state) # note that 'dist' need require_grad=True - value = value.detach().numpy().squeeze(0)[0] - action = np.random.choice(self.n_actions, p=dist.detach().numpy().squeeze(0)) # shape(p=(n_actions,1) - return action,value,dist + # state = torch.tensor(state, device=self.device, dtype=torch.float32).unsqueeze(dim=0) + # dist = self.actor(state) + # self.entropy = - np.sum(np.mean(dist.detach().cpu().numpy()) * np.log(dist.detach().cpu().numpy())) + # value = self.critic(state) # note that 'dist' need require_grad=True + # self.value = value.detach().cpu().numpy().squeeze(0)[0] + # action = np.random.choice(self.n_actions, p=dist.detach().cpu().numpy().squeeze(0)) # shape(p=(n_actions,1) + # self.log_prob = torch.log(dist.squeeze(0)[action]) + if self.continuous: + state = torch.tensor(state, device=self.device, dtype=torch.float32).unsqueeze(dim=0) + mu, sigma = self.actor(state) + dist = Normal(self.action_bound * mu.view(1,), 
sigma.view(1,)) + action = dist.sample() + value = self.critic(state) + # self.entropy = - np.sum(np.mean(dist.detach().cpu().numpy()) * np.log(dist.detach().cpu().numpy())) + self.value = value.detach().cpu().numpy().squeeze(0)[0] # detach() to avoid gradient + self.log_prob = dist.log_prob(action).squeeze(dim=0) # Tensor([0.]) + self.entropy = dist.entropy().cpu().detach().numpy().squeeze(0) # detach() to avoid gradient + return action.cpu().detach().numpy() + else: + state = torch.tensor(state, device=self.device, dtype=torch.float32).unsqueeze(dim=0) + probs = self.actor(state) + dist = Categorical(probs) + action = dist.sample() # Tensor([0]) + value = self.critic(state) + self.value = value.detach().cpu().numpy().squeeze(0)[0] # detach() to avoid gradient + self.log_prob = dist.log_prob(action).squeeze(dim=0) # Tensor([0.]) + self.entropy = dist.entropy().cpu().detach().numpy().squeeze(0) # detach() to avoid gradient + return action.cpu().numpy().item() + @torch.no_grad() def predict_action(self,state): - state = torch.tensor(state, device=self.device, dtype=torch.float32).unsqueeze(dim=0) - dist = self.actor(state) - value = self.critic(state) # note that 'dist' need require_grad=True - value = value.detach().numpy().squeeze(0)[0] - action = np.random.choice(self.n_actions, p=dist.detach().numpy().squeeze(0)) # shape(p=(n_actions,1) - return action,value,dist + if self.continuous: + state = torch.tensor(state, device=self.device, dtype=torch.float32).unsqueeze(dim=0) + mu, sigma = self.actor(state) + dist = Normal(self.action_bound * mu.view(1,), sigma.view(1,)) + action = dist.sample() + return action.cpu().detach().numpy() + else: + state = torch.tensor(state, device=self.device, dtype=torch.float32).unsqueeze(dim=0) + dist = self.actor(state) + # value = self.critic(state) # note that 'dist' need require_grad=True + # value = value.detach().cpu().numpy().squeeze(0)[0] + action = np.random.choice(self.n_actions, p=dist.detach().cpu().numpy().squeeze(0)) # 
shape(p=(n_actions,1) + return action def update(self,next_state,entropy): value_pool,log_prob_pool,reward_pool = self.memory.sample() + value_pool = torch.tensor(value_pool, device=self.device) + log_prob_pool = torch.stack(log_prob_pool) next_state = torch.tensor(next_state, device=self.device, dtype=torch.float32).unsqueeze(dim=0) next_value = self.critic(next_state) returns = np.zeros_like(reward_pool) @@ -36,9 +81,7 @@ class A2C: next_value = reward_pool[t] + self.gamma * next_value # G(s_{t},a{t}) = r_{t+1} + gamma * V(s_{t+1}) returns[t] = next_value returns = torch.tensor(returns, device=self.device) - value_pool = torch.tensor(value_pool, device=self.device) advantages = returns - value_pool - log_prob_pool = torch.stack(log_prob_pool) actor_loss = (-log_prob_pool * advantages).mean() critic_loss = 0.5 * advantages.pow(2).mean() tot_loss = actor_loss + critic_loss + 0.001 * entropy diff --git a/projects/codes/A2C/a2c_2.py b/projects/codes/A2C/a2c_2.py index 74e2cfe..e29acdc 100644 --- a/projects/codes/A2C/a2c_2.py +++ b/projects/codes/A2C/a2c_2.py @@ -1,14 +1,24 @@ +#!/usr/bin/env python +# coding=utf-8 +''' +Author: JiangJi +Email: johnjim0816@gmail.com +Date: 2022-09-19 14:48:16 +LastEditor: JiangJi +LastEditTime: 2022-10-30 01:21:50 +Discription: #TODO,待更新模版 +''' import torch import numpy as np class A2C_2: def __init__(self,models,memories,cfg): - self.n_actions = cfg['n_actions'] - self.gamma = cfg['gamma'] - self.device = torch.device(cfg['device']) + self.n_actions = cfg.n_actions + self.gamma = cfg.gamma + self.device = torch.device(cfg.device) self.memory = memories['ACMemory'] self.ac_net = models['ActorCritic'].to(self.device) - self.ac_optimizer = torch.optim.Adam(self.ac_net.parameters(), lr=cfg['lr']) + self.ac_optimizer = torch.optim.Adam(self.ac_net.parameters(), lr = cfg.lr) def sample_action(self,state): state = torch.tensor(state, device=self.device, dtype=torch.float32).unsqueeze(dim=0) value, dist = self.ac_net(state) # note that 
'dist' need require_grad=True diff --git a/projects/codes/A2C/config/CartPole-v1_A2C_Test.yaml b/projects/codes/A2C/config/CartPole-v1_A2C_Test.yaml new file mode 100644 index 0000000..d148bb0 --- /dev/null +++ b/projects/codes/A2C/config/CartPole-v1_A2C_Test.yaml @@ -0,0 +1,21 @@ +general_cfg: + algo_name: A2C + device: cuda + env_name: CartPole-v1 + mode: test + load_checkpoint: true + load_path: Train_CartPole-v1_A2C_20221031-232138 + max_steps: 200 + save_fig: true + seed: 1 + show_fig: false + test_eps: 20 + train_eps: 1000 +algo_cfg: + continuous: false + batch_size: 64 + buffer_size: 100000 + gamma: 0.99 + actor_lr: 0.0003 + critic_lr: 0.001 + target_update: 4 diff --git a/projects/codes/A2C/config/CartPole-v1_A2C_Train.yaml b/projects/codes/A2C/config/CartPole-v1_A2C_Train.yaml new file mode 100644 index 0000000..f79f148 --- /dev/null +++ b/projects/codes/A2C/config/CartPole-v1_A2C_Train.yaml @@ -0,0 +1,19 @@ +general_cfg: + algo_name: A2C + device: cuda + env_name: CartPole-v1 + mode: train + load_checkpoint: false + load_path: Train_CartPole-v1_DQN_20221026-054757 + max_steps: 200 + save_fig: true + seed: 1 + show_fig: false + test_eps: 20 + train_eps: 600 +algo_cfg: + continuous: false + batch_size: 64 + buffer_size: 100000 + gamma: 0.0003 + lr: 0.001 diff --git a/projects/codes/A2C/config/Pendulum-v1_A2C_Train.yaml b/projects/codes/A2C/config/Pendulum-v1_A2C_Train.yaml new file mode 100644 index 0000000..a1680c9 --- /dev/null +++ b/projects/codes/A2C/config/Pendulum-v1_A2C_Train.yaml @@ -0,0 +1,21 @@ +general_cfg: + algo_name: A2C + device: cuda + env_name: Pendulum-v1 + mode: train + eval_per_episode: 200 + load_checkpoint: false + load_path: Train_CartPole-v1_DQN_20221026-054757 + max_steps: 200 + save_fig: true + seed: 1 + show_fig: false + test_eps: 20 + train_eps: 1000 +algo_cfg: + continuous: true + batch_size: 64 + buffer_size: 100000 + gamma: 0.0003 + actor_lr: 0.0003 + critic_lr: 0.001 diff --git a/projects/codes/A2C/config/config.py 
b/projects/codes/A2C/config/config.py new file mode 100644 index 0000000..a552d38 --- /dev/null +++ b/projects/codes/A2C/config/config.py @@ -0,0 +1,38 @@ +#!/usr/bin/env python +# coding=utf-8 +''' +Author: JiangJi +Email: johnjim0816@gmail.com +Date: 2022-10-30 00:53:03 +LastEditor: JiangJi +LastEditTime: 2022-11-01 00:17:55 +Discription: default parameters of A2C +''' +from common.config import GeneralConfig,AlgoConfig + +class GeneralConfigA2C(GeneralConfig): + def __init__(self) -> None: + self.env_name = "CartPole-v1" # name of environment + self.algo_name = "A2C" # name of algorithm + self.mode = "train" # train or test + self.seed = 1 # random seed + self.device = "cuda" # device to use + self.train_eps = 1000 # number of episodes for training + self.test_eps = 20 # number of episodes for testing + self.max_steps = 200 # max steps for each episode + self.load_checkpoint = False + self.load_path = "tasks" # path to load model + self.show_fig = False # show figure or not + self.save_fig = True # save figure or not + +class AlgoConfigA2C(AlgoConfig): + def __init__(self) -> None: + self.continuous = False # continuous or discrete action space + self.hidden_dim = 256 # hidden_dim for MLP + self.gamma = 0.99 # discount factor + self.actor_lr = 3e-4 # learning rate of actor + self.critic_lr = 1e-3 # learning rate of critic + self.actor_hidden_dim = 256 # hidden_dim for actor MLP + self.critic_hidden_dim = 256 # hidden_dim for critic MLP + self.buffer_size = 100000 # size of replay buffer + self.batch_size = 64 # batch size \ No newline at end of file diff --git a/projects/codes/A2C/main.py b/projects/codes/A2C/main.py deleted file mode 100644 index e5585e8..0000000 --- a/projects/codes/A2C/main.py +++ /dev/null @@ -1,121 +0,0 @@ -import sys,os -os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE" # avoid "OMP: Error #15: Initializing libiomp5md.dll, but found libiomp5md.dll already initialized." 
-curr_path = os.path.dirname(os.path.abspath(__file__)) # current path -parent_path = os.path.dirname(curr_path) # parent path -sys.path.append(parent_path) # add path to system path - -import datetime -import argparse -import gym -import torch -import numpy as np -from common.utils import all_seed -from common.launcher import Launcher -from common.memories import PGReplay -from common.models import ActorSoftmax,Critic -from envs.register import register_env -from a2c import A2C - -class Main(Launcher): - def get_args(self): - curr_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") # obtain current time - parser = argparse.ArgumentParser(description="hyperparameters") - parser.add_argument('--algo_name',default='A2C',type=str,help="name of algorithm") - parser.add_argument('--env_name',default='CartPole-v0',type=str,help="name of environment") - parser.add_argument('--train_eps',default=1600,type=int,help="episodes of training") - parser.add_argument('--test_eps',default=20,type=int,help="episodes of testing") - parser.add_argument('--ep_max_steps',default = 100000,type=int,help="steps per episode, much larger value can simulate infinite steps") - parser.add_argument('--gamma',default=0.99,type=float,help="discounted factor") - parser.add_argument('--actor_lr',default=3e-4,type=float,help="learning rate of actor") - parser.add_argument('--critic_lr',default=1e-3,type=float,help="learning rate of critic") - parser.add_argument('--actor_hidden_dim',default=256,type=int,help="hidden of actor net") - parser.add_argument('--critic_hidden_dim',default=256,type=int,help="hidden of critic net") - parser.add_argument('--device',default='cpu',type=str,help="cpu or cuda") - parser.add_argument('--seed',default=10,type=int,help="seed") - parser.add_argument('--show_fig',default=False,type=bool,help="if show figure or not") - parser.add_argument('--save_fig',default=True,type=bool,help="if save figure or not") - args = parser.parse_args() - default_args = 
{'result_path':f"{curr_path}/outputs/{args.env_name}/{curr_time}/results/", - 'model_path':f"{curr_path}/outputs/{args.env_name}/{curr_time}/models/", - } - args = {**vars(args),**default_args} # type(dict) - return args - def env_agent_config(self,cfg): - ''' create env and agent - ''' - register_env(cfg['env_name']) - env = gym.make(cfg['env_name']) - if cfg['seed'] !=0: # set random seed - all_seed(env,seed=cfg["seed"]) - try: # state dimension - n_states = env.observation_space.n # print(hasattr(env.observation_space, 'n')) - except AttributeError: - n_states = env.observation_space.shape[0] # print(hasattr(env.observation_space, 'shape')) - n_actions = env.action_space.n # action dimension - print(f"n_states: {n_states}, n_actions: {n_actions}") - cfg.update({"n_states":n_states,"n_actions":n_actions}) # update to cfg paramters - models = {'Actor':ActorSoftmax(cfg['n_states'],cfg['n_actions'], hidden_dim = cfg['actor_hidden_dim']),'Critic':Critic(cfg['n_states'],1,hidden_dim=cfg['critic_hidden_dim'])} - memories = {'ACMemory':PGReplay()} - agent = A2C(models,memories,cfg) - return env,agent - def train(self,cfg,env,agent): - print("Start training!") - print(f"Env: {cfg['env_name']}, Algorithm: {cfg['algo_name']}, Device: {cfg['device']}") - rewards = [] # record rewards for all episodes - steps = [] # record steps for all episodes - - for i_ep in range(cfg['train_eps']): - ep_reward = 0 # reward per episode - ep_step = 0 # step per episode - ep_entropy = 0 - state = env.reset() # reset and obtain initial state - - for _ in range(cfg['ep_max_steps']): - action, value, dist = agent.sample_action(state) # sample action - next_state, reward, done, _ = env.step(action) # update env and return transitions - log_prob = torch.log(dist.squeeze(0)[action]) - entropy = -np.sum(np.mean(dist.detach().numpy()) * np.log(dist.detach().numpy())) - agent.memory.push((value,log_prob,reward)) # save transitions - state = next_state # update state - ep_reward += reward - 
ep_entropy += entropy - ep_step += 1 - if done: - break - agent.update(next_state,ep_entropy) # update agent - rewards.append(ep_reward) - steps.append(ep_step) - if (i_ep+1)%10==0: - print(f'Episode: {i_ep+1}/{cfg["train_eps"]}, Reward: {ep_reward:.2f}, Steps:{ep_step}') - print("Finish training!") - return {'episodes':range(len(rewards)),'rewards':rewards,'steps':steps} - def test(self,cfg,env,agent): - print("Start testing!") - print(f"Env: {cfg['env_name']}, Algorithm: {cfg['algo_name']}, Device: {cfg['device']}") - rewards = [] # record rewards for all episodes - steps = [] # record steps for all episodes - for i_ep in range(cfg['test_eps']): - ep_reward = 0 # reward per episode - ep_step = 0 - state = env.reset() # reset and obtain initial state - for _ in range(cfg['ep_max_steps']): - action,_,_ = agent.predict_action(state) # predict action - next_state, reward, done, _ = env.step(action) - state = next_state - ep_reward += reward - ep_step += 1 - if done: - break - rewards.append(ep_reward) - steps.append(ep_step) - print(f"Episode: {i_ep+1}/{cfg['test_eps']}, Steps:{ep_step}, Reward: {ep_reward:.2f}") - print("Finish testing!") - return {'episodes':range(len(rewards)),'rewards':rewards,'steps':steps} - -if __name__ == "__main__": - main = Main() - main.run() - - - - diff --git a/projects/codes/A2C/main2.py b/projects/codes/A2C/main2.py index c81754f..60bd7c2 100644 --- a/projects/codes/A2C/main2.py +++ b/projects/codes/A2C/main2.py @@ -1,3 +1,13 @@ +#!/usr/bin/env python +# coding=utf-8 +''' +Author: JiangJi +Email: johnjim0816@gmail.com +Date: 2022-09-19 14:48:16 +LastEditor: JiangJi +LastEditTime: 2022-10-30 01:21:15 +Discription: #TODO,待更新模版 +''' import sys,os os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE" # avoid "OMP: Error #15: Initializing libiomp5md.dll, but found libiomp5md.dll already initialized." 
curr_path = os.path.dirname(os.path.abspath(__file__)) # current path diff --git a/projects/codes/A2C/outputs/CartPole-v0/20220829-135818/models/a2c_checkpoint.pt b/projects/codes/A2C/outputs/CartPole-v0/20220829-135818/models/a2c_checkpoint.pt deleted file mode 100644 index c346b1b..0000000 Binary files a/projects/codes/A2C/outputs/CartPole-v0/20220829-135818/models/a2c_checkpoint.pt and /dev/null differ diff --git a/projects/codes/A2C/outputs/CartPole-v0/20220829-135818/results/params.json b/projects/codes/A2C/outputs/CartPole-v0/20220829-135818/results/params.json deleted file mode 100644 index 2ce53a7..0000000 --- a/projects/codes/A2C/outputs/CartPole-v0/20220829-135818/results/params.json +++ /dev/null @@ -1,19 +0,0 @@ -{ - "algo_name": "A2C", - "env_name": "CartPole-v0", - "train_eps": 2000, - "test_eps": 20, - "ep_max_steps": 100000, - "gamma": 0.99, - "lr": 0.0003, - "actor_hidden_dim": 256, - "critic_hidden_dim": 256, - "device": "cpu", - "seed": 10, - "show_fig": false, - "save_fig": true, - "result_path": "/Users/jj/Desktop/rl-tutorials/codes/A2C/outputs/CartPole-v0/20220829-135818/results/", - "model_path": "/Users/jj/Desktop/rl-tutorials/codes/A2C/outputs/CartPole-v0/20220829-135818/models/", - "n_states": 4, - "n_actions": 2 -} \ No newline at end of file diff --git a/projects/codes/A2C/outputs/CartPole-v0/20220829-135818/results/testing_curve.png b/projects/codes/A2C/outputs/CartPole-v0/20220829-135818/results/testing_curve.png deleted file mode 100644 index b1bbebb..0000000 Binary files a/projects/codes/A2C/outputs/CartPole-v0/20220829-135818/results/testing_curve.png and /dev/null differ diff --git a/projects/codes/A2C/outputs/CartPole-v0/20220829-135818/results/training_curve.png b/projects/codes/A2C/outputs/CartPole-v0/20220829-135818/results/training_curve.png deleted file mode 100644 index 4410e5e..0000000 Binary files a/projects/codes/A2C/outputs/CartPole-v0/20220829-135818/results/training_curve.png and /dev/null differ diff --git 
a/projects/codes/A2C/outputs/CartPole-v0/20220829-135818/results/training_results.csv b/projects/codes/A2C/outputs/CartPole-v0/20220829-135818/results/training_results.csv deleted file mode 100644 index 7d5debb..0000000 --- a/projects/codes/A2C/outputs/CartPole-v0/20220829-135818/results/training_results.csv +++ /dev/null @@ -1,2001 +0,0 @@ -episodes,rewards,steps -0,16.0,16 -1,17.0,17 -2,19.0,19 -3,95.0,95 -4,13.0,13 -5,22.0,22 -6,15.0,15 -7,20.0,20 -8,13.0,13 -9,20.0,20 -10,21.0,21 -11,59.0,59 -12,16.0,16 -13,35.0,35 -14,17.0,17 -15,22.0,22 -16,28.0,28 -17,19.0,19 -18,12.0,12 -19,17.0,17 -20,16.0,16 -21,28.0,28 -22,11.0,11 -23,11.0,11 -24,11.0,11 -25,18.0,18 -26,15.0,15 -27,33.0,33 -28,21.0,21 -29,34.0,34 -30,11.0,11 -31,16.0,16 -32,27.0,27 -33,19.0,19 -34,16.0,16 -35,33.0,33 -36,21.0,21 -37,12.0,12 -38,26.0,26 -39,34.0,34 -40,11.0,11 -41,38.0,38 -42,13.0,13 -43,12.0,12 -44,20.0,20 -45,17.0,17 -46,10.0,10 -47,20.0,20 -48,22.0,22 -49,21.0,21 -50,20.0,20 -51,32.0,32 -52,10.0,10 -53,33.0,33 -54,25.0,25 -55,30.0,30 -56,22.0,22 -57,25.0,25 -58,19.0,19 -59,12.0,12 -60,9.0,9 -61,11.0,11 -62,12.0,12 -63,28.0,28 -64,12.0,12 -65,12.0,12 -66,12.0,12 -67,34.0,34 -68,12.0,12 -69,25.0,25 -70,13.0,13 -71,26.0,26 -72,13.0,13 -73,22.0,22 -74,24.0,24 -75,9.0,9 -76,14.0,14 -77,17.0,17 -78,14.0,14 -79,25.0,25 -80,23.0,23 -81,38.0,38 -82,30.0,30 -83,28.0,28 -84,25.0,25 -85,16.0,16 -86,13.0,13 -87,34.0,34 -88,16.0,16 -89,48.0,48 -90,12.0,12 -91,25.0,25 -92,25.0,25 -93,17.0,17 -94,13.0,13 -95,12.0,12 -96,23.0,23 -97,22.0,22 -98,12.0,12 -99,16.0,16 -100,16.0,16 -101,10.0,10 -102,14.0,14 -103,20.0,20 -104,13.0,13 -105,16.0,16 -106,14.0,14 -107,22.0,22 -108,17.0,17 -109,19.0,19 -110,26.0,26 -111,16.0,16 -112,22.0,22 -113,20.0,20 -114,27.0,27 -115,16.0,16 -116,40.0,40 -117,14.0,14 -118,15.0,15 -119,40.0,40 -120,23.0,23 -121,32.0,32 -122,13.0,13 -123,33.0,33 -124,18.0,18 -125,26.0,26 -126,30.0,30 -127,28.0,28 -128,12.0,12 -129,45.0,45 -130,14.0,14 -131,40.0,40 -132,13.0,13 -133,16.0,16 
-134,78.0,78 -135,19.0,19 -136,19.0,19 -137,20.0,20 -138,26.0,26 -139,21.0,21 -140,28.0,28 -141,17.0,17 -142,19.0,19 -143,13.0,13 -144,54.0,54 -145,41.0,41 -146,10.0,10 -147,15.0,15 -148,14.0,14 -149,19.0,19 -150,19.0,19 -151,32.0,32 -152,39.0,39 -153,36.0,36 -154,21.0,21 -155,58.0,58 -156,15.0,15 -157,55.0,55 -158,16.0,16 -159,46.0,46 -160,25.0,25 -161,15.0,15 -162,13.0,13 -163,18.0,18 -164,19.0,19 -165,22.0,22 -166,17.0,17 -167,48.0,48 -168,28.0,28 -169,29.0,29 -170,60.0,60 -171,12.0,12 -172,34.0,34 -173,18.0,18 -174,20.0,20 -175,18.0,18 -176,29.0,29 -177,14.0,14 -178,23.0,23 -179,26.0,26 -180,23.0,23 -181,77.0,77 -182,46.0,46 -183,25.0,25 -184,37.0,37 -185,12.0,12 -186,12.0,12 -187,36.0,36 -188,30.0,30 -189,135.0,135 -190,11.0,11 -191,18.0,18 -192,34.0,34 -193,12.0,12 -194,22.0,22 -195,19.0,19 -196,21.0,21 -197,26.0,26 -198,13.0,13 -199,128.0,128 -200,11.0,11 -201,21.0,21 -202,27.0,27 -203,11.0,11 -204,14.0,14 -205,70.0,70 -206,10.0,10 -207,18.0,18 -208,35.0,35 -209,80.0,80 -210,19.0,19 -211,95.0,95 -212,14.0,14 -213,20.0,20 -214,30.0,30 -215,19.0,19 -216,20.0,20 -217,54.0,54 -218,27.0,27 -219,19.0,19 -220,30.0,30 -221,19.0,19 -222,55.0,55 -223,29.0,29 -224,65.0,65 -225,19.0,19 -226,38.0,38 -227,14.0,14 -228,62.0,62 -229,37.0,37 -230,50.0,50 -231,40.0,40 -232,39.0,39 -233,16.0,16 -234,18.0,18 -235,86.0,86 -236,45.0,45 -237,37.0,37 -238,35.0,35 -239,20.0,20 -240,22.0,22 -241,40.0,40 -242,29.0,29 -243,17.0,17 -244,34.0,34 -245,91.0,91 -246,31.0,31 -247,69.0,69 -248,31.0,31 -249,30.0,30 -250,30.0,30 -251,23.0,23 -252,22.0,22 -253,52.0,52 -254,25.0,25 -255,32.0,32 -256,37.0,37 -257,31.0,31 -258,18.0,18 -259,60.0,60 -260,15.0,15 -261,23.0,23 -262,34.0,34 -263,43.0,43 -264,66.0,66 -265,15.0,15 -266,19.0,19 -267,55.0,55 -268,65.0,65 -269,50.0,50 -270,19.0,19 -271,37.0,37 -272,33.0,33 -273,33.0,33 -274,16.0,16 -275,19.0,19 -276,18.0,18 -277,52.0,52 -278,27.0,27 -279,48.0,48 -280,39.0,39 -281,29.0,29 -282,86.0,86 -283,37.0,37 -284,21.0,21 -285,21.0,21 -286,52.0,52 
-287,88.0,88 -288,30.0,30 -289,34.0,34 -290,22.0,22 -291,26.0,26 -292,18.0,18 -293,29.0,29 -294,57.0,57 -295,61.0,61 -296,73.0,73 -297,19.0,19 -298,32.0,32 -299,49.0,49 -300,58.0,58 -301,31.0,31 -302,29.0,29 -303,73.0,73 -304,21.0,21 -305,33.0,33 -306,23.0,23 -307,22.0,22 -308,36.0,36 -309,41.0,41 -310,19.0,19 -311,41.0,41 -312,67.0,67 -313,95.0,95 -314,43.0,43 -315,27.0,27 -316,29.0,29 -317,30.0,30 -318,23.0,23 -319,32.0,32 -320,48.0,48 -321,33.0,33 -322,32.0,32 -323,63.0,63 -324,20.0,20 -325,25.0,25 -326,23.0,23 -327,42.0,42 -328,15.0,15 -329,66.0,66 -330,40.0,40 -331,31.0,31 -332,73.0,73 -333,52.0,52 -334,31.0,31 -335,69.0,69 -336,33.0,33 -337,39.0,39 -338,21.0,21 -339,34.0,34 -340,32.0,32 -341,38.0,38 -342,29.0,29 -343,32.0,32 -344,14.0,14 -345,51.0,51 -346,38.0,38 -347,51.0,51 -348,28.0,28 -349,85.0,85 -350,34.0,34 -351,57.0,57 -352,21.0,21 -353,22.0,22 -354,27.0,27 -355,19.0,19 -356,77.0,77 -357,30.0,30 -358,28.0,28 -359,53.0,53 -360,48.0,48 -361,41.0,41 -362,26.0,26 -363,35.0,35 -364,52.0,52 -365,42.0,42 -366,21.0,21 -367,34.0,34 -368,43.0,43 -369,82.0,82 -370,43.0,43 -371,67.0,67 -372,56.0,56 -373,54.0,54 -374,27.0,27 -375,37.0,37 -376,32.0,32 -377,23.0,23 -378,32.0,32 -379,40.0,40 -380,26.0,26 -381,22.0,22 -382,23.0,23 -383,100.0,100 -384,45.0,45 -385,57.0,57 -386,51.0,51 -387,15.0,15 -388,17.0,17 -389,63.0,63 -390,67.0,67 -391,80.0,80 -392,64.0,64 -393,29.0,29 -394,74.0,74 -395,51.0,51 -396,88.0,88 -397,11.0,11 -398,27.0,27 -399,30.0,30 -400,22.0,22 -401,36.0,36 -402,13.0,13 -403,37.0,37 -404,86.0,86 -405,18.0,18 -406,25.0,25 -407,21.0,21 -408,30.0,30 -409,28.0,28 -410,43.0,43 -411,23.0,23 -412,17.0,17 -413,32.0,32 -414,25.0,25 -415,22.0,22 -416,36.0,36 -417,52.0,52 -418,33.0,33 -419,16.0,16 -420,30.0,30 -421,52.0,52 -422,59.0,59 -423,35.0,35 -424,39.0,39 -425,47.0,47 -426,30.0,30 -427,32.0,32 -428,42.0,42 -429,37.0,37 -430,35.0,35 -431,25.0,25 -432,39.0,39 -433,26.0,26 -434,58.0,58 -435,64.0,64 -436,30.0,30 -437,33.0,33 -438,42.0,42 -439,30.0,30 
-440,47.0,47 -441,69.0,69 -442,47.0,47 -443,40.0,40 -444,53.0,53 -445,38.0,38 -446,176.0,176 -447,116.0,116 -448,40.0,40 -449,86.0,86 -450,38.0,38 -451,39.0,39 -452,48.0,48 -453,22.0,22 -454,64.0,64 -455,30.0,30 -456,36.0,36 -457,46.0,46 -458,16.0,16 -459,103.0,103 -460,58.0,58 -461,16.0,16 -462,36.0,36 -463,21.0,21 -464,79.0,79 -465,29.0,29 -466,67.0,67 -467,59.0,59 -468,50.0,50 -469,72.0,72 -470,75.0,75 -471,26.0,26 -472,36.0,36 -473,35.0,35 -474,40.0,40 -475,49.0,49 -476,47.0,47 -477,42.0,42 -478,37.0,37 -479,33.0,33 -480,60.0,60 -481,34.0,34 -482,20.0,20 -483,69.0,69 -484,63.0,63 -485,49.0,49 -486,18.0,18 -487,68.0,68 -488,24.0,24 -489,79.0,79 -490,22.0,22 -491,39.0,39 -492,64.0,64 -493,20.0,20 -494,21.0,21 -495,22.0,22 -496,56.0,56 -497,56.0,56 -498,39.0,39 -499,64.0,64 -500,42.0,42 -501,40.0,40 -502,44.0,44 -503,30.0,30 -504,56.0,56 -505,137.0,137 -506,37.0,37 -507,19.0,19 -508,59.0,59 -509,29.0,29 -510,108.0,108 -511,53.0,53 -512,26.0,26 -513,43.0,43 -514,27.0,27 -515,34.0,34 -516,51.0,51 -517,35.0,35 -518,90.0,90 -519,64.0,64 -520,63.0,63 -521,33.0,33 -522,29.0,29 -523,48.0,48 -524,40.0,40 -525,55.0,55 -526,26.0,26 -527,69.0,69 -528,48.0,48 -529,50.0,50 -530,34.0,34 -531,31.0,31 -532,26.0,26 -533,60.0,60 -534,60.0,60 -535,80.0,80 -536,44.0,44 -537,62.0,62 -538,47.0,47 -539,79.0,79 -540,91.0,91 -541,84.0,84 -542,134.0,134 -543,49.0,49 -544,37.0,37 -545,23.0,23 -546,52.0,52 -547,52.0,52 -548,57.0,57 -549,69.0,69 -550,76.0,76 -551,33.0,33 -552,117.0,117 -553,83.0,83 -554,38.0,38 -555,45.0,45 -556,119.0,119 -557,105.0,105 -558,151.0,151 -559,59.0,59 -560,41.0,41 -561,49.0,49 -562,29.0,29 -563,61.0,61 -564,106.0,106 -565,43.0,43 -566,51.0,51 -567,54.0,54 -568,60.0,60 -569,29.0,29 -570,54.0,54 -571,47.0,47 -572,101.0,101 -573,26.0,26 -574,71.0,71 -575,115.0,115 -576,21.0,21 -577,47.0,47 -578,71.0,71 -579,61.0,61 -580,30.0,30 -581,72.0,72 -582,37.0,37 -583,62.0,62 -584,76.0,76 -585,49.0,49 -586,78.0,78 -587,62.0,62 -588,86.0,86 -589,56.0,56 -590,103.0,103 
-591,97.0,97 -592,53.0,53 -593,110.0,110 -594,91.0,91 -595,52.0,52 -596,67.0,67 -597,40.0,40 -598,71.0,71 -599,52.0,52 -600,49.0,49 -601,101.0,101 -602,93.0,93 -603,33.0,33 -604,56.0,56 -605,53.0,53 -606,103.0,103 -607,134.0,134 -608,83.0,83 -609,78.0,78 -610,25.0,25 -611,52.0,52 -612,61.0,61 -613,49.0,49 -614,91.0,91 -615,46.0,46 -616,34.0,34 -617,72.0,72 -618,62.0,62 -619,42.0,42 -620,63.0,63 -621,60.0,60 -622,178.0,178 -623,43.0,43 -624,66.0,66 -625,23.0,23 -626,42.0,42 -627,26.0,26 -628,73.0,73 -629,30.0,30 -630,39.0,39 -631,36.0,36 -632,47.0,47 -633,58.0,58 -634,45.0,45 -635,82.0,82 -636,55.0,55 -637,31.0,31 -638,71.0,71 -639,68.0,68 -640,119.0,119 -641,42.0,42 -642,72.0,72 -643,36.0,36 -644,47.0,47 -645,126.0,126 -646,64.0,64 -647,60.0,60 -648,147.0,147 -649,34.0,34 -650,17.0,17 -651,59.0,59 -652,46.0,46 -653,126.0,126 -654,76.0,76 -655,85.0,85 -656,68.0,68 -657,36.0,36 -658,53.0,53 -659,116.0,116 -660,99.0,99 -661,29.0,29 -662,22.0,22 -663,89.0,89 -664,166.0,166 -665,73.0,73 -666,28.0,28 -667,110.0,110 -668,92.0,92 -669,76.0,76 -670,65.0,65 -671,48.0,48 -672,27.0,27 -673,38.0,38 -674,44.0,44 -675,70.0,70 -676,103.0,103 -677,48.0,48 -678,56.0,56 -679,51.0,51 -680,30.0,30 -681,118.0,118 -682,35.0,35 -683,12.0,12 -684,64.0,64 -685,105.0,105 -686,23.0,23 -687,52.0,52 -688,153.0,153 -689,65.0,65 -690,44.0,44 -691,38.0,38 -692,55.0,55 -693,37.0,37 -694,18.0,18 -695,106.0,106 -696,175.0,175 -697,88.0,88 -698,22.0,22 -699,60.0,60 -700,22.0,22 -701,78.0,78 -702,54.0,54 -703,61.0,61 -704,50.0,50 -705,55.0,55 -706,34.0,34 -707,23.0,23 -708,39.0,39 -709,45.0,45 -710,43.0,43 -711,113.0,113 -712,59.0,59 -713,36.0,36 -714,71.0,71 -715,35.0,35 -716,45.0,45 -717,42.0,42 -718,40.0,40 -719,26.0,26 -720,52.0,52 -721,29.0,29 -722,47.0,47 -723,45.0,45 -724,73.0,73 -725,42.0,42 -726,42.0,42 -727,30.0,30 -728,119.0,119 -729,60.0,60 -730,18.0,18 -731,18.0,18 -732,21.0,21 -733,33.0,33 -734,45.0,45 -735,35.0,35 -736,28.0,28 -737,37.0,37 -738,26.0,26 -739,28.0,28 -740,31.0,31 
-741,130.0,130 -742,48.0,48 -743,65.0,65 -744,38.0,38 -745,54.0,54 -746,92.0,92 -747,53.0,53 -748,42.0,42 -749,87.0,87 -750,65.0,65 -751,45.0,45 -752,58.0,58 -753,27.0,27 -754,20.0,20 -755,59.0,59 -756,105.0,105 -757,54.0,54 -758,27.0,27 -759,46.0,46 -760,29.0,29 -761,15.0,15 -762,58.0,58 -763,22.0,22 -764,45.0,45 -765,44.0,44 -766,81.0,81 -767,61.0,61 -768,23.0,23 -769,134.0,134 -770,38.0,38 -771,200.0,200 -772,63.0,63 -773,62.0,62 -774,36.0,36 -775,68.0,68 -776,94.0,94 -777,26.0,26 -778,61.0,61 -779,77.0,77 -780,71.0,71 -781,63.0,63 -782,40.0,40 -783,26.0,26 -784,126.0,126 -785,59.0,59 -786,64.0,64 -787,57.0,57 -788,99.0,99 -789,47.0,47 -790,68.0,68 -791,38.0,38 -792,57.0,57 -793,42.0,42 -794,79.0,79 -795,108.0,108 -796,63.0,63 -797,62.0,62 -798,189.0,189 -799,157.0,157 -800,93.0,93 -801,53.0,53 -802,56.0,56 -803,144.0,144 -804,63.0,63 -805,41.0,41 -806,134.0,134 -807,23.0,23 -808,90.0,90 -809,124.0,124 -810,42.0,42 -811,40.0,40 -812,29.0,29 -813,46.0,46 -814,160.0,160 -815,34.0,34 -816,91.0,91 -817,60.0,60 -818,50.0,50 -819,113.0,113 -820,108.0,108 -821,56.0,56 -822,200.0,200 -823,154.0,154 -824,78.0,78 -825,55.0,55 -826,136.0,136 -827,66.0,66 -828,81.0,81 -829,23.0,23 -830,63.0,63 -831,85.0,85 -832,91.0,91 -833,85.0,85 -834,17.0,17 -835,85.0,85 -836,152.0,152 -837,59.0,59 -838,40.0,40 -839,103.0,103 -840,135.0,135 -841,50.0,50 -842,22.0,22 -843,75.0,75 -844,97.0,97 -845,59.0,59 -846,57.0,57 -847,122.0,122 -848,100.0,100 -849,132.0,132 -850,53.0,53 -851,106.0,106 -852,87.0,87 -853,82.0,82 -854,154.0,154 -855,139.0,139 -856,27.0,27 -857,35.0,35 -858,60.0,60 -859,188.0,188 -860,116.0,116 -861,160.0,160 -862,190.0,190 -863,61.0,61 -864,122.0,122 -865,97.0,97 -866,54.0,54 -867,24.0,24 -868,122.0,122 -869,161.0,161 -870,40.0,40 -871,165.0,165 -872,145.0,145 -873,155.0,155 -874,90.0,90 -875,58.0,58 -876,53.0,53 -877,47.0,47 -878,53.0,53 -879,86.0,86 -880,56.0,56 -881,152.0,152 -882,77.0,77 -883,50.0,50 -884,85.0,85 -885,200.0,200 -886,96.0,96 -887,85.0,85 -888,44.0,44 
-889,39.0,39 -890,200.0,200 -891,164.0,164 -892,36.0,36 -893,139.0,139 -894,44.0,44 -895,46.0,46 -896,103.0,103 -897,168.0,168 -898,189.0,189 -899,200.0,200 -900,69.0,69 -901,71.0,71 -902,147.0,147 -903,140.0,140 -904,200.0,200 -905,82.0,82 -906,129.0,129 -907,164.0,164 -908,28.0,28 -909,73.0,73 -910,174.0,174 -911,176.0,176 -912,132.0,132 -913,149.0,149 -914,93.0,93 -915,52.0,52 -916,93.0,93 -917,33.0,33 -918,154.0,154 -919,200.0,200 -920,200.0,200 -921,200.0,200 -922,67.0,67 -923,83.0,83 -924,162.0,162 -925,41.0,41 -926,103.0,103 -927,200.0,200 -928,131.0,131 -929,117.0,117 -930,77.0,77 -931,45.0,45 -932,144.0,144 -933,123.0,123 -934,122.0,122 -935,29.0,29 -936,89.0,89 -937,71.0,71 -938,200.0,200 -939,80.0,80 -940,98.0,98 -941,143.0,143 -942,200.0,200 -943,95.0,95 -944,83.0,83 -945,62.0,62 -946,62.0,62 -947,118.0,118 -948,144.0,144 -949,113.0,113 -950,110.0,110 -951,139.0,139 -952,70.0,70 -953,79.0,79 -954,176.0,176 -955,151.0,151 -956,24.0,24 -957,50.0,50 -958,90.0,90 -959,20.0,20 -960,65.0,65 -961,176.0,176 -962,37.0,37 -963,48.0,48 -964,89.0,89 -965,190.0,190 -966,155.0,155 -967,26.0,26 -968,200.0,200 -969,186.0,186 -970,60.0,60 -971,115.0,115 -972,115.0,115 -973,121.0,121 -974,177.0,177 -975,200.0,200 -976,51.0,51 -977,105.0,105 -978,200.0,200 -979,68.0,68 -980,170.0,170 -981,70.0,70 -982,55.0,55 -983,70.0,70 -984,66.0,66 -985,161.0,161 -986,40.0,40 -987,200.0,200 -988,107.0,107 -989,80.0,80 -990,128.0,128 -991,154.0,154 -992,101.0,101 -993,178.0,178 -994,129.0,129 -995,128.0,128 -996,146.0,146 -997,142.0,142 -998,200.0,200 -999,62.0,62 -1000,19.0,19 -1001,82.0,82 -1002,63.0,63 -1003,129.0,129 -1004,54.0,54 -1005,125.0,125 -1006,113.0,113 -1007,93.0,93 -1008,200.0,200 -1009,48.0,48 -1010,58.0,58 -1011,66.0,66 -1012,41.0,41 -1013,145.0,145 -1014,42.0,42 -1015,185.0,185 -1016,199.0,199 -1017,200.0,200 -1018,125.0,125 -1019,145.0,145 -1020,32.0,32 -1021,141.0,141 -1022,195.0,195 -1023,175.0,175 -1024,162.0,162 -1025,127.0,127 -1026,154.0,154 -1027,166.0,166 
-1028,200.0,200 -1029,188.0,188 -1030,200.0,200 -1031,200.0,200 -1032,162.0,162 -1033,141.0,141 -1034,200.0,200 -1035,155.0,155 -1036,134.0,134 -1037,146.0,146 -1038,192.0,192 -1039,136.0,136 -1040,200.0,200 -1041,149.0,149 -1042,113.0,113 -1043,40.0,40 -1044,178.0,178 -1045,126.0,126 -1046,200.0,200 -1047,37.0,37 -1048,200.0,200 -1049,141.0,141 -1050,118.0,118 -1051,34.0,34 -1052,142.0,142 -1053,65.0,65 -1054,200.0,200 -1055,133.0,133 -1056,137.0,137 -1057,148.0,148 -1058,148.0,148 -1059,136.0,136 -1060,159.0,159 -1061,48.0,48 -1062,109.0,109 -1063,65.0,65 -1064,130.0,130 -1065,191.0,191 -1066,200.0,200 -1067,200.0,200 -1068,48.0,48 -1069,200.0,200 -1070,65.0,65 -1071,164.0,164 -1072,200.0,200 -1073,156.0,156 -1074,200.0,200 -1075,131.0,131 -1076,126.0,126 -1077,200.0,200 -1078,200.0,200 -1079,32.0,32 -1080,175.0,175 -1081,200.0,200 -1082,84.0,84 -1083,81.0,81 -1084,183.0,183 -1085,51.0,51 -1086,155.0,155 -1087,146.0,146 -1088,108.0,108 -1089,176.0,176 -1090,189.0,189 -1091,200.0,200 -1092,164.0,164 -1093,70.0,70 -1094,200.0,200 -1095,172.0,172 -1096,163.0,163 -1097,168.0,168 -1098,181.0,181 -1099,200.0,200 -1100,33.0,33 -1101,200.0,200 -1102,58.0,58 -1103,200.0,200 -1104,156.0,156 -1105,200.0,200 -1106,138.0,138 -1107,200.0,200 -1108,81.0,81 -1109,105.0,105 -1110,87.0,87 -1111,170.0,170 -1112,200.0,200 -1113,200.0,200 -1114,200.0,200 -1115,200.0,200 -1116,200.0,200 -1117,200.0,200 -1118,200.0,200 -1119,158.0,158 -1120,64.0,64 -1121,138.0,138 -1122,200.0,200 -1123,158.0,158 -1124,86.0,86 -1125,125.0,125 -1126,105.0,105 -1127,200.0,200 -1128,120.0,120 -1129,53.0,53 -1130,127.0,127 -1131,200.0,200 -1132,48.0,48 -1133,200.0,200 -1134,144.0,144 -1135,42.0,42 -1136,100.0,100 -1137,160.0,160 -1138,200.0,200 -1139,200.0,200 -1140,200.0,200 -1141,200.0,200 -1142,135.0,135 -1143,184.0,184 -1144,184.0,184 -1145,168.0,168 -1146,162.0,162 -1147,52.0,52 -1148,120.0,120 -1149,133.0,133 -1150,200.0,200 -1151,178.0,178 -1152,200.0,200 -1153,22.0,22 -1154,200.0,200 -1155,108.0,108 
-1156,200.0,200 -1157,200.0,200 -1158,102.0,102 -1159,200.0,200 -1160,200.0,200 -1161,200.0,200 -1162,65.0,65 -1163,131.0,131 -1164,129.0,129 -1165,136.0,136 -1166,137.0,137 -1167,40.0,40 -1168,130.0,130 -1169,99.0,99 -1170,131.0,131 -1171,200.0,200 -1172,172.0,172 -1173,200.0,200 -1174,200.0,200 -1175,83.0,83 -1176,151.0,151 -1177,200.0,200 -1178,70.0,70 -1179,84.0,84 -1180,172.0,172 -1181,200.0,200 -1182,22.0,22 -1183,118.0,118 -1184,200.0,200 -1185,200.0,200 -1186,200.0,200 -1187,200.0,200 -1188,200.0,200 -1189,145.0,145 -1190,121.0,121 -1191,159.0,159 -1192,126.0,126 -1193,49.0,49 -1194,200.0,200 -1195,119.0,119 -1196,155.0,155 -1197,71.0,71 -1198,200.0,200 -1199,79.0,79 -1200,68.0,68 -1201,200.0,200 -1202,200.0,200 -1203,143.0,143 -1204,200.0,200 -1205,54.0,54 -1206,180.0,180 -1207,158.0,158 -1208,149.0,149 -1209,170.0,170 -1210,118.0,118 -1211,155.0,155 -1212,200.0,200 -1213,200.0,200 -1214,143.0,143 -1215,200.0,200 -1216,200.0,200 -1217,55.0,55 -1218,200.0,200 -1219,200.0,200 -1220,168.0,168 -1221,170.0,170 -1222,112.0,112 -1223,108.0,108 -1224,104.0,104 -1225,135.0,135 -1226,131.0,131 -1227,98.0,98 -1228,63.0,63 -1229,200.0,200 -1230,119.0,119 -1231,130.0,130 -1232,113.0,113 -1233,170.0,170 -1234,200.0,200 -1235,164.0,164 -1236,149.0,149 -1237,200.0,200 -1238,116.0,116 -1239,200.0,200 -1240,133.0,133 -1241,200.0,200 -1242,164.0,164 -1243,154.0,154 -1244,191.0,191 -1245,128.0,128 -1246,119.0,119 -1247,114.0,114 -1248,200.0,200 -1249,111.0,111 -1250,78.0,78 -1251,200.0,200 -1252,200.0,200 -1253,172.0,172 -1254,200.0,200 -1255,105.0,105 -1256,181.0,181 -1257,200.0,200 -1258,83.0,83 -1259,200.0,200 -1260,176.0,176 -1261,200.0,200 -1262,147.0,147 -1263,165.0,165 -1264,167.0,167 -1265,183.0,183 -1266,117.0,117 -1267,200.0,200 -1268,200.0,200 -1269,171.0,171 -1270,20.0,20 -1271,200.0,200 -1272,200.0,200 -1273,200.0,200 -1274,109.0,109 -1275,142.0,142 -1276,117.0,117 -1277,200.0,200 -1278,176.0,176 -1279,200.0,200 -1280,101.0,101 -1281,200.0,200 -1282,130.0,130 
-1283,200.0,200 -1284,111.0,111 -1285,124.0,124 -1286,178.0,178 -1287,200.0,200 -1288,184.0,184 -1289,200.0,200 -1290,200.0,200 -1291,200.0,200 -1292,200.0,200 -1293,130.0,130 -1294,200.0,200 -1295,134.0,134 -1296,195.0,195 -1297,200.0,200 -1298,62.0,62 -1299,200.0,200 -1300,200.0,200 -1301,165.0,165 -1302,190.0,190 -1303,200.0,200 -1304,200.0,200 -1305,168.0,168 -1306,200.0,200 -1307,64.0,64 -1308,122.0,122 -1309,200.0,200 -1310,134.0,134 -1311,200.0,200 -1312,200.0,200 -1313,200.0,200 -1314,150.0,150 -1315,187.0,187 -1316,130.0,130 -1317,140.0,140 -1318,157.0,157 -1319,200.0,200 -1320,149.0,149 -1321,200.0,200 -1322,58.0,58 -1323,84.0,84 -1324,140.0,140 -1325,139.0,139 -1326,117.0,117 -1327,175.0,175 -1328,135.0,135 -1329,169.0,169 -1330,200.0,200 -1331,143.0,143 -1332,127.0,127 -1333,127.0,127 -1334,148.0,148 -1335,200.0,200 -1336,136.0,136 -1337,200.0,200 -1338,200.0,200 -1339,175.0,175 -1340,102.0,102 -1341,200.0,200 -1342,97.0,97 -1343,120.0,120 -1344,59.0,59 -1345,200.0,200 -1346,200.0,200 -1347,104.0,104 -1348,128.0,128 -1349,200.0,200 -1350,195.0,195 -1351,96.0,96 -1352,196.0,196 -1353,200.0,200 -1354,58.0,58 -1355,200.0,200 -1356,200.0,200 -1357,114.0,114 -1358,104.0,104 -1359,200.0,200 -1360,179.0,179 -1361,200.0,200 -1362,200.0,200 -1363,140.0,140 -1364,138.0,138 -1365,57.0,57 -1366,165.0,165 -1367,174.0,174 -1368,199.0,199 -1369,110.0,110 -1370,200.0,200 -1371,154.0,154 -1372,200.0,200 -1373,78.0,78 -1374,200.0,200 -1375,185.0,185 -1376,167.0,167 -1377,161.0,161 -1378,155.0,155 -1379,117.0,117 -1380,128.0,128 -1381,94.0,94 -1382,200.0,200 -1383,121.0,121 -1384,61.0,61 -1385,21.0,21 -1386,105.0,105 -1387,185.0,185 -1388,200.0,200 -1389,124.0,124 -1390,200.0,200 -1391,133.0,133 -1392,200.0,200 -1393,153.0,153 -1394,200.0,200 -1395,200.0,200 -1396,152.0,152 -1397,146.0,146 -1398,200.0,200 -1399,183.0,183 -1400,195.0,195 -1401,172.0,172 -1402,151.0,151 -1403,122.0,122 -1404,200.0,200 -1405,200.0,200 -1406,200.0,200 -1407,200.0,200 -1408,130.0,130 
-1409,148.0,148 -1410,200.0,200 -1411,200.0,200 -1412,200.0,200 -1413,157.0,157 -1414,136.0,136 -1415,115.0,115 -1416,200.0,200 -1417,105.0,105 -1418,124.0,124 -1419,144.0,144 -1420,34.0,34 -1421,151.0,151 -1422,101.0,101 -1423,64.0,64 -1424,200.0,200 -1425,100.0,100 -1426,54.0,54 -1427,132.0,132 -1428,200.0,200 -1429,131.0,131 -1430,51.0,51 -1431,123.0,123 -1432,99.0,99 -1433,200.0,200 -1434,200.0,200 -1435,144.0,144 -1436,166.0,166 -1437,122.0,122 -1438,147.0,147 -1439,200.0,200 -1440,103.0,103 -1441,164.0,164 -1442,76.0,76 -1443,159.0,159 -1444,152.0,152 -1445,200.0,200 -1446,129.0,129 -1447,124.0,124 -1448,40.0,40 -1449,200.0,200 -1450,117.0,117 -1451,175.0,175 -1452,51.0,51 -1453,101.0,101 -1454,117.0,117 -1455,179.0,179 -1456,44.0,44 -1457,190.0,190 -1458,135.0,135 -1459,183.0,183 -1460,118.0,118 -1461,200.0,200 -1462,109.0,109 -1463,86.0,86 -1464,147.0,147 -1465,200.0,200 -1466,124.0,124 -1467,128.0,128 -1468,156.0,156 -1469,200.0,200 -1470,167.0,167 -1471,197.0,197 -1472,75.0,75 -1473,168.0,168 -1474,114.0,114 -1475,153.0,153 -1476,146.0,146 -1477,188.0,188 -1478,144.0,144 -1479,200.0,200 -1480,51.0,51 -1481,35.0,35 -1482,152.0,152 -1483,161.0,161 -1484,114.0,114 -1485,200.0,200 -1486,161.0,161 -1487,200.0,200 -1488,93.0,93 -1489,116.0,116 -1490,152.0,152 -1491,200.0,200 -1492,200.0,200 -1493,200.0,200 -1494,86.0,86 -1495,200.0,200 -1496,178.0,178 -1497,200.0,200 -1498,200.0,200 -1499,154.0,154 -1500,135.0,135 -1501,200.0,200 -1502,146.0,146 -1503,78.0,78 -1504,115.0,115 -1505,189.0,189 -1506,133.0,133 -1507,123.0,123 -1508,158.0,158 -1509,200.0,200 -1510,200.0,200 -1511,200.0,200 -1512,200.0,200 -1513,200.0,200 -1514,200.0,200 -1515,200.0,200 -1516,119.0,119 -1517,162.0,162 -1518,200.0,200 -1519,114.0,114 -1520,200.0,200 -1521,128.0,128 -1522,200.0,200 -1523,200.0,200 -1524,130.0,130 -1525,65.0,65 -1526,200.0,200 -1527,200.0,200 -1528,200.0,200 -1529,188.0,188 -1530,159.0,159 -1531,200.0,200 -1532,200.0,200 -1533,200.0,200 -1534,147.0,147 -1535,180.0,180 
-1536,152.0,152 -1537,178.0,178 -1538,131.0,131 -1539,118.0,118 -1540,153.0,153 -1541,197.0,197 -1542,200.0,200 -1543,200.0,200 -1544,178.0,178 -1545,67.0,67 -1546,137.0,137 -1547,51.0,51 -1548,160.0,160 -1549,200.0,200 -1550,124.0,124 -1551,109.0,109 -1552,181.0,181 -1553,182.0,182 -1554,136.0,136 -1555,91.0,91 -1556,159.0,159 -1557,192.0,192 -1558,106.0,106 -1559,200.0,200 -1560,169.0,169 -1561,167.0,167 -1562,141.0,141 -1563,127.0,127 -1564,71.0,71 -1565,134.0,134 -1566,200.0,200 -1567,115.0,115 -1568,99.0,99 -1569,184.0,184 -1570,200.0,200 -1571,133.0,133 -1572,153.0,153 -1573,200.0,200 -1574,194.0,194 -1575,169.0,169 -1576,113.0,113 -1577,147.0,147 -1578,140.0,140 -1579,200.0,200 -1580,113.0,113 -1581,181.0,181 -1582,200.0,200 -1583,182.0,182 -1584,185.0,185 -1585,197.0,197 -1586,200.0,200 -1587,151.0,151 -1588,49.0,49 -1589,137.0,137 -1590,166.0,166 -1591,149.0,149 -1592,126.0,126 -1593,73.0,73 -1594,127.0,127 -1595,104.0,104 -1596,65.0,65 -1597,63.0,63 -1598,126.0,126 -1599,181.0,181 -1600,132.0,132 -1601,89.0,89 -1602,130.0,130 -1603,150.0,150 -1604,100.0,100 -1605,139.0,139 -1606,119.0,119 -1607,48.0,48 -1608,80.0,80 -1609,105.0,105 -1610,85.0,85 -1611,200.0,200 -1612,142.0,142 -1613,95.0,95 -1614,50.0,50 -1615,51.0,51 -1616,124.0,124 -1617,47.0,47 -1618,159.0,159 -1619,154.0,154 -1620,200.0,200 -1621,88.0,88 -1622,65.0,65 -1623,111.0,111 -1624,99.0,99 -1625,120.0,120 -1626,127.0,127 -1627,43.0,43 -1628,80.0,80 -1629,163.0,163 -1630,90.0,90 -1631,154.0,154 -1632,127.0,127 -1633,39.0,39 -1634,200.0,200 -1635,161.0,161 -1636,119.0,119 -1637,156.0,156 -1638,200.0,200 -1639,200.0,200 -1640,41.0,41 -1641,200.0,200 -1642,136.0,136 -1643,157.0,157 -1644,142.0,142 -1645,125.0,125 -1646,155.0,155 -1647,139.0,139 -1648,122.0,122 -1649,116.0,116 -1650,200.0,200 -1651,144.0,144 -1652,170.0,170 -1653,200.0,200 -1654,103.0,103 -1655,105.0,105 -1656,193.0,193 -1657,122.0,122 -1658,200.0,200 -1659,191.0,191 -1660,200.0,200 -1661,200.0,200 -1662,200.0,200 -1663,200.0,200 
-1664,200.0,200 -1665,200.0,200 -1666,200.0,200 -1667,64.0,64 -1668,200.0,200 -1669,121.0,121 -1670,200.0,200 -1671,171.0,171 -1672,200.0,200 -1673,130.0,130 -1674,200.0,200 -1675,200.0,200 -1676,188.0,188 -1677,200.0,200 -1678,200.0,200 -1679,200.0,200 -1680,181.0,181 -1681,200.0,200 -1682,200.0,200 -1683,135.0,135 -1684,200.0,200 -1685,114.0,114 -1686,189.0,189 -1687,200.0,200 -1688,200.0,200 -1689,200.0,200 -1690,184.0,184 -1691,200.0,200 -1692,200.0,200 -1693,55.0,55 -1694,153.0,153 -1695,200.0,200 -1696,200.0,200 -1697,125.0,125 -1698,177.0,177 -1699,154.0,154 -1700,53.0,53 -1701,112.0,112 -1702,184.0,184 -1703,200.0,200 -1704,200.0,200 -1705,137.0,137 -1706,72.0,72 -1707,200.0,200 -1708,200.0,200 -1709,200.0,200 -1710,121.0,121 -1711,200.0,200 -1712,200.0,200 -1713,142.0,142 -1714,200.0,200 -1715,200.0,200 -1716,169.0,169 -1717,200.0,200 -1718,200.0,200 -1719,196.0,196 -1720,135.0,135 -1721,200.0,200 -1722,200.0,200 -1723,200.0,200 -1724,96.0,96 -1725,200.0,200 -1726,200.0,200 -1727,200.0,200 -1728,200.0,200 -1729,138.0,138 -1730,200.0,200 -1731,139.0,139 -1732,200.0,200 -1733,190.0,190 -1734,200.0,200 -1735,200.0,200 -1736,138.0,138 -1737,114.0,114 -1738,159.0,159 -1739,120.0,120 -1740,186.0,186 -1741,200.0,200 -1742,183.0,183 -1743,200.0,200 -1744,200.0,200 -1745,200.0,200 -1746,99.0,99 -1747,200.0,200 -1748,100.0,100 -1749,187.0,187 -1750,106.0,106 -1751,200.0,200 -1752,200.0,200 -1753,200.0,200 -1754,52.0,52 -1755,197.0,197 -1756,165.0,165 -1757,200.0,200 -1758,200.0,200 -1759,92.0,92 -1760,200.0,200 -1761,200.0,200 -1762,70.0,70 -1763,165.0,165 -1764,192.0,192 -1765,200.0,200 -1766,200.0,200 -1767,87.0,87 -1768,150.0,150 -1769,149.0,149 -1770,79.0,79 -1771,200.0,200 -1772,200.0,200 -1773,117.0,117 -1774,200.0,200 -1775,135.0,135 -1776,200.0,200 -1777,130.0,130 -1778,200.0,200 -1779,200.0,200 -1780,200.0,200 -1781,200.0,200 -1782,200.0,200 -1783,200.0,200 -1784,200.0,200 -1785,200.0,200 -1786,200.0,200 -1787,200.0,200 -1788,140.0,140 -1789,200.0,200 
-1790,200.0,200 -1791,42.0,42 -1792,198.0,198 -1793,200.0,200 -1794,200.0,200 -1795,85.0,85 -1796,164.0,164 -1797,99.0,99 -1798,151.0,151 -1799,200.0,200 -1800,200.0,200 -1801,199.0,199 -1802,200.0,200 -1803,190.0,190 -1804,114.0,114 -1805,200.0,200 -1806,200.0,200 -1807,161.0,161 -1808,200.0,200 -1809,187.0,187 -1810,145.0,145 -1811,200.0,200 -1812,200.0,200 -1813,200.0,200 -1814,96.0,96 -1815,163.0,163 -1816,160.0,160 -1817,200.0,200 -1818,200.0,200 -1819,50.0,50 -1820,200.0,200 -1821,102.0,102 -1822,200.0,200 -1823,200.0,200 -1824,200.0,200 -1825,200.0,200 -1826,109.0,109 -1827,200.0,200 -1828,129.0,129 -1829,200.0,200 -1830,171.0,171 -1831,183.0,183 -1832,106.0,106 -1833,200.0,200 -1834,136.0,136 -1835,100.0,100 -1836,200.0,200 -1837,188.0,188 -1838,200.0,200 -1839,200.0,200 -1840,162.0,162 -1841,200.0,200 -1842,200.0,200 -1843,177.0,177 -1844,200.0,200 -1845,200.0,200 -1846,200.0,200 -1847,200.0,200 -1848,166.0,166 -1849,200.0,200 -1850,53.0,53 -1851,200.0,200 -1852,200.0,200 -1853,153.0,153 -1854,190.0,190 -1855,200.0,200 -1856,200.0,200 -1857,200.0,200 -1858,200.0,200 -1859,136.0,136 -1860,200.0,200 -1861,143.0,143 -1862,45.0,45 -1863,129.0,129 -1864,200.0,200 -1865,200.0,200 -1866,200.0,200 -1867,200.0,200 -1868,60.0,60 -1869,150.0,150 -1870,174.0,174 -1871,157.0,157 -1872,198.0,198 -1873,200.0,200 -1874,91.0,91 -1875,200.0,200 -1876,112.0,112 -1877,159.0,159 -1878,186.0,186 -1879,200.0,200 -1880,82.0,82 -1881,192.0,192 -1882,147.0,147 -1883,200.0,200 -1884,200.0,200 -1885,174.0,174 -1886,181.0,181 -1887,200.0,200 -1888,74.0,74 -1889,200.0,200 -1890,200.0,200 -1891,200.0,200 -1892,157.0,157 -1893,200.0,200 -1894,200.0,200 -1895,180.0,180 -1896,170.0,170 -1897,200.0,200 -1898,135.0,135 -1899,200.0,200 -1900,175.0,175 -1901,200.0,200 -1902,200.0,200 -1903,118.0,118 -1904,147.0,147 -1905,44.0,44 -1906,200.0,200 -1907,58.0,58 -1908,185.0,185 -1909,200.0,200 -1910,200.0,200 -1911,200.0,200 -1912,78.0,78 -1913,190.0,190 -1914,177.0,177 -1915,112.0,112 
-1916,200.0,200 -1917,142.0,142 -1918,200.0,200 -1919,92.0,92 -1920,172.0,172 -1921,200.0,200 -1922,178.0,178 -1923,200.0,200 -1924,200.0,200 -1925,138.0,138 -1926,100.0,100 -1927,200.0,200 -1928,95.0,95 -1929,200.0,200 -1930,200.0,200 -1931,129.0,129 -1932,154.0,154 -1933,200.0,200 -1934,200.0,200 -1935,133.0,133 -1936,152.0,152 -1937,133.0,133 -1938,200.0,200 -1939,200.0,200 -1940,200.0,200 -1941,200.0,200 -1942,200.0,200 -1943,167.0,167 -1944,179.0,179 -1945,164.0,164 -1946,187.0,187 -1947,156.0,156 -1948,200.0,200 -1949,200.0,200 -1950,130.0,130 -1951,200.0,200 -1952,200.0,200 -1953,200.0,200 -1954,200.0,200 -1955,65.0,65 -1956,200.0,200 -1957,148.0,148 -1958,200.0,200 -1959,200.0,200 -1960,200.0,200 -1961,168.0,168 -1962,164.0,164 -1963,200.0,200 -1964,200.0,200 -1965,103.0,103 -1966,200.0,200 -1967,173.0,173 -1968,200.0,200 -1969,146.0,146 -1970,197.0,197 -1971,123.0,123 -1972,162.0,162 -1973,200.0,200 -1974,162.0,162 -1975,82.0,82 -1976,157.0,157 -1977,138.0,138 -1978,37.0,37 -1979,200.0,200 -1980,194.0,194 -1981,200.0,200 -1982,104.0,104 -1983,198.0,198 -1984,200.0,200 -1985,200.0,200 -1986,154.0,154 -1987,200.0,200 -1988,200.0,200 -1989,158.0,158 -1990,200.0,200 -1991,142.0,142 -1992,185.0,185 -1993,69.0,69 -1994,200.0,200 -1995,144.0,144 -1996,164.0,164 -1997,189.0,189 -1998,200.0,200 -1999,141.0,141 diff --git a/projects/codes/A2C/outputs/CartPole-v0/20220829-143327/models/actor_checkpoint.pt b/projects/codes/A2C/outputs/CartPole-v0/20220829-143327/models/actor_checkpoint.pt deleted file mode 100644 index 20d78c0..0000000 Binary files a/projects/codes/A2C/outputs/CartPole-v0/20220829-143327/models/actor_checkpoint.pt and /dev/null differ diff --git a/projects/codes/A2C/outputs/CartPole-v0/20220829-143327/models/critic_checkpoint.pt b/projects/codes/A2C/outputs/CartPole-v0/20220829-143327/models/critic_checkpoint.pt deleted file mode 100644 index c35547d..0000000 Binary files 
a/projects/codes/A2C/outputs/CartPole-v0/20220829-143327/models/critic_checkpoint.pt and /dev/null differ diff --git a/projects/codes/A2C/outputs/CartPole-v0/20220829-143327/results/params.json b/projects/codes/A2C/outputs/CartPole-v0/20220829-143327/results/params.json deleted file mode 100644 index 010058e..0000000 --- a/projects/codes/A2C/outputs/CartPole-v0/20220829-143327/results/params.json +++ /dev/null @@ -1 +0,0 @@ -{"algo_name": "A2C", "env_name": "CartPole-v0", "train_eps": 1600, "test_eps": 20, "ep_max_steps": 100000, "gamma": 0.99, "actor_lr": 0.0003, "critic_lr": 0.001, "actor_hidden_dim": 256, "critic_hidden_dim": 256, "device": "cpu", "seed": 10, "show_fig": false, "save_fig": true, "result_path": "/Users/jj/Desktop/rl-tutorials/codes/A2C/outputs/CartPole-v0/20220829-143327/results/", "model_path": "/Users/jj/Desktop/rl-tutorials/codes/A2C/outputs/CartPole-v0/20220829-143327/models/", "n_states": 4, "n_actions": 2} \ No newline at end of file diff --git a/projects/codes/A2C/outputs/CartPole-v0/20220829-143327/results/testing_curve.png b/projects/codes/A2C/outputs/CartPole-v0/20220829-143327/results/testing_curve.png deleted file mode 100644 index 96a9a22..0000000 Binary files a/projects/codes/A2C/outputs/CartPole-v0/20220829-143327/results/testing_curve.png and /dev/null differ diff --git a/projects/codes/A2C/outputs/CartPole-v0/20220829-143327/results/testing_results.csv b/projects/codes/A2C/outputs/CartPole-v0/20220829-143327/results/testing_results.csv deleted file mode 100644 index ebf3893..0000000 --- a/projects/codes/A2C/outputs/CartPole-v0/20220829-143327/results/testing_results.csv +++ /dev/null @@ -1,21 +0,0 @@ -episodes,rewards,steps -0,177.0,177 -1,180.0,180 -2,200.0,200 -3,200.0,200 -4,167.0,167 -5,124.0,124 -6,128.0,128 -7,200.0,200 -8,200.0,200 -9,200.0,200 -10,186.0,186 -11,187.0,187 -12,200.0,200 -13,176.0,176 -14,200.0,200 -15,200.0,200 -16,200.0,200 -17,200.0,200 -18,185.0,185 -19,180.0,180 diff --git 
a/projects/codes/A2C/outputs/CartPole-v0/20220829-143327/results/training_curve.png b/projects/codes/A2C/outputs/CartPole-v0/20220829-143327/results/training_curve.png deleted file mode 100644 index 860a49c..0000000 Binary files a/projects/codes/A2C/outputs/CartPole-v0/20220829-143327/results/training_curve.png and /dev/null differ diff --git a/projects/codes/A2C/outputs/CartPole-v0/20220829-143327/results/training_results.csv b/projects/codes/A2C/outputs/CartPole-v0/20220829-143327/results/training_results.csv deleted file mode 100644 index f05699c..0000000 --- a/projects/codes/A2C/outputs/CartPole-v0/20220829-143327/results/training_results.csv +++ /dev/null @@ -1,1601 +0,0 @@ -episodes,rewards,steps -0,16.0,16 -1,14.0,14 -2,18.0,18 -3,31.0,31 -4,23.0,23 -5,43.0,43 -6,14.0,14 -7,20.0,20 -8,24.0,24 -9,14.0,14 -10,12.0,12 -11,16.0,16 -12,17.0,17 -13,13.0,13 -14,78.0,78 -15,36.0,36 -16,9.0,9 -17,19.0,19 -18,18.0,18 -19,20.0,20 -20,33.0,33 -21,37.0,37 -22,15.0,15 -23,24.0,24 -24,32.0,32 -25,19.0,19 -26,65.0,65 -27,13.0,13 -28,26.0,26 -29,27.0,27 -30,15.0,15 -31,10.0,10 -32,16.0,16 -33,30.0,30 -34,42.0,42 -35,30.0,30 -36,35.0,35 -37,21.0,21 -38,36.0,36 -39,18.0,18 -40,81.0,81 -41,12.0,12 -42,21.0,21 -43,32.0,32 -44,39.0,39 -45,10.0,10 -46,13.0,13 -47,35.0,35 -48,22.0,22 -49,20.0,20 -50,67.0,67 -51,28.0,28 -52,13.0,13 -53,14.0,14 -54,13.0,13 -55,26.0,26 -56,12.0,12 -57,17.0,17 -58,41.0,41 -59,14.0,14 -60,68.0,68 -61,12.0,12 -62,20.0,20 -63,19.0,19 -64,31.0,31 -65,31.0,31 -66,34.0,34 -67,24.0,24 -68,31.0,31 -69,37.0,37 -70,49.0,49 -71,15.0,15 -72,97.0,97 -73,18.0,18 -74,20.0,20 -75,31.0,31 -76,68.0,68 -77,21.0,21 -78,19.0,19 -79,19.0,19 -80,19.0,19 -81,14.0,14 -82,17.0,17 -83,35.0,35 -84,25.0,25 -85,59.0,59 -86,29.0,29 -87,35.0,35 -88,73.0,73 -89,26.0,26 -90,81.0,81 -91,20.0,20 -92,78.0,78 -93,26.0,26 -94,43.0,43 -95,25.0,25 -96,24.0,24 -97,14.0,14 -98,27.0,27 -99,16.0,16 -100,68.0,68 -101,57.0,57 -102,31.0,31 -103,46.0,46 -104,17.0,17 -105,16.0,16 -106,21.0,21 
-107,25.0,25 -108,13.0,13 -109,50.0,50 -110,12.0,12 -111,34.0,34 -112,16.0,16 -113,34.0,34 -114,19.0,19 -115,38.0,38 -116,20.0,20 -117,42.0,42 -118,28.0,28 -119,53.0,53 -120,43.0,43 -121,37.0,37 -122,13.0,13 -123,29.0,29 -124,17.0,17 -125,23.0,23 -126,22.0,22 -127,21.0,21 -128,62.0,62 -129,52.0,52 -130,32.0,32 -131,37.0,37 -132,38.0,38 -133,28.0,28 -134,33.0,33 -135,33.0,33 -136,35.0,35 -137,43.0,43 -138,41.0,41 -139,18.0,18 -140,30.0,30 -141,48.0,48 -142,50.0,50 -143,34.0,34 -144,86.0,86 -145,21.0,21 -146,29.0,29 -147,29.0,29 -148,24.0,24 -149,88.0,88 -150,28.0,28 -151,19.0,19 -152,35.0,35 -153,24.0,24 -154,11.0,11 -155,49.0,49 -156,30.0,30 -157,44.0,44 -158,26.0,26 -159,19.0,19 -160,12.0,12 -161,65.0,65 -162,18.0,18 -163,11.0,11 -164,38.0,38 -165,28.0,28 -166,17.0,17 -167,51.0,51 -168,18.0,18 -169,39.0,39 -170,22.0,22 -171,26.0,26 -172,33.0,33 -173,35.0,35 -174,60.0,60 -175,12.0,12 -176,19.0,19 -177,62.0,62 -178,79.0,79 -179,36.0,36 -180,31.0,31 -181,23.0,23 -182,23.0,23 -183,40.0,40 -184,19.0,19 -185,18.0,18 -186,18.0,18 -187,23.0,23 -188,43.0,43 -189,45.0,45 -190,28.0,28 -191,61.0,61 -192,50.0,50 -193,40.0,40 -194,9.0,9 -195,56.0,56 -196,43.0,43 -197,43.0,43 -198,18.0,18 -199,43.0,43 -200,26.0,26 -201,44.0,44 -202,35.0,35 -203,25.0,25 -204,31.0,31 -205,26.0,26 -206,40.0,40 -207,36.0,36 -208,50.0,50 -209,25.0,25 -210,29.0,29 -211,11.0,11 -212,23.0,23 -213,23.0,23 -214,36.0,36 -215,39.0,39 -216,45.0,45 -217,14.0,14 -218,46.0,46 -219,62.0,62 -220,15.0,15 -221,19.0,19 -222,28.0,28 -223,39.0,39 -224,14.0,14 -225,25.0,25 -226,35.0,35 -227,16.0,16 -228,22.0,22 -229,41.0,41 -230,21.0,21 -231,22.0,22 -232,37.0,37 -233,32.0,32 -234,18.0,18 -235,23.0,23 -236,23.0,23 -237,16.0,16 -238,38.0,38 -239,25.0,25 -240,37.0,37 -241,13.0,13 -242,30.0,30 -243,27.0,27 -244,27.0,27 -245,23.0,23 -246,62.0,62 -247,31.0,31 -248,37.0,37 -249,26.0,26 -250,77.0,77 -251,15.0,15 -252,25.0,25 -253,20.0,20 -254,27.0,27 -255,42.0,42 -256,41.0,41 -257,34.0,34 -258,23.0,23 -259,37.0,37 -260,47.0,47 
-261,22.0,22 -262,34.0,34 -263,10.0,10 -264,37.0,37 -265,29.0,29 -266,49.0,49 -267,67.0,67 -268,15.0,15 -269,81.0,81 -270,95.0,95 -271,54.0,54 -272,19.0,19 -273,31.0,31 -274,54.0,54 -275,46.0,46 -276,21.0,21 -277,22.0,22 -278,58.0,58 -279,58.0,58 -280,37.0,37 -281,37.0,37 -282,25.0,25 -283,20.0,20 -284,46.0,46 -285,80.0,80 -286,25.0,25 -287,18.0,18 -288,18.0,18 -289,18.0,18 -290,37.0,37 -291,20.0,20 -292,62.0,62 -293,22.0,22 -294,23.0,23 -295,28.0,28 -296,38.0,38 -297,15.0,15 -298,17.0,17 -299,20.0,20 -300,20.0,20 -301,18.0,18 -302,77.0,77 -303,54.0,54 -304,95.0,95 -305,25.0,25 -306,79.0,79 -307,116.0,116 -308,52.0,52 -309,58.0,58 -310,15.0,15 -311,60.0,60 -312,97.0,97 -313,61.0,61 -314,18.0,18 -315,69.0,69 -316,18.0,18 -317,20.0,20 -318,19.0,19 -319,16.0,16 -320,21.0,21 -321,12.0,12 -322,30.0,30 -323,51.0,51 -324,37.0,37 -325,28.0,28 -326,29.0,29 -327,66.0,66 -328,56.0,56 -329,79.0,79 -330,56.0,56 -331,70.0,70 -332,33.0,33 -333,54.0,54 -334,35.0,35 -335,18.0,18 -336,141.0,141 -337,38.0,38 -338,18.0,18 -339,34.0,34 -340,20.0,20 -341,95.0,95 -342,29.0,29 -343,40.0,40 -344,20.0,20 -345,14.0,14 -346,83.0,83 -347,72.0,72 -348,88.0,88 -349,41.0,41 -350,103.0,103 -351,85.0,85 -352,14.0,14 -353,51.0,51 -354,68.0,68 -355,102.0,102 -356,61.0,61 -357,22.0,22 -358,22.0,22 -359,15.0,15 -360,23.0,23 -361,35.0,35 -362,72.0,72 -363,32.0,32 -364,107.0,107 -365,34.0,34 -366,28.0,28 -367,99.0,99 -368,140.0,140 -369,40.0,40 -370,24.0,24 -371,157.0,157 -372,33.0,33 -373,35.0,35 -374,18.0,18 -375,21.0,21 -376,23.0,23 -377,67.0,67 -378,112.0,112 -379,21.0,21 -380,38.0,38 -381,12.0,12 -382,23.0,23 -383,31.0,31 -384,36.0,36 -385,20.0,20 -386,82.0,82 -387,20.0,20 -388,32.0,32 -389,28.0,28 -390,26.0,26 -391,26.0,26 -392,46.0,46 -393,35.0,35 -394,32.0,32 -395,67.0,67 -396,30.0,30 -397,36.0,36 -398,67.0,67 -399,51.0,51 -400,21.0,21 -401,28.0,28 -402,46.0,46 -403,29.0,29 -404,30.0,30 -405,22.0,22 -406,24.0,24 -407,78.0,78 -408,28.0,28 -409,41.0,41 -410,38.0,38 -411,56.0,56 -412,19.0,19 
-413,38.0,38 -414,51.0,51 -415,33.0,33 -416,40.0,40 -417,35.0,35 -418,59.0,59 -419,138.0,138 -420,18.0,18 -421,10.0,10 -422,25.0,25 -423,104.0,104 -424,127.0,127 -425,44.0,44 -426,35.0,35 -427,62.0,62 -428,108.0,108 -429,39.0,39 -430,15.0,15 -431,73.0,73 -432,24.0,24 -433,23.0,23 -434,35.0,35 -435,14.0,14 -436,47.0,47 -437,112.0,112 -438,79.0,79 -439,99.0,99 -440,66.0,66 -441,64.0,64 -442,63.0,63 -443,28.0,28 -444,116.0,116 -445,94.0,94 -446,91.0,91 -447,60.0,60 -448,22.0,22 -449,68.0,68 -450,22.0,22 -451,34.0,34 -452,23.0,23 -453,117.0,117 -454,26.0,26 -455,30.0,30 -456,31.0,31 -457,50.0,50 -458,56.0,56 -459,81.0,81 -460,43.0,43 -461,49.0,49 -462,62.0,62 -463,64.0,64 -464,16.0,16 -465,37.0,37 -466,103.0,103 -467,66.0,66 -468,25.0,25 -469,50.0,50 -470,118.0,118 -471,42.0,42 -472,69.0,69 -473,55.0,55 -474,41.0,41 -475,25.0,25 -476,114.0,114 -477,27.0,27 -478,27.0,27 -479,61.0,61 -480,61.0,61 -481,66.0,66 -482,73.0,73 -483,35.0,35 -484,47.0,47 -485,77.0,77 -486,34.0,34 -487,16.0,16 -488,22.0,22 -489,47.0,47 -490,72.0,72 -491,76.0,76 -492,74.0,74 -493,36.0,36 -494,47.0,47 -495,25.0,25 -496,22.0,22 -497,20.0,20 -498,40.0,40 -499,44.0,44 -500,41.0,41 -501,42.0,42 -502,112.0,112 -503,64.0,64 -504,95.0,95 -505,112.0,112 -506,117.0,117 -507,84.0,84 -508,79.0,79 -509,129.0,129 -510,139.0,139 -511,41.0,41 -512,82.0,82 -513,54.0,54 -514,69.0,69 -515,44.0,44 -516,31.0,31 -517,64.0,64 -518,41.0,41 -519,100.0,100 -520,86.0,86 -521,44.0,44 -522,38.0,38 -523,36.0,36 -524,41.0,41 -525,22.0,22 -526,51.0,51 -527,24.0,24 -528,47.0,47 -529,79.0,79 -530,125.0,125 -531,50.0,50 -532,35.0,35 -533,48.0,48 -534,85.0,85 -535,58.0,58 -536,101.0,101 -537,200.0,200 -538,79.0,79 -539,159.0,159 -540,71.0,71 -541,71.0,71 -542,77.0,77 -543,78.0,78 -544,46.0,46 -545,49.0,49 -546,74.0,74 -547,71.0,71 -548,106.0,106 -549,36.0,36 -550,33.0,33 -551,160.0,160 -552,53.0,53 -553,54.0,54 -554,27.0,27 -555,55.0,55 -556,174.0,174 -557,33.0,33 -558,61.0,61 -559,118.0,118 -560,128.0,128 -561,148.0,148 
-562,97.0,97 -563,63.0,63 -564,44.0,44 -565,110.0,110 -566,156.0,156 -567,50.0,50 -568,163.0,163 -569,126.0,126 -570,114.0,114 -571,78.0,78 -572,48.0,48 -573,59.0,59 -574,116.0,116 -575,46.0,46 -576,135.0,135 -577,71.0,71 -578,19.0,19 -579,43.0,43 -580,89.0,89 -581,97.0,97 -582,21.0,21 -583,120.0,120 -584,54.0,54 -585,24.0,24 -586,62.0,62 -587,78.0,78 -588,36.0,36 -589,71.0,71 -590,25.0,25 -591,71.0,71 -592,56.0,56 -593,78.0,78 -594,65.0,65 -595,200.0,200 -596,200.0,200 -597,167.0,167 -598,59.0,59 -599,73.0,73 -600,66.0,66 -601,35.0,35 -602,186.0,186 -603,140.0,140 -604,49.0,49 -605,134.0,134 -606,46.0,46 -607,149.0,149 -608,82.0,82 -609,119.0,119 -610,126.0,126 -611,52.0,52 -612,89.0,89 -613,200.0,200 -614,89.0,89 -615,93.0,93 -616,200.0,200 -617,39.0,39 -618,113.0,113 -619,67.0,67 -620,164.0,164 -621,120.0,120 -622,74.0,74 -623,153.0,153 -624,124.0,124 -625,41.0,41 -626,173.0,173 -627,48.0,48 -628,200.0,200 -629,58.0,58 -630,35.0,35 -631,45.0,45 -632,43.0,43 -633,157.0,157 -634,111.0,111 -635,98.0,98 -636,102.0,102 -637,124.0,124 -638,111.0,111 -639,42.0,42 -640,128.0,128 -641,183.0,183 -642,49.0,49 -643,64.0,64 -644,117.0,117 -645,150.0,150 -646,103.0,103 -647,115.0,115 -648,41.0,41 -649,200.0,200 -650,162.0,162 -651,105.0,105 -652,94.0,94 -653,55.0,55 -654,73.0,73 -655,65.0,65 -656,29.0,29 -657,148.0,148 -658,42.0,42 -659,17.0,17 -660,60.0,60 -661,59.0,59 -662,80.0,80 -663,41.0,41 -664,143.0,143 -665,200.0,200 -666,45.0,45 -667,95.0,95 -668,61.0,61 -669,63.0,63 -670,170.0,170 -671,150.0,150 -672,139.0,139 -673,44.0,44 -674,44.0,44 -675,47.0,47 -676,72.0,72 -677,129.0,129 -678,79.0,79 -679,128.0,128 -680,126.0,126 -681,65.0,65 -682,57.0,57 -683,58.0,58 -684,66.0,66 -685,89.0,89 -686,150.0,150 -687,97.0,97 -688,20.0,20 -689,58.0,58 -690,81.0,81 -691,54.0,54 -692,55.0,55 -693,197.0,197 -694,61.0,61 -695,157.0,157 -696,166.0,166 -697,79.0,79 -698,128.0,128 -699,200.0,200 -700,46.0,46 -701,140.0,140 -702,19.0,19 -703,144.0,144 -704,138.0,138 -705,46.0,46 
-706,200.0,200 -707,61.0,61 -708,114.0,114 -709,100.0,100 -710,85.0,85 -711,200.0,200 -712,36.0,36 -713,142.0,142 -714,22.0,22 -715,82.0,82 -716,49.0,49 -717,139.0,139 -718,173.0,173 -719,47.0,47 -720,67.0,67 -721,197.0,197 -722,157.0,157 -723,149.0,149 -724,29.0,29 -725,85.0,85 -726,135.0,135 -727,157.0,157 -728,141.0,141 -729,165.0,165 -730,102.0,102 -731,192.0,192 -732,196.0,196 -733,183.0,183 -734,75.0,75 -735,41.0,41 -736,122.0,122 -737,200.0,200 -738,166.0,166 -739,109.0,109 -740,200.0,200 -741,200.0,200 -742,24.0,24 -743,20.0,20 -744,138.0,138 -745,122.0,122 -746,200.0,200 -747,156.0,156 -748,191.0,191 -749,91.0,91 -750,105.0,105 -751,145.0,145 -752,130.0,130 -753,150.0,150 -754,77.0,77 -755,137.0,137 -756,181.0,181 -757,200.0,200 -758,132.0,132 -759,200.0,200 -760,76.0,76 -761,63.0,63 -762,160.0,160 -763,28.0,28 -764,135.0,135 -765,43.0,43 -766,146.0,146 -767,179.0,179 -768,82.0,82 -769,126.0,126 -770,148.0,148 -771,110.0,110 -772,116.0,116 -773,55.0,55 -774,158.0,158 -775,155.0,155 -776,200.0,200 -777,153.0,153 -778,147.0,147 -779,54.0,54 -780,173.0,173 -781,44.0,44 -782,47.0,47 -783,200.0,200 -784,179.0,179 -785,194.0,194 -786,200.0,200 -787,141.0,141 -788,130.0,130 -789,133.0,133 -790,53.0,53 -791,124.0,124 -792,143.0,143 -793,58.0,58 -794,190.0,190 -795,130.0,130 -796,130.0,130 -797,200.0,200 -798,163.0,163 -799,200.0,200 -800,103.0,103 -801,200.0,200 -802,200.0,200 -803,18.0,18 -804,156.0,156 -805,165.0,165 -806,129.0,129 -807,33.0,33 -808,132.0,132 -809,200.0,200 -810,110.0,110 -811,93.0,93 -812,198.0,198 -813,200.0,200 -814,200.0,200 -815,182.0,182 -816,75.0,75 -817,200.0,200 -818,200.0,200 -819,200.0,200 -820,144.0,144 -821,118.0,118 -822,200.0,200 -823,60.0,60 -824,200.0,200 -825,134.0,134 -826,154.0,154 -827,116.0,116 -828,21.0,21 -829,200.0,200 -830,105.0,105 -831,158.0,158 -832,122.0,122 -833,88.0,88 -834,108.0,108 -835,112.0,112 -836,134.0,134 -837,165.0,165 -838,200.0,200 -839,138.0,138 -840,164.0,164 -841,200.0,200 -842,30.0,30 -843,181.0,181 
-844,149.0,149 -845,102.0,102 -846,128.0,128 -847,74.0,74 -848,112.0,112 -849,80.0,80 -850,190.0,190 -851,35.0,35 -852,40.0,40 -853,121.0,121 -854,125.0,125 -855,99.0,99 -856,115.0,115 -857,171.0,171 -858,200.0,200 -859,50.0,50 -860,200.0,200 -861,143.0,143 -862,146.0,146 -863,47.0,47 -864,154.0,154 -865,48.0,48 -866,103.0,103 -867,200.0,200 -868,151.0,151 -869,46.0,46 -870,155.0,155 -871,40.0,40 -872,124.0,124 -873,41.0,41 -874,45.0,45 -875,158.0,158 -876,29.0,29 -877,200.0,200 -878,200.0,200 -879,151.0,151 -880,158.0,158 -881,200.0,200 -882,15.0,15 -883,180.0,180 -884,75.0,75 -885,196.0,196 -886,176.0,176 -887,67.0,67 -888,90.0,90 -889,161.0,161 -890,88.0,88 -891,200.0,200 -892,64.0,64 -893,111.0,111 -894,184.0,184 -895,189.0,189 -896,109.0,109 -897,167.0,167 -898,99.0,99 -899,180.0,180 -900,121.0,121 -901,126.0,126 -902,200.0,200 -903,200.0,200 -904,177.0,177 -905,107.0,107 -906,200.0,200 -907,133.0,133 -908,164.0,164 -909,200.0,200 -910,160.0,160 -911,120.0,120 -912,200.0,200 -913,65.0,65 -914,27.0,27 -915,200.0,200 -916,162.0,162 -917,25.0,25 -918,118.0,118 -919,56.0,56 -920,107.0,107 -921,200.0,200 -922,166.0,166 -923,69.0,69 -924,187.0,187 -925,126.0,126 -926,200.0,200 -927,49.0,49 -928,99.0,99 -929,200.0,200 -930,200.0,200 -931,153.0,153 -932,158.0,158 -933,200.0,200 -934,145.0,145 -935,126.0,126 -936,133.0,133 -937,81.0,81 -938,200.0,200 -939,57.0,57 -940,200.0,200 -941,131.0,131 -942,200.0,200 -943,200.0,200 -944,200.0,200 -945,171.0,171 -946,200.0,200 -947,200.0,200 -948,200.0,200 -949,176.0,176 -950,110.0,110 -951,158.0,158 -952,137.0,137 -953,103.0,103 -954,200.0,200 -955,200.0,200 -956,200.0,200 -957,190.0,190 -958,130.0,130 -959,139.0,139 -960,200.0,200 -961,172.0,172 -962,152.0,152 -963,154.0,154 -964,52.0,52 -965,194.0,194 -966,52.0,52 -967,169.0,169 -968,200.0,200 -969,176.0,176 -970,127.0,127 -971,157.0,157 -972,200.0,200 -973,134.0,134 -974,138.0,138 -975,133.0,133 -976,170.0,170 -977,159.0,159 -978,88.0,88 -979,141.0,141 -980,117.0,117 
-981,157.0,157 -982,145.0,145 -983,200.0,200 -984,129.0,129 -985,155.0,155 -986,83.0,83 -987,152.0,152 -988,156.0,156 -989,200.0,200 -990,135.0,135 -991,75.0,75 -992,138.0,138 -993,83.0,83 -994,200.0,200 -995,128.0,128 -996,122.0,122 -997,200.0,200 -998,34.0,34 -999,161.0,161 -1000,143.0,143 -1001,200.0,200 -1002,103.0,103 -1003,168.0,168 -1004,200.0,200 -1005,200.0,200 -1006,167.0,167 -1007,200.0,200 -1008,76.0,76 -1009,101.0,101 -1010,153.0,153 -1011,113.0,113 -1012,109.0,109 -1013,188.0,188 -1014,122.0,122 -1015,181.0,181 -1016,166.0,166 -1017,189.0,189 -1018,200.0,200 -1019,187.0,187 -1020,116.0,116 -1021,200.0,200 -1022,108.0,108 -1023,18.0,18 -1024,158.0,158 -1025,200.0,200 -1026,43.0,43 -1027,200.0,200 -1028,199.0,199 -1029,200.0,200 -1030,133.0,133 -1031,171.0,171 -1032,200.0,200 -1033,200.0,200 -1034,200.0,200 -1035,156.0,156 -1036,52.0,52 -1037,200.0,200 -1038,121.0,121 -1039,188.0,188 -1040,167.0,167 -1041,200.0,200 -1042,124.0,124 -1043,102.0,102 -1044,161.0,161 -1045,200.0,200 -1046,200.0,200 -1047,135.0,135 -1048,200.0,200 -1049,80.0,80 -1050,200.0,200 -1051,66.0,66 -1052,200.0,200 -1053,200.0,200 -1054,112.0,112 -1055,195.0,195 -1056,200.0,200 -1057,170.0,170 -1058,194.0,194 -1059,200.0,200 -1060,200.0,200 -1061,59.0,59 -1062,75.0,75 -1063,200.0,200 -1064,200.0,200 -1065,97.0,97 -1066,171.0,171 -1067,30.0,30 -1068,200.0,200 -1069,101.0,101 -1070,124.0,124 -1071,136.0,136 -1072,184.0,184 -1073,149.0,149 -1074,137.0,137 -1075,167.0,167 -1076,136.0,136 -1077,200.0,200 -1078,139.0,139 -1079,85.0,85 -1080,137.0,137 -1081,161.0,161 -1082,81.0,81 -1083,200.0,200 -1084,200.0,200 -1085,200.0,200 -1086,200.0,200 -1087,87.0,87 -1088,174.0,174 -1089,200.0,200 -1090,128.0,128 -1091,200.0,200 -1092,200.0,200 -1093,200.0,200 -1094,120.0,120 -1095,200.0,200 -1096,131.0,131 -1097,200.0,200 -1098,200.0,200 -1099,200.0,200 -1100,146.0,146 -1101,200.0,200 -1102,200.0,200 -1103,200.0,200 -1104,80.0,80 -1105,200.0,200 -1106,172.0,172 -1107,143.0,143 -1108,200.0,200 
-1109,200.0,200 -1110,181.0,181 -1111,189.0,189 -1112,133.0,133 -1113,200.0,200 -1114,111.0,111 -1115,200.0,200 -1116,200.0,200 -1117,200.0,200 -1118,192.0,192 -1119,200.0,200 -1120,200.0,200 -1121,200.0,200 -1122,144.0,144 -1123,27.0,27 -1124,200.0,200 -1125,198.0,198 -1126,186.0,186 -1127,80.0,80 -1128,200.0,200 -1129,169.0,169 -1130,48.0,48 -1131,198.0,198 -1132,162.0,162 -1133,58.0,58 -1134,200.0,200 -1135,200.0,200 -1136,189.0,189 -1137,200.0,200 -1138,117.0,117 -1139,200.0,200 -1140,200.0,200 -1141,150.0,150 -1142,163.0,163 -1143,161.0,161 -1144,200.0,200 -1145,113.0,113 -1146,181.0,181 -1147,193.0,193 -1148,98.0,98 -1149,200.0,200 -1150,22.0,22 -1151,125.0,125 -1152,200.0,200 -1153,200.0,200 -1154,200.0,200 -1155,67.0,67 -1156,186.0,186 -1157,189.0,189 -1158,186.0,186 -1159,156.0,156 -1160,200.0,200 -1161,200.0,200 -1162,116.0,116 -1163,77.0,77 -1164,148.0,148 -1165,111.0,111 -1166,68.0,68 -1167,140.0,140 -1168,114.0,114 -1169,200.0,200 -1170,173.0,173 -1171,97.0,97 -1172,166.0,166 -1173,154.0,154 -1174,200.0,200 -1175,200.0,200 -1176,129.0,129 -1177,111.0,111 -1178,200.0,200 -1179,85.0,85 -1180,71.0,71 -1181,200.0,200 -1182,158.0,158 -1183,130.0,130 -1184,161.0,161 -1185,188.0,188 -1186,124.0,124 -1187,190.0,190 -1188,157.0,157 -1189,188.0,188 -1190,194.0,194 -1191,173.0,173 -1192,123.0,123 -1193,200.0,200 -1194,123.0,123 -1195,200.0,200 -1196,200.0,200 -1197,114.0,114 -1198,45.0,45 -1199,144.0,144 -1200,107.0,107 -1201,184.0,184 -1202,121.0,121 -1203,200.0,200 -1204,50.0,50 -1205,123.0,123 -1206,73.0,73 -1207,142.0,142 -1208,38.0,38 -1209,129.0,129 -1210,123.0,123 -1211,149.0,149 -1212,97.0,97 -1213,40.0,40 -1214,177.0,177 -1215,200.0,200 -1216,166.0,166 -1217,106.0,106 -1218,114.0,114 -1219,53.0,53 -1220,162.0,162 -1221,181.0,181 -1222,128.0,128 -1223,155.0,155 -1224,180.0,180 -1225,139.0,139 -1226,99.0,99 -1227,95.0,95 -1228,159.0,159 -1229,30.0,30 -1230,67.0,67 -1231,127.0,127 -1232,33.0,33 -1233,80.0,80 -1234,200.0,200 -1235,200.0,200 -1236,200.0,200 
-1237,117.0,117 -1238,128.0,128 -1239,47.0,47 -1240,134.0,134 -1241,129.0,129 -1242,135.0,135 -1243,200.0,200 -1244,200.0,200 -1245,101.0,101 -1246,35.0,35 -1247,73.0,73 -1248,95.0,95 -1249,125.0,125 -1250,200.0,200 -1251,155.0,155 -1252,48.0,48 -1253,200.0,200 -1254,153.0,153 -1255,173.0,173 -1256,128.0,128 -1257,200.0,200 -1258,200.0,200 -1259,196.0,196 -1260,50.0,50 -1261,193.0,193 -1262,200.0,200 -1263,200.0,200 -1264,200.0,200 -1265,200.0,200 -1266,179.0,179 -1267,180.0,180 -1268,200.0,200 -1269,200.0,200 -1270,200.0,200 -1271,120.0,120 -1272,200.0,200 -1273,60.0,60 -1274,99.0,99 -1275,178.0,178 -1276,157.0,157 -1277,200.0,200 -1278,177.0,177 -1279,200.0,200 -1280,200.0,200 -1281,200.0,200 -1282,200.0,200 -1283,200.0,200 -1284,200.0,200 -1285,200.0,200 -1286,97.0,97 -1287,167.0,167 -1288,183.0,183 -1289,200.0,200 -1290,61.0,61 -1291,192.0,192 -1292,200.0,200 -1293,137.0,137 -1294,200.0,200 -1295,200.0,200 -1296,200.0,200 -1297,200.0,200 -1298,200.0,200 -1299,200.0,200 -1300,103.0,103 -1301,142.0,142 -1302,200.0,200 -1303,47.0,47 -1304,189.0,189 -1305,41.0,41 -1306,200.0,200 -1307,200.0,200 -1308,132.0,132 -1309,154.0,154 -1310,95.0,95 -1311,200.0,200 -1312,200.0,200 -1313,200.0,200 -1314,71.0,71 -1315,200.0,200 -1316,170.0,170 -1317,121.0,121 -1318,200.0,200 -1319,127.0,127 -1320,200.0,200 -1321,120.0,120 -1322,200.0,200 -1323,200.0,200 -1324,161.0,161 -1325,37.0,37 -1326,200.0,200 -1327,200.0,200 -1328,200.0,200 -1329,49.0,49 -1330,118.0,118 -1331,200.0,200 -1332,167.0,167 -1333,200.0,200 -1334,99.0,99 -1335,137.0,137 -1336,200.0,200 -1337,41.0,41 -1338,200.0,200 -1339,200.0,200 -1340,97.0,97 -1341,34.0,34 -1342,40.0,40 -1343,197.0,197 -1344,51.0,51 -1345,200.0,200 -1346,156.0,156 -1347,200.0,200 -1348,75.0,75 -1349,118.0,118 -1350,200.0,200 -1351,73.0,73 -1352,200.0,200 -1353,133.0,133 -1354,200.0,200 -1355,200.0,200 -1356,162.0,162 -1357,37.0,37 -1358,130.0,130 -1359,123.0,123 -1360,200.0,200 -1361,99.0,99 -1362,200.0,200 -1363,46.0,46 -1364,200.0,200 
-1365,190.0,190 -1366,34.0,34 -1367,37.0,37 -1368,200.0,200 -1369,200.0,200 -1370,131.0,131 -1371,200.0,200 -1372,200.0,200 -1373,158.0,158 -1374,175.0,175 -1375,134.0,134 -1376,100.0,100 -1377,200.0,200 -1378,200.0,200 -1379,123.0,123 -1380,200.0,200 -1381,200.0,200 -1382,200.0,200 -1383,116.0,116 -1384,200.0,200 -1385,88.0,88 -1386,200.0,200 -1387,200.0,200 -1388,147.0,147 -1389,200.0,200 -1390,200.0,200 -1391,84.0,84 -1392,200.0,200 -1393,184.0,184 -1394,200.0,200 -1395,179.0,179 -1396,200.0,200 -1397,200.0,200 -1398,130.0,130 -1399,29.0,29 -1400,200.0,200 -1401,200.0,200 -1402,200.0,200 -1403,200.0,200 -1404,195.0,195 -1405,118.0,118 -1406,200.0,200 -1407,200.0,200 -1408,200.0,200 -1409,154.0,154 -1410,185.0,185 -1411,200.0,200 -1412,152.0,152 -1413,200.0,200 -1414,200.0,200 -1415,200.0,200 -1416,200.0,200 -1417,31.0,31 -1418,200.0,200 -1419,134.0,134 -1420,172.0,172 -1421,112.0,112 -1422,153.0,153 -1423,199.0,199 -1424,200.0,200 -1425,200.0,200 -1426,200.0,200 -1427,200.0,200 -1428,166.0,166 -1429,200.0,200 -1430,200.0,200 -1431,199.0,199 -1432,195.0,195 -1433,174.0,174 -1434,46.0,46 -1435,174.0,174 -1436,23.0,23 -1437,157.0,157 -1438,200.0,200 -1439,170.0,170 -1440,92.0,92 -1441,200.0,200 -1442,200.0,200 -1443,72.0,72 -1444,200.0,200 -1445,200.0,200 -1446,200.0,200 -1447,118.0,118 -1448,119.0,119 -1449,109.0,109 -1450,101.0,101 -1451,32.0,32 -1452,197.0,197 -1453,154.0,154 -1454,138.0,138 -1455,141.0,141 -1456,141.0,141 -1457,200.0,200 -1458,90.0,90 -1459,200.0,200 -1460,122.0,122 -1461,144.0,144 -1462,155.0,155 -1463,200.0,200 -1464,160.0,160 -1465,129.0,129 -1466,200.0,200 -1467,112.0,112 -1468,132.0,132 -1469,144.0,144 -1470,184.0,184 -1471,200.0,200 -1472,26.0,26 -1473,200.0,200 -1474,26.0,26 -1475,128.0,128 -1476,200.0,200 -1477,173.0,173 -1478,145.0,145 -1479,128.0,128 -1480,118.0,118 -1481,50.0,50 -1482,184.0,184 -1483,166.0,166 -1484,142.0,142 -1485,104.0,104 -1486,180.0,180 -1487,200.0,200 -1488,200.0,200 -1489,200.0,200 -1490,123.0,123 
-1491,200.0,200 -1492,140.0,140 -1493,200.0,200 -1494,200.0,200 -1495,200.0,200 -1496,200.0,200 -1497,117.0,117 -1498,13.0,13 -1499,200.0,200 -1500,127.0,127 -1501,200.0,200 -1502,200.0,200 -1503,200.0,200 -1504,200.0,200 -1505,200.0,200 -1506,200.0,200 -1507,77.0,77 -1508,152.0,152 -1509,38.0,38 -1510,125.0,125 -1511,154.0,154 -1512,142.0,142 -1513,120.0,120 -1514,200.0,200 -1515,191.0,191 -1516,21.0,21 -1517,101.0,101 -1518,191.0,191 -1519,170.0,170 -1520,200.0,200 -1521,30.0,30 -1522,191.0,191 -1523,200.0,200 -1524,200.0,200 -1525,200.0,200 -1526,135.0,135 -1527,200.0,200 -1528,185.0,185 -1529,123.0,123 -1530,156.0,156 -1531,200.0,200 -1532,140.0,140 -1533,200.0,200 -1534,136.0,136 -1535,139.0,139 -1536,200.0,200 -1537,169.0,169 -1538,200.0,200 -1539,200.0,200 -1540,103.0,103 -1541,91.0,91 -1542,200.0,200 -1543,200.0,200 -1544,65.0,65 -1545,200.0,200 -1546,169.0,169 -1547,59.0,59 -1548,175.0,175 -1549,200.0,200 -1550,200.0,200 -1551,189.0,189 -1552,200.0,200 -1553,200.0,200 -1554,151.0,151 -1555,108.0,108 -1556,146.0,146 -1557,200.0,200 -1558,198.0,198 -1559,119.0,119 -1560,105.0,105 -1561,175.0,175 -1562,200.0,200 -1563,136.0,136 -1564,200.0,200 -1565,86.0,86 -1566,200.0,200 -1567,200.0,200 -1568,200.0,200 -1569,124.0,124 -1570,200.0,200 -1571,122.0,122 -1572,200.0,200 -1573,200.0,200 -1574,47.0,47 -1575,200.0,200 -1576,194.0,194 -1577,200.0,200 -1578,121.0,121 -1579,200.0,200 -1580,200.0,200 -1581,190.0,190 -1582,200.0,200 -1583,200.0,200 -1584,200.0,200 -1585,145.0,145 -1586,121.0,121 -1587,198.0,198 -1588,200.0,200 -1589,200.0,200 -1590,130.0,130 -1591,185.0,185 -1592,193.0,193 -1593,200.0,200 -1594,200.0,200 -1595,200.0,200 -1596,200.0,200 -1597,168.0,168 -1598,200.0,200 -1599,200.0,200 diff --git a/projects/codes/A2C/task0.py b/projects/codes/A2C/task0.py new file mode 100644 index 0000000..4a3208a --- /dev/null +++ b/projects/codes/A2C/task0.py @@ -0,0 +1,142 @@ +#!/usr/bin/env python +# coding=utf-8 +''' +Author: JiangJi +Email: johnjim0816@gmail.com 
+Date: 2022-10-30 01:19:43 +LastEditor: JiangJi +LastEditTime: 2022-11-01 01:21:06 +Discription: +''' +import sys,os +os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE" # avoid "OMP: Error #15: Initializing libiomp5md.dll, but found libiomp5md.dll already initialized." +curr_path = os.path.dirname(os.path.abspath(__file__)) # current path +parent_path = os.path.dirname(curr_path) # parent path +sys.path.append(parent_path) # add path to system path + +import gym +from common.utils import all_seed,merge_class_attrs +from common.launcher import Launcher +from common.memories import PGReplay +from common.models import ActorSoftmax,Critic +from envs.register import register_env +from a2c import A2C +from config.config import GeneralConfigA2C,AlgoConfigA2C + +class Main(Launcher): + def __init__(self) -> None: + super().__init__() + self.cfgs['general_cfg'] = merge_class_attrs(self.cfgs['general_cfg'],GeneralConfigA2C()) + self.cfgs['algo_cfg'] = merge_class_attrs(self.cfgs['algo_cfg'],AlgoConfigA2C()) + def env_agent_config(self,cfg,logger): + ''' create env and agent + ''' + register_env(cfg.env_name) + env = gym.make(cfg.env_name,new_step_api=True) # create env + if cfg.seed !=0: # set random seed + all_seed(env,seed = cfg.seed) + try: # state dimension + n_states = env.observation_space.n # print(hasattr(env.observation_space, 'n')) + except AttributeError: + n_states = env.observation_space.shape[0] # print(hasattr(env.observation_space, 'shape')) + n_actions = env.action_space.n # action dimension + logger.info(f"n_states: {n_states}, n_actions: {n_actions}") # print info + # update to cfg paramters + setattr(cfg, 'n_states', n_states) + setattr(cfg, 'n_actions', n_actions) + models = {'Actor':ActorSoftmax(n_states,n_actions, hidden_dim = cfg.actor_hidden_dim),'Critic':Critic(n_states,1,hidden_dim=cfg.critic_hidden_dim)} + memories = {'ACMemory':PGReplay()} + agent = A2C(models,memories,cfg) + for k,v in models.items(): + logger.info(f"{k} model name: 
{type(v).__name__}") + for k,v in memories.items(): + logger.info(f"{k} memory name: {type(v).__name__}") + logger.info(f"agent name: {type(agent).__name__}") + return env,agent + def train_one_episode(self, env, agent, cfg): + ep_reward = 0 # reward per episode + ep_step = 0 # step per episode + ep_entropy = 0 # entropy per episode + state = env.reset() # reset and obtain initial state + for _ in range(cfg.max_steps): + action = agent.sample_action(state) # sample action + next_state, reward, terminated, truncated , info = env.step(action) # update env and return transitions + agent.memory.push((agent.value,agent.log_prob,reward)) # save transitions + state = next_state # update state + ep_reward += reward + ep_entropy += agent.entropy + ep_step += 1 + if terminated: + break + agent.update(next_state,ep_entropy) # update agent + return agent,ep_reward,ep_step + def test_one_episode(self, env, agent, cfg): + ep_reward = 0 # reward per episode + ep_step = 0 # step per episode + state = env.reset() # reset and obtain initial state + for _ in range(cfg.max_steps): + action = agent.predict_action(state) # predict action + next_state, reward, terminated, truncated , info = env.step(action) + state = next_state + ep_reward += reward + ep_step += 1 + if terminated: + break + return agent,ep_reward,ep_step + # def train(self,cfg,env,agent,logger): + # logger.info("Start training!") + # logger.info(f"Env: {cfg.env_name}, Algorithm: {cfg.algo_name}, Device: {cfg.device}") + # rewards = [] # record rewards for all episodes + # steps = [] # record steps for all episodes + # for i_ep in range(cfg.train_eps): + # ep_reward = 0 # reward per episode + # ep_step = 0 # step per episode + # ep_entropy = 0 + # state = env.reset() # reset and obtain initial state + # for _ in range(cfg.max_steps): + # action = agent.sample_action(state) # sample action + # next_state, reward, terminated, truncated , info = env.step(action) # update env and return transitions + # 
agent.memory.push((agent.value,agent.log_prob,reward)) # save transitions + # state = next_state # update state + # ep_reward += reward + # ep_entropy += agent.entropy + # ep_step += 1 + # if terminated: + # break + # agent.update(next_state,ep_entropy) # update agent + # rewards.append(ep_reward) + # steps.append(ep_step) + # logger.info(f"Episode: {i_ep+1}/{cfg.train_eps}, Reward: {ep_reward:.2f}, Steps:{ep_step}") + # logger.info("Finish training!") + # return {'episodes':range(len(rewards)),'rewards':rewards,'steps':steps} + # def test(self,cfg,env,agent,logger): + # logger.info("Start testing!") + # logger.info(f"Env: {cfg.env_name}, Algorithm: {cfg.algo_name}, Device: {cfg.device}") + # rewards = [] # record rewards for all episodes + # steps = [] # record steps for all episodes + # for i_ep in range(cfg.test_eps): + # ep_reward = 0 # reward per episode + # ep_step = 0 + # state = env.reset() # reset and obtain initial state + # for _ in range(cfg.max_steps): + # action = agent.predict_action(state) # predict action + # next_state, reward, terminated, truncated , info = env.step(action) + # state = next_state + # ep_reward += reward + # ep_step += 1 + # if terminated: + # break + # rewards.append(ep_reward) + # steps.append(ep_step) + # logger.info(f"Episode: {i_ep+1}/{cfg.test_eps}, Reward: {ep_reward:.2f}, Steps:{ep_step}") + # logger.info("Finish testing!") + # env.close() + # return {'episodes':range(len(rewards)),'rewards':rewards,'steps':steps} + +if __name__ == "__main__": + main = Main() + main.run() + + + + diff --git a/projects/codes/A2C/task1.py b/projects/codes/A2C/task1.py new file mode 100644 index 0000000..ff7c86f --- /dev/null +++ b/projects/codes/A2C/task1.py @@ -0,0 +1,142 @@ +#!/usr/bin/env python +# coding=utf-8 +''' +Author: JiangJi +Email: johnjim0816@gmail.com +Date: 2022-10-30 01:19:43 +LastEditor: JiangJi +LastEditTime: 2022-11-01 01:21:12 +Discription: continuous action space +''' +import sys,os +os.environ["KMP_DUPLICATE_LIB_OK"] = 
"TRUE" # avoid "OMP: Error #15: Initializing libiomp5md.dll, but found libiomp5md.dll already initialized." +curr_path = os.path.dirname(os.path.abspath(__file__)) # current path +parent_path = os.path.dirname(curr_path) # parent path +sys.path.append(parent_path) # add path to system path + +import gym +from common.utils import all_seed,merge_class_attrs +from common.launcher import Launcher +from common.memories import PGReplay +from common.models import ActorSoftmaxTanh,Critic +from envs.register import register_env +from a2c import A2C +from config.config import GeneralConfigA2C,AlgoConfigA2C + +class Main(Launcher): + def __init__(self) -> None: + super().__init__() + self.cfgs['general_cfg'] = merge_class_attrs(self.cfgs['general_cfg'],GeneralConfigA2C()) + self.cfgs['algo_cfg'] = merge_class_attrs(self.cfgs['algo_cfg'],AlgoConfigA2C()) + def env_agent_config(self,cfg,logger): + ''' create env and agent + ''' + register_env(cfg.env_name) + env = gym.make(cfg.env_name,new_step_api=True) # create env + if cfg.seed !=0: # set random seed + all_seed(env,seed = cfg.seed) + try: # state dimension + n_states = env.observation_space.n # print(hasattr(env.observation_space, 'n')) + except AttributeError: + n_states = env.observation_space.shape[0] # print(hasattr(env.observation_space, 'shape')) + n_actions = env.action_space.n # action dimension + logger.info(f"n_states: {n_states}, n_actions: {n_actions}") # print info + # update to cfg paramters + setattr(cfg, 'n_states', n_states) + setattr(cfg, 'n_actions', n_actions) + models = {'Actor':ActorSoftmaxTanh(n_states,n_actions, hidden_dim = cfg.actor_hidden_dim),'Critic':Critic(n_states,1,hidden_dim=cfg.critic_hidden_dim)} + memories = {'ACMemory':PGReplay()} + agent = A2C(models,memories,cfg) + for k,v in models.items(): + logger.info(f"{k} model name: {type(v).__name__}") + for k,v in memories.items(): + logger.info(f"{k} memory name: {type(v).__name__}") + logger.info(f"agent name: {type(agent).__name__}") + 
return env,agent + def train_one_episode(self, env, agent, cfg): + ep_reward = 0 # reward per episode + ep_step = 0 # step per episode + ep_entropy = 0 # entropy per episode + state = env.reset() # reset and obtain initial state + for _ in range(cfg.max_steps): + action = agent.sample_action(state) # sample action + next_state, reward, terminated, truncated , info = env.step(action) # update env and return transitions + agent.memory.push((agent.value,agent.log_prob,reward)) # save transitions + state = next_state # update state + ep_reward += reward + ep_entropy += agent.entropy + ep_step += 1 + if terminated: + break + agent.update(next_state,ep_entropy) # update agent + return agent,ep_reward,ep_step + def test_one_episode(self, env, agent, cfg): + ep_reward = 0 # reward per episode + ep_step = 0 # step per episode + state = env.reset() # reset and obtain initial state + for _ in range(cfg.max_steps): + action = agent.predict_action(state) # predict action + next_state, reward, terminated, truncated , info = env.step(action) + state = next_state + ep_reward += reward + ep_step += 1 + if terminated: + break + return agent,ep_reward,ep_step + # def train(self,cfg,env,agent,logger): + # logger.info("Start training!") + # logger.info(f"Env: {cfg.env_name}, Algorithm: {cfg.algo_name}, Device: {cfg.device}") + # rewards = [] # record rewards for all episodes + # steps = [] # record steps for all episodes + # for i_ep in range(cfg.train_eps): + # ep_reward = 0 # reward per episode + # ep_step = 0 # step per episode + # ep_entropy = 0 + # state = env.reset() # reset and obtain initial state + # for _ in range(cfg.max_steps): + # action = agent.sample_action(state) # sample action + # next_state, reward, terminated, truncated , info = env.step(action) # update env and return transitions + # agent.memory.push((agent.value,agent.log_prob,reward)) # save transitions + # state = next_state # update state + # ep_reward += reward + # ep_entropy += agent.entropy + # ep_step += 1 
+ # if terminated: + # break + # agent.update(next_state,ep_entropy) # update agent + # rewards.append(ep_reward) + # steps.append(ep_step) + # logger.info(f"Episode: {i_ep+1}/{cfg.train_eps}, Reward: {ep_reward:.2f}, Steps:{ep_step}") + # logger.info("Finish training!") + # return {'episodes':range(len(rewards)),'rewards':rewards,'steps':steps} + # def test(self,cfg,env,agent,logger): + # logger.info("Start testing!") + # logger.info(f"Env: {cfg.env_name}, Algorithm: {cfg.algo_name}, Device: {cfg.device}") + # rewards = [] # record rewards for all episodes + # steps = [] # record steps for all episodes + # for i_ep in range(cfg.test_eps): + # ep_reward = 0 # reward per episode + # ep_step = 0 + # state = env.reset() # reset and obtain initial state + # for _ in range(cfg.max_steps): + # action = agent.predict_action(state) # predict action + # next_state, reward, terminated, truncated , info = env.step(action) + # state = next_state + # ep_reward += reward + # ep_step += 1 + # if terminated: + # break + # rewards.append(ep_reward) + # steps.append(ep_step) + # logger.info(f"Episode: {i_ep+1}/{cfg.test_eps}, Reward: {ep_reward:.2f}, Steps:{ep_step}") + # logger.info("Finish testing!") + # env.close() + # return {'episodes':range(len(rewards)),'rewards':rewards,'steps':steps} + +if __name__ == "__main__": + main = Main() + main.run() + + + + diff --git a/projects/codes/A2C/task2.py b/projects/codes/A2C/task2.py new file mode 100644 index 0000000..96c1cc2 --- /dev/null +++ b/projects/codes/A2C/task2.py @@ -0,0 +1,149 @@ +#!/usr/bin/env python +# coding=utf-8 +''' +Author: JiangJi +Email: johnjim0816@gmail.com +Date: 2022-10-30 01:19:43 +LastEditor: JiangJi +LastEditTime: 2022-11-01 00:08:22 +Discription: the only difference from task0.py is that the actor here we use ActorSoftmaxTanh instead of ActorSoftmax with ReLU +''' +import sys,os +os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE" # avoid "OMP: Error #15: Initializing libiomp5md.dll, but found libiomp5md.dll already 
initialized." +curr_path = os.path.dirname(os.path.abspath(__file__)) # current path +parent_path = os.path.dirname(curr_path) # parent path +sys.path.append(parent_path) # add path to system path + +import gym +import torch +import numpy as np +from common.utils import all_seed,merge_class_attrs +from common.launcher import Launcher +from common.memories import PGReplay +from common.models import ActorNormal,Critic +from envs.register import register_env +from a2c import A2C +from config.config import GeneralConfigA2C,AlgoConfigA2C + +class Main(Launcher): + def __init__(self) -> None: + super().__init__() + self.cfgs['general_cfg'] = merge_class_attrs(self.cfgs['general_cfg'],GeneralConfigA2C()) + self.cfgs['algo_cfg'] = merge_class_attrs(self.cfgs['algo_cfg'],AlgoConfigA2C()) + def env_agent_config(self,cfg,logger): + ''' create env and agent + ''' + register_env(cfg.env_name) + env = gym.make(cfg.env_name,new_step_api=True) # create env + if cfg.seed !=0: # set random seed + all_seed(env,seed = cfg.seed) + try: # state dimension + n_states = env.observation_space.n # print(hasattr(env.observation_space, 'n')) + except AttributeError: + n_states = env.observation_space.shape[0] # print(hasattr(env.observation_space, 'shape')) + try: + n_actions = env.action_space.n # action dimension + except AttributeError: + n_actions = env.action_space.shape[0] + logger.info(f"action bound: {abs(env.action_space.low.item())}") + setattr(cfg, 'action_bound', abs(env.action_space.low.item())) + logger.info(f"n_states: {n_states}, n_actions: {n_actions}") # print info + # update to cfg paramters + setattr(cfg, 'n_states', n_states) + setattr(cfg, 'n_actions', n_actions) + models = {'Actor':ActorNormal(n_states,n_actions, hidden_dim = cfg.actor_hidden_dim),'Critic':Critic(n_states,1,hidden_dim=cfg.critic_hidden_dim)} + memories = {'ACMemory':PGReplay()} + agent = A2C(models,memories,cfg) + for k,v in models.items(): + logger.info(f"{k} model name: {type(v).__name__}") + for k,v 
in memories.items(): + logger.info(f"{k} memory name: {type(v).__name__}") + logger.info(f"agent name: {type(agent).__name__}") + return env,agent + def train_one_episode(self, env, agent, cfg): + ep_reward = 0 # reward per episode + ep_step = 0 # step per episode + ep_entropy = 0 # entropy per episode + state = env.reset() # reset and obtain initial state + for _ in range(cfg.max_steps): + action = agent.sample_action(state) # sample action + next_state, reward, terminated, truncated , info = env.step(action) # update env and return transitions + agent.memory.push((agent.value,agent.log_prob,reward)) # save transitions + state = next_state # update state + ep_reward += reward + ep_entropy += agent.entropy + ep_step += 1 + if terminated: + break + agent.update(next_state,ep_entropy) # update agent + return agent,ep_reward,ep_step + def test_one_episode(self, env, agent, cfg): + ep_reward = 0 # reward per episode + ep_step = 0 # step per episode + state = env.reset() # reset and obtain initial state + for _ in range(cfg.max_steps): + action = agent.predict_action(state) # predict action + next_state, reward, terminated, truncated , info = env.step(action) + state = next_state + ep_reward += reward + ep_step += 1 + if terminated: + break + return agent,ep_reward,ep_step + # def train(self,cfg,env,agent,logger): + # logger.info("Start training!") + # logger.info(f"Env: {cfg.env_name}, Algorithm: {cfg.algo_name}, Device: {cfg.device}") + # rewards = [] # record rewards for all episodes + # steps = [] # record steps for all episodes + # for i_ep in range(cfg.train_eps): + # ep_reward = 0 # reward per episode + # ep_step = 0 # step per episode + # ep_entropy = 0 + # state = env.reset() # reset and obtain initial state + # for _ in range(cfg.max_steps): + # action = agent.sample_action(state) # sample action + # next_state, reward, terminated, truncated , info = env.step(action) # update env and return transitions + # agent.memory.push((agent.value,agent.log_prob,reward)) 
# save transitions + # state = next_state # update state + # ep_reward += reward + # ep_entropy += agent.entropy + # ep_step += 1 + # if terminated: + # break + # agent.update(next_state,ep_entropy) # update agent + # rewards.append(ep_reward) + # steps.append(ep_step) + # logger.info(f"Episode: {i_ep+1}/{cfg.train_eps}, Reward: {ep_reward:.2f}, Steps:{ep_step}") + # logger.info("Finish training!") + # return {'episodes':range(len(rewards)),'rewards':rewards,'steps':steps} + # def test(self,cfg,env,agent,logger): + # logger.info("Start testing!") + # logger.info(f"Env: {cfg.env_name}, Algorithm: {cfg.algo_name}, Device: {cfg.device}") + # rewards = [] # record rewards for all episodes + # steps = [] # record steps for all episodes + # for i_ep in range(cfg.test_eps): + # ep_reward = 0 # reward per episode + # ep_step = 0 + # state = env.reset() # reset and obtain initial state + # for _ in range(cfg.max_steps): + # action = agent.predict_action(state) # predict action + # next_state, reward, terminated, truncated , info = env.step(action) + # state = next_state + # ep_reward += reward + # ep_step += 1 + # if terminated: + # break + # rewards.append(ep_reward) + # steps.append(ep_step) + # logger.info(f"Episode: {i_ep+1}/{cfg.test_eps}, Reward: {ep_reward:.2f}, Steps:{ep_step}") + # logger.info("Finish testing!") + # env.close() + # return {'episodes':range(len(rewards)),'rewards':rewards,'steps':steps} + +if __name__ == "__main__": + main = Main() + main.run() + + + + diff --git a/projects/codes/DDPG/ddpg.py b/projects/codes/DDPG/ddpg.py index 93894e3..246966b 100644 --- a/projects/codes/DDPG/ddpg.py +++ b/projects/codes/DDPG/ddpg.py @@ -5,7 +5,7 @@ @Email: johnjim0816@gmail.com @Date: 2020-06-09 20:25:52 @LastEditor: John -LastEditTime: 2022-06-09 19:04:44 +LastEditTime: 2022-09-27 15:43:21 @Discription: @Environment: python 3.7.7 ''' @@ -14,96 +14,45 @@ import numpy as np import torch import torch.nn as nn import torch.optim as optim -import torch.nn.functional 
as F -class ReplayBuffer: - def __init__(self, capacity): - self.capacity = capacity # 经验回放的容量 - self.buffer = [] # 缓冲区 - self.position = 0 - - def push(self, state, action, reward, next_state, done): - ''' 缓冲区是一个队列,容量超出时去掉开始存入的转移(transition) - ''' - if len(self.buffer) < self.capacity: - self.buffer.append(None) - self.buffer[self.position] = (state, action, reward, next_state, done) - self.position = (self.position + 1) % self.capacity - - def sample(self, batch_size): - batch = random.sample(self.buffer, batch_size) # 随机采出小批量转移 - state, action, reward, next_state, done = zip(*batch) # 解压成状态,动作等 - return state, action, reward, next_state, done - - def __len__(self): - ''' 返回当前存储的量 - ''' - return len(self.buffer) -class Actor(nn.Module): - def __init__(self, n_states, n_actions, hidden_dim, init_w=3e-3): - super(Actor, self).__init__() - self.linear1 = nn.Linear(n_states, hidden_dim) - self.linear2 = nn.Linear(hidden_dim, hidden_dim) - self.linear3 = nn.Linear(hidden_dim, n_actions) - - self.linear3.weight.data.uniform_(-init_w, init_w) - self.linear3.bias.data.uniform_(-init_w, init_w) - - def forward(self, x): - x = F.relu(self.linear1(x)) - x = F.relu(self.linear2(x)) - x = torch.tanh(self.linear3(x)) - return x -class Critic(nn.Module): - def __init__(self, n_states, n_actions, hidden_dim, init_w=3e-3): - super(Critic, self).__init__() - - self.linear1 = nn.Linear(n_states + n_actions, hidden_dim) - self.linear2 = nn.Linear(hidden_dim, hidden_dim) - self.linear3 = nn.Linear(hidden_dim, 1) - # 随机初始化为较小的值 - self.linear3.weight.data.uniform_(-init_w, init_w) - self.linear3.bias.data.uniform_(-init_w, init_w) - - def forward(self, state, action): - # 按维数1拼接 - x = torch.cat([state, action], 1) - x = F.relu(self.linear1(x)) - x = F.relu(self.linear2(x)) - x = self.linear3(x) - return x -class DDPG: - def __init__(self, n_states, n_actions, cfg): - self.device = torch.device(cfg.device) - self.critic = Critic(n_states, n_actions, cfg.hidden_dim).to(self.device) - 
self.actor = Actor(n_states, n_actions, cfg.hidden_dim).to(self.device) - self.target_critic = Critic(n_states, n_actions, cfg.hidden_dim).to(self.device) - self.target_actor = Actor(n_states, n_actions, cfg.hidden_dim).to(self.device) - # 复制参数到目标网络 +class DDPG: + def __init__(self, models,memories,cfg): + self.device = torch.device(cfg['device']) + self.critic = models['critic'].to(self.device) + self.target_critic = models['critic'].to(self.device) + self.actor = models['actor'].to(self.device) + self.target_actor = models['actor'].to(self.device) + # copy weights from critic to target_critic for target_param, param in zip(self.target_critic.parameters(), self.critic.parameters()): target_param.data.copy_(param.data) + # copy weights from actor to target_actor for target_param, param in zip(self.target_actor.parameters(), self.actor.parameters()): target_param.data.copy_(param.data) + self.critic_optimizer = optim.Adam(self.critic.parameters(), lr=cfg['critic_lr']) + self.actor_optimizer = optim.Adam(self.actor.parameters(), lr=cfg['actor_lr']) + self.memory = memories['memory'] + self.batch_size = cfg['batch_size'] + self.gamma = cfg['gamma'] + self.tau = cfg['tau'] - self.critic_optimizer = optim.Adam( - self.critic.parameters(), lr=cfg.critic_lr) - self.actor_optimizer = optim.Adam(self.actor.parameters(), lr=cfg.actor_lr) - self.memory = ReplayBuffer(cfg.memory_capacity) - self.batch_size = cfg.batch_size - self.soft_tau = cfg.soft_tau # 软更新参数 - self.gamma = cfg.gamma - - def choose_action(self, state): + def sample_action(self, state): state = torch.FloatTensor(state).unsqueeze(0).to(self.device) action = self.actor(state) return action.detach().cpu().numpy()[0, 0] + @torch.no_grad() + def predict_action(self, state): + ''' predict action + ''' + state = torch.FloatTensor(state).unsqueeze(0).to(self.device) + action = self.actor(state) + return action.cpu().numpy()[0, 0] def update(self): - if len(self.memory) < self.batch_size: # 当 memory 中不满足一个批量时,不更新策略 + 
if len(self.memory) < self.batch_size: # when memory size is less than batch size, return return - # 从经验回放中(replay memory)中随机采样一个批量的转移(transition) + # sample a random minibatch of N transitions from R state, action, reward, next_state, done = self.memory.sample(self.batch_size) - # 转变为张量 + # convert to tensor state = torch.FloatTensor(np.array(state)).to(self.device) next_state = torch.FloatTensor(np.array(next_state)).to(self.device) action = torch.FloatTensor(np.array(action)).to(self.device) @@ -126,19 +75,22 @@ class DDPG: self.critic_optimizer.zero_grad() value_loss.backward() self.critic_optimizer.step() - # 软更新 + # soft update for target_param, param in zip(self.target_critic.parameters(), self.critic.parameters()): target_param.data.copy_( - target_param.data * (1.0 - self.soft_tau) + - param.data * self.soft_tau + target_param.data * (1.0 - self.tau) + + param.data * self.tau ) for target_param, param in zip(self.target_actor.parameters(), self.actor.parameters()): target_param.data.copy_( - target_param.data * (1.0 - self.soft_tau) + - param.data * self.soft_tau + target_param.data * (1.0 - self.tau) + + param.data * self.tau ) - def save(self,path): - torch.save(self.actor.state_dict(), path+'checkpoint.pt') + def save_model(self,path): + from pathlib import Path + # create path + Path(path).mkdir(parents=True, exist_ok=True) + torch.save(self.actor.state_dict(), f"{path}/actor_checkpoint.pt") - def load(self,path): - self.actor.load_state_dict(torch.load(path+'checkpoint.pt')) \ No newline at end of file + def load_model(self,path): + self.actor.load_state_dict(torch.load(f"{path}/actor_checkpoint.pt")) \ No newline at end of file diff --git a/projects/codes/DDPG/main.py b/projects/codes/DDPG/main.py new file mode 100644 index 0000000..8da5d29 --- /dev/null +++ b/projects/codes/DDPG/main.py @@ -0,0 +1,152 @@ +#!/usr/bin/env python +# coding=utf-8 +''' +@Author: John +@Email: johnjim0816@gmail.com +@Date: 2020-06-11 20:58:21 +@LastEditor: John 
+LastEditTime: 2022-09-27 15:50:12 +@Discription: +@Environment: python 3.7.7 +''' +import sys,os +curr_path = os.path.dirname(os.path.abspath(__file__)) # current path +parent_path = os.path.dirname(curr_path) # parent path +sys.path.append(parent_path) # add to system path + +import datetime +import gym +import torch +import argparse +import torch.nn as nn +import torch.nn.functional as F +from env import NormalizedActions,OUNoise +from ddpg import DDPG +from common.utils import all_seed +from common.memories import ReplayBufferQue +from common.launcher import Launcher +from envs.register import register_env + +class Actor(nn.Module): + def __init__(self, n_states, n_actions, hidden_dim, init_w=3e-3): + super(Actor, self).__init__() + self.linear1 = nn.Linear(n_states, hidden_dim) + self.linear2 = nn.Linear(hidden_dim, hidden_dim) + self.linear3 = nn.Linear(hidden_dim, n_actions) + + self.linear3.weight.data.uniform_(-init_w, init_w) + self.linear3.bias.data.uniform_(-init_w, init_w) + + def forward(self, x): + x = F.relu(self.linear1(x)) + x = F.relu(self.linear2(x)) + x = torch.tanh(self.linear3(x)) + return x +class Critic(nn.Module): + def __init__(self, n_states, n_actions, hidden_dim, init_w=3e-3): + super(Critic, self).__init__() + + self.linear1 = nn.Linear(n_states + n_actions, hidden_dim) + self.linear2 = nn.Linear(hidden_dim, hidden_dim) + self.linear3 = nn.Linear(hidden_dim, 1) + # 随机初始化为较小的值 + self.linear3.weight.data.uniform_(-init_w, init_w) + self.linear3.bias.data.uniform_(-init_w, init_w) + + def forward(self, state, action): + # 按维数1拼接 + x = torch.cat([state, action], 1) + x = F.relu(self.linear1(x)) + x = F.relu(self.linear2(x)) + x = self.linear3(x) + return x +class Main(Launcher): + def get_args(self): + """ hyperparameters + """ + curr_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") # obtain current time + parser = argparse.ArgumentParser(description="hyperparameters") + 
parser.add_argument('--algo_name',default='DDPG',type=str,help="name of algorithm") + parser.add_argument('--env_name',default='Pendulum-v1',type=str,help="name of environment") + parser.add_argument('--train_eps',default=300,type=int,help="episodes of training") + parser.add_argument('--test_eps',default=20,type=int,help="episodes of testing") + parser.add_argument('--max_steps',default=100000,type=int,help="steps per episode, much larger value can simulate infinite steps") + parser.add_argument('--gamma',default=0.99,type=float,help="discounted factor") + parser.add_argument('--critic_lr',default=1e-3,type=float,help="learning rate of critic") + parser.add_argument('--actor_lr',default=1e-4,type=float,help="learning rate of actor") + parser.add_argument('--memory_capacity',default=8000,type=int,help="memory capacity") + parser.add_argument('--batch_size',default=128,type=int) + parser.add_argument('--target_update',default=2,type=int) + parser.add_argument('--tau',default=1e-2,type=float) + parser.add_argument('--critic_hidden_dim',default=256,type=int) + parser.add_argument('--actor_hidden_dim',default=256,type=int) + parser.add_argument('--device',default='cpu',type=str,help="cpu or cuda") + parser.add_argument('--seed',default=1,type=int,help="random seed") + parser.add_argument('--show_fig',default=False,type=bool,help="if show figure or not") + parser.add_argument('--save_fig',default=True,type=bool,help="if save figure or not") + args = parser.parse_args() + default_args = {'result_path':f"{curr_path}/outputs/{args.env_name}/{curr_time}/results/", + 'model_path':f"{curr_path}/outputs/{args.env_name}/{curr_time}/models/", + } + args = {**vars(args),**default_args} # type(dict) + return args + + def env_agent_config(self,cfg): + register_env(cfg['env_name']) + env = gym.make(cfg['env_name']) + env = NormalizedActions(env) # decorate with action noise + if cfg['seed'] !=0: # set random seed + all_seed(env,seed=cfg["seed"]) + n_states = 
env.observation_space.shape[0] + n_actions = env.action_space.shape[0] + print(f"n_states: {n_states}, n_actions: {n_actions}") + cfg.update({"n_states":n_states,"n_actions":n_actions}) # update to cfg paramters + models = {"actor":Actor(n_states,n_actions,hidden_dim=cfg['actor_hidden_dim']),"critic":Critic(n_states,n_actions,hidden_dim=cfg['critic_hidden_dim'])} + memories = {"memory":ReplayBufferQue(cfg['memory_capacity'])} + agent = DDPG(models,memories,cfg) + return env,agent + def train(self,cfg, env, agent): + print('Start training!') + ou_noise = OUNoise(env.action_space) # noise of action + rewards = [] # record rewards for all episodes + for i_ep in range(cfg['train_eps']): + state = env.reset() + ou_noise.reset() + ep_reward = 0 + for i_step in range(cfg['max_steps']): + action = agent.sample_action(state) + action = ou_noise.get_action(action, i_step+1) + next_state, reward, done, _ = env.step(action) + ep_reward += reward + agent.memory.push((state, action, reward, next_state, done)) + agent.update() + state = next_state + if done: + break + if (i_ep+1)%10 == 0: + print(f"Env:{i_ep+1}/{cfg['train_eps']}, Reward:{ep_reward:.2f}") + rewards.append(ep_reward) + print('Finish training!') + return {'rewards':rewards} + + def test(self,cfg, env, agent): + print('Start testing!') + rewards = [] # record rewards for all episodes + for i_ep in range(cfg['test_eps']): + state = env.reset() + ep_reward = 0 + for i_step in range(cfg['max_steps']): + action = agent.predict_action(state) + next_state, reward, done, _ = env.step(action) + ep_reward += reward + state = next_state + if done: + break + rewards.append(ep_reward) + print(f"Episode:{i_ep+1}/{cfg['test_eps']}, Reward:{ep_reward:.1f}") + print('Finish testing!') + return {'rewards':rewards} +if __name__ == "__main__": + main = Main() + main.run() + diff --git a/projects/codes/DDPG/outputs/Pendulum-v1/20220713-225402/models/checkpoint.pt 
b/projects/codes/DDPG/outputs/Pendulum-v1/20220713-225402/models/checkpoint.pt deleted file mode 100644 index f245d72..0000000 Binary files a/projects/codes/DDPG/outputs/Pendulum-v1/20220713-225402/models/checkpoint.pt and /dev/null differ diff --git a/projects/codes/DDPG/outputs/Pendulum-v1/20220713-225402/results/params.json b/projects/codes/DDPG/outputs/Pendulum-v1/20220713-225402/results/params.json deleted file mode 100644 index 7d22454..0000000 --- a/projects/codes/DDPG/outputs/Pendulum-v1/20220713-225402/results/params.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "algo_name": "DDPG", - "env_name": "Pendulum-v1", - "train_eps": 300, - "test_eps": 20, - "gamma": 0.99, - "critic_lr": 0.001, - "actor_lr": 0.0001, - "memory_capacity": 8000, - "batch_size": 128, - "target_update": 2, - "soft_tau": 0.01, - "hidden_dim": 256, - "deivce": "cpu", - "result_path": "C:\\Users\\24438\\Desktop\\rl-tutorials/outputs/DDPG/outputs/Pendulum-v1/20220713-225402/results//", - "model_path": "C:\\Users\\24438\\Desktop\\rl-tutorials/outputs/DDPG/outputs/Pendulum-v1/20220713-225402/models/", - "save_fig": true -} \ No newline at end of file diff --git a/projects/codes/DDPG/outputs/Pendulum-v1/20220713-225402/results/test_ma_rewards.npy b/projects/codes/DDPG/outputs/Pendulum-v1/20220713-225402/results/test_ma_rewards.npy deleted file mode 100644 index 5c72032..0000000 Binary files a/projects/codes/DDPG/outputs/Pendulum-v1/20220713-225402/results/test_ma_rewards.npy and /dev/null differ diff --git a/projects/codes/DDPG/outputs/Pendulum-v1/20220713-225402/results/test_rewards.npy b/projects/codes/DDPG/outputs/Pendulum-v1/20220713-225402/results/test_rewards.npy deleted file mode 100644 index 3508874..0000000 Binary files a/projects/codes/DDPG/outputs/Pendulum-v1/20220713-225402/results/test_rewards.npy and /dev/null differ diff --git a/projects/codes/DDPG/outputs/Pendulum-v1/20220713-225402/results/test_rewards_curve.png 
b/projects/codes/DDPG/outputs/Pendulum-v1/20220713-225402/results/test_rewards_curve.png deleted file mode 100644 index 8d7fbd2..0000000 Binary files a/projects/codes/DDPG/outputs/Pendulum-v1/20220713-225402/results/test_rewards_curve.png and /dev/null differ diff --git a/projects/codes/DDPG/outputs/Pendulum-v1/20220713-225402/results/train_ma_rewards.npy b/projects/codes/DDPG/outputs/Pendulum-v1/20220713-225402/results/train_ma_rewards.npy deleted file mode 100644 index c3dd9ad..0000000 Binary files a/projects/codes/DDPG/outputs/Pendulum-v1/20220713-225402/results/train_ma_rewards.npy and /dev/null differ diff --git a/projects/codes/DDPG/outputs/Pendulum-v1/20220713-225402/results/train_rewards.npy b/projects/codes/DDPG/outputs/Pendulum-v1/20220713-225402/results/train_rewards.npy deleted file mode 100644 index 48e4157..0000000 Binary files a/projects/codes/DDPG/outputs/Pendulum-v1/20220713-225402/results/train_rewards.npy and /dev/null differ diff --git a/projects/codes/DDPG/outputs/Pendulum-v1/20220713-225402/results/train_rewards_curve.png b/projects/codes/DDPG/outputs/Pendulum-v1/20220713-225402/results/train_rewards_curve.png deleted file mode 100644 index ec6038f..0000000 Binary files a/projects/codes/DDPG/outputs/Pendulum-v1/20220713-225402/results/train_rewards_curve.png and /dev/null differ diff --git a/projects/codes/DDPG/outputs/Pendulum-v1/20220927-155053/models/actor_checkpoint.pt b/projects/codes/DDPG/outputs/Pendulum-v1/20220927-155053/models/actor_checkpoint.pt new file mode 100644 index 0000000..e65e7ca Binary files /dev/null and b/projects/codes/DDPG/outputs/Pendulum-v1/20220927-155053/models/actor_checkpoint.pt differ diff --git a/projects/codes/DDPG/outputs/Pendulum-v1/20220927-155053/results/params.json b/projects/codes/DDPG/outputs/Pendulum-v1/20220927-155053/results/params.json new file mode 100644 index 0000000..c3825cf --- /dev/null +++ b/projects/codes/DDPG/outputs/Pendulum-v1/20220927-155053/results/params.json @@ -0,0 +1,25 @@ +{ + 
"algo_name": "DDPG", + "env_name": "Pendulum-v1", + "train_eps": 300, + "test_eps": 20, + "max_steps": 100000, + "gamma": 0.99, + "critic_lr": 0.001, + "actor_lr": 0.0001, + "memory_capacity": 8000, + "batch_size": 128, + "target_update": 2, + "tau": 0.01, + "critic_hidden_dim": 256, + "actor_hidden_dim": 256, + "device": "cpu", + "seed": 1, + "show_fig": false, + "save_fig": true, + "result_path": "/Users/jj/Desktop/rl-tutorials/codes/DDPG/outputs/Pendulum-v1/20220927-155053/results/", + "model_path": "/Users/jj/Desktop/rl-tutorials/codes/DDPG/outputs/Pendulum-v1/20220927-155053/models/", + "n_states": 3, + "n_actions": 1, + "training_time": 358.8142900466919 +} \ No newline at end of file diff --git a/projects/codes/DDPG/outputs/Pendulum-v1/20220927-155053/results/testing_curve.png b/projects/codes/DDPG/outputs/Pendulum-v1/20220927-155053/results/testing_curve.png new file mode 100644 index 0000000..44e53e2 Binary files /dev/null and b/projects/codes/DDPG/outputs/Pendulum-v1/20220927-155053/results/testing_curve.png differ diff --git a/projects/codes/DDPG/outputs/Pendulum-v1/20220927-155053/results/testing_results.csv b/projects/codes/DDPG/outputs/Pendulum-v1/20220927-155053/results/testing_results.csv new file mode 100644 index 0000000..590c141 --- /dev/null +++ b/projects/codes/DDPG/outputs/Pendulum-v1/20220927-155053/results/testing_results.csv @@ -0,0 +1,21 @@ +rewards +-116.045416124376 +-126.18022935469217 +-231.46338228458293 +-246.40481094689758 +-304.69493818839186 +-124.39609191913091 +-1.060003582878406 +-114.19659653048288 +-348.9745708742037 +-116.10811133324769 +-117.20146333694844 +-118.66206784602966 +-235.17836229762355 +-356.14054913290624 +-118.38579118156366 +-351.9415915140771 +-114.50877866098972 +-124.775484599685 +-226.47062962476875 +-121.48872909193936 diff --git a/projects/codes/DDPG/outputs/Pendulum-v1/20220927-155053/results/training_curve.png b/projects/codes/DDPG/outputs/Pendulum-v1/20220927-155053/results/training_curve.png new 
file mode 100644 index 0000000..b0b95fe Binary files /dev/null and b/projects/codes/DDPG/outputs/Pendulum-v1/20220927-155053/results/training_curve.png differ diff --git a/projects/codes/DDPG/outputs/Pendulum-v1/20220927-155053/results/training_results.csv b/projects/codes/DDPG/outputs/Pendulum-v1/20220927-155053/results/training_results.csv new file mode 100644 index 0000000..2fa54ec --- /dev/null +++ b/projects/codes/DDPG/outputs/Pendulum-v1/20220927-155053/results/training_results.csv @@ -0,0 +1,301 @@ +rewards +-1557.8518596631177 +-1354.7599369723537 +-1375.5732016629706 +-1493.8609739040871 +-1426.7116204537845 +-1235.7920755027762 +-1339.1647620443073 +-1544.2379906560486 +-1539.6232758780877 +-1549.5690058648204 +-1446.9193195793853 +-1520.2666688767558 +-1525.0116707122581 +-1379.136573640111 +-1532.702831768523 +-1484.7552963941637 +-1359.6699201737677 +-1349.6805649166854 +-1510.869999766432 +-1515.8398785434708 +-1447.4648656578254 +-1537.3822077872178 +-1249.6517039877456 +-1350.0302666965736 +-1529.4363372505607 +-1320.28204807604 +-1502.9248141320654 +-1545.4861772197075 +-1579.928789692619 +-1413.296070504152 +-1242.4673258663781 +-1403.8672028946078 +-1452.7199002523635 +-871.6071114009982 +-1324.1789316121412 +-1313.3348146041249 +-1059.8722927418046 +-1054.232673559123 +-973.8956270782459 +-972.9936641224186 +-972.9477399905655 +-947.0613443333731 +-737.3866328989184 +-958.6068164634295 +-739.6973395350705 +-886.8383108399455 +-775.1430379821574 +-937.3115016337417 +-700.875502951337 +-829.9396339144109 +-271.1629773396998 +-493.5460684734584 +-485.9321719313203 +-858.3735607086766 +-1145.3440084994113 +-1121.1338201339777 +-1191.5640831332366 +-1350.0425368846784 +-249.25438665107953 +-727.9051714734406 +-368.5579316240395 +-392.0611344939354 +-955.3231703741553 +-488.27956192035265 +-362.2734695759137 +-949.5440839122496 +-496.8460016912189 +-726.6871514929877 +-424.48641462866266 +-954.7075428204689 +-608.9650086409792 +-848.6059768900151 
+-866.7052398755033 +-856.9846415044439 +-751.0342976129083 +-749.5118249469103 +-509.882299129811 +-506.56154097018043 +-906.0964475820368 +-1318.3941416286855 +-1422.2017011876615 +-1523.1661091894277 +-1209.2850593747999 +-1415.0972750475833 +-1533.2263827605834 +-1405.8345530072663 +-1244.3384723384913 +-1237.4704845061992 +-949.3394417935086 +-981.1855396112669 +-1241.224568444032 +-1033.118364799829 +-1017.2403725619487 +-981.9727804516916 +-853.1877724775591 +-869.0652369861646 +-1069.8265343327998 +-371.73173813891884 +-735.5887912713665 +-1262.050240428957 +-1242.985056062197 +-1191.6867713427482 +-1328.5323118458034 +-1015.5308653784714 +-895.3066515461381 +-994.1114862316568 +-761.4710321387583 +-717.6979056272868 +-782.302146467708 +-640.4913147345328 +-725.6469893076355 +-497.5346232085584 +-1027.1192149202325 +-950.0117149822681 +-956.1343737377374 +-708.9489626669097 +-964.5003064113283 +-611.9111516886613 +-612.3182791021098 +-1100.0047939174613 +-984.9262458612923 +-858.7106075590494 +-842.305917848386 +-745.9043991922597 +-741.2168858394704 +-1143.0750387284456 +-755.5257242325362 +-745.8440029056219 +-387.8717950334138 +-764.6628701051523 +-486.7967495537958 +-485.13357559164814 +-313.5415216767419 +-611.3450529954782 +-611.1570544377465 +-507.6456747676814 +-615.2032627013064 +-242.37988821149764 +-603.85498620892 +-352.2672241055367 +-155.99874664988383 +-615.4003063516313 +-384.9811293551548 +-498.80727354456315 +-407.6898591217813 +-1213.6383844696395 +-1122.2425748913884 +-592.4819308883913 +-478.2046833075051 +-891.0254788311132 +-482.40204115385 +-339.34676196677407 +-582.9985110154428 +-213.38243627478826 +-928.8434951613825 +-1545.5433749195483 +-1179.5016285049896 +-1211.9549773601925 +-1396.8082561792166 +-1318.073128824395 +-597.3837225413702 +-564.7793352410449 +-723.744223659601 +-653.0145534050461 +-847.6138123247009 +-385.62784320332867 +-245.25250602651928 +-117.55094416757835 +-864.0064774069044 +-124.30221387458867 
+-244.4014050243669 +-1148.861754008653 +-914.4047868424254 +-765.9394408203351 +-124.05114610943177 +-605.7641303826842 +-616.3595829453579 +-375.5024692962698 +-253.51874076866997 +-240.08405245866714 +-503.96565579077225 +-606.7646526173963 +-502.6512112729435 +-746.404013238678 +-718.8658110051653 +-125.65808359856703 +-247.62256797883364 +-363.69852213666803 +-249.21801061415547 +-491.7724416523124 +-235.37050442527357 +-609.6026403583944 +-236.05731608228092 +-381.19853850450454 +-298.7683201867404 +-127.64145601534942 +-233.4300138495176 +-129.11243486763516 +-390.0092951263507 +-1000.7729892969854 +-249.60445310459787 +-253.02347910759622 +-129.04269174391223 +-360.6321251486308 +-377.26297602576534 +-124.98466986009481 +-245.47913567739212 +-127.0885254550411 +-118.11013006825459 +-128.8682755001942 +-497.3015586531096 +-340.77352433313484 +-514.4945799737978 +-503.24077308842783 +-627.9068157464455 +-511.39396524392146 +-763.8866112068075 +-741.7885082408757 +-617.4945380476306 +-950.3176437519387 +-643.4791402436576 +-511.9377874351982 +-573.6219349516633 +-564.1297823875693 +-242.06399233336583 +-496.4020380325518 +-360.56387982880364 +-495.4590728336022 +-503.7263345016764 +-122.47964616802327 +-254.16543926263168 +-614.5335268729743 +-234.3718017676852 +-301.27514663062874 +-387.64758894986204 +-368.74492411716415 +-364.43559131093593 +-160.6845848115533 +-504.1948947975429 +-246.51676032967683 +-251.5732500220603 +-600.1463819723879 +-247.17476928471288 +-381.924164337607 +-377.4773226068174 +-378.511830774651 +-126.69199895843033 +-365.0506645811703 +-130.45052114802874 +-374.37400288581813 +-502.37678159638887 +-374.43552658473055 +-241.157211525502 +-388.9597456642503 +-249.4412385534861 +-114.71395078439846 +-864.6882327286056 +-626.8144095971478 +-732.9226896140248 +-368.24767905020394 +-369.7425524469132 +-398.07832598184626 +-906.7113918582257 +-252.2343258180765 +-370.4258473086036 +-736.0203154396909 +-609.4605173515027 +-661.1255920773486 
+-489.9605291008584 +-364.1671188501402 +-644.4029089587781 +-477.9510457677364 +-128.78294672880136 +-373.74382001694886 +-380.69931133982936 +-372.60275628381805 +-743.0410655515724 +-597.558847789258 +-387.94245652694394 +-725.3939448944484 +-409.1301313430852 +-491.8442467896486 +-123.0638156839621 +-377.9292326597324 +-489.27209762667974 +-255.63227821371257 +-379.5885382060625 +-370.2312967024669 +-250.94061817008688 +-131.2125308195906 +-600.3312016651868 +-130.84444772735733 +-312.6287688438562 +-382.4144610039701 +-259.03558003697265 +-224.92206667096863 +-376.81390821359685 +-382.39993489751646 +-380.25599578593636 +-610.1016672243638 diff --git a/projects/codes/DDPG/task0.py b/projects/codes/DDPG/task0.py deleted file mode 100644 index 20688d3..0000000 --- a/projects/codes/DDPG/task0.py +++ /dev/null @@ -1,133 +0,0 @@ -#!/usr/bin/env python -# coding=utf-8 -''' -@Author: John -@Email: johnjim0816@gmail.com -@Date: 2020-06-11 20:58:21 -@LastEditor: John -LastEditTime: 2022-07-21 21:51:34 -@Discription: -@Environment: python 3.7.7 -''' -import sys,os -curr_path = os.path.dirname(os.path.abspath(__file__)) # current path -parent_path = os.path.dirname(curr_path) # parent path -sys.path.append(parent_path) # add to system path - -import datetime -import gym -import torch -import argparse - -from env import NormalizedActions,OUNoise -from ddpg import DDPG -from common.utils import save_results,make_dir -from common.utils import plot_rewards,save_args - -def get_args(): - """ Hyperparameters - """ - curr_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") # Obtain current time - parser = argparse.ArgumentParser(description="hyperparameters") - parser.add_argument('--algo_name',default='DDPG',type=str,help="name of algorithm") - parser.add_argument('--env_name',default='Pendulum-v1',type=str,help="name of environment") - parser.add_argument('--train_eps',default=300,type=int,help="episodes of training") - 
parser.add_argument('--test_eps',default=20,type=int,help="episodes of testing") - parser.add_argument('--gamma',default=0.99,type=float,help="discounted factor") - parser.add_argument('--critic_lr',default=1e-3,type=float,help="learning rate of critic") - parser.add_argument('--actor_lr',default=1e-4,type=float,help="learning rate of actor") - parser.add_argument('--memory_capacity',default=8000,type=int,help="memory capacity") - parser.add_argument('--batch_size',default=128,type=int) - parser.add_argument('--target_update',default=2,type=int) - parser.add_argument('--soft_tau',default=1e-2,type=float) - parser.add_argument('--hidden_dim',default=256,type=int) - parser.add_argument('--device',default='cpu',type=str,help="cpu or cuda") - parser.add_argument('--result_path',default=curr_path + "/outputs/" + parser.parse_args().env_name + \ - '/' + curr_time + '/results/' ) - parser.add_argument('--model_path',default=curr_path + "/outputs/" + parser.parse_args().env_name + \ - '/' + curr_time + '/models/' ) # path to save models - parser.add_argument('--save_fig',default=True,type=bool,help="if save figure or not") - args = parser.parse_args() - return args - -def env_agent_config(cfg,seed=1): - env = NormalizedActions(gym.make(cfg.env_name)) # 装饰action噪声 - env.seed(seed) # 随机种子 - n_states = env.observation_space.shape[0] - n_actions = env.action_space.shape[0] - agent = DDPG(n_states,n_actions,cfg) - return env,agent -def train(cfg, env, agent): - print('Start training!') - print(f'Env:{cfg.env_name}, Algorithm:{cfg.algo_name}, Device:{cfg.device}') - ou_noise = OUNoise(env.action_space) # noise of action - rewards = [] # 记录所有回合的奖励 - ma_rewards = [] # 记录所有回合的滑动平均奖励 - for i_ep in range(cfg.train_eps): - state = env.reset() - ou_noise.reset() - done = False - ep_reward = 0 - i_step = 0 - while not done: - i_step += 1 - action = agent.choose_action(state) - action = ou_noise.get_action(action, i_step) - next_state, reward, done, _ = env.step(action) - ep_reward += 
reward - agent.memory.push(state, action, reward, next_state, done) - agent.update() - state = next_state - if (i_ep+1)%10 == 0: - print(f'Env:{i_ep+1}/{cfg.train_eps}, Reward:{ep_reward:.2f}') - rewards.append(ep_reward) - if ma_rewards: - ma_rewards.append(0.9*ma_rewards[-1]+0.1*ep_reward) - else: - ma_rewards.append(ep_reward) - print('Finish training!') - return {'rewards':rewards,'ma_rewards':ma_rewards} - -def test(cfg, env, agent): - print('Start testing') - print(f'Env:{cfg.env_name}, Algorithm:{cfg.algo_name}, Device:{cfg.device}') - rewards = [] # 记录所有回合的奖励 - ma_rewards = [] # 记录所有回合的滑动平均奖励 - for i_ep in range(cfg.test_eps): - state = env.reset() - done = False - ep_reward = 0 - i_step = 0 - while not done: - i_step += 1 - action = agent.choose_action(state) - next_state, reward, done, _ = env.step(action) - ep_reward += reward - state = next_state - rewards.append(ep_reward) - if ma_rewards: - ma_rewards.append(0.9*ma_rewards[-1]+0.1*ep_reward) - else: - ma_rewards.append(ep_reward) - print(f"Epside:{i_ep+1}/{cfg.test_eps}, Reward:{ep_reward:.1f}") - print('Finish testing!') - return {'rewards':rewards,'ma_rewards':ma_rewards} -if __name__ == "__main__": - cfg = get_args() - # training - env,agent = env_agent_config(cfg,seed=1) - res_dic = train(cfg, env, agent) - make_dir(cfg.result_path, cfg.model_path) - save_args(cfg) - agent.save(path=cfg.model_path) - save_results(res_dic, tag='train', - path=cfg.result_path) - plot_rewards(res_dic['rewards'], res_dic['ma_rewards'], cfg, tag="train") - # testing - env,agent = env_agent_config(cfg,seed=10) - agent.load(path=cfg.model_path) - res_dic = test(cfg,env,agent) - save_results(res_dic, tag='test', - path=cfg.result_path) - plot_rewards(res_dic['rewards'], res_dic['ma_rewards'], cfg, tag="test") - diff --git a/projects/codes/DQN/Test_CartPole-v1_DQN_20221031-001343/config.yaml b/projects/codes/DQN/Test_CartPole-v1_DQN_20221031-001343/config.yaml new file mode 100644 index 0000000..5e3ad4e --- /dev/null +++ 
b/projects/codes/DQN/Test_CartPole-v1_DQN_20221031-001343/config.yaml @@ -0,0 +1,25 @@ +general_cfg: + algo_name: DQN + device: cuda + env_name: CartPole-v1 + eval_eps: 10 + eval_per_episode: 5 + load_checkpoint: true + load_path: Train_CartPole-v1_DQN_20221031-001201 + max_steps: 200 + mode: test + save_fig: true + seed: 0 + show_fig: false + test_eps: 10 + train_eps: 100 +algo_cfg: + batch_size: 64 + buffer_size: 100000 + epsilon_decay: 500 + epsilon_end: 0.01 + epsilon_start: 0.95 + gamma: 0.95 + hidden_dim: 256 + lr: 0.0001 + target_update: 4 diff --git a/projects/codes/DQN/Test_CartPole-v1_DQN_20221031-001343/logs/log.txt b/projects/codes/DQN/Test_CartPole-v1_DQN_20221031-001343/logs/log.txt new file mode 100644 index 0000000..44f28cb --- /dev/null +++ b/projects/codes/DQN/Test_CartPole-v1_DQN_20221031-001343/logs/log.txt @@ -0,0 +1,14 @@ +2022-10-31 00:13:43 - r - INFO: - n_states: 4, n_actions: 2 +2022-10-31 00:13:44 - r - INFO: - Start testing! +2022-10-31 00:13:44 - r - INFO: - Env: CartPole-v1, Algorithm: DQN, Device: cuda +2022-10-31 00:13:45 - r - INFO: - Episode: 1/10, Reward: 200.0, Step: 200 +2022-10-31 00:13:45 - r - INFO: - Episode: 2/10, Reward: 200.0, Step: 200 +2022-10-31 00:13:45 - r - INFO: - Episode: 3/10, Reward: 200.0, Step: 200 +2022-10-31 00:13:45 - r - INFO: - Episode: 4/10, Reward: 200.0, Step: 200 +2022-10-31 00:13:45 - r - INFO: - Episode: 5/10, Reward: 200.0, Step: 200 +2022-10-31 00:13:45 - r - INFO: - Episode: 6/10, Reward: 200.0, Step: 200 +2022-10-31 00:13:45 - r - INFO: - Episode: 7/10, Reward: 200.0, Step: 200 +2022-10-31 00:13:45 - r - INFO: - Episode: 8/10, Reward: 200.0, Step: 200 +2022-10-31 00:13:45 - r - INFO: - Episode: 9/10, Reward: 200.0, Step: 200 +2022-10-31 00:13:45 - r - INFO: - Episode: 10/10, Reward: 200.0, Step: 200 +2022-10-31 00:13:45 - r - INFO: - Finish testing! 
diff --git a/projects/codes/DQN/Test_CartPole-v1_DQN_20221031-001343/models/checkpoint.pt b/projects/codes/DQN/Test_CartPole-v1_DQN_20221031-001343/models/checkpoint.pt new file mode 100644 index 0000000..722eb69 Binary files /dev/null and b/projects/codes/DQN/Test_CartPole-v1_DQN_20221031-001343/models/checkpoint.pt differ diff --git a/projects/codes/DQN/Test_CartPole-v1_DQN_20221031-001343/results/learning_curve.png b/projects/codes/DQN/Test_CartPole-v1_DQN_20221031-001343/results/learning_curve.png new file mode 100644 index 0000000..046009a Binary files /dev/null and b/projects/codes/DQN/Test_CartPole-v1_DQN_20221031-001343/results/learning_curve.png differ diff --git a/projects/codes/DQN/Test_CartPole-v1_DQN_20221031-001343/results/res.csv b/projects/codes/DQN/Test_CartPole-v1_DQN_20221031-001343/results/res.csv new file mode 100644 index 0000000..cbbcf2e --- /dev/null +++ b/projects/codes/DQN/Test_CartPole-v1_DQN_20221031-001343/results/res.csv @@ -0,0 +1,11 @@ +episodes,rewards,steps +0,200.0,200 +1,200.0,200 +2,200.0,200 +3,200.0,200 +4,200.0,200 +5,200.0,200 +6,200.0,200 +7,200.0,200 +8,200.0,200 +9,200.0,200 diff --git a/projects/codes/DQN/Train_Acrobot-v1_DQN_20221026-094645/config.yaml b/projects/codes/DQN/Train_Acrobot-v1_DQN_20221026-094645/config.yaml new file mode 100644 index 0000000..7416aec --- /dev/null +++ b/projects/codes/DQN/Train_Acrobot-v1_DQN_20221026-094645/config.yaml @@ -0,0 +1,23 @@ +general_cfg: + algo_name: DQN + device: cuda + env_name: Acrobot-v1 + load_checkpoint: false + load_path: Train_CartPole-v1_DQN_20221026-054757 + max_steps: 100000 + mode: train + save_fig: true + seed: 1 + show_fig: false + test_eps: 10 + train_eps: 100 +algo_cfg: + batch_size: 128 + buffer_size: 200000 + epsilon_decay: 500 + epsilon_end: 0.01 + epsilon_start: 0.95 + gamma: 0.95 + hidden_dim: 256 + lr: 0.002 + target_update: 4 diff --git a/projects/codes/DQN/Train_Acrobot-v1_DQN_20221026-094645/logs/log.txt 
b/projects/codes/DQN/Train_Acrobot-v1_DQN_20221026-094645/logs/log.txt new file mode 100644 index 0000000..e745c8c --- /dev/null +++ b/projects/codes/DQN/Train_Acrobot-v1_DQN_20221026-094645/logs/log.txt @@ -0,0 +1,104 @@ +2022-10-26 09:46:45 - r - INFO: - n_states: 6, n_actions: 3 +2022-10-26 09:46:48 - r - INFO: - Start training! +2022-10-26 09:46:48 - r - INFO: - Env: Acrobot-v1, Algorithm: DQN, Device: cuda +2022-10-26 09:46:50 - r - INFO: - Episode: 1/100, Reward: -861.00: Epislon: 0.178 +2022-10-26 09:46:50 - r - INFO: - Episode: 2/100, Reward: -252.00: Epislon: 0.111 +2022-10-26 09:46:50 - r - INFO: - Episode: 3/100, Reward: -196.00: Epislon: 0.078 +2022-10-26 09:46:51 - r - INFO: - Episode: 4/100, Reward: -390.00: Epislon: 0.041 +2022-10-26 09:46:52 - r - INFO: - Episode: 5/100, Reward: -371.00: Epislon: 0.025 +2022-10-26 09:46:52 - r - INFO: - Episode: 6/100, Reward: -237.00: Epislon: 0.019 +2022-10-26 09:46:52 - r - INFO: - Episode: 7/100, Reward: -227.00: Epislon: 0.016 +2022-10-26 09:46:53 - r - INFO: - Episode: 8/100, Reward: -228.00: Epislon: 0.014 +2022-10-26 09:46:53 - r - INFO: - Episode: 9/100, Reward: -305.00: Epislon: 0.012 +2022-10-26 09:46:54 - r - INFO: - Episode: 10/100, Reward: -234.00: Epislon: 0.011 +2022-10-26 09:46:54 - r - INFO: - Episode: 11/100, Reward: -204.00: Epislon: 0.011 +2022-10-26 09:46:55 - r - INFO: - Episode: 12/100, Reward: -277.00: Epislon: 0.010 +2022-10-26 09:46:55 - r - INFO: - Episode: 13/100, Reward: -148.00: Epislon: 0.010 +2022-10-26 09:46:56 - r - INFO: - Episode: 14/100, Reward: -372.00: Epislon: 0.010 +2022-10-26 09:46:56 - r - INFO: - Episode: 15/100, Reward: -273.00: Epislon: 0.010 +2022-10-26 09:46:56 - r - INFO: - Episode: 16/100, Reward: -105.00: Epislon: 0.010 +2022-10-26 09:46:56 - r - INFO: - Episode: 17/100, Reward: -79.00: Epislon: 0.010 +2022-10-26 09:46:57 - r - INFO: - Episode: 18/100, Reward: -112.00: Epislon: 0.010 +2022-10-26 09:46:57 - r - INFO: - Episode: 19/100, Reward: -276.00: Epislon: 
0.010 +2022-10-26 09:46:57 - r - INFO: - Episode: 20/100, Reward: -148.00: Epislon: 0.010 +2022-10-26 09:46:58 - r - INFO: - Episode: 21/100, Reward: -201.00: Epislon: 0.010 +2022-10-26 09:46:58 - r - INFO: - Episode: 22/100, Reward: -173.00: Epislon: 0.010 +2022-10-26 09:46:58 - r - INFO: - Episode: 23/100, Reward: -226.00: Epislon: 0.010 +2022-10-26 09:46:59 - r - INFO: - Episode: 24/100, Reward: -154.00: Epislon: 0.010 +2022-10-26 09:46:59 - r - INFO: - Episode: 25/100, Reward: -269.00: Epislon: 0.010 +2022-10-26 09:46:59 - r - INFO: - Episode: 26/100, Reward: -191.00: Epislon: 0.010 +2022-10-26 09:47:00 - r - INFO: - Episode: 27/100, Reward: -177.00: Epislon: 0.010 +2022-10-26 09:47:00 - r - INFO: - Episode: 28/100, Reward: -209.00: Epislon: 0.010 +2022-10-26 09:47:00 - r - INFO: - Episode: 29/100, Reward: -116.00: Epislon: 0.010 +2022-10-26 09:47:00 - r - INFO: - Episode: 30/100, Reward: -117.00: Epislon: 0.010 +2022-10-26 09:47:01 - r - INFO: - Episode: 31/100, Reward: -121.00: Epislon: 0.010 +2022-10-26 09:47:01 - r - INFO: - Episode: 32/100, Reward: -208.00: Epislon: 0.010 +2022-10-26 09:47:01 - r - INFO: - Episode: 33/100, Reward: -147.00: Epislon: 0.010 +2022-10-26 09:47:02 - r - INFO: - Episode: 34/100, Reward: -104.00: Epislon: 0.010 +2022-10-26 09:47:02 - r - INFO: - Episode: 35/100, Reward: -161.00: Epislon: 0.010 +2022-10-26 09:47:02 - r - INFO: - Episode: 36/100, Reward: -144.00: Epislon: 0.010 +2022-10-26 09:47:02 - r - INFO: - Episode: 37/100, Reward: -131.00: Epislon: 0.010 +2022-10-26 09:47:03 - r - INFO: - Episode: 38/100, Reward: -226.00: Epislon: 0.010 +2022-10-26 09:47:03 - r - INFO: - Episode: 39/100, Reward: -117.00: Epislon: 0.010 +2022-10-26 09:47:03 - r - INFO: - Episode: 40/100, Reward: -344.00: Epislon: 0.010 +2022-10-26 09:47:04 - r - INFO: - Episode: 41/100, Reward: -123.00: Epislon: 0.010 +2022-10-26 09:47:04 - r - INFO: - Episode: 42/100, Reward: -232.00: Epislon: 0.010 +2022-10-26 09:47:04 - r - INFO: - Episode: 43/100, Reward: 
-190.00: Epislon: 0.010 +2022-10-26 09:47:05 - r - INFO: - Episode: 44/100, Reward: -176.00: Epislon: 0.010 +2022-10-26 09:47:05 - r - INFO: - Episode: 45/100, Reward: -139.00: Epislon: 0.010 +2022-10-26 09:47:06 - r - INFO: - Episode: 46/100, Reward: -410.00: Epislon: 0.010 +2022-10-26 09:47:06 - r - INFO: - Episode: 47/100, Reward: -115.00: Epislon: 0.010 +2022-10-26 09:47:06 - r - INFO: - Episode: 48/100, Reward: -118.00: Epislon: 0.010 +2022-10-26 09:47:06 - r - INFO: - Episode: 49/100, Reward: -113.00: Epislon: 0.010 +2022-10-26 09:47:07 - r - INFO: - Episode: 50/100, Reward: -355.00: Epislon: 0.010 +2022-10-26 09:47:07 - r - INFO: - Episode: 51/100, Reward: -110.00: Epislon: 0.010 +2022-10-26 09:47:07 - r - INFO: - Episode: 52/100, Reward: -148.00: Epislon: 0.010 +2022-10-26 09:47:08 - r - INFO: - Episode: 53/100, Reward: -135.00: Epislon: 0.010 +2022-10-26 09:47:08 - r - INFO: - Episode: 54/100, Reward: -220.00: Epislon: 0.010 +2022-10-26 09:47:08 - r - INFO: - Episode: 55/100, Reward: -157.00: Epislon: 0.010 +2022-10-26 09:47:09 - r - INFO: - Episode: 56/100, Reward: -130.00: Epislon: 0.010 +2022-10-26 09:47:09 - r - INFO: - Episode: 57/100, Reward: -150.00: Epislon: 0.010 +2022-10-26 09:47:09 - r - INFO: - Episode: 58/100, Reward: -254.00: Epislon: 0.010 +2022-10-26 09:47:10 - r - INFO: - Episode: 59/100, Reward: -148.00: Epislon: 0.010 +2022-10-26 09:47:10 - r - INFO: - Episode: 60/100, Reward: -108.00: Epislon: 0.010 +2022-10-26 09:47:10 - r - INFO: - Episode: 61/100, Reward: -152.00: Epislon: 0.010 +2022-10-26 09:47:10 - r - INFO: - Episode: 62/100, Reward: -107.00: Epislon: 0.010 +2022-10-26 09:47:10 - r - INFO: - Episode: 63/100, Reward: -110.00: Epislon: 0.010 +2022-10-26 09:47:11 - r - INFO: - Episode: 64/100, Reward: -266.00: Epislon: 0.010 +2022-10-26 09:47:11 - r - INFO: - Episode: 65/100, Reward: -344.00: Epislon: 0.010 +2022-10-26 09:47:12 - r - INFO: - Episode: 66/100, Reward: -93.00: Epislon: 0.010 +2022-10-26 09:47:12 - r - INFO: - Episode: 
67/100, Reward: -113.00: Epislon: 0.010 +2022-10-26 09:47:12 - r - INFO: - Episode: 68/100, Reward: -191.00: Epislon: 0.010 +2022-10-26 09:47:12 - r - INFO: - Episode: 69/100, Reward: -102.00: Epislon: 0.010 +2022-10-26 09:47:13 - r - INFO: - Episode: 70/100, Reward: -187.00: Epislon: 0.010 +2022-10-26 09:47:13 - r - INFO: - Episode: 71/100, Reward: -158.00: Epislon: 0.010 +2022-10-26 09:47:13 - r - INFO: - Episode: 72/100, Reward: -166.00: Epislon: 0.010 +2022-10-26 09:47:14 - r - INFO: - Episode: 73/100, Reward: -202.00: Epislon: 0.010 +2022-10-26 09:47:14 - r - INFO: - Episode: 74/100, Reward: -179.00: Epislon: 0.010 +2022-10-26 09:47:14 - r - INFO: - Episode: 75/100, Reward: -150.00: Epislon: 0.010 +2022-10-26 09:47:14 - r - INFO: - Episode: 76/100, Reward: -170.00: Epislon: 0.010 +2022-10-26 09:47:15 - r - INFO: - Episode: 77/100, Reward: -149.00: Epislon: 0.010 +2022-10-26 09:47:15 - r - INFO: - Episode: 78/100, Reward: -119.00: Epislon: 0.010 +2022-10-26 09:47:15 - r - INFO: - Episode: 79/100, Reward: -115.00: Epislon: 0.010 +2022-10-26 09:47:15 - r - INFO: - Episode: 80/100, Reward: -97.00: Epislon: 0.010 +2022-10-26 09:47:16 - r - INFO: - Episode: 81/100, Reward: -153.00: Epislon: 0.010 +2022-10-26 09:47:16 - r - INFO: - Episode: 82/100, Reward: -97.00: Epislon: 0.010 +2022-10-26 09:47:16 - r - INFO: - Episode: 83/100, Reward: -211.00: Epislon: 0.010 +2022-10-26 09:47:16 - r - INFO: - Episode: 84/100, Reward: -195.00: Epislon: 0.010 +2022-10-26 09:47:17 - r - INFO: - Episode: 85/100, Reward: -125.00: Epislon: 0.010 +2022-10-26 09:47:17 - r - INFO: - Episode: 86/100, Reward: -155.00: Epislon: 0.010 +2022-10-26 09:47:17 - r - INFO: - Episode: 87/100, Reward: -151.00: Epislon: 0.010 +2022-10-26 09:47:18 - r - INFO: - Episode: 88/100, Reward: -194.00: Epislon: 0.010 +2022-10-26 09:47:18 - r - INFO: - Episode: 89/100, Reward: -188.00: Epislon: 0.010 +2022-10-26 09:47:18 - r - INFO: - Episode: 90/100, Reward: -195.00: Epislon: 0.010 +2022-10-26 09:47:19 - r - 
INFO: - Episode: 91/100, Reward: -141.00: Epislon: 0.010 +2022-10-26 09:47:19 - r - INFO: - Episode: 92/100, Reward: -132.00: Epislon: 0.010 +2022-10-26 09:47:19 - r - INFO: - Episode: 93/100, Reward: -127.00: Epislon: 0.010 +2022-10-26 09:47:19 - r - INFO: - Episode: 94/100, Reward: -195.00: Epislon: 0.010 +2022-10-26 09:47:20 - r - INFO: - Episode: 95/100, Reward: -152.00: Epislon: 0.010 +2022-10-26 09:47:20 - r - INFO: - Episode: 96/100, Reward: -145.00: Epislon: 0.010 +2022-10-26 09:47:20 - r - INFO: - Episode: 97/100, Reward: -123.00: Epislon: 0.010 +2022-10-26 09:47:20 - r - INFO: - Episode: 98/100, Reward: -176.00: Epislon: 0.010 +2022-10-26 09:47:21 - r - INFO: - Episode: 99/100, Reward: -180.00: Epislon: 0.010 +2022-10-26 09:47:21 - r - INFO: - Episode: 100/100, Reward: -124.00: Epislon: 0.010 +2022-10-26 09:47:21 - r - INFO: - Finish training! diff --git a/projects/codes/DQN/Train_Acrobot-v1_DQN_20221026-094645/models/checkpoint.pt b/projects/codes/DQN/Train_Acrobot-v1_DQN_20221026-094645/models/checkpoint.pt new file mode 100644 index 0000000..5448aca Binary files /dev/null and b/projects/codes/DQN/Train_Acrobot-v1_DQN_20221026-094645/models/checkpoint.pt differ diff --git a/projects/codes/DQN/Train_Acrobot-v1_DQN_20221026-094645/results/learning_curve.png b/projects/codes/DQN/Train_Acrobot-v1_DQN_20221026-094645/results/learning_curve.png new file mode 100644 index 0000000..7f1054d Binary files /dev/null and b/projects/codes/DQN/Train_Acrobot-v1_DQN_20221026-094645/results/learning_curve.png differ diff --git a/projects/codes/DQN/Train_Acrobot-v1_DQN_20221026-094645/results/res.csv b/projects/codes/DQN/Train_Acrobot-v1_DQN_20221026-094645/results/res.csv new file mode 100644 index 0000000..1758be2 --- /dev/null +++ b/projects/codes/DQN/Train_Acrobot-v1_DQN_20221026-094645/results/res.csv @@ -0,0 +1,101 @@ +episodes,rewards,steps +0,-861.0,862 +1,-252.0,253 +2,-196.0,197 +3,-390.0,391 +4,-371.0,372 +5,-237.0,238 +6,-227.0,228 +7,-228.0,229 +8,-305.0,306 
+9,-234.0,235 +10,-204.0,205 +11,-277.0,278 +12,-148.0,149 +13,-372.0,373 +14,-273.0,274 +15,-105.0,106 +16,-79.0,80 +17,-112.0,113 +18,-276.0,277 +19,-148.0,149 +20,-201.0,202 +21,-173.0,174 +22,-226.0,227 +23,-154.0,155 +24,-269.0,270 +25,-191.0,192 +26,-177.0,178 +27,-209.0,210 +28,-116.0,117 +29,-117.0,118 +30,-121.0,122 +31,-208.0,209 +32,-147.0,148 +33,-104.0,105 +34,-161.0,162 +35,-144.0,145 +36,-131.0,132 +37,-226.0,227 +38,-117.0,118 +39,-344.0,345 +40,-123.0,124 +41,-232.0,233 +42,-190.0,191 +43,-176.0,177 +44,-139.0,140 +45,-410.0,411 +46,-115.0,116 +47,-118.0,119 +48,-113.0,114 +49,-355.0,356 +50,-110.0,111 +51,-148.0,149 +52,-135.0,136 +53,-220.0,221 +54,-157.0,158 +55,-130.0,131 +56,-150.0,151 +57,-254.0,255 +58,-148.0,149 +59,-108.0,109 +60,-152.0,153 +61,-107.0,108 +62,-110.0,111 +63,-266.0,267 +64,-344.0,345 +65,-93.0,94 +66,-113.0,114 +67,-191.0,192 +68,-102.0,103 +69,-187.0,188 +70,-158.0,159 +71,-166.0,167 +72,-202.0,203 +73,-179.0,180 +74,-150.0,151 +75,-170.0,171 +76,-149.0,150 +77,-119.0,120 +78,-115.0,116 +79,-97.0,98 +80,-153.0,154 +81,-97.0,98 +82,-211.0,212 +83,-195.0,196 +84,-125.0,126 +85,-155.0,156 +86,-151.0,152 +87,-194.0,195 +88,-188.0,189 +89,-195.0,196 +90,-141.0,142 +91,-132.0,133 +92,-127.0,128 +93,-195.0,196 +94,-152.0,153 +95,-145.0,146 +96,-123.0,124 +97,-176.0,177 +98,-180.0,181 +99,-124.0,125 diff --git a/projects/codes/DQN/Train_CartPole-v1_DQN_20221031-001201/config.yaml b/projects/codes/DQN/Train_CartPole-v1_DQN_20221031-001201/config.yaml new file mode 100644 index 0000000..33950ad --- /dev/null +++ b/projects/codes/DQN/Train_CartPole-v1_DQN_20221031-001201/config.yaml @@ -0,0 +1,25 @@ +general_cfg: + algo_name: DQN + device: cuda + env_name: CartPole-v1 + eval_eps: 10 + eval_per_episode: 5 + load_checkpoint: false + load_path: tasks + max_steps: 200 + mode: train + save_fig: true + seed: 1 + show_fig: false + test_eps: 10 + train_eps: 100 +algo_cfg: + batch_size: 64 + buffer_size: 100000 + epsilon_decay: 500 + 
epsilon_end: 0.01 + epsilon_start: 0.95 + gamma: 0.95 + hidden_dim: 256 + lr: 0.0001 + target_update: 800 diff --git a/projects/codes/DQN/Train_CartPole-v1_DQN_20221031-001201/logs/log.txt b/projects/codes/DQN/Train_CartPole-v1_DQN_20221031-001201/logs/log.txt new file mode 100644 index 0000000..5b084be --- /dev/null +++ b/projects/codes/DQN/Train_CartPole-v1_DQN_20221031-001201/logs/log.txt @@ -0,0 +1,116 @@ +2022-10-31 00:12:01 - r - INFO: - n_states: 4, n_actions: 2 +2022-10-31 00:12:01 - r - INFO: - Start training! +2022-10-31 00:12:01 - r - INFO: - Env: CartPole-v1, Algorithm: DQN, Device: cuda +2022-10-31 00:12:04 - r - INFO: - Episode: 1/100, Reward: 18.0, Step: 18 +2022-10-31 00:12:04 - r - INFO: - Episode: 2/100, Reward: 35.0, Step: 35 +2022-10-31 00:12:04 - r - INFO: - Episode: 3/100, Reward: 13.0, Step: 13 +2022-10-31 00:12:04 - r - INFO: - Episode: 4/100, Reward: 32.0, Step: 32 +2022-10-31 00:12:04 - r - INFO: - Episode: 5/100, Reward: 16.0, Step: 16 +2022-10-31 00:12:04 - r - INFO: - Current episode 5 has the best eval reward: 15.30 +2022-10-31 00:12:04 - r - INFO: - Episode: 6/100, Reward: 12.0, Step: 12 +2022-10-31 00:12:04 - r - INFO: - Episode: 7/100, Reward: 13.0, Step: 13 +2022-10-31 00:12:04 - r - INFO: - Episode: 8/100, Reward: 15.0, Step: 15 +2022-10-31 00:12:04 - r - INFO: - Episode: 9/100, Reward: 11.0, Step: 11 +2022-10-31 00:12:04 - r - INFO: - Episode: 10/100, Reward: 15.0, Step: 15 +2022-10-31 00:12:04 - r - INFO: - Episode: 11/100, Reward: 9.0, Step: 9 +2022-10-31 00:12:04 - r - INFO: - Episode: 12/100, Reward: 13.0, Step: 13 +2022-10-31 00:12:04 - r - INFO: - Episode: 13/100, Reward: 13.0, Step: 13 +2022-10-31 00:12:04 - r - INFO: - Episode: 14/100, Reward: 10.0, Step: 10 +2022-10-31 00:12:04 - r - INFO: - Episode: 15/100, Reward: 9.0, Step: 9 +2022-10-31 00:12:04 - r - INFO: - Episode: 16/100, Reward: 24.0, Step: 24 +2022-10-31 00:12:04 - r - INFO: - Episode: 17/100, Reward: 8.0, Step: 8 +2022-10-31 00:12:04 - r - INFO: - Episode: 
18/100, Reward: 10.0, Step: 10 +2022-10-31 00:12:04 - r - INFO: - Episode: 19/100, Reward: 11.0, Step: 11 +2022-10-31 00:12:04 - r - INFO: - Episode: 20/100, Reward: 13.0, Step: 13 +2022-10-31 00:12:04 - r - INFO: - Episode: 21/100, Reward: 12.0, Step: 12 +2022-10-31 00:12:04 - r - INFO: - Episode: 22/100, Reward: 11.0, Step: 11 +2022-10-31 00:12:04 - r - INFO: - Episode: 23/100, Reward: 9.0, Step: 9 +2022-10-31 00:12:04 - r - INFO: - Episode: 24/100, Reward: 21.0, Step: 21 +2022-10-31 00:12:05 - r - INFO: - Episode: 25/100, Reward: 14.0, Step: 14 +2022-10-31 00:12:05 - r - INFO: - Episode: 26/100, Reward: 12.0, Step: 12 +2022-10-31 00:12:05 - r - INFO: - Episode: 27/100, Reward: 9.0, Step: 9 +2022-10-31 00:12:05 - r - INFO: - Episode: 28/100, Reward: 11.0, Step: 11 +2022-10-31 00:12:05 - r - INFO: - Episode: 29/100, Reward: 12.0, Step: 12 +2022-10-31 00:12:05 - r - INFO: - Episode: 30/100, Reward: 13.0, Step: 13 +2022-10-31 00:12:05 - r - INFO: - Episode: 31/100, Reward: 10.0, Step: 10 +2022-10-31 00:12:05 - r - INFO: - Episode: 32/100, Reward: 13.0, Step: 13 +2022-10-31 00:12:05 - r - INFO: - Episode: 33/100, Reward: 18.0, Step: 18 +2022-10-31 00:12:05 - r - INFO: - Episode: 34/100, Reward: 9.0, Step: 9 +2022-10-31 00:12:05 - r - INFO: - Episode: 35/100, Reward: 10.0, Step: 10 +2022-10-31 00:12:05 - r - INFO: - Episode: 36/100, Reward: 9.0, Step: 9 +2022-10-31 00:12:05 - r - INFO: - Episode: 37/100, Reward: 10.0, Step: 10 +2022-10-31 00:12:05 - r - INFO: - Episode: 38/100, Reward: 10.0, Step: 10 +2022-10-31 00:12:05 - r - INFO: - Episode: 39/100, Reward: 10.0, Step: 10 +2022-10-31 00:12:05 - r - INFO: - Episode: 40/100, Reward: 8.0, Step: 8 +2022-10-31 00:12:06 - r - INFO: - Episode: 41/100, Reward: 9.0, Step: 9 +2022-10-31 00:12:06 - r - INFO: - Episode: 42/100, Reward: 9.0, Step: 9 +2022-10-31 00:12:06 - r - INFO: - Episode: 43/100, Reward: 20.0, Step: 20 +2022-10-31 00:12:06 - r - INFO: - Episode: 44/100, Reward: 16.0, Step: 16 +2022-10-31 00:12:06 - r - INFO: 
- Episode: 45/100, Reward: 17.0, Step: 17 +2022-10-31 00:12:06 - r - INFO: - Current episode 45 has the best eval reward: 17.50 +2022-10-31 00:12:06 - r - INFO: - Episode: 46/100, Reward: 17.0, Step: 17 +2022-10-31 00:12:06 - r - INFO: - Episode: 47/100, Reward: 17.0, Step: 17 +2022-10-31 00:12:06 - r - INFO: - Episode: 48/100, Reward: 18.0, Step: 18 +2022-10-31 00:12:06 - r - INFO: - Episode: 49/100, Reward: 25.0, Step: 25 +2022-10-31 00:12:06 - r - INFO: - Episode: 50/100, Reward: 31.0, Step: 31 +2022-10-31 00:12:06 - r - INFO: - Current episode 50 has the best eval reward: 24.80 +2022-10-31 00:12:06 - r - INFO: - Episode: 51/100, Reward: 22.0, Step: 22 +2022-10-31 00:12:06 - r - INFO: - Episode: 52/100, Reward: 39.0, Step: 39 +2022-10-31 00:12:06 - r - INFO: - Episode: 53/100, Reward: 36.0, Step: 36 +2022-10-31 00:12:06 - r - INFO: - Episode: 54/100, Reward: 26.0, Step: 26 +2022-10-31 00:12:07 - r - INFO: - Episode: 55/100, Reward: 33.0, Step: 33 +2022-10-31 00:12:07 - r - INFO: - Current episode 55 has the best eval reward: 38.70 +2022-10-31 00:12:07 - r - INFO: - Episode: 56/100, Reward: 56.0, Step: 56 +2022-10-31 00:12:07 - r - INFO: - Episode: 57/100, Reward: 112.0, Step: 112 +2022-10-31 00:12:07 - r - INFO: - Episode: 58/100, Reward: 101.0, Step: 101 +2022-10-31 00:12:08 - r - INFO: - Episode: 59/100, Reward: 69.0, Step: 69 +2022-10-31 00:12:08 - r - INFO: - Episode: 60/100, Reward: 75.0, Step: 75 +2022-10-31 00:12:08 - r - INFO: - Episode: 61/100, Reward: 182.0, Step: 182 +2022-10-31 00:12:09 - r - INFO: - Episode: 62/100, Reward: 52.0, Step: 52 +2022-10-31 00:12:09 - r - INFO: - Episode: 63/100, Reward: 67.0, Step: 67 +2022-10-31 00:12:09 - r - INFO: - Episode: 64/100, Reward: 53.0, Step: 53 +2022-10-31 00:12:09 - r - INFO: - Episode: 65/100, Reward: 119.0, Step: 119 +2022-10-31 00:12:10 - r - INFO: - Current episode 65 has the best eval reward: 171.90 +2022-10-31 00:12:10 - r - INFO: - Episode: 66/100, Reward: 200.0, Step: 200 +2022-10-31 00:12:10 - r - 
INFO: - Episode: 67/100, Reward: 74.0, Step: 74 +2022-10-31 00:12:11 - r - INFO: - Episode: 68/100, Reward: 138.0, Step: 138 +2022-10-31 00:12:11 - r - INFO: - Episode: 69/100, Reward: 149.0, Step: 149 +2022-10-31 00:12:12 - r - INFO: - Episode: 70/100, Reward: 144.0, Step: 144 +2022-10-31 00:12:12 - r - INFO: - Current episode 70 has the best eval reward: 173.70 +2022-10-31 00:12:13 - r - INFO: - Episode: 71/100, Reward: 200.0, Step: 200 +2022-10-31 00:12:13 - r - INFO: - Episode: 72/100, Reward: 198.0, Step: 198 +2022-10-31 00:12:14 - r - INFO: - Episode: 73/100, Reward: 200.0, Step: 200 +2022-10-31 00:12:14 - r - INFO: - Episode: 74/100, Reward: 200.0, Step: 200 +2022-10-31 00:12:15 - r - INFO: - Episode: 75/100, Reward: 200.0, Step: 200 +2022-10-31 00:12:16 - r - INFO: - Current episode 75 has the best eval reward: 200.00 +2022-10-31 00:12:16 - r - INFO: - Episode: 76/100, Reward: 200.0, Step: 200 +2022-10-31 00:12:17 - r - INFO: - Episode: 77/100, Reward: 200.0, Step: 200 +2022-10-31 00:12:17 - r - INFO: - Episode: 78/100, Reward: 200.0, Step: 200 +2022-10-31 00:12:18 - r - INFO: - Episode: 79/100, Reward: 200.0, Step: 200 +2022-10-31 00:12:19 - r - INFO: - Episode: 80/100, Reward: 200.0, Step: 200 +2022-10-31 00:12:19 - r - INFO: - Current episode 80 has the best eval reward: 200.00 +2022-10-31 00:12:20 - r - INFO: - Episode: 81/100, Reward: 200.0, Step: 200 +2022-10-31 00:12:20 - r - INFO: - Episode: 82/100, Reward: 200.0, Step: 200 +2022-10-31 00:12:21 - r - INFO: - Episode: 83/100, Reward: 200.0, Step: 200 +2022-10-31 00:12:21 - r - INFO: - Episode: 84/100, Reward: 200.0, Step: 200 +2022-10-31 00:12:22 - r - INFO: - Episode: 85/100, Reward: 200.0, Step: 200 +2022-10-31 00:12:23 - r - INFO: - Current episode 85 has the best eval reward: 200.00 +2022-10-31 00:12:23 - r - INFO: - Episode: 86/100, Reward: 200.0, Step: 200 +2022-10-31 00:12:24 - r - INFO: - Episode: 87/100, Reward: 200.0, Step: 200 +2022-10-31 00:12:25 - r - INFO: - Episode: 88/100, Reward: 
200.0, Step: 200 +2022-10-31 00:12:25 - r - INFO: - Episode: 89/100, Reward: 200.0, Step: 200 +2022-10-31 00:12:26 - r - INFO: - Episode: 90/100, Reward: 200.0, Step: 200 +2022-10-31 00:12:27 - r - INFO: - Current episode 90 has the best eval reward: 200.00 +2022-10-31 00:12:27 - r - INFO: - Episode: 91/100, Reward: 200.0, Step: 200 +2022-10-31 00:12:28 - r - INFO: - Episode: 92/100, Reward: 200.0, Step: 200 +2022-10-31 00:12:28 - r - INFO: - Episode: 93/100, Reward: 200.0, Step: 200 +2022-10-31 00:12:29 - r - INFO: - Episode: 94/100, Reward: 200.0, Step: 200 +2022-10-31 00:12:29 - r - INFO: - Episode: 95/100, Reward: 200.0, Step: 200 +2022-10-31 00:12:30 - r - INFO: - Current episode 95 has the best eval reward: 200.00 +2022-10-31 00:12:31 - r - INFO: - Episode: 96/100, Reward: 200.0, Step: 200 +2022-10-31 00:12:31 - r - INFO: - Episode: 97/100, Reward: 200.0, Step: 200 +2022-10-31 00:12:32 - r - INFO: - Episode: 98/100, Reward: 200.0, Step: 200 +2022-10-31 00:12:32 - r - INFO: - Episode: 99/100, Reward: 200.0, Step: 200 +2022-10-31 00:12:33 - r - INFO: - Episode: 100/100, Reward: 200.0, Step: 200 +2022-10-31 00:12:33 - r - INFO: - Current episode 100 has the best eval reward: 200.00 +2022-10-31 00:12:33 - r - INFO: - Finish training! 
diff --git a/projects/codes/DQN/Train_CartPole-v1_DQN_20221031-001201/models/checkpoint.pt b/projects/codes/DQN/Train_CartPole-v1_DQN_20221031-001201/models/checkpoint.pt new file mode 100644 index 0000000..722eb69 Binary files /dev/null and b/projects/codes/DQN/Train_CartPole-v1_DQN_20221031-001201/models/checkpoint.pt differ diff --git a/projects/codes/DQN/Train_CartPole-v1_DQN_20221031-001201/results/learning_curve.png b/projects/codes/DQN/Train_CartPole-v1_DQN_20221031-001201/results/learning_curve.png new file mode 100644 index 0000000..331f645 Binary files /dev/null and b/projects/codes/DQN/Train_CartPole-v1_DQN_20221031-001201/results/learning_curve.png differ diff --git a/projects/codes/DQN/Train_CartPole-v1_DQN_20221031-001201/results/res.csv b/projects/codes/DQN/Train_CartPole-v1_DQN_20221031-001201/results/res.csv new file mode 100644 index 0000000..3bf53a3 --- /dev/null +++ b/projects/codes/DQN/Train_CartPole-v1_DQN_20221031-001201/results/res.csv @@ -0,0 +1,101 @@ +episodes,rewards,steps +0,18.0,18 +1,35.0,35 +2,13.0,13 +3,32.0,32 +4,16.0,16 +5,12.0,12 +6,13.0,13 +7,15.0,15 +8,11.0,11 +9,15.0,15 +10,9.0,9 +11,13.0,13 +12,13.0,13 +13,10.0,10 +14,9.0,9 +15,24.0,24 +16,8.0,8 +17,10.0,10 +18,11.0,11 +19,13.0,13 +20,12.0,12 +21,11.0,11 +22,9.0,9 +23,21.0,21 +24,14.0,14 +25,12.0,12 +26,9.0,9 +27,11.0,11 +28,12.0,12 +29,13.0,13 +30,10.0,10 +31,13.0,13 +32,18.0,18 +33,9.0,9 +34,10.0,10 +35,9.0,9 +36,10.0,10 +37,10.0,10 +38,10.0,10 +39,8.0,8 +40,9.0,9 +41,9.0,9 +42,20.0,20 +43,16.0,16 +44,17.0,17 +45,17.0,17 +46,17.0,17 +47,18.0,18 +48,25.0,25 +49,31.0,31 +50,22.0,22 +51,39.0,39 +52,36.0,36 +53,26.0,26 +54,33.0,33 +55,56.0,56 +56,112.0,112 +57,101.0,101 +58,69.0,69 +59,75.0,75 +60,182.0,182 +61,52.0,52 +62,67.0,67 +63,53.0,53 +64,119.0,119 +65,200.0,200 +66,74.0,74 +67,138.0,138 +68,149.0,149 +69,144.0,144 +70,200.0,200 +71,198.0,198 +72,200.0,200 +73,200.0,200 +74,200.0,200 +75,200.0,200 +76,200.0,200 +77,200.0,200 +78,200.0,200 +79,200.0,200 +80,200.0,200 
+81,200.0,200 +82,200.0,200 +83,200.0,200 +84,200.0,200 +85,200.0,200 +86,200.0,200 +87,200.0,200 +88,200.0,200 +89,200.0,200 +90,200.0,200 +91,200.0,200 +92,200.0,200 +93,200.0,200 +94,200.0,200 +95,200.0,200 +96,200.0,200 +97,200.0,200 +98,200.0,200 +99,200.0,200 diff --git a/projects/codes/DQN/config/Acrobot-v1_DQN_Test.yaml b/projects/codes/DQN/config/Acrobot-v1_DQN_Test.yaml new file mode 100644 index 0000000..d6e8d84 --- /dev/null +++ b/projects/codes/DQN/config/Acrobot-v1_DQN_Test.yaml @@ -0,0 +1,22 @@ +general_cfg: + algo_name: DQN + device: cuda + env_name: Acrobot-v1 + mode: test + load_checkpoint: true + load_path: Train_Acrobot-v1_DQN_20221026-094645 + max_steps: 100000 + save_fig: true + seed: 1 + show_fig: false + test_eps: 10 + train_eps: 100 +algo_cfg: + batch_size: 128 + buffer_size: 200000 + epsilon_decay: 500 + epsilon_end: 0.01 + epsilon_start: 0.95 + gamma: 0.95 + lr: 0.002 + target_update: 4 diff --git a/projects/codes/DQN/config/Acrobot-v1_DQN_Train.yaml b/projects/codes/DQN/config/Acrobot-v1_DQN_Train.yaml new file mode 100644 index 0000000..0b18e79 --- /dev/null +++ b/projects/codes/DQN/config/Acrobot-v1_DQN_Train.yaml @@ -0,0 +1,22 @@ +general_cfg: + algo_name: DQN + device: cuda + env_name: Acrobot-v1 + mode: train + load_checkpoint: false + load_path: Train_CartPole-v1_DQN_20221026-054757 + max_steps: 100000 + save_fig: true + seed: 1 + show_fig: false + test_eps: 10 + train_eps: 100 +algo_cfg: + batch_size: 128 + buffer_size: 200000 + epsilon_decay: 500 + epsilon_end: 0.01 + epsilon_start: 0.95 + gamma: 0.95 + lr: 0.002 + target_update: 4 diff --git a/projects/codes/DQN/config/CartPole-v1_DQN_Test.yaml b/projects/codes/DQN/config/CartPole-v1_DQN_Test.yaml new file mode 100644 index 0000000..baa98f0 --- /dev/null +++ b/projects/codes/DQN/config/CartPole-v1_DQN_Test.yaml @@ -0,0 +1,22 @@ +general_cfg: + algo_name: DQN + device: cuda + env_name: CartPole-v1 + mode: test + load_checkpoint: true + load_path: 
Train_CartPole-v1_DQN_20221031-001201 + max_steps: 200 + save_fig: true + seed: 0 + show_fig: false + test_eps: 10 + train_eps: 100 +algo_cfg: + batch_size: 64 + buffer_size: 100000 + epsilon_decay: 500 + epsilon_end: 0.01 + epsilon_start: 0.95 + gamma: 0.95 + lr: 0.0001 + target_update: 4 diff --git a/projects/codes/DQN/config/CartPole-v1_DQN_Train.yaml b/projects/codes/DQN/config/CartPole-v1_DQN_Train.yaml new file mode 100644 index 0000000..14297b5 --- /dev/null +++ b/projects/codes/DQN/config/CartPole-v1_DQN_Train.yaml @@ -0,0 +1,22 @@ +general_cfg: + algo_name: DQN + device: cuda + env_name: CartPole-v1 + mode: train + load_checkpoint: false + load_path: Train_CartPole-v1_DQN_20221026-054757 + max_steps: 200 + save_fig: true + seed: 0 + show_fig: false + test_eps: 10 + train_eps: 200 +algo_cfg: + batch_size: 64 + buffer_size: 100000 + epsilon_decay: 500 + epsilon_end: 0.01 + epsilon_start: 0.95 + gamma: 0.95 + lr: 0.0001 + target_update: 4 diff --git a/projects/codes/DQN/config/config.py b/projects/codes/DQN/config/config.py new file mode 100644 index 0000000..2653c8d --- /dev/null +++ b/projects/codes/DQN/config/config.py @@ -0,0 +1,38 @@ +#!/usr/bin/env python +# coding=utf-8 +''' +Author: JiangJi +Email: johnjim0816@gmail.com +Date: 2022-10-30 00:37:33 +LastEditor: JiangJi +LastEditTime: 2022-10-31 00:11:57 +Discription: default parameters of DQN +''' +from common.config import GeneralConfig,AlgoConfig +class GeneralConfigDQN(GeneralConfig): + def __init__(self) -> None: + self.env_name = "CartPole-v1" # name of environment + self.algo_name = "DQN" # name of algorithm + self.mode = "train" # train or test + self.seed = 1 # random seed + self.device = "cuda" # device to use + self.train_eps = 100 # number of episodes for training + self.test_eps = 10 # number of episodes for testing + self.max_steps = 200 # max steps for each episode + self.load_checkpoint = False + self.load_path = "tasks" # path to load model + self.show_fig = False # show figure or not + 
self.save_fig = True # save figure or not + +class AlgoConfigDQN(AlgoConfig): + def __init__(self) -> None: + # set epsilon_start=epsilon_end can obtain fixed epsilon=epsilon_end + self.epsilon_start = 0.95 # epsilon start value + self.epsilon_end = 0.01 # epsilon end value + self.epsilon_decay = 500 # epsilon decay rate + self.hidden_dim = 256 # hidden_dim for MLP + self.gamma = 0.95 # discount factor + self.lr = 0.0001 # learning rate + self.buffer_size = 100000 # size of replay buffer + self.batch_size = 64 # batch size + self.target_update = 800 # target network update frequency per steps diff --git a/projects/codes/DQN/dqn.py b/projects/codes/DQN/dqn.py index fce3a73..761d25f 100644 --- a/projects/codes/DQN/dqn.py +++ b/projects/codes/DQN/dqn.py @@ -5,7 +5,7 @@ @Email: johnjim0816@gmail.com @Date: 2020-06-12 00:50:49 @LastEditor: John -LastEditTime: 2022-08-29 23:30:08 +LastEditTime: 2022-10-31 00:07:19 @Discription: @Environment: python 3.7.7 ''' @@ -22,27 +22,28 @@ import numpy as np class DQN: def __init__(self,model,memory,cfg): - self.n_actions = cfg['n_actions'] - self.device = torch.device(cfg['device']) - self.gamma = cfg['gamma'] + self.n_actions = cfg.n_actions + self.device = torch.device(cfg.device) + self.gamma = cfg.gamma ## e-greedy parameters self.sample_count = 0 # sample count for epsilon decay - self.epsilon = cfg['epsilon_start'] + self.epsilon = cfg.epsilon_start self.sample_count = 0 - self.epsilon_start = cfg['epsilon_start'] - self.epsilon_end = cfg['epsilon_end'] - self.epsilon_decay = cfg['epsilon_decay'] - self.batch_size = cfg['batch_size'] + self.epsilon_start = cfg.epsilon_start + self.epsilon_end = cfg.epsilon_end + self.epsilon_decay = cfg.epsilon_decay + self.batch_size = cfg.batch_size + self.target_update = cfg.target_update self.policy_net = model.to(self.device) self.target_net = model.to(self.device) ## copy parameters from policy net to target net for target_param, param in 
zip(self.target_net.parameters(),self.policy_net.parameters()): target_param.data.copy_(param.data) # self.target_net.load_state_dict(self.policy_net.state_dict()) # or use this to copy parameters - self.optimizer = optim.Adam(self.policy_net.parameters(), lr=cfg['lr']) + self.optimizer = optim.Adam(self.policy_net.parameters(), lr=cfg.lr) self.memory = memory self.update_flag = False - + def sample_action(self, state): ''' sample action with e-greedy policy ''' @@ -58,6 +59,21 @@ class DQN: else: action = random.randrange(self.n_actions) return action + # @torch.no_grad() + # def sample_action(self, state): + # ''' sample action with e-greedy policy + # ''' + # self.sample_count += 1 + # # epsilon must decay(linear,exponential and etc.) for balancing exploration and exploitation + # self.epsilon = self.epsilon_end + (self.epsilon_start - self.epsilon_end) * \ + # math.exp(-1. * self.sample_count / self.epsilon_decay) + # if random.random() > self.epsilon: + # state = torch.tensor(state, device=self.device, dtype=torch.float32).unsqueeze(dim=0) + # q_values = self.policy_net(state) + # action = q_values.max(1)[1].item() # choose action corresponding to the maximum q value + # else: + # action = random.randrange(self.n_actions) + # return action def predict_action(self,state): ''' predict action ''' @@ -99,14 +115,16 @@ class DQN: for param in self.policy_net.parameters(): param.grad.data.clamp_(-1, 1) self.optimizer.step() + if self.sample_count % self.target_update == 0: # target net update, target_update means "C" in pseucodes + self.target_net.load_state_dict(self.policy_net.state_dict()) - def save_model(self, path): + def save_model(self, fpath): from pathlib import Path # create path - Path(path).mkdir(parents=True, exist_ok=True) - torch.save(self.target_net.state_dict(), f"{path}/checkpoint.pt") + Path(fpath).mkdir(parents=True, exist_ok=True) + torch.save(self.target_net.state_dict(), f"{fpath}/checkpoint.pt") - def load_model(self, path): - 
self.target_net.load_state_dict(torch.load(f"{path}/checkpoint.pt")) + def load_model(self, fpath): + self.target_net.load_state_dict(torch.load(f"{fpath}/checkpoint.pt")) for target_param, param in zip(self.target_net.parameters(), self.policy_net.parameters()): param.data.copy_(target_param.data) diff --git a/projects/codes/DQN/outputs/Acrobot-v1/20220824-124401/models/checkpoint.pt b/projects/codes/DQN/outputs/Acrobot-v1/20220824-124401/models/checkpoint.pt deleted file mode 100644 index c27c438..0000000 Binary files a/projects/codes/DQN/outputs/Acrobot-v1/20220824-124401/models/checkpoint.pt and /dev/null differ diff --git a/projects/codes/DQN/outputs/Acrobot-v1/20220824-124401/results/params.json b/projects/codes/DQN/outputs/Acrobot-v1/20220824-124401/results/params.json deleted file mode 100644 index 0501731..0000000 --- a/projects/codes/DQN/outputs/Acrobot-v1/20220824-124401/results/params.json +++ /dev/null @@ -1 +0,0 @@ -{"algo_name": "DQN", "env_name": "Acrobot-v1", "train_eps": 100, "test_eps": 20, "gamma": 0.95, "epsilon_start": 0.95, "epsilon_end": 0.01, "epsilon_decay": 1500, "lr": 0.002, "memory_capacity": 200000, "batch_size": 128, "target_update": 4, "hidden_dim": 256, "device": "cuda", "seed": 10, "show_fig": false, "save_fig": true, "result_path": "C:\\Users\\jiangji\\Desktop\\rl-tutorials\\codes\\DQN/outputs/Acrobot-v1/20220824-124401/results", "model_path": "C:\\Users\\jiangji\\Desktop\\rl-tutorials\\codes\\DQN/outputs/Acrobot-v1/20220824-124401/models", "n_states": 6, "n_actions": 3} \ No newline at end of file diff --git a/projects/codes/DQN/outputs/Acrobot-v1/20220824-124401/results/testing_curve.png b/projects/codes/DQN/outputs/Acrobot-v1/20220824-124401/results/testing_curve.png deleted file mode 100644 index 067e301..0000000 Binary files a/projects/codes/DQN/outputs/Acrobot-v1/20220824-124401/results/testing_curve.png and /dev/null differ diff --git a/projects/codes/DQN/outputs/Acrobot-v1/20220824-124401/results/testing_results.csv 
b/projects/codes/DQN/outputs/Acrobot-v1/20220824-124401/results/testing_results.csv deleted file mode 100644 index 65499c3..0000000 --- a/projects/codes/DQN/outputs/Acrobot-v1/20220824-124401/results/testing_results.csv +++ /dev/null @@ -1,21 +0,0 @@ -episodes,rewards -0,-79.0 -1,-113.0 -2,-81.0 -3,-132.0 -4,-110.0 -5,-114.0 -6,-80.0 -7,-101.0 -8,-78.0 -9,-91.0 -10,-107.0 -11,-87.0 -12,-105.0 -13,-91.0 -14,-128.0 -15,-132.0 -16,-119.0 -17,-77.0 -18,-89.0 -19,-134.0 diff --git a/projects/codes/DQN/outputs/Acrobot-v1/20220824-124401/results/training_curve.png b/projects/codes/DQN/outputs/Acrobot-v1/20220824-124401/results/training_curve.png deleted file mode 100644 index 9dbeb09..0000000 Binary files a/projects/codes/DQN/outputs/Acrobot-v1/20220824-124401/results/training_curve.png and /dev/null differ diff --git a/projects/codes/DQN/outputs/Acrobot-v1/20220824-124401/results/training_results.csv b/projects/codes/DQN/outputs/Acrobot-v1/20220824-124401/results/training_results.csv deleted file mode 100644 index c5afa37..0000000 --- a/projects/codes/DQN/outputs/Acrobot-v1/20220824-124401/results/training_results.csv +++ /dev/null @@ -1,101 +0,0 @@ -episodes,rewards -0,-500.0 -1,-500.0 -2,-500.0 -3,-370.0 -4,-449.0 -5,-500.0 -6,-312.0 -7,-374.0 -8,-180.0 -9,-154.0 -10,-137.0 -11,-185.0 -12,-135.0 -13,-302.0 -14,-146.0 -15,-137.0 -16,-119.0 -17,-149.0 -18,-217.0 -19,-191.0 -20,-157.0 -21,-166.0 -22,-138.0 -23,-135.0 -24,-182.0 -25,-130.0 -26,-175.0 -27,-222.0 -28,-133.0 -29,-108.0 -30,-250.0 -31,-119.0 -32,-135.0 -33,-148.0 -34,-194.0 -35,-194.0 -36,-186.0 -37,-131.0 -38,-185.0 -39,-79.0 -40,-129.0 -41,-271.0 -42,-117.0 -43,-159.0 -44,-156.0 -45,-117.0 -46,-158.0 -47,-153.0 -48,-119.0 -49,-164.0 -50,-134.0 -51,-231.0 -52,-117.0 -53,-119.0 -54,-136.0 -55,-173.0 -56,-202.0 -57,-133.0 -58,-142.0 -59,-169.0 -60,-137.0 -61,-123.0 -62,-205.0 -63,-107.0 -64,-194.0 -65,-150.0 -66,-143.0 -67,-218.0 -68,-145.0 -69,-90.0 -70,-107.0 -71,-169.0 -72,-125.0 -73,-142.0 -74,-145.0 
-75,-94.0 -76,-150.0 -77,-134.0 -78,-159.0 -79,-137.0 -80,-146.0 -81,-191.0 -82,-242.0 -83,-117.0 -84,-92.0 -85,-193.0 -86,-239.0 -87,-173.0 -88,-140.0 -89,-157.0 -90,-133.0 -91,-148.0 -92,-87.0 -93,-398.0 -94,-98.0 -95,-121.0 -96,-102.0 -97,-120.0 -98,-195.0 -99,-219.0 diff --git a/projects/codes/DQN/outputs/CartPole-v0/20220823-173936/models/checkpoint.pt b/projects/codes/DQN/outputs/CartPole-v0/20220823-173936/models/checkpoint.pt deleted file mode 100644 index db7288b..0000000 Binary files a/projects/codes/DQN/outputs/CartPole-v0/20220823-173936/models/checkpoint.pt and /dev/null differ diff --git a/projects/codes/DQN/outputs/CartPole-v0/20220823-173936/results/params.json b/projects/codes/DQN/outputs/CartPole-v0/20220823-173936/results/params.json deleted file mode 100644 index f57e151..0000000 --- a/projects/codes/DQN/outputs/CartPole-v0/20220823-173936/results/params.json +++ /dev/null @@ -1,21 +0,0 @@ -{ - "algo_name": "DQN", - "env_name": "CartPole-v0", - "train_eps": 200, - "test_eps": 20, - "gamma": 0.95, - "epsilon_start": 0.95, - "epsilon_end": 0.01, - "epsilon_decay": 500, - "lr": 0.0001, - "memory_capacity": 100000, - "batch_size": 64, - "target_update": 4, - "hidden_dim": 256, - "device": "cpu", - "seed": 10, - "result_path": "C:\\Users\\jiangji\\Desktop\\rl-tutorials\\codes\\DQN/outputs/CartPole-v0/20220823-173936/results", - "model_path": "C:\\Users\\jiangji\\Desktop\\rl-tutorials\\codes\\DQN/outputs/CartPole-v0/20220823-173936/models", - "show_fig": false, - "save_fig": true -} \ No newline at end of file diff --git a/projects/codes/DQN/outputs/CartPole-v0/20220823-173936/results/testing_curve.png b/projects/codes/DQN/outputs/CartPole-v0/20220823-173936/results/testing_curve.png deleted file mode 100644 index 43ceb6f..0000000 Binary files a/projects/codes/DQN/outputs/CartPole-v0/20220823-173936/results/testing_curve.png and /dev/null differ diff --git a/projects/codes/DQN/outputs/CartPole-v0/20220823-173936/results/training_curve.png 
b/projects/codes/DQN/outputs/CartPole-v0/20220823-173936/results/training_curve.png deleted file mode 100644 index 0e7b997..0000000 Binary files a/projects/codes/DQN/outputs/CartPole-v0/20220823-173936/results/training_curve.png and /dev/null differ diff --git a/projects/codes/DQN/outputs/CartPole-v0/20220823-173936/results/training_results.csv b/projects/codes/DQN/outputs/CartPole-v0/20220823-173936/results/training_results.csv deleted file mode 100644 index 4429b6a..0000000 --- a/projects/codes/DQN/outputs/CartPole-v0/20220823-173936/results/training_results.csv +++ /dev/null @@ -1,201 +0,0 @@ -episodes,rewards -0,38.0 -1,16.0 -2,37.0 -3,15.0 -4,22.0 -5,34.0 -6,20.0 -7,12.0 -8,16.0 -9,14.0 -10,13.0 -11,21.0 -12,14.0 -13,12.0 -14,17.0 -15,12.0 -16,10.0 -17,14.0 -18,10.0 -19,10.0 -20,16.0 -21,9.0 -22,14.0 -23,13.0 -24,10.0 -25,9.0 -26,12.0 -27,12.0 -28,14.0 -29,11.0 -30,9.0 -31,8.0 -32,9.0 -33,11.0 -34,12.0 -35,10.0 -36,11.0 -37,10.0 -38,10.0 -39,18.0 -40,13.0 -41,15.0 -42,10.0 -43,9.0 -44,14.0 -45,14.0 -46,23.0 -47,17.0 -48,15.0 -49,15.0 -50,20.0 -51,28.0 -52,36.0 -53,36.0 -54,23.0 -55,27.0 -56,53.0 -57,19.0 -58,35.0 -59,62.0 -60,57.0 -61,38.0 -62,61.0 -63,65.0 -64,58.0 -65,43.0 -66,67.0 -67,56.0 -68,91.0 -69,128.0 -70,71.0 -71,126.0 -72,100.0 -73,200.0 -74,200.0 -75,200.0 -76,200.0 -77,200.0 -78,200.0 -79,200.0 -80,200.0 -81,200.0 -82,200.0 -83,200.0 -84,200.0 -85,200.0 -86,200.0 -87,200.0 -88,200.0 -89,200.0 -90,200.0 -91,200.0 -92,200.0 -93,200.0 -94,200.0 -95,200.0 -96,200.0 -97,200.0 -98,200.0 -99,200.0 -100,200.0 -101,200.0 -102,200.0 -103,200.0 -104,200.0 -105,200.0 -106,200.0 -107,200.0 -108,200.0 -109,200.0 -110,200.0 -111,200.0 -112,200.0 -113,200.0 -114,200.0 -115,200.0 -116,200.0 -117,200.0 -118,200.0 -119,200.0 -120,200.0 -121,200.0 -122,200.0 -123,200.0 -124,200.0 -125,200.0 -126,200.0 -127,200.0 -128,200.0 -129,200.0 -130,200.0 -131,200.0 -132,200.0 -133,200.0 -134,200.0 -135,200.0 -136,200.0 -137,200.0 -138,200.0 -139,200.0 -140,200.0 -141,200.0 
-142,200.0 -143,200.0 -144,200.0 -145,200.0 -146,200.0 -147,200.0 -148,200.0 -149,200.0 -150,200.0 -151,200.0 -152,200.0 -153,200.0 -154,200.0 -155,200.0 -156,200.0 -157,200.0 -158,200.0 -159,200.0 -160,200.0 -161,200.0 -162,200.0 -163,200.0 -164,200.0 -165,200.0 -166,200.0 -167,200.0 -168,200.0 -169,200.0 -170,200.0 -171,200.0 -172,200.0 -173,200.0 -174,200.0 -175,200.0 -176,200.0 -177,200.0 -178,200.0 -179,200.0 -180,200.0 -181,200.0 -182,200.0 -183,200.0 -184,200.0 -185,200.0 -186,200.0 -187,200.0 -188,200.0 -189,200.0 -190,200.0 -191,200.0 -192,200.0 -193,200.0 -194,200.0 -195,200.0 -196,200.0 -197,200.0 -198,200.0 -199,200.0 diff --git a/projects/codes/DQN/outputs/CartPole-v1/20220828-214702/models/checkpoint.pt b/projects/codes/DQN/outputs/CartPole-v1/20220828-214702/models/checkpoint.pt deleted file mode 100644 index e357d49..0000000 Binary files a/projects/codes/DQN/outputs/CartPole-v1/20220828-214702/models/checkpoint.pt and /dev/null differ diff --git a/projects/codes/DQN/outputs/CartPole-v1/20220828-214702/results/params.json b/projects/codes/DQN/outputs/CartPole-v1/20220828-214702/results/params.json deleted file mode 100644 index c87e5de..0000000 --- a/projects/codes/DQN/outputs/CartPole-v1/20220828-214702/results/params.json +++ /dev/null @@ -1,24 +0,0 @@ -{ - "algo_name": "DQN", - "env_name": "CartPole-v1", - "train_eps": 2000, - "test_eps": 20, - "ep_max_steps": 100000, - "gamma": 0.99, - "epsilon_start": 0.95, - "epsilon_end": 0.01, - "epsilon_decay": 6000, - "lr": 1e-05, - "memory_capacity": 200000, - "batch_size": 64, - "target_update": 4, - "hidden_dim": 256, - "device": "cuda", - "seed": 10, - "show_fig": false, - "save_fig": true, - "result_path": "C:\\Users\\24438\\Desktop\\rl-tutorials\\codes\\DQN/outputs/CartPole-v1/20220828-214702/results", - "model_path": "C:\\Users\\24438\\Desktop\\rl-tutorials\\codes\\DQN/outputs/CartPole-v1/20220828-214702/models", - "n_states": 4, - "n_actions": 2 -} \ No newline at end of file diff --git 
a/projects/codes/DQN/outputs/CartPole-v1/20220828-214702/results/testing_curve.png b/projects/codes/DQN/outputs/CartPole-v1/20220828-214702/results/testing_curve.png deleted file mode 100644 index f97050f..0000000 Binary files a/projects/codes/DQN/outputs/CartPole-v1/20220828-214702/results/testing_curve.png and /dev/null differ diff --git a/projects/codes/DQN/outputs/CartPole-v1/20220828-214702/results/testing_results.csv b/projects/codes/DQN/outputs/CartPole-v1/20220828-214702/results/testing_results.csv deleted file mode 100644 index bb0b8f6..0000000 --- a/projects/codes/DQN/outputs/CartPole-v1/20220828-214702/results/testing_results.csv +++ /dev/null @@ -1,21 +0,0 @@ -episodes,rewards,steps -0,371.0,371 -1,446.0,446 -2,300.0,300 -3,500.0,500 -4,313.0,313 -5,500.0,500 -6,341.0,341 -7,489.0,489 -8,304.0,304 -9,358.0,358 -10,278.0,278 -11,500.0,500 -12,500.0,500 -13,500.0,500 -14,500.0,500 -15,476.0,476 -16,308.0,308 -17,394.0,394 -18,500.0,500 -19,500.0,500 diff --git a/projects/codes/DQN/outputs/CartPole-v1/20220828-214702/results/training_curve.png b/projects/codes/DQN/outputs/CartPole-v1/20220828-214702/results/training_curve.png deleted file mode 100644 index a14bb8c..0000000 Binary files a/projects/codes/DQN/outputs/CartPole-v1/20220828-214702/results/training_curve.png and /dev/null differ diff --git a/projects/codes/DQN/outputs/CartPole-v1/20220828-214702/results/training_results.csv b/projects/codes/DQN/outputs/CartPole-v1/20220828-214702/results/training_results.csv deleted file mode 100644 index 6bfc2ad..0000000 --- a/projects/codes/DQN/outputs/CartPole-v1/20220828-214702/results/training_results.csv +++ /dev/null @@ -1,2001 +0,0 @@ -episodes,rewards,steps -0,38.0,38 -1,16.0,16 -2,28.0,28 -3,10.0,10 -4,18.0,18 -5,10.0,10 -6,8.0,8 -7,19.0,19 -8,18.0,18 -9,32.0,32 -10,12.0,12 -11,14.0,14 -12,16.0,16 -13,20.0,20 -14,33.0,33 -15,24.0,24 -16,28.0,28 -17,33.0,33 -18,31.0,31 -19,14.0,14 -20,10.0,10 -21,19.0,19 -22,16.0,16 -23,19.0,19 -24,11.0,11 -25,23.0,23 
-26,20.0,20 -27,26.0,26 -28,16.0,16 -29,16.0,16 -30,11.0,11 -31,13.0,13 -32,16.0,16 -33,42.0,42 -34,15.0,15 -35,38.0,38 -36,16.0,16 -37,46.0,46 -38,28.0,28 -39,60.0,60 -40,32.0,32 -41,9.0,9 -42,23.0,23 -43,19.0,19 -44,12.0,12 -45,17.0,17 -46,27.0,27 -47,22.0,22 -48,26.0,26 -49,11.0,11 -50,15.0,15 -51,9.0,9 -52,24.0,24 -53,29.0,29 -54,11.0,11 -55,16.0,16 -56,23.0,23 -57,14.0,14 -58,11.0,11 -59,16.0,16 -60,24.0,24 -61,27.0,27 -62,11.0,11 -63,20.0,20 -64,14.0,14 -65,11.0,11 -66,15.0,15 -67,16.0,16 -68,13.0,13 -69,28.0,28 -70,14.0,14 -71,10.0,10 -72,28.0,28 -73,20.0,20 -74,13.0,13 -75,11.0,11 -76,27.0,27 -77,15.0,15 -78,14.0,14 -79,14.0,14 -80,10.0,10 -81,11.0,11 -82,14.0,14 -83,13.0,13 -84,15.0,15 -85,13.0,13 -86,10.0,10 -87,11.0,11 -88,11.0,11 -89,13.0,13 -90,14.0,14 -91,25.0,25 -92,14.0,14 -93,14.0,14 -94,19.0,19 -95,18.0,18 -96,9.0,9 -97,9.0,9 -98,11.0,11 -99,19.0,19 -100,10.0,10 -101,40.0,40 -102,10.0,10 -103,13.0,13 -104,13.0,13 -105,18.0,18 -106,13.0,13 -107,11.0,11 -108,11.0,11 -109,18.0,18 -110,20.0,20 -111,10.0,10 -112,24.0,24 -113,9.0,9 -114,10.0,10 -115,13.0,13 -116,21.0,21 -117,12.0,12 -118,14.0,14 -119,10.0,10 -120,10.0,10 -121,16.0,16 -122,10.0,10 -123,18.0,18 -124,13.0,13 -125,17.0,17 -126,14.0,14 -127,12.0,12 -128,16.0,16 -129,11.0,11 -130,15.0,15 -131,10.0,10 -132,13.0,13 -133,17.0,17 -134,9.0,9 -135,34.0,34 -136,23.0,23 -137,14.0,14 -138,17.0,17 -139,13.0,13 -140,27.0,27 -141,15.0,15 -142,16.0,16 -143,15.0,15 -144,11.0,11 -145,12.0,12 -146,9.0,9 -147,30.0,30 -148,12.0,12 -149,12.0,12 -150,13.0,13 -151,14.0,14 -152,17.0,17 -153,10.0,10 -154,15.0,15 -155,10.0,10 -156,14.0,14 -157,22.0,22 -158,12.0,12 -159,11.0,11 -160,34.0,34 -161,11.0,11 -162,12.0,12 -163,18.0,18 -164,9.0,9 -165,17.0,17 -166,16.0,16 -167,10.0,10 -168,17.0,17 -169,11.0,11 -170,21.0,21 -171,15.0,15 -172,17.0,17 -173,11.0,11 -174,23.0,23 -175,10.0,10 -176,25.0,25 -177,12.0,12 -178,10.0,10 -179,16.0,16 -180,14.0,14 -181,21.0,21 -182,11.0,11 -183,12.0,12 -184,11.0,11 -185,10.0,10 
-186,11.0,11 -187,17.0,17 -188,10.0,10 -189,14.0,14 -190,11.0,11 -191,12.0,12 -192,9.0,9 -193,11.0,11 -194,11.0,11 -195,16.0,16 -196,15.0,15 -197,10.0,10 -198,9.0,9 -199,17.0,17 -200,12.0,12 -201,9.0,9 -202,11.0,11 -203,9.0,9 -204,9.0,9 -205,16.0,16 -206,15.0,15 -207,13.0,13 -208,11.0,11 -209,13.0,13 -210,17.0,17 -211,8.0,8 -212,8.0,8 -213,12.0,12 -214,15.0,15 -215,13.0,13 -216,14.0,14 -217,11.0,11 -218,14.0,14 -219,13.0,13 -220,12.0,12 -221,9.0,9 -222,10.0,10 -223,10.0,10 -224,11.0,11 -225,9.0,9 -226,16.0,16 -227,23.0,23 -228,13.0,13 -229,16.0,16 -230,9.0,9 -231,12.0,12 -232,11.0,11 -233,10.0,10 -234,13.0,13 -235,15.0,15 -236,12.0,12 -237,11.0,11 -238,9.0,9 -239,11.0,11 -240,11.0,11 -241,11.0,11 -242,12.0,12 -243,8.0,8 -244,8.0,8 -245,10.0,10 -246,12.0,12 -247,12.0,12 -248,9.0,9 -249,12.0,12 -250,13.0,13 -251,11.0,11 -252,12.0,12 -253,10.0,10 -254,10.0,10 -255,11.0,11 -256,17.0,17 -257,11.0,11 -258,14.0,14 -259,12.0,12 -260,10.0,10 -261,11.0,11 -262,16.0,16 -263,13.0,13 -264,13.0,13 -265,15.0,15 -266,11.0,11 -267,8.0,8 -268,13.0,13 -269,15.0,15 -270,11.0,11 -271,9.0,9 -272,10.0,10 -273,11.0,11 -274,11.0,11 -275,9.0,9 -276,18.0,18 -277,13.0,13 -278,11.0,11 -279,14.0,14 -280,12.0,12 -281,16.0,16 -282,10.0,10 -283,12.0,12 -284,13.0,13 -285,9.0,9 -286,14.0,14 -287,26.0,26 -288,9.0,9 -289,10.0,10 -290,12.0,12 -291,13.0,13 -292,8.0,8 -293,13.0,13 -294,12.0,12 -295,11.0,11 -296,14.0,14 -297,10.0,10 -298,11.0,11 -299,12.0,12 -300,9.0,9 -301,11.0,11 -302,11.0,11 -303,15.0,15 -304,11.0,11 -305,11.0,11 -306,13.0,13 -307,8.0,8 -308,9.0,9 -309,10.0,10 -310,12.0,12 -311,13.0,13 -312,9.0,9 -313,15.0,15 -314,11.0,11 -315,12.0,12 -316,12.0,12 -317,15.0,15 -318,13.0,13 -319,8.0,8 -320,16.0,16 -321,9.0,9 -322,11.0,11 -323,12.0,12 -324,16.0,16 -325,9.0,9 -326,13.0,13 -327,13.0,13 -328,15.0,15 -329,12.0,12 -330,12.0,12 -331,8.0,8 -332,11.0,11 -333,15.0,15 -334,12.0,12 -335,11.0,11 -336,13.0,13 -337,13.0,13 -338,16.0,16 -339,11.0,11 -340,8.0,8 -341,10.0,10 -342,14.0,14 -343,18.0,18 
-344,11.0,11 -345,10.0,10 -346,11.0,11 -347,11.0,11 -348,20.0,20 -349,14.0,14 -350,10.0,10 -351,14.0,14 -352,9.0,9 -353,9.0,9 -354,12.0,12 -355,9.0,9 -356,10.0,10 -357,9.0,9 -358,10.0,10 -359,10.0,10 -360,22.0,22 -361,11.0,11 -362,12.0,12 -363,11.0,11 -364,8.0,8 -365,24.0,24 -366,11.0,11 -367,10.0,10 -368,10.0,10 -369,10.0,10 -370,10.0,10 -371,9.0,9 -372,9.0,9 -373,21.0,21 -374,10.0,10 -375,12.0,12 -376,14.0,14 -377,15.0,15 -378,10.0,10 -379,17.0,17 -380,8.0,8 -381,14.0,14 -382,11.0,11 -383,9.0,9 -384,10.0,10 -385,9.0,9 -386,15.0,15 -387,11.0,11 -388,17.0,17 -389,12.0,12 -390,11.0,11 -391,15.0,15 -392,10.0,10 -393,13.0,13 -394,12.0,12 -395,10.0,10 -396,12.0,12 -397,9.0,9 -398,14.0,14 -399,9.0,9 -400,13.0,13 -401,10.0,10 -402,13.0,13 -403,16.0,16 -404,9.0,9 -405,8.0,8 -406,11.0,11 -407,9.0,9 -408,15.0,15 -409,12.0,12 -410,15.0,15 -411,15.0,15 -412,15.0,15 -413,14.0,14 -414,12.0,12 -415,11.0,11 -416,14.0,14 -417,12.0,12 -418,14.0,14 -419,11.0,11 -420,8.0,8 -421,9.0,9 -422,13.0,13 -423,13.0,13 -424,8.0,8 -425,10.0,10 -426,10.0,10 -427,15.0,15 -428,14.0,14 -429,9.0,9 -430,12.0,12 -431,13.0,13 -432,12.0,12 -433,10.0,10 -434,14.0,14 -435,11.0,11 -436,12.0,12 -437,14.0,14 -438,10.0,10 -439,12.0,12 -440,9.0,9 -441,15.0,15 -442,12.0,12 -443,10.0,10 -444,8.0,8 -445,12.0,12 -446,14.0,14 -447,12.0,12 -448,9.0,9 -449,10.0,10 -450,13.0,13 -451,9.0,9 -452,10.0,10 -453,9.0,9 -454,11.0,11 -455,10.0,10 -456,18.0,18 -457,16.0,16 -458,10.0,10 -459,11.0,11 -460,15.0,15 -461,12.0,12 -462,11.0,11 -463,12.0,12 -464,11.0,11 -465,10.0,10 -466,12.0,12 -467,10.0,10 -468,12.0,12 -469,15.0,15 -470,11.0,11 -471,10.0,10 -472,12.0,12 -473,9.0,9 -474,13.0,13 -475,11.0,11 -476,15.0,15 -477,10.0,10 -478,11.0,11 -479,13.0,13 -480,10.0,10 -481,10.0,10 -482,10.0,10 -483,10.0,10 -484,15.0,15 -485,11.0,11 -486,12.0,12 -487,16.0,16 -488,10.0,10 -489,16.0,16 -490,11.0,11 -491,9.0,9 -492,9.0,9 -493,18.0,18 -494,10.0,10 -495,9.0,9 -496,40.0,40 -497,21.0,21 -498,10.0,10 -499,36.0,36 -500,37.0,37 -501,22.0,22 
-502,30.0,30 -503,23.0,23 -504,35.0,35 -505,48.0,48 -506,32.0,32 -507,21.0,21 -508,28.0,28 -509,29.0,29 -510,10.0,10 -511,27.0,27 -512,20.0,20 -513,23.0,23 -514,24.0,24 -515,21.0,21 -516,25.0,25 -517,20.0,20 -518,15.0,15 -519,23.0,23 -520,14.0,14 -521,18.0,18 -522,18.0,18 -523,18.0,18 -524,18.0,18 -525,20.0,20 -526,13.0,13 -527,21.0,21 -528,20.0,20 -529,17.0,17 -530,17.0,17 -531,17.0,17 -532,20.0,20 -533,15.0,15 -534,17.0,17 -535,17.0,17 -536,16.0,16 -537,16.0,16 -538,14.0,14 -539,21.0,21 -540,22.0,22 -541,14.0,14 -542,20.0,20 -543,25.0,25 -544,18.0,18 -545,22.0,22 -546,21.0,21 -547,20.0,20 -548,23.0,23 -549,20.0,20 -550,20.0,20 -551,25.0,25 -552,18.0,18 -553,14.0,14 -554,16.0,16 -555,16.0,16 -556,15.0,15 -557,26.0,26 -558,18.0,18 -559,20.0,20 -560,27.0,27 -561,18.0,18 -562,20.0,20 -563,20.0,20 -564,19.0,19 -565,26.0,26 -566,21.0,21 -567,25.0,25 -568,24.0,24 -569,24.0,24 -570,24.0,24 -571,17.0,17 -572,28.0,28 -573,20.0,20 -574,22.0,22 -575,16.0,16 -576,22.0,22 -577,14.0,14 -578,27.0,27 -579,29.0,29 -580,19.0,19 -581,22.0,22 -582,29.0,29 -583,29.0,29 -584,23.0,23 -585,22.0,22 -586,21.0,21 -587,18.0,18 -588,28.0,28 -589,29.0,29 -590,23.0,23 -591,23.0,23 -592,20.0,20 -593,32.0,32 -594,38.0,38 -595,29.0,29 -596,25.0,25 -597,22.0,22 -598,37.0,37 -599,20.0,20 -600,17.0,17 -601,22.0,22 -602,23.0,23 -603,28.0,28 -604,31.0,31 -605,23.0,23 -606,26.0,26 -607,22.0,22 -608,27.0,27 -609,32.0,32 -610,33.0,33 -611,27.0,27 -612,23.0,23 -613,38.0,38 -614,26.0,26 -615,27.0,27 -616,30.0,30 -617,22.0,22 -618,27.0,27 -619,38.0,38 -620,34.0,34 -621,39.0,39 -622,24.0,24 -623,29.0,29 -624,26.0,26 -625,30.0,30 -626,33.0,33 -627,34.0,34 -628,27.0,27 -629,26.0,26 -630,27.0,27 -631,37.0,37 -632,38.0,38 -633,46.0,46 -634,48.0,48 -635,56.0,56 -636,39.0,39 -637,18.0,18 -638,63.0,63 -639,51.0,51 -640,45.0,45 -641,27.0,27 -642,56.0,56 -643,32.0,32 -644,49.0,49 -645,40.0,40 -646,59.0,59 -647,52.0,52 -648,36.0,36 -649,43.0,43 -650,54.0,54 -651,59.0,59 -652,58.0,58 -653,61.0,61 -654,66.0,66 
-655,38.0,38 -656,33.0,33 -657,96.0,96 -658,82.0,82 -659,56.0,56 -660,42.0,42 -661,38.0,38 -662,48.0,48 -663,83.0,83 -664,33.0,33 -665,72.0,72 -666,41.0,41 -667,57.0,57 -668,54.0,54 -669,69.0,69 -670,63.0,63 -671,81.0,81 -672,69.0,69 -673,65.0,65 -674,55.0,55 -675,64.0,64 -676,54.0,54 -677,93.0,93 -678,47.0,47 -679,84.0,84 -680,46.0,46 -681,63.0,63 -682,51.0,51 -683,64.0,64 -684,58.0,58 -685,72.0,72 -686,35.0,35 -687,59.0,59 -688,124.0,124 -689,64.0,64 -690,59.0,59 -691,77.0,77 -692,55.0,55 -693,63.0,63 -694,100.0,100 -695,100.0,100 -696,58.0,58 -697,85.0,85 -698,50.0,50 -699,57.0,57 -700,59.0,59 -701,72.0,72 -702,200.0,200 -703,118.0,118 -704,63.0,63 -705,125.0,125 -706,80.0,80 -707,49.0,49 -708,52.0,52 -709,71.0,71 -710,68.0,68 -711,71.0,71 -712,60.0,60 -713,58.0,58 -714,192.0,192 -715,57.0,57 -716,93.0,93 -717,107.0,107 -718,59.0,59 -719,71.0,71 -720,81.0,81 -721,76.0,76 -722,98.0,98 -723,95.0,95 -724,99.0,99 -725,132.0,132 -726,99.0,99 -727,58.0,58 -728,95.0,95 -729,79.0,79 -730,70.0,70 -731,76.0,76 -732,58.0,58 -733,174.0,174 -734,58.0,58 -735,178.0,178 -736,92.0,92 -737,114.0,114 -738,101.0,101 -739,59.0,59 -740,171.0,171 -741,179.0,179 -742,85.0,85 -743,115.0,115 -744,74.0,74 -745,99.0,99 -746,174.0,174 -747,124.0,124 -748,101.0,101 -749,106.0,106 -750,75.0,75 -751,70.0,70 -752,139.0,139 -753,76.0,76 -754,86.0,86 -755,82.0,82 -756,51.0,51 -757,78.0,78 -758,67.0,67 -759,72.0,72 -760,83.0,83 -761,58.0,58 -762,109.0,109 -763,62.0,62 -764,74.0,74 -765,83.0,83 -766,111.0,111 -767,67.0,67 -768,57.0,57 -769,59.0,59 -770,112.0,112 -771,197.0,197 -772,111.0,111 -773,90.0,90 -774,111.0,111 -775,81.0,81 -776,72.0,72 -777,80.0,80 -778,130.0,130 -779,156.0,156 -780,100.0,100 -781,85.0,85 -782,66.0,66 -783,76.0,76 -784,105.0,105 -785,57.0,57 -786,201.0,201 -787,176.0,176 -788,175.0,175 -789,198.0,198 -790,73.0,73 -791,79.0,79 -792,59.0,59 -793,55.0,55 -794,84.0,84 -795,131.0,131 -796,67.0,67 -797,199.0,199 -798,94.0,94 -799,97.0,97 -800,94.0,94 -801,185.0,185 -802,98.0,98 
-803,136.0,136 -804,141.0,141 -805,81.0,81 -806,77.0,77 -807,100.0,100 -808,99.0,99 -809,133.0,133 -810,154.0,154 -811,74.0,74 -812,79.0,79 -813,94.0,94 -814,168.0,168 -815,338.0,338 -816,64.0,64 -817,112.0,112 -818,69.0,69 -819,143.0,143 -820,170.0,170 -821,170.0,170 -822,77.0,77 -823,83.0,83 -824,104.0,104 -825,152.0,152 -826,198.0,198 -827,159.0,159 -828,235.0,235 -829,76.0,76 -830,201.0,201 -831,289.0,289 -832,113.0,113 -833,294.0,294 -834,74.0,74 -835,416.0,416 -836,194.0,194 -837,85.0,85 -838,170.0,170 -839,208.0,208 -840,177.0,177 -841,83.0,83 -842,82.0,82 -843,183.0,183 -844,90.0,90 -845,398.0,398 -846,244.0,244 -847,99.0,99 -848,310.0,310 -849,195.0,195 -850,183.0,183 -851,162.0,162 -852,115.0,115 -853,82.0,82 -854,233.0,233 -855,102.0,102 -856,262.0,262 -857,300.0,300 -858,245.0,245 -859,299.0,299 -860,150.0,150 -861,199.0,199 -862,79.0,79 -863,74.0,74 -864,113.0,113 -865,152.0,152 -866,126.0,126 -867,68.0,68 -868,185.0,185 -869,156.0,156 -870,63.0,63 -871,121.0,121 -872,83.0,83 -873,72.0,72 -874,337.0,337 -875,179.0,179 -876,325.0,325 -877,115.0,115 -878,217.0,217 -879,74.0,74 -880,90.0,90 -881,218.0,218 -882,82.0,82 -883,173.0,173 -884,106.0,106 -885,83.0,83 -886,68.0,68 -887,173.0,173 -888,159.0,159 -889,57.0,57 -890,80.0,80 -891,302.0,302 -892,71.0,71 -893,59.0,59 -894,153.0,153 -895,129.0,129 -896,63.0,63 -897,147.0,147 -898,82.0,82 -899,186.0,186 -900,74.0,74 -901,81.0,81 -902,61.0,61 -903,88.0,88 -904,101.0,101 -905,60.0,60 -906,154.0,154 -907,202.0,202 -908,96.0,96 -909,81.0,81 -910,91.0,91 -911,77.0,77 -912,63.0,63 -913,154.0,154 -914,57.0,57 -915,86.0,86 -916,84.0,84 -917,66.0,66 -918,141.0,141 -919,88.0,88 -920,89.0,89 -921,49.0,49 -922,97.0,97 -923,50.0,50 -924,211.0,211 -925,69.0,69 -926,278.0,278 -927,75.0,75 -928,60.0,60 -929,57.0,57 -930,200.0,200 -931,101.0,101 -932,70.0,70 -933,93.0,93 -934,61.0,61 -935,80.0,80 -936,94.0,94 -937,53.0,53 -938,223.0,223 -939,142.0,142 -940,74.0,74 -941,60.0,60 -942,75.0,75 -943,78.0,78 -944,81.0,81 
-945,51.0,51 -946,215.0,215 -947,64.0,64 -948,70.0,70 -949,85.0,85 -950,102.0,102 -951,48.0,48 -952,69.0,69 -953,65.0,65 -954,70.0,70 -955,174.0,174 -956,46.0,46 -957,75.0,75 -958,75.0,75 -959,62.0,62 -960,71.0,71 -961,67.0,67 -962,48.0,48 -963,64.0,64 -964,58.0,58 -965,64.0,64 -966,82.0,82 -967,70.0,70 -968,68.0,68 -969,48.0,48 -970,48.0,48 -971,53.0,53 -972,80.0,80 -973,46.0,46 -974,101.0,101 -975,303.0,303 -976,59.0,59 -977,212.0,212 -978,64.0,64 -979,76.0,76 -980,69.0,69 -981,241.0,241 -982,46.0,46 -983,45.0,45 -984,124.0,124 -985,99.0,99 -986,210.0,210 -987,67.0,67 -988,78.0,78 -989,58.0,58 -990,54.0,54 -991,63.0,63 -992,37.0,37 -993,46.0,46 -994,63.0,63 -995,48.0,48 -996,70.0,70 -997,58.0,58 -998,88.0,88 -999,62.0,62 -1000,173.0,173 -1001,99.0,99 -1002,47.0,47 -1003,47.0,47 -1004,74.0,74 -1005,101.0,101 -1006,42.0,42 -1007,46.0,46 -1008,61.0,61 -1009,42.0,42 -1010,48.0,48 -1011,60.0,60 -1012,42.0,42 -1013,53.0,53 -1014,54.0,54 -1015,62.0,62 -1016,98.0,98 -1017,50.0,50 -1018,39.0,39 -1019,60.0,60 -1020,52.0,52 -1021,46.0,46 -1022,68.0,68 -1023,40.0,40 -1024,41.0,41 -1025,54.0,54 -1026,66.0,66 -1027,112.0,112 -1028,55.0,55 -1029,46.0,46 -1030,72.0,72 -1031,54.0,54 -1032,51.0,51 -1033,43.0,43 -1034,66.0,66 -1035,59.0,59 -1036,57.0,57 -1037,68.0,68 -1038,63.0,63 -1039,38.0,38 -1040,48.0,48 -1041,58.0,58 -1042,58.0,58 -1043,116.0,116 -1044,52.0,52 -1045,180.0,180 -1046,91.0,91 -1047,292.0,292 -1048,65.0,65 -1049,46.0,46 -1050,40.0,40 -1051,192.0,192 -1052,46.0,46 -1053,52.0,52 -1054,50.0,50 -1055,37.0,37 -1056,136.0,136 -1057,46.0,46 -1058,35.0,35 -1059,89.0,89 -1060,34.0,34 -1061,101.0,101 -1062,102.0,102 -1063,166.0,166 -1064,62.0,62 -1065,40.0,40 -1066,37.0,37 -1067,45.0,45 -1068,45.0,45 -1069,48.0,48 -1070,67.0,67 -1071,87.0,87 -1072,51.0,51 -1073,103.0,103 -1074,46.0,46 -1075,52.0,52 -1076,40.0,40 -1077,47.0,47 -1078,49.0,49 -1079,35.0,35 -1080,46.0,46 -1081,38.0,38 -1082,36.0,36 -1083,219.0,219 -1084,57.0,57 -1085,39.0,39 -1086,48.0,48 -1087,37.0,37 
-1088,46.0,46 -1089,37.0,37 -1090,65.0,65 -1091,39.0,39 -1092,44.0,44 -1093,85.0,85 -1094,50.0,50 -1095,39.0,39 -1096,57.0,57 -1097,221.0,221 -1098,35.0,35 -1099,59.0,59 -1100,46.0,46 -1101,38.0,38 -1102,37.0,37 -1103,62.0,62 -1104,59.0,59 -1105,46.0,46 -1106,40.0,40 -1107,74.0,74 -1108,58.0,58 -1109,37.0,37 -1110,56.0,56 -1111,52.0,52 -1112,45.0,45 -1113,76.0,76 -1114,54.0,54 -1115,37.0,37 -1116,41.0,41 -1117,47.0,47 -1118,56.0,56 -1119,39.0,39 -1120,37.0,37 -1121,42.0,42 -1122,59.0,59 -1123,38.0,38 -1124,49.0,49 -1125,49.0,49 -1126,130.0,130 -1127,52.0,52 -1128,45.0,45 -1129,43.0,43 -1130,57.0,57 -1131,37.0,37 -1132,43.0,43 -1133,60.0,60 -1134,58.0,58 -1135,57.0,57 -1136,35.0,35 -1137,57.0,57 -1138,154.0,154 -1139,39.0,39 -1140,48.0,48 -1141,78.0,78 -1142,58.0,58 -1143,70.0,70 -1144,52.0,52 -1145,53.0,53 -1146,58.0,58 -1147,40.0,40 -1148,74.0,74 -1149,39.0,39 -1150,69.0,69 -1151,78.0,78 -1152,34.0,34 -1153,44.0,44 -1154,45.0,45 -1155,173.0,173 -1156,190.0,190 -1157,47.0,47 -1158,36.0,36 -1159,52.0,52 -1160,44.0,44 -1161,50.0,50 -1162,96.0,96 -1163,88.0,88 -1164,38.0,38 -1165,44.0,44 -1166,102.0,102 -1167,49.0,49 -1168,46.0,46 -1169,68.0,68 -1170,46.0,46 -1171,50.0,50 -1172,58.0,58 -1173,46.0,46 -1174,50.0,50 -1175,40.0,40 -1176,44.0,44 -1177,75.0,75 -1178,109.0,109 -1179,51.0,51 -1180,44.0,44 -1181,42.0,42 -1182,41.0,41 -1183,62.0,62 -1184,48.0,48 -1185,60.0,60 -1186,52.0,52 -1187,73.0,73 -1188,39.0,39 -1189,42.0,42 -1190,89.0,89 -1191,64.0,64 -1192,40.0,40 -1193,42.0,42 -1194,59.0,59 -1195,48.0,48 -1196,45.0,45 -1197,48.0,48 -1198,171.0,171 -1199,77.0,77 -1200,97.0,97 -1201,43.0,43 -1202,86.0,86 -1203,62.0,62 -1204,67.0,67 -1205,86.0,86 -1206,77.0,77 -1207,88.0,88 -1208,73.0,73 -1209,80.0,80 -1210,94.0,94 -1211,71.0,71 -1212,96.0,96 -1213,65.0,65 -1214,61.0,61 -1215,63.0,63 -1216,72.0,72 -1217,78.0,78 -1218,92.0,92 -1219,64.0,64 -1220,76.0,76 -1221,69.0,69 -1222,86.0,86 -1223,93.0,93 -1224,70.0,70 -1225,67.0,67 -1226,89.0,89 -1227,72.0,72 -1228,106.0,106 
-1229,76.0,76 -1230,218.0,218 -1231,64.0,64 -1232,224.0,224 -1233,68.0,68 -1234,217.0,217 -1235,216.0,216 -1236,205.0,205 -1237,234.0,234 -1238,196.0,196 -1239,217.0,217 -1240,213.0,213 -1241,247.0,247 -1242,295.0,295 -1243,197.0,197 -1244,212.0,212 -1245,196.0,196 -1246,226.0,226 -1247,228.0,228 -1248,240.0,240 -1249,189.0,189 -1250,190.0,190 -1251,242.0,242 -1252,204.0,204 -1253,201.0,201 -1254,189.0,189 -1255,225.0,225 -1256,212.0,212 -1257,198.0,198 -1258,284.0,284 -1259,197.0,197 -1260,194.0,194 -1261,230.0,230 -1262,200.0,200 -1263,207.0,207 -1264,207.0,207 -1265,233.0,233 -1266,213.0,213 -1267,275.0,275 -1268,201.0,201 -1269,201.0,201 -1270,202.0,202 -1271,264.0,264 -1272,212.0,212 -1273,490.0,490 -1274,222.0,222 -1275,244.0,244 -1276,500.0,500 -1277,244.0,244 -1278,246.0,246 -1279,237.0,237 -1280,210.0,210 -1281,274.0,274 -1282,258.0,258 -1283,405.0,405 -1284,216.0,216 -1285,500.0,500 -1286,218.0,218 -1287,361.0,361 -1288,262.0,262 -1289,500.0,500 -1290,194.0,194 -1291,361.0,361 -1292,194.0,194 -1293,229.0,229 -1294,361.0,361 -1295,270.0,270 -1296,297.0,297 -1297,225.0,225 -1298,214.0,214 -1299,256.0,256 -1300,330.0,330 -1301,347.0,347 -1302,372.0,372 -1303,183.0,183 -1304,300.0,300 -1305,313.0,313 -1306,227.0,227 -1307,307.0,307 -1308,296.0,296 -1309,212.0,212 -1310,244.0,244 -1311,206.0,206 -1312,262.0,262 -1313,274.0,274 -1314,225.0,225 -1315,209.0,209 -1316,272.0,272 -1317,213.0,213 -1318,262.0,262 -1319,214.0,214 -1320,224.0,224 -1321,368.0,368 -1322,264.0,264 -1323,243.0,243 -1324,299.0,299 -1325,210.0,210 -1326,310.0,310 -1327,252.0,252 -1328,201.0,201 -1329,193.0,193 -1330,220.0,220 -1331,224.0,224 -1332,203.0,203 -1333,268.0,268 -1334,288.0,288 -1335,330.0,330 -1336,331.0,331 -1337,249.0,249 -1338,222.0,222 -1339,237.0,237 -1340,204.0,204 -1341,329.0,329 -1342,232.0,232 -1343,175.0,175 -1344,251.0,251 -1345,259.0,259 -1346,220.0,220 -1347,246.0,246 -1348,215.0,215 -1349,257.0,257 -1350,350.0,350 -1351,269.0,269 -1352,266.0,266 -1353,220.0,220 
-1354,276.0,276 -1355,281.0,281 -1356,200.0,200 -1357,274.0,274 -1358,260.0,260 -1359,393.0,393 -1360,240.0,240 -1361,197.0,197 -1362,273.0,273 -1363,220.0,220 -1364,228.0,228 -1365,337.0,337 -1366,203.0,203 -1367,500.0,500 -1368,214.0,214 -1369,271.0,271 -1370,211.0,211 -1371,264.0,264 -1372,338.0,338 -1373,298.0,298 -1374,358.0,358 -1375,454.0,454 -1376,317.0,317 -1377,283.0,283 -1378,441.0,441 -1379,343.0,343 -1380,270.0,270 -1381,263.0,263 -1382,405.0,405 -1383,255.0,255 -1384,500.0,500 -1385,389.0,389 -1386,212.0,212 -1387,339.0,339 -1388,225.0,225 -1389,500.0,500 -1390,467.0,467 -1391,237.0,237 -1392,257.0,257 -1393,352.0,352 -1394,264.0,264 -1395,452.0,452 -1396,388.0,388 -1397,447.0,447 -1398,258.0,258 -1399,269.0,269 -1400,264.0,264 -1401,238.0,238 -1402,258.0,258 -1403,433.0,433 -1404,500.0,500 -1405,298.0,298 -1406,500.0,500 -1407,287.0,287 -1408,329.0,329 -1409,500.0,500 -1410,424.0,424 -1411,239.0,239 -1412,350.0,350 -1413,287.0,287 -1414,388.0,388 -1415,498.0,498 -1416,454.0,454 -1417,351.0,351 -1418,277.0,277 -1419,256.0,256 -1420,339.0,339 -1421,338.0,338 -1422,339.0,339 -1423,292.0,292 -1424,500.0,500 -1425,264.0,264 -1426,381.0,381 -1427,320.0,320 -1428,500.0,500 -1429,388.0,388 -1430,500.0,500 -1431,500.0,500 -1432,500.0,500 -1433,309.0,309 -1434,470.0,470 -1435,496.0,496 -1436,326.0,326 -1437,500.0,500 -1438,500.0,500 -1439,284.0,284 -1440,309.0,309 -1441,349.0,349 -1442,245.0,245 -1443,407.0,407 -1444,305.0,305 -1445,233.0,233 -1446,469.0,469 -1447,304.0,304 -1448,303.0,303 -1449,500.0,500 -1450,257.0,257 -1451,336.0,336 -1452,500.0,500 -1453,440.0,440 -1454,500.0,500 -1455,500.0,500 -1456,317.0,317 -1457,500.0,500 -1458,475.0,475 -1459,395.0,395 -1460,331.0,331 -1461,374.0,374 -1462,500.0,500 -1463,246.0,246 -1464,355.0,355 -1465,500.0,500 -1466,500.0,500 -1467,260.0,260 -1468,500.0,500 -1469,437.0,437 -1470,500.0,500 -1471,367.0,367 -1472,388.0,388 -1473,239.0,239 -1474,493.0,493 -1475,322.0,322 -1476,500.0,500 -1477,416.0,416 -1478,403.0,403 
-1479,500.0,500 -1480,355.0,355 -1481,500.0,500 -1482,437.0,437 -1483,433.0,433 -1484,500.0,500 -1485,246.0,246 -1486,364.0,364 -1487,255.0,255 -1488,500.0,500 -1489,443.0,443 -1490,500.0,500 -1491,287.0,287 -1492,402.0,402 -1493,500.0,500 -1494,499.0,499 -1495,500.0,500 -1496,248.0,248 -1497,266.0,266 -1498,500.0,500 -1499,338.0,338 -1500,395.0,395 -1501,304.0,304 -1502,433.0,433 -1503,351.0,351 -1504,230.0,230 -1505,352.0,352 -1506,500.0,500 -1507,265.0,265 -1508,500.0,500 -1509,244.0,244 -1510,392.0,392 -1511,467.0,467 -1512,353.0,353 -1513,500.0,500 -1514,473.0,473 -1515,246.0,246 -1516,336.0,336 -1517,317.0,317 -1518,325.0,325 -1519,481.0,481 -1520,374.0,374 -1521,231.0,231 -1522,500.0,500 -1523,234.0,234 -1524,290.0,290 -1525,297.0,297 -1526,299.0,299 -1527,364.0,364 -1528,326.0,326 -1529,482.0,482 -1530,233.0,233 -1531,500.0,500 -1532,264.0,264 -1533,314.0,314 -1534,500.0,500 -1535,433.0,433 -1536,415.0,415 -1537,288.0,288 -1538,458.0,458 -1539,308.0,308 -1540,500.0,500 -1541,459.0,459 -1542,273.0,273 -1543,500.0,500 -1544,500.0,500 -1545,470.0,470 -1546,364.0,364 -1547,425.0,425 -1548,374.0,374 -1549,399.0,399 -1550,500.0,500 -1551,500.0,500 -1552,500.0,500 -1553,497.0,497 -1554,272.0,272 -1555,268.0,268 -1556,292.0,292 -1557,500.0,500 -1558,281.0,281 -1559,272.0,272 -1560,411.0,411 -1561,500.0,500 -1562,430.0,430 -1563,415.0,415 -1564,500.0,500 -1565,464.0,464 -1566,436.0,436 -1567,500.0,500 -1568,344.0,344 -1569,395.0,395 -1570,385.0,385 -1571,232.0,232 -1572,260.0,260 -1573,499.0,499 -1574,411.0,411 -1575,500.0,500 -1576,290.0,290 -1577,321.0,321 -1578,481.0,481 -1579,473.0,473 -1580,301.0,301 -1581,404.0,404 -1582,410.0,410 -1583,437.0,437 -1584,311.0,311 -1585,500.0,500 -1586,231.0,231 -1587,376.0,376 -1588,359.0,359 -1589,276.0,276 -1590,457.0,457 -1591,500.0,500 -1592,318.0,318 -1593,500.0,500 -1594,309.0,309 -1595,481.0,481 -1596,274.0,274 -1597,331.0,331 -1598,500.0,500 -1599,259.0,259 -1600,500.0,500 -1601,291.0,291 -1602,499.0,499 -1603,256.0,256 
-1604,266.0,266 -1605,500.0,500 -1606,325.0,325 -1607,359.0,359 -1608,274.0,274 -1609,357.0,357 -1610,465.0,465 -1611,500.0,500 -1612,435.0,435 -1613,268.0,268 -1614,251.0,251 -1615,252.0,252 -1616,275.0,275 -1617,284.0,284 -1618,416.0,416 -1619,229.0,229 -1620,500.0,500 -1621,265.0,265 -1622,354.0,354 -1623,251.0,251 -1624,381.0,381 -1625,279.0,279 -1626,267.0,267 -1627,232.0,232 -1628,365.0,365 -1629,500.0,500 -1630,489.0,489 -1631,500.0,500 -1632,243.0,243 -1633,253.0,253 -1634,334.0,334 -1635,500.0,500 -1636,280.0,280 -1637,268.0,268 -1638,356.0,356 -1639,500.0,500 -1640,253.0,253 -1641,244.0,244 -1642,237.0,237 -1643,421.0,421 -1644,247.0,247 -1645,378.0,378 -1646,252.0,252 -1647,282.0,282 -1648,247.0,247 -1649,289.0,289 -1650,226.0,226 -1651,289.0,289 -1652,480.0,480 -1653,500.0,500 -1654,270.0,270 -1655,309.0,309 -1656,292.0,292 -1657,272.0,272 -1658,233.0,233 -1659,261.0,261 -1660,500.0,500 -1661,316.0,316 -1662,310.0,310 -1663,276.0,276 -1664,315.0,315 -1665,267.0,267 -1666,420.0,420 -1667,320.0,320 -1668,500.0,500 -1669,370.0,370 -1670,500.0,500 -1671,246.0,246 -1672,296.0,296 -1673,256.0,256 -1674,281.0,281 -1675,327.0,327 -1676,242.0,242 -1677,393.0,393 -1678,332.0,332 -1679,288.0,288 -1680,250.0,250 -1681,391.0,391 -1682,296.0,296 -1683,490.0,490 -1684,224.0,224 -1685,369.0,369 -1686,311.0,311 -1687,335.0,335 -1688,227.0,227 -1689,500.0,500 -1690,242.0,242 -1691,363.0,363 -1692,284.0,284 -1693,254.0,254 -1694,386.0,386 -1695,353.0,353 -1696,443.0,443 -1697,500.0,500 -1698,253.0,253 -1699,293.0,293 -1700,500.0,500 -1701,259.0,259 -1702,254.0,254 -1703,343.0,343 -1704,313.0,313 -1705,253.0,253 -1706,409.0,409 -1707,474.0,474 -1708,226.0,226 -1709,325.0,325 -1710,441.0,441 -1711,252.0,252 -1712,430.0,430 -1713,287.0,287 -1714,318.0,318 -1715,323.0,323 -1716,268.0,268 -1717,288.0,288 -1718,292.0,292 -1719,323.0,323 -1720,291.0,291 -1721,399.0,399 -1722,263.0,263 -1723,385.0,385 -1724,229.0,229 -1725,282.0,282 -1726,347.0,347 -1727,257.0,257 -1728,264.0,264 
-1729,282.0,282 -1730,402.0,402 -1731,328.0,328 -1732,227.0,227 -1733,272.0,272 -1734,462.0,462 -1735,236.0,236 -1736,302.0,302 -1737,275.0,275 -1738,280.0,280 -1739,331.0,331 -1740,352.0,352 -1741,500.0,500 -1742,389.0,389 -1743,303.0,303 -1744,398.0,398 -1745,359.0,359 -1746,436.0,436 -1747,233.0,233 -1748,295.0,295 -1749,234.0,234 -1750,290.0,290 -1751,261.0,261 -1752,248.0,248 -1753,263.0,263 -1754,368.0,368 -1755,500.0,500 -1756,276.0,276 -1757,243.0,243 -1758,500.0,500 -1759,289.0,289 -1760,500.0,500 -1761,275.0,275 -1762,297.0,297 -1763,250.0,250 -1764,405.0,405 -1765,261.0,261 -1766,239.0,239 -1767,351.0,351 -1768,301.0,301 -1769,384.0,384 -1770,240.0,240 -1771,258.0,258 -1772,258.0,258 -1773,249.0,249 -1774,254.0,254 -1775,374.0,374 -1776,347.0,347 -1777,377.0,377 -1778,263.0,263 -1779,229.0,229 -1780,292.0,292 -1781,259.0,259 -1782,259.0,259 -1783,260.0,260 -1784,267.0,267 -1785,256.0,256 -1786,306.0,306 -1787,238.0,238 -1788,257.0,257 -1789,252.0,252 -1790,293.0,293 -1791,273.0,273 -1792,308.0,308 -1793,291.0,291 -1794,342.0,342 -1795,273.0,273 -1796,257.0,257 -1797,221.0,221 -1798,276.0,276 -1799,279.0,279 -1800,269.0,269 -1801,291.0,291 -1802,359.0,359 -1803,431.0,431 -1804,375.0,375 -1805,298.0,298 -1806,253.0,253 -1807,276.0,276 -1808,258.0,258 -1809,242.0,242 -1810,397.0,397 -1811,394.0,394 -1812,323.0,323 -1813,257.0,257 -1814,343.0,343 -1815,287.0,287 -1816,372.0,372 -1817,294.0,294 -1818,261.0,261 -1819,270.0,270 -1820,284.0,284 -1821,247.0,247 -1822,372.0,372 -1823,292.0,292 -1824,357.0,357 -1825,247.0,247 -1826,355.0,355 -1827,447.0,447 -1828,251.0,251 -1829,375.0,375 -1830,262.0,262 -1831,340.0,340 -1832,243.0,243 -1833,261.0,261 -1834,247.0,247 -1835,499.0,499 -1836,242.0,242 -1837,237.0,237 -1838,255.0,255 -1839,320.0,320 -1840,216.0,216 -1841,356.0,356 -1842,261.0,261 -1843,247.0,247 -1844,229.0,229 -1845,238.0,238 -1846,233.0,233 -1847,232.0,232 -1848,234.0,234 -1849,391.0,391 -1850,273.0,273 -1851,438.0,438 -1852,402.0,402 -1853,394.0,394 
-1854,287.0,287 -1855,230.0,230 -1856,251.0,251 -1857,278.0,278 -1858,378.0,378 -1859,249.0,249 -1860,271.0,271 -1861,296.0,296 -1862,256.0,256 -1863,270.0,270 -1864,500.0,500 -1865,385.0,385 -1866,284.0,284 -1867,248.0,248 -1868,283.0,283 -1869,246.0,246 -1870,339.0,339 -1871,415.0,415 -1872,276.0,276 -1873,275.0,275 -1874,457.0,457 -1875,500.0,500 -1876,281.0,281 -1877,324.0,324 -1878,414.0,414 -1879,314.0,314 -1880,449.0,449 -1881,281.0,281 -1882,368.0,368 -1883,322.0,322 -1884,235.0,235 -1885,337.0,337 -1886,500.0,500 -1887,311.0,311 -1888,347.0,347 -1889,365.0,365 -1890,272.0,272 -1891,342.0,342 -1892,379.0,379 -1893,247.0,247 -1894,321.0,321 -1895,403.0,403 -1896,464.0,464 -1897,330.0,330 -1898,361.0,361 -1899,500.0,500 -1900,433.0,433 -1901,500.0,500 -1902,293.0,293 -1903,386.0,386 -1904,283.0,283 -1905,366.0,366 -1906,278.0,278 -1907,279.0,279 -1908,415.0,415 -1909,480.0,480 -1910,500.0,500 -1911,353.0,353 -1912,500.0,500 -1913,269.0,269 -1914,500.0,500 -1915,385.0,385 -1916,246.0,246 -1917,481.0,481 -1918,500.0,500 -1919,462.0,462 -1920,373.0,373 -1921,500.0,500 -1922,272.0,272 -1923,500.0,500 -1924,495.0,495 -1925,500.0,500 -1926,295.0,295 -1927,249.0,249 -1928,256.0,256 -1929,500.0,500 -1930,317.0,317 -1931,500.0,500 -1932,317.0,317 -1933,258.0,258 -1934,380.0,380 -1935,402.0,402 -1936,500.0,500 -1937,319.0,319 -1938,319.0,319 -1939,500.0,500 -1940,447.0,447 -1941,500.0,500 -1942,459.0,459 -1943,500.0,500 -1944,299.0,299 -1945,290.0,290 -1946,318.0,318 -1947,500.0,500 -1948,500.0,500 -1949,500.0,500 -1950,500.0,500 -1951,478.0,478 -1952,500.0,500 -1953,500.0,500 -1954,330.0,330 -1955,366.0,366 -1956,500.0,500 -1957,283.0,283 -1958,300.0,300 -1959,292.0,292 -1960,270.0,270 -1961,500.0,500 -1962,474.0,474 -1963,328.0,328 -1964,389.0,389 -1965,500.0,500 -1966,493.0,493 -1967,357.0,357 -1968,500.0,500 -1969,500.0,500 -1970,500.0,500 -1971,320.0,320 -1972,385.0,385 -1973,500.0,500 -1974,422.0,422 -1975,405.0,405 -1976,500.0,500 -1977,363.0,363 -1978,329.0,329 
-1979,309.0,309 -1980,500.0,500 -1981,500.0,500 -1982,277.0,277 -1983,461.0,461 -1984,262.0,262 -1985,500.0,500 -1986,500.0,500 -1987,370.0,370 -1988,500.0,500 -1989,255.0,255 -1990,449.0,449 -1991,361.0,361 -1992,319.0,319 -1993,382.0,382 -1994,363.0,363 -1995,500.0,500 -1996,336.0,336 -1997,500.0,500 -1998,500.0,500 -1999,500.0,500 diff --git a/projects/codes/DQN/task0.py b/projects/codes/DQN/task0.py new file mode 100644 index 0000000..e69ed45 --- /dev/null +++ b/projects/codes/DQN/task0.py @@ -0,0 +1,138 @@ +#!/usr/bin/env python +# coding=utf-8 +''' +Author: JiangJi +Email: johnjim0816@gmail.com +Date: 2022-10-12 11:09:54 +LastEditor: JiangJi +LastEditTime: 2022-10-31 00:13:31 +Discription: CartPole-v1,Acrobot-v1 +''' +import sys,os +curr_path = os.path.dirname(os.path.abspath(__file__)) # current path +parent_path = os.path.dirname(curr_path) # parent path +sys.path.append(parent_path) # add to system path +import gym +from common.utils import all_seed,merge_class_attrs +from common.models import MLP +from common.memories import ReplayBuffer +from common.launcher import Launcher +from envs.register import register_env +from dqn import DQN +from config.config import GeneralConfigDQN,AlgoConfigDQN +class Main(Launcher): + def __init__(self) -> None: + super().__init__() + self.cfgs['general_cfg'] = merge_class_attrs(self.cfgs['general_cfg'],GeneralConfigDQN()) + self.cfgs['algo_cfg'] = merge_class_attrs(self.cfgs['algo_cfg'],AlgoConfigDQN()) + def env_agent_config(self,cfg,logger): + ''' create env and agent + ''' + register_env(cfg.env_name) + env = gym.make(cfg.env_name,new_step_api=True) # create env + if cfg.seed !=0: # set random seed + all_seed(env,seed=cfg.seed) + try: # state dimension + n_states = env.observation_space.n # print(hasattr(env.observation_space, 'n')) + except AttributeError: + n_states = env.observation_space.shape[0] # print(hasattr(env.observation_space, 'shape')) + n_actions = env.action_space.n # action dimension + 
logger.info(f"n_states: {n_states}, n_actions: {n_actions}") # print info + # update to cfg paramters + setattr(cfg, 'n_states', n_states) + setattr(cfg, 'n_actions', n_actions) + # cfg.update({"n_states":n_states,"n_actions":n_actions}) # update to cfg paramters + model = MLP(n_states,n_actions,hidden_dim=cfg.hidden_dim) + memory = ReplayBuffer(cfg.buffer_size) # replay buffer + agent = DQN(model,memory,cfg) # create agent + return env, agent + def train_one_episode(self, env, agent, cfg): + ep_reward = 0 # reward per episode + ep_step = 0 + state = env.reset() # reset and obtain initial state + for _ in range(cfg.max_steps): + ep_step += 1 + action = agent.sample_action(state) # sample action + next_state, reward, terminated, truncated , info = env.step(action) # update env and return transitions under new_step_api of OpenAI Gym + agent.memory.push(state, action, reward, + next_state, terminated) # save transitions + agent.update() # update agent + state = next_state # update next state for env + ep_reward += reward # + if terminated: + break + return agent,ep_reward,ep_step + def test_one_episode(self, env, agent, cfg): + ep_reward = 0 # reward per episode + ep_step = 0 + state = env.reset() # reset and obtain initial state + for _ in range(cfg.max_steps): + ep_step += 1 + action = agent.predict_action(state) # sample action + next_state, reward, terminated, truncated , info = env.step(action) # update env and return transitions under new_step_api of OpenAI Gym + state = next_state # update next state for env + ep_reward += reward # + if terminated: + break + return agent,ep_reward,ep_step + # def train(self,env, agent,cfg,logger): + # ''' 训练 + # ''' + # logger.info("Start training!") + # logger.info(f"Env: {cfg.env_name}, Algorithm: {cfg.algo_name}, Device: {cfg.device}") + # rewards = [] # record rewards for all episodes + # steps = [] # record steps for all episodes + # for i_ep in range(cfg.train_eps): + # ep_reward = 0 # reward per episode + # ep_step = 0 + 
# state = env.reset() # reset and obtain initial state + # for _ in range(cfg.max_steps): + # ep_step += 1 + # action = agent.sample_action(state) # sample action + # next_state, reward, terminated, truncated , info = env.step(action) # update env and return transitions under new_step_api of OpenAI Gym + # agent.memory.push(state, action, reward, + # next_state, terminated) # save transitions + # state = next_state # update next state for env + # agent.update() # update agent + # ep_reward += reward # + # if terminated: + # break + # if (i_ep + 1) % cfg.target_update == 0: # target net update, target_update means "C" in pseucodes + # agent.target_net.load_state_dict(agent.policy_net.state_dict()) + # steps.append(ep_step) + # rewards.append(ep_reward) + # logger.info(f'Episode: {i_ep+1}/{cfg.train_eps}, Reward: {ep_reward:.2f}: Epislon: {agent.epsilon:.3f}') + # logger.info("Finish training!") + # env.close() + # res_dic = {'episodes':range(len(rewards)),'rewards':rewards,'steps':steps} + # return res_dic + + # def test(self,cfg, env, agent,logger): + # logger.info("Start testing!") + # logger.info(f"Env: {cfg.env_name}, Algorithm: {cfg.algo_name}, Device: {cfg.device}") + # rewards = [] # record rewards for all episodes + # steps = [] # record steps for all episodes + # for i_ep in range(cfg.test_eps): + # ep_reward = 0 # reward per episode + # ep_step = 0 + # state = env.reset() # reset and obtain initial state + # for _ in range(cfg.max_steps): + # ep_step+=1 + # action = agent.predict_action(state) # predict action + # next_state, reward, terminated, _, _ = env.step(action) + # state = next_state + # ep_reward += reward + # if terminated: + # break + # steps.append(ep_step) + # rewards.append(ep_reward) + # logger.info(f"Episode: {i_ep+1}/{cfg.test_eps}, Reward: {ep_reward:.2f}") + # logger.info("Finish testing!") + # env.close() + # return {'episodes':range(len(rewards)),'rewards':rewards,'steps':steps} + + +if __name__ == "__main__": + main = Main() + 
main.run() + diff --git a/projects/codes/DQN/main.py b/projects/codes/DQN/task1.py similarity index 54% rename from projects/codes/DQN/main.py rename to projects/codes/DQN/task1.py index 651a98e..590d0c2 100644 --- a/projects/codes/DQN/main.py +++ b/projects/codes/DQN/task1.py @@ -1,3 +1,13 @@ +#!/usr/bin/env python +# coding=utf-8 +''' +Author: JiangJi +Email: johnjim0816@gmail.com +Date: 2022-10-24 08:21:31 +LastEditor: JiangJi +LastEditTime: 2022-10-26 09:50:49 +Discription: Not finished +''' import sys,os os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE" # avoid "OMP: Error #15: Initializing libiomp5md.dll, but found libiomp5md.dll already initialized." curr_path = os.path.dirname(os.path.abspath(__file__)) # current path @@ -15,6 +25,73 @@ from common.memories import ReplayBuffer from common.launcher import Launcher from envs.register import register_env from dqn import DQN +import torch.nn as nn +import torch.nn.functional as F +import torchvision.transforms as T +from PIL import Image +resize = T.Compose([T.ToPILImage(), + T.Resize(40, interpolation=Image.CUBIC), + T.ToTensor()]) + +# xvfb-run -s "-screen 0 640x480x24" python main1.py +def get_cart_location(env,screen_width): + world_width = env.x_threshold * 2 + scale = screen_width / world_width + return int(env.state[0] * scale + screen_width / 2.0) # MIDDLE OF CART + +def get_screen(env): + # Returned screen requested by gym is 400x600x3, but is sometimes larger + # such as 800x1200x3. Transpose it into torch order (CHW). 
+ screen = env.render().transpose((2, 0, 1)) + # Cart is in the lower half, so strip off the top and bottom of the screen + _, screen_height, screen_width = screen.shape + screen = screen[:, int(screen_height*0.4):int(screen_height * 0.8)] + view_width = int(screen_width * 0.6) + cart_location = get_cart_location(env,screen_width) + if cart_location < view_width // 2: + slice_range = slice(view_width) + elif cart_location > (screen_width - view_width // 2): + slice_range = slice(-view_width, None) + else: + slice_range = slice(cart_location - view_width // 2, + cart_location + view_width // 2) + # Strip off the edges, so that we have a square image centered on a cart + screen = screen[:, :, slice_range] + # Convert to float, rescale, convert to torch tensor + # (this doesn't require a copy) + screen = np.ascontiguousarray(screen, dtype=np.float32) / 255 + screen = torch.from_numpy(screen) + # Resize, and add a batch dimension (BCHW) + return resize(screen) + + +class CNN(nn.Module): + + def __init__(self, h, w, outputs): + super(CNN, self).__init__() + self.conv1 = nn.Conv2d(3, 16, kernel_size=5, stride=2) + self.bn1 = nn.BatchNorm2d(16) + self.conv2 = nn.Conv2d(16, 32, kernel_size=5, stride=2) + self.bn2 = nn.BatchNorm2d(32) + self.conv3 = nn.Conv2d(32, 32, kernel_size=5, stride=2) + self.bn3 = nn.BatchNorm2d(32) + + # Number of Linear input connections depends on output of conv2d layers + # and therefore the input image size, so compute it. + def conv2d_size_out(size, kernel_size = 5, stride = 2): + return (size - (kernel_size - 1) - 1) // stride + 1 + convw = conv2d_size_out(conv2d_size_out(conv2d_size_out(w))) + convh = conv2d_size_out(conv2d_size_out(conv2d_size_out(h))) + linear_input_size = convw * convh * 32 + self.head = nn.Linear(linear_input_size, outputs) + + # Called with either one element to determine next action, or a batch + # during optimization. Returns tensor([[left0exp,right0exp]...]). 
+ def forward(self, x): + x = F.relu(self.bn1(self.conv1(x))) + x = F.relu(self.bn2(self.conv2(x))) + x = F.relu(self.bn3(self.conv3(x))) + return self.head(x.view(x.size(0), -1)) class Main(Launcher): def get_args(self): """ hyperparameters @@ -22,20 +99,20 @@ class Main(Launcher): curr_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") # obtain current time parser = argparse.ArgumentParser(description="hyperparameters") parser.add_argument('--algo_name',default='DQN',type=str,help="name of algorithm") - parser.add_argument('--env_name',default='CartPole-v0',type=str,help="name of environment") - parser.add_argument('--train_eps',default=200,type=int,help="episodes of training") + parser.add_argument('--env_name',default='CartPole-v1',type=str,help="name of environment") + parser.add_argument('--train_eps',default=800,type=int,help="episodes of training") parser.add_argument('--test_eps',default=20,type=int,help="episodes of testing") parser.add_argument('--ep_max_steps',default = 100000,type=int,help="steps per episode, much larger value can simulate infinite steps") - parser.add_argument('--gamma',default=0.95,type=float,help="discounted factor") + parser.add_argument('--gamma',default=0.999,type=float,help="discounted factor") parser.add_argument('--epsilon_start',default=0.95,type=float,help="initial value of epsilon") parser.add_argument('--epsilon_end',default=0.01,type=float,help="final value of epsilon") parser.add_argument('--epsilon_decay',default=500,type=int,help="decay rate of epsilon, the higher value, the slower decay") parser.add_argument('--lr',default=0.0001,type=float,help="learning rate") parser.add_argument('--memory_capacity',default=100000,type=int,help="memory capacity") - parser.add_argument('--batch_size',default=64,type=int) + parser.add_argument('--batch_size',default=128,type=int) parser.add_argument('--target_update',default=4,type=int) parser.add_argument('--hidden_dim',default=256,type=int) - 
parser.add_argument('--device',default='cpu',type=str,help="cpu or cuda") + parser.add_argument('--device',default='cuda',type=str,help="cpu or cuda") parser.add_argument('--seed',default=10,type=int,help="seed") parser.add_argument('--show_fig',default=False,type=bool,help="if show figure or not") parser.add_argument('--save_fig',default=True,type=bool,help="if save figure or not") @@ -48,11 +125,10 @@ class Main(Launcher): args = {**vars(args)} # type(dict) return args - def env_agent_config(cfg): + def env_agent_config(self,cfg): ''' create env and agent ''' - register_env(cfg['env_name']) - env = gym.make(cfg['env_name']) + env = gym.make('CartPole-v1', new_step_api=True, render_mode='single_rgb_array').unwrapped if cfg['seed'] !=0: # set random seed all_seed(env,seed=cfg["seed"]) try: # state dimension @@ -62,12 +138,15 @@ class Main(Launcher): n_actions = env.action_space.n # action dimension print(f"n_states: {n_states}, n_actions: {n_actions}") cfg.update({"n_states":n_states,"n_actions":n_actions}) # update to cfg paramters - model = MLP(n_states,n_actions,hidden_dim=cfg["hidden_dim"]) + env.reset() + init_screen = get_screen(env) + _, screen_height, screen_width = init_screen.shape + model = CNN(screen_height, screen_width, n_actions) memory = ReplayBuffer(cfg["memory_capacity"]) # replay buffer agent = DQN(model,memory,cfg) # create agent return env, agent - def train(cfg, env, agent): + def train(self,cfg, env, agent): ''' 训练 ''' print("Start training!") @@ -78,12 +157,18 @@ class Main(Launcher): ep_reward = 0 # reward per episode ep_step = 0 state = env.reset() # reset and obtain initial state + last_screen = get_screen(env) + current_screen = get_screen(env) + state = current_screen - last_screen for _ in range(cfg['ep_max_steps']): ep_step += 1 action = agent.sample_action(state) # sample action - next_state, reward, done, _ = env.step(action) # update env and return transitions - agent.memory.push(state, action, reward, - next_state, done) # save 
transitions + _, reward, done, _,_ = env.step(action) # update env and return transitions + last_screen = current_screen + current_screen = get_screen(env) + next_state = current_screen - last_screen + agent.memory.push(state.cpu().numpy(), action, reward, + next_state.cpu().numpy(), done) # save transitions state = next_state # update next state for env agent.update() # update agent ep_reward += reward # @@ -94,13 +179,13 @@ class Main(Launcher): steps.append(ep_step) rewards.append(ep_reward) if (i_ep + 1) % 10 == 0: - print(f'Episode: {i_ep+1}/{cfg["train_eps"]}, Reward: {ep_reward:.2f}: Epislon: {agent.epsilon:.3f}') + print(f'Episode: {i_ep+1}/{cfg["train_eps"]}, Reward: {ep_reward:.2f}, step: {ep_step:d}, Epislon: {agent.epsilon:.3f}') print("Finish training!") env.close() res_dic = {'episodes':range(len(rewards)),'rewards':rewards,'steps':steps} return res_dic - def test(cfg, env, agent): + def test(self,cfg, env, agent): print("Start testing!") print(f"Env: {cfg['env_name']}, Algorithm: {cfg['algo_name']}, Device: {cfg['device']}") rewards = [] # record rewards for all episodes @@ -109,10 +194,16 @@ class Main(Launcher): ep_reward = 0 # reward per episode ep_step = 0 state = env.reset() # reset and obtain initial state + last_screen = get_screen(env) + current_screen = get_screen(env) + state = current_screen - last_screen for _ in range(cfg['ep_max_steps']): ep_step+=1 action = agent.predict_action(state) # predict action - next_state, reward, done, _ = env.step(action) + _, reward, done, _,_ = env.step(action) + last_screen = current_screen + current_screen = get_screen(env) + next_state = current_screen - last_screen state = next_state ep_reward += reward if done: diff --git a/projects/codes/MonteCarlo/Train_Racetrack-v0_FirstVisitMC_20221106-010504/config.yaml b/projects/codes/MonteCarlo/Train_Racetrack-v0_FirstVisitMC_20221106-010504/config.yaml new file mode 100644 index 0000000..326f84e --- /dev/null +++ 
b/projects/codes/MonteCarlo/Train_Racetrack-v0_FirstVisitMC_20221106-010504/config.yaml @@ -0,0 +1,19 @@ +general_cfg: + algo_name: FirstVisitMC + device: cpu + env_name: Racetrack-v0 + eval_eps: 10 + eval_per_episode: 5 + load_checkpoint: false + load_path: tasks + max_steps: 200 + mode: train + save_fig: true + seed: 1 + show_fig: false + test_eps: 20 + train_eps: 200 +algo_cfg: + epsilon: 0.15 + gamma: 0.9 + lr: 0.1 diff --git a/projects/codes/MonteCarlo/Train_Racetrack-v0_FirstVisitMC_20221106-010504/logs/log.txt b/projects/codes/MonteCarlo/Train_Racetrack-v0_FirstVisitMC_20221106-010504/logs/log.txt new file mode 100644 index 0000000..993059b --- /dev/null +++ b/projects/codes/MonteCarlo/Train_Racetrack-v0_FirstVisitMC_20221106-010504/logs/log.txt @@ -0,0 +1,210 @@ +2022-11-06 01:05:04 - r - INFO: - n_states: 4, n_actions: 9 +2022-11-06 01:05:04 - r - INFO: - Start training! +2022-11-06 01:05:04 - r - INFO: - Env: Racetrack-v0, Algorithm: FirstVisitMC, Device: cpu +2022-11-06 01:05:40 - r - INFO: - Episode: 1/200, Reward: -760.000, Step: 200 +2022-11-06 01:05:58 - r - INFO: - Episode: 2/200, Reward: -560.000, Step: 200 +2022-11-06 01:05:59 - r - INFO: - Episode: 3/200, Reward: -156.000, Step: 66 +2022-11-06 01:06:17 - r - INFO: - Episode: 4/200, Reward: -500.000, Step: 200 +2022-11-06 01:06:38 - r - INFO: - Episode: 5/200, Reward: -600.000, Step: 200 +2022-11-06 01:06:38 - r - INFO: - Current episode 5 has the best eval reward: -208.000 +2022-11-06 01:06:52 - r - INFO: - Episode: 6/200, Reward: -350.000, Step: 200 +2022-11-06 01:07:07 - r - INFO: - Episode: 7/200, Reward: -430.000, Step: 200 +2022-11-06 01:07:10 - r - INFO: - Episode: 8/200, Reward: -206.000, Step: 96 +2022-11-06 01:07:31 - r - INFO: - Episode: 9/200, Reward: -460.000, Step: 200 +2022-11-06 01:07:45 - r - INFO: - Episode: 10/200, Reward: -410.000, Step: 200 +2022-11-06 01:07:45 - r - INFO: - Current episode 10 has the best eval reward: -204.000 +2022-11-06 01:07:58 - r - INFO: - Episode: 
11/200, Reward: -400.000, Step: 200 +2022-11-06 01:08:08 - r - INFO: - Episode: 12/200, Reward: -380.000, Step: 200 +2022-11-06 01:08:09 - r - INFO: - Episode: 13/200, Reward: -155.000, Step: 75 +2022-11-06 01:08:24 - r - INFO: - Episode: 14/200, Reward: -400.000, Step: 200 +2022-11-06 01:08:37 - r - INFO: - Episode: 15/200, Reward: -350.000, Step: 200 +2022-11-06 01:08:37 - r - INFO: - Current episode 15 has the best eval reward: -203.000 +2022-11-06 01:08:51 - r - INFO: - Episode: 16/200, Reward: -400.000, Step: 200 +2022-11-06 01:09:05 - r - INFO: - Episode: 17/200, Reward: -360.000, Step: 200 +2022-11-06 01:09:23 - r - INFO: - Episode: 18/200, Reward: -420.000, Step: 200 +2022-11-06 01:09:37 - r - INFO: - Episode: 19/200, Reward: -430.000, Step: 200 +2022-11-06 01:09:48 - r - INFO: - Episode: 20/200, Reward: -360.000, Step: 200 +2022-11-06 01:09:48 - r - INFO: - Current episode 20 has the best eval reward: -187.300 +2022-11-06 01:10:08 - r - INFO: - Episode: 21/200, Reward: -420.000, Step: 200 +2022-11-06 01:10:19 - r - INFO: - Episode: 22/200, Reward: -390.000, Step: 200 +2022-11-06 01:10:19 - r - INFO: - Episode: 23/200, Reward: -59.000, Step: 49 +2022-11-06 01:10:33 - r - INFO: - Episode: 24/200, Reward: -390.000, Step: 200 +2022-11-06 01:10:33 - r - INFO: - Episode: 25/200, Reward: 2.000, Step: 8 +2022-11-06 01:10:36 - r - INFO: - Episode: 26/200, Reward: -217.000, Step: 117 +2022-11-06 01:10:43 - r - INFO: - Episode: 27/200, Reward: -287.000, Step: 167 +2022-11-06 01:10:47 - r - INFO: - Episode: 28/200, Reward: -248.000, Step: 118 +2022-11-06 01:11:04 - r - INFO: - Episode: 29/200, Reward: -370.000, Step: 200 +2022-11-06 01:11:19 - r - INFO: - Episode: 30/200, Reward: -390.000, Step: 200 +2022-11-06 01:11:32 - r - INFO: - Episode: 31/200, Reward: -370.000, Step: 200 +2022-11-06 01:11:39 - r - INFO: - Episode: 32/200, Reward: -360.000, Step: 200 +2022-11-06 01:11:57 - r - INFO: - Episode: 33/200, Reward: -420.000, Step: 200 +2022-11-06 01:12:16 - r - INFO: 
- Episode: 34/200, Reward: -430.000, Step: 200 +2022-11-06 01:12:34 - r - INFO: - Episode: 35/200, Reward: -430.000, Step: 200 +2022-11-06 01:12:55 - r - INFO: - Episode: 36/200, Reward: -430.000, Step: 200 +2022-11-06 01:13:09 - r - INFO: - Episode: 37/200, Reward: -380.000, Step: 200 +2022-11-06 01:13:27 - r - INFO: - Episode: 38/200, Reward: -420.000, Step: 200 +2022-11-06 01:13:40 - r - INFO: - Episode: 39/200, Reward: -350.000, Step: 200 +2022-11-06 01:13:55 - r - INFO: - Episode: 40/200, Reward: -370.000, Step: 200 +2022-11-06 01:14:09 - r - INFO: - Episode: 41/200, Reward: -400.000, Step: 200 +2022-11-06 01:14:26 - r - INFO: - Episode: 42/200, Reward: -410.000, Step: 200 +2022-11-06 01:14:40 - r - INFO: - Episode: 43/200, Reward: -360.000, Step: 200 +2022-11-06 01:14:40 - r - INFO: - Episode: 44/200, Reward: -16.000, Step: 16 +2022-11-06 01:14:40 - r - INFO: - Episode: 45/200, Reward: -23.000, Step: 13 +2022-11-06 01:14:52 - r - INFO: - Episode: 46/200, Reward: -390.000, Step: 200 +2022-11-06 01:15:08 - r - INFO: - Episode: 47/200, Reward: -390.000, Step: 200 +2022-11-06 01:15:09 - r - INFO: - Episode: 48/200, Reward: -109.000, Step: 79 +2022-11-06 01:15:22 - r - INFO: - Episode: 49/200, Reward: -300.000, Step: 200 +2022-11-06 01:15:39 - r - INFO: - Episode: 50/200, Reward: -370.000, Step: 200 +2022-11-06 01:15:55 - r - INFO: - Episode: 51/200, Reward: -460.000, Step: 200 +2022-11-06 01:16:11 - r - INFO: - Episode: 52/200, Reward: -350.000, Step: 200 +2022-11-06 01:16:23 - r - INFO: - Episode: 53/200, Reward: -320.000, Step: 200 +2022-11-06 01:16:32 - r - INFO: - Episode: 54/200, Reward: -310.000, Step: 200 +2022-11-06 01:16:47 - r - INFO: - Episode: 55/200, Reward: -390.000, Step: 200 +2022-11-06 01:17:01 - r - INFO: - Episode: 56/200, Reward: -370.000, Step: 200 +2022-11-06 01:17:19 - r - INFO: - Episode: 57/200, Reward: -390.000, Step: 200 +2022-11-06 01:17:34 - r - INFO: - Episode: 58/200, Reward: -350.000, Step: 200 +2022-11-06 01:17:35 - r - INFO: - 
Episode: 59/200, Reward: -123.000, Step: 73 +2022-11-06 01:17:39 - r - INFO: - Episode: 60/200, Reward: -204.000, Step: 124 +2022-11-06 01:17:40 - r - INFO: - Episode: 61/200, Reward: -39.000, Step: 29 +2022-11-06 01:17:41 - r - INFO: - Episode: 62/200, Reward: -155.000, Step: 85 +2022-11-06 01:17:42 - r - INFO: - Episode: 63/200, Reward: -108.000, Step: 58 +2022-11-06 01:17:49 - r - INFO: - Episode: 64/200, Reward: -249.000, Step: 169 +2022-11-06 01:17:51 - r - INFO: - Episode: 65/200, Reward: -170.000, Step: 100 +2022-11-06 01:17:51 - r - INFO: - Current episode 65 has the best eval reward: -181.800 +2022-11-06 01:17:51 - r - INFO: - Episode: 66/200, Reward: 1.000, Step: 9 +2022-11-06 01:17:51 - r - INFO: - Episode: 67/200, Reward: -23.000, Step: 23 +2022-11-06 01:17:52 - r - INFO: - Episode: 68/200, Reward: -104.000, Step: 74 +2022-11-06 01:17:56 - r - INFO: - Episode: 69/200, Reward: -223.000, Step: 123 +2022-11-06 01:18:11 - r - INFO: - Episode: 70/200, Reward: -350.000, Step: 200 +2022-11-06 01:18:13 - r - INFO: - Episode: 71/200, Reward: -124.000, Step: 104 +2022-11-06 01:18:13 - r - INFO: - Episode: 72/200, Reward: -20.000, Step: 20 +2022-11-06 01:18:26 - r - INFO: - Episode: 73/200, Reward: -360.000, Step: 200 +2022-11-06 01:18:26 - r - INFO: - Episode: 74/200, Reward: -67.000, Step: 37 +2022-11-06 01:18:40 - r - INFO: - Episode: 75/200, Reward: -360.000, Step: 200 +2022-11-06 01:18:41 - r - INFO: - Episode: 76/200, Reward: -71.000, Step: 41 +2022-11-06 01:18:41 - r - INFO: - Episode: 77/200, Reward: -23.000, Step: 23 +2022-11-06 01:18:41 - r - INFO: - Episode: 78/200, Reward: -41.000, Step: 21 +2022-11-06 01:18:41 - r - INFO: - Episode: 79/200, Reward: -1.000, Step: 11 +2022-11-06 01:18:50 - r - INFO: - Episode: 80/200, Reward: -270.000, Step: 200 +2022-11-06 01:18:50 - r - INFO: - Current episode 80 has the best eval reward: -163.100 +2022-11-06 01:19:02 - r - INFO: - Episode: 81/200, Reward: -330.000, Step: 200 +2022-11-06 01:19:10 - r - INFO: - 
Episode: 82/200, Reward: -290.000, Step: 200 +2022-11-06 01:19:11 - r - INFO: - Episode: 83/200, Reward: -2.000, Step: 12 +2022-11-06 01:19:25 - r - INFO: - Episode: 84/200, Reward: -300.000, Step: 200 +2022-11-06 01:19:37 - r - INFO: - Episode: 85/200, Reward: -380.000, Step: 200 +2022-11-06 01:19:37 - r - INFO: - Episode: 86/200, Reward: -47.000, Step: 37 +2022-11-06 01:19:53 - r - INFO: - Episode: 87/200, Reward: -350.000, Step: 200 +2022-11-06 01:20:04 - r - INFO: - Episode: 88/200, Reward: -308.000, Step: 188 +2022-11-06 01:20:21 - r - INFO: - Episode: 89/200, Reward: -370.000, Step: 200 +2022-11-06 01:20:27 - r - INFO: - Episode: 90/200, Reward: -214.000, Step: 154 +2022-11-06 01:20:43 - r - INFO: - Episode: 91/200, Reward: -290.000, Step: 200 +2022-11-06 01:21:00 - r - INFO: - Episode: 92/200, Reward: -370.000, Step: 200 +2022-11-06 01:21:01 - r - INFO: - Episode: 93/200, Reward: -32.000, Step: 22 +2022-11-06 01:21:21 - r - INFO: - Episode: 94/200, Reward: -400.000, Step: 200 +2022-11-06 01:21:25 - r - INFO: - Episode: 95/200, Reward: -217.000, Step: 127 +2022-11-06 01:21:41 - r - INFO: - Episode: 96/200, Reward: -330.000, Step: 200 +2022-11-06 01:21:55 - r - INFO: - Episode: 97/200, Reward: -380.000, Step: 200 +2022-11-06 01:22:16 - r - INFO: - Episode: 98/200, Reward: -320.000, Step: 200 +2022-11-06 01:22:32 - r - INFO: - Episode: 99/200, Reward: -300.000, Step: 200 +2022-11-06 01:22:46 - r - INFO: - Episode: 100/200, Reward: -350.000, Step: 200 +2022-11-06 01:23:00 - r - INFO: - Episode: 101/200, Reward: -400.000, Step: 200 +2022-11-06 01:23:11 - r - INFO: - Episode: 102/200, Reward: -330.000, Step: 200 +2022-11-06 01:23:29 - r - INFO: - Episode: 103/200, Reward: -360.000, Step: 200 +2022-11-06 01:23:45 - r - INFO: - Episode: 104/200, Reward: -380.000, Step: 200 +2022-11-06 01:24:06 - r - INFO: - Episode: 105/200, Reward: -400.000, Step: 200 +2022-11-06 01:24:16 - r - INFO: - Episode: 106/200, Reward: -290.000, Step: 200 +2022-11-06 01:24:19 - r - INFO: - 
Episode: 107/200, Reward: -203.000, Step: 103 +2022-11-06 01:24:19 - r - INFO: - Episode: 108/200, Reward: -74.000, Step: 54 +2022-11-06 01:24:36 - r - INFO: - Episode: 109/200, Reward: -330.000, Step: 200 +2022-11-06 01:24:54 - r - INFO: - Episode: 110/200, Reward: -380.000, Step: 200 +2022-11-06 01:25:03 - r - INFO: - Episode: 111/200, Reward: -263.000, Step: 173 +2022-11-06 01:25:20 - r - INFO: - Episode: 112/200, Reward: -290.000, Step: 200 +2022-11-06 01:25:34 - r - INFO: - Episode: 113/200, Reward: -340.000, Step: 200 +2022-11-06 01:25:34 - r - INFO: - Episode: 114/200, Reward: -86.000, Step: 66 +2022-11-06 01:25:50 - r - INFO: - Episode: 115/200, Reward: -340.000, Step: 200 +2022-11-06 01:25:52 - r - INFO: - Episode: 116/200, Reward: -160.000, Step: 110 +2022-11-06 01:26:07 - r - INFO: - Episode: 117/200, Reward: -340.000, Step: 200 +2022-11-06 01:26:15 - r - INFO: - Episode: 118/200, Reward: -320.000, Step: 200 +2022-11-06 01:26:29 - r - INFO: - Episode: 119/200, Reward: -320.000, Step: 200 +2022-11-06 01:26:43 - r - INFO: - Episode: 120/200, Reward: -360.000, Step: 200 +2022-11-06 01:26:56 - r - INFO: - Episode: 121/200, Reward: -330.000, Step: 200 +2022-11-06 01:27:09 - r - INFO: - Episode: 122/200, Reward: -350.000, Step: 200 +2022-11-06 01:27:25 - r - INFO: - Episode: 123/200, Reward: -300.000, Step: 200 +2022-11-06 01:27:38 - r - INFO: - Episode: 124/200, Reward: -320.000, Step: 200 +2022-11-06 01:27:39 - r - INFO: - Episode: 125/200, Reward: -70.000, Step: 40 +2022-11-06 01:27:39 - r - INFO: - Episode: 126/200, Reward: -59.000, Step: 39 +2022-11-06 01:27:55 - r - INFO: - Episode: 127/200, Reward: -340.000, Step: 200 +2022-11-06 01:27:56 - r - INFO: - Episode: 128/200, Reward: -87.000, Step: 77 +2022-11-06 01:28:13 - r - INFO: - Episode: 129/200, Reward: -330.000, Step: 200 +2022-11-06 01:28:22 - r - INFO: - Episode: 130/200, Reward: -260.000, Step: 200 +2022-11-06 01:28:38 - r - INFO: - Episode: 131/200, Reward: -290.000, Step: 200 +2022-11-06 
01:28:57 - r - INFO: - Episode: 132/200, Reward: -330.000, Step: 200 +2022-11-06 01:29:07 - r - INFO: - Episode: 133/200, Reward: -340.000, Step: 200 +2022-11-06 01:29:08 - r - INFO: - Episode: 134/200, Reward: -78.000, Step: 48 +2022-11-06 01:29:23 - r - INFO: - Episode: 135/200, Reward: -390.000, Step: 200 +2022-11-06 01:29:33 - r - INFO: - Episode: 136/200, Reward: -320.000, Step: 200 +2022-11-06 01:29:51 - r - INFO: - Episode: 137/200, Reward: -360.000, Step: 200 +2022-11-06 01:30:06 - r - INFO: - Episode: 138/200, Reward: -340.000, Step: 200 +2022-11-06 01:30:10 - r - INFO: - Episode: 139/200, Reward: -185.000, Step: 115 +2022-11-06 01:30:26 - r - INFO: - Episode: 140/200, Reward: -340.000, Step: 200 +2022-11-06 01:30:43 - r - INFO: - Episode: 141/200, Reward: -250.000, Step: 200 +2022-11-06 01:30:57 - r - INFO: - Episode: 142/200, Reward: -347.000, Step: 197 +2022-11-06 01:31:11 - r - INFO: - Episode: 143/200, Reward: -320.000, Step: 200 +2022-11-06 01:31:25 - r - INFO: - Episode: 144/200, Reward: -330.000, Step: 200 +2022-11-06 01:31:37 - r - INFO: - Episode: 145/200, Reward: -270.000, Step: 200 +2022-11-06 01:31:55 - r - INFO: - Episode: 146/200, Reward: -380.000, Step: 200 +2022-11-06 01:32:10 - r - INFO: - Episode: 147/200, Reward: -320.000, Step: 200 +2022-11-06 01:32:27 - r - INFO: - Episode: 148/200, Reward: -340.000, Step: 200 +2022-11-06 01:32:38 - r - INFO: - Episode: 149/200, Reward: -310.000, Step: 200 +2022-11-06 01:32:57 - r - INFO: - Episode: 150/200, Reward: -290.000, Step: 200 +2022-11-06 01:33:10 - r - INFO: - Episode: 151/200, Reward: -380.000, Step: 200 +2022-11-06 01:33:21 - r - INFO: - Episode: 152/200, Reward: -281.000, Step: 181 +2022-11-06 01:33:21 - r - INFO: - Episode: 153/200, Reward: -30.000, Step: 30 +2022-11-06 01:33:33 - r - INFO: - Episode: 154/200, Reward: -280.000, Step: 200 +2022-11-06 01:33:45 - r - INFO: - Episode: 155/200, Reward: -300.000, Step: 200 +2022-11-06 01:33:59 - r - INFO: - Episode: 156/200, Reward: -300.000, 
Step: 200 +2022-11-06 01:34:10 - r - INFO: - Episode: 157/200, Reward: -300.000, Step: 200 +2022-11-06 01:34:28 - r - INFO: - Episode: 158/200, Reward: -370.000, Step: 200 +2022-11-06 01:34:45 - r - INFO: - Episode: 159/200, Reward: -320.000, Step: 200 +2022-11-06 01:34:52 - r - INFO: - Episode: 160/200, Reward: -250.000, Step: 200 +2022-11-06 01:35:04 - r - INFO: - Episode: 161/200, Reward: -370.000, Step: 200 +2022-11-06 01:35:16 - r - INFO: - Episode: 162/200, Reward: -290.000, Step: 200 +2022-11-06 01:35:31 - r - INFO: - Episode: 163/200, Reward: -320.000, Step: 200 +2022-11-06 01:35:41 - r - INFO: - Episode: 164/200, Reward: -290.000, Step: 200 +2022-11-06 01:35:41 - r - INFO: - Episode: 165/200, Reward: -44.000, Step: 44 +2022-11-06 01:35:53 - r - INFO: - Episode: 166/200, Reward: -216.000, Step: 196 +2022-11-06 01:36:06 - r - INFO: - Episode: 167/200, Reward: -340.000, Step: 200 +2022-11-06 01:36:23 - r - INFO: - Episode: 168/200, Reward: -360.000, Step: 200 +2022-11-06 01:36:38 - r - INFO: - Episode: 169/200, Reward: -310.000, Step: 200 +2022-11-06 01:36:51 - r - INFO: - Episode: 170/200, Reward: -320.000, Step: 200 +2022-11-06 01:37:08 - r - INFO: - Episode: 171/200, Reward: -280.000, Step: 200 +2022-11-06 01:37:17 - r - INFO: - Episode: 172/200, Reward: -290.000, Step: 200 +2022-11-06 01:37:33 - r - INFO: - Episode: 173/200, Reward: -280.000, Step: 200 +2022-11-06 01:37:45 - r - INFO: - Episode: 174/200, Reward: -300.000, Step: 200 +2022-11-06 01:38:02 - r - INFO: - Episode: 175/200, Reward: -350.000, Step: 200 +2022-11-06 01:38:17 - r - INFO: - Episode: 176/200, Reward: -320.000, Step: 200 +2022-11-06 01:38:31 - r - INFO: - Episode: 177/200, Reward: -320.000, Step: 200 +2022-11-06 01:38:47 - r - INFO: - Episode: 178/200, Reward: -320.000, Step: 200 +2022-11-06 01:39:03 - r - INFO: - Episode: 179/200, Reward: -300.000, Step: 200 +2022-11-06 01:39:04 - r - INFO: - Episode: 180/200, Reward: -117.000, Step: 87 +2022-11-06 01:39:06 - r - INFO: - Episode: 
181/200, Reward: -158.000, Step: 88 +2022-11-06 01:39:23 - r - INFO: - Episode: 182/200, Reward: -300.000, Step: 200 +2022-11-06 01:39:34 - r - INFO: - Episode: 183/200, Reward: -290.000, Step: 200 +2022-11-06 01:39:51 - r - INFO: - Episode: 184/200, Reward: -350.000, Step: 200 +2022-11-06 01:40:09 - r - INFO: - Episode: 185/200, Reward: -310.000, Step: 200 +2022-11-06 01:40:10 - r - INFO: - Episode: 186/200, Reward: -58.000, Step: 38 +2022-11-06 01:40:26 - r - INFO: - Episode: 187/200, Reward: -290.000, Step: 200 +2022-11-06 01:40:42 - r - INFO: - Episode: 188/200, Reward: -310.000, Step: 200 +2022-11-06 01:40:57 - r - INFO: - Episode: 189/200, Reward: -350.000, Step: 200 +2022-11-06 01:41:12 - r - INFO: - Episode: 190/200, Reward: -300.000, Step: 200 +2022-11-06 01:41:32 - r - INFO: - Episode: 191/200, Reward: -380.000, Step: 200 +2022-11-06 01:41:37 - r - INFO: - Episode: 192/200, Reward: -230.000, Step: 200 +2022-11-06 01:41:37 - r - INFO: - Episode: 193/200, Reward: -26.000, Step: 26 +2022-11-06 01:41:56 - r - INFO: - Episode: 194/200, Reward: -340.000, Step: 200 +2022-11-06 01:42:09 - r - INFO: - Episode: 195/200, Reward: -280.000, Step: 200 +2022-11-06 01:42:10 - r - INFO: - Episode: 196/200, Reward: -106.000, Step: 66 +2022-11-06 01:42:10 - r - INFO: - Episode: 197/200, Reward: -7.000, Step: 17 +2022-11-06 01:42:20 - r - INFO: - Episode: 198/200, Reward: -248.000, Step: 178 +2022-11-06 01:42:22 - r - INFO: - Episode: 199/200, Reward: -161.000, Step: 101 +2022-11-06 01:42:22 - r - INFO: - Episode: 200/200, Reward: -3.000, Step: 13 +2022-11-06 01:42:22 - r - INFO: - Finish training! 
diff --git a/projects/codes/MonteCarlo/Train_Racetrack-v0_FirstVisitMC_20221106-010504/models/Q_table b/projects/codes/MonteCarlo/Train_Racetrack-v0_FirstVisitMC_20221106-010504/models/Q_table new file mode 100644 index 0000000..3231a0b Binary files /dev/null and b/projects/codes/MonteCarlo/Train_Racetrack-v0_FirstVisitMC_20221106-010504/models/Q_table differ diff --git a/projects/codes/MonteCarlo/Train_Racetrack-v0_FirstVisitMC_20221106-010504/results/learning_curve.png b/projects/codes/MonteCarlo/Train_Racetrack-v0_FirstVisitMC_20221106-010504/results/learning_curve.png new file mode 100644 index 0000000..3799635 Binary files /dev/null and b/projects/codes/MonteCarlo/Train_Racetrack-v0_FirstVisitMC_20221106-010504/results/learning_curve.png differ diff --git a/projects/codes/MonteCarlo/Train_Racetrack-v0_FirstVisitMC_20221106-010504/results/res.csv b/projects/codes/MonteCarlo/Train_Racetrack-v0_FirstVisitMC_20221106-010504/results/res.csv new file mode 100644 index 0000000..214239b --- /dev/null +++ b/projects/codes/MonteCarlo/Train_Racetrack-v0_FirstVisitMC_20221106-010504/results/res.csv @@ -0,0 +1,201 @@ +episodes,rewards,steps +0,-760,200 +1,-560,200 +2,-156,66 +3,-500,200 +4,-600,200 +5,-350,200 +6,-430,200 +7,-206,96 +8,-460,200 +9,-410,200 +10,-400,200 +11,-380,200 +12,-155,75 +13,-400,200 +14,-350,200 +15,-400,200 +16,-360,200 +17,-420,200 +18,-430,200 +19,-360,200 +20,-420,200 +21,-390,200 +22,-59,49 +23,-390,200 +24,2,8 +25,-217,117 +26,-287,167 +27,-248,118 +28,-370,200 +29,-390,200 +30,-370,200 +31,-360,200 +32,-420,200 +33,-430,200 +34,-430,200 +35,-430,200 +36,-380,200 +37,-420,200 +38,-350,200 +39,-370,200 +40,-400,200 +41,-410,200 +42,-360,200 +43,-16,16 +44,-23,13 +45,-390,200 +46,-390,200 +47,-109,79 +48,-300,200 +49,-370,200 +50,-460,200 +51,-350,200 +52,-320,200 +53,-310,200 +54,-390,200 +55,-370,200 +56,-390,200 +57,-350,200 +58,-123,73 +59,-204,124 +60,-39,29 +61,-155,85 +62,-108,58 +63,-249,169 +64,-170,100 +65,1,9 +66,-23,23 +67,-104,74 
+68,-223,123 +69,-350,200 +70,-124,104 +71,-20,20 +72,-360,200 +73,-67,37 +74,-360,200 +75,-71,41 +76,-23,23 +77,-41,21 +78,-1,11 +79,-270,200 +80,-330,200 +81,-290,200 +82,-2,12 +83,-300,200 +84,-380,200 +85,-47,37 +86,-350,200 +87,-308,188 +88,-370,200 +89,-214,154 +90,-290,200 +91,-370,200 +92,-32,22 +93,-400,200 +94,-217,127 +95,-330,200 +96,-380,200 +97,-320,200 +98,-300,200 +99,-350,200 +100,-400,200 +101,-330,200 +102,-360,200 +103,-380,200 +104,-400,200 +105,-290,200 +106,-203,103 +107,-74,54 +108,-330,200 +109,-380,200 +110,-263,173 +111,-290,200 +112,-340,200 +113,-86,66 +114,-340,200 +115,-160,110 +116,-340,200 +117,-320,200 +118,-320,200 +119,-360,200 +120,-330,200 +121,-350,200 +122,-300,200 +123,-320,200 +124,-70,40 +125,-59,39 +126,-340,200 +127,-87,77 +128,-330,200 +129,-260,200 +130,-290,200 +131,-330,200 +132,-340,200 +133,-78,48 +134,-390,200 +135,-320,200 +136,-360,200 +137,-340,200 +138,-185,115 +139,-340,200 +140,-250,200 +141,-347,197 +142,-320,200 +143,-330,200 +144,-270,200 +145,-380,200 +146,-320,200 +147,-340,200 +148,-310,200 +149,-290,200 +150,-380,200 +151,-281,181 +152,-30,30 +153,-280,200 +154,-300,200 +155,-300,200 +156,-300,200 +157,-370,200 +158,-320,200 +159,-250,200 +160,-370,200 +161,-290,200 +162,-320,200 +163,-290,200 +164,-44,44 +165,-216,196 +166,-340,200 +167,-360,200 +168,-310,200 +169,-320,200 +170,-280,200 +171,-290,200 +172,-280,200 +173,-300,200 +174,-350,200 +175,-320,200 +176,-320,200 +177,-320,200 +178,-300,200 +179,-117,87 +180,-158,88 +181,-300,200 +182,-290,200 +183,-350,200 +184,-310,200 +185,-58,38 +186,-290,200 +187,-310,200 +188,-350,200 +189,-300,200 +190,-380,200 +191,-230,200 +192,-26,26 +193,-340,200 +194,-280,200 +195,-106,66 +196,-7,17 +197,-248,178 +198,-161,101 +199,-3,13 diff --git a/projects/codes/MonteCarlo/agent.py b/projects/codes/MonteCarlo/agent.py index fe2a287..c426527 100644 --- a/projects/codes/MonteCarlo/agent.py +++ b/projects/codes/MonteCarlo/agent.py @@ -5,7 +5,7 @@ Author: John Email: 
johnjim0816@gmail.com Date: 2021-03-12 16:14:34 LastEditor: John -LastEditTime: 2022-08-15 18:10:13 +LastEditTime: 2022-11-06 01:04:57 Discription: Environment: ''' @@ -17,15 +17,16 @@ import dill class FisrtVisitMC: ''' On-Policy First-Visit MC Control ''' - def __init__(self,n_actions,cfg): - self.n_actions = n_actions + def __init__(self,cfg): + self.n_actions = cfg.n_actions self.epsilon = cfg.epsilon self.gamma = cfg.gamma - self.Q_table = defaultdict(lambda: np.zeros(n_actions)) + self.Q_table = defaultdict(lambda: np.zeros(cfg.n_actions)) self.returns_sum = defaultdict(float) # 保存return之和 self.returns_count = defaultdict(float) - def sample(self,state): + def sample_action(self,state): + state = str(state) if state in self.Q_table.keys(): best_action = np.argmax(self.Q_table[state]) action_probs = np.ones(self.n_actions, dtype=float) * self.epsilon / self.n_actions @@ -34,7 +35,8 @@ class FisrtVisitMC: else: action = np.random.randint(0,self.n_actions) return action - def predict(self,state): + def predict_action(self,state): + state = str(state) if state in self.Q_table.keys(): best_action = np.argmax(self.Q_table[state]) action_probs = np.ones(self.n_actions, dtype=float) * self.epsilon / self.n_actions @@ -46,19 +48,20 @@ class FisrtVisitMC: def update(self,one_ep_transition): # Find all (state, action) pairs we've visited in this one_ep_transition # We convert each state to a tuple so that we can use it as a dict key - sa_in_episode = set([(tuple(x[0]), x[1]) for x in one_ep_transition]) + sa_in_episode = set([(str(x[0]), x[1]) for x in one_ep_transition]) for state, action in sa_in_episode: sa_pair = (state, action) # Find the first occurence of the (state, action) pair in the one_ep_transition + first_occurence_idx = next(i for i,x in enumerate(one_ep_transition) - if x[0] == state and x[1] == action) + if str(x[0]) == state and x[1] == action) # Sum up all rewards since the first occurance G = sum([x[2]*(self.gamma**i) for i,x in 
enumerate(one_ep_transition[first_occurence_idx:])]) # Calculate average return for this state over all sampled episodes self.returns_sum[sa_pair] += G self.returns_count[sa_pair] += 1.0 self.Q_table[state][action] = self.returns_sum[sa_pair] / self.returns_count[sa_pair] - def save(self,path=None): + def save_model(self,path=None): '''把 Q表格 的数据保存到文件中 ''' from pathlib import Path @@ -69,7 +72,7 @@ class FisrtVisitMC: pickle_module=dill ) - def load(self, path=None): + def load_model(self, path=None): '''从文件中读取数据到 Q表格 ''' self.Q_table =torch.load(f=path+"Q_table",pickle_module=dill) \ No newline at end of file diff --git a/projects/codes/MonteCarlo/config/config.py b/projects/codes/MonteCarlo/config/config.py new file mode 100644 index 0000000..d255547 --- /dev/null +++ b/projects/codes/MonteCarlo/config/config.py @@ -0,0 +1,32 @@ +#!/usr/bin/env python +# coding=utf-8 +''' +Author: JiangJi +Email: johnjim0816@gmail.com +Date: 2022-11-06 00:31:35 +LastEditor: JiangJi +LastEditTime: 2022-11-06 00:45:44 +Discription: parameters of MonteCarlo +''' +from common.config import GeneralConfig,AlgoConfig + +class GeneralConfigMC(GeneralConfig): + def __init__(self) -> None: + self.env_name = "Racetrack-v0" # name of environment + self.algo_name = "FirstVisitMC" # name of algorithm + self.mode = "train" # train or test + self.seed = 1 # random seed + self.device = "cpu" # device to use + self.train_eps = 200 # number of episodes for training + self.test_eps = 20 # number of episodes for testing + self.max_steps = 200 # max steps for each episode + self.load_checkpoint = False + self.load_path = "tasks" # path to load model + self.show_fig = False # show figure or not + self.save_fig = True # save figure or not + +class AlgoConfigMC(AlgoConfig): + def __init__(self) -> None: + self.gamma = 0.90 # discount factor + self.epsilon = 0.15 # epsilon greedy + self.lr = 0.1 # learning rate \ No newline at end of file diff --git a/projects/codes/MonteCarlo/task0.py 
b/projects/codes/MonteCarlo/task0.py index d6e75bd..4570967 100644 --- a/projects/codes/MonteCarlo/task0.py +++ b/projects/codes/MonteCarlo/task0.py @@ -5,51 +5,82 @@ Author: John Email: johnjim0816@gmail.com Date: 2021-03-11 14:26:44 LastEditor: John -LastEditTime: 2022-08-15 18:12:13 +LastEditTime: 2022-11-06 00:44:56 Discription: Environment: ''' import sys,os -curr_path = os.path.dirname(os.path.abspath(__file__)) # 当前文件所在绝对路径 -parent_path = os.path.dirname(curr_path) # 父路径 -sys.path.append(parent_path) # 添加路径到系统路径 +os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE" # avoid "OMP: Error #15: Initializing libiomp5md.dll, but found libiomp5md.dll already initialized." +curr_path = os.path.dirname(os.path.abspath(__file__)) # current path +parent_path = os.path.dirname(curr_path) # parent path +sys.path.append(parent_path) # add path to system path import datetime -import argparse -from common.utils import save_results,save_args,plot_rewards - +import gym +from envs.wrappers import CliffWalkingWapper +from envs.register import register_env +from common.utils import merge_class_attrs,all_seed +from common.launcher import Launcher from MonteCarlo.agent import FisrtVisitMC -from envs.racetrack import RacetrackEnv +from MonteCarlo.config.config import GeneralConfigMC,AlgoConfigMC + curr_time = datetime.datetime.now().strftime( "%Y%m%d-%H%M%S") # obtain current time - -def get_args(): - """ 超参数 - """ - curr_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") # 获取当前时间 - parser = argparse.ArgumentParser(description="hyperparameters") - parser.add_argument('--algo_name',default='First-Visit MC',type=str,help="name of algorithm") - parser.add_argument('--env_name',default='Racetrack',type=str,help="name of environment") - parser.add_argument('--train_eps',default=200,type=int,help="episodes of training") - parser.add_argument('--test_eps',default=20,type=int,help="episodes of testing") - parser.add_argument('--gamma',default=0.9,type=float,help="discounted factor") - 
parser.add_argument('--epsilon',default=0.15,type=float,help="the probability to select a random action") - parser.add_argument('--device',default='cpu',type=str,help="cpu or cuda") - parser.add_argument('--result_path',default=curr_path + "/outputs/" + parser.parse_args().env_name + \ - '/' + curr_time + '/results/' ) - parser.add_argument('--model_path',default=curr_path + "/outputs/" + parser.parse_args().env_name + \ - '/' + curr_time + '/models/' ) - parser.add_argument('--show_fig',default=False,type=bool,help="if show figure or not") - parser.add_argument('--save_fig',default=True,type=bool,help="if save figure or not") - args = parser.parse_args() - return args - -def env_agent_config(cfg,seed=1): - env = RacetrackEnv() - n_actions = env.action_space.n - agent = FisrtVisitMC(n_actions, cfg) - return env,agent +class Main(Launcher): + def __init__(self) -> None: + super().__init__() + self.cfgs['general_cfg'] = merge_class_attrs(self.cfgs['general_cfg'],GeneralConfigMC()) + self.cfgs['algo_cfg'] = merge_class_attrs(self.cfgs['algo_cfg'],AlgoConfigMC()) + def env_agent_config(self,cfg,logger): + ''' create env and agent + ''' + register_env(cfg.env_name) + env = gym.make(cfg.env_name,new_step_api=False) # create env + if cfg.env_name == 'CliffWalking-v0': + env = CliffWalkingWapper(env) + if cfg.seed !=0: # set random seed + all_seed(env,seed=cfg.seed) + try: # state dimension + n_states = env.observation_space.n # print(hasattr(env.observation_space, 'n')) + except AttributeError: + n_states = env.observation_space.shape[0] # print(hasattr(env.observation_space, 'shape')) + n_actions = env.action_space.n # action dimension + logger.info(f"n_states: {n_states}, n_actions: {n_actions}") # print info + # update to cfg paramters + setattr(cfg, 'n_states', n_states) + setattr(cfg, 'n_actions', n_actions) + agent = FisrtVisitMC(cfg) + return env,agent + def train_one_episode(self, env, agent, cfg): + ep_reward = 0 # reward per episode + ep_step = 0 + state = 
env.reset() # reset and obtain initial state + one_ep_transition = [] + for _ in range(cfg.max_steps): + ep_step += 1 + action = agent.sample_action(state) # sample action + next_state, reward, terminated, info = env.step(action) # update env and return transitions under new_step_api of OpenAI Gym + one_ep_transition.append((state, action, reward)) # save transitions + agent.update(one_ep_transition) # update agent + state = next_state # update next state for env + ep_reward += reward # + if terminated: + break + return agent,ep_reward,ep_step + def test_one_episode(self, env, agent, cfg): + ep_reward = 0 # reward per episode + ep_step = 0 + state = env.reset() # reset and obtain initial state + for _ in range(cfg.max_steps): + ep_step += 1 + action = agent.predict_action(state) # sample action + next_state, reward, terminated, info = env.step(action) # update env and return transitions under new_step_api of OpenAI Gym + state = next_state # update next state for env + ep_reward += reward # + if terminated: + break + return agent,ep_reward,ep_step def train(cfg, env, agent): print("开始训练!") @@ -93,18 +124,5 @@ def test(cfg, env, agent): return {'rewards':rewards} if __name__ == "__main__": - cfg = get_args() - # 训练 - env, agent = env_agent_config(cfg) - res_dic = train(cfg, env, agent) - save_args(cfg,path = cfg.result_path) # 保存参数到模型路径上 - agent.save(path = cfg.model_path) # 保存模型 - save_results(res_dic, tag = 'train', path = cfg.result_path) - plot_rewards(res_dic['rewards'], cfg, path = cfg.result_path,tag = "train") - # 测试 - env, agent = env_agent_config(cfg) # 也可以不加,加这一行的是为了避免训练之后环境可能会出现问题,因此新建一个环境用于测试 - agent.load(path = cfg.model_path) # 导入模型 - res_dic = test(cfg, env, agent) - save_results(res_dic, tag='test', - path = cfg.result_path) # 保存结果 - plot_rewards(res_dic['rewards'], cfg, path = cfg.result_path,tag = "test") # 画出结果 + main = Main() + main.run() \ No newline at end of file diff --git a/projects/codes/PPO/config/config.py 
b/projects/codes/PPO/config/config.py new file mode 100644 index 0000000..b8f9870 --- /dev/null +++ b/projects/codes/PPO/config/config.py @@ -0,0 +1,37 @@ +#!/usr/bin/env python +# coding=utf-8 +''' +Author: JiangJi +Email: johnjim0816@gmail.com +Date: 2022-10-30 11:30:56 +LastEditor: JiangJi +LastEditTime: 2022-10-31 00:33:15 +Discription: default parameters of PPO +''' +from common.config import GeneralConfig,AlgoConfig + +class GeneralConfigPPO(GeneralConfig): + def __init__(self) -> None: + self.env_name = "CartPole-v0" + self.algo_name = "PPO" + self.seed = 1 + self.device = "cuda" + self.train_eps = 100 # number of episodes for training + self.test_eps = 10 # number of episodes for testing + self.max_steps = 200 # max steps for each episode + +class AlgoConfigPPO(AlgoConfig): + def __init__(self) -> None: + self.gamma = 0.99 # discount factor + self.continuous = False # continuous action space or not + self.policy_clip = 0.2 # clip range of policy + self.n_epochs = 10 # number of epochs + self.gae_lambda = 0.95 # gae lambda + self.actor_lr = 0.0003 # learning rate of actor + self.critic_lr = 0.0003 # learning rate of critic + self.actor_hidden_dim = 256 # + self.critic_hidden_dim = 256 + self.n_epochs = 4 # epochs + self.batch_size = 5 # + self.policy_clip = 0.2 + self.update_fre = 20 # frequency of updating agent diff --git a/projects/codes/PPO/outputs/CartPole-v0/20220731-233512/models/ppo_actor.pt b/projects/codes/PPO/outputs/CartPole-v0/20220731-233512/models/ppo_actor.pt deleted file mode 100644 index 5419b72..0000000 Binary files a/projects/codes/PPO/outputs/CartPole-v0/20220731-233512/models/ppo_actor.pt and /dev/null differ diff --git a/projects/codes/PPO/outputs/CartPole-v0/20220731-233512/models/ppo_critic.pt b/projects/codes/PPO/outputs/CartPole-v0/20220731-233512/models/ppo_critic.pt deleted file mode 100644 index af97c9b..0000000 Binary files a/projects/codes/PPO/outputs/CartPole-v0/20220731-233512/models/ppo_critic.pt and /dev/null differ diff 
--git a/projects/codes/PPO/outputs/CartPole-v0/20220731-233512/results/params.json b/projects/codes/PPO/outputs/CartPole-v0/20220731-233512/results/params.json deleted file mode 100644 index 748044c..0000000 --- a/projects/codes/PPO/outputs/CartPole-v0/20220731-233512/results/params.json +++ /dev/null @@ -1,20 +0,0 @@ -{ - "algo_name": "PPO", - "env_name": "CartPole-v0", - "continuous": false, - "train_eps": 200, - "test_eps": 20, - "gamma": 0.99, - "batch_size": 5, - "n_epochs": 4, - "actor_lr": 0.0003, - "critic_lr": 0.0003, - "gae_lambda": 0.95, - "policy_clip": 0.2, - "update_fre": 20, - "hidden_dim": 256, - "device": "cpu", - "result_path": "C:\\Users\\24438\\Desktop\\rl-tutorials\\codes\\PPO/outputs/CartPole-v0/20220731-233512/results/", - "model_path": "C:\\Users\\24438\\Desktop\\rl-tutorials\\codes\\PPO/outputs/CartPole-v0/20220731-233512/models/", - "save_fig": true -} \ No newline at end of file diff --git a/projects/codes/PPO/outputs/CartPole-v0/20220731-233512/results/test_ma_rewards.npy b/projects/codes/PPO/outputs/CartPole-v0/20220731-233512/results/test_ma_rewards.npy deleted file mode 100644 index 14bca8b..0000000 Binary files a/projects/codes/PPO/outputs/CartPole-v0/20220731-233512/results/test_ma_rewards.npy and /dev/null differ diff --git a/projects/codes/PPO/outputs/CartPole-v0/20220731-233512/results/test_rewards.npy b/projects/codes/PPO/outputs/CartPole-v0/20220731-233512/results/test_rewards.npy deleted file mode 100644 index 14bca8b..0000000 Binary files a/projects/codes/PPO/outputs/CartPole-v0/20220731-233512/results/test_rewards.npy and /dev/null differ diff --git a/projects/codes/PPO/outputs/CartPole-v0/20220731-233512/results/test_rewards_curve.png b/projects/codes/PPO/outputs/CartPole-v0/20220731-233512/results/test_rewards_curve.png deleted file mode 100644 index b52cc37..0000000 Binary files a/projects/codes/PPO/outputs/CartPole-v0/20220731-233512/results/test_rewards_curve.png and /dev/null differ diff --git 
a/projects/codes/PPO/outputs/CartPole-v0/20220731-233512/results/train_ma_rewards.npy b/projects/codes/PPO/outputs/CartPole-v0/20220731-233512/results/train_ma_rewards.npy deleted file mode 100644 index 11249ed..0000000 Binary files a/projects/codes/PPO/outputs/CartPole-v0/20220731-233512/results/train_ma_rewards.npy and /dev/null differ diff --git a/projects/codes/PPO/outputs/CartPole-v0/20220731-233512/results/train_rewards.npy b/projects/codes/PPO/outputs/CartPole-v0/20220731-233512/results/train_rewards.npy deleted file mode 100644 index 078b31f..0000000 Binary files a/projects/codes/PPO/outputs/CartPole-v0/20220731-233512/results/train_rewards.npy and /dev/null differ diff --git a/projects/codes/PPO/outputs/CartPole-v0/20220731-233512/results/train_rewards_curve.png b/projects/codes/PPO/outputs/CartPole-v0/20220731-233512/results/train_rewards_curve.png deleted file mode 100644 index 7a4f892..0000000 Binary files a/projects/codes/PPO/outputs/CartPole-v0/20220731-233512/results/train_rewards_curve.png and /dev/null differ diff --git a/projects/codes/PPO/outputs/CartPole-v0/20220920-213310/models/ppo_actor.pt b/projects/codes/PPO/outputs/CartPole-v0/20220920-213310/models/ppo_actor.pt new file mode 100644 index 0000000..e7660b4 Binary files /dev/null and b/projects/codes/PPO/outputs/CartPole-v0/20220920-213310/models/ppo_actor.pt differ diff --git a/projects/codes/PPO/outputs/CartPole-v0/20220920-213310/models/ppo_critic.pt b/projects/codes/PPO/outputs/CartPole-v0/20220920-213310/models/ppo_critic.pt new file mode 100644 index 0000000..f0ec0d4 Binary files /dev/null and b/projects/codes/PPO/outputs/CartPole-v0/20220920-213310/models/ppo_critic.pt differ diff --git a/projects/codes/PPO/outputs/CartPole-v0/20220920-213310/results/params.json b/projects/codes/PPO/outputs/CartPole-v0/20220920-213310/results/params.json new file mode 100644 index 0000000..15097c6 --- /dev/null +++ b/projects/codes/PPO/outputs/CartPole-v0/20220920-213310/results/params.json @@ -0,0 
+1,25 @@ +{ + "algo_name": "PPO", + "env_name": "CartPole-v0", + "continuous": false, + "train_eps": 200, + "test_eps": 20, + "gamma": 0.99, + "batch_size": 5, + "n_epochs": 4, + "actor_lr": 0.0003, + "critic_lr": 0.0003, + "gae_lambda": 0.95, + "policy_clip": 0.2, + "update_fre": 20, + "actor_hidden_dim": 256, + "critic_hidden_dim": 256, + "device": "cpu", + "seed": 10, + "show_fig": false, + "save_fig": true, + "result_path": "c:\\Users\\24438\\Desktop\\rl-tutorials\\codes\\PPO/outputs/CartPole-v0/20220920-213310/results/", + "model_path": "c:\\Users\\24438\\Desktop\\rl-tutorials\\codes\\PPO/outputs/CartPole-v0/20220920-213310/models/", + "n_states": 4, + "n_actions": 2 +} \ No newline at end of file diff --git a/projects/codes/PPO/outputs/CartPole-v0/20220920-213310/results/testing_curve.png b/projects/codes/PPO/outputs/CartPole-v0/20220920-213310/results/testing_curve.png new file mode 100644 index 0000000..badf029 Binary files /dev/null and b/projects/codes/PPO/outputs/CartPole-v0/20220920-213310/results/testing_curve.png differ diff --git a/projects/codes/DQN/outputs/CartPole-v0/20220823-173936/results/testing_results.csv b/projects/codes/PPO/outputs/CartPole-v0/20220920-213310/results/testing_results.csv similarity index 100% rename from projects/codes/DQN/outputs/CartPole-v0/20220823-173936/results/testing_results.csv rename to projects/codes/PPO/outputs/CartPole-v0/20220920-213310/results/testing_results.csv diff --git a/projects/codes/PPO/outputs/CartPole-v0/20220920-213310/results/training_curve.png b/projects/codes/PPO/outputs/CartPole-v0/20220920-213310/results/training_curve.png new file mode 100644 index 0000000..1bc6604 Binary files /dev/null and b/projects/codes/PPO/outputs/CartPole-v0/20220920-213310/results/training_curve.png differ diff --git a/projects/codes/PPO/outputs/CartPole-v0/20220920-213310/results/training_results.csv b/projects/codes/PPO/outputs/CartPole-v0/20220920-213310/results/training_results.csv new file mode 100644 index 
0000000..7836df5 --- /dev/null +++ b/projects/codes/PPO/outputs/CartPole-v0/20220920-213310/results/training_results.csv @@ -0,0 +1,201 @@ +episodes,rewards +0,34.0 +1,12.0 +2,47.0 +3,29.0 +4,20.0 +5,23.0 +6,33.0 +7,25.0 +8,11.0 +9,30.0 +10,18.0 +11,16.0 +12,15.0 +13,25.0 +14,33.0 +15,19.0 +16,50.0 +17,23.0 +18,21.0 +19,42.0 +20,60.0 +21,64.0 +22,30.0 +23,31.0 +24,90.0 +25,43.0 +26,54.0 +27,74.0 +28,30.0 +29,82.0 +30,50.0 +31,53.0 +32,25.0 +33,27.0 +34,145.0 +35,118.0 +36,141.0 +37,148.0 +38,200.0 +39,191.0 +40,71.0 +41,105.0 +42,100.0 +43,120.0 +44,80.0 +45,40.0 +46,104.0 +47,39.0 +48,89.0 +49,60.0 +50,30.0 +51,24.0 +52,20.0 +53,23.0 +54,30.0 +55,32.0 +56,20.0 +57,12.0 +58,25.0 +59,25.0 +60,24.0 +61,29.0 +62,200.0 +63,62.0 +64,200.0 +65,58.0 +66,81.0 +67,200.0 +68,52.0 +69,140.0 +70,200.0 +71,74.0 +72,200.0 +73,29.0 +74,124.0 +75,129.0 +76,200.0 +77,194.0 +78,175.0 +79,117.0 +80,200.0 +81,186.0 +82,114.0 +83,200.0 +84,166.0 +85,150.0 +86,135.0 +87,200.0 +88,200.0 +89,133.0 +90,111.0 +91,200.0 +92,90.0 +93,200.0 +94,147.0 +95,30.0 +96,137.0 +97,200.0 +98,200.0 +99,179.0 +100,167.0 +101,186.0 +102,169.0 +103,200.0 +104,200.0 +105,171.0 +106,200.0 +107,181.0 +108,125.0 +109,200.0 +110,200.0 +111,122.0 +112,200.0 +113,124.0 +114,95.0 +115,102.0 +116,118.0 +117,91.0 +118,64.0 +119,124.0 +120,122.0 +121,76.0 +122,68.0 +123,40.0 +124,52.0 +125,51.0 +126,50.0 +127,49.0 +128,37.0 +129,76.0 +130,83.0 +131,76.0 +132,92.0 +133,113.0 +134,94.0 +135,157.0 +136,92.0 +137,200.0 +138,123.0 +139,200.0 +140,200.0 +141,200.0 +142,140.0 +143,200.0 +144,200.0 +145,200.0 +146,200.0 +147,200.0 +148,200.0 +149,200.0 +150,200.0 +151,78.0 +152,200.0 +153,200.0 +154,200.0 +155,200.0 +156,200.0 +157,200.0 +158,200.0 +159,200.0 +160,200.0 +161,200.0 +162,107.0 +163,187.0 +164,200.0 +165,200.0 +166,200.0 +167,200.0 +168,200.0 +169,200.0 +170,200.0 +171,200.0 +172,200.0 +173,200.0 +174,200.0 +175,200.0 +176,200.0 +177,200.0 +178,200.0 +179,200.0 +180,200.0 +181,200.0 +182,200.0 +183,200.0 
+184,200.0 +185,200.0 +186,200.0 +187,200.0 +188,200.0 +189,200.0 +190,200.0 +191,200.0 +192,200.0 +193,200.0 +194,200.0 +195,200.0 +196,200.0 +197,200.0 +198,200.0 +199,200.0 diff --git a/projects/codes/PPO/ppo2.py b/projects/codes/PPO/ppo2.py index 13cfab7..5d399b8 100644 --- a/projects/codes/PPO/ppo2.py +++ b/projects/codes/PPO/ppo2.py @@ -1,99 +1,53 @@ #!/usr/bin/env python # coding=utf-8 ''' -Author: John +Author: JiangJi Email: johnjim0816@gmail.com -Date: 2021-03-23 15:17:42 -LastEditor: John -LastEditTime: 2021-12-31 19:38:33 -Discription: -Environment: +Date: 2022-09-26 16:11:36 +LastEditor: JiangJi +LastEditTime: 2022-10-31 00:36:37 +Discription: PPO-clip ''' + import os import numpy as np import torch import torch.optim as optim -import torch.nn as nn from torch.distributions.categorical import Categorical -class PPOMemory: - def __init__(self, batch_size): - self.states = [] - self.probs = [] - self.vals = [] - self.actions = [] - self.rewards = [] - self.dones = [] - self.batch_size = batch_size - def sample(self): - batch_step = np.arange(0, len(self.states), self.batch_size) - indices = np.arange(len(self.states), dtype=np.int64) - np.random.shuffle(indices) - batches = [indices[i:i+self.batch_size] for i in batch_step] - return np.array(self.states),np.array(self.actions),np.array(self.probs),\ - np.array(self.vals),np.array(self.rewards),np.array(self.dones),batches - - def push(self, state, action, probs, vals, reward, done): - self.states.append(state) - self.actions.append(action) - self.probs.append(probs) - self.vals.append(vals) - self.rewards.append(reward) - self.dones.append(done) - def clear(self): - self.states = [] - self.probs = [] - self.actions = [] - self.rewards = [] - self.dones = [] - self.vals = [] -class Actor(nn.Module): - def __init__(self,n_states, n_actions, - hidden_dim): - super(Actor, self).__init__() - self.actor = nn.Sequential( - nn.Linear(n_states, hidden_dim), - nn.ReLU(), - nn.Linear(hidden_dim, hidden_dim), - 
nn.ReLU(), - nn.Linear(hidden_dim, n_actions), - nn.Softmax(dim=-1) - ) - def forward(self, state): - dist = self.actor(state) - dist = Categorical(dist) - return dist - -class Critic(nn.Module): - def __init__(self, n_states,hidden_dim): - super(Critic, self).__init__() - self.critic = nn.Sequential( - nn.Linear(n_states, hidden_dim), - nn.ReLU(), - nn.Linear(hidden_dim, hidden_dim), - nn.ReLU(), - nn.Linear(hidden_dim, 1) - ) - def forward(self, state): - value = self.critic(state) - return value class PPO: - def __init__(self, n_states, n_actions,cfg): + def __init__(self, models,memory,cfg): self.gamma = cfg.gamma - self.continuous = cfg.continuous + self.continuous = cfg.continuous self.policy_clip = cfg.policy_clip self.n_epochs = cfg.n_epochs + self.batch_size = cfg.batch_size self.gae_lambda = cfg.gae_lambda - self.device = cfg.device - self.actor = Actor(n_states, n_actions,cfg.hidden_dim).to(self.device) - self.critic = Critic(n_states,cfg.hidden_dim).to(self.device) + self.device = torch.device(cfg.device) + self.actor = models['Actor'].to(self.device) + self.critic = models['Critic'].to(self.device) self.actor_optimizer = optim.Adam(self.actor.parameters(), lr=cfg.actor_lr) self.critic_optimizer = optim.Adam(self.critic.parameters(), lr=cfg.critic_lr) - self.memory = PPOMemory(cfg.batch_size) + self.memory = memory self.loss = 0 - def choose_action(self, state): + def sample_action(self, state): + state = np.array([state]) # 先转成数组再转tensor更高效 + state = torch.tensor(state, dtype=torch.float).to(self.device) + probs = self.actor(state) + dist = Categorical(probs) + value = self.critic(state) + action = dist.sample() + probs = torch.squeeze(dist.log_prob(action)).item() + if self.continuous: + action = torch.tanh(action) + else: + action = torch.squeeze(action).item() + value = torch.squeeze(value).item() + return action, probs, value + @torch.no_grad() + def predict_action(self, state): state = np.array([state]) # 先转成数组再转tensor更高效 state = 
torch.tensor(state, dtype=torch.float).to(self.device) dist = self.actor(state) @@ -148,12 +102,15 @@ class PPO: self.actor_optimizer.step() self.critic_optimizer.step() self.memory.clear() - def save(self,path): + def save_model(self,path): + from pathlib import Path + # create path + Path(path).mkdir(parents=True, exist_ok=True) actor_checkpoint = os.path.join(path, 'ppo_actor.pt') critic_checkpoint= os.path.join(path, 'ppo_critic.pt') torch.save(self.actor.state_dict(), actor_checkpoint) torch.save(self.critic.state_dict(), critic_checkpoint) - def load(self,path): + def load_model(self,path): actor_checkpoint = os.path.join(path, 'ppo_actor.pt') critic_checkpoint= os.path.join(path, 'ppo_critic.pt') self.actor.load_state_dict(torch.load(actor_checkpoint)) diff --git a/projects/codes/PPO/task0.py b/projects/codes/PPO/task0.py index 9cd5063..dbf0e7a 100644 --- a/projects/codes/PPO/task0.py +++ b/projects/codes/PPO/task0.py @@ -1,132 +1,159 @@ import sys,os -curr_path = os.path.dirname(os.path.abspath(__file__)) # 当前文件所在绝对路径 -parent_path = os.path.dirname(curr_path) # 父路径 -sys.path.append(parent_path) # 添加路径到系统路径 +os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE" # avoid "OMP: Error #15: Initializing libiomp5md.dll, but found libiomp5md.dll already initialized." 
+curr_path = os.path.dirname(os.path.abspath(__file__)) # current path +parent_path = os.path.dirname(curr_path) # parent path +sys.path.append(parent_path) # add path to system path import gym import torch -import numpy as np import datetime +import numpy as np import argparse -from common.utils import plot_rewards,save_args,save_results,make_dir +import torch.nn as nn + + +from common.utils import all_seed,merge_class_attrs +from common.models import ActorSoftmax, Critic +from common.memories import PGReplay +from common.launcher import Launcher +from envs.register import register_env from ppo2 import PPO +from config,config import GeneralConfigPPO,AlgoConfigPPO +class PPOMemory: + def __init__(self, batch_size): + self.states = [] + self.probs = [] + self.vals = [] + self.actions = [] + self.rewards = [] + self.terminateds = [] + self.batch_size = batch_size + def sample(self): + batch_step = np.arange(0, len(self.states), self.batch_size) + indices = np.arange(len(self.states), dtype=np.int64) + np.random.shuffle(indices) + batches = [indices[i:i+self.batch_size] for i in batch_step] + return np.array(self.states),np.array(self.actions),np.array(self.probs),\ + np.array(self.vals),np.array(self.rewards),np.array(self.terminateds),batches + + def push(self, state, action, probs, vals, reward, terminated): + self.states.append(state) + self.actions.append(action) + self.probs.append(probs) + self.vals.append(vals) + self.rewards.append(reward) + self.terminateds.append(terminated) -def get_args(): - """ Hyperparameters - """ - curr_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") # 获取当前时间 - parser = argparse.ArgumentParser(description="hyperparameters") - parser.add_argument('--algo_name',default='PPO',type=str,help="name of algorithm") - parser.add_argument('--env_name',default='CartPole-v0',type=str,help="name of environment") - parser.add_argument('--continuous',default=False,type=bool,help="if PPO is continous") # PPO既可适用于连续动作空间,也可以适用于离散动作空间 - 
parser.add_argument('--train_eps',default=200,type=int,help="episodes of training") - parser.add_argument('--test_eps',default=20,type=int,help="episodes of testing") - parser.add_argument('--gamma',default=0.99,type=float,help="discounted factor") - parser.add_argument('--batch_size',default=5,type=int) # mini-batch SGD中的批量大小 - parser.add_argument('--n_epochs',default=4,type=int) - parser.add_argument('--actor_lr',default=0.0003,type=float,help="learning rate of actor net") - parser.add_argument('--critic_lr',default=0.0003,type=float,help="learning rate of critic net") - parser.add_argument('--gae_lambda',default=0.95,type=float) - parser.add_argument('--policy_clip',default=0.2,type=float) # PPO-clip中的clip参数,一般是0.1~0.2左右 - parser.add_argument('--update_fre',default=20,type=int) - parser.add_argument('--hidden_dim',default=256,type=int) - parser.add_argument('--device',default='cpu',type=str,help="cpu or cuda") - parser.add_argument('--result_path',default=curr_path + "/outputs/" + parser.parse_args().env_name + \ - '/' + curr_time + '/results/' ) - parser.add_argument('--model_path',default=curr_path + "/outputs/" + parser.parse_args().env_name + \ - '/' + curr_time + '/models/' ) # path to save models - parser.add_argument('--save_fig',default=True,type=bool,help="if save figure or not") - args = parser.parse_args() - return args - -def env_agent_config(cfg,seed = 1): - ''' 创建环境和智能体 - ''' - env = gym.make(cfg.env_name) # 创建环境 - n_states = env.observation_space.shape[0] # 状态维度 - if cfg.continuous: - n_actions = env.action_space.shape[0] # 动作维度 - else: - n_actions = env.action_space.n # 动作维度 - agent = PPO(n_states, n_actions, cfg) # 创建智能体 - if seed !=0: # 设置随机种子 - torch.manual_seed(seed) - env.seed(seed) - np.random.seed(seed) - return env, agent + def clear(self): + self.states = [] + self.probs = [] + self.actions = [] + self.rewards = [] + self.terminateds = [] + self.vals = [] -def train(cfg,env,agent): - print('开始训练!') - print(f'环境:{cfg.env_name}, 
算法:{cfg.algo_name}, 设备:{cfg.device}') - rewards = [] # 记录所有回合的奖励 - ma_rewards = [] # 记录所有回合的滑动平均奖励 - steps = 0 - for i_ep in range(cfg.train_eps): + +class Main(Launcher): + def __init__(self) -> None: + super().__init__() + self.cfgs['general_cfg'] = merge_class_attrs(self.cfgs['general_cfg'],GeneralConfigPPO()) + self.cfgs['algo_cfg'] = merge_class_attrs(self.cfgs['algo_cfg'],AlgoConfigPPO()) + def env_agent_config(self,cfg,logger): + ''' create env and agent + ''' + register_env(cfg.env_name) + env = gym.make(cfg.env_name,new_step_api=False) # create env + if cfg.seed !=0: # set random seed + all_seed(env,seed=cfg.seed) + try: # state dimension + n_states = env.observation_space.n # print(hasattr(env.observation_space, 'n')) + except AttributeError: + n_states = env.observation_space.shape[0] # print(hasattr(env.observation_space, 'shape')) + n_actions = env.action_space.n # action dimension + logger.info(f"n_states: {n_states}, n_actions: {n_actions}") # print info + # update to cfg paramters + setattr(cfg, 'n_states', n_states) + setattr(cfg, 'n_actions', n_actions) + models = {'Actor':ActorSoftmax(n_states,n_actions, hidden_dim = cfg.actor_hidden_dim),'Critic':Critic(n_states,1,hidden_dim=cfg.critic_hidden_dim)} + memory = PGReplay # replay buffer + agent = PPO(models,memory,cfg) # create agent + return env, agent + def train_one_episode(self, env, agent, cfg): + ep_reward = 0 # reward per episode + ep_step = 0 # step per episode state = env.reset() - done = False - ep_reward = 0 - while not done: - action, prob, val = agent.choose_action(state) - state_, reward, done, _ = env.step(action) - steps += 1 + for _ in range(cfg.max_steps): + action, prob, val = agent.sample_action(state) + next_state, reward, terminated, _ = env.step(action) ep_reward += reward - agent.memory.push(state, action, prob, val, reward, done) - if steps % cfg.update_fre == 0: + ep_step += 1 + agent.memory.push((state, action, prob, val, reward, terminated)) + if ep_step % 
cfg['update_fre'] == 0: agent.update() - state = state_ - rewards.append(ep_reward) - if ma_rewards: - ma_rewards.append(0.9*ma_rewards[-1]+0.1*ep_reward) - else: - ma_rewards.append(ep_reward) - if (i_ep+1)%10 == 0: - print(f"回合:{i_ep+1}/{cfg.train_eps},奖励:{ep_reward:.2f}") - print('完成训练!') - env.close() - res_dic = {'rewards':rewards,'ma_rewards':ma_rewards} - return res_dic - -def test(cfg,env,agent): - print('开始测试!') - print(f'环境:{cfg.env_name}, 算法:{cfg.algo_name}, 设备:{cfg.device}') - rewards = [] # 记录所有回合的奖励 - ma_rewards = [] # 记录所有回合的滑动平均奖励 - for i_ep in range(cfg.test_eps): + state = next_state + if terminated: + break + return agent, ep_reward, ep_step + def test_one_episode(self, env, agent, cfg): + ep_reward = 0 # reward per episode + ep_step = 0 # step per episode state = env.reset() - done = False - ep_reward = 0 - while not done: - action, prob, val = agent.choose_action(state) - state_, reward, done, _ = env.step(action) + for _ in range(cfg.max_steps): + action, prob, val = agent.sample_action(state) + next_state, reward, terminated, _ = env.step(action) ep_reward += reward - state = state_ - rewards.append(ep_reward) - if ma_rewards: - ma_rewards.append( - 0.9*ma_rewards[-1]+0.1*ep_reward) - else: - ma_rewards.append(ep_reward) - print('回合:{}/{}, 奖励:{}'.format(i_ep+1, cfg.test_eps, ep_reward)) - print('完成训练!') - env.close() - res_dic = {'rewards':rewards,'ma_rewards':ma_rewards} - return res_dic + ep_step += 1 + state = next_state + if terminated: + break + return agent, ep_reward, ep_step + def train(self,cfg,env,agent): + ''' train agent + ''' + print("Start training!") + print(f"Env: {cfg['env_name']}, Algorithm: {cfg['algo_name']}, Device: {cfg['device']}") + rewards = [] # record rewards for all episodes + steps = 0 + for i_ep in range(cfg['train_eps']): + state = env.reset() + ep_reward = 0 + while True: + action, prob, val = agent.sample_action(state) + next_state, reward, terminated, _ = env.step(action) + steps += 1 + ep_reward += reward + 
agent.memory.push(state, action, prob, val, reward, terminated) + if steps % cfg['update_fre'] == 0: + agent.update() + state = next_state + if terminated: + break + rewards.append(ep_reward) + if (i_ep+1)%10==0: + print(f"Episode: {i_ep+1}/{cfg['train_eps']}, Reward: {ep_reward:.2f}") + print("Finish training!") + return {'episodes':range(len(rewards)),'rewards':rewards} + def test(self,cfg,env,agent): + ''' test agent + ''' + print("Start testing!") + print(f"Env: {cfg['env_name']}, Algorithm: {cfg['algo_name']}, Device: {cfg['device']}") + rewards = [] # record rewards for all episodes + for i_ep in range(cfg['test_eps']): + state = env.reset() + ep_reward = 0 + while True: + action, prob, val = agent.predict_action(state) + next_state, reward, terminated, _ = env.step(action) + ep_reward += reward + state = next_state + if terminated: + break + rewards.append(ep_reward) + print(f"Episode: {i_ep+1}/{cfg['test_eps']}, Reward: {ep_reward:.2f}") + print("Finish testing!") + return {'episodes':range(len(rewards)),'rewards':rewards} if __name__ == "__main__": - cfg = get_args() - # 训练 - env, agent = env_agent_config(cfg) - res_dic = train(cfg, env, agent) - make_dir(cfg.result_path, cfg.model_path) - save_args(cfg) # 保存参数 - agent.save(path=cfg.model_path) # save model - save_results(res_dic, tag='train', - path=cfg.result_path) - plot_rewards(res_dic['rewards'], res_dic['ma_rewards'], cfg, tag="train") - # 测试 - env, agent = env_agent_config(cfg) - agent.load(path=cfg.model_path) # 导入模型 - res_dic = test(cfg, env, agent) - save_results(res_dic, tag='test', - path=cfg.result_path) # 保存结果 - plot_rewards(res_dic['rewards'], res_dic['ma_rewards'],cfg, tag="test") # 画出结果 \ No newline at end of file + main = Main() + main.run() \ No newline at end of file diff --git a/projects/codes/PPO/task1.py b/projects/codes/PPO/task1.py index 04726cb..d664770 100644 --- a/projects/codes/PPO/task1.py +++ b/projects/codes/PPO/task1.py @@ -1,3 +1,13 @@ +#!/usr/bin/env python +# 
coding=utf-8 +''' +Author: JiangJi +Email: johnjim0816@gmail.com +Date: 2022-09-19 14:48:16 +LastEditor: JiangJi +LastEditTime: 2022-10-30 00:45:14 +Discription: +''' import sys,os curr_path = os.path.dirname(os.path.abspath(__file__)) # 当前文件所在绝对路径 parent_path = os.path.dirname(curr_path) # 父路径 diff --git a/projects/codes/PolicyGradient/pg.py b/projects/codes/PolicyGradient/pg.py index d0b4956..7d84c6e 100644 --- a/projects/codes/PolicyGradient/pg.py +++ b/projects/codes/PolicyGradient/pg.py @@ -5,7 +5,7 @@ Author: John Email: johnjim0816@gmail.com Date: 2020-11-22 23:27:44 LastEditor: John -LastEditTime: 2022-08-27 13:45:26 +LastEditTime: 2022-10-09 21:28:18 Discription: Environment: ''' @@ -31,8 +31,6 @@ class PolicyGradient: state = torch.from_numpy(state).float() state = Variable(state) probs = self.policy_net(state) - print("probs") - print(probs) m = Bernoulli(probs) # 伯努利分布 action = m.sample() diff --git a/projects/codes/QLearning/Test_CliffWalking-v0_QLearning_20221030-014151/config.yaml b/projects/codes/QLearning/Test_CliffWalking-v0_QLearning_20221030-014151/config.yaml new file mode 100644 index 0000000..d9b4258 --- /dev/null +++ b/projects/codes/QLearning/Test_CliffWalking-v0_QLearning_20221030-014151/config.yaml @@ -0,0 +1,21 @@ +general_cfg: + algo_name: QLearning + device: cpu + env_name: CliffWalking-v0 + load_checkpoint: true + load_path: Train_CliffWalking-v0_QLearning_20221030-013856 + max_steps: 200 + mode: test + save_fig: true + seed: 1 + show_fig: false + test_eps: 20 + train_eps: 400 +algo_cfg: + batch_size: 64 + buffer_size: 100000 + epsilon_decay: 300 + epsilon_end: 0.01 + epsilon_start: 0.95 + gamma: 0.95 + lr: 0.1 diff --git a/projects/codes/QLearning/Test_CliffWalking-v0_QLearning_20221030-014151/logs/log.txt b/projects/codes/QLearning/Test_CliffWalking-v0_QLearning_20221030-014151/logs/log.txt new file mode 100644 index 0000000..d89037e --- /dev/null +++ 
b/projects/codes/QLearning/Test_CliffWalking-v0_QLearning_20221030-014151/logs/log.txt @@ -0,0 +1,24 @@ +2022-10-30 01:41:51 - r - INFO: - n_states: 48, n_actions: 4 +2022-10-30 01:41:51 - r - INFO: - Start testing! +2022-10-30 01:41:51 - r - INFO: - Env: CliffWalking-v0, Algorithm: QLearning, Device: cpu +2022-10-30 01:41:51 - r - INFO: - Episode: 1/20, Steps:13 Reward: -13.00 +2022-10-30 01:41:51 - r - INFO: - Episode: 2/20, Steps:13 Reward: -13.00 +2022-10-30 01:41:51 - r - INFO: - Episode: 3/20, Steps:13 Reward: -13.00 +2022-10-30 01:41:51 - r - INFO: - Episode: 4/20, Steps:13 Reward: -13.00 +2022-10-30 01:41:51 - r - INFO: - Episode: 5/20, Steps:13 Reward: -13.00 +2022-10-30 01:41:51 - r - INFO: - Episode: 6/20, Steps:13 Reward: -13.00 +2022-10-30 01:41:51 - r - INFO: - Episode: 7/20, Steps:13 Reward: -13.00 +2022-10-30 01:41:51 - r - INFO: - Episode: 8/20, Steps:13 Reward: -13.00 +2022-10-30 01:41:51 - r - INFO: - Episode: 9/20, Steps:13 Reward: -13.00 +2022-10-30 01:41:51 - r - INFO: - Episode: 10/20, Steps:13 Reward: -13.00 +2022-10-30 01:41:51 - r - INFO: - Episode: 11/20, Steps:13 Reward: -13.00 +2022-10-30 01:41:51 - r - INFO: - Episode: 12/20, Steps:13 Reward: -13.00 +2022-10-30 01:41:51 - r - INFO: - Episode: 13/20, Steps:13 Reward: -13.00 +2022-10-30 01:41:51 - r - INFO: - Episode: 14/20, Steps:13 Reward: -13.00 +2022-10-30 01:41:51 - r - INFO: - Episode: 15/20, Steps:13 Reward: -13.00 +2022-10-30 01:41:51 - r - INFO: - Episode: 16/20, Steps:13 Reward: -13.00 +2022-10-30 01:41:51 - r - INFO: - Episode: 17/20, Steps:13 Reward: -13.00 +2022-10-30 01:41:51 - r - INFO: - Episode: 18/20, Steps:13 Reward: -13.00 +2022-10-30 01:41:51 - r - INFO: - Episode: 19/20, Steps:13 Reward: -13.00 +2022-10-30 01:41:51 - r - INFO: - Episode: 20/20, Steps:13 Reward: -13.00 +2022-10-30 01:41:51 - r - INFO: - Finish testing! 
diff --git a/projects/codes/QLearning/Test_CliffWalking-v0_QLearning_20221030-014151/models/Qleaning_model.pkl b/projects/codes/QLearning/Test_CliffWalking-v0_QLearning_20221030-014151/models/Qleaning_model.pkl new file mode 100644 index 0000000..2022d46 Binary files /dev/null and b/projects/codes/QLearning/Test_CliffWalking-v0_QLearning_20221030-014151/models/Qleaning_model.pkl differ diff --git a/projects/codes/QLearning/Test_CliffWalking-v0_QLearning_20221030-014151/results/learning_curve.png b/projects/codes/QLearning/Test_CliffWalking-v0_QLearning_20221030-014151/results/learning_curve.png new file mode 100644 index 0000000..49a7daa Binary files /dev/null and b/projects/codes/QLearning/Test_CliffWalking-v0_QLearning_20221030-014151/results/learning_curve.png differ diff --git a/projects/codes/QLearning/outputs/CliffWalking-v0/20220826-224730/results/testing_results.csv b/projects/codes/QLearning/Test_CliffWalking-v0_QLearning_20221030-014151/results/res.csv similarity index 100% rename from projects/codes/QLearning/outputs/CliffWalking-v0/20220826-224730/results/testing_results.csv rename to projects/codes/QLearning/Test_CliffWalking-v0_QLearning_20221030-014151/results/res.csv diff --git a/projects/codes/QLearning/Test_FrozenLakeNoSlippery-v1_QLearning_20221030-014552/config.yaml b/projects/codes/QLearning/Test_FrozenLakeNoSlippery-v1_QLearning_20221030-014552/config.yaml new file mode 100644 index 0000000..537c003 --- /dev/null +++ b/projects/codes/QLearning/Test_FrozenLakeNoSlippery-v1_QLearning_20221030-014552/config.yaml @@ -0,0 +1,19 @@ +general_cfg: + algo_name: QLearning + device: cpu + env_name: FrozenLakeNoSlippery-v1 + load_checkpoint: true + load_path: Train_FrozenLakeNoSlippery-v1_QLearning_20221030-014504 + max_steps: 200 + mode: test + save_fig: true + seed: 10 + show_fig: false + test_eps: 20 + train_eps: 800 +algo_cfg: + epsilon_decay: 2000 + epsilon_end: 0.1 + epsilon_start: 0.7 + gamma: 0.95 + lr: 0.9 diff --git 
a/projects/codes/QLearning/Test_FrozenLakeNoSlippery-v1_QLearning_20221030-014552/logs/log.txt b/projects/codes/QLearning/Test_FrozenLakeNoSlippery-v1_QLearning_20221030-014552/logs/log.txt new file mode 100644 index 0000000..d972a0c --- /dev/null +++ b/projects/codes/QLearning/Test_FrozenLakeNoSlippery-v1_QLearning_20221030-014552/logs/log.txt @@ -0,0 +1,24 @@ +2022-10-30 01:45:52 - r - INFO: - n_states: 16, n_actions: 4 +2022-10-30 01:45:52 - r - INFO: - Start testing! +2022-10-30 01:45:52 - r - INFO: - Env: FrozenLakeNoSlippery-v1, Algorithm: QLearning, Device: cpu +2022-10-30 01:45:52 - r - INFO: - Episode: 1/20, Steps:6 Reward: 1.00 +2022-10-30 01:45:52 - r - INFO: - Episode: 2/20, Steps:6 Reward: 1.00 +2022-10-30 01:45:52 - r - INFO: - Episode: 3/20, Steps:6 Reward: 1.00 +2022-10-30 01:45:52 - r - INFO: - Episode: 4/20, Steps:6 Reward: 1.00 +2022-10-30 01:45:52 - r - INFO: - Episode: 5/20, Steps:6 Reward: 1.00 +2022-10-30 01:45:52 - r - INFO: - Episode: 6/20, Steps:6 Reward: 1.00 +2022-10-30 01:45:52 - r - INFO: - Episode: 7/20, Steps:6 Reward: 1.00 +2022-10-30 01:45:52 - r - INFO: - Episode: 8/20, Steps:6 Reward: 1.00 +2022-10-30 01:45:52 - r - INFO: - Episode: 9/20, Steps:6 Reward: 1.00 +2022-10-30 01:45:52 - r - INFO: - Episode: 10/20, Steps:6 Reward: 1.00 +2022-10-30 01:45:52 - r - INFO: - Episode: 11/20, Steps:6 Reward: 1.00 +2022-10-30 01:45:52 - r - INFO: - Episode: 12/20, Steps:6 Reward: 1.00 +2022-10-30 01:45:52 - r - INFO: - Episode: 13/20, Steps:6 Reward: 1.00 +2022-10-30 01:45:52 - r - INFO: - Episode: 14/20, Steps:6 Reward: 1.00 +2022-10-30 01:45:52 - r - INFO: - Episode: 15/20, Steps:6 Reward: 1.00 +2022-10-30 01:45:52 - r - INFO: - Episode: 16/20, Steps:6 Reward: 1.00 +2022-10-30 01:45:52 - r - INFO: - Episode: 17/20, Steps:6 Reward: 1.00 +2022-10-30 01:45:52 - r - INFO: - Episode: 18/20, Steps:6 Reward: 1.00 +2022-10-30 01:45:52 - r - INFO: - Episode: 19/20, Steps:6 Reward: 1.00 +2022-10-30 01:45:52 - r - INFO: - Episode: 20/20, Steps:6 
Reward: 1.00 +2022-10-30 01:45:52 - r - INFO: - Finish testing! diff --git a/projects/codes/QLearning/Test_FrozenLakeNoSlippery-v1_QLearning_20221030-014552/models/Qleaning_model.pkl b/projects/codes/QLearning/Test_FrozenLakeNoSlippery-v1_QLearning_20221030-014552/models/Qleaning_model.pkl new file mode 100644 index 0000000..41a5a05 Binary files /dev/null and b/projects/codes/QLearning/Test_FrozenLakeNoSlippery-v1_QLearning_20221030-014552/models/Qleaning_model.pkl differ diff --git a/projects/codes/QLearning/Test_FrozenLakeNoSlippery-v1_QLearning_20221030-014552/results/learning_curve.png b/projects/codes/QLearning/Test_FrozenLakeNoSlippery-v1_QLearning_20221030-014552/results/learning_curve.png new file mode 100644 index 0000000..60eeac6 Binary files /dev/null and b/projects/codes/QLearning/Test_FrozenLakeNoSlippery-v1_QLearning_20221030-014552/results/learning_curve.png differ diff --git a/projects/codes/QLearning/outputs/FrozenLakeNoSlippery-v1/20220825-114335/results/testing_results.csv b/projects/codes/QLearning/Test_FrozenLakeNoSlippery-v1_QLearning_20221030-014552/results/res.csv similarity index 100% rename from projects/codes/QLearning/outputs/FrozenLakeNoSlippery-v1/20220825-114335/results/testing_results.csv rename to projects/codes/QLearning/Test_FrozenLakeNoSlippery-v1_QLearning_20221030-014552/results/res.csv diff --git a/projects/codes/QLearning/Test_Racetrack-v0_QLearning_20221030-014958/config.yaml b/projects/codes/QLearning/Test_Racetrack-v0_QLearning_20221030-014958/config.yaml new file mode 100644 index 0000000..42d7573 --- /dev/null +++ b/projects/codes/QLearning/Test_Racetrack-v0_QLearning_20221030-014958/config.yaml @@ -0,0 +1,19 @@ +general_cfg: + algo_name: QLearning + device: cpu + env_name: Racetrack-v0 + load_checkpoint: true + load_path: Train_Racetrack-v0_QLearning_20221030-014833 + max_steps: 200 + mode: test + save_fig: true + seed: 10 + show_fig: false + test_eps: 20 + train_eps: 400 +algo_cfg: + epsilon_decay: 300 + epsilon_end: 
0.01 + epsilon_start: 0.95 + gamma: 0.9 + lr: 0.1 diff --git a/projects/codes/QLearning/Test_Racetrack-v0_QLearning_20221030-014958/logs/log.txt b/projects/codes/QLearning/Test_Racetrack-v0_QLearning_20221030-014958/logs/log.txt new file mode 100644 index 0000000..f36fac9 --- /dev/null +++ b/projects/codes/QLearning/Test_Racetrack-v0_QLearning_20221030-014958/logs/log.txt @@ -0,0 +1,24 @@ +2022-10-30 01:49:58 - r - INFO: - n_states: 4, n_actions: 9 +2022-10-30 01:49:58 - r - INFO: - Start testing! +2022-10-30 01:49:58 - r - INFO: - Env: Racetrack-v0, Algorithm: QLearning, Device: cpu +2022-10-30 01:49:58 - r - INFO: - Episode: 1/20, Steps:14 Reward: -4.00 +2022-10-30 01:49:58 - r - INFO: - Episode: 2/20, Steps:8 Reward: 2.00 +2022-10-30 01:49:58 - r - INFO: - Episode: 3/20, Steps:6 Reward: 4.00 +2022-10-30 01:49:58 - r - INFO: - Episode: 4/20, Steps:22 Reward: -12.00 +2022-10-30 01:49:58 - r - INFO: - Episode: 5/20, Steps:15 Reward: -15.00 +2022-10-30 01:49:58 - r - INFO: - Episode: 6/20, Steps:6 Reward: 4.00 +2022-10-30 01:49:58 - r - INFO: - Episode: 7/20, Steps:5 Reward: 5.00 +2022-10-30 01:49:58 - r - INFO: - Episode: 8/20, Steps:8 Reward: 2.00 +2022-10-30 01:49:58 - r - INFO: - Episode: 9/20, Steps:15 Reward: -5.00 +2022-10-30 01:49:58 - r - INFO: - Episode: 10/20, Steps:8 Reward: 2.00 +2022-10-30 01:49:58 - r - INFO: - Episode: 11/20, Steps:5 Reward: 5.00 +2022-10-30 01:49:58 - r - INFO: - Episode: 12/20, Steps:15 Reward: -5.00 +2022-10-30 01:49:58 - r - INFO: - Episode: 13/20, Steps:6 Reward: 4.00 +2022-10-30 01:49:58 - r - INFO: - Episode: 14/20, Steps:31 Reward: -51.00 +2022-10-30 01:49:58 - r - INFO: - Episode: 15/20, Steps:13 Reward: -13.00 +2022-10-30 01:49:58 - r - INFO: - Episode: 16/20, Steps:7 Reward: 3.00 +2022-10-30 01:49:58 - r - INFO: - Episode: 17/20, Steps:6 Reward: 4.00 +2022-10-30 01:49:58 - r - INFO: - Episode: 18/20, Steps:5 Reward: 5.00 +2022-10-30 01:49:58 - r - INFO: - Episode: 19/20, Steps:17 Reward: -17.00 +2022-10-30 01:49:58 - r - 
INFO: - Episode: 20/20, Steps:15 Reward: -5.00 +2022-10-30 01:49:58 - r - INFO: - Finish testing! diff --git a/projects/codes/QLearning/Test_Racetrack-v0_QLearning_20221030-014958/models/Qleaning_model.pkl b/projects/codes/QLearning/Test_Racetrack-v0_QLearning_20221030-014958/models/Qleaning_model.pkl new file mode 100644 index 0000000..1f458e1 Binary files /dev/null and b/projects/codes/QLearning/Test_Racetrack-v0_QLearning_20221030-014958/models/Qleaning_model.pkl differ diff --git a/projects/codes/QLearning/Test_Racetrack-v0_QLearning_20221030-014958/results/learning_curve.png b/projects/codes/QLearning/Test_Racetrack-v0_QLearning_20221030-014958/results/learning_curve.png new file mode 100644 index 0000000..869b2c9 Binary files /dev/null and b/projects/codes/QLearning/Test_Racetrack-v0_QLearning_20221030-014958/results/learning_curve.png differ diff --git a/projects/codes/QLearning/Test_Racetrack-v0_QLearning_20221030-014958/results/res.csv b/projects/codes/QLearning/Test_Racetrack-v0_QLearning_20221030-014958/results/res.csv new file mode 100644 index 0000000..cf33c86 --- /dev/null +++ b/projects/codes/QLearning/Test_Racetrack-v0_QLearning_20221030-014958/results/res.csv @@ -0,0 +1,21 @@ +episodes,rewards,steps +0,-4,14 +1,2,8 +2,4,6 +3,-12,22 +4,-15,15 +5,4,6 +6,5,5 +7,2,8 +8,-5,15 +9,2,8 +10,5,5 +11,-5,15 +12,4,6 +13,-51,31 +14,-13,13 +15,3,7 +16,4,6 +17,5,5 +18,-17,17 +19,-5,15 diff --git a/projects/codes/QLearning/Train_CliffWalking-v0_QLearning_20221030-014916/config.yaml b/projects/codes/QLearning/Train_CliffWalking-v0_QLearning_20221030-014916/config.yaml new file mode 100644 index 0000000..7610f6c --- /dev/null +++ b/projects/codes/QLearning/Train_CliffWalking-v0_QLearning_20221030-014916/config.yaml @@ -0,0 +1,19 @@ +general_cfg: + algo_name: QLearning + device: cpu + env_name: CliffWalking-v0 + load_checkpoint: false + load_path: Train_CartPole-v1_DQN_20221026-054757 + max_steps: 200 + mode: train + save_fig: true + seed: 1 + show_fig: false + 
test_eps: 20 + train_eps: 800 +algo_cfg: + epsilon_decay: 300 + epsilon_end: 0.01 + epsilon_start: 0.95 + gamma: 0.95 + lr: 0.1 diff --git a/projects/codes/QLearning/Train_CliffWalking-v0_QLearning_20221030-014916/logs/log.txt b/projects/codes/QLearning/Train_CliffWalking-v0_QLearning_20221030-014916/logs/log.txt new file mode 100644 index 0000000..d42935f --- /dev/null +++ b/projects/codes/QLearning/Train_CliffWalking-v0_QLearning_20221030-014916/logs/log.txt @@ -0,0 +1,804 @@ +2022-10-30 01:49:16 - r - INFO: - n_states: 48, n_actions: 4 +2022-10-30 01:49:16 - r - INFO: - Start training! +2022-10-30 01:49:16 - r - INFO: - Env: CliffWalking-v0, Algorithm: QLearning, Device: cpu +2022-10-30 01:49:16 - r - INFO: - Episode: 1/800, Reward: -1586.00: Epislon: 0.493 +2022-10-30 01:49:16 - r - INFO: - Episode: 2/800, Reward: -1091.00: Epislon: 0.258 +2022-10-30 01:49:16 - r - INFO: - Episode: 3/800, Reward: -596.00: Epislon: 0.137 +2022-10-30 01:49:16 - r - INFO: - Episode: 4/800, Reward: -497.00: Epislon: 0.075 +2022-10-30 01:49:16 - r - INFO: - Episode: 5/800, Reward: -398.00: Epislon: 0.044 +2022-10-30 01:49:16 - r - INFO: - Episode: 6/800, Reward: -362.00: Epislon: 0.029 +2022-10-30 01:49:16 - r - INFO: - Episode: 7/800, Reward: -179.00: Epislon: 0.021 +2022-10-30 01:49:16 - r - INFO: - Episode: 8/800, Reward: -398.00: Epislon: 0.015 +2022-10-30 01:49:16 - r - INFO: - Episode: 9/800, Reward: -79.00: Epislon: 0.014 +2022-10-30 01:49:16 - r - INFO: - Episode: 10/800, Reward: -141.00: Epislon: 0.013 +2022-10-30 01:49:16 - r - INFO: - Episode: 11/800, Reward: -143.00: Epislon: 0.012 +2022-10-30 01:49:16 - r - INFO: - Episode: 12/800, Reward: -134.00: Epislon: 0.011 +2022-10-30 01:49:16 - r - INFO: - Episode: 13/800, Reward: -299.00: Epislon: 0.011 +2022-10-30 01:49:16 - r - INFO: - Episode: 14/800, Reward: -102.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 15/800, Reward: -61.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 16/800, Reward: 
-136.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 17/800, Reward: -176.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 18/800, Reward: -98.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 19/800, Reward: -92.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 20/800, Reward: -110.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 21/800, Reward: -67.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 22/800, Reward: -136.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 23/800, Reward: -98.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 24/800, Reward: -164.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 25/800, Reward: -65.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 26/800, Reward: -98.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 27/800, Reward: -33.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 28/800, Reward: -161.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 29/800, Reward: -72.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 30/800, Reward: -73.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 31/800, Reward: -116.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 32/800, Reward: -50.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 33/800, Reward: -66.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 34/800, Reward: -123.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 35/800, Reward: -40.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 36/800, Reward: -100.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 37/800, Reward: -56.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 38/800, Reward: -101.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 39/800, Reward: -55.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 40/800, 
Reward: -84.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 41/800, Reward: -68.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 42/800, Reward: -33.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 43/800, Reward: -113.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 44/800, Reward: -72.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 45/800, Reward: -36.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 46/800, Reward: -84.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 47/800, Reward: -45.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 48/800, Reward: -86.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 49/800, Reward: -57.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 50/800, Reward: -92.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 51/800, Reward: -39.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 52/800, Reward: -76.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 53/800, Reward: -39.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 54/800, Reward: -47.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 55/800, Reward: -88.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 56/800, Reward: -40.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 57/800, Reward: -55.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 58/800, Reward: -69.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 59/800, Reward: -51.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 60/800, Reward: -69.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 61/800, Reward: -36.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 62/800, Reward: -84.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 63/800, Reward: -38.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 64/800, 
Reward: -56.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 65/800, Reward: -58.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 66/800, Reward: -27.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 67/800, Reward: -64.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 68/800, Reward: -38.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 69/800, Reward: -53.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 70/800, Reward: -70.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 71/800, Reward: -40.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 72/800, Reward: -47.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 73/800, Reward: -71.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 74/800, Reward: -47.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 75/800, Reward: -32.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 76/800, Reward: -70.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 77/800, Reward: -36.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 78/800, Reward: -36.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 79/800, Reward: -73.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 80/800, Reward: -18.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 81/800, Reward: -67.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 82/800, Reward: -29.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 83/800, Reward: -25.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 84/800, Reward: -64.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 85/800, Reward: -28.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 86/800, Reward: -61.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 87/800, Reward: -33.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 88/800, Reward: 
-30.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 89/800, Reward: -25.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 90/800, Reward: -86.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 91/800, Reward: -19.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 92/800, Reward: -53.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 93/800, Reward: -39.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 94/800, Reward: -39.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 95/800, Reward: -36.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 96/800, Reward: -45.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 97/800, Reward: -43.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 98/800, Reward: -31.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 99/800, Reward: -37.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 100/800, Reward: -43.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 101/800, Reward: -46.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 102/800, Reward: -28.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 103/800, Reward: -32.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 104/800, Reward: -47.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 105/800, Reward: -37.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 106/800, Reward: -39.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 107/800, Reward: -38.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 108/800, Reward: -29.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 109/800, Reward: -44.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 110/800, Reward: -39.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 111/800, Reward: -26.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 112/800, 
Reward: -28.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 113/800, Reward: -63.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 114/800, Reward: -18.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 115/800, Reward: -52.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 116/800, Reward: -24.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 117/800, Reward: -26.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 118/800, Reward: -39.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 119/800, Reward: -21.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 120/800, Reward: -44.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 121/800, Reward: -48.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 122/800, Reward: -24.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 123/800, Reward: -40.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 124/800, Reward: -31.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 125/800, Reward: -23.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 126/800, Reward: -35.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 127/800, Reward: -26.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 128/800, Reward: -31.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 129/800, Reward: -33.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 130/800, Reward: -32.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 131/800, Reward: -36.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 132/800, Reward: -21.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 133/800, Reward: -43.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 134/800, Reward: -51.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 135/800, Reward: -17.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - 
Episode: 136/800, Reward: -28.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 137/800, Reward: -24.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 138/800, Reward: -42.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 139/800, Reward: -21.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 140/800, Reward: -24.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 141/800, Reward: -30.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 142/800, Reward: -24.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 143/800, Reward: -36.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 144/800, Reward: -14.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 145/800, Reward: -42.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 146/800, Reward: -44.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 147/800, Reward: -25.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 148/800, Reward: -42.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 149/800, Reward: -16.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 150/800, Reward: -30.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 151/800, Reward: -32.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 152/800, Reward: -20.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 153/800, Reward: -44.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 154/800, Reward: -20.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 155/800, Reward: -29.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 156/800, Reward: -20.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 157/800, Reward: -30.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 158/800, Reward: -18.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 159/800, Reward: -25.00: Epislon: 0.010 +2022-10-30 
01:49:16 - r - INFO: - Episode: 160/800, Reward: -38.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 161/800, Reward: -34.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 162/800, Reward: -25.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 163/800, Reward: -30.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 164/800, Reward: -24.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 165/800, Reward: -19.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 166/800, Reward: -34.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 167/800, Reward: -15.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 168/800, Reward: -27.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 169/800, Reward: -22.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 170/800, Reward: -28.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 171/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 172/800, Reward: -20.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 173/800, Reward: -58.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 174/800, Reward: -18.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 175/800, Reward: -19.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 176/800, Reward: -29.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 177/800, Reward: -125.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 178/800, Reward: -156.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 179/800, Reward: -18.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 180/800, Reward: -25.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 181/800, Reward: -27.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 182/800, Reward: -21.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 183/800, Reward: -22.00: Epislon: 
0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 184/800, Reward: -21.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 185/800, Reward: -22.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 186/800, Reward: -25.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 187/800, Reward: -25.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 188/800, Reward: -27.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 189/800, Reward: -19.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 190/800, Reward: -26.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 191/800, Reward: -17.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 192/800, Reward: -21.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 193/800, Reward: -30.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 194/800, Reward: -16.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 195/800, Reward: -35.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 196/800, Reward: -21.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 197/800, Reward: -134.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 198/800, Reward: -21.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 199/800, Reward: -18.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 200/800, Reward: -27.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 201/800, Reward: -16.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 202/800, Reward: -44.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 203/800, Reward: -23.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 204/800, Reward: -23.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 205/800, Reward: -16.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 206/800, Reward: -18.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 207/800, Reward: 
-17.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 208/800, Reward: -18.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 209/800, Reward: -16.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 210/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 211/800, Reward: -19.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 212/800, Reward: -26.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 213/800, Reward: -27.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 214/800, Reward: -133.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 215/800, Reward: -21.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 216/800, Reward: -24.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 217/800, Reward: -29.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 218/800, Reward: -24.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 219/800, Reward: -16.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 220/800, Reward: -16.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 221/800, Reward: -24.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 222/800, Reward: -17.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 223/800, Reward: -143.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 224/800, Reward: -15.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 225/800, Reward: -15.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 226/800, Reward: -23.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 227/800, Reward: -21.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 228/800, Reward: -26.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 229/800, Reward: -16.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 230/800, Reward: -19.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - 
Episode: 231/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 232/800, Reward: -19.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 233/800, Reward: -29.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 234/800, Reward: -26.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 235/800, Reward: -30.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 236/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 237/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 238/800, Reward: -21.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 239/800, Reward: -15.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 240/800, Reward: -18.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 241/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 242/800, Reward: -16.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 243/800, Reward: -27.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 244/800, Reward: -17.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 245/800, Reward: -27.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 246/800, Reward: -15.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 247/800, Reward: -14.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 248/800, Reward: -28.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 249/800, Reward: -17.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 250/800, Reward: -15.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 251/800, Reward: -17.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 252/800, Reward: -19.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 253/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 254/800, Reward: -23.00: Epislon: 0.010 +2022-10-30 
01:49:16 - r - INFO: - Episode: 255/800, Reward: -15.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 256/800, Reward: -15.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 257/800, Reward: -19.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 258/800, Reward: -15.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 259/800, Reward: -22.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 260/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 261/800, Reward: -16.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 262/800, Reward: -15.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 263/800, Reward: -34.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 264/800, Reward: -16.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 265/800, Reward: -26.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 266/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 267/800, Reward: -15.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 268/800, Reward: -15.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 269/800, Reward: -18.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 270/800, Reward: -18.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 271/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 272/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 273/800, Reward: -19.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 274/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 275/800, Reward: -27.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 276/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 277/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 278/800, Reward: -22.00: Epislon: 
0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 279/800, Reward: -17.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 280/800, Reward: -17.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 281/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 282/800, Reward: -26.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 283/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 284/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 285/800, Reward: -14.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 286/800, Reward: -15.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 287/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 288/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 289/800, Reward: -14.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 290/800, Reward: -24.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 291/800, Reward: -21.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 292/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 293/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 294/800, Reward: -14.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 295/800, Reward: -15.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 296/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 297/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 298/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 299/800, Reward: -14.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 300/800, Reward: -21.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 301/800, Reward: -15.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 302/800, Reward: 
-13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 303/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 304/800, Reward: -21.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 305/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 306/800, Reward: -22.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 307/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 308/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 309/800, Reward: -16.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 310/800, Reward: -15.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 311/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 312/800, Reward: -31.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 313/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 314/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 315/800, Reward: -15.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 316/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 317/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 318/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 319/800, Reward: -14.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 320/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 321/800, Reward: -15.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 322/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 323/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 324/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 325/800, Reward: -16.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 
326/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 327/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 328/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 329/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 330/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 331/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 332/800, Reward: -15.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 333/800, Reward: -16.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 334/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 335/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 336/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 337/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 338/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 339/800, Reward: -15.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 340/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 341/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 342/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 343/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 344/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 345/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 346/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 347/800, Reward: -22.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 348/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 349/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - 
INFO: - Episode: 350/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 351/800, Reward: -17.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 352/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 353/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 354/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 355/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 356/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 357/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 358/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 359/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 360/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 361/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 362/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 363/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 364/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 365/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 366/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 367/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 368/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 369/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 370/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 371/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 372/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 373/800, Reward: -13.00: Epislon: 0.010 
+2022-10-30 01:49:16 - r - INFO: - Episode: 374/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 375/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 376/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 377/800, Reward: -21.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 378/800, Reward: -123.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 379/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 380/800, Reward: -14.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 381/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 382/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 383/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 384/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 385/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 386/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 387/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 388/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 389/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 390/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 391/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 392/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 393/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 394/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 395/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 396/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 397/800, Reward: 
-15.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 398/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 399/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 400/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 401/800, Reward: -14.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 402/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 403/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 404/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 405/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 406/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 407/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 408/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 409/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 410/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 411/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 412/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 413/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 414/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 415/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 416/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 417/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 418/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 419/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 420/800, Reward: -113.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 
421/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 422/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 423/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 424/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 425/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 426/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 427/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 428/800, Reward: -115.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 429/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 430/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 431/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 432/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 433/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 434/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 435/800, Reward: -15.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 436/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 437/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 438/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 439/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 440/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 441/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 442/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 443/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 444/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - 
INFO: - Episode: 445/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 446/800, Reward: -15.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 447/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 448/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 449/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 450/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 451/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 452/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 453/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 454/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 455/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 456/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 457/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 458/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 459/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 460/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 461/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 462/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 463/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 464/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 465/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 466/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 467/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 468/800, Reward: -13.00: Epislon: 0.010 
+2022-10-30 01:49:16 - r - INFO: - Episode: 469/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 470/800, Reward: -15.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 471/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 472/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 473/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 474/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 475/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 476/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 477/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 478/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 479/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 480/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 481/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 482/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 483/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 484/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 485/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 486/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 487/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 488/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 489/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 490/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 491/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 492/800, Reward: 
-13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 493/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 494/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 495/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 496/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 497/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 498/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 499/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 500/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 501/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 502/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 503/800, Reward: -15.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 504/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 505/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 506/800, Reward: -15.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 507/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 508/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 509/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 510/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 511/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 512/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 513/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 514/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 515/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 
516/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 517/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 518/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 519/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 520/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 521/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 522/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 523/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 524/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 525/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 526/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 527/800, Reward: -15.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 528/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 529/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 530/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 531/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 532/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 533/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 534/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 535/800, Reward: -122.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 536/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 537/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 538/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 539/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - 
INFO: - Episode: 540/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 541/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 542/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 543/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 544/800, Reward: -15.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 545/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 546/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 547/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 548/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 549/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 550/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 551/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 552/800, Reward: -15.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 553/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 554/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 555/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 556/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 557/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 558/800, Reward: -115.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 559/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 560/800, Reward: -15.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 561/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 562/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 563/800, Reward: -13.00: Epislon: 0.010 
+2022-10-30 01:49:16 - r - INFO: - Episode: 564/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 565/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 566/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 567/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 568/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 569/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 570/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 571/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 572/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 573/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 574/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 575/800, Reward: -15.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 576/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 577/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 578/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 579/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 580/800, Reward: -122.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 581/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 582/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 583/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 584/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 585/800, Reward: -14.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 586/800, Reward: -14.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 587/800, Reward: 
-13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 588/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 589/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 590/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 591/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 592/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 593/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 594/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 595/800, Reward: -15.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 596/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 597/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 598/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 599/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 600/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 601/800, Reward: -122.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 602/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 603/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 604/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 605/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 606/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 607/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 608/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 609/800, Reward: -116.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 610/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - 
Episode: 611/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 612/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 613/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 614/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 615/800, Reward: -115.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 616/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 617/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 618/800, Reward: -122.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 619/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 620/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 621/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 622/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 623/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 624/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 625/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 626/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 627/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 628/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 629/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 630/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 631/800, Reward: -15.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 632/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 633/800, Reward: -116.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 634/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 
01:49:16 - r - INFO: - Episode: 635/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 636/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 637/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 638/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 639/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 640/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 641/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 642/800, Reward: -117.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 643/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 644/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 645/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 646/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 647/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 648/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 649/800, Reward: -223.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 650/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 651/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 652/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 653/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 654/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 655/800, Reward: -15.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 656/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 657/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 658/800, Reward: -13.00: Epislon: 
0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 659/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 660/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 661/800, Reward: -221.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 662/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 663/800, Reward: -15.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 664/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 665/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 666/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 667/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 668/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 669/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 670/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 671/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 672/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 673/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 674/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 675/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 676/800, Reward: -21.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 677/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 678/800, Reward: -15.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 679/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 680/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 681/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 682/800, Reward: 
-13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 683/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 684/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 685/800, Reward: -113.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 686/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 687/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 688/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 689/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 690/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 691/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 692/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 693/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 694/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 695/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 696/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 697/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 698/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 699/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 700/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 701/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 702/800, Reward: -15.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 703/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 704/800, Reward: -15.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 705/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 
706/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 707/800, Reward: -15.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 708/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 709/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 710/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 711/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 712/800, Reward: -17.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 713/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 714/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 715/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 716/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 717/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 718/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 719/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 720/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 721/800, Reward: -14.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 722/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 723/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 724/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 725/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 726/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 727/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 728/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 729/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - 
INFO: - Episode: 730/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 731/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 732/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 733/800, Reward: -14.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 734/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 735/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 736/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 737/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 738/800, Reward: -15.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 739/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 740/800, Reward: -15.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 741/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 742/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 743/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 744/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 745/800, Reward: -15.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 746/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 747/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 748/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 749/800, Reward: -15.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 750/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 751/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 752/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 753/800, Reward: -13.00: Epislon: 0.010 
+2022-10-30 01:49:16 - r - INFO: - Episode: 754/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 755/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 756/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 757/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 758/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 759/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 760/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 761/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 762/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 763/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 764/800, Reward: -122.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 765/800, Reward: -15.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 766/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 767/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 768/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 769/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 770/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 771/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 772/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 773/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 774/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 775/800, Reward: -15.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 776/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 777/800, Reward: 
-13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 778/800, Reward: -14.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 779/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 780/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 781/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 782/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 783/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 784/800, Reward: -17.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 785/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 786/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 787/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 788/800, Reward: -15.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 789/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 790/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 791/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 792/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 793/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 794/800, Reward: -15.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 795/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 796/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 797/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 798/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 799/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Episode: 800/800, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:49:16 - r - INFO: - Finish 
training! diff --git a/projects/codes/QLearning/Train_CliffWalking-v0_QLearning_20221030-014916/models/Qleaning_model.pkl b/projects/codes/QLearning/Train_CliffWalking-v0_QLearning_20221030-014916/models/Qleaning_model.pkl new file mode 100644 index 0000000..3be0dc4 Binary files /dev/null and b/projects/codes/QLearning/Train_CliffWalking-v0_QLearning_20221030-014916/models/Qleaning_model.pkl differ diff --git a/projects/codes/QLearning/Train_CliffWalking-v0_QLearning_20221030-014916/results/learning_curve.png b/projects/codes/QLearning/Train_CliffWalking-v0_QLearning_20221030-014916/results/learning_curve.png new file mode 100644 index 0000000..ee7abc9 Binary files /dev/null and b/projects/codes/QLearning/Train_CliffWalking-v0_QLearning_20221030-014916/results/learning_curve.png differ diff --git a/projects/codes/QLearning/Train_CliffWalking-v0_QLearning_20221030-014916/results/res.csv b/projects/codes/QLearning/Train_CliffWalking-v0_QLearning_20221030-014916/results/res.csv new file mode 100644 index 0000000..3799662 --- /dev/null +++ b/projects/codes/QLearning/Train_CliffWalking-v0_QLearning_20221030-014916/results/res.csv @@ -0,0 +1,801 @@ +episodes,rewards,steps +0,-1586,200 +1,-1091,200 +2,-596,200 +3,-497,200 +4,-398,200 +5,-362,164 +6,-179,179 +7,-398,200 +8,-79,79 +9,-141,141 +10,-143,143 +11,-134,134 +12,-299,200 +13,-102,102 +14,-61,61 +15,-136,136 +16,-176,176 +17,-98,98 +18,-92,92 +19,-110,110 +20,-67,67 +21,-136,136 +22,-98,98 +23,-164,164 +24,-65,65 +25,-98,98 +26,-33,33 +27,-161,161 +28,-72,72 +29,-73,73 +30,-116,116 +31,-50,50 +32,-66,66 +33,-123,123 +34,-40,40 +35,-100,100 +36,-56,56 +37,-101,101 +38,-55,55 +39,-84,84 +40,-68,68 +41,-33,33 +42,-113,113 +43,-72,72 +44,-36,36 +45,-84,84 +46,-45,45 +47,-86,86 +48,-57,57 +49,-92,92 +50,-39,39 +51,-76,76 +52,-39,39 +53,-47,47 +54,-88,88 +55,-40,40 +56,-55,55 +57,-69,69 +58,-51,51 +59,-69,69 +60,-36,36 +61,-84,84 +62,-38,38 +63,-56,56 +64,-58,58 +65,-27,27 +66,-64,64 +67,-38,38 +68,-53,53 +69,-70,70 
+70,-40,40 +71,-47,47 +72,-71,71 +73,-47,47 +74,-32,32 +75,-70,70 +76,-36,36 +77,-36,36 +78,-73,73 +79,-18,18 +80,-67,67 +81,-29,29 +82,-25,25 +83,-64,64 +84,-28,28 +85,-61,61 +86,-33,33 +87,-30,30 +88,-25,25 +89,-86,86 +90,-19,19 +91,-53,53 +92,-39,39 +93,-39,39 +94,-36,36 +95,-45,45 +96,-43,43 +97,-31,31 +98,-37,37 +99,-43,43 +100,-46,46 +101,-28,28 +102,-32,32 +103,-47,47 +104,-37,37 +105,-39,39 +106,-38,38 +107,-29,29 +108,-44,44 +109,-39,39 +110,-26,26 +111,-28,28 +112,-63,63 +113,-18,18 +114,-52,52 +115,-24,24 +116,-26,26 +117,-39,39 +118,-21,21 +119,-44,44 +120,-48,48 +121,-24,24 +122,-40,40 +123,-31,31 +124,-23,23 +125,-35,35 +126,-26,26 +127,-31,31 +128,-33,33 +129,-32,32 +130,-36,36 +131,-21,21 +132,-43,43 +133,-51,51 +134,-17,17 +135,-28,28 +136,-24,24 +137,-42,42 +138,-21,21 +139,-24,24 +140,-30,30 +141,-24,24 +142,-36,36 +143,-14,14 +144,-42,42 +145,-44,44 +146,-25,25 +147,-42,42 +148,-16,16 +149,-30,30 +150,-32,32 +151,-20,20 +152,-44,44 +153,-20,20 +154,-29,29 +155,-20,20 +156,-30,30 +157,-18,18 +158,-25,25 +159,-38,38 +160,-34,34 +161,-25,25 +162,-30,30 +163,-24,24 +164,-19,19 +165,-34,34 +166,-15,15 +167,-27,27 +168,-22,22 +169,-28,28 +170,-13,13 +171,-20,20 +172,-58,58 +173,-18,18 +174,-19,19 +175,-29,29 +176,-125,26 +177,-156,57 +178,-18,18 +179,-25,25 +180,-27,27 +181,-21,21 +182,-22,22 +183,-21,21 +184,-22,22 +185,-25,25 +186,-25,25 +187,-27,27 +188,-19,19 +189,-26,26 +190,-17,17 +191,-21,21 +192,-30,30 +193,-16,16 +194,-35,35 +195,-21,21 +196,-134,35 +197,-21,21 +198,-18,18 +199,-27,27 +200,-16,16 +201,-44,44 +202,-23,23 +203,-23,23 +204,-16,16 +205,-18,18 +206,-17,17 +207,-18,18 +208,-16,16 +209,-13,13 +210,-19,19 +211,-26,26 +212,-27,27 +213,-133,34 +214,-21,21 +215,-24,24 +216,-29,29 +217,-24,24 +218,-16,16 +219,-16,16 +220,-24,24 +221,-17,17 +222,-143,44 +223,-15,15 +224,-15,15 +225,-23,23 +226,-21,21 +227,-26,26 +228,-16,16 +229,-19,19 +230,-13,13 +231,-19,19 +232,-29,29 +233,-26,26 +234,-30,30 +235,-13,13 +236,-13,13 +237,-21,21 
+238,-15,15 +239,-18,18 +240,-13,13 +241,-16,16 +242,-27,27 +243,-17,17 +244,-27,27 +245,-15,15 +246,-14,14 +247,-28,28 +248,-17,17 +249,-15,15 +250,-17,17 +251,-19,19 +252,-13,13 +253,-23,23 +254,-15,15 +255,-15,15 +256,-19,19 +257,-15,15 +258,-22,22 +259,-13,13 +260,-16,16 +261,-15,15 +262,-34,34 +263,-16,16 +264,-26,26 +265,-13,13 +266,-15,15 +267,-15,15 +268,-18,18 +269,-18,18 +270,-13,13 +271,-13,13 +272,-19,19 +273,-13,13 +274,-27,27 +275,-13,13 +276,-13,13 +277,-22,22 +278,-17,17 +279,-17,17 +280,-13,13 +281,-26,26 +282,-13,13 +283,-13,13 +284,-14,14 +285,-15,15 +286,-13,13 +287,-13,13 +288,-14,14 +289,-24,24 +290,-21,21 +291,-13,13 +292,-13,13 +293,-14,14 +294,-15,15 +295,-13,13 +296,-13,13 +297,-13,13 +298,-14,14 +299,-21,21 +300,-15,15 +301,-13,13 +302,-13,13 +303,-21,21 +304,-13,13 +305,-22,22 +306,-13,13 +307,-13,13 +308,-16,16 +309,-15,15 +310,-13,13 +311,-31,31 +312,-13,13 +313,-13,13 +314,-15,15 +315,-13,13 +316,-13,13 +317,-13,13 +318,-14,14 +319,-13,13 +320,-15,15 +321,-13,13 +322,-13,13 +323,-13,13 +324,-16,16 +325,-13,13 +326,-13,13 +327,-13,13 +328,-13,13 +329,-13,13 +330,-13,13 +331,-15,15 +332,-16,16 +333,-13,13 +334,-13,13 +335,-13,13 +336,-13,13 +337,-13,13 +338,-15,15 +339,-13,13 +340,-13,13 +341,-13,13 +342,-13,13 +343,-13,13 +344,-13,13 +345,-13,13 +346,-22,22 +347,-13,13 +348,-13,13 +349,-13,13 +350,-17,17 +351,-13,13 +352,-13,13 +353,-13,13 +354,-13,13 +355,-13,13 +356,-13,13 +357,-13,13 +358,-13,13 +359,-13,13 +360,-13,13 +361,-13,13 +362,-13,13 +363,-13,13 +364,-13,13 +365,-13,13 +366,-13,13 +367,-13,13 +368,-13,13 +369,-13,13 +370,-13,13 +371,-13,13 +372,-13,13 +373,-13,13 +374,-13,13 +375,-13,13 +376,-21,21 +377,-123,24 +378,-13,13 +379,-14,14 +380,-13,13 +381,-13,13 +382,-13,13 +383,-13,13 +384,-13,13 +385,-13,13 +386,-13,13 +387,-13,13 +388,-13,13 +389,-13,13 +390,-13,13 +391,-13,13 +392,-13,13 +393,-13,13 +394,-13,13 +395,-13,13 +396,-15,15 +397,-13,13 +398,-13,13 +399,-13,13 +400,-14,14 +401,-13,13 +402,-13,13 +403,-13,13 
+404,-13,13 +405,-13,13 +406,-13,13 +407,-13,13 +408,-13,13 +409,-13,13 +410,-13,13 +411,-13,13 +412,-13,13 +413,-13,13 +414,-13,13 +415,-13,13 +416,-13,13 +417,-13,13 +418,-13,13 +419,-113,14 +420,-13,13 +421,-13,13 +422,-13,13 +423,-13,13 +424,-13,13 +425,-13,13 +426,-13,13 +427,-115,16 +428,-13,13 +429,-13,13 +430,-13,13 +431,-13,13 +432,-13,13 +433,-13,13 +434,-15,15 +435,-13,13 +436,-13,13 +437,-13,13 +438,-13,13 +439,-13,13 +440,-13,13 +441,-13,13 +442,-13,13 +443,-13,13 +444,-13,13 +445,-15,15 +446,-13,13 +447,-13,13 +448,-13,13 +449,-13,13 +450,-13,13 +451,-13,13 +452,-13,13 +453,-13,13 +454,-13,13 +455,-13,13 +456,-13,13 +457,-13,13 +458,-13,13 +459,-13,13 +460,-13,13 +461,-13,13 +462,-13,13 +463,-13,13 +464,-13,13 +465,-13,13 +466,-13,13 +467,-13,13 +468,-13,13 +469,-15,15 +470,-13,13 +471,-13,13 +472,-13,13 +473,-13,13 +474,-13,13 +475,-13,13 +476,-13,13 +477,-13,13 +478,-13,13 +479,-13,13 +480,-13,13 +481,-13,13 +482,-13,13 +483,-13,13 +484,-13,13 +485,-13,13 +486,-13,13 +487,-13,13 +488,-13,13 +489,-13,13 +490,-13,13 +491,-13,13 +492,-13,13 +493,-13,13 +494,-13,13 +495,-13,13 +496,-13,13 +497,-13,13 +498,-13,13 +499,-13,13 +500,-13,13 +501,-13,13 +502,-15,15 +503,-13,13 +504,-13,13 +505,-15,15 +506,-13,13 +507,-13,13 +508,-13,13 +509,-13,13 +510,-13,13 +511,-13,13 +512,-13,13 +513,-13,13 +514,-13,13 +515,-13,13 +516,-13,13 +517,-13,13 +518,-13,13 +519,-13,13 +520,-13,13 +521,-13,13 +522,-13,13 +523,-13,13 +524,-13,13 +525,-13,13 +526,-15,15 +527,-13,13 +528,-13,13 +529,-13,13 +530,-13,13 +531,-13,13 +532,-13,13 +533,-13,13 +534,-122,23 +535,-13,13 +536,-13,13 +537,-13,13 +538,-13,13 +539,-13,13 +540,-13,13 +541,-13,13 +542,-13,13 +543,-15,15 +544,-13,13 +545,-13,13 +546,-13,13 +547,-13,13 +548,-13,13 +549,-13,13 +550,-13,13 +551,-15,15 +552,-13,13 +553,-13,13 +554,-13,13 +555,-13,13 +556,-13,13 +557,-115,16 +558,-13,13 +559,-15,15 +560,-13,13 +561,-13,13 +562,-13,13 +563,-13,13 +564,-13,13 +565,-13,13 +566,-13,13 +567,-13,13 +568,-13,13 +569,-13,13 
+570,-13,13 +571,-13,13 +572,-13,13 +573,-13,13 +574,-15,15 +575,-13,13 +576,-13,13 +577,-13,13 +578,-13,13 +579,-122,23 +580,-13,13 +581,-13,13 +582,-13,13 +583,-13,13 +584,-14,14 +585,-14,14 +586,-13,13 +587,-13,13 +588,-13,13 +589,-13,13 +590,-13,13 +591,-13,13 +592,-13,13 +593,-13,13 +594,-15,15 +595,-13,13 +596,-13,13 +597,-13,13 +598,-13,13 +599,-13,13 +600,-122,23 +601,-13,13 +602,-13,13 +603,-13,13 +604,-13,13 +605,-13,13 +606,-13,13 +607,-13,13 +608,-116,17 +609,-13,13 +610,-13,13 +611,-13,13 +612,-13,13 +613,-13,13 +614,-115,16 +615,-13,13 +616,-13,13 +617,-122,23 +618,-13,13 +619,-13,13 +620,-13,13 +621,-13,13 +622,-13,13 +623,-13,13 +624,-13,13 +625,-13,13 +626,-13,13 +627,-13,13 +628,-13,13 +629,-13,13 +630,-15,15 +631,-13,13 +632,-116,17 +633,-13,13 +634,-13,13 +635,-13,13 +636,-13,13 +637,-13,13 +638,-13,13 +639,-13,13 +640,-13,13 +641,-117,18 +642,-13,13 +643,-13,13 +644,-13,13 +645,-13,13 +646,-13,13 +647,-13,13 +648,-223,25 +649,-13,13 +650,-13,13 +651,-13,13 +652,-13,13 +653,-13,13 +654,-15,15 +655,-13,13 +656,-13,13 +657,-13,13 +658,-13,13 +659,-13,13 +660,-221,23 +661,-13,13 +662,-15,15 +663,-13,13 +664,-13,13 +665,-13,13 +666,-13,13 +667,-13,13 +668,-13,13 +669,-13,13 +670,-13,13 +671,-13,13 +672,-13,13 +673,-13,13 +674,-13,13 +675,-21,21 +676,-13,13 +677,-15,15 +678,-13,13 +679,-13,13 +680,-13,13 +681,-13,13 +682,-13,13 +683,-13,13 +684,-113,14 +685,-13,13 +686,-13,13 +687,-13,13 +688,-13,13 +689,-13,13 +690,-13,13 +691,-13,13 +692,-13,13 +693,-13,13 +694,-13,13 +695,-13,13 +696,-13,13 +697,-13,13 +698,-13,13 +699,-13,13 +700,-13,13 +701,-15,15 +702,-13,13 +703,-15,15 +704,-13,13 +705,-13,13 +706,-15,15 +707,-13,13 +708,-13,13 +709,-13,13 +710,-13,13 +711,-17,17 +712,-13,13 +713,-13,13 +714,-13,13 +715,-13,13 +716,-13,13 +717,-13,13 +718,-13,13 +719,-13,13 +720,-14,14 +721,-13,13 +722,-13,13 +723,-13,13 +724,-13,13 +725,-13,13 +726,-13,13 +727,-13,13 +728,-13,13 +729,-13,13 +730,-13,13 +731,-13,13 +732,-14,14 +733,-13,13 +734,-13,13 
+735,-13,13 +736,-13,13 +737,-15,15 +738,-13,13 +739,-15,15 +740,-13,13 +741,-13,13 +742,-13,13 +743,-13,13 +744,-15,15 +745,-13,13 +746,-13,13 +747,-13,13 +748,-15,15 +749,-13,13 +750,-13,13 +751,-13,13 +752,-13,13 +753,-13,13 +754,-13,13 +755,-13,13 +756,-13,13 +757,-13,13 +758,-13,13 +759,-13,13 +760,-13,13 +761,-13,13 +762,-13,13 +763,-122,23 +764,-15,15 +765,-13,13 +766,-13,13 +767,-13,13 +768,-13,13 +769,-13,13 +770,-13,13 +771,-13,13 +772,-13,13 +773,-13,13 +774,-15,15 +775,-13,13 +776,-13,13 +777,-14,14 +778,-13,13 +779,-13,13 +780,-13,13 +781,-13,13 +782,-13,13 +783,-17,17 +784,-13,13 +785,-13,13 +786,-13,13 +787,-15,15 +788,-13,13 +789,-13,13 +790,-13,13 +791,-13,13 +792,-13,13 +793,-15,15 +794,-13,13 +795,-13,13 +796,-13,13 +797,-13,13 +798,-13,13 +799,-13,13 diff --git a/projects/codes/QLearning/Train_FrozenLakeNoSlippery-v1_QLearning_20221030-014504/config.yaml b/projects/codes/QLearning/Train_FrozenLakeNoSlippery-v1_QLearning_20221030-014504/config.yaml new file mode 100644 index 0000000..a0bf456 --- /dev/null +++ b/projects/codes/QLearning/Train_FrozenLakeNoSlippery-v1_QLearning_20221030-014504/config.yaml @@ -0,0 +1,19 @@ +general_cfg: + algo_name: QLearning + device: cpu + env_name: FrozenLakeNoSlippery-v1 + load_checkpoint: false + load_path: Train_CartPole-v1_DQN_20221026-054757 + max_steps: 200 + mode: train + save_fig: true + seed: 10 + show_fig: false + test_eps: 20 + train_eps: 800 +algo_cfg: + epsilon_decay: 2000 + epsilon_end: 0.1 + epsilon_start: 0.7 + gamma: 0.95 + lr: 0.9 diff --git a/projects/codes/QLearning/Train_FrozenLakeNoSlippery-v1_QLearning_20221030-014504/logs/log.txt b/projects/codes/QLearning/Train_FrozenLakeNoSlippery-v1_QLearning_20221030-014504/logs/log.txt new file mode 100644 index 0000000..f52cf7f --- /dev/null +++ b/projects/codes/QLearning/Train_FrozenLakeNoSlippery-v1_QLearning_20221030-014504/logs/log.txt @@ -0,0 +1,804 @@ +2022-10-30 01:45:04 - r - INFO: - n_states: 16, n_actions: 4 +2022-10-30 01:45:04 - r - INFO: 
- Start training! +2022-10-30 01:45:04 - r - INFO: - Env: FrozenLakeNoSlippery-v1, Algorithm: QLearning, Device: cpu +2022-10-30 01:45:04 - r - INFO: - Episode: 1/800, Reward: 0.00: Epislon: 0.694 +2022-10-30 01:45:04 - r - INFO: - Episode: 2/800, Reward: 0.00: Epislon: 0.690 +2022-10-30 01:45:04 - r - INFO: - Episode: 3/800, Reward: 0.00: Epislon: 0.686 +2022-10-30 01:45:04 - r - INFO: - Episode: 4/800, Reward: 0.00: Epislon: 0.683 +2022-10-30 01:45:04 - r - INFO: - Episode: 5/800, Reward: 0.00: Epislon: 0.681 +2022-10-30 01:45:04 - r - INFO: - Episode: 6/800, Reward: 0.00: Epislon: 0.679 +2022-10-30 01:45:04 - r - INFO: - Episode: 7/800, Reward: 0.00: Epislon: 0.676 +2022-10-30 01:45:04 - r - INFO: - Episode: 8/800, Reward: 0.00: Epislon: 0.674 +2022-10-30 01:45:04 - r - INFO: - Episode: 9/800, Reward: 0.00: Epislon: 0.673 +2022-10-30 01:45:04 - r - INFO: - Episode: 10/800, Reward: 0.00: Epislon: 0.670 +2022-10-30 01:45:04 - r - INFO: - Episode: 11/800, Reward: 0.00: Epislon: 0.667 +2022-10-30 01:45:04 - r - INFO: - Episode: 12/800, Reward: 0.00: Epislon: 0.661 +2022-10-30 01:45:04 - r - INFO: - Episode: 13/800, Reward: 0.00: Epislon: 0.660 +2022-10-30 01:45:04 - r - INFO: - Episode: 14/800, Reward: 0.00: Epislon: 0.655 +2022-10-30 01:45:04 - r - INFO: - Episode: 15/800, Reward: 0.00: Epislon: 0.654 +2022-10-30 01:45:04 - r - INFO: - Episode: 16/800, Reward: 0.00: Epislon: 0.652 +2022-10-30 01:45:04 - r - INFO: - Episode: 17/800, Reward: 0.00: Epislon: 0.647 +2022-10-30 01:45:04 - r - INFO: - Episode: 18/800, Reward: 0.00: Epislon: 0.646 +2022-10-30 01:45:04 - r - INFO: - Episode: 19/800, Reward: 0.00: Epislon: 0.645 +2022-10-30 01:45:04 - r - INFO: - Episode: 20/800, Reward: 0.00: Epislon: 0.643 +2022-10-30 01:45:04 - r - INFO: - Episode: 21/800, Reward: 0.00: Epislon: 0.641 +2022-10-30 01:45:04 - r - INFO: - Episode: 22/800, Reward: 0.00: Epislon: 0.640 +2022-10-30 01:45:04 - r - INFO: - Episode: 23/800, Reward: 0.00: Epislon: 0.634 +2022-10-30 01:45:04 - r - 
INFO: - Episode: 24/800, Reward: 0.00: Epislon: 0.630 +2022-10-30 01:45:04 - r - INFO: - Episode: 25/800, Reward: 0.00: Epislon: 0.629 +2022-10-30 01:45:04 - r - INFO: - Episode: 26/800, Reward: 0.00: Epislon: 0.624 +2022-10-30 01:45:04 - r - INFO: - Episode: 27/800, Reward: 0.00: Epislon: 0.623 +2022-10-30 01:45:04 - r - INFO: - Episode: 28/800, Reward: 0.00: Epislon: 0.618 +2022-10-30 01:45:04 - r - INFO: - Episode: 29/800, Reward: 0.00: Epislon: 0.612 +2022-10-30 01:45:04 - r - INFO: - Episode: 30/800, Reward: 0.00: Epislon: 0.608 +2022-10-30 01:45:04 - r - INFO: - Episode: 31/800, Reward: 0.00: Epislon: 0.605 +2022-10-30 01:45:05 - r - INFO: - Episode: 32/800, Reward: 0.00: Epislon: 0.600 +2022-10-30 01:45:05 - r - INFO: - Episode: 33/800, Reward: 0.00: Epislon: 0.593 +2022-10-30 01:45:05 - r - INFO: - Episode: 34/800, Reward: 0.00: Epislon: 0.587 +2022-10-30 01:45:05 - r - INFO: - Episode: 35/800, Reward: 0.00: Epislon: 0.586 +2022-10-30 01:45:05 - r - INFO: - Episode: 36/800, Reward: 0.00: Epislon: 0.583 +2022-10-30 01:45:05 - r - INFO: - Episode: 37/800, Reward: 0.00: Epislon: 0.582 +2022-10-30 01:45:05 - r - INFO: - Episode: 38/800, Reward: 0.00: Epislon: 0.578 +2022-10-30 01:45:05 - r - INFO: - Episode: 39/800, Reward: 0.00: Epislon: 0.577 +2022-10-30 01:45:05 - r - INFO: - Episode: 40/800, Reward: 0.00: Epislon: 0.575 +2022-10-30 01:45:05 - r - INFO: - Episode: 41/800, Reward: 0.00: Epislon: 0.573 +2022-10-30 01:45:05 - r - INFO: - Episode: 42/800, Reward: 0.00: Epislon: 0.572 +2022-10-30 01:45:05 - r - INFO: - Episode: 43/800, Reward: 0.00: Epislon: 0.571 +2022-10-30 01:45:05 - r - INFO: - Episode: 44/800, Reward: 0.00: Epislon: 0.570 +2022-10-30 01:45:05 - r - INFO: - Episode: 45/800, Reward: 0.00: Epislon: 0.560 +2022-10-30 01:45:05 - r - INFO: - Episode: 46/800, Reward: 0.00: Epislon: 0.558 +2022-10-30 01:45:05 - r - INFO: - Episode: 47/800, Reward: 0.00: Epislon: 0.553 +2022-10-30 01:45:05 - r - INFO: - Episode: 48/800, Reward: 0.00: Epislon: 0.552 
+2022-10-30 01:45:05 - r - INFO: - Episode: 49/800, Reward: 1.00: Epislon: 0.544 +2022-10-30 01:45:05 - r - INFO: - Episode: 50/800, Reward: 0.00: Epislon: 0.537 +2022-10-30 01:45:05 - r - INFO: - Episode: 51/800, Reward: 0.00: Epislon: 0.534 +2022-10-30 01:45:05 - r - INFO: - Episode: 52/800, Reward: 0.00: Epislon: 0.533 +2022-10-30 01:45:05 - r - INFO: - Episode: 53/800, Reward: 0.00: Epislon: 0.532 +2022-10-30 01:45:05 - r - INFO: - Episode: 54/800, Reward: 0.00: Epislon: 0.527 +2022-10-30 01:45:05 - r - INFO: - Episode: 55/800, Reward: 0.00: Epislon: 0.526 +2022-10-30 01:45:05 - r - INFO: - Episode: 56/800, Reward: 0.00: Epislon: 0.525 +2022-10-30 01:45:05 - r - INFO: - Episode: 57/800, Reward: 0.00: Epislon: 0.519 +2022-10-30 01:45:05 - r - INFO: - Episode: 58/800, Reward: 0.00: Epislon: 0.518 +2022-10-30 01:45:05 - r - INFO: - Episode: 59/800, Reward: 0.00: Epislon: 0.516 +2022-10-30 01:45:05 - r - INFO: - Episode: 60/800, Reward: 0.00: Epislon: 0.514 +2022-10-30 01:45:05 - r - INFO: - Episode: 61/800, Reward: 0.00: Epislon: 0.512 +2022-10-30 01:45:05 - r - INFO: - Episode: 62/800, Reward: 0.00: Epislon: 0.511 +2022-10-30 01:45:05 - r - INFO: - Episode: 63/800, Reward: 0.00: Epislon: 0.506 +2022-10-30 01:45:05 - r - INFO: - Episode: 64/800, Reward: 0.00: Epislon: 0.504 +2022-10-30 01:45:05 - r - INFO: - Episode: 65/800, Reward: 0.00: Epislon: 0.503 +2022-10-30 01:45:05 - r - INFO: - Episode: 66/800, Reward: 0.00: Epislon: 0.502 +2022-10-30 01:45:05 - r - INFO: - Episode: 67/800, Reward: 0.00: Epislon: 0.501 +2022-10-30 01:45:05 - r - INFO: - Episode: 68/800, Reward: 0.00: Epislon: 0.497 +2022-10-30 01:45:05 - r - INFO: - Episode: 69/800, Reward: 0.00: Epislon: 0.496 +2022-10-30 01:45:05 - r - INFO: - Episode: 70/800, Reward: 0.00: Epislon: 0.491 +2022-10-30 01:45:05 - r - INFO: - Episode: 71/800, Reward: 0.00: Epislon: 0.489 +2022-10-30 01:45:05 - r - INFO: - Episode: 72/800, Reward: 0.00: Epislon: 0.487 +2022-10-30 01:45:05 - r - INFO: - Episode: 73/800, 
Reward: 0.00: Epislon: 0.486 +2022-10-30 01:45:05 - r - INFO: - Episode: 74/800, Reward: 0.00: Epislon: 0.481 +2022-10-30 01:45:05 - r - INFO: - Episode: 75/800, Reward: 0.00: Epislon: 0.477 +2022-10-30 01:45:05 - r - INFO: - Episode: 76/800, Reward: 0.00: Epislon: 0.475 +2022-10-30 01:45:05 - r - INFO: - Episode: 77/800, Reward: 0.00: Epislon: 0.474 +2022-10-30 01:45:05 - r - INFO: - Episode: 78/800, Reward: 0.00: Epislon: 0.468 +2022-10-30 01:45:05 - r - INFO: - Episode: 79/800, Reward: 0.00: Epislon: 0.465 +2022-10-30 01:45:05 - r - INFO: - Episode: 80/800, Reward: 0.00: Epislon: 0.464 +2022-10-30 01:45:05 - r - INFO: - Episode: 81/800, Reward: 0.00: Epislon: 0.462 +2022-10-30 01:45:05 - r - INFO: - Episode: 82/800, Reward: 0.00: Epislon: 0.460 +2022-10-30 01:45:05 - r - INFO: - Episode: 83/800, Reward: 0.00: Epislon: 0.457 +2022-10-30 01:45:05 - r - INFO: - Episode: 84/800, Reward: 0.00: Epislon: 0.455 +2022-10-30 01:45:05 - r - INFO: - Episode: 85/800, Reward: 0.00: Epislon: 0.454 +2022-10-30 01:45:05 - r - INFO: - Episode: 86/800, Reward: 0.00: Epislon: 0.452 +2022-10-30 01:45:05 - r - INFO: - Episode: 87/800, Reward: 0.00: Epislon: 0.444 +2022-10-30 01:45:05 - r - INFO: - Episode: 88/800, Reward: 0.00: Epislon: 0.440 +2022-10-30 01:45:05 - r - INFO: - Episode: 89/800, Reward: 0.00: Epislon: 0.414 +2022-10-30 01:45:05 - r - INFO: - Episode: 90/800, Reward: 0.00: Epislon: 0.413 +2022-10-30 01:45:05 - r - INFO: - Episode: 91/800, Reward: 0.00: Epislon: 0.411 +2022-10-30 01:45:05 - r - INFO: - Episode: 92/800, Reward: 0.00: Epislon: 0.407 +2022-10-30 01:45:05 - r - INFO: - Episode: 93/800, Reward: 0.00: Epislon: 0.407 +2022-10-30 01:45:05 - r - INFO: - Episode: 94/800, Reward: 0.00: Epislon: 0.406 +2022-10-30 01:45:05 - r - INFO: - Episode: 95/800, Reward: 0.00: Epislon: 0.403 +2022-10-30 01:45:05 - r - INFO: - Episode: 96/800, Reward: 0.00: Epislon: 0.390 +2022-10-30 01:45:05 - r - INFO: - Episode: 97/800, Reward: 0.00: Epislon: 0.386 +2022-10-30 01:45:05 - r - 
INFO: - Episode: 98/800, Reward: 0.00: Epislon: 0.385 +2022-10-30 01:45:05 - r - INFO: - Episode: 99/800, Reward: 0.00: Epislon: 0.385 +2022-10-30 01:45:05 - r - INFO: - Episode: 100/800, Reward: 0.00: Epislon: 0.383 +2022-10-30 01:45:05 - r - INFO: - Episode: 101/800, Reward: 0.00: Epislon: 0.381 +2022-10-30 01:45:05 - r - INFO: - Episode: 102/800, Reward: 0.00: Epislon: 0.380 +2022-10-30 01:45:05 - r - INFO: - Episode: 103/800, Reward: 0.00: Epislon: 0.378 +2022-10-30 01:45:05 - r - INFO: - Episode: 104/800, Reward: 0.00: Epislon: 0.366 +2022-10-30 01:45:05 - r - INFO: - Episode: 105/800, Reward: 0.00: Epislon: 0.365 +2022-10-30 01:45:05 - r - INFO: - Episode: 106/800, Reward: 0.00: Epislon: 0.359 +2022-10-30 01:45:05 - r - INFO: - Episode: 107/800, Reward: 0.00: Epislon: 0.357 +2022-10-30 01:45:05 - r - INFO: - Episode: 108/800, Reward: 0.00: Epislon: 0.356 +2022-10-30 01:45:05 - r - INFO: - Episode: 109/800, Reward: 0.00: Epislon: 0.350 +2022-10-30 01:45:05 - r - INFO: - Episode: 110/800, Reward: 0.00: Epislon: 0.347 +2022-10-30 01:45:05 - r - INFO: - Episode: 111/800, Reward: 0.00: Epislon: 0.345 +2022-10-30 01:45:05 - r - INFO: - Episode: 112/800, Reward: 0.00: Epislon: 0.343 +2022-10-30 01:45:05 - r - INFO: - Episode: 113/800, Reward: 0.00: Epislon: 0.322 +2022-10-30 01:45:05 - r - INFO: - Episode: 114/800, Reward: 0.00: Epislon: 0.317 +2022-10-30 01:45:05 - r - INFO: - Episode: 115/800, Reward: 0.00: Epislon: 0.308 +2022-10-30 01:45:05 - r - INFO: - Episode: 116/800, Reward: 0.00: Epislon: 0.306 +2022-10-30 01:45:05 - r - INFO: - Episode: 117/800, Reward: 0.00: Epislon: 0.303 +2022-10-30 01:45:05 - r - INFO: - Episode: 118/800, Reward: 0.00: Epislon: 0.300 +2022-10-30 01:45:05 - r - INFO: - Episode: 119/800, Reward: 0.00: Epislon: 0.300 +2022-10-30 01:45:05 - r - INFO: - Episode: 120/800, Reward: 0.00: Epislon: 0.291 +2022-10-30 01:45:05 - r - INFO: - Episode: 121/800, Reward: 0.00: Epislon: 0.290 +2022-10-30 01:45:05 - r - INFO: - Episode: 122/800, Reward: 
0.00: Epislon: 0.284 +2022-10-30 01:45:05 - r - INFO: - Episode: 123/800, Reward: 0.00: Epislon: 0.282 +2022-10-30 01:45:05 - r - INFO: - Episode: 124/800, Reward: 0.00: Epislon: 0.276 +2022-10-30 01:45:05 - r - INFO: - Episode: 125/800, Reward: 0.00: Epislon: 0.269 +2022-10-30 01:45:05 - r - INFO: - Episode: 126/800, Reward: 0.00: Epislon: 0.262 +2022-10-30 01:45:05 - r - INFO: - Episode: 127/800, Reward: 0.00: Epislon: 0.246 +2022-10-30 01:45:05 - r - INFO: - Episode: 128/800, Reward: 0.00: Epislon: 0.244 +2022-10-30 01:45:05 - r - INFO: - Episode: 129/800, Reward: 0.00: Epislon: 0.241 +2022-10-30 01:45:05 - r - INFO: - Episode: 130/800, Reward: 0.00: Epislon: 0.236 +2022-10-30 01:45:05 - r - INFO: - Episode: 131/800, Reward: 0.00: Epislon: 0.235 +2022-10-30 01:45:05 - r - INFO: - Episode: 132/800, Reward: 0.00: Epislon: 0.234 +2022-10-30 01:45:05 - r - INFO: - Episode: 133/800, Reward: 0.00: Epislon: 0.233 +2022-10-30 01:45:05 - r - INFO: - Episode: 134/800, Reward: 0.00: Epislon: 0.231 +2022-10-30 01:45:05 - r - INFO: - Episode: 135/800, Reward: 0.00: Epislon: 0.229 +2022-10-30 01:45:05 - r - INFO: - Episode: 136/800, Reward: 0.00: Epislon: 0.227 +2022-10-30 01:45:05 - r - INFO: - Episode: 137/800, Reward: 0.00: Epislon: 0.226 +2022-10-30 01:45:05 - r - INFO: - Episode: 138/800, Reward: 0.00: Epislon: 0.223 +2022-10-30 01:45:05 - r - INFO: - Episode: 139/800, Reward: 0.00: Epislon: 0.216 +2022-10-30 01:45:05 - r - INFO: - Episode: 140/800, Reward: 0.00: Epislon: 0.214 +2022-10-30 01:45:05 - r - INFO: - Episode: 141/800, Reward: 0.00: Epislon: 0.213 +2022-10-30 01:45:05 - r - INFO: - Episode: 142/800, Reward: 0.00: Epislon: 0.211 +2022-10-30 01:45:05 - r - INFO: - Episode: 143/800, Reward: 0.00: Epislon: 0.210 +2022-10-30 01:45:05 - r - INFO: - Episode: 144/800, Reward: 0.00: Epislon: 0.207 +2022-10-30 01:45:05 - r - INFO: - Episode: 145/800, Reward: 0.00: Epislon: 0.202 +2022-10-30 01:45:05 - r - INFO: - Episode: 146/800, Reward: 0.00: Epislon: 0.201 
+2022-10-30 01:45:05 - r - INFO: - Episode: 147/800, Reward: 0.00: Epislon: 0.198 +2022-10-30 01:45:05 - r - INFO: - Episode: 148/800, Reward: 0.00: Epislon: 0.196 +2022-10-30 01:45:05 - r - INFO: - Episode: 149/800, Reward: 0.00: Epislon: 0.195 +2022-10-30 01:45:05 - r - INFO: - Episode: 150/800, Reward: 0.00: Epislon: 0.192 +2022-10-30 01:45:05 - r - INFO: - Episode: 151/800, Reward: 0.00: Epislon: 0.190 +2022-10-30 01:45:05 - r - INFO: - Episode: 152/800, Reward: 0.00: Epislon: 0.188 +2022-10-30 01:45:05 - r - INFO: - Episode: 153/800, Reward: 0.00: Epislon: 0.186 +2022-10-30 01:45:05 - r - INFO: - Episode: 154/800, Reward: 0.00: Epislon: 0.185 +2022-10-30 01:45:05 - r - INFO: - Episode: 155/800, Reward: 0.00: Epislon: 0.185 +2022-10-30 01:45:05 - r - INFO: - Episode: 156/800, Reward: 0.00: Epislon: 0.183 +2022-10-30 01:45:05 - r - INFO: - Episode: 157/800, Reward: 0.00: Epislon: 0.182 +2022-10-30 01:45:05 - r - INFO: - Episode: 158/800, Reward: 0.00: Epislon: 0.181 +2022-10-30 01:45:05 - r - INFO: - Episode: 159/800, Reward: 0.00: Epislon: 0.179 +2022-10-30 01:45:05 - r - INFO: - Episode: 160/800, Reward: 0.00: Epislon: 0.173 +2022-10-30 01:45:05 - r - INFO: - Episode: 161/800, Reward: 0.00: Epislon: 0.169 +2022-10-30 01:45:05 - r - INFO: - Episode: 162/800, Reward: 0.00: Epislon: 0.167 +2022-10-30 01:45:05 - r - INFO: - Episode: 163/800, Reward: 0.00: Epislon: 0.165 +2022-10-30 01:45:05 - r - INFO: - Episode: 164/800, Reward: 0.00: Epislon: 0.165 +2022-10-30 01:45:05 - r - INFO: - Episode: 165/800, Reward: 0.00: Epislon: 0.163 +2022-10-30 01:45:05 - r - INFO: - Episode: 166/800, Reward: 0.00: Epislon: 0.163 +2022-10-30 01:45:05 - r - INFO: - Episode: 167/800, Reward: 0.00: Epislon: 0.162 +2022-10-30 01:45:05 - r - INFO: - Episode: 168/800, Reward: 0.00: Epislon: 0.161 +2022-10-30 01:45:05 - r - INFO: - Episode: 169/800, Reward: 0.00: Epislon: 0.160 +2022-10-30 01:45:05 - r - INFO: - Episode: 170/800, Reward: 0.00: Epislon: 0.159 +2022-10-30 01:45:05 - r - 
INFO: - Episode: 171/800, Reward: 0.00: Epislon: 0.158 +2022-10-30 01:45:05 - r - INFO: - Episode: 172/800, Reward: 0.00: Epislon: 0.155 +2022-10-30 01:45:05 - r - INFO: - Episode: 173/800, Reward: 0.00: Epislon: 0.151 +2022-10-30 01:45:05 - r - INFO: - Episode: 174/800, Reward: 0.00: Epislon: 0.149 +2022-10-30 01:45:05 - r - INFO: - Episode: 175/800, Reward: 0.00: Epislon: 0.148 +2022-10-30 01:45:05 - r - INFO: - Episode: 176/800, Reward: 0.00: Epislon: 0.148 +2022-10-30 01:45:05 - r - INFO: - Episode: 177/800, Reward: 0.00: Epislon: 0.148 +2022-10-30 01:45:05 - r - INFO: - Episode: 178/800, Reward: 0.00: Epislon: 0.147 +2022-10-30 01:45:05 - r - INFO: - Episode: 179/800, Reward: 0.00: Epislon: 0.146 +2022-10-30 01:45:05 - r - INFO: - Episode: 180/800, Reward: 0.00: Epislon: 0.146 +2022-10-30 01:45:05 - r - INFO: - Episode: 181/800, Reward: 0.00: Epislon: 0.145 +2022-10-30 01:45:05 - r - INFO: - Episode: 182/800, Reward: 0.00: Epislon: 0.144 +2022-10-30 01:45:05 - r - INFO: - Episode: 183/800, Reward: 0.00: Epislon: 0.140 +2022-10-30 01:45:05 - r - INFO: - Episode: 184/800, Reward: 0.00: Epislon: 0.139 +2022-10-30 01:45:05 - r - INFO: - Episode: 185/800, Reward: 0.00: Epislon: 0.138 +2022-10-30 01:45:05 - r - INFO: - Episode: 186/800, Reward: 0.00: Epislon: 0.137 +2022-10-30 01:45:05 - r - INFO: - Episode: 187/800, Reward: 0.00: Epislon: 0.137 +2022-10-30 01:45:05 - r - INFO: - Episode: 188/800, Reward: 0.00: Epislon: 0.134 +2022-10-30 01:45:05 - r - INFO: - Episode: 189/800, Reward: 0.00: Epislon: 0.134 +2022-10-30 01:45:05 - r - INFO: - Episode: 190/800, Reward: 0.00: Epislon: 0.133 +2022-10-30 01:45:05 - r - INFO: - Episode: 191/800, Reward: 0.00: Epislon: 0.133 +2022-10-30 01:45:05 - r - INFO: - Episode: 192/800, Reward: 0.00: Epislon: 0.132 +2022-10-30 01:45:05 - r - INFO: - Episode: 193/800, Reward: 0.00: Epislon: 0.131 +2022-10-30 01:45:05 - r - INFO: - Episode: 194/800, Reward: 0.00: Epislon: 0.131 +2022-10-30 01:45:05 - r - INFO: - Episode: 195/800, 
Reward: 0.00: Epislon: 0.130 +2022-10-30 01:45:05 - r - INFO: - Episode: 196/800, Reward: 0.00: Epislon: 0.129 +2022-10-30 01:45:05 - r - INFO: - Episode: 197/800, Reward: 0.00: Epislon: 0.129 +2022-10-30 01:45:05 - r - INFO: - Episode: 198/800, Reward: 0.00: Epislon: 0.126 +2022-10-30 01:45:05 - r - INFO: - Episode: 199/800, Reward: 0.00: Epislon: 0.123 +2022-10-30 01:45:05 - r - INFO: - Episode: 200/800, Reward: 0.00: Epislon: 0.123 +2022-10-30 01:45:05 - r - INFO: - Episode: 201/800, Reward: 0.00: Epislon: 0.122 +2022-10-30 01:45:05 - r - INFO: - Episode: 202/800, Reward: 0.00: Epislon: 0.122 +2022-10-30 01:45:05 - r - INFO: - Episode: 203/800, Reward: 0.00: Epislon: 0.122 +2022-10-30 01:45:05 - r - INFO: - Episode: 204/800, Reward: 0.00: Epislon: 0.121 +2022-10-30 01:45:05 - r - INFO: - Episode: 205/800, Reward: 0.00: Epislon: 0.119 +2022-10-30 01:45:05 - r - INFO: - Episode: 206/800, Reward: 0.00: Epislon: 0.119 +2022-10-30 01:45:05 - r - INFO: - Episode: 207/800, Reward: 0.00: Epislon: 0.119 +2022-10-30 01:45:05 - r - INFO: - Episode: 208/800, Reward: 0.00: Epislon: 0.118 +2022-10-30 01:45:05 - r - INFO: - Episode: 209/800, Reward: 0.00: Epislon: 0.118 +2022-10-30 01:45:05 - r - INFO: - Episode: 210/800, Reward: 0.00: Epislon: 0.118 +2022-10-30 01:45:05 - r - INFO: - Episode: 211/800, Reward: 0.00: Epislon: 0.116 +2022-10-30 01:45:05 - r - INFO: - Episode: 212/800, Reward: 0.00: Epislon: 0.115 +2022-10-30 01:45:05 - r - INFO: - Episode: 213/800, Reward: 0.00: Epislon: 0.115 +2022-10-30 01:45:05 - r - INFO: - Episode: 214/800, Reward: 0.00: Epislon: 0.114 +2022-10-30 01:45:05 - r - INFO: - Episode: 215/800, Reward: 0.00: Epislon: 0.113 +2022-10-30 01:45:05 - r - INFO: - Episode: 216/800, Reward: 0.00: Epislon: 0.113 +2022-10-30 01:45:05 - r - INFO: - Episode: 217/800, Reward: 0.00: Epislon: 0.112 +2022-10-30 01:45:05 - r - INFO: - Episode: 218/800, Reward: 0.00: Epislon: 0.111 +2022-10-30 01:45:05 - r - INFO: - Episode: 219/800, Reward: 0.00: Epislon: 0.111 
+2022-10-30 01:45:05 - r - INFO: - Episode: 220/800, Reward: 0.00: Epislon: 0.111 +2022-10-30 01:45:05 - r - INFO: - Episode: 221/800, Reward: 0.00: Epislon: 0.110 +2022-10-30 01:45:05 - r - INFO: - Episode: 222/800, Reward: 0.00: Epislon: 0.110 +2022-10-30 01:45:05 - r - INFO: - Episode: 223/800, Reward: 0.00: Epislon: 0.109 +2022-10-30 01:45:05 - r - INFO: - Episode: 224/800, Reward: 0.00: Epislon: 0.108 +2022-10-30 01:45:05 - r - INFO: - Episode: 225/800, Reward: 0.00: Epislon: 0.108 +2022-10-30 01:45:05 - r - INFO: - Episode: 226/800, Reward: 0.00: Epislon: 0.108 +2022-10-30 01:45:05 - r - INFO: - Episode: 227/800, Reward: 0.00: Epislon: 0.108 +2022-10-30 01:45:05 - r - INFO: - Episode: 228/800, Reward: 0.00: Epislon: 0.107 +2022-10-30 01:45:05 - r - INFO: - Episode: 229/800, Reward: 0.00: Epislon: 0.107 +2022-10-30 01:45:05 - r - INFO: - Episode: 230/800, Reward: 0.00: Epislon: 0.107 +2022-10-30 01:45:05 - r - INFO: - Episode: 231/800, Reward: 0.00: Epislon: 0.107 +2022-10-30 01:45:05 - r - INFO: - Episode: 232/800, Reward: 0.00: Epislon: 0.106 +2022-10-30 01:45:05 - r - INFO: - Episode: 233/800, Reward: 0.00: Epislon: 0.106 +2022-10-30 01:45:05 - r - INFO: - Episode: 234/800, Reward: 0.00: Epislon: 0.106 +2022-10-30 01:45:05 - r - INFO: - Episode: 235/800, Reward: 0.00: Epislon: 0.105 +2022-10-30 01:45:05 - r - INFO: - Episode: 236/800, Reward: 0.00: Epislon: 0.105 +2022-10-30 01:45:05 - r - INFO: - Episode: 237/800, Reward: 0.00: Epislon: 0.105 +2022-10-30 01:45:05 - r - INFO: - Episode: 238/800, Reward: 0.00: Epislon: 0.105 +2022-10-30 01:45:05 - r - INFO: - Episode: 239/800, Reward: 0.00: Epislon: 0.104 +2022-10-30 01:45:05 - r - INFO: - Episode: 240/800, Reward: 0.00: Epislon: 0.104 +2022-10-30 01:45:05 - r - INFO: - Episode: 241/800, Reward: 0.00: Epislon: 0.104 +2022-10-30 01:45:05 - r - INFO: - Episode: 242/800, Reward: 0.00: Epislon: 0.103 +2022-10-30 01:45:05 - r - INFO: - Episode: 243/800, Reward: 0.00: Epislon: 0.103 +2022-10-30 01:45:05 - r - 
INFO: - Episode: 244/800, Reward: 0.00: Epislon: 0.103 +2022-10-30 01:45:05 - r - INFO: - Episode: 245/800, Reward: 0.00: Epislon: 0.103 +2022-10-30 01:45:05 - r - INFO: - Episode: 246/800, Reward: 0.00: Epislon: 0.103 +2022-10-30 01:45:05 - r - INFO: - Episode: 247/800, Reward: 0.00: Epislon: 0.103 +2022-10-30 01:45:05 - r - INFO: - Episode: 248/800, Reward: 0.00: Epislon: 0.103 +2022-10-30 01:45:05 - r - INFO: - Episode: 249/800, Reward: 0.00: Epislon: 0.103 +2022-10-30 01:45:05 - r - INFO: - Episode: 250/800, Reward: 0.00: Epislon: 0.103 +2022-10-30 01:45:05 - r - INFO: - Episode: 251/800, Reward: 0.00: Epislon: 0.103 +2022-10-30 01:45:05 - r - INFO: - Episode: 252/800, Reward: 0.00: Epislon: 0.102 +2022-10-30 01:45:05 - r - INFO: - Episode: 253/800, Reward: 0.00: Epislon: 0.102 +2022-10-30 01:45:05 - r - INFO: - Episode: 254/800, Reward: 0.00: Epislon: 0.102 +2022-10-30 01:45:05 - r - INFO: - Episode: 255/800, Reward: 0.00: Epislon: 0.102 +2022-10-30 01:45:05 - r - INFO: - Episode: 256/800, Reward: 0.00: Epislon: 0.102 +2022-10-30 01:45:05 - r - INFO: - Episode: 257/800, Reward: 0.00: Epislon: 0.102 +2022-10-30 01:45:05 - r - INFO: - Episode: 258/800, Reward: 0.00: Epislon: 0.102 +2022-10-30 01:45:05 - r - INFO: - Episode: 259/800, Reward: 0.00: Epislon: 0.102 +2022-10-30 01:45:05 - r - INFO: - Episode: 260/800, Reward: 0.00: Epislon: 0.102 +2022-10-30 01:45:05 - r - INFO: - Episode: 261/800, Reward: 0.00: Epislon: 0.102 +2022-10-30 01:45:05 - r - INFO: - Episode: 262/800, Reward: 0.00: Epislon: 0.102 +2022-10-30 01:45:05 - r - INFO: - Episode: 263/800, Reward: 0.00: Epislon: 0.101 +2022-10-30 01:45:05 - r - INFO: - Episode: 264/800, Reward: 0.00: Epislon: 0.101 +2022-10-30 01:45:05 - r - INFO: - Episode: 265/800, Reward: 0.00: Epislon: 0.101 +2022-10-30 01:45:05 - r - INFO: - Episode: 266/800, Reward: 0.00: Epislon: 0.101 +2022-10-30 01:45:05 - r - INFO: - Episode: 267/800, Reward: 0.00: Epislon: 0.101 +2022-10-30 01:45:05 - r - INFO: - Episode: 268/800, 
Reward: 0.00: Epislon: 0.101 +2022-10-30 01:45:05 - r - INFO: - Episode: 269/800, Reward: 0.00: Epislon: 0.101 +2022-10-30 01:45:05 - r - INFO: - Episode: 270/800, Reward: 0.00: Epislon: 0.101 +2022-10-30 01:45:05 - r - INFO: - Episode: 271/800, Reward: 0.00: Epislon: 0.101 +2022-10-30 01:45:05 - r - INFO: - Episode: 272/800, Reward: 0.00: Epislon: 0.101 +2022-10-30 01:45:05 - r - INFO: - Episode: 273/800, Reward: 1.00: Epislon: 0.101 +2022-10-30 01:45:05 - r - INFO: - Episode: 274/800, Reward: 0.00: Epislon: 0.101 +2022-10-30 01:45:05 - r - INFO: - Episode: 275/800, Reward: 0.00: Epislon: 0.101 +2022-10-30 01:45:05 - r - INFO: - Episode: 276/800, Reward: 0.00: Epislon: 0.101 +2022-10-30 01:45:05 - r - INFO: - Episode: 277/800, Reward: 0.00: Epislon: 0.101 +2022-10-30 01:45:05 - r - INFO: - Episode: 278/800, Reward: 0.00: Epislon: 0.101 +2022-10-30 01:45:05 - r - INFO: - Episode: 279/800, Reward: 0.00: Epislon: 0.101 +2022-10-30 01:45:05 - r - INFO: - Episode: 280/800, Reward: 0.00: Epislon: 0.101 +2022-10-30 01:45:05 - r - INFO: - Episode: 281/800, Reward: 0.00: Epislon: 0.101 +2022-10-30 01:45:05 - r - INFO: - Episode: 282/800, Reward: 0.00: Epislon: 0.101 +2022-10-30 01:45:05 - r - INFO: - Episode: 283/800, Reward: 0.00: Epislon: 0.101 +2022-10-30 01:45:05 - r - INFO: - Episode: 284/800, Reward: 0.00: Epislon: 0.101 +2022-10-30 01:45:05 - r - INFO: - Episode: 285/800, Reward: 0.00: Epislon: 0.101 +2022-10-30 01:45:05 - r - INFO: - Episode: 286/800, Reward: 0.00: Epislon: 0.101 +2022-10-30 01:45:05 - r - INFO: - Episode: 287/800, Reward: 0.00: Epislon: 0.101 +2022-10-30 01:45:05 - r - INFO: - Episode: 288/800, Reward: 0.00: Epislon: 0.101 +2022-10-30 01:45:05 - r - INFO: - Episode: 289/800, Reward: 0.00: Epislon: 0.101 +2022-10-30 01:45:05 - r - INFO: - Episode: 290/800, Reward: 0.00: Epislon: 0.101 +2022-10-30 01:45:05 - r - INFO: - Episode: 291/800, Reward: 0.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 292/800, Reward: 0.00: Epislon: 0.100 
+2022-10-30 01:45:05 - r - INFO: - Episode: 293/800, Reward: 0.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 294/800, Reward: 0.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 295/800, Reward: 0.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 296/800, Reward: 0.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 297/800, Reward: 0.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 298/800, Reward: 0.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 299/800, Reward: 0.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 300/800, Reward: 0.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 301/800, Reward: 0.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 302/800, Reward: 0.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 303/800, Reward: 0.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 304/800, Reward: 0.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 305/800, Reward: 0.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 306/800, Reward: 0.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 307/800, Reward: 0.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 308/800, Reward: 0.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 309/800, Reward: 0.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 310/800, Reward: 0.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 311/800, Reward: 0.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 312/800, Reward: 0.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 313/800, Reward: 0.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 314/800, Reward: 0.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 315/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 316/800, Reward: 0.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - 
INFO: - Episode: 317/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 318/800, Reward: 0.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 319/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 320/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 321/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 322/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 323/800, Reward: 0.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 324/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 325/800, Reward: 0.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 326/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 327/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 328/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 329/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 330/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 331/800, Reward: 0.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 332/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 333/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 334/800, Reward: 0.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 335/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 336/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 337/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 338/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 339/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 340/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 341/800, 
Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 342/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 343/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 344/800, Reward: 0.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 345/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 346/800, Reward: 0.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 347/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 348/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 349/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 350/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 351/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 352/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 353/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 354/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 355/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 356/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 357/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 358/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 359/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 360/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 361/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 362/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 363/800, Reward: 0.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 364/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 365/800, Reward: 1.00: Epislon: 0.100 
+2022-10-30 01:45:05 - r - INFO: - Episode: 366/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 367/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 368/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 369/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 370/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 371/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 372/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 373/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 374/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 375/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 376/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 377/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 378/800, Reward: 0.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 379/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 380/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 381/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 382/800, Reward: 0.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 383/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 384/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 385/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 386/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 387/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 388/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 389/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - 
INFO: - Episode: 390/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 391/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 392/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 393/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 394/800, Reward: 0.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 395/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 396/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 397/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 398/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 399/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 400/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 401/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 402/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 403/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 404/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 405/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 406/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 407/800, Reward: 0.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 408/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 409/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 410/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 411/800, Reward: 0.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 412/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 413/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 414/800, 
Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 415/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 416/800, Reward: 0.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 417/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 418/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 419/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 420/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 421/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 422/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 423/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 424/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 425/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 426/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 427/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 428/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 429/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 430/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 431/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 432/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 433/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 434/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 435/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 436/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 437/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 438/800, Reward: 0.00: Epislon: 0.100 
+2022-10-30 01:45:05 - r - INFO: - Episode: 439/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 440/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 441/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 442/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 443/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 444/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 445/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 446/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 447/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 448/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 449/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 450/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 451/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 452/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 453/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 454/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 455/800, Reward: 0.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 456/800, Reward: 0.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 457/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 458/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 459/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 460/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 461/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 462/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - 
INFO: - Episode: 463/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 464/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 465/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 466/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 467/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 468/800, Reward: 0.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 469/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 470/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 471/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 472/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 473/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 474/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 475/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 476/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 477/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 478/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 479/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 480/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 481/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 482/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 483/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 484/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 485/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 486/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 487/800, 
Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 488/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 489/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 490/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 491/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 492/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 493/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 494/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 495/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 496/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 497/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 498/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 499/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 500/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 501/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 502/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 503/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 504/800, Reward: 0.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 505/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 506/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 507/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 508/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 509/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 510/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 511/800, Reward: 1.00: Epislon: 0.100 
+2022-10-30 01:45:05 - r - INFO: - Episode: 512/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 513/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 514/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 515/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 516/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 517/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 518/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 519/800, Reward: 0.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 520/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 521/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 522/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 523/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 524/800, Reward: 0.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 525/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 526/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 527/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 528/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 529/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 530/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 531/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 532/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 533/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 534/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 535/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - 
INFO: - Episode: 536/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 537/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 538/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 539/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 540/800, Reward: 0.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 541/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 542/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 543/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 544/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 545/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 546/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 547/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 548/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 549/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 550/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 551/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 552/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 553/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 554/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 555/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 556/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 557/800, Reward: 0.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 558/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 559/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 560/800, 
Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 561/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 562/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 563/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 564/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 565/800, Reward: 0.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 566/800, Reward: 0.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 567/800, Reward: 0.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 568/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 569/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 570/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 571/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 572/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 573/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 574/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 575/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 576/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 577/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 578/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 579/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 580/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 581/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 582/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 583/800, Reward: 0.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 584/800, Reward: 1.00: Epislon: 0.100 
+2022-10-30 01:45:05 - r - INFO: - Episode: 585/800, Reward: 0.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 586/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 587/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 588/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 589/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 590/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 591/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 592/800, Reward: 0.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 593/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 594/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 595/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 596/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 597/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 598/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 599/800, Reward: 0.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 600/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 601/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 602/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 603/800, Reward: 0.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 604/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 605/800, Reward: 0.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 606/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 607/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 608/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - 
INFO: - Episode: 609/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 610/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 611/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 612/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 613/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 614/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 615/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 616/800, Reward: 0.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 617/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 618/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 619/800, Reward: 0.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 620/800, Reward: 0.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 621/800, Reward: 0.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 622/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 623/800, Reward: 0.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 624/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 625/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 626/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 627/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 628/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 629/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 630/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 631/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 632/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 633/800, 
Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 634/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 635/800, Reward: 0.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 636/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 637/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 638/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 639/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 640/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 641/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 642/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 643/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 644/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 645/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 646/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 647/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 648/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 649/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 650/800, Reward: 0.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 651/800, Reward: 0.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 652/800, Reward: 0.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 653/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 654/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 655/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 656/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 657/800, Reward: 1.00: Epislon: 0.100 
+2022-10-30 01:45:05 - r - INFO: - Episode: 658/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 659/800, Reward: 0.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 660/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 661/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 662/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 663/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 664/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 665/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 666/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 667/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 668/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 669/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 670/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 671/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 672/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 673/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 674/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 675/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 676/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 677/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 678/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 679/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 680/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 681/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - 
INFO: - Episode: 682/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 683/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 684/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 685/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 686/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 687/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 688/800, Reward: 0.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 689/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 690/800, Reward: 0.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 691/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 692/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 693/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 694/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 695/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 696/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 697/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 698/800, Reward: 0.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 699/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 700/800, Reward: 0.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 701/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 702/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 703/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 704/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 705/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 706/800, 
Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 707/800, Reward: 0.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 708/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 709/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 710/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 711/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 712/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 713/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 714/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 715/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 716/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 717/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 718/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 719/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 720/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 721/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 722/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 723/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 724/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 725/800, Reward: 0.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 726/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 727/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 728/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 729/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 730/800, Reward: 1.00: Epislon: 0.100 
+2022-10-30 01:45:05 - r - INFO: - Episode: 731/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 732/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 733/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 734/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 735/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 736/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 737/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 738/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 739/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 740/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 741/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 742/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 743/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 744/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 745/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 746/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 747/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 748/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 749/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 750/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 751/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 752/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 753/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 754/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - 
INFO: - Episode: 755/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 756/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 757/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 758/800, Reward: 0.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 759/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 760/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 761/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 762/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 763/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 764/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 765/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 766/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 767/800, Reward: 0.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 768/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 769/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 770/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 771/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 772/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 773/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 774/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 775/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 776/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 777/800, Reward: 0.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 778/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 779/800, 
Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 780/800, Reward: 0.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 781/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 782/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 783/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 784/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 785/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 786/800, Reward: 0.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 787/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 788/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 789/800, Reward: 0.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 790/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 791/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 792/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 793/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 794/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 795/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 796/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 797/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 798/800, Reward: 0.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 799/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Episode: 800/800, Reward: 1.00: Epislon: 0.100 +2022-10-30 01:45:05 - r - INFO: - Finish training! 
diff --git a/projects/codes/QLearning/Train_FrozenLakeNoSlippery-v1_QLearning_20221030-014504/models/Qleaning_model.pkl b/projects/codes/QLearning/Train_FrozenLakeNoSlippery-v1_QLearning_20221030-014504/models/Qleaning_model.pkl new file mode 100644 index 0000000..41a5a05 Binary files /dev/null and b/projects/codes/QLearning/Train_FrozenLakeNoSlippery-v1_QLearning_20221030-014504/models/Qleaning_model.pkl differ diff --git a/projects/codes/QLearning/Train_FrozenLakeNoSlippery-v1_QLearning_20221030-014504/results/learning_curve.png b/projects/codes/QLearning/Train_FrozenLakeNoSlippery-v1_QLearning_20221030-014504/results/learning_curve.png new file mode 100644 index 0000000..ad789b7 Binary files /dev/null and b/projects/codes/QLearning/Train_FrozenLakeNoSlippery-v1_QLearning_20221030-014504/results/learning_curve.png differ diff --git a/projects/codes/QLearning/Train_FrozenLakeNoSlippery-v1_QLearning_20221030-014504/results/res.csv b/projects/codes/QLearning/Train_FrozenLakeNoSlippery-v1_QLearning_20221030-014504/results/res.csv new file mode 100644 index 0000000..335c1d8 --- /dev/null +++ b/projects/codes/QLearning/Train_FrozenLakeNoSlippery-v1_QLearning_20221030-014504/results/res.csv @@ -0,0 +1,801 @@ +episodes,rewards,steps +0,0.0,20 +1,0.0,14 +2,0.0,13 +3,0.0,9 +4,0.0,10 +5,0.0,6 +6,0.0,11 +7,0.0,6 +8,0.0,3 +9,0.0,9 +10,0.0,11 +11,0.0,22 +12,0.0,5 +13,0.0,16 +14,0.0,4 +15,0.0,9 +16,0.0,18 +17,0.0,2 +18,0.0,4 +19,0.0,8 +20,0.0,7 +21,0.0,4 +22,0.0,22 +23,0.0,15 +24,0.0,5 +25,0.0,16 +26,0.0,7 +27,0.0,19 +28,0.0,22 +29,0.0,16 +30,0.0,11 +31,0.0,22 +32,0.0,28 +33,0.0,23 +34,0.0,4 +35,0.0,11 +36,0.0,8 +37,0.0,15 +38,0.0,5 +39,0.0,7 +40,0.0,9 +41,0.0,4 +42,0.0,3 +43,0.0,6 +44,0.0,41 +45,0.0,9 +46,0.0,23 +47,0.0,3 +48,1.0,38 +49,0.0,29 +50,0.0,17 +51,0.0,4 +52,0.0,2 +53,0.0,25 +54,0.0,6 +55,0.0,2 +56,0.0,30 +57,0.0,6 +58,0.0,7 +59,0.0,11 +60,0.0,9 +61,0.0,8 +62,0.0,23 +63,0.0,10 +64,0.0,3 +65,0.0,5 +66,0.0,7 +67,0.0,18 +68,0.0,8 +69,0.0,26 +70,0.0,6 +71,0.0,14 
+72,0.0,4 +73,0.0,25 +74,0.0,21 +75,0.0,13 +76,0.0,4 +77,0.0,29 +78,0.0,21 +79,0.0,6 +80,0.0,6 +81,0.0,11 +82,0.0,21 +83,0.0,9 +84,0.0,9 +85,0.0,7 +86,0.0,48 +87,0.0,23 +88,0.0,160 +89,0.0,7 +90,0.0,10 +91,0.0,24 +92,0.0,4 +93,0.0,7 +94,0.0,17 +95,0.0,87 +96,0.0,28 +97,0.0,7 +98,0.0,5 +99,0.0,12 +100,0.0,14 +101,0.0,6 +102,0.0,13 +103,0.0,93 +104,0.0,4 +105,0.0,50 +106,0.0,8 +107,0.0,12 +108,0.0,43 +109,0.0,30 +110,0.0,15 +111,0.0,19 +112,0.0,182 +113,0.0,40 +114,0.0,88 +115,0.0,19 +116,0.0,30 +117,0.0,27 +118,0.0,5 +119,0.0,87 +120,0.0,9 +121,0.0,64 +122,0.0,27 +123,0.0,68 +124,0.0,81 +125,0.0,86 +126,0.0,200 +127,0.0,27 +128,0.0,41 +129,0.0,70 +130,0.0,27 +131,0.0,6 +132,0.0,18 +133,0.0,38 +134,0.0,26 +135,0.0,36 +136,0.0,3 +137,0.0,61 +138,0.0,105 +139,0.0,38 +140,0.0,18 +141,0.0,33 +142,0.0,29 +143,0.0,49 +144,0.0,88 +145,0.0,22 +146,0.0,65 +147,0.0,36 +148,0.0,30 +149,0.0,58 +150,0.0,43 +151,0.0,53 +152,0.0,43 +153,0.0,13 +154,0.0,8 +155,0.0,39 +156,0.0,29 +157,0.0,26 +158,0.0,60 +159,0.0,153 +160,0.0,116 +161,0.0,53 +162,0.0,54 +163,0.0,8 +164,0.0,58 +165,0.0,3 +166,0.0,47 +167,0.0,16 +168,0.0,21 +169,0.0,44 +170,0.0,29 +171,0.0,104 +172,0.0,158 +173,0.0,83 +174,0.0,26 +175,0.0,24 +176,0.0,10 +177,0.0,12 +178,0.0,40 +179,0.0,25 +180,0.0,18 +181,0.0,60 +182,0.0,200 +183,0.0,24 +184,0.0,56 +185,0.0,71 +186,0.0,19 +187,0.0,118 +188,0.0,26 +189,0.0,41 +190,0.0,41 +191,0.0,60 +192,0.0,31 +193,0.0,34 +194,0.0,35 +195,0.0,59 +196,0.0,51 +197,0.0,200 +198,0.0,200 +199,0.0,37 +200,0.0,68 +201,0.0,40 +202,0.0,17 +203,0.0,79 +204,0.0,126 +205,0.0,61 +206,0.0,25 +207,0.0,18 +208,0.0,27 +209,0.0,13 +210,0.0,187 +211,0.0,160 +212,0.0,32 +213,0.0,108 +214,0.0,164 +215,0.0,17 +216,0.0,82 +217,0.0,194 +218,0.0,7 +219,0.0,36 +220,0.0,156 +221,0.0,17 +222,0.0,183 +223,0.0,200 +224,0.0,43 +225,0.0,87 +226,0.0,42 +227,0.0,80 +228,0.0,54 +229,0.0,82 +230,0.0,97 +231,0.0,65 +232,0.0,83 +233,0.0,159 +234,0.0,178 +235,0.0,104 +236,0.0,21 +237,0.0,118 +238,0.0,80 +239,0.0,170 
+240,0.0,94 +241,0.0,200 +242,0.0,37 +243,0.0,11 +244,0.0,31 +245,0.0,134 +246,0.0,32 +247,0.0,58 +248,0.0,38 +249,0.0,28 +250,0.0,159 +251,0.0,182 +252,0.0,51 +253,0.0,25 +254,0.0,73 +255,0.0,56 +256,0.0,55 +257,0.0,38 +258,0.0,200 +259,0.0,92 +260,0.0,200 +261,0.0,119 +262,0.0,100 +263,0.0,84 +264,0.0,24 +265,0.0,17 +266,0.0,159 +267,0.0,25 +268,0.0,73 +269,0.0,130 +270,0.0,111 +271,0.0,65 +272,1.0,58 +273,0.0,47 +274,0.0,48 +275,0.0,13 +276,0.0,100 +277,0.0,38 +278,0.0,111 +279,0.0,200 +280,0.0,26 +281,0.0,38 +282,0.0,83 +283,0.0,42 +284,0.0,199 +285,0.0,83 +286,0.0,28 +287,0.0,46 +288,0.0,200 +289,0.0,62 +290,0.0,123 +291,0.0,91 +292,0.0,53 +293,0.0,19 +294,0.0,26 +295,0.0,93 +296,0.0,38 +297,0.0,22 +298,0.0,43 +299,0.0,163 +300,0.0,25 +301,0.0,59 +302,0.0,71 +303,0.0,20 +304,0.0,115 +305,0.0,200 +306,0.0,48 +307,0.0,66 +308,0.0,58 +309,0.0,129 +310,0.0,122 +311,0.0,47 +312,0.0,60 +313,0.0,79 +314,1.0,137 +315,0.0,27 +316,1.0,93 +317,0.0,46 +318,1.0,83 +319,1.0,8 +320,1.0,6 +321,1.0,6 +322,0.0,4 +323,1.0,6 +324,0.0,2 +325,1.0,6 +326,1.0,6 +327,1.0,6 +328,1.0,6 +329,1.0,8 +330,0.0,5 +331,1.0,6 +332,1.0,7 +333,0.0,5 +334,1.0,6 +335,1.0,6 +336,1.0,8 +337,1.0,6 +338,1.0,6 +339,1.0,6 +340,1.0,7 +341,1.0,6 +342,1.0,6 +343,0.0,3 +344,1.0,7 +345,0.0,4 +346,1.0,6 +347,1.0,6 +348,1.0,7 +349,1.0,6 +350,1.0,6 +351,1.0,7 +352,1.0,7 +353,1.0,7 +354,1.0,6 +355,1.0,6 +356,1.0,6 +357,1.0,6 +358,1.0,6 +359,1.0,6 +360,1.0,6 +361,1.0,7 +362,0.0,4 +363,1.0,8 +364,1.0,8 +365,1.0,7 +366,1.0,6 +367,1.0,8 +368,1.0,6 +369,1.0,6 +370,1.0,7 +371,1.0,6 +372,1.0,6 +373,1.0,8 +374,1.0,7 +375,1.0,6 +376,1.0,6 +377,0.0,3 +378,1.0,11 +379,1.0,6 +380,1.0,8 +381,0.0,2 +382,1.0,6 +383,1.0,6 +384,1.0,6 +385,1.0,6 +386,1.0,8 +387,1.0,6 +388,1.0,7 +389,1.0,6 +390,1.0,7 +391,1.0,6 +392,1.0,8 +393,0.0,2 +394,1.0,6 +395,1.0,7 +396,1.0,6 +397,1.0,6 +398,1.0,10 +399,1.0,7 +400,1.0,6 +401,1.0,6 +402,1.0,6 +403,1.0,6 +404,1.0,6 +405,1.0,7 +406,0.0,4 +407,1.0,7 +408,1.0,6 +409,1.0,8 +410,0.0,3 +411,1.0,6 
+412,1.0,6 +413,1.0,6 +414,1.0,6 +415,0.0,2 +416,1.0,6 +417,1.0,6 +418,1.0,6 +419,1.0,6 +420,1.0,6 +421,1.0,7 +422,1.0,6 +423,1.0,6 +424,1.0,7 +425,1.0,6 +426,1.0,6 +427,1.0,6 +428,1.0,6 +429,1.0,6 +430,1.0,6 +431,1.0,6 +432,1.0,8 +433,1.0,6 +434,1.0,8 +435,1.0,7 +436,1.0,6 +437,0.0,3 +438,1.0,6 +439,1.0,7 +440,1.0,6 +441,1.0,6 +442,1.0,6 +443,1.0,10 +444,1.0,6 +445,1.0,6 +446,1.0,6 +447,1.0,6 +448,1.0,10 +449,1.0,6 +450,1.0,8 +451,1.0,8 +452,1.0,7 +453,1.0,6 +454,0.0,5 +455,0.0,2 +456,1.0,8 +457,1.0,6 +458,1.0,10 +459,1.0,6 +460,1.0,8 +461,1.0,10 +462,1.0,6 +463,1.0,6 +464,1.0,6 +465,1.0,10 +466,1.0,6 +467,0.0,4 +468,1.0,6 +469,1.0,6 +470,1.0,6 +471,1.0,15 +472,1.0,6 +473,1.0,6 +474,1.0,6 +475,1.0,6 +476,1.0,6 +477,1.0,6 +478,1.0,8 +479,1.0,6 +480,1.0,7 +481,1.0,6 +482,1.0,6 +483,1.0,8 +484,1.0,6 +485,1.0,6 +486,1.0,8 +487,1.0,8 +488,1.0,6 +489,1.0,6 +490,1.0,6 +491,1.0,10 +492,1.0,6 +493,1.0,6 +494,1.0,6 +495,1.0,6 +496,1.0,6 +497,1.0,6 +498,1.0,6 +499,1.0,8 +500,1.0,8 +501,1.0,6 +502,1.0,6 +503,0.0,2 +504,1.0,6 +505,1.0,6 +506,1.0,6 +507,1.0,8 +508,1.0,6 +509,1.0,6 +510,1.0,6 +511,1.0,6 +512,1.0,6 +513,1.0,6 +514,1.0,6 +515,1.0,6 +516,1.0,6 +517,1.0,7 +518,0.0,3 +519,1.0,7 +520,1.0,6 +521,1.0,6 +522,1.0,6 +523,0.0,2 +524,1.0,6 +525,1.0,8 +526,1.0,6 +527,1.0,6 +528,1.0,6 +529,1.0,6 +530,1.0,9 +531,1.0,6 +532,1.0,6 +533,1.0,6 +534,1.0,6 +535,1.0,6 +536,1.0,6 +537,1.0,9 +538,1.0,7 +539,0.0,4 +540,1.0,6 +541,1.0,8 +542,1.0,11 +543,1.0,6 +544,1.0,6 +545,1.0,6 +546,1.0,6 +547,1.0,6 +548,1.0,8 +549,1.0,6 +550,1.0,6 +551,1.0,8 +552,1.0,7 +553,1.0,6 +554,1.0,8 +555,1.0,6 +556,0.0,5 +557,1.0,9 +558,1.0,8 +559,1.0,8 +560,1.0,6 +561,1.0,8 +562,1.0,8 +563,1.0,6 +564,0.0,5 +565,0.0,3 +566,0.0,2 +567,1.0,8 +568,1.0,6 +569,1.0,6 +570,1.0,6 +571,1.0,6 +572,1.0,6 +573,1.0,6 +574,1.0,6 +575,1.0,6 +576,1.0,6 +577,1.0,6 +578,1.0,6 +579,1.0,6 +580,1.0,6 +581,1.0,6 +582,0.0,2 +583,1.0,6 +584,0.0,4 +585,1.0,6 +586,1.0,6 +587,1.0,6 +588,1.0,6 +589,1.0,6 +590,1.0,8 +591,0.0,5 +592,1.0,6 
+593,1.0,6 +594,1.0,6 +595,1.0,6 +596,1.0,6 +597,1.0,6 +598,0.0,3 +599,1.0,6 +600,1.0,6 +601,1.0,6 +602,0.0,2 +603,1.0,6 +604,0.0,4 +605,1.0,6 +606,1.0,6 +607,1.0,6 +608,1.0,6 +609,1.0,8 +610,1.0,6 +611,1.0,7 +612,1.0,6 +613,1.0,7 +614,1.0,6 +615,0.0,2 +616,1.0,6 +617,1.0,6 +618,0.0,5 +619,0.0,3 +620,0.0,3 +621,1.0,6 +622,0.0,5 +623,1.0,8 +624,1.0,8 +625,1.0,6 +626,1.0,6 +627,1.0,7 +628,1.0,6 +629,1.0,6 +630,1.0,6 +631,1.0,6 +632,1.0,6 +633,1.0,8 +634,0.0,2 +635,1.0,6 +636,1.0,6 +637,1.0,6 +638,1.0,6 +639,1.0,6 +640,1.0,6 +641,1.0,6 +642,1.0,8 +643,1.0,6 +644,1.0,8 +645,1.0,6 +646,1.0,6 +647,1.0,8 +648,1.0,8 +649,0.0,5 +650,0.0,4 +651,0.0,4 +652,1.0,6 +653,1.0,6 +654,1.0,6 +655,1.0,6 +656,1.0,8 +657,1.0,6 +658,0.0,4 +659,1.0,6 +660,1.0,8 +661,1.0,6 +662,1.0,6 +663,1.0,6 +664,1.0,6 +665,1.0,6 +666,1.0,6 +667,1.0,6 +668,1.0,8 +669,1.0,8 +670,1.0,6 +671,1.0,8 +672,1.0,9 +673,1.0,6 +674,1.0,6 +675,1.0,6 +676,1.0,6 +677,1.0,10 +678,1.0,6 +679,1.0,6 +680,1.0,6 +681,1.0,11 +682,1.0,10 +683,1.0,8 +684,1.0,6 +685,1.0,6 +686,1.0,6 +687,0.0,5 +688,1.0,6 +689,0.0,2 +690,1.0,9 +691,1.0,6 +692,1.0,8 +693,1.0,7 +694,1.0,6 +695,1.0,6 +696,1.0,7 +697,0.0,3 +698,1.0,7 +699,0.0,2 +700,1.0,6 +701,1.0,6 +702,1.0,8 +703,1.0,8 +704,1.0,6 +705,1.0,6 +706,0.0,2 +707,1.0,8 +708,1.0,6 +709,1.0,8 +710,1.0,6 +711,1.0,6 +712,1.0,9 +713,1.0,6 +714,1.0,8 +715,1.0,11 +716,1.0,6 +717,1.0,6 +718,1.0,6 +719,1.0,6 +720,1.0,8 +721,1.0,6 +722,1.0,6 +723,1.0,6 +724,0.0,5 +725,1.0,6 +726,1.0,6 +727,1.0,6 +728,1.0,6 +729,1.0,6 +730,1.0,7 +731,1.0,6 +732,1.0,6 +733,1.0,6 +734,1.0,6 +735,1.0,10 +736,1.0,6 +737,1.0,6 +738,1.0,6 +739,1.0,6 +740,1.0,6 +741,1.0,7 +742,1.0,6 +743,1.0,8 +744,1.0,7 +745,1.0,6 +746,1.0,6 +747,1.0,14 +748,1.0,6 +749,1.0,6 +750,1.0,12 +751,1.0,6 +752,1.0,6 +753,1.0,6 +754,1.0,6 +755,1.0,6 +756,1.0,6 +757,0.0,3 +758,1.0,6 +759,1.0,6 +760,1.0,6 +761,1.0,7 +762,1.0,6 +763,1.0,6 +764,1.0,6 +765,1.0,8 +766,0.0,2 +767,1.0,6 +768,1.0,6 +769,1.0,6 +770,1.0,6 +771,1.0,6 +772,1.0,6 +773,1.0,6 
+774,1.0,6 +775,1.0,6 +776,0.0,4 +777,1.0,8 +778,1.0,6 +779,0.0,2 +780,1.0,10 +781,1.0,8 +782,1.0,6 +783,1.0,6 +784,1.0,6 +785,0.0,3 +786,1.0,6 +787,1.0,6 +788,0.0,6 +789,1.0,8 +790,1.0,6 +791,1.0,9 +792,1.0,6 +793,1.0,6 +794,1.0,8 +795,1.0,8 +796,1.0,6 +797,0.0,5 +798,1.0,6 +799,1.0,6 diff --git a/projects/codes/QLearning/Train_Racetrack-v0_QLearning_20221030-014833/config.yaml b/projects/codes/QLearning/Train_Racetrack-v0_QLearning_20221030-014833/config.yaml new file mode 100644 index 0000000..d5b9c4c --- /dev/null +++ b/projects/codes/QLearning/Train_Racetrack-v0_QLearning_20221030-014833/config.yaml @@ -0,0 +1,19 @@ +general_cfg: + algo_name: QLearning + device: cpu + env_name: Racetrack-v0 + load_checkpoint: false + load_path: Train_CartPole-v1_DQN_20221026-054757 + max_steps: 200 + mode: train + save_fig: true + seed: 10 + show_fig: false + test_eps: 20 + train_eps: 400 +algo_cfg: + epsilon_decay: 300 + epsilon_end: 0.01 + epsilon_start: 0.95 + gamma: 0.9 + lr: 0.1 diff --git a/projects/codes/QLearning/Train_Racetrack-v0_QLearning_20221030-014833/logs/log.txt b/projects/codes/QLearning/Train_Racetrack-v0_QLearning_20221030-014833/logs/log.txt new file mode 100644 index 0000000..e737550 --- /dev/null +++ b/projects/codes/QLearning/Train_Racetrack-v0_QLearning_20221030-014833/logs/log.txt @@ -0,0 +1,404 @@ +2022-10-30 01:48:33 - r - INFO: - n_states: 4, n_actions: 9 +2022-10-30 01:48:33 - r - INFO: - Start training! 
+2022-10-30 01:48:33 - r - INFO: - Env: Racetrack-v0, Algorithm: QLearning, Device: cpu +2022-10-30 01:48:33 - r - INFO: - Episode: 1/400, Reward: -850.00: Epislon: 0.493 +2022-10-30 01:48:33 - r - INFO: - Episode: 2/400, Reward: -780.00: Epislon: 0.258 +2022-10-30 01:48:33 - r - INFO: - Episode: 3/400, Reward: -730.00: Epislon: 0.137 +2022-10-30 01:48:33 - r - INFO: - Episode: 4/400, Reward: -650.00: Epislon: 0.075 +2022-10-30 01:48:33 - r - INFO: - Episode: 5/400, Reward: -540.00: Epislon: 0.044 +2022-10-30 01:48:33 - r - INFO: - Episode: 6/400, Reward: -640.00: Epislon: 0.027 +2022-10-30 01:48:34 - r - INFO: - Episode: 7/400, Reward: -570.00: Epislon: 0.019 +2022-10-30 01:48:34 - r - INFO: - Episode: 8/400, Reward: -570.00: Epislon: 0.015 +2022-10-30 01:48:34 - r - INFO: - Episode: 9/400, Reward: -550.00: Epislon: 0.012 +2022-10-30 01:48:34 - r - INFO: - Episode: 10/400, Reward: -550.00: Epislon: 0.011 +2022-10-30 01:48:34 - r - INFO: - Episode: 11/400, Reward: -580.00: Epislon: 0.011 +2022-10-30 01:48:34 - r - INFO: - Episode: 12/400, Reward: -530.00: Epislon: 0.010 +2022-10-30 01:48:34 - r - INFO: - Episode: 13/400, Reward: -580.00: Epislon: 0.010 +2022-10-30 01:48:34 - r - INFO: - Episode: 14/400, Reward: -570.00: Epislon: 0.010 +2022-10-30 01:48:34 - r - INFO: - Episode: 15/400, Reward: -550.00: Epislon: 0.010 +2022-10-30 01:48:34 - r - INFO: - Episode: 16/400, Reward: -560.00: Epislon: 0.010 +2022-10-30 01:48:34 - r - INFO: - Episode: 17/400, Reward: -550.00: Epislon: 0.010 +2022-10-30 01:48:34 - r - INFO: - Episode: 18/400, Reward: -580.00: Epislon: 0.010 +2022-10-30 01:48:34 - r - INFO: - Episode: 19/400, Reward: -520.00: Epislon: 0.010 +2022-10-30 01:48:34 - r - INFO: - Episode: 20/400, Reward: -490.00: Epislon: 0.010 +2022-10-30 01:48:34 - r - INFO: - Episode: 21/400, Reward: -480.00: Epislon: 0.010 +2022-10-30 01:48:34 - r - INFO: - Episode: 22/400, Reward: -540.00: Epislon: 0.010 +2022-10-30 01:48:34 - r - INFO: - Episode: 23/400, Reward: -550.00: 
Epislon: 0.010 +2022-10-30 01:48:34 - r - INFO: - Episode: 24/400, Reward: -560.00: Epislon: 0.010 +2022-10-30 01:48:34 - r - INFO: - Episode: 25/400, Reward: -510.00: Epislon: 0.010 +2022-10-30 01:48:34 - r - INFO: - Episode: 26/400, Reward: -520.00: Epislon: 0.010 +2022-10-30 01:48:34 - r - INFO: - Episode: 27/400, Reward: -480.00: Epislon: 0.010 +2022-10-30 01:48:34 - r - INFO: - Episode: 28/400, Reward: -520.00: Epislon: 0.010 +2022-10-30 01:48:34 - r - INFO: - Episode: 29/400, Reward: -480.00: Epislon: 0.010 +2022-10-30 01:48:34 - r - INFO: - Episode: 30/400, Reward: -470.00: Epislon: 0.010 +2022-10-30 01:48:34 - r - INFO: - Episode: 31/400, Reward: -540.00: Epislon: 0.010 +2022-10-30 01:48:34 - r - INFO: - Episode: 32/400, Reward: -540.00: Epislon: 0.010 +2022-10-30 01:48:34 - r - INFO: - Episode: 33/400, Reward: -470.00: Epislon: 0.010 +2022-10-30 01:48:35 - r - INFO: - Episode: 34/400, Reward: -540.00: Epislon: 0.010 +2022-10-30 01:48:35 - r - INFO: - Episode: 35/400, Reward: -490.00: Epislon: 0.010 +2022-10-30 01:48:35 - r - INFO: - Episode: 36/400, Reward: -530.00: Epislon: 0.010 +2022-10-30 01:48:35 - r - INFO: - Episode: 37/400, Reward: -520.00: Epislon: 0.010 +2022-10-30 01:48:35 - r - INFO: - Episode: 38/400, Reward: -510.00: Epislon: 0.010 +2022-10-30 01:48:35 - r - INFO: - Episode: 39/400, Reward: -520.00: Epislon: 0.010 +2022-10-30 01:48:35 - r - INFO: - Episode: 40/400, Reward: -510.00: Epislon: 0.010 +2022-10-30 01:48:35 - r - INFO: - Episode: 41/400, Reward: -480.00: Epislon: 0.010 +2022-10-30 01:48:35 - r - INFO: - Episode: 42/400, Reward: -510.00: Epislon: 0.010 +2022-10-30 01:48:35 - r - INFO: - Episode: 43/400, Reward: -470.00: Epislon: 0.010 +2022-10-30 01:48:35 - r - INFO: - Episode: 44/400, Reward: -490.00: Epislon: 0.010 +2022-10-30 01:48:35 - r - INFO: - Episode: 45/400, Reward: -490.00: Epislon: 0.010 +2022-10-30 01:48:35 - r - INFO: - Episode: 46/400, Reward: -490.00: Epislon: 0.010 +2022-10-30 01:48:35 - r - INFO: - Episode: 47/400, 
Reward: -520.00: Epislon: 0.010 +2022-10-30 01:48:35 - r - INFO: - Episode: 48/400, Reward: -530.00: Epislon: 0.010 +2022-10-30 01:48:35 - r - INFO: - Episode: 49/400, Reward: -510.00: Epislon: 0.010 +2022-10-30 01:48:35 - r - INFO: - Episode: 50/400, Reward: -460.00: Epislon: 0.010 +2022-10-30 01:48:35 - r - INFO: - Episode: 51/400, Reward: -500.00: Epislon: 0.010 +2022-10-30 01:48:35 - r - INFO: - Episode: 52/400, Reward: -470.00: Epislon: 0.010 +2022-10-30 01:48:35 - r - INFO: - Episode: 53/400, Reward: -520.00: Epislon: 0.010 +2022-10-30 01:48:35 - r - INFO: - Episode: 54/400, Reward: -490.00: Epislon: 0.010 +2022-10-30 01:48:35 - r - INFO: - Episode: 55/400, Reward: -500.00: Epislon: 0.010 +2022-10-30 01:48:35 - r - INFO: - Episode: 56/400, Reward: -460.00: Epislon: 0.010 +2022-10-30 01:48:35 - r - INFO: - Episode: 57/400, Reward: -490.00: Epislon: 0.010 +2022-10-30 01:48:35 - r - INFO: - Episode: 58/400, Reward: -510.00: Epislon: 0.010 +2022-10-30 01:48:35 - r - INFO: - Episode: 59/400, Reward: -460.00: Epislon: 0.010 +2022-10-30 01:48:35 - r - INFO: - Episode: 60/400, Reward: -530.00: Epislon: 0.010 +2022-10-30 01:48:35 - r - INFO: - Episode: 61/400, Reward: -440.00: Epislon: 0.010 +2022-10-30 01:48:35 - r - INFO: - Episode: 62/400, Reward: -510.00: Epislon: 0.010 +2022-10-30 01:48:36 - r - INFO: - Episode: 63/400, Reward: -520.00: Epislon: 0.010 +2022-10-30 01:48:36 - r - INFO: - Episode: 64/400, Reward: -510.00: Epislon: 0.010 +2022-10-30 01:48:36 - r - INFO: - Episode: 65/400, Reward: -460.00: Epislon: 0.010 +2022-10-30 01:48:36 - r - INFO: - Episode: 66/400, Reward: -344.00: Epislon: 0.010 +2022-10-30 01:48:36 - r - INFO: - Episode: 67/400, Reward: -500.00: Epislon: 0.010 +2022-10-30 01:48:36 - r - INFO: - Episode: 68/400, Reward: -490.00: Epislon: 0.010 +2022-10-30 01:48:36 - r - INFO: - Episode: 69/400, Reward: -490.00: Epislon: 0.010 +2022-10-30 01:48:36 - r - INFO: - Episode: 70/400, Reward: -440.00: Epislon: 0.010 +2022-10-30 01:48:36 - r - INFO: - 
Episode: 71/400, Reward: -77.00: Epislon: 0.010 +2022-10-30 01:48:36 - r - INFO: - Episode: 72/400, Reward: -198.00: Epislon: 0.010 +2022-10-30 01:48:36 - r - INFO: - Episode: 73/400, Reward: -440.00: Epislon: 0.010 +2022-10-30 01:48:36 - r - INFO: - Episode: 74/400, Reward: -480.00: Epislon: 0.010 +2022-10-30 01:48:36 - r - INFO: - Episode: 75/400, Reward: -354.00: Epislon: 0.010 +2022-10-30 01:48:36 - r - INFO: - Episode: 76/400, Reward: -470.00: Epislon: 0.010 +2022-10-30 01:48:36 - r - INFO: - Episode: 77/400, Reward: -480.00: Epislon: 0.010 +2022-10-30 01:48:36 - r - INFO: - Episode: 78/400, Reward: -38.00: Epislon: 0.010 +2022-10-30 01:48:36 - r - INFO: - Episode: 79/400, Reward: -460.00: Epislon: 0.010 +2022-10-30 01:48:36 - r - INFO: - Episode: 80/400, Reward: -480.00: Epislon: 0.010 +2022-10-30 01:48:36 - r - INFO: - Episode: 81/400, Reward: -490.00: Epislon: 0.010 +2022-10-30 01:48:36 - r - INFO: - Episode: 82/400, Reward: -140.00: Epislon: 0.010 +2022-10-30 01:48:36 - r - INFO: - Episode: 83/400, Reward: -102.00: Epislon: 0.010 +2022-10-30 01:48:36 - r - INFO: - Episode: 84/400, Reward: -265.00: Epislon: 0.010 +2022-10-30 01:48:36 - r - INFO: - Episode: 85/400, Reward: -145.00: Epislon: 0.010 +2022-10-30 01:48:36 - r - INFO: - Episode: 86/400, Reward: -460.00: Epislon: 0.010 +2022-10-30 01:48:36 - r - INFO: - Episode: 87/400, Reward: -500.00: Epislon: 0.010 +2022-10-30 01:48:36 - r - INFO: - Episode: 88/400, Reward: -470.00: Epislon: 0.010 +2022-10-30 01:48:36 - r - INFO: - Episode: 89/400, Reward: -325.00: Epislon: 0.010 +2022-10-30 01:48:36 - r - INFO: - Episode: 90/400, Reward: -470.00: Epislon: 0.010 +2022-10-30 01:48:36 - r - INFO: - Episode: 91/400, Reward: -376.00: Epislon: 0.010 +2022-10-30 01:48:36 - r - INFO: - Episode: 92/400, Reward: -98.00: Epislon: 0.010 +2022-10-30 01:48:36 - r - INFO: - Episode: 93/400, Reward: -130.00: Epislon: 0.010 +2022-10-30 01:48:36 - r - INFO: - Episode: 94/400, Reward: -450.00: Epislon: 0.010 +2022-10-30 01:48:36 
- r - INFO: - Episode: 95/400, Reward: -146.00: Epislon: 0.010 +2022-10-30 01:48:36 - r - INFO: - Episode: 96/400, Reward: 2.00: Epislon: 0.010 +2022-10-30 01:48:36 - r - INFO: - Episode: 97/400, Reward: -18.00: Epislon: 0.010 +2022-10-30 01:48:36 - r - INFO: - Episode: 98/400, Reward: -102.00: Epislon: 0.010 +2022-10-30 01:48:36 - r - INFO: - Episode: 99/400, Reward: -163.00: Epislon: 0.010 +2022-10-30 01:48:36 - r - INFO: - Episode: 100/400, Reward: -209.00: Epislon: 0.010 +2022-10-30 01:48:36 - r - INFO: - Episode: 101/400, Reward: -460.00: Epislon: 0.010 +2022-10-30 01:48:36 - r - INFO: - Episode: 102/400, Reward: -286.00: Epislon: 0.010 +2022-10-30 01:48:36 - r - INFO: - Episode: 103/400, Reward: -189.00: Epislon: 0.010 +2022-10-30 01:48:36 - r - INFO: - Episode: 104/400, Reward: -50.00: Epislon: 0.010 +2022-10-30 01:48:36 - r - INFO: - Episode: 105/400, Reward: -398.00: Epislon: 0.010 +2022-10-30 01:48:36 - r - INFO: - Episode: 106/400, Reward: -72.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 107/400, Reward: -450.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 108/400, Reward: -125.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 109/400, Reward: 4.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 110/400, Reward: -161.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 111/400, Reward: -408.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 112/400, Reward: -440.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 113/400, Reward: -188.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 114/400, Reward: -114.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 115/400, Reward: -415.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 116/400, Reward: -159.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 117/400, Reward: -234.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 118/400, Reward: -31.00: Epislon: 
0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 119/400, Reward: 3.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 120/400, Reward: 4.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 121/400, Reward: -63.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 122/400, Reward: 5.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 123/400, Reward: -47.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 124/400, Reward: -16.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 125/400, Reward: -49.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 126/400, Reward: -87.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 127/400, Reward: -2.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 128/400, Reward: -26.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 129/400, Reward: -238.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 130/400, Reward: -18.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 131/400, Reward: -235.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 132/400, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 133/400, Reward: -135.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 134/400, Reward: -20.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 135/400, Reward: -46.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 136/400, Reward: -66.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 137/400, Reward: -45.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 138/400, Reward: -14.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 139/400, Reward: 1.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 140/400, Reward: -106.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 141/400, Reward: -112.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 142/400, Reward: 
-47.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 143/400, Reward: 1.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 144/400, Reward: -30.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 145/400, Reward: -147.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 146/400, Reward: 5.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 147/400, Reward: -30.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 148/400, Reward: -167.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 149/400, Reward: 1.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 150/400, Reward: -72.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 151/400, Reward: -44.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 152/400, Reward: -76.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 153/400, Reward: -63.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 154/400, Reward: -34.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 155/400, Reward: 5.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 156/400, Reward: 5.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 157/400, Reward: -26.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 158/400, Reward: -80.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 159/400, Reward: -168.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 160/400, Reward: -164.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 161/400, Reward: 1.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 162/400, Reward: -19.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 163/400, Reward: -12.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 164/400, Reward: -44.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 165/400, Reward: -80.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 166/400, 
Reward: 5.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 167/400, Reward: 4.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 168/400, Reward: -29.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 169/400, Reward: -56.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 170/400, Reward: -47.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 171/400, Reward: -76.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 172/400, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 173/400, Reward: -145.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 174/400, Reward: -28.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 175/400, Reward: -63.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 176/400, Reward: -106.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 177/400, Reward: 3.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 178/400, Reward: -28.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 179/400, Reward: -60.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 180/400, Reward: -49.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 181/400, Reward: -52.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 182/400, Reward: -84.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 183/400, Reward: 5.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 184/400, Reward: -55.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 185/400, Reward: -14.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 186/400, Reward: 1.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 187/400, Reward: -39.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 188/400, Reward: -47.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 189/400, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 
190/400, Reward: -53.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 191/400, Reward: -50.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 192/400, Reward: -104.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 193/400, Reward: -253.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 194/400, Reward: -48.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 195/400, Reward: -190.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 196/400, Reward: -43.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 197/400, Reward: -35.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 198/400, Reward: 0.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 199/400, Reward: 5.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 200/400, Reward: -11.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 201/400, Reward: 5.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 202/400, Reward: -16.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 203/400, Reward: -99.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 204/400, Reward: -22.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 205/400, Reward: -170.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 206/400, Reward: -109.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 207/400, Reward: -48.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 208/400, Reward: -275.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 209/400, Reward: -49.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 210/400, Reward: -147.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 211/400, Reward: -51.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 212/400, Reward: -67.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 213/400, Reward: 4.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - 
INFO: - Episode: 214/400, Reward: -17.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 215/400, Reward: 3.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 216/400, Reward: -69.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 217/400, Reward: -218.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 218/400, Reward: -63.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 219/400, Reward: -11.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 220/400, Reward: -34.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 221/400, Reward: -32.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 222/400, Reward: -15.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 223/400, Reward: -26.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 224/400, Reward: -19.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 225/400, Reward: -148.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 226/400, Reward: -19.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 227/400, Reward: 1.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 228/400, Reward: -49.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 229/400, Reward: 5.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 230/400, Reward: 3.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 231/400, Reward: -223.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 232/400, Reward: -14.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 233/400, Reward: 4.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 234/400, Reward: 5.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 235/400, Reward: 2.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 236/400, Reward: 5.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 237/400, Reward: 4.00: Epislon: 0.010 +2022-10-30 01:48:37 - r 
- INFO: - Episode: 238/400, Reward: 3.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 239/400, Reward: 3.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 240/400, Reward: 4.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 241/400, Reward: -44.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 242/400, Reward: -10.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 243/400, Reward: 2.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 244/400, Reward: -108.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 245/400, Reward: 4.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 246/400, Reward: -27.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 247/400, Reward: 3.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 248/400, Reward: 5.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 249/400, Reward: 5.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 250/400, Reward: 3.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 251/400, Reward: -15.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 252/400, Reward: -28.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 253/400, Reward: -112.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 254/400, Reward: -39.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 255/400, Reward: 4.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 256/400, Reward: -48.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 257/400, Reward: -149.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 258/400, Reward: -27.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 259/400, Reward: -33.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 260/400, Reward: -30.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 261/400, Reward: -29.00: Epislon: 0.010 +2022-10-30 01:48:37 - r 
- INFO: - Episode: 262/400, Reward: 4.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 263/400, Reward: 4.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 264/400, Reward: 3.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 265/400, Reward: 3.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 266/400, Reward: 4.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 267/400, Reward: 5.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 268/400, Reward: -52.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 269/400, Reward: -53.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 270/400, Reward: -62.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 271/400, Reward: 5.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 272/400, Reward: 4.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 273/400, Reward: 4.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 274/400, Reward: -10.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 275/400, Reward: -8.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 276/400, Reward: -30.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 277/400, Reward: -25.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 278/400, Reward: -45.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 279/400, Reward: -48.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 280/400, Reward: -15.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 281/400, Reward: 4.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 282/400, Reward: -14.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 283/400, Reward: -26.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 284/400, Reward: -116.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 285/400, Reward: 5.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - 
INFO: - Episode: 286/400, Reward: -14.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 287/400, Reward: -42.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 288/400, Reward: 3.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 289/400, Reward: -31.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 290/400, Reward: 4.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 291/400, Reward: -25.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 292/400, Reward: 5.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 293/400, Reward: -43.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 294/400, Reward: -21.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 295/400, Reward: -33.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 296/400, Reward: -12.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 297/400, Reward: -28.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 298/400, Reward: 3.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 299/400, Reward: 0.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 300/400, Reward: 4.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 301/400, Reward: -6.00: Epislon: 0.010 +2022-10-30 01:48:37 - r - INFO: - Episode: 302/400, Reward: 4.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 303/400, Reward: 2.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 304/400, Reward: -12.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 305/400, Reward: 4.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 306/400, Reward: -77.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 307/400, Reward: 5.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 308/400, Reward: -32.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 309/400, Reward: 5.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: 
- Episode: 310/400, Reward: -12.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 311/400, Reward: -36.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 312/400, Reward: 4.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 313/400, Reward: 4.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 314/400, Reward: -34.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 315/400, Reward: 4.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 316/400, Reward: -21.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 317/400, Reward: -48.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 318/400, Reward: 4.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 319/400, Reward: 5.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 320/400, Reward: -25.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 321/400, Reward: 4.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 322/400, Reward: -14.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 323/400, Reward: -135.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 324/400, Reward: 3.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 325/400, Reward: 5.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 326/400, Reward: 4.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 327/400, Reward: 3.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 328/400, Reward: -11.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 329/400, Reward: 3.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 330/400, Reward: -11.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 331/400, Reward: 3.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 332/400, Reward: 4.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 333/400, Reward: 3.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 
334/400, Reward: 5.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 335/400, Reward: -12.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 336/400, Reward: -22.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 337/400, Reward: -16.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 338/400, Reward: -17.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 339/400, Reward: -14.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 340/400, Reward: 3.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 341/400, Reward: 5.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 342/400, Reward: -15.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 343/400, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 344/400, Reward: 4.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 345/400, Reward: -90.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 346/400, Reward: 3.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 347/400, Reward: 3.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 348/400, Reward: -53.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 349/400, Reward: -87.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 350/400, Reward: -22.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 351/400, Reward: 5.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 352/400, Reward: -12.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 353/400, Reward: 3.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 354/400, Reward: 4.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 355/400, Reward: -113.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 356/400, Reward: 3.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 357/400, Reward: 3.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 
358/400, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 359/400, Reward: -15.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 360/400, Reward: -14.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 361/400, Reward: 2.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 362/400, Reward: -15.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 363/400, Reward: -63.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 364/400, Reward: -14.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 365/400, Reward: -15.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 366/400, Reward: 3.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 367/400, Reward: 3.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 368/400, Reward: 4.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 369/400, Reward: 4.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 370/400, Reward: 4.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 371/400, Reward: 3.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 372/400, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 373/400, Reward: -12.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 374/400, Reward: -30.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 375/400, Reward: 3.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 376/400, Reward: 2.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 377/400, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 378/400, Reward: 5.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 379/400, Reward: -31.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 380/400, Reward: 3.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 381/400, Reward: 2.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 
382/400, Reward: 4.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 383/400, Reward: 4.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 384/400, Reward: -84.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 385/400, Reward: 3.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 386/400, Reward: -27.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 387/400, Reward: 4.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 388/400, Reward: 4.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 389/400, Reward: 4.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 390/400, Reward: 2.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 391/400, Reward: 2.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 392/400, Reward: 3.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 393/400, Reward: 4.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 394/400, Reward: 4.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 395/400, Reward: -18.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 396/400, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 397/400, Reward: -41.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 398/400, Reward: 5.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 399/400, Reward: -41.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Episode: 400/400, Reward: -13.00: Epislon: 0.010 +2022-10-30 01:48:38 - r - INFO: - Finish training! 
diff --git a/projects/codes/QLearning/Train_Racetrack-v0_QLearning_20221030-014833/models/Qleaning_model.pkl b/projects/codes/QLearning/Train_Racetrack-v0_QLearning_20221030-014833/models/Qleaning_model.pkl new file mode 100644 index 0000000..1f458e1 Binary files /dev/null and b/projects/codes/QLearning/Train_Racetrack-v0_QLearning_20221030-014833/models/Qleaning_model.pkl differ diff --git a/projects/codes/QLearning/Train_Racetrack-v0_QLearning_20221030-014833/results/learning_curve.png b/projects/codes/QLearning/Train_Racetrack-v0_QLearning_20221030-014833/results/learning_curve.png new file mode 100644 index 0000000..8c1c331 Binary files /dev/null and b/projects/codes/QLearning/Train_Racetrack-v0_QLearning_20221030-014833/results/learning_curve.png differ diff --git a/projects/codes/QLearning/Train_Racetrack-v0_QLearning_20221030-014833/results/res.csv b/projects/codes/QLearning/Train_Racetrack-v0_QLearning_20221030-014833/results/res.csv new file mode 100644 index 0000000..79373d8 --- /dev/null +++ b/projects/codes/QLearning/Train_Racetrack-v0_QLearning_20221030-014833/results/res.csv @@ -0,0 +1,401 @@ +episodes,rewards,steps +0,-850,200 +1,-780,200 +2,-730,200 +3,-650,200 +4,-540,200 +5,-640,200 +6,-570,200 +7,-570,200 +8,-550,200 +9,-550,200 +10,-580,200 +11,-530,200 +12,-580,200 +13,-570,200 +14,-550,200 +15,-560,200 +16,-550,200 +17,-580,200 +18,-520,200 +19,-490,200 +20,-480,200 +21,-540,200 +22,-550,200 +23,-560,200 +24,-510,200 +25,-520,200 +26,-480,200 +27,-520,200 +28,-480,200 +29,-470,200 +30,-540,200 +31,-540,200 +32,-470,200 +33,-540,200 +34,-490,200 +35,-530,200 +36,-520,200 +37,-510,200 +38,-520,200 +39,-510,200 +40,-480,200 +41,-510,200 +42,-470,200 +43,-490,200 +44,-490,200 +45,-490,200 +46,-520,200 +47,-530,200 +48,-510,200 +49,-460,200 +50,-500,200 +51,-470,200 +52,-520,200 +53,-490,200 +54,-500,200 +55,-460,200 +56,-490,200 +57,-510,200 +58,-460,200 +59,-530,200 +60,-440,200 +61,-510,200 +62,-520,200 +63,-510,200 +64,-460,200 +65,-344,154 
+66,-500,200 +67,-490,200 +68,-490,200 +69,-440,200 +70,-77,47 +71,-198,88 +72,-440,200 +73,-480,200 +74,-354,154 +75,-470,200 +76,-480,200 +77,-38,28 +78,-460,200 +79,-480,200 +80,-490,200 +81,-140,70 +82,-102,52 +83,-265,125 +84,-145,75 +85,-460,200 +86,-500,200 +87,-470,200 +88,-325,155 +89,-470,200 +90,-376,156 +91,-98,58 +92,-130,70 +93,-450,200 +94,-146,66 +95,2,8 +96,-18,18 +97,-102,52 +98,-163,73 +99,-209,89 +100,-460,200 +101,-286,126 +102,-189,89 +103,-50,30 +104,-398,168 +105,-72,32 +106,-450,200 +107,-125,65 +108,4,6 +109,-161,71 +110,-408,178 +111,-440,200 +112,-188,78 +113,-114,64 +114,-415,185 +115,-159,69 +116,-234,104 +117,-31,21 +118,3,7 +119,4,6 +120,-63,33 +121,5,5 +122,-47,27 +123,-16,16 +124,-49,29 +125,-87,47 +126,-2,12 +127,-26,16 +128,-238,108 +129,-18,18 +130,-235,105 +131,-13,13 +132,-135,65 +133,-20,20 +134,-46,26 +135,-66,36 +136,-45,25 +137,-14,14 +138,1,9 +139,-106,56 +140,-112,62 +141,-47,27 +142,1,9 +143,-30,20 +144,-147,77 +145,5,5 +146,-30,20 +147,-167,77 +148,1,9 +149,-72,32 +150,-44,24 +151,-76,46 +152,-63,33 +153,-34,24 +154,5,5 +155,5,5 +156,-26,16 +157,-80,40 +158,-168,78 +159,-164,74 +160,1,9 +161,-19,19 +162,-12,12 +163,-44,24 +164,-80,40 +165,5,5 +166,4,6 +167,-29,19 +168,-56,26 +169,-47,27 +170,-76,46 +171,-13,13 +172,-145,65 +173,-28,18 +174,-63,33 +175,-106,56 +176,3,7 +177,-28,28 +178,-60,30 +179,-49,29 +180,-52,32 +181,-84,44 +182,5,5 +183,-55,35 +184,-14,14 +185,1,9 +186,-39,19 +187,-47,27 +188,-13,13 +189,-53,33 +190,-50,30 +191,-104,54 +192,-253,113 +193,-48,28 +194,-190,90 +195,-43,23 +196,-35,25 +197,0,10 +198,5,5 +199,-11,11 +200,5,5 +201,-16,16 +202,-99,49 +203,-22,22 +204,-170,80 +205,-109,59 +206,-48,28 +207,-275,115 +208,-49,29 +209,-147,77 +210,-51,31 +211,-67,37 +212,4,6 +213,-17,17 +214,3,7 +215,-69,39 +216,-218,88 +217,-63,33 +218,-11,11 +219,-34,24 +220,-32,22 +221,-15,15 +222,-26,16 +223,-19,19 +224,-148,78 +225,-19,19 +226,1,9 +227,-49,29 +228,5,5 +229,3,7 +230,-223,103 +231,-14,14 +232,4,6 +233,5,5 
+234,2,8 +235,5,5 +236,4,6 +237,3,7 +238,3,7 +239,4,6 +240,-44,24 +241,-10,10 +242,2,8 +243,-108,58 +244,4,6 +245,-27,17 +246,3,7 +247,5,5 +248,5,5 +249,3,7 +250,-15,15 +251,-28,28 +252,-112,52 +253,-39,29 +254,4,6 +255,-48,28 +256,-149,69 +257,-27,17 +258,-33,23 +259,-30,20 +260,-29,19 +261,4,6 +262,4,6 +263,3,7 +264,3,7 +265,4,6 +266,5,5 +267,-52,42 +268,-53,33 +269,-62,42 +270,5,5 +271,4,6 +272,4,6 +273,-10,10 +274,-8,8 +275,-30,20 +276,-25,15 +277,-45,35 +278,-48,28 +279,-15,15 +280,4,6 +281,-14,14 +282,-26,16 +283,-116,56 +284,5,5 +285,-14,14 +286,-42,22 +287,3,7 +288,-31,21 +289,4,6 +290,-25,25 +291,5,5 +292,-43,23 +293,-21,21 +294,-33,23 +295,-12,12 +296,-28,18 +297,3,7 +298,0,10 +299,4,6 +300,-6,16 +301,4,6 +302,2,8 +303,-12,12 +304,4,6 +305,-77,47 +306,5,5 +307,-32,22 +308,5,5 +309,-12,12 +310,-36,26 +311,4,6 +312,4,6 +313,-34,24 +314,4,6 +315,-21,21 +316,-48,28 +317,4,6 +318,5,5 +319,-25,15 +320,4,6 +321,-14,14 +322,-135,65 +323,3,7 +324,5,5 +325,4,6 +326,3,7 +327,-11,11 +328,3,7 +329,-11,11 +330,3,7 +331,4,6 +332,3,7 +333,5,5 +334,-12,12 +335,-22,22 +336,-16,16 +337,-17,17 +338,-14,14 +339,3,7 +340,5,5 +341,-15,15 +342,-13,13 +343,4,6 +344,-90,40 +345,3,7 +346,3,7 +347,-53,33 +348,-87,47 +349,-22,22 +350,5,5 +351,-12,12 +352,3,7 +353,4,6 +354,-113,53 +355,3,7 +356,3,7 +357,-13,13 +358,-15,15 +359,-14,14 +360,2,8 +361,-15,15 +362,-63,33 +363,-14,14 +364,-15,15 +365,3,7 +366,3,7 +367,4,6 +368,4,6 +369,4,6 +370,3,7 +371,-13,13 +372,-12,12 +373,-30,20 +374,3,7 +375,2,8 +376,-13,13 +377,5,5 +378,-31,21 +379,3,7 +380,2,8 +381,4,6 +382,4,6 +383,-84,44 +384,3,7 +385,-27,17 +386,4,6 +387,4,6 +388,4,6 +389,2,8 +390,2,8 +391,3,7 +392,4,6 +393,4,6 +394,-18,18 +395,-13,13 +396,-41,31 +397,5,5 +398,-41,31 +399,-13,13 diff --git a/projects/codes/QLearning/config/CliffWalking-v0_QLearning_Test.yaml b/projects/codes/QLearning/config/CliffWalking-v0_QLearning_Test.yaml new file mode 100644 index 0000000..d1a9903 --- /dev/null +++ 
b/projects/codes/QLearning/config/CliffWalking-v0_QLearning_Test.yaml @@ -0,0 +1,19 @@ +general_cfg: + algo_name: QLearning + device: cpu + env_name: CliffWalking-v0 + mode: test + load_checkpoint: true + load_path: Train_CliffWalking-v0_QLearning_20221030-013856 + max_steps: 200 + save_fig: true + seed: 1 + show_fig: false + test_eps: 20 + train_eps: 400 +algo_cfg: + epsilon_decay: 300 + epsilon_end: 0.01 + epsilon_start: 0.95 + gamma: 0.95 + lr: 0.1 diff --git a/projects/codes/QLearning/config/CliffWalking-v0_QLearning_Train.yaml b/projects/codes/QLearning/config/CliffWalking-v0_QLearning_Train.yaml new file mode 100644 index 0000000..332f3ab --- /dev/null +++ b/projects/codes/QLearning/config/CliffWalking-v0_QLearning_Train.yaml @@ -0,0 +1,19 @@ +general_cfg: + algo_name: QLearning + device: cpu + env_name: CliffWalking-v0 + mode: train + load_checkpoint: false + load_path: Train_CartPole-v1_DQN_20221026-054757 + max_steps: 200 + save_fig: true + seed: 1 + show_fig: false + test_eps: 20 + train_eps: 800 +algo_cfg: + epsilon_decay: 300 + epsilon_end: 0.01 + epsilon_start: 0.95 + gamma: 0.95 + lr: 0.1 diff --git a/projects/codes/QLearning/config/FrozenLakeNoSlippery-v1_QLearning_Test.yaml b/projects/codes/QLearning/config/FrozenLakeNoSlippery-v1_QLearning_Test.yaml new file mode 100644 index 0000000..089e391 --- /dev/null +++ b/projects/codes/QLearning/config/FrozenLakeNoSlippery-v1_QLearning_Test.yaml @@ -0,0 +1,19 @@ +general_cfg: + algo_name: QLearning + device: cpu + env_name: FrozenLakeNoSlippery-v1 + mode: test + load_checkpoint: true + load_path: Train_FrozenLakeNoSlippery-v1_QLearning_20221030-014504 + max_steps: 200 + save_fig: true + seed: 10 + show_fig: false + test_eps: 20 + train_eps: 800 +algo_cfg: + epsilon_decay: 2000 + epsilon_end: 0.1 + epsilon_start: 0.7 + gamma: 0.95 + lr: 0.9 diff --git a/projects/codes/QLearning/config/FrozenLakeNoSlippery-v1_QLearning_Train.yaml b/projects/codes/QLearning/config/FrozenLakeNoSlippery-v1_QLearning_Train.yaml 
new file mode 100644 index 0000000..760750a --- /dev/null +++ b/projects/codes/QLearning/config/FrozenLakeNoSlippery-v1_QLearning_Train.yaml @@ -0,0 +1,19 @@ +general_cfg: + algo_name: QLearning + device: cpu + env_name: FrozenLakeNoSlippery-v1 + mode: train + load_checkpoint: false + load_path: Train_CartPole-v1_DQN_20221026-054757 + max_steps: 200 + save_fig: true + seed: 10 + show_fig: false + test_eps: 20 + train_eps: 800 +algo_cfg: + epsilon_decay: 2000 + epsilon_end: 0.1 + epsilon_start: 0.7 + gamma: 0.95 + lr: 0.9 diff --git a/projects/codes/QLearning/config/Racetrack-v0_QLearning_Test.yaml b/projects/codes/QLearning/config/Racetrack-v0_QLearning_Test.yaml new file mode 100644 index 0000000..3aa9985 --- /dev/null +++ b/projects/codes/QLearning/config/Racetrack-v0_QLearning_Test.yaml @@ -0,0 +1,19 @@ +general_cfg: + algo_name: QLearning + device: cpu + env_name: Racetrack-v0 + mode: test + load_checkpoint: true + load_path: Train_Racetrack-v0_QLearning_20221030-014833 + max_steps: 200 + save_fig: true + seed: 10 + show_fig: false + test_eps: 20 + train_eps: 400 +algo_cfg: + epsilon_decay: 300 + epsilon_end: 0.01 + epsilon_start: 0.95 + gamma: 0.9 + lr: 0.1 diff --git a/projects/codes/QLearning/config/Racetrack-v0_QLearning_Train.yaml b/projects/codes/QLearning/config/Racetrack-v0_QLearning_Train.yaml new file mode 100644 index 0000000..63e51c3 --- /dev/null +++ b/projects/codes/QLearning/config/Racetrack-v0_QLearning_Train.yaml @@ -0,0 +1,19 @@ +general_cfg: + algo_name: QLearning + device: cpu + env_name: Racetrack-v0 + mode: train + load_checkpoint: false + load_path: Train_CartPole-v1_DQN_20221026-054757 + max_steps: 200 + save_fig: true + seed: 10 + show_fig: false + test_eps: 20 + train_eps: 400 +algo_cfg: + epsilon_decay: 300 + epsilon_end: 0.01 + epsilon_start: 0.95 + gamma: 0.9 + lr: 0.1 diff --git a/projects/codes/QLearning/config/config.py b/projects/codes/QLearning/config/config.py new file mode 100644 index 0000000..e0ed62a --- /dev/null +++ 
b/projects/codes/QLearning/config/config.py @@ -0,0 +1,35 @@ +#!/usr/bin/env python +# coding=utf-8 +''' +Author: JiangJi +Email: johnjim0816@gmail.com +Date: 2022-10-30 01:23:07 +LastEditor: JiangJi +LastEditTime: 2022-10-30 01:39:54 +Discription: default parameters of QLearning +''' +from common.config import GeneralConfig,AlgoConfig + +class GeneralConfigQLearning(GeneralConfig): + def __init__(self) -> None: + self.env_name = "CliffWalking-v0" # name of environment + self.algo_name = "QLearning" # name of algorithm + self.mode = "train" # train or test + self.seed = 1 # random seed + self.device = "cpu" # device to use + self.train_eps = 400 # number of episodes for training + self.test_eps = 20 # number of episodes for testing + self.max_steps = 200 # max steps for each episode + self.load_checkpoint = False + self.load_path = "tasks" # path to load model + self.show_fig = False # show figure or not + self.save_fig = True # save figure or not + +class AlgoConfigQLearning(AlgoConfig): + def __init__(self) -> None: + # set epsilon_start=epsilon_end can obtain fixed epsilon=epsilon_end + self.epsilon_start = 0.95 # epsilon start value + self.epsilon_end = 0.01 # epsilon end value + self.epsilon_decay = 300 # epsilon decay rate + self.gamma = 0.90 # discount factor + self.lr = 0.1 # learning rate \ No newline at end of file diff --git a/projects/codes/QLearning/main.py b/projects/codes/QLearning/main.py deleted file mode 100644 index 7adbfbe..0000000 --- a/projects/codes/QLearning/main.py +++ /dev/null @@ -1,122 +0,0 @@ -#!/usr/bin/env python -# coding=utf-8 -''' -Author: John -Email: johnjim0816@gmail.com -Date: 2020-09-11 23:03:00 -LastEditor: John -LastEditTime: 2022-08-26 22:46:21 -Discription: -Environment: -''' -import sys,os -os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE" # avoid "OMP: Error #15: Initializing libiomp5md.dll, but found libiomp5md.dll already initialized." 
-curr_path = os.path.dirname(os.path.abspath(__file__)) # current path -parent_path = os.path.dirname(curr_path) # parent path -sys.path.append(parent_path) # add path to system path - -import gym -import datetime -import argparse -from envs.gridworld_env import FrozenLakeWapper -from envs.wrappers import CliffWalkingWapper -from envs.register import register_env -from qlearning import QLearning -from common.utils import all_seed -from common.launcher import Launcher - -class Main(Launcher): - def get_args(self): - curr_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") # obtain current time - parser = argparse.ArgumentParser(description="hyperparameters") - parser.add_argument('--algo_name',default='Q-learning',type=str,help="name of algorithm") - parser.add_argument('--env_name',default='CliffWalking-v0',type=str,help="name of environment") - parser.add_argument('--train_eps',default=400,type=int,help="episodes of training") - parser.add_argument('--test_eps',default=20,type=int,help="episodes of testing") - parser.add_argument('--gamma',default=0.90,type=float,help="discounted factor") - parser.add_argument('--epsilon_start',default=0.95,type=float,help="initial value of epsilon") - parser.add_argument('--epsilon_end',default=0.01,type=float,help="final value of epsilon") - parser.add_argument('--epsilon_decay',default=300,type=int,help="decay rate of epsilon") - parser.add_argument('--lr',default=0.1,type=float,help="learning rate") - parser.add_argument('--device',default='cpu',type=str,help="cpu or cuda") - parser.add_argument('--seed',default=10,type=int,help="seed") - parser.add_argument('--show_fig',default=False,type=bool,help="if show figure or not") - parser.add_argument('--save_fig',default=True,type=bool,help="if save figure or not") - args = parser.parse_args() - default_args = {'result_path':f"{curr_path}/outputs/{args.env_name}/{curr_time}/results/", - 'model_path':f"{curr_path}/outputs/{args.env_name}/{curr_time}/models/", - } - args = 
{**vars(args),**default_args} # type(dict) - return args - def env_agent_config(self,cfg): - ''' create env and agent - ''' - register_env(cfg['env_name']) - env = gym.make(cfg['env_name']) - if cfg['env_name'] == 'CliffWalking-v0': - env = CliffWalkingWapper(env) - if cfg['seed'] !=0: # set random seed - all_seed(env,seed=cfg["seed"]) - try: # state dimension - n_states = env.observation_space.n # print(hasattr(env.observation_space, 'n')) - except AttributeError: - n_states = env.observation_space.shape[0] # print(hasattr(env.observation_space, 'shape')) - n_actions = env.action_space.n # action dimension - print(f"n_states: {n_states}, n_actions: {n_actions}") - cfg.update({"n_states":n_states,"n_actions":n_actions}) # update to cfg paramters - agent = QLearning(cfg) - return env,agent - def train(self,cfg,env,agent): - print("Start training!") - print(f"Env: {cfg['env_name']}, Algorithm: {cfg['algo_name']}, Device: {cfg['device']}") - rewards = [] # record rewards for all episodes - steps = [] # record steps for all episodes - for i_ep in range(cfg['train_eps']): - ep_reward = 0 # reward per episode - ep_step = 0 # step per episode - state = env.reset() # reset and obtain initial state - while True: - action = agent.sample_action(state) # sample action - next_state, reward, done, _ = env.step(action) # update env and return transitions - agent.update(state, action, reward, next_state, done) # update agent - state = next_state # update state - ep_reward += reward - ep_step += 1 - if done: - break - rewards.append(ep_reward) - steps.append(ep_step) - if (i_ep+1)%10==0: - print(f'Episode: {i_ep+1}/{cfg["train_eps"]}, Reward: {ep_reward:.2f}, Steps:{ep_step}, Epislon: {agent.epsilon:.3f}') - print("Finish training!") - return {'episodes':range(len(rewards)),'rewards':rewards,'steps':steps} - def test(self,cfg,env,agent): - print("Start testing!") - print(f"Env: {cfg['env_name']}, Algorithm: {cfg['algo_name']}, Device: {cfg['device']}") - rewards = [] # record 
rewards for all episodes - steps = [] # record steps for all episodes - for i_ep in range(cfg['test_eps']): - ep_reward = 0 # reward per episode - ep_step = 0 - state = env.reset() # reset and obtain initial state - while True: - action = agent.predict_action(state) # predict action - next_state, reward, done, _ = env.step(action) - state = next_state - ep_reward += reward - ep_step += 1 - if done: - break - rewards.append(ep_reward) - steps.append(ep_step) - print(f"Episode: {i_ep+1}/{cfg['test_eps']}, Steps:{ep_step}, Reward: {ep_reward:.2f}") - print("Finish testing!") - return {'episodes':range(len(rewards)),'rewards':rewards,'steps':steps} - -if __name__ == "__main__": - main = Main() - main.run() - - - - diff --git a/projects/codes/QLearning/outputs/CliffWalking-v0/20220826-224730/models/Qleaning_model.pkl b/projects/codes/QLearning/outputs/CliffWalking-v0/20220826-224730/models/Qleaning_model.pkl deleted file mode 100644 index 2369fe1..0000000 Binary files a/projects/codes/QLearning/outputs/CliffWalking-v0/20220826-224730/models/Qleaning_model.pkl and /dev/null differ diff --git a/projects/codes/QLearning/outputs/CliffWalking-v0/20220826-224730/results/params.json b/projects/codes/QLearning/outputs/CliffWalking-v0/20220826-224730/results/params.json deleted file mode 100644 index 09764c9..0000000 --- a/projects/codes/QLearning/outputs/CliffWalking-v0/20220826-224730/results/params.json +++ /dev/null @@ -1 +0,0 @@ -{"algo_name": "Q-learning", "env_name": "CliffWalking-v0", "train_eps": 400, "test_eps": 20, "gamma": 0.9, "epsilon_start": 0.95, "epsilon_end": 0.01, "epsilon_decay": 300, "lr": 0.1, "device": "cpu", "seed": 10, "show_fig": false, "save_fig": true, "result_path": "C:\\Users\\24438\\Desktop\\rl-tutorials\\codes\\QLearning/outputs/CliffWalking-v0/20220826-224730/results/", "model_path": "C:\\Users\\24438\\Desktop\\rl-tutorials\\codes\\QLearning/outputs/CliffWalking-v0/20220826-224730/models/", "n_states": 48, "n_actions": 4} \ No newline at end of 
file diff --git a/projects/codes/QLearning/outputs/CliffWalking-v0/20220826-224730/results/testing_curve.png b/projects/codes/QLearning/outputs/CliffWalking-v0/20220826-224730/results/testing_curve.png deleted file mode 100644 index 527fff3..0000000 Binary files a/projects/codes/QLearning/outputs/CliffWalking-v0/20220826-224730/results/testing_curve.png and /dev/null differ diff --git a/projects/codes/QLearning/outputs/CliffWalking-v0/20220826-224730/results/training_curve.png b/projects/codes/QLearning/outputs/CliffWalking-v0/20220826-224730/results/training_curve.png deleted file mode 100644 index 819e106..0000000 Binary files a/projects/codes/QLearning/outputs/CliffWalking-v0/20220826-224730/results/training_curve.png and /dev/null differ diff --git a/projects/codes/QLearning/outputs/CliffWalking-v0/20220826-224730/results/training_results.csv b/projects/codes/QLearning/outputs/CliffWalking-v0/20220826-224730/results/training_results.csv deleted file mode 100644 index 523dc54..0000000 --- a/projects/codes/QLearning/outputs/CliffWalking-v0/20220826-224730/results/training_results.csv +++ /dev/null @@ -1,401 +0,0 @@ -episodes,rewards,steps -0,-2131,448 -1,-1086,492 -2,-586,388 -3,-220,220 -4,-154,154 -5,-122,122 -6,-150,150 -7,-159,159 -8,-164,164 -9,-88,88 -10,-195,195 -11,-114,114 -12,-60,60 -13,-179,179 -14,-101,101 -15,-304,205 -16,-96,96 -17,-119,119 -18,-113,113 -19,-98,98 -20,-106,106 -21,-105,105 -22,-77,77 -23,-51,51 -24,-105,105 -25,-136,136 -26,-100,100 -27,-29,29 -28,-79,79 -29,-114,114 -30,-82,82 -31,-70,70 -32,-75,75 -33,-51,51 -34,-94,94 -35,-52,52 -36,-93,93 -37,-71,71 -38,-73,73 -39,-48,48 -40,-52,52 -41,-96,96 -42,-46,46 -43,-65,65 -44,-57,57 -45,-41,41 -46,-104,104 -47,-51,51 -48,-181,82 -49,-229,130 -50,-39,39 -51,-69,69 -52,-53,53 -53,-59,59 -54,-26,26 -55,-75,75 -56,-31,31 -57,-60,60 -58,-63,63 -59,-40,40 -60,-35,35 -61,-79,79 -62,-42,42 -63,-22,22 -64,-73,73 -65,-71,71 -66,-18,18 -67,-55,55 -68,-29,29 -69,-43,43 -70,-70,70 -71,-49,49 
-72,-42,42 -73,-29,29 -74,-81,81 -75,-36,36 -76,-38,38 -77,-36,36 -78,-52,52 -79,-28,28 -80,-42,42 -81,-52,52 -82,-66,66 -83,-31,31 -84,-27,27 -85,-49,49 -86,-28,28 -87,-54,54 -88,-34,34 -89,-35,35 -90,-50,50 -91,-36,36 -92,-36,36 -93,-46,46 -94,-34,34 -95,-135,36 -96,-39,39 -97,-36,36 -98,-26,26 -99,-56,56 -100,-40,40 -101,-40,40 -102,-26,26 -103,-28,28 -104,-31,31 -105,-35,35 -106,-26,26 -107,-57,57 -108,-44,44 -109,-41,41 -110,-31,31 -111,-26,26 -112,-25,25 -113,-41,41 -114,-32,32 -115,-44,44 -116,-30,30 -117,-32,32 -118,-30,30 -119,-25,25 -120,-23,23 -121,-47,47 -122,-24,24 -123,-45,45 -124,-39,39 -125,-21,21 -126,-43,43 -127,-143,44 -128,-26,26 -129,-20,20 -130,-32,32 -131,-16,16 -132,-24,24 -133,-42,42 -134,-25,25 -135,-36,36 -136,-19,19 -137,-29,29 -138,-43,43 -139,-17,17 -140,-150,51 -141,-32,32 -142,-34,34 -143,-19,19 -144,-26,26 -145,-30,30 -146,-31,31 -147,-49,49 -148,-33,33 -149,-21,21 -150,-17,17 -151,-48,48 -152,-34,34 -153,-20,20 -154,-20,20 -155,-26,26 -156,-21,21 -157,-13,13 -158,-40,40 -159,-22,22 -160,-26,26 -161,-30,30 -162,-29,29 -163,-25,25 -164,-26,26 -165,-27,27 -166,-21,21 -167,-29,29 -168,-24,24 -169,-17,17 -170,-22,22 -171,-35,35 -172,-35,35 -173,-18,18 -174,-135,36 -175,-15,15 -176,-23,23 -177,-28,28 -178,-25,25 -179,-24,24 -180,-29,29 -181,-31,31 -182,-24,24 -183,-129,30 -184,-45,45 -185,-24,24 -186,-17,17 -187,-20,20 -188,-21,21 -189,-23,23 -190,-15,15 -191,-32,32 -192,-22,22 -193,-19,19 -194,-17,17 -195,-45,45 -196,-15,15 -197,-14,14 -198,-14,14 -199,-37,37 -200,-23,23 -201,-17,17 -202,-19,19 -203,-21,21 -204,-23,23 -205,-27,27 -206,-14,14 -207,-18,18 -208,-23,23 -209,-34,34 -210,-23,23 -211,-13,13 -212,-25,25 -213,-17,17 -214,-13,13 -215,-21,21 -216,-29,29 -217,-18,18 -218,-24,24 -219,-15,15 -220,-27,27 -221,-25,25 -222,-21,21 -223,-19,19 -224,-17,17 -225,-18,18 -226,-13,13 -227,-22,22 -228,-14,14 -229,-13,13 -230,-29,29 -231,-23,23 -232,-15,15 -233,-15,15 -234,-14,14 -235,-28,28 -236,-25,25 -237,-17,17 -238,-23,23 -239,-29,29 
-240,-15,15 -241,-14,14 -242,-15,15 -243,-23,23 -244,-15,15 -245,-16,16 -246,-19,19 -247,-13,13 -248,-16,16 -249,-17,17 -250,-25,25 -251,-30,30 -252,-13,13 -253,-14,14 -254,-15,15 -255,-22,22 -256,-14,14 -257,-17,17 -258,-126,27 -259,-15,15 -260,-21,21 -261,-16,16 -262,-23,23 -263,-14,14 -264,-13,13 -265,-13,13 -266,-19,19 -267,-13,13 -268,-19,19 -269,-17,17 -270,-17,17 -271,-13,13 -272,-19,19 -273,-13,13 -274,-13,13 -275,-16,16 -276,-22,22 -277,-14,14 -278,-15,15 -279,-19,19 -280,-34,34 -281,-13,13 -282,-15,15 -283,-32,32 -284,-13,13 -285,-13,13 -286,-13,13 -287,-14,14 -288,-16,16 -289,-13,13 -290,-13,13 -291,-17,17 -292,-13,13 -293,-13,13 -294,-22,22 -295,-14,14 -296,-15,15 -297,-13,13 -298,-13,13 -299,-13,13 -300,-16,16 -301,-13,13 -302,-14,14 -303,-13,13 -304,-13,13 -305,-13,13 -306,-24,24 -307,-13,13 -308,-13,13 -309,-15,15 -310,-13,13 -311,-13,13 -312,-13,13 -313,-15,15 -314,-13,13 -315,-19,19 -316,-15,15 -317,-17,17 -318,-13,13 -319,-13,13 -320,-13,13 -321,-13,13 -322,-13,13 -323,-15,15 -324,-13,13 -325,-13,13 -326,-13,13 -327,-123,24 -328,-13,13 -329,-13,13 -330,-13,13 -331,-13,13 -332,-13,13 -333,-13,13 -334,-13,13 -335,-13,13 -336,-16,16 -337,-13,13 -338,-23,23 -339,-13,13 -340,-13,13 -341,-13,13 -342,-13,13 -343,-13,13 -344,-13,13 -345,-13,13 -346,-13,13 -347,-13,13 -348,-13,13 -349,-13,13 -350,-134,35 -351,-13,13 -352,-13,13 -353,-13,13 -354,-13,13 -355,-13,13 -356,-13,13 -357,-13,13 -358,-13,13 -359,-13,13 -360,-15,15 -361,-13,13 -362,-13,13 -363,-13,13 -364,-13,13 -365,-13,13 -366,-13,13 -367,-13,13 -368,-13,13 -369,-14,14 -370,-13,13 -371,-13,13 -372,-13,13 -373,-13,13 -374,-13,13 -375,-13,13 -376,-13,13 -377,-124,25 -378,-13,13 -379,-13,13 -380,-13,13 -381,-13,13 -382,-13,13 -383,-13,13 -384,-13,13 -385,-13,13 -386,-13,13 -387,-13,13 -388,-13,13 -389,-121,22 -390,-13,13 -391,-13,13 -392,-13,13 -393,-13,13 -394,-13,13 -395,-13,13 -396,-13,13 -397,-13,13 -398,-17,17 -399,-13,13 diff --git 
a/projects/codes/QLearning/outputs/FrozenLakeNoSlippery-v1/20220825-114335/models/Qleaning_model.pkl b/projects/codes/QLearning/outputs/FrozenLakeNoSlippery-v1/20220825-114335/models/Qleaning_model.pkl deleted file mode 100644 index 3699d45..0000000 Binary files a/projects/codes/QLearning/outputs/FrozenLakeNoSlippery-v1/20220825-114335/models/Qleaning_model.pkl and /dev/null differ diff --git a/projects/codes/QLearning/outputs/FrozenLakeNoSlippery-v1/20220825-114335/results/params.json b/projects/codes/QLearning/outputs/FrozenLakeNoSlippery-v1/20220825-114335/results/params.json deleted file mode 100644 index 045ef65..0000000 --- a/projects/codes/QLearning/outputs/FrozenLakeNoSlippery-v1/20220825-114335/results/params.json +++ /dev/null @@ -1,19 +0,0 @@ -{ - "algo_name": "Q-learning", - "env_name": "FrozenLakeNoSlippery-v1", - "train_eps": 800, - "test_eps": 20, - "gamma": 0.9, - "epsilon_start": 0.7, - "epsilon_end": 0.1, - "epsilon_decay": 2000, - "lr": 0.9, - "device": "cpu", - "seed": 10, - "show_fig": false, - "save_fig": true, - "result_path": "/Users/jj/Desktop/rl-tutorials/codes/QLearning/outputs/FrozenLakeNoSlippery-v1/20220825-114335/results/", - "model_path": "/Users/jj/Desktop/rl-tutorials/codes/QLearning/outputs/FrozenLakeNoSlippery-v1/20220825-114335/models/", - "n_states": 16, - "n_actions": 4 -} \ No newline at end of file diff --git a/projects/codes/QLearning/outputs/FrozenLakeNoSlippery-v1/20220825-114335/results/testing_curve.png b/projects/codes/QLearning/outputs/FrozenLakeNoSlippery-v1/20220825-114335/results/testing_curve.png deleted file mode 100644 index e70c9f5..0000000 Binary files a/projects/codes/QLearning/outputs/FrozenLakeNoSlippery-v1/20220825-114335/results/testing_curve.png and /dev/null differ diff --git a/projects/codes/QLearning/outputs/FrozenLakeNoSlippery-v1/20220825-114335/results/training_curve.png b/projects/codes/QLearning/outputs/FrozenLakeNoSlippery-v1/20220825-114335/results/training_curve.png deleted file mode 100644 
index 98c0816..0000000 Binary files a/projects/codes/QLearning/outputs/FrozenLakeNoSlippery-v1/20220825-114335/results/training_curve.png and /dev/null differ diff --git a/projects/codes/QLearning/outputs/FrozenLakeNoSlippery-v1/20220825-114335/results/training_results.csv b/projects/codes/QLearning/outputs/FrozenLakeNoSlippery-v1/20220825-114335/results/training_results.csv deleted file mode 100644 index 4177f08..0000000 --- a/projects/codes/QLearning/outputs/FrozenLakeNoSlippery-v1/20220825-114335/results/training_results.csv +++ /dev/null @@ -1,801 +0,0 @@ -episodes,rewards,steps -0,0.0,20 -1,0.0,14 -2,0.0,13 -3,0.0,9 -4,0.0,10 -5,0.0,6 -6,0.0,11 -7,0.0,6 -8,0.0,3 -9,0.0,9 -10,0.0,11 -11,0.0,22 -12,0.0,5 -13,0.0,16 -14,0.0,4 -15,0.0,9 -16,0.0,18 -17,0.0,2 -18,0.0,4 -19,0.0,8 -20,0.0,7 -21,0.0,4 -22,0.0,22 -23,0.0,15 -24,0.0,5 -25,0.0,16 -26,0.0,7 -27,0.0,19 -28,0.0,22 -29,0.0,16 -30,0.0,11 -31,0.0,22 -32,0.0,28 -33,0.0,23 -34,0.0,4 -35,0.0,11 -36,0.0,8 -37,0.0,15 -38,0.0,5 -39,0.0,7 -40,0.0,9 -41,0.0,4 -42,0.0,3 -43,0.0,6 -44,0.0,41 -45,0.0,9 -46,0.0,23 -47,0.0,3 -48,1.0,38 -49,0.0,29 -50,0.0,17 -51,0.0,4 -52,0.0,2 -53,0.0,25 -54,0.0,6 -55,0.0,2 -56,0.0,30 -57,0.0,6 -58,0.0,7 -59,0.0,11 -60,0.0,9 -61,0.0,8 -62,0.0,23 -63,0.0,10 -64,0.0,3 -65,0.0,5 -66,0.0,7 -67,0.0,18 -68,0.0,8 -69,0.0,26 -70,0.0,6 -71,0.0,14 -72,0.0,4 -73,0.0,25 -74,0.0,21 -75,0.0,13 -76,0.0,4 -77,0.0,29 -78,0.0,21 -79,0.0,6 -80,0.0,6 -81,0.0,11 -82,0.0,21 -83,0.0,9 -84,0.0,9 -85,0.0,7 -86,0.0,48 -87,0.0,23 -88,0.0,160 -89,0.0,7 -90,0.0,10 -91,0.0,24 -92,0.0,4 -93,0.0,7 -94,0.0,17 -95,0.0,87 -96,0.0,28 -97,0.0,7 -98,0.0,5 -99,0.0,12 -100,0.0,14 -101,0.0,6 -102,0.0,13 -103,0.0,93 -104,0.0,4 -105,0.0,50 -106,0.0,8 -107,0.0,12 -108,0.0,43 -109,0.0,30 -110,0.0,15 -111,0.0,19 -112,0.0,182 -113,0.0,40 -114,0.0,88 -115,0.0,19 -116,0.0,30 -117,0.0,27 -118,0.0,5 -119,0.0,87 -120,0.0,9 -121,0.0,64 -122,0.0,27 -123,0.0,68 -124,0.0,81 -125,0.0,86 -126,0.0,227 -127,0.0,41 -128,0.0,70 -129,0.0,27 -130,0.0,6 
-131,0.0,18 -132,0.0,38 -133,0.0,26 -134,0.0,36 -135,0.0,3 -136,0.0,61 -137,0.0,105 -138,0.0,38 -139,0.0,18 -140,0.0,33 -141,0.0,29 -142,0.0,49 -143,0.0,88 -144,0.0,22 -145,0.0,65 -146,0.0,36 -147,0.0,30 -148,0.0,58 -149,0.0,43 -150,0.0,53 -151,0.0,43 -152,0.0,13 -153,0.0,8 -154,0.0,39 -155,0.0,29 -156,0.0,26 -157,0.0,60 -158,0.0,153 -159,0.0,116 -160,0.0,53 -161,0.0,54 -162,0.0,8 -163,0.0,58 -164,0.0,3 -165,0.0,47 -166,0.0,16 -167,0.0,21 -168,0.0,44 -169,0.0,29 -170,0.0,104 -171,0.0,158 -172,0.0,83 -173,0.0,26 -174,0.0,24 -175,0.0,10 -176,0.0,12 -177,0.0,40 -178,0.0,25 -179,0.0,18 -180,0.0,60 -181,0.0,203 -182,0.0,23 -183,0.0,54 -184,0.0,71 -185,0.0,19 -186,0.0,118 -187,0.0,26 -188,0.0,41 -189,0.0,41 -190,0.0,60 -191,0.0,31 -192,0.0,34 -193,0.0,35 -194,0.0,59 -195,0.0,51 -196,0.0,426 -197,0.0,79 -198,0.0,40 -199,0.0,17 -200,0.0,79 -201,0.0,126 -202,0.0,61 -203,0.0,25 -204,0.0,18 -205,0.0,27 -206,0.0,13 -207,0.0,187 -208,0.0,160 -209,0.0,32 -210,0.0,108 -211,0.0,164 -212,0.0,17 -213,0.0,82 -214,0.0,194 -215,0.0,7 -216,0.0,36 -217,0.0,156 -218,0.0,17 -219,0.0,183 -220,0.0,243 -221,0.0,87 -222,0.0,42 -223,0.0,80 -224,0.0,54 -225,0.0,82 -226,0.0,97 -227,0.0,65 -228,0.0,83 -229,0.0,159 -230,0.0,178 -231,0.0,104 -232,0.0,21 -233,0.0,118 -234,0.0,80 -235,0.0,170 -236,0.0,94 -237,0.0,235 -238,0.0,13 -239,0.0,31 -240,0.0,134 -241,0.0,32 -242,0.0,58 -243,0.0,38 -244,0.0,28 -245,0.0,159 -246,0.0,182 -247,0.0,51 -248,0.0,25 -249,0.0,73 -250,0.0,56 -251,0.0,55 -252,0.0,38 -253,0.0,292 -254,0.0,319 -255,0.0,100 -256,0.0,84 -257,0.0,24 -258,0.0,17 -259,0.0,159 -260,0.0,25 -261,0.0,73 -262,0.0,130 -263,0.0,111 -264,0.0,65 -265,1.0,58 -266,0.0,47 -267,0.0,48 -268,0.0,13 -269,0.0,100 -270,0.0,38 -271,0.0,111 -272,0.0,226 -273,0.0,38 -274,0.0,83 -275,0.0,42 -276,0.0,199 -277,0.0,83 -278,0.0,28 -279,0.0,46 -280,0.0,262 -281,0.0,123 -282,0.0,91 -283,0.0,53 -284,0.0,19 -285,0.0,26 -286,0.0,93 -287,0.0,38 -288,0.0,22 -289,0.0,43 -290,0.0,163 -291,0.0,25 -292,0.0,59 -293,0.0,71 
-294,0.0,20 -295,0.0,115 -296,0.0,248 -297,0.0,66 -298,0.0,58 -299,0.0,129 -300,0.0,122 -301,0.0,47 -302,0.0,60 -303,0.0,79 -304,1.0,137 -305,0.0,27 -306,1.0,93 -307,0.0,46 -308,1.0,83 -309,1.0,8 -310,1.0,6 -311,1.0,6 -312,0.0,4 -313,1.0,6 -314,0.0,2 -315,1.0,6 -316,1.0,6 -317,1.0,6 -318,1.0,6 -319,1.0,8 -320,0.0,5 -321,1.0,6 -322,1.0,7 -323,0.0,5 -324,1.0,6 -325,1.0,6 -326,1.0,8 -327,1.0,6 -328,1.0,6 -329,1.0,6 -330,1.0,7 -331,1.0,6 -332,1.0,6 -333,0.0,3 -334,1.0,7 -335,0.0,4 -336,1.0,6 -337,1.0,6 -338,1.0,7 -339,1.0,6 -340,1.0,6 -341,1.0,7 -342,1.0,7 -343,1.0,7 -344,1.0,6 -345,1.0,6 -346,1.0,6 -347,1.0,6 -348,1.0,6 -349,1.0,6 -350,1.0,6 -351,1.0,7 -352,0.0,4 -353,1.0,8 -354,1.0,8 -355,1.0,7 -356,1.0,6 -357,1.0,8 -358,1.0,6 -359,1.0,6 -360,1.0,7 -361,1.0,6 -362,1.0,6 -363,1.0,8 -364,1.0,7 -365,1.0,6 -366,1.0,6 -367,0.0,3 -368,1.0,11 -369,1.0,6 -370,1.0,8 -371,0.0,2 -372,1.0,6 -373,1.0,6 -374,1.0,6 -375,1.0,6 -376,1.0,8 -377,1.0,6 -378,1.0,7 -379,1.0,6 -380,1.0,7 -381,1.0,6 -382,1.0,8 -383,0.0,2 -384,1.0,6 -385,1.0,7 -386,1.0,6 -387,1.0,6 -388,1.0,10 -389,1.0,7 -390,1.0,6 -391,1.0,6 -392,1.0,6 -393,1.0,6 -394,1.0,6 -395,1.0,7 -396,0.0,4 -397,1.0,7 -398,1.0,6 -399,1.0,8 -400,0.0,3 -401,1.0,6 -402,1.0,6 -403,1.0,6 -404,1.0,6 -405,0.0,2 -406,1.0,6 -407,1.0,6 -408,1.0,6 -409,1.0,6 -410,1.0,6 -411,1.0,7 -412,1.0,6 -413,1.0,6 -414,1.0,7 -415,1.0,6 -416,1.0,6 -417,1.0,6 -418,1.0,6 -419,1.0,6 -420,1.0,6 -421,1.0,6 -422,1.0,8 -423,1.0,6 -424,1.0,8 -425,1.0,7 -426,1.0,6 -427,0.0,3 -428,1.0,6 -429,1.0,7 -430,1.0,6 -431,1.0,6 -432,1.0,6 -433,1.0,10 -434,1.0,6 -435,1.0,6 -436,1.0,6 -437,1.0,6 -438,1.0,10 -439,1.0,6 -440,1.0,8 -441,1.0,8 -442,1.0,7 -443,1.0,6 -444,0.0,5 -445,0.0,2 -446,1.0,8 -447,1.0,6 -448,1.0,10 -449,1.0,6 -450,1.0,8 -451,1.0,10 -452,1.0,6 -453,1.0,6 -454,1.0,6 -455,1.0,10 -456,1.0,6 -457,0.0,4 -458,1.0,6 -459,1.0,6 -460,1.0,6 -461,1.0,15 -462,1.0,6 -463,1.0,6 -464,1.0,6 -465,1.0,6 -466,1.0,6 -467,1.0,6 -468,1.0,8 -469,1.0,6 -470,1.0,7 -471,1.0,6 -472,1.0,6 
-473,1.0,8 -474,1.0,6 -475,1.0,6 -476,1.0,8 -477,1.0,8 -478,1.0,6 -479,1.0,6 -480,1.0,6 -481,1.0,10 -482,1.0,6 -483,1.0,6 -484,1.0,6 -485,1.0,6 -486,1.0,6 -487,1.0,6 -488,1.0,6 -489,1.0,8 -490,1.0,8 -491,1.0,6 -492,1.0,6 -493,0.0,2 -494,1.0,6 -495,1.0,6 -496,1.0,6 -497,1.0,8 -498,1.0,6 -499,1.0,6 -500,1.0,6 -501,1.0,6 -502,1.0,6 -503,1.0,6 -504,1.0,6 -505,1.0,6 -506,1.0,6 -507,1.0,7 -508,0.0,3 -509,1.0,7 -510,1.0,6 -511,1.0,6 -512,1.0,6 -513,0.0,2 -514,1.0,6 -515,1.0,8 -516,1.0,6 -517,1.0,6 -518,1.0,6 -519,1.0,6 -520,1.0,9 -521,1.0,6 -522,1.0,6 -523,1.0,6 -524,1.0,6 -525,1.0,6 -526,1.0,6 -527,1.0,9 -528,1.0,7 -529,0.0,4 -530,1.0,6 -531,1.0,8 -532,1.0,11 -533,1.0,6 -534,1.0,6 -535,1.0,6 -536,1.0,6 -537,1.0,6 -538,1.0,8 -539,1.0,6 -540,1.0,6 -541,1.0,8 -542,1.0,7 -543,1.0,6 -544,1.0,8 -545,1.0,6 -546,0.0,5 -547,1.0,9 -548,1.0,8 -549,1.0,8 -550,1.0,6 -551,1.0,8 -552,1.0,8 -553,1.0,6 -554,0.0,5 -555,0.0,3 -556,0.0,2 -557,1.0,8 -558,1.0,6 -559,1.0,6 -560,1.0,6 -561,1.0,6 -562,1.0,6 -563,1.0,6 -564,1.0,6 -565,1.0,6 -566,1.0,6 -567,1.0,6 -568,1.0,6 -569,1.0,6 -570,1.0,6 -571,1.0,6 -572,0.0,2 -573,1.0,6 -574,0.0,4 -575,1.0,6 -576,1.0,6 -577,1.0,6 -578,1.0,6 -579,1.0,6 -580,1.0,8 -581,0.0,5 -582,1.0,6 -583,1.0,6 -584,1.0,6 -585,1.0,6 -586,1.0,6 -587,1.0,6 -588,0.0,3 -589,1.0,6 -590,1.0,6 -591,1.0,6 -592,0.0,2 -593,1.0,6 -594,0.0,4 -595,1.0,6 -596,1.0,6 -597,1.0,6 -598,1.0,6 -599,1.0,8 -600,1.0,6 -601,1.0,7 -602,1.0,6 -603,1.0,7 -604,1.0,6 -605,0.0,2 -606,1.0,6 -607,1.0,6 -608,0.0,5 -609,0.0,3 -610,0.0,3 -611,1.0,6 -612,0.0,5 -613,1.0,8 -614,1.0,8 -615,1.0,6 -616,1.0,6 -617,1.0,7 -618,1.0,6 -619,1.0,6 -620,1.0,6 -621,1.0,6 -622,1.0,6 -623,1.0,8 -624,0.0,2 -625,1.0,6 -626,1.0,6 -627,1.0,6 -628,1.0,6 -629,1.0,6 -630,1.0,6 -631,1.0,6 -632,1.0,8 -633,1.0,6 -634,1.0,8 -635,1.0,6 -636,1.0,6 -637,1.0,8 -638,1.0,8 -639,0.0,5 -640,0.0,4 -641,0.0,4 -642,1.0,6 -643,1.0,6 -644,1.0,6 -645,1.0,6 -646,1.0,8 -647,1.0,6 -648,0.0,4 -649,1.0,6 -650,1.0,8 -651,1.0,6 -652,1.0,6 -653,1.0,6 
-654,1.0,6 -655,1.0,6 -656,1.0,6 -657,1.0,6 -658,1.0,8 -659,1.0,8 -660,1.0,6 -661,1.0,8 -662,1.0,9 -663,1.0,6 -664,1.0,6 -665,1.0,6 -666,1.0,6 -667,1.0,10 -668,1.0,6 -669,1.0,6 -670,1.0,6 -671,1.0,11 -672,1.0,10 -673,1.0,8 -674,1.0,6 -675,1.0,6 -676,1.0,6 -677,0.0,5 -678,1.0,6 -679,0.0,2 -680,1.0,9 -681,1.0,6 -682,1.0,8 -683,1.0,7 -684,1.0,6 -685,1.0,6 -686,1.0,7 -687,0.0,3 -688,1.0,7 -689,0.0,2 -690,1.0,6 -691,1.0,6 -692,1.0,8 -693,1.0,8 -694,1.0,6 -695,1.0,6 -696,0.0,2 -697,1.0,8 -698,1.0,6 -699,1.0,8 -700,1.0,6 -701,1.0,6 -702,1.0,9 -703,1.0,6 -704,1.0,8 -705,1.0,11 -706,1.0,6 -707,1.0,6 -708,1.0,6 -709,1.0,6 -710,1.0,8 -711,1.0,6 -712,1.0,6 -713,1.0,6 -714,0.0,5 -715,1.0,6 -716,1.0,6 -717,1.0,6 -718,1.0,6 -719,1.0,6 -720,1.0,7 -721,1.0,6 -722,1.0,6 -723,1.0,6 -724,1.0,6 -725,1.0,10 -726,1.0,6 -727,1.0,6 -728,1.0,6 -729,1.0,6 -730,1.0,6 -731,1.0,7 -732,1.0,6 -733,1.0,8 -734,1.0,7 -735,1.0,6 -736,1.0,6 -737,1.0,14 -738,1.0,6 -739,1.0,6 -740,1.0,12 -741,1.0,6 -742,1.0,6 -743,1.0,6 -744,1.0,6 -745,1.0,6 -746,1.0,6 -747,0.0,3 -748,1.0,6 -749,1.0,6 -750,1.0,6 -751,1.0,7 -752,1.0,6 -753,1.0,6 -754,1.0,6 -755,1.0,8 -756,0.0,2 -757,1.0,6 -758,1.0,6 -759,1.0,6 -760,1.0,6 -761,1.0,6 -762,1.0,6 -763,1.0,6 -764,1.0,6 -765,1.0,6 -766,0.0,4 -767,1.0,8 -768,1.0,6 -769,0.0,2 -770,1.0,10 -771,1.0,8 -772,1.0,6 -773,1.0,6 -774,1.0,6 -775,0.0,3 -776,1.0,6 -777,1.0,6 -778,0.0,6 -779,1.0,8 -780,1.0,6 -781,1.0,9 -782,1.0,6 -783,1.0,6 -784,1.0,8 -785,1.0,8 -786,1.0,6 -787,0.0,5 -788,1.0,6 -789,1.0,6 -790,1.0,6 -791,1.0,6 -792,1.0,6 -793,1.0,6 -794,1.0,8 -795,1.0,6 -796,0.0,2 -797,1.0,8 -798,1.0,7 -799,1.0,6 diff --git a/projects/codes/QLearning/outputs/Racetrack-v0/20220826-224626/models/Qleaning_model.pkl b/projects/codes/QLearning/outputs/Racetrack-v0/20220826-224626/models/Qleaning_model.pkl deleted file mode 100644 index 6d6b01f..0000000 Binary files a/projects/codes/QLearning/outputs/Racetrack-v0/20220826-224626/models/Qleaning_model.pkl and /dev/null differ diff --git 
a/projects/codes/QLearning/outputs/Racetrack-v0/20220826-224626/results/params.json b/projects/codes/QLearning/outputs/Racetrack-v0/20220826-224626/results/params.json deleted file mode 100644 index ead445f..0000000 --- a/projects/codes/QLearning/outputs/Racetrack-v0/20220826-224626/results/params.json +++ /dev/null @@ -1 +0,0 @@ -{"algo_name": "Q-learning", "env_name": "Racetrack-v0", "train_eps": 400, "test_eps": 20, "gamma": 0.9, "epsilon_start": 0.95, "epsilon_end": 0.01, "epsilon_decay": 300, "lr": 0.1, "device": "cpu", "seed": 10, "show_fig": false, "save_fig": true, "result_path": "C:\\Users\\24438\\Desktop\\rl-tutorials\\codes\\QLearning/outputs/Racetrack-v0/20220826-224626/results/", "model_path": "C:\\Users\\24438\\Desktop\\rl-tutorials\\codes\\QLearning/outputs/Racetrack-v0/20220826-224626/models/", "n_states": 4, "n_actions": 9} \ No newline at end of file diff --git a/projects/codes/QLearning/outputs/Racetrack-v0/20220826-224626/results/testing_curve.png b/projects/codes/QLearning/outputs/Racetrack-v0/20220826-224626/results/testing_curve.png deleted file mode 100644 index fa1588a..0000000 Binary files a/projects/codes/QLearning/outputs/Racetrack-v0/20220826-224626/results/testing_curve.png and /dev/null differ diff --git a/projects/codes/QLearning/outputs/Racetrack-v0/20220826-224626/results/testing_results.csv b/projects/codes/QLearning/outputs/Racetrack-v0/20220826-224626/results/testing_results.csv deleted file mode 100644 index 3d60bb2..0000000 --- a/projects/codes/QLearning/outputs/Racetrack-v0/20220826-224626/results/testing_results.csv +++ /dev/null @@ -1,21 +0,0 @@ -episodes,rewards,steps -0,-1000,1000 -1,2,8 -2,4,6 -3,3,7 -4,2,8 -5,3,7 -6,4,6 -7,-1000,1000 -8,3,7 -9,-11,11 -10,-19,19 -11,-18,18 -12,1,9 -13,1,9 -14,4,6 -15,-16,16 -16,-17,17 -17,4,6 -18,-16,16 -19,4,6 diff --git a/projects/codes/QLearning/outputs/Racetrack-v0/20220826-224626/results/training_curve.png 
b/projects/codes/QLearning/outputs/Racetrack-v0/20220826-224626/results/training_curve.png deleted file mode 100644 index c0c7b24..0000000 Binary files a/projects/codes/QLearning/outputs/Racetrack-v0/20220826-224626/results/training_curve.png and /dev/null differ diff --git a/projects/codes/QLearning/outputs/Racetrack-v0/20220826-224626/results/training_results.csv b/projects/codes/QLearning/outputs/Racetrack-v0/20220826-224626/results/training_results.csv deleted file mode 100644 index a7df26d..0000000 --- a/projects/codes/QLearning/outputs/Racetrack-v0/20220826-224626/results/training_results.csv +++ /dev/null @@ -1,401 +0,0 @@ -episodes,rewards,steps -0,-3580,1000 -1,-2960,1000 -2,-2670,1000 -3,-2720,1000 -4,-2670,1000 -5,-2570,1000 -6,-2407,977 -7,-2012,852 -8,-2500,1000 -9,-2530,1000 -10,-2550,1000 -11,-437,187 -12,-80,40 -13,-2450,1000 -14,-338,148 -15,-1175,525 -16,-755,325 -17,-411,181 -18,-1068,448 -19,-785,325 -20,-149,79 -21,-628,268 -22,-423,183 -23,-282,122 -24,-2198,938 -25,-13,13 -26,-253,113 -27,-48,28 -28,-72,42 -29,-123,63 -30,-305,145 -31,-72,32 -32,-142,72 -33,-13,13 -34,4,6 -35,-1285,545 -36,-174,94 -37,-436,196 -38,-759,339 -39,-11,11 -40,-17,17 -41,-283,123 -42,-181,81 -43,-44,24 -44,-55,35 -45,-135,65 -46,-577,277 -47,-234,114 -48,-54,34 -49,4,6 -50,-29,19 -51,-100,50 -52,-32,22 -53,-23,23 -54,4,6 -55,-17,17 -56,-18,18 -57,-48,28 -58,-34,24 -59,-45,25 -60,-29,19 -61,1,9 -62,-77,37 -63,3,7 -64,-25,15 -65,-3,13 -66,-78,48 -67,-69,39 -68,-105,45 -69,-48,28 -70,3,7 -71,4,6 -72,-100,50 -73,-130,60 -74,-20,20 -75,4,6 -76,4,6 -77,4,6 -78,4,6 -79,-47,27 -80,4,6 -81,4,6 -82,-174,94 -83,-12,12 -84,-26,16 -85,3,7 -86,3,7 -87,-42,32 -88,-48,28 -89,-97,57 -90,-11,11 -91,-16,16 -92,-15,15 -93,4,6 -94,-147,67 -95,-52,32 -96,-97,47 -97,3,7 -98,-17,17 -99,3,7 -100,4,6 -101,3,7 -102,3,7 -103,3,7 -104,1,9 -105,4,6 -106,4,6 -107,3,7 -108,4,6 -109,-68,38 -110,3,7 -111,4,6 -112,-14,14 -113,4,6 -114,-57,37 -115,3,7 -116,4,6 -117,-12,12 -118,3,7 -119,3,7 
-120,-64,34 -121,-13,13 -122,3,7 -123,-13,13 -124,4,6 -125,3,7 -126,-32,22 -127,-41,31 -128,3,7 -129,3,7 -130,3,7 -131,4,6 -132,4,6 -133,3,7 -134,-12,12 -135,-31,21 -136,4,6 -137,3,7 -138,-51,31 -139,-48,28 -140,4,6 -141,-85,45 -142,-14,14 -143,4,6 -144,3,7 -145,-6,16 -146,4,6 -147,4,6 -148,-15,15 -149,4,6 -150,-24,24 -151,3,7 -152,-14,14 -153,-18,18 -154,3,7 -155,4,6 -156,-85,45 -157,-51,31 -158,3,7 -159,2,8 -160,3,7 -161,-79,39 -162,-14,14 -163,-13,13 -164,4,6 -165,3,7 -166,4,6 -167,3,7 -168,-74,34 -169,-15,15 -170,4,6 -171,-14,14 -172,4,6 -173,-31,21 -174,-8,18 -175,4,6 -176,4,6 -177,4,6 -178,4,6 -179,-29,19 -180,4,6 -181,3,7 -182,4,6 -183,-82,42 -184,3,7 -185,4,6 -186,4,6 -187,-11,11 -188,-23,23 -189,-33,23 -190,3,7 -191,-12,12 -192,-44,24 -193,-62,42 -194,-16,16 -195,4,6 -196,-12,12 -197,3,7 -198,-13,13 -199,3,7 -200,3,7 -201,4,6 -202,4,6 -203,4,6 -204,-28,18 -205,-16,16 -206,3,7 -207,4,6 -208,-12,12 -209,-13,13 -210,-66,36 -211,-14,14 -212,4,6 -213,4,6 -214,-15,15 -215,-60,30 -216,4,6 -217,3,7 -218,4,6 -219,-33,23 -220,-12,12 -221,-14,14 -222,4,6 -223,3,7 -224,-97,47 -225,4,6 -226,2,8 -227,4,6 -228,4,6 -229,3,7 -230,-11,11 -231,4,6 -232,3,7 -233,3,7 -234,4,6 -235,3,7 -236,3,7 -237,-32,22 -238,-13,13 -239,3,7 -240,-22,22 -241,4,6 -242,2,8 -243,-31,21 -244,4,6 -245,-4,14 -246,-30,20 -247,4,6 -248,3,7 -249,-26,16 -250,4,6 -251,-12,12 -252,2,8 -253,1,9 -254,4,6 -255,2,8 -256,2,8 -257,-12,12 -258,3,7 -259,-48,28 -260,4,6 -261,4,6 -262,-51,31 -263,-12,12 -264,4,6 -265,2,8 -266,2,8 -267,2,8 -268,3,7 -269,4,6 -270,4,6 -271,-17,17 -272,4,6 -273,-13,13 -274,-16,16 -275,-97,57 -276,3,7 -277,-1,11 -278,-32,22 -279,3,7 -280,4,6 -281,3,7 -282,3,7 -283,3,7 -284,3,7 -285,2,8 -286,3,7 -287,-15,15 -288,2,8 -289,-18,18 -290,4,6 -291,-36,26 -292,4,6 -293,4,6 -294,4,6 -295,4,6 -296,-77,47 -297,-14,14 -298,3,7 -299,3,7 -300,3,7 -301,4,6 -302,3,7 -303,4,6 -304,-12,12 -305,-45,35 -306,-63,43 -307,2,8 -308,4,6 -309,4,6 -310,-13,13 -311,4,6 -312,-13,13 -313,4,6 -314,3,7 -315,-30,20 
-316,-13,13 -317,3,7 -318,4,6 -319,4,6 -320,-12,12 -321,-13,13 -322,3,7 -323,3,7 -324,3,7 -325,3,7 -326,-36,26 -327,4,6 -328,3,7 -329,3,7 -330,3,7 -331,3,7 -332,-14,14 -333,-16,16 -334,3,7 -335,3,7 -336,-14,14 -337,1,9 -338,2,8 -339,3,7 -340,4,6 -341,-36,26 -342,-14,14 -343,-78,48 -344,2,8 -345,-37,27 -346,3,7 -347,3,7 -348,-37,27 -349,-16,16 -350,4,6 -351,-15,15 -352,4,6 -353,2,8 -354,-44,24 -355,-13,13 -356,-14,14 -357,-17,17 -358,-13,13 -359,3,7 -360,2,8 -361,4,6 -362,3,7 -363,-5,15 -364,-14,14 -365,2,8 -366,-12,12 -367,3,7 -368,4,6 -369,2,8 -370,2,8 -371,1,9 -372,-16,16 -373,1,9 -374,4,6 -375,-16,16 -376,3,7 -377,2,8 -378,-13,13 -379,-44,34 -380,-16,16 -381,-30,20 -382,4,6 -383,4,6 -384,2,8 -385,-15,15 -386,4,6 -387,3,7 -388,2,8 -389,4,6 -390,2,8 -391,3,7 -392,3,7 -393,-14,14 -394,-15,15 -395,3,7 -396,-13,13 -397,3,7 -398,4,6 -399,3,7 diff --git a/projects/codes/QLearning/qlearning.py b/projects/codes/QLearning/qlearning.py index c987242..48dfa37 100644 --- a/projects/codes/QLearning/qlearning.py +++ b/projects/codes/QLearning/qlearning.py @@ -5,7 +5,7 @@ Author: John Email: johnjim0816@gmail.com Date: 2020-09-11 23:03:00 LastEditor: John -LastEditTime: 2022-08-24 10:31:04 +LastEditTime: 2022-10-30 01:38:26 Discription: use defaultdict to define Q table Environment: ''' @@ -16,14 +16,14 @@ from collections import defaultdict class QLearning(object): def __init__(self,cfg): - self.n_actions = cfg['n_actions'] - self.lr = cfg['lr'] - self.gamma = cfg['gamma'] - self.epsilon = cfg['epsilon_start'] + self.n_actions = cfg.n_actions + self.lr = cfg.lr + self.gamma = cfg.gamma + self.epsilon = cfg.epsilon_start self.sample_count = 0 - self.epsilon_start = cfg['epsilon_start'] - self.epsilon_end = cfg['epsilon_end'] - self.epsilon_decay = cfg['epsilon_decay'] + self.epsilon_start = cfg.epsilon_start + self.epsilon_end = cfg.epsilon_end + self.epsilon_decay = cfg.epsilon_decay self.Q_table = defaultdict(lambda: np.zeros(self.n_actions)) # use nested dictionary to 
represent Q(s,a), here set all Q(s,a)=0 initially, not like pseudo code def sample_action(self, state): ''' sample action with e-greedy policy while training diff --git a/projects/codes/QLearning/task0.py b/projects/codes/QLearning/task0.py new file mode 100644 index 0000000..da52113 --- /dev/null +++ b/projects/codes/QLearning/task0.py @@ -0,0 +1,106 @@ +#!/usr/bin/env python +# coding=utf-8 +''' +Author: John +Email: johnjim0816@gmail.com +Date: 2020-09-11 23:03:00 +LastEditor: John +LastEditTime: 2022-10-30 02:04:55 +Discription: +Environment: +''' +import sys,os +os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE" # avoid "OMP: Error #15: Initializing libiomp5md.dll, but found libiomp5md.dll already initialized." +curr_path = os.path.dirname(os.path.abspath(__file__)) # current path +parent_path = os.path.dirname(curr_path) # parent path +sys.path.append(parent_path) # add path to system path + +import gym +import datetime +import argparse +from envs.gridworld_env import FrozenLakeWapper +from envs.wrappers import CliffWalkingWapper +from envs.register import register_env +from qlearning import QLearning +from common.utils import all_seed,merge_class_attrs +from common.launcher import Launcher +from config.config import GeneralConfigQLearning,AlgoConfigQLearning + +class Main(Launcher): + def __init__(self) -> None: + super().__init__() + self.cfgs['general_cfg'] = merge_class_attrs(self.cfgs['general_cfg'],GeneralConfigQLearning()) + self.cfgs['algo_cfg'] = merge_class_attrs(self.cfgs['algo_cfg'],AlgoConfigQLearning()) + def env_agent_config(self,cfg,logger): + ''' create env and agent + ''' + register_env(cfg.env_name) + env = gym.make(cfg.env_name,new_step_api=False) # create env + if cfg.env_name == 'CliffWalking-v0': + env = CliffWalkingWapper(env) + if cfg.seed !=0: # set random seed + all_seed(env,seed=cfg.seed) + try: # state dimension + n_states = env.observation_space.n # print(hasattr(env.observation_space, 'n')) + except AttributeError: + n_states = 
env.observation_space.shape[0] # print(hasattr(env.observation_space, 'shape')) + n_actions = env.action_space.n # action dimension + logger.info(f"n_states: {n_states}, n_actions: {n_actions}") # print info + # update to cfg paramters + setattr(cfg, 'n_states', n_states) + setattr(cfg, 'n_actions', n_actions) + agent = QLearning(cfg) + return env,agent + def train(self,cfg,env,agent,logger): + logger.info("Start training!") + logger.info(f"Env: {cfg.env_name}, Algorithm: {cfg.algo_name}, Device: {cfg.device}") + rewards = [] # record rewards for all episodes + steps = [] # record steps for all episodes + for i_ep in range(cfg.train_eps): + ep_reward = 0 # reward per episode + ep_step = 0 # step per episode + state = env.reset() # reset and obtain initial state + for _ in range(cfg.max_steps): + action = agent.sample_action(state) # sample action + next_state, reward, terminated, _ = env.step(action) # update env and return transitions + agent.update(state, action, reward, next_state, terminated) # update agent + state = next_state # update state + ep_reward += reward + ep_step += 1 + if terminated: + break + rewards.append(ep_reward) + steps.append(ep_step) + logger.info(f'Episode: {i_ep+1}/{cfg.train_eps}, Reward: {ep_reward:.2f}, Steps:{ep_step:d}, Epislon: {agent.epsilon:.3f}') + logger.info("Finish training!") + return {'episodes':range(len(rewards)),'rewards':rewards,'steps':steps} + def test(self,cfg,env,agent,logger): + logger.info("Start testing!") + logger.info(f"Env: {cfg.env_name}, Algorithm: {cfg.algo_name}, Device: {cfg.device}") + rewards = [] # record rewards for all episodes + steps = [] # record steps for all episodes + for i_ep in range(cfg.test_eps): + ep_reward = 0 # reward per episode + ep_step = 0 + state = env.reset() # reset and obtain initial state + for _ in range(cfg.max_steps): + action = agent.predict_action(state) # predict action + next_state, reward, terminated, _ = env.step(action) + state = next_state + ep_reward += reward + 
ep_step += 1 + if terminated: + break + rewards.append(ep_reward) + steps.append(ep_step) + logger.info(f"Episode: {i_ep+1}/{cfg.test_eps}, Reward: {ep_reward:.2f}, Steps:{ep_step:d}") + logger.info("Finish testing!") + return {'episodes':range(len(rewards)),'rewards':rewards,'steps':steps} + +if __name__ == "__main__": + main = Main() + main.run() + + + + diff --git a/projects/codes/Sarsa/Test_CliffWalking-v0_Sarsa_20221030-021206/config.yaml b/projects/codes/Sarsa/Test_CliffWalking-v0_Sarsa_20221030-021206/config.yaml new file mode 100644 index 0000000..f1c252d --- /dev/null +++ b/projects/codes/Sarsa/Test_CliffWalking-v0_Sarsa_20221030-021206/config.yaml @@ -0,0 +1,19 @@ +general_cfg: + algo_name: Sarsa + device: cpu + env_name: CliffWalking-v0 + load_checkpoint: true + load_path: Train_CliffWalking-v0_Sarsa_20221030-021146 + max_steps: 200 + mode: test + save_fig: true + seed: 1 + show_fig: false + test_eps: 20 + train_eps: 400 +algo_cfg: + epsilon_decay: 300 + epsilon_end: 0.01 + epsilon_start: 0.95 + gamma: 0.95 + lr: 0.1 diff --git a/projects/codes/Sarsa/Test_CliffWalking-v0_Sarsa_20221030-021206/logs/log.txt b/projects/codes/Sarsa/Test_CliffWalking-v0_Sarsa_20221030-021206/logs/log.txt new file mode 100644 index 0000000..29ed4a8 --- /dev/null +++ b/projects/codes/Sarsa/Test_CliffWalking-v0_Sarsa_20221030-021206/logs/log.txt @@ -0,0 +1,24 @@ +2022-10-30 02:12:06 - r - INFO: - n_states: 48, n_actions: 4 +2022-10-30 02:12:06 - r - INFO: - Start testing! 
+2022-10-30 02:12:06 - r - INFO: - Env: CliffWalking-v0, Algorithm: Sarsa, Device: cpu +2022-10-30 02:12:06 - r - INFO: - Episode: 1/20, Reward: -15.00, Steps:15 +2022-10-30 02:12:06 - r - INFO: - Episode: 2/20, Reward: -15.00, Steps:15 +2022-10-30 02:12:06 - r - INFO: - Episode: 3/20, Reward: -15.00, Steps:15 +2022-10-30 02:12:06 - r - INFO: - Episode: 4/20, Reward: -15.00, Steps:15 +2022-10-30 02:12:06 - r - INFO: - Episode: 5/20, Reward: -15.00, Steps:15 +2022-10-30 02:12:06 - r - INFO: - Episode: 6/20, Reward: -15.00, Steps:15 +2022-10-30 02:12:06 - r - INFO: - Episode: 7/20, Reward: -15.00, Steps:15 +2022-10-30 02:12:06 - r - INFO: - Episode: 8/20, Reward: -15.00, Steps:15 +2022-10-30 02:12:06 - r - INFO: - Episode: 9/20, Reward: -15.00, Steps:15 +2022-10-30 02:12:06 - r - INFO: - Episode: 10/20, Reward: -15.00, Steps:15 +2022-10-30 02:12:06 - r - INFO: - Episode: 11/20, Reward: -15.00, Steps:15 +2022-10-30 02:12:06 - r - INFO: - Episode: 12/20, Reward: -15.00, Steps:15 +2022-10-30 02:12:06 - r - INFO: - Episode: 13/20, Reward: -15.00, Steps:15 +2022-10-30 02:12:06 - r - INFO: - Episode: 14/20, Reward: -15.00, Steps:15 +2022-10-30 02:12:06 - r - INFO: - Episode: 15/20, Reward: -15.00, Steps:15 +2022-10-30 02:12:06 - r - INFO: - Episode: 16/20, Reward: -15.00, Steps:15 +2022-10-30 02:12:06 - r - INFO: - Episode: 17/20, Reward: -15.00, Steps:15 +2022-10-30 02:12:06 - r - INFO: - Episode: 18/20, Reward: -15.00, Steps:15 +2022-10-30 02:12:06 - r - INFO: - Episode: 19/20, Reward: -15.00, Steps:15 +2022-10-30 02:12:06 - r - INFO: - Episode: 20/20, Reward: -15.00, Steps:15 +2022-10-30 02:12:06 - r - INFO: - Finish testing! 
diff --git a/projects/codes/Sarsa/Test_CliffWalking-v0_Sarsa_20221030-021206/models/checkpoint.pkl b/projects/codes/Sarsa/Test_CliffWalking-v0_Sarsa_20221030-021206/models/checkpoint.pkl new file mode 100644 index 0000000..d226d4c Binary files /dev/null and b/projects/codes/Sarsa/Test_CliffWalking-v0_Sarsa_20221030-021206/models/checkpoint.pkl differ diff --git a/projects/codes/Sarsa/outputs/CliffWalking-v0/20220825-213316/results/testing_curve.png b/projects/codes/Sarsa/Test_CliffWalking-v0_Sarsa_20221030-021206/results/learning_curve.png similarity index 100% rename from projects/codes/Sarsa/outputs/CliffWalking-v0/20220825-213316/results/testing_curve.png rename to projects/codes/Sarsa/Test_CliffWalking-v0_Sarsa_20221030-021206/results/learning_curve.png diff --git a/projects/codes/Sarsa/outputs/CliffWalking-v0/20220825-213316/results/testing_results.csv b/projects/codes/Sarsa/Test_CliffWalking-v0_Sarsa_20221030-021206/results/res.csv similarity index 100% rename from projects/codes/Sarsa/outputs/CliffWalking-v0/20220825-213316/results/testing_results.csv rename to projects/codes/Sarsa/Test_CliffWalking-v0_Sarsa_20221030-021206/results/res.csv diff --git a/projects/codes/Sarsa/Test_Racetrack-v0_Sarsa_20221030-021347/config.yaml b/projects/codes/Sarsa/Test_Racetrack-v0_Sarsa_20221030-021347/config.yaml new file mode 100644 index 0000000..7c1b16f --- /dev/null +++ b/projects/codes/Sarsa/Test_Racetrack-v0_Sarsa_20221030-021347/config.yaml @@ -0,0 +1,19 @@ +general_cfg: + algo_name: Sarsa + device: cpu + env_name: Racetrack-v0 + load_checkpoint: true + load_path: Train_Racetrack-v0_Sarsa_20221030-021315 + max_steps: 200 + mode: test + save_fig: true + seed: 10 + show_fig: false + test_eps: 20 + train_eps: 400 +algo_cfg: + epsilon_decay: 200 + epsilon_end: 0.01 + epsilon_start: 0.9 + gamma: 0.99 + lr: 0.1 diff --git a/projects/codes/Sarsa/Test_Racetrack-v0_Sarsa_20221030-021347/logs/log.txt b/projects/codes/Sarsa/Test_Racetrack-v0_Sarsa_20221030-021347/logs/log.txt 
new file mode 100644 index 0000000..7fd4614 --- /dev/null +++ b/projects/codes/Sarsa/Test_Racetrack-v0_Sarsa_20221030-021347/logs/log.txt @@ -0,0 +1,24 @@ +2022-10-30 02:13:47 - r - INFO: - n_states: 4, n_actions: 9 +2022-10-30 02:13:47 - r - INFO: - Start testing! +2022-10-30 02:13:47 - r - INFO: - Env: Racetrack-v0, Algorithm: Sarsa, Device: cpu +2022-10-30 02:13:47 - r - INFO: - Episode: 1/20, Reward: 3.00, Steps:7 +2022-10-30 02:13:47 - r - INFO: - Episode: 2/20, Reward: 3.00, Steps:7 +2022-10-30 02:13:47 - r - INFO: - Episode: 3/20, Reward: 2.00, Steps:8 +2022-10-30 02:13:47 - r - INFO: - Episode: 4/20, Reward: 3.00, Steps:7 +2022-10-30 02:13:47 - r - INFO: - Episode: 5/20, Reward: -12.00, Steps:12 +2022-10-30 02:13:47 - r - INFO: - Episode: 6/20, Reward: -49.00, Steps:29 +2022-10-30 02:13:47 - r - INFO: - Episode: 7/20, Reward: 3.00, Steps:7 +2022-10-30 02:13:47 - r - INFO: - Episode: 8/20, Reward: -17.00, Steps:17 +2022-10-30 02:13:47 - r - INFO: - Episode: 9/20, Reward: 4.00, Steps:6 +2022-10-30 02:13:47 - r - INFO: - Episode: 10/20, Reward: -17.00, Steps:17 +2022-10-30 02:13:47 - r - INFO: - Episode: 11/20, Reward: 2.00, Steps:8 +2022-10-30 02:13:47 - r - INFO: - Episode: 12/20, Reward: 3.00, Steps:7 +2022-10-30 02:13:47 - r - INFO: - Episode: 13/20, Reward: 3.00, Steps:7 +2022-10-30 02:13:47 - r - INFO: - Episode: 14/20, Reward: 2.00, Steps:8 +2022-10-30 02:13:47 - r - INFO: - Episode: 15/20, Reward: 3.00, Steps:7 +2022-10-30 02:13:47 - r - INFO: - Episode: 16/20, Reward: -34.00, Steps:24 +2022-10-30 02:13:47 - r - INFO: - Episode: 17/20, Reward: 3.00, Steps:7 +2022-10-30 02:13:47 - r - INFO: - Episode: 18/20, Reward: 5.00, Steps:5 +2022-10-30 02:13:47 - r - INFO: - Episode: 19/20, Reward: 5.00, Steps:5 +2022-10-30 02:13:47 - r - INFO: - Episode: 20/20, Reward: 3.00, Steps:7 +2022-10-30 02:13:47 - r - INFO: - Finish testing! 
diff --git a/projects/codes/Sarsa/Test_Racetrack-v0_Sarsa_20221030-021347/models/checkpoint.pkl b/projects/codes/Sarsa/Test_Racetrack-v0_Sarsa_20221030-021347/models/checkpoint.pkl new file mode 100644 index 0000000..d950b3f Binary files /dev/null and b/projects/codes/Sarsa/Test_Racetrack-v0_Sarsa_20221030-021347/models/checkpoint.pkl differ diff --git a/projects/codes/Sarsa/Test_Racetrack-v0_Sarsa_20221030-021347/results/learning_curve.png b/projects/codes/Sarsa/Test_Racetrack-v0_Sarsa_20221030-021347/results/learning_curve.png new file mode 100644 index 0000000..fde014e Binary files /dev/null and b/projects/codes/Sarsa/Test_Racetrack-v0_Sarsa_20221030-021347/results/learning_curve.png differ diff --git a/projects/codes/Sarsa/Test_Racetrack-v0_Sarsa_20221030-021347/results/res.csv b/projects/codes/Sarsa/Test_Racetrack-v0_Sarsa_20221030-021347/results/res.csv new file mode 100644 index 0000000..5d08ed0 --- /dev/null +++ b/projects/codes/Sarsa/Test_Racetrack-v0_Sarsa_20221030-021347/results/res.csv @@ -0,0 +1,21 @@ +episodes,rewards,steps +0,3,7 +1,3,7 +2,2,8 +3,3,7 +4,-12,12 +5,-49,29 +6,3,7 +7,-17,17 +8,4,6 +9,-17,17 +10,2,8 +11,3,7 +12,3,7 +13,2,8 +14,3,7 +15,-34,24 +16,3,7 +17,5,5 +18,5,5 +19,3,7 diff --git a/projects/codes/Sarsa/Train_CliffWalking-v0_Sarsa_20221030-021146/config.yaml b/projects/codes/Sarsa/Train_CliffWalking-v0_Sarsa_20221030-021146/config.yaml new file mode 100644 index 0000000..4d61198 --- /dev/null +++ b/projects/codes/Sarsa/Train_CliffWalking-v0_Sarsa_20221030-021146/config.yaml @@ -0,0 +1,19 @@ +general_cfg: + algo_name: Sarsa + device: cpu + env_name: CliffWalking-v0 + load_checkpoint: false + load_path: Train_CartPole-v1_DQN_20221026-054757 + max_steps: 200 + mode: train + save_fig: true + seed: 1 + show_fig: false + test_eps: 20 + train_eps: 800 +algo_cfg: + epsilon_decay: 300 + epsilon_end: 0.01 + epsilon_start: 0.95 + gamma: 0.95 + lr: 0.1 diff --git a/projects/codes/Sarsa/Train_CliffWalking-v0_Sarsa_20221030-021146/logs/log.txt 
b/projects/codes/Sarsa/Train_CliffWalking-v0_Sarsa_20221030-021146/logs/log.txt new file mode 100644 index 0000000..76df5b3 --- /dev/null +++ b/projects/codes/Sarsa/Train_CliffWalking-v0_Sarsa_20221030-021146/logs/log.txt @@ -0,0 +1,804 @@ +2022-10-30 02:11:46 - r - INFO: - n_states: 48, n_actions: 4 +2022-10-30 02:11:46 - r - INFO: - Start training! +2022-10-30 02:11:46 - r - INFO: - Env: CliffWalking-v0, Algorithm: Sarsa, Device: cpu +2022-10-30 02:11:46 - r - INFO: - Episode: 1/800, Reward: -1091.00, Steps:200, Epislon: 0.491 +2022-10-30 02:11:46 - r - INFO: - Episode: 2/800, Reward: -320.00, Steps:122, Epislon: 0.329 +2022-10-30 02:11:46 - r - INFO: - Episode: 3/800, Reward: -794.00, Steps:200, Epislon: 0.173 +2022-10-30 02:11:46 - r - INFO: - Episode: 4/800, Reward: -596.00, Steps:200, Epislon: 0.094 +2022-10-30 02:11:46 - r - INFO: - Episode: 5/800, Reward: -398.00, Steps:200, Epislon: 0.053 +2022-10-30 02:11:46 - r - INFO: - Episode: 6/800, Reward: -59.00, Steps:59, Epislon: 0.045 +2022-10-30 02:11:46 - r - INFO: - Episode: 7/800, Reward: -299.00, Steps:200, Epislon: 0.028 +2022-10-30 02:11:46 - r - INFO: - Episode: 8/800, Reward: -82.00, Steps:82, Epislon: 0.024 +2022-10-30 02:11:46 - r - INFO: - Episode: 9/800, Reward: -125.00, Steps:125, Epislon: 0.019 +2022-10-30 02:11:46 - r - INFO: - Episode: 10/800, Reward: -75.00, Steps:75, Epislon: 0.017 +2022-10-30 02:11:46 - r - INFO: - Episode: 11/800, Reward: -285.00, Steps:186, Epislon: 0.014 +2022-10-30 02:11:46 - r - INFO: - Episode: 12/800, Reward: -103.00, Steps:103, Epislon: 0.013 +2022-10-30 02:11:46 - r - INFO: - Episode: 13/800, Reward: -103.00, Steps:103, Epislon: 0.012 +2022-10-30 02:11:46 - r - INFO: - Episode: 14/800, Reward: -131.00, Steps:131, Epislon: 0.011 +2022-10-30 02:11:46 - r - INFO: - Episode: 15/800, Reward: -53.00, Steps:53, Epislon: 0.011 +2022-10-30 02:11:46 - r - INFO: - Episode: 16/800, Reward: -113.00, Steps:113, Epislon: 0.011 +2022-10-30 02:11:46 - r - INFO: - Episode: 17/800, 
Reward: -125.00, Steps:125, Epislon: 0.010 +2022-10-30 02:11:46 - r - INFO: - Episode: 18/800, Reward: -95.00, Steps:95, Epislon: 0.010 +2022-10-30 02:11:46 - r - INFO: - Episode: 19/800, Reward: -97.00, Steps:97, Epislon: 0.010 +2022-10-30 02:11:46 - r - INFO: - Episode: 20/800, Reward: -145.00, Steps:145, Epislon: 0.010 +2022-10-30 02:11:46 - r - INFO: - Episode: 21/800, Reward: -89.00, Steps:89, Epislon: 0.010 +2022-10-30 02:11:46 - r - INFO: - Episode: 22/800, Reward: -97.00, Steps:97, Epislon: 0.010 +2022-10-30 02:11:46 - r - INFO: - Episode: 23/800, Reward: -115.00, Steps:115, Epislon: 0.010 +2022-10-30 02:11:46 - r - INFO: - Episode: 24/800, Reward: -121.00, Steps:121, Epislon: 0.010 +2022-10-30 02:11:46 - r - INFO: - Episode: 25/800, Reward: -53.00, Steps:53, Epislon: 0.010 +2022-10-30 02:11:46 - r - INFO: - Episode: 26/800, Reward: -111.00, Steps:111, Epislon: 0.010 +2022-10-30 02:11:46 - r - INFO: - Episode: 27/800, Reward: -97.00, Steps:97, Epislon: 0.010 +2022-10-30 02:11:46 - r - INFO: - Episode: 28/800, Reward: -206.00, Steps:107, Epislon: 0.010 +2022-10-30 02:11:46 - r - INFO: - Episode: 29/800, Reward: -147.00, Steps:147, Epislon: 0.010 +2022-10-30 02:11:46 - r - INFO: - Episode: 30/800, Reward: -36.00, Steps:36, Epislon: 0.010 +2022-10-30 02:11:46 - r - INFO: - Episode: 31/800, Reward: -216.00, Steps:117, Epislon: 0.010 +2022-10-30 02:11:46 - r - INFO: - Episode: 32/800, Reward: -103.00, Steps:103, Epislon: 0.010 +2022-10-30 02:11:46 - r - INFO: - Episode: 33/800, Reward: -87.00, Steps:87, Epislon: 0.010 +2022-10-30 02:11:46 - r - INFO: - Episode: 34/800, Reward: -80.00, Steps:80, Epislon: 0.010 +2022-10-30 02:11:46 - r - INFO: - Episode: 35/800, Reward: -73.00, Steps:73, Epislon: 0.010 +2022-10-30 02:11:46 - r - INFO: - Episode: 36/800, Reward: -83.00, Steps:83, Epislon: 0.010 +2022-10-30 02:11:46 - r - INFO: - Episode: 37/800, Reward: -143.00, Steps:44, Epislon: 0.010 +2022-10-30 02:11:46 - r - INFO: - Episode: 38/800, Reward: -241.00, Steps:142, 
Epislon: 0.010 +2022-10-30 02:11:46 - r - INFO: - Episode: 39/800, Reward: -77.00, Steps:77, Epislon: 0.010 +2022-10-30 02:11:46 - r - INFO: - Episode: 40/800, Reward: -49.00, Steps:49, Epislon: 0.010 +2022-10-30 02:11:46 - r - INFO: - Episode: 41/800, Reward: -87.00, Steps:87, Epislon: 0.010 +2022-10-30 02:11:46 - r - INFO: - Episode: 42/800, Reward: -47.00, Steps:47, Epislon: 0.010 +2022-10-30 02:11:46 - r - INFO: - Episode: 43/800, Reward: -89.00, Steps:89, Epislon: 0.010 +2022-10-30 02:11:46 - r - INFO: - Episode: 44/800, Reward: -31.00, Steps:31, Epislon: 0.010 +2022-10-30 02:11:46 - r - INFO: - Episode: 45/800, Reward: -192.00, Steps:93, Epislon: 0.010 +2022-10-30 02:11:46 - r - INFO: - Episode: 46/800, Reward: -85.00, Steps:85, Epislon: 0.010 +2022-10-30 02:11:46 - r - INFO: - Episode: 47/800, Reward: -55.00, Steps:55, Epislon: 0.010 +2022-10-30 02:11:46 - r - INFO: - Episode: 48/800, Reward: -59.00, Steps:59, Epislon: 0.010 +2022-10-30 02:11:46 - r - INFO: - Episode: 49/800, Reward: -60.00, Steps:60, Epislon: 0.010 +2022-10-30 02:11:46 - r - INFO: - Episode: 50/800, Reward: -50.00, Steps:50, Epislon: 0.010 +2022-10-30 02:11:46 - r - INFO: - Episode: 51/800, Reward: -23.00, Steps:23, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 52/800, Reward: -101.00, Steps:101, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 53/800, Reward: -43.00, Steps:43, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 54/800, Reward: -70.00, Steps:70, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 55/800, Reward: -35.00, Steps:35, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 56/800, Reward: -47.00, Steps:47, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 57/800, Reward: -80.00, Steps:80, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 58/800, Reward: -61.00, Steps:61, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 59/800, Reward: -35.00, Steps:35, Epislon: 0.010 +2022-10-30 02:11:47 - r - 
INFO: - Episode: 60/800, Reward: -73.00, Steps:73, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 61/800, Reward: -54.00, Steps:54, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 62/800, Reward: -37.00, Steps:37, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 63/800, Reward: -65.00, Steps:65, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 64/800, Reward: -41.00, Steps:41, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 65/800, Reward: -81.00, Steps:81, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 66/800, Reward: -39.00, Steps:39, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 67/800, Reward: -35.00, Steps:35, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 68/800, Reward: -61.00, Steps:61, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 69/800, Reward: -57.00, Steps:57, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 70/800, Reward: -43.00, Steps:43, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 71/800, Reward: -59.00, Steps:59, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 72/800, Reward: -43.00, Steps:43, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 73/800, Reward: -51.00, Steps:51, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 74/800, Reward: -43.00, Steps:43, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 75/800, Reward: -69.00, Steps:69, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 76/800, Reward: -41.00, Steps:41, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 77/800, Reward: -194.00, Steps:95, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 78/800, Reward: -35.00, Steps:35, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 79/800, Reward: -35.00, Steps:35, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 80/800, Reward: -81.00, Steps:81, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 81/800, Reward: -65.00, 
Steps:65, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 82/800, Reward: -35.00, Steps:35, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 83/800, Reward: -47.00, Steps:47, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 84/800, Reward: -53.00, Steps:53, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 85/800, Reward: -165.00, Steps:66, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 86/800, Reward: -69.00, Steps:69, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 87/800, Reward: -35.00, Steps:35, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 88/800, Reward: -56.00, Steps:56, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 89/800, Reward: -164.00, Steps:65, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 90/800, Reward: -45.00, Steps:45, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 91/800, Reward: -43.00, Steps:43, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 92/800, Reward: -43.00, Steps:43, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 93/800, Reward: -29.00, Steps:29, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 94/800, Reward: -69.00, Steps:69, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 95/800, Reward: -33.00, Steps:33, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 96/800, Reward: -57.00, Steps:57, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 97/800, Reward: -29.00, Steps:29, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 98/800, Reward: -55.00, Steps:55, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 99/800, Reward: -61.00, Steps:61, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 100/800, Reward: -162.00, Steps:63, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 101/800, Reward: -55.00, Steps:55, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 102/800, Reward: -31.00, Steps:31, Epislon: 0.010 +2022-10-30 
02:11:47 - r - INFO: - Episode: 103/800, Reward: -53.00, Steps:53, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 104/800, Reward: -39.00, Steps:39, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 105/800, Reward: -55.00, Steps:55, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 106/800, Reward: -25.00, Steps:25, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 107/800, Reward: -33.00, Steps:33, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 108/800, Reward: -49.00, Steps:49, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 109/800, Reward: -65.00, Steps:65, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 110/800, Reward: -45.00, Steps:45, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 111/800, Reward: -29.00, Steps:29, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 112/800, Reward: -25.00, Steps:25, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 113/800, Reward: -51.00, Steps:51, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 114/800, Reward: -43.00, Steps:43, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 115/800, Reward: -47.00, Steps:47, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 116/800, Reward: -41.00, Steps:41, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 117/800, Reward: -27.00, Steps:27, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 118/800, Reward: -31.00, Steps:31, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 119/800, Reward: -180.00, Steps:81, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 120/800, Reward: -43.00, Steps:43, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 121/800, Reward: -25.00, Steps:25, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 122/800, Reward: -47.00, Steps:47, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 123/800, Reward: -65.00, Steps:65, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - 
Episode: 124/800, Reward: -29.00, Steps:29, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 125/800, Reward: -31.00, Steps:31, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 126/800, Reward: -49.00, Steps:49, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 127/800, Reward: -25.00, Steps:25, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 128/800, Reward: -45.00, Steps:45, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 129/800, Reward: -49.00, Steps:49, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 130/800, Reward: -37.00, Steps:37, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 131/800, Reward: -49.00, Steps:49, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 132/800, Reward: -25.00, Steps:25, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 133/800, Reward: -29.00, Steps:29, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 134/800, Reward: -35.00, Steps:35, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 135/800, Reward: -37.00, Steps:37, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 136/800, Reward: -43.00, Steps:43, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 137/800, Reward: -31.00, Steps:31, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 138/800, Reward: -51.00, Steps:51, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 139/800, Reward: -25.00, Steps:25, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 140/800, Reward: -51.00, Steps:51, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 141/800, Reward: -21.00, Steps:21, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 142/800, Reward: -35.00, Steps:35, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 143/800, Reward: -31.00, Steps:31, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 144/800, Reward: -41.00, Steps:41, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 145/800, Reward: 
-29.00, Steps:29, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 146/800, Reward: -27.00, Steps:27, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 147/800, Reward: -47.00, Steps:47, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 148/800, Reward: -27.00, Steps:27, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 149/800, Reward: -23.00, Steps:23, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 150/800, Reward: -45.00, Steps:45, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 151/800, Reward: -31.00, Steps:31, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 152/800, Reward: -33.00, Steps:33, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 153/800, Reward: -31.00, Steps:31, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 154/800, Reward: -148.00, Steps:49, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 155/800, Reward: -41.00, Steps:41, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 156/800, Reward: -25.00, Steps:25, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 157/800, Reward: -29.00, Steps:29, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 158/800, Reward: -31.00, Steps:31, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 159/800, Reward: -33.00, Steps:33, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 160/800, Reward: -27.00, Steps:27, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 161/800, Reward: -27.00, Steps:27, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 162/800, Reward: -29.00, Steps:29, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 163/800, Reward: -27.00, Steps:27, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 164/800, Reward: -27.00, Steps:27, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 165/800, Reward: -23.00, Steps:23, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 166/800, Reward: -35.00, Steps:35, 
Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 167/800, Reward: -21.00, Steps:21, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 168/800, Reward: -23.00, Steps:23, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 169/800, Reward: -23.00, Steps:23, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 170/800, Reward: -41.00, Steps:41, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 171/800, Reward: -39.00, Steps:39, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 172/800, Reward: -21.00, Steps:21, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 173/800, Reward: -25.00, Steps:25, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 174/800, Reward: -23.00, Steps:23, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 175/800, Reward: -31.00, Steps:31, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 176/800, Reward: -21.00, Steps:21, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 177/800, Reward: -155.00, Steps:56, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 178/800, Reward: -21.00, Steps:21, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 179/800, Reward: -37.00, Steps:37, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 180/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 181/800, Reward: -19.00, Steps:19, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 182/800, Reward: -39.00, Steps:39, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 183/800, Reward: -25.00, Steps:25, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 184/800, Reward: -25.00, Steps:25, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 185/800, Reward: -19.00, Steps:19, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 186/800, Reward: -29.00, Steps:29, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 187/800, Reward: -29.00, Steps:29, Epislon: 0.010 
+2022-10-30 02:11:47 - r - INFO: - Episode: 188/800, Reward: -25.00, Steps:25, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 189/800, Reward: -25.00, Steps:25, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 190/800, Reward: -42.00, Steps:42, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 191/800, Reward: -21.00, Steps:21, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 192/800, Reward: -21.00, Steps:21, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 193/800, Reward: -25.00, Steps:25, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 194/800, Reward: -29.00, Steps:29, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 195/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 196/800, Reward: -21.00, Steps:21, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 197/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 198/800, Reward: -29.00, Steps:29, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 199/800, Reward: -25.00, Steps:25, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 200/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 201/800, Reward: -33.00, Steps:33, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 202/800, Reward: -37.00, Steps:37, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 203/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 204/800, Reward: -29.00, Steps:29, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 205/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 206/800, Reward: -25.00, Steps:25, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 207/800, Reward: -23.00, Steps:23, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 208/800, Reward: -25.00, Steps:25, Epislon: 0.010 +2022-10-30 02:11:47 - r 
- INFO: - Episode: 209/800, Reward: -25.00, Steps:25, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 210/800, Reward: -25.00, Steps:25, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 211/800, Reward: -21.00, Steps:21, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 212/800, Reward: -37.00, Steps:37, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 213/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 214/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 215/800, Reward: -23.00, Steps:23, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 216/800, Reward: -31.00, Steps:31, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 217/800, Reward: -21.00, Steps:21, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 218/800, Reward: -21.00, Steps:21, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 219/800, Reward: -21.00, Steps:21, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 220/800, Reward: -19.00, Steps:19, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 221/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 222/800, Reward: -27.00, Steps:27, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 223/800, Reward: -23.00, Steps:23, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 224/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 225/800, Reward: -19.00, Steps:19, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 226/800, Reward: -43.00, Steps:43, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 227/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 228/800, Reward: -25.00, Steps:25, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 229/800, Reward: -19.00, Steps:19, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 
230/800, Reward: -19.00, Steps:19, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 231/800, Reward: -19.00, Steps:19, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 232/800, Reward: -19.00, Steps:19, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 233/800, Reward: -21.00, Steps:21, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 234/800, Reward: -23.00, Steps:23, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 235/800, Reward: -31.00, Steps:31, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 236/800, Reward: -23.00, Steps:23, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 237/800, Reward: -19.00, Steps:19, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 238/800, Reward: -29.00, Steps:29, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 239/800, Reward: -30.00, Steps:30, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 240/800, Reward: -19.00, Steps:19, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 241/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 242/800, Reward: -24.00, Steps:24, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 243/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 244/800, Reward: -23.00, Steps:23, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 245/800, Reward: -21.00, Steps:21, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 246/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 247/800, Reward: -29.00, Steps:29, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 248/800, Reward: -19.00, Steps:19, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 249/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 250/800, Reward: -29.00, Steps:29, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 251/800, Reward: -21.00, 
Steps:21, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 252/800, Reward: -25.00, Steps:25, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 253/800, Reward: -25.00, Steps:25, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 254/800, Reward: -19.00, Steps:19, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 255/800, Reward: -25.00, Steps:25, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 256/800, Reward: -21.00, Steps:21, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 257/800, Reward: -19.00, Steps:19, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 258/800, Reward: -19.00, Steps:19, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 259/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 260/800, Reward: -21.00, Steps:21, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 261/800, Reward: -19.00, Steps:19, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 262/800, Reward: -23.00, Steps:23, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 263/800, Reward: -27.00, Steps:27, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 264/800, Reward: -19.00, Steps:19, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 265/800, Reward: -21.00, Steps:21, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 266/800, Reward: -21.00, Steps:21, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 267/800, Reward: -23.00, Steps:23, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 268/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 269/800, Reward: -25.00, Steps:25, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 270/800, Reward: -27.00, Steps:27, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 271/800, Reward: -19.00, Steps:19, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 272/800, Reward: -19.00, Steps:19, Epislon: 0.010 
+2022-10-30 02:11:47 - r - INFO: - Episode: 273/800, Reward: -21.00, Steps:21, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 274/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 275/800, Reward: -21.00, Steps:21, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 276/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 277/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 278/800, Reward: -23.00, Steps:23, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 279/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 280/800, Reward: -19.00, Steps:19, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 281/800, Reward: -23.00, Steps:23, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 282/800, Reward: -27.00, Steps:27, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 283/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 284/800, Reward: -23.00, Steps:23, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 285/800, Reward: -19.00, Steps:19, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 286/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 287/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 288/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 289/800, Reward: -21.00, Steps:21, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 290/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 291/800, Reward: -21.00, Steps:21, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 292/800, Reward: -19.00, Steps:19, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 293/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:47 - r 
- INFO: - Episode: 294/800, Reward: -120.00, Steps:21, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 295/800, Reward: -21.00, Steps:21, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 296/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 297/800, Reward: -19.00, Steps:19, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 298/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 299/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 300/800, Reward: -35.00, Steps:35, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 301/800, Reward: -21.00, Steps:21, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 302/800, Reward: -21.00, Steps:21, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 303/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 304/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 305/800, Reward: -23.00, Steps:23, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 306/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 307/800, Reward: -21.00, Steps:21, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 308/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 309/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 310/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 311/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 312/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 313/800, Reward: -19.00, Steps:19, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 314/800, Reward: -25.00, Steps:25, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 
315/800, Reward: -19.00, Steps:19, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 316/800, Reward: -32.00, Steps:32, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 317/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 318/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 319/800, Reward: -21.00, Steps:21, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 320/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 321/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 322/800, Reward: -19.00, Steps:19, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 323/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 324/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 325/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 326/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 327/800, Reward: -19.00, Steps:19, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 328/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 329/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 330/800, Reward: -21.00, Steps:21, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 331/800, Reward: -19.00, Steps:19, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 332/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 333/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 334/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 335/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 336/800, Reward: -19.00, 
Steps:19, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 337/800, Reward: -26.00, Steps:26, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 338/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 339/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 340/800, Reward: -21.00, Steps:21, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 341/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 342/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 343/800, Reward: -19.00, Steps:19, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 344/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 345/800, Reward: -27.00, Steps:27, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 346/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 347/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 348/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 349/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 350/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 351/800, Reward: -19.00, Steps:19, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 352/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 353/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 354/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 355/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 356/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 357/800, Reward: -17.00, Steps:17, Epislon: 0.010 
+2022-10-30 02:11:47 - r - INFO: - Episode: 358/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 359/800, Reward: -19.00, Steps:19, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 360/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 361/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 362/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 363/800, Reward: -19.00, Steps:19, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 364/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 365/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 366/800, Reward: -21.00, Steps:21, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 367/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 368/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 369/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 370/800, Reward: -21.00, Steps:21, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 371/800, Reward: -19.00, Steps:19, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 372/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 373/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 374/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 375/800, Reward: -25.00, Steps:25, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 376/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 377/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 378/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:47 - r 
- INFO: - Episode: 379/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 380/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 381/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 382/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 383/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 384/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 385/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 386/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 387/800, Reward: -25.00, Steps:25, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 388/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 389/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 390/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 391/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 392/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 393/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 394/800, Reward: -122.00, Steps:23, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 395/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 396/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 397/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 398/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 399/800, Reward: -21.00, Steps:21, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 
400/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 401/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 402/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 403/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 404/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 405/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 406/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 407/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 408/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 409/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 410/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 411/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 412/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 413/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 414/800, Reward: -19.00, Steps:19, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 415/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 416/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 417/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 418/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 419/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 420/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 421/800, Reward: -15.00, 
Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 422/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 423/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 424/800, Reward: -21.00, Steps:21, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 425/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 426/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 427/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 428/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 429/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 430/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 431/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 432/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 433/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 434/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 435/800, Reward: -21.00, Steps:21, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 436/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 437/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 438/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 439/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 440/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 441/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 442/800, Reward: -17.00, Steps:17, Epislon: 0.010 
+2022-10-30 02:11:47 - r - INFO: - Episode: 443/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 444/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 445/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 446/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 447/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 448/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 449/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 450/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 451/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 452/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 453/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 454/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 455/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 456/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 457/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 458/800, Reward: -22.00, Steps:22, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 459/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 460/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 461/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 462/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 463/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r 
- INFO: - Episode: 464/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 465/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 466/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 467/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 468/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 469/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 470/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 471/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 472/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 473/800, Reward: -21.00, Steps:21, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 474/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 475/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 476/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 477/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 478/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 479/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 480/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 481/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 482/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 483/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 484/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 
485/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 486/800, Reward: -21.00, Steps:21, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 487/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 488/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 489/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 490/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 491/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 492/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 493/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 494/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 495/800, Reward: -19.00, Steps:19, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 496/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 497/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 498/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 499/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 500/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 501/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 502/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 503/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 504/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 505/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 506/800, Reward: -15.00, 
Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 507/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 508/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 509/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 510/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 511/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 512/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 513/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 514/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 515/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 516/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 517/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 518/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 519/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 520/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 521/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 522/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 523/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 524/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 525/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 526/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 527/800, Reward: -15.00, Steps:15, Epislon: 0.010 
+2022-10-30 02:11:47 - r - INFO: - Episode: 528/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 529/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 530/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 531/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 532/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 533/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 534/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 535/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 536/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 537/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 538/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 539/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 540/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 541/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 542/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 543/800, Reward: -19.00, Steps:19, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 544/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 545/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 546/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 547/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 548/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r 
- INFO: - Episode: 549/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 550/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 551/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 552/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 553/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 554/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 555/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 556/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 557/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 558/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:47 - r - INFO: - Episode: 559/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 560/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 561/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 562/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 563/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 564/800, Reward: -20.00, Steps:20, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 565/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 566/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 567/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 568/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 569/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 
570/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 571/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 572/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 573/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 574/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 575/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 576/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 577/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 578/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 579/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 580/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 581/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 582/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 583/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 584/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 585/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 586/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 587/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 588/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 589/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 590/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 591/800, Reward: -15.00, 
Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 592/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 593/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 594/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 595/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 596/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 597/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 598/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 599/800, Reward: -16.00, Steps:16, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 600/800, Reward: -16.00, Steps:16, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 601/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 602/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 603/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 604/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 605/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 606/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 607/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 608/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 609/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 610/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 611/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 612/800, Reward: -15.00, Steps:15, Epislon: 0.010 
+2022-10-30 02:11:48 - r - INFO: - Episode: 613/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 614/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 615/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 616/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 617/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 618/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 619/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 620/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 621/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 622/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 623/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 624/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 625/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 626/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 627/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 628/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 629/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 630/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 631/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 632/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 633/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r 
- INFO: - Episode: 634/800, Reward: -21.00, Steps:21, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 635/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 636/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 637/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 638/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 639/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 640/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 641/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 642/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 643/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 644/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 645/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 646/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 647/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 648/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 649/800, Reward: -21.00, Steps:21, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 650/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 651/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 652/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 653/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 654/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 
655/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 656/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 657/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 658/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 659/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 660/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 661/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 662/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 663/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 664/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 665/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 666/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 667/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 668/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 669/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 670/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 671/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 672/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 673/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 674/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 675/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 676/800, Reward: -15.00, 
Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 677/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 678/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 679/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 680/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 681/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 682/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 683/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 684/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 685/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 686/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 687/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 688/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 689/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 690/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 691/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 692/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 693/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 694/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 695/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 696/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 697/800, Reward: -15.00, Steps:15, Epislon: 0.010 
+2022-10-30 02:11:48 - r - INFO: - Episode: 698/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 699/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 700/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 701/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 702/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 703/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 704/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 705/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 706/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 707/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 708/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 709/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 710/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 711/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 712/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 713/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 714/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 715/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 716/800, Reward: -16.00, Steps:16, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 717/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 718/800, Reward: -117.00, Steps:18, Epislon: 0.010 +2022-10-30 02:11:48 - r 
- INFO: - Episode: 719/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 720/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 721/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 722/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 723/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 724/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 725/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 726/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 727/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 728/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 729/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 730/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 731/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 732/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 733/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 734/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 735/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 736/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 737/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 738/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 739/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 
740/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 741/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 742/800, Reward: -16.00, Steps:16, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 743/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 744/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 745/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 746/800, Reward: -19.00, Steps:19, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 747/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 748/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 749/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 750/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 751/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 752/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 753/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 754/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 755/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 756/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 757/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 758/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 759/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 760/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 761/800, Reward: -15.00, 
Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 762/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 763/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 764/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 765/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 766/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 767/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 768/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 769/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 770/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 771/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 772/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 773/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 774/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 775/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 776/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 777/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 778/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 779/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 780/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 781/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 782/800, Reward: -15.00, Steps:15, Epislon: 0.010 
+2022-10-30 02:11:48 - r - INFO: - Episode: 783/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 784/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 785/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 786/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 787/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 788/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 789/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 790/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 791/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 792/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 793/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 794/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 795/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 796/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 797/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 798/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 799/800, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Episode: 800/800, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:11:48 - r - INFO: - Finish training! 
diff --git a/projects/codes/Sarsa/Train_CliffWalking-v0_Sarsa_20221030-021146/models/checkpoint.pkl b/projects/codes/Sarsa/Train_CliffWalking-v0_Sarsa_20221030-021146/models/checkpoint.pkl new file mode 100644 index 0000000..d226d4c Binary files /dev/null and b/projects/codes/Sarsa/Train_CliffWalking-v0_Sarsa_20221030-021146/models/checkpoint.pkl differ diff --git a/projects/codes/Sarsa/Train_CliffWalking-v0_Sarsa_20221030-021146/results/learning_curve.png b/projects/codes/Sarsa/Train_CliffWalking-v0_Sarsa_20221030-021146/results/learning_curve.png new file mode 100644 index 0000000..3c4dd0f Binary files /dev/null and b/projects/codes/Sarsa/Train_CliffWalking-v0_Sarsa_20221030-021146/results/learning_curve.png differ diff --git a/projects/codes/Sarsa/Train_CliffWalking-v0_Sarsa_20221030-021146/results/res.csv b/projects/codes/Sarsa/Train_CliffWalking-v0_Sarsa_20221030-021146/results/res.csv new file mode 100644 index 0000000..16858a4 --- /dev/null +++ b/projects/codes/Sarsa/Train_CliffWalking-v0_Sarsa_20221030-021146/results/res.csv @@ -0,0 +1,801 @@ +episodes,rewards,steps +0,-1091,200 +1,-320,122 +2,-794,200 +3,-596,200 +4,-398,200 +5,-59,59 +6,-299,200 +7,-82,82 +8,-125,125 +9,-75,75 +10,-285,186 +11,-103,103 +12,-103,103 +13,-131,131 +14,-53,53 +15,-113,113 +16,-125,125 +17,-95,95 +18,-97,97 +19,-145,145 +20,-89,89 +21,-97,97 +22,-115,115 +23,-121,121 +24,-53,53 +25,-111,111 +26,-97,97 +27,-206,107 +28,-147,147 +29,-36,36 +30,-216,117 +31,-103,103 +32,-87,87 +33,-80,80 +34,-73,73 +35,-83,83 +36,-143,44 +37,-241,142 +38,-77,77 +39,-49,49 +40,-87,87 +41,-47,47 +42,-89,89 +43,-31,31 +44,-192,93 +45,-85,85 +46,-55,55 +47,-59,59 +48,-60,60 +49,-50,50 +50,-23,23 +51,-101,101 +52,-43,43 +53,-70,70 +54,-35,35 +55,-47,47 +56,-80,80 +57,-61,61 +58,-35,35 +59,-73,73 +60,-54,54 +61,-37,37 +62,-65,65 +63,-41,41 +64,-81,81 +65,-39,39 +66,-35,35 +67,-61,61 +68,-57,57 +69,-43,43 +70,-59,59 +71,-43,43 +72,-51,51 +73,-43,43 +74,-69,69 +75,-41,41 +76,-194,95 +77,-35,35 +78,-35,35 
+79,-81,81 +80,-65,65 +81,-35,35 +82,-47,47 +83,-53,53 +84,-165,66 +85,-69,69 +86,-35,35 +87,-56,56 +88,-164,65 +89,-45,45 +90,-43,43 +91,-43,43 +92,-29,29 +93,-69,69 +94,-33,33 +95,-57,57 +96,-29,29 +97,-55,55 +98,-61,61 +99,-162,63 +100,-55,55 +101,-31,31 +102,-53,53 +103,-39,39 +104,-55,55 +105,-25,25 +106,-33,33 +107,-49,49 +108,-65,65 +109,-45,45 +110,-29,29 +111,-25,25 +112,-51,51 +113,-43,43 +114,-47,47 +115,-41,41 +116,-27,27 +117,-31,31 +118,-180,81 +119,-43,43 +120,-25,25 +121,-47,47 +122,-65,65 +123,-29,29 +124,-31,31 +125,-49,49 +126,-25,25 +127,-45,45 +128,-49,49 +129,-37,37 +130,-49,49 +131,-25,25 +132,-29,29 +133,-35,35 +134,-37,37 +135,-43,43 +136,-31,31 +137,-51,51 +138,-25,25 +139,-51,51 +140,-21,21 +141,-35,35 +142,-31,31 +143,-41,41 +144,-29,29 +145,-27,27 +146,-47,47 +147,-27,27 +148,-23,23 +149,-45,45 +150,-31,31 +151,-33,33 +152,-31,31 +153,-148,49 +154,-41,41 +155,-25,25 +156,-29,29 +157,-31,31 +158,-33,33 +159,-27,27 +160,-27,27 +161,-29,29 +162,-27,27 +163,-27,27 +164,-23,23 +165,-35,35 +166,-21,21 +167,-23,23 +168,-23,23 +169,-41,41 +170,-39,39 +171,-21,21 +172,-25,25 +173,-23,23 +174,-31,31 +175,-21,21 +176,-155,56 +177,-21,21 +178,-37,37 +179,-17,17 +180,-19,19 +181,-39,39 +182,-25,25 +183,-25,25 +184,-19,19 +185,-29,29 +186,-29,29 +187,-25,25 +188,-25,25 +189,-42,42 +190,-21,21 +191,-21,21 +192,-25,25 +193,-29,29 +194,-15,15 +195,-21,21 +196,-17,17 +197,-29,29 +198,-25,25 +199,-15,15 +200,-33,33 +201,-37,37 +202,-17,17 +203,-29,29 +204,-17,17 +205,-25,25 +206,-23,23 +207,-25,25 +208,-25,25 +209,-25,25 +210,-21,21 +211,-37,37 +212,-17,17 +213,-17,17 +214,-23,23 +215,-31,31 +216,-21,21 +217,-21,21 +218,-21,21 +219,-19,19 +220,-17,17 +221,-27,27 +222,-23,23 +223,-15,15 +224,-19,19 +225,-43,43 +226,-15,15 +227,-25,25 +228,-19,19 +229,-19,19 +230,-19,19 +231,-19,19 +232,-21,21 +233,-23,23 +234,-31,31 +235,-23,23 +236,-19,19 +237,-29,29 +238,-30,30 +239,-19,19 +240,-17,17 +241,-24,24 +242,-15,15 +243,-23,23 +244,-21,21 +245,-15,15 
+246,-29,29 +247,-19,19 +248,-17,17 +249,-29,29 +250,-21,21 +251,-25,25 +252,-25,25 +253,-19,19 +254,-25,25 +255,-21,21 +256,-19,19 +257,-19,19 +258,-15,15 +259,-21,21 +260,-19,19 +261,-23,23 +262,-27,27 +263,-19,19 +264,-21,21 +265,-21,21 +266,-23,23 +267,-17,17 +268,-25,25 +269,-27,27 +270,-19,19 +271,-19,19 +272,-21,21 +273,-15,15 +274,-21,21 +275,-17,17 +276,-15,15 +277,-23,23 +278,-17,17 +279,-19,19 +280,-23,23 +281,-27,27 +282,-17,17 +283,-23,23 +284,-19,19 +285,-17,17 +286,-17,17 +287,-15,15 +288,-21,21 +289,-15,15 +290,-21,21 +291,-19,19 +292,-17,17 +293,-120,21 +294,-21,21 +295,-15,15 +296,-19,19 +297,-15,15 +298,-15,15 +299,-35,35 +300,-21,21 +301,-21,21 +302,-15,15 +303,-15,15 +304,-23,23 +305,-17,17 +306,-21,21 +307,-17,17 +308,-15,15 +309,-15,15 +310,-15,15 +311,-17,17 +312,-19,19 +313,-25,25 +314,-19,19 +315,-32,32 +316,-15,15 +317,-17,17 +318,-21,21 +319,-15,15 +320,-17,17 +321,-19,19 +322,-17,17 +323,-17,17 +324,-17,17 +325,-17,17 +326,-19,19 +327,-15,15 +328,-15,15 +329,-21,21 +330,-19,19 +331,-15,15 +332,-17,17 +333,-17,17 +334,-15,15 +335,-19,19 +336,-26,26 +337,-17,17 +338,-15,15 +339,-21,21 +340,-17,17 +341,-15,15 +342,-19,19 +343,-17,17 +344,-27,27 +345,-15,15 +346,-17,17 +347,-15,15 +348,-17,17 +349,-17,17 +350,-19,19 +351,-15,15 +352,-15,15 +353,-15,15 +354,-15,15 +355,-17,17 +356,-17,17 +357,-15,15 +358,-19,19 +359,-15,15 +360,-17,17 +361,-17,17 +362,-19,19 +363,-17,17 +364,-17,17 +365,-21,21 +366,-17,17 +367,-15,15 +368,-17,17 +369,-21,21 +370,-19,19 +371,-17,17 +372,-15,15 +373,-15,15 +374,-25,25 +375,-15,15 +376,-15,15 +377,-17,17 +378,-17,17 +379,-17,17 +380,-15,15 +381,-15,15 +382,-15,15 +383,-15,15 +384,-17,17 +385,-17,17 +386,-25,25 +387,-17,17 +388,-15,15 +389,-15,15 +390,-17,17 +391,-15,15 +392,-15,15 +393,-122,23 +394,-15,15 +395,-15,15 +396,-15,15 +397,-15,15 +398,-21,21 +399,-15,15 +400,-17,17 +401,-17,17 +402,-17,17 +403,-15,15 +404,-15,15 +405,-15,15 +406,-17,17 +407,-15,15 +408,-15,15 +409,-17,17 +410,-15,15 +411,-15,15 
+412,-17,17 +413,-19,19 +414,-17,17 +415,-17,17 +416,-17,17 +417,-15,15 +418,-15,15 +419,-17,17 +420,-15,15 +421,-15,15 +422,-15,15 +423,-21,21 +424,-15,15 +425,-15,15 +426,-17,17 +427,-15,15 +428,-17,17 +429,-15,15 +430,-15,15 +431,-15,15 +432,-15,15 +433,-15,15 +434,-21,21 +435,-15,15 +436,-15,15 +437,-17,17 +438,-15,15 +439,-15,15 +440,-15,15 +441,-17,17 +442,-15,15 +443,-15,15 +444,-17,17 +445,-15,15 +446,-17,17 +447,-17,17 +448,-15,15 +449,-17,17 +450,-15,15 +451,-17,17 +452,-15,15 +453,-15,15 +454,-15,15 +455,-15,15 +456,-15,15 +457,-22,22 +458,-15,15 +459,-15,15 +460,-15,15 +461,-15,15 +462,-15,15 +463,-15,15 +464,-15,15 +465,-15,15 +466,-15,15 +467,-15,15 +468,-15,15 +469,-15,15 +470,-15,15 +471,-17,17 +472,-21,21 +473,-15,15 +474,-15,15 +475,-15,15 +476,-15,15 +477,-17,17 +478,-15,15 +479,-17,17 +480,-17,17 +481,-15,15 +482,-15,15 +483,-15,15 +484,-17,17 +485,-21,21 +486,-15,15 +487,-15,15 +488,-15,15 +489,-15,15 +490,-15,15 +491,-17,17 +492,-15,15 +493,-17,17 +494,-19,19 +495,-15,15 +496,-15,15 +497,-15,15 +498,-15,15 +499,-15,15 +500,-15,15 +501,-15,15 +502,-15,15 +503,-15,15 +504,-15,15 +505,-15,15 +506,-15,15 +507,-15,15 +508,-17,17 +509,-15,15 +510,-15,15 +511,-15,15 +512,-15,15 +513,-15,15 +514,-15,15 +515,-15,15 +516,-17,17 +517,-15,15 +518,-15,15 +519,-15,15 +520,-15,15 +521,-15,15 +522,-15,15 +523,-15,15 +524,-15,15 +525,-15,15 +526,-15,15 +527,-15,15 +528,-15,15 +529,-15,15 +530,-15,15 +531,-17,17 +532,-17,17 +533,-15,15 +534,-17,17 +535,-15,15 +536,-15,15 +537,-15,15 +538,-15,15 +539,-15,15 +540,-15,15 +541,-15,15 +542,-19,19 +543,-15,15 +544,-15,15 +545,-15,15 +546,-17,17 +547,-15,15 +548,-15,15 +549,-15,15 +550,-15,15 +551,-15,15 +552,-15,15 +553,-15,15 +554,-15,15 +555,-15,15 +556,-15,15 +557,-15,15 +558,-15,15 +559,-15,15 +560,-15,15 +561,-15,15 +562,-15,15 +563,-20,20 +564,-17,17 +565,-15,15 +566,-15,15 +567,-15,15 +568,-15,15 +569,-15,15 +570,-17,17 +571,-17,17 +572,-15,15 +573,-15,15 +574,-15,15 +575,-15,15 +576,-17,17 +577,-15,15 
+578,-15,15 +579,-15,15 +580,-15,15 +581,-15,15 +582,-15,15 +583,-17,17 +584,-15,15 +585,-15,15 +586,-15,15 +587,-15,15 +588,-15,15 +589,-15,15 +590,-15,15 +591,-15,15 +592,-15,15 +593,-15,15 +594,-17,17 +595,-15,15 +596,-15,15 +597,-15,15 +598,-16,16 +599,-16,16 +600,-15,15 +601,-15,15 +602,-15,15 +603,-15,15 +604,-15,15 +605,-17,17 +606,-15,15 +607,-15,15 +608,-15,15 +609,-15,15 +610,-15,15 +611,-15,15 +612,-15,15 +613,-15,15 +614,-15,15 +615,-15,15 +616,-15,15 +617,-15,15 +618,-15,15 +619,-15,15 +620,-15,15 +621,-15,15 +622,-15,15 +623,-15,15 +624,-15,15 +625,-15,15 +626,-15,15 +627,-15,15 +628,-15,15 +629,-15,15 +630,-15,15 +631,-15,15 +632,-15,15 +633,-21,21 +634,-15,15 +635,-15,15 +636,-15,15 +637,-15,15 +638,-15,15 +639,-15,15 +640,-15,15 +641,-15,15 +642,-15,15 +643,-15,15 +644,-15,15 +645,-15,15 +646,-17,17 +647,-15,15 +648,-21,21 +649,-15,15 +650,-15,15 +651,-15,15 +652,-15,15 +653,-17,17 +654,-15,15 +655,-15,15 +656,-15,15 +657,-15,15 +658,-15,15 +659,-15,15 +660,-15,15 +661,-15,15 +662,-15,15 +663,-15,15 +664,-17,17 +665,-15,15 +666,-15,15 +667,-15,15 +668,-15,15 +669,-15,15 +670,-17,17 +671,-15,15 +672,-15,15 +673,-15,15 +674,-15,15 +675,-15,15 +676,-15,15 +677,-17,17 +678,-15,15 +679,-15,15 +680,-15,15 +681,-15,15 +682,-15,15 +683,-15,15 +684,-15,15 +685,-15,15 +686,-15,15 +687,-15,15 +688,-15,15 +689,-15,15 +690,-15,15 +691,-15,15 +692,-15,15 +693,-15,15 +694,-15,15 +695,-15,15 +696,-15,15 +697,-15,15 +698,-15,15 +699,-15,15 +700,-15,15 +701,-15,15 +702,-15,15 +703,-17,17 +704,-15,15 +705,-15,15 +706,-15,15 +707,-15,15 +708,-15,15 +709,-15,15 +710,-17,17 +711,-15,15 +712,-15,15 +713,-15,15 +714,-15,15 +715,-16,16 +716,-15,15 +717,-117,18 +718,-15,15 +719,-17,17 +720,-15,15 +721,-15,15 +722,-15,15 +723,-15,15 +724,-15,15 +725,-15,15 +726,-15,15 +727,-15,15 +728,-15,15 +729,-15,15 +730,-15,15 +731,-15,15 +732,-15,15 +733,-15,15 +734,-15,15 +735,-15,15 +736,-15,15 +737,-15,15 +738,-15,15 +739,-15,15 +740,-15,15 +741,-16,16 +742,-15,15 +743,-17,17 
+744,-15,15 +745,-19,19 +746,-15,15 +747,-15,15 +748,-15,15 +749,-17,17 +750,-15,15 +751,-15,15 +752,-17,17 +753,-15,15 +754,-15,15 +755,-15,15 +756,-15,15 +757,-17,17 +758,-15,15 +759,-15,15 +760,-15,15 +761,-17,17 +762,-15,15 +763,-15,15 +764,-15,15 +765,-15,15 +766,-15,15 +767,-17,17 +768,-15,15 +769,-15,15 +770,-15,15 +771,-15,15 +772,-15,15 +773,-15,15 +774,-17,17 +775,-15,15 +776,-15,15 +777,-15,15 +778,-15,15 +779,-15,15 +780,-15,15 +781,-15,15 +782,-15,15 +783,-15,15 +784,-15,15 +785,-15,15 +786,-15,15 +787,-15,15 +788,-15,15 +789,-15,15 +790,-15,15 +791,-15,15 +792,-17,17 +793,-15,15 +794,-15,15 +795,-15,15 +796,-15,15 +797,-15,15 +798,-17,17 +799,-15,15 diff --git a/projects/codes/Sarsa/Train_Racetrack-v0_Sarsa_20221030-021315/config.yaml b/projects/codes/Sarsa/Train_Racetrack-v0_Sarsa_20221030-021315/config.yaml new file mode 100644 index 0000000..79e3694 --- /dev/null +++ b/projects/codes/Sarsa/Train_Racetrack-v0_Sarsa_20221030-021315/config.yaml @@ -0,0 +1,19 @@ +general_cfg: + algo_name: Sarsa + device: cpu + env_name: Racetrack-v0 + load_checkpoint: false + load_path: Train_CartPole-v1_DQN_20221026-054757 + max_steps: 200 + mode: train + save_fig: true + seed: 10 + show_fig: false + test_eps: 20 + train_eps: 400 +algo_cfg: + epsilon_decay: 200 + epsilon_end: 0.01 + epsilon_start: 0.9 + gamma: 0.99 + lr: 0.1 diff --git a/projects/codes/Sarsa/Train_Racetrack-v0_Sarsa_20221030-021315/logs/log.txt b/projects/codes/Sarsa/Train_Racetrack-v0_Sarsa_20221030-021315/logs/log.txt new file mode 100644 index 0000000..ffa79ca --- /dev/null +++ b/projects/codes/Sarsa/Train_Racetrack-v0_Sarsa_20221030-021315/logs/log.txt @@ -0,0 +1,404 @@ +2022-10-30 02:13:15 - r - INFO: - n_states: 4, n_actions: 9 +2022-10-30 02:13:15 - r - INFO: - Start training! 
+2022-10-30 02:13:15 - r - INFO: - Env: Racetrack-v0, Algorithm: Sarsa, Device: cpu +2022-10-30 02:13:15 - r - INFO: - Episode: 1/400, Reward: -870.00, Steps:200, Epislon: 0.336 +2022-10-30 02:13:15 - r - INFO: - Episode: 2/400, Reward: -740.00, Steps:200, Epislon: 0.129 +2022-10-30 02:13:15 - r - INFO: - Episode: 3/400, Reward: -710.00, Steps:200, Epislon: 0.054 +2022-10-30 02:13:15 - r - INFO: - Episode: 4/400, Reward: -600.00, Steps:200, Epislon: 0.026 +2022-10-30 02:13:15 - r - INFO: - Episode: 5/400, Reward: -580.00, Steps:200, Epislon: 0.016 +2022-10-30 02:13:15 - r - INFO: - Episode: 6/400, Reward: -620.00, Steps:200, Epislon: 0.012 +2022-10-30 02:13:16 - r - INFO: - Episode: 7/400, Reward: -590.00, Steps:200, Epislon: 0.011 +2022-10-30 02:13:16 - r - INFO: - Episode: 8/400, Reward: -590.00, Steps:200, Epislon: 0.010 +2022-10-30 02:13:16 - r - INFO: - Episode: 9/400, Reward: -520.00, Steps:200, Epislon: 0.010 +2022-10-30 02:13:16 - r - INFO: - Episode: 10/400, Reward: -570.00, Steps:200, Epislon: 0.010 +2022-10-30 02:13:16 - r - INFO: - Episode: 11/400, Reward: -580.00, Steps:200, Epislon: 0.010 +2022-10-30 02:13:16 - r - INFO: - Episode: 12/400, Reward: -580.00, Steps:200, Epislon: 0.010 +2022-10-30 02:13:16 - r - INFO: - Episode: 13/400, Reward: -500.00, Steps:200, Epislon: 0.010 +2022-10-30 02:13:16 - r - INFO: - Episode: 14/400, Reward: -540.00, Steps:200, Epislon: 0.010 +2022-10-30 02:13:16 - r - INFO: - Episode: 15/400, Reward: -510.00, Steps:200, Epislon: 0.010 +2022-10-30 02:13:16 - r - INFO: - Episode: 16/400, Reward: -570.00, Steps:200, Epislon: 0.010 +2022-10-30 02:13:16 - r - INFO: - Episode: 17/400, Reward: -560.00, Steps:200, Epislon: 0.010 +2022-10-30 02:13:16 - r - INFO: - Episode: 18/400, Reward: -540.00, Steps:200, Epislon: 0.010 +2022-10-30 02:13:16 - r - INFO: - Episode: 19/400, Reward: -490.00, Steps:200, Epislon: 0.010 +2022-10-30 02:13:16 - r - INFO: - Episode: 20/400, Reward: -490.00, Steps:200, Epislon: 0.010 +2022-10-30 02:13:16 - r 
- INFO: - Episode: 21/400, Reward: -530.00, Steps:200, Epislon: 0.010 +2022-10-30 02:13:16 - r - INFO: - Episode: 22/400, Reward: -520.00, Steps:200, Epislon: 0.010 +2022-10-30 02:13:16 - r - INFO: - Episode: 23/400, Reward: -530.00, Steps:200, Epislon: 0.010 +2022-10-30 02:13:16 - r - INFO: - Episode: 24/400, Reward: -520.00, Steps:200, Epislon: 0.010 +2022-10-30 02:13:16 - r - INFO: - Episode: 25/400, Reward: -500.00, Steps:200, Epislon: 0.010 +2022-10-30 02:13:16 - r - INFO: - Episode: 26/400, Reward: -510.00, Steps:200, Epislon: 0.010 +2022-10-30 02:13:16 - r - INFO: - Episode: 27/400, Reward: -520.00, Steps:200, Epislon: 0.010 +2022-10-30 02:13:16 - r - INFO: - Episode: 28/400, Reward: -530.00, Steps:200, Epislon: 0.010 +2022-10-30 02:13:17 - r - INFO: - Episode: 29/400, Reward: -560.00, Steps:200, Epislon: 0.010 +2022-10-30 02:13:17 - r - INFO: - Episode: 30/400, Reward: -490.00, Steps:200, Epislon: 0.010 +2022-10-30 02:13:17 - r - INFO: - Episode: 31/400, Reward: -530.00, Steps:200, Epislon: 0.010 +2022-10-30 02:13:17 - r - INFO: - Episode: 32/400, Reward: -359.00, Steps:149, Epislon: 0.010 +2022-10-30 02:13:17 - r - INFO: - Episode: 33/400, Reward: -470.00, Steps:200, Epislon: 0.010 +2022-10-30 02:13:17 - r - INFO: - Episode: 34/400, Reward: -510.00, Steps:200, Epislon: 0.010 +2022-10-30 02:13:17 - r - INFO: - Episode: 35/400, Reward: -520.00, Steps:200, Epislon: 0.010 +2022-10-30 02:13:17 - r - INFO: - Episode: 36/400, Reward: -500.00, Steps:200, Epislon: 0.010 +2022-10-30 02:13:17 - r - INFO: - Episode: 37/400, Reward: -540.00, Steps:200, Epislon: 0.010 +2022-10-30 02:13:17 - r - INFO: - Episode: 38/400, Reward: -560.00, Steps:200, Epislon: 0.010 +2022-10-30 02:13:17 - r - INFO: - Episode: 39/400, Reward: -500.00, Steps:200, Epislon: 0.010 +2022-10-30 02:13:17 - r - INFO: - Episode: 40/400, Reward: -480.00, Steps:200, Epislon: 0.010 +2022-10-30 02:13:17 - r - INFO: - Episode: 41/400, Reward: -490.00, Steps:200, Epislon: 0.010 +2022-10-30 02:13:17 - r - 
INFO: - Episode: 42/400, Reward: -480.00, Steps:200, Epislon: 0.010 +2022-10-30 02:13:17 - r - INFO: - Episode: 43/400, Reward: -540.00, Steps:200, Epislon: 0.010 +2022-10-30 02:13:17 - r - INFO: - Episode: 44/400, Reward: -500.00, Steps:200, Epislon: 0.010 +2022-10-30 02:13:17 - r - INFO: - Episode: 45/400, Reward: -500.00, Steps:200, Epislon: 0.010 +2022-10-30 02:13:17 - r - INFO: - Episode: 46/400, Reward: -480.00, Steps:200, Epislon: 0.010 +2022-10-30 02:13:17 - r - INFO: - Episode: 47/400, Reward: -550.00, Steps:200, Epislon: 0.010 +2022-10-30 02:13:17 - r - INFO: - Episode: 48/400, Reward: -490.00, Steps:200, Epislon: 0.010 +2022-10-30 02:13:17 - r - INFO: - Episode: 49/400, Reward: -540.00, Steps:200, Epislon: 0.010 +2022-10-30 02:13:17 - r - INFO: - Episode: 50/400, Reward: -420.00, Steps:200, Epislon: 0.010 +2022-10-30 02:13:17 - r - INFO: - Episode: 51/400, Reward: -530.00, Steps:200, Epislon: 0.010 +2022-10-30 02:13:17 - r - INFO: - Episode: 52/400, Reward: -510.00, Steps:200, Epislon: 0.010 +2022-10-30 02:13:18 - r - INFO: - Episode: 53/400, Reward: -530.00, Steps:200, Epislon: 0.010 +2022-10-30 02:13:18 - r - INFO: - Episode: 54/400, Reward: -460.00, Steps:200, Epislon: 0.010 +2022-10-30 02:13:18 - r - INFO: - Episode: 55/400, Reward: -480.00, Steps:200, Epislon: 0.010 +2022-10-30 02:13:18 - r - INFO: - Episode: 56/400, Reward: -480.00, Steps:200, Epislon: 0.010 +2022-10-30 02:13:18 - r - INFO: - Episode: 57/400, Reward: -470.00, Steps:200, Epislon: 0.010 +2022-10-30 02:13:18 - r - INFO: - Episode: 58/400, Reward: -490.00, Steps:200, Epislon: 0.010 +2022-10-30 02:13:18 - r - INFO: - Episode: 59/400, Reward: -470.00, Steps:200, Epislon: 0.010 +2022-10-30 02:13:18 - r - INFO: - Episode: 60/400, Reward: -500.00, Steps:200, Epislon: 0.010 +2022-10-30 02:13:18 - r - INFO: - Episode: 61/400, Reward: -500.00, Steps:200, Epislon: 0.010 +2022-10-30 02:13:18 - r - INFO: - Episode: 62/400, Reward: -480.00, Steps:200, Epislon: 0.010 +2022-10-30 02:13:18 - r - 
INFO: - Episode: 63/400, Reward: -450.00, Steps:200, Epislon: 0.010 +2022-10-30 02:13:18 - r - INFO: - Episode: 64/400, Reward: -490.00, Steps:200, Epislon: 0.010 +2022-10-30 02:13:18 - r - INFO: - Episode: 65/400, Reward: -420.00, Steps:200, Epislon: 0.010 +2022-10-30 02:13:18 - r - INFO: - Episode: 66/400, Reward: -480.00, Steps:200, Epislon: 0.010 +2022-10-30 02:13:18 - r - INFO: - Episode: 67/400, Reward: -440.00, Steps:200, Epislon: 0.010 +2022-10-30 02:13:18 - r - INFO: - Episode: 68/400, Reward: -490.00, Steps:200, Epislon: 0.010 +2022-10-30 02:13:18 - r - INFO: - Episode: 69/400, Reward: -188.00, Steps:88, Epislon: 0.010 +2022-10-30 02:13:18 - r - INFO: - Episode: 70/400, Reward: -327.00, Steps:167, Epislon: 0.010 +2022-10-30 02:13:18 - r - INFO: - Episode: 71/400, Reward: -530.00, Steps:200, Epislon: 0.010 +2022-10-30 02:13:18 - r - INFO: - Episode: 72/400, Reward: -48.00, Steps:28, Epislon: 0.010 +2022-10-30 02:13:18 - r - INFO: - Episode: 73/400, Reward: -460.00, Steps:200, Epislon: 0.010 +2022-10-30 02:13:18 - r - INFO: - Episode: 74/400, Reward: -460.00, Steps:200, Epislon: 0.010 +2022-10-30 02:13:18 - r - INFO: - Episode: 75/400, Reward: -25.00, Steps:25, Epislon: 0.010 +2022-10-30 02:13:18 - r - INFO: - Episode: 76/400, Reward: -428.00, Steps:178, Epislon: 0.010 +2022-10-30 02:13:18 - r - INFO: - Episode: 77/400, Reward: -460.00, Steps:200, Epislon: 0.010 +2022-10-30 02:13:18 - r - INFO: - Episode: 78/400, Reward: -341.00, Steps:151, Epislon: 0.010 +2022-10-30 02:13:18 - r - INFO: - Episode: 79/400, Reward: -480.00, Steps:200, Epislon: 0.010 +2022-10-30 02:13:19 - r - INFO: - Episode: 80/400, Reward: -346.00, Steps:156, Epislon: 0.010 +2022-10-30 02:13:19 - r - INFO: - Episode: 81/400, Reward: -34.00, Steps:24, Epislon: 0.010 +2022-10-30 02:13:19 - r - INFO: - Episode: 82/400, Reward: -480.00, Steps:200, Epislon: 0.010 +2022-10-30 02:13:19 - r - INFO: - Episode: 83/400, Reward: -480.00, Steps:200, Epislon: 0.010 +2022-10-30 02:13:19 - r - INFO: - 
Episode: 84/400, Reward: -222.00, Steps:112, Epislon: 0.010 +2022-10-30 02:13:19 - r - INFO: - Episode: 85/400, Reward: -470.00, Steps:200, Epislon: 0.010 +2022-10-30 02:13:19 - r - INFO: - Episode: 86/400, Reward: -409.00, Steps:169, Epislon: 0.010 +2022-10-30 02:13:19 - r - INFO: - Episode: 87/400, Reward: -139.00, Steps:59, Epislon: 0.010 +2022-10-30 02:13:19 - r - INFO: - Episode: 88/400, Reward: -520.00, Steps:200, Epislon: 0.010 +2022-10-30 02:13:19 - r - INFO: - Episode: 89/400, Reward: -108.00, Steps:58, Epislon: 0.010 +2022-10-30 02:13:19 - r - INFO: - Episode: 90/400, Reward: -3.00, Steps:13, Epislon: 0.010 +2022-10-30 02:13:19 - r - INFO: - Episode: 91/400, Reward: -131.00, Steps:71, Epislon: 0.010 +2022-10-30 02:13:19 - r - INFO: - Episode: 92/400, Reward: -355.00, Steps:145, Epislon: 0.010 +2022-10-30 02:13:19 - r - INFO: - Episode: 93/400, Reward: -470.00, Steps:200, Epislon: 0.010 +2022-10-30 02:13:19 - r - INFO: - Episode: 94/400, Reward: -450.00, Steps:200, Epislon: 0.010 +2022-10-30 02:13:19 - r - INFO: - Episode: 95/400, Reward: -490.00, Steps:200, Epislon: 0.010 +2022-10-30 02:13:19 - r - INFO: - Episode: 96/400, Reward: -425.00, Steps:185, Epislon: 0.010 +2022-10-30 02:13:19 - r - INFO: - Episode: 97/400, Reward: -130.00, Steps:70, Epislon: 0.010 +2022-10-30 02:13:19 - r - INFO: - Episode: 98/400, Reward: -246.00, Steps:116, Epislon: 0.010 +2022-10-30 02:13:19 - r - INFO: - Episode: 99/400, Reward: -480.00, Steps:200, Epislon: 0.010 +2022-10-30 02:13:19 - r - INFO: - Episode: 100/400, Reward: -500.00, Steps:200, Epislon: 0.010 +2022-10-30 02:13:19 - r - INFO: - Episode: 101/400, Reward: -13.00, Steps:13, Epislon: 0.010 +2022-10-30 02:13:19 - r - INFO: - Episode: 102/400, Reward: -63.00, Steps:33, Epislon: 0.010 +2022-10-30 02:13:19 - r - INFO: - Episode: 103/400, Reward: -311.00, Steps:131, Epislon: 0.010 +2022-10-30 02:13:19 - r - INFO: - Episode: 104/400, Reward: -450.00, Steps:200, Epislon: 0.010 +2022-10-30 02:13:19 - r - INFO: - Episode: 
105/400, Reward: -520.00, Steps:200, Epislon: 0.010 +2022-10-30 02:13:19 - r - INFO: - Episode: 106/400, Reward: -430.00, Steps:200, Epislon: 0.010 +2022-10-30 02:13:19 - r - INFO: - Episode: 107/400, Reward: -79.00, Steps:39, Epislon: 0.010 +2022-10-30 02:13:19 - r - INFO: - Episode: 108/400, Reward: -94.00, Steps:44, Epislon: 0.010 +2022-10-30 02:13:19 - r - INFO: - Episode: 109/400, Reward: -37.00, Steps:27, Epislon: 0.010 +2022-10-30 02:13:19 - r - INFO: - Episode: 110/400, Reward: -235.00, Steps:115, Epislon: 0.010 +2022-10-30 02:13:19 - r - INFO: - Episode: 111/400, Reward: -440.00, Steps:200, Epislon: 0.010 +2022-10-30 02:13:19 - r - INFO: - Episode: 112/400, Reward: -19.00, Steps:19, Epislon: 0.010 +2022-10-30 02:13:19 - r - INFO: - Episode: 113/400, Reward: -424.00, Steps:194, Epislon: 0.010 +2022-10-30 02:13:19 - r - INFO: - Episode: 114/400, Reward: -470.00, Steps:200, Epislon: 0.010 +2022-10-30 02:13:19 - r - INFO: - Episode: 115/400, Reward: -344.00, Steps:164, Epislon: 0.010 +2022-10-30 02:13:20 - r - INFO: - Episode: 116/400, Reward: -307.00, Steps:147, Epislon: 0.010 +2022-10-30 02:13:20 - r - INFO: - Episode: 117/400, Reward: -82.00, Steps:52, Epislon: 0.010 +2022-10-30 02:13:20 - r - INFO: - Episode: 118/400, Reward: -387.00, Steps:177, Epislon: 0.010 +2022-10-30 02:13:20 - r - INFO: - Episode: 119/400, Reward: -500.00, Steps:200, Epislon: 0.010 +2022-10-30 02:13:20 - r - INFO: - Episode: 120/400, Reward: -315.00, Steps:145, Epislon: 0.010 +2022-10-30 02:13:20 - r - INFO: - Episode: 121/400, Reward: -289.00, Steps:119, Epislon: 0.010 +2022-10-30 02:13:20 - r - INFO: - Episode: 122/400, Reward: -139.00, Steps:79, Epislon: 0.010 +2022-10-30 02:13:20 - r - INFO: - Episode: 123/400, Reward: -392.00, Steps:192, Epislon: 0.010 +2022-10-30 02:13:20 - r - INFO: - Episode: 124/400, Reward: -13.00, Steps:13, Epislon: 0.010 +2022-10-30 02:13:20 - r - INFO: - Episode: 125/400, Reward: -35.00, Steps:25, Epislon: 0.010 +2022-10-30 02:13:20 - r - INFO: - 
Episode: 126/400, Reward: -82.00, Steps:42, Epislon: 0.010 +2022-10-30 02:13:20 - r - INFO: - Episode: 127/400, Reward: -134.00, Steps:64, Epislon: 0.010 +2022-10-30 02:13:20 - r - INFO: - Episode: 128/400, Reward: -93.00, Steps:53, Epislon: 0.010 +2022-10-30 02:13:20 - r - INFO: - Episode: 129/400, Reward: 2.00, Steps:8, Epislon: 0.010 +2022-10-30 02:13:20 - r - INFO: - Episode: 130/400, Reward: -212.00, Steps:102, Epislon: 0.010 +2022-10-30 02:13:20 - r - INFO: - Episode: 131/400, Reward: -87.00, Steps:47, Epislon: 0.010 +2022-10-30 02:13:20 - r - INFO: - Episode: 132/400, Reward: -70.00, Steps:40, Epislon: 0.010 +2022-10-30 02:13:20 - r - INFO: - Episode: 133/400, Reward: -109.00, Steps:49, Epislon: 0.010 +2022-10-30 02:13:20 - r - INFO: - Episode: 134/400, Reward: -77.00, Steps:47, Epislon: 0.010 +2022-10-30 02:13:20 - r - INFO: - Episode: 135/400, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:13:20 - r - INFO: - Episode: 136/400, Reward: -118.00, Steps:58, Epislon: 0.010 +2022-10-30 02:13:20 - r - INFO: - Episode: 137/400, Reward: -132.00, Steps:62, Epislon: 0.010 +2022-10-30 02:13:20 - r - INFO: - Episode: 138/400, Reward: -76.00, Steps:36, Epislon: 0.010 +2022-10-30 02:13:20 - r - INFO: - Episode: 139/400, Reward: -93.00, Steps:63, Epislon: 0.010 +2022-10-30 02:13:20 - r - INFO: - Episode: 140/400, Reward: -357.00, Steps:157, Epislon: 0.010 +2022-10-30 02:13:20 - r - INFO: - Episode: 141/400, Reward: -129.00, Steps:69, Epislon: 0.010 +2022-10-30 02:13:20 - r - INFO: - Episode: 142/400, Reward: -46.00, Steps:26, Epislon: 0.010 +2022-10-30 02:13:20 - r - INFO: - Episode: 143/400, Reward: -60.00, Steps:30, Epislon: 0.010 +2022-10-30 02:13:20 - r - INFO: - Episode: 144/400, Reward: -339.00, Steps:159, Epislon: 0.010 +2022-10-30 02:13:20 - r - INFO: - Episode: 145/400, Reward: -10.00, Steps:10, Epislon: 0.010 +2022-10-30 02:13:20 - r - INFO: - Episode: 146/400, Reward: -164.00, Steps:84, Epislon: 0.010 +2022-10-30 02:13:20 - r - INFO: - Episode: 
147/400, Reward: -145.00, Steps:75, Epislon: 0.010 +2022-10-30 02:13:20 - r - INFO: - Episode: 148/400, Reward: -53.00, Steps:33, Epislon: 0.010 +2022-10-30 02:13:20 - r - INFO: - Episode: 149/400, Reward: -3.00, Steps:13, Epislon: 0.010 +2022-10-30 02:13:20 - r - INFO: - Episode: 150/400, Reward: -55.00, Steps:35, Epislon: 0.010 +2022-10-30 02:13:20 - r - INFO: - Episode: 151/400, Reward: -398.00, Steps:178, Epislon: 0.010 +2022-10-30 02:13:20 - r - INFO: - Episode: 152/400, Reward: 3.00, Steps:7, Epislon: 0.010 +2022-10-30 02:13:20 - r - INFO: - Episode: 153/400, Reward: -20.00, Steps:20, Epislon: 0.010 +2022-10-30 02:13:20 - r - INFO: - Episode: 154/400, Reward: -354.00, Steps:154, Epislon: 0.010 +2022-10-30 02:13:20 - r - INFO: - Episode: 155/400, Reward: -439.00, Steps:189, Epislon: 0.010 +2022-10-30 02:13:20 - r - INFO: - Episode: 156/400, Reward: -122.00, Steps:62, Epislon: 0.010 +2022-10-30 02:13:20 - r - INFO: - Episode: 157/400, Reward: -80.00, Steps:40, Epislon: 0.010 +2022-10-30 02:13:20 - r - INFO: - Episode: 158/400, Reward: -29.00, Steps:19, Epislon: 0.010 +2022-10-30 02:13:20 - r - INFO: - Episode: 159/400, Reward: -185.00, Steps:85, Epislon: 0.010 +2022-10-30 02:13:20 - r - INFO: - Episode: 160/400, Reward: -354.00, Steps:154, Epislon: 0.010 +2022-10-30 02:13:20 - r - INFO: - Episode: 161/400, Reward: -35.00, Steps:25, Epislon: 0.010 +2022-10-30 02:13:20 - r - INFO: - Episode: 162/400, Reward: -132.00, Steps:62, Epislon: 0.010 +2022-10-30 02:13:20 - r - INFO: - Episode: 163/400, Reward: -155.00, Steps:75, Epislon: 0.010 +2022-10-30 02:13:20 - r - INFO: - Episode: 164/400, Reward: -261.00, Steps:111, Epislon: 0.010 +2022-10-30 02:13:20 - r - INFO: - Episode: 165/400, Reward: 3.00, Steps:7, Epislon: 0.010 +2022-10-30 02:13:20 - r - INFO: - Episode: 166/400, Reward: -135.00, Steps:65, Epislon: 0.010 +2022-10-30 02:13:20 - r - INFO: - Episode: 167/400, Reward: -57.00, Steps:37, Epislon: 0.010 +2022-10-30 02:13:20 - r - INFO: - Episode: 168/400, Reward: 
-432.00, Steps:182, Epislon: 0.010 +2022-10-30 02:13:20 - r - INFO: - Episode: 169/400, Reward: -63.00, Steps:33, Epislon: 0.010 +2022-10-30 02:13:20 - r - INFO: - Episode: 170/400, Reward: -119.00, Steps:59, Epislon: 0.010 +2022-10-30 02:13:20 - r - INFO: - Episode: 171/400, Reward: 3.00, Steps:7, Epislon: 0.010 +2022-10-30 02:13:20 - r - INFO: - Episode: 172/400, Reward: -16.00, Steps:16, Epislon: 0.010 +2022-10-30 02:13:20 - r - INFO: - Episode: 173/400, Reward: -112.00, Steps:62, Epislon: 0.010 +2022-10-30 02:13:20 - r - INFO: - Episode: 174/400, Reward: 1.00, Steps:9, Epislon: 0.010 +2022-10-30 02:13:20 - r - INFO: - Episode: 175/400, Reward: -354.00, Steps:164, Epislon: 0.010 +2022-10-30 02:13:20 - r - INFO: - Episode: 176/400, Reward: -101.00, Steps:61, Epislon: 0.010 +2022-10-30 02:13:20 - r - INFO: - Episode: 177/400, Reward: -86.00, Steps:46, Epislon: 0.010 +2022-10-30 02:13:20 - r - INFO: - Episode: 178/400, Reward: -33.00, Steps:23, Epislon: 0.010 +2022-10-30 02:13:20 - r - INFO: - Episode: 179/400, Reward: -339.00, Steps:139, Epislon: 0.010 +2022-10-30 02:13:20 - r - INFO: - Episode: 180/400, Reward: 3.00, Steps:7, Epislon: 0.010 +2022-10-30 02:13:20 - r - INFO: - Episode: 181/400, Reward: -9.00, Steps:9, Epislon: 0.010 +2022-10-30 02:13:20 - r - INFO: - Episode: 182/400, Reward: -224.00, Steps:104, Epislon: 0.010 +2022-10-30 02:13:20 - r - INFO: - Episode: 183/400, Reward: -11.00, Steps:11, Epislon: 0.010 +2022-10-30 02:13:20 - r - INFO: - Episode: 184/400, Reward: -52.00, Steps:32, Epislon: 0.010 +2022-10-30 02:13:20 - r - INFO: - Episode: 185/400, Reward: -98.00, Steps:48, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 186/400, Reward: -26.00, Steps:16, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 187/400, Reward: -89.00, Steps:39, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 188/400, Reward: 1.00, Steps:9, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 189/400, Reward: -66.00, Steps:36, Epislon: 
0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 190/400, Reward: -77.00, Steps:37, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 191/400, Reward: 5.00, Steps:5, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 192/400, Reward: 2.00, Steps:8, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 193/400, Reward: -64.00, Steps:34, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 194/400, Reward: 5.00, Steps:5, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 195/400, Reward: -10.00, Steps:10, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 196/400, Reward: -79.00, Steps:39, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 197/400, Reward: 3.00, Steps:7, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 198/400, Reward: 3.00, Steps:7, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 199/400, Reward: 0.00, Steps:10, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 200/400, Reward: -33.00, Steps:23, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 201/400, Reward: 2.00, Steps:8, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 202/400, Reward: 5.00, Steps:5, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 203/400, Reward: -110.00, Steps:50, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 204/400, Reward: -43.00, Steps:23, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 205/400, Reward: 3.00, Steps:7, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 206/400, Reward: -13.00, Steps:13, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 207/400, Reward: 1.00, Steps:9, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 208/400, Reward: -32.00, Steps:22, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 209/400, Reward: -77.00, Steps:37, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 210/400, Reward: 5.00, Steps:5, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 
211/400, Reward: -23.00, Steps:23, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 212/400, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 213/400, Reward: 4.00, Steps:6, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 214/400, Reward: 1.00, Steps:9, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 215/400, Reward: -42.00, Steps:22, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 216/400, Reward: -13.00, Steps:13, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 217/400, Reward: -64.00, Steps:34, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 218/400, Reward: -13.00, Steps:13, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 219/400, Reward: -2.00, Steps:12, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 220/400, Reward: 5.00, Steps:5, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 221/400, Reward: -129.00, Steps:69, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 222/400, Reward: -133.00, Steps:63, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 223/400, Reward: -47.00, Steps:37, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 224/400, Reward: -11.00, Steps:11, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 225/400, Reward: -25.00, Steps:25, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 226/400, Reward: -1.00, Steps:11, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 227/400, Reward: 5.00, Steps:5, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 228/400, Reward: -103.00, Steps:53, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 229/400, Reward: 3.00, Steps:7, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 230/400, Reward: 2.00, Steps:8, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 231/400, Reward: -67.00, Steps:37, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 232/400, Reward: -65.00, Steps:35, 
Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 233/400, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 234/400, Reward: -30.00, Steps:20, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 235/400, Reward: 3.00, Steps:7, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 236/400, Reward: 4.00, Steps:6, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 237/400, Reward: 3.00, Steps:7, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 238/400, Reward: -13.00, Steps:13, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 239/400, Reward: 1.00, Steps:9, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 240/400, Reward: -16.00, Steps:16, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 241/400, Reward: 3.00, Steps:7, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 242/400, Reward: -39.00, Steps:29, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 243/400, Reward: 3.00, Steps:7, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 244/400, Reward: 3.00, Steps:7, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 245/400, Reward: 3.00, Steps:7, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 246/400, Reward: -13.00, Steps:13, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 247/400, Reward: 5.00, Steps:5, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 248/400, Reward: 3.00, Steps:7, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 249/400, Reward: 2.00, Steps:8, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 250/400, Reward: -12.00, Steps:12, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 251/400, Reward: -14.00, Steps:14, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 252/400, Reward: 2.00, Steps:8, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 253/400, Reward: -57.00, Steps:37, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 
254/400, Reward: -29.00, Steps:19, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 255/400, Reward: 4.00, Steps:6, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 256/400, Reward: 2.00, Steps:8, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 257/400, Reward: -13.00, Steps:13, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 258/400, Reward: -40.00, Steps:30, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 259/400, Reward: 3.00, Steps:7, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 260/400, Reward: 3.00, Steps:7, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 261/400, Reward: -30.00, Steps:20, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 262/400, Reward: -34.00, Steps:24, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 263/400, Reward: -1.00, Steps:11, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 264/400, Reward: -13.00, Steps:13, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 265/400, Reward: 2.00, Steps:8, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 266/400, Reward: 5.00, Steps:5, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 267/400, Reward: 3.00, Steps:7, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 268/400, Reward: -42.00, Steps:32, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 269/400, Reward: -17.00, Steps:17, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 270/400, Reward: -12.00, Steps:12, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 271/400, Reward: -28.00, Steps:18, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 272/400, Reward: -13.00, Steps:13, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 273/400, Reward: -2.00, Steps:12, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 274/400, Reward: 3.00, Steps:7, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 275/400, Reward: 3.00, Steps:7, Epislon: 0.010 
+2022-10-30 02:13:21 - r - INFO: - Episode: 276/400, Reward: 3.00, Steps:7, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 277/400, Reward: -14.00, Steps:14, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 278/400, Reward: -14.00, Steps:14, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 279/400, Reward: 3.00, Steps:7, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 280/400, Reward: 4.00, Steps:6, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 281/400, Reward: 3.00, Steps:7, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 282/400, Reward: 5.00, Steps:5, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 283/400, Reward: -13.00, Steps:13, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 284/400, Reward: 3.00, Steps:7, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 285/400, Reward: 2.00, Steps:8, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 286/400, Reward: 5.00, Steps:5, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 287/400, Reward: -1.00, Steps:11, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 288/400, Reward: -39.00, Steps:29, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 289/400, Reward: 5.00, Steps:5, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 290/400, Reward: 3.00, Steps:7, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 291/400, Reward: 3.00, Steps:7, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 292/400, Reward: 3.00, Steps:7, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 293/400, Reward: -11.00, Steps:11, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 294/400, Reward: -30.00, Steps:20, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 295/400, Reward: -18.00, Steps:18, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 296/400, Reward: -13.00, Steps:13, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 297/400, Reward: 
2.00, Steps:8, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 298/400, Reward: 5.00, Steps:5, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 299/400, Reward: 3.00, Steps:7, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 300/400, Reward: 4.00, Steps:6, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 301/400, Reward: 2.00, Steps:8, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 302/400, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 303/400, Reward: -14.00, Steps:14, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 304/400, Reward: -13.00, Steps:13, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 305/400, Reward: -55.00, Steps:35, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 306/400, Reward: -19.00, Steps:19, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 307/400, Reward: 3.00, Steps:7, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 308/400, Reward: -12.00, Steps:12, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 309/400, Reward: 3.00, Steps:7, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 310/400, Reward: -67.00, Steps:37, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 311/400, Reward: 3.00, Steps:7, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 312/400, Reward: -20.00, Steps:20, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 313/400, Reward: 4.00, Steps:6, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 314/400, Reward: 5.00, Steps:5, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 315/400, Reward: -20.00, Steps:20, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 316/400, Reward: -36.00, Steps:26, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 317/400, Reward: 3.00, Steps:7, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 318/400, Reward: 4.00, Steps:6, Epislon: 0.010 +2022-10-30 02:13:21 - r - 
INFO: - Episode: 319/400, Reward: 2.00, Steps:8, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 320/400, Reward: -12.00, Steps:12, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 321/400, Reward: 5.00, Steps:5, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 322/400, Reward: 4.00, Steps:6, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 323/400, Reward: -16.00, Steps:16, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 324/400, Reward: 4.00, Steps:6, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 325/400, Reward: -18.00, Steps:18, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 326/400, Reward: -36.00, Steps:26, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 327/400, Reward: 3.00, Steps:7, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 328/400, Reward: 3.00, Steps:7, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 329/400, Reward: -28.00, Steps:18, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 330/400, Reward: -31.00, Steps:21, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 331/400, Reward: -1.00, Steps:11, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 332/400, Reward: -109.00, Steps:59, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 333/400, Reward: -29.00, Steps:19, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 334/400, Reward: 3.00, Steps:7, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 335/400, Reward: 3.00, Steps:7, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 336/400, Reward: 0.00, Steps:10, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 337/400, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 338/400, Reward: -12.00, Steps:12, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 339/400, Reward: 3.00, Steps:7, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 340/400, Reward: 3.00, Steps:7, 
Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 341/400, Reward: -14.00, Steps:14, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 342/400, Reward: -35.00, Steps:25, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 343/400, Reward: -16.00, Steps:16, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 344/400, Reward: -21.00, Steps:21, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 345/400, Reward: -28.00, Steps:18, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 346/400, Reward: 2.00, Steps:8, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 347/400, Reward: -12.00, Steps:12, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 348/400, Reward: -28.00, Steps:18, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 349/400, Reward: 3.00, Steps:7, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 350/400, Reward: 3.00, Steps:7, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 351/400, Reward: 3.00, Steps:7, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 352/400, Reward: -10.00, Steps:10, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 353/400, Reward: 3.00, Steps:7, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 354/400, Reward: 3.00, Steps:7, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 355/400, Reward: -62.00, Steps:32, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 356/400, Reward: 5.00, Steps:5, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 357/400, Reward: 4.00, Steps:6, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 358/400, Reward: -28.00, Steps:18, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 359/400, Reward: 3.00, Steps:7, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 360/400, Reward: 3.00, Steps:7, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 361/400, Reward: 2.00, Steps:8, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 
362/400, Reward: 2.00, Steps:8, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 363/400, Reward: -16.00, Steps:16, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 364/400, Reward: 2.00, Steps:8, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 365/400, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 366/400, Reward: -18.00, Steps:18, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 367/400, Reward: 3.00, Steps:7, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 368/400, Reward: -18.00, Steps:18, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 369/400, Reward: -15.00, Steps:15, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 370/400, Reward: 5.00, Steps:5, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 371/400, Reward: -29.00, Steps:19, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 372/400, Reward: 3.00, Steps:7, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 373/400, Reward: -14.00, Steps:14, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 374/400, Reward: 1.00, Steps:9, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 375/400, Reward: -19.00, Steps:19, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 376/400, Reward: 3.00, Steps:7, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 377/400, Reward: 3.00, Steps:7, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 378/400, Reward: 3.00, Steps:7, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 379/400, Reward: -31.00, Steps:21, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 380/400, Reward: 2.00, Steps:8, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 381/400, Reward: 3.00, Steps:7, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 382/400, Reward: 3.00, Steps:7, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 383/400, Reward: -14.00, Steps:14, Epislon: 0.010 +2022-10-30 
02:13:21 - r - INFO: - Episode: 384/400, Reward: 3.00, Steps:7, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 385/400, Reward: 2.00, Steps:8, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 386/400, Reward: 3.00, Steps:7, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 387/400, Reward: 4.00, Steps:6, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 388/400, Reward: -8.00, Steps:8, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 389/400, Reward: 3.00, Steps:7, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 390/400, Reward: 4.00, Steps:6, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 391/400, Reward: -13.00, Steps:13, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 392/400, Reward: 3.00, Steps:7, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 393/400, Reward: -12.00, Steps:12, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 394/400, Reward: -32.00, Steps:22, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 395/400, Reward: 3.00, Steps:7, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 396/400, Reward: -27.00, Steps:17, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 397/400, Reward: 3.00, Steps:7, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 398/400, Reward: 3.00, Steps:7, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 399/400, Reward: -37.00, Steps:27, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Episode: 400/400, Reward: -57.00, Steps:37, Epislon: 0.010 +2022-10-30 02:13:21 - r - INFO: - Finish training! 
diff --git a/projects/codes/Sarsa/Train_Racetrack-v0_Sarsa_20221030-021315/models/checkpoint.pkl b/projects/codes/Sarsa/Train_Racetrack-v0_Sarsa_20221030-021315/models/checkpoint.pkl new file mode 100644 index 0000000..d950b3f Binary files /dev/null and b/projects/codes/Sarsa/Train_Racetrack-v0_Sarsa_20221030-021315/models/checkpoint.pkl differ diff --git a/projects/codes/Sarsa/Train_Racetrack-v0_Sarsa_20221030-021315/results/learning_curve.png b/projects/codes/Sarsa/Train_Racetrack-v0_Sarsa_20221030-021315/results/learning_curve.png new file mode 100644 index 0000000..0626795 Binary files /dev/null and b/projects/codes/Sarsa/Train_Racetrack-v0_Sarsa_20221030-021315/results/learning_curve.png differ diff --git a/projects/codes/Sarsa/Train_Racetrack-v0_Sarsa_20221030-021315/results/res.csv b/projects/codes/Sarsa/Train_Racetrack-v0_Sarsa_20221030-021315/results/res.csv new file mode 100644 index 0000000..d251623 --- /dev/null +++ b/projects/codes/Sarsa/Train_Racetrack-v0_Sarsa_20221030-021315/results/res.csv @@ -0,0 +1,401 @@ +episodes,rewards,steps +0,-870,200 +1,-740,200 +2,-710,200 +3,-600,200 +4,-580,200 +5,-620,200 +6,-590,200 +7,-590,200 +8,-520,200 +9,-570,200 +10,-580,200 +11,-580,200 +12,-500,200 +13,-540,200 +14,-510,200 +15,-570,200 +16,-560,200 +17,-540,200 +18,-490,200 +19,-490,200 +20,-530,200 +21,-520,200 +22,-530,200 +23,-520,200 +24,-500,200 +25,-510,200 +26,-520,200 +27,-530,200 +28,-560,200 +29,-490,200 +30,-530,200 +31,-359,149 +32,-470,200 +33,-510,200 +34,-520,200 +35,-500,200 +36,-540,200 +37,-560,200 +38,-500,200 +39,-480,200 +40,-490,200 +41,-480,200 +42,-540,200 +43,-500,200 +44,-500,200 +45,-480,200 +46,-550,200 +47,-490,200 +48,-540,200 +49,-420,200 +50,-530,200 +51,-510,200 +52,-530,200 +53,-460,200 +54,-480,200 +55,-480,200 +56,-470,200 +57,-490,200 +58,-470,200 +59,-500,200 +60,-500,200 +61,-480,200 +62,-450,200 +63,-490,200 +64,-420,200 +65,-480,200 +66,-440,200 +67,-490,200 +68,-188,88 +69,-327,167 +70,-530,200 +71,-48,28 +72,-460,200 
+73,-460,200 +74,-25,25 +75,-428,178 +76,-460,200 +77,-341,151 +78,-480,200 +79,-346,156 +80,-34,24 +81,-480,200 +82,-480,200 +83,-222,112 +84,-470,200 +85,-409,169 +86,-139,59 +87,-520,200 +88,-108,58 +89,-3,13 +90,-131,71 +91,-355,145 +92,-470,200 +93,-450,200 +94,-490,200 +95,-425,185 +96,-130,70 +97,-246,116 +98,-480,200 +99,-500,200 +100,-13,13 +101,-63,33 +102,-311,131 +103,-450,200 +104,-520,200 +105,-430,200 +106,-79,39 +107,-94,44 +108,-37,27 +109,-235,115 +110,-440,200 +111,-19,19 +112,-424,194 +113,-470,200 +114,-344,164 +115,-307,147 +116,-82,52 +117,-387,177 +118,-500,200 +119,-315,145 +120,-289,119 +121,-139,79 +122,-392,192 +123,-13,13 +124,-35,25 +125,-82,42 +126,-134,64 +127,-93,53 +128,2,8 +129,-212,102 +130,-87,47 +131,-70,40 +132,-109,49 +133,-77,47 +134,-17,17 +135,-118,58 +136,-132,62 +137,-76,36 +138,-93,63 +139,-357,157 +140,-129,69 +141,-46,26 +142,-60,30 +143,-339,159 +144,-10,10 +145,-164,84 +146,-145,75 +147,-53,33 +148,-3,13 +149,-55,35 +150,-398,178 +151,3,7 +152,-20,20 +153,-354,154 +154,-439,189 +155,-122,62 +156,-80,40 +157,-29,19 +158,-185,85 +159,-354,154 +160,-35,25 +161,-132,62 +162,-155,75 +163,-261,111 +164,3,7 +165,-135,65 +166,-57,37 +167,-432,182 +168,-63,33 +169,-119,59 +170,3,7 +171,-16,16 +172,-112,62 +173,1,9 +174,-354,164 +175,-101,61 +176,-86,46 +177,-33,23 +178,-339,139 +179,3,7 +180,-9,9 +181,-224,104 +182,-11,11 +183,-52,32 +184,-98,48 +185,-26,16 +186,-89,39 +187,1,9 +188,-66,36 +189,-77,37 +190,5,5 +191,2,8 +192,-64,34 +193,5,5 +194,-10,10 +195,-79,39 +196,3,7 +197,3,7 +198,0,10 +199,-33,23 +200,2,8 +201,5,5 +202,-110,50 +203,-43,23 +204,3,7 +205,-13,13 +206,1,9 +207,-32,22 +208,-77,37 +209,5,5 +210,-23,23 +211,-15,15 +212,4,6 +213,1,9 +214,-42,22 +215,-13,13 +216,-64,34 +217,-13,13 +218,-2,12 +219,5,5 +220,-129,69 +221,-133,63 +222,-47,37 +223,-11,11 +224,-25,25 +225,-1,11 +226,5,5 +227,-103,53 +228,3,7 +229,2,8 +230,-67,37 +231,-65,35 +232,-15,15 +233,-30,20 +234,3,7 +235,4,6 +236,3,7 +237,-13,13 +238,1,9 
+239,-16,16 +240,3,7 +241,-39,29 +242,3,7 +243,3,7 +244,3,7 +245,-13,13 +246,5,5 +247,3,7 +248,2,8 +249,-12,12 +250,-14,14 +251,2,8 +252,-57,37 +253,-29,19 +254,4,6 +255,2,8 +256,-13,13 +257,-40,30 +258,3,7 +259,3,7 +260,-30,20 +261,-34,24 +262,-1,11 +263,-13,13 +264,2,8 +265,5,5 +266,3,7 +267,-42,32 +268,-17,17 +269,-12,12 +270,-28,18 +271,-13,13 +272,-2,12 +273,3,7 +274,3,7 +275,3,7 +276,-14,14 +277,-14,14 +278,3,7 +279,4,6 +280,3,7 +281,5,5 +282,-13,13 +283,3,7 +284,2,8 +285,5,5 +286,-1,11 +287,-39,29 +288,5,5 +289,3,7 +290,3,7 +291,3,7 +292,-11,11 +293,-30,20 +294,-18,18 +295,-13,13 +296,2,8 +297,5,5 +298,3,7 +299,4,6 +300,2,8 +301,-15,15 +302,-14,14 +303,-13,13 +304,-55,35 +305,-19,19 +306,3,7 +307,-12,12 +308,3,7 +309,-67,37 +310,3,7 +311,-20,20 +312,4,6 +313,5,5 +314,-20,20 +315,-36,26 +316,3,7 +317,4,6 +318,2,8 +319,-12,12 +320,5,5 +321,4,6 +322,-16,16 +323,4,6 +324,-18,18 +325,-36,26 +326,3,7 +327,3,7 +328,-28,18 +329,-31,21 +330,-1,11 +331,-109,59 +332,-29,19 +333,3,7 +334,3,7 +335,0,10 +336,-15,15 +337,-12,12 +338,3,7 +339,3,7 +340,-14,14 +341,-35,25 +342,-16,16 +343,-21,21 +344,-28,18 +345,2,8 +346,-12,12 +347,-28,18 +348,3,7 +349,3,7 +350,3,7 +351,-10,10 +352,3,7 +353,3,7 +354,-62,32 +355,5,5 +356,4,6 +357,-28,18 +358,3,7 +359,3,7 +360,2,8 +361,2,8 +362,-16,16 +363,2,8 +364,-15,15 +365,-18,18 +366,3,7 +367,-18,18 +368,-15,15 +369,5,5 +370,-29,19 +371,3,7 +372,-14,14 +373,1,9 +374,-19,19 +375,3,7 +376,3,7 +377,3,7 +378,-31,21 +379,2,8 +380,3,7 +381,3,7 +382,-14,14 +383,3,7 +384,2,8 +385,3,7 +386,4,6 +387,-8,8 +388,3,7 +389,4,6 +390,-13,13 +391,3,7 +392,-12,12 +393,-32,22 +394,3,7 +395,-27,17 +396,3,7 +397,3,7 +398,-37,27 +399,-57,37 diff --git a/projects/codes/Sarsa/config/CliffWalking-v0_Sarsa_Test.yaml b/projects/codes/Sarsa/config/CliffWalking-v0_Sarsa_Test.yaml new file mode 100644 index 0000000..f39b31b --- /dev/null +++ b/projects/codes/Sarsa/config/CliffWalking-v0_Sarsa_Test.yaml @@ -0,0 +1,19 @@ +general_cfg: + algo_name: Sarsa + device: cpu + 
env_name: CliffWalking-v0 + mode: test + load_checkpoint: true + load_path: Train_CliffWalking-v0_Sarsa_20221030-021146 + max_steps: 200 + save_fig: true + seed: 1 + show_fig: false + test_eps: 20 + train_eps: 400 +algo_cfg: + epsilon_decay: 300 + epsilon_end: 0.01 + epsilon_start: 0.95 + gamma: 0.95 + lr: 0.1 diff --git a/projects/codes/Sarsa/config/CliffWalking-v0_Sarsa_Train.yaml b/projects/codes/Sarsa/config/CliffWalking-v0_Sarsa_Train.yaml new file mode 100644 index 0000000..630ead8 --- /dev/null +++ b/projects/codes/Sarsa/config/CliffWalking-v0_Sarsa_Train.yaml @@ -0,0 +1,19 @@ +general_cfg: + algo_name: Sarsa + device: cpu + env_name: CliffWalking-v0 + mode: train + load_checkpoint: false + load_path: Train_CartPole-v1_DQN_20221026-054757 + max_steps: 200 + save_fig: true + seed: 1 + show_fig: false + test_eps: 20 + train_eps: 800 +algo_cfg: + epsilon_decay: 300 + epsilon_end: 0.01 + epsilon_start: 0.95 + gamma: 0.95 + lr: 0.1 diff --git a/projects/codes/Sarsa/config/Racetrack-v0_Sarsa_Test.yaml b/projects/codes/Sarsa/config/Racetrack-v0_Sarsa_Test.yaml new file mode 100644 index 0000000..e07a7e2 --- /dev/null +++ b/projects/codes/Sarsa/config/Racetrack-v0_Sarsa_Test.yaml @@ -0,0 +1,19 @@ +general_cfg: + algo_name: Sarsa + device: cpu + env_name: Racetrack-v0 + mode: test + load_checkpoint: true + load_path: Train_Racetrack-v0_Sarsa_20221030-021315 + max_steps: 200 + save_fig: true + seed: 10 + show_fig: false + test_eps: 20 + train_eps: 400 +algo_cfg: + epsilon_decay: 200 + epsilon_end: 0.01 + epsilon_start: 0.9 + gamma: 0.99 + lr: 0.1 diff --git a/projects/codes/Sarsa/config/Racetrack-v0_Sarsa_Train.yaml b/projects/codes/Sarsa/config/Racetrack-v0_Sarsa_Train.yaml new file mode 100644 index 0000000..da6299f --- /dev/null +++ b/projects/codes/Sarsa/config/Racetrack-v0_Sarsa_Train.yaml @@ -0,0 +1,19 @@ +general_cfg: + algo_name: Sarsa + device: cpu + env_name: Racetrack-v0 + mode: train + load_checkpoint: false + load_path: 
Train_CartPole-v1_DQN_20221026-054757 + max_steps: 200 + save_fig: true + seed: 10 + show_fig: false + test_eps: 20 + train_eps: 400 +algo_cfg: + epsilon_decay: 200 + epsilon_end: 0.01 + epsilon_start: 0.9 + gamma: 0.99 + lr: 0.1 diff --git a/projects/codes/Sarsa/config/config.py b/projects/codes/Sarsa/config/config.py new file mode 100644 index 0000000..9980c04 --- /dev/null +++ b/projects/codes/Sarsa/config/config.py @@ -0,0 +1,35 @@ +#!/usr/bin/env python +# coding=utf-8 +''' +Author: JiangJi +Email: johnjim0816@gmail.com +Date: 2022-10-30 01:23:07 +LastEditor: JiangJi +LastEditTime: 2022-10-30 02:01:54 +Discription: default parameters of Sarsa +''' +from common.config import GeneralConfig,AlgoConfig + +class GeneralConfigSarsa(GeneralConfig): + def __init__(self) -> None: + self.env_name = "CliffWalking-v0" # name of environment + self.algo_name = "Sarsa" # name of algorithm + self.mode = "train" # train or test + self.seed = 1 # random seed + self.device = "cpu" # device to use + self.train_eps = 400 # number of episodes for training + self.test_eps = 20 # number of episodes for testing + self.max_steps = 200 # max steps for each episode + self.load_checkpoint = False + self.load_path = "tasks" # path to load model + self.show_fig = False # show figure or not + self.save_fig = True # save figure or not + +class AlgoConfigSarsa(AlgoConfig): + def __init__(self) -> None: + # set epsilon_start=epsilon_end can obtain fixed epsilon=epsilon_end + self.epsilon_start = 0.95 # epsilon start value + self.epsilon_end = 0.01 # epsilon end value + self.epsilon_decay = 300 # epsilon decay rate + self.gamma = 0.90 # discount factor + self.lr = 0.1 # learning rate \ No newline at end of file diff --git a/projects/codes/Sarsa/main.py b/projects/codes/Sarsa/main.py deleted file mode 100644 index cb1b22c..0000000 --- a/projects/codes/Sarsa/main.py +++ /dev/null @@ -1,124 +0,0 @@ -#!/usr/bin/env python -# coding=utf-8 -''' -Author: John -Email: johnjim0816@gmail.com -Date:
2021-03-11 17:59:16 -LastEditor: John -LastEditTime: 2022-08-26 23:03:39 -Discription: -Environment: -''' -import sys,os -os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE" # avoid "OMP: Error #15: Initializing libiomp5md.dll, but found libiomp5md.dll already initialized." -curr_path = os.path.dirname(os.path.abspath(__file__)) # current path -parent_path = os.path.dirname(curr_path) # parent path -sys.path.append(parent_path) # add path to system path -import gym -import datetime -import argparse -from envs.register import register_env -from envs.wrappers import CliffWalkingWapper -from Sarsa.sarsa import Sarsa -from common.utils import all_seed -from common.launcher import Launcher - -class Main(Launcher): - def get_args(self): - curr_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") # obtain current time - parser = argparse.ArgumentParser(description="hyperparameters") - parser.add_argument('--algo_name',default = 'Sarsa',type=str,help="name of algorithm") - parser.add_argument('--env_name',default = 'Racetrack-v0',type=str,help="name of environment") - parser.add_argument('--train_eps',default = 300,type=int,help="episodes of training") - parser.add_argument('--test_eps',default = 20,type=int,help="episodes of testing") - parser.add_argument('--ep_max_steps',default = 100000,type=int,help="steps per episode, much larger value can simulate infinite steps") - parser.add_argument('--gamma',default=0.99,type=float,help="discounted factor") - parser.add_argument('--epsilon_start',default=0.90,type=float,help="initial value of epsilon") - parser.add_argument('--epsilon_end',default=0.01,type=float,help="final value of epsilon") - parser.add_argument('--epsilon_decay',default=200,type=int,help="decay rate of epsilon") - parser.add_argument('--lr',default=0.2,type=float,help="learning rate") - parser.add_argument('--device',default='cpu',type=str,help="cpu or cuda") - parser.add_argument('--seed',default=10,type=int,help="seed") - 
parser.add_argument('--show_fig',default=False,type=bool,help="if show figure or not") - parser.add_argument('--save_fig',default=True,type=bool,help="if save figure or not") - args = parser.parse_args() - default_args = {'result_path':f"{curr_path}/outputs/{args.env_name}/{curr_time}/results/", - 'model_path':f"{curr_path}/outputs/{args.env_name}/{curr_time}/models/", - } - args = {**vars(args),**default_args} # type(dict) - return args - - def env_agent_config(self,cfg): - register_env(cfg['env_name']) - env = gym.make(cfg['env_name']) - if cfg['seed'] !=0: # set random seed - all_seed(env,seed= cfg['seed']) - if cfg['env_name'] == 'CliffWalking-v0': - env = CliffWalkingWapper(env) - try: # state dimension - n_states = env.observation_space.n # print(hasattr(env.observation_space, 'n')) - except AttributeError: - n_states = env.observation_space.shape[0] # print(hasattr(env.observation_space, 'shape')) - n_actions = env.action_space.n # action dimension - print(f"n_states: {n_states}, n_actions: {n_actions}") - cfg.update({"n_states":n_states,"n_actions":n_actions}) # update to cfg paramters - agent = Sarsa(cfg) - return env,agent - - def train(self,cfg,env,agent): - print("Start training!") - print(f"Env: {cfg['env_name']}, Algorithm: {cfg['algo_name']}, Device: {cfg['device']}") - rewards = [] # record rewards for all episodes - steps = [] # record steps for all episodes - for i_ep in range(cfg['train_eps']): - ep_reward = 0 # reward per episode - ep_step = 0 # step per episode - state = env.reset() # reset and obtain initial state - action = agent.sample_action(state) - # while True: - for _ in range(cfg['ep_max_steps']): - next_state, reward, done, _ = env.step(action) # update env and return transitions - next_action = agent.sample_action(next_state) - agent.update(state, action, reward, next_state, next_action,done) # update agent - state = next_state # update state - action = next_action - ep_reward += reward - ep_step += 1 - if done: - break - 
rewards.append(ep_reward) - steps.append(ep_step) - if (i_ep+1)%10==0: - print(f'Episode: {i_ep+1}/{cfg["train_eps"]}, Reward: {ep_reward:.2f}, Steps: {ep_step}, Epislon: {agent.epsilon:.3f}') - print("Finish training!") - return {'episodes':range(len(rewards)),'rewards':rewards,'steps':steps} - - def test(self,cfg,env,agent): - print("Start testing!") - print(f"Env: {cfg['env_name']}, Algorithm: {cfg['algo_name']}, Device: {cfg['device']}") - rewards = [] # record rewards for all episodes - steps = [] # record steps for all episodes - for i_ep in range(cfg['test_eps']): - ep_reward = 0 # reward per episode - ep_step = 0 - state = env.reset() # reset and obtain initial state - for _ in range(cfg['ep_max_steps']): - action = agent.predict_action(state) - next_state, reward, done, _ = env.step(action) - state = next_state - ep_reward+=reward - ep_step+=1 - if done: - break - rewards.append(ep_reward) - steps.append(ep_step) - print(f"Episode: {i_ep+1}/{cfg['test_eps']}, Steps: {ep_step}, Reward: {ep_reward:.2f}") - print("Finish testing!") - return {'episodes':range(len(rewards)),'rewards':rewards,'steps':steps} - -if __name__ == "__main__": - main = Main() - main.run() - - - diff --git a/projects/codes/Sarsa/outputs/CliffWalking-v0/20220825-213316/models/checkpoint.pkl b/projects/codes/Sarsa/outputs/CliffWalking-v0/20220825-213316/models/checkpoint.pkl deleted file mode 100644 index fb8efd6..0000000 Binary files a/projects/codes/Sarsa/outputs/CliffWalking-v0/20220825-213316/models/checkpoint.pkl and /dev/null differ diff --git a/projects/codes/Sarsa/outputs/CliffWalking-v0/20220825-213316/results/params.json b/projects/codes/Sarsa/outputs/CliffWalking-v0/20220825-213316/results/params.json deleted file mode 100644 index e16e735..0000000 --- a/projects/codes/Sarsa/outputs/CliffWalking-v0/20220825-213316/results/params.json +++ /dev/null @@ -1,19 +0,0 @@ -{ - "algo_name": "Sarsa", - "env_name": "CliffWalking-v0", - "train_eps": 400, - "test_eps": 20, - "gamma": 0.9, - 
"epsilon_start": 0.95, - "epsilon_end": 0.01, - "epsilon_decay": 300, - "lr": 0.1, - "device": "cpu", - "seed": 10, - "show_fig": false, - "save_fig": true, - "result_path": "/Users/jj/Desktop/rl-tutorials/codes/Sarsa/outputs/CliffWalking-v0/20220825-213316/results/", - "model_path": "/Users/jj/Desktop/rl-tutorials/codes/Sarsa/outputs/CliffWalking-v0/20220825-213316/models/", - "n_states": 48, - "n_actions": 4 -} \ No newline at end of file diff --git a/projects/codes/Sarsa/outputs/CliffWalking-v0/20220825-213316/results/training_curve.png b/projects/codes/Sarsa/outputs/CliffWalking-v0/20220825-213316/results/training_curve.png deleted file mode 100644 index 14dbf39..0000000 Binary files a/projects/codes/Sarsa/outputs/CliffWalking-v0/20220825-213316/results/training_curve.png and /dev/null differ diff --git a/projects/codes/Sarsa/outputs/CliffWalking-v0/20220825-213316/results/training_results.csv b/projects/codes/Sarsa/outputs/CliffWalking-v0/20220825-213316/results/training_results.csv deleted file mode 100644 index c51b354..0000000 --- a/projects/codes/Sarsa/outputs/CliffWalking-v0/20220825-213316/results/training_results.csv +++ /dev/null @@ -1,401 +0,0 @@ -episodes,rewards,steps -0,-649,154 -1,-2822,842 -2,-176,176 -3,-139,139 -4,-221,221 -5,-51,51 -6,-219,219 -7,-247,148 -8,-90,90 -9,-145,145 -10,-104,104 -11,-162,162 -12,-49,49 -13,-129,129 -14,-140,140 -15,-19,19 -16,-131,131 -17,-115,115 -18,-43,43 -19,-133,133 -20,-73,73 -21,-89,89 -22,-131,131 -23,-61,61 -24,-113,113 -25,-119,119 -26,-119,119 -27,-71,71 -28,-132,132 -29,-47,47 -30,-79,79 -31,-57,57 -32,-125,125 -33,-77,77 -34,-87,87 -35,-49,49 -36,-57,57 -37,-81,81 -38,-81,81 -39,-97,97 -40,-61,61 -41,-85,85 -42,-217,118 -43,-39,39 -44,-117,117 -45,-41,41 -46,-71,71 -47,-105,105 -48,-73,73 -49,-68,68 -50,-95,95 -51,-41,41 -52,-41,41 -53,-67,67 -54,-71,71 -55,-65,65 -56,-41,41 -57,-61,61 -58,-81,81 -59,-21,21 -60,-76,76 -61,-80,80 -62,-23,23 -63,-53,53 -64,-67,67 -65,-33,33 -66,-41,41 -67,-59,59 
-68,-33,33 -69,-64,64 -70,-188,89 -71,-47,47 -72,-57,57 -73,-45,45 -74,-33,33 -75,-79,79 -76,-45,45 -77,-23,23 -78,-47,47 -79,-57,57 -80,-47,47 -81,-45,45 -82,-53,53 -83,-29,29 -84,-33,33 -85,-69,69 -86,-61,61 -87,-35,35 -88,-59,59 -89,-43,43 -90,-17,17 -91,-39,39 -92,-59,59 -93,-29,29 -94,-31,31 -95,-55,55 -96,-35,35 -97,-45,45 -98,-29,29 -99,-59,59 -100,-25,25 -101,-29,29 -102,-33,33 -103,-39,39 -104,-19,19 -105,-47,47 -106,-57,57 -107,-19,19 -108,-47,47 -109,-25,25 -110,-23,23 -111,-53,53 -112,-39,39 -113,-34,34 -114,-27,27 -115,-27,27 -116,-63,63 -117,-33,33 -118,-17,17 -119,-21,21 -120,-19,19 -121,-49,49 -122,-25,25 -123,-39,39 -124,-25,25 -125,-167,68 -126,-35,35 -127,-29,29 -128,-31,31 -129,-44,44 -130,-33,33 -131,-23,23 -132,-37,37 -133,-134,35 -134,-31,31 -135,-19,19 -136,-29,29 -137,-37,37 -138,-25,25 -139,-39,39 -140,-47,47 -141,-29,29 -142,-27,27 -143,-21,21 -144,-41,41 -145,-29,29 -146,-25,25 -147,-25,25 -148,-21,21 -149,-29,29 -150,-39,39 -151,-35,35 -152,-35,35 -153,-32,32 -154,-31,31 -155,-19,19 -156,-21,21 -157,-35,35 -158,-33,33 -159,-37,37 -160,-25,25 -161,-41,41 -162,-25,25 -163,-23,23 -164,-27,27 -165,-25,25 -166,-39,39 -167,-28,28 -168,-24,24 -169,-23,23 -170,-41,41 -171,-17,17 -172,-35,35 -173,-23,23 -174,-29,29 -175,-17,17 -176,-39,39 -177,-33,33 -178,-29,29 -179,-24,24 -180,-23,23 -181,-19,19 -182,-15,15 -183,-23,23 -184,-39,39 -185,-25,25 -186,-35,35 -187,-33,33 -188,-19,19 -189,-35,35 -190,-21,21 -191,-131,32 -192,-15,15 -193,-23,23 -194,-21,21 -195,-17,17 -196,-23,23 -197,-31,31 -198,-21,21 -199,-31,31 -200,-35,35 -201,-27,27 -202,-19,19 -203,-21,21 -204,-23,23 -205,-23,23 -206,-21,21 -207,-31,31 -208,-25,25 -209,-23,23 -210,-17,17 -211,-19,19 -212,-25,25 -213,-23,23 -214,-19,19 -215,-19,19 -216,-25,25 -217,-25,25 -218,-25,25 -219,-25,25 -220,-23,23 -221,-19,19 -222,-19,19 -223,-149,50 -224,-41,41 -225,-19,19 -226,-29,29 -227,-37,37 -228,-17,17 -229,-17,17 -230,-19,19 -231,-27,27 -232,-19,19 -233,-33,33 -234,-23,23 -235,-23,23 
-236,-34,34 -237,-15,15 -238,-33,33 -239,-29,29 -240,-17,17 -241,-23,23 -242,-17,17 -243,-19,19 -244,-21,21 -245,-23,23 -246,-17,17 -247,-15,15 -248,-39,39 -249,-21,21 -250,-23,23 -251,-29,29 -252,-15,15 -253,-17,17 -254,-29,29 -255,-15,15 -256,-21,21 -257,-19,19 -258,-19,19 -259,-21,21 -260,-17,17 -261,-21,21 -262,-27,27 -263,-27,27 -264,-21,21 -265,-19,19 -266,-17,17 -267,-23,23 -268,-19,19 -269,-17,17 -270,-19,19 -271,-19,19 -272,-17,17 -273,-23,23 -274,-17,17 -275,-22,22 -276,-31,31 -277,-19,19 -278,-17,17 -279,-33,33 -280,-19,19 -281,-17,17 -282,-31,31 -283,-15,15 -284,-15,15 -285,-15,15 -286,-29,29 -287,-19,19 -288,-17,17 -289,-26,26 -290,-17,17 -291,-19,19 -292,-15,15 -293,-21,21 -294,-21,21 -295,-15,15 -296,-19,19 -297,-15,15 -298,-17,17 -299,-19,19 -300,-17,17 -301,-21,21 -302,-17,17 -303,-27,27 -304,-17,17 -305,-19,19 -306,-15,15 -307,-19,19 -308,-33,33 -309,-17,17 -310,-20,20 -311,-19,19 -312,-17,17 -313,-15,15 -314,-23,23 -315,-15,15 -316,-15,15 -317,-17,17 -318,-25,25 -319,-15,15 -320,-17,17 -321,-19,19 -322,-17,17 -323,-15,15 -324,-23,23 -325,-19,19 -326,-17,17 -327,-23,23 -328,-15,15 -329,-19,19 -330,-15,15 -331,-17,17 -332,-19,19 -333,-15,15 -334,-17,17 -335,-17,17 -336,-19,19 -337,-15,15 -338,-19,19 -339,-19,19 -340,-17,17 -341,-15,15 -342,-21,21 -343,-19,19 -344,-17,17 -345,-17,17 -346,-15,15 -347,-21,21 -348,-20,20 -349,-15,15 -350,-15,15 -351,-15,15 -352,-19,19 -353,-17,17 -354,-15,15 -355,-27,27 -356,-15,15 -357,-15,15 -358,-23,23 -359,-125,26 -360,-132,33 -361,-17,17 -362,-15,15 -363,-17,17 -364,-23,23 -365,-17,17 -366,-15,15 -367,-15,15 -368,-17,17 -369,-15,15 -370,-17,17 -371,-15,15 -372,-15,15 -373,-15,15 -374,-15,15 -375,-15,15 -376,-15,15 -377,-15,15 -378,-15,15 -379,-15,15 -380,-17,17 -381,-15,15 -382,-15,15 -383,-19,19 -384,-15,15 -385,-17,17 -386,-27,27 -387,-15,15 -388,-21,21 -389,-125,26 -390,-15,15 -391,-15,15 -392,-15,15 -393,-27,27 -394,-15,15 -395,-15,15 -396,-17,17 -397,-15,15 -398,-15,15 -399,-15,15 diff --git 
a/projects/codes/Sarsa/outputs/Racetrack-v0/20220825-212738/models/checkpoint.pkl b/projects/codes/Sarsa/outputs/Racetrack-v0/20220825-212738/models/checkpoint.pkl deleted file mode 100644 index 81268a4..0000000 Binary files a/projects/codes/Sarsa/outputs/Racetrack-v0/20220825-212738/models/checkpoint.pkl and /dev/null differ diff --git a/projects/codes/Sarsa/outputs/Racetrack-v0/20220825-212738/results/params.json b/projects/codes/Sarsa/outputs/Racetrack-v0/20220825-212738/results/params.json deleted file mode 100644 index accb050..0000000 --- a/projects/codes/Sarsa/outputs/Racetrack-v0/20220825-212738/results/params.json +++ /dev/null @@ -1 +0,0 @@ -{"algo_name": "Sarsa", "env_name": "Racetrack-v0", "train_eps": 300, "test_eps": 20, "gamma": 0.99, "epsilon_start": 0.9, "epsilon_end": 0.01, "epsilon_decay": 200, "lr": 0.2, "device": "cpu", "seed": 10, "show_fig": false, "save_fig": true, "result_path": "/Users/jj/Desktop/rl-tutorials/codes/Sarsa/outputs/Racetrack-v0/20220825-212738/results/", "model_path": "/Users/jj/Desktop/rl-tutorials/codes/Sarsa/outputs/Racetrack-v0/20220825-212738/models/", "n_states": 4, "n_actions": 9} \ No newline at end of file diff --git a/projects/codes/Sarsa/outputs/Racetrack-v0/20220825-212738/results/testing_curve.png b/projects/codes/Sarsa/outputs/Racetrack-v0/20220825-212738/results/testing_curve.png deleted file mode 100644 index c78b938..0000000 Binary files a/projects/codes/Sarsa/outputs/Racetrack-v0/20220825-212738/results/testing_curve.png and /dev/null differ diff --git a/projects/codes/Sarsa/outputs/Racetrack-v0/20220825-212738/results/testing_results.csv b/projects/codes/Sarsa/outputs/Racetrack-v0/20220825-212738/results/testing_results.csv deleted file mode 100644 index 2cb817f..0000000 --- a/projects/codes/Sarsa/outputs/Racetrack-v0/20220825-212738/results/testing_results.csv +++ /dev/null @@ -1,21 +0,0 @@ -episodes,rewards,steps -0,4,6 -1,4,6 -2,-1010,1000 -3,-14,14 -4,4,6 -5,4,6 -6,4,6 -7,-1060,1000 -8,2,8 -9,-12,12 
-10,3,7 -11,-15,15 -12,3,7 -13,4,6 -14,-14,14 -15,3,7 -16,-18,18 -17,4,6 -18,4,6 -19,-1020,1000 diff --git a/projects/codes/Sarsa/outputs/Racetrack-v0/20220825-212738/results/training_curve.png b/projects/codes/Sarsa/outputs/Racetrack-v0/20220825-212738/results/training_curve.png deleted file mode 100644 index 5c612d6..0000000 Binary files a/projects/codes/Sarsa/outputs/Racetrack-v0/20220825-212738/results/training_curve.png and /dev/null differ diff --git a/projects/codes/Sarsa/outputs/Racetrack-v0/20220825-212738/results/training_results.csv b/projects/codes/Sarsa/outputs/Racetrack-v0/20220825-212738/results/training_results.csv deleted file mode 100644 index 0912f70..0000000 --- a/projects/codes/Sarsa/outputs/Racetrack-v0/20220825-212738/results/training_results.csv +++ /dev/null @@ -1,301 +0,0 @@ -episodes,rewards,steps -0,-3460,1000 -1,-2800,1000 -2,-2910,1000 -3,-2620,1000 -4,-2620,1000 -5,-2590,1000 -6,-2390,1000 -7,-2510,1000 -8,-2470,1000 -9,-611,251 -10,-891,371 -11,-265,125 -12,-2281,911 -13,-1203,523 -14,-616,266 -15,-213,113 -16,-633,273 -17,-1112,482 -18,-350,160 -19,-852,342 -20,-87,47 -21,-11,11 -22,-27,17 -23,-117,57 -24,-15,15 -25,4,6 -26,-27,17 -27,-94,44 -28,-184,84 -29,-44,24 -30,-150,80 -31,-14,14 -32,-219,89 -33,-50,30 -34,-111,61 -35,-10,10 -36,-28,18 -37,-34,24 -38,-12,12 -39,-19,19 -40,-136,66 -41,-171,71 -42,-51,31 -43,4,6 -44,-117,57 -45,4,6 -46,4,6 -47,-127,67 -48,-78,48 -49,-311,131 -50,-25,15 -51,4,6 -52,-49,29 -53,-25,15 -54,-78,48 -55,-238,108 -56,4,6 -57,-17,17 -58,-29,19 -59,-218,98 -60,4,6 -61,-129,59 -62,-344,144 -63,-25,15 -64,-15,15 -65,-77,37 -66,2,8 -67,0,10 -68,4,6 -69,4,6 -70,-242,102 -71,3,7 -72,4,6 -73,-53,33 -74,-14,14 -75,4,6 -76,4,6 -77,-30,20 -78,-12,12 -79,2,8 -80,-12,12 -81,-150,70 -82,-48,28 -83,-102,52 -84,4,6 -85,-97,47 -86,-10,10 -87,-125,55 -88,-28,18 -89,-26,16 -90,-107,57 -91,4,6 -92,-16,16 -93,-84,44 -94,-13,13 -95,-43,23 -96,-14,14 -97,-12,12 -98,-13,13 -99,-2,12 -100,-14,14 -101,-47,27 -102,4,6 -103,4,6 
-104,-91,51 -105,-65,35 -106,4,6 -107,-12,12 -108,-14,14 -109,-13,13 -110,4,6 -111,-41,31 -112,-13,13 -113,4,6 -114,-4,14 -115,-74,34 -116,4,6 -117,-60,30 -118,4,6 -119,-15,15 -120,3,7 -121,4,6 -122,4,6 -123,-19,19 -124,4,6 -125,-49,29 -126,-13,13 -127,-30,20 -128,2,8 -129,-21,21 -130,-45,25 -131,-32,22 -132,-67,37 -133,-46,26 -134,0,10 -135,-12,12 -136,-9,9 -137,-10,10 -138,-14,14 -139,4,6 -140,-11,11 -141,-12,12 -142,2,8 -143,-35,25 -144,4,6 -145,-73,43 -146,4,6 -147,-20,20 -148,4,6 -149,2,8 -150,-29,19 -151,-20,20 -152,4,6 -153,-28,18 -154,4,6 -155,4,6 -156,4,6 -157,4,6 -158,-34,24 -159,4,6 -160,4,6 -161,4,6 -162,-25,15 -163,4,6 -164,3,7 -165,-48,28 -166,4,6 -167,-58,38 -168,-20,20 -169,-9,9 -170,3,7 -171,4,6 -172,3,7 -173,-33,23 -174,-50,30 -175,-16,16 -176,-32,22 -177,-65,35 -178,4,6 -179,-13,13 -180,-11,11 -181,3,7 -182,4,6 -183,-16,16 -184,-12,12 -185,4,6 -186,-48,28 -187,-13,13 -188,2,8 -189,3,7 -190,-27,17 -191,3,7 -192,4,6 -193,4,6 -194,4,6 -195,4,6 -196,4,6 -197,-13,13 -198,-14,14 -199,4,6 -200,4,6 -201,-13,13 -202,-33,23 -203,4,6 -204,-32,22 -205,4,6 -206,-48,28 -207,4,6 -208,4,6 -209,3,7 -210,4,6 -211,-34,24 -212,3,7 -213,4,6 -214,4,6 -215,4,6 -216,3,7 -217,-12,12 -218,3,7 -219,-8,8 -220,3,7 -221,4,6 -222,-46,26 -223,-33,23 -224,4,6 -225,1,9 -226,3,7 -227,2,8 -228,-34,24 -229,4,6 -230,4,6 -231,4,6 -232,4,6 -233,-55,35 -234,-37,27 -235,4,6 -236,-14,14 -237,-65,35 -238,4,6 -239,-13,13 -240,4,6 -241,4,6 -242,-13,13 -243,-30,20 -244,3,7 -245,-13,13 -246,4,6 -247,4,6 -248,-13,13 -249,-32,22 -250,4,6 -251,-55,35 -252,-12,12 -253,3,7 -254,3,7 -255,3,7 -256,4,6 -257,2,8 -258,-12,12 -259,3,7 -260,-10,10 -261,-12,12 -262,4,6 -263,3,7 -264,3,7 -265,-16,16 -266,3,7 -267,-47,27 -268,-13,13 -269,4,6 -270,3,7 -271,-13,13 -272,4,6 -273,4,6 -274,-17,17 -275,4,6 -276,3,7 -277,3,7 -278,4,6 -279,-41,31 -280,3,7 -281,-47,27 -282,-32,22 -283,4,6 -284,3,7 -285,-17,17 -286,3,7 -287,3,7 -288,3,7 -289,-12,12 -290,4,6 -291,3,7 -292,3,7 -293,-24,14 -294,3,7 -295,4,6 -296,3,7 
-297,3,7 -298,3,7 -299,-13,13 diff --git a/projects/codes/Sarsa/sarsa.py b/projects/codes/Sarsa/sarsa.py index 37ed818..753ee95 100644 --- a/projects/codes/Sarsa/sarsa.py +++ b/projects/codes/Sarsa/sarsa.py @@ -5,7 +5,7 @@ Author: John Email: johnjim0816@gmail.com Date: 2021-03-12 16:58:16 LastEditor: John -LastEditTime: 2022-08-25 21:26:08 +LastEditTime: 2022-10-30 02:00:51 Discription: Environment: ''' @@ -15,14 +15,14 @@ import torch import math class Sarsa(object): def __init__(self,cfg): - self.n_actions = cfg['n_actions'] - self.lr = cfg['lr'] - self.gamma = cfg['gamma'] - self.epsilon = cfg['epsilon_start'] + self.n_actions = cfg.n_actions + self.lr = cfg.lr + self.gamma = cfg.gamma + self.epsilon = cfg.epsilon_start self.sample_count = 0 - self.epsilon_start = cfg['epsilon_start'] - self.epsilon_end = cfg['epsilon_end'] - self.epsilon_decay = cfg['epsilon_decay'] + self.epsilon_start = cfg.epsilon_start + self.epsilon_end = cfg.epsilon_end + self.epsilon_decay = cfg.epsilon_decay self.Q_table = defaultdict(lambda: np.zeros(self.n_actions)) # Q table def sample_action(self, state): ''' another way to represent e-greedy policy diff --git a/projects/codes/Sarsa/task0.py b/projects/codes/Sarsa/task0.py new file mode 100644 index 0000000..bdd5fda --- /dev/null +++ b/projects/codes/Sarsa/task0.py @@ -0,0 +1,106 @@ +#!/usr/bin/env python +# coding=utf-8 +''' +Author: JiangJi +Email: johnjim0816@gmail.com +Date: 2022-09-19 14:48:16 +LastEditor: JiangJi +LastEditTime: 2022-10-30 02:11:31 +Discription: +''' +import sys,os +os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE" # avoid "OMP: Error #15: Initializing libiomp5md.dll, but found libiomp5md.dll already initialized." 
+curr_path = os.path.dirname(os.path.abspath(__file__)) # current path +parent_path = os.path.dirname(curr_path) # parent path +sys.path.append(parent_path) # add path to system path +import gym +import datetime +import argparse +from envs.register import register_env +from envs.wrappers import CliffWalkingWapper +from Sarsa.sarsa import Sarsa +from common.utils import all_seed,merge_class_attrs +from common.launcher import Launcher +from config.config import GeneralConfigSarsa,AlgoConfigSarsa + +class Main(Launcher): + def __init__(self) -> None: + super().__init__() + self.cfgs['general_cfg'] = merge_class_attrs(self.cfgs['general_cfg'],GeneralConfigSarsa()) + self.cfgs['algo_cfg'] = merge_class_attrs(self.cfgs['algo_cfg'],AlgoConfigSarsa()) + + def env_agent_config(self,cfg,logger): + register_env(cfg.env_name) + env = gym.make(cfg.env_name,new_step_api=False) # create env + if cfg.env_name == 'CliffWalking-v0': + env = CliffWalkingWapper(env) + if cfg.seed !=0: # set random seed + all_seed(env,seed=cfg.seed) + try: # state dimension + n_states = env.observation_space.n # print(hasattr(env.observation_space, 'n')) + except AttributeError: + n_states = env.observation_space.shape[0] # print(hasattr(env.observation_space, 'shape')) + n_actions = env.action_space.n # action dimension + logger.info(f"n_states: {n_states}, n_actions: {n_actions}") # print info + # update cfg parameters + setattr(cfg, 'n_states', n_states) + setattr(cfg, 'n_actions', n_actions) + agent = Sarsa(cfg) + return env,agent + + def train(self,cfg,env,agent,logger): + logger.info("Start training!") + logger.info(f"Env: {cfg.env_name}, Algorithm: {cfg.algo_name}, Device: {cfg.device}") + rewards = [] # record rewards for all episodes + steps = [] # record steps for all episodes + for i_ep in range(cfg.train_eps): + ep_reward = 0 # reward per episode + ep_step = 0 # step per episode + state = env.reset() # reset and obtain initial state + action = agent.sample_action(state) + # while True: +
for _ in range(cfg.max_steps): + next_state, reward, done, _ = env.step(action) # update env and return transitions + next_action = agent.sample_action(next_state) + agent.update(state, action, reward, next_state, next_action,done) # update agent + state = next_state # update state + action = next_action + ep_reward += reward + ep_step += 1 + if done: + break + rewards.append(ep_reward) + steps.append(ep_step) + logger.info(f'Episode: {i_ep+1}/{cfg.train_eps}, Reward: {ep_reward:.2f}, Steps:{ep_step:d}, Epislon: {agent.epsilon:.3f}') + logger.info("Finish training!") + return {'episodes':range(len(rewards)),'rewards':rewards,'steps':steps} + + def test(self,cfg,env,agent,logger): + logger.info("Start testing!") + logger.info(f"Env: {cfg.env_name}, Algorithm: {cfg.algo_name}, Device: {cfg.device}") + rewards = [] # record rewards for all episodes + steps = [] # record steps for all episodes + for i_ep in range(cfg.test_eps): + ep_reward = 0 # reward per episode + ep_step = 0 + state = env.reset() # reset and obtain initial state + for _ in range(cfg.max_steps): + action = agent.predict_action(state) + next_state, reward, done, _ = env.step(action) + state = next_state + ep_reward+=reward + ep_step+=1 + if done: + break + rewards.append(ep_reward) + steps.append(ep_step) + logger.info(f"Episode: {i_ep+1}/{cfg.test_eps}, Reward: {ep_reward:.2f}, Steps:{ep_step:d}") + logger.info("Finish testing!") + return {'episodes':range(len(rewards)),'rewards':rewards,'steps':steps} + +if __name__ == "__main__": + main = Main() + main.run() + + + diff --git a/projects/codes/common/config.py b/projects/codes/common/config.py new file mode 100644 index 0000000..da0beb9 --- /dev/null +++ b/projects/codes/common/config.py @@ -0,0 +1,38 @@ + +class DefaultConfig: + def __init__(self) -> None: + pass + def print_cfg(self): + print(self.__dict__) +class GeneralConfig(DefaultConfig): + def __init__(self) -> None: + self.env_name = "CartPole-v1" # name of environment + self.algo_name = 
class Launcher:
    ''' Base task runner: builds the configs, creates the output folders and logger,
        then drives training or testing through per-episode hooks.
        Subclasses override env_agent_config, train_one_episode and test_one_episode.
    '''
    def __init__(self) -> None:
        self.get_cfg()
    def get_cfg(self):
        ''' create the default config objects, stored in self.cfgs
        '''
        self.cfgs = {'general_cfg':GeneralConfig(),'algo_cfg':AlgoConfig()} # create config
    def process_yaml_cfg(self):
        ''' load yaml config, only when --yaml is given on the command line
        '''
        parser = argparse.ArgumentParser(description="hyperparameters")
        parser.add_argument('--yaml', default = None, type=str,help='the path of config file')
        args = parser.parse_args()
        if args.yaml is not None:
            load_cfgs(self.cfgs, args.yaml)
    def print_cfg(self,cfg):
        ''' print parameters as a Name / Value / Type table
        Args:
            cfg: any object whose instance attributes are the parameters
        '''
        cfg_dict = vars(cfg)
        print("Hyperparameters:")
        print('='*80)
        tplt = "{:^20}\t{:^20}\t{:^20}"
        print(tplt.format("Name", "Value", "Type"))
        for k,v in cfg_dict.items():
            # format str(v), not v: None, list and dict reject the '^20' spec with a TypeError,
            # and bool would be rendered as 1/0
            print(tplt.format(k,str(v),str(type(v))))
        print('='*80)
    def env_agent_config(self,cfg,logger):
        ''' hook: create and return (env, agent); the base version returns (None, None)
        '''
        env,agent = None,None
        return env,agent
    def train_one_episode(self,env, agent, cfg):
        ''' hook: run one training episode and return (agent, ep_reward, ep_step)
        '''
        ep_reward = 0
        ep_step = 0
        return agent,ep_reward,ep_step
    def test_one_episode(self,env, agent, cfg):
        ''' hook: run one test episode and return (agent, ep_reward, ep_step)
        '''
        ep_reward = 0
        ep_step = 0
        return agent,ep_reward,ep_step
    def evaluate(self,env, agent, cfg):
        ''' mean reward of test_one_episode over cfg.eval_eps episodes
        '''
        sum_eval_reward = 0
        for _ in range(cfg.eval_eps):
            _,eval_ep_reward,_ = self.test_one_episode(env, agent, cfg)
            sum_eval_reward += eval_ep_reward
        mean_eval_reward = sum_eval_reward/cfg.eval_eps
        return mean_eval_reward
    # def train(self,cfg, env, agent,logger):
    #     res_dic = {}
    #     return res_dic
    # def test(self,cfg, env, agent,logger):
    #     res_dic = {}
    #     return res_dic
    def create_path(self,cfg):
        ''' create the task folder (relative to the working directory) and derive the sub-folder paths
        '''
        curr_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") # obtain current time
        self.task_dir = f"{cfg.mode.capitalize()}_{cfg.env_name}_{cfg.algo_name}_{curr_time}"
        Path(self.task_dir).mkdir(parents=True, exist_ok=True)
        self.model_dir = f"{self.task_dir}/models/"
        self.res_dir = f"{self.task_dir}/results/"
        self.log_dir = f"{self.task_dir}/logs/"
    def run(self):
        ''' entry point: merge configs, set up outputs, then train or test according to cfg.mode
        '''
        self.process_yaml_cfg() # load yaml config
        cfg = MergedConfig() # merge config
        cfg = merge_class_attrs(cfg,self.cfgs['general_cfg'])
        cfg = merge_class_attrs(cfg,self.cfgs['algo_cfg'])
        self.print_cfg(cfg) # print the configuration
        self.create_path(cfg) # create the path to save the results
        logger = get_logger(self.log_dir) # create the logger
        env, agent = self.env_agent_config(cfg,logger)
        if cfg.load_checkpoint:
            agent.load_model(f"{cfg.load_path}/models/")
        logger.info(f"Start {cfg.mode}ing!")
        logger.info(f"Env: {cfg.env_name}, Algorithm: {cfg.algo_name}, Device: {cfg.device}")
        rewards = [] # record rewards for all episodes
        steps = [] # record steps for all episodes
        if cfg.mode.lower() == 'train':
            best_ep_reward = -float('inf')
            for i_ep in range(cfg.train_eps):
                agent,ep_reward,ep_step = self.train_one_episode(env, agent, cfg)
                logger.info(f"Episode: {i_ep+1}/{cfg.train_eps}, Reward: {ep_reward:.3f}, Step: {ep_step}")
                rewards.append(ep_reward)
                steps.append(ep_step)
                # evaluate every cfg.eval_per_episode episodes; '>=' lets a tie overwrite the saved model
                if (i_ep+1)%cfg.eval_per_episode == 0:
                    mean_eval_reward = self.evaluate(env, agent, cfg)
                    if mean_eval_reward >= best_ep_reward: # update best reward
                        logger.info(f"Current episode {i_ep+1} has the best eval reward: {mean_eval_reward:.3f}")
                        best_ep_reward = mean_eval_reward
                        agent.save_model(self.model_dir) # save models with best reward
        elif cfg.mode.lower() == 'test':
            for i_ep in range(cfg.test_eps):
                agent,ep_reward,ep_step = self.test_one_episode(env, agent, cfg)
                logger.info(f"Episode: {i_ep+1}/{cfg.test_eps}, Reward: {ep_reward:.3f}, Step: {ep_step}")
                rewards.append(ep_reward)
                steps.append(ep_step)
            agent.save_model(self.model_dir) # save models
        logger.info(f"Finish {cfg.mode}ing!")
        res_dic = {'episodes':range(len(rewards)),'rewards':rewards,'steps':steps}
        save_results(res_dic, self.res_dir) # save results
        save_cfgs(self.cfgs, self.task_dir) # save config
        plot_rewards(rewards, title=f"{cfg.mode.lower()}ing curve on {cfg.device} of {cfg.algo_name} for {cfg.env_name}" ,fpath= self.res_dir)
    # def run(self):
    #     self.process_yaml_cfg() # load yaml config
    #     cfg = MergedConfig() # merge config
    #     cfg = merge_class_attrs(cfg,self.cfgs['general_cfg'])
    #     cfg = merge_class_attrs(cfg,self.cfgs['algo_cfg'])
    #     self.print_cfg(cfg) # print the configuration
    #     self.create_path(cfg) # create the path to save the results
    #     logger = get_logger(self.log_dir) # create the logger
    #     env, agent = self.env_agent_config(cfg,logger)
    #     if cfg.load_checkpoint:
    #         agent.load_model(f"{cfg.load_path}/models/")
    #     if cfg.mode.lower() == 'train':
    #         res_dic = self.train(cfg, env, agent,logger)
    #     elif cfg.mode.lower() == 'test':
    #         res_dic = self.test(cfg, env, agent,logger)
    #     save_results(res_dic, self.res_dir) # save results
    #     save_cfgs(self.cfgs, self.task_dir) # save config
    #     agent.save_model(self.model_dir) # save models
    #     plot_rewards(res_dic['rewards'], title=f"{cfg.mode.lower()}ing curve on {cfg.device} of {cfg.algo_name} for {cfg.env_name}" ,fpath= self.res_dir)
class ActorSoftmaxTanh(nn.Module):
    ''' Policy network with two tanh hidden layers and a softmax head.
    Args:
        input_dim: size of the input feature vector
        output_dim: number of outputs of the softmax head
        hidden_dim: width of both hidden layers
    '''
    def __init__(self, input_dim, output_dim, hidden_dim=256):
        super(ActorSoftmaxTanh, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        self.fc3 = nn.Linear(hidden_dim, output_dim)
    def forward(self,x):
        ''' return the softmax probabilities; the softmax is taken over dim=1,
            so x is expected to be batched as (batch, input_dim)
        '''
        # torch.tanh replaces the deprecated F.tanh (same values) and matches ActorNormal in this file
        x = torch.tanh(self.fc1(x))
        x = torch.tanh(self.fc2(x))
        probs = F.softmax(self.fc3(x),dim=1)
        return probs
class Critic(nn.Module):
    ''' Value network: two ReLU hidden layers followed by a linear head with a single output.
    Args:
        input_dim: size of the input feature vector
        output_dim: must be 1
        hidden_dim: width of both hidden layers
    '''
    def __init__(self,input_dim,output_dim,hidden_dim=256):
        super(Critic,self).__init__()
        assert output_dim == 1 # critic must output a single value
        # layers are created in the order fc1, fc2, fc3
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        self.fc3 = nn.Linear(hidden_dim, output_dim)
    def forward(self,x):
        ''' return the value estimate produced by the linear head
        '''
        hidden = F.relu(self.fc2(F.relu(self.fc1(x))))
        return self.fc3(hidden)
def get_logger(fpath):
    ''' create the logger that writes both to <fpath>/log.txt and to the console
    Args:
        fpath: folder for the log file (str or Path, with or without a trailing slash); created if missing
    Returns:
        the logging.Logger named 'r', with exactly one console handler and one file handler
    '''
    log_dir = Path(fpath)
    log_dir.mkdir(parents=True, exist_ok=True)
    logger = logging.getLogger(name='r') # set root logger if not set name
    logger.setLevel(logging.DEBUG)
    # getLogger returns the same object on every call: drop the handlers left by an earlier call,
    # otherwise each record is emitted once per call and the old log files stay open
    for old_handler in list(logger.handlers):
        logger.removeHandler(old_handler)
        old_handler.close()
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s: - %(message)s',
        datefmt='%Y-%m-%d %H:%M:%S')
    # output to file by using FileHandler
    fh = logging.FileHandler(log_dir / "log.txt")
    fh.setLevel(logging.DEBUG)
    fh.setFormatter(formatter)
    # output to screen by using StreamHandler
    ch = logging.StreamHandler()
    ch.setLevel(logging.DEBUG)
    ch.setFormatter(formatter)
    # add Handler
    logger.addHandler(ch)
    logger.addHandler(fh)
    return logger
def load_cfgs(cfgs, fpath):
    ''' overlay the values stored in a yaml file onto the config objects
    Args:
        cfgs: dict mapping a section name (e.g. 'general_cfg') to a config object, updated in place
        fpath: path of the yaml file; its top level maps section names to {attribute: value}
    Sections missing from the file (or empty) are left untouched, so a file may override only
    part of the configuration.
    '''
    with open(fpath) as f:
        # NOTE(review): FullLoader is not meant for untrusted files; if configs only ever hold
        # plain scalars/lists, yaml.safe_load would be the safer choice - confirm before switching
        load_cfg = yaml.load(f,Loader=yaml.FullLoader)
    if load_cfg is None: # an empty yaml document loads as None
        return
    for cfg_type in cfgs:
        section = load_cfg.get(cfg_type)
        if not section: # section absent or written without any key
            continue
        for k, v in section.items():
            setattr(cfgs[cfg_type], k, v)
wrap(*args, **kw): + ts = time() + result = func(*args, **kw) + te = time() + print(f"func: {func.__name__}, took: {te-ts:2.4f} seconds") + return result + return wrap def all_seed(env,seed = 1): ''' omnipotent seed for RL, attention the position of seed function, you'd better put it just following the env create function Args: @@ -136,7 +179,7 @@ def all_seed(env,seed = 1): import torch import numpy as np import random - print(f"seed = {seed}") + # print(f"seed = {seed}") env.seed(seed) # env config np.random.seed(seed) random.seed(seed) diff --git a/projects/codes/scripts/A2C_CartPole-v0.sh b/projects/codes/scripts/A2C_CartPole-v0.sh deleted file mode 100644 index 4fcc9a1..0000000 --- a/projects/codes/scripts/A2C_CartPole-v0.sh +++ /dev/null @@ -1,15 +0,0 @@ -# run A2C on CartPole-v0 -# source conda, if you are already in proper conda environment, then comment the codes util "conda activate easyrl" - -if [ -f "$HOME/anaconda3/etc/profile.d/conda.sh" ]; then - echo "source file at ~/anaconda3/etc/profile.d/conda.sh" - source ~/anaconda3/etc/profile.d/conda.sh -elif [ -f "$HOME/opt/anaconda3/etc/profile.d/conda.sh" ]; then - echo "source file at ~/opt/anaconda3/etc/profile.d/conda.sh" - source ~/opt/anaconda3/etc/profile.d/conda.sh -else - echo 'please manually config the conda source path' -fi -conda activate easyrl # easyrl here can be changed to another name of conda env that you have created -codes_dir=$(dirname $(dirname $(readlink -f "$0"))) # "codes" path -python $codes_dir/A2C/main.py \ No newline at end of file diff --git a/projects/codes/scripts/DQN_Acrobot-v1.sh b/projects/codes/scripts/DQN_Acrobot-v1.sh index 83cd297..623a0cc 100644 --- a/projects/codes/scripts/DQN_Acrobot-v1.sh +++ b/projects/codes/scripts/DQN_Acrobot-v1.sh @@ -1,15 +1,3 @@ # run DQN on Acrobot-v1, not the best tuned parameters - -# source conda, if you are already in proper conda environment, then comment the codes util "conda activate easyrl" -if [ -f 
"$HOME/anaconda3/etc/profile.d/conda.sh" ]; then - echo "source file at ~/anaconda3/etc/profile.d/conda.sh" - source ~/anaconda3/etc/profile.d/conda.sh -elif [ -f "$HOME/opt/anaconda3/etc/profile.d/conda.sh" ]; then - echo "source file at ~/opt/anaconda3/etc/profile.d/conda.sh" - source ~/opt/anaconda3/etc/profile.d/conda.sh -else - echo 'please manually config the conda source path' -fi -conda activate easyrl # easyrl here can be changed to another name of conda env that you have created codes_dir=$(dirname $(dirname $(readlink -f "$0"))) # "codes" path python $codes_dir/DQN/main.py --env_name Acrobot-v1 --train_eps 100 --epsilon_decay 1500 --lr 0.002 --memory_capacity 200000 --batch_size 128 --device cuda \ No newline at end of file diff --git a/projects/codes/scripts/DQN_CartPole-v0.sh b/projects/codes/scripts/DQN_CartPole-v0.sh deleted file mode 100644 index ead51b9..0000000 --- a/projects/codes/scripts/DQN_CartPole-v0.sh +++ /dev/null @@ -1,15 +0,0 @@ -# run DQN on CartPole-v0 -# source conda, if you are already in proper conda environment, then comment the codes util "conda activate easyrl" - -if [ -f "$HOME/anaconda3/etc/profile.d/conda.sh" ]; then - echo "source file at ~/anaconda3/etc/profile.d/conda.sh" - source ~/anaconda3/etc/profile.d/conda.sh -elif [ -f "$HOME/opt/anaconda3/etc/profile.d/conda.sh" ]; then - echo "source file at ~/opt/anaconda3/etc/profile.d/conda.sh" - source ~/opt/anaconda3/etc/profile.d/conda.sh -else - echo 'please manually config the conda source path' -fi -conda activate easyrl # easyrl here can be changed to another name of conda env that you have created -codes_dir=$(dirname $(dirname $(readlink -f "$0"))) # "codes" path -python $codes_dir/DQN/main.py \ No newline at end of file diff --git a/projects/codes/scripts/DQN_CartPole-v1.sh b/projects/codes/scripts/DQN_CartPole-v1.sh index 8cac524..e4fe811 100644 --- a/projects/codes/scripts/DQN_CartPole-v1.sh +++ b/projects/codes/scripts/DQN_CartPole-v1.sh @@ -1,14 +1,3 @@ # run DQN 
on CartPole-v1, not finished yet -# source conda, if you are already in proper conda environment, then comment the codes util "conda activate easyrl" -if [ -f "$HOME/anaconda3/etc/profile.d/conda.sh" ]; then - echo "source file at ~/anaconda3/etc/profile.d/conda.sh" - source ~/anaconda3/etc/profile.d/conda.sh -elif [ -f "$HOME/opt/anaconda3/etc/profile.d/conda.sh" ]; then - echo "source file at ~/opt/anaconda3/etc/profile.d/conda.sh" - source ~/opt/anaconda3/etc/profile.d/conda.sh -else - echo 'please manually config the conda source path' -fi -conda activate easyrl # easyrl here can be changed to another name of conda env that you have created codes_dir=$(dirname $(dirname $(readlink -f "$0"))) # "codes" path python $codes_dir/DQN/main.py --env_name CartPole-v1 --train_eps 2000 --gamma 0.99 --epsilon_decay 6000 --lr 0.00001 --memory_capacity 200000 --batch_size 64 --device cuda \ No newline at end of file diff --git a/projects/codes/scripts/DoubleDQN_CartPole-v0.sh b/projects/codes/scripts/DoubleDQN_CartPole-v0.sh index 0da88f2..0154227 100644 --- a/projects/codes/scripts/DoubleDQN_CartPole-v0.sh +++ b/projects/codes/scripts/DoubleDQN_CartPole-v0.sh @@ -1,15 +1,3 @@ # run Double DQN on CartPole-v0 -# source conda, if you are already in proper conda environment, then comment the codes util "conda activate easyrl" - -if [ -f "$HOME/anaconda3/etc/profile.d/conda.sh" ]; then - echo "source file at ~/anaconda3/etc/profile.d/conda.sh" - source ~/anaconda3/etc/profile.d/conda.sh -elif [ -f "$HOME/opt/anaconda3/etc/profile.d/conda.sh" ]; then - echo "source file at ~/opt/anaconda3/etc/profile.d/conda.sh" - source ~/opt/anaconda3/etc/profile.d/conda.sh -else - echo 'please manually config the conda source path' -fi -conda activate easyrl # easyrl here can be changed to another name of conda env that you have created codes_dir=$(dirname $(dirname $(readlink -f "$0"))) # "codes" path python $codes_dir/DoubleDQN/main.py --device cuda \ No newline at end of file diff --git 
a/projects/codes/scripts/Qlearning_CliffWalking-v0.sh b/projects/codes/scripts/Qlearning_CliffWalking-v0.sh index 233cec7..6ba8b53 100644 --- a/projects/codes/scripts/Qlearning_CliffWalking-v0.sh +++ b/projects/codes/scripts/Qlearning_CliffWalking-v0.sh @@ -1,12 +1,2 @@ -if [ -f "$HOME/anaconda3/etc/profile.d/conda.sh" ]; then - echo "source file at ~/anaconda3/etc/profile.d/conda.sh" - source ~/anaconda3/etc/profile.d/conda.sh -elif [ -f "$HOME/opt/anaconda3/etc/profile.d/conda.sh" ]; then - echo "source file at ~/opt/anaconda3/etc/profile.d/conda.sh" - source ~/opt/anaconda3/etc/profile.d/conda.sh -else - echo 'please manually config the conda source path' -fi -conda activate easyrl # easyrl here can be changed to another name of conda env that you have created codes_dir=$(dirname $(dirname $(readlink -f "$0"))) # "codes" path python $codes_dir/QLearning/main.py --env_name CliffWalking-v0 --train_eps 400 --gamma 0.90 --epsilon_start 0.95 --epsilon_end 0.01 --epsilon_decay 300 --lr 0.1 --device cpu \ No newline at end of file diff --git a/projects/codes/scripts/Qlearning_FrozenLakeNoSlippery-v1.sh b/projects/codes/scripts/Qlearning_FrozenLakeNoSlippery-v1.sh index 0df0547..c4638fe 100644 --- a/projects/codes/scripts/Qlearning_FrozenLakeNoSlippery-v1.sh +++ b/projects/codes/scripts/Qlearning_FrozenLakeNoSlippery-v1.sh @@ -1,14 +1,2 @@ - -# source conda, if you are already in proper conda environment, then comment the codes util "conda activate easyrl" -if [ -f "$HOME/anaconda3/etc/profile.d/conda.sh" ]; then - echo "source file at ~/anaconda3/etc/profile.d/conda.sh" - source ~/anaconda3/etc/profile.d/conda.sh -elif [ -f "$HOME/opt/anaconda3/etc/profile.d/conda.sh" ]; then - echo "source file at ~/opt/anaconda3/etc/profile.d/conda.sh" - source ~/opt/anaconda3/etc/profile.d/conda.sh -else - echo 'please manually config the conda source path' -fi -conda activate easyrl # easyrl here can be changed to another name of conda env that you have created codes_dir=$(dirname 
$(dirname $(readlink -f "$0"))) # "codes" path python $codes_dir/QLearning/main.py --env_name FrozenLakeNoSlippery-v1 --train_eps 800 --epsilon_start 0.70 --epsilon_end 0.1 --epsilon_decay 2000 --gamma 0.9 --lr 0.9 --device cpu \ No newline at end of file diff --git a/projects/codes/scripts/Qlearning_Racetrack-v0.sh b/projects/codes/scripts/Qlearning_Racetrack-v0.sh index 00599fa..aba42b2 100644 --- a/projects/codes/scripts/Qlearning_Racetrack-v0.sh +++ b/projects/codes/scripts/Qlearning_Racetrack-v0.sh @@ -1,14 +1,2 @@ - -# source conda, if you are already in proper conda environment, then comment the codes util "conda activate easyrl" -if [ -f "$HOME/anaconda3/etc/profile.d/conda.sh" ]; then - echo "source file at ~/anaconda3/etc/profile.d/conda.sh" - source ~/anaconda3/etc/profile.d/conda.sh -elif [ -f "$HOME/opt/anaconda3/etc/profile.d/conda.sh" ]; then - echo "source file at ~/opt/anaconda3/etc/profile.d/conda.sh" - source ~/opt/anaconda3/etc/profile.d/conda.sh -else - echo 'please manually config the conda source path' -fi -conda activate easyrl # easyrl here can be changed to another name of conda env that you have created codes_dir=$(dirname $(dirname $(readlink -f "$0"))) # "codes" path python $codes_dir/QLearning/main.py --env_name Racetrack-v0 --device cpu \ No newline at end of file diff --git a/projects/codes/scripts/Sarsa_CliffWalking-v0.sh b/projects/codes/scripts/Sarsa_CliffWalking-v0.sh index c4f5e6a..9207c9d 100644 --- a/projects/codes/scripts/Sarsa_CliffWalking-v0.sh +++ b/projects/codes/scripts/Sarsa_CliffWalking-v0.sh @@ -1,12 +1,2 @@ -if [ -f "$HOME/anaconda3/etc/profile.d/conda.sh" ]; then - echo "source file at ~/anaconda3/etc/profile.d/conda.sh" - source ~/anaconda3/etc/profile.d/conda.sh -elif [ -f "$HOME/opt/anaconda3/etc/profile.d/conda.sh" ]; then - echo "source file at ~/opt/anaconda3/etc/profile.d/conda.sh" - source ~/opt/anaconda3/etc/profile.d/conda.sh -else - echo 'please manually config the conda source path' -fi -conda activate 
easyrl # easyrl here can be changed to another name of conda env that you have created codes_dir=$(dirname $(dirname $(readlink -f "$0"))) # "codes" path python $codes_dir/Sarsa/main.py --env_name CliffWalking-v0 --train_eps 400 --gamma 0.90 --epsilon_start 0.95 --epsilon_end 0.01 --epsilon_decay 300 --lr 0.1 --device cpu \ No newline at end of file diff --git a/projects/codes/scripts/Sarsa_FrozenLakeNoSlippery-v1.sh b/projects/codes/scripts/Sarsa_FrozenLakeNoSlippery-v1.sh index f215c94..9c77e75 100644 --- a/projects/codes/scripts/Sarsa_FrozenLakeNoSlippery-v1.sh +++ b/projects/codes/scripts/Sarsa_FrozenLakeNoSlippery-v1.sh @@ -1,13 +1,2 @@ -# Sarsa for FrozenLakeNoSlippery-v1, cannot converge like Qlearning! -if [ -f "$HOME/anaconda3/etc/profile.d/conda.sh" ]; then - echo "source file at ~/anaconda3/etc/profile.d/conda.sh" - source ~/anaconda3/etc/profile.d/conda.sh -elif [ -f "$HOME/opt/anaconda3/etc/profile.d/conda.sh" ]; then - echo "source file at ~/opt/anaconda3/etc/profile.d/conda.sh" - source ~/opt/anaconda3/etc/profile.d/conda.sh -else - echo 'please manually config the conda source path' -fi -conda activate easyrl # easyrl here can be changed to another name of conda env that you have created codes_dir=$(dirname $(dirname $(readlink -f "$0"))) # "codes" path python $codes_dir/Sarsa/main.py --env_name FrozenLakeNoSlippery-v1 --train_eps 800 --ep_max_steps 10 --epsilon_start 0.50 --epsilon_end 0.01 --epsilon_decay 2000 --gamma 0.9 --lr 0.1 --device cpu \ No newline at end of file diff --git a/projects/codes/scripts/Sarsa_Racetrack-v0.sh b/projects/codes/scripts/Sarsa_Racetrack-v0.sh index dcd6cac..ff8317e 100644 --- a/projects/codes/scripts/Sarsa_Racetrack-v0.sh +++ b/projects/codes/scripts/Sarsa_Racetrack-v0.sh @@ -1,12 +1,2 @@ -if [ -f "$HOME/anaconda3/etc/profile.d/conda.sh" ]; then - echo "source file at ~/anaconda3/etc/profile.d/conda.sh" - source ~/anaconda3/etc/profile.d/conda.sh -elif [ -f "$HOME/opt/anaconda3/etc/profile.d/conda.sh" ]; then - echo 
"source file at ~/opt/anaconda3/etc/profile.d/conda.sh" - source ~/opt/anaconda3/etc/profile.d/conda.sh -else - echo 'please manually config the conda source path' -fi -conda activate easyrl # easyrl here can be changed to another name of conda env that you have created codes_dir=$(dirname $(dirname $(readlink -f "$0"))) # "codes" path python $codes_dir/Sarsa/main.py --env_name Racetrack-v0 \ No newline at end of file diff --git a/projects/notebooks/1.QLearning.ipynb b/projects/notebooks/1.QLearning.ipynb index c5cc3f7..4116815 100644 --- a/projects/notebooks/1.QLearning.ipynb +++ b/projects/notebooks/1.QLearning.ipynb @@ -10,7 +10,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 19, "metadata": {}, "outputs": [], "source": [ @@ -31,7 +31,7 @@ " self.epsilon_end = cfg.epsilon_end\n", " self.epsilon_decay = cfg.epsilon_decay\n", " self.Q_table = defaultdict(lambda: np.zeros(n_actions)) # 用嵌套字典存放状态->动作->状态-动作值(Q值)的映射,即Q表\n", - " def sample(self, state):\n", + " def sample_action(self, state):\n", " ''' 采样动作,训练时用\n", " '''\n", " self.sample_count += 1\n", @@ -43,7 +43,7 @@ " else:\n", " action = np.random.choice(self.n_actions) # 随机选择动作\n", " return action\n", - " def predict(self,state):\n", + " def predict_action(self,state):\n", " ''' 预测或选择动作,测试时用\n", " '''\n", " action = np.argmax(self.Q_table[str(state)])\n", @@ -54,19 +54,7 @@ " Q_target = reward \n", " else:\n", " Q_target = reward + self.gamma * np.max(self.Q_table[str(next_state)]) \n", - " self.Q_table[str(state)][action] += self.lr * (Q_target - Q_predict)\n", - " def save(self,path):\n", - " import dill\n", - " torch.save(\n", - " obj=self.Q_table,\n", - " f=path+\"Qleaning_model.pkl\",\n", - " pickle_module=dill\n", - " )\n", - " print(\"保存模型成功!\")\n", - " def load(self, path):\n", - " import dill\n", - " self.Q_table =torch.load(f=path+'Qleaning_model.pkl',pickle_module=dill)\n", - " print(\"加载模型成功!\")" + " self.Q_table[str(state)][action] += self.lr * (Q_target - Q_predict)" ] 
}, { @@ -99,7 +87,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 20, "metadata": {}, "outputs": [], "source": [ @@ -111,7 +99,7 @@ " ep_reward = 0 # 记录每个回合的奖励\n", " state = env.reset() # 重置环境,即开始新的回合\n", " while True:\n", - " action = agent.sample(state) # 根据算法采样一个动作\n", + " action = agent.sample_action(state) # 根据算法采样一个动作\n", " next_state, reward, done, _ = env.step(action) # 与环境进行一次动作交互\n", " agent.update(state, action, reward, next_state, done) # Q学习算法更新\n", " state = next_state # 更新状态\n", @@ -119,7 +107,8 @@ " if done:\n", " break\n", " rewards.append(ep_reward)\n", - " print(f\"回合:{i_ep+1}/{cfg.train_eps},奖励:{ep_reward:.1f},Epsilon:{agent.epsilon}\")\n", + " if (i_ep+1)%20==0:\n", + " print(f\"回合:{i_ep+1}/{cfg.train_eps},奖励:{ep_reward:.1f},Epsilon:{agent.epsilon:.3f}\")\n", " print('完成训练!')\n", " return {\"rewards\":rewards}\n", "def test(cfg,env,agent):\n", @@ -130,7 +119,7 @@ " ep_reward = 0 # 记录每个episode的reward\n", " state = env.reset() # 重置环境, 重新开一局(即开始新的一个回合)\n", " while True:\n", - " action = agent.predict(state) # 根据算法选择一个动作\n", + " action = agent.predict_action(state) # 根据算法选择一个动作\n", " next_state, reward, done, _ = env.step(action) # 与环境进行一个交互\n", " state = next_state # 更新状态\n", " ep_reward += reward\n", @@ -153,7 +142,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 21, "metadata": {}, "outputs": [], "source": [ @@ -163,74 +152,6 @@ "\n", "# turtle tutorial : https://docs.python.org/3.3/library/turtle.html\n", "\n", - "def GridWorld(gridmap=None, is_slippery=False):\n", - " if gridmap is None:\n", - " gridmap = ['SFFF', 'FHFH', 'FFFH', 'HFFG']\n", - " env = gym.make(\"FrozenLake-v0\", desc=gridmap, is_slippery=False)\n", - " env = FrozenLakeWapper(env)\n", - " return env\n", - "\n", - "\n", - "class FrozenLakeWapper(gym.Wrapper):\n", - " def __init__(self, env):\n", - " gym.Wrapper.__init__(self, env)\n", - " self.max_y = env.desc.shape[0]\n", - " self.max_x = env.desc.shape[1]\n", - " self.t = 
None\n", - " self.unit = 50\n", - "\n", - " def draw_box(self, x, y, fillcolor='', line_color='gray'):\n", - " self.t.up()\n", - " self.t.goto(x * self.unit, y * self.unit)\n", - " self.t.color(line_color)\n", - " self.t.fillcolor(fillcolor)\n", - " self.t.setheading(90)\n", - " self.t.down()\n", - " self.t.begin_fill()\n", - " for _ in range(4):\n", - " self.t.forward(self.unit)\n", - " self.t.right(90)\n", - " self.t.end_fill()\n", - "\n", - " def move_player(self, x, y):\n", - " self.t.up()\n", - " self.t.setheading(90)\n", - " self.t.fillcolor('red')\n", - " self.t.goto((x + 0.5) * self.unit, (y + 0.5) * self.unit)\n", - "\n", - " def render(self):\n", - " if self.t == None:\n", - " self.t = turtle.Turtle()\n", - " self.wn = turtle.Screen()\n", - " self.wn.setup(self.unit * self.max_x + 100,\n", - " self.unit * self.max_y + 100)\n", - " self.wn.setworldcoordinates(0, 0, self.unit * self.max_x,\n", - " self.unit * self.max_y)\n", - " self.t.shape('circle')\n", - " self.t.width(2)\n", - " self.t.speed(0)\n", - " self.t.color('gray')\n", - " for i in range(self.desc.shape[0]):\n", - " for j in range(self.desc.shape[1]):\n", - " x = j\n", - " y = self.max_y - 1 - i\n", - " if self.desc[i][j] == b'S': # Start\n", - " self.draw_box(x, y, 'white')\n", - " elif self.desc[i][j] == b'F': # Frozen ice\n", - " self.draw_box(x, y, 'white')\n", - " elif self.desc[i][j] == b'G': # Goal\n", - " self.draw_box(x, y, 'yellow')\n", - " elif self.desc[i][j] == b'H': # Hole\n", - " self.draw_box(x, y, 'black')\n", - " else:\n", - " self.draw_box(x, y, 'white')\n", - " self.t.shape('turtle')\n", - "\n", - " x_pos = self.s % self.max_x\n", - " y_pos = self.max_y - 1 - int(self.s / self.max_x)\n", - " self.move_player(x_pos, y_pos)\n", - "\n", - "\n", "class CliffWalkingWapper(gym.Wrapper):\n", " def __init__(self, env):\n", " gym.Wrapper.__init__(self, env)\n", @@ -312,7 +233,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 22, "metadata": {}, "outputs": [], 
"source": [ @@ -346,7 +267,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 23, "metadata": {}, "outputs": [], "source": [ @@ -399,8 +320,7 @@ " plt.xlabel('epsiodes')\n", " plt.plot(rewards, label='rewards')\n", " plt.plot(smooth(rewards), label='smoothed')\n", - " plt.legend()\n", - " plt.show()" + " plt.legend()" ] }, { @@ -414,7 +334,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 24, "metadata": {}, "outputs": [ { @@ -423,425 +343,27 @@ "text": [ "开始训练!\n", "环境:CliffWalking-v0, 算法:Q-learning, 设备:cpu\n", - "回合:1/400,奖励:-1668.0,Epsilon:0.3771901652370099\n", - "回合:2/400,奖励:-2328.0,Epsilon:0.03210668110464856\n", - "回合:3/400,奖励:-152.0,Epsilon:0.02331928797825333\n", - "回合:4/400,奖励:-296.0,Epsilon:0.014965661602689185\n", - "回合:5/400,奖励:-168.0,Epsilon:0.012836430915462094\n", - "回合:6/400,奖励:-149.0,Epsilon:0.011726126490407173\n", - "回合:7/400,奖励:-274.0,Epsilon:0.010963239247691907\n", - "回合:8/400,奖励:-127.0,Epsilon:0.010630787152305933\n", - "回合:9/400,奖励:-356.0,Epsilon:0.010267816440118822\n", - "回合:10/400,奖励:-105.0,Epsilon:0.0101887270555826\n", - "回合:11/400,奖励:-162.0,Epsilon:0.01010998036181645\n", - "回合:12/400,奖励:-124.0,Epsilon:0.010072745604688937\n", - "回合:13/400,奖励:-125.0,Epsilon:0.010047956858279448\n", - "回合:14/400,奖励:-69.0,Epsilon:0.010038103335373512\n", - "回合:15/400,奖励:-146.0,Epsilon:0.010023421049147612\n", - "回合:16/400,奖励:-99.0,Epsilon:0.010016837948094095\n", - "回合:17/400,奖励:-102.0,Epsilon:0.010011984751749595\n", - "回合:18/400,奖励:-114.0,Epsilon:0.010008195909220538\n", - "回合:19/400,奖励:-95.0,Epsilon:0.010005971322860786\n", - "回合:20/400,奖励:-50.0,Epsilon:0.010005054615675078\n", - "回合:21/400,奖励:-179.0,Epsilon:0.010002783294099886\n", - "回合:22/400,奖励:-51.0,Epsilon:0.010002348167306314\n", - "回合:23/400,奖励:-53.0,Epsilon:0.010001967902958245\n", - "回合:24/400,奖励:-126.0,Epsilon:0.01000129300438042\n", - "回合:25/400,奖励:-105.0,Epsilon:0.010000911164786836\n", - 
"回合:26/400,奖励:-55.0,Epsilon:0.010000758536131584\n", - "回合:27/400,奖励:-112.0,Epsilon:0.010000522203364875\n", - "回合:28/400,奖励:-81.0,Epsilon:0.01000039863934062\n", - "回合:29/400,奖励:-187.0,Epsilon:0.010000297294659517\n", - "回合:30/400,奖励:-176.0,Epsilon:0.01000022999489198\n", - "回合:31/400,奖励:-71.0,Epsilon:0.010000181524464132\n", - "回合:32/400,奖励:-77.0,Epsilon:0.010000140432053464\n", - "回合:33/400,奖励:-82.0,Epsilon:0.010000106846201706\n", - "回合:34/400,奖励:-95.0,Epsilon:0.010000077845318887\n", - "回合:35/400,奖励:-53.0,Epsilon:0.010000065238977184\n", - "回合:36/400,奖励:-30.0,Epsilon:0.010000059030667672\n", - "回合:37/400,奖励:-122.0,Epsilon:0.010000039306520976\n", - "回合:38/400,奖励:-37.0,Epsilon:0.010000034745744355\n", - "回合:39/400,奖励:-100.0,Epsilon:0.01000002489641374\n", - "回合:40/400,奖励:-201.0,Epsilon:0.010000017720528442\n", - "回合:41/400,奖励:-62.0,Epsilon:0.010000014411941012\n", - "回合:42/400,奖励:-61.0,Epsilon:0.010000011760233133\n", - "回合:43/400,奖励:-57.0,Epsilon:0.010000009725232207\n", - "回合:44/400,奖励:-73.0,Epsilon:0.0100000076246806\n", - "回合:45/400,奖励:-39.0,Epsilon:0.010000006695197199\n", - "回合:46/400,奖励:-71.0,Epsilon:0.010000005284213373\n", - "回合:47/400,奖励:-77.0,Epsilon:0.010000004088005098\n", - "回合:48/400,奖励:-53.0,Epsilon:0.010000003425989836\n", - "回合:49/400,奖励:-88.0,Epsilon:0.010000002555012459\n", - "回合:50/400,奖励:-65.0,Epsilon:0.01000000205729175\n", - "回合:51/400,奖励:-41.0,Epsilon:0.010000001794495218\n", - "回合:52/400,奖励:-67.0,Epsilon:0.010000001435323749\n", - "回合:53/400,奖励:-38.0,Epsilon:0.010000001264559407\n", - "回合:54/400,奖励:-50.0,Epsilon:0.010000001070426428\n", - "回合:55/400,奖励:-35.0,Epsilon:0.010000000952552966\n", - "回合:56/400,奖励:-74.0,Epsilon:0.010000000744325952\n", - "回合:57/400,奖励:-75.0,Epsilon:0.010000000579681634\n", - "回合:58/400,奖励:-31.0,Epsilon:0.010000000522772152\n", - "回合:59/400,奖励:-38.0,Epsilon:0.010000000460576537\n", - "回合:60/400,奖励:-51.0,Epsilon:0.01000000038857222\n", - "回合:61/400,奖励:-64.0,Epsilon:0.010000000313922366\n", - 
"回合:62/400,奖励:-78.0,Epsilon:0.010000000242050338\n", - "回合:63/400,奖励:-41.0,Epsilon:0.010000000211131054\n", - "回合:64/400,奖励:-62.0,Epsilon:0.010000000171710922\n", - "回合:65/400,奖励:-58.0,Epsilon:0.010000000141525377\n", - "回合:66/400,奖励:-34.0,Epsilon:0.010000000126361357\n", - "回合:67/400,奖励:-52.0,Epsilon:0.010000000106251867\n", - "回合:68/400,奖励:-28.0,Epsilon:0.010000000096783744\n", - "回合:69/400,奖励:-57.0,Epsilon:0.010000000080036202\n", - "回合:70/400,奖励:-39.0,Epsilon:0.010000000070279423\n", - "回合:71/400,奖励:-55.0,Epsilon:0.01000000005850696\n", - "回合:72/400,奖励:-33.0,Epsilon:0.010000000052412531\n", - "回合:73/400,奖励:-62.0,Epsilon:0.010000000042626625\n", - "回合:74/400,奖励:-56.0,Epsilon:0.010000000035368174\n", - "回合:75/400,奖励:-34.0,Epsilon:0.01000000003157858\n", - "回合:76/400,奖励:-37.0,Epsilon:0.010000000027914485\n", - "回合:77/400,奖励:-149.0,Epsilon:0.0100000000236291\n", - "回合:78/400,奖励:-46.0,Epsilon:0.010000000020270076\n", - "回合:79/400,奖励:-28.0,Epsilon:0.010000000018463805\n", - "回合:80/400,奖励:-37.0,Epsilon:0.010000000016321432\n", - "回合:81/400,奖励:-64.0,Epsilon:0.01000000001318587\n", - "回合:82/400,奖励:-52.0,Epsilon:0.010000000011087433\n", - "回合:83/400,奖励:-22.0,Epsilon:0.010000000010303453\n", - "回合:84/400,奖励:-32.0,Epsilon:0.010000000009261004\n", - "回合:85/400,奖励:-74.0,Epsilon:0.010000000007236559\n", - "回合:86/400,奖励:-33.0,Epsilon:0.010000000006482756\n", - "回合:87/400,奖励:-39.0,Epsilon:0.010000000005692478\n", - "回合:88/400,奖励:-40.0,Epsilon:0.010000000004981906\n", - "回合:89/400,奖励:-33.0,Epsilon:0.010000000004462961\n", - "回合:90/400,奖励:-47.0,Epsilon:0.010000000003815783\n", - "回合:91/400,奖励:-45.0,Epsilon:0.010000000003284274\n", - "回合:92/400,奖励:-28.0,Epsilon:0.010000000002991612\n", - "回合:93/400,奖励:-45.0,Epsilon:0.010000000002574904\n", - "回合:94/400,奖励:-56.0,Epsilon:0.010000000002136451\n", - "回合:95/400,奖励:-31.0,Epsilon:0.010000000001926707\n", - "回合:96/400,奖励:-38.0,Epsilon:0.010000000001697481\n", - "回合:97/400,奖励:-50.0,Epsilon:0.010000000001436887\n", - 
"回合:98/400,奖励:-41.0,Epsilon:0.010000000001253341\n", - "回合:99/400,奖励:-41.0,Epsilon:0.01000000000109324\n", - "回合:100/400,奖励:-13.0,Epsilon:0.010000000001046878\n", - "回合:101/400,奖励:-45.0,Epsilon:0.010000000000901057\n", - "回合:102/400,奖励:-19.0,Epsilon:0.01000000000084576\n", - "回合:103/400,奖励:-44.0,Epsilon:0.010000000000730383\n", - "回合:104/400,奖励:-23.0,Epsilon:0.010000000000676478\n", - "回合:105/400,奖励:-40.0,Epsilon:0.010000000000592037\n", - "回合:106/400,奖励:-52.0,Epsilon:0.010000000000497817\n", - "回合:107/400,奖励:-38.0,Epsilon:0.010000000000438592\n", - "回合:108/400,奖励:-24.0,Epsilon:0.01000000000040487\n", - "回合:109/400,奖励:-32.0,Epsilon:0.010000000000363909\n", - "回合:110/400,奖励:-38.0,Epsilon:0.010000000000320614\n", - "回合:111/400,奖励:-52.0,Epsilon:0.01000000000026959\n", - "回合:112/400,奖励:-22.0,Epsilon:0.010000000000250527\n", - "回合:113/400,奖励:-38.0,Epsilon:0.010000000000220721\n", - "回合:114/400,奖励:-33.0,Epsilon:0.01000000000019773\n", - "回合:115/400,奖励:-29.0,Epsilon:0.010000000000179511\n", - "回合:116/400,奖励:-56.0,Epsilon:0.010000000000148944\n", - "回合:117/400,奖励:-20.0,Epsilon:0.010000000000139338\n", - "回合:118/400,奖励:-31.0,Epsilon:0.010000000000125658\n", - "回合:119/400,奖励:-33.0,Epsilon:0.01000000000011257\n", - "回合:120/400,奖励:-39.0,Epsilon:0.010000000000098846\n", - "回合:121/400,奖励:-26.0,Epsilon:0.010000000000090641\n", - "回合:122/400,奖励:-31.0,Epsilon:0.010000000000081742\n", - "回合:123/400,奖励:-40.0,Epsilon:0.010000000000071538\n", - "回合:124/400,奖励:-33.0,Epsilon:0.010000000000064086\n", - "回合:125/400,奖励:-46.0,Epsilon:0.010000000000054977\n", - "回合:126/400,奖励:-28.0,Epsilon:0.010000000000050078\n", - "回合:127/400,奖励:-23.0,Epsilon:0.010000000000046382\n", - "回合:128/400,奖励:-30.0,Epsilon:0.010000000000041968\n", - "回合:129/400,奖励:-24.0,Epsilon:0.010000000000038742\n", - "回合:130/400,奖励:-36.0,Epsilon:0.01000000000003436\n", - "回合:131/400,奖励:-28.0,Epsilon:0.010000000000031298\n", - "回合:132/400,奖励:-28.0,Epsilon:0.01000000000002851\n", - 
"回合:133/400,奖励:-35.0,Epsilon:0.01000000000002537\n", - "回合:134/400,奖励:-27.0,Epsilon:0.010000000000023187\n", - "回合:135/400,奖励:-30.0,Epsilon:0.01000000000002098\n", - "回合:136/400,奖励:-35.0,Epsilon:0.01000000000001867\n", - "回合:137/400,奖励:-31.0,Epsilon:0.010000000000016837\n", - "回合:138/400,奖励:-27.0,Epsilon:0.010000000000015387\n", - "回合:139/400,奖励:-48.0,Epsilon:0.010000000000013113\n", - "回合:140/400,奖励:-23.0,Epsilon:0.010000000000012145\n", - "回合:141/400,奖励:-29.0,Epsilon:0.010000000000011026\n", - "回合:142/400,奖励:-21.0,Epsilon:0.01000000000001028\n", - "回合:143/400,奖励:-22.0,Epsilon:0.010000000000009553\n", - "回合:144/400,奖励:-42.0,Epsilon:0.010000000000008306\n", - "回合:145/400,奖励:-21.0,Epsilon:0.010000000000007744\n", - "回合:146/400,奖励:-141.0,Epsilon:0.010000000000006733\n", - "回合:147/400,奖励:-43.0,Epsilon:0.010000000000005834\n", - "回合:148/400,奖励:-44.0,Epsilon:0.010000000000005038\n", - "回合:149/400,奖励:-18.0,Epsilon:0.010000000000004745\n", - "回合:150/400,奖励:-23.0,Epsilon:0.010000000000004394\n", - "回合:151/400,奖励:-24.0,Epsilon:0.010000000000004056\n", - "回合:152/400,奖励:-30.0,Epsilon:0.010000000000003671\n", - "回合:153/400,奖励:-27.0,Epsilon:0.010000000000003355\n", - "回合:154/400,奖励:-15.0,Epsilon:0.01000000000000319\n", - "回合:155/400,奖励:-19.0,Epsilon:0.010000000000002994\n", - "回合:156/400,奖励:-50.0,Epsilon:0.010000000000002535\n", - "回合:157/400,奖励:-22.0,Epsilon:0.010000000000002356\n", - "回合:158/400,奖励:-28.0,Epsilon:0.010000000000002146\n", - "回合:159/400,奖励:-27.0,Epsilon:0.010000000000001962\n", - "回合:160/400,奖励:-13.0,Epsilon:0.010000000000001879\n", - "回合:161/400,奖励:-33.0,Epsilon:0.010000000000001683\n", - "回合:162/400,奖励:-24.0,Epsilon:0.010000000000001553\n", - "回合:163/400,奖励:-30.0,Epsilon:0.010000000000001405\n", - "回合:164/400,奖励:-19.0,Epsilon:0.010000000000001319\n", - "回合:165/400,奖励:-22.0,Epsilon:0.010000000000001227\n", - "回合:166/400,奖励:-32.0,Epsilon:0.010000000000001102\n", - "回合:167/400,奖励:-35.0,Epsilon:0.01000000000000098\n", - 
"回合:168/400,奖励:-32.0,Epsilon:0.010000000000000881\n", - "回合:169/400,奖励:-21.0,Epsilon:0.010000000000000822\n", - "回合:170/400,奖励:-27.0,Epsilon:0.010000000000000751\n", - "回合:171/400,奖励:-22.0,Epsilon:0.010000000000000698\n", - "回合:172/400,奖励:-22.0,Epsilon:0.010000000000000649\n", - "回合:173/400,奖励:-34.0,Epsilon:0.01000000000000058\n", - "回合:174/400,奖励:-22.0,Epsilon:0.010000000000000538\n", - "回合:175/400,奖励:-27.0,Epsilon:0.010000000000000491\n", - "回合:176/400,奖励:-13.0,Epsilon:0.01000000000000047\n", - "回合:177/400,奖励:-29.0,Epsilon:0.010000000000000427\n", - "回合:178/400,奖励:-20.0,Epsilon:0.010000000000000401\n", - "回合:179/400,奖励:-22.0,Epsilon:0.010000000000000371\n", - "回合:180/400,奖励:-33.0,Epsilon:0.010000000000000333\n", - "回合:181/400,奖励:-20.0,Epsilon:0.010000000000000312\n", - "回合:182/400,奖励:-26.0,Epsilon:0.010000000000000286\n", - "回合:183/400,奖励:-22.0,Epsilon:0.010000000000000266\n", - "回合:184/400,奖励:-29.0,Epsilon:0.010000000000000241\n", - "回合:185/400,奖励:-25.0,Epsilon:0.010000000000000222\n", - "回合:186/400,奖励:-16.0,Epsilon:0.01000000000000021\n", - "回合:187/400,奖励:-28.0,Epsilon:0.010000000000000191\n", - "回合:188/400,奖励:-23.0,Epsilon:0.010000000000000177\n", - "回合:189/400,奖励:-31.0,Epsilon:0.01000000000000016\n", - "回合:190/400,奖励:-17.0,Epsilon:0.010000000000000151\n", - "回合:191/400,奖励:-22.0,Epsilon:0.01000000000000014\n", - "回合:192/400,奖励:-18.0,Epsilon:0.010000000000000132\n", - "回合:193/400,奖励:-34.0,Epsilon:0.010000000000000118\n", - "回合:194/400,奖励:-32.0,Epsilon:0.010000000000000106\n", - "回合:195/400,奖励:-14.0,Epsilon:0.0100000000000001\n", - "回合:196/400,奖励:-23.0,Epsilon:0.010000000000000094\n", - "回合:197/400,奖励:-23.0,Epsilon:0.010000000000000087\n", - "回合:198/400,奖励:-28.0,Epsilon:0.01000000000000008\n", - "回合:199/400,奖励:-24.0,Epsilon:0.010000000000000073\n", - "回合:200/400,奖励:-21.0,Epsilon:0.010000000000000068\n", - "回合:201/400,奖励:-15.0,Epsilon:0.010000000000000064\n", - "回合:202/400,奖励:-16.0,Epsilon:0.010000000000000061\n", - 
"回合:203/400,奖励:-22.0,Epsilon:0.010000000000000057\n", - "回合:204/400,奖励:-28.0,Epsilon:0.010000000000000052\n", - "回合:205/400,奖励:-25.0,Epsilon:0.010000000000000049\n", - "回合:206/400,奖励:-16.0,Epsilon:0.010000000000000045\n", - "回合:207/400,奖励:-13.0,Epsilon:0.010000000000000044\n", - "回合:208/400,奖励:-31.0,Epsilon:0.01000000000000004\n", - "回合:209/400,奖励:-25.0,Epsilon:0.010000000000000037\n", - "回合:210/400,奖励:-21.0,Epsilon:0.010000000000000033\n", - "回合:211/400,奖励:-26.0,Epsilon:0.010000000000000031\n", - "回合:212/400,奖励:-13.0,Epsilon:0.01000000000000003\n", - "回合:213/400,奖励:-15.0,Epsilon:0.010000000000000028\n", - "回合:214/400,奖励:-23.0,Epsilon:0.010000000000000026\n", - "回合:215/400,奖励:-23.0,Epsilon:0.010000000000000024\n", - "回合:216/400,奖励:-13.0,Epsilon:0.010000000000000023\n", - "回合:217/400,奖励:-21.0,Epsilon:0.010000000000000021\n", - "回合:218/400,奖励:-28.0,Epsilon:0.01000000000000002\n", - "回合:219/400,奖励:-24.0,Epsilon:0.010000000000000018\n", - "回合:220/400,奖励:-20.0,Epsilon:0.010000000000000018\n", - "回合:221/400,奖励:-13.0,Epsilon:0.010000000000000016\n", - "回合:222/400,奖励:-15.0,Epsilon:0.010000000000000016\n", - "回合:223/400,奖励:-27.0,Epsilon:0.010000000000000014\n", - "回合:224/400,奖励:-18.0,Epsilon:0.010000000000000014\n", - "回合:225/400,奖励:-20.0,Epsilon:0.010000000000000012\n", - "回合:226/400,奖励:-27.0,Epsilon:0.010000000000000012\n", - "回合:227/400,奖励:-18.0,Epsilon:0.01000000000000001\n", - "回合:228/400,奖励:-15.0,Epsilon:0.01000000000000001\n", - "回合:229/400,奖励:-19.0,Epsilon:0.010000000000000009\n", - "回合:230/400,奖励:-20.0,Epsilon:0.010000000000000009\n", - "回合:231/400,奖励:-13.0,Epsilon:0.010000000000000009\n", - "回合:232/400,奖励:-28.0,Epsilon:0.010000000000000007\n", - "回合:233/400,奖励:-38.0,Epsilon:0.010000000000000007\n", - "回合:234/400,奖励:-17.0,Epsilon:0.010000000000000007\n", - "回合:235/400,奖励:-22.0,Epsilon:0.010000000000000005\n", - "回合:236/400,奖励:-13.0,Epsilon:0.010000000000000005\n", - "回合:237/400,奖励:-20.0,Epsilon:0.010000000000000005\n", - 
"回合:238/400,奖励:-18.0,Epsilon:0.010000000000000005\n", - "回合:239/400,奖励:-14.0,Epsilon:0.010000000000000005\n", - "回合:240/400,奖励:-13.0,Epsilon:0.010000000000000005\n", - "回合:241/400,奖励:-28.0,Epsilon:0.010000000000000004\n", - "回合:242/400,奖励:-13.0,Epsilon:0.010000000000000004\n", - "回合:243/400,奖励:-23.0,Epsilon:0.010000000000000004\n", - "回合:244/400,奖励:-17.0,Epsilon:0.010000000000000004\n", - "回合:245/400,奖励:-14.0,Epsilon:0.010000000000000004\n", - "回合:246/400,奖励:-22.0,Epsilon:0.010000000000000004\n", - "回合:247/400,奖励:-15.0,Epsilon:0.010000000000000004\n", - "回合:248/400,奖励:-19.0,Epsilon:0.010000000000000004\n", - "回合:249/400,奖励:-17.0,Epsilon:0.010000000000000004\n", - "回合:250/400,奖励:-27.0,Epsilon:0.010000000000000002\n", - "回合:251/400,奖励:-21.0,Epsilon:0.010000000000000002\n", - "回合:252/400,奖励:-23.0,Epsilon:0.010000000000000002\n", - "回合:253/400,奖励:-15.0,Epsilon:0.010000000000000002\n", - "回合:254/400,奖励:-15.0,Epsilon:0.010000000000000002\n", - "回合:255/400,奖励:-13.0,Epsilon:0.010000000000000002\n", - "回合:256/400,奖励:-15.0,Epsilon:0.010000000000000002\n", - "回合:257/400,奖励:-13.0,Epsilon:0.010000000000000002\n", - "回合:258/400,奖励:-28.0,Epsilon:0.010000000000000002\n", - "回合:259/400,奖励:-13.0,Epsilon:0.010000000000000002\n", - "回合:260/400,奖励:-13.0,Epsilon:0.010000000000000002\n", - "回合:261/400,奖励:-23.0,Epsilon:0.010000000000000002\n", - "回合:262/400,奖励:-13.0,Epsilon:0.010000000000000002\n", - "回合:263/400,奖励:-24.0,Epsilon:0.010000000000000002\n", - "回合:264/400,奖励:-17.0,Epsilon:0.010000000000000002\n", - "回合:265/400,奖励:-19.0,Epsilon:0.010000000000000002\n", - "回合:266/400,奖励:-13.0,Epsilon:0.010000000000000002\n", - "回合:267/400,奖励:-25.0,Epsilon:0.010000000000000002\n", - "回合:268/400,奖励:-15.0,Epsilon:0.01\n", - "回合:269/400,奖励:-15.0,Epsilon:0.01\n", - "回合:270/400,奖励:-21.0,Epsilon:0.01\n", - "回合:271/400,奖励:-13.0,Epsilon:0.01\n", - "回合:272/400,奖励:-13.0,Epsilon:0.01\n", - "回合:273/400,奖励:-22.0,Epsilon:0.01\n", - "回合:274/400,奖励:-15.0,Epsilon:0.01\n", - "回合:275/400,奖励:-13.0,Epsilon:0.01\n", - 
"回合:276/400,奖励:-19.0,Epsilon:0.01\n", - "回合:277/400,奖励:-13.0,Epsilon:0.01\n", - "回合:278/400,奖励:-18.0,Epsilon:0.01\n", - "回合:279/400,奖励:-14.0,Epsilon:0.01\n", - "回合:280/400,奖励:-126.0,Epsilon:0.01\n", - "回合:281/400,奖励:-15.0,Epsilon:0.01\n", - "回合:282/400,奖励:-19.0,Epsilon:0.01\n", - "回合:283/400,奖励:-13.0,Epsilon:0.01\n", - "回合:284/400,奖励:-25.0,Epsilon:0.01\n", - "回合:285/400,奖励:-13.0,Epsilon:0.01\n", - "回合:286/400,奖励:-119.0,Epsilon:0.01\n", - "回合:287/400,奖励:-15.0,Epsilon:0.01\n", - "回合:288/400,奖励:-15.0,Epsilon:0.01\n", - "回合:289/400,奖励:-14.0,Epsilon:0.01\n", - "回合:290/400,奖励:-13.0,Epsilon:0.01\n", - "回合:291/400,奖励:-13.0,Epsilon:0.01\n", - "回合:292/400,奖励:-15.0,Epsilon:0.01\n", - "回合:293/400,奖励:-33.0,Epsilon:0.01\n", - "回合:294/400,奖励:-19.0,Epsilon:0.01\n", - "回合:295/400,奖励:-13.0,Epsilon:0.01\n", - "回合:296/400,奖励:-15.0,Epsilon:0.01\n", - "回合:297/400,奖励:-13.0,Epsilon:0.01\n", - "回合:298/400,奖励:-132.0,Epsilon:0.01\n", - "回合:299/400,奖励:-13.0,Epsilon:0.01\n", - "回合:300/400,奖励:-13.0,Epsilon:0.01\n", - "回合:301/400,奖励:-13.0,Epsilon:0.01\n", - "回合:302/400,奖励:-14.0,Epsilon:0.01\n", - "回合:303/400,奖励:-15.0,Epsilon:0.01\n", - "回合:304/400,奖励:-13.0,Epsilon:0.01\n", - "回合:305/400,奖励:-13.0,Epsilon:0.01\n", - "回合:306/400,奖励:-13.0,Epsilon:0.01\n", - "回合:307/400,奖励:-13.0,Epsilon:0.01\n", - "回合:308/400,奖励:-13.0,Epsilon:0.01\n", - "回合:309/400,奖励:-13.0,Epsilon:0.01\n", - "回合:310/400,奖励:-13.0,Epsilon:0.01\n", - "回合:311/400,奖励:-15.0,Epsilon:0.01\n", - "回合:312/400,奖励:-13.0,Epsilon:0.01\n", - "回合:313/400,奖励:-13.0,Epsilon:0.01\n", - "回合:314/400,奖励:-13.0,Epsilon:0.01\n", - "回合:315/400,奖励:-15.0,Epsilon:0.01\n", - "回合:316/400,奖励:-14.0,Epsilon:0.01\n", - "回合:317/400,奖励:-13.0,Epsilon:0.01\n", - "回合:318/400,奖励:-13.0,Epsilon:0.01\n", - "回合:319/400,奖励:-13.0,Epsilon:0.01\n", - "回合:320/400,奖励:-21.0,Epsilon:0.01\n", - "回合:321/400,奖励:-19.0,Epsilon:0.01\n", - "回合:322/400,奖励:-13.0,Epsilon:0.01\n", - "回合:323/400,奖励:-13.0,Epsilon:0.01\n", - "回合:324/400,奖励:-13.0,Epsilon:0.01\n", - 
"回合:325/400,奖励:-13.0,Epsilon:0.01\n", - "回合:326/400,奖励:-14.0,Epsilon:0.01\n", - "回合:327/400,奖励:-15.0,Epsilon:0.01\n", - "回合:328/400,奖励:-13.0,Epsilon:0.01\n", - "回合:329/400,奖励:-13.0,Epsilon:0.01\n", - "回合:330/400,奖励:-13.0,Epsilon:0.01\n", - "回合:331/400,奖励:-13.0,Epsilon:0.01\n", - "回合:332/400,奖励:-13.0,Epsilon:0.01\n", - "回合:333/400,奖励:-14.0,Epsilon:0.01\n", - "回合:334/400,奖励:-13.0,Epsilon:0.01\n", - "回合:335/400,奖励:-113.0,Epsilon:0.01\n", - "回合:336/400,奖励:-13.0,Epsilon:0.01\n", - "回合:337/400,奖励:-13.0,Epsilon:0.01\n", - "回合:338/400,奖励:-13.0,Epsilon:0.01\n", - "回合:339/400,奖励:-13.0,Epsilon:0.01\n", - "回合:340/400,奖励:-13.0,Epsilon:0.01\n", - "回合:341/400,奖励:-15.0,Epsilon:0.01\n", - "回合:342/400,奖励:-23.0,Epsilon:0.01\n", - "回合:343/400,奖励:-13.0,Epsilon:0.01\n", - "回合:344/400,奖励:-13.0,Epsilon:0.01\n", - "回合:345/400,奖励:-13.0,Epsilon:0.01\n", - "回合:346/400,奖励:-13.0,Epsilon:0.01\n", - "回合:347/400,奖励:-13.0,Epsilon:0.01\n", - "回合:348/400,奖励:-13.0,Epsilon:0.01\n", - "回合:349/400,奖励:-13.0,Epsilon:0.01\n", - "回合:350/400,奖励:-13.0,Epsilon:0.01\n", - "回合:351/400,奖励:-13.0,Epsilon:0.01\n", - "回合:352/400,奖励:-13.0,Epsilon:0.01\n", - "回合:353/400,奖励:-13.0,Epsilon:0.01\n", - "回合:354/400,奖励:-13.0,Epsilon:0.01\n", - "回合:355/400,奖励:-13.0,Epsilon:0.01\n", - "回合:356/400,奖励:-13.0,Epsilon:0.01\n", - "回合:357/400,奖励:-13.0,Epsilon:0.01\n", - "回合:358/400,奖励:-13.0,Epsilon:0.01\n", - "回合:359/400,奖励:-13.0,Epsilon:0.01\n", - "回合:360/400,奖励:-13.0,Epsilon:0.01\n", - "回合:361/400,奖励:-13.0,Epsilon:0.01\n", - "回合:362/400,奖励:-13.0,Epsilon:0.01\n", - "回合:363/400,奖励:-13.0,Epsilon:0.01\n", - "回合:364/400,奖励:-13.0,Epsilon:0.01\n", - "回合:365/400,奖励:-13.0,Epsilon:0.01\n", - "回合:366/400,奖励:-13.0,Epsilon:0.01\n", - "回合:367/400,奖励:-13.0,Epsilon:0.01\n", - "回合:368/400,奖励:-13.0,Epsilon:0.01\n", - "回合:369/400,奖励:-13.0,Epsilon:0.01\n", - "回合:370/400,奖励:-13.0,Epsilon:0.01\n", - "回合:371/400,奖励:-13.0,Epsilon:0.01\n", - "回合:372/400,奖励:-14.0,Epsilon:0.01\n", - "回合:373/400,奖励:-13.0,Epsilon:0.01\n", - "回合:374/400,奖励:-15.0,Epsilon:0.01\n", 
- "回合:375/400,奖励:-13.0,Epsilon:0.01\n", - "回合:376/400,奖励:-13.0,Epsilon:0.01\n", - "回合:377/400,奖励:-13.0,Epsilon:0.01\n", - "回合:378/400,奖励:-13.0,Epsilon:0.01\n", - "回合:379/400,奖励:-13.0,Epsilon:0.01\n", - "回合:380/400,奖励:-117.0,Epsilon:0.01\n", - "回合:381/400,奖励:-13.0,Epsilon:0.01\n", - "回合:382/400,奖励:-13.0,Epsilon:0.01\n", - "回合:383/400,奖励:-13.0,Epsilon:0.01\n", - "回合:384/400,奖励:-13.0,Epsilon:0.01\n", - "回合:385/400,奖励:-13.0,Epsilon:0.01\n", - "回合:386/400,奖励:-13.0,Epsilon:0.01\n", - "回合:387/400,奖励:-13.0,Epsilon:0.01\n", - "回合:388/400,奖励:-13.0,Epsilon:0.01\n", - "回合:389/400,奖励:-13.0,Epsilon:0.01\n", - "回合:390/400,奖励:-13.0,Epsilon:0.01\n", - "回合:391/400,奖励:-13.0,Epsilon:0.01\n", - "回合:392/400,奖励:-13.0,Epsilon:0.01\n", - "回合:393/400,奖励:-13.0,Epsilon:0.01\n", - "回合:394/400,奖励:-13.0,Epsilon:0.01\n", - "回合:395/400,奖励:-13.0,Epsilon:0.01\n", - "回合:396/400,奖励:-13.0,Epsilon:0.01\n", - "回合:397/400,奖励:-13.0,Epsilon:0.01\n", - "回合:398/400,奖励:-15.0,Epsilon:0.01\n", - "回合:399/400,奖励:-13.0,Epsilon:0.01\n", - "回合:400/400,奖励:-13.0,Epsilon:0.01\n", - "完成训练!\n" - ] - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ + "回合:20/400,奖励:-45.0,Epsilon:0.010\n", + "回合:40/400,奖励:-34.0,Epsilon:0.010\n", + "回合:60/400,奖励:-47.0,Epsilon:0.010\n", + "回合:80/400,奖励:-88.0,Epsilon:0.010\n", + "回合:100/400,奖励:-53.0,Epsilon:0.010\n", + "回合:120/400,奖励:-23.0,Epsilon:0.010\n", + "回合:140/400,奖励:-20.0,Epsilon:0.010\n", + "回合:160/400,奖励:-29.0,Epsilon:0.010\n", + "回合:180/400,奖励:-42.0,Epsilon:0.010\n", + "回合:200/400,奖励:-28.0,Epsilon:0.010\n", + "回合:220/400,奖励:-20.0,Epsilon:0.010\n", + "回合:240/400,奖励:-20.0,Epsilon:0.010\n", + "回合:260/400,奖励:-17.0,Epsilon:0.010\n", + "回合:280/400,奖励:-13.0,Epsilon:0.010\n", + "回合:300/400,奖励:-13.0,Epsilon:0.010\n", + "回合:320/400,奖励:-13.0,Epsilon:0.010\n", + "回合:340/400,奖励:-13.0,Epsilon:0.010\n", + "回合:360/400,奖励:-13.0,Epsilon:0.010\n", + "回合:380/400,奖励:-13.0,Epsilon:0.010\n", + "回合:400/400,奖励:-14.0,Epsilon:0.010\n", + "完成训练!\n", "开始测试!\n", "环境:CliffWalking-v0, 算法:Q-learning, 设备:cpu\n", "回合数:1/20, 奖励:-13.0\n", @@ -869,7 +391,17 @@ }, { "data": { - "image/png": "", + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", "text/plain": [ "
" ] diff --git a/projects/notebooks/3.DQN.ipynb b/projects/notebooks/3.DQN.ipynb index 9fed7b6..6b73846 100644 --- a/projects/notebooks/3.DQN.ipynb +++ b/projects/notebooks/3.DQN.ipynb @@ -1,18 +1,5 @@ { "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 1、分析伪代码\n", - "\n", - "目前DQN算法基本遵循[Nature DQN](https://www.nature.com/articles/nature14236)的伪代码步骤,如下:\n", - "\n", - "
\n", - "\"\" \n", - "
" - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -36,15 +23,15 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ + "import torch.nn as nn\n", + "import torch.nn.functional as F\n", "class MLP(nn.Module):\n", " def __init__(self, n_states,n_actions,hidden_dim=128):\n", " \"\"\" 初始化q网络,为全连接网络\n", - " n_states: 输入的特征数即环境的状态维度\n", - " n_actions: 输出的动作维度\n", " \"\"\"\n", " super(MLP, self).__init__()\n", " self.fc1 = nn.Linear(n_states, hidden_dim) # 输入层\n", @@ -64,34 +51,39 @@ "source": [ "### 1.2、定义经验回放\n", "\n", - "经验回放首先是具有一定容量的,只有存储一定的transition网络才会更新,否则就退回到了之前的逐步更新了。另外写经验回放的时候一般需要包涵两个功能或方法,一个是push,即将一个transition样本按顺序放到经验回放中,如果满了就把最开始放进去的样本挤掉,因此如果大家学过数据结构的话推荐用队列来写,虽然这里不是。另外一个是sample,很简单就是随机采样出一个或者若干个(具体多少就是batch_size了)样本供DQN网络更新。功能讲清楚了,大家可以按照自己的想法用代码来实现,可以肯定地说,我这里不是最高效的,毕竟这还是青涩时期写出的代码。" + "经验回放首先是具有一定容量的,只有存储一定的transition网络才会更新,否则就退回到了之前的逐步更新了。另外写经验回放的时候一般需要包涵两个功能或方法,一个是push,即将一个transition样本按顺序放到经验回放中,如果满了就把最开始放进去的样本挤掉,因此如果大家学过数据结构的话推荐用队列来写,虽然这里不是。另外一个是sample,很简单就是随机采样出一个或者若干个(具体多少就是batch_size了)样本供DQN网络更新。功能讲清楚了,大家可以按照自己的想法用代码来实现,参考如下。" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ - "class ReplayBuffer:\n", - " def __init__(self, capacity):\n", - " self.capacity = capacity # 经验回放的容量\n", - " self.buffer = [] # 缓冲区\n", - " self.position = 0 \n", - " \n", - " def push(self, state, action, reward, next_state, done):\n", - " ''' 缓冲区是一个队列,容量超出时去掉开始存入的转移(transition)\n", + "from collections import deque\n", + "import random\n", + "class ReplayBuffer(object):\n", + " def __init__(self, capacity: int) -> None:\n", + " self.capacity = capacity\n", + " self.buffer = deque(maxlen=self.capacity)\n", + " def push(self,transitions):\n", + " ''' 存储transition到经验回放中\n", " '''\n", - " if len(self.buffer) < self.capacity:\n", - " self.buffer.append(None)\n", - " self.buffer[self.position] = (state, action, reward, next_state, 
done)\n", - " self.position = (self.position + 1) % self.capacity \n", - " \n", - " def sample(self, batch_size):\n", - " batch = random.sample(self.buffer, batch_size) # 随机采出小批量转移\n", - " state, action, reward, next_state, done = zip(*batch) # 解压成状态,动作等\n", - " return state, action, reward, next_state, done\n", - " \n", + "from collections import deque\n", + "import random\n", + "class ReplayBuffer(object):\n", + " def __init__(self, capacity: int) -> None:\n", + " self.capacity = capacity\n", + " self.buffer = deque(maxlen=self.capacity)\n", + " def push(self,transitions):\n", + " ''' 存储transition到经验回放中\n", + " '''\n", + " self.buffer.append(transitions)\n", + " def sample(self, batch_size: int, sequential: bool = False):\n", + " if batch_size > len(self.buffer): # 如果批量大小大于经验回放的容量,则取经验回放的容量\n", + " batch_size = len(self.buffer)\n", + " if sequential: # 顺序采样\n", + " rand = random.randint(0, len(self.buffer) - batch_size)\n", + " batch = [self.buffer[i] for i in range(rand, rand + batch_size)]\n", + " return zip(*batch)\n", + " else: # 随机采样\n", + " batch = random.sample(self.buffer, batch_size)\n", + " return zip(*batch)\n", + " def clear(self):\n", + " ''' 清空经验回放\n", + " '''\n", + " self.buffer.clear()\n", " def __len__(self):\n", " ''' 返回当前存储的量\n", " '''\n", @@ -102,61 +94,100 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### 1.3、真--定义算法\n", + "### 1.3、真定义算法\n", "\n", - "到了高级一点的算法,定义算法就比较麻烦,要先定义一些子模块。可以看到,其实去掉子模块的话,DQN跟Q learning的算法结构没啥区别,当然因为神经网络一般需要Torch或者Tensorflow来写,因此推荐大家先去学一学这些工具,比如\"eat_pytorch_in_20_days\"。\n" + "到了高级一点的算法,定义算法就比较麻烦,要先定义一些子模块,在定义好子模块之后我们就可以实现我们的算法核心部分。如下,可以看到,其实去掉子模块的话,DQN跟Q learning的算法结构没啥区别,当然因为神经网络一般需要Torch或者Tensorflow来写,因此推荐大家先去学一学这些工具,比如\"eat_pytorch_in_20_days\"。\n", + "\n", + "这里我们主要分析一下DQN的更新过程,也就是update函数。首先我们知道目前所有基于深度神经网络的更新方式都是梯度下降,如下:\n", + "$$\n", + "\\theta_i \\leftarrow \\theta_i - \\lambda \\nabla_{\\theta_{i}} L_{i}\\left(\\theta_{i}\\right)\n", + "$$\n", + "那么这个$\\theta$又是什么呢,注意到前面我们讲的DQN跟Q learning算法的一个主要区别就是使用神经网络替代了Q表,而这个$\\theta$实际上就是神经网络的参数,通常用$Q\\left(s_{i}, a_{i} ; \\theta\\right)$表示。根据强化学习的原理我们需要优化的是对应状态下不同动作的长期价值,然后每次选择价值最大对应的动作就能完成一条最优策略,使用神经网络表示Q表时也是如此,我们将输入的状态数作为神经网络的输入层,动作数作为输出层,这样的神经网络表达的功能就跟在Q learning中的Q表是一样的,只不过具有更强的鲁棒性。\n", + "\n", + 
"讲完了为什么要优化的是这个参数$\\theta$,接下来我们从代码层面进一步剖析,稍微了解一点Torch知识的同学都知道,上面的公式其实只需要定义一个优化器,然后计算损失之后用优化器迭代即可,如下:\n", + "```python\n", + "optimizer = optim.Adam(Q_net.parameters(), lr=0.01) # 定义优化器,对应的网络是Q_net,学习率为0.01\n", + "loss = ... # 计算损失,这里略过\n", + "# 然后优化器先zero_grad(),loss再反向传播,然后优化器step() ,这是一个固定的套路\n", + "optimizer.zero_grad() \n", + "loss.backward()\n", + "optimizer.step() \n", + "```\n", + "当然强烈建议同学们了解一下深度学习中的梯度下降,并且使用numpy实现,这样就会更加清楚整个梯度下降过程到底是怎么回事,上述只是在同学们了解了梯度下降的具体实现方式的前提下为了方便学习更多其他的知识形成的套路。这就好比我们玩一个竞技游戏,如果我们之前从来没有接触过该类游戏,那么肯定是从普通攻击,每个技能一步一步地学起打好基础,然后再学习技能连招等等也就是形成固定的套路,但是如果不先打基础,直接学习套路可能会是一脸懵逼的状态,尤其是很多高端玩家会对这些连招套路简化名称比如光速qa和1233321等等,一开始我们是很难听懂的。等当我们先打好基础,然后再学习了很多套路之后会发现这些基础并不能用得上,甚至有的时候可能会忽然忘记了这些基础,但其实我们并没有忘记,再回顾一遍也能很快拣起来。在这点上我想强调的是基础固然重要,但是不要死磕基础,除非是学术研究需要。再比如我们小学学完简单加减乘除之后很快就去背九九乘法表,而不会去过多纠结一加一等于几的问题,上大学后也是如此,只是很多时候我们很可能看起来这个问题值得研究,但意识不到自己就是在纠结一加一等于几的问题,这也是我在和众多读者们学习讨论的过程中在他们身上发现的问题。\n", + "\n", + "回归正题,细心的同学会发现数学公式和代码的对应是有一定的壁垒的,只要通过多加练习跨越了这个壁垒,那么对于往后我们想要复现论文也会轻松许多。我们目前讲了参数的更新过程,但是最关键的是损失是如何计算的,在DQN中损失的计算相对来说比较简单,如下:\n", + "$$\n", + "L(\\theta)=\\left(y_{i}-Q\\left(s_{i}, a_{i} ; \\theta\\right)\\right)^{2}\n", + "$$\n", + "这里的$y_{i}$通常称为目标值(期望值),$Q\\left(s_{i}, a_{i} ; \\theta\\right)$称为预测值(即网络当前输出的估计值),这个损失在深度学习中通常称作均方差损失,也就是mseloss,使用这个损失函数通常追溯到数学上的最小二乘法,感兴趣的同学可以了解一下深度学习中的各种损失函数以及各自的使用场景。\n", + "$y_{i}$在DQN中一般表示如下:\n", + "$$\n", + "y_{i}= \\begin{cases}r_{i} & \\text {对于终止状态} s_{i+1} \\\\ r_{i}+\\gamma \\max _{a^{\\prime}} Q\\left(s_{i+1}, a^{\\prime} ; \\theta\\right) & \\text {对于非终止状态} s_{i+1}\\end{cases}\n", + "$$\n", + "该公式的意思就是用即时奖励加上折扣后的下一个状态对应的最大Q值来构造目标值(因为真实的Q值通常不能直接求得,只能近似),这种做法实际上只是一种近似,可能会导致过估计等问题,也有一些改善的方法具体可以在后面各种改进的DQN算法比如Double DQN中看到,在这里我们暂时不要深究为什么要用这个来近似真实的Q值。然后注意到这里其实有一个终止状态的判断,因为如果下一个状态$s_{i+1}$是终止状态,那么之后就不再有后续的状态和奖励了,所以DQN干脆直接使用对应的奖励作为目标值。" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ + "import torch\n", + "import torch.optim as optim\n", + "import math\n", + "import numpy as np\n", "class DQN:\n", - " 
def __init__(self,n_actions,model,memory,cfg):\n", + " def __init__(self,model,memory,cfg):\n", "\n", - " self.n_actions = n_actions \n", - " self.device = torch.device(cfg.device) # cpu or cuda\n", - " self.gamma = cfg.gamma # 奖励的折扣因子\n", + " self.n_actions = cfg['n_actions'] \n", + " self.device = torch.device(cfg['device']) \n", + " self.gamma = cfg['gamma'] # 奖励的折扣因子\n", " # e-greedy策略相关参数\n", " self.sample_count = 0 # 用于epsilon的衰减计数\n", - " self.epsilon = lambda sample_count: cfg.epsilon_end + \\\n", - " (cfg.epsilon_start - cfg.epsilon_end) * \\\n", - " math.exp(-1. * sample_count / cfg.epsilon_decay)\n", - " self.batch_size = cfg.batch_size\n", + " self.epsilon = cfg['epsilon_start']\n", + " self.sample_count = 0 \n", + " self.epsilon_start = cfg['epsilon_start']\n", + " self.epsilon_end = cfg['epsilon_end']\n", + " self.epsilon_decay = cfg['epsilon_decay']\n", + " self.batch_size = cfg['batch_size']\n", " self.policy_net = model.to(self.device)\n", " self.target_net = model.to(self.device)\n", - " for target_param, param in zip(self.target_net.parameters(),self.policy_net.parameters()): # 复制参数到目标网路targe_net\n", + " # 复制参数到目标网络\n", + " for target_param, param in zip(self.target_net.parameters(),self.policy_net.parameters()): \n", " target_param.data.copy_(param.data)\n", - " self.optimizer = optim.Adam(self.policy_net.parameters(), lr=cfg.lr) # 优化器\n", + " self.optimizer = optim.Adam(self.policy_net.parameters(), lr=cfg['lr']) # 优化器\n", " self.memory = memory # 经验回放\n", - "\n", - " def sample(self, state):\n", - " ''' 选择动作\n", + " def sample_action(self, state):\n", + " ''' 采样动作\n", " '''\n", " self.sample_count += 1\n", - " if random.random() > self.epsilon(self.sample_count):\n", + " # epsilon指数衰减\n", + " self.epsilon = self.epsilon_end + (self.epsilon_start - self.epsilon_end) * \\\n", + " math.exp(-1. 
* self.sample_count / self.epsilon_decay) \n", + " if random.random() > self.epsilon:\n", " with torch.no_grad():\n", " state = torch.tensor(state, device=self.device, dtype=torch.float32).unsqueeze(dim=0)\n", " q_values = self.policy_net(state)\n", - " action = q_values.max(1)[1].item() # 选择Q值最大的动作\n", + " action = q_values.max(1)[1].item() # choose action corresponding to the maximum q value\n", " else:\n", " action = random.randrange(self.n_actions)\n", " return action\n", - " def predict(self,state):\n", - " with torch.no_grad():\n", - " state = torch.tensor(state, device=self.device, dtype=torch.float32).unsqueeze(dim=0)\n", - " q_values = self.policy_net(state)\n", - " action = q_values.max(1)[1].item() # 选择Q值最大的动作\n", + " @torch.no_grad() # 不计算梯度,该装饰器效果等同于with torch.no_grad():\n", + " def predict_action(self, state):\n", + " ''' 预测动作\n", + " '''\n", + " state = torch.tensor(state, device=self.device, dtype=torch.float32).unsqueeze(dim=0)\n", + " q_values = self.policy_net(state)\n", + " action = q_values.max(1)[1].item() # choose action corresponding to the maximum q value\n", " return action\n", " def update(self):\n", - " if len(self.memory) < self.batch_size: # 当memory中不满足一个批量时,不更新策略\n", + " if len(self.memory) < self.batch_size: # 当经验回放中不满足一个批量时,不更新策略\n", " return\n", - " # 从经验回放中(replay memory)中随机采样一个批量的转移(transition)\n", - " \n", + " # 从经验回放中随机采样一个批量的转移(transition)\n", " state_batch, action_batch, reward_batch, next_state_batch, done_batch = self.memory.sample(\n", " self.batch_size)\n", + " # 将数据转换为tensor\n", " state_batch = torch.tensor(np.array(state_batch), device=self.device, dtype=torch.float)\n", " action_batch = torch.tensor(action_batch, device=self.device).unsqueeze(1) \n", " reward_batch = torch.tensor(reward_batch, device=self.device, dtype=torch.float) \n", @@ -170,17 +201,10 @@ " # 优化更新模型\n", " self.optimizer.zero_grad() \n", " loss.backward()\n", - " for param in self.policy_net.parameters(): # clip防止梯度爆炸\n", + " # clip防止梯度爆炸\n", + " for 
param in self.policy_net.parameters(): \n", " param.grad.data.clamp_(-1, 1)\n", - " self.optimizer.step() \n", - "\n", - " def save(self, path):\n", - " torch.save(self.target_net.state_dict(), path+'checkpoint.pth')\n", - "\n", - " def load(self, path):\n", - " self.target_net.load_state_dict(torch.load(path+'checkpoint.pth'))\n", - " for target_param, param in zip(self.target_net.parameters(), self.policy_net.parameters()):\n", - " param.data.copy_(target_param.data)" + " self.optimizer.step() " ] }, { @@ -192,7 +216,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ @@ -200,47 +224,43 @@ " ''' 训练\n", " '''\n", " print(\"开始训练!\")\n", - " print(f\"回合:{cfg.env_name}, 算法:{cfg.algo_name}, 设备:{cfg.device}\")\n", " rewards = [] # 记录所有回合的奖励\n", " steps = []\n", - " for i_ep in range(cfg.train_eps):\n", + " for i_ep in range(cfg['train_eps']):\n", " ep_reward = 0 # 记录一回合内的奖励\n", " ep_step = 0\n", " state = env.reset() # 重置环境,返回初始状态\n", - " while True:\n", + " for _ in range(cfg['ep_max_steps']):\n", " ep_step += 1\n", - " action = agent.sample(state) # 选择动作\n", + " action = agent.sample_action(state) # 选择动作\n", " next_state, reward, done, _ = env.step(action) # 更新环境,返回transition\n", - " agent.memory.push(state, action, reward,\n", - " next_state, done) # 保存transition\n", + " agent.memory.push((state, action, reward,next_state, done)) # 保存transition\n", " state = next_state # 更新下一个状态\n", " agent.update() # 更新智能体\n", " ep_reward += reward # 累加奖励\n", " if done:\n", " break\n", - " if (i_ep + 1) % cfg.target_update == 0: # 智能体目标网络更新\n", + " if (i_ep + 1) % cfg['target_update'] == 0: # 智能体目标网络更新\n", " agent.target_net.load_state_dict(agent.policy_net.state_dict())\n", " steps.append(ep_step)\n", " rewards.append(ep_reward)\n", " if (i_ep + 1) % 10 == 0:\n", - " print(f'回合:{i_ep+1}/{cfg.train_eps},奖励:{ep_reward:.2f},Epislon:{agent.epsilon(agent.frame_idx):.3f}')\n", + " 
print(f\"回合:{i_ep+1}/{cfg['train_eps']},奖励:{ep_reward:.2f},Epislon:{agent.epsilon:.3f}\")\n", " print(\"完成训练!\")\n", " env.close()\n", - " res_dic = {'rewards':rewards}\n", - " return res_dic\n", + " return {'rewards':rewards}\n", "\n", "def test(cfg, env, agent):\n", " print(\"开始测试!\")\n", - " print(f\"回合:{cfg.env_name}, 算法:{cfg.algo_name}, 设备:{cfg.device}\")\n", " rewards = [] # 记录所有回合的奖励\n", " steps = []\n", - " for i_ep in range(cfg.test_eps):\n", + " for i_ep in range(cfg['test_eps']):\n", " ep_reward = 0 # 记录一回合内的奖励\n", " ep_step = 0\n", " state = env.reset() # 重置环境,返回初始状态\n", - " while True:\n", + " for _ in range(cfg['ep_max_steps']):\n", " ep_step+=1\n", - " action = agent.predict(state) # 选择动作\n", + " action = agent.predict_action(state) # 选择动作\n", " next_state, reward, done, _ = env.step(action) # 更新环境,返回transition\n", " state = next_state # 更新下一个状态\n", " ep_reward += reward # 累加奖励\n", @@ -248,27 +268,271 @@ " break\n", " steps.append(ep_step)\n", " rewards.append(ep_reward)\n", - " print(f'回合:{i_ep+1}/{cfg.train_eps},奖励:{ep_reward:.2f}')\n", + " print(f\"回合:{i_ep+1}/{cfg['test_eps']},奖励:{ep_reward:.2f}\")\n", " print(\"完成测试\")\n", " env.close()\n", " return {'rewards':rewards}" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3. 
定义环境" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "import gym\n", + "import os\n", + "def all_seed(env,seed = 1):\n", + " ''' 万能的seed函数\n", + " '''\n", + " env.seed(seed) # env config\n", + " np.random.seed(seed)\n", + " random.seed(seed)\n", + " torch.manual_seed(seed) # config for CPU\n", + " torch.cuda.manual_seed(seed) # config for GPU\n", + " os.environ['PYTHONHASHSEED'] = str(seed) # config for python scripts\n", + " # config for cudnn\n", + " torch.backends.cudnn.deterministic = True\n", + " torch.backends.cudnn.benchmark = False\n", + " torch.backends.cudnn.enabled = False\n", + "def env_agent_config(cfg):\n", + " env = gym.make(cfg['env_name']) # 创建环境\n", + " if cfg['seed'] !=0:\n", + " all_seed(env,seed=cfg['seed'])\n", + " n_states = env.observation_space.shape[0]\n", + " n_actions = env.action_space.n\n", + " print(f\"状态空间维度:{n_states},动作空间维度:{n_actions}\")\n", + " cfg.update({\"n_states\":n_states,\"n_actions\":n_actions}) # 更新n_states和n_actions到cfg参数中\n", + " model = MLP(n_states, n_actions, hidden_dim = cfg['hidden_dim']) # 创建模型\n", + " memory = ReplayBuffer(cfg['memory_capacity'])\n", + " agent = DQN(model,memory,cfg)\n", + " return env,agent" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 4. 设置参数" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "import argparse\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "def get_args():\n", + " \"\"\" 超参数\n", + " \"\"\"\n", + " parser = argparse.ArgumentParser(description=\"hyperparameters\") \n", + " parser.add_argument('--algo_name',default='DQN',type=str,help=\"name of algorithm\")\n", + " parser.add_argument('--env_name',default='CartPole-v0',type=str,help=\"name of environment\")\n", + " parser.add_argument('--train_eps',default=200,type=int,help=\"episodes of training\")\n", + " 
parser.add_argument('--test_eps',default=20,type=int,help=\"episodes of testing\")\n", + " parser.add_argument('--ep_max_steps',default = 100000,type=int,help=\"steps per episode, much larger value can simulate infinite steps\")\n", + " parser.add_argument('--gamma',default=0.95,type=float,help=\"discounted factor\")\n", + " parser.add_argument('--epsilon_start',default=0.95,type=float,help=\"initial value of epsilon\")\n", + " parser.add_argument('--epsilon_end',default=0.01,type=float,help=\"final value of epsilon\")\n", + " parser.add_argument('--epsilon_decay',default=500,type=int,help=\"decay rate of epsilon, the higher value, the slower decay\")\n", + " parser.add_argument('--lr',default=0.0001,type=float,help=\"learning rate\")\n", + " parser.add_argument('--memory_capacity',default=100000,type=int,help=\"memory capacity\")\n", + " parser.add_argument('--batch_size',default=64,type=int)\n", + " parser.add_argument('--target_update',default=4,type=int)\n", + " parser.add_argument('--hidden_dim',default=256,type=int)\n", + " parser.add_argument('--device',default='cpu',type=str,help=\"cpu or cuda\") \n", + " parser.add_argument('--seed',default=10,type=int,help=\"seed\") \n", + " args = parser.parse_args([])\n", + " args = {**vars(args)} # 转换成字典类型 \n", + " ## 打印超参数\n", + " print(\"超参数\")\n", + " print(''.join(['=']*80))\n", + " tplt = \"{:^20}\\t{:^20}\\t{:^20}\"\n", + " print(tplt.format(\"Name\", \"Value\", \"Type\"))\n", + " for k,v in args.items():\n", + " print(tplt.format(k,v,str(type(v)))) \n", + " print(''.join(['=']*80)) \n", + " return args\n", + "def smooth(data, weight=0.9): \n", + " '''用于平滑曲线,类似于Tensorboard中的smooth曲线\n", + " '''\n", + " last = data[0] \n", + " smoothed = []\n", + " for point in data:\n", + " smoothed_val = last * weight + (1 - weight) * point # 计算平滑值\n", + " smoothed.append(smoothed_val) \n", + " last = smoothed_val \n", + " return smoothed\n", + "\n", + "def plot_rewards(rewards,cfg, tag='train'):\n", + " ''' 画图\n", + " '''\n", + 
" sns.set()\n", + " plt.figure() # 创建一个图形实例,方便同时多画几个图\n", + " plt.title(f\"{tag}ing curve on {cfg['device']} of {cfg['algo_name']} for {cfg['env_name']}\")\n", + " plt.xlabel('epsiodes')\n", + " plt.plot(rewards, label='rewards')\n", + " plt.plot(smooth(rewards), label='smoothed')\n", + " plt.legend()\n", + " plt.show()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 5、我准备好了!" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "超参数\n", + "================================================================================\n", + " Name \t Value \t Type \n", + " algo_name \t DQN \t \n", + " env_name \t CartPole-v0 \t \n", + " train_eps \t 200 \t \n", + " test_eps \t 20 \t \n", + " ep_max_steps \t 100000 \t \n", + " gamma \t 0.95 \t \n", + " epsilon_start \t 0.95 \t \n", + " epsilon_end \t 0.01 \t \n", + " epsilon_decay \t 500 \t \n", + " lr \t 0.0001 \t \n", + " memory_capacity \t 100000 \t \n", + " batch_size \t 64 \t \n", + " target_update \t 4 \t \n", + " hidden_dim \t 256 \t \n", + " device \t cpu \t \n", + " seed \t 10 \t \n", + "================================================================================\n", + "状态空间维度:4,动作空间维度:2\n", + "开始训练!\n", + "回合:10/200,奖励:14.00,Epislon:0.611\n", + "回合:20/200,奖励:10.00,Epislon:0.470\n", + "回合:30/200,奖励:11.00,Epislon:0.372\n", + "回合:40/200,奖励:18.00,Epislon:0.302\n", + "回合:50/200,奖励:15.00,Epislon:0.228\n", + "回合:60/200,奖励:62.00,Epislon:0.121\n", + "回合:70/200,奖励:128.00,Epislon:0.039\n", + "回合:80/200,奖励:200.00,Epislon:0.011\n", + "回合:90/200,奖励:200.00,Epislon:0.010\n", + "回合:100/200,奖励:200.00,Epislon:0.010\n", + "回合:110/200,奖励:200.00,Epislon:0.010\n", + "回合:120/200,奖励:200.00,Epislon:0.010\n", + "回合:130/200,奖励:200.00,Epislon:0.010\n", + "回合:140/200,奖励:200.00,Epislon:0.010\n", + "回合:150/200,奖励:200.00,Epislon:0.010\n", + "回合:160/200,奖励:200.00,Epislon:0.010\n", + 
"回合:170/200,奖励:200.00,Epislon:0.010\n", + "回合:180/200,奖励:200.00,Epislon:0.010\n", + "回合:190/200,奖励:200.00,Epislon:0.010\n", + "回合:200/200,奖励:200.00,Epislon:0.010\n", + "完成训练!\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "开始测试!\n", + "回合:1/20,奖励:200.00\n", + "回合:2/20,奖励:200.00\n", + "回合:3/20,奖励:200.00\n", + "回合:4/20,奖励:200.00\n", + "回合:5/20,奖励:200.00\n", + "回合:6/20,奖励:200.00\n", + "回合:7/20,奖励:200.00\n", + "回合:8/20,奖励:200.00\n", + "回合:9/20,奖励:200.00\n", + "回合:10/20,奖励:200.00\n", + "回合:11/20,奖励:200.00\n", + "回合:12/20,奖励:200.00\n", + "回合:13/20,奖励:200.00\n", + "回合:14/20,奖励:200.00\n", + "回合:15/20,奖励:200.00\n", + "回合:16/20,奖励:200.00\n", + "回合:17/20,奖励:200.00\n", + "回合:18/20,奖励:200.00\n", + "回合:19/20,奖励:200.00\n", + "回合:20/20,奖励:200.00\n", + "完成测试\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# 获取参数\n", + "cfg = get_args() \n", + "# 训练\n", + "env, agent = env_agent_config(cfg)\n", + "res_dic = train(cfg, env, agent)\n", + " \n", + "plot_rewards(res_dic['rewards'], cfg, tag=\"train\") \n", + "# 测试\n", + "res_dic = test(cfg, env, agent)\n", + "plot_rewards(res_dic['rewards'], cfg, tag=\"test\") # 画出结果" + ] } ], "metadata": { "kernelspec": { - "display_name": "Python 3.7.13 ('easyrl')", + "display_name": "Python 3.7.12 ('easyrl')", "language": "python", "name": "python3" }, "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", "name": "python", - "version": "3.7.13" + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.12" }, "orig_nbformat": 4, "vscode": { "interpreter": { - "hash": "8994a120d39b6e6a2ecc94b4007f5314b68aa69fc88a7f00edf21be39b41f49c" + "hash": "f5a9629e9f3b9957bf68a43815f911e93447d47b3d065b6a8a04975e44c504d9" } } }, diff --git a/projects/notebooks/DDPG.ipynb b/projects/notebooks/DDPG.ipynb new file mode 100644 index 0000000..5194644 --- /dev/null +++ b/projects/notebooks/DDPG.ipynb @@ -0,0 +1,559 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. 定义算法" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 1.1. 
定义模型\n", + "\n", + "注意DDPG中critic网络的输入是state加上action。" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [], + "source": [ + "import torch\n", + "import torch.nn as nn\n", + "import torch.nn.functional as F\n", + "class Actor(nn.Module):\n", + " def __init__(self, n_states, n_actions, hidden_dim = 256, init_w=3e-3):\n", + " super(Actor, self).__init__() \n", + " self.linear1 = nn.Linear(n_states, hidden_dim)\n", + " self.linear2 = nn.Linear(hidden_dim, hidden_dim)\n", + " self.linear3 = nn.Linear(hidden_dim, n_actions)\n", + " \n", + " self.linear3.weight.data.uniform_(-init_w, init_w)\n", + " self.linear3.bias.data.uniform_(-init_w, init_w)\n", + " \n", + " def forward(self, x):\n", + " x = F.relu(self.linear1(x))\n", + " x = F.relu(self.linear2(x))\n", + " x = torch.tanh(self.linear3(x))\n", + " return x\n", + " \n", + "class Critic(nn.Module):\n", + " def __init__(self, n_states, n_actions, hidden_dim=256, init_w=3e-3):\n", + " super(Critic, self).__init__()\n", + " \n", + " self.linear1 = nn.Linear(n_states + n_actions, hidden_dim)\n", + " self.linear2 = nn.Linear(hidden_dim, hidden_dim)\n", + " self.linear3 = nn.Linear(hidden_dim, 1)\n", + " # 随机初始化为较小的值\n", + " self.linear3.weight.data.uniform_(-init_w, init_w)\n", + " self.linear3.bias.data.uniform_(-init_w, init_w)\n", + " \n", + " def forward(self, state, action):\n", + " # 按维数1拼接\n", + " x = torch.cat([state, action], 1)\n", + " x = F.relu(self.linear1(x))\n", + " x = F.relu(self.linear2(x))\n", + " x = self.linear3(x)\n", + " return x" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 1.2. 定义经验回放" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [], + "source": [ + "from collections import deque\n", + "import random\n", + "class ReplayBuffer:\n", + " def __init__(self, capacity: int) -> None:\n", + " self.capacity = capacity\n", + " self.buffer = deque(maxlen=self.capacity)\n", + " def 
push(self,transitions):\n", + " '''_summary_\n", + " Args:\n", + " trainsitions (tuple): _description_\n", + " '''\n", + " self.buffer.append(transitions)\n", + " def sample(self, batch_size: int, sequential: bool = False):\n", + " if batch_size > len(self.buffer):\n", + " batch_size = len(self.buffer)\n", + " if sequential: # sequential sampling\n", + " rand = random.randint(0, len(self.buffer) - batch_size)\n", + " batch = [self.buffer[i] for i in range(rand, rand + batch_size)]\n", + " return zip(*batch)\n", + " else:\n", + " batch = random.sample(self.buffer, batch_size)\n", + " return zip(*batch)\n", + " def clear(self):\n", + " self.buffer.clear()\n", + " def __len__(self):\n", + " return len(self.buffer)" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [], + "source": [ + "import torch.optim as optim\n", + "import numpy as np\n", + "class DDPG:\n", + " def __init__(self, models,memories,cfg):\n", + " self.device = torch.device(cfg['device'])\n", + " self.critic = models['critic'].to(self.device)\n", + " self.target_critic = models['critic'].to(self.device)\n", + " self.actor = models['actor'].to(self.device)\n", + " self.target_actor = models['actor'].to(self.device)\n", + " \n", + " # 复制参数到目标网络\n", + " for target_param, param in zip(self.target_critic.parameters(), self.critic.parameters()):\n", + " target_param.data.copy_(param.data)\n", + " for target_param, param in zip(self.target_actor.parameters(), self.actor.parameters()):\n", + " target_param.data.copy_(param.data)\n", + " self.critic_optimizer = optim.Adam(\n", + " self.critic.parameters(), lr=cfg['critic_lr'])\n", + " self.actor_optimizer = optim.Adam(self.actor.parameters(), lr=cfg['actor_lr'])\n", + " self.memory = memories['memory']\n", + " self.batch_size = cfg['batch_size']\n", + " self.gamma = cfg['gamma']\n", + " self.tau = cfg['tau'] # 软更新参数\n", + " def sample_action(self, state):\n", + " state = 
torch.FloatTensor(state).unsqueeze(0).to(self.device)\n", + " action = self.actor(state)\n", + " return action.detach().cpu().numpy()[0, 0]\n", + " @torch.no_grad()\n", + " def predict_action(self, state):\n", + " ''' 用于预测,不需要计算梯度\n", + " '''\n", + " state = torch.FloatTensor(state).unsqueeze(0).to(self.device)\n", + " action = self.actor(state)\n", + " return action.cpu().numpy()[0, 0]\n", + " def update(self):\n", + " if len(self.memory) < self.batch_size: # 当memory中不满足一个批量时,不更新策略\n", + " return\n", + " # 从经验回放中中随机采样一个批量的transition\n", + " state, action, reward, next_state, done = self.memory.sample(self.batch_size)\n", + " # 转变为张量\n", + " state = torch.FloatTensor(np.array(state)).to(self.device)\n", + " next_state = torch.FloatTensor(np.array(next_state)).to(self.device)\n", + " action = torch.FloatTensor(np.array(action)).to(self.device)\n", + " reward = torch.FloatTensor(reward).unsqueeze(1).to(self.device)\n", + " done = torch.FloatTensor(np.float32(done)).unsqueeze(1).to(self.device)\n", + " # 注意看伪代码,这里的actor损失就是对应策略即actor输出的action下对应critic值的负均值\n", + " actor_loss = self.critic(state, self.actor(state))\n", + " actor_loss = - actor_loss.mean()\n", + "\n", + " next_action = self.target_actor(next_state)\n", + " target_value = self.target_critic(next_state, next_action.detach())\n", + " # 这里的expected_value就是伪代码中间的y_i \n", + " expected_value = reward + (1.0 - done) * self.gamma * target_value\n", + " expected_value = torch.clamp(expected_value, -np.inf, np.inf)\n", + "\n", + " actual_value = self.critic(state, action)\n", + " critic_loss = nn.MSELoss()(actual_value, expected_value.detach())\n", + " \n", + " self.actor_optimizer.zero_grad()\n", + " actor_loss.backward()\n", + " self.actor_optimizer.step()\n", + " self.critic_optimizer.zero_grad()\n", + " critic_loss.backward()\n", + " self.critic_optimizer.step()\n", + " # 各自目标网络的参数软更新\n", + " for target_param, param in zip(self.target_critic.parameters(), self.critic.parameters()):\n", + " 
target_param.data.copy_(\n", + " target_param.data * (1.0 - self.tau) +\n", + " param.data * self.tau\n", + " )\n", + " for target_param, param in zip(self.target_actor.parameters(), self.actor.parameters()):\n", + " target_param.data.copy_(\n", + " target_param.data * (1.0 - self.tau) +\n", + " param.data * self.tau\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. 定义训练\n", + "\n", + "注意测试函数中不需要动作噪声" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [], + "source": [ + "class OUNoise(object):\n", + " '''Ornstein–Uhlenbeck噪声\n", + " '''\n", + " def __init__(self, action_space, mu=0.0, theta=0.15, max_sigma=0.3, min_sigma=0.3, decay_period=100000):\n", + " self.mu = mu # OU噪声的参数\n", + " self.theta = theta # OU噪声的参数\n", + " self.sigma = max_sigma # OU噪声的参数\n", + " self.max_sigma = max_sigma\n", + " self.min_sigma = min_sigma\n", + " self.decay_period = decay_period\n", + " self.n_actions = action_space.shape[0]\n", + " self.low = action_space.low\n", + " self.high = action_space.high\n", + " self.reset()\n", + " def reset(self):\n", + " self.obs = np.ones(self.n_actions) * self.mu\n", + " def evolve_obs(self):\n", + " x = self.obs\n", + " dx = self.theta * (self.mu - x) + self.sigma * np.random.randn(self.n_actions)\n", + " self.obs = x + dx\n", + " return self.obs\n", + " def get_action(self, action, t=0):\n", + " ou_obs = self.evolve_obs()\n", + " self.sigma = self.max_sigma - (self.max_sigma - self.min_sigma) * min(1.0, t / self.decay_period) # sigma会逐渐衰减\n", + " return np.clip(action + ou_obs, self.low, self.high) # 动作加上噪声后进行剪切\n", + "\n", + "def train(cfg, env, agent):\n", + " print(\"开始训练!\")\n", + " ou_noise = OUNoise(env.action_space) # 动作噪声\n", + " rewards = [] # 记录所有回合的奖励\n", + " for i_ep in range(cfg['train_eps']):\n", + " state = env.reset()\n", + " ou_noise.reset()\n", + " ep_reward = 0\n", + " for i_step in range(cfg['max_steps']):\n", + " action = 
agent.sample_action(state)\n", + " action = ou_noise.get_action(action, i_step+1) \n", + " next_state, reward, done, _ = env.step(action)\n", + " ep_reward += reward\n", + " agent.memory.push((state, action, reward, next_state, done))\n", + " agent.update()\n", + " state = next_state\n", + " if done:\n", + " break\n", + " if (i_ep+1)%10 == 0:\n", + " print(f\"回合:{i_ep+1}/{cfg['train_eps']},奖励:{ep_reward:.2f}\")\n", + " rewards.append(ep_reward)\n", + " print(\"完成训练!\")\n", + " return {'rewards':rewards}\n", + "def test(cfg, env, agent):\n", + " print(\"开始测试!\")\n", + " rewards = [] # 记录所有回合的奖励\n", + " for i_ep in range(cfg['test_eps']):\n", + " state = env.reset() \n", + " ep_reward = 0\n", + " for i_step in range(cfg['max_steps']):\n", + " action = agent.predict_action(state)\n", + " next_state, reward, done, _ = env.step(action)\n", + " ep_reward += reward\n", + " state = next_state\n", + " if done:\n", + " break\n", + " rewards.append(ep_reward)\n", + " print(f\"回合:{i_ep+1}/{cfg['test_eps']},奖励:{ep_reward:.2f}\")\n", + " print(\"完成测试!\")\n", + " return {'rewards':rewards}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3. 
定义环境" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [], + "source": [ + "import gym\n", + "import os\n", + "import torch\n", + "import numpy as np\n", + "import random\n", + "class NormalizedActions(gym.ActionWrapper):\n", + " ''' 将action范围重定在[0.1]之间\n", + " '''\n", + " def action(self, action):\n", + " low_bound = self.action_space.low\n", + " upper_bound = self.action_space.high\n", + " action = low_bound + (action + 1.0) * 0.5 * (upper_bound - low_bound)\n", + " action = np.clip(action, low_bound, upper_bound)\n", + " return action\n", + "\n", + " def reverse_action(self, action):\n", + " low_bound = self.action_space.low\n", + " upper_bound = self.action_space.high\n", + " action = 2 * (action - low_bound) / (upper_bound - low_bound) - 1\n", + " action = np.clip(action, low_bound, upper_bound)\n", + " return action\n", + "def all_seed(env,seed = 1):\n", + " ''' 万能的seed函数\n", + " '''\n", + " env.seed(seed) # env config\n", + " np.random.seed(seed)\n", + " random.seed(seed)\n", + " torch.manual_seed(seed) # config for CPU\n", + " torch.cuda.manual_seed(seed) # config for GPU\n", + " os.environ['PYTHONHASHSEED'] = str(seed) # config for python scripts\n", + " # config for cudnn\n", + " torch.backends.cudnn.deterministic = True\n", + " torch.backends.cudnn.benchmark = False\n", + " torch.backends.cudnn.enabled = False\n", + "def env_agent_config(cfg):\n", + " env = NormalizedActions(gym.make(cfg['env_name'])) # 装饰action噪声\n", + " if cfg['seed'] !=0:\n", + " all_seed(env,seed=cfg['seed'])\n", + " n_states = env.observation_space.shape[0]\n", + " n_actions = env.action_space.shape[0]\n", + " cfg.update({\"n_states\":n_states,\"n_actions\":n_actions}) # 更新n_states和n_actions到cfg参数中\n", + " models = {\"actor\":Actor(n_states,n_actions,hidden_dim=cfg['actor_hidden_dim']),\"critic\":Critic(n_states,n_actions,hidden_dim=cfg['critic_hidden_dim'])}\n", + " memories = {\"memory\":ReplayBuffer(cfg['memory_capacity'])}\n", + " agent 
= DDPG(models,memories,cfg)\n", + " return env,agent" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 4. 设置参数" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [], + "source": [ + "import argparse\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "def get_args():\n", + " \"\"\" 超参数\n", + " \"\"\"\n", + " parser = argparse.ArgumentParser(description=\"hyperparameters\") \n", + " parser.add_argument('--algo_name',default='DDPG',type=str,help=\"name of algorithm\")\n", + " parser.add_argument('--env_name',default='Pendulum-v1',type=str,help=\"name of environment\")\n", + " parser.add_argument('--train_eps',default=300,type=int,help=\"episodes of training\")\n", + " parser.add_argument('--test_eps',default=20,type=int,help=\"episodes of testing\")\n", + " parser.add_argument('--max_steps',default=100000,type=int,help=\"steps per episode, much larger value can simulate infinite steps\")\n", + " parser.add_argument('--gamma',default=0.99,type=float,help=\"discounted factor\")\n", + " parser.add_argument('--critic_lr',default=1e-3,type=float,help=\"learning rate of critic\")\n", + " parser.add_argument('--actor_lr',default=1e-4,type=float,help=\"learning rate of actor\")\n", + " parser.add_argument('--memory_capacity',default=8000,type=int,help=\"memory capacity\")\n", + " parser.add_argument('--batch_size',default=128,type=int)\n", + " parser.add_argument('--target_update',default=2,type=int)\n", + " parser.add_argument('--tau',default=1e-2,type=float)\n", + " parser.add_argument('--critic_hidden_dim',default=256,type=int)\n", + " parser.add_argument('--actor_hidden_dim',default=256,type=int)\n", + " parser.add_argument('--device',default='cpu',type=str,help=\"cpu or cuda\") \n", + " parser.add_argument('--seed',default=1,type=int,help=\"random seed\")\n", + " args = parser.parse_args([]) \n", + " args = {**vars(args)} # 将args转换为字典 \n", + " # 打印参数\n", + " 
print(\"训练参数如下:\")\n", + " print(''.join(['=']*80))\n", + " tplt = \"{:^20}\\t{:^20}\\t{:^20}\"\n", + " print(tplt.format(\"参数名\",\"参数值\",\"参数类型\"))\n", + " for k,v in args.items():\n", + " print(tplt.format(k,v,str(type(v)))) \n", + " print(''.join(['=']*80)) \n", + " return args\n", + "def smooth(data, weight=0.9): \n", + " '''用于平滑曲线,类似于Tensorboard中的smooth\n", + "\n", + " Args:\n", + " data (List):输入数据\n", + " weight (Float): 平滑权重,处于0-1之间,数值越高说明越平滑,一般取0.9\n", + "\n", + " Returns:\n", + " smoothed (List): 平滑后的数据\n", + " '''\n", + " last = data[0] # First value in the plot (first timestep)\n", + " smoothed = list()\n", + " for point in data:\n", + " smoothed_val = last * weight + (1 - weight) * point # 计算平滑值\n", + " smoothed.append(smoothed_val) \n", + " last = smoothed_val \n", + " return smoothed\n", + "\n", + "def plot_rewards(rewards,cfg,path=None,tag='train'):\n", + " sns.set()\n", + " plt.figure() # 创建一个图形实例,方便同时多画几个图\n", + " plt.title(f\"{tag}ing curve on {cfg['device']} of {cfg['algo_name']} for {cfg['env_name']}\")\n", + " plt.xlabel('epsiodes')\n", + " plt.plot(rewards, label='rewards')\n", + " plt.plot(smooth(rewards), label='smoothed')\n", + " plt.legend()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 5. 我准备好了!" 
+ ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "训练参数如下:\n", + "================================================================================\n", + " 参数名 \t 参数值 \t 参数类型 \n", + " algo_name \t DDPG \t \n", + " env_name \t Pendulum-v1 \t \n", + " train_eps \t 300 \t \n", + " test_eps \t 20 \t \n", + " max_steps \t 100000 \t \n", + " gamma \t 0.99 \t \n", + " critic_lr \t 0.001 \t \n", + " actor_lr \t 0.0001 \t \n", + " memory_capacity \t 8000 \t \n", + " batch_size \t 128 \t \n", + " target_update \t 2 \t \n", + " tau \t 0.01 \t \n", + " critic_hidden_dim \t 256 \t \n", + " actor_hidden_dim \t 256 \t \n", + " device \t cpu \t \n", + " seed \t 1 \t \n", + "================================================================================\n", + "开始训练!\n", + "回合:10/300,奖励:-1549.57\n", + "回合:20/300,奖励:-1515.84\n", + "回合:30/300,奖励:-1413.30\n", + "回合:40/300,奖励:-972.99\n", + "回合:50/300,奖励:-829.94\n", + "回合:60/300,奖励:-727.91\n", + "回合:70/300,奖励:-954.71\n", + "回合:80/300,奖励:-1318.39\n", + "回合:90/300,奖励:-981.19\n", + "回合:100/300,奖励:-1262.05\n", + "回合:110/300,奖励:-640.49\n", + "回合:120/300,奖励:-1100.00\n", + "回合:130/300,奖励:-764.66\n", + "回合:140/300,奖励:-352.27\n", + "回合:150/300,奖励:-891.03\n", + "回合:160/300,奖励:-1318.07\n", + "回合:170/300,奖励:-124.30\n", + "回合:180/300,奖励:-240.08\n", + "回合:190/300,奖励:-491.77\n", + "回合:200/300,奖励:-1000.77\n", + "回合:210/300,奖励:-128.87\n", + "回合:220/300,奖励:-950.32\n", + "回合:230/300,奖励:-122.48\n", + "回合:240/300,奖励:-246.52\n", + "回合:250/300,奖励:-374.37\n", + "回合:260/300,奖励:-368.25\n", + "回合:270/300,奖励:-364.17\n", + "回合:280/300,奖励:-725.39\n", + "回合:290/300,奖励:-131.21\n", + "回合:300/300,奖励:-610.10\n", + "完成训练!\n", + "开始测试!\n", + "回合:1/20,奖励:-116.05\n", + "回合:2/20,奖励:-126.18\n", + "回合:3/20,奖励:-231.46\n", + "回合:4/20,奖励:-246.40\n", + "回合:5/20,奖励:-304.69\n", + "回合:6/20,奖励:-124.40\n", + "回合:7/20,奖励:-1.06\n", + "回合:8/20,奖励:-114.20\n", + "回合:9/20,奖励:-348.97\n", 
+ "回合:10/20,奖励:-116.11\n", + "回合:11/20,奖励:-117.20\n", + "回合:12/20,奖励:-118.66\n", + "回合:13/20,奖励:-235.18\n", + "回合:14/20,奖励:-356.14\n", + "回合:15/20,奖励:-118.39\n", + "回合:16/20,奖励:-351.94\n", + "回合:17/20,奖励:-114.51\n", + "回合:18/20,奖励:-124.78\n", + "回合:19/20,奖励:-226.47\n", + "回合:20/20,奖励:-121.49\n", + "完成测试!\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# 获取参数\n", + "cfg = get_args() \n", + "# 训练\n", + "env, agent = env_agent_config(cfg)\n", + "res_dic = train(cfg, env, agent)\n", + " \n", + "plot_rewards(res_dic['rewards'], cfg, tag=\"train\") \n", + "# 测试\n", + "res_dic = test(cfg, env, agent)\n", + "plot_rewards(res_dic['rewards'], cfg, tag=\"test\") # 画出结果" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.7.13 ('easyrl')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.13" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "8994a120d39b6e6a2ecc94b4007f5314b68aa69fc88a7f00edf21be39b41f49c" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/projects/notebooks/PolicyGradient.ipynb b/projects/notebooks/PolicyGradient.ipynb new file mode 100644 index 0000000..b6326da --- /dev/null +++ b/projects/notebooks/PolicyGradient.ipynb @@ -0,0 +1,202 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. 定义算法\n", + "\n", + "最基础的策略梯度算法就是REINFORCE算法,又称作Monte-Carlo Policy Gradient算法。我们策略优化的目标如下:\n", + "\n", + "$$\n", + "J_{\\theta}= \\Psi_{\\pi} \\nabla_\\theta \\log \\pi_\\theta\\left(a_t \\mid s_t\\right)\n", + "$$\n", + "\n", + "其中$\\Psi_{\\pi}$在REINFORCE算法中表示衰减的回报(具体公式见伪代码),也可以用优势来估计,也就是我们熟知的A3C算法,这个在后面包括GAE算法中都会讲到。\n", + "\n", + "### 1.1. 
策略函数设计\n", + "\n", + "既然策略梯度是直接对策略函数进行梯度计算,那么策略函数如何设计呢?一般来讲有两种设计方式,一个是softmax函数,另外一个是高斯分布$\\mathcal{N}\\left(\\phi(s)^T \\theta, \\sigma^2\\right)$,前者用于离散动作空间,后者多用于连续动作空间。\n", + "\n", + "softmax函数可以表示为:\n", + "$$\n", + "\\pi_\\theta(s, a)=\\frac{e^{\\phi(s, a)^T \\theta}}{\\sum_b e^{\\phi(s, b)^T \\theta}}\n", + "$$\n", + "对应的梯度为:\n", + "$$\n", + "\\nabla_\\theta \\log \\pi_\\theta(s, a)=\\phi(s, a)-\\mathbb{E}_{\\pi_\\theta}[\\phi(s, \\cdot)]\n", + "$$\n", + "高斯分布对应的梯度为:\n", + "$$\n", + "\\nabla_\\theta \\log \\pi_\\theta(s, a)=\\frac{\\left(a-\\phi(s)^T \\theta\\right) \\phi(s)}{\\sigma^2}\n", + "$$\n", + "但是对于一些特殊的情况,例如在本次演示中动作维度=2且为离散空间,这个时候可以用伯努利分布来实现,这种方式其实是不推荐的,这里给大家做演示也是为了展现一些特殊情况,启发大家一些思考,例如Bernoulli,Binomial,Gaussian分布之间的关系。简单说来,Binomial分布,$n = 1$时就是Bernoulli分布,$n \\rightarrow \\infty$时趋近于Gaussian分布。\n", + "\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 1.2. 模型设计\n", + "\n", + "前面讲到,尽管本次演示是离散空间,但是由于动作维度等于2,此时就可以用特殊的二项分布来表示策略函数,即伯努利分布。伯努利的分布实际上是用一个概率作为输入,然后从中采样动作,伯努利采样出来的动作只可能是0或1,就像投掷出硬币的正反面。在这种情况下,我们的策略模型就需要在MLP的基础上,将状态作为输入,将动作作为倒数第二层输出,并在最后一层增加激活函数来输出对应动作的概率。不清楚激活函数作用的同学可以再看一遍深度学习相关的知识,简单来说其作用就是增加神经网络的非线性。既然需要输出对应动作的概率,那么输出的值需要处于0-1之间,此时sigmoid函数刚好满足我们的需求,实现代码参考如下。" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import torch\n", + "import torch.nn as nn\n", + "import torch.nn.functional as F\n", + "class PGNet(nn.Module):\n", + " def __init__(self, input_dim,output_dim,hidden_dim=128):\n", + " \"\"\" 初始化q网络,为全连接网络\n", + " input_dim: 输入的特征数即环境的状态维度\n", + " output_dim: 输出的动作维度\n", + " \"\"\"\n", + " super(PGNet, self).__init__()\n", + " self.fc1 = nn.Linear(input_dim, hidden_dim) # 输入层\n", + " self.fc2 = nn.Linear(hidden_dim,hidden_dim) # 隐藏层\n", + " self.fc3 = nn.Linear(hidden_dim, output_dim) # 输出层\n", + " def forward(self, x):\n", + " x = F.relu(self.fc1(x))\n", + " x = F.relu(self.fc2(x))\n", + " x = 
torch.sigmoid(self.fc3(x))\n", + " return x" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 1.3. 更新函数设计\n", + "\n", + "前面提到我们的优化目标也就是策略梯度算法的损失函数如下:\n", + "$$\n", + "J_{\\theta}= \\Psi_{\\pi} \\nabla_\\theta \\log \\pi_\\theta\\left(a_t \\mid s_t\\right)\n", + "$$\n", + "\n", + "我们需要拆开成两个部分$\\Psi_{\\pi}$和$\\nabla_\\theta \\log \\pi_\\theta\\left(a_t \\mid s_t\\right)$分开计算,首先看值函数部分$\\Psi_{\\pi}$,在REINFORCE算法中值函数是从当前时刻开始的衰减回报,如下:\n", + "$$\n", + "G \\leftarrow \\sum_{k=t+1}^{T} \\gamma^{k-t-1} r_{k}\n", + "$$\n", + "\n", + "这个实际用代码来实现的时候可能有点绕,我们可以倒过来看,在同一回合下,我们的终止时刻是$T$,那么最后一步对应的回报$G_{T-1}=r_T$,而前一步对应的$G_{T-2}=r_{T-1}+\\gamma r_T$,在这里代码中我们使用了一个动态规划的技巧,如下:\n", + "```python\n", + "running_add = running_add * self.gamma + reward_pool[i] # running_add初始值为0\n", + "```\n", + "这个公式也是倒过来循环的,第一次的值等于:\n", + "$$\n", + "running\\_add = r_T\n", + "$$\n", + "第二次的值则等于:\n", + "$$\n", + "running\\_add = r_T*\\gamma+r_{T-1}\n", + "$$\n", + "第三次的值等于:\n", + "$$\n", + "running\\_add = (r_T*\\gamma+r_{T-1})*\\gamma+r_{T-2} = r_T*\\gamma^2+r_{T-1}*\\gamma+r_{T-2}\n", + "$$\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import torch\n", + "from torch.distributions import Bernoulli\n", + "from torch.autograd import Variable\n", + "import numpy as np\n", + "\n", + "class PolicyGradient:\n", + " \n", + " def __init__(self, model,memory,cfg):\n", + " self.gamma = cfg['gamma']\n", + " self.device = torch.device(cfg['device']) \n", + " self.memory = memory\n", + " self.policy_net = model.to(self.device)\n", + " self.optimizer = torch.optim.RMSprop(self.policy_net.parameters(), lr=cfg['lr'])\n", + "\n", + " def sample_action(self,state):\n", + "\n", + " state = torch.from_numpy(state).float()\n", + " state = Variable(state)\n", + " probs = self.policy_net(state)\n", + " m = Bernoulli(probs) # 伯努利分布\n", + " action = m.sample()\n", + " \n", + " action = 
action.data.numpy().astype(int)[0] # 转为标量\n", + " return action\n", + " def predict_action(self,state):\n", + "\n", + " state = torch.from_numpy(state).float()\n", + " state = Variable(state)\n", + " probs = self.policy_net(state)\n", + " m = Bernoulli(probs) # 伯努利分布\n", + " action = m.sample()\n", + " action = action.data.numpy().astype(int)[0] # 转为标量\n", + " return action\n", + " \n", + " def update(self):\n", + " state_pool,action_pool,reward_pool= self.memory.sample()\n", + " state_pool,action_pool,reward_pool = list(state_pool),list(action_pool),list(reward_pool)\n", + " # Discount reward\n", + " running_add = 0\n", + " for i in reversed(range(len(reward_pool))):\n", + " if reward_pool[i] == 0:\n", + " running_add = 0\n", + " else:\n", + " running_add = running_add * self.gamma + reward_pool[i]\n", + " reward_pool[i] = running_add\n", + "\n", + " # Normalize reward\n", + " reward_mean = np.mean(reward_pool)\n", + " reward_std = np.std(reward_pool)\n", + " for i in range(len(reward_pool)):\n", + " reward_pool[i] = (reward_pool[i] - reward_mean) / reward_std\n", + "\n", + " # Gradient Desent\n", + " self.optimizer.zero_grad()\n", + "\n", + " for i in range(len(reward_pool)):\n", + " state = state_pool[i]\n", + " action = Variable(torch.FloatTensor([action_pool[i]]))\n", + " reward = reward_pool[i]\n", + " state = Variable(torch.from_numpy(state).float())\n", + " probs = self.policy_net(state)\n", + " m = Bernoulli(probs)\n", + " loss = -m.log_prob(action) * reward # Negtive score function x reward\n", + " # print(loss)\n", + " loss.backward()\n", + " self.optimizer.step()\n", + " self.memory.clear()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.7.13 ('easyrl')", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.7.13" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "8994a120d39b6e6a2ecc94b4007f5314b68aa69fc88a7f00edf21be39b41f49c" + } + } + }, + "nbformat": 
4, + "nbformat_minor": 2 +} diff --git a/projects/notebooks/figs/dqn_pseu.png b/projects/notebooks/figs/dqn_pseu.png deleted file mode 100644 index 591405b..0000000 Binary files a/projects/notebooks/figs/dqn_pseu.png and /dev/null differ diff --git a/projects/parl_tutorials/DDPG.ipynb b/projects/parl_tutorials/DDPG.ipynb new file mode 100644 index 0000000..a0db09f --- /dev/null +++ b/projects/parl_tutorials/DDPG.ipynb @@ -0,0 +1,465 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. 定义算法" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[32m[09-28 00:38:01 MainThread @utils.py:73]\u001b[0m paddlepaddle version: 2.3.2.\n" + ] + } + ], + "source": [ + "import parl\n", + "import paddle\n", + "import paddle.nn as nn\n", + "import paddle.nn.functional as F\n", + "class Actor(parl.Model):\n", + " def __init__(self, n_states, n_actions):\n", + " super(Actor, self).__init__()\n", + "\n", + " self.l1 = nn.Linear(n_states, 400)\n", + " self.l2 = nn.Linear(400, 300)\n", + " self.l3 = nn.Linear(300, n_actions)\n", + "\n", + " def forward(self, state):\n", + " x = F.relu(self.l1(state))\n", + " x = F.relu(self.l2(x))\n", + " return paddle.tanh(self.l3(x))\n", + "\n", + "class Critic(parl.Model):\n", + " def __init__(self, n_states, n_actions):\n", + " super(Critic, self).__init__()\n", + "\n", + " self.l1 = nn.Linear(n_states, 400)\n", + " self.l2 = nn.Linear(400 + n_actions, 300)\n", + " self.l3 = nn.Linear(300, 1)\n", + "\n", + " def forward(self, state, action):\n", + " x = F.relu(self.l1(state))\n", + " x = F.relu(self.l2(paddle.concat([x, action], 1)))\n", + " return self.l3(x)\n", + "class ActorCritic(parl.Model):\n", + " def __init__(self, n_states, n_actions):\n", + " super(ActorCritic, self).__init__()\n", + " self.actor_model = Actor(n_states, n_actions)\n", + " self.critic_model = Critic(n_states, n_actions)\n", + 
"\n", + " def policy(self, state):\n", + " return self.actor_model(state)\n", + "\n", + " def value(self, state, action):\n", + " return self.critic_model(state, action)\n", + "\n", + " def get_actor_params(self):\n", + " return self.actor_model.parameters()\n", + "\n", + " def get_critic_params(self):\n", + " return self.critic_model.parameters()" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from collections import deque\n", + "import random\n", + "class ReplayBuffer:\n", + " def __init__(self, capacity: int) -> None:\n", + " self.capacity = capacity\n", + " self.buffer = deque(maxlen=self.capacity)\n", + " def push(self,transitions):\n", + " '''_summary_\n", + " Args:\n", + " trainsitions (tuple): _description_\n", + " '''\n", + " self.buffer.append(transitions)\n", + " def sample(self, batch_size: int, sequential: bool = False):\n", + " if batch_size > len(self.buffer):\n", + " batch_size = len(self.buffer)\n", + " if sequential: # sequential sampling\n", + " rand = random.randint(0, len(self.buffer) - batch_size)\n", + " batch = [self.buffer[i] for i in range(rand, rand + batch_size)]\n", + " return zip(*batch)\n", + " else:\n", + " batch = random.sample(self.buffer, batch_size)\n", + " return zip(*batch)\n", + " def clear(self):\n", + " self.buffer.clear()\n", + " def __len__(self):\n", + " return len(self.buffer)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "import parl\n", + "import paddle\n", + "import numpy as np\n", + "\n", + "\n", + "class DDPGAgent(parl.Agent):\n", + " def __init__(self, algorithm,memory,cfg):\n", + " super(DDPGAgent, self).__init__(algorithm)\n", + " self.n_actions = cfg['n_actions']\n", + " self.expl_noise = cfg['expl_noise']\n", + " self.batch_size = cfg['batch_size'] \n", + " self.memory = memory\n", + " self.alg.sync_target(decay=0)\n", + "\n", + " def sample_action(self, state):\n", + " action_numpy 
= self.predict_action(state)\n", + " action_noise = np.random.normal(0, self.expl_noise, size=self.n_actions)\n", + " action = (action_numpy + action_noise).clip(-1, 1)\n", + " return action\n", + "\n", + " def predict_action(self, state):\n", + " state = paddle.to_tensor(state.reshape(1, -1), dtype='float32')\n", + " action = self.alg.predict(state)\n", + " action_numpy = action.cpu().numpy()[0]\n", + " return action_numpy\n", + "\n", + " def update(self):\n", + " if len(self.memory) < self.batch_size: \n", + " return\n", + " state_batch, action_batch, reward_batch, next_state_batch, done_batch = self.memory.sample(\n", + " self.batch_size)\n", + " done_batch = np.expand_dims(done_batch , -1)\n", + " reward_batch = np.expand_dims(reward_batch, -1)\n", + " state_batch = paddle.to_tensor(state_batch, dtype='float32')\n", + " action_batch = paddle.to_tensor(action_batch, dtype='float32')\n", + " reward_batch = paddle.to_tensor(reward_batch, dtype='float32')\n", + " next_state_batch = paddle.to_tensor(next_state_batch, dtype='float32')\n", + " done_batch = paddle.to_tensor(done_batch, dtype='float32')\n", + " critic_loss, actor_loss = self.alg.learn(state_batch, action_batch, reward_batch, next_state_batch,\n", + " done_batch)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "def train(cfg, env, agent):\n", + " ''' 训练\n", + " '''\n", + " print(f\"开始训练!\")\n", + " rewards = [] # 记录所有回合的奖励\n", + " for i_ep in range(cfg[\"train_eps\"]):\n", + " ep_reward = 0 \n", + " state = env.reset() \n", + " for i_step in range(cfg['max_steps']):\n", + " action = agent.sample_action(state) # 采样动作\n", + " next_state, reward, done, _ = env.step(action) \n", + " agent.memory.push((state, action, reward,next_state, done)) \n", + " state = next_state \n", + " agent.update() \n", + " ep_reward += reward \n", + " if done:\n", + " break\n", + " rewards.append(ep_reward)\n", + " if (i_ep + 1) % 10 == 0:\n", + " 
print(f\"回合:{i_ep+1}/{cfg['train_eps']},奖励:{ep_reward:.2f}\")\n", + " print(\"完成训练!\")\n", + " env.close()\n", + " res_dic = {'episodes':range(len(rewards)),'rewards':rewards}\n", + " return res_dic\n", + "\n", + "def test(cfg, env, agent):\n", + " print(\"开始测试!\")\n", + " rewards = [] # 记录所有回合的奖励\n", + " for i_ep in range(cfg['test_eps']):\n", + " ep_reward = 0 \n", + " state = env.reset() \n", + " for i_step in range(cfg['max_steps']):\n", + " action = agent.predict_action(state) \n", + " next_state, reward, done, _ = env.step(action) \n", + " state = next_state \n", + " ep_reward += reward \n", + " if done:\n", + " break\n", + " rewards.append(ep_reward)\n", + " print(f\"回合:{i_ep+1}/{cfg['test_eps']},奖励:{ep_reward:.2f}\")\n", + " print(\"完成测试!\")\n", + " env.close()\n", + " return {'episodes':range(len(rewards)),'rewards':rewards}\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "import gym\n", + "import os\n", + "import paddle\n", + "import numpy as np\n", + "import random\n", + "from parl.algorithms import DDPG\n", + "class NormalizedActions(gym.ActionWrapper):\n", + " ''' 将action范围重定在[0.1]之间\n", + " '''\n", + " def action(self, action):\n", + " low_bound = self.action_space.low\n", + " upper_bound = self.action_space.high\n", + " action = low_bound + (action + 1.0) * 0.5 * (upper_bound - low_bound)\n", + " action = np.clip(action, low_bound, upper_bound)\n", + " return action\n", + "\n", + " def reverse_action(self, action):\n", + " low_bound = self.action_space.low\n", + " upper_bound = self.action_space.high\n", + " action = 2 * (action - low_bound) / (upper_bound - low_bound) - 1\n", + " action = np.clip(action, low_bound, upper_bound)\n", + " return action\n", + "def all_seed(env,seed = 1):\n", + " ''' 万能的seed函数\n", + " '''\n", + " env.seed(seed) # env config\n", + " np.random.seed(seed)\n", + " random.seed(seed)\n", + " paddle.seed(seed)\n", + "def env_agent_config(cfg):\n", + " env 
= NormalizedActions(gym.make(cfg['env_name'])) # 装饰action噪声\n", + " if cfg['seed'] !=0:\n", + " all_seed(env,seed=cfg['seed'])\n", + " n_states = env.observation_space.shape[0]\n", + " n_actions = env.action_space.shape[0]\n", + " print(f\"状态维度:{n_states},动作维度:{n_actions}\")\n", + " cfg.update({\"n_states\":n_states,\"n_actions\":n_actions}) # 更新n_states和n_actions到cfg参数中\n", + " memory = ReplayBuffer(cfg['memory_capacity'])\n", + " model = ActorCritic(n_states, n_actions)\n", + " algorithm = DDPG(model, gamma=cfg['gamma'], tau=cfg['tau'], actor_lr=cfg['actor_lr'], critic_lr=cfg['critic_lr'])\n", + " agent = DDPGAgent(algorithm,memory,cfg)\n", + " return env,agent" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "import argparse\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "def get_args():\n", + " \"\"\" 超参数\n", + " \"\"\"\n", + " parser = argparse.ArgumentParser(description=\"hyperparameters\") \n", + " parser.add_argument('--algo_name',default='DDPG',type=str,help=\"name of algorithm\")\n", + " parser.add_argument('--env_name',default='Pendulum-v0',type=str,help=\"name of environment\")\n", + " parser.add_argument('--train_eps',default=200,type=int,help=\"episodes of training\")\n", + " parser.add_argument('--test_eps',default=20,type=int,help=\"episodes of testing\")\n", + " parser.add_argument('--max_steps',default=100000,type=int,help=\"steps per episode, much larger value can simulate infinite steps\")\n", + " parser.add_argument('--gamma',default=0.99,type=float,help=\"discounted factor\")\n", + " parser.add_argument('--critic_lr',default=1e-3,type=float,help=\"learning rate of critic\")\n", + " parser.add_argument('--actor_lr',default=1e-4,type=float,help=\"learning rate of actor\")\n", + " parser.add_argument('--memory_capacity',default=80000,type=int,help=\"memory capacity\")\n", + " parser.add_argument('--expl_noise',default=0.1,type=float)\n", + " 
parser.add_argument('--batch_size',default=128,type=int)\n", + " parser.add_argument('--target_update',default=2,type=int)\n", + " parser.add_argument('--tau',default=1e-2,type=float)\n", + " parser.add_argument('--critic_hidden_dim',default=256,type=int)\n", + " parser.add_argument('--actor_hidden_dim',default=256,type=int)\n", + " parser.add_argument('--device',default='cpu',type=str,help=\"cpu or cuda\") \n", + " parser.add_argument('--seed',default=1,type=int,help=\"random seed\")\n", + " args = parser.parse_args([]) \n", + " args = {**vars(args)} # 将args转换为字典 \n", + " # 打印参数\n", + " print(\"训练参数如下:\")\n", + " print(''.join(['=']*80))\n", + " tplt = \"{:^20}\\t{:^20}\\t{:^20}\"\n", + " print(tplt.format(\"参数名\",\"参数值\",\"参数类型\"))\n", + " for k,v in args.items():\n", + " print(tplt.format(k,v,str(type(v)))) \n", + " print(''.join(['=']*80)) \n", + " return args\n", + "def smooth(data, weight=0.9): \n", + " '''用于平滑曲线,类似于Tensorboard中的smooth\n", + "\n", + " Args:\n", + " data (List):输入数据\n", + " weight (Float): 平滑权重,处于0-1之间,数值越高说明越平滑,一般取0.9\n", + "\n", + " Returns:\n", + " smoothed (List): 平滑后的数据\n", + " '''\n", + " last = data[0] # First value in the plot (first timestep)\n", + " smoothed = list()\n", + " for point in data:\n", + " smoothed_val = last * weight + (1 - weight) * point # 计算平滑值\n", + " smoothed.append(smoothed_val) \n", + " last = smoothed_val \n", + " return smoothed\n", + "\n", + "def plot_rewards(rewards,cfg,path=None,tag='train'):\n", + " sns.set()\n", + " plt.figure() # 创建一个图形实例,方便同时多画几个图\n", + " plt.title(f\"{tag}ing curve on {cfg['device']} of {cfg['algo_name']} for {cfg['env_name']}\")\n", + " plt.xlabel('epsiodes')\n", + " plt.plot(rewards, label='rewards')\n", + " plt.plot(smooth(rewards), label='smoothed')\n", + " plt.legend()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "训练参数如下:\n", + 
"================================================================================\n", + " 参数名 \t 参数值 \t 参数类型 \n", + " algo_name \t DDPG \t \n", + " env_name \t Pendulum-v0 \t \n", + " train_eps \t 200 \t \n", + " test_eps \t 20 \t \n", + " max_steps \t 100000 \t \n", + " gamma \t 0.99 \t \n", + " critic_lr \t 0.001 \t \n", + " actor_lr \t 0.0001 \t \n", + " memory_capacity \t 80000 \t \n", + " expl_noise \t 0.1 \t \n", + " batch_size \t 128 \t \n", + " target_update \t 2 \t \n", + " tau \t 0.01 \t \n", + " critic_hidden_dim \t 256 \t \n", + " actor_hidden_dim \t 256 \t \n", + " device \t cpu \t \n", + " seed \t 1 \t \n", + "================================================================================\n", + "状态维度:3,动作维度:1\n", + "开始训练!\n", + "回合:10/200,奖励:-945.22\n", + "回合:20/200,奖励:-700.56\n", + "回合:30/200,奖励:-128.48\n", + "回合:40/200,奖励:-266.74\n", + "回合:50/200,奖励:-387.26\n", + "回合:60/200,奖励:-133.07\n", + "回合:70/200,奖励:-243.47\n", + "回合:80/200,奖励:-383.76\n", + "回合:90/200,奖励:-130.47\n", + "回合:100/200,奖励:-385.78\n", + "回合:110/200,奖励:-128.11\n", + "回合:120/200,奖励:-245.72\n", + "回合:130/200,奖励:-3.26\n", + "回合:140/200,奖励:-231.93\n", + "回合:150/200,奖励:-122.84\n", + "回合:160/200,奖励:-370.19\n", + "回合:170/200,奖励:-126.60\n", + "回合:180/200,奖励:-118.99\n", + "回合:190/200,奖励:-115.58\n", + "回合:200/200,奖励:-246.70\n", + "完成训练!\n", + "开始测试!\n", + "回合:1/20,奖励:-122.76\n", + "回合:2/20,奖励:-1.78\n", + "回合:3/20,奖励:-128.77\n", + "回合:4/20,奖励:-124.03\n", + "回合:5/20,奖励:-125.87\n", + "回合:6/20,奖励:-130.87\n", + "回合:7/20,奖励:-127.97\n", + "回合:8/20,奖励:-134.63\n", + "回合:9/20,奖励:-126.38\n", + "回合:10/20,奖励:-1.42\n", + "回合:11/20,奖励:-126.13\n", + "回合:12/20,奖励:-1.88\n", + "回合:13/20,奖励:-133.22\n", + "回合:14/20,奖励:-132.14\n", + "回合:15/20,奖励:-245.42\n", + "回合:16/20,奖励:-123.41\n", + "回合:17/20,奖励:-127.20\n", + "回合:18/20,奖励:-130.53\n", + "回合:19/20,奖励:-129.29\n", + "回合:20/20,奖励:-288.72\n", + "完成测试!\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# 获取参数\n", + "cfg = get_args() \n", + "# 训练\n", + "env, agent = env_agent_config(cfg)\n", + "res_dic = train(cfg, env, agent)\n", + " \n", + "plot_rewards(res_dic['rewards'], cfg, tag=\"train\") \n", + "# 测试\n", + "res_dic = test(cfg, env, agent)\n", + "plot_rewards(res_dic['rewards'], cfg, tag=\"test\") # 画出结果" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.7.13 ('parl')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.13" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "29c8e495d55843cb894bac6655c13e4a65f834e86169d4dce1750654c48fe628" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/projects/parl_tutorials/DQN.ipynb b/projects/parl_tutorials/DQN.ipynb new file mode 100644 index 0000000..4b02022 --- /dev/null +++ b/projects/parl_tutorials/DQN.ipynb @@ -0,0 +1,538 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1、定义算法\n", + "相比于Q learning,DQN本质上是为了适应更为复杂的环境,并且经过不断的改良迭代,到了Nature DQN(即Volodymyr Mnih发表的Nature论文)这里才算是基本完善。DQN主要改动的点有三个:\n", + "* 使用深度神经网络替代原来的Q表:这个很容易理解原因\n", + "* 使用了经验回放(Replay Buffer):这个好处有很多,一个是使用一堆历史数据去训练,比之前用一次就扔掉好多了,大大提高样本效率,另外一个是面试常提到的,减少样本之间的相关性,原则上获取经验跟学习阶段是分开的,原来时序的训练数据有可能是不稳定的,打乱之后再学习有助于提高训练的稳定性,跟深度学习中划分训练测试集时打乱样本是一个道理。\n", + "* 使用了两个网络:即策略网络和目标网络,每隔若干步才把每步更新的策略网络参数复制给目标网络,这样做也是为了训练的稳定,避免Q值的估计发散。想象一下,如果当前有个transition(这个Q 
learning中提过的,一定要记住!!!)样本导致对Q值进行了较差的过估计,如果接下来从经验回放中提取到的样本正好连续几个都这样的,很有可能导致Q值的发散(它的青春小鸟一去不回来了)。再打个比方,我们玩RPG或者闯关类游戏,有些人为了破纪录经常Save和Load,只要我出了错,我不满意我就加载之前的存档,假设不允许加载呢,就像DQN算法一样训练过程中会退不了,这时候是不是搞两个档,一个档每帧都存一下,另外一个档打了不错的结果再存,也就是若干个间隔再存一下,到最后用间隔若干步数再存的档一般都比每帧都存的档好些呢。当然你也可以再搞更多个档,也就是DQN增加多个目标网络,但是对于DQN则没有多大必要,多几个网络效果不见得会好很多。" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 1.1 定义模型" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[32m[09-26 17:18:11 MainThread @utils.py:73]\u001b[0m paddlepaddle version: 2.3.2.\n" + ] + } + ], + "source": [ + "\n", + "import paddle\n", + "import paddle.nn as nn\n", + "import paddle.nn.functional as F\n", + "import parl\n", + "\n", + "class MLP(parl.Model):\n", + " \"\"\" Linear network to solve Cartpole problem.\n", + " Args:\n", + " input_dim (int): Dimension of observation space.\n", + " output_dim (int): Dimension of action space.\n", + " \"\"\"\n", + "\n", + " def __init__(self, input_dim, output_dim):\n", + " super(MLP, self).__init__()\n", + " hidden_dim1 = 256\n", + " hidden_dim2 = 256\n", + " self.fc1 = nn.Linear(input_dim, hidden_dim1)\n", + " self.fc2 = nn.Linear(hidden_dim1, hidden_dim2)\n", + " self.fc3 = nn.Linear(hidden_dim2, output_dim)\n", + "\n", + " def forward(self, state):\n", + " x = F.relu(self.fc1(state))\n", + " x = F.relu(self.fc2(x))\n", + " x = self.fc3(x)\n", + " return x" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1.2 定义经验回放" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from collections import deque\n", + "class ReplayBuffer:\n", + " def __init__(self, capacity: int) -> None:\n", + " self.capacity = capacity\n", + " self.buffer = deque(maxlen=self.capacity)\n", + " def push(self,transitions):\n", + " '''_summary_\n", + " Args:\n", + " trainsitions (tuple): 
_description_\n", + " '''\n", + " self.buffer.append(transitions)\n", + " def sample(self, batch_size: int, sequential: bool = False):\n", + " if batch_size > len(self.buffer):\n", + " batch_size = len(self.buffer)\n", + " if sequential: # sequential sampling\n", + " rand = random.randint(0, len(self.buffer) - batch_size)\n", + " batch = [self.buffer[i] for i in range(rand, rand + batch_size)]\n", + " return zip(*batch)\n", + " else:\n", + " batch = random.sample(self.buffer, batch_size)\n", + " return zip(*batch)\n", + " def clear(self):\n", + " self.buffer.clear()\n", + " def __len__(self):\n", + " return len(self.buffer)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1.3 定义智能体" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "from random import random\n", + "import parl\n", + "import paddle\n", + "import math\n", + "import numpy as np\n", + "\n", + "\n", + "class DQNAgent(parl.Agent):\n", + " \"\"\"Agent of DQN.\n", + " \"\"\"\n", + "\n", + " def __init__(self, algorithm, memory,cfg):\n", + " super(DQNAgent, self).__init__(algorithm)\n", + " self.n_actions = cfg['n_actions']\n", + " self.epsilon = cfg['epsilon_start']\n", + " self.sample_count = 0 \n", + " self.epsilon_start = cfg['epsilon_start']\n", + " self.epsilon_end = cfg['epsilon_end']\n", + " self.epsilon_decay = cfg['epsilon_decay']\n", + " self.batch_size = cfg['batch_size']\n", + " self.global_step = 0\n", + " self.update_target_steps = 600\n", + " self.memory = memory # replay buffer\n", + "\n", + " def sample_action(self, state):\n", + " self.sample_count += 1\n", + " # epsilon must decay(linear,exponential and etc.) for balancing exploration and exploitation\n", + " self.epsilon = self.epsilon_end + (self.epsilon_start - self.epsilon_end) * \\\n", + " math.exp(-1. 
* self.sample_count / self.epsilon_decay) \n", + " if random.random() < self.epsilon:\n", + " action = np.random.randint(self.n_actions)\n", + " else:\n", + " action = self.predict_action(state)\n", + " return action\n", + "\n", + " def predict_action(self, state):\n", + " state = paddle.to_tensor(state , dtype='float32')\n", + " q_values = self.alg.predict(state) # self.alg 是自带的算法\n", + " action = q_values.argmax().numpy()[0]\n", + " return action\n", + "\n", + " def update(self):\n", + " \"\"\"Update model with an episode data\n", + " Args:\n", + " obs(np.float32): shape of (batch_size, obs_dim)\n", + " act(np.int32): shape of (batch_size)\n", + " reward(np.float32): shape of (batch_size)\n", + " next_obs(np.float32): shape of (batch_size, obs_dim)\n", + " terminal(np.float32): shape of (batch_size)\n", + " Returns:\n", + " loss(float)\n", + " \"\"\"\n", + " if len(self.memory) < self.batch_size: # when transitions in memory donot meet a batch, not update\n", + " return\n", + " \n", + " if self.global_step % self.update_target_steps == 0:\n", + " self.alg.sync_target()\n", + " self.global_step += 1\n", + " state_batch, action_batch, reward_batch, next_state_batch, done_batch = self.memory.sample(\n", + " self.batch_size)\n", + " action_batch = np.expand_dims(action_batch, axis=-1)\n", + " reward_batch = np.expand_dims(reward_batch, axis=-1)\n", + " done_batch = np.expand_dims(done_batch, axis=-1)\n", + "\n", + " state_batch = paddle.to_tensor(state_batch, dtype='float32')\n", + " action_batch = paddle.to_tensor(action_batch, dtype='int32')\n", + " reward_batch = paddle.to_tensor(reward_batch, dtype='float32')\n", + " next_state_batch = paddle.to_tensor(next_state_batch, dtype='float32')\n", + " done_batch = paddle.to_tensor(done_batch, dtype='float32')\n", + " loss = self.alg.learn(state_batch, action_batch, reward_batch, next_state_batch, done_batch) " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. 
定义训练" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "def train(cfg, env, agent):\n", + " ''' 训练\n", + " '''\n", + " print(f\"开始训练!\")\n", + " print(f\"环境:{cfg['env_name']},算法:{cfg['algo_name']},设备:{cfg['device']}\")\n", + " rewards = [] # record rewards for all episodes\n", + " steps = []\n", + " for i_ep in range(cfg[\"train_eps\"]):\n", + " ep_reward = 0 # reward per episode\n", + " ep_step = 0\n", + " state = env.reset() # reset and obtain initial state\n", + " for _ in range(cfg['ep_max_steps']):\n", + " ep_step += 1\n", + " action = agent.sample_action(state) # sample action\n", + " next_state, reward, done, _ = env.step(action) # update env and return transitions\n", + " agent.memory.push((state, action, reward,next_state, done)) # save transitions\n", + " state = next_state # update next state for env\n", + " agent.update() # update agent\n", + " ep_reward += reward #\n", + " if done:\n", + " break\n", + " steps.append(ep_step)\n", + " rewards.append(ep_reward)\n", + " if (i_ep + 1) % 10 == 0:\n", + " print(f\"回合:{i_ep+1}/{cfg['train_eps']},奖励:{ep_reward:.2f},Epislon: {agent.epsilon:.3f}\")\n", + " print(\"完成训练!\")\n", + " env.close()\n", + " res_dic = {'episodes':range(len(rewards)),'rewards':rewards,'steps':steps}\n", + " return res_dic\n", + "\n", + "def test(cfg, env, agent):\n", + " print(\"开始测试!\")\n", + " print(f\"环境:{cfg['env_name']},算法:{cfg['algo_name']},设备:{cfg['device']}\")\n", + " rewards = [] # record rewards for all episodes\n", + " steps = []\n", + " for i_ep in range(cfg['test_eps']):\n", + " ep_reward = 0 # reward per episode\n", + " ep_step = 0\n", + " state = env.reset() # reset and obtain initial state\n", + " for _ in range(cfg['ep_max_steps']):\n", + " ep_step+=1\n", + " action = agent.predict_action(state) # predict action\n", + " next_state, reward, done, _ = env.step(action) \n", + " state = next_state \n", + " ep_reward += reward \n", + " if done:\n", + " break\n", + " 
steps.append(ep_step)\n", + " rewards.append(ep_reward)\n", + " print(f\"回合:{i_ep+1}/{cfg['test_eps']},奖励:{ep_reward:.2f}\")\n", + " print(\"完成测试!\")\n", + " env.close()\n", + " return {'episodes':range(len(rewards)),'rewards':rewards,'steps':steps}\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3. 定义环境" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/jj/opt/anaconda3/envs/easyrl/lib/python3.7/site-packages/gym/envs/registration.py:250: DeprecationWarning: SelectableGroups dict interface is deprecated. Use select.\n", + " for plugin in metadata.entry_points().get(entry_point, []):\n" + ] + } + ], + "source": [ + "import gym\n", + "import paddle\n", + "import numpy as np\n", + "import random\n", + "import os\n", + "from parl.algorithms import DQN\n", + "def all_seed(env,seed = 1):\n", + " ''' omnipotent seed for RL, attention the position of seed function, you'd better put it just following the env create function\n", + " Args:\n", + " env (_type_): \n", + " seed (int, optional): _description_. 
Defaults to 1.\n", + " '''\n", + " print(f\"seed = {seed}\")\n", + " env.seed(seed) # env config\n", + " np.random.seed(seed)\n", + " random.seed(seed)\n", + " paddle.seed(seed)\n", + " \n", + "def env_agent_config(cfg):\n", + " ''' create env and agent\n", + " '''\n", + " env = gym.make(cfg['env_name']) \n", + " if cfg['seed'] !=0: # set random seed\n", + " all_seed(env,seed=cfg[\"seed\"]) \n", + " n_states = env.observation_space.shape[0] # print(hasattr(env.observation_space, 'n'))\n", + " n_actions = env.action_space.n # action dimension\n", + " print(f\"n_states: {n_states}, n_actions: {n_actions}\")\n", + " cfg.update({\"n_states\":n_states,\"n_actions\":n_actions}) # update to cfg paramters\n", + " model = MLP(n_states,n_actions)\n", + " algo = DQN(model, gamma=cfg['gamma'], lr=cfg['lr'])\n", + " memory = ReplayBuffer(cfg[\"memory_capacity\"]) # replay buffer\n", + " agent = DQNAgent(algo,memory,cfg) # create agent\n", + " return env, agent" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 4. 设置参数" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/jj/opt/anaconda3/envs/easyrl/lib/python3.7/site-packages/seaborn/rcmod.py:82: DeprecationWarning: distutils Version classes are deprecated. Use packaging.version instead.\n", + " if LooseVersion(mpl.__version__) >= \"3.0\":\n", + "/Users/jj/opt/anaconda3/envs/easyrl/lib/python3.7/site-packages/setuptools/_distutils/version.py:351: DeprecationWarning: distutils Version classes are deprecated. 
Use packaging.version instead.\n", + " other = LooseVersion(other)\n" + ] + } + ], + "source": [ + "import argparse\n", + "import seaborn as sns\n", + "import matplotlib.pyplot as plt\n", + "def get_args():\n", + " \"\"\" \n", + " \"\"\"\n", + " parser = argparse.ArgumentParser(description=\"hyperparameters\") \n", + " parser.add_argument('--algo_name',default='DQN',type=str,help=\"name of algorithm\")\n", + " parser.add_argument('--env_name',default='CartPole-v0',type=str,help=\"name of environment\")\n", + " parser.add_argument('--train_eps',default=200,type=int,help=\"episodes of training\") # 训练的回合数\n", + " parser.add_argument('--test_eps',default=20,type=int,help=\"episodes of testing\") # 测试的回合数\n", + " parser.add_argument('--ep_max_steps',default = 100000,type=int,help=\"steps per episode, much larger value can simulate infinite steps\")\n", + " parser.add_argument('--gamma',default=0.99,type=float,help=\"discounted factor\") # 折扣因子\n", + " parser.add_argument('--epsilon_start',default=0.95,type=float,help=\"initial value of epsilon\") # e-greedy策略中初始epsilon\n", + " parser.add_argument('--epsilon_end',default=0.01,type=float,help=\"final value of epsilon\") # e-greedy策略中的终止epsilon\n", + " parser.add_argument('--epsilon_decay',default=200,type=int,help=\"decay rate of epsilon\") # e-greedy策略中epsilon的衰减率\n", + " parser.add_argument('--memory_capacity',default=200000,type=int) # replay memory的容量\n", + " parser.add_argument('--memory_warmup_size',default=200,type=int) # replay memory的预热容量\n", + " parser.add_argument('--batch_size',default=64,type=int,help=\"batch size of training\") # 训练时每次使用的样本数\n", + " parser.add_argument('--targe_update_fre',default=200,type=int,help=\"frequency of target network update\") # target network更新频率\n", + " parser.add_argument('--seed',default=10,type=int,help=\"seed\") \n", + " parser.add_argument('--lr',default=0.0001,type=float,help=\"learning rate\")\n", + " parser.add_argument('--device',default='cpu',type=str,help=\"cpu or 
gpu\") \n", + " args = parser.parse_args([]) \n", + " args = {**vars(args)} # type(dict) \n", + " return args\n", + "def smooth(data, weight=0.9): \n", + " '''用于平滑曲线,类似于Tensorboard中的smooth\n", + "\n", + " Args:\n", + " data (List):输入数据\n", + " weight (Float): 平滑权重,处于0-1之间,数值越高说明越平滑,一般取0.9\n", + "\n", + " Returns:\n", + " smoothed (List): 平滑后的数据\n", + " '''\n", + " last = data[0] # First value in the plot (first timestep)\n", + " smoothed = list()\n", + " for point in data:\n", + " smoothed_val = last * weight + (1 - weight) * point # 计算平滑值\n", + " smoothed.append(smoothed_val) \n", + " last = smoothed_val \n", + " return smoothed\n", + "\n", + "def plot_rewards(rewards,cfg,path=None,tag='train'):\n", + " sns.set()\n", + " plt.figure() # 创建一个图形实例,方便同时多画几个图\n", + " plt.title(f\"{tag}ing curve on {cfg['device']} of {cfg['algo_name']} for {cfg['env_name']}\")\n", + " plt.xlabel('epsiodes')\n", + " plt.plot(rewards, label='rewards')\n", + " plt.plot(smooth(rewards), label='smoothed')\n", + " plt.legend()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 5. 收获成果!" 
+ ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "seed = 10\n", + "n_states: 4, n_actions: 2\n", + "开始训练!\n", + "环境:CartPole-v0,算法:DQN,设备:cpu\n", + "回合:10/200,奖励:10.00,Epislon: 0.062\n", + "回合:20/200,奖励:85.00,Epislon: 0.014\n", + "回合:30/200,奖励:41.00,Epislon: 0.011\n", + "回合:40/200,奖励:31.00,Epislon: 0.010\n", + "回合:50/200,奖励:22.00,Epislon: 0.010\n", + "回合:60/200,奖励:10.00,Epislon: 0.010\n", + "回合:70/200,奖励:10.00,Epislon: 0.010\n", + "回合:80/200,奖励:22.00,Epislon: 0.010\n", + "回合:90/200,奖励:30.00,Epislon: 0.010\n", + "回合:100/200,奖励:20.00,Epislon: 0.010\n", + "回合:110/200,奖励:15.00,Epislon: 0.010\n", + "回合:120/200,奖励:45.00,Epislon: 0.010\n", + "回合:130/200,奖励:73.00,Epislon: 0.010\n", + "回合:140/200,奖励:180.00,Epislon: 0.010\n", + "回合:150/200,奖励:163.00,Epislon: 0.010\n", + "回合:160/200,奖励:191.00,Epislon: 0.010\n", + "回合:170/200,奖励:200.00,Epislon: 0.010\n", + "回合:180/200,奖励:200.00,Epislon: 0.010\n", + "回合:190/200,奖励:200.00,Epislon: 0.010\n", + "回合:200/200,奖励:200.00,Epislon: 0.010\n", + "完成训练!\n", + "开始测试!\n", + "环境:CartPole-v0,算法:DQN,设备:cpu\n", + "回合:1/20,奖励:200.00\n", + "回合:2/20,奖励:200.00\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/jj/opt/anaconda3/envs/easyrl/lib/python3.7/site-packages/seaborn/rcmod.py:400: DeprecationWarning: distutils Version classes are deprecated. Use packaging.version instead.\n", + " if LooseVersion(mpl.__version__) >= \"3.0\":\n", + "/Users/jj/opt/anaconda3/envs/easyrl/lib/python3.7/site-packages/setuptools/_distutils/version.py:351: DeprecationWarning: distutils Version classes are deprecated. 
Use packaging.version instead.\n", + " other = LooseVersion(other)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "回合:3/20,奖励:200.00\n", + "回合:4/20,奖励:200.00\n", + "回合:5/20,奖励:200.00\n", + "回合:6/20,奖励:200.00\n", + "回合:7/20,奖励:200.00\n", + "回合:8/20,奖励:193.00\n", + "回合:9/20,奖励:200.00\n", + "回合:10/20,奖励:200.00\n", + "回合:11/20,奖励:200.00\n", + "回合:12/20,奖励:200.00\n", + "回合:13/20,奖励:200.00\n", + "回合:14/20,奖励:194.00\n", + "回合:15/20,奖励:200.00\n", + "回合:16/20,奖励:200.00\n", + "回合:17/20,奖励:200.00\n", + "回合:18/20,奖励:200.00\n", + "回合:19/20,奖励:199.00\n", + "回合:20/20,奖励:200.00\n", + "完成测试!\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# 获取参数\n", + "cfg = get_args() \n", + "# 训练\n", + "env, agent = env_agent_config(cfg)\n", + "res_dic = train(cfg, env, agent)\n", + " \n", + "plot_rewards(res_dic['rewards'], cfg, tag=\"train\") \n", + "# 测试\n", + "res_dic = test(cfg, env, agent)\n", + "plot_rewards(res_dic['rewards'], cfg, tag=\"test\") # 画出结果" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.7.13 ('easyrl')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.13" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "8994a120d39b6e6a2ecc94b4007f5314b68aa69fc88a7f00edf21be39b41f49c" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/projects/parl_tutorials/README.md b/projects/parl_tutorials/README.md new file mode 100644 index 0000000..fd03c9a --- /dev/null +++ b/projects/parl_tutorials/README.md @@ -0,0 +1,19 @@ +## 运行环境 + +由于```parl```和```paddle```容易与notebook相关模块发生版本冲突,因此推荐新建一个Conda环境: +```bash +conda create -n parl python=3.7 +``` + +然后安装```parl```和```paddle```(```paddlepaddle-gpu```为GPU版,```paddlepaddle```为CPU版,两者按需二选一安装即可): +```bash +pip install parl==2.0.5 + +pip install paddlepaddle-gpu==2.3.2 -i https://pypi.tuna.tsinghua.edu.cn/simple + +pip install paddlepaddle==2.3.2 -i https://pypi.tuna.tsinghua.edu.cn/simple +``` +安装其他依赖: +```bash +pip install -r parl_requirements.txt +``` \ No newline at end of file diff --git a/projects/parl_tutorials/parl_requirements.txt b/projects/parl_tutorials/parl_requirements.txt new file mode 100644 index 0000000..cc8624d --- /dev/null +++ b/projects/parl_tutorials/parl_requirements.txt @@ -0,0 +1,7 @@ +gym==0.19.0 +ipykernel==6.0.0 +jupyter==1.0.0 +pyzmq==18.1.1 
+jupyter-client==7.0.0 +matplotlib==3.5.3 +seaborn==0.12.0 \ No newline at
end of file diff --git a/projects/requirements.txt b/projects/requirements.txt index 13ee9d0..7dbd44a 100644 --- a/projects/requirements.txt +++ b/projects/requirements.txt @@ -1,4 +1,4 @@ -gym==0.21.0 +pyyaml==6.0 ipykernel==6.15.1 jupyter==1.0.0 matplotlib==3.5.2 @@ -6,3 +6,5 @@ seaborn==0.11.2 dill==0.3.5.1 argparse==1.4.0 pandas==1.3.5 +pyglet==1.5.26 +importlib-metadata<5.0 \ No newline at end of file