diff --git a/projects/.gitignore b/projects/.gitignore
index 764cbb7..d1fc2b1 100644
--- a/projects/.gitignore
+++ b/projects/.gitignore
@@ -2,4 +2,9 @@
 .ipynb_checkpoints
 __pycache__
 .vscode
-test.py
\ No newline at end of file
+test.py
+pseudocodes.aux
+pseudocodes.log
+pseudocodes.synctex.gz
+pseudocodes.out
+pseudocodes.toc
\ No newline at end of file
diff --git a/projects/README.md b/projects/README.md
index 84d96b9..fcee9df 100644
--- a/projects/README.md
+++ b/projects/README.md
@@ -22,15 +22,15 @@
 
 注：点击对应的名称会跳到[codes](./codes/)下对应的算法中，其他版本还请读者自行翻阅
 
-|         算法名称          |                           参考文献                           | 备注 |
-| :-----------------------: | :----------------------------------------------------------: | :--: |
-|                           |                                                              |      |
-|          DQN-CNN          |                                                              | 待更 |
-|   [SoftQ](codes/SoftQ)    |  [Soft Q-learning paper](https://arxiv.org/abs/1702.08165)   |      |
-|     [SAC](codes/SAC)      |      [SAC paper](https://arxiv.org/pdf/1812.05905.pdf)       |      |
-| [SAC-Discrete](codes/SAC) |  [SAC-Discrete paper](https://arxiv.org/pdf/1910.07207.pdf)  |      |
-|           SAC-S           |       [SAC-S paper](https://arxiv.org/abs/1801.01290)        |      |
-|           DSAC            | [DSAC paper](https://paperswithcode.com/paper/addressing-value-estimation-errors-in) | 待更 |
+|                算法名称                 |                           参考文献                           | 备注 |
+| :-------------------------------------: | :----------------------------------------------------------: | :--: |
+| [Policy Gradient](codes/PolicyGradient) | [Policy Gradient paper](https://proceedings.neurips.cc/paper/1999/file/464d828b85b0bed98e80ade0a5c43b0f-Paper.pdf) |      |
+|                 DQN-CNN                 |                                                              | 待更 |
+|          [SoftQ](codes/SoftQ)           |  [Soft Q-learning paper](https://arxiv.org/abs/1702.08165)   |      |
+|            [SAC](codes/SAC)             |      [SAC paper](https://arxiv.org/pdf/1812.05905.pdf)       |      |
+|        [SAC-Discrete](codes/SAC)        |  [SAC-Discrete paper](https://arxiv.org/pdf/1910.07207.pdf)  |      |
+|                  SAC-S                  |       [SAC-S paper](https://arxiv.org/abs/1801.01290)        |      |
+|                  DSAC                   | [DSAC paper](https://paperswithcode.com/paper/addressing-value-estimation-errors-in) | 待更 |
 
 ## 3、算法环境
 
diff --git a/projects/assets/pseudocodes/pseudocodes.aux b/projects/assets/pseudocodes/pseudocodes.aux
deleted file mode 100644
index 403c058..0000000
--- a/projects/assets/pseudocodes/pseudocodes.aux
+++ /dev/null
@@ -1,35 +0,0 @@
-\relax 
-\providecommand\hyper@newdestlabel[2]{}
-\providecommand\HyperFirstAtBeginDocument{\AtBeginDocument}
-\HyperFirstAtBeginDocument{\ifx\hyper@anchor\@undefined
-\global\let\oldcontentsline\contentsline
-\gdef\contentsline#1#2#3#4{\oldcontentsline{#1}{#2}{#3}}
-\global\let\oldnewlabel\newlabel
-\gdef\newlabel#1#2{\newlabelxx{#1}#2}
-\gdef\newlabelxx#1#2#3#4#5#6{\oldnewlabel{#1}{{#2}{#3}}}
-\AtEndDocument{\ifx\hyper@anchor\@undefined
-\let\contentsline\oldcontentsline
-\let\newlabel\oldnewlabel
-\fi}
-\fi}
-\global\let\hyper@last\relax 
-\gdef\HyperFirstAtBeginDocument#1{#1}
-\providecommand*\HyPL@Entry[1]{}
-\HyPL@Entry{0<</S/D>>}
-\@writefile{toc}{\contentsline {section}{\numberline {1}模版备用}{2}{section.1}\protected@file@percent }
-\@writefile{loa}{\contentsline {algorithm}{\numberline {}{\ignorespaces }}{2}{algorithm.}\protected@file@percent }
-\@writefile{toc}{\contentsline {section}{\numberline {2}Q learning算法}{3}{section.2}\protected@file@percent }
-\@writefile{loa}{\contentsline {algorithm}{\numberline {}{\ignorespaces }}{3}{algorithm.}\protected@file@percent }
-\@writefile{toc}{\contentsline {section}{\numberline {3}Sarsa算法}{4}{section.3}\protected@file@percent }
-\@writefile{loa}{\contentsline {algorithm}{\numberline {}{\ignorespaces }}{4}{algorithm.}\protected@file@percent }
-\@writefile{toc}{\contentsline {section}{\numberline {4}Policy Gradient算法}{5}{section.4}\protected@file@percent }
-\@writefile{loa}{\contentsline {algorithm}{\numberline {}{\ignorespaces }}{5}{algorithm.}\protected@file@percent }
-\@writefile{toc}{\contentsline {section}{\numberline {5}DQN算法}{6}{section.5}\protected@file@percent }
-\@writefile{loa}{\contentsline {algorithm}{\numberline {}{\ignorespaces }}{6}{algorithm.}\protected@file@percent }
-\@writefile{toc}{\contentsline {section}{\numberline {6}SoftQ算法}{7}{section.6}\protected@file@percent }
-\@writefile{loa}{\contentsline {algorithm}{\numberline {}{\ignorespaces }}{7}{algorithm.}\protected@file@percent }
-\@writefile{toc}{\contentsline {section}{\numberline {7}SAC-S算法}{8}{section.7}\protected@file@percent }
-\@writefile{loa}{\contentsline {algorithm}{\numberline {}{\ignorespaces }}{8}{algorithm.}\protected@file@percent }
-\@writefile{toc}{\contentsline {section}{\numberline {8}SAC算法}{9}{section.8}\protected@file@percent }
-\@writefile{loa}{\contentsline {algorithm}{\numberline {}{\ignorespaces }}{9}{algorithm.}\protected@file@percent }
-\gdef \@abspage@last{9}
diff --git a/projects/assets/pseudocodes/pseudocodes.log b/projects/assets/pseudocodes/pseudocodes.log
deleted file mode 100644
index 096a0ed..0000000
--- a/projects/assets/pseudocodes/pseudocodes.log
+++ /dev/null
@@ -1,570 +0,0 @@
-This is XeTeX, Version 3.141592653-2.6-0.999993 (TeX Live 2021) (preloaded format=xelatex 2021.8.22)  23 AUG 2022 19:26
-entering extended mode
- restricted \write18 enabled.
- file:line:error style messages enabled.
- %&-line parsing enabled.
-**/Users/jj/Desktop/rl-tutorials/assets/pseudocodes/pseudocodes
-(/Users/jj/Desktop/rl-tutorials/assets/pseudocodes/pseudocodes.tex
-LaTeX2e <2020-10-01> patch level 4
-L3 programming layer <2021-02-18> (/usr/local/texlive/2021/texmf-dist/tex/latex/ctex/ctexart.cls (/usr/local/texlive/2021/texmf-dist/tex/latex/ctex/config/ctexbackend.cfg
-File: ctexbackend.cfg 2021/03/14 v2.5.6 Backend configuration file (CTEX)
-) (/usr/local/texlive/2021/texmf-dist/tex/latex/l3kernel/expl3.sty
-Package: expl3 2021-02-18 L3 programming layer (loader) 
- (/usr/local/texlive/2021/texmf-dist/tex/latex/l3backend/l3backend-xetex.def
-File: l3backend-xetex.def 2021-03-18 L3 backend support: XeTeX
- (|extractbb --version)
-\c__kernel_sys_dvipdfmx_version_int=\count175
-\l__color_backend_stack_int=\count176
-\g__color_backend_stack_int=\count177
-\g__graphics_track_int=\count178
-\l__pdf_internal_box=\box47
-\g__pdf_backend_object_int=\count179
-\g__pdf_backend_annotation_int=\count180
-\g__pdf_backend_link_int=\count181
-))
-Document Class: ctexart 2021/03/14 v2.5.6 Chinese adapter for class article (CTEX)
-(/usr/local/texlive/2021/texmf-dist/tex/latex/l3packages/xparse/xparse.sty (/usr/local/texlive/2021/texmf-dist/tex/latex/l3packages/xparse/xparse-2020-10-01.sty (/usr/local/texlive/2021/texmf-dist/tex/latex/l3packages/xparse/xparse-generic.tex))) (/usr/local/texlive/2021/texmf-dist/tex/latex/l3packages/l3keys2e/l3keys2e.sty
-Package: l3keys2e 2021-03-12 LaTeX2e option processing using LaTeX3 keys
-) (/usr/local/texlive/2021/texmf-dist/tex/latex/ctex/ctexhook.sty
-Package: ctexhook 2021/03/14 v2.5.6 Document and package hooks (CTEX)
-) (/usr/local/texlive/2021/texmf-dist/tex/latex/ctex/ctexpatch.sty
-Package: ctexpatch 2021/03/14 v2.5.6 Patching commands (CTEX)
-) (/usr/local/texlive/2021/texmf-dist/tex/latex/base/fix-cm.sty
-Package: fix-cm 2015/01/14 v1.1t fixes to LaTeX
- (/usr/local/texlive/2021/texmf-dist/tex/latex/base/ts1enc.def
-File: ts1enc.def 2001/06/05 v3.0e (jk/car/fm) Standard LaTeX file
-LaTeX Font Info:    Redeclaring font encoding TS1 on input line 47.
-)) (/usr/local/texlive/2021/texmf-dist/tex/latex/everysel/everysel.sty
-Package: everysel 2021/01/20 v2.1 EverySelectfont Package (MS)
- (/usr/local/texlive/2021/texmf-dist/tex/latex/everysel/everysel-2011-10-28.sty))
-\l__ctex_tmp_int=\count182
-\l__ctex_tmp_box=\box48
-\l__ctex_tmp_dim=\dimen138
-\g__ctex_section_depth_int=\count183
-\g__ctex_font_size_int=\count184
- (/usr/local/texlive/2021/texmf-dist/tex/latex/ctex/config/ctexopts.cfg
-File: ctexopts.cfg 2021/03/14 v2.5.6 Option configuration file (CTEX)
-) (/usr/local/texlive/2021/texmf-dist/tex/latex/base/article.cls
-Document Class: article 2020/04/10 v1.4m Standard LaTeX document class
-(/usr/local/texlive/2021/texmf-dist/tex/latex/base/size11.clo
-File: size11.clo 2020/04/10 v1.4m Standard LaTeX file (size option)
-)
-\c@part=\count185
-\c@section=\count186
-\c@subsection=\count187
-\c@subsubsection=\count188
-\c@paragraph=\count189
-\c@subparagraph=\count190
-\c@figure=\count191
-\c@table=\count192
-\abovecaptionskip=\skip47
-\belowcaptionskip=\skip48
-\bibindent=\dimen139
-) (/usr/local/texlive/2021/texmf-dist/tex/latex/ctex/engine/ctex-engine-xetex.def
-File: ctex-engine-xetex.def 2021/03/14 v2.5.6 XeLaTeX adapter (CTEX)
- (/usr/local/texlive/2021/texmf-dist/tex/xelatex/xecjk/xeCJK.sty
-Package: xeCJK 2020/10/19 v3.8.6 Typesetting CJK scripts with XeLaTeX
- (/usr/local/texlive/2021/texmf-dist/tex/latex/l3packages/xtemplate/xtemplate.sty
-Package: xtemplate 2021-03-12 L3 Experimental prototype document functions
-\l__xtemplate_tmp_dim=\dimen140
-\l__xtemplate_tmp_int=\count193
-\l__xtemplate_tmp_muskip=\muskip16
-\l__xtemplate_tmp_skip=\skip49
-)
-\l__xeCJK_tmp_int=\count194
-\l__xeCJK_tmp_box=\box49
-\l__xeCJK_tmp_dim=\dimen141
-\l__xeCJK_tmp_skip=\skip50
-\g__xeCJK_space_factor_int=\count195
-\l__xeCJK_begin_int=\count196
-\l__xeCJK_end_int=\count197
-\c__xeCJK_CJK_class_int=\XeTeXcharclass1
-\c__xeCJK_FullLeft_class_int=\XeTeXcharclass2
-\c__xeCJK_FullRight_class_int=\XeTeXcharclass3
-\c__xeCJK_HalfLeft_class_int=\XeTeXcharclass4
-\c__xeCJK_HalfRight_class_int=\XeTeXcharclass5
-\c__xeCJK_NormalSpace_class_int=\XeTeXcharclass6
-\c__xeCJK_CM_class_int=\XeTeXcharclass7
-\c__xeCJK_HangulJamo_class_int=\XeTeXcharclass8
-\l__xeCJK_last_skip=\skip51
-\g__xeCJK_node_int=\count198
-\c__xeCJK_CJK_node_dim=\dimen142
-\c__xeCJK_CJK-space_node_dim=\dimen143
-\c__xeCJK_default_node_dim=\dimen144
-\c__xeCJK_default-space_node_dim=\dimen145
-\c__xeCJK_CJK-widow_node_dim=\dimen146
-\c__xeCJK_normalspace_node_dim=\dimen147
-\l__xeCJK_ccglue_skip=\skip52
-\l__xeCJK_ecglue_skip=\skip53
-\l__xeCJK_punct_kern_skip=\skip54
-\l__xeCJK_last_penalty_int=\count199
-\l__xeCJK_last_bound_dim=\dimen148
-\l__xeCJK_last_kern_dim=\dimen149
-\l__xeCJK_widow_penalty_int=\count266
-
-Package xtemplate Info: Declaring object type 'xeCJK/punctuation' taking 0
-(xtemplate)             argument(s) on line 2341.
-
-\l__xeCJK_fixed_punct_width_dim=\dimen150
-\l__xeCJK_mixed_punct_width_dim=\dimen151
-\l__xeCJK_middle_punct_width_dim=\dimen152
-\l__xeCJK_fixed_margin_width_dim=\dimen153
-\l__xeCJK_mixed_margin_width_dim=\dimen154
-\l__xeCJK_middle_margin_width_dim=\dimen155
-\l__xeCJK_bound_punct_width_dim=\dimen156
-\l__xeCJK_bound_margin_width_dim=\dimen157
-\l__xeCJK_margin_minimum_dim=\dimen158
-\l__xeCJK_kerning_total_width_dim=\dimen159
-\l__xeCJK_same_align_margin_dim=\dimen160
-\l__xeCJK_different_align_margin_dim=\dimen161
-\l__xeCJK_kerning_margin_width_dim=\dimen162
-\l__xeCJK_kerning_margin_minimum_dim=\dimen163
-\l__xeCJK_bound_dim=\dimen164
-\l__xeCJK_reverse_bound_dim=\dimen165
-\l__xeCJK_margin_dim=\dimen166
-\l__xeCJK_minimum_bound_dim=\dimen167
-\l__xeCJK_kerning_margin_dim=\dimen168
-\g__xeCJK_family_int=\count267
-\l__xeCJK_fam_int=\count268
-\g__xeCJK_fam_allocation_int=\count269
-\l__xeCJK_verb_case_int=\count270
-\l__xeCJK_verb_exspace_skip=\skip55
- (/usr/local/texlive/2021/texmf-dist/tex/latex/fontspec/fontspec.sty
-Package: fontspec 2020/02/21 v2.7i Font selection for XeLaTeX and LuaLaTeX
- (/usr/local/texlive/2021/texmf-dist/tex/latex/fontspec/fontspec-xetex.sty
-Package: fontspec-xetex 2020/02/21 v2.7i Font selection for XeLaTeX and LuaLaTeX
-\l__fontspec_script_int=\count271
-\l__fontspec_language_int=\count272
-\l__fontspec_strnum_int=\count273
-\l__fontspec_tmp_int=\count274
-\l__fontspec_tmpa_int=\count275
-\l__fontspec_tmpb_int=\count276
-\l__fontspec_tmpc_int=\count277
-\l__fontspec_em_int=\count278
-\l__fontspec_emdef_int=\count279
-\l__fontspec_strong_int=\count280
-\l__fontspec_strongdef_int=\count281
-\l__fontspec_tmpa_dim=\dimen169
-\l__fontspec_tmpb_dim=\dimen170
-\l__fontspec_tmpc_dim=\dimen171
- (/usr/local/texlive/2021/texmf-dist/tex/latex/base/fontenc.sty
-Package: fontenc 2020/08/10 v2.0s Standard LaTeX package
-) (/usr/local/texlive/2021/texmf-dist/tex/latex/fontspec/fontspec.cfg))) (/usr/local/texlive/2021/texmf-dist/tex/xelatex/xecjk/xeCJK.cfg
-File: xeCJK.cfg 2020/10/19 v3.8.6 Configuration file for xeCJK package
-))
-\ccwd=\dimen172
-\l__ctex_ccglue_skip=\skip56
-)
-\l__ctex_ziju_dim=\dimen173
- (/usr/local/texlive/2021/texmf-dist/tex/latex/zhnumber/zhnumber.sty
-Package: zhnumber 2020/05/01 v2.8 Typesetting numbers with Chinese glyphs
-\l__zhnum_scale_int=\count282
- (/usr/local/texlive/2021/texmf-dist/tex/latex/zhnumber/zhnumber-utf8.cfg
-File: zhnumber-utf8.cfg 2020/05/01 v2.8 Chinese numerals with UTF8 encoding
-))
-\l__ctex_heading_skip=\skip57
- (/usr/local/texlive/2021/texmf-dist/tex/latex/ctex/scheme/ctex-scheme-chinese-article.def
-File: ctex-scheme-chinese-article.def 2021/03/14 v2.5.6 Chinese scheme for article (CTEX)
- (/usr/local/texlive/2021/texmf-dist/tex/latex/ctex/config/ctex-name-utf8.cfg
-File: ctex-name-utf8.cfg 2021/03/14 v2.5.6 Caption with encoding UTF-8 (CTEX)
-)) (/usr/local/texlive/2021/texmf-dist/tex/latex/ctex/fontset/ctex-fontset-mac.def
-File: ctex-fontset-mac.def 2021/03/14 v2.5.6 macOS fonts definition (CTEX)
- (/usr/local/texlive/2021/texmf-dist/tex/latex/ctex/fontset/ctex-fontset-macnew.def
-File: ctex-fontset-macnew.def 2021/03/14 v2.5.6 macOS fonts definition for El Capitan or later version (CTEX)
-
-
-Package fontspec Warning: Font "Songti SC Light" does not contain requested
-(fontspec)                Script "CJK".
-
-
-Package fontspec Info: Font family 'SongtiSCLight(0)' created for font 'Songti
-(fontspec)             SC Light' with options
-(fontspec)             [Script={CJK},BoldItalicFont={Kaiti SC
-(fontspec)             Bold},BoldFont={Songti SC Bold},ItalicFont={Kaiti SC}].
-(fontspec)              
-(fontspec)              This font family consists of the following NFSS
-(fontspec)             series/shapes:
-(fontspec)              
-(fontspec)             - 'normal' (m/n) with NFSS spec.: <->"Songti SC
-(fontspec)             Light/OT:language=dflt;"
-(fontspec)             - 'small caps'  (m/sc) with NFSS spec.: 
-(fontspec)             - 'bold' (b/n) with NFSS spec.: <->"Songti SC
-(fontspec)             Bold/OT:language=dflt;"
-(fontspec)             - 'bold small caps'  (b/sc) with NFSS spec.: 
-(fontspec)             - 'italic' (m/it) with NFSS spec.: <->"Kaiti
-(fontspec)             SC/OT:language=dflt;"
-(fontspec)             - 'italic small caps'  (m/scit) with NFSS spec.: 
-(fontspec)             - 'bold italic' (b/it) with NFSS spec.: <->"Kaiti SC
-(fontspec)             Bold/OT:language=dflt;"
-(fontspec)             - 'bold italic small caps'  (b/scit) with NFSS spec.: 
-
-))) (/usr/local/texlive/2021/texmf-dist/tex/latex/ctex/config/ctex.cfg
-File: ctex.cfg 2021/03/14 v2.5.6 Configuration file (CTEX)
-) (/usr/local/texlive/2021/texmf-dist/tex/latex/algorithms/algorithm.sty
-Invalid UTF-8 byte or sequence at line 11 replaced by U+FFFD.
-Package: algorithm 2009/08/24 v0.1 Document Style `algorithm' - floating environment
- (/usr/local/texlive/2021/texmf-dist/tex/latex/float/float.sty
-Package: float 2001/11/08 v1.3d Float enhancements (AL)
-\c@float@type=\count283
-\float@exts=\toks15
-\float@box=\box50
-\@float@everytoks=\toks16
-\@floatcapt=\box51
-) (/usr/local/texlive/2021/texmf-dist/tex/latex/base/ifthen.sty
-Package: ifthen 2014/09/29 v1.1c Standard LaTeX ifthen package (DPC)
-)
-\@float@every@algorithm=\toks17
-\c@algorithm=\count284
-) (/usr/local/texlive/2021/texmf-dist/tex/latex/algorithms/algorithmic.sty
-Invalid UTF-8 byte or sequence at line 11 replaced by U+FFFD.
-Package: algorithmic 2009/08/24 v0.1 Document Style `algorithmic'
- (/usr/local/texlive/2021/texmf-dist/tex/latex/graphics/keyval.sty
-Package: keyval 2014/10/28 v1.15 key=value parser (DPC)
-\KV@toks@=\toks18
-)
-\c@ALC@unique=\count285
-\c@ALC@line=\count286
-\c@ALC@rem=\count287
-\c@ALC@depth=\count288
-\ALC@tlm=\skip58
-\algorithmicindent=\skip59
-) (/usr/local/texlive/2021/texmf-dist/tex/latex/amsfonts/amssymb.sty
-Package: amssymb 2013/01/14 v3.01 AMS font symbols
- (/usr/local/texlive/2021/texmf-dist/tex/latex/amsfonts/amsfonts.sty
-Package: amsfonts 2013/01/14 v3.01 Basic AMSFonts support
-\@emptytoks=\toks19
-\symAMSa=\mathgroup4
-\symAMSb=\mathgroup5
-LaTeX Font Info:    Redeclaring math symbol \hbar on input line 98.
-LaTeX Font Info:    Overwriting math alphabet `\mathfrak' in version `bold'
-(Font)                  U/euf/m/n --> U/euf/b/n on input line 106.
-)) (/usr/local/texlive/2021/texmf-dist/tex/latex/amsmath/amsmath.sty
-Package: amsmath 2020/09/23 v2.17i AMS math features
-\@mathmargin=\skip60
-
-For additional information on amsmath, use the `?' option.
-(/usr/local/texlive/2021/texmf-dist/tex/latex/amsmath/amstext.sty
-Package: amstext 2000/06/29 v2.01 AMS text
- (/usr/local/texlive/2021/texmf-dist/tex/latex/amsmath/amsgen.sty
-File: amsgen.sty 1999/11/30 v2.0 generic functions
-\@emptytoks=\toks20
-\ex@=\dimen174
-)) (/usr/local/texlive/2021/texmf-dist/tex/latex/amsmath/amsbsy.sty
-Package: amsbsy 1999/11/29 v1.2d Bold Symbols
-\pmbraise@=\dimen175
-) (/usr/local/texlive/2021/texmf-dist/tex/latex/amsmath/amsopn.sty
-Package: amsopn 2016/03/08 v2.02 operator names
-)
-\inf@bad=\count289
-LaTeX Info: Redefining \frac on input line 234.
-\uproot@=\count290
-\leftroot@=\count291
-LaTeX Info: Redefining \overline on input line 399.
-\classnum@=\count292
-\DOTSCASE@=\count293
-LaTeX Info: Redefining \ldots on input line 496.
-LaTeX Info: Redefining \dots on input line 499.
-LaTeX Info: Redefining \cdots on input line 620.
-\Mathstrutbox@=\box52
-\strutbox@=\box53
-\big@size=\dimen176
-LaTeX Font Info:    Redeclaring font encoding OML on input line 743.
-LaTeX Font Info:    Redeclaring font encoding OMS on input line 744.
-\macc@depth=\count294
-\c@MaxMatrixCols=\count295
-\dotsspace@=\muskip17
-\c@parentequation=\count296
-\dspbrk@lvl=\count297
-\tag@help=\toks21
-\row@=\count298
-\column@=\count299
-\maxfields@=\count300
-\andhelp@=\toks22
-\eqnshift@=\dimen177
-\alignsep@=\dimen178
-\tagshift@=\dimen179
-\tagwidth@=\dimen180
-\totwidth@=\dimen181
-\lineht@=\dimen182
-\@envbody=\toks23
-\multlinegap=\skip61
-\multlinetaggap=\skip62
-\mathdisplay@stack=\toks24
-LaTeX Info: Redefining \[ on input line 2923.
-LaTeX Info: Redefining \] on input line 2924.
-) (/usr/local/texlive/2021/texmf-dist/tex/latex/hyperref/hyperref.sty
-Package: hyperref 2021-02-27 v7.00k Hypertext links for LaTeX
- (/usr/local/texlive/2021/texmf-dist/tex/generic/ltxcmds/ltxcmds.sty
-Package: ltxcmds 2020-05-10 v1.25 LaTeX kernel commands for general use (HO)
-) (/usr/local/texlive/2021/texmf-dist/tex/generic/iftex/iftex.sty
-Package: iftex 2020/03/06 v1.0d TeX engine tests
-) (/usr/local/texlive/2021/texmf-dist/tex/generic/pdftexcmds/pdftexcmds.sty
-Package: pdftexcmds 2020-06-27 v0.33 Utility functions of pdfTeX for LuaTeX (HO)
- (/usr/local/texlive/2021/texmf-dist/tex/generic/infwarerr/infwarerr.sty
-Package: infwarerr 2019/12/03 v1.5 Providing info/warning/error messages (HO)
-)
-Package pdftexcmds Info: \pdf@primitive is available.
-Package pdftexcmds Info: \pdf@ifprimitive is available.
-Package pdftexcmds Info: \pdfdraftmode not found.
-) (/usr/local/texlive/2021/texmf-dist/tex/generic/kvsetkeys/kvsetkeys.sty
-Package: kvsetkeys 2019/12/15 v1.18 Key value parser (HO)
-) (/usr/local/texlive/2021/texmf-dist/tex/generic/kvdefinekeys/kvdefinekeys.sty
-Package: kvdefinekeys 2019-12-19 v1.6 Define keys (HO)
-) (/usr/local/texlive/2021/texmf-dist/tex/generic/pdfescape/pdfescape.sty
-Package: pdfescape 2019/12/09 v1.15 Implements pdfTeX's escape features (HO)
-) (/usr/local/texlive/2021/texmf-dist/tex/latex/hycolor/hycolor.sty
-Package: hycolor 2020-01-27 v1.10 Color options for hyperref/bookmark (HO)
-) (/usr/local/texlive/2021/texmf-dist/tex/latex/letltxmacro/letltxmacro.sty
-Package: letltxmacro 2019/12/03 v1.6 Let assignment for LaTeX macros (HO)
-) (/usr/local/texlive/2021/texmf-dist/tex/latex/auxhook/auxhook.sty
-Package: auxhook 2019-12-17 v1.6 Hooks for auxiliary files (HO)
-) (/usr/local/texlive/2021/texmf-dist/tex/latex/kvoptions/kvoptions.sty
-Package: kvoptions 2020-10-07 v3.14 Key value format for package options (HO)
-)
-\@linkdim=\dimen183
-\Hy@linkcounter=\count301
-\Hy@pagecounter=\count302
- (/usr/local/texlive/2021/texmf-dist/tex/latex/hyperref/pd1enc.def
-File: pd1enc.def 2021-02-27 v7.00k Hyperref: PDFDocEncoding definition (HO)
-) (/usr/local/texlive/2021/texmf-dist/tex/latex/hyperref/hyperref-langpatches.def
-File: hyperref-langpatches.def 2021-02-27 v7.00k Hyperref: patches for babel languages
-) (/usr/local/texlive/2021/texmf-dist/tex/generic/intcalc/intcalc.sty
-Package: intcalc 2019/12/15 v1.3 Expandable calculations with integers (HO)
-) (/usr/local/texlive/2021/texmf-dist/tex/generic/etexcmds/etexcmds.sty
-Package: etexcmds 2019/12/15 v1.7 Avoid name clashes with e-TeX commands (HO)
-)
-\Hy@SavedSpaceFactor=\count303
- (/usr/local/texlive/2021/texmf-dist/tex/latex/hyperref/puenc.def
-File: puenc.def 2021-02-27 v7.00k Hyperref: PDF Unicode definition (HO)
-)
-Package hyperref Info: Option `unicode' set `true' on input line 4073.
-Package hyperref Info: Hyper figures OFF on input line 4192.
-Package hyperref Info: Link nesting OFF on input line 4197.
-Package hyperref Info: Hyper index ON on input line 4200.
-Package hyperref Info: Plain pages OFF on input line 4207.
-Package hyperref Info: Backreferencing OFF on input line 4212.
-Package hyperref Info: Implicit mode ON; LaTeX internals redefined.
-Package hyperref Info: Bookmarks ON on input line 4445.
-\c@Hy@tempcnt=\count304
- (/usr/local/texlive/2021/texmf-dist/tex/latex/url/url.sty
-\Urlmuskip=\muskip18
-Package: url 2013/09/16  ver 3.4  Verb mode for urls, etc.
-)
-LaTeX Info: Redefining \url on input line 4804.
-\XeTeXLinkMargin=\dimen184
- (/usr/local/texlive/2021/texmf-dist/tex/generic/bitset/bitset.sty
-Package: bitset 2019/12/09 v1.3 Handle bit-vector datatype (HO)
- (/usr/local/texlive/2021/texmf-dist/tex/generic/bigintcalc/bigintcalc.sty
-Package: bigintcalc 2019/12/15 v1.5 Expandable calculations on big integers (HO)
-))
-\Fld@menulength=\count305
-\Field@Width=\dimen185
-\Fld@charsize=\dimen186
-Package hyperref Info: Hyper figures OFF on input line 6075.
-Package hyperref Info: Link nesting OFF on input line 6080.
-Package hyperref Info: Hyper index ON on input line 6083.
-Package hyperref Info: backreferencing OFF on input line 6090.
-Package hyperref Info: Link coloring OFF on input line 6095.
-Package hyperref Info: Link coloring with OCG OFF on input line 6100.
-Package hyperref Info: PDF/A mode OFF on input line 6105.
-LaTeX Info: Redefining \ref on input line 6145.
-LaTeX Info: Redefining \pageref on input line 6149.
- (/usr/local/texlive/2021/texmf-dist/tex/latex/base/atbegshi-ltx.sty
-Package: atbegshi-ltx 2020/08/17 v1.0a Emulation of the original atbegshi package
-with kernel methods
-)
-\Hy@abspage=\count306
-\c@Item=\count307
-\c@Hfootnote=\count308
-)
-Package hyperref Info: Driver (autodetected): hxetex.
- (/usr/local/texlive/2021/texmf-dist/tex/latex/hyperref/hxetex.def
-File: hxetex.def 2021-02-27 v7.00k Hyperref driver for XeTeX
- (/usr/local/texlive/2021/texmf-dist/tex/generic/stringenc/stringenc.sty
-Package: stringenc 2019/11/29 v1.12 Convert strings between diff. encodings (HO)
-)
-\pdfm@box=\box54
-\c@Hy@AnnotLevel=\count309
-\HyField@AnnotCount=\count310
-\Fld@listcount=\count311
-\c@bookmark@seq@number=\count312
- (/usr/local/texlive/2021/texmf-dist/tex/latex/rerunfilecheck/rerunfilecheck.sty
-Package: rerunfilecheck 2019/12/05 v1.9 Rerun checks for auxiliary files (HO)
- (/usr/local/texlive/2021/texmf-dist/tex/latex/base/atveryend-ltx.sty
-Package: atveryend-ltx 2020/08/19 v1.0a Emulation of the original atvery package
-with kernel methods
-) (/usr/local/texlive/2021/texmf-dist/tex/generic/uniquecounter/uniquecounter.sty
-Package: uniquecounter 2019/12/15 v1.4 Provide unlimited unique counter (HO)
-)
-Package uniquecounter Info: New unique counter `rerunfilecheck' on input line 286.
-)
-\Hy@SectionHShift=\skip63
-) (/usr/local/texlive/2021/texmf-dist/tex/latex/setspace/setspace.sty
-Package: setspace 2011/12/19 v6.7a set line spacing
-) (/usr/local/texlive/2021/texmf-dist/tex/latex/titlesec/titlesec.sty
-Package: titlesec 2019/10/16 v2.13 Sectioning titles
-\ttl@box=\box55
-\beforetitleunit=\skip64
-\aftertitleunit=\skip65
-\ttl@plus=\dimen187
-\ttl@minus=\dimen188
-\ttl@toksa=\toks25
-\titlewidth=\dimen189
-\titlewidthlast=\dimen190
-\titlewidthfirst=\dimen191
-) (./pseudocodes.aux)
-\openout1 = `pseudocodes.aux'.
-
-LaTeX Font Info:    Checking defaults for OML/cmm/m/it on input line 14.
-LaTeX Font Info:    ... okay on input line 14.
-LaTeX Font Info:    Checking defaults for OMS/cmsy/m/n on input line 14.
-LaTeX Font Info:    ... okay on input line 14.
-LaTeX Font Info:    Checking defaults for OT1/cmr/m/n on input line 14.
-LaTeX Font Info:    ... okay on input line 14.
-LaTeX Font Info:    Checking defaults for T1/cmr/m/n on input line 14.
-LaTeX Font Info:    ... okay on input line 14.
-LaTeX Font Info:    Checking defaults for TS1/cmr/m/n on input line 14.
-LaTeX Font Info:    ... okay on input line 14.
-LaTeX Font Info:    Checking defaults for TU/lmr/m/n on input line 14.
-LaTeX Font Info:    ... okay on input line 14.
-LaTeX Font Info:    Checking defaults for OMX/cmex/m/n on input line 14.
-LaTeX Font Info:    ... okay on input line 14.
-LaTeX Font Info:    Checking defaults for U/cmr/m/n on input line 14.
-LaTeX Font Info:    ... okay on input line 14.
-LaTeX Font Info:    Checking defaults for PD1/pdf/m/n on input line 14.
-LaTeX Font Info:    ... okay on input line 14.
-LaTeX Font Info:    Checking defaults for PU/pdf/m/n on input line 14.
-LaTeX Font Info:    ... okay on input line 14.
- ABD: EverySelectfont initializing macros
-LaTeX Info: Redefining \selectfont on input line 14.
-
-Package fontspec Info: Adjusting the maths setup (use [no-math] to avoid
-(fontspec)             this).
-
-\symlegacymaths=\mathgroup6
-LaTeX Font Info:    Overwriting symbol font `legacymaths' in version `bold'
-(Font)                  OT1/cmr/m/n --> OT1/cmr/bx/n on input line 14.
-LaTeX Font Info:    Redeclaring math accent \acute on input line 14.
-LaTeX Font Info:    Redeclaring math accent \grave on input line 14.
-LaTeX Font Info:    Redeclaring math accent \ddot on input line 14.
-LaTeX Font Info:    Redeclaring math accent \tilde on input line 14.
-LaTeX Font Info:    Redeclaring math accent \bar on input line 14.
-LaTeX Font Info:    Redeclaring math accent \breve on input line 14.
-LaTeX Font Info:    Redeclaring math accent \check on input line 14.
-LaTeX Font Info:    Redeclaring math accent \hat on input line 14.
-LaTeX Font Info:    Redeclaring math accent \dot on input line 14.
-LaTeX Font Info:    Redeclaring math accent \mathring on input line 14.
-LaTeX Font Info:    Redeclaring math symbol \Gamma on input line 14.
-LaTeX Font Info:    Redeclaring math symbol \Delta on input line 14.
-LaTeX Font Info:    Redeclaring math symbol \Theta on input line 14.
-LaTeX Font Info:    Redeclaring math symbol \Lambda on input line 14.
-LaTeX Font Info:    Redeclaring math symbol \Xi on input line 14.
-LaTeX Font Info:    Redeclaring math symbol \Pi on input line 14.
-LaTeX Font Info:    Redeclaring math symbol \Sigma on input line 14.
-LaTeX Font Info:    Redeclaring math symbol \Upsilon on input line 14.
-LaTeX Font Info:    Redeclaring math symbol \Phi on input line 14.
-LaTeX Font Info:    Redeclaring math symbol \Psi on input line 14.
-LaTeX Font Info:    Redeclaring math symbol \Omega on input line 14.
-LaTeX Font Info:    Redeclaring math symbol \mathdollar on input line 14.
-LaTeX Font Info:    Redeclaring symbol font `operators' on input line 14.
-LaTeX Font Info:    Encoding `OT1' has changed to `TU' for symbol font
-(Font)              `operators' in the math version `normal' on input line 14.
-LaTeX Font Info:    Overwriting symbol font `operators' in version `normal'
-(Font)                  OT1/cmr/m/n --> TU/lmr/m/n on input line 14.
-LaTeX Font Info:    Encoding `OT1' has changed to `TU' for symbol font
-(Font)              `operators' in the math version `bold' on input line 14.
-LaTeX Font Info:    Overwriting symbol font `operators' in version `bold'
-(Font)                  OT1/cmr/bx/n --> TU/lmr/m/n on input line 14.
-LaTeX Font Info:    Overwriting symbol font `operators' in version `normal'
-(Font)                  TU/lmr/m/n --> TU/lmr/m/n on input line 14.
-LaTeX Font Info:    Overwriting math alphabet `\mathit' in version `normal'
-(Font)                  OT1/cmr/m/it --> TU/lmr/m/it on input line 14.
-LaTeX Font Info:    Overwriting math alphabet `\mathbf' in version `normal'
-(Font)                  OT1/cmr/bx/n --> TU/lmr/b/n on input line 14.
-LaTeX Font Info:    Overwriting math alphabet `\mathsf' in version `normal'
-(Font)                  OT1/cmss/m/n --> TU/lmss/m/n on input line 14.
-LaTeX Font Info:    Overwriting math alphabet `\mathtt' in version `normal'
-(Font)                  OT1/cmtt/m/n --> TU/lmtt/m/n on input line 14.
-LaTeX Font Info:    Overwriting symbol font `operators' in version `bold'
-(Font)                  TU/lmr/m/n --> TU/lmr/b/n on input line 14.
-LaTeX Font Info:    Overwriting math alphabet `\mathit' in version `bold'
-(Font)                  OT1/cmr/bx/it --> TU/lmr/b/it on input line 14.
-LaTeX Font Info:    Overwriting math alphabet `\mathsf' in version `bold'
-(Font)                  OT1/cmss/bx/n --> TU/lmss/b/n on input line 14.
-LaTeX Font Info:    Overwriting math alphabet `\mathtt' in version `bold'
-(Font)                  OT1/cmtt/m/n --> TU/lmtt/b/n on input line 14.
-Package hyperref Info: Link coloring OFF on input line 14.
- (/usr/local/texlive/2021/texmf-dist/tex/latex/hyperref/nameref.sty
-Package: nameref 2021-04-02 v2.47 Cross-referencing by name of section
- (/usr/local/texlive/2021/texmf-dist/tex/latex/refcount/refcount.sty
-Package: refcount 2019/12/15 v3.6 Data extraction from label references (HO)
-) (/usr/local/texlive/2021/texmf-dist/tex/generic/gettitlestring/gettitlestring.sty
-Package: gettitlestring 2019/12/15 v1.6 Cleanup title references (HO)
-)
-\c@section@level=\count313
-)
-LaTeX Info: Redefining \ref on input line 14.
-LaTeX Info: Redefining \pageref on input line 14.
-LaTeX Info: Redefining \nameref on input line 14.
- (./pseudocodes.out) (./pseudocodes.out)
-\@outlinefile=\write3
-\openout3 = `pseudocodes.out'.
-
- (./pseudocodes.toc)
-\tf@toc=\write4
-\openout4 = `pseudocodes.toc'.
-
-LaTeX Font Info:    Font shape `TU/SongtiSCLight(0)/m/sl' in size <10.95> not available
-(Font)              Font shape `TU/SongtiSCLight(0)/m/it' tried instead on input line 17.
- [1
-
-]
-Package hyperref Info: bookmark level for unknown algorithm defaults to 0 on input line 22.
- [2
-
-]
-LaTeX Font Info:    Trying to load font information for U+msa on input line 32.
- (/usr/local/texlive/2021/texmf-dist/tex/latex/amsfonts/umsa.fd
-File: umsa.fd 2013/01/14 v3.01 AMS symbols A
-)
-LaTeX Font Info:    Trying to load font information for U+msb on input line 32.
- (/usr/local/texlive/2021/texmf-dist/tex/latex/amsfonts/umsb.fd
-File: umsb.fd 2013/01/14 v3.01 AMS symbols B
-) [3
-
-] [4
-
-] [5
-
-] [6
-
-] [7
-
-] [8
-
-]
-Overfull \hbox (32.54117pt too wide) in paragraph at lines 212--212
-[][]$[]\OML/cmm/m/it/9 J[]\OT1/cmr/m/n/9 (\OML/cmm/m/it/9 ^^R\OT1/cmr/m/n/9 ) = \OMS/cmsy/m/n/9 r[]\OML/cmm/m/it/9 Q[] [] []$| 
- []
-
-
-Overfull \hbox (15.41673pt too wide) in paragraph at lines 213--213
-[][]$[]\OML/cmm/m/it/9 J[]\OT1/cmr/m/n/9 (\OML/cmm/m/it/9 ^^^\OT1/cmr/m/n/9 ) = \OMS/cmsy/m/n/9 r[]\OML/cmm/m/it/9 ^^K [] [] \OT1/cmr/m/n/9 + [] \OMS/cmsy/m/n/9 r[]\OML/cmm/m/it/9 f[] []$\TU/lmr/m/n/9 ,$[][] \OT1/cmr/m/n/9 =
- []
-
-[9
-
-] (./pseudocodes.aux)
-Package rerunfilecheck Info: File `pseudocodes.out' has not changed.
-(rerunfilecheck)             Checksum: 35B5A79A86EF3BC70F1A0B3BCBEBAA13;724.
- ) 
-Here is how much of TeX's memory you used:
- 14827 strings out of 476919
- 313456 string characters out of 5821840
- 653576 words of memory out of 5000000
- 34576 multiletter control sequences out of 15000+600000
- 413609 words of font info for 91 fonts, out of 8000000 for 9000
- 1348 hyphenation exceptions out of 8191
- 101i,13n,104p,676b,697s stack positions out of 5000i,500n,10000p,200000b,80000s
-
-Output written on pseudocodes.pdf (9 pages).
diff --git a/projects/assets/pseudocodes/pseudocodes.out b/projects/assets/pseudocodes/pseudocodes.out
deleted file mode 100644
index 38f7e61..0000000
--- a/projects/assets/pseudocodes/pseudocodes.out
+++ /dev/null
@@ -1,8 +0,0 @@
-\BOOKMARK [1][-]{section.1}{\376\377\152\041\162\110\131\007\165\050}{}% 1
-\BOOKMARK [1][-]{section.2}{\376\377\000Q\000\040\000l\000e\000a\000r\000n\000i\000n\000g\173\227\154\325}{}% 2
-\BOOKMARK [1][-]{section.3}{\376\377\000S\000a\000r\000s\000a\173\227\154\325}{}% 3
-\BOOKMARK [1][-]{section.4}{\376\377\000P\000o\000l\000i\000c\000y\000\040\000G\000r\000a\000d\000i\000e\000n\000t\173\227\154\325}{}% 4
-\BOOKMARK [1][-]{section.5}{\376\377\000D\000Q\000N\173\227\154\325}{}% 5
-\BOOKMARK [1][-]{section.6}{\376\377\000S\000o\000f\000t\000Q\173\227\154\325}{}% 6
-\BOOKMARK [1][-]{section.7}{\376\377\000S\000A\000C\000-\000S\173\227\154\325}{}% 7
-\BOOKMARK [1][-]{section.8}{\376\377\000S\000A\000C\173\227\154\325}{}% 8
diff --git a/projects/assets/pseudocodes/pseudocodes.pdf b/projects/assets/pseudocodes/pseudocodes.pdf
index c476940..b34cabc 100644
Binary files a/projects/assets/pseudocodes/pseudocodes.pdf and b/projects/assets/pseudocodes/pseudocodes.pdf differ
diff --git a/projects/assets/pseudocodes/pseudocodes.synctex.gz b/projects/assets/pseudocodes/pseudocodes.synctex.gz
deleted file mode 100644
index 4e7cc3f..0000000
Binary files a/projects/assets/pseudocodes/pseudocodes.synctex.gz and /dev/null differ
diff --git a/projects/assets/pseudocodes/pseudocodes.tex b/projects/assets/pseudocodes/pseudocodes.tex
index d48ab65..929453a 100644
--- a/projects/assets/pseudocodes/pseudocodes.tex
+++ b/projects/assets/pseudocodes/pseudocodes.tex
@@ -11,6 +11,27 @@
 \usepackage{float} % 调用该包能够使用[H]
 % \pagestyle{plain} % 去除页眉，但是保留页脚编号，都去掉plain换empty
 
+% 更改脚注为圆圈
+\usepackage{pifont}
+\makeatletter
+\newcommand*{\circnum}[1]{%
+  \expandafter\@circnum\csname c@#1\endcsname
+}
+\newcommand*{\@circnum}[1]{%
+  \ifnum#1<1 %
+    \@ctrerr
+  \else
+    \ifnum#1>20 %
+      \@ctrerr
+    \else
+      \ding{\the\numexpr 171+(#1)\relax}%
+    \fi
+  \fi
+}
+\makeatother
+
+\renewcommand*{\thefootnote}{\circnum{footnote}}
+
 \begin{document}
 \tableofcontents % 目录，注意要运行两下或者vscode保存两下才能显示
 % \singlespacing
@@ -69,27 +90,10 @@
 \end{algorithm}
 \footnotetext[1]{Reinforcement Learning: An Introduction}
 \clearpage
-\section{Policy Gradient算法}
-\begin{algorithm}[H] % [H]固定位置
-    \floatname{algorithm}{{REINFORCE算法：Monte-Carlo Policy Gradient}\footnotemark[1]} 
-	\renewcommand{\thealgorithm}{} % 去掉算法标号
-	\caption{} 
-	\begin{algorithmic}[1] % [1]显示步数
-		\STATE 初始化策略参数$\boldsymbol{\theta} \in \mathbb{R}^{d^{\prime}}($ e.g., to $\mathbf{0})$
-		\FOR {回合数 = $1,M$}
-			\STATE 根据策略$\pi(\cdot \mid \cdot, \boldsymbol{\theta})$采样一个(或几个)回合的transition
-			\FOR {时步 = $1,t$}
-				\STATE 计算回报$G \leftarrow \sum_{k=t+1}^{T} \gamma^{k-t-1} R_{k}$
-				\STATE 更新策略$\boldsymbol{\theta} \leftarrow {\boldsymbol{\theta}+\alpha \gamma^{t}} G \nabla \ln \pi\left(A_{t} \mid S_{t}, \boldsymbol{\theta}\right)$
-			\ENDFOR
-		\ENDFOR
-	\end{algorithmic}
-\end{algorithm}
-\footnotetext[1]{Reinforcement Learning: An Introduction}
-\clearpage
+
 \section{DQN算法}
 \begin{algorithm}[H] % [H]固定位置
-    \floatname{algorithm}{{DQN算法}{\hypersetup{linkcolor=white}\footnotemark}}  
+    \floatname{algorithm}{{DQN算法}\footnotemark[1]}  
     \renewcommand{\thealgorithm}{} % 去掉算法标号
 	\caption{} 
     \renewcommand{\algorithmicrequire}{\textbf{输入:}}  
@@ -109,10 +113,10 @@
 				\STATE 更新环境状态$s_{t+1} \leftarrow s_t$
 				\STATE {\bfseries 更新策略：}
 				\STATE 从$D$中采样一个batch的transition
-				\STATE 计算实际的$Q$值，即$y_{j}${\hypersetup{linkcolor=white}\footnotemark}
-				\STATE 对损失 $L(\theta)=\left(y_{i}-Q\left(s_{i}, a_{i} ; \theta\right)\right)^{2}$关于参数$\theta$做随机梯度下降{\hypersetup{linkcolor=white}\footnotemark}
+				\STATE 计算实际的$Q$值，即$y_{j}$\footnotemark[2]
+				\STATE 对损失 $L(\theta)=\left(y_{i}-Q\left(s_{i}, a_{i} ; \theta\right)\right)^{2}$关于参数$\theta$做随机梯度下降\footnotemark[3]
 			\ENDFOR
-			\STATE 每$C$个回合复制参数$\hat{Q}\leftarrow Q${\hypersetup{linkcolor=white}\footnotemark}
+			\STATE 每$C$个回合复制参数$\hat{Q}\leftarrow Q$\footnotemark[4]
 		\ENDFOR
 	\end{algorithmic}
 \end{algorithm}
@@ -121,7 +125,46 @@
 \footnotetext[3]{$\theta_i \leftarrow \theta_i - \lambda \nabla_{\theta_{i}} L_{i}\left(\theta_{i}\right)$}
 \footnotetext[4]{此处也可像原论文中放到小循环中改成每$C$步，但没有每$C$个回合稳定}
 \clearpage
+\section{Policy Gradient算法}
+\begin{algorithm}[H] % [H]固定位置
+    \floatname{algorithm}{{REINFORCE算法：Monte-Carlo Policy Gradient}\footnotemark[1]} 
+	\renewcommand{\thealgorithm}{} % 去掉算法标号
+	\caption{} 
+	\begin{algorithmic}[1] % [1]显示步数
+		\STATE 初始化策略参数$\boldsymbol{\theta} \in \mathbb{R}^{d^{\prime}}($ e.g., to $\mathbf{0})$
+		\FOR {回合数 = $1,M$}
+			\STATE 根据策略$\pi(\cdot \mid \cdot, \boldsymbol{\theta})$采样一个(或几个)回合的transition
+			\FOR {时步 = $1,t$}
+				\STATE 计算回报$G \leftarrow \sum_{k=t+1}^{T} \gamma^{k-t-1} R_{k}$
+				\STATE 更新策略$\boldsymbol{\theta} \leftarrow {\boldsymbol{\theta}+\alpha \gamma^{t}} G \nabla \ln \pi\left(A_{t} \mid S_{t}, \boldsymbol{\theta}\right)$
+			\ENDFOR
+		\ENDFOR
+	\end{algorithmic}
+\end{algorithm}
+\footnotetext[1]{Reinforcement Learning: An Introduction}
+\clearpage
+\section{Advantage Actor Critic算法}
+\begin{algorithm}[H] % [H]固定位置
+    \floatname{algorithm}{{Q Actor Critic算法}} 
+	\renewcommand{\thealgorithm}{} % 去掉算法标号
+	\caption{} 
+	\begin{algorithmic}[1] % [1]显示步数
+		\STATE 初始化Actor参数$\theta$和Critic参数$w$
+		\FOR {回合数 = $1,M$}
+			\STATE 根据策略$\pi_{\theta}(a|s)$采样一个(或几个)回合的transition
+			\STATE  {\bfseries 更新Critic参数\footnotemark[1]}
+			\FOR {时步 = $t+1,1$}
+				\STATE 计算Advantage，即$ \delta_t = r_t + \gamma Q_w(s_{t+1},a_{t+1})-Q_w(s_t,a_t)$
+				\STATE $w \leftarrow w+\alpha_{w} \delta_{t} \nabla_{w} Q_w(s_t,a_t)$
+				\STATE $a_t \leftarrow a_{t+1}$,$s_t \leftarrow s_{t+1}$
+			\ENDFOR
+			\STATE 更新Actor参数$\theta \leftarrow \theta+\alpha_{\theta} Q_{w}(s, a) \nabla_{\theta} \log \pi_{\theta}(a \mid s)$
+		\ENDFOR
+	\end{algorithmic}
+\end{algorithm}
+\footnotetext[1]{这里结合TD error的特性，按照从$t+1$到$1$的顺序计算Advantage更方便}
 
+\clearpage
 \section{SoftQ算法}
 \begin{algorithm}[H]
     \floatname{algorithm}{{SoftQ算法}}  
diff --git a/projects/assets/pseudocodes/pseudocodes.toc b/projects/assets/pseudocodes/pseudocodes.toc
deleted file mode 100644
index e33ad0b..0000000
--- a/projects/assets/pseudocodes/pseudocodes.toc
+++ /dev/null
@@ -1,8 +0,0 @@
-\contentsline {section}{\numberline {1}模版备用}{2}{section.1}%
-\contentsline {section}{\numberline {2}Q learning算法}{3}{section.2}%
-\contentsline {section}{\numberline {3}Sarsa算法}{4}{section.3}%
-\contentsline {section}{\numberline {4}Policy Gradient算法}{5}{section.4}%
-\contentsline {section}{\numberline {5}DQN算法}{6}{section.5}%
-\contentsline {section}{\numberline {6}SoftQ算法}{7}{section.6}%
-\contentsline {section}{\numberline {7}SAC-S算法}{8}{section.7}%
-\contentsline {section}{\numberline {8}SAC算法}{9}{section.8}%
diff --git a/projects/codes/A2C/a2c.py b/projects/codes/A2C/a2c.py
index ba0ed7c..c1a88a5 100644
--- a/projects/codes/A2C/a2c.py
+++ b/projects/codes/A2C/a2c.py
@@ -1,56 +1,60 @@
-#!/usr/bin/env python
-# coding=utf-8
-'''
-Author: JiangJi
-Email: johnjim0816@gmail.com
-Date: 2021-05-03 22:16:08
-LastEditor: JiangJi
-LastEditTime: 2022-07-20 23:54:40
-Discription: 
-Environment: 
-'''
 import torch
-import torch.optim as optim
-import torch.nn as nn
-import torch.nn.functional as F
-from torch.distributions import Categorical
+import numpy as np
+
 
-class ActorCritic(nn.Module):
-    ''' A2C网络模型，包含一个Actor和Critic
-    '''
-    def __init__(self, input_dim, output_dim, hidden_dim):
-        super(ActorCritic, self).__init__()
-        self.critic = nn.Sequential(
-            nn.Linear(input_dim, hidden_dim),
-            nn.ReLU(),
-            nn.Linear(hidden_dim, 1)
-        )
         
-        self.actor = nn.Sequential(
-            nn.Linear(input_dim, hidden_dim),
-            nn.ReLU(),
-            nn.Linear(hidden_dim, output_dim),
-            nn.Softmax(dim=1),
-        )
-        
-    def forward(self, x):
-        value = self.critic(x)
-        probs = self.actor(x)
-        dist  = Categorical(probs)
-        return dist, value
 class A2C:
-    ''' A2C算法
-    '''
-    def __init__(self,n_states,n_actions,cfg) -> None:
-        self.gamma = cfg.gamma
-        self.device = torch.device(cfg.device)
-        self.model = ActorCritic(n_states, n_actions, cfg.hidden_size).to(self.device)
-        self.optimizer = optim.Adam(self.model.parameters())
+    def __init__(self,models,memories,cfg):
+        self.n_actions = cfg['n_actions']
+        self.gamma = cfg['gamma']
+        self.device = torch.device(cfg['device']) 
+        self.memory = memories['ACMemory']
+        self.actor = models['Actor'].to(self.device)
+        self.critic = models['Critic'].to(self.device)
+        self.actor_optim = torch.optim.Adam(self.actor.parameters(), lr=cfg['actor_lr'])
+        self.critic_optim = torch.optim.Adam(self.critic.parameters(), lr=cfg['critic_lr'])
+    def sample_action(self,state):
+        state = torch.tensor(state, device=self.device, dtype=torch.float32).unsqueeze(dim=0)
+        dist = self.actor(state)
+        value = self.critic(state) # note that 'dist' need require_grad=True
+        value = value.detach().numpy().squeeze(0)[0]
+        action = np.random.choice(self.n_actions, p=dist.detach().numpy().squeeze(0)) # shape(p=(n_actions,1)
+        return action,value,dist 
+    def predict_action(self,state):
+        state = torch.tensor(state, device=self.device, dtype=torch.float32).unsqueeze(dim=0)
+        dist = self.actor(state)
+        value = self.critic(state) # note that 'dist' need require_grad=True
+        value = value.detach().numpy().squeeze(0)[0]
+        action = np.random.choice(self.n_actions, p=dist.detach().numpy().squeeze(0)) # shape(p=(n_actions,1)
+        return action,value,dist 
+    def update(self,next_state,entropy):
+        value_pool,log_prob_pool,reward_pool = self.memory.sample()
+        next_state = torch.tensor(next_state, device=self.device, dtype=torch.float32).unsqueeze(dim=0)
+        next_value = self.critic(next_state)
+        returns = np.zeros_like(reward_pool)
+        for t in reversed(range(len(reward_pool))):
+            next_value = reward_pool[t] + self.gamma * next_value # G(s_{t},a{t}) = r_{t+1} + gamma * V(s_{t+1})
+            returns[t] = next_value
+        returns = torch.tensor(returns, device=self.device)
+        value_pool = torch.tensor(value_pool, device=self.device)
+        advantages = returns - value_pool
+        log_prob_pool = torch.stack(log_prob_pool)
+        actor_loss = (-log_prob_pool * advantages).mean()
+        critic_loss = 0.5 * advantages.pow(2).mean()
+        tot_loss = actor_loss + critic_loss + 0.001 * entropy
+        self.actor_optim.zero_grad()
+        self.critic_optim.zero_grad()
+        tot_loss.backward()
+        self.actor_optim.step()
+        self.critic_optim.step()
+        self.memory.clear()
+    def save_model(self, path):
+        from pathlib import Path
+        # create path
+        Path(path).mkdir(parents=True, exist_ok=True)
+        torch.save(self.actor.state_dict(), f"{path}/actor_checkpoint.pt")
+        torch.save(self.critic.state_dict(), f"{path}/critic_checkpoint.pt")
 
-    def compute_returns(self,next_value, rewards, masks):
-        R = next_value
-        returns = []
-        for step in reversed(range(len(rewards))):
-            R = rewards[step] + self.gamma * R * masks[step]
-            returns.insert(0, R)
-        return returns
\ No newline at end of file
+    def load_model(self, path):
+        self.actor.load_state_dict(torch.load(f"{path}/actor_checkpoint.pt"))
+        self.critic.load_state_dict(torch.load(f"{path}/critic_checkpoint.pt"))
\ No newline at end of file
diff --git a/projects/codes/A2C/a2c_2.py b/projects/codes/A2C/a2c_2.py
new file mode 100644
index 0000000..74e2cfe
--- /dev/null
+++ b/projects/codes/A2C/a2c_2.py
@@ -0,0 +1,55 @@
+import torch
+import numpy as np
+
+class A2C_2:
+    def __init__(self,models,memories,cfg):
+        self.n_actions = cfg['n_actions']
+        self.gamma = cfg['gamma']
+        self.device = torch.device(cfg['device']) 
+        self.memory = memories['ACMemory']
+        self.ac_net = models['ActorCritic'].to(self.device)
+        self.ac_optimizer = torch.optim.Adam(self.ac_net.parameters(), lr=cfg['lr'])
+    def sample_action(self,state):
+        state = torch.tensor(state, device=self.device, dtype=torch.float32).unsqueeze(dim=0)
+        value, dist = self.ac_net(state) # note that 'dist' need require_grad=True
+        value = value.detach().numpy().squeeze(0)[0]
+        action = np.random.choice(self.n_actions, p=dist.detach().numpy().squeeze(0)) # shape(p=(n_actions,1)
+        return action,value,dist
+    def predict_action(self,state):
+        ''' predict can be all wrapped with no_grad(), then donot need detach(), or you can just copy contents of 'sample_action'
+        '''
+        with torch.no_grad(): 
+            state = torch.tensor(state, device=self.device, dtype=torch.float32).unsqueeze(dim=0)
+            value, dist = self.ac_net(state)
+            value = value.numpy().squeeze(0)[0] # shape(value) = (1,)
+            action = np.random.choice(self.n_actions, p=dist.numpy().squeeze(0)) # shape(p=(n_actions,1)
+        return action,value,dist
+    def update(self,next_state,entropy):
+        value_pool,log_prob_pool,reward_pool = self.memory.sample()
+        next_state = torch.tensor(next_state, device=self.device, dtype=torch.float32).unsqueeze(dim=0)
+        next_value,_ = self.ac_net(next_state)
+        returns = np.zeros_like(reward_pool)
+        for t in reversed(range(len(reward_pool))):
+            next_value = reward_pool[t] + self.gamma * next_value # G(s_{t},a{t}) = r_{t+1} + gamma * V(s_{t+1})
+            returns[t] = next_value
+        returns = torch.tensor(returns, device=self.device)
+        value_pool = torch.tensor(value_pool, device=self.device)
+        advantages = returns - value_pool
+        log_prob_pool = torch.stack(log_prob_pool)
+        actor_loss = (-log_prob_pool * advantages).mean()
+        critic_loss = 0.5 * advantages.pow(2).mean()
+        ac_loss = actor_loss + critic_loss + 0.001 * entropy
+        self.ac_optimizer.zero_grad()
+        ac_loss.backward()
+        self.ac_optimizer.step()
+        self.memory.clear()
+    def save_model(self, path):
+        from pathlib import Path
+        # create path
+        Path(path).mkdir(parents=True, exist_ok=True)
+        torch.save(self.ac_net.state_dict(), f"{path}/a2c_checkpoint.pt")
+
+    def load_model(self, path):
+        self.ac_net.load_state_dict(torch.load(f"{path}/a2c_checkpoint.pt"))
+        
+       
\ No newline at end of file
diff --git a/projects/codes/A2C/main.py b/projects/codes/A2C/main.py
new file mode 100644
index 0000000..e5585e8
--- /dev/null
+++ b/projects/codes/A2C/main.py
@@ -0,0 +1,121 @@
+import sys,os
+os.environ["KMP_DUPLICATE_LIB_OK"]  =  "TRUE" # avoid "OMP: Error #15: Initializing libiomp5md.dll, but found libiomp5md.dll already initialized."
+curr_path = os.path.dirname(os.path.abspath(__file__))  # current path
+parent_path = os.path.dirname(curr_path)  # parent path 
+sys.path.append(parent_path)  # add path to system path
+
+import datetime
+import argparse
+import gym
+import torch
+import numpy as np
+from common.utils import all_seed
+from common.launcher import Launcher
+from common.memories import PGReplay
+from common.models import ActorSoftmax,Critic
+from envs.register import register_env
+from a2c import A2C
+
+class Main(Launcher):
+    def get_args(self):
+        curr_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")   # obtain current time
+        parser = argparse.ArgumentParser(description="hyperparameters")      
+        parser.add_argument('--algo_name',default='A2C',type=str,help="name of algorithm")
+        parser.add_argument('--env_name',default='CartPole-v0',type=str,help="name of environment")
+        parser.add_argument('--train_eps',default=1600,type=int,help="episodes of training") 
+        parser.add_argument('--test_eps',default=20,type=int,help="episodes of testing") 
+        parser.add_argument('--ep_max_steps',default = 100000,type=int,help="steps per episode, much larger value can simulate infinite steps")
+        parser.add_argument('--gamma',default=0.99,type=float,help="discounted factor") 
+        parser.add_argument('--actor_lr',default=3e-4,type=float,help="learning rate of actor")
+        parser.add_argument('--critic_lr',default=1e-3,type=float,help="learning rate of critic")
+        parser.add_argument('--actor_hidden_dim',default=256,type=int,help="hidden of actor net")
+        parser.add_argument('--critic_hidden_dim',default=256,type=int,help="hidden of critic net")
+        parser.add_argument('--device',default='cpu',type=str,help="cpu or cuda") 
+        parser.add_argument('--seed',default=10,type=int,help="seed") 
+        parser.add_argument('--show_fig',default=False,type=bool,help="if show figure or not")  
+        parser.add_argument('--save_fig',default=True,type=bool,help="if save figure or not")   
+        args = parser.parse_args()   
+        default_args = {'result_path':f"{curr_path}/outputs/{args.env_name}/{curr_time}/results/",
+                        'model_path':f"{curr_path}/outputs/{args.env_name}/{curr_time}/models/",
+        }
+        args = {**vars(args),**default_args}  # type(dict)                         
+        return args
+    def env_agent_config(self,cfg):
+        ''' create env and agent
+        '''  
+        register_env(cfg['env_name'])
+        env = gym.make(cfg['env_name']) 
+        if cfg['seed'] !=0: # set random seed
+            all_seed(env,seed=cfg["seed"]) 
+        try: # state dimension
+            n_states = env.observation_space.n # print(hasattr(env.observation_space, 'n'))
+        except AttributeError:
+            n_states = env.observation_space.shape[0] # print(hasattr(env.observation_space, 'shape'))
+        n_actions = env.action_space.n  # action dimension
+        print(f"n_states: {n_states}, n_actions: {n_actions}")
+        cfg.update({"n_states":n_states,"n_actions":n_actions}) # update to cfg paramters
+        models = {'Actor':ActorSoftmax(cfg['n_states'],cfg['n_actions'], hidden_dim = cfg['actor_hidden_dim']),'Critic':Critic(cfg['n_states'],1,hidden_dim=cfg['critic_hidden_dim'])}
+        memories = {'ACMemory':PGReplay()}
+        agent = A2C(models,memories,cfg)
+        return env,agent
+    def train(self,cfg,env,agent):
+        print("Start training!")
+        print(f"Env: {cfg['env_name']}, Algorithm: {cfg['algo_name']}, Device: {cfg['device']}")
+        rewards = []  # record rewards for all episodes
+        steps = [] # record steps for all episodes
+        
+        for i_ep in range(cfg['train_eps']):
+            ep_reward = 0  # reward per episode
+            ep_step = 0 # step per episode
+            ep_entropy = 0
+            state = env.reset()  # reset and obtain initial state
+            
+            for _ in range(cfg['ep_max_steps']):
+                action, value, dist = agent.sample_action(state)  # sample action
+                next_state, reward, done, _ = env.step(action)  # update env and return transitions
+                log_prob = torch.log(dist.squeeze(0)[action])
+                entropy = -np.sum(np.mean(dist.detach().numpy()) * np.log(dist.detach().numpy()))
+                agent.memory.push((value,log_prob,reward))  # save transitions
+                state = next_state  # update state
+                ep_reward += reward
+                ep_entropy += entropy
+                ep_step += 1
+                if done:
+                    break
+            agent.update(next_state,ep_entropy)  # update agent
+            rewards.append(ep_reward)
+            steps.append(ep_step)
+            if (i_ep+1)%10==0:
+                print(f'Episode: {i_ep+1}/{cfg["train_eps"]}, Reward: {ep_reward:.2f}, Steps:{ep_step}')
+        print("Finish training!")
+        return {'episodes':range(len(rewards)),'rewards':rewards,'steps':steps}
+    def test(self,cfg,env,agent):
+        print("Start testing!")
+        print(f"Env: {cfg['env_name']}, Algorithm: {cfg['algo_name']}, Device: {cfg['device']}")
+        rewards = []  # record rewards for all episodes
+        steps = [] # record steps for all episodes
+        for i_ep in range(cfg['test_eps']):
+            ep_reward = 0  # reward per episode
+            ep_step = 0
+            state = env.reset()  # reset and obtain initial state
+            for _ in range(cfg['ep_max_steps']):
+                action,_,_ = agent.predict_action(state)  # predict action
+                next_state, reward, done, _ = env.step(action)  
+                state = next_state 
+                ep_reward += reward
+                ep_step += 1
+                if done:
+                    break
+            rewards.append(ep_reward)
+            steps.append(ep_step)
+            print(f"Episode: {i_ep+1}/{cfg['test_eps']}, Steps:{ep_step}, Reward: {ep_reward:.2f}")
+        print("Finish testing!")
+        return {'episodes':range(len(rewards)),'rewards':rewards,'steps':steps}
+
+if __name__ == "__main__":
+    main = Main()
+    main.run()
+   
+
+        
+    
diff --git a/projects/codes/A2C/main2.py b/projects/codes/A2C/main2.py
new file mode 100644
index 0000000..c81754f
--- /dev/null
+++ b/projects/codes/A2C/main2.py
@@ -0,0 +1,120 @@
+import sys,os
+os.environ["KMP_DUPLICATE_LIB_OK"]  =  "TRUE" # avoid "OMP: Error #15: Initializing libiomp5md.dll, but found libiomp5md.dll already initialized."
+curr_path = os.path.dirname(os.path.abspath(__file__))  # current path
+parent_path = os.path.dirname(curr_path)  # parent path 
+sys.path.append(parent_path)  # add path to system path
+
+import datetime
+import argparse
+import gym
+import torch
+import numpy as np
+from common.utils import all_seed
+from common.launcher import Launcher
+from common.memories import PGReplay
+from common.models import ActorCriticSoftmax
+from envs.register import register_env
+from a2c_2 import A2C_2
+
+class Main(Launcher):
+    def get_args(self):
+        curr_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")   # obtain current time
+        parser = argparse.ArgumentParser(description="hyperparameters")      
+        parser.add_argument('--algo_name',default='A2C',type=str,help="name of algorithm")
+        parser.add_argument('--env_name',default='CartPole-v0',type=str,help="name of environment")
+        parser.add_argument('--train_eps',default=2000,type=int,help="episodes of training") 
+        parser.add_argument('--test_eps',default=20,type=int,help="episodes of testing") 
+        parser.add_argument('--ep_max_steps',default = 100000,type=int,help="steps per episode, much larger value can simulate infinite steps")
+        parser.add_argument('--gamma',default=0.99,type=float,help="discounted factor") 
+        parser.add_argument('--lr',default=3e-4,type=float,help="learning rate")
+        parser.add_argument('--actor_hidden_dim',default=256,type=int)
+        parser.add_argument('--critic_hidden_dim',default=256,type=int)
+        parser.add_argument('--device',default='cpu',type=str,help="cpu or cuda") 
+        parser.add_argument('--seed',default=10,type=int,help="seed") 
+        parser.add_argument('--show_fig',default=False,type=bool,help="if show figure or not")  
+        parser.add_argument('--save_fig',default=True,type=bool,help="if save figure or not")   
+        args = parser.parse_args()   
+        default_args = {'result_path':f"{curr_path}/outputs/{args.env_name}/{curr_time}/results/",
+                        'model_path':f"{curr_path}/outputs/{args.env_name}/{curr_time}/models/",
+        }
+        args = {**vars(args),**default_args}  # type(dict)                         
+        return args
+    def env_agent_config(self,cfg):
+        ''' create env and agent
+        '''  
+        register_env(cfg['env_name'])
+        env = gym.make(cfg['env_name']) 
+        if cfg['seed'] !=0: # set random seed
+            all_seed(env,seed=cfg["seed"]) 
+        try: # state dimension
+            n_states = env.observation_space.n # print(hasattr(env.observation_space, 'n'))
+        except AttributeError:
+            n_states = env.observation_space.shape[0] # print(hasattr(env.observation_space, 'shape'))
+        n_actions = env.action_space.n  # action dimension
+        print(f"n_states: {n_states}, n_actions: {n_actions}")
+        cfg.update({"n_states":n_states,"n_actions":n_actions}) # update to cfg paramters
+        models = {'ActorCritic':ActorCriticSoftmax(cfg['n_states'],cfg['n_actions'], actor_hidden_dim = cfg['actor_hidden_dim'],critic_hidden_dim=cfg['critic_hidden_dim'])}
+        memories = {'ACMemory':PGReplay()}
+        agent = A2C_2(models,memories,cfg)
+        return env,agent
+    def train(self,cfg,env,agent):
+        print("Start training!")
+        print(f"Env: {cfg['env_name']}, Algorithm: {cfg['algo_name']}, Device: {cfg['device']}")
+        rewards = []  # record rewards for all episodes
+        steps = [] # record steps for all episodes
+        
+        for i_ep in range(cfg['train_eps']):
+            ep_reward = 0  # reward per episode
+            ep_step = 0 # step per episode
+            ep_entropy = 0
+            state = env.reset()  # reset and obtain initial state
+            
+            for _ in range(cfg['ep_max_steps']):
+                action, value, dist = agent.sample_action(state)  # sample action
+                next_state, reward, done, _ = env.step(action)  # update env and return transitions
+                log_prob = torch.log(dist.squeeze(0)[action])
+                entropy = -np.sum(np.mean(dist.detach().numpy()) * np.log(dist.detach().numpy()))
+                agent.memory.push((value,log_prob,reward))  # save transitions
+                state = next_state  # update state
+                ep_reward += reward
+                ep_entropy += entropy
+                ep_step += 1
+                if done:
+                    break
+            agent.update(next_state,ep_entropy)  # update agent
+            rewards.append(ep_reward)
+            steps.append(ep_step)
+            if (i_ep+1)%10==0:
+                print(f'Episode: {i_ep+1}/{cfg["train_eps"]}, Reward: {ep_reward:.2f}, Steps:{ep_step}')
+        print("Finish training!")
+        return {'episodes':range(len(rewards)),'rewards':rewards,'steps':steps}
+    def test(self,cfg,env,agent):
+        print("Start testing!")
+        print(f"Env: {cfg['env_name']}, Algorithm: {cfg['algo_name']}, Device: {cfg['device']}")
+        rewards = []  # record rewards for all episodes
+        steps = [] # record steps for all episodes
+        for i_ep in range(cfg['test_eps']):
+            ep_reward = 0  # reward per episode
+            ep_step = 0
+            state = env.reset()  # reset and obtain initial state
+            for _ in range(cfg['ep_max_steps']):
+                action,_,_ = agent.predict_action(state)  # predict action
+                next_state, reward, done, _ = env.step(action)  
+                state = next_state 
+                ep_reward += reward
+                ep_step += 1
+                if done:
+                    break
+            rewards.append(ep_reward)
+            steps.append(ep_step)
+            print(f"Episode: {i_ep+1}/{cfg['test_eps']}, Steps:{ep_step}, Reward: {ep_reward:.2f}")
+        print("Finish testing!")
+        return {'episodes':range(len(rewards)),'rewards':rewards,'steps':steps}
+
+if __name__ == "__main__":
+    main = Main()
+    main.run()
+   
+
+        
+    
diff --git a/projects/codes/A2C/outputs/CartPole-v0/20220829-135818/models/a2c_checkpoint.pt b/projects/codes/A2C/outputs/CartPole-v0/20220829-135818/models/a2c_checkpoint.pt
new file mode 100644
index 0000000..c346b1b
Binary files /dev/null and b/projects/codes/A2C/outputs/CartPole-v0/20220829-135818/models/a2c_checkpoint.pt differ
diff --git a/projects/codes/A2C/outputs/CartPole-v0/20220829-135818/results/params.json b/projects/codes/A2C/outputs/CartPole-v0/20220829-135818/results/params.json
new file mode 100644
index 0000000..2ce53a7
--- /dev/null
+++ b/projects/codes/A2C/outputs/CartPole-v0/20220829-135818/results/params.json
@@ -0,0 +1,19 @@
+{
+    "algo_name": "A2C",
+    "env_name": "CartPole-v0",
+    "train_eps": 2000,
+    "test_eps": 20,
+    "ep_max_steps": 100000,
+    "gamma": 0.99,
+    "lr": 0.0003,
+    "actor_hidden_dim": 256,
+    "critic_hidden_dim": 256,
+    "device": "cpu",
+    "seed": 10,
+    "show_fig": false,
+    "save_fig": true,
+    "result_path": "/Users/jj/Desktop/rl-tutorials/codes/A2C/outputs/CartPole-v0/20220829-135818/results/",
+    "model_path": "/Users/jj/Desktop/rl-tutorials/codes/A2C/outputs/CartPole-v0/20220829-135818/models/",
+    "n_states": 4,
+    "n_actions": 2
+}
\ No newline at end of file
diff --git a/projects/codes/A2C/outputs/CartPole-v0/20220829-135818/results/testing_curve.png b/projects/codes/A2C/outputs/CartPole-v0/20220829-135818/results/testing_curve.png
new file mode 100644
index 0000000..b1bbebb
Binary files /dev/null and b/projects/codes/A2C/outputs/CartPole-v0/20220829-135818/results/testing_curve.png differ
diff --git a/projects/codes/A2C/outputs/CartPole-v0/20220829-135818/results/testing_results.csv b/projects/codes/A2C/outputs/CartPole-v0/20220829-135818/results/testing_results.csv
new file mode 100644
index 0000000..221744d
--- /dev/null
+++ b/projects/codes/A2C/outputs/CartPole-v0/20220829-135818/results/testing_results.csv
@@ -0,0 +1,21 @@
+episodes,rewards,steps
+0,200.0,200
+1,200.0,200
+2,93.0,93
+3,155.0,155
+4,116.0,116
+5,200.0,200
+6,190.0,190
+7,176.0,176
+8,200.0,200
+9,200.0,200
+10,200.0,200
+11,179.0,179
+12,200.0,200
+13,185.0,185
+14,191.0,191
+15,200.0,200
+16,200.0,200
+17,124.0,124
+18,200.0,200
+19,172.0,172
diff --git a/projects/codes/A2C/outputs/CartPole-v0/20220829-135818/results/training_curve.png b/projects/codes/A2C/outputs/CartPole-v0/20220829-135818/results/training_curve.png
new file mode 100644
index 0000000..4410e5e
Binary files /dev/null and b/projects/codes/A2C/outputs/CartPole-v0/20220829-135818/results/training_curve.png differ
diff --git a/projects/codes/A2C/outputs/CartPole-v0/20220829-135818/results/training_results.csv b/projects/codes/A2C/outputs/CartPole-v0/20220829-135818/results/training_results.csv
new file mode 100644
index 0000000..7d5debb
--- /dev/null
+++ b/projects/codes/A2C/outputs/CartPole-v0/20220829-135818/results/training_results.csv
@@ -0,0 +1,2001 @@
+episodes,rewards,steps
+0,16.0,16
+1,17.0,17
+2,19.0,19
+3,95.0,95
+4,13.0,13
+5,22.0,22
+6,15.0,15
+7,20.0,20
+8,13.0,13
+9,20.0,20
+10,21.0,21
+11,59.0,59
+12,16.0,16
+13,35.0,35
+14,17.0,17
+15,22.0,22
+16,28.0,28
+17,19.0,19
+18,12.0,12
+19,17.0,17
+20,16.0,16
+21,28.0,28
+22,11.0,11
+23,11.0,11
+24,11.0,11
+25,18.0,18
+26,15.0,15
+27,33.0,33
+28,21.0,21
+29,34.0,34
+30,11.0,11
+31,16.0,16
+32,27.0,27
+33,19.0,19
+34,16.0,16
+35,33.0,33
+36,21.0,21
+37,12.0,12
+38,26.0,26
+39,34.0,34
+40,11.0,11
+41,38.0,38
+42,13.0,13
+43,12.0,12
+44,20.0,20
+45,17.0,17
+46,10.0,10
+47,20.0,20
+48,22.0,22
+49,21.0,21
+50,20.0,20
+51,32.0,32
+52,10.0,10
+53,33.0,33
+54,25.0,25
+55,30.0,30
+56,22.0,22
+57,25.0,25
+58,19.0,19
+59,12.0,12
+60,9.0,9
+61,11.0,11
+62,12.0,12
+63,28.0,28
+64,12.0,12
+65,12.0,12
+66,12.0,12
+67,34.0,34
+68,12.0,12
+69,25.0,25
+70,13.0,13
+71,26.0,26
+72,13.0,13
+73,22.0,22
+74,24.0,24
+75,9.0,9
+76,14.0,14
+77,17.0,17
+78,14.0,14
+79,25.0,25
+80,23.0,23
+81,38.0,38
+82,30.0,30
+83,28.0,28
+84,25.0,25
+85,16.0,16
+86,13.0,13
+87,34.0,34
+88,16.0,16
+89,48.0,48
+90,12.0,12
+91,25.0,25
+92,25.0,25
+93,17.0,17
+94,13.0,13
+95,12.0,12
+96,23.0,23
+97,22.0,22
+98,12.0,12
+99,16.0,16
+100,16.0,16
+101,10.0,10
+102,14.0,14
+103,20.0,20
+104,13.0,13
+105,16.0,16
+106,14.0,14
+107,22.0,22
+108,17.0,17
+109,19.0,19
+110,26.0,26
+111,16.0,16
+112,22.0,22
+113,20.0,20
+114,27.0,27
+115,16.0,16
+116,40.0,40
+117,14.0,14
+118,15.0,15
+119,40.0,40
+120,23.0,23
+121,32.0,32
+122,13.0,13
+123,33.0,33
+124,18.0,18
+125,26.0,26
+126,30.0,30
+127,28.0,28
+128,12.0,12
+129,45.0,45
+130,14.0,14
+131,40.0,40
+132,13.0,13
+133,16.0,16
+134,78.0,78
+135,19.0,19
+136,19.0,19
+137,20.0,20
+138,26.0,26
+139,21.0,21
+140,28.0,28
+141,17.0,17
+142,19.0,19
+143,13.0,13
+144,54.0,54
+145,41.0,41
+146,10.0,10
+147,15.0,15
+148,14.0,14
+149,19.0,19
+150,19.0,19
+151,32.0,32
+152,39.0,39
+153,36.0,36
+154,21.0,21
+155,58.0,58
+156,15.0,15
+157,55.0,55
+158,16.0,16
+159,46.0,46
+160,25.0,25
+161,15.0,15
+162,13.0,13
+163,18.0,18
+164,19.0,19
+165,22.0,22
+166,17.0,17
+167,48.0,48
+168,28.0,28
+169,29.0,29
+170,60.0,60
+171,12.0,12
+172,34.0,34
+173,18.0,18
+174,20.0,20
+175,18.0,18
+176,29.0,29
+177,14.0,14
+178,23.0,23
+179,26.0,26
+180,23.0,23
+181,77.0,77
+182,46.0,46
+183,25.0,25
+184,37.0,37
+185,12.0,12
+186,12.0,12
+187,36.0,36
+188,30.0,30
+189,135.0,135
+190,11.0,11
+191,18.0,18
+192,34.0,34
+193,12.0,12
+194,22.0,22
+195,19.0,19
+196,21.0,21
+197,26.0,26
+198,13.0,13
+199,128.0,128
+200,11.0,11
+201,21.0,21
+202,27.0,27
+203,11.0,11
+204,14.0,14
+205,70.0,70
+206,10.0,10
+207,18.0,18
+208,35.0,35
+209,80.0,80
+210,19.0,19
+211,95.0,95
+212,14.0,14
+213,20.0,20
+214,30.0,30
+215,19.0,19
+216,20.0,20
+217,54.0,54
+218,27.0,27
+219,19.0,19
+220,30.0,30
+221,19.0,19
+222,55.0,55
+223,29.0,29
+224,65.0,65
+225,19.0,19
+226,38.0,38
+227,14.0,14
+228,62.0,62
+229,37.0,37
+230,50.0,50
+231,40.0,40
+232,39.0,39
+233,16.0,16
+234,18.0,18
+235,86.0,86
+236,45.0,45
+237,37.0,37
+238,35.0,35
+239,20.0,20
+240,22.0,22
+241,40.0,40
+242,29.0,29
+243,17.0,17
+244,34.0,34
+245,91.0,91
+246,31.0,31
+247,69.0,69
+248,31.0,31
+249,30.0,30
+250,30.0,30
+251,23.0,23
+252,22.0,22
+253,52.0,52
+254,25.0,25
+255,32.0,32
+256,37.0,37
+257,31.0,31
+258,18.0,18
+259,60.0,60
+260,15.0,15
+261,23.0,23
+262,34.0,34
+263,43.0,43
+264,66.0,66
+265,15.0,15
+266,19.0,19
+267,55.0,55
+268,65.0,65
+269,50.0,50
+270,19.0,19
+271,37.0,37
+272,33.0,33
+273,33.0,33
+274,16.0,16
+275,19.0,19
+276,18.0,18
+277,52.0,52
+278,27.0,27
+279,48.0,48
+280,39.0,39
+281,29.0,29
+282,86.0,86
+283,37.0,37
+284,21.0,21
+285,21.0,21
+286,52.0,52
+287,88.0,88
+288,30.0,30
+289,34.0,34
+290,22.0,22
+291,26.0,26
+292,18.0,18
+293,29.0,29
+294,57.0,57
+295,61.0,61
+296,73.0,73
+297,19.0,19
+298,32.0,32
+299,49.0,49
+300,58.0,58
+301,31.0,31
+302,29.0,29
+303,73.0,73
+304,21.0,21
+305,33.0,33
+306,23.0,23
+307,22.0,22
+308,36.0,36
+309,41.0,41
+310,19.0,19
+311,41.0,41
+312,67.0,67
+313,95.0,95
+314,43.0,43
+315,27.0,27
+316,29.0,29
+317,30.0,30
+318,23.0,23
+319,32.0,32
+320,48.0,48
+321,33.0,33
+322,32.0,32
+323,63.0,63
+324,20.0,20
+325,25.0,25
+326,23.0,23
+327,42.0,42
+328,15.0,15
+329,66.0,66
+330,40.0,40
+331,31.0,31
+332,73.0,73
+333,52.0,52
+334,31.0,31
+335,69.0,69
+336,33.0,33
+337,39.0,39
+338,21.0,21
+339,34.0,34
+340,32.0,32
+341,38.0,38
+342,29.0,29
+343,32.0,32
+344,14.0,14
+345,51.0,51
+346,38.0,38
+347,51.0,51
+348,28.0,28
+349,85.0,85
+350,34.0,34
+351,57.0,57
+352,21.0,21
+353,22.0,22
+354,27.0,27
+355,19.0,19
+356,77.0,77
+357,30.0,30
+358,28.0,28
+359,53.0,53
+360,48.0,48
+361,41.0,41
+362,26.0,26
+363,35.0,35
+364,52.0,52
+365,42.0,42
+366,21.0,21
+367,34.0,34
+368,43.0,43
+369,82.0,82
+370,43.0,43
+371,67.0,67
+372,56.0,56
+373,54.0,54
+374,27.0,27
+375,37.0,37
+376,32.0,32
+377,23.0,23
+378,32.0,32
+379,40.0,40
+380,26.0,26
+381,22.0,22
+382,23.0,23
+383,100.0,100
+384,45.0,45
+385,57.0,57
+386,51.0,51
+387,15.0,15
+388,17.0,17
+389,63.0,63
+390,67.0,67
+391,80.0,80
+392,64.0,64
+393,29.0,29
+394,74.0,74
+395,51.0,51
+396,88.0,88
+397,11.0,11
+398,27.0,27
+399,30.0,30
+400,22.0,22
+401,36.0,36
+402,13.0,13
+403,37.0,37
+404,86.0,86
+405,18.0,18
+406,25.0,25
+407,21.0,21
+408,30.0,30
+409,28.0,28
+410,43.0,43
+411,23.0,23
+412,17.0,17
+413,32.0,32
+414,25.0,25
+415,22.0,22
+416,36.0,36
+417,52.0,52
+418,33.0,33
+419,16.0,16
+420,30.0,30
+421,52.0,52
+422,59.0,59
+423,35.0,35
+424,39.0,39
+425,47.0,47
+426,30.0,30
+427,32.0,32
+428,42.0,42
+429,37.0,37
+430,35.0,35
+431,25.0,25
+432,39.0,39
+433,26.0,26
+434,58.0,58
+435,64.0,64
+436,30.0,30
+437,33.0,33
+438,42.0,42
+439,30.0,30
+440,47.0,47
+441,69.0,69
+442,47.0,47
+443,40.0,40
+444,53.0,53
+445,38.0,38
+446,176.0,176
+447,116.0,116
+448,40.0,40
+449,86.0,86
+450,38.0,38
+451,39.0,39
+452,48.0,48
+453,22.0,22
+454,64.0,64
+455,30.0,30
+456,36.0,36
+457,46.0,46
+458,16.0,16
+459,103.0,103
+460,58.0,58
+461,16.0,16
+462,36.0,36
+463,21.0,21
+464,79.0,79
+465,29.0,29
+466,67.0,67
+467,59.0,59
+468,50.0,50
+469,72.0,72
+470,75.0,75
+471,26.0,26
+472,36.0,36
+473,35.0,35
+474,40.0,40
+475,49.0,49
+476,47.0,47
+477,42.0,42
+478,37.0,37
+479,33.0,33
+480,60.0,60
+481,34.0,34
+482,20.0,20
+483,69.0,69
+484,63.0,63
+485,49.0,49
+486,18.0,18
+487,68.0,68
+488,24.0,24
+489,79.0,79
+490,22.0,22
+491,39.0,39
+492,64.0,64
+493,20.0,20
+494,21.0,21
+495,22.0,22
+496,56.0,56
+497,56.0,56
+498,39.0,39
+499,64.0,64
+500,42.0,42
+501,40.0,40
+502,44.0,44
+503,30.0,30
+504,56.0,56
+505,137.0,137
+506,37.0,37
+507,19.0,19
+508,59.0,59
+509,29.0,29
+510,108.0,108
+511,53.0,53
+512,26.0,26
+513,43.0,43
+514,27.0,27
+515,34.0,34
+516,51.0,51
+517,35.0,35
+518,90.0,90
+519,64.0,64
+520,63.0,63
+521,33.0,33
+522,29.0,29
+523,48.0,48
+524,40.0,40
+525,55.0,55
+526,26.0,26
+527,69.0,69
+528,48.0,48
+529,50.0,50
+530,34.0,34
+531,31.0,31
+532,26.0,26
+533,60.0,60
+534,60.0,60
+535,80.0,80
+536,44.0,44
+537,62.0,62
+538,47.0,47
+539,79.0,79
+540,91.0,91
+541,84.0,84
+542,134.0,134
+543,49.0,49
+544,37.0,37
+545,23.0,23
+546,52.0,52
+547,52.0,52
+548,57.0,57
+549,69.0,69
+550,76.0,76
+551,33.0,33
+552,117.0,117
+553,83.0,83
+554,38.0,38
+555,45.0,45
+556,119.0,119
+557,105.0,105
+558,151.0,151
+559,59.0,59
+560,41.0,41
+561,49.0,49
+562,29.0,29
+563,61.0,61
+564,106.0,106
+565,43.0,43
+566,51.0,51
+567,54.0,54
+568,60.0,60
+569,29.0,29
+570,54.0,54
+571,47.0,47
+572,101.0,101
+573,26.0,26
+574,71.0,71
+575,115.0,115
+576,21.0,21
+577,47.0,47
+578,71.0,71
+579,61.0,61
+580,30.0,30
+581,72.0,72
+582,37.0,37
+583,62.0,62
+584,76.0,76
+585,49.0,49
+586,78.0,78
+587,62.0,62
+588,86.0,86
+589,56.0,56
+590,103.0,103
+591,97.0,97
+592,53.0,53
+593,110.0,110
+594,91.0,91
+595,52.0,52
+596,67.0,67
+597,40.0,40
+598,71.0,71
+599,52.0,52
+600,49.0,49
+601,101.0,101
+602,93.0,93
+603,33.0,33
+604,56.0,56
+605,53.0,53
+606,103.0,103
+607,134.0,134
+608,83.0,83
+609,78.0,78
+610,25.0,25
+611,52.0,52
+612,61.0,61
+613,49.0,49
+614,91.0,91
+615,46.0,46
+616,34.0,34
+617,72.0,72
+618,62.0,62
+619,42.0,42
+620,63.0,63
+621,60.0,60
+622,178.0,178
+623,43.0,43
+624,66.0,66
+625,23.0,23
+626,42.0,42
+627,26.0,26
+628,73.0,73
+629,30.0,30
+630,39.0,39
+631,36.0,36
+632,47.0,47
+633,58.0,58
+634,45.0,45
+635,82.0,82
+636,55.0,55
+637,31.0,31
+638,71.0,71
+639,68.0,68
+640,119.0,119
+641,42.0,42
+642,72.0,72
+643,36.0,36
+644,47.0,47
+645,126.0,126
+646,64.0,64
+647,60.0,60
+648,147.0,147
+649,34.0,34
+650,17.0,17
+651,59.0,59
+652,46.0,46
+653,126.0,126
+654,76.0,76
+655,85.0,85
+656,68.0,68
+657,36.0,36
+658,53.0,53
+659,116.0,116
+660,99.0,99
+661,29.0,29
+662,22.0,22
+663,89.0,89
+664,166.0,166
+665,73.0,73
+666,28.0,28
+667,110.0,110
+668,92.0,92
+669,76.0,76
+670,65.0,65
+671,48.0,48
+672,27.0,27
+673,38.0,38
+674,44.0,44
+675,70.0,70
+676,103.0,103
+677,48.0,48
+678,56.0,56
+679,51.0,51
+680,30.0,30
+681,118.0,118
+682,35.0,35
+683,12.0,12
+684,64.0,64
+685,105.0,105
+686,23.0,23
+687,52.0,52
+688,153.0,153
+689,65.0,65
+690,44.0,44
+691,38.0,38
+692,55.0,55
+693,37.0,37
+694,18.0,18
+695,106.0,106
+696,175.0,175
+697,88.0,88
+698,22.0,22
+699,60.0,60
+700,22.0,22
+701,78.0,78
+702,54.0,54
+703,61.0,61
+704,50.0,50
+705,55.0,55
+706,34.0,34
+707,23.0,23
+708,39.0,39
+709,45.0,45
+710,43.0,43
+711,113.0,113
+712,59.0,59
+713,36.0,36
+714,71.0,71
+715,35.0,35
+716,45.0,45
+717,42.0,42
+718,40.0,40
+719,26.0,26
+720,52.0,52
+721,29.0,29
+722,47.0,47
+723,45.0,45
+724,73.0,73
+725,42.0,42
+726,42.0,42
+727,30.0,30
+728,119.0,119
+729,60.0,60
+730,18.0,18
+731,18.0,18
+732,21.0,21
+733,33.0,33
+734,45.0,45
+735,35.0,35
+736,28.0,28
+737,37.0,37
+738,26.0,26
+739,28.0,28
+740,31.0,31
+741,130.0,130
+742,48.0,48
+743,65.0,65
+744,38.0,38
+745,54.0,54
+746,92.0,92
+747,53.0,53
+748,42.0,42
+749,87.0,87
+750,65.0,65
+751,45.0,45
+752,58.0,58
+753,27.0,27
+754,20.0,20
+755,59.0,59
+756,105.0,105
+757,54.0,54
+758,27.0,27
+759,46.0,46
+760,29.0,29
+761,15.0,15
+762,58.0,58
+763,22.0,22
+764,45.0,45
+765,44.0,44
+766,81.0,81
+767,61.0,61
+768,23.0,23
+769,134.0,134
+770,38.0,38
+771,200.0,200
+772,63.0,63
+773,62.0,62
+774,36.0,36
+775,68.0,68
+776,94.0,94
+777,26.0,26
+778,61.0,61
+779,77.0,77
+780,71.0,71
+781,63.0,63
+782,40.0,40
+783,26.0,26
+784,126.0,126
+785,59.0,59
+786,64.0,64
+787,57.0,57
+788,99.0,99
+789,47.0,47
+790,68.0,68
+791,38.0,38
+792,57.0,57
+793,42.0,42
+794,79.0,79
+795,108.0,108
+796,63.0,63
+797,62.0,62
+798,189.0,189
+799,157.0,157
+800,93.0,93
+801,53.0,53
+802,56.0,56
+803,144.0,144
+804,63.0,63
+805,41.0,41
+806,134.0,134
+807,23.0,23
+808,90.0,90
+809,124.0,124
+810,42.0,42
+811,40.0,40
+812,29.0,29
+813,46.0,46
+814,160.0,160
+815,34.0,34
+816,91.0,91
+817,60.0,60
+818,50.0,50
+819,113.0,113
+820,108.0,108
+821,56.0,56
+822,200.0,200
+823,154.0,154
+824,78.0,78
+825,55.0,55
+826,136.0,136
+827,66.0,66
+828,81.0,81
+829,23.0,23
+830,63.0,63
+831,85.0,85
+832,91.0,91
+833,85.0,85
+834,17.0,17
+835,85.0,85
+836,152.0,152
+837,59.0,59
+838,40.0,40
+839,103.0,103
+840,135.0,135
+841,50.0,50
+842,22.0,22
+843,75.0,75
+844,97.0,97
+845,59.0,59
+846,57.0,57
+847,122.0,122
+848,100.0,100
+849,132.0,132
+850,53.0,53
+851,106.0,106
+852,87.0,87
+853,82.0,82
+854,154.0,154
+855,139.0,139
+856,27.0,27
+857,35.0,35
+858,60.0,60
+859,188.0,188
+860,116.0,116
+861,160.0,160
+862,190.0,190
+863,61.0,61
+864,122.0,122
+865,97.0,97
+866,54.0,54
+867,24.0,24
+868,122.0,122
+869,161.0,161
+870,40.0,40
+871,165.0,165
+872,145.0,145
+873,155.0,155
+874,90.0,90
+875,58.0,58
+876,53.0,53
+877,47.0,47
+878,53.0,53
+879,86.0,86
+880,56.0,56
+881,152.0,152
+882,77.0,77
+883,50.0,50
+884,85.0,85
+885,200.0,200
+886,96.0,96
+887,85.0,85
+888,44.0,44
+889,39.0,39
+890,200.0,200
+891,164.0,164
+892,36.0,36
+893,139.0,139
+894,44.0,44
+895,46.0,46
+896,103.0,103
+897,168.0,168
+898,189.0,189
+899,200.0,200
+900,69.0,69
+901,71.0,71
+902,147.0,147
+903,140.0,140
+904,200.0,200
+905,82.0,82
+906,129.0,129
+907,164.0,164
+908,28.0,28
+909,73.0,73
+910,174.0,174
+911,176.0,176
+912,132.0,132
+913,149.0,149
+914,93.0,93
+915,52.0,52
+916,93.0,93
+917,33.0,33
+918,154.0,154
+919,200.0,200
+920,200.0,200
+921,200.0,200
+922,67.0,67
+923,83.0,83
+924,162.0,162
+925,41.0,41
+926,103.0,103
+927,200.0,200
+928,131.0,131
+929,117.0,117
+930,77.0,77
+931,45.0,45
+932,144.0,144
+933,123.0,123
+934,122.0,122
+935,29.0,29
+936,89.0,89
+937,71.0,71
+938,200.0,200
+939,80.0,80
+940,98.0,98
+941,143.0,143
+942,200.0,200
+943,95.0,95
+944,83.0,83
+945,62.0,62
+946,62.0,62
+947,118.0,118
+948,144.0,144
+949,113.0,113
+950,110.0,110
+951,139.0,139
+952,70.0,70
+953,79.0,79
+954,176.0,176
+955,151.0,151
+956,24.0,24
+957,50.0,50
+958,90.0,90
+959,20.0,20
+960,65.0,65
+961,176.0,176
+962,37.0,37
+963,48.0,48
+964,89.0,89
+965,190.0,190
+966,155.0,155
+967,26.0,26
+968,200.0,200
+969,186.0,186
+970,60.0,60
+971,115.0,115
+972,115.0,115
+973,121.0,121
+974,177.0,177
+975,200.0,200
+976,51.0,51
+977,105.0,105
+978,200.0,200
+979,68.0,68
+980,170.0,170
+981,70.0,70
+982,55.0,55
+983,70.0,70
+984,66.0,66
+985,161.0,161
+986,40.0,40
+987,200.0,200
+988,107.0,107
+989,80.0,80
+990,128.0,128
+991,154.0,154
+992,101.0,101
+993,178.0,178
+994,129.0,129
+995,128.0,128
+996,146.0,146
+997,142.0,142
+998,200.0,200
+999,62.0,62
+1000,19.0,19
+1001,82.0,82
+1002,63.0,63
+1003,129.0,129
+1004,54.0,54
+1005,125.0,125
+1006,113.0,113
+1007,93.0,93
+1008,200.0,200
+1009,48.0,48
+1010,58.0,58
+1011,66.0,66
+1012,41.0,41
+1013,145.0,145
+1014,42.0,42
+1015,185.0,185
+1016,199.0,199
+1017,200.0,200
+1018,125.0,125
+1019,145.0,145
+1020,32.0,32
+1021,141.0,141
+1022,195.0,195
+1023,175.0,175
+1024,162.0,162
+1025,127.0,127
+1026,154.0,154
+1027,166.0,166
+1028,200.0,200
+1029,188.0,188
+1030,200.0,200
+1031,200.0,200
+1032,162.0,162
+1033,141.0,141
+1034,200.0,200
+1035,155.0,155
+1036,134.0,134
+1037,146.0,146
+1038,192.0,192
+1039,136.0,136
+1040,200.0,200
+1041,149.0,149
+1042,113.0,113
+1043,40.0,40
+1044,178.0,178
+1045,126.0,126
+1046,200.0,200
+1047,37.0,37
+1048,200.0,200
+1049,141.0,141
+1050,118.0,118
+1051,34.0,34
+1052,142.0,142
+1053,65.0,65
+1054,200.0,200
+1055,133.0,133
+1056,137.0,137
+1057,148.0,148
+1058,148.0,148
+1059,136.0,136
+1060,159.0,159
+1061,48.0,48
+1062,109.0,109
+1063,65.0,65
+1064,130.0,130
+1065,191.0,191
+1066,200.0,200
+1067,200.0,200
+1068,48.0,48
+1069,200.0,200
+1070,65.0,65
+1071,164.0,164
+1072,200.0,200
+1073,156.0,156
+1074,200.0,200
+1075,131.0,131
+1076,126.0,126
+1077,200.0,200
+1078,200.0,200
+1079,32.0,32
+1080,175.0,175
+1081,200.0,200
+1082,84.0,84
+1083,81.0,81
+1084,183.0,183
+1085,51.0,51
+1086,155.0,155
+1087,146.0,146
+1088,108.0,108
+1089,176.0,176
+1090,189.0,189
+1091,200.0,200
+1092,164.0,164
+1093,70.0,70
+1094,200.0,200
+1095,172.0,172
+1096,163.0,163
+1097,168.0,168
+1098,181.0,181
+1099,200.0,200
+1100,33.0,33
+1101,200.0,200
+1102,58.0,58
+1103,200.0,200
+1104,156.0,156
+1105,200.0,200
+1106,138.0,138
+1107,200.0,200
+1108,81.0,81
+1109,105.0,105
+1110,87.0,87
+1111,170.0,170
+1112,200.0,200
+1113,200.0,200
+1114,200.0,200
+1115,200.0,200
+1116,200.0,200
+1117,200.0,200
+1118,200.0,200
+1119,158.0,158
+1120,64.0,64
+1121,138.0,138
+1122,200.0,200
+1123,158.0,158
+1124,86.0,86
+1125,125.0,125
+1126,105.0,105
+1127,200.0,200
+1128,120.0,120
+1129,53.0,53
+1130,127.0,127
+1131,200.0,200
+1132,48.0,48
+1133,200.0,200
+1134,144.0,144
+1135,42.0,42
+1136,100.0,100
+1137,160.0,160
+1138,200.0,200
+1139,200.0,200
+1140,200.0,200
+1141,200.0,200
+1142,135.0,135
+1143,184.0,184
+1144,184.0,184
+1145,168.0,168
+1146,162.0,162
+1147,52.0,52
+1148,120.0,120
+1149,133.0,133
+1150,200.0,200
+1151,178.0,178
+1152,200.0,200
+1153,22.0,22
+1154,200.0,200
+1155,108.0,108
+1156,200.0,200
+1157,200.0,200
+1158,102.0,102
+1159,200.0,200
+1160,200.0,200
+1161,200.0,200
+1162,65.0,65
+1163,131.0,131
+1164,129.0,129
+1165,136.0,136
+1166,137.0,137
+1167,40.0,40
+1168,130.0,130
+1169,99.0,99
+1170,131.0,131
+1171,200.0,200
+1172,172.0,172
+1173,200.0,200
+1174,200.0,200
+1175,83.0,83
+1176,151.0,151
+1177,200.0,200
+1178,70.0,70
+1179,84.0,84
+1180,172.0,172
+1181,200.0,200
+1182,22.0,22
+1183,118.0,118
+1184,200.0,200
+1185,200.0,200
+1186,200.0,200
+1187,200.0,200
+1188,200.0,200
+1189,145.0,145
+1190,121.0,121
+1191,159.0,159
+1192,126.0,126
+1193,49.0,49
+1194,200.0,200
+1195,119.0,119
+1196,155.0,155
+1197,71.0,71
+1198,200.0,200
+1199,79.0,79
+1200,68.0,68
+1201,200.0,200
+1202,200.0,200
+1203,143.0,143
+1204,200.0,200
+1205,54.0,54
+1206,180.0,180
+1207,158.0,158
+1208,149.0,149
+1209,170.0,170
+1210,118.0,118
+1211,155.0,155
+1212,200.0,200
+1213,200.0,200
+1214,143.0,143
+1215,200.0,200
+1216,200.0,200
+1217,55.0,55
+1218,200.0,200
+1219,200.0,200
+1220,168.0,168
+1221,170.0,170
+1222,112.0,112
+1223,108.0,108
+1224,104.0,104
+1225,135.0,135
+1226,131.0,131
+1227,98.0,98
+1228,63.0,63
+1229,200.0,200
+1230,119.0,119
+1231,130.0,130
+1232,113.0,113
+1233,170.0,170
+1234,200.0,200
+1235,164.0,164
+1236,149.0,149
+1237,200.0,200
+1238,116.0,116
+1239,200.0,200
+1240,133.0,133
+1241,200.0,200
+1242,164.0,164
+1243,154.0,154
+1244,191.0,191
+1245,128.0,128
+1246,119.0,119
+1247,114.0,114
+1248,200.0,200
+1249,111.0,111
+1250,78.0,78
+1251,200.0,200
+1252,200.0,200
+1253,172.0,172
+1254,200.0,200
+1255,105.0,105
+1256,181.0,181
+1257,200.0,200
+1258,83.0,83
+1259,200.0,200
+1260,176.0,176
+1261,200.0,200
+1262,147.0,147
+1263,165.0,165
+1264,167.0,167
+1265,183.0,183
+1266,117.0,117
+1267,200.0,200
+1268,200.0,200
+1269,171.0,171
+1270,20.0,20
+1271,200.0,200
+1272,200.0,200
+1273,200.0,200
+1274,109.0,109
+1275,142.0,142
+1276,117.0,117
+1277,200.0,200
+1278,176.0,176
+1279,200.0,200
+1280,101.0,101
+1281,200.0,200
+1282,130.0,130
+1283,200.0,200
+1284,111.0,111
+1285,124.0,124
+1286,178.0,178
+1287,200.0,200
+1288,184.0,184
+1289,200.0,200
+1290,200.0,200
+1291,200.0,200
+1292,200.0,200
+1293,130.0,130
+1294,200.0,200
+1295,134.0,134
+1296,195.0,195
+1297,200.0,200
+1298,62.0,62
+1299,200.0,200
+1300,200.0,200
+1301,165.0,165
+1302,190.0,190
+1303,200.0,200
+1304,200.0,200
+1305,168.0,168
+1306,200.0,200
+1307,64.0,64
+1308,122.0,122
+1309,200.0,200
+1310,134.0,134
+1311,200.0,200
+1312,200.0,200
+1313,200.0,200
+1314,150.0,150
+1315,187.0,187
+1316,130.0,130
+1317,140.0,140
+1318,157.0,157
+1319,200.0,200
+1320,149.0,149
+1321,200.0,200
+1322,58.0,58
+1323,84.0,84
+1324,140.0,140
+1325,139.0,139
+1326,117.0,117
+1327,175.0,175
+1328,135.0,135
+1329,169.0,169
+1330,200.0,200
+1331,143.0,143
+1332,127.0,127
+1333,127.0,127
+1334,148.0,148
+1335,200.0,200
+1336,136.0,136
+1337,200.0,200
+1338,200.0,200
+1339,175.0,175
+1340,102.0,102
+1341,200.0,200
+1342,97.0,97
+1343,120.0,120
+1344,59.0,59
+1345,200.0,200
+1346,200.0,200
+1347,104.0,104
+1348,128.0,128
+1349,200.0,200
+1350,195.0,195
+1351,96.0,96
+1352,196.0,196
+1353,200.0,200
+1354,58.0,58
+1355,200.0,200
+1356,200.0,200
+1357,114.0,114
+1358,104.0,104
+1359,200.0,200
+1360,179.0,179
+1361,200.0,200
+1362,200.0,200
+1363,140.0,140
+1364,138.0,138
+1365,57.0,57
+1366,165.0,165
+1367,174.0,174
+1368,199.0,199
+1369,110.0,110
+1370,200.0,200
+1371,154.0,154
+1372,200.0,200
+1373,78.0,78
+1374,200.0,200
+1375,185.0,185
+1376,167.0,167
+1377,161.0,161
+1378,155.0,155
+1379,117.0,117
+1380,128.0,128
+1381,94.0,94
+1382,200.0,200
+1383,121.0,121
+1384,61.0,61
+1385,21.0,21
+1386,105.0,105
+1387,185.0,185
+1388,200.0,200
+1389,124.0,124
+1390,200.0,200
+1391,133.0,133
+1392,200.0,200
+1393,153.0,153
+1394,200.0,200
+1395,200.0,200
+1396,152.0,152
+1397,146.0,146
+1398,200.0,200
+1399,183.0,183
+1400,195.0,195
+1401,172.0,172
+1402,151.0,151
+1403,122.0,122
+1404,200.0,200
+1405,200.0,200
+1406,200.0,200
+1407,200.0,200
+1408,130.0,130
+1409,148.0,148
+1410,200.0,200
+1411,200.0,200
+1412,200.0,200
+1413,157.0,157
+1414,136.0,136
+1415,115.0,115
+1416,200.0,200
+1417,105.0,105
+1418,124.0,124
+1419,144.0,144
+1420,34.0,34
+1421,151.0,151
+1422,101.0,101
+1423,64.0,64
+1424,200.0,200
+1425,100.0,100
+1426,54.0,54
+1427,132.0,132
+1428,200.0,200
+1429,131.0,131
+1430,51.0,51
+1431,123.0,123
+1432,99.0,99
+1433,200.0,200
+1434,200.0,200
+1435,144.0,144
+1436,166.0,166
+1437,122.0,122
+1438,147.0,147
+1439,200.0,200
+1440,103.0,103
+1441,164.0,164
+1442,76.0,76
+1443,159.0,159
+1444,152.0,152
+1445,200.0,200
+1446,129.0,129
+1447,124.0,124
+1448,40.0,40
+1449,200.0,200
+1450,117.0,117
+1451,175.0,175
+1452,51.0,51
+1453,101.0,101
+1454,117.0,117
+1455,179.0,179
+1456,44.0,44
+1457,190.0,190
+1458,135.0,135
+1459,183.0,183
+1460,118.0,118
+1461,200.0,200
+1462,109.0,109
+1463,86.0,86
+1464,147.0,147
+1465,200.0,200
+1466,124.0,124
+1467,128.0,128
+1468,156.0,156
+1469,200.0,200
+1470,167.0,167
+1471,197.0,197
+1472,75.0,75
+1473,168.0,168
+1474,114.0,114
+1475,153.0,153
+1476,146.0,146
+1477,188.0,188
+1478,144.0,144
+1479,200.0,200
+1480,51.0,51
+1481,35.0,35
+1482,152.0,152
+1483,161.0,161
+1484,114.0,114
+1485,200.0,200
+1486,161.0,161
+1487,200.0,200
+1488,93.0,93
+1489,116.0,116
+1490,152.0,152
+1491,200.0,200
+1492,200.0,200
+1493,200.0,200
+1494,86.0,86
+1495,200.0,200
+1496,178.0,178
+1497,200.0,200
+1498,200.0,200
+1499,154.0,154
+1500,135.0,135
+1501,200.0,200
+1502,146.0,146
+1503,78.0,78
+1504,115.0,115
+1505,189.0,189
+1506,133.0,133
+1507,123.0,123
+1508,158.0,158
+1509,200.0,200
+1510,200.0,200
+1511,200.0,200
+1512,200.0,200
+1513,200.0,200
+1514,200.0,200
+1515,200.0,200
+1516,119.0,119
+1517,162.0,162
+1518,200.0,200
+1519,114.0,114
+1520,200.0,200
+1521,128.0,128
+1522,200.0,200
+1523,200.0,200
+1524,130.0,130
+1525,65.0,65
+1526,200.0,200
+1527,200.0,200
+1528,200.0,200
+1529,188.0,188
+1530,159.0,159
+1531,200.0,200
+1532,200.0,200
+1533,200.0,200
+1534,147.0,147
+1535,180.0,180
+1536,152.0,152
+1537,178.0,178
+1538,131.0,131
+1539,118.0,118
+1540,153.0,153
+1541,197.0,197
+1542,200.0,200
+1543,200.0,200
+1544,178.0,178
+1545,67.0,67
+1546,137.0,137
+1547,51.0,51
+1548,160.0,160
+1549,200.0,200
+1550,124.0,124
+1551,109.0,109
+1552,181.0,181
+1553,182.0,182
+1554,136.0,136
+1555,91.0,91
+1556,159.0,159
+1557,192.0,192
+1558,106.0,106
+1559,200.0,200
+1560,169.0,169
+1561,167.0,167
+1562,141.0,141
+1563,127.0,127
+1564,71.0,71
+1565,134.0,134
+1566,200.0,200
+1567,115.0,115
+1568,99.0,99
+1569,184.0,184
+1570,200.0,200
+1571,133.0,133
+1572,153.0,153
+1573,200.0,200
+1574,194.0,194
+1575,169.0,169
+1576,113.0,113
+1577,147.0,147
+1578,140.0,140
+1579,200.0,200
+1580,113.0,113
+1581,181.0,181
+1582,200.0,200
+1583,182.0,182
+1584,185.0,185
+1585,197.0,197
+1586,200.0,200
+1587,151.0,151
+1588,49.0,49
+1589,137.0,137
+1590,166.0,166
+1591,149.0,149
+1592,126.0,126
+1593,73.0,73
+1594,127.0,127
+1595,104.0,104
+1596,65.0,65
+1597,63.0,63
+1598,126.0,126
+1599,181.0,181
+1600,132.0,132
+1601,89.0,89
+1602,130.0,130
+1603,150.0,150
+1604,100.0,100
+1605,139.0,139
+1606,119.0,119
+1607,48.0,48
+1608,80.0,80
+1609,105.0,105
+1610,85.0,85
+1611,200.0,200
+1612,142.0,142
+1613,95.0,95
+1614,50.0,50
+1615,51.0,51
+1616,124.0,124
+1617,47.0,47
+1618,159.0,159
+1619,154.0,154
+1620,200.0,200
+1621,88.0,88
+1622,65.0,65
+1623,111.0,111
+1624,99.0,99
+1625,120.0,120
+1626,127.0,127
+1627,43.0,43
+1628,80.0,80
+1629,163.0,163
+1630,90.0,90
+1631,154.0,154
+1632,127.0,127
+1633,39.0,39
+1634,200.0,200
+1635,161.0,161
+1636,119.0,119
+1637,156.0,156
+1638,200.0,200
+1639,200.0,200
+1640,41.0,41
+1641,200.0,200
+1642,136.0,136
+1643,157.0,157
+1644,142.0,142
+1645,125.0,125
+1646,155.0,155
+1647,139.0,139
+1648,122.0,122
+1649,116.0,116
+1650,200.0,200
+1651,144.0,144
+1652,170.0,170
+1653,200.0,200
+1654,103.0,103
+1655,105.0,105
+1656,193.0,193
+1657,122.0,122
+1658,200.0,200
+1659,191.0,191
+1660,200.0,200
+1661,200.0,200
+1662,200.0,200
+1663,200.0,200
+1664,200.0,200
+1665,200.0,200
+1666,200.0,200
+1667,64.0,64
+1668,200.0,200
+1669,121.0,121
+1670,200.0,200
+1671,171.0,171
+1672,200.0,200
+1673,130.0,130
+1674,200.0,200
+1675,200.0,200
+1676,188.0,188
+1677,200.0,200
+1678,200.0,200
+1679,200.0,200
+1680,181.0,181
+1681,200.0,200
+1682,200.0,200
+1683,135.0,135
+1684,200.0,200
+1685,114.0,114
+1686,189.0,189
+1687,200.0,200
+1688,200.0,200
+1689,200.0,200
+1690,184.0,184
+1691,200.0,200
+1692,200.0,200
+1693,55.0,55
+1694,153.0,153
+1695,200.0,200
+1696,200.0,200
+1697,125.0,125
+1698,177.0,177
+1699,154.0,154
+1700,53.0,53
+1701,112.0,112
+1702,184.0,184
+1703,200.0,200
+1704,200.0,200
+1705,137.0,137
+1706,72.0,72
+1707,200.0,200
+1708,200.0,200
+1709,200.0,200
+1710,121.0,121
+1711,200.0,200
+1712,200.0,200
+1713,142.0,142
+1714,200.0,200
+1715,200.0,200
+1716,169.0,169
+1717,200.0,200
+1718,200.0,200
+1719,196.0,196
+1720,135.0,135
+1721,200.0,200
+1722,200.0,200
+1723,200.0,200
+1724,96.0,96
+1725,200.0,200
+1726,200.0,200
+1727,200.0,200
+1728,200.0,200
+1729,138.0,138
+1730,200.0,200
+1731,139.0,139
+1732,200.0,200
+1733,190.0,190
+1734,200.0,200
+1735,200.0,200
+1736,138.0,138
+1737,114.0,114
+1738,159.0,159
+1739,120.0,120
+1740,186.0,186
+1741,200.0,200
+1742,183.0,183
+1743,200.0,200
+1744,200.0,200
+1745,200.0,200
+1746,99.0,99
+1747,200.0,200
+1748,100.0,100
+1749,187.0,187
+1750,106.0,106
+1751,200.0,200
+1752,200.0,200
+1753,200.0,200
+1754,52.0,52
+1755,197.0,197
+1756,165.0,165
+1757,200.0,200
+1758,200.0,200
+1759,92.0,92
+1760,200.0,200
+1761,200.0,200
+1762,70.0,70
+1763,165.0,165
+1764,192.0,192
+1765,200.0,200
+1766,200.0,200
+1767,87.0,87
+1768,150.0,150
+1769,149.0,149
+1770,79.0,79
+1771,200.0,200
+1772,200.0,200
+1773,117.0,117
+1774,200.0,200
+1775,135.0,135
+1776,200.0,200
+1777,130.0,130
+1778,200.0,200
+1779,200.0,200
+1780,200.0,200
+1781,200.0,200
+1782,200.0,200
+1783,200.0,200
+1784,200.0,200
+1785,200.0,200
+1786,200.0,200
+1787,200.0,200
+1788,140.0,140
+1789,200.0,200
+1790,200.0,200
+1791,42.0,42
+1792,198.0,198
+1793,200.0,200
+1794,200.0,200
+1795,85.0,85
+1796,164.0,164
+1797,99.0,99
+1798,151.0,151
+1799,200.0,200
+1800,200.0,200
+1801,199.0,199
+1802,200.0,200
+1803,190.0,190
+1804,114.0,114
+1805,200.0,200
+1806,200.0,200
+1807,161.0,161
+1808,200.0,200
+1809,187.0,187
+1810,145.0,145
+1811,200.0,200
+1812,200.0,200
+1813,200.0,200
+1814,96.0,96
+1815,163.0,163
+1816,160.0,160
+1817,200.0,200
+1818,200.0,200
+1819,50.0,50
+1820,200.0,200
+1821,102.0,102
+1822,200.0,200
+1823,200.0,200
+1824,200.0,200
+1825,200.0,200
+1826,109.0,109
+1827,200.0,200
+1828,129.0,129
+1829,200.0,200
+1830,171.0,171
+1831,183.0,183
+1832,106.0,106
+1833,200.0,200
+1834,136.0,136
+1835,100.0,100
+1836,200.0,200
+1837,188.0,188
+1838,200.0,200
+1839,200.0,200
+1840,162.0,162
+1841,200.0,200
+1842,200.0,200
+1843,177.0,177
+1844,200.0,200
+1845,200.0,200
+1846,200.0,200
+1847,200.0,200
+1848,166.0,166
+1849,200.0,200
+1850,53.0,53
+1851,200.0,200
+1852,200.0,200
+1853,153.0,153
+1854,190.0,190
+1855,200.0,200
+1856,200.0,200
+1857,200.0,200
+1858,200.0,200
+1859,136.0,136
+1860,200.0,200
+1861,143.0,143
+1862,45.0,45
+1863,129.0,129
+1864,200.0,200
+1865,200.0,200
+1866,200.0,200
+1867,200.0,200
+1868,60.0,60
+1869,150.0,150
+1870,174.0,174
+1871,157.0,157
+1872,198.0,198
+1873,200.0,200
+1874,91.0,91
+1875,200.0,200
+1876,112.0,112
+1877,159.0,159
+1878,186.0,186
+1879,200.0,200
+1880,82.0,82
+1881,192.0,192
+1882,147.0,147
+1883,200.0,200
+1884,200.0,200
+1885,174.0,174
+1886,181.0,181
+1887,200.0,200
+1888,74.0,74
+1889,200.0,200
+1890,200.0,200
+1891,200.0,200
+1892,157.0,157
+1893,200.0,200
+1894,200.0,200
+1895,180.0,180
+1896,170.0,170
+1897,200.0,200
+1898,135.0,135
+1899,200.0,200
+1900,175.0,175
+1901,200.0,200
+1902,200.0,200
+1903,118.0,118
+1904,147.0,147
+1905,44.0,44
+1906,200.0,200
+1907,58.0,58
+1908,185.0,185
+1909,200.0,200
+1910,200.0,200
+1911,200.0,200
+1912,78.0,78
+1913,190.0,190
+1914,177.0,177
+1915,112.0,112
+1916,200.0,200
+1917,142.0,142
+1918,200.0,200
+1919,92.0,92
+1920,172.0,172
+1921,200.0,200
+1922,178.0,178
+1923,200.0,200
+1924,200.0,200
+1925,138.0,138
+1926,100.0,100
+1927,200.0,200
+1928,95.0,95
+1929,200.0,200
+1930,200.0,200
+1931,129.0,129
+1932,154.0,154
+1933,200.0,200
+1934,200.0,200
+1935,133.0,133
+1936,152.0,152
+1937,133.0,133
+1938,200.0,200
+1939,200.0,200
+1940,200.0,200
+1941,200.0,200
+1942,200.0,200
+1943,167.0,167
+1944,179.0,179
+1945,164.0,164
+1946,187.0,187
+1947,156.0,156
+1948,200.0,200
+1949,200.0,200
+1950,130.0,130
+1951,200.0,200
+1952,200.0,200
+1953,200.0,200
+1954,200.0,200
+1955,65.0,65
+1956,200.0,200
+1957,148.0,148
+1958,200.0,200
+1959,200.0,200
+1960,200.0,200
+1961,168.0,168
+1962,164.0,164
+1963,200.0,200
+1964,200.0,200
+1965,103.0,103
+1966,200.0,200
+1967,173.0,173
+1968,200.0,200
+1969,146.0,146
+1970,197.0,197
+1971,123.0,123
+1972,162.0,162
+1973,200.0,200
+1974,162.0,162
+1975,82.0,82
+1976,157.0,157
+1977,138.0,138
+1978,37.0,37
+1979,200.0,200
+1980,194.0,194
+1981,200.0,200
+1982,104.0,104
+1983,198.0,198
+1984,200.0,200
+1985,200.0,200
+1986,154.0,154
+1987,200.0,200
+1988,200.0,200
+1989,158.0,158
+1990,200.0,200
+1991,142.0,142
+1992,185.0,185
+1993,69.0,69
+1994,200.0,200
+1995,144.0,144
+1996,164.0,164
+1997,189.0,189
+1998,200.0,200
+1999,141.0,141
diff --git a/projects/codes/A2C/outputs/CartPole-v0/20220829-143327/models/actor_checkpoint.pt b/projects/codes/A2C/outputs/CartPole-v0/20220829-143327/models/actor_checkpoint.pt
new file mode 100644
index 0000000..20d78c0
Binary files /dev/null and b/projects/codes/A2C/outputs/CartPole-v0/20220829-143327/models/actor_checkpoint.pt differ
diff --git a/projects/codes/A2C/outputs/CartPole-v0/20220829-143327/models/critic_checkpoint.pt b/projects/codes/A2C/outputs/CartPole-v0/20220829-143327/models/critic_checkpoint.pt
new file mode 100644
index 0000000..c35547d
Binary files /dev/null and b/projects/codes/A2C/outputs/CartPole-v0/20220829-143327/models/critic_checkpoint.pt differ
diff --git a/projects/codes/A2C/outputs/CartPole-v0/20220829-143327/results/params.json b/projects/codes/A2C/outputs/CartPole-v0/20220829-143327/results/params.json
new file mode 100644
index 0000000..010058e
--- /dev/null
+++ b/projects/codes/A2C/outputs/CartPole-v0/20220829-143327/results/params.json
@@ -0,0 +1 @@
+{"algo_name": "A2C", "env_name": "CartPole-v0", "train_eps": 1600, "test_eps": 20, "ep_max_steps": 100000, "gamma": 0.99, "actor_lr": 0.0003, "critic_lr": 0.001, "actor_hidden_dim": 256, "critic_hidden_dim": 256, "device": "cpu", "seed": 10, "show_fig": false, "save_fig": true, "result_path": "/Users/jj/Desktop/rl-tutorials/codes/A2C/outputs/CartPole-v0/20220829-143327/results/", "model_path": "/Users/jj/Desktop/rl-tutorials/codes/A2C/outputs/CartPole-v0/20220829-143327/models/", "n_states": 4, "n_actions": 2}
\ No newline at end of file
diff --git a/projects/codes/A2C/outputs/CartPole-v0/20220829-143327/results/testing_curve.png b/projects/codes/A2C/outputs/CartPole-v0/20220829-143327/results/testing_curve.png
new file mode 100644
index 0000000..96a9a22
Binary files /dev/null and b/projects/codes/A2C/outputs/CartPole-v0/20220829-143327/results/testing_curve.png differ
diff --git a/projects/codes/A2C/outputs/CartPole-v0/20220829-143327/results/testing_results.csv b/projects/codes/A2C/outputs/CartPole-v0/20220829-143327/results/testing_results.csv
new file mode 100644
index 0000000..ebf3893
--- /dev/null
+++ b/projects/codes/A2C/outputs/CartPole-v0/20220829-143327/results/testing_results.csv
@@ -0,0 +1,21 @@
+episodes,rewards,steps
+0,177.0,177
+1,180.0,180
+2,200.0,200
+3,200.0,200
+4,167.0,167
+5,124.0,124
+6,128.0,128
+7,200.0,200
+8,200.0,200
+9,200.0,200
+10,186.0,186
+11,187.0,187
+12,200.0,200
+13,176.0,176
+14,200.0,200
+15,200.0,200
+16,200.0,200
+17,200.0,200
+18,185.0,185
+19,180.0,180
diff --git a/projects/codes/A2C/outputs/CartPole-v0/20220829-143327/results/training_curve.png b/projects/codes/A2C/outputs/CartPole-v0/20220829-143327/results/training_curve.png
new file mode 100644
index 0000000..860a49c
Binary files /dev/null and b/projects/codes/A2C/outputs/CartPole-v0/20220829-143327/results/training_curve.png differ
diff --git a/projects/codes/A2C/outputs/CartPole-v0/20220829-143327/results/training_results.csv b/projects/codes/A2C/outputs/CartPole-v0/20220829-143327/results/training_results.csv
new file mode 100644
index 0000000..f05699c
--- /dev/null
+++ b/projects/codes/A2C/outputs/CartPole-v0/20220829-143327/results/training_results.csv
@@ -0,0 +1,1601 @@
+episodes,rewards,steps
+0,16.0,16
+1,14.0,14
+2,18.0,18
+3,31.0,31
+4,23.0,23
+5,43.0,43
+6,14.0,14
+7,20.0,20
+8,24.0,24
+9,14.0,14
+10,12.0,12
+11,16.0,16
+12,17.0,17
+13,13.0,13
+14,78.0,78
+15,36.0,36
+16,9.0,9
+17,19.0,19
+18,18.0,18
+19,20.0,20
+20,33.0,33
+21,37.0,37
+22,15.0,15
+23,24.0,24
+24,32.0,32
+25,19.0,19
+26,65.0,65
+27,13.0,13
+28,26.0,26
+29,27.0,27
+30,15.0,15
+31,10.0,10
+32,16.0,16
+33,30.0,30
+34,42.0,42
+35,30.0,30
+36,35.0,35
+37,21.0,21
+38,36.0,36
+39,18.0,18
+40,81.0,81
+41,12.0,12
+42,21.0,21
+43,32.0,32
+44,39.0,39
+45,10.0,10
+46,13.0,13
+47,35.0,35
+48,22.0,22
+49,20.0,20
+50,67.0,67
+51,28.0,28
+52,13.0,13
+53,14.0,14
+54,13.0,13
+55,26.0,26
+56,12.0,12
+57,17.0,17
+58,41.0,41
+59,14.0,14
+60,68.0,68
+61,12.0,12
+62,20.0,20
+63,19.0,19
+64,31.0,31
+65,31.0,31
+66,34.0,34
+67,24.0,24
+68,31.0,31
+69,37.0,37
+70,49.0,49
+71,15.0,15
+72,97.0,97
+73,18.0,18
+74,20.0,20
+75,31.0,31
+76,68.0,68
+77,21.0,21
+78,19.0,19
+79,19.0,19
+80,19.0,19
+81,14.0,14
+82,17.0,17
+83,35.0,35
+84,25.0,25
+85,59.0,59
+86,29.0,29
+87,35.0,35
+88,73.0,73
+89,26.0,26
+90,81.0,81
+91,20.0,20
+92,78.0,78
+93,26.0,26
+94,43.0,43
+95,25.0,25
+96,24.0,24
+97,14.0,14
+98,27.0,27
+99,16.0,16
+100,68.0,68
+101,57.0,57
+102,31.0,31
+103,46.0,46
+104,17.0,17
+105,16.0,16
+106,21.0,21
+107,25.0,25
+108,13.0,13
+109,50.0,50
+110,12.0,12
+111,34.0,34
+112,16.0,16
+113,34.0,34
+114,19.0,19
+115,38.0,38
+116,20.0,20
+117,42.0,42
+118,28.0,28
+119,53.0,53
+120,43.0,43
+121,37.0,37
+122,13.0,13
+123,29.0,29
+124,17.0,17
+125,23.0,23
+126,22.0,22
+127,21.0,21
+128,62.0,62
+129,52.0,52
+130,32.0,32
+131,37.0,37
+132,38.0,38
+133,28.0,28
+134,33.0,33
+135,33.0,33
+136,35.0,35
+137,43.0,43
+138,41.0,41
+139,18.0,18
+140,30.0,30
+141,48.0,48
+142,50.0,50
+143,34.0,34
+144,86.0,86
+145,21.0,21
+146,29.0,29
+147,29.0,29
+148,24.0,24
+149,88.0,88
+150,28.0,28
+151,19.0,19
+152,35.0,35
+153,24.0,24
+154,11.0,11
+155,49.0,49
+156,30.0,30
+157,44.0,44
+158,26.0,26
+159,19.0,19
+160,12.0,12
+161,65.0,65
+162,18.0,18
+163,11.0,11
+164,38.0,38
+165,28.0,28
+166,17.0,17
+167,51.0,51
+168,18.0,18
+169,39.0,39
+170,22.0,22
+171,26.0,26
+172,33.0,33
+173,35.0,35
+174,60.0,60
+175,12.0,12
+176,19.0,19
+177,62.0,62
+178,79.0,79
+179,36.0,36
+180,31.0,31
+181,23.0,23
+182,23.0,23
+183,40.0,40
+184,19.0,19
+185,18.0,18
+186,18.0,18
+187,23.0,23
+188,43.0,43
+189,45.0,45
+190,28.0,28
+191,61.0,61
+192,50.0,50
+193,40.0,40
+194,9.0,9
+195,56.0,56
+196,43.0,43
+197,43.0,43
+198,18.0,18
+199,43.0,43
+200,26.0,26
+201,44.0,44
+202,35.0,35
+203,25.0,25
+204,31.0,31
+205,26.0,26
+206,40.0,40
+207,36.0,36
+208,50.0,50
+209,25.0,25
+210,29.0,29
+211,11.0,11
+212,23.0,23
+213,23.0,23
+214,36.0,36
+215,39.0,39
+216,45.0,45
+217,14.0,14
+218,46.0,46
+219,62.0,62
+220,15.0,15
+221,19.0,19
+222,28.0,28
+223,39.0,39
+224,14.0,14
+225,25.0,25
+226,35.0,35
+227,16.0,16
+228,22.0,22
+229,41.0,41
+230,21.0,21
+231,22.0,22
+232,37.0,37
+233,32.0,32
+234,18.0,18
+235,23.0,23
+236,23.0,23
+237,16.0,16
+238,38.0,38
+239,25.0,25
+240,37.0,37
+241,13.0,13
+242,30.0,30
+243,27.0,27
+244,27.0,27
+245,23.0,23
+246,62.0,62
+247,31.0,31
+248,37.0,37
+249,26.0,26
+250,77.0,77
+251,15.0,15
+252,25.0,25
+253,20.0,20
+254,27.0,27
+255,42.0,42
+256,41.0,41
+257,34.0,34
+258,23.0,23
+259,37.0,37
+260,47.0,47
+261,22.0,22
+262,34.0,34
+263,10.0,10
+264,37.0,37
+265,29.0,29
+266,49.0,49
+267,67.0,67
+268,15.0,15
+269,81.0,81
+270,95.0,95
+271,54.0,54
+272,19.0,19
+273,31.0,31
+274,54.0,54
+275,46.0,46
+276,21.0,21
+277,22.0,22
+278,58.0,58
+279,58.0,58
+280,37.0,37
+281,37.0,37
+282,25.0,25
+283,20.0,20
+284,46.0,46
+285,80.0,80
+286,25.0,25
+287,18.0,18
+288,18.0,18
+289,18.0,18
+290,37.0,37
+291,20.0,20
+292,62.0,62
+293,22.0,22
+294,23.0,23
+295,28.0,28
+296,38.0,38
+297,15.0,15
+298,17.0,17
+299,20.0,20
+300,20.0,20
+301,18.0,18
+302,77.0,77
+303,54.0,54
+304,95.0,95
+305,25.0,25
+306,79.0,79
+307,116.0,116
+308,52.0,52
+309,58.0,58
+310,15.0,15
+311,60.0,60
+312,97.0,97
+313,61.0,61
+314,18.0,18
+315,69.0,69
+316,18.0,18
+317,20.0,20
+318,19.0,19
+319,16.0,16
+320,21.0,21
+321,12.0,12
+322,30.0,30
+323,51.0,51
+324,37.0,37
+325,28.0,28
+326,29.0,29
+327,66.0,66
+328,56.0,56
+329,79.0,79
+330,56.0,56
+331,70.0,70
+332,33.0,33
+333,54.0,54
+334,35.0,35
+335,18.0,18
+336,141.0,141
+337,38.0,38
+338,18.0,18
+339,34.0,34
+340,20.0,20
+341,95.0,95
+342,29.0,29
+343,40.0,40
+344,20.0,20
+345,14.0,14
+346,83.0,83
+347,72.0,72
+348,88.0,88
+349,41.0,41
+350,103.0,103
+351,85.0,85
+352,14.0,14
+353,51.0,51
+354,68.0,68
+355,102.0,102
+356,61.0,61
+357,22.0,22
+358,22.0,22
+359,15.0,15
+360,23.0,23
+361,35.0,35
+362,72.0,72
+363,32.0,32
+364,107.0,107
+365,34.0,34
+366,28.0,28
+367,99.0,99
+368,140.0,140
+369,40.0,40
+370,24.0,24
+371,157.0,157
+372,33.0,33
+373,35.0,35
+374,18.0,18
+375,21.0,21
+376,23.0,23
+377,67.0,67
+378,112.0,112
+379,21.0,21
+380,38.0,38
+381,12.0,12
+382,23.0,23
+383,31.0,31
+384,36.0,36
+385,20.0,20
+386,82.0,82
+387,20.0,20
+388,32.0,32
+389,28.0,28
+390,26.0,26
+391,26.0,26
+392,46.0,46
+393,35.0,35
+394,32.0,32
+395,67.0,67
+396,30.0,30
+397,36.0,36
+398,67.0,67
+399,51.0,51
+400,21.0,21
+401,28.0,28
+402,46.0,46
+403,29.0,29
+404,30.0,30
+405,22.0,22
+406,24.0,24
+407,78.0,78
+408,28.0,28
+409,41.0,41
+410,38.0,38
+411,56.0,56
+412,19.0,19
+413,38.0,38
+414,51.0,51
+415,33.0,33
+416,40.0,40
+417,35.0,35
+418,59.0,59
+419,138.0,138
+420,18.0,18
+421,10.0,10
+422,25.0,25
+423,104.0,104
+424,127.0,127
+425,44.0,44
+426,35.0,35
+427,62.0,62
+428,108.0,108
+429,39.0,39
+430,15.0,15
+431,73.0,73
+432,24.0,24
+433,23.0,23
+434,35.0,35
+435,14.0,14
+436,47.0,47
+437,112.0,112
+438,79.0,79
+439,99.0,99
+440,66.0,66
+441,64.0,64
+442,63.0,63
+443,28.0,28
+444,116.0,116
+445,94.0,94
+446,91.0,91
+447,60.0,60
+448,22.0,22
+449,68.0,68
+450,22.0,22
+451,34.0,34
+452,23.0,23
+453,117.0,117
+454,26.0,26
+455,30.0,30
+456,31.0,31
+457,50.0,50
+458,56.0,56
+459,81.0,81
+460,43.0,43
+461,49.0,49
+462,62.0,62
+463,64.0,64
+464,16.0,16
+465,37.0,37
+466,103.0,103
+467,66.0,66
+468,25.0,25
+469,50.0,50
+470,118.0,118
+471,42.0,42
+472,69.0,69
+473,55.0,55
+474,41.0,41
+475,25.0,25
+476,114.0,114
+477,27.0,27
+478,27.0,27
+479,61.0,61
+480,61.0,61
+481,66.0,66
+482,73.0,73
+483,35.0,35
+484,47.0,47
+485,77.0,77
+486,34.0,34
+487,16.0,16
+488,22.0,22
+489,47.0,47
+490,72.0,72
+491,76.0,76
+492,74.0,74
+493,36.0,36
+494,47.0,47
+495,25.0,25
+496,22.0,22
+497,20.0,20
+498,40.0,40
+499,44.0,44
+500,41.0,41
+501,42.0,42
+502,112.0,112
+503,64.0,64
+504,95.0,95
+505,112.0,112
+506,117.0,117
+507,84.0,84
+508,79.0,79
+509,129.0,129
+510,139.0,139
+511,41.0,41
+512,82.0,82
+513,54.0,54
+514,69.0,69
+515,44.0,44
+516,31.0,31
+517,64.0,64
+518,41.0,41
+519,100.0,100
+520,86.0,86
+521,44.0,44
+522,38.0,38
+523,36.0,36
+524,41.0,41
+525,22.0,22
+526,51.0,51
+527,24.0,24
+528,47.0,47
+529,79.0,79
+530,125.0,125
+531,50.0,50
+532,35.0,35
+533,48.0,48
+534,85.0,85
+535,58.0,58
+536,101.0,101
+537,200.0,200
+538,79.0,79
+539,159.0,159
+540,71.0,71
+541,71.0,71
+542,77.0,77
+543,78.0,78
+544,46.0,46
+545,49.0,49
+546,74.0,74
+547,71.0,71
+548,106.0,106
+549,36.0,36
+550,33.0,33
+551,160.0,160
+552,53.0,53
+553,54.0,54
+554,27.0,27
+555,55.0,55
+556,174.0,174
+557,33.0,33
+558,61.0,61
+559,118.0,118
+560,128.0,128
+561,148.0,148
+562,97.0,97
+563,63.0,63
+564,44.0,44
+565,110.0,110
+566,156.0,156
+567,50.0,50
+568,163.0,163
+569,126.0,126
+570,114.0,114
+571,78.0,78
+572,48.0,48
+573,59.0,59
+574,116.0,116
+575,46.0,46
+576,135.0,135
+577,71.0,71
+578,19.0,19
+579,43.0,43
+580,89.0,89
+581,97.0,97
+582,21.0,21
+583,120.0,120
+584,54.0,54
+585,24.0,24
+586,62.0,62
+587,78.0,78
+588,36.0,36
+589,71.0,71
+590,25.0,25
+591,71.0,71
+592,56.0,56
+593,78.0,78
+594,65.0,65
+595,200.0,200
+596,200.0,200
+597,167.0,167
+598,59.0,59
+599,73.0,73
+600,66.0,66
+601,35.0,35
+602,186.0,186
+603,140.0,140
+604,49.0,49
+605,134.0,134
+606,46.0,46
+607,149.0,149
+608,82.0,82
+609,119.0,119
+610,126.0,126
+611,52.0,52
+612,89.0,89
+613,200.0,200
+614,89.0,89
+615,93.0,93
+616,200.0,200
+617,39.0,39
+618,113.0,113
+619,67.0,67
+620,164.0,164
+621,120.0,120
+622,74.0,74
+623,153.0,153
+624,124.0,124
+625,41.0,41
+626,173.0,173
+627,48.0,48
+628,200.0,200
+629,58.0,58
+630,35.0,35
+631,45.0,45
+632,43.0,43
+633,157.0,157
+634,111.0,111
+635,98.0,98
+636,102.0,102
+637,124.0,124
+638,111.0,111
+639,42.0,42
+640,128.0,128
+641,183.0,183
+642,49.0,49
+643,64.0,64
+644,117.0,117
+645,150.0,150
+646,103.0,103
+647,115.0,115
+648,41.0,41
+649,200.0,200
+650,162.0,162
+651,105.0,105
+652,94.0,94
+653,55.0,55
+654,73.0,73
+655,65.0,65
+656,29.0,29
+657,148.0,148
+658,42.0,42
+659,17.0,17
+660,60.0,60
+661,59.0,59
+662,80.0,80
+663,41.0,41
+664,143.0,143
+665,200.0,200
+666,45.0,45
+667,95.0,95
+668,61.0,61
+669,63.0,63
+670,170.0,170
+671,150.0,150
+672,139.0,139
+673,44.0,44
+674,44.0,44
+675,47.0,47
+676,72.0,72
+677,129.0,129
+678,79.0,79
+679,128.0,128
+680,126.0,126
+681,65.0,65
+682,57.0,57
+683,58.0,58
+684,66.0,66
+685,89.0,89
+686,150.0,150
+687,97.0,97
+688,20.0,20
+689,58.0,58
+690,81.0,81
+691,54.0,54
+692,55.0,55
+693,197.0,197
+694,61.0,61
+695,157.0,157
+696,166.0,166
+697,79.0,79
+698,128.0,128
+699,200.0,200
+700,46.0,46
+701,140.0,140
+702,19.0,19
+703,144.0,144
+704,138.0,138
+705,46.0,46
+706,200.0,200
+707,61.0,61
+708,114.0,114
+709,100.0,100
+710,85.0,85
+711,200.0,200
+712,36.0,36
+713,142.0,142
+714,22.0,22
+715,82.0,82
+716,49.0,49
+717,139.0,139
+718,173.0,173
+719,47.0,47
+720,67.0,67
+721,197.0,197
+722,157.0,157
+723,149.0,149
+724,29.0,29
+725,85.0,85
+726,135.0,135
+727,157.0,157
+728,141.0,141
+729,165.0,165
+730,102.0,102
+731,192.0,192
+732,196.0,196
+733,183.0,183
+734,75.0,75
+735,41.0,41
+736,122.0,122
+737,200.0,200
+738,166.0,166
+739,109.0,109
+740,200.0,200
+741,200.0,200
+742,24.0,24
+743,20.0,20
+744,138.0,138
+745,122.0,122
+746,200.0,200
+747,156.0,156
+748,191.0,191
+749,91.0,91
+750,105.0,105
+751,145.0,145
+752,130.0,130
+753,150.0,150
+754,77.0,77
+755,137.0,137
+756,181.0,181
+757,200.0,200
+758,132.0,132
+759,200.0,200
+760,76.0,76
+761,63.0,63
+762,160.0,160
+763,28.0,28
+764,135.0,135
+765,43.0,43
+766,146.0,146
+767,179.0,179
+768,82.0,82
+769,126.0,126
+770,148.0,148
+771,110.0,110
+772,116.0,116
+773,55.0,55
+774,158.0,158
+775,155.0,155
+776,200.0,200
+777,153.0,153
+778,147.0,147
+779,54.0,54
+780,173.0,173
+781,44.0,44
+782,47.0,47
+783,200.0,200
+784,179.0,179
+785,194.0,194
+786,200.0,200
+787,141.0,141
+788,130.0,130
+789,133.0,133
+790,53.0,53
+791,124.0,124
+792,143.0,143
+793,58.0,58
+794,190.0,190
+795,130.0,130
+796,130.0,130
+797,200.0,200
+798,163.0,163
+799,200.0,200
+800,103.0,103
+801,200.0,200
+802,200.0,200
+803,18.0,18
+804,156.0,156
+805,165.0,165
+806,129.0,129
+807,33.0,33
+808,132.0,132
+809,200.0,200
+810,110.0,110
+811,93.0,93
+812,198.0,198
+813,200.0,200
+814,200.0,200
+815,182.0,182
+816,75.0,75
+817,200.0,200
+818,200.0,200
+819,200.0,200
+820,144.0,144
+821,118.0,118
+822,200.0,200
+823,60.0,60
+824,200.0,200
+825,134.0,134
+826,154.0,154
+827,116.0,116
+828,21.0,21
+829,200.0,200
+830,105.0,105
+831,158.0,158
+832,122.0,122
+833,88.0,88
+834,108.0,108
+835,112.0,112
+836,134.0,134
+837,165.0,165
+838,200.0,200
+839,138.0,138
+840,164.0,164
+841,200.0,200
+842,30.0,30
+843,181.0,181
+844,149.0,149
+845,102.0,102
+846,128.0,128
+847,74.0,74
+848,112.0,112
+849,80.0,80
+850,190.0,190
+851,35.0,35
+852,40.0,40
+853,121.0,121
+854,125.0,125
+855,99.0,99
+856,115.0,115
+857,171.0,171
+858,200.0,200
+859,50.0,50
+860,200.0,200
+861,143.0,143
+862,146.0,146
+863,47.0,47
+864,154.0,154
+865,48.0,48
+866,103.0,103
+867,200.0,200
+868,151.0,151
+869,46.0,46
+870,155.0,155
+871,40.0,40
+872,124.0,124
+873,41.0,41
+874,45.0,45
+875,158.0,158
+876,29.0,29
+877,200.0,200
+878,200.0,200
+879,151.0,151
+880,158.0,158
+881,200.0,200
+882,15.0,15
+883,180.0,180
+884,75.0,75
+885,196.0,196
+886,176.0,176
+887,67.0,67
+888,90.0,90
+889,161.0,161
+890,88.0,88
+891,200.0,200
+892,64.0,64
+893,111.0,111
+894,184.0,184
+895,189.0,189
+896,109.0,109
+897,167.0,167
+898,99.0,99
+899,180.0,180
+900,121.0,121
+901,126.0,126
+902,200.0,200
+903,200.0,200
+904,177.0,177
+905,107.0,107
+906,200.0,200
+907,133.0,133
+908,164.0,164
+909,200.0,200
+910,160.0,160
+911,120.0,120
+912,200.0,200
+913,65.0,65
+914,27.0,27
+915,200.0,200
+916,162.0,162
+917,25.0,25
+918,118.0,118
+919,56.0,56
+920,107.0,107
+921,200.0,200
+922,166.0,166
+923,69.0,69
+924,187.0,187
+925,126.0,126
+926,200.0,200
+927,49.0,49
+928,99.0,99
+929,200.0,200
+930,200.0,200
+931,153.0,153
+932,158.0,158
+933,200.0,200
+934,145.0,145
+935,126.0,126
+936,133.0,133
+937,81.0,81
+938,200.0,200
+939,57.0,57
+940,200.0,200
+941,131.0,131
+942,200.0,200
+943,200.0,200
+944,200.0,200
+945,171.0,171
+946,200.0,200
+947,200.0,200
+948,200.0,200
+949,176.0,176
+950,110.0,110
+951,158.0,158
+952,137.0,137
+953,103.0,103
+954,200.0,200
+955,200.0,200
+956,200.0,200
+957,190.0,190
+958,130.0,130
+959,139.0,139
+960,200.0,200
+961,172.0,172
+962,152.0,152
+963,154.0,154
+964,52.0,52
+965,194.0,194
+966,52.0,52
+967,169.0,169
+968,200.0,200
+969,176.0,176
+970,127.0,127
+971,157.0,157
+972,200.0,200
+973,134.0,134
+974,138.0,138
+975,133.0,133
+976,170.0,170
+977,159.0,159
+978,88.0,88
+979,141.0,141
+980,117.0,117
+981,157.0,157
+982,145.0,145
+983,200.0,200
+984,129.0,129
+985,155.0,155
+986,83.0,83
+987,152.0,152
+988,156.0,156
+989,200.0,200
+990,135.0,135
+991,75.0,75
+992,138.0,138
+993,83.0,83
+994,200.0,200
+995,128.0,128
+996,122.0,122
+997,200.0,200
+998,34.0,34
+999,161.0,161
+1000,143.0,143
+1001,200.0,200
+1002,103.0,103
+1003,168.0,168
+1004,200.0,200
+1005,200.0,200
+1006,167.0,167
+1007,200.0,200
+1008,76.0,76
+1009,101.0,101
+1010,153.0,153
+1011,113.0,113
+1012,109.0,109
+1013,188.0,188
+1014,122.0,122
+1015,181.0,181
+1016,166.0,166
+1017,189.0,189
+1018,200.0,200
+1019,187.0,187
+1020,116.0,116
+1021,200.0,200
+1022,108.0,108
+1023,18.0,18
+1024,158.0,158
+1025,200.0,200
+1026,43.0,43
+1027,200.0,200
+1028,199.0,199
+1029,200.0,200
+1030,133.0,133
+1031,171.0,171
+1032,200.0,200
+1033,200.0,200
+1034,200.0,200
+1035,156.0,156
+1036,52.0,52
+1037,200.0,200
+1038,121.0,121
+1039,188.0,188
+1040,167.0,167
+1041,200.0,200
+1042,124.0,124
+1043,102.0,102
+1044,161.0,161
+1045,200.0,200
+1046,200.0,200
+1047,135.0,135
+1048,200.0,200
+1049,80.0,80
+1050,200.0,200
+1051,66.0,66
+1052,200.0,200
+1053,200.0,200
+1054,112.0,112
+1055,195.0,195
+1056,200.0,200
+1057,170.0,170
+1058,194.0,194
+1059,200.0,200
+1060,200.0,200
+1061,59.0,59
+1062,75.0,75
+1063,200.0,200
+1064,200.0,200
+1065,97.0,97
+1066,171.0,171
+1067,30.0,30
+1068,200.0,200
+1069,101.0,101
+1070,124.0,124
+1071,136.0,136
+1072,184.0,184
+1073,149.0,149
+1074,137.0,137
+1075,167.0,167
+1076,136.0,136
+1077,200.0,200
+1078,139.0,139
+1079,85.0,85
+1080,137.0,137
+1081,161.0,161
+1082,81.0,81
+1083,200.0,200
+1084,200.0,200
+1085,200.0,200
+1086,200.0,200
+1087,87.0,87
+1088,174.0,174
+1089,200.0,200
+1090,128.0,128
+1091,200.0,200
+1092,200.0,200
+1093,200.0,200
+1094,120.0,120
+1095,200.0,200
+1096,131.0,131
+1097,200.0,200
+1098,200.0,200
+1099,200.0,200
+1100,146.0,146
+1101,200.0,200
+1102,200.0,200
+1103,200.0,200
+1104,80.0,80
+1105,200.0,200
+1106,172.0,172
+1107,143.0,143
+1108,200.0,200
+1109,200.0,200
+1110,181.0,181
+1111,189.0,189
+1112,133.0,133
+1113,200.0,200
+1114,111.0,111
+1115,200.0,200
+1116,200.0,200
+1117,200.0,200
+1118,192.0,192
+1119,200.0,200
+1120,200.0,200
+1121,200.0,200
+1122,144.0,144
+1123,27.0,27
+1124,200.0,200
+1125,198.0,198
+1126,186.0,186
+1127,80.0,80
+1128,200.0,200
+1129,169.0,169
+1130,48.0,48
+1131,198.0,198
+1132,162.0,162
+1133,58.0,58
+1134,200.0,200
+1135,200.0,200
+1136,189.0,189
+1137,200.0,200
+1138,117.0,117
+1139,200.0,200
+1140,200.0,200
+1141,150.0,150
+1142,163.0,163
+1143,161.0,161
+1144,200.0,200
+1145,113.0,113
+1146,181.0,181
+1147,193.0,193
+1148,98.0,98
+1149,200.0,200
+1150,22.0,22
+1151,125.0,125
+1152,200.0,200
+1153,200.0,200
+1154,200.0,200
+1155,67.0,67
+1156,186.0,186
+1157,189.0,189
+1158,186.0,186
+1159,156.0,156
+1160,200.0,200
+1161,200.0,200
+1162,116.0,116
+1163,77.0,77
+1164,148.0,148
+1165,111.0,111
+1166,68.0,68
+1167,140.0,140
+1168,114.0,114
+1169,200.0,200
+1170,173.0,173
+1171,97.0,97
+1172,166.0,166
+1173,154.0,154
+1174,200.0,200
+1175,200.0,200
+1176,129.0,129
+1177,111.0,111
+1178,200.0,200
+1179,85.0,85
+1180,71.0,71
+1181,200.0,200
+1182,158.0,158
+1183,130.0,130
+1184,161.0,161
+1185,188.0,188
+1186,124.0,124
+1187,190.0,190
+1188,157.0,157
+1189,188.0,188
+1190,194.0,194
+1191,173.0,173
+1192,123.0,123
+1193,200.0,200
+1194,123.0,123
+1195,200.0,200
+1196,200.0,200
+1197,114.0,114
+1198,45.0,45
+1199,144.0,144
+1200,107.0,107
+1201,184.0,184
+1202,121.0,121
+1203,200.0,200
+1204,50.0,50
+1205,123.0,123
+1206,73.0,73
+1207,142.0,142
+1208,38.0,38
+1209,129.0,129
+1210,123.0,123
+1211,149.0,149
+1212,97.0,97
+1213,40.0,40
+1214,177.0,177
+1215,200.0,200
+1216,166.0,166
+1217,106.0,106
+1218,114.0,114
+1219,53.0,53
+1220,162.0,162
+1221,181.0,181
+1222,128.0,128
+1223,155.0,155
+1224,180.0,180
+1225,139.0,139
+1226,99.0,99
+1227,95.0,95
+1228,159.0,159
+1229,30.0,30
+1230,67.0,67
+1231,127.0,127
+1232,33.0,33
+1233,80.0,80
+1234,200.0,200
+1235,200.0,200
+1236,200.0,200
+1237,117.0,117
+1238,128.0,128
+1239,47.0,47
+1240,134.0,134
+1241,129.0,129
+1242,135.0,135
+1243,200.0,200
+1244,200.0,200
+1245,101.0,101
+1246,35.0,35
+1247,73.0,73
+1248,95.0,95
+1249,125.0,125
+1250,200.0,200
+1251,155.0,155
+1252,48.0,48
+1253,200.0,200
+1254,153.0,153
+1255,173.0,173
+1256,128.0,128
+1257,200.0,200
+1258,200.0,200
+1259,196.0,196
+1260,50.0,50
+1261,193.0,193
+1262,200.0,200
+1263,200.0,200
+1264,200.0,200
+1265,200.0,200
+1266,179.0,179
+1267,180.0,180
+1268,200.0,200
+1269,200.0,200
+1270,200.0,200
+1271,120.0,120
+1272,200.0,200
+1273,60.0,60
+1274,99.0,99
+1275,178.0,178
+1276,157.0,157
+1277,200.0,200
+1278,177.0,177
+1279,200.0,200
+1280,200.0,200
+1281,200.0,200
+1282,200.0,200
+1283,200.0,200
+1284,200.0,200
+1285,200.0,200
+1286,97.0,97
+1287,167.0,167
+1288,183.0,183
+1289,200.0,200
+1290,61.0,61
+1291,192.0,192
+1292,200.0,200
+1293,137.0,137
+1294,200.0,200
+1295,200.0,200
+1296,200.0,200
+1297,200.0,200
+1298,200.0,200
+1299,200.0,200
+1300,103.0,103
+1301,142.0,142
+1302,200.0,200
+1303,47.0,47
+1304,189.0,189
+1305,41.0,41
+1306,200.0,200
+1307,200.0,200
+1308,132.0,132
+1309,154.0,154
+1310,95.0,95
+1311,200.0,200
+1312,200.0,200
+1313,200.0,200
+1314,71.0,71
+1315,200.0,200
+1316,170.0,170
+1317,121.0,121
+1318,200.0,200
+1319,127.0,127
+1320,200.0,200
+1321,120.0,120
+1322,200.0,200
+1323,200.0,200
+1324,161.0,161
+1325,37.0,37
+1326,200.0,200
+1327,200.0,200
+1328,200.0,200
+1329,49.0,49
+1330,118.0,118
+1331,200.0,200
+1332,167.0,167
+1333,200.0,200
+1334,99.0,99
+1335,137.0,137
+1336,200.0,200
+1337,41.0,41
+1338,200.0,200
+1339,200.0,200
+1340,97.0,97
+1341,34.0,34
+1342,40.0,40
+1343,197.0,197
+1344,51.0,51
+1345,200.0,200
+1346,156.0,156
+1347,200.0,200
+1348,75.0,75
+1349,118.0,118
+1350,200.0,200
+1351,73.0,73
+1352,200.0,200
+1353,133.0,133
+1354,200.0,200
+1355,200.0,200
+1356,162.0,162
+1357,37.0,37
+1358,130.0,130
+1359,123.0,123
+1360,200.0,200
+1361,99.0,99
+1362,200.0,200
+1363,46.0,46
+1364,200.0,200
+1365,190.0,190
+1366,34.0,34
+1367,37.0,37
+1368,200.0,200
+1369,200.0,200
+1370,131.0,131
+1371,200.0,200
+1372,200.0,200
+1373,158.0,158
+1374,175.0,175
+1375,134.0,134
+1376,100.0,100
+1377,200.0,200
+1378,200.0,200
+1379,123.0,123
+1380,200.0,200
+1381,200.0,200
+1382,200.0,200
+1383,116.0,116
+1384,200.0,200
+1385,88.0,88
+1386,200.0,200
+1387,200.0,200
+1388,147.0,147
+1389,200.0,200
+1390,200.0,200
+1391,84.0,84
+1392,200.0,200
+1393,184.0,184
+1394,200.0,200
+1395,179.0,179
+1396,200.0,200
+1397,200.0,200
+1398,130.0,130
+1399,29.0,29
+1400,200.0,200
+1401,200.0,200
+1402,200.0,200
+1403,200.0,200
+1404,195.0,195
+1405,118.0,118
+1406,200.0,200
+1407,200.0,200
+1408,200.0,200
+1409,154.0,154
+1410,185.0,185
+1411,200.0,200
+1412,152.0,152
+1413,200.0,200
+1414,200.0,200
+1415,200.0,200
+1416,200.0,200
+1417,31.0,31
+1418,200.0,200
+1419,134.0,134
+1420,172.0,172
+1421,112.0,112
+1422,153.0,153
+1423,199.0,199
+1424,200.0,200
+1425,200.0,200
+1426,200.0,200
+1427,200.0,200
+1428,166.0,166
+1429,200.0,200
+1430,200.0,200
+1431,199.0,199
+1432,195.0,195
+1433,174.0,174
+1434,46.0,46
+1435,174.0,174
+1436,23.0,23
+1437,157.0,157
+1438,200.0,200
+1439,170.0,170
+1440,92.0,92
+1441,200.0,200
+1442,200.0,200
+1443,72.0,72
+1444,200.0,200
+1445,200.0,200
+1446,200.0,200
+1447,118.0,118
+1448,119.0,119
+1449,109.0,109
+1450,101.0,101
+1451,32.0,32
+1452,197.0,197
+1453,154.0,154
+1454,138.0,138
+1455,141.0,141
+1456,141.0,141
+1457,200.0,200
+1458,90.0,90
+1459,200.0,200
+1460,122.0,122
+1461,144.0,144
+1462,155.0,155
+1463,200.0,200
+1464,160.0,160
+1465,129.0,129
+1466,200.0,200
+1467,112.0,112
+1468,132.0,132
+1469,144.0,144
+1470,184.0,184
+1471,200.0,200
+1472,26.0,26
+1473,200.0,200
+1474,26.0,26
+1475,128.0,128
+1476,200.0,200
+1477,173.0,173
+1478,145.0,145
+1479,128.0,128
+1480,118.0,118
+1481,50.0,50
+1482,184.0,184
+1483,166.0,166
+1484,142.0,142
+1485,104.0,104
+1486,180.0,180
+1487,200.0,200
+1488,200.0,200
+1489,200.0,200
+1490,123.0,123
+1491,200.0,200
+1492,140.0,140
+1493,200.0,200
+1494,200.0,200
+1495,200.0,200
+1496,200.0,200
+1497,117.0,117
+1498,13.0,13
+1499,200.0,200
+1500,127.0,127
+1501,200.0,200
+1502,200.0,200
+1503,200.0,200
+1504,200.0,200
+1505,200.0,200
+1506,200.0,200
+1507,77.0,77
+1508,152.0,152
+1509,38.0,38
+1510,125.0,125
+1511,154.0,154
+1512,142.0,142
+1513,120.0,120
+1514,200.0,200
+1515,191.0,191
+1516,21.0,21
+1517,101.0,101
+1518,191.0,191
+1519,170.0,170
+1520,200.0,200
+1521,30.0,30
+1522,191.0,191
+1523,200.0,200
+1524,200.0,200
+1525,200.0,200
+1526,135.0,135
+1527,200.0,200
+1528,185.0,185
+1529,123.0,123
+1530,156.0,156
+1531,200.0,200
+1532,140.0,140
+1533,200.0,200
+1534,136.0,136
+1535,139.0,139
+1536,200.0,200
+1537,169.0,169
+1538,200.0,200
+1539,200.0,200
+1540,103.0,103
+1541,91.0,91
+1542,200.0,200
+1543,200.0,200
+1544,65.0,65
+1545,200.0,200
+1546,169.0,169
+1547,59.0,59
+1548,175.0,175
+1549,200.0,200
+1550,200.0,200
+1551,189.0,189
+1552,200.0,200
+1553,200.0,200
+1554,151.0,151
+1555,108.0,108
+1556,146.0,146
+1557,200.0,200
+1558,198.0,198
+1559,119.0,119
+1560,105.0,105
+1561,175.0,175
+1562,200.0,200
+1563,136.0,136
+1564,200.0,200
+1565,86.0,86
+1566,200.0,200
+1567,200.0,200
+1568,200.0,200
+1569,124.0,124
+1570,200.0,200
+1571,122.0,122
+1572,200.0,200
+1573,200.0,200
+1574,47.0,47
+1575,200.0,200
+1576,194.0,194
+1577,200.0,200
+1578,121.0,121
+1579,200.0,200
+1580,200.0,200
+1581,190.0,190
+1582,200.0,200
+1583,200.0,200
+1584,200.0,200
+1585,145.0,145
+1586,121.0,121
+1587,198.0,198
+1588,200.0,200
+1589,200.0,200
+1590,130.0,130
+1591,185.0,185
+1592,193.0,193
+1593,200.0,200
+1594,200.0,200
+1595,200.0,200
+1596,200.0,200
+1597,168.0,168
+1598,200.0,200
+1599,200.0,200
diff --git a/projects/codes/A2C/README.md b/projects/codes/A3C/README.md
similarity index 100%
rename from projects/codes/A2C/README.md
rename to projects/codes/A3C/README.md
diff --git a/projects/codes/A3C/a3c.py b/projects/codes/A3C/a3c.py
new file mode 100644
index 0000000..ba0ed7c
--- /dev/null
+++ b/projects/codes/A3C/a3c.py
@@ -0,0 +1,56 @@
+#!/usr/bin/env python
+# coding=utf-8
+'''
+Author: JiangJi
+Email: johnjim0816@gmail.com
+Date: 2021-05-03 22:16:08
+LastEditor: JiangJi
+LastEditTime: 2022-07-20 23:54:40
+Description: 
+Environment: 
+'''
+import torch
+import torch.optim as optim
+import torch.nn as nn
+import torch.nn.functional as F
+from torch.distributions import Categorical
+
+class ActorCritic(nn.Module):
+    ''' Actor-critic network for A2C: separate actor (policy) and critic (value) MLPs fed the same input
+    '''
+    def __init__(self, input_dim, output_dim, hidden_dim):
+        super(ActorCritic, self).__init__()
+        self.critic = nn.Sequential(  # value head: input_dim -> hidden_dim -> 1
+            nn.Linear(input_dim, hidden_dim),
+            nn.ReLU(),
+            nn.Linear(hidden_dim, 1)
+        )
+        
+        self.actor = nn.Sequential(  # policy head: input_dim -> hidden_dim -> output_dim probabilities
+            nn.Linear(input_dim, hidden_dim),
+            nn.ReLU(),
+            nn.Linear(hidden_dim, output_dim),
+            nn.Softmax(dim=1),  # normalizes along dim 1, so x must be at least 2-D; presumably (batch, input_dim) -- TODO confirm
+        )
+        
+    def forward(self, x):
+        value = self.critic(x)
+        probs = self.actor(x)
+        dist  = Categorical(probs)  # categorical distribution built from the actor's softmax output
+        return dist, value  # (action distribution, critic output)
+class A2C:
+    ''' A2C algorithm: holds the ActorCritic model, its Adam optimizer, and the discounted-return helper
+    '''
+    def __init__(self,n_states,n_actions,cfg) -> None:
+        self.gamma = cfg.gamma  # discount factor applied in compute_returns
+        self.device = torch.device(cfg.device)
+        self.model = ActorCritic(n_states, n_actions, cfg.hidden_size).to(self.device)  # NOTE(review): reads cfg.hidden_size -- confirm the config defines it
+        self.optimizer = optim.Adam(self.model.parameters())  # no lr passed, so Adam's default settings apply
+
+    def compute_returns(self,next_value, rewards, masks):
+        R = next_value  # running return, seeded with next_value
+        returns = []
+        for step in reversed(range(len(rewards))):  # walk the trajectory backwards
+            R = rewards[step] + self.gamma * R * masks[step]  # masks[step] scales the carried return; presumably 0 at episode ends -- TODO confirm
+            returns.insert(0, R)  # prepend so returns stays in forward step order
+        return returns
\ No newline at end of file
diff --git a/projects/codes/A2C/outputs/CartPole-v0/20220713-221850/results/params.json b/projects/codes/A3C/outputs/CartPole-v0/20220713-221850/results/params.json
similarity index 100%
rename from projects/codes/A2C/outputs/CartPole-v0/20220713-221850/results/params.json
rename to projects/codes/A3C/outputs/CartPole-v0/20220713-221850/results/params.json
diff --git a/projects/codes/A2C/outputs/CartPole-v0/20220713-221850/results/train_ma_rewards.npy b/projects/codes/A3C/outputs/CartPole-v0/20220713-221850/results/train_ma_rewards.npy
similarity index 100%
rename from projects/codes/A2C/outputs/CartPole-v0/20220713-221850/results/train_ma_rewards.npy
rename to projects/codes/A3C/outputs/CartPole-v0/20220713-221850/results/train_ma_rewards.npy
diff --git a/projects/codes/A2C/outputs/CartPole-v0/20220713-221850/results/train_rewards.npy b/projects/codes/A3C/outputs/CartPole-v0/20220713-221850/results/train_rewards.npy
similarity index 100%
rename from projects/codes/A2C/outputs/CartPole-v0/20220713-221850/results/train_rewards.npy
rename to projects/codes/A3C/outputs/CartPole-v0/20220713-221850/results/train_rewards.npy
diff --git a/projects/codes/A2C/outputs/CartPole-v0/20220713-221850/results/train_rewards_curve.png b/projects/codes/A3C/outputs/CartPole-v0/20220713-221850/results/train_rewards_curve.png
similarity index 100%
rename from projects/codes/A2C/outputs/CartPole-v0/20220713-221850/results/train_rewards_curve.png
rename to projects/codes/A3C/outputs/CartPole-v0/20220713-221850/results/train_rewards_curve.png
diff --git a/projects/codes/A2C/task0.py b/projects/codes/A3C/task0.py
similarity index 99%
rename from projects/codes/A2C/task0.py
rename to projects/codes/A3C/task0.py
index e29266b..09dcceb 100644
--- a/projects/codes/A2C/task0.py
+++ b/projects/codes/A3C/task0.py
@@ -10,7 +10,7 @@ import torch.optim as optim
 import datetime
 import argparse
 from common.multiprocessing_env import SubprocVecEnv
-from a2c import ActorCritic
+from a3c import ActorCritic
 from common.utils import save_results, make_dir
 from common.utils import plot_rewards, save_args
 
diff --git a/projects/codes/DQN/main.py b/projects/codes/DQN/main.py
index ecf281d..d3c022c 100644
--- a/projects/codes/DQN/main.py
+++ b/projects/codes/DQN/main.py
@@ -24,6 +24,7 @@ def get_args():
     parser.add_argument('--env_name',default='CartPole-v0',type=str,help="name of environment")
     parser.add_argument('--train_eps',default=200,type=int,help="episodes of training")
     parser.add_argument('--test_eps',default=20,type=int,help="episodes of testing")
+    parser.add_argument('--ep_max_steps',default = 100000,type=int,help="steps per episode, much larger value can simulate infinite steps")
     parser.add_argument('--gamma',default=0.95,type=float,help="discounted factor")
     parser.add_argument('--epsilon_start',default=0.95,type=float,help="initial value of epsilon")
     parser.add_argument('--epsilon_end',default=0.01,type=float,help="final value of epsilon")
@@ -72,7 +73,7 @@ def train(cfg, env, agent):
         ep_reward = 0  # reward per episode
         ep_step = 0
         state = env.reset()  # reset and obtain initial state
-        while True:
+        for _ in range(cfg['ep_max_steps']):
             ep_step += 1
             action = agent.sample_action(state)  # sample action
             next_state, reward, done, _ = env.step(action)  # update env and return transitions
@@ -91,7 +92,7 @@ def train(cfg, env, agent):
             print(f'Episode: {i_ep+1}/{cfg["train_eps"]}, Reward: {ep_reward:.2f}: Epislon: {agent.epsilon:.3f}')
     print("Finish training!")
     env.close()
-    res_dic = {'episodes':range(len(rewards)),'rewards':rewards}
+    res_dic = {'episodes':range(len(rewards)),'rewards':rewards,'steps':steps}
     return res_dic
 
 def test(cfg, env, agent):
@@ -103,7 +104,7 @@ def test(cfg, env, agent):
         ep_reward = 0  # reward per episode
         ep_step = 0
         state = env.reset()  # reset and obtain initial state
-        while True:
+        for _ in range(cfg['ep_max_steps']):
             ep_step+=1
             action = agent.predict_action(state)  # predict action
             next_state, reward, done, _ = env.step(action)  
@@ -116,7 +117,7 @@ def test(cfg, env, agent):
         print(f"Episode: {i_ep+1}/{cfg['test_eps']}，Reward: {ep_reward:.2f}")
     print("Finish testing!")
     env.close()
-    return {'episodes':range(len(rewards)),'rewards':rewards}
+    return {'episodes':range(len(rewards)),'rewards':rewards,'steps':steps}
 
 
 if __name__ == "__main__":
diff --git a/projects/codes/DQN/outputs/CartPole-v1/20220828-214702/models/checkpoint.pt b/projects/codes/DQN/outputs/CartPole-v1/20220828-214702/models/checkpoint.pt
new file mode 100644
index 0000000..e357d49
Binary files /dev/null and b/projects/codes/DQN/outputs/CartPole-v1/20220828-214702/models/checkpoint.pt differ
diff --git a/projects/codes/DQN/outputs/CartPole-v1/20220828-214702/results/params.json b/projects/codes/DQN/outputs/CartPole-v1/20220828-214702/results/params.json
new file mode 100644
index 0000000..83d8c57
--- /dev/null
+++ b/projects/codes/DQN/outputs/CartPole-v1/20220828-214702/results/params.json
@@ -0,0 +1 @@
+{"algo_name": "DQN", "env_name": "CartPole-v1", "train_eps": 2000, "test_eps": 20, "ep_max_steps": 100000, "gamma": 0.99, "epsilon_start": 0.95, "epsilon_end": 0.01, "epsilon_decay": 6000, "lr": 1e-05, "memory_capacity": 200000, "batch_size": 64, "target_update": 4, "hidden_dim": 256, "device": "cuda", "seed": 10, "show_fig": false, "save_fig": true, "result_path": "C:\\Users\\24438\\Desktop\\rl-tutorials\\codes\\DQN/outputs/CartPole-v1/20220828-214702/results", "model_path": "C:\\Users\\24438\\Desktop\\rl-tutorials\\codes\\DQN/outputs/CartPole-v1/20220828-214702/models", "n_states": 4, "n_actions": 2}
\ No newline at end of file
diff --git a/projects/codes/DQN/outputs/CartPole-v1/20220828-214702/results/testing_curve.png b/projects/codes/DQN/outputs/CartPole-v1/20220828-214702/results/testing_curve.png
new file mode 100644
index 0000000..f97050f
Binary files /dev/null and b/projects/codes/DQN/outputs/CartPole-v1/20220828-214702/results/testing_curve.png differ
diff --git a/projects/codes/DQN/outputs/CartPole-v1/20220828-214702/results/testing_results.csv b/projects/codes/DQN/outputs/CartPole-v1/20220828-214702/results/testing_results.csv
new file mode 100644
index 0000000..bb0b8f6
--- /dev/null
+++ b/projects/codes/DQN/outputs/CartPole-v1/20220828-214702/results/testing_results.csv
@@ -0,0 +1,21 @@
+episodes,rewards,steps
+0,371.0,371
+1,446.0,446
+2,300.0,300
+3,500.0,500
+4,313.0,313
+5,500.0,500
+6,341.0,341
+7,489.0,489
+8,304.0,304
+9,358.0,358
+10,278.0,278
+11,500.0,500
+12,500.0,500
+13,500.0,500
+14,500.0,500
+15,476.0,476
+16,308.0,308
+17,394.0,394
+18,500.0,500
+19,500.0,500
diff --git a/projects/codes/DQN/outputs/CartPole-v1/20220828-214702/results/training_curve.png b/projects/codes/DQN/outputs/CartPole-v1/20220828-214702/results/training_curve.png
new file mode 100644
index 0000000..a14bb8c
Binary files /dev/null and b/projects/codes/DQN/outputs/CartPole-v1/20220828-214702/results/training_curve.png differ
diff --git a/projects/codes/DQN/outputs/CartPole-v1/20220828-214702/results/training_results.csv b/projects/codes/DQN/outputs/CartPole-v1/20220828-214702/results/training_results.csv
new file mode 100644
index 0000000..6bfc2ad
--- /dev/null
+++ b/projects/codes/DQN/outputs/CartPole-v1/20220828-214702/results/training_results.csv
@@ -0,0 +1,2001 @@
+episodes,rewards,steps
+0,38.0,38
+1,16.0,16
+2,28.0,28
+3,10.0,10
+4,18.0,18
+5,10.0,10
+6,8.0,8
+7,19.0,19
+8,18.0,18
+9,32.0,32
+10,12.0,12
+11,14.0,14
+12,16.0,16
+13,20.0,20
+14,33.0,33
+15,24.0,24
+16,28.0,28
+17,33.0,33
+18,31.0,31
+19,14.0,14
+20,10.0,10
+21,19.0,19
+22,16.0,16
+23,19.0,19
+24,11.0,11
+25,23.0,23
+26,20.0,20
+27,26.0,26
+28,16.0,16
+29,16.0,16
+30,11.0,11
+31,13.0,13
+32,16.0,16
+33,42.0,42
+34,15.0,15
+35,38.0,38
+36,16.0,16
+37,46.0,46
+38,28.0,28
+39,60.0,60
+40,32.0,32
+41,9.0,9
+42,23.0,23
+43,19.0,19
+44,12.0,12
+45,17.0,17
+46,27.0,27
+47,22.0,22
+48,26.0,26
+49,11.0,11
+50,15.0,15
+51,9.0,9
+52,24.0,24
+53,29.0,29
+54,11.0,11
+55,16.0,16
+56,23.0,23
+57,14.0,14
+58,11.0,11
+59,16.0,16
+60,24.0,24
+61,27.0,27
+62,11.0,11
+63,20.0,20
+64,14.0,14
+65,11.0,11
+66,15.0,15
+67,16.0,16
+68,13.0,13
+69,28.0,28
+70,14.0,14
+71,10.0,10
+72,28.0,28
+73,20.0,20
+74,13.0,13
+75,11.0,11
+76,27.0,27
+77,15.0,15
+78,14.0,14
+79,14.0,14
+80,10.0,10
+81,11.0,11
+82,14.0,14
+83,13.0,13
+84,15.0,15
+85,13.0,13
+86,10.0,10
+87,11.0,11
+88,11.0,11
+89,13.0,13
+90,14.0,14
+91,25.0,25
+92,14.0,14
+93,14.0,14
+94,19.0,19
+95,18.0,18
+96,9.0,9
+97,9.0,9
+98,11.0,11
+99,19.0,19
+100,10.0,10
+101,40.0,40
+102,10.0,10
+103,13.0,13
+104,13.0,13
+105,18.0,18
+106,13.0,13
+107,11.0,11
+108,11.0,11
+109,18.0,18
+110,20.0,20
+111,10.0,10
+112,24.0,24
+113,9.0,9
+114,10.0,10
+115,13.0,13
+116,21.0,21
+117,12.0,12
+118,14.0,14
+119,10.0,10
+120,10.0,10
+121,16.0,16
+122,10.0,10
+123,18.0,18
+124,13.0,13
+125,17.0,17
+126,14.0,14
+127,12.0,12
+128,16.0,16
+129,11.0,11
+130,15.0,15
+131,10.0,10
+132,13.0,13
+133,17.0,17
+134,9.0,9
+135,34.0,34
+136,23.0,23
+137,14.0,14
+138,17.0,17
+139,13.0,13
+140,27.0,27
+141,15.0,15
+142,16.0,16
+143,15.0,15
+144,11.0,11
+145,12.0,12
+146,9.0,9
+147,30.0,30
+148,12.0,12
+149,12.0,12
+150,13.0,13
+151,14.0,14
+152,17.0,17
+153,10.0,10
+154,15.0,15
+155,10.0,10
+156,14.0,14
+157,22.0,22
+158,12.0,12
+159,11.0,11
+160,34.0,34
+161,11.0,11
+162,12.0,12
+163,18.0,18
+164,9.0,9
+165,17.0,17
+166,16.0,16
+167,10.0,10
+168,17.0,17
+169,11.0,11
+170,21.0,21
+171,15.0,15
+172,17.0,17
+173,11.0,11
+174,23.0,23
+175,10.0,10
+176,25.0,25
+177,12.0,12
+178,10.0,10
+179,16.0,16
+180,14.0,14
+181,21.0,21
+182,11.0,11
+183,12.0,12
+184,11.0,11
+185,10.0,10
+186,11.0,11
+187,17.0,17
+188,10.0,10
+189,14.0,14
+190,11.0,11
+191,12.0,12
+192,9.0,9
+193,11.0,11
+194,11.0,11
+195,16.0,16
+196,15.0,15
+197,10.0,10
+198,9.0,9
+199,17.0,17
+200,12.0,12
+201,9.0,9
+202,11.0,11
+203,9.0,9
+204,9.0,9
+205,16.0,16
+206,15.0,15
+207,13.0,13
+208,11.0,11
+209,13.0,13
+210,17.0,17
+211,8.0,8
+212,8.0,8
+213,12.0,12
+214,15.0,15
+215,13.0,13
+216,14.0,14
+217,11.0,11
+218,14.0,14
+219,13.0,13
+220,12.0,12
+221,9.0,9
+222,10.0,10
+223,10.0,10
+224,11.0,11
+225,9.0,9
+226,16.0,16
+227,23.0,23
+228,13.0,13
+229,16.0,16
+230,9.0,9
+231,12.0,12
+232,11.0,11
+233,10.0,10
+234,13.0,13
+235,15.0,15
+236,12.0,12
+237,11.0,11
+238,9.0,9
+239,11.0,11
+240,11.0,11
+241,11.0,11
+242,12.0,12
+243,8.0,8
+244,8.0,8
+245,10.0,10
+246,12.0,12
+247,12.0,12
+248,9.0,9
+249,12.0,12
+250,13.0,13
+251,11.0,11
+252,12.0,12
+253,10.0,10
+254,10.0,10
+255,11.0,11
+256,17.0,17
+257,11.0,11
+258,14.0,14
+259,12.0,12
+260,10.0,10
+261,11.0,11
+262,16.0,16
+263,13.0,13
+264,13.0,13
+265,15.0,15
+266,11.0,11
+267,8.0,8
+268,13.0,13
+269,15.0,15
+270,11.0,11
+271,9.0,9
+272,10.0,10
+273,11.0,11
+274,11.0,11
+275,9.0,9
+276,18.0,18
+277,13.0,13
+278,11.0,11
+279,14.0,14
+280,12.0,12
+281,16.0,16
+282,10.0,10
+283,12.0,12
+284,13.0,13
+285,9.0,9
+286,14.0,14
+287,26.0,26
+288,9.0,9
+289,10.0,10
+290,12.0,12
+291,13.0,13
+292,8.0,8
+293,13.0,13
+294,12.0,12
+295,11.0,11
+296,14.0,14
+297,10.0,10
+298,11.0,11
+299,12.0,12
+300,9.0,9
+301,11.0,11
+302,11.0,11
+303,15.0,15
+304,11.0,11
+305,11.0,11
+306,13.0,13
+307,8.0,8
+308,9.0,9
+309,10.0,10
+310,12.0,12
+311,13.0,13
+312,9.0,9
+313,15.0,15
+314,11.0,11
+315,12.0,12
+316,12.0,12
+317,15.0,15
+318,13.0,13
+319,8.0,8
+320,16.0,16
+321,9.0,9
+322,11.0,11
+323,12.0,12
+324,16.0,16
+325,9.0,9
+326,13.0,13
+327,13.0,13
+328,15.0,15
+329,12.0,12
+330,12.0,12
+331,8.0,8
+332,11.0,11
+333,15.0,15
+334,12.0,12
+335,11.0,11
+336,13.0,13
+337,13.0,13
+338,16.0,16
+339,11.0,11
+340,8.0,8
+341,10.0,10
+342,14.0,14
+343,18.0,18
+344,11.0,11
+345,10.0,10
+346,11.0,11
+347,11.0,11
+348,20.0,20
+349,14.0,14
+350,10.0,10
+351,14.0,14
+352,9.0,9
+353,9.0,9
+354,12.0,12
+355,9.0,9
+356,10.0,10
+357,9.0,9
+358,10.0,10
+359,10.0,10
+360,22.0,22
+361,11.0,11
+362,12.0,12
+363,11.0,11
+364,8.0,8
+365,24.0,24
+366,11.0,11
+367,10.0,10
+368,10.0,10
+369,10.0,10
+370,10.0,10
+371,9.0,9
+372,9.0,9
+373,21.0,21
+374,10.0,10
+375,12.0,12
+376,14.0,14
+377,15.0,15
+378,10.0,10
+379,17.0,17
+380,8.0,8
+381,14.0,14
+382,11.0,11
+383,9.0,9
+384,10.0,10
+385,9.0,9
+386,15.0,15
+387,11.0,11
+388,17.0,17
+389,12.0,12
+390,11.0,11
+391,15.0,15
+392,10.0,10
+393,13.0,13
+394,12.0,12
+395,10.0,10
+396,12.0,12
+397,9.0,9
+398,14.0,14
+399,9.0,9
+400,13.0,13
+401,10.0,10
+402,13.0,13
+403,16.0,16
+404,9.0,9
+405,8.0,8
+406,11.0,11
+407,9.0,9
+408,15.0,15
+409,12.0,12
+410,15.0,15
+411,15.0,15
+412,15.0,15
+413,14.0,14
+414,12.0,12
+415,11.0,11
+416,14.0,14
+417,12.0,12
+418,14.0,14
+419,11.0,11
+420,8.0,8
+421,9.0,9
+422,13.0,13
+423,13.0,13
+424,8.0,8
+425,10.0,10
+426,10.0,10
+427,15.0,15
+428,14.0,14
+429,9.0,9
+430,12.0,12
+431,13.0,13
+432,12.0,12
+433,10.0,10
+434,14.0,14
+435,11.0,11
+436,12.0,12
+437,14.0,14
+438,10.0,10
+439,12.0,12
+440,9.0,9
+441,15.0,15
+442,12.0,12
+443,10.0,10
+444,8.0,8
+445,12.0,12
+446,14.0,14
+447,12.0,12
+448,9.0,9
+449,10.0,10
+450,13.0,13
+451,9.0,9
+452,10.0,10
+453,9.0,9
+454,11.0,11
+455,10.0,10
+456,18.0,18
+457,16.0,16
+458,10.0,10
+459,11.0,11
+460,15.0,15
+461,12.0,12
+462,11.0,11
+463,12.0,12
+464,11.0,11
+465,10.0,10
+466,12.0,12
+467,10.0,10
+468,12.0,12
+469,15.0,15
+470,11.0,11
+471,10.0,10
+472,12.0,12
+473,9.0,9
+474,13.0,13
+475,11.0,11
+476,15.0,15
+477,10.0,10
+478,11.0,11
+479,13.0,13
+480,10.0,10
+481,10.0,10
+482,10.0,10
+483,10.0,10
+484,15.0,15
+485,11.0,11
+486,12.0,12
+487,16.0,16
+488,10.0,10
+489,16.0,16
+490,11.0,11
+491,9.0,9
+492,9.0,9
+493,18.0,18
+494,10.0,10
+495,9.0,9
+496,40.0,40
+497,21.0,21
+498,10.0,10
+499,36.0,36
+500,37.0,37
+501,22.0,22
+502,30.0,30
+503,23.0,23
+504,35.0,35
+505,48.0,48
+506,32.0,32
+507,21.0,21
+508,28.0,28
+509,29.0,29
+510,10.0,10
+511,27.0,27
+512,20.0,20
+513,23.0,23
+514,24.0,24
+515,21.0,21
+516,25.0,25
+517,20.0,20
+518,15.0,15
+519,23.0,23
+520,14.0,14
+521,18.0,18
+522,18.0,18
+523,18.0,18
+524,18.0,18
+525,20.0,20
+526,13.0,13
+527,21.0,21
+528,20.0,20
+529,17.0,17
+530,17.0,17
+531,17.0,17
+532,20.0,20
+533,15.0,15
+534,17.0,17
+535,17.0,17
+536,16.0,16
+537,16.0,16
+538,14.0,14
+539,21.0,21
+540,22.0,22
+541,14.0,14
+542,20.0,20
+543,25.0,25
+544,18.0,18
+545,22.0,22
+546,21.0,21
+547,20.0,20
+548,23.0,23
+549,20.0,20
+550,20.0,20
+551,25.0,25
+552,18.0,18
+553,14.0,14
+554,16.0,16
+555,16.0,16
+556,15.0,15
+557,26.0,26
+558,18.0,18
+559,20.0,20
+560,27.0,27
+561,18.0,18
+562,20.0,20
+563,20.0,20
+564,19.0,19
+565,26.0,26
+566,21.0,21
+567,25.0,25
+568,24.0,24
+569,24.0,24
+570,24.0,24
+571,17.0,17
+572,28.0,28
+573,20.0,20
+574,22.0,22
+575,16.0,16
+576,22.0,22
+577,14.0,14
+578,27.0,27
+579,29.0,29
+580,19.0,19
+581,22.0,22
+582,29.0,29
+583,29.0,29
+584,23.0,23
+585,22.0,22
+586,21.0,21
+587,18.0,18
+588,28.0,28
+589,29.0,29
+590,23.0,23
+591,23.0,23
+592,20.0,20
+593,32.0,32
+594,38.0,38
+595,29.0,29
+596,25.0,25
+597,22.0,22
+598,37.0,37
+599,20.0,20
+600,17.0,17
+601,22.0,22
+602,23.0,23
+603,28.0,28
+604,31.0,31
+605,23.0,23
+606,26.0,26
+607,22.0,22
+608,27.0,27
+609,32.0,32
+610,33.0,33
+611,27.0,27
+612,23.0,23
+613,38.0,38
+614,26.0,26
+615,27.0,27
+616,30.0,30
+617,22.0,22
+618,27.0,27
+619,38.0,38
+620,34.0,34
+621,39.0,39
+622,24.0,24
+623,29.0,29
+624,26.0,26
+625,30.0,30
+626,33.0,33
+627,34.0,34
+628,27.0,27
+629,26.0,26
+630,27.0,27
+631,37.0,37
+632,38.0,38
+633,46.0,46
+634,48.0,48
+635,56.0,56
+636,39.0,39
+637,18.0,18
+638,63.0,63
+639,51.0,51
+640,45.0,45
+641,27.0,27
+642,56.0,56
+643,32.0,32
+644,49.0,49
+645,40.0,40
+646,59.0,59
+647,52.0,52
+648,36.0,36
+649,43.0,43
+650,54.0,54
+651,59.0,59
+652,58.0,58
+653,61.0,61
+654,66.0,66
+655,38.0,38
+656,33.0,33
+657,96.0,96
+658,82.0,82
+659,56.0,56
+660,42.0,42
+661,38.0,38
+662,48.0,48
+663,83.0,83
+664,33.0,33
+665,72.0,72
+666,41.0,41
+667,57.0,57
+668,54.0,54
+669,69.0,69
+670,63.0,63
+671,81.0,81
+672,69.0,69
+673,65.0,65
+674,55.0,55
+675,64.0,64
+676,54.0,54
+677,93.0,93
+678,47.0,47
+679,84.0,84
+680,46.0,46
+681,63.0,63
+682,51.0,51
+683,64.0,64
+684,58.0,58
+685,72.0,72
+686,35.0,35
+687,59.0,59
+688,124.0,124
+689,64.0,64
+690,59.0,59
+691,77.0,77
+692,55.0,55
+693,63.0,63
+694,100.0,100
+695,100.0,100
+696,58.0,58
+697,85.0,85
+698,50.0,50
+699,57.0,57
+700,59.0,59
+701,72.0,72
+702,200.0,200
+703,118.0,118
+704,63.0,63
+705,125.0,125
+706,80.0,80
+707,49.0,49
+708,52.0,52
+709,71.0,71
+710,68.0,68
+711,71.0,71
+712,60.0,60
+713,58.0,58
+714,192.0,192
+715,57.0,57
+716,93.0,93
+717,107.0,107
+718,59.0,59
+719,71.0,71
+720,81.0,81
+721,76.0,76
+722,98.0,98
+723,95.0,95
+724,99.0,99
+725,132.0,132
+726,99.0,99
+727,58.0,58
+728,95.0,95
+729,79.0,79
+730,70.0,70
+731,76.0,76
+732,58.0,58
+733,174.0,174
+734,58.0,58
+735,178.0,178
+736,92.0,92
+737,114.0,114
+738,101.0,101
+739,59.0,59
+740,171.0,171
+741,179.0,179
+742,85.0,85
+743,115.0,115
+744,74.0,74
+745,99.0,99
+746,174.0,174
+747,124.0,124
+748,101.0,101
+749,106.0,106
+750,75.0,75
+751,70.0,70
+752,139.0,139
+753,76.0,76
+754,86.0,86
+755,82.0,82
+756,51.0,51
+757,78.0,78
+758,67.0,67
+759,72.0,72
+760,83.0,83
+761,58.0,58
+762,109.0,109
+763,62.0,62
+764,74.0,74
+765,83.0,83
+766,111.0,111
+767,67.0,67
+768,57.0,57
+769,59.0,59
+770,112.0,112
+771,197.0,197
+772,111.0,111
+773,90.0,90
+774,111.0,111
+775,81.0,81
+776,72.0,72
+777,80.0,80
+778,130.0,130
+779,156.0,156
+780,100.0,100
+781,85.0,85
+782,66.0,66
+783,76.0,76
+784,105.0,105
+785,57.0,57
+786,201.0,201
+787,176.0,176
+788,175.0,175
+789,198.0,198
+790,73.0,73
+791,79.0,79
+792,59.0,59
+793,55.0,55
+794,84.0,84
+795,131.0,131
+796,67.0,67
+797,199.0,199
+798,94.0,94
+799,97.0,97
+800,94.0,94
+801,185.0,185
+802,98.0,98
+803,136.0,136
+804,141.0,141
+805,81.0,81
+806,77.0,77
+807,100.0,100
+808,99.0,99
+809,133.0,133
+810,154.0,154
+811,74.0,74
+812,79.0,79
+813,94.0,94
+814,168.0,168
+815,338.0,338
+816,64.0,64
+817,112.0,112
+818,69.0,69
+819,143.0,143
+820,170.0,170
+821,170.0,170
+822,77.0,77
+823,83.0,83
+824,104.0,104
+825,152.0,152
+826,198.0,198
+827,159.0,159
+828,235.0,235
+829,76.0,76
+830,201.0,201
+831,289.0,289
+832,113.0,113
+833,294.0,294
+834,74.0,74
+835,416.0,416
+836,194.0,194
+837,85.0,85
+838,170.0,170
+839,208.0,208
+840,177.0,177
+841,83.0,83
+842,82.0,82
+843,183.0,183
+844,90.0,90
+845,398.0,398
+846,244.0,244
+847,99.0,99
+848,310.0,310
+849,195.0,195
+850,183.0,183
+851,162.0,162
+852,115.0,115
+853,82.0,82
+854,233.0,233
+855,102.0,102
+856,262.0,262
+857,300.0,300
+858,245.0,245
+859,299.0,299
+860,150.0,150
+861,199.0,199
+862,79.0,79
+863,74.0,74
+864,113.0,113
+865,152.0,152
+866,126.0,126
+867,68.0,68
+868,185.0,185
+869,156.0,156
+870,63.0,63
+871,121.0,121
+872,83.0,83
+873,72.0,72
+874,337.0,337
+875,179.0,179
+876,325.0,325
+877,115.0,115
+878,217.0,217
+879,74.0,74
+880,90.0,90
+881,218.0,218
+882,82.0,82
+883,173.0,173
+884,106.0,106
+885,83.0,83
+886,68.0,68
+887,173.0,173
+888,159.0,159
+889,57.0,57
+890,80.0,80
+891,302.0,302
+892,71.0,71
+893,59.0,59
+894,153.0,153
+895,129.0,129
+896,63.0,63
+897,147.0,147
+898,82.0,82
+899,186.0,186
+900,74.0,74
+901,81.0,81
+902,61.0,61
+903,88.0,88
+904,101.0,101
+905,60.0,60
+906,154.0,154
+907,202.0,202
+908,96.0,96
+909,81.0,81
+910,91.0,91
+911,77.0,77
+912,63.0,63
+913,154.0,154
+914,57.0,57
+915,86.0,86
+916,84.0,84
+917,66.0,66
+918,141.0,141
+919,88.0,88
+920,89.0,89
+921,49.0,49
+922,97.0,97
+923,50.0,50
+924,211.0,211
+925,69.0,69
+926,278.0,278
+927,75.0,75
+928,60.0,60
+929,57.0,57
+930,200.0,200
+931,101.0,101
+932,70.0,70
+933,93.0,93
+934,61.0,61
+935,80.0,80
+936,94.0,94
+937,53.0,53
+938,223.0,223
+939,142.0,142
+940,74.0,74
+941,60.0,60
+942,75.0,75
+943,78.0,78
+944,81.0,81
+945,51.0,51
+946,215.0,215
+947,64.0,64
+948,70.0,70
+949,85.0,85
+950,102.0,102
+951,48.0,48
+952,69.0,69
+953,65.0,65
+954,70.0,70
+955,174.0,174
+956,46.0,46
+957,75.0,75
+958,75.0,75
+959,62.0,62
+960,71.0,71
+961,67.0,67
+962,48.0,48
+963,64.0,64
+964,58.0,58
+965,64.0,64
+966,82.0,82
+967,70.0,70
+968,68.0,68
+969,48.0,48
+970,48.0,48
+971,53.0,53
+972,80.0,80
+973,46.0,46
+974,101.0,101
+975,303.0,303
+976,59.0,59
+977,212.0,212
+978,64.0,64
+979,76.0,76
+980,69.0,69
+981,241.0,241
+982,46.0,46
+983,45.0,45
+984,124.0,124
+985,99.0,99
+986,210.0,210
+987,67.0,67
+988,78.0,78
+989,58.0,58
+990,54.0,54
+991,63.0,63
+992,37.0,37
+993,46.0,46
+994,63.0,63
+995,48.0,48
+996,70.0,70
+997,58.0,58
+998,88.0,88
+999,62.0,62
+1000,173.0,173
+1001,99.0,99
+1002,47.0,47
+1003,47.0,47
+1004,74.0,74
+1005,101.0,101
+1006,42.0,42
+1007,46.0,46
+1008,61.0,61
+1009,42.0,42
+1010,48.0,48
+1011,60.0,60
+1012,42.0,42
+1013,53.0,53
+1014,54.0,54
+1015,62.0,62
+1016,98.0,98
+1017,50.0,50
+1018,39.0,39
+1019,60.0,60
+1020,52.0,52
+1021,46.0,46
+1022,68.0,68
+1023,40.0,40
+1024,41.0,41
+1025,54.0,54
+1026,66.0,66
+1027,112.0,112
+1028,55.0,55
+1029,46.0,46
+1030,72.0,72
+1031,54.0,54
+1032,51.0,51
+1033,43.0,43
+1034,66.0,66
+1035,59.0,59
+1036,57.0,57
+1037,68.0,68
+1038,63.0,63
+1039,38.0,38
+1040,48.0,48
+1041,58.0,58
+1042,58.0,58
+1043,116.0,116
+1044,52.0,52
+1045,180.0,180
+1046,91.0,91
+1047,292.0,292
+1048,65.0,65
+1049,46.0,46
+1050,40.0,40
+1051,192.0,192
+1052,46.0,46
+1053,52.0,52
+1054,50.0,50
+1055,37.0,37
+1056,136.0,136
+1057,46.0,46
+1058,35.0,35
+1059,89.0,89
+1060,34.0,34
+1061,101.0,101
+1062,102.0,102
+1063,166.0,166
+1064,62.0,62
+1065,40.0,40
+1066,37.0,37
+1067,45.0,45
+1068,45.0,45
+1069,48.0,48
+1070,67.0,67
+1071,87.0,87
+1072,51.0,51
+1073,103.0,103
+1074,46.0,46
+1075,52.0,52
+1076,40.0,40
+1077,47.0,47
+1078,49.0,49
+1079,35.0,35
+1080,46.0,46
+1081,38.0,38
+1082,36.0,36
+1083,219.0,219
+1084,57.0,57
+1085,39.0,39
+1086,48.0,48
+1087,37.0,37
+1088,46.0,46
+1089,37.0,37
+1090,65.0,65
+1091,39.0,39
+1092,44.0,44
+1093,85.0,85
+1094,50.0,50
+1095,39.0,39
+1096,57.0,57
+1097,221.0,221
+1098,35.0,35
+1099,59.0,59
+1100,46.0,46
+1101,38.0,38
+1102,37.0,37
+1103,62.0,62
+1104,59.0,59
+1105,46.0,46
+1106,40.0,40
+1107,74.0,74
+1108,58.0,58
+1109,37.0,37
+1110,56.0,56
+1111,52.0,52
+1112,45.0,45
+1113,76.0,76
+1114,54.0,54
+1115,37.0,37
+1116,41.0,41
+1117,47.0,47
+1118,56.0,56
+1119,39.0,39
+1120,37.0,37
+1121,42.0,42
+1122,59.0,59
+1123,38.0,38
+1124,49.0,49
+1125,49.0,49
+1126,130.0,130
+1127,52.0,52
+1128,45.0,45
+1129,43.0,43
+1130,57.0,57
+1131,37.0,37
+1132,43.0,43
+1133,60.0,60
+1134,58.0,58
+1135,57.0,57
+1136,35.0,35
+1137,57.0,57
+1138,154.0,154
+1139,39.0,39
+1140,48.0,48
+1141,78.0,78
+1142,58.0,58
+1143,70.0,70
+1144,52.0,52
+1145,53.0,53
+1146,58.0,58
+1147,40.0,40
+1148,74.0,74
+1149,39.0,39
+1150,69.0,69
+1151,78.0,78
+1152,34.0,34
+1153,44.0,44
+1154,45.0,45
+1155,173.0,173
+1156,190.0,190
+1157,47.0,47
+1158,36.0,36
+1159,52.0,52
+1160,44.0,44
+1161,50.0,50
+1162,96.0,96
+1163,88.0,88
+1164,38.0,38
+1165,44.0,44
+1166,102.0,102
+1167,49.0,49
+1168,46.0,46
+1169,68.0,68
+1170,46.0,46
+1171,50.0,50
+1172,58.0,58
+1173,46.0,46
+1174,50.0,50
+1175,40.0,40
+1176,44.0,44
+1177,75.0,75
+1178,109.0,109
+1179,51.0,51
+1180,44.0,44
+1181,42.0,42
+1182,41.0,41
+1183,62.0,62
+1184,48.0,48
+1185,60.0,60
+1186,52.0,52
+1187,73.0,73
+1188,39.0,39
+1189,42.0,42
+1190,89.0,89
+1191,64.0,64
+1192,40.0,40
+1193,42.0,42
+1194,59.0,59
+1195,48.0,48
+1196,45.0,45
+1197,48.0,48
+1198,171.0,171
+1199,77.0,77
+1200,97.0,97
+1201,43.0,43
+1202,86.0,86
+1203,62.0,62
+1204,67.0,67
+1205,86.0,86
+1206,77.0,77
+1207,88.0,88
+1208,73.0,73
+1209,80.0,80
+1210,94.0,94
+1211,71.0,71
+1212,96.0,96
+1213,65.0,65
+1214,61.0,61
+1215,63.0,63
+1216,72.0,72
+1217,78.0,78
+1218,92.0,92
+1219,64.0,64
+1220,76.0,76
+1221,69.0,69
+1222,86.0,86
+1223,93.0,93
+1224,70.0,70
+1225,67.0,67
+1226,89.0,89
+1227,72.0,72
+1228,106.0,106
+1229,76.0,76
+1230,218.0,218
+1231,64.0,64
+1232,224.0,224
+1233,68.0,68
+1234,217.0,217
+1235,216.0,216
+1236,205.0,205
+1237,234.0,234
+1238,196.0,196
+1239,217.0,217
+1240,213.0,213
+1241,247.0,247
+1242,295.0,295
+1243,197.0,197
+1244,212.0,212
+1245,196.0,196
+1246,226.0,226
+1247,228.0,228
+1248,240.0,240
+1249,189.0,189
+1250,190.0,190
+1251,242.0,242
+1252,204.0,204
+1253,201.0,201
+1254,189.0,189
+1255,225.0,225
+1256,212.0,212
+1257,198.0,198
+1258,284.0,284
+1259,197.0,197
+1260,194.0,194
+1261,230.0,230
+1262,200.0,200
+1263,207.0,207
+1264,207.0,207
+1265,233.0,233
+1266,213.0,213
+1267,275.0,275
+1268,201.0,201
+1269,201.0,201
+1270,202.0,202
+1271,264.0,264
+1272,212.0,212
+1273,490.0,490
+1274,222.0,222
+1275,244.0,244
+1276,500.0,500
+1277,244.0,244
+1278,246.0,246
+1279,237.0,237
+1280,210.0,210
+1281,274.0,274
+1282,258.0,258
+1283,405.0,405
+1284,216.0,216
+1285,500.0,500
+1286,218.0,218
+1287,361.0,361
+1288,262.0,262
+1289,500.0,500
+1290,194.0,194
+1291,361.0,361
+1292,194.0,194
+1293,229.0,229
+1294,361.0,361
+1295,270.0,270
+1296,297.0,297
+1297,225.0,225
+1298,214.0,214
+1299,256.0,256
+1300,330.0,330
+1301,347.0,347
+1302,372.0,372
+1303,183.0,183
+1304,300.0,300
+1305,313.0,313
+1306,227.0,227
+1307,307.0,307
+1308,296.0,296
+1309,212.0,212
+1310,244.0,244
+1311,206.0,206
+1312,262.0,262
+1313,274.0,274
+1314,225.0,225
+1315,209.0,209
+1316,272.0,272
+1317,213.0,213
+1318,262.0,262
+1319,214.0,214
+1320,224.0,224
+1321,368.0,368
+1322,264.0,264
+1323,243.0,243
+1324,299.0,299
+1325,210.0,210
+1326,310.0,310
+1327,252.0,252
+1328,201.0,201
+1329,193.0,193
+1330,220.0,220
+1331,224.0,224
+1332,203.0,203
+1333,268.0,268
+1334,288.0,288
+1335,330.0,330
+1336,331.0,331
+1337,249.0,249
+1338,222.0,222
+1339,237.0,237
+1340,204.0,204
+1341,329.0,329
+1342,232.0,232
+1343,175.0,175
+1344,251.0,251
+1345,259.0,259
+1346,220.0,220
+1347,246.0,246
+1348,215.0,215
+1349,257.0,257
+1350,350.0,350
+1351,269.0,269
+1352,266.0,266
+1353,220.0,220
+1354,276.0,276
+1355,281.0,281
+1356,200.0,200
+1357,274.0,274
+1358,260.0,260
+1359,393.0,393
+1360,240.0,240
+1361,197.0,197
+1362,273.0,273
+1363,220.0,220
+1364,228.0,228
+1365,337.0,337
+1366,203.0,203
+1367,500.0,500
+1368,214.0,214
+1369,271.0,271
+1370,211.0,211
+1371,264.0,264
+1372,338.0,338
+1373,298.0,298
+1374,358.0,358
+1375,454.0,454
+1376,317.0,317
+1377,283.0,283
+1378,441.0,441
+1379,343.0,343
+1380,270.0,270
+1381,263.0,263
+1382,405.0,405
+1383,255.0,255
+1384,500.0,500
+1385,389.0,389
+1386,212.0,212
+1387,339.0,339
+1388,225.0,225
+1389,500.0,500
+1390,467.0,467
+1391,237.0,237
+1392,257.0,257
+1393,352.0,352
+1394,264.0,264
+1395,452.0,452
+1396,388.0,388
+1397,447.0,447
+1398,258.0,258
+1399,269.0,269
+1400,264.0,264
+1401,238.0,238
+1402,258.0,258
+1403,433.0,433
+1404,500.0,500
+1405,298.0,298
+1406,500.0,500
+1407,287.0,287
+1408,329.0,329
+1409,500.0,500
+1410,424.0,424
+1411,239.0,239
+1412,350.0,350
+1413,287.0,287
+1414,388.0,388
+1415,498.0,498
+1416,454.0,454
+1417,351.0,351
+1418,277.0,277
+1419,256.0,256
+1420,339.0,339
+1421,338.0,338
+1422,339.0,339
+1423,292.0,292
+1424,500.0,500
+1425,264.0,264
+1426,381.0,381
+1427,320.0,320
+1428,500.0,500
+1429,388.0,388
+1430,500.0,500
+1431,500.0,500
+1432,500.0,500
+1433,309.0,309
+1434,470.0,470
+1435,496.0,496
+1436,326.0,326
+1437,500.0,500
+1438,500.0,500
+1439,284.0,284
+1440,309.0,309
+1441,349.0,349
+1442,245.0,245
+1443,407.0,407
+1444,305.0,305
+1445,233.0,233
+1446,469.0,469
+1447,304.0,304
+1448,303.0,303
+1449,500.0,500
+1450,257.0,257
+1451,336.0,336
+1452,500.0,500
+1453,440.0,440
+1454,500.0,500
+1455,500.0,500
+1456,317.0,317
+1457,500.0,500
+1458,475.0,475
+1459,395.0,395
+1460,331.0,331
+1461,374.0,374
+1462,500.0,500
+1463,246.0,246
+1464,355.0,355
+1465,500.0,500
+1466,500.0,500
+1467,260.0,260
+1468,500.0,500
+1469,437.0,437
+1470,500.0,500
+1471,367.0,367
+1472,388.0,388
+1473,239.0,239
+1474,493.0,493
+1475,322.0,322
+1476,500.0,500
+1477,416.0,416
+1478,403.0,403
+1479,500.0,500
+1480,355.0,355
+1481,500.0,500
+1482,437.0,437
+1483,433.0,433
+1484,500.0,500
+1485,246.0,246
+1486,364.0,364
+1487,255.0,255
+1488,500.0,500
+1489,443.0,443
+1490,500.0,500
+1491,287.0,287
+1492,402.0,402
+1493,500.0,500
+1494,499.0,499
+1495,500.0,500
+1496,248.0,248
+1497,266.0,266
+1498,500.0,500
+1499,338.0,338
+1500,395.0,395
+1501,304.0,304
+1502,433.0,433
+1503,351.0,351
+1504,230.0,230
+1505,352.0,352
+1506,500.0,500
+1507,265.0,265
+1508,500.0,500
+1509,244.0,244
+1510,392.0,392
+1511,467.0,467
+1512,353.0,353
+1513,500.0,500
+1514,473.0,473
+1515,246.0,246
+1516,336.0,336
+1517,317.0,317
+1518,325.0,325
+1519,481.0,481
+1520,374.0,374
+1521,231.0,231
+1522,500.0,500
+1523,234.0,234
+1524,290.0,290
+1525,297.0,297
+1526,299.0,299
+1527,364.0,364
+1528,326.0,326
+1529,482.0,482
+1530,233.0,233
+1531,500.0,500
+1532,264.0,264
+1533,314.0,314
+1534,500.0,500
+1535,433.0,433
+1536,415.0,415
+1537,288.0,288
+1538,458.0,458
+1539,308.0,308
+1540,500.0,500
+1541,459.0,459
+1542,273.0,273
+1543,500.0,500
+1544,500.0,500
+1545,470.0,470
+1546,364.0,364
+1547,425.0,425
+1548,374.0,374
+1549,399.0,399
+1550,500.0,500
+1551,500.0,500
+1552,500.0,500
+1553,497.0,497
+1554,272.0,272
+1555,268.0,268
+1556,292.0,292
+1557,500.0,500
+1558,281.0,281
+1559,272.0,272
+1560,411.0,411
+1561,500.0,500
+1562,430.0,430
+1563,415.0,415
+1564,500.0,500
+1565,464.0,464
+1566,436.0,436
+1567,500.0,500
+1568,344.0,344
+1569,395.0,395
+1570,385.0,385
+1571,232.0,232
+1572,260.0,260
+1573,499.0,499
+1574,411.0,411
+1575,500.0,500
+1576,290.0,290
+1577,321.0,321
+1578,481.0,481
+1579,473.0,473
+1580,301.0,301
+1581,404.0,404
+1582,410.0,410
+1583,437.0,437
+1584,311.0,311
+1585,500.0,500
+1586,231.0,231
+1587,376.0,376
+1588,359.0,359
+1589,276.0,276
+1590,457.0,457
+1591,500.0,500
+1592,318.0,318
+1593,500.0,500
+1594,309.0,309
+1595,481.0,481
+1596,274.0,274
+1597,331.0,331
+1598,500.0,500
+1599,259.0,259
+1600,500.0,500
+1601,291.0,291
+1602,499.0,499
+1603,256.0,256
+1604,266.0,266
+1605,500.0,500
+1606,325.0,325
+1607,359.0,359
+1608,274.0,274
+1609,357.0,357
+1610,465.0,465
+1611,500.0,500
+1612,435.0,435
+1613,268.0,268
+1614,251.0,251
+1615,252.0,252
+1616,275.0,275
+1617,284.0,284
+1618,416.0,416
+1619,229.0,229
+1620,500.0,500
+1621,265.0,265
+1622,354.0,354
+1623,251.0,251
+1624,381.0,381
+1625,279.0,279
+1626,267.0,267
+1627,232.0,232
+1628,365.0,365
+1629,500.0,500
+1630,489.0,489
+1631,500.0,500
+1632,243.0,243
+1633,253.0,253
+1634,334.0,334
+1635,500.0,500
+1636,280.0,280
+1637,268.0,268
+1638,356.0,356
+1639,500.0,500
+1640,253.0,253
+1641,244.0,244
+1642,237.0,237
+1643,421.0,421
+1644,247.0,247
+1645,378.0,378
+1646,252.0,252
+1647,282.0,282
+1648,247.0,247
+1649,289.0,289
+1650,226.0,226
+1651,289.0,289
+1652,480.0,480
+1653,500.0,500
+1654,270.0,270
+1655,309.0,309
+1656,292.0,292
+1657,272.0,272
+1658,233.0,233
+1659,261.0,261
+1660,500.0,500
+1661,316.0,316
+1662,310.0,310
+1663,276.0,276
+1664,315.0,315
+1665,267.0,267
+1666,420.0,420
+1667,320.0,320
+1668,500.0,500
+1669,370.0,370
+1670,500.0,500
+1671,246.0,246
+1672,296.0,296
+1673,256.0,256
+1674,281.0,281
+1675,327.0,327
+1676,242.0,242
+1677,393.0,393
+1678,332.0,332
+1679,288.0,288
+1680,250.0,250
+1681,391.0,391
+1682,296.0,296
+1683,490.0,490
+1684,224.0,224
+1685,369.0,369
+1686,311.0,311
+1687,335.0,335
+1688,227.0,227
+1689,500.0,500
+1690,242.0,242
+1691,363.0,363
+1692,284.0,284
+1693,254.0,254
+1694,386.0,386
+1695,353.0,353
+1696,443.0,443
+1697,500.0,500
+1698,253.0,253
+1699,293.0,293
+1700,500.0,500
+1701,259.0,259
+1702,254.0,254
+1703,343.0,343
+1704,313.0,313
+1705,253.0,253
+1706,409.0,409
+1707,474.0,474
+1708,226.0,226
+1709,325.0,325
+1710,441.0,441
+1711,252.0,252
+1712,430.0,430
+1713,287.0,287
+1714,318.0,318
+1715,323.0,323
+1716,268.0,268
+1717,288.0,288
+1718,292.0,292
+1719,323.0,323
+1720,291.0,291
+1721,399.0,399
+1722,263.0,263
+1723,385.0,385
+1724,229.0,229
+1725,282.0,282
+1726,347.0,347
+1727,257.0,257
+1728,264.0,264
+1729,282.0,282
+1730,402.0,402
+1731,328.0,328
+1732,227.0,227
+1733,272.0,272
+1734,462.0,462
+1735,236.0,236
+1736,302.0,302
+1737,275.0,275
+1738,280.0,280
+1739,331.0,331
+1740,352.0,352
+1741,500.0,500
+1742,389.0,389
+1743,303.0,303
+1744,398.0,398
+1745,359.0,359
+1746,436.0,436
+1747,233.0,233
+1748,295.0,295
+1749,234.0,234
+1750,290.0,290
+1751,261.0,261
+1752,248.0,248
+1753,263.0,263
+1754,368.0,368
+1755,500.0,500
+1756,276.0,276
+1757,243.0,243
+1758,500.0,500
+1759,289.0,289
+1760,500.0,500
+1761,275.0,275
+1762,297.0,297
+1763,250.0,250
+1764,405.0,405
+1765,261.0,261
+1766,239.0,239
+1767,351.0,351
+1768,301.0,301
+1769,384.0,384
+1770,240.0,240
+1771,258.0,258
+1772,258.0,258
+1773,249.0,249
+1774,254.0,254
+1775,374.0,374
+1776,347.0,347
+1777,377.0,377
+1778,263.0,263
+1779,229.0,229
+1780,292.0,292
+1781,259.0,259
+1782,259.0,259
+1783,260.0,260
+1784,267.0,267
+1785,256.0,256
+1786,306.0,306
+1787,238.0,238
+1788,257.0,257
+1789,252.0,252
+1790,293.0,293
+1791,273.0,273
+1792,308.0,308
+1793,291.0,291
+1794,342.0,342
+1795,273.0,273
+1796,257.0,257
+1797,221.0,221
+1798,276.0,276
+1799,279.0,279
+1800,269.0,269
+1801,291.0,291
+1802,359.0,359
+1803,431.0,431
+1804,375.0,375
+1805,298.0,298
+1806,253.0,253
+1807,276.0,276
+1808,258.0,258
+1809,242.0,242
+1810,397.0,397
+1811,394.0,394
+1812,323.0,323
+1813,257.0,257
+1814,343.0,343
+1815,287.0,287
+1816,372.0,372
+1817,294.0,294
+1818,261.0,261
+1819,270.0,270
+1820,284.0,284
+1821,247.0,247
+1822,372.0,372
+1823,292.0,292
+1824,357.0,357
+1825,247.0,247
+1826,355.0,355
+1827,447.0,447
+1828,251.0,251
+1829,375.0,375
+1830,262.0,262
+1831,340.0,340
+1832,243.0,243
+1833,261.0,261
+1834,247.0,247
+1835,499.0,499
+1836,242.0,242
+1837,237.0,237
+1838,255.0,255
+1839,320.0,320
+1840,216.0,216
+1841,356.0,356
+1842,261.0,261
+1843,247.0,247
+1844,229.0,229
+1845,238.0,238
+1846,233.0,233
+1847,232.0,232
+1848,234.0,234
+1849,391.0,391
+1850,273.0,273
+1851,438.0,438
+1852,402.0,402
+1853,394.0,394
+1854,287.0,287
+1855,230.0,230
+1856,251.0,251
+1857,278.0,278
+1858,378.0,378
+1859,249.0,249
+1860,271.0,271
+1861,296.0,296
+1862,256.0,256
+1863,270.0,270
+1864,500.0,500
+1865,385.0,385
+1866,284.0,284
+1867,248.0,248
+1868,283.0,283
+1869,246.0,246
+1870,339.0,339
+1871,415.0,415
+1872,276.0,276
+1873,275.0,275
+1874,457.0,457
+1875,500.0,500
+1876,281.0,281
+1877,324.0,324
+1878,414.0,414
+1879,314.0,314
+1880,449.0,449
+1881,281.0,281
+1882,368.0,368
+1883,322.0,322
+1884,235.0,235
+1885,337.0,337
+1886,500.0,500
+1887,311.0,311
+1888,347.0,347
+1889,365.0,365
+1890,272.0,272
+1891,342.0,342
+1892,379.0,379
+1893,247.0,247
+1894,321.0,321
+1895,403.0,403
+1896,464.0,464
+1897,330.0,330
+1898,361.0,361
+1899,500.0,500
+1900,433.0,433
+1901,500.0,500
+1902,293.0,293
+1903,386.0,386
+1904,283.0,283
+1905,366.0,366
+1906,278.0,278
+1907,279.0,279
+1908,415.0,415
+1909,480.0,480
+1910,500.0,500
+1911,353.0,353
+1912,500.0,500
+1913,269.0,269
+1914,500.0,500
+1915,385.0,385
+1916,246.0,246
+1917,481.0,481
+1918,500.0,500
+1919,462.0,462
+1920,373.0,373
+1921,500.0,500
+1922,272.0,272
+1923,500.0,500
+1924,495.0,495
+1925,500.0,500
+1926,295.0,295
+1927,249.0,249
+1928,256.0,256
+1929,500.0,500
+1930,317.0,317
+1931,500.0,500
+1932,317.0,317
+1933,258.0,258
+1934,380.0,380
+1935,402.0,402
+1936,500.0,500
+1937,319.0,319
+1938,319.0,319
+1939,500.0,500
+1940,447.0,447
+1941,500.0,500
+1942,459.0,459
+1943,500.0,500
+1944,299.0,299
+1945,290.0,290
+1946,318.0,318
+1947,500.0,500
+1948,500.0,500
+1949,500.0,500
+1950,500.0,500
+1951,478.0,478
+1952,500.0,500
+1953,500.0,500
+1954,330.0,330
+1955,366.0,366
+1956,500.0,500
+1957,283.0,283
+1958,300.0,300
+1959,292.0,292
+1960,270.0,270
+1961,500.0,500
+1962,474.0,474
+1963,328.0,328
+1964,389.0,389
+1965,500.0,500
+1966,493.0,493
+1967,357.0,357
+1968,500.0,500
+1969,500.0,500
+1970,500.0,500
+1971,320.0,320
+1972,385.0,385
+1973,500.0,500
+1974,422.0,422
+1975,405.0,405
+1976,500.0,500
+1977,363.0,363
+1978,329.0,329
+1979,309.0,309
+1980,500.0,500
+1981,500.0,500
+1982,277.0,277
+1983,461.0,461
+1984,262.0,262
+1985,500.0,500
+1986,500.0,500
+1987,370.0,370
+1988,500.0,500
+1989,255.0,255
+1990,449.0,449
+1991,361.0,361
+1992,319.0,319
+1993,382.0,382
+1994,363.0,363
+1995,500.0,500
+1996,336.0,336
+1997,500.0,500
+1998,500.0,500
+1999,500.0,500
diff --git a/projects/codes/PolicyGradient/main.py b/projects/codes/PolicyGradient/main.py
index 2e4c5e4..3473c38 100644
--- a/projects/codes/PolicyGradient/main.py
+++ b/projects/codes/PolicyGradient/main.py
@@ -5,7 +5,7 @@ Author: John
 Email: johnjim0816@gmail.com
 Date: 2020-11-22 23:21:53
 LastEditor: John
-LastEditTime: 2022-08-25 20:59:23
+LastEditTime: 2022-08-27 00:04:08
 Discription: 
 Environment: 
 '''
@@ -34,7 +34,7 @@ class PGNet(MLP):
     def forward(self, x):
         x = F.relu(self.fc1(x))
         x = F.relu(self.fc2(x))
-        x = F.sigmoid(self.fc3(x))
+        x = torch.sigmoid(self.fc3(x))
         return x
 
 class Main(Launcher):
@@ -47,8 +47,9 @@ class Main(Launcher):
         parser.add_argument('--env_name',default='CartPole-v0',type=str,help="name of environment")
         parser.add_argument('--train_eps',default=200,type=int,help="episodes of training")
         parser.add_argument('--test_eps',default=20,type=int,help="episodes of testing")
+        parser.add_argument('--ep_max_steps',default = 100000,type=int,help="steps per episode, much larger value can simulate infinite steps") 
         parser.add_argument('--gamma',default=0.99,type=float,help="discounted factor")
-        parser.add_argument('--lr',default=0.005,type=float,help="learning rate")
+        parser.add_argument('--lr',default=0.01,type=float,help="learning rate")
         parser.add_argument('--update_fre',default=8,type=int)
         parser.add_argument('--hidden_dim',default=36,type=int)
         parser.add_argument('--device',default='cpu',type=str,help="cpu or cuda") 
@@ -81,7 +82,7 @@ class Main(Launcher):
         for i_ep in range(cfg['train_eps']):
             state = env.reset()
             ep_reward = 0
-            for _ in count():
+            for _ in range(cfg['ep_max_steps']):
                 action = agent.sample_action(state) # sample action
                 next_state, reward, done, _ = env.step(action)
                 ep_reward += reward
@@ -90,8 +91,9 @@ class Main(Launcher):
                 agent.memory.push((state,float(action),reward))
                 state = next_state
                 if done:
-                    print(f"Episode：{i_ep+1}/{cfg['train_eps']}, Reward:{ep_reward:.2f}")
                     break
+            if (i_ep+1) % 10 == 0:
+                print(f"Episode：{i_ep+1}/{cfg['train_eps']}, Reward:{ep_reward:.2f}")
             if (i_ep+1) % cfg['update_fre'] == 0:
                 agent.update()
             rewards.append(ep_reward)
@@ -107,7 +109,7 @@ class Main(Launcher):
         for i_ep in range(cfg['test_eps']):
             state = env.reset()
             ep_reward = 0
-            for _ in count():
+            for _ in range(cfg['ep_max_steps']):
                 action = agent.predict_action(state)
                 next_state, reward, done, _ = env.step(action)
                 ep_reward += reward
@@ -115,9 +117,9 @@ class Main(Launcher):
                     reward = 0
                 state = next_state
                 if done:
-                    print(f"Episode: {i_ep+1}/{cfg['test_eps']}，Reward: {ep_reward:.2f}")
                     break
-            rewards.append(ep_reward)
+            print(f"Episode: {i_ep+1}/{cfg['test_eps']}，Reward: {ep_reward:.2f}")
+            rewards.append(ep_reward)         
         print("Finish testing!")
         env.close()
         return {'episodes':range(len(rewards)),'rewards':rewards}
diff --git a/projects/codes/PolicyGradient/outputs/CartPole-v0/20220825-205930/models/checkpoint.pt b/projects/codes/PolicyGradient/outputs/CartPole-v0/20220825-205930/models/checkpoint.pt
deleted file mode 100644
index 2676e7a..0000000
Binary files a/projects/codes/PolicyGradient/outputs/CartPole-v0/20220825-205930/models/checkpoint.pt and /dev/null differ
diff --git a/projects/codes/PolicyGradient/outputs/CartPole-v0/20220825-205930/results/params.json b/projects/codes/PolicyGradient/outputs/CartPole-v0/20220825-205930/results/params.json
deleted file mode 100644
index 2a3810d..0000000
--- a/projects/codes/PolicyGradient/outputs/CartPole-v0/20220825-205930/results/params.json
+++ /dev/null
@@ -1 +0,0 @@
-{"algo_name": "PolicyGradient", "env_name": "CartPole-v0", "train_eps": 200, "test_eps": 20, "gamma": 0.99, "lr": 0.005, "update_fre": 8, "hidden_dim": 36, "device": "cpu", "seed": 1, "save_fig": true, "show_fig": false, "result_path": "/Users/jj/Desktop/rl-tutorials/codes/PolicyGradient/outputs/CartPole-v0/20220825-205930/results/", "model_path": "/Users/jj/Desktop/rl-tutorials/codes/PolicyGradient/outputs/CartPole-v0/20220825-205930/models/", "n_states": 4, "n_actions": 2}
\ No newline at end of file
diff --git a/projects/codes/PolicyGradient/outputs/CartPole-v0/20220825-205930/results/testing_curve.png b/projects/codes/PolicyGradient/outputs/CartPole-v0/20220825-205930/results/testing_curve.png
deleted file mode 100644
index a38dd4b..0000000
Binary files a/projects/codes/PolicyGradient/outputs/CartPole-v0/20220825-205930/results/testing_curve.png and /dev/null differ
diff --git a/projects/codes/PolicyGradient/outputs/CartPole-v0/20220825-205930/results/training_curve.png b/projects/codes/PolicyGradient/outputs/CartPole-v0/20220825-205930/results/training_curve.png
deleted file mode 100644
index 3e0db7c..0000000
Binary files a/projects/codes/PolicyGradient/outputs/CartPole-v0/20220825-205930/results/training_curve.png and /dev/null differ
diff --git a/projects/codes/PolicyGradient/outputs/CartPole-v0/20220825-205930/results/training_results.csv b/projects/codes/PolicyGradient/outputs/CartPole-v0/20220825-205930/results/training_results.csv
deleted file mode 100644
index daeb8f2..0000000
--- a/projects/codes/PolicyGradient/outputs/CartPole-v0/20220825-205930/results/training_results.csv
+++ /dev/null
@@ -1,201 +0,0 @@
-episodes,rewards
-0,26.0
-1,53.0
-2,10.0
-3,37.0
-4,22.0
-5,21.0
-6,12.0
-7,34.0
-8,38.0
-9,40.0
-10,23.0
-11,14.0
-12,16.0
-13,25.0
-14,15.0
-15,23.0
-16,11.0
-17,28.0
-18,21.0
-19,62.0
-20,33.0
-21,27.0
-22,15.0
-23,17.0
-24,26.0
-25,35.0
-26,26.0
-27,14.0
-28,42.0
-29,45.0
-30,34.0
-31,39.0
-32,31.0
-33,17.0
-34,42.0
-35,41.0
-36,31.0
-37,39.0
-38,28.0
-39,12.0
-40,36.0
-41,33.0
-42,47.0
-43,40.0
-44,63.0
-45,36.0
-46,64.0
-47,79.0
-48,49.0
-49,40.0
-50,65.0
-51,47.0
-52,51.0
-53,30.0
-54,26.0
-55,41.0
-56,86.0
-57,61.0
-58,38.0
-59,200.0
-60,49.0
-61,70.0
-62,61.0
-63,101.0
-64,200.0
-65,152.0
-66,108.0
-67,46.0
-68,72.0
-69,87.0
-70,27.0
-71,126.0
-72,46.0
-73,25.0
-74,14.0
-75,42.0
-76,38.0
-77,55.0
-78,42.0
-79,51.0
-80,67.0
-81,83.0
-82,178.0
-83,115.0
-84,140.0
-85,97.0
-86,85.0
-87,61.0
-88,153.0
-89,200.0
-90,200.0
-91,200.0
-92,200.0
-93,64.0
-94,200.0
-95,200.0
-96,157.0
-97,128.0
-98,160.0
-99,35.0
-100,140.0
-101,113.0
-102,200.0
-103,154.0
-104,200.0
-105,200.0
-106,200.0
-107,198.0
-108,137.0
-109,200.0
-110,200.0
-111,102.0
-112,200.0
-113,200.0
-114,200.0
-115,200.0
-116,148.0
-117,200.0
-118,200.0
-119,200.0
-120,200.0
-121,200.0
-122,194.0
-123,200.0
-124,200.0
-125,200.0
-126,183.0
-127,200.0
-128,200.0
-129,200.0
-130,200.0
-131,200.0
-132,200.0
-133,200.0
-134,200.0
-135,200.0
-136,93.0
-137,96.0
-138,84.0
-139,103.0
-140,79.0
-141,104.0
-142,82.0
-143,105.0
-144,200.0
-145,200.0
-146,171.0
-147,200.0
-148,200.0
-149,200.0
-150,200.0
-151,197.0
-152,133.0
-153,142.0
-154,147.0
-155,156.0
-156,131.0
-157,181.0
-158,163.0
-159,146.0
-160,200.0
-161,176.0
-162,200.0
-163,173.0
-164,177.0
-165,200.0
-166,200.0
-167,200.0
-168,200.0
-169,200.0
-170,200.0
-171,200.0
-172,200.0
-173,200.0
-174,200.0
-175,200.0
-176,200.0
-177,200.0
-178,200.0
-179,200.0
-180,200.0
-181,200.0
-182,200.0
-183,200.0
-184,200.0
-185,200.0
-186,200.0
-187,200.0
-188,200.0
-189,200.0
-190,200.0
-191,200.0
-192,200.0
-193,200.0
-194,200.0
-195,200.0
-196,190.0
-197,200.0
-198,189.0
-199,200.0
diff --git a/projects/codes/PolicyGradient/outputs/CartPole-v0/20220827-000433/models/checkpoint.pt b/projects/codes/PolicyGradient/outputs/CartPole-v0/20220827-000433/models/checkpoint.pt
new file mode 100644
index 0000000..7b98cda
Binary files /dev/null and b/projects/codes/PolicyGradient/outputs/CartPole-v0/20220827-000433/models/checkpoint.pt differ
diff --git a/projects/codes/PolicyGradient/outputs/CartPole-v0/20220827-000433/results/params.json b/projects/codes/PolicyGradient/outputs/CartPole-v0/20220827-000433/results/params.json
new file mode 100644
index 0000000..4dfae79
--- /dev/null
+++ b/projects/codes/PolicyGradient/outputs/CartPole-v0/20220827-000433/results/params.json
@@ -0,0 +1 @@
+{"algo_name": "PolicyGradient", "env_name": "CartPole-v0", "train_eps": 200, "test_eps": 20, "ep_max_steps": 100000, "gamma": 0.99, "lr": 0.01, "update_fre": 8, "hidden_dim": 36, "device": "cpu", "seed": 1, "save_fig": true, "show_fig": false, "result_path": "c:\\Users\\24438\\Desktop\\rl-tutorials\\codes\\PolicyGradient/outputs/CartPole-v0/20220827-000433/results/", "model_path": "c:\\Users\\24438\\Desktop\\rl-tutorials\\codes\\PolicyGradient/outputs/CartPole-v0/20220827-000433/models/", "n_states": 4, "n_actions": 2}
\ No newline at end of file
diff --git a/projects/codes/PolicyGradient/outputs/CartPole-v0/20220827-000433/results/testing_curve.png b/projects/codes/PolicyGradient/outputs/CartPole-v0/20220827-000433/results/testing_curve.png
new file mode 100644
index 0000000..e3c3489
Binary files /dev/null and b/projects/codes/PolicyGradient/outputs/CartPole-v0/20220827-000433/results/testing_curve.png differ
diff --git a/projects/codes/PolicyGradient/outputs/CartPole-v0/20220825-205930/results/testing_results.csv b/projects/codes/PolicyGradient/outputs/CartPole-v0/20220827-000433/results/testing_results.csv
similarity index 81%
rename from projects/codes/PolicyGradient/outputs/CartPole-v0/20220825-205930/results/testing_results.csv
rename to projects/codes/PolicyGradient/outputs/CartPole-v0/20220827-000433/results/testing_results.csv
index 958b0ef..fb73fd6 100644
--- a/projects/codes/PolicyGradient/outputs/CartPole-v0/20220825-205930/results/testing_results.csv
+++ b/projects/codes/PolicyGradient/outputs/CartPole-v0/20220827-000433/results/testing_results.csv
@@ -1,7 +1,7 @@
 episodes,rewards
 0,200.0
 1,200.0
-2,165.0
+2,200.0
 3,200.0
 4,200.0
 5,200.0
@@ -10,12 +10,12 @@ episodes,rewards
 8,200.0
 9,200.0
 10,200.0
-11,168.0
+11,200.0
 12,200.0
 13,200.0
 14,200.0
-15,115.0
-16,198.0
+15,200.0
+16,200.0
 17,200.0
 18,200.0
 19,200.0
diff --git a/projects/codes/PolicyGradient/outputs/CartPole-v0/20220827-000433/results/training_curve.png b/projects/codes/PolicyGradient/outputs/CartPole-v0/20220827-000433/results/training_curve.png
new file mode 100644
index 0000000..1f954a1
Binary files /dev/null and b/projects/codes/PolicyGradient/outputs/CartPole-v0/20220827-000433/results/training_curve.png differ
diff --git a/projects/codes/PolicyGradient/outputs/CartPole-v0/20220827-000433/results/training_results.csv b/projects/codes/PolicyGradient/outputs/CartPole-v0/20220827-000433/results/training_results.csv
new file mode 100644
index 0000000..715be6d
--- /dev/null
+++ b/projects/codes/PolicyGradient/outputs/CartPole-v0/20220827-000433/results/training_results.csv
@@ -0,0 +1,201 @@
+episodes,rewards
+0,26.0
+1,53.0
+2,10.0
+3,37.0
+4,22.0
+5,21.0
+6,12.0
+7,34.0
+8,93.0
+9,36.0
+10,29.0
+11,18.0
+12,14.0
+13,62.0
+14,20.0
+15,40.0
+16,10.0
+17,10.0
+18,10.0
+19,11.0
+20,10.0
+21,14.0
+22,12.0
+23,8.0
+24,19.0
+25,33.0
+26,22.0
+27,32.0
+28,16.0
+29,24.0
+30,24.0
+31,24.0
+32,75.0
+33,33.0
+34,33.0
+35,72.0
+36,110.0
+37,48.0
+38,60.0
+39,43.0
+40,61.0
+41,34.0
+42,50.0
+43,61.0
+44,53.0
+45,58.0
+46,36.0
+47,44.0
+48,42.0
+49,64.0
+50,67.0
+51,52.0
+52,39.0
+53,42.0
+54,40.0
+55,33.0
+56,200.0
+57,199.0
+58,149.0
+59,185.0
+60,134.0
+61,174.0
+62,162.0
+63,200.0
+64,93.0
+65,72.0
+66,69.0
+67,51.0
+68,62.0
+69,98.0
+70,73.0
+71,73.0
+72,200.0
+73,200.0
+74,200.0
+75,200.0
+76,200.0
+77,200.0
+78,200.0
+79,133.0
+80,200.0
+81,200.0
+82,200.0
+83,200.0
+84,200.0
+85,200.0
+86,200.0
+87,200.0
+88,114.0
+89,151.0
+90,129.0
+91,156.0
+92,112.0
+93,172.0
+94,171.0
+95,141.0
+96,200.0
+97,200.0
+98,200.0
+99,200.0
+100,200.0
+101,200.0
+102,200.0
+103,200.0
+104,188.0
+105,199.0
+106,138.0
+107,200.0
+108,200.0
+109,181.0
+110,145.0
+111,200.0
+112,135.0
+113,119.0
+114,112.0
+115,122.0
+116,118.0
+117,119.0
+118,131.0
+119,119.0
+120,109.0
+121,96.0
+122,105.0
+123,29.0
+124,110.0
+125,113.0
+126,18.0
+127,90.0
+128,145.0
+129,152.0
+130,151.0
+131,109.0
+132,141.0
+133,109.0
+134,136.0
+135,143.0
+136,200.0
+137,200.0
+138,200.0
+139,200.0
+140,200.0
+141,200.0
+142,200.0
+143,200.0
+144,192.0
+145,173.0
+146,180.0
+147,182.0
+148,186.0
+149,175.0
+150,176.0
+151,191.0
+152,200.0
+153,200.0
+154,200.0
+155,200.0
+156,200.0
+157,200.0
+158,200.0
+159,200.0
+160,200.0
+161,200.0
+162,200.0
+163,200.0
+164,200.0
+165,200.0
+166,200.0
+167,200.0
+168,200.0
+169,200.0
+170,200.0
+171,200.0
+172,200.0
+173,200.0
+174,200.0
+175,200.0
+176,200.0
+177,200.0
+178,200.0
+179,200.0
+180,200.0
+181,200.0
+182,200.0
+183,200.0
+184,200.0
+185,200.0
+186,200.0
+187,200.0
+188,200.0
+189,200.0
+190,200.0
+191,200.0
+192,200.0
+193,200.0
+194,200.0
+195,200.0
+196,200.0
+197,200.0
+198,200.0
+199,200.0
diff --git a/projects/codes/PolicyGradient/pg.py b/projects/codes/PolicyGradient/pg.py
index aeef3f8..d0b4956 100644
--- a/projects/codes/PolicyGradient/pg.py
+++ b/projects/codes/PolicyGradient/pg.py
@@ -5,7 +5,7 @@ Author: John
 Email: johnjim0816@gmail.com
 Date: 2020-11-22 23:27:44
 LastEditor: John
-LastEditTime: 2022-08-25 20:58:59
+LastEditTime: 2022-08-27 13:45:26
 Discription: 
 Environment: 
 '''
@@ -31,8 +31,11 @@ class PolicyGradient:
         state = torch.from_numpy(state).float()
         state = Variable(state)
         probs = self.policy_net(state)
+        print("probs")
+        print(probs)
         m = Bernoulli(probs) # 伯努利分布
         action = m.sample()
+        
         action = action.data.numpy().astype(int)[0] # 转为标量
         return action
     def predict_action(self,state):
diff --git a/projects/codes/QLearning/main.py b/projects/codes/QLearning/main.py
index 8f423ef..7adbfbe 100644
--- a/projects/codes/QLearning/main.py
+++ b/projects/codes/QLearning/main.py
@@ -5,7 +5,7 @@ Author: John
 Email: johnjim0816@gmail.com
 Date: 2020-09-11 23:03:00
 LastEditor: John
-LastEditTime: 2022-08-25 14:59:15
+LastEditTime: 2022-08-26 22:46:21
 Discription: 
 Environment: 
 '''
@@ -57,7 +57,10 @@ class Main(Launcher):
             env = CliffWalkingWapper(env)
         if cfg['seed'] !=0: # set random seed
             all_seed(env,seed=cfg["seed"]) 
-        n_states = env.observation_space.n  # state dimension
+        try: # state dimension
+            n_states = env.observation_space.n # print(hasattr(env.observation_space, 'n'))
+        except AttributeError:
+            n_states = env.observation_space.shape[0] # print(hasattr(env.observation_space, 'shape'))
         n_actions = env.action_space.n  # action dimension
         print(f"n_states: {n_states}, n_actions: {n_actions}")
         cfg.update({"n_states":n_states,"n_actions":n_actions}) # update to cfg paramters
diff --git a/projects/codes/QLearning/outputs/CliffWalking-v0/20220824-103255/results/testing_results.csv b/projects/codes/QLearning/outputs/CliffWalking-v0/20220824-103255/results/testing_results.csv
deleted file mode 100644
index 86359a6..0000000
--- a/projects/codes/QLearning/outputs/CliffWalking-v0/20220824-103255/results/testing_results.csv
+++ /dev/null
@@ -1,21 +0,0 @@
-episodes,rewards
-0,-13
-1,-13
-2,-13
-3,-13
-4,-13
-5,-13
-6,-13
-7,-13
-8,-13
-9,-13
-10,-13
-11,-13
-12,-13
-13,-13
-14,-13
-15,-13
-16,-13
-17,-13
-18,-13
-19,-13
diff --git a/projects/codes/QLearning/outputs/CliffWalking-v0/20220824-103255/results/training_results.csv b/projects/codes/QLearning/outputs/CliffWalking-v0/20220824-103255/results/training_results.csv
deleted file mode 100644
index 99b7e5f..0000000
--- a/projects/codes/QLearning/outputs/CliffWalking-v0/20220824-103255/results/training_results.csv
+++ /dev/null
@@ -1,401 +0,0 @@
-episodes,rewards
-0,-2131
-1,-1086
-2,-586
-3,-220
-4,-154
-5,-122
-6,-150
-7,-159
-8,-164
-9,-88
-10,-195
-11,-114
-12,-60
-13,-179
-14,-101
-15,-304
-16,-96
-17,-119
-18,-113
-19,-98
-20,-106
-21,-105
-22,-77
-23,-51
-24,-105
-25,-136
-26,-100
-27,-29
-28,-79
-29,-114
-30,-82
-31,-70
-32,-75
-33,-51
-34,-94
-35,-52
-36,-93
-37,-71
-38,-73
-39,-48
-40,-52
-41,-96
-42,-46
-43,-65
-44,-57
-45,-41
-46,-104
-47,-51
-48,-181
-49,-229
-50,-39
-51,-69
-52,-53
-53,-59
-54,-26
-55,-75
-56,-31
-57,-60
-58,-63
-59,-40
-60,-35
-61,-79
-62,-42
-63,-22
-64,-73
-65,-71
-66,-18
-67,-55
-68,-29
-69,-43
-70,-70
-71,-49
-72,-42
-73,-29
-74,-81
-75,-36
-76,-38
-77,-36
-78,-52
-79,-28
-80,-42
-81,-52
-82,-66
-83,-31
-84,-27
-85,-49
-86,-28
-87,-54
-88,-34
-89,-35
-90,-50
-91,-36
-92,-36
-93,-46
-94,-34
-95,-135
-96,-39
-97,-36
-98,-26
-99,-56
-100,-40
-101,-40
-102,-26
-103,-28
-104,-31
-105,-35
-106,-26
-107,-57
-108,-44
-109,-41
-110,-31
-111,-26
-112,-25
-113,-41
-114,-32
-115,-44
-116,-30
-117,-32
-118,-30
-119,-25
-120,-23
-121,-47
-122,-24
-123,-45
-124,-39
-125,-21
-126,-43
-127,-143
-128,-26
-129,-20
-130,-32
-131,-16
-132,-24
-133,-42
-134,-25
-135,-36
-136,-19
-137,-29
-138,-43
-139,-17
-140,-150
-141,-32
-142,-34
-143,-19
-144,-26
-145,-30
-146,-31
-147,-49
-148,-33
-149,-21
-150,-17
-151,-48
-152,-34
-153,-20
-154,-20
-155,-26
-156,-21
-157,-13
-158,-40
-159,-22
-160,-26
-161,-30
-162,-29
-163,-25
-164,-26
-165,-27
-166,-21
-167,-29
-168,-24
-169,-17
-170,-22
-171,-35
-172,-35
-173,-18
-174,-135
-175,-15
-176,-23
-177,-28
-178,-25
-179,-24
-180,-29
-181,-31
-182,-24
-183,-129
-184,-45
-185,-24
-186,-17
-187,-20
-188,-21
-189,-23
-190,-15
-191,-32
-192,-22
-193,-19
-194,-17
-195,-45
-196,-15
-197,-14
-198,-14
-199,-37
-200,-23
-201,-17
-202,-19
-203,-21
-204,-23
-205,-27
-206,-14
-207,-18
-208,-23
-209,-34
-210,-23
-211,-13
-212,-25
-213,-17
-214,-13
-215,-21
-216,-29
-217,-18
-218,-24
-219,-15
-220,-27
-221,-25
-222,-21
-223,-19
-224,-17
-225,-18
-226,-13
-227,-22
-228,-14
-229,-13
-230,-29
-231,-23
-232,-15
-233,-15
-234,-14
-235,-28
-236,-25
-237,-17
-238,-23
-239,-29
-240,-15
-241,-14
-242,-15
-243,-23
-244,-15
-245,-16
-246,-19
-247,-13
-248,-16
-249,-17
-250,-25
-251,-30
-252,-13
-253,-14
-254,-15
-255,-22
-256,-14
-257,-17
-258,-126
-259,-15
-260,-21
-261,-16
-262,-23
-263,-14
-264,-13
-265,-13
-266,-19
-267,-13
-268,-19
-269,-17
-270,-17
-271,-13
-272,-19
-273,-13
-274,-13
-275,-16
-276,-22
-277,-14
-278,-15
-279,-19
-280,-34
-281,-13
-282,-15
-283,-32
-284,-13
-285,-13
-286,-13
-287,-14
-288,-16
-289,-13
-290,-13
-291,-17
-292,-13
-293,-13
-294,-22
-295,-14
-296,-15
-297,-13
-298,-13
-299,-13
-300,-16
-301,-13
-302,-14
-303,-13
-304,-13
-305,-13
-306,-24
-307,-13
-308,-13
-309,-15
-310,-13
-311,-13
-312,-13
-313,-15
-314,-13
-315,-19
-316,-15
-317,-17
-318,-13
-319,-13
-320,-13
-321,-13
-322,-13
-323,-15
-324,-13
-325,-13
-326,-13
-327,-123
-328,-13
-329,-13
-330,-13
-331,-13
-332,-13
-333,-13
-334,-13
-335,-13
-336,-16
-337,-13
-338,-23
-339,-13
-340,-13
-341,-13
-342,-13
-343,-13
-344,-13
-345,-13
-346,-13
-347,-13
-348,-13
-349,-13
-350,-134
-351,-13
-352,-13
-353,-13
-354,-13
-355,-13
-356,-13
-357,-13
-358,-13
-359,-13
-360,-15
-361,-13
-362,-13
-363,-13
-364,-13
-365,-13
-366,-13
-367,-13
-368,-13
-369,-14
-370,-13
-371,-13
-372,-13
-373,-13
-374,-13
-375,-13
-376,-13
-377,-124
-378,-13
-379,-13
-380,-13
-381,-13
-382,-13
-383,-13
-384,-13
-385,-13
-386,-13
-387,-13
-388,-13
-389,-121
-390,-13
-391,-13
-392,-13
-393,-13
-394,-13
-395,-13
-396,-13
-397,-13
-398,-17
-399,-13
diff --git a/projects/codes/QLearning/outputs/CliffWalking-v0/20220824-103255/models/Qleaning_model.pkl b/projects/codes/QLearning/outputs/CliffWalking-v0/20220826-224730/models/Qleaning_model.pkl
similarity index 94%
rename from projects/codes/QLearning/outputs/CliffWalking-v0/20220824-103255/models/Qleaning_model.pkl
rename to projects/codes/QLearning/outputs/CliffWalking-v0/20220826-224730/models/Qleaning_model.pkl
index a328ce9..2369fe1 100644
Binary files a/projects/codes/QLearning/outputs/CliffWalking-v0/20220824-103255/models/Qleaning_model.pkl and b/projects/codes/QLearning/outputs/CliffWalking-v0/20220826-224730/models/Qleaning_model.pkl differ
diff --git a/projects/codes/QLearning/outputs/CliffWalking-v0/20220826-224730/results/params.json b/projects/codes/QLearning/outputs/CliffWalking-v0/20220826-224730/results/params.json
new file mode 100644
index 0000000..09764c9
--- /dev/null
+++ b/projects/codes/QLearning/outputs/CliffWalking-v0/20220826-224730/results/params.json
@@ -0,0 +1 @@
+{"algo_name": "Q-learning", "env_name": "CliffWalking-v0", "train_eps": 400, "test_eps": 20, "gamma": 0.9, "epsilon_start": 0.95, "epsilon_end": 0.01, "epsilon_decay": 300, "lr": 0.1, "device": "cpu", "seed": 10, "show_fig": false, "save_fig": true, "result_path": "C:\\Users\\24438\\Desktop\\rl-tutorials\\codes\\QLearning/outputs/CliffWalking-v0/20220826-224730/results/", "model_path": "C:\\Users\\24438\\Desktop\\rl-tutorials\\codes\\QLearning/outputs/CliffWalking-v0/20220826-224730/models/", "n_states": 48, "n_actions": 4}
\ No newline at end of file
diff --git a/projects/codes/QLearning/outputs/CliffWalking-v0/20220824-103255/results/testing_curve.png b/projects/codes/QLearning/outputs/CliffWalking-v0/20220826-224730/results/testing_curve.png
similarity index 100%
rename from projects/codes/QLearning/outputs/CliffWalking-v0/20220824-103255/results/testing_curve.png
rename to projects/codes/QLearning/outputs/CliffWalking-v0/20220826-224730/results/testing_curve.png
diff --git a/projects/codes/QLearning/outputs/CliffWalking-v0/20220826-224730/results/testing_results.csv b/projects/codes/QLearning/outputs/CliffWalking-v0/20220826-224730/results/testing_results.csv
new file mode 100644
index 0000000..c48c7ef
--- /dev/null
+++ b/projects/codes/QLearning/outputs/CliffWalking-v0/20220826-224730/results/testing_results.csv
@@ -0,0 +1,21 @@
+episodes,rewards,steps
+0,-13,13
+1,-13,13
+2,-13,13
+3,-13,13
+4,-13,13
+5,-13,13
+6,-13,13
+7,-13,13
+8,-13,13
+9,-13,13
+10,-13,13
+11,-13,13
+12,-13,13
+13,-13,13
+14,-13,13
+15,-13,13
+16,-13,13
+17,-13,13
+18,-13,13
+19,-13,13
diff --git a/projects/codes/QLearning/outputs/CliffWalking-v0/20220824-103255/results/training_curve.png b/projects/codes/QLearning/outputs/CliffWalking-v0/20220826-224730/results/training_curve.png
similarity index 100%
rename from projects/codes/QLearning/outputs/CliffWalking-v0/20220824-103255/results/training_curve.png
rename to projects/codes/QLearning/outputs/CliffWalking-v0/20220826-224730/results/training_curve.png
diff --git a/projects/codes/QLearning/outputs/CliffWalking-v0/20220826-224730/results/training_results.csv b/projects/codes/QLearning/outputs/CliffWalking-v0/20220826-224730/results/training_results.csv
new file mode 100644
index 0000000..523dc54
--- /dev/null
+++ b/projects/codes/QLearning/outputs/CliffWalking-v0/20220826-224730/results/training_results.csv
@@ -0,0 +1,401 @@
+episodes,rewards,steps
+0,-2131,448
+1,-1086,492
+2,-586,388
+3,-220,220
+4,-154,154
+5,-122,122
+6,-150,150
+7,-159,159
+8,-164,164
+9,-88,88
+10,-195,195
+11,-114,114
+12,-60,60
+13,-179,179
+14,-101,101
+15,-304,205
+16,-96,96
+17,-119,119
+18,-113,113
+19,-98,98
+20,-106,106
+21,-105,105
+22,-77,77
+23,-51,51
+24,-105,105
+25,-136,136
+26,-100,100
+27,-29,29
+28,-79,79
+29,-114,114
+30,-82,82
+31,-70,70
+32,-75,75
+33,-51,51
+34,-94,94
+35,-52,52
+36,-93,93
+37,-71,71
+38,-73,73
+39,-48,48
+40,-52,52
+41,-96,96
+42,-46,46
+43,-65,65
+44,-57,57
+45,-41,41
+46,-104,104
+47,-51,51
+48,-181,82
+49,-229,130
+50,-39,39
+51,-69,69
+52,-53,53
+53,-59,59
+54,-26,26
+55,-75,75
+56,-31,31
+57,-60,60
+58,-63,63
+59,-40,40
+60,-35,35
+61,-79,79
+62,-42,42
+63,-22,22
+64,-73,73
+65,-71,71
+66,-18,18
+67,-55,55
+68,-29,29
+69,-43,43
+70,-70,70
+71,-49,49
+72,-42,42
+73,-29,29
+74,-81,81
+75,-36,36
+76,-38,38
+77,-36,36
+78,-52,52
+79,-28,28
+80,-42,42
+81,-52,52
+82,-66,66
+83,-31,31
+84,-27,27
+85,-49,49
+86,-28,28
+87,-54,54
+88,-34,34
+89,-35,35
+90,-50,50
+91,-36,36
+92,-36,36
+93,-46,46
+94,-34,34
+95,-135,36
+96,-39,39
+97,-36,36
+98,-26,26
+99,-56,56
+100,-40,40
+101,-40,40
+102,-26,26
+103,-28,28
+104,-31,31
+105,-35,35
+106,-26,26
+107,-57,57
+108,-44,44
+109,-41,41
+110,-31,31
+111,-26,26
+112,-25,25
+113,-41,41
+114,-32,32
+115,-44,44
+116,-30,30
+117,-32,32
+118,-30,30
+119,-25,25
+120,-23,23
+121,-47,47
+122,-24,24
+123,-45,45
+124,-39,39
+125,-21,21
+126,-43,43
+127,-143,44
+128,-26,26
+129,-20,20
+130,-32,32
+131,-16,16
+132,-24,24
+133,-42,42
+134,-25,25
+135,-36,36
+136,-19,19
+137,-29,29
+138,-43,43
+139,-17,17
+140,-150,51
+141,-32,32
+142,-34,34
+143,-19,19
+144,-26,26
+145,-30,30
+146,-31,31
+147,-49,49
+148,-33,33
+149,-21,21
+150,-17,17
+151,-48,48
+152,-34,34
+153,-20,20
+154,-20,20
+155,-26,26
+156,-21,21
+157,-13,13
+158,-40,40
+159,-22,22
+160,-26,26
+161,-30,30
+162,-29,29
+163,-25,25
+164,-26,26
+165,-27,27
+166,-21,21
+167,-29,29
+168,-24,24
+169,-17,17
+170,-22,22
+171,-35,35
+172,-35,35
+173,-18,18
+174,-135,36
+175,-15,15
+176,-23,23
+177,-28,28
+178,-25,25
+179,-24,24
+180,-29,29
+181,-31,31
+182,-24,24
+183,-129,30
+184,-45,45
+185,-24,24
+186,-17,17
+187,-20,20
+188,-21,21
+189,-23,23
+190,-15,15
+191,-32,32
+192,-22,22
+193,-19,19
+194,-17,17
+195,-45,45
+196,-15,15
+197,-14,14
+198,-14,14
+199,-37,37
+200,-23,23
+201,-17,17
+202,-19,19
+203,-21,21
+204,-23,23
+205,-27,27
+206,-14,14
+207,-18,18
+208,-23,23
+209,-34,34
+210,-23,23
+211,-13,13
+212,-25,25
+213,-17,17
+214,-13,13
+215,-21,21
+216,-29,29
+217,-18,18
+218,-24,24
+219,-15,15
+220,-27,27
+221,-25,25
+222,-21,21
+223,-19,19
+224,-17,17
+225,-18,18
+226,-13,13
+227,-22,22
+228,-14,14
+229,-13,13
+230,-29,29
+231,-23,23
+232,-15,15
+233,-15,15
+234,-14,14
+235,-28,28
+236,-25,25
+237,-17,17
+238,-23,23
+239,-29,29
+240,-15,15
+241,-14,14
+242,-15,15
+243,-23,23
+244,-15,15
+245,-16,16
+246,-19,19
+247,-13,13
+248,-16,16
+249,-17,17
+250,-25,25
+251,-30,30
+252,-13,13
+253,-14,14
+254,-15,15
+255,-22,22
+256,-14,14
+257,-17,17
+258,-126,27
+259,-15,15
+260,-21,21
+261,-16,16
+262,-23,23
+263,-14,14
+264,-13,13
+265,-13,13
+266,-19,19
+267,-13,13
+268,-19,19
+269,-17,17
+270,-17,17
+271,-13,13
+272,-19,19
+273,-13,13
+274,-13,13
+275,-16,16
+276,-22,22
+277,-14,14
+278,-15,15
+279,-19,19
+280,-34,34
+281,-13,13
+282,-15,15
+283,-32,32
+284,-13,13
+285,-13,13
+286,-13,13
+287,-14,14
+288,-16,16
+289,-13,13
+290,-13,13
+291,-17,17
+292,-13,13
+293,-13,13
+294,-22,22
+295,-14,14
+296,-15,15
+297,-13,13
+298,-13,13
+299,-13,13
+300,-16,16
+301,-13,13
+302,-14,14
+303,-13,13
+304,-13,13
+305,-13,13
+306,-24,24
+307,-13,13
+308,-13,13
+309,-15,15
+310,-13,13
+311,-13,13
+312,-13,13
+313,-15,15
+314,-13,13
+315,-19,19
+316,-15,15
+317,-17,17
+318,-13,13
+319,-13,13
+320,-13,13
+321,-13,13
+322,-13,13
+323,-15,15
+324,-13,13
+325,-13,13
+326,-13,13
+327,-123,24
+328,-13,13
+329,-13,13
+330,-13,13
+331,-13,13
+332,-13,13
+333,-13,13
+334,-13,13
+335,-13,13
+336,-16,16
+337,-13,13
+338,-23,23
+339,-13,13
+340,-13,13
+341,-13,13
+342,-13,13
+343,-13,13
+344,-13,13
+345,-13,13
+346,-13,13
+347,-13,13
+348,-13,13
+349,-13,13
+350,-134,35
+351,-13,13
+352,-13,13
+353,-13,13
+354,-13,13
+355,-13,13
+356,-13,13
+357,-13,13
+358,-13,13
+359,-13,13
+360,-15,15
+361,-13,13
+362,-13,13
+363,-13,13
+364,-13,13
+365,-13,13
+366,-13,13
+367,-13,13
+368,-13,13
+369,-14,14
+370,-13,13
+371,-13,13
+372,-13,13
+373,-13,13
+374,-13,13
+375,-13,13
+376,-13,13
+377,-124,25
+378,-13,13
+379,-13,13
+380,-13,13
+381,-13,13
+382,-13,13
+383,-13,13
+384,-13,13
+385,-13,13
+386,-13,13
+387,-13,13
+388,-13,13
+389,-121,22
+390,-13,13
+391,-13,13
+392,-13,13
+393,-13,13
+394,-13,13
+395,-13,13
+396,-13,13
+397,-13,13
+398,-17,17
+399,-13,13
diff --git a/projects/codes/QLearning/outputs/Racetrack-v0/20220826-224626/models/Qleaning_model.pkl b/projects/codes/QLearning/outputs/Racetrack-v0/20220826-224626/models/Qleaning_model.pkl
new file mode 100644
index 0000000..6d6b01f
Binary files /dev/null and b/projects/codes/QLearning/outputs/Racetrack-v0/20220826-224626/models/Qleaning_model.pkl differ
diff --git a/projects/codes/QLearning/outputs/Racetrack-v0/20220826-224626/results/params.json b/projects/codes/QLearning/outputs/Racetrack-v0/20220826-224626/results/params.json
new file mode 100644
index 0000000..ead445f
--- /dev/null
+++ b/projects/codes/QLearning/outputs/Racetrack-v0/20220826-224626/results/params.json
@@ -0,0 +1 @@
+{"algo_name": "Q-learning", "env_name": "Racetrack-v0", "train_eps": 400, "test_eps": 20, "gamma": 0.9, "epsilon_start": 0.95, "epsilon_end": 0.01, "epsilon_decay": 300, "lr": 0.1, "device": "cpu", "seed": 10, "show_fig": false, "save_fig": true, "result_path": "C:\\Users\\24438\\Desktop\\rl-tutorials\\codes\\QLearning/outputs/Racetrack-v0/20220826-224626/results/", "model_path": "C:\\Users\\24438\\Desktop\\rl-tutorials\\codes\\QLearning/outputs/Racetrack-v0/20220826-224626/models/", "n_states": 4, "n_actions": 9}
\ No newline at end of file
diff --git a/projects/codes/QLearning/outputs/Racetrack-v0/20220826-224626/results/testing_curve.png b/projects/codes/QLearning/outputs/Racetrack-v0/20220826-224626/results/testing_curve.png
new file mode 100644
index 0000000..fa1588a
Binary files /dev/null and b/projects/codes/QLearning/outputs/Racetrack-v0/20220826-224626/results/testing_curve.png differ
diff --git a/projects/codes/QLearning/outputs/Racetrack-v0/20220826-224626/results/testing_results.csv b/projects/codes/QLearning/outputs/Racetrack-v0/20220826-224626/results/testing_results.csv
new file mode 100644
index 0000000..3d60bb2
--- /dev/null
+++ b/projects/codes/QLearning/outputs/Racetrack-v0/20220826-224626/results/testing_results.csv
@@ -0,0 +1,21 @@
+episodes,rewards,steps
+0,-1000,1000
+1,2,8
+2,4,6
+3,3,7
+4,2,8
+5,3,7
+6,4,6
+7,-1000,1000
+8,3,7
+9,-11,11
+10,-19,19
+11,-18,18
+12,1,9
+13,1,9
+14,4,6
+15,-16,16
+16,-17,17
+17,4,6
+18,-16,16
+19,4,6
diff --git a/projects/codes/QLearning/outputs/Racetrack-v0/20220826-224626/results/training_curve.png b/projects/codes/QLearning/outputs/Racetrack-v0/20220826-224626/results/training_curve.png
new file mode 100644
index 0000000..c0c7b24
Binary files /dev/null and b/projects/codes/QLearning/outputs/Racetrack-v0/20220826-224626/results/training_curve.png differ
diff --git a/projects/codes/QLearning/outputs/Racetrack-v0/20220826-224626/results/training_results.csv b/projects/codes/QLearning/outputs/Racetrack-v0/20220826-224626/results/training_results.csv
new file mode 100644
index 0000000..a7df26d
--- /dev/null
+++ b/projects/codes/QLearning/outputs/Racetrack-v0/20220826-224626/results/training_results.csv
@@ -0,0 +1,401 @@
+episodes,rewards,steps
+0,-3580,1000
+1,-2960,1000
+2,-2670,1000
+3,-2720,1000
+4,-2670,1000
+5,-2570,1000
+6,-2407,977
+7,-2012,852
+8,-2500,1000
+9,-2530,1000
+10,-2550,1000
+11,-437,187
+12,-80,40
+13,-2450,1000
+14,-338,148
+15,-1175,525
+16,-755,325
+17,-411,181
+18,-1068,448
+19,-785,325
+20,-149,79
+21,-628,268
+22,-423,183
+23,-282,122
+24,-2198,938
+25,-13,13
+26,-253,113
+27,-48,28
+28,-72,42
+29,-123,63
+30,-305,145
+31,-72,32
+32,-142,72
+33,-13,13
+34,4,6
+35,-1285,545
+36,-174,94
+37,-436,196
+38,-759,339
+39,-11,11
+40,-17,17
+41,-283,123
+42,-181,81
+43,-44,24
+44,-55,35
+45,-135,65
+46,-577,277
+47,-234,114
+48,-54,34
+49,4,6
+50,-29,19
+51,-100,50
+52,-32,22
+53,-23,23
+54,4,6
+55,-17,17
+56,-18,18
+57,-48,28
+58,-34,24
+59,-45,25
+60,-29,19
+61,1,9
+62,-77,37
+63,3,7
+64,-25,15
+65,-3,13
+66,-78,48
+67,-69,39
+68,-105,45
+69,-48,28
+70,3,7
+71,4,6
+72,-100,50
+73,-130,60
+74,-20,20
+75,4,6
+76,4,6
+77,4,6
+78,4,6
+79,-47,27
+80,4,6
+81,4,6
+82,-174,94
+83,-12,12
+84,-26,16
+85,3,7
+86,3,7
+87,-42,32
+88,-48,28
+89,-97,57
+90,-11,11
+91,-16,16
+92,-15,15
+93,4,6
+94,-147,67
+95,-52,32
+96,-97,47
+97,3,7
+98,-17,17
+99,3,7
+100,4,6
+101,3,7
+102,3,7
+103,3,7
+104,1,9
+105,4,6
+106,4,6
+107,3,7
+108,4,6
+109,-68,38
+110,3,7
+111,4,6
+112,-14,14
+113,4,6
+114,-57,37
+115,3,7
+116,4,6
+117,-12,12
+118,3,7
+119,3,7
+120,-64,34
+121,-13,13
+122,3,7
+123,-13,13
+124,4,6
+125,3,7
+126,-32,22
+127,-41,31
+128,3,7
+129,3,7
+130,3,7
+131,4,6
+132,4,6
+133,3,7
+134,-12,12
+135,-31,21
+136,4,6
+137,3,7
+138,-51,31
+139,-48,28
+140,4,6
+141,-85,45
+142,-14,14
+143,4,6
+144,3,7
+145,-6,16
+146,4,6
+147,4,6
+148,-15,15
+149,4,6
+150,-24,24
+151,3,7
+152,-14,14
+153,-18,18
+154,3,7
+155,4,6
+156,-85,45
+157,-51,31
+158,3,7
+159,2,8
+160,3,7
+161,-79,39
+162,-14,14
+163,-13,13
+164,4,6
+165,3,7
+166,4,6
+167,3,7
+168,-74,34
+169,-15,15
+170,4,6
+171,-14,14
+172,4,6
+173,-31,21
+174,-8,18
+175,4,6
+176,4,6
+177,4,6
+178,4,6
+179,-29,19
+180,4,6
+181,3,7
+182,4,6
+183,-82,42
+184,3,7
+185,4,6
+186,4,6
+187,-11,11
+188,-23,23
+189,-33,23
+190,3,7
+191,-12,12
+192,-44,24
+193,-62,42
+194,-16,16
+195,4,6
+196,-12,12
+197,3,7
+198,-13,13
+199,3,7
+200,3,7
+201,4,6
+202,4,6
+203,4,6
+204,-28,18
+205,-16,16
+206,3,7
+207,4,6
+208,-12,12
+209,-13,13
+210,-66,36
+211,-14,14
+212,4,6
+213,4,6
+214,-15,15
+215,-60,30
+216,4,6
+217,3,7
+218,4,6
+219,-33,23
+220,-12,12
+221,-14,14
+222,4,6
+223,3,7
+224,-97,47
+225,4,6
+226,2,8
+227,4,6
+228,4,6
+229,3,7
+230,-11,11
+231,4,6
+232,3,7
+233,3,7
+234,4,6
+235,3,7
+236,3,7
+237,-32,22
+238,-13,13
+239,3,7
+240,-22,22
+241,4,6
+242,2,8
+243,-31,21
+244,4,6
+245,-4,14
+246,-30,20
+247,4,6
+248,3,7
+249,-26,16
+250,4,6
+251,-12,12
+252,2,8
+253,1,9
+254,4,6
+255,2,8
+256,2,8
+257,-12,12
+258,3,7
+259,-48,28
+260,4,6
+261,4,6
+262,-51,31
+263,-12,12
+264,4,6
+265,2,8
+266,2,8
+267,2,8
+268,3,7
+269,4,6
+270,4,6
+271,-17,17
+272,4,6
+273,-13,13
+274,-16,16
+275,-97,57
+276,3,7
+277,-1,11
+278,-32,22
+279,3,7
+280,4,6
+281,3,7
+282,3,7
+283,3,7
+284,3,7
+285,2,8
+286,3,7
+287,-15,15
+288,2,8
+289,-18,18
+290,4,6
+291,-36,26
+292,4,6
+293,4,6
+294,4,6
+295,4,6
+296,-77,47
+297,-14,14
+298,3,7
+299,3,7
+300,3,7
+301,4,6
+302,3,7
+303,4,6
+304,-12,12
+305,-45,35
+306,-63,43
+307,2,8
+308,4,6
+309,4,6
+310,-13,13
+311,4,6
+312,-13,13
+313,4,6
+314,3,7
+315,-30,20
+316,-13,13
+317,3,7
+318,4,6
+319,4,6
+320,-12,12
+321,-13,13
+322,3,7
+323,3,7
+324,3,7
+325,3,7
+326,-36,26
+327,4,6
+328,3,7
+329,3,7
+330,3,7
+331,3,7
+332,-14,14
+333,-16,16
+334,3,7
+335,3,7
+336,-14,14
+337,1,9
+338,2,8
+339,3,7
+340,4,6
+341,-36,26
+342,-14,14
+343,-78,48
+344,2,8
+345,-37,27
+346,3,7
+347,3,7
+348,-37,27
+349,-16,16
+350,4,6
+351,-15,15
+352,4,6
+353,2,8
+354,-44,24
+355,-13,13
+356,-14,14
+357,-17,17
+358,-13,13
+359,3,7
+360,2,8
+361,4,6
+362,3,7
+363,-5,15
+364,-14,14
+365,2,8
+366,-12,12
+367,3,7
+368,4,6
+369,2,8
+370,2,8
+371,1,9
+372,-16,16
+373,1,9
+374,4,6
+375,-16,16
+376,3,7
+377,2,8
+378,-13,13
+379,-44,34
+380,-16,16
+381,-30,20
+382,4,6
+383,4,6
+384,2,8
+385,-15,15
+386,4,6
+387,3,7
+388,2,8
+389,4,6
+390,2,8
+391,3,7
+392,3,7
+393,-14,14
+394,-15,15
+395,3,7
+396,-13,13
+397,3,7
+398,4,6
+399,3,7
diff --git a/projects/codes/Sarsa/main.py b/projects/codes/Sarsa/main.py
index 1ea4527..cb1b22c 100644
--- a/projects/codes/Sarsa/main.py
+++ b/projects/codes/Sarsa/main.py
@@ -5,7 +5,7 @@ Author: John
 Email: johnjim0816@gmail.com
 Date: 2021-03-11 17:59:16
 LastEditor: John
-LastEditTime: 2022-08-25 14:26:36
+LastEditTime: 2022-08-26 23:03:39
 Discription: 
 Environment: 
 '''
@@ -20,117 +20,105 @@ import argparse
 from envs.register import register_env
 from envs.wrappers import CliffWalkingWapper
 from Sarsa.sarsa import Sarsa
-from common.utils import save_results,make_dir,plot_rewards,save_args,all_seed
+from common.utils import all_seed
+from common.launcher import Launcher
 
-def get_args():
-    curr_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")   # obtain current time
-    parser = argparse.ArgumentParser(description="hyperparameters")      
-    parser.add_argument('--algo_name',default='Sarsa',type=str,help="name of algorithm")
-    parser.add_argument('--env_name',default='Racetrack-v0',type=str,help="name of environment")
-    parser.add_argument('--train_eps',default=300,type=int,help="episodes of training") 
-    parser.add_argument('--test_eps',default=20,type=int,help="episodes of testing") 
-    parser.add_argument('--gamma',default=0.99,type=float,help="discounted factor") 
-    parser.add_argument('--epsilon_start',default=0.90,type=float,help="initial value of epsilon") 
-    parser.add_argument('--epsilon_end',default=0.01,type=float,help="final value of epsilon") 
-    parser.add_argument('--epsilon_decay',default=200,type=int,help="decay rate of epsilon") 
-    parser.add_argument('--lr',default=0.2,type=float,help="learning rate")
-    parser.add_argument('--device',default='cpu',type=str,help="cpu or cuda") 
-    parser.add_argument('--seed',default=10,type=int,help="seed") 
-    parser.add_argument('--show_fig',default=False,type=bool,help="if show figure or not")  
-    parser.add_argument('--save_fig',default=True,type=bool,help="if save figure or not")   
-    args = parser.parse_args()   
-    default_args = {'result_path':f"{curr_path}/outputs/{args.env_name}/{curr_time}/results/",
-                    'model_path':f"{curr_path}/outputs/{args.env_name}/{curr_time}/models/",
-    }
-    args = {**vars(args),**default_args}  # type(dict)                         
-    return args
+class Main(Launcher):
+    def get_args(self):
+        curr_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")   # obtain current time
+        parser = argparse.ArgumentParser(description="hyperparameters")      
+        parser.add_argument('--algo_name',default = 'Sarsa',type=str,help="name of algorithm")
+        parser.add_argument('--env_name',default = 'Racetrack-v0',type=str,help="name of environment")
+        parser.add_argument('--train_eps',default = 300,type=int,help="episodes of training") 
+        parser.add_argument('--test_eps',default = 20,type=int,help="episodes of testing") 
+        parser.add_argument('--ep_max_steps',default = 100000,type=int,help="steps per episode, much larger value can simulate infinite steps") 
+        parser.add_argument('--gamma',default=0.99,type=float,help="discounted factor") 
+        parser.add_argument('--epsilon_start',default=0.90,type=float,help="initial value of epsilon") 
+        parser.add_argument('--epsilon_end',default=0.01,type=float,help="final value of epsilon") 
+        parser.add_argument('--epsilon_decay',default=200,type=int,help="decay rate of epsilon") 
+        parser.add_argument('--lr',default=0.2,type=float,help="learning rate")
+        parser.add_argument('--device',default='cpu',type=str,help="cpu or cuda") 
+        parser.add_argument('--seed',default=10,type=int,help="seed") 
+        parser.add_argument('--show_fig',default=False,type=bool,help="if show figure or not")  
+        parser.add_argument('--save_fig',default=True,type=bool,help="if save figure or not")   
+        args = parser.parse_args()   
+        default_args = {'result_path':f"{curr_path}/outputs/{args.env_name}/{curr_time}/results/",
+                        'model_path':f"{curr_path}/outputs/{args.env_name}/{curr_time}/models/",
+        }
+        args = {**vars(args),**default_args}  # type(dict)                         
+        return args
 
-def env_agent_config(cfg):
-    register_env(cfg['env_name'])
-    env = gym.make(cfg['env_name'])
-    if cfg['seed'] !=0: # set random seed
-        all_seed(env,seed= cfg['seed']) 
-    if cfg['env_name'] == 'CliffWalking-v0':
-        env = CliffWalkingWapper(env)
-    try: # state dimension
-        n_states = env.observation_space.n # print(hasattr(env.observation_space, 'n'))
-    except AttributeError:
-        n_states = env.observation_space.shape[0] # print(hasattr(env.observation_space, 'shape'))
-    n_actions = env.action_space.n  # action dimension
-    print(f"n_states: {n_states}, n_actions: {n_actions}")
-    cfg.update({"n_states":n_states,"n_actions":n_actions}) # update to cfg paramters
-    agent = Sarsa(cfg)
-    return env,agent
+    def env_agent_config(self,cfg):
+        register_env(cfg['env_name'])
+        env = gym.make(cfg['env_name'])
+        if cfg['seed'] !=0: # set random seed
+            all_seed(env,seed= cfg['seed']) 
+        if cfg['env_name'] == 'CliffWalking-v0':
+            env = CliffWalkingWapper(env)
+        try: # state dimension
+            n_states = env.observation_space.n # print(hasattr(env.observation_space, 'n'))
+        except AttributeError:
+            n_states = env.observation_space.shape[0] # print(hasattr(env.observation_space, 'shape'))
+        n_actions = env.action_space.n  # action dimension
+        print(f"n_states: {n_states}, n_actions: {n_actions}")
+        cfg.update({"n_states":n_states,"n_actions":n_actions}) # update to cfg parameters
+        agent = Sarsa(cfg)
+        return env,agent
         
-def train(cfg,env,agent):
-    print("Start training!")
-    print(f"Env: {cfg['env_name']}, Algorithm: {cfg['algo_name']}, Device: {cfg['device']}")
-    rewards = []  # record rewards for all episodes
-    steps = [] # record steps for all episodes
-    for i_ep in range(cfg['train_eps']):
-        ep_reward = 0  # reward per episode
-        ep_step = 0 # step per episode
-        state = env.reset()  # reset and obtain initial state
-        action = agent.sample_action(state)
-        while True:
-        # for _ in range(cfg.ep_max_steps):
-            next_state, reward, done, _ = env.step(action)  # update env and return transitions
-            next_action =  agent.sample_action(next_state)
-            agent.update(state, action, reward, next_state, next_action,done)  # update agent
-            state = next_state  # update state
-            action = next_action
-            ep_reward += reward
-            ep_step += 1
-            if done:
-                break
-        rewards.append(ep_reward)
-        steps.append(ep_step)
-        if (i_ep+1)%10==0:
-            print(f'Episode: {i_ep+1}/{cfg["train_eps"]}, Reward: {ep_reward:.2f}, Steps:{ep_step}, Epislon: {agent.epsilon:.3f}')
-    print("Finish training!")
-    return {'episodes':range(len(rewards)),'rewards':rewards,'steps':steps}
+    def train(self,cfg,env,agent):
+        print("Start training!")
+        print(f"Env: {cfg['env_name']}, Algorithm: {cfg['algo_name']}, Device: {cfg['device']}")
+        rewards = []  # record rewards for all episodes
+        steps = [] # record steps for all episodes
+        for i_ep in range(cfg['train_eps']):
+            ep_reward = 0  # reward per episode
+            ep_step = 0 # step per episode
+            state = env.reset()  # reset and obtain initial state
+            action = agent.sample_action(state)
+            # while True:
+            for _ in range(cfg['ep_max_steps']):
+                next_state, reward, done, _ = env.step(action)  # update env and return transitions
+                next_action =  agent.sample_action(next_state)
+                agent.update(state, action, reward, next_state, next_action,done)  # update agent
+                state = next_state  # update state
+                action = next_action
+                ep_reward += reward
+                ep_step += 1
+                if done:
+                    break
+            rewards.append(ep_reward)
+            steps.append(ep_step)
+            if (i_ep+1)%10==0:
+                print(f'Episode: {i_ep+1}/{cfg["train_eps"]}, Reward: {ep_reward:.2f}, Steps: {ep_step}, Epislon: {agent.epsilon:.3f}')
+        print("Finish training!")
+        return {'episodes':range(len(rewards)),'rewards':rewards,'steps':steps}
 
-def test(cfg,env,agent):
-    print("Start testing!")
-    print(f"Env: {cfg['env_name']}, Algorithm: {cfg['algo_name']}, Device: {cfg['device']}")
-    rewards = []  # record rewards for all episodes
-    steps = [] # record steps for all episodes
-    for i_ep in range(cfg['test_eps']):
-        ep_reward = 0  # reward per episode
-        ep_step = 0
-        while True:
-        # for _ in range(cfg.ep_max_steps):
-            action = agent.predict_action(state)
-            next_state, reward, done = env.step(action)
-            state = next_state
-            ep_reward+=reward
-            ep_step+=1
-            if done:
-                break  
-        rewards.append(ep_reward)
-        steps.append(ep_step)
-        print(f"Episode: {i_ep+1}/{cfg['test_eps']}, Steps:{ep_step}, Reward: {ep_reward:.2f}")
-    print("Finish testing!")
-    return {'episodes':range(len(rewards)),'rewards':rewards,'steps':steps}
+    def test(self,cfg,env,agent):
+        print("Start testing!")
+        print(f"Env: {cfg['env_name']}, Algorithm: {cfg['algo_name']}, Device: {cfg['device']}")
+        rewards = []  # record rewards for all episodes
+        steps = [] # record steps for all episodes
+        for i_ep in range(cfg['test_eps']):
+            ep_reward = 0  # reward per episode
+            ep_step = 0
+            state = env.reset()  # reset and obtain initial state
+            for _ in range(cfg['ep_max_steps']):
+                action = agent.predict_action(state)
+                next_state, reward, done, _ = env.step(action)
+                state = next_state
+                ep_reward+=reward
+                ep_step+=1
+                if done:
+                    break  
+            rewards.append(ep_reward)
+            steps.append(ep_step)
+            print(f"Episode: {i_ep+1}/{cfg['test_eps']}, Steps: {ep_step}, Reward: {ep_reward:.2f}")
+        print("Finish testing!")
+        return {'episodes':range(len(rewards)),'rewards':rewards,'steps':steps}
 
 if __name__ == "__main__":
-    cfg = get_args()
-    # 训练
-    env, agent = env_agent_config(cfg)
-    res_dic = train(cfg, env, agent)
-    make_dir(cfg.result_path, cfg.model_path)  
-    save_args(cfg) # save parameters
-    agent.save(path=cfg.model_path)  # save model
-    save_results(res_dic, tag='train',
-                 path=cfg.result_path)  
-    plot_rewards(res_dic['rewards'], cfg, tag="train")  
-    # 测试
-    env, agent = env_agent_config(cfg)
-    agent.load(path=cfg.model_path)  # 导入模型
-    res_dic = test(cfg, env, agent)
-    save_results(res_dic, tag='test',
-                 path=cfg.result_path)  # 保存结果
-    plot_rewards(res_dic['rewards'], cfg, tag="test")  # 画出结果
+    main = Main()
+    main.run()
     
     
 
diff --git a/projects/codes/Sarsa/outputs/CliffWalking-v0/20220803-142740/models/sarsa_model.pkl b/projects/codes/Sarsa/outputs/CliffWalking-v0/20220803-142740/models/sarsa_model.pkl
deleted file mode 100644
index 1c8f133..0000000
Binary files a/projects/codes/Sarsa/outputs/CliffWalking-v0/20220803-142740/models/sarsa_model.pkl and /dev/null differ
diff --git a/projects/codes/Sarsa/outputs/CliffWalking-v0/20220803-142740/results/params.json b/projects/codes/Sarsa/outputs/CliffWalking-v0/20220803-142740/results/params.json
deleted file mode 100644
index 8492e8e..0000000
--- a/projects/codes/Sarsa/outputs/CliffWalking-v0/20220803-142740/results/params.json
+++ /dev/null
@@ -1 +0,0 @@
-{"algo_name": "Sarsa", "env_name": "CliffWalking-v0", "train_eps": 300, "test_eps": 20, "ep_max_steps": 200, "gamma": 0.99, "epsilon_start": 0.9, "epsilon_end": 0.01, "epsilon_decay": 200, "lr": 0.2, "device": "cpu", "result_path": "/Users/jj/Desktop/rl-tutorials/codes/Sarsa/outputs/CliffWalking-v0/20220803-142740/results/", "model_path": "/Users/jj/Desktop/rl-tutorials/codes/Sarsa/outputs/CliffWalking-v0/20220803-142740/models/", "save_fig": true}
\ No newline at end of file
diff --git a/projects/codes/Sarsa/outputs/CliffWalking-v0/20220803-142740/results/test_rewards.npy b/projects/codes/Sarsa/outputs/CliffWalking-v0/20220803-142740/results/test_rewards.npy
deleted file mode 100644
index ef51f5e..0000000
Binary files a/projects/codes/Sarsa/outputs/CliffWalking-v0/20220803-142740/results/test_rewards.npy and /dev/null differ
diff --git a/projects/codes/Sarsa/outputs/CliffWalking-v0/20220803-142740/results/test_rewards_curve.png b/projects/codes/Sarsa/outputs/CliffWalking-v0/20220803-142740/results/test_rewards_curve.png
deleted file mode 100644
index 5b97ea1..0000000
Binary files a/projects/codes/Sarsa/outputs/CliffWalking-v0/20220803-142740/results/test_rewards_curve.png and /dev/null differ
diff --git a/projects/codes/Sarsa/outputs/CliffWalking-v0/20220803-142740/results/train_rewards.npy b/projects/codes/Sarsa/outputs/CliffWalking-v0/20220803-142740/results/train_rewards.npy
deleted file mode 100644
index c7ad308..0000000
Binary files a/projects/codes/Sarsa/outputs/CliffWalking-v0/20220803-142740/results/train_rewards.npy and /dev/null differ
diff --git a/projects/codes/Sarsa/outputs/CliffWalking-v0/20220803-142740/results/train_rewards_curve.png b/projects/codes/Sarsa/outputs/CliffWalking-v0/20220803-142740/results/train_rewards_curve.png
deleted file mode 100644
index 111f028..0000000
Binary files a/projects/codes/Sarsa/outputs/CliffWalking-v0/20220803-142740/results/train_rewards_curve.png and /dev/null differ
diff --git a/projects/codes/Sarsa/outputs/CliffWalking-v0/20220804-223029/models/sarsa_model.pkl b/projects/codes/Sarsa/outputs/CliffWalking-v0/20220804-223029/models/sarsa_model.pkl
deleted file mode 100644
index f1d9dcf..0000000
Binary files a/projects/codes/Sarsa/outputs/CliffWalking-v0/20220804-223029/models/sarsa_model.pkl and /dev/null differ
diff --git a/projects/codes/Sarsa/outputs/CliffWalking-v0/20220804-223029/results/params.json b/projects/codes/Sarsa/outputs/CliffWalking-v0/20220804-223029/results/params.json
deleted file mode 100644
index 517bb98..0000000
--- a/projects/codes/Sarsa/outputs/CliffWalking-v0/20220804-223029/results/params.json
+++ /dev/null
@@ -1,15 +0,0 @@
-{
-    "algo_name": "Sarsa",
-    "env_name": "CliffWalking-v0",
-    "train_eps": 400,
-    "test_eps": 20,
-    "gamma": 0.9,
-    "epsilon_start": 0.95,
-    "epsilon_end": 0.01,
-    "epsilon_decay": 300,
-    "lr": 0.1,
-    "device": "cpu",
-    "result_path": "c:\\Users\\24438\\Desktop\\rl-tutorials\\codes\\Sarsa/outputs/CliffWalking-v0/20220804-223029/results/",
-    "model_path": "c:\\Users\\24438\\Desktop\\rl-tutorials\\codes\\Sarsa/outputs/CliffWalking-v0/20220804-223029/models/",
-    "save_fig": true
-}
\ No newline at end of file
diff --git a/projects/codes/Sarsa/outputs/CliffWalking-v0/20220804-223029/results/test_rewards.npy b/projects/codes/Sarsa/outputs/CliffWalking-v0/20220804-223029/results/test_rewards.npy
deleted file mode 100644
index 1b35004..0000000
Binary files a/projects/codes/Sarsa/outputs/CliffWalking-v0/20220804-223029/results/test_rewards.npy and /dev/null differ
diff --git a/projects/codes/Sarsa/outputs/CliffWalking-v0/20220804-223029/results/train_rewards.npy b/projects/codes/Sarsa/outputs/CliffWalking-v0/20220804-223029/results/train_rewards.npy
deleted file mode 100644
index f9979cc..0000000
Binary files a/projects/codes/Sarsa/outputs/CliffWalking-v0/20220804-223029/results/train_rewards.npy and /dev/null differ
diff --git a/projects/codes/Sarsa/outputs/CliffWalking-v0/20220804-223029/results/train_rewards_curve.png b/projects/codes/Sarsa/outputs/CliffWalking-v0/20220804-223029/results/train_rewards_curve.png
deleted file mode 100644
index 9ffa9bf..0000000
Binary files a/projects/codes/Sarsa/outputs/CliffWalking-v0/20220804-223029/results/train_rewards_curve.png and /dev/null differ
diff --git a/projects/codes/Sarsa/outputs/CliffWalking-v0/20220825-213316/models/checkpoint.pkl b/projects/codes/Sarsa/outputs/CliffWalking-v0/20220825-213316/models/checkpoint.pkl
new file mode 100644
index 0000000..fb8efd6
Binary files /dev/null and b/projects/codes/Sarsa/outputs/CliffWalking-v0/20220825-213316/models/checkpoint.pkl differ
diff --git a/projects/codes/QLearning/outputs/CliffWalking-v0/20220824-103255/results/params.json b/projects/codes/Sarsa/outputs/CliffWalking-v0/20220825-213316/results/params.json
similarity index 54%
rename from projects/codes/QLearning/outputs/CliffWalking-v0/20220824-103255/results/params.json
rename to projects/codes/Sarsa/outputs/CliffWalking-v0/20220825-213316/results/params.json
index 0bdad3a..e16e735 100644
--- a/projects/codes/QLearning/outputs/CliffWalking-v0/20220824-103255/results/params.json
+++ b/projects/codes/Sarsa/outputs/CliffWalking-v0/20220825-213316/results/params.json
@@ -1,5 +1,5 @@
 {
-    "algo_name": "Q-learning",
+    "algo_name": "Sarsa",
     "env_name": "CliffWalking-v0",
     "train_eps": 400,
     "test_eps": 20,
@@ -12,8 +12,8 @@
     "seed": 10,
     "show_fig": false,
     "save_fig": true,
-    "result_path": "/Users/jj/Desktop/rl-tutorials/codes/QLearning/outputs/CliffWalking-v0/20220824-103255/results/",
-    "model_path": "/Users/jj/Desktop/rl-tutorials/codes/QLearning/outputs/CliffWalking-v0/20220824-103255/models/",
+    "result_path": "/Users/jj/Desktop/rl-tutorials/codes/Sarsa/outputs/CliffWalking-v0/20220825-213316/results/",
+    "model_path": "/Users/jj/Desktop/rl-tutorials/codes/Sarsa/outputs/CliffWalking-v0/20220825-213316/models/",
     "n_states": 48,
     "n_actions": 4
 }
\ No newline at end of file
diff --git a/projects/codes/Sarsa/outputs/CliffWalking-v0/20220804-223029/results/test_rewards_curve.png b/projects/codes/Sarsa/outputs/CliffWalking-v0/20220825-213316/results/testing_curve.png
similarity index 99%
rename from projects/codes/Sarsa/outputs/CliffWalking-v0/20220804-223029/results/test_rewards_curve.png
rename to projects/codes/Sarsa/outputs/CliffWalking-v0/20220825-213316/results/testing_curve.png
index d600435..cf20c71 100644
Binary files a/projects/codes/Sarsa/outputs/CliffWalking-v0/20220804-223029/results/test_rewards_curve.png and b/projects/codes/Sarsa/outputs/CliffWalking-v0/20220825-213316/results/testing_curve.png differ
diff --git a/projects/codes/Sarsa/outputs/CliffWalking-v0/20220825-213316/results/testing_results.csv b/projects/codes/Sarsa/outputs/CliffWalking-v0/20220825-213316/results/testing_results.csv
new file mode 100644
index 0000000..7f09e4b
--- /dev/null
+++ b/projects/codes/Sarsa/outputs/CliffWalking-v0/20220825-213316/results/testing_results.csv
@@ -0,0 +1,21 @@
+episodes,rewards,steps
+0,-15,15
+1,-15,15
+2,-15,15
+3,-15,15
+4,-15,15
+5,-15,15
+6,-15,15
+7,-15,15
+8,-15,15
+9,-15,15
+10,-15,15
+11,-15,15
+12,-15,15
+13,-15,15
+14,-15,15
+15,-15,15
+16,-15,15
+17,-15,15
+18,-15,15
+19,-15,15
diff --git a/projects/codes/Sarsa/outputs/CliffWalking-v0/20220825-213316/results/training_curve.png b/projects/codes/Sarsa/outputs/CliffWalking-v0/20220825-213316/results/training_curve.png
new file mode 100644
index 0000000..14dbf39
Binary files /dev/null and b/projects/codes/Sarsa/outputs/CliffWalking-v0/20220825-213316/results/training_curve.png differ
diff --git a/projects/codes/Sarsa/outputs/CliffWalking-v0/20220825-213316/results/training_results.csv b/projects/codes/Sarsa/outputs/CliffWalking-v0/20220825-213316/results/training_results.csv
new file mode 100644
index 0000000..c51b354
--- /dev/null
+++ b/projects/codes/Sarsa/outputs/CliffWalking-v0/20220825-213316/results/training_results.csv
@@ -0,0 +1,401 @@
+episodes,rewards,steps
+0,-649,154
+1,-2822,842
+2,-176,176
+3,-139,139
+4,-221,221
+5,-51,51
+6,-219,219
+7,-247,148
+8,-90,90
+9,-145,145
+10,-104,104
+11,-162,162
+12,-49,49
+13,-129,129
+14,-140,140
+15,-19,19
+16,-131,131
+17,-115,115
+18,-43,43
+19,-133,133
+20,-73,73
+21,-89,89
+22,-131,131
+23,-61,61
+24,-113,113
+25,-119,119
+26,-119,119
+27,-71,71
+28,-132,132
+29,-47,47
+30,-79,79
+31,-57,57
+32,-125,125
+33,-77,77
+34,-87,87
+35,-49,49
+36,-57,57
+37,-81,81
+38,-81,81
+39,-97,97
+40,-61,61
+41,-85,85
+42,-217,118
+43,-39,39
+44,-117,117
+45,-41,41
+46,-71,71
+47,-105,105
+48,-73,73
+49,-68,68
+50,-95,95
+51,-41,41
+52,-41,41
+53,-67,67
+54,-71,71
+55,-65,65
+56,-41,41
+57,-61,61
+58,-81,81
+59,-21,21
+60,-76,76
+61,-80,80
+62,-23,23
+63,-53,53
+64,-67,67
+65,-33,33
+66,-41,41
+67,-59,59
+68,-33,33
+69,-64,64
+70,-188,89
+71,-47,47
+72,-57,57
+73,-45,45
+74,-33,33
+75,-79,79
+76,-45,45
+77,-23,23
+78,-47,47
+79,-57,57
+80,-47,47
+81,-45,45
+82,-53,53
+83,-29,29
+84,-33,33
+85,-69,69
+86,-61,61
+87,-35,35
+88,-59,59
+89,-43,43
+90,-17,17
+91,-39,39
+92,-59,59
+93,-29,29
+94,-31,31
+95,-55,55
+96,-35,35
+97,-45,45
+98,-29,29
+99,-59,59
+100,-25,25
+101,-29,29
+102,-33,33
+103,-39,39
+104,-19,19
+105,-47,47
+106,-57,57
+107,-19,19
+108,-47,47
+109,-25,25
+110,-23,23
+111,-53,53
+112,-39,39
+113,-34,34
+114,-27,27
+115,-27,27
+116,-63,63
+117,-33,33
+118,-17,17
+119,-21,21
+120,-19,19
+121,-49,49
+122,-25,25
+123,-39,39
+124,-25,25
+125,-167,68
+126,-35,35
+127,-29,29
+128,-31,31
+129,-44,44
+130,-33,33
+131,-23,23
+132,-37,37
+133,-134,35
+134,-31,31
+135,-19,19
+136,-29,29
+137,-37,37
+138,-25,25
+139,-39,39
+140,-47,47
+141,-29,29
+142,-27,27
+143,-21,21
+144,-41,41
+145,-29,29
+146,-25,25
+147,-25,25
+148,-21,21
+149,-29,29
+150,-39,39
+151,-35,35
+152,-35,35
+153,-32,32
+154,-31,31
+155,-19,19
+156,-21,21
+157,-35,35
+158,-33,33
+159,-37,37
+160,-25,25
+161,-41,41
+162,-25,25
+163,-23,23
+164,-27,27
+165,-25,25
+166,-39,39
+167,-28,28
+168,-24,24
+169,-23,23
+170,-41,41
+171,-17,17
+172,-35,35
+173,-23,23
+174,-29,29
+175,-17,17
+176,-39,39
+177,-33,33
+178,-29,29
+179,-24,24
+180,-23,23
+181,-19,19
+182,-15,15
+183,-23,23
+184,-39,39
+185,-25,25
+186,-35,35
+187,-33,33
+188,-19,19
+189,-35,35
+190,-21,21
+191,-131,32
+192,-15,15
+193,-23,23
+194,-21,21
+195,-17,17
+196,-23,23
+197,-31,31
+198,-21,21
+199,-31,31
+200,-35,35
+201,-27,27
+202,-19,19
+203,-21,21
+204,-23,23
+205,-23,23
+206,-21,21
+207,-31,31
+208,-25,25
+209,-23,23
+210,-17,17
+211,-19,19
+212,-25,25
+213,-23,23
+214,-19,19
+215,-19,19
+216,-25,25
+217,-25,25
+218,-25,25
+219,-25,25
+220,-23,23
+221,-19,19
+222,-19,19
+223,-149,50
+224,-41,41
+225,-19,19
+226,-29,29
+227,-37,37
+228,-17,17
+229,-17,17
+230,-19,19
+231,-27,27
+232,-19,19
+233,-33,33
+234,-23,23
+235,-23,23
+236,-34,34
+237,-15,15
+238,-33,33
+239,-29,29
+240,-17,17
+241,-23,23
+242,-17,17
+243,-19,19
+244,-21,21
+245,-23,23
+246,-17,17
+247,-15,15
+248,-39,39
+249,-21,21
+250,-23,23
+251,-29,29
+252,-15,15
+253,-17,17
+254,-29,29
+255,-15,15
+256,-21,21
+257,-19,19
+258,-19,19
+259,-21,21
+260,-17,17
+261,-21,21
+262,-27,27
+263,-27,27
+264,-21,21
+265,-19,19
+266,-17,17
+267,-23,23
+268,-19,19
+269,-17,17
+270,-19,19
+271,-19,19
+272,-17,17
+273,-23,23
+274,-17,17
+275,-22,22
+276,-31,31
+277,-19,19
+278,-17,17
+279,-33,33
+280,-19,19
+281,-17,17
+282,-31,31
+283,-15,15
+284,-15,15
+285,-15,15
+286,-29,29
+287,-19,19
+288,-17,17
+289,-26,26
+290,-17,17
+291,-19,19
+292,-15,15
+293,-21,21
+294,-21,21
+295,-15,15
+296,-19,19
+297,-15,15
+298,-17,17
+299,-19,19
+300,-17,17
+301,-21,21
+302,-17,17
+303,-27,27
+304,-17,17
+305,-19,19
+306,-15,15
+307,-19,19
+308,-33,33
+309,-17,17
+310,-20,20
+311,-19,19
+312,-17,17
+313,-15,15
+314,-23,23
+315,-15,15
+316,-15,15
+317,-17,17
+318,-25,25
+319,-15,15
+320,-17,17
+321,-19,19
+322,-17,17
+323,-15,15
+324,-23,23
+325,-19,19
+326,-17,17
+327,-23,23
+328,-15,15
+329,-19,19
+330,-15,15
+331,-17,17
+332,-19,19
+333,-15,15
+334,-17,17
+335,-17,17
+336,-19,19
+337,-15,15
+338,-19,19
+339,-19,19
+340,-17,17
+341,-15,15
+342,-21,21
+343,-19,19
+344,-17,17
+345,-17,17
+346,-15,15
+347,-21,21
+348,-20,20
+349,-15,15
+350,-15,15
+351,-15,15
+352,-19,19
+353,-17,17
+354,-15,15
+355,-27,27
+356,-15,15
+357,-15,15
+358,-23,23
+359,-125,26
+360,-132,33
+361,-17,17
+362,-15,15
+363,-17,17
+364,-23,23
+365,-17,17
+366,-15,15
+367,-15,15
+368,-17,17
+369,-15,15
+370,-17,17
+371,-15,15
+372,-15,15
+373,-15,15
+374,-15,15
+375,-15,15
+376,-15,15
+377,-15,15
+378,-15,15
+379,-15,15
+380,-17,17
+381,-15,15
+382,-15,15
+383,-19,19
+384,-15,15
+385,-17,17
+386,-27,27
+387,-15,15
+388,-21,21
+389,-125,26
+390,-15,15
+391,-15,15
+392,-15,15
+393,-27,27
+394,-15,15
+395,-15,15
+396,-17,17
+397,-15,15
+398,-15,15
+399,-15,15
diff --git a/projects/codes/Sarsa/outputs/Racetrack-v0/20220825-212738/models/checkpoint.pkl b/projects/codes/Sarsa/outputs/Racetrack-v0/20220825-212738/models/checkpoint.pkl
new file mode 100644
index 0000000..81268a4
Binary files /dev/null and b/projects/codes/Sarsa/outputs/Racetrack-v0/20220825-212738/models/checkpoint.pkl differ
diff --git a/projects/codes/Sarsa/outputs/Racetrack-v0/20220825-212738/results/params.json b/projects/codes/Sarsa/outputs/Racetrack-v0/20220825-212738/results/params.json
new file mode 100644
index 0000000..accb050
--- /dev/null
+++ b/projects/codes/Sarsa/outputs/Racetrack-v0/20220825-212738/results/params.json
@@ -0,0 +1 @@
+{"algo_name": "Sarsa", "env_name": "Racetrack-v0", "train_eps": 300, "test_eps": 20, "gamma": 0.99, "epsilon_start": 0.9, "epsilon_end": 0.01, "epsilon_decay": 200, "lr": 0.2, "device": "cpu", "seed": 10, "show_fig": false, "save_fig": true, "result_path": "/Users/jj/Desktop/rl-tutorials/codes/Sarsa/outputs/Racetrack-v0/20220825-212738/results/", "model_path": "/Users/jj/Desktop/rl-tutorials/codes/Sarsa/outputs/Racetrack-v0/20220825-212738/models/", "n_states": 4, "n_actions": 9}
\ No newline at end of file
diff --git a/projects/codes/Sarsa/outputs/Racetrack-v0/20220825-212738/results/testing_curve.png b/projects/codes/Sarsa/outputs/Racetrack-v0/20220825-212738/results/testing_curve.png
new file mode 100644
index 0000000..c78b938
Binary files /dev/null and b/projects/codes/Sarsa/outputs/Racetrack-v0/20220825-212738/results/testing_curve.png differ
diff --git a/projects/codes/Sarsa/outputs/Racetrack-v0/20220825-212738/results/testing_results.csv b/projects/codes/Sarsa/outputs/Racetrack-v0/20220825-212738/results/testing_results.csv
new file mode 100644
index 0000000..2cb817f
--- /dev/null
+++ b/projects/codes/Sarsa/outputs/Racetrack-v0/20220825-212738/results/testing_results.csv
@@ -0,0 +1,21 @@
+episodes,rewards,steps
+0,4,6
+1,4,6
+2,-1010,1000
+3,-14,14
+4,4,6
+5,4,6
+6,4,6
+7,-1060,1000
+8,2,8
+9,-12,12
+10,3,7
+11,-15,15
+12,3,7
+13,4,6
+14,-14,14
+15,3,7
+16,-18,18
+17,4,6
+18,4,6
+19,-1020,1000
diff --git a/projects/codes/Sarsa/outputs/Racetrack-v0/20220825-212738/results/training_curve.png b/projects/codes/Sarsa/outputs/Racetrack-v0/20220825-212738/results/training_curve.png
new file mode 100644
index 0000000..5c612d6
Binary files /dev/null and b/projects/codes/Sarsa/outputs/Racetrack-v0/20220825-212738/results/training_curve.png differ
diff --git a/projects/codes/Sarsa/outputs/Racetrack-v0/20220825-212738/results/training_results.csv b/projects/codes/Sarsa/outputs/Racetrack-v0/20220825-212738/results/training_results.csv
new file mode 100644
index 0000000..0912f70
--- /dev/null
+++ b/projects/codes/Sarsa/outputs/Racetrack-v0/20220825-212738/results/training_results.csv
@@ -0,0 +1,301 @@
+episodes,rewards,steps
+0,-3460,1000
+1,-2800,1000
+2,-2910,1000
+3,-2620,1000
+4,-2620,1000
+5,-2590,1000
+6,-2390,1000
+7,-2510,1000
+8,-2470,1000
+9,-611,251
+10,-891,371
+11,-265,125
+12,-2281,911
+13,-1203,523
+14,-616,266
+15,-213,113
+16,-633,273
+17,-1112,482
+18,-350,160
+19,-852,342
+20,-87,47
+21,-11,11
+22,-27,17
+23,-117,57
+24,-15,15
+25,4,6
+26,-27,17
+27,-94,44
+28,-184,84
+29,-44,24
+30,-150,80
+31,-14,14
+32,-219,89
+33,-50,30
+34,-111,61
+35,-10,10
+36,-28,18
+37,-34,24
+38,-12,12
+39,-19,19
+40,-136,66
+41,-171,71
+42,-51,31
+43,4,6
+44,-117,57
+45,4,6
+46,4,6
+47,-127,67
+48,-78,48
+49,-311,131
+50,-25,15
+51,4,6
+52,-49,29
+53,-25,15
+54,-78,48
+55,-238,108
+56,4,6
+57,-17,17
+58,-29,19
+59,-218,98
+60,4,6
+61,-129,59
+62,-344,144
+63,-25,15
+64,-15,15
+65,-77,37
+66,2,8
+67,0,10
+68,4,6
+69,4,6
+70,-242,102
+71,3,7
+72,4,6
+73,-53,33
+74,-14,14
+75,4,6
+76,4,6
+77,-30,20
+78,-12,12
+79,2,8
+80,-12,12
+81,-150,70
+82,-48,28
+83,-102,52
+84,4,6
+85,-97,47
+86,-10,10
+87,-125,55
+88,-28,18
+89,-26,16
+90,-107,57
+91,4,6
+92,-16,16
+93,-84,44
+94,-13,13
+95,-43,23
+96,-14,14
+97,-12,12
+98,-13,13
+99,-2,12
+100,-14,14
+101,-47,27
+102,4,6
+103,4,6
+104,-91,51
+105,-65,35
+106,4,6
+107,-12,12
+108,-14,14
+109,-13,13
+110,4,6
+111,-41,31
+112,-13,13
+113,4,6
+114,-4,14
+115,-74,34
+116,4,6
+117,-60,30
+118,4,6
+119,-15,15
+120,3,7
+121,4,6
+122,4,6
+123,-19,19
+124,4,6
+125,-49,29
+126,-13,13
+127,-30,20
+128,2,8
+129,-21,21
+130,-45,25
+131,-32,22
+132,-67,37
+133,-46,26
+134,0,10
+135,-12,12
+136,-9,9
+137,-10,10
+138,-14,14
+139,4,6
+140,-11,11
+141,-12,12
+142,2,8
+143,-35,25
+144,4,6
+145,-73,43
+146,4,6
+147,-20,20
+148,4,6
+149,2,8
+150,-29,19
+151,-20,20
+152,4,6
+153,-28,18
+154,4,6
+155,4,6
+156,4,6
+157,4,6
+158,-34,24
+159,4,6
+160,4,6
+161,4,6
+162,-25,15
+163,4,6
+164,3,7
+165,-48,28
+166,4,6
+167,-58,38
+168,-20,20
+169,-9,9
+170,3,7
+171,4,6
+172,3,7
+173,-33,23
+174,-50,30
+175,-16,16
+176,-32,22
+177,-65,35
+178,4,6
+179,-13,13
+180,-11,11
+181,3,7
+182,4,6
+183,-16,16
+184,-12,12
+185,4,6
+186,-48,28
+187,-13,13
+188,2,8
+189,3,7
+190,-27,17
+191,3,7
+192,4,6
+193,4,6
+194,4,6
+195,4,6
+196,4,6
+197,-13,13
+198,-14,14
+199,4,6
+200,4,6
+201,-13,13
+202,-33,23
+203,4,6
+204,-32,22
+205,4,6
+206,-48,28
+207,4,6
+208,4,6
+209,3,7
+210,4,6
+211,-34,24
+212,3,7
+213,4,6
+214,4,6
+215,4,6
+216,3,7
+217,-12,12
+218,3,7
+219,-8,8
+220,3,7
+221,4,6
+222,-46,26
+223,-33,23
+224,4,6
+225,1,9
+226,3,7
+227,2,8
+228,-34,24
+229,4,6
+230,4,6
+231,4,6
+232,4,6
+233,-55,35
+234,-37,27
+235,4,6
+236,-14,14
+237,-65,35
+238,4,6
+239,-13,13
+240,4,6
+241,4,6
+242,-13,13
+243,-30,20
+244,3,7
+245,-13,13
+246,4,6
+247,4,6
+248,-13,13
+249,-32,22
+250,4,6
+251,-55,35
+252,-12,12
+253,3,7
+254,3,7
+255,3,7
+256,4,6
+257,2,8
+258,-12,12
+259,3,7
+260,-10,10
+261,-12,12
+262,4,6
+263,3,7
+264,3,7
+265,-16,16
+266,3,7
+267,-47,27
+268,-13,13
+269,4,6
+270,3,7
+271,-13,13
+272,4,6
+273,4,6
+274,-17,17
+275,4,6
+276,3,7
+277,3,7
+278,4,6
+279,-41,31
+280,3,7
+281,-47,27
+282,-32,22
+283,4,6
+284,3,7
+285,-17,17
+286,3,7
+287,3,7
+288,3,7
+289,-12,12
+290,4,6
+291,3,7
+292,3,7
+293,-24,14
+294,3,7
+295,4,6
+296,3,7
+297,3,7
+298,3,7
+299,-13,13
diff --git a/projects/codes/Sarsa/sarsa.py b/projects/codes/Sarsa/sarsa.py
index c10d226..37ed818 100644
--- a/projects/codes/Sarsa/sarsa.py
+++ b/projects/codes/Sarsa/sarsa.py
@@ -5,7 +5,7 @@ Author: John
 Email: johnjim0816@gmail.com
 Date: 2021-03-12 16:58:16
 LastEditor: John
-LastEditTime: 2022-08-25 00:23:22
+LastEditTime: 2022-08-25 21:26:08
 Discription: 
 Environment: 
 '''
@@ -30,7 +30,7 @@ class Sarsa(object):
         self.sample_count += 1
         self.epsilon = self.epsilon_end + (self.epsilon_start - self.epsilon_end) * \
             math.exp(-1. * self.sample_count / self.epsilon_decay) # The probability to select a random action, is is log decayed
-        best_action = np.argmax(self.Q_table[state])
+        best_action = np.argmax(self.Q_table[str(state)]) # an array state is not hashable, thus convert it to str for the table lookup
         action_probs = np.ones(self.n_actions, dtype=float) * self.epsilon / self.n_actions
         action_probs[best_action] += (1.0 - self.epsilon)
         action = np.random.choice(np.arange(len(action_probs)), p=action_probs) 
@@ -38,27 +38,27 @@ class Sarsa(object):
     def predict_action(self,state):
         ''' predict action while testing 
         '''
-        action = np.argmax(self.Q_table[state])
+        action = np.argmax(self.Q_table[str(state)]) # greedy pick: index of the largest entry stored under the stringified state
         return action
     def update(self, state, action, reward, next_state, next_action,done):
-        Q_predict = self.Q_table[state][action]
+        Q_predict = self.Q_table[str(state)][action] # current estimate for (state, action); table keys are stringified states
         if done:
             Q_target = reward  # terminal state
         else:
-            Q_target = reward + self.gamma * self.Q_table[next_state][next_action] # the only difference from Q learning
-        self.Q_table[state][action] += self.lr * (Q_target - Q_predict) 
+            Q_target = reward + self.gamma * self.Q_table[str(next_state)][next_action] # bootstrap from the entry for (next_state, next_action); the only difference from Q-learning
+        self.Q_table[str(state)][action] += self.lr * (Q_target - Q_predict) # move the stored value toward the target, scaled by self.lr
     def save_model(self,path):
         import dill
         from pathlib import Path
         # create path
         Path(path).mkdir(parents=True, exist_ok=True)
         torch.save(
-            obj=self.Q_table_table,
+            obj=self.Q_table, # serialize the same Q_table attribute that the other methods read and update
             f=path+"checkpoint.pkl",
             pickle_module=dill
         )
         print("Model saved!")
     def load_model(self, path):
         import dill
-        self.Q_table_table =torch.load(f=path+'checkpoint.pkl',pickle_module=dill)
+        self.Q_table=torch.load(f=path+'checkpoint.pkl',pickle_module=dill) # restore into Q_table; path is concatenated as-is, so it must end with a separator. NOTE(review): unpickling can execute code, so load trusted checkpoints only
         print("Mode loaded!")
\ No newline at end of file
diff --git a/projects/codes/Sarsa/task1.py b/projects/codes/Sarsa/task1.py
deleted file mode 100644
index 3fe8fb9..0000000
--- a/projects/codes/Sarsa/task1.py
+++ /dev/null
@@ -1,131 +0,0 @@
-#!/usr/bin/env python
-# coding=utf-8
-'''
-Author: John
-Email: johnjim0816@gmail.com
-Date: 2020-09-11 23:03:00
-LastEditor: John
-LastEditTime: 2022-08-04 22:44:00
-Discription: 
-Environment: 
-'''
-import sys
-import os
-curr_path = os.path.dirname(os.path.abspath(__file__))  # 当前文件所在绝对路径
-parent_path = os.path.dirname(curr_path)  # 父路径
-sys.path.append(parent_path)  # 添加路径到系统路径
-
-import gym
-import torch
-import datetime
-import argparse
-from envs.gridworld_env import CliffWalkingWapper
-from Sarsa.sarsa import Sarsa
-from common.utils import plot_rewards,save_args
-from common.utils import save_results,make_dir
-
-
-def get_args():
-    """ 
-    """
-    curr_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")  # 获取当前时间
-    parser = argparse.ArgumentParser(description="hyperparameters")      
-    parser.add_argument('--algo_name',default='Sarsa',type=str,help="name of algorithm")
-    parser.add_argument('--env_name',default='CliffWalking-v0',type=str,help="name of environment")
-    parser.add_argument('--train_eps',default=400,type=int,help="episodes of training") # 训练的回合数
-    parser.add_argument('--test_eps',default=20,type=int,help="episodes of testing") # 测试的回合数
-    parser.add_argument('--gamma',default=0.90,type=float,help="discounted factor") # 折扣因子
-    parser.add_argument('--epsilon_start',default=0.95,type=float,help="initial value of epsilon") #  e-greedy策略中初始epsilon
-    parser.add_argument('--epsilon_end',default=0.01,type=float,help="final value of epsilon") # e-greedy策略中的终止epsilon
-    parser.add_argument('--epsilon_decay',default=300,type=int,help="decay rate of epsilon") # e-greedy策略中epsilon的衰减率
-    parser.add_argument('--lr',default=0.1,type=float,help="learning rate")
-    parser.add_argument('--device',default='cpu',type=str,help="cpu or cuda") 
-    parser.add_argument('--result_path',default=curr_path + "/outputs/" + parser.parse_args().env_name + \
-            '/' + curr_time + '/results/' )
-    parser.add_argument('--model_path',default=curr_path + "/outputs/" + parser.parse_args().env_name + \
-            '/' + curr_time + '/models/' ) # path to save models
-    parser.add_argument('--save_fig',default=True,type=bool,help="if save figure or not")           
-    args = parser.parse_args([])                          
-    return args
-curr_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") # 获取当前时间
-      
-def train(cfg,env,agent):
-    print('开始训练！')
-    print(f'环境:{cfg.env_name}, 算法:{cfg.algo_name}, 设备:{cfg.device}')
-    rewards = []  # 记录奖励
-    for i_ep in range(cfg.train_eps):
-        ep_reward = 0  # 记录每个回合的奖励
-        state = env.reset()  # 重置环境,即开始新的回合
-        action = agent.sample(state)
-        while True:
-            action = agent.sample(state)  # 根据算法采样一个动作
-            next_state, reward, done, _ = env.step(action)  # 与环境进行一次动作交互
-            next_action = agent.sample(next_state)
-            agent.update(state, action, reward, next_state, next_action,done) # 算法更新
-            state = next_state # 更新状态
-            action = next_action
-            ep_reward += reward
-            if done:
-                break
-        rewards.append(ep_reward)
-        print(f"回合：{i_ep+1}/{cfg.train_eps}，奖励：{ep_reward:.1f}，Epsilon：{agent.epsilon}")
-    print('完成训练！')
-    return {"rewards":rewards}
-    
-def test(cfg,env,agent):
-    print('开始测试！')
-    print(f'环境：{cfg.env_name}, 算法：{cfg.algo_name}, 设备：{cfg.device}')
-    rewards = []  # 记录所有回合的奖励
-    for i_ep in range(cfg.test_eps):
-        ep_reward = 0  # 记录每个episode的reward
-        state = env.reset()  # 重置环境, 重新开一局（即开始新的一个回合）
-        while True:
-            action = agent.predict(state)  # 根据算法选择一个动作
-            next_state, reward, done, _ = env.step(action)  # 与环境进行一个交互
-            state = next_state  # 更新状态
-            ep_reward += reward
-            if done:
-                break
-        rewards.append(ep_reward)
-        print(f"回合数：{i_ep+1}/{cfg.test_eps}, 奖励：{ep_reward:.1f}")
-    print('完成测试！')
-    return {"rewards":rewards}
-        
-def env_agent_config(cfg,seed=1):
-    '''创建环境和智能体
-    Args:
-        cfg ([type]): [description]
-        seed (int, optional): 随机种子. Defaults to 1.
-    Returns:
-        env [type]: 环境
-        agent : 智能体
-    '''    
-    env = gym.make(cfg.env_name)  
-    env = CliffWalkingWapper(env)
-    env.seed(seed) # 设置随机种子
-    n_states = env.observation_space.n # 状态维度
-    n_actions = env.action_space.n # 动作维度
-    print(f"状态数：{n_states}，动作数：{n_actions}")
-    agent = Sarsa(n_actions,cfg)
-    return env,agent
-if __name__ == "__main__":
-    cfg = get_args()
-    # 训练
-    env, agent = env_agent_config(cfg)
-    res_dic = train(cfg, env, agent)
-    make_dir(cfg.result_path, cfg.model_path)  
-    save_args(cfg) # save parameters
-    agent.save(path=cfg.model_path)  # save model
-    save_results(res_dic, tag='train',
-                 path=cfg.result_path)  
-    plot_rewards(res_dic['rewards'], cfg, tag="train")  
-    # 测试
-    env, agent = env_agent_config(cfg)
-    agent.load(path=cfg.model_path)  # 导入模型
-    res_dic = test(cfg, env, agent)
-    save_results(res_dic, tag='test',
-                 path=cfg.result_path)  # 保存结果
-    plot_rewards(res_dic['rewards'], cfg, tag="test")  # 画出结果
-
-        
-    
diff --git a/projects/codes/common/launcher.py b/projects/codes/common/launcher.py
index d26bce1..43f6f45 100644
--- a/projects/codes/common/launcher.py
+++ b/projects/codes/common/launcher.py
@@ -24,7 +24,7 @@ class Launcher:
         save_results(res_dic, tag = 'train', path = cfg['result_path']) # save results
         plot_rewards(res_dic['rewards'], cfg, path = cfg['result_path'],tag = "train")  # plot results
         # testing
-        env, agent = self.env_agent_config(cfg) # create new env for testing, sometimes can ignore this step
+        # env, agent = self.env_agent_config(cfg) # create new env for testing, sometimes can ignore this step
         agent.load_model(path = cfg['model_path'])  # load model
         res_dic = self.test(cfg, env, agent)
         save_results(res_dic, tag='test',
diff --git a/projects/codes/common/memories.py b/projects/codes/common/memories.py
index 255333a..1317dd1 100644
--- a/projects/codes/common/memories.py
+++ b/projects/codes/common/memories.py
@@ -5,7 +5,7 @@
 @Email: johnjim0816@gmail.com
 @Date: 2020-06-10 15:27:16
 @LastEditor: John
-LastEditTime: 2022-08-22 17:23:21
+LastEditTime: 2022-08-28 23:44:06
 @Discription: 
 @Environment: python 3.7.7
 '''
@@ -39,12 +39,12 @@ class ReplayBufferQue:
     def __init__(self, capacity: int) -> None:
         self.capacity = capacity
         self.buffer = deque(maxlen=self.capacity)
-    def push(self,trainsitions):
+    def push(self,transitions): # append one entry to the replay buffer
         '''_summary_
         Args:
             trainsitions (tuple): _description_
         '''
-        self.buffer.append(trainsitions)
+        self.buffer.append(transitions) # buffer is a deque(maxlen=capacity), so the oldest entry is dropped once it is full
     def sample(self, batch_size: int, sequential: bool = False):
         if batch_size > len(self.buffer):
             batch_size = len(self.buffer)
diff --git a/projects/codes/common/models.py b/projects/codes/common/models.py
index 1e7bbaa..3e3e562 100644
--- a/projects/codes/common/models.py
+++ b/projects/codes/common/models.py
@@ -5,7 +5,7 @@ Author: John
 Email: johnjim0816@gmail.com
 Date: 2021-03-12 21:14:12
 LastEditor: John
-LastEditTime: 2021-09-15 13:21:03
+LastEditTime: 2022-08-29 14:24:44
 Discription: 
 Environment: 
 '''
@@ -31,40 +31,45 @@ class MLP(nn.Module):
         x = F.relu(self.fc2(x))
         return self.fc3(x)
 
+class ActorSoftmax(nn.Module): # two-layer network whose output is a softmax distribution with output_dim entries
+    def __init__(self, input_dim, output_dim, hidden_dim=256):
+        super(ActorSoftmax, self).__init__()
+        self.fc1 = nn.Linear(input_dim, hidden_dim) # input -> hidden
+        self.fc2 = nn.Linear(hidden_dim, output_dim) # hidden -> one score per output entry
+    def forward(self,state):
+        dist = F.relu(self.fc1(state))
+        dist = F.softmax(self.fc2(dist),dim=1) # normalizes along dim 1; assumes state is batched as (batch, input_dim) — TODO confirm
+        return dist # non-negative values that sum to 1 along dim 1
 class Critic(nn.Module):
-    def __init__(self, n_obs, n_actions, hidden_size, init_w=3e-3):
-        super(Critic, self).__init__()
-        
-        self.linear1 = nn.Linear(n_obs + n_actions, hidden_size)
-        self.linear2 = nn.Linear(hidden_size, hidden_size)
-        self.linear3 = nn.Linear(hidden_size, 1)
-        # 随机初始化为较小的值
-        self.linear3.weight.data.uniform_(-init_w, init_w)
-        self.linear3.bias.data.uniform_(-init_w, init_w)
-        
-    def forward(self, state, action):
-        # 按维数1拼接
-        x = torch.cat([state, action], 1)
-        x = F.relu(self.linear1(x))
-        x = F.relu(self.linear2(x))
-        x = self.linear3(x)
-        return x
+    def __init__(self,input_dim,output_dim,hidden_dim=256): # critic that takes only a state: one hidden layer, then a linear output
+        super(Critic,self).__init__()
+        assert output_dim == 1 # critic must output a single value
+        self.fc1 = nn.Linear(input_dim, hidden_dim) # input -> hidden
+        self.fc2 = nn.Linear(hidden_dim, output_dim) # hidden -> value
+    def forward(self,state):
+        value = F.relu(self.fc1(state))
+        value = self.fc2(value) # no activation on the output layer
+        return value
 
-class Actor(nn.Module):
-    def __init__(self, n_obs, n_actions, hidden_size, init_w=3e-3):
-        super(Actor, self).__init__()  
-        self.linear1 = nn.Linear(n_obs, hidden_size)
-        self.linear2 = nn.Linear(hidden_size, hidden_size)
-        self.linear3 = nn.Linear(hidden_size, n_actions)
+class ActorCriticSoftmax(nn.Module): # actor and critic heads in one module; the two heads share no layers
+    def __init__(self, input_dim, output_dim, actor_hidden_dim=256,critic_hidden_dim=256):
+        super(ActorCriticSoftmax, self).__init__()
+
+        self.critic_fc1 = nn.Linear(input_dim, critic_hidden_dim)
+        self.critic_fc2 = nn.Linear(critic_hidden_dim, 1) # critic head ends in a single output
+
+        self.actor_fc1 = nn.Linear(input_dim, actor_hidden_dim)
+        self.actor_fc2 = nn.Linear(actor_hidden_dim, output_dim) # actor head ends in output_dim scores
+    
+    def forward(self, state):
+        # state = Variable(torch.from_numpy(state).float().unsqueeze(0))
+        value = F.relu(self.critic_fc1(state))
+        value = self.critic_fc2(value) # critic output, no activation
         
-        self.linear3.weight.data.uniform_(-init_w, init_w)
-        self.linear3.bias.data.uniform_(-init_w, init_w)
-        
-    def forward(self, x):
-        x = F.relu(self.linear1(x))
-        x = F.relu(self.linear2(x))
-        x = torch.tanh(self.linear3(x))
-        return x
+        policy_dist = F.relu(self.actor_fc1(state))
+        policy_dist = F.softmax(self.actor_fc2(policy_dist), dim=1) # softmax along dim 1; assumes state is batched as (batch, input_dim) — TODO confirm
+
+        return value, policy_dist # (critic output, actor softmax distribution)
 
 class ActorCritic(nn.Module):
     def __init__(self, n_states, n_actions, hidden_dim=256):
diff --git a/projects/codes/envs/register.py b/projects/codes/envs/register.py
index d92a93d..38074cf 100644
--- a/projects/codes/envs/register.py
+++ b/projects/codes/envs/register.py
@@ -5,7 +5,7 @@ def register_env(env_name):
     if env_name == 'Racetrack-v0':
         register(
             id='Racetrack-v0',
-            entry_point='racetrack:RacetrackEnv',
+            entry_point='envs.racetrack:RacetrackEnv',
             max_episode_steps=1000,
             kwargs={}
         )
diff --git a/projects/codes/scripts/A2C_CartPole-v0.sh b/projects/codes/scripts/A2C_CartPole-v0.sh
new file mode 100644
index 0000000..4fcc9a1
--- /dev/null
+++ b/projects/codes/scripts/A2C_CartPole-v0.sh
@@ -0,0 +1,15 @@
+# Run A2C on CartPole-v0.
+# Source conda's shell setup first. If you are already inside the proper conda environment,
+# comment out everything up to and including "conda activate easyrl".
+if [ -f "$HOME/anaconda3/etc/profile.d/conda.sh" ]; then
+    echo "source file at ~/anaconda3/etc/profile.d/conda.sh"
+    source "$HOME/anaconda3/etc/profile.d/conda.sh"
+elif [ -f "$HOME/opt/anaconda3/etc/profile.d/conda.sh" ]; then
+    echo "source file at ~/opt/anaconda3/etc/profile.d/conda.sh"
+    source "$HOME/opt/anaconda3/etc/profile.d/conda.sh"
+else
+    echo 'please manually config the conda source path' >&2 # neither checked location has conda.sh; warn on stderr and carry on
+fi
+conda activate easyrl # easyrl here can be changed to another name of conda env that you have created
+codes_dir="$(dirname "$(dirname "$(readlink -f "$0")")")" # "codes" path: parent of this script's directory; quoted so paths with spaces survive
+python "$codes_dir/A2C/main.py"
\ No newline at end of file
diff --git a/projects/codes/scripts/DQN_task2.sh b/projects/codes/scripts/DQN_Acrobot-v1.sh
similarity index 100%
rename from projects/codes/scripts/DQN_task2.sh
rename to projects/codes/scripts/DQN_Acrobot-v1.sh
diff --git a/projects/codes/scripts/DQN_task0.sh b/projects/codes/scripts/DQN_CartPole-v0.sh
similarity index 100%
rename from projects/codes/scripts/DQN_task0.sh
rename to projects/codes/scripts/DQN_CartPole-v0.sh
diff --git a/projects/codes/scripts/DQN_task1.sh b/projects/codes/scripts/DQN_CartPole-v1.sh
similarity index 83%
rename from projects/codes/scripts/DQN_task1.sh
rename to projects/codes/scripts/DQN_CartPole-v1.sh
index d9ca67d..8cac524 100644
--- a/projects/codes/scripts/DQN_task1.sh
+++ b/projects/codes/scripts/DQN_CartPole-v1.sh
@@ -1,6 +1,4 @@
-'''
-run DQN on CartPole-v1, not finished yet
-'''
+# run DQN on CartPole-v1, not finished yet
 # source conda, if you are already in proper conda environment, then comment the codes util "conda activate easyrl" 
 if [ -f "$HOME/anaconda3/etc/profile.d/conda.sh" ]; then
     echo "source file at ~/anaconda3/etc/profile.d/conda.sh"
@@ -13,4 +11,4 @@ else
 fi
 conda activate easyrl # easyrl here can be changed to another name of conda env that you have created
 codes_dir=$(dirname $(dirname $(readlink -f "$0"))) # "codes" path
-python $codes_dir/DQN/main.py --env_name CartPole-v1 --train_eps 500 --epsilon_decay 1000 --memory_capacity 200000 --batch_size 128 --device cuda
\ No newline at end of file
+python $codes_dir/DQN/main.py --env_name CartPole-v1 --train_eps 2000 --gamma 0.99 --epsilon_decay 6000 --lr 0.00001 --memory_capacity 200000 --batch_size 64 --device cuda
\ No newline at end of file
diff --git a/projects/codes/scripts/Qlearning_task0.sh b/projects/codes/scripts/PolicyGradient_CartPole-v0.sh
similarity index 93%
rename from projects/codes/scripts/Qlearning_task0.sh
rename to projects/codes/scripts/PolicyGradient_CartPole-v0.sh
index 7ed9089..d7e0a69 100644
--- a/projects/codes/scripts/Qlearning_task0.sh
+++ b/projects/codes/scripts/PolicyGradient_CartPole-v0.sh
@@ -1,4 +1,3 @@
-
 # source conda, if you are already in proper conda environment, then comment the codes util "conda activate easyrl" 
 if [ -f "$HOME/anaconda3/etc/profile.d/conda.sh" ]; then
     echo "source file at ~/anaconda3/etc/profile.d/conda.sh"
@@ -11,4 +10,4 @@ else
 fi
 conda activate easyrl # easyrl here can be changed to another name of conda env that you have created
 codes_dir=$(dirname $(dirname $(readlink -f "$0"))) # "codes" path
-python $codes_dir/QLearning/main.py --device cpu
\ No newline at end of file
+python $codes_dir/PolicyGradient/main.py
\ No newline at end of file
diff --git a/projects/codes/scripts/Qlearning_CliffWalking-v0.sh b/projects/codes/scripts/Qlearning_CliffWalking-v0.sh
new file mode 100644
index 0000000..233cec7
--- /dev/null
+++ b/projects/codes/scripts/Qlearning_CliffWalking-v0.sh
@@ -0,0 +1,12 @@
+if [ -f "$HOME/anaconda3/etc/profile.d/conda.sh" ]; then # run Q-learning on CliffWalking-v0; first load conda.sh ahead of "conda activate"
+    echo "source file at ~/anaconda3/etc/profile.d/conda.sh"
+    source "$HOME/anaconda3/etc/profile.d/conda.sh"
+elif [ -f "$HOME/opt/anaconda3/etc/profile.d/conda.sh" ]; then
+    echo "source file at ~/opt/anaconda3/etc/profile.d/conda.sh"
+    source "$HOME/opt/anaconda3/etc/profile.d/conda.sh"
+else
+    echo 'please manually config the conda source path' >&2 # neither checked location has conda.sh; warn on stderr and carry on
+fi
+conda activate easyrl # easyrl here can be changed to another name of conda env that you have created
+codes_dir="$(dirname "$(dirname "$(readlink -f "$0")")")" # "codes" path: parent of this script's directory; quoted so paths with spaces survive
+python "$codes_dir/QLearning/main.py" --env_name CliffWalking-v0 --train_eps 400 --gamma 0.90 --epsilon_start 0.95 --epsilon_end 0.01 --epsilon_decay 300 --lr 0.1 --device cpu
\ No newline at end of file
diff --git a/projects/codes/scripts/Qlearning_task1.sh b/projects/codes/scripts/Qlearning_FrozenLakeNoSlippery-v1.sh
similarity index 93%
rename from projects/codes/scripts/Qlearning_task1.sh
rename to projects/codes/scripts/Qlearning_FrozenLakeNoSlippery-v1.sh
index fadb1a6..0df0547 100644
--- a/projects/codes/scripts/Qlearning_task1.sh
+++ b/projects/codes/scripts/Qlearning_FrozenLakeNoSlippery-v1.sh
@@ -11,5 +11,4 @@ else
 fi
 conda activate easyrl # easyrl here can be changed to another name of conda env that you have created
 codes_dir=$(dirname $(dirname $(readlink -f "$0"))) # "codes" path
-python $codes_dir/envs/register.py # register environment
 python $codes_dir/QLearning/main.py --env_name FrozenLakeNoSlippery-v1 --train_eps 800 --epsilon_start 0.70 --epsilon_end 0.1 --epsilon_decay 2000 --gamma 0.9 --lr 0.9 --device cpu
\ No newline at end of file
diff --git a/projects/codes/scripts/Qlearning_Racetrack-v0.sh b/projects/codes/scripts/Qlearning_Racetrack-v0.sh
new file mode 100644
index 0000000..00599fa
--- /dev/null
+++ b/projects/codes/scripts/Qlearning_Racetrack-v0.sh
@@ -0,0 +1,14 @@
+# Run Q-learning on Racetrack-v0.
+# Source conda first; if you are already in the proper conda environment, comment out the lines up to and including "conda activate easyrl".
+if [ -f "$HOME/anaconda3/etc/profile.d/conda.sh" ]; then
+    echo "source file at ~/anaconda3/etc/profile.d/conda.sh"
+    source "$HOME/anaconda3/etc/profile.d/conda.sh"
+elif [ -f "$HOME/opt/anaconda3/etc/profile.d/conda.sh" ]; then
+    echo "source file at ~/opt/anaconda3/etc/profile.d/conda.sh"
+    source "$HOME/opt/anaconda3/etc/profile.d/conda.sh"
+else
+    echo 'please manually config the conda source path' >&2 # neither checked location has conda.sh; warn on stderr and carry on
+fi
+conda activate easyrl # easyrl here can be changed to another name of conda env that you have created
+codes_dir="$(dirname "$(dirname "$(readlink -f "$0")")")" # "codes" path: parent of this script's directory; quoted so paths with spaces survive
+python "$codes_dir/QLearning/main.py" --env_name Racetrack-v0 --device cpu
\ No newline at end of file
diff --git a/projects/codes/scripts/Sarsa_CliffWalking-v0.sh b/projects/codes/scripts/Sarsa_CliffWalking-v0.sh
new file mode 100644
index 0000000..c4f5e6a
--- /dev/null
+++ b/projects/codes/scripts/Sarsa_CliffWalking-v0.sh
@@ -0,0 +1,12 @@
+if [ -f "$HOME/anaconda3/etc/profile.d/conda.sh" ]; then # run Sarsa on CliffWalking-v0; first load conda.sh ahead of "conda activate"
+    echo "source file at ~/anaconda3/etc/profile.d/conda.sh"
+    source "$HOME/anaconda3/etc/profile.d/conda.sh"
+elif [ -f "$HOME/opt/anaconda3/etc/profile.d/conda.sh" ]; then
+    echo "source file at ~/opt/anaconda3/etc/profile.d/conda.sh"
+    source "$HOME/opt/anaconda3/etc/profile.d/conda.sh"
+else
+    echo 'please manually config the conda source path' >&2 # neither checked location has conda.sh; warn on stderr and carry on
+fi
+conda activate easyrl # easyrl here can be changed to another name of conda env that you have created
+codes_dir="$(dirname "$(dirname "$(readlink -f "$0")")")" # "codes" path: parent of this script's directory; quoted so paths with spaces survive
+python "$codes_dir/Sarsa/main.py" --env_name CliffWalking-v0 --train_eps 400 --gamma 0.90 --epsilon_start 0.95 --epsilon_end 0.01 --epsilon_decay 300 --lr 0.1 --device cpu
\ No newline at end of file
diff --git a/projects/codes/scripts/Sarsa_FrozenLakeNoSlippery-v1.sh b/projects/codes/scripts/Sarsa_FrozenLakeNoSlippery-v1.sh
new file mode 100644
index 0000000..f215c94
--- /dev/null
+++ b/projects/codes/scripts/Sarsa_FrozenLakeNoSlippery-v1.sh
@@ -0,0 +1,13 @@
+# Run Sarsa on FrozenLakeNoSlippery-v1. Note: Sarsa does not converge here the way Q-learning does!
+if [ -f "$HOME/anaconda3/etc/profile.d/conda.sh" ]; then
+    echo "source file at ~/anaconda3/etc/profile.d/conda.sh"
+    source "$HOME/anaconda3/etc/profile.d/conda.sh"
+elif [ -f "$HOME/opt/anaconda3/etc/profile.d/conda.sh" ]; then
+    echo "source file at ~/opt/anaconda3/etc/profile.d/conda.sh"
+    source "$HOME/opt/anaconda3/etc/profile.d/conda.sh"
+else
+    echo 'please manually config the conda source path' >&2 # neither checked location has conda.sh; warn on stderr and carry on
+fi
+conda activate easyrl # easyrl here can be changed to another name of conda env that you have created
+codes_dir="$(dirname "$(dirname "$(readlink -f "$0")")")" # "codes" path: parent of this script's directory; quoted so paths with spaces survive
+python "$codes_dir/Sarsa/main.py" --env_name FrozenLakeNoSlippery-v1 --train_eps 800 --ep_max_steps 10 --epsilon_start 0.50 --epsilon_end 0.01 --epsilon_decay 2000 --gamma 0.9 --lr 0.1 --device cpu
\ No newline at end of file
diff --git a/projects/codes/scripts/Sarsa_task0.sh b/projects/codes/scripts/Sarsa_Racetrack-v0.sh
similarity index 86%
rename from projects/codes/scripts/Sarsa_task0.sh
rename to projects/codes/scripts/Sarsa_Racetrack-v0.sh
index 49358de..dcd6cac 100644
--- a/projects/codes/scripts/Sarsa_task0.sh
+++ b/projects/codes/scripts/Sarsa_Racetrack-v0.sh
@@ -9,5 +9,4 @@ else
 fi
 conda activate easyrl # easyrl here can be changed to another name of conda env that you have created
 codes_dir=$(dirname $(dirname $(readlink -f "$0"))) # "codes" path
-python $codes_dir/envs/register.py # register environment
-python $codes_dir/Sarsa/main.py
\ No newline at end of file
+python $codes_dir/Sarsa/main.py --env_name Racetrack-v0
\ No newline at end of file