diff --git a/projects/.gitignore b/projects/.gitignore index 764cbb7..d1fc2b1 100644 --- a/projects/.gitignore +++ b/projects/.gitignore @@ -2,4 +2,9 @@ .ipynb_checkpoints __pycache__ .vscode -test.py \ No newline at end of file +test.py +pseudocodes.aux +pseudocodes.log +pseudocodes.synctex.gz +pseudocodes.out +pseudocodes.toc \ No newline at end of file diff --git a/projects/README.md b/projects/README.md index 84d96b9..fcee9df 100644 --- a/projects/README.md +++ b/projects/README.md @@ -22,15 +22,15 @@ 注:点击对应的名称会跳到[codes](./codes/)下对应的算法中,其他版本还请读者自行翻阅 -| 算法名称 | 参考文献 | 备注 | -| :-----------------------: | :----------------------------------------------------------: | :--: | -| | | | -| DQN-CNN | | 待更 | -| [SoftQ](codes/SoftQ) | [Soft Q-learning paper](https://arxiv.org/abs/1702.08165) | | -| [SAC](codes/SAC) | [SAC paper](https://arxiv.org/pdf/1812.05905.pdf) | | -| [SAC-Discrete](codes/SAC) | [SAC-Discrete paper](https://arxiv.org/pdf/1910.07207.pdf) | | -| SAC-S | [SAC-S paper](https://arxiv.org/abs/1801.01290) | | -| DSAC | [DSAC paper](https://paperswithcode.com/paper/addressing-value-estimation-errors-in) | 待更 | +| 算法名称 | 参考文献 | 备注 | +| :-------------------------------------: | :----------------------------------------------------------: | :--: | +| [Policy Gradient](codes/PolicyGradient) | [Policy Gradient paper](https://proceedings.neurips.cc/paper/1999/file/464d828b85b0bed98e80ade0a5c43b0f-Paper.pdf) | | +| DQN-CNN | | 待更 | +| [SoftQ](codes/SoftQ) | [Soft Q-learning paper](https://arxiv.org/abs/1702.08165) | | +| [SAC](codes/SAC) | [SAC paper](https://arxiv.org/pdf/1812.05905.pdf) | | +| [SAC-Discrete](codes/SAC) | [SAC-Discrete paper](https://arxiv.org/pdf/1910.07207.pdf) | | +| SAC-S | [SAC-S paper](https://arxiv.org/abs/1801.01290) | | +| DSAC | [DSAC paper](https://paperswithcode.com/paper/addressing-value-estimation-errors-in) | 待更 | ## 3、算法环境 diff --git a/projects/assets/pseudocodes/pseudocodes.aux b/projects/assets/pseudocodes/pseudocodes.aux 
deleted file mode 100644 index 403c058..0000000 --- a/projects/assets/pseudocodes/pseudocodes.aux +++ /dev/null @@ -1,35 +0,0 @@ -\relax -\providecommand\hyper@newdestlabel[2]{} -\providecommand\HyperFirstAtBeginDocument{\AtBeginDocument} -\HyperFirstAtBeginDocument{\ifx\hyper@anchor\@undefined -\global\let\oldcontentsline\contentsline -\gdef\contentsline#1#2#3#4{\oldcontentsline{#1}{#2}{#3}} -\global\let\oldnewlabel\newlabel -\gdef\newlabel#1#2{\newlabelxx{#1}#2} -\gdef\newlabelxx#1#2#3#4#5#6{\oldnewlabel{#1}{{#2}{#3}}} -\AtEndDocument{\ifx\hyper@anchor\@undefined -\let\contentsline\oldcontentsline -\let\newlabel\oldnewlabel -\fi} -\fi} -\global\let\hyper@last\relax -\gdef\HyperFirstAtBeginDocument#1{#1} -\providecommand*\HyPL@Entry[1]{} -\HyPL@Entry{0<>} -\@writefile{toc}{\contentsline {section}{\numberline {1}模版备用}{2}{section.1}\protected@file@percent } -\@writefile{loa}{\contentsline {algorithm}{\numberline {}{\ignorespaces }}{2}{algorithm.}\protected@file@percent } -\@writefile{toc}{\contentsline {section}{\numberline {2}Q learning算法}{3}{section.2}\protected@file@percent } -\@writefile{loa}{\contentsline {algorithm}{\numberline {}{\ignorespaces }}{3}{algorithm.}\protected@file@percent } -\@writefile{toc}{\contentsline {section}{\numberline {3}Sarsa算法}{4}{section.3}\protected@file@percent } -\@writefile{loa}{\contentsline {algorithm}{\numberline {}{\ignorespaces }}{4}{algorithm.}\protected@file@percent } -\@writefile{toc}{\contentsline {section}{\numberline {4}Policy Gradient算法}{5}{section.4}\protected@file@percent } -\@writefile{loa}{\contentsline {algorithm}{\numberline {}{\ignorespaces }}{5}{algorithm.}\protected@file@percent } -\@writefile{toc}{\contentsline {section}{\numberline {5}DQN算法}{6}{section.5}\protected@file@percent } -\@writefile{loa}{\contentsline {algorithm}{\numberline {}{\ignorespaces }}{6}{algorithm.}\protected@file@percent } -\@writefile{toc}{\contentsline {section}{\numberline {6}SoftQ算法}{7}{section.6}\protected@file@percent } 
-\@writefile{loa}{\contentsline {algorithm}{\numberline {}{\ignorespaces }}{7}{algorithm.}\protected@file@percent } -\@writefile{toc}{\contentsline {section}{\numberline {7}SAC-S算法}{8}{section.7}\protected@file@percent } -\@writefile{loa}{\contentsline {algorithm}{\numberline {}{\ignorespaces }}{8}{algorithm.}\protected@file@percent } -\@writefile{toc}{\contentsline {section}{\numberline {8}SAC算法}{9}{section.8}\protected@file@percent } -\@writefile{loa}{\contentsline {algorithm}{\numberline {}{\ignorespaces }}{9}{algorithm.}\protected@file@percent } -\gdef \@abspage@last{9} diff --git a/projects/assets/pseudocodes/pseudocodes.log b/projects/assets/pseudocodes/pseudocodes.log deleted file mode 100644 index 096a0ed..0000000 --- a/projects/assets/pseudocodes/pseudocodes.log +++ /dev/null @@ -1,570 +0,0 @@ -This is XeTeX, Version 3.141592653-2.6-0.999993 (TeX Live 2021) (preloaded format=xelatex 2021.8.22) 23 AUG 2022 19:26 -entering extended mode - restricted \write18 enabled. - file:line:error style messages enabled. - %&-line parsing enabled. 
-**/Users/jj/Desktop/rl-tutorials/assets/pseudocodes/pseudocodes -(/Users/jj/Desktop/rl-tutorials/assets/pseudocodes/pseudocodes.tex -LaTeX2e <2020-10-01> patch level 4 -L3 programming layer <2021-02-18> (/usr/local/texlive/2021/texmf-dist/tex/latex/ctex/ctexart.cls (/usr/local/texlive/2021/texmf-dist/tex/latex/ctex/config/ctexbackend.cfg -File: ctexbackend.cfg 2021/03/14 v2.5.6 Backend configuration file (CTEX) -) (/usr/local/texlive/2021/texmf-dist/tex/latex/l3kernel/expl3.sty -Package: expl3 2021-02-18 L3 programming layer (loader) - (/usr/local/texlive/2021/texmf-dist/tex/latex/l3backend/l3backend-xetex.def -File: l3backend-xetex.def 2021-03-18 L3 backend support: XeTeX - (|extractbb --version) -\c__kernel_sys_dvipdfmx_version_int=\count175 -\l__color_backend_stack_int=\count176 -\g__color_backend_stack_int=\count177 -\g__graphics_track_int=\count178 -\l__pdf_internal_box=\box47 -\g__pdf_backend_object_int=\count179 -\g__pdf_backend_annotation_int=\count180 -\g__pdf_backend_link_int=\count181 -)) -Document Class: ctexart 2021/03/14 v2.5.6 Chinese adapter for class article (CTEX) -(/usr/local/texlive/2021/texmf-dist/tex/latex/l3packages/xparse/xparse.sty (/usr/local/texlive/2021/texmf-dist/tex/latex/l3packages/xparse/xparse-2020-10-01.sty (/usr/local/texlive/2021/texmf-dist/tex/latex/l3packages/xparse/xparse-generic.tex))) (/usr/local/texlive/2021/texmf-dist/tex/latex/l3packages/l3keys2e/l3keys2e.sty -Package: l3keys2e 2021-03-12 LaTeX2e option processing using LaTeX3 keys -) (/usr/local/texlive/2021/texmf-dist/tex/latex/ctex/ctexhook.sty -Package: ctexhook 2021/03/14 v2.5.6 Document and package hooks (CTEX) -) (/usr/local/texlive/2021/texmf-dist/tex/latex/ctex/ctexpatch.sty -Package: ctexpatch 2021/03/14 v2.5.6 Patching commands (CTEX) -) (/usr/local/texlive/2021/texmf-dist/tex/latex/base/fix-cm.sty -Package: fix-cm 2015/01/14 v1.1t fixes to LaTeX - (/usr/local/texlive/2021/texmf-dist/tex/latex/base/ts1enc.def -File: ts1enc.def 2001/06/05 v3.0e (jk/car/fm) 
Standard LaTeX file -LaTeX Font Info: Redeclaring font encoding TS1 on input line 47. -)) (/usr/local/texlive/2021/texmf-dist/tex/latex/everysel/everysel.sty -Package: everysel 2021/01/20 v2.1 EverySelectfont Package (MS) - (/usr/local/texlive/2021/texmf-dist/tex/latex/everysel/everysel-2011-10-28.sty)) -\l__ctex_tmp_int=\count182 -\l__ctex_tmp_box=\box48 -\l__ctex_tmp_dim=\dimen138 -\g__ctex_section_depth_int=\count183 -\g__ctex_font_size_int=\count184 - (/usr/local/texlive/2021/texmf-dist/tex/latex/ctex/config/ctexopts.cfg -File: ctexopts.cfg 2021/03/14 v2.5.6 Option configuration file (CTEX) -) (/usr/local/texlive/2021/texmf-dist/tex/latex/base/article.cls -Document Class: article 2020/04/10 v1.4m Standard LaTeX document class -(/usr/local/texlive/2021/texmf-dist/tex/latex/base/size11.clo -File: size11.clo 2020/04/10 v1.4m Standard LaTeX file (size option) -) -\c@part=\count185 -\c@section=\count186 -\c@subsection=\count187 -\c@subsubsection=\count188 -\c@paragraph=\count189 -\c@subparagraph=\count190 -\c@figure=\count191 -\c@table=\count192 -\abovecaptionskip=\skip47 -\belowcaptionskip=\skip48 -\bibindent=\dimen139 -) (/usr/local/texlive/2021/texmf-dist/tex/latex/ctex/engine/ctex-engine-xetex.def -File: ctex-engine-xetex.def 2021/03/14 v2.5.6 XeLaTeX adapter (CTEX) - (/usr/local/texlive/2021/texmf-dist/tex/xelatex/xecjk/xeCJK.sty -Package: xeCJK 2020/10/19 v3.8.6 Typesetting CJK scripts with XeLaTeX - (/usr/local/texlive/2021/texmf-dist/tex/latex/l3packages/xtemplate/xtemplate.sty -Package: xtemplate 2021-03-12 L3 Experimental prototype document functions -\l__xtemplate_tmp_dim=\dimen140 -\l__xtemplate_tmp_int=\count193 -\l__xtemplate_tmp_muskip=\muskip16 -\l__xtemplate_tmp_skip=\skip49 -) -\l__xeCJK_tmp_int=\count194 -\l__xeCJK_tmp_box=\box49 -\l__xeCJK_tmp_dim=\dimen141 -\l__xeCJK_tmp_skip=\skip50 -\g__xeCJK_space_factor_int=\count195 -\l__xeCJK_begin_int=\count196 -\l__xeCJK_end_int=\count197 -\c__xeCJK_CJK_class_int=\XeTeXcharclass1 
-\c__xeCJK_FullLeft_class_int=\XeTeXcharclass2 -\c__xeCJK_FullRight_class_int=\XeTeXcharclass3 -\c__xeCJK_HalfLeft_class_int=\XeTeXcharclass4 -\c__xeCJK_HalfRight_class_int=\XeTeXcharclass5 -\c__xeCJK_NormalSpace_class_int=\XeTeXcharclass6 -\c__xeCJK_CM_class_int=\XeTeXcharclass7 -\c__xeCJK_HangulJamo_class_int=\XeTeXcharclass8 -\l__xeCJK_last_skip=\skip51 -\g__xeCJK_node_int=\count198 -\c__xeCJK_CJK_node_dim=\dimen142 -\c__xeCJK_CJK-space_node_dim=\dimen143 -\c__xeCJK_default_node_dim=\dimen144 -\c__xeCJK_default-space_node_dim=\dimen145 -\c__xeCJK_CJK-widow_node_dim=\dimen146 -\c__xeCJK_normalspace_node_dim=\dimen147 -\l__xeCJK_ccglue_skip=\skip52 -\l__xeCJK_ecglue_skip=\skip53 -\l__xeCJK_punct_kern_skip=\skip54 -\l__xeCJK_last_penalty_int=\count199 -\l__xeCJK_last_bound_dim=\dimen148 -\l__xeCJK_last_kern_dim=\dimen149 -\l__xeCJK_widow_penalty_int=\count266 - -Package xtemplate Info: Declaring object type 'xeCJK/punctuation' taking 0 -(xtemplate) argument(s) on line 2341. - -\l__xeCJK_fixed_punct_width_dim=\dimen150 -\l__xeCJK_mixed_punct_width_dim=\dimen151 -\l__xeCJK_middle_punct_width_dim=\dimen152 -\l__xeCJK_fixed_margin_width_dim=\dimen153 -\l__xeCJK_mixed_margin_width_dim=\dimen154 -\l__xeCJK_middle_margin_width_dim=\dimen155 -\l__xeCJK_bound_punct_width_dim=\dimen156 -\l__xeCJK_bound_margin_width_dim=\dimen157 -\l__xeCJK_margin_minimum_dim=\dimen158 -\l__xeCJK_kerning_total_width_dim=\dimen159 -\l__xeCJK_same_align_margin_dim=\dimen160 -\l__xeCJK_different_align_margin_dim=\dimen161 -\l__xeCJK_kerning_margin_width_dim=\dimen162 -\l__xeCJK_kerning_margin_minimum_dim=\dimen163 -\l__xeCJK_bound_dim=\dimen164 -\l__xeCJK_reverse_bound_dim=\dimen165 -\l__xeCJK_margin_dim=\dimen166 -\l__xeCJK_minimum_bound_dim=\dimen167 -\l__xeCJK_kerning_margin_dim=\dimen168 -\g__xeCJK_family_int=\count267 -\l__xeCJK_fam_int=\count268 -\g__xeCJK_fam_allocation_int=\count269 -\l__xeCJK_verb_case_int=\count270 -\l__xeCJK_verb_exspace_skip=\skip55 - 
(/usr/local/texlive/2021/texmf-dist/tex/latex/fontspec/fontspec.sty -Package: fontspec 2020/02/21 v2.7i Font selection for XeLaTeX and LuaLaTeX - (/usr/local/texlive/2021/texmf-dist/tex/latex/fontspec/fontspec-xetex.sty -Package: fontspec-xetex 2020/02/21 v2.7i Font selection for XeLaTeX and LuaLaTeX -\l__fontspec_script_int=\count271 -\l__fontspec_language_int=\count272 -\l__fontspec_strnum_int=\count273 -\l__fontspec_tmp_int=\count274 -\l__fontspec_tmpa_int=\count275 -\l__fontspec_tmpb_int=\count276 -\l__fontspec_tmpc_int=\count277 -\l__fontspec_em_int=\count278 -\l__fontspec_emdef_int=\count279 -\l__fontspec_strong_int=\count280 -\l__fontspec_strongdef_int=\count281 -\l__fontspec_tmpa_dim=\dimen169 -\l__fontspec_tmpb_dim=\dimen170 -\l__fontspec_tmpc_dim=\dimen171 - (/usr/local/texlive/2021/texmf-dist/tex/latex/base/fontenc.sty -Package: fontenc 2020/08/10 v2.0s Standard LaTeX package -) (/usr/local/texlive/2021/texmf-dist/tex/latex/fontspec/fontspec.cfg))) (/usr/local/texlive/2021/texmf-dist/tex/xelatex/xecjk/xeCJK.cfg -File: xeCJK.cfg 2020/10/19 v3.8.6 Configuration file for xeCJK package -)) -\ccwd=\dimen172 -\l__ctex_ccglue_skip=\skip56 -) -\l__ctex_ziju_dim=\dimen173 - (/usr/local/texlive/2021/texmf-dist/tex/latex/zhnumber/zhnumber.sty -Package: zhnumber 2020/05/01 v2.8 Typesetting numbers with Chinese glyphs -\l__zhnum_scale_int=\count282 - (/usr/local/texlive/2021/texmf-dist/tex/latex/zhnumber/zhnumber-utf8.cfg -File: zhnumber-utf8.cfg 2020/05/01 v2.8 Chinese numerals with UTF8 encoding -)) -\l__ctex_heading_skip=\skip57 - (/usr/local/texlive/2021/texmf-dist/tex/latex/ctex/scheme/ctex-scheme-chinese-article.def -File: ctex-scheme-chinese-article.def 2021/03/14 v2.5.6 Chinese scheme for article (CTEX) - (/usr/local/texlive/2021/texmf-dist/tex/latex/ctex/config/ctex-name-utf8.cfg -File: ctex-name-utf8.cfg 2021/03/14 v2.5.6 Caption with encoding UTF-8 (CTEX) -)) (/usr/local/texlive/2021/texmf-dist/tex/latex/ctex/fontset/ctex-fontset-mac.def -File: 
ctex-fontset-mac.def 2021/03/14 v2.5.6 macOS fonts definition (CTEX) - (/usr/local/texlive/2021/texmf-dist/tex/latex/ctex/fontset/ctex-fontset-macnew.def -File: ctex-fontset-macnew.def 2021/03/14 v2.5.6 macOS fonts definition for El Capitan or later version (CTEX) - - -Package fontspec Warning: Font "Songti SC Light" does not contain requested -(fontspec) Script "CJK". - - -Package fontspec Info: Font family 'SongtiSCLight(0)' created for font 'Songti -(fontspec) SC Light' with options -(fontspec) [Script={CJK},BoldItalicFont={Kaiti SC -(fontspec) Bold},BoldFont={Songti SC Bold},ItalicFont={Kaiti SC}]. -(fontspec) -(fontspec) This font family consists of the following NFSS -(fontspec) series/shapes: -(fontspec) -(fontspec) - 'normal' (m/n) with NFSS spec.: <->"Songti SC -(fontspec) Light/OT:language=dflt;" -(fontspec) - 'small caps' (m/sc) with NFSS spec.: -(fontspec) - 'bold' (b/n) with NFSS spec.: <->"Songti SC -(fontspec) Bold/OT:language=dflt;" -(fontspec) - 'bold small caps' (b/sc) with NFSS spec.: -(fontspec) - 'italic' (m/it) with NFSS spec.: <->"Kaiti -(fontspec) SC/OT:language=dflt;" -(fontspec) - 'italic small caps' (m/scit) with NFSS spec.: -(fontspec) - 'bold italic' (b/it) with NFSS spec.: <->"Kaiti SC -(fontspec) Bold/OT:language=dflt;" -(fontspec) - 'bold italic small caps' (b/scit) with NFSS spec.: - -))) (/usr/local/texlive/2021/texmf-dist/tex/latex/ctex/config/ctex.cfg -File: ctex.cfg 2021/03/14 v2.5.6 Configuration file (CTEX) -) (/usr/local/texlive/2021/texmf-dist/tex/latex/algorithms/algorithm.sty -Invalid UTF-8 byte or sequence at line 11 replaced by U+FFFD. 
-Package: algorithm 2009/08/24 v0.1 Document Style `algorithm' - floating environment - (/usr/local/texlive/2021/texmf-dist/tex/latex/float/float.sty -Package: float 2001/11/08 v1.3d Float enhancements (AL) -\c@float@type=\count283 -\float@exts=\toks15 -\float@box=\box50 -\@float@everytoks=\toks16 -\@floatcapt=\box51 -) (/usr/local/texlive/2021/texmf-dist/tex/latex/base/ifthen.sty -Package: ifthen 2014/09/29 v1.1c Standard LaTeX ifthen package (DPC) -) -\@float@every@algorithm=\toks17 -\c@algorithm=\count284 -) (/usr/local/texlive/2021/texmf-dist/tex/latex/algorithms/algorithmic.sty -Invalid UTF-8 byte or sequence at line 11 replaced by U+FFFD. -Package: algorithmic 2009/08/24 v0.1 Document Style `algorithmic' - (/usr/local/texlive/2021/texmf-dist/tex/latex/graphics/keyval.sty -Package: keyval 2014/10/28 v1.15 key=value parser (DPC) -\KV@toks@=\toks18 -) -\c@ALC@unique=\count285 -\c@ALC@line=\count286 -\c@ALC@rem=\count287 -\c@ALC@depth=\count288 -\ALC@tlm=\skip58 -\algorithmicindent=\skip59 -) (/usr/local/texlive/2021/texmf-dist/tex/latex/amsfonts/amssymb.sty -Package: amssymb 2013/01/14 v3.01 AMS font symbols - (/usr/local/texlive/2021/texmf-dist/tex/latex/amsfonts/amsfonts.sty -Package: amsfonts 2013/01/14 v3.01 Basic AMSFonts support -\@emptytoks=\toks19 -\symAMSa=\mathgroup4 -\symAMSb=\mathgroup5 -LaTeX Font Info: Redeclaring math symbol \hbar on input line 98. -LaTeX Font Info: Overwriting math alphabet `\mathfrak' in version `bold' -(Font) U/euf/m/n --> U/euf/b/n on input line 106. -)) (/usr/local/texlive/2021/texmf-dist/tex/latex/amsmath/amsmath.sty -Package: amsmath 2020/09/23 v2.17i AMS math features -\@mathmargin=\skip60 - -For additional information on amsmath, use the `?' option. 
-(/usr/local/texlive/2021/texmf-dist/tex/latex/amsmath/amstext.sty -Package: amstext 2000/06/29 v2.01 AMS text - (/usr/local/texlive/2021/texmf-dist/tex/latex/amsmath/amsgen.sty -File: amsgen.sty 1999/11/30 v2.0 generic functions -\@emptytoks=\toks20 -\ex@=\dimen174 -)) (/usr/local/texlive/2021/texmf-dist/tex/latex/amsmath/amsbsy.sty -Package: amsbsy 1999/11/29 v1.2d Bold Symbols -\pmbraise@=\dimen175 -) (/usr/local/texlive/2021/texmf-dist/tex/latex/amsmath/amsopn.sty -Package: amsopn 2016/03/08 v2.02 operator names -) -\inf@bad=\count289 -LaTeX Info: Redefining \frac on input line 234. -\uproot@=\count290 -\leftroot@=\count291 -LaTeX Info: Redefining \overline on input line 399. -\classnum@=\count292 -\DOTSCASE@=\count293 -LaTeX Info: Redefining \ldots on input line 496. -LaTeX Info: Redefining \dots on input line 499. -LaTeX Info: Redefining \cdots on input line 620. -\Mathstrutbox@=\box52 -\strutbox@=\box53 -\big@size=\dimen176 -LaTeX Font Info: Redeclaring font encoding OML on input line 743. -LaTeX Font Info: Redeclaring font encoding OMS on input line 744. -\macc@depth=\count294 -\c@MaxMatrixCols=\count295 -\dotsspace@=\muskip17 -\c@parentequation=\count296 -\dspbrk@lvl=\count297 -\tag@help=\toks21 -\row@=\count298 -\column@=\count299 -\maxfields@=\count300 -\andhelp@=\toks22 -\eqnshift@=\dimen177 -\alignsep@=\dimen178 -\tagshift@=\dimen179 -\tagwidth@=\dimen180 -\totwidth@=\dimen181 -\lineht@=\dimen182 -\@envbody=\toks23 -\multlinegap=\skip61 -\multlinetaggap=\skip62 -\mathdisplay@stack=\toks24 -LaTeX Info: Redefining \[ on input line 2923. -LaTeX Info: Redefining \] on input line 2924. 
-) (/usr/local/texlive/2021/texmf-dist/tex/latex/hyperref/hyperref.sty -Package: hyperref 2021-02-27 v7.00k Hypertext links for LaTeX - (/usr/local/texlive/2021/texmf-dist/tex/generic/ltxcmds/ltxcmds.sty -Package: ltxcmds 2020-05-10 v1.25 LaTeX kernel commands for general use (HO) -) (/usr/local/texlive/2021/texmf-dist/tex/generic/iftex/iftex.sty -Package: iftex 2020/03/06 v1.0d TeX engine tests -) (/usr/local/texlive/2021/texmf-dist/tex/generic/pdftexcmds/pdftexcmds.sty -Package: pdftexcmds 2020-06-27 v0.33 Utility functions of pdfTeX for LuaTeX (HO) - (/usr/local/texlive/2021/texmf-dist/tex/generic/infwarerr/infwarerr.sty -Package: infwarerr 2019/12/03 v1.5 Providing info/warning/error messages (HO) -) -Package pdftexcmds Info: \pdf@primitive is available. -Package pdftexcmds Info: \pdf@ifprimitive is available. -Package pdftexcmds Info: \pdfdraftmode not found. -) (/usr/local/texlive/2021/texmf-dist/tex/generic/kvsetkeys/kvsetkeys.sty -Package: kvsetkeys 2019/12/15 v1.18 Key value parser (HO) -) (/usr/local/texlive/2021/texmf-dist/tex/generic/kvdefinekeys/kvdefinekeys.sty -Package: kvdefinekeys 2019-12-19 v1.6 Define keys (HO) -) (/usr/local/texlive/2021/texmf-dist/tex/generic/pdfescape/pdfescape.sty -Package: pdfescape 2019/12/09 v1.15 Implements pdfTeX's escape features (HO) -) (/usr/local/texlive/2021/texmf-dist/tex/latex/hycolor/hycolor.sty -Package: hycolor 2020-01-27 v1.10 Color options for hyperref/bookmark (HO) -) (/usr/local/texlive/2021/texmf-dist/tex/latex/letltxmacro/letltxmacro.sty -Package: letltxmacro 2019/12/03 v1.6 Let assignment for LaTeX macros (HO) -) (/usr/local/texlive/2021/texmf-dist/tex/latex/auxhook/auxhook.sty -Package: auxhook 2019-12-17 v1.6 Hooks for auxiliary files (HO) -) (/usr/local/texlive/2021/texmf-dist/tex/latex/kvoptions/kvoptions.sty -Package: kvoptions 2020-10-07 v3.14 Key value format for package options (HO) -) -\@linkdim=\dimen183 -\Hy@linkcounter=\count301 -\Hy@pagecounter=\count302 - 
(/usr/local/texlive/2021/texmf-dist/tex/latex/hyperref/pd1enc.def -File: pd1enc.def 2021-02-27 v7.00k Hyperref: PDFDocEncoding definition (HO) -) (/usr/local/texlive/2021/texmf-dist/tex/latex/hyperref/hyperref-langpatches.def -File: hyperref-langpatches.def 2021-02-27 v7.00k Hyperref: patches for babel languages -) (/usr/local/texlive/2021/texmf-dist/tex/generic/intcalc/intcalc.sty -Package: intcalc 2019/12/15 v1.3 Expandable calculations with integers (HO) -) (/usr/local/texlive/2021/texmf-dist/tex/generic/etexcmds/etexcmds.sty -Package: etexcmds 2019/12/15 v1.7 Avoid name clashes with e-TeX commands (HO) -) -\Hy@SavedSpaceFactor=\count303 - (/usr/local/texlive/2021/texmf-dist/tex/latex/hyperref/puenc.def -File: puenc.def 2021-02-27 v7.00k Hyperref: PDF Unicode definition (HO) -) -Package hyperref Info: Option `unicode' set `true' on input line 4073. -Package hyperref Info: Hyper figures OFF on input line 4192. -Package hyperref Info: Link nesting OFF on input line 4197. -Package hyperref Info: Hyper index ON on input line 4200. -Package hyperref Info: Plain pages OFF on input line 4207. -Package hyperref Info: Backreferencing OFF on input line 4212. -Package hyperref Info: Implicit mode ON; LaTeX internals redefined. -Package hyperref Info: Bookmarks ON on input line 4445. -\c@Hy@tempcnt=\count304 - (/usr/local/texlive/2021/texmf-dist/tex/latex/url/url.sty -\Urlmuskip=\muskip18 -Package: url 2013/09/16 ver 3.4 Verb mode for urls, etc. -) -LaTeX Info: Redefining \url on input line 4804. -\XeTeXLinkMargin=\dimen184 - (/usr/local/texlive/2021/texmf-dist/tex/generic/bitset/bitset.sty -Package: bitset 2019/12/09 v1.3 Handle bit-vector datatype (HO) - (/usr/local/texlive/2021/texmf-dist/tex/generic/bigintcalc/bigintcalc.sty -Package: bigintcalc 2019/12/15 v1.5 Expandable calculations on big integers (HO) -)) -\Fld@menulength=\count305 -\Field@Width=\dimen185 -\Fld@charsize=\dimen186 -Package hyperref Info: Hyper figures OFF on input line 6075. 
-Package hyperref Info: Link nesting OFF on input line 6080. -Package hyperref Info: Hyper index ON on input line 6083. -Package hyperref Info: backreferencing OFF on input line 6090. -Package hyperref Info: Link coloring OFF on input line 6095. -Package hyperref Info: Link coloring with OCG OFF on input line 6100. -Package hyperref Info: PDF/A mode OFF on input line 6105. -LaTeX Info: Redefining \ref on input line 6145. -LaTeX Info: Redefining \pageref on input line 6149. - (/usr/local/texlive/2021/texmf-dist/tex/latex/base/atbegshi-ltx.sty -Package: atbegshi-ltx 2020/08/17 v1.0a Emulation of the original atbegshi package -with kernel methods -) -\Hy@abspage=\count306 -\c@Item=\count307 -\c@Hfootnote=\count308 -) -Package hyperref Info: Driver (autodetected): hxetex. - (/usr/local/texlive/2021/texmf-dist/tex/latex/hyperref/hxetex.def -File: hxetex.def 2021-02-27 v7.00k Hyperref driver for XeTeX - (/usr/local/texlive/2021/texmf-dist/tex/generic/stringenc/stringenc.sty -Package: stringenc 2019/11/29 v1.12 Convert strings between diff. encodings (HO) -) -\pdfm@box=\box54 -\c@Hy@AnnotLevel=\count309 -\HyField@AnnotCount=\count310 -\Fld@listcount=\count311 -\c@bookmark@seq@number=\count312 - (/usr/local/texlive/2021/texmf-dist/tex/latex/rerunfilecheck/rerunfilecheck.sty -Package: rerunfilecheck 2019/12/05 v1.9 Rerun checks for auxiliary files (HO) - (/usr/local/texlive/2021/texmf-dist/tex/latex/base/atveryend-ltx.sty -Package: atveryend-ltx 2020/08/19 v1.0a Emulation of the original atvery package -with kernel methods -) (/usr/local/texlive/2021/texmf-dist/tex/generic/uniquecounter/uniquecounter.sty -Package: uniquecounter 2019/12/15 v1.4 Provide unlimited unique counter (HO) -) -Package uniquecounter Info: New unique counter `rerunfilecheck' on input line 286. 
-) -\Hy@SectionHShift=\skip63 -) (/usr/local/texlive/2021/texmf-dist/tex/latex/setspace/setspace.sty -Package: setspace 2011/12/19 v6.7a set line spacing -) (/usr/local/texlive/2021/texmf-dist/tex/latex/titlesec/titlesec.sty -Package: titlesec 2019/10/16 v2.13 Sectioning titles -\ttl@box=\box55 -\beforetitleunit=\skip64 -\aftertitleunit=\skip65 -\ttl@plus=\dimen187 -\ttl@minus=\dimen188 -\ttl@toksa=\toks25 -\titlewidth=\dimen189 -\titlewidthlast=\dimen190 -\titlewidthfirst=\dimen191 -) (./pseudocodes.aux) -\openout1 = `pseudocodes.aux'. - -LaTeX Font Info: Checking defaults for OML/cmm/m/it on input line 14. -LaTeX Font Info: ... okay on input line 14. -LaTeX Font Info: Checking defaults for OMS/cmsy/m/n on input line 14. -LaTeX Font Info: ... okay on input line 14. -LaTeX Font Info: Checking defaults for OT1/cmr/m/n on input line 14. -LaTeX Font Info: ... okay on input line 14. -LaTeX Font Info: Checking defaults for T1/cmr/m/n on input line 14. -LaTeX Font Info: ... okay on input line 14. -LaTeX Font Info: Checking defaults for TS1/cmr/m/n on input line 14. -LaTeX Font Info: ... okay on input line 14. -LaTeX Font Info: Checking defaults for TU/lmr/m/n on input line 14. -LaTeX Font Info: ... okay on input line 14. -LaTeX Font Info: Checking defaults for OMX/cmex/m/n on input line 14. -LaTeX Font Info: ... okay on input line 14. -LaTeX Font Info: Checking defaults for U/cmr/m/n on input line 14. -LaTeX Font Info: ... okay on input line 14. -LaTeX Font Info: Checking defaults for PD1/pdf/m/n on input line 14. -LaTeX Font Info: ... okay on input line 14. -LaTeX Font Info: Checking defaults for PU/pdf/m/n on input line 14. -LaTeX Font Info: ... okay on input line 14. - ABD: EverySelectfont initializing macros -LaTeX Info: Redefining \selectfont on input line 14. - -Package fontspec Info: Adjusting the maths setup (use [no-math] to avoid -(fontspec) this). 
- -\symlegacymaths=\mathgroup6 -LaTeX Font Info: Overwriting symbol font `legacymaths' in version `bold' -(Font) OT1/cmr/m/n --> OT1/cmr/bx/n on input line 14. -LaTeX Font Info: Redeclaring math accent \acute on input line 14. -LaTeX Font Info: Redeclaring math accent \grave on input line 14. -LaTeX Font Info: Redeclaring math accent \ddot on input line 14. -LaTeX Font Info: Redeclaring math accent \tilde on input line 14. -LaTeX Font Info: Redeclaring math accent \bar on input line 14. -LaTeX Font Info: Redeclaring math accent \breve on input line 14. -LaTeX Font Info: Redeclaring math accent \check on input line 14. -LaTeX Font Info: Redeclaring math accent \hat on input line 14. -LaTeX Font Info: Redeclaring math accent \dot on input line 14. -LaTeX Font Info: Redeclaring math accent \mathring on input line 14. -LaTeX Font Info: Redeclaring math symbol \Gamma on input line 14. -LaTeX Font Info: Redeclaring math symbol \Delta on input line 14. -LaTeX Font Info: Redeclaring math symbol \Theta on input line 14. -LaTeX Font Info: Redeclaring math symbol \Lambda on input line 14. -LaTeX Font Info: Redeclaring math symbol \Xi on input line 14. -LaTeX Font Info: Redeclaring math symbol \Pi on input line 14. -LaTeX Font Info: Redeclaring math symbol \Sigma on input line 14. -LaTeX Font Info: Redeclaring math symbol \Upsilon on input line 14. -LaTeX Font Info: Redeclaring math symbol \Phi on input line 14. -LaTeX Font Info: Redeclaring math symbol \Psi on input line 14. -LaTeX Font Info: Redeclaring math symbol \Omega on input line 14. -LaTeX Font Info: Redeclaring math symbol \mathdollar on input line 14. -LaTeX Font Info: Redeclaring symbol font `operators' on input line 14. -LaTeX Font Info: Encoding `OT1' has changed to `TU' for symbol font -(Font) `operators' in the math version `normal' on input line 14. -LaTeX Font Info: Overwriting symbol font `operators' in version `normal' -(Font) OT1/cmr/m/n --> TU/lmr/m/n on input line 14. 
-LaTeX Font Info: Encoding `OT1' has changed to `TU' for symbol font -(Font) `operators' in the math version `bold' on input line 14. -LaTeX Font Info: Overwriting symbol font `operators' in version `bold' -(Font) OT1/cmr/bx/n --> TU/lmr/m/n on input line 14. -LaTeX Font Info: Overwriting symbol font `operators' in version `normal' -(Font) TU/lmr/m/n --> TU/lmr/m/n on input line 14. -LaTeX Font Info: Overwriting math alphabet `\mathit' in version `normal' -(Font) OT1/cmr/m/it --> TU/lmr/m/it on input line 14. -LaTeX Font Info: Overwriting math alphabet `\mathbf' in version `normal' -(Font) OT1/cmr/bx/n --> TU/lmr/b/n on input line 14. -LaTeX Font Info: Overwriting math alphabet `\mathsf' in version `normal' -(Font) OT1/cmss/m/n --> TU/lmss/m/n on input line 14. -LaTeX Font Info: Overwriting math alphabet `\mathtt' in version `normal' -(Font) OT1/cmtt/m/n --> TU/lmtt/m/n on input line 14. -LaTeX Font Info: Overwriting symbol font `operators' in version `bold' -(Font) TU/lmr/m/n --> TU/lmr/b/n on input line 14. -LaTeX Font Info: Overwriting math alphabet `\mathit' in version `bold' -(Font) OT1/cmr/bx/it --> TU/lmr/b/it on input line 14. -LaTeX Font Info: Overwriting math alphabet `\mathsf' in version `bold' -(Font) OT1/cmss/bx/n --> TU/lmss/b/n on input line 14. -LaTeX Font Info: Overwriting math alphabet `\mathtt' in version `bold' -(Font) OT1/cmtt/m/n --> TU/lmtt/b/n on input line 14. -Package hyperref Info: Link coloring OFF on input line 14. - (/usr/local/texlive/2021/texmf-dist/tex/latex/hyperref/nameref.sty -Package: nameref 2021-04-02 v2.47 Cross-referencing by name of section - (/usr/local/texlive/2021/texmf-dist/tex/latex/refcount/refcount.sty -Package: refcount 2019/12/15 v3.6 Data extraction from label references (HO) -) (/usr/local/texlive/2021/texmf-dist/tex/generic/gettitlestring/gettitlestring.sty -Package: gettitlestring 2019/12/15 v1.6 Cleanup title references (HO) -) -\c@section@level=\count313 -) -LaTeX Info: Redefining \ref on input line 14. 
-LaTeX Info: Redefining \pageref on input line 14. -LaTeX Info: Redefining \nameref on input line 14. - (./pseudocodes.out) (./pseudocodes.out) -\@outlinefile=\write3 -\openout3 = `pseudocodes.out'. - - (./pseudocodes.toc) -\tf@toc=\write4 -\openout4 = `pseudocodes.toc'. - -LaTeX Font Info: Font shape `TU/SongtiSCLight(0)/m/sl' in size <10.95> not available -(Font) Font shape `TU/SongtiSCLight(0)/m/it' tried instead on input line 17. - [1 - -] -Package hyperref Info: bookmark level for unknown algorithm defaults to 0 on input line 22. - [2 - -] -LaTeX Font Info: Trying to load font information for U+msa on input line 32. - (/usr/local/texlive/2021/texmf-dist/tex/latex/amsfonts/umsa.fd -File: umsa.fd 2013/01/14 v3.01 AMS symbols A -) -LaTeX Font Info: Trying to load font information for U+msb on input line 32. - (/usr/local/texlive/2021/texmf-dist/tex/latex/amsfonts/umsb.fd -File: umsb.fd 2013/01/14 v3.01 AMS symbols B -) [3 - -] [4 - -] [5 - -] [6 - -] [7 - -] [8 - -] -Overfull \hbox (32.54117pt too wide) in paragraph at lines 212--212 -[][]$[]\OML/cmm/m/it/9 J[]\OT1/cmr/m/n/9 (\OML/cmm/m/it/9 ^^R\OT1/cmr/m/n/9 ) = \OMS/cmsy/m/n/9 r[]\OML/cmm/m/it/9 Q[] [] []$| - [] - - -Overfull \hbox (15.41673pt too wide) in paragraph at lines 213--213 -[][]$[]\OML/cmm/m/it/9 J[]\OT1/cmr/m/n/9 (\OML/cmm/m/it/9 ^^^\OT1/cmr/m/n/9 ) = \OMS/cmsy/m/n/9 r[]\OML/cmm/m/it/9 ^^K [] [] \OT1/cmr/m/n/9 + [] \OMS/cmsy/m/n/9 r[]\OML/cmm/m/it/9 f[] []$\TU/lmr/m/n/9 ,$[][] \OT1/cmr/m/n/9 = - [] - -[9 - -] (./pseudocodes.aux) -Package rerunfilecheck Info: File `pseudocodes.out' has not changed. -(rerunfilecheck) Checksum: 35B5A79A86EF3BC70F1A0B3BCBEBAA13;724. 
- ) -Here is how much of TeX's memory you used: - 14827 strings out of 476919 - 313456 string characters out of 5821840 - 653576 words of memory out of 5000000 - 34576 multiletter control sequences out of 15000+600000 - 413609 words of font info for 91 fonts, out of 8000000 for 9000 - 1348 hyphenation exceptions out of 8191 - 101i,13n,104p,676b,697s stack positions out of 5000i,500n,10000p,200000b,80000s - -Output written on pseudocodes.pdf (9 pages). diff --git a/projects/assets/pseudocodes/pseudocodes.out b/projects/assets/pseudocodes/pseudocodes.out deleted file mode 100644 index 38f7e61..0000000 --- a/projects/assets/pseudocodes/pseudocodes.out +++ /dev/null @@ -1,8 +0,0 @@ -\BOOKMARK [1][-]{section.1}{\376\377\152\041\162\110\131\007\165\050}{}% 1 -\BOOKMARK [1][-]{section.2}{\376\377\000Q\000\040\000l\000e\000a\000r\000n\000i\000n\000g\173\227\154\325}{}% 2 -\BOOKMARK [1][-]{section.3}{\376\377\000S\000a\000r\000s\000a\173\227\154\325}{}% 3 -\BOOKMARK [1][-]{section.4}{\376\377\000P\000o\000l\000i\000c\000y\000\040\000G\000r\000a\000d\000i\000e\000n\000t\173\227\154\325}{}% 4 -\BOOKMARK [1][-]{section.5}{\376\377\000D\000Q\000N\173\227\154\325}{}% 5 -\BOOKMARK [1][-]{section.6}{\376\377\000S\000o\000f\000t\000Q\173\227\154\325}{}% 6 -\BOOKMARK [1][-]{section.7}{\376\377\000S\000A\000C\000-\000S\173\227\154\325}{}% 7 -\BOOKMARK [1][-]{section.8}{\376\377\000S\000A\000C\173\227\154\325}{}% 8 diff --git a/projects/assets/pseudocodes/pseudocodes.pdf b/projects/assets/pseudocodes/pseudocodes.pdf index c476940..b34cabc 100644 Binary files a/projects/assets/pseudocodes/pseudocodes.pdf and b/projects/assets/pseudocodes/pseudocodes.pdf differ diff --git a/projects/assets/pseudocodes/pseudocodes.synctex.gz b/projects/assets/pseudocodes/pseudocodes.synctex.gz deleted file mode 100644 index 4e7cc3f..0000000 Binary files a/projects/assets/pseudocodes/pseudocodes.synctex.gz and /dev/null differ diff --git a/projects/assets/pseudocodes/pseudocodes.tex 
b/projects/assets/pseudocodes/pseudocodes.tex index d48ab65..929453a 100644 --- a/projects/assets/pseudocodes/pseudocodes.tex +++ b/projects/assets/pseudocodes/pseudocodes.tex @@ -11,6 +11,27 @@ \usepackage{float} % 调用该包能够使用[H] % \pagestyle{plain} % 去除页眉,但是保留页脚编号,都去掉plain换empty +% 更改脚注为圆圈 +\usepackage{pifont} +\makeatletter +\newcommand*{\circnum}[1]{% + \expandafter\@circnum\csname c@#1\endcsname +} +\newcommand*{\@circnum}[1]{% + \ifnum#1<1 % + \@ctrerr + \else + \ifnum#1>20 % + \@ctrerr + \else + \ding{\the\numexpr 171+(#1)\relax}% + \fi + \fi +} +\makeatother + +\renewcommand*{\thefootnote}{\circnum{footnote}} + \begin{document} \tableofcontents % 目录,注意要运行两下或者vscode保存两下才能显示 % \singlespacing @@ -69,27 +90,10 @@ \end{algorithm} \footnotetext[1]{Reinforcement Learning: An Introduction} \clearpage -\section{Policy Gradient算法} -\begin{algorithm}[H] % [H]固定位置 - \floatname{algorithm}{{REINFORCE算法:Monte-Carlo Policy Gradient}\footnotemark[1]} - \renewcommand{\thealgorithm}{} % 去掉算法标号 - \caption{} - \begin{algorithmic}[1] % [1]显示步数 - \STATE 初始化策略参数$\boldsymbol{\theta} \in \mathbb{R}^{d^{\prime}}($ e.g., to $\mathbf{0})$ - \FOR {回合数 = $1,M$} - \STATE 根据策略$\pi(\cdot \mid \cdot, \boldsymbol{\theta})$采样一个(或几个)回合的transition - \FOR {时步 = $1,t$} - \STATE 计算回报$G \leftarrow \sum_{k=t+1}^{T} \gamma^{k-t-1} R_{k}$ - \STATE 更新策略$\boldsymbol{\theta} \leftarrow {\boldsymbol{\theta}+\alpha \gamma^{t}} G \nabla \ln \pi\left(A_{t} \mid S_{t}, \boldsymbol{\theta}\right)$ - \ENDFOR - \ENDFOR - \end{algorithmic} -\end{algorithm} -\footnotetext[1]{Reinforcement Learning: An Introduction} -\clearpage + \section{DQN算法} \begin{algorithm}[H] % [H]固定位置 - \floatname{algorithm}{{DQN算法}{\hypersetup{linkcolor=white}\footnotemark}} + \floatname{algorithm}{{DQN算法}\footnotemark[1]} \renewcommand{\thealgorithm}{} % 去掉算法标号 \caption{} \renewcommand{\algorithmicrequire}{\textbf{输入:}} @@ -109,10 +113,10 @@ \STATE 更新环境状态$s_{t+1} \leftarrow s_t$ \STATE {\bfseries 更新策略:} \STATE 从$D$中采样一个batch的transition - 
\STATE 计算实际的$Q$值,即$y_{j}${\hypersetup{linkcolor=white}\footnotemark} - \STATE 对损失 $L(\theta)=\left(y_{i}-Q\left(s_{i}, a_{i} ; \theta\right)\right)^{2}$关于参数$\theta$做随机梯度下降{\hypersetup{linkcolor=white}\footnotemark} + \STATE 计算实际的$Q$值,即$y_{j}$\footnotemark[2] + \STATE 对损失 $L(\theta)=\left(y_{i}-Q\left(s_{i}, a_{i} ; \theta\right)\right)^{2}$关于参数$\theta$做随机梯度下降\footnotemark[3] \ENDFOR - \STATE 每$C$个回合复制参数$\hat{Q}\leftarrow Q${\hypersetup{linkcolor=white}\footnotemark} + \STATE 每$C$个回合复制参数$\hat{Q}\leftarrow Q$\footnotemark[4] \ENDFOR \end{algorithmic} \end{algorithm} @@ -121,7 +125,46 @@ \footnotetext[3]{$\theta_i \leftarrow \theta_i - \lambda \nabla_{\theta_{i}} L_{i}\left(\theta_{i}\right)$} \footnotetext[4]{此处也可像原论文中放到小循环中改成每$C$步,但没有每$C$个回合稳定} \clearpage +\section{Policy Gradient算法} +\begin{algorithm}[H] % [H]固定位置 + \floatname{algorithm}{{REINFORCE算法:Monte-Carlo Policy Gradient}\footnotemark[1]} + \renewcommand{\thealgorithm}{} % 去掉算法标号 + \caption{} + \begin{algorithmic}[1] % [1]显示步数 + \STATE 初始化策略参数$\boldsymbol{\theta} \in \mathbb{R}^{d^{\prime}}($ e.g., to $\mathbf{0})$ + \FOR {回合数 = $1,M$} + \STATE 根据策略$\pi(\cdot \mid \cdot, \boldsymbol{\theta})$采样一个(或几个)回合的transition + \FOR {时步 = $1,t$} + \STATE 计算回报$G \leftarrow \sum_{k=t+1}^{T} \gamma^{k-t-1} R_{k}$ + \STATE 更新策略$\boldsymbol{\theta} \leftarrow {\boldsymbol{\theta}+\alpha \gamma^{t}} G \nabla \ln \pi\left(A_{t} \mid S_{t}, \boldsymbol{\theta}\right)$ + \ENDFOR + \ENDFOR + \end{algorithmic} +\end{algorithm} +\footnotetext[1]{Reinforcement Learning: An Introduction} +\clearpage +\section{Advantage Actor Critic算法} +\begin{algorithm}[H] % [H]固定位置 + \floatname{algorithm}{{Q Actor Critic算法}} + \renewcommand{\thealgorithm}{} % 去掉算法标号 + \caption{} + \begin{algorithmic}[1] % [1]显示步数 + \STATE 初始化Actor参数$\theta$和Critic参数$w$ + \FOR {回合数 = $1,M$} + \STATE 根据策略$\pi_{\theta}(a|s)$采样一个(或几个)回合的transition + \STATE {\bfseries 更新Critic参数\footnotemark[1]} + \FOR {时步 = $t+1,1$} + \STATE 计算Advantage,即$ \delta_t = r_t + \gamma 
Q_w(s_{t+1},a_{t+1})-Q_w(s_t,a_t)$ + \STATE $w \leftarrow w+\alpha_{w} \delta_{t} \nabla_{w} Q_w(s_t,a_t)$ + \STATE $a_t \leftarrow a_{t+1}$,$s_t \leftarrow s_{t+1}$ + \ENDFOR + \STATE 更新Actor参数$\theta \leftarrow \theta+\alpha_{\theta} Q_{w}(s, a) \nabla_{\theta} \log \pi_{\theta}(a \mid s)$ + \ENDFOR + \end{algorithmic} +\end{algorithm} +\footnotetext[1]{这里结合TD error的特性按照从$t+1$到$1$计算Advantage更方便} +\clearpage \section{SoftQ算法} \begin{algorithm}[H] \floatname{algorithm}{{SoftQ算法}} diff --git a/projects/assets/pseudocodes/pseudocodes.toc b/projects/assets/pseudocodes/pseudocodes.toc deleted file mode 100644 index e33ad0b..0000000 --- a/projects/assets/pseudocodes/pseudocodes.toc +++ /dev/null @@ -1,8 +0,0 @@ -\contentsline {section}{\numberline {1}模版备用}{2}{section.1}% -\contentsline {section}{\numberline {2}Q learning算法}{3}{section.2}% -\contentsline {section}{\numberline {3}Sarsa算法}{4}{section.3}% -\contentsline {section}{\numberline {4}Policy Gradient算法}{5}{section.4}% -\contentsline {section}{\numberline {5}DQN算法}{6}{section.5}% -\contentsline {section}{\numberline {6}SoftQ算法}{7}{section.6}% -\contentsline {section}{\numberline {7}SAC-S算法}{8}{section.7}% -\contentsline {section}{\numberline {8}SAC算法}{9}{section.8}% diff --git a/projects/codes/A2C/a2c.py b/projects/codes/A2C/a2c.py index ba0ed7c..c1a88a5 100644 --- a/projects/codes/A2C/a2c.py +++ b/projects/codes/A2C/a2c.py @@ -1,56 +1,60 @@ -#!/usr/bin/env python -# coding=utf-8 -''' -Author: JiangJi -Email: johnjim0816@gmail.com -Date: 2021-05-03 22:16:08 -LastEditor: JiangJi -LastEditTime: 2022-07-20 23:54:40 -Discription: -Environment: -''' import torch -import torch.optim as optim -import torch.nn as nn -import torch.nn.functional as F -from torch.distributions import Categorical +import numpy as np + -class ActorCritic(nn.Module): - ''' A2C网络模型,包含一个Actor和Critic - ''' - def __init__(self, input_dim, output_dim, hidden_dim): - super(ActorCritic, self).__init__() - self.critic = nn.Sequential( - 
nn.Linear(input_dim, hidden_dim), - nn.ReLU(), - nn.Linear(hidden_dim, 1) - ) - self.actor = nn.Sequential( - nn.Linear(input_dim, hidden_dim), - nn.ReLU(), - nn.Linear(hidden_dim, output_dim), - nn.Softmax(dim=1), - ) - - def forward(self, x): - value = self.critic(x) - probs = self.actor(x) - dist = Categorical(probs) - return dist, value class A2C: - ''' A2C算法 - ''' - def __init__(self,n_states,n_actions,cfg) -> None: - self.gamma = cfg.gamma - self.device = torch.device(cfg.device) - self.model = ActorCritic(n_states, n_actions, cfg.hidden_size).to(self.device) - self.optimizer = optim.Adam(self.model.parameters()) + def __init__(self,models,memories,cfg): + self.n_actions = cfg['n_actions'] + self.gamma = cfg['gamma'] + self.device = torch.device(cfg['device']) + self.memory = memories['ACMemory'] + self.actor = models['Actor'].to(self.device) + self.critic = models['Critic'].to(self.device) + self.actor_optim = torch.optim.Adam(self.actor.parameters(), lr=cfg['actor_lr']) + self.critic_optim = torch.optim.Adam(self.critic.parameters(), lr=cfg['critic_lr']) + def sample_action(self,state): + state = torch.tensor(state, device=self.device, dtype=torch.float32).unsqueeze(dim=0) + dist = self.actor(state) + value = self.critic(state) # note that 'dist' need require_grad=True + value = value.detach().numpy().squeeze(0)[0] + action = np.random.choice(self.n_actions, p=dist.detach().numpy().squeeze(0)) # shape(p=(n_actions,1) + return action,value,dist + def predict_action(self,state): + state = torch.tensor(state, device=self.device, dtype=torch.float32).unsqueeze(dim=0) + dist = self.actor(state) + value = self.critic(state) # note that 'dist' need require_grad=True + value = value.detach().numpy().squeeze(0)[0] + action = np.random.choice(self.n_actions, p=dist.detach().numpy().squeeze(0)) # shape(p=(n_actions,1) + return action,value,dist + def update(self,next_state,entropy): + value_pool,log_prob_pool,reward_pool = self.memory.sample() + next_state = 
torch.tensor(next_state, device=self.device, dtype=torch.float32).unsqueeze(dim=0) + next_value = self.critic(next_state) + returns = np.zeros_like(reward_pool) + for t in reversed(range(len(reward_pool))): + next_value = reward_pool[t] + self.gamma * next_value # G(s_{t},a{t}) = r_{t+1} + gamma * V(s_{t+1}) + returns[t] = next_value + returns = torch.tensor(returns, device=self.device) + value_pool = torch.tensor(value_pool, device=self.device) + advantages = returns - value_pool + log_prob_pool = torch.stack(log_prob_pool) + actor_loss = (-log_prob_pool * advantages).mean() + critic_loss = 0.5 * advantages.pow(2).mean() + tot_loss = actor_loss + critic_loss + 0.001 * entropy + self.actor_optim.zero_grad() + self.critic_optim.zero_grad() + tot_loss.backward() + self.actor_optim.step() + self.critic_optim.step() + self.memory.clear() + def save_model(self, path): + from pathlib import Path + # create path + Path(path).mkdir(parents=True, exist_ok=True) + torch.save(self.actor.state_dict(), f"{path}/actor_checkpoint.pt") + torch.save(self.critic.state_dict(), f"{path}/critic_checkpoint.pt") - def compute_returns(self,next_value, rewards, masks): - R = next_value - returns = [] - for step in reversed(range(len(rewards))): - R = rewards[step] + self.gamma * R * masks[step] - returns.insert(0, R) - return returns \ No newline at end of file + def load_model(self, path): + self.actor.load_state_dict(torch.load(f"{path}/actor_checkpoint.pt")) + self.critic.load_state_dict(torch.load(f"{path}/critic_checkpoint.pt")) \ No newline at end of file diff --git a/projects/codes/A2C/a2c_2.py b/projects/codes/A2C/a2c_2.py new file mode 100644 index 0000000..74e2cfe --- /dev/null +++ b/projects/codes/A2C/a2c_2.py @@ -0,0 +1,55 @@ +import torch +import numpy as np + +class A2C_2: + def __init__(self,models,memories,cfg): + self.n_actions = cfg['n_actions'] + self.gamma = cfg['gamma'] + self.device = torch.device(cfg['device']) + self.memory = memories['ACMemory'] + self.ac_net = 
models['ActorCritic'].to(self.device) + self.ac_optimizer = torch.optim.Adam(self.ac_net.parameters(), lr=cfg['lr']) + def sample_action(self,state): + state = torch.tensor(state, device=self.device, dtype=torch.float32).unsqueeze(dim=0) + value, dist = self.ac_net(state) # note that 'dist' need require_grad=True + value = value.detach().numpy().squeeze(0)[0] + action = np.random.choice(self.n_actions, p=dist.detach().numpy().squeeze(0)) # shape(p=(n_actions,1) + return action,value,dist + def predict_action(self,state): + ''' predict can be all wrapped with no_grad(), then donot need detach(), or you can just copy contents of 'sample_action' + ''' + with torch.no_grad(): + state = torch.tensor(state, device=self.device, dtype=torch.float32).unsqueeze(dim=0) + value, dist = self.ac_net(state) + value = value.numpy().squeeze(0)[0] # shape(value) = (1,) + action = np.random.choice(self.n_actions, p=dist.numpy().squeeze(0)) # shape(p=(n_actions,1) + return action,value,dist + def update(self,next_state,entropy): + value_pool,log_prob_pool,reward_pool = self.memory.sample() + next_state = torch.tensor(next_state, device=self.device, dtype=torch.float32).unsqueeze(dim=0) + next_value,_ = self.ac_net(next_state) + returns = np.zeros_like(reward_pool) + for t in reversed(range(len(reward_pool))): + next_value = reward_pool[t] + self.gamma * next_value # G(s_{t},a{t}) = r_{t+1} + gamma * V(s_{t+1}) + returns[t] = next_value + returns = torch.tensor(returns, device=self.device) + value_pool = torch.tensor(value_pool, device=self.device) + advantages = returns - value_pool + log_prob_pool = torch.stack(log_prob_pool) + actor_loss = (-log_prob_pool * advantages).mean() + critic_loss = 0.5 * advantages.pow(2).mean() + ac_loss = actor_loss + critic_loss + 0.001 * entropy + self.ac_optimizer.zero_grad() + ac_loss.backward() + self.ac_optimizer.step() + self.memory.clear() + def save_model(self, path): + from pathlib import Path + # create path + Path(path).mkdir(parents=True, 
exist_ok=True) + torch.save(self.ac_net.state_dict(), f"{path}/a2c_checkpoint.pt") + + def load_model(self, path): + self.ac_net.load_state_dict(torch.load(f"{path}/a2c_checkpoint.pt")) + + \ No newline at end of file diff --git a/projects/codes/A2C/main.py b/projects/codes/A2C/main.py new file mode 100644 index 0000000..e5585e8 --- /dev/null +++ b/projects/codes/A2C/main.py @@ -0,0 +1,121 @@ +import sys,os +os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE" # avoid "OMP: Error #15: Initializing libiomp5md.dll, but found libiomp5md.dll already initialized." +curr_path = os.path.dirname(os.path.abspath(__file__)) # current path +parent_path = os.path.dirname(curr_path) # parent path +sys.path.append(parent_path) # add path to system path + +import datetime +import argparse +import gym +import torch +import numpy as np +from common.utils import all_seed +from common.launcher import Launcher +from common.memories import PGReplay +from common.models import ActorSoftmax,Critic +from envs.register import register_env +from a2c import A2C + +class Main(Launcher): + def get_args(self): + curr_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") # obtain current time + parser = argparse.ArgumentParser(description="hyperparameters") + parser.add_argument('--algo_name',default='A2C',type=str,help="name of algorithm") + parser.add_argument('--env_name',default='CartPole-v0',type=str,help="name of environment") + parser.add_argument('--train_eps',default=1600,type=int,help="episodes of training") + parser.add_argument('--test_eps',default=20,type=int,help="episodes of testing") + parser.add_argument('--ep_max_steps',default = 100000,type=int,help="steps per episode, much larger value can simulate infinite steps") + parser.add_argument('--gamma',default=0.99,type=float,help="discounted factor") + parser.add_argument('--actor_lr',default=3e-4,type=float,help="learning rate of actor") + parser.add_argument('--critic_lr',default=1e-3,type=float,help="learning rate of critic") + 
parser.add_argument('--actor_hidden_dim',default=256,type=int,help="hidden of actor net") + parser.add_argument('--critic_hidden_dim',default=256,type=int,help="hidden of critic net") + parser.add_argument('--device',default='cpu',type=str,help="cpu or cuda") + parser.add_argument('--seed',default=10,type=int,help="seed") + parser.add_argument('--show_fig',default=False,type=bool,help="if show figure or not") + parser.add_argument('--save_fig',default=True,type=bool,help="if save figure or not") + args = parser.parse_args() + default_args = {'result_path':f"{curr_path}/outputs/{args.env_name}/{curr_time}/results/", + 'model_path':f"{curr_path}/outputs/{args.env_name}/{curr_time}/models/", + } + args = {**vars(args),**default_args} # type(dict) + return args + def env_agent_config(self,cfg): + ''' create env and agent + ''' + register_env(cfg['env_name']) + env = gym.make(cfg['env_name']) + if cfg['seed'] !=0: # set random seed + all_seed(env,seed=cfg["seed"]) + try: # state dimension + n_states = env.observation_space.n # print(hasattr(env.observation_space, 'n')) + except AttributeError: + n_states = env.observation_space.shape[0] # print(hasattr(env.observation_space, 'shape')) + n_actions = env.action_space.n # action dimension + print(f"n_states: {n_states}, n_actions: {n_actions}") + cfg.update({"n_states":n_states,"n_actions":n_actions}) # update to cfg paramters + models = {'Actor':ActorSoftmax(cfg['n_states'],cfg['n_actions'], hidden_dim = cfg['actor_hidden_dim']),'Critic':Critic(cfg['n_states'],1,hidden_dim=cfg['critic_hidden_dim'])} + memories = {'ACMemory':PGReplay()} + agent = A2C(models,memories,cfg) + return env,agent + def train(self,cfg,env,agent): + print("Start training!") + print(f"Env: {cfg['env_name']}, Algorithm: {cfg['algo_name']}, Device: {cfg['device']}") + rewards = [] # record rewards for all episodes + steps = [] # record steps for all episodes + + for i_ep in range(cfg['train_eps']): + ep_reward = 0 # reward per episode + ep_step = 0 # 
step per episode + ep_entropy = 0 + state = env.reset() # reset and obtain initial state + + for _ in range(cfg['ep_max_steps']): + action, value, dist = agent.sample_action(state) # sample action + next_state, reward, done, _ = env.step(action) # update env and return transitions + log_prob = torch.log(dist.squeeze(0)[action]) + entropy = -np.sum(np.mean(dist.detach().numpy()) * np.log(dist.detach().numpy())) + agent.memory.push((value,log_prob,reward)) # save transitions + state = next_state # update state + ep_reward += reward + ep_entropy += entropy + ep_step += 1 + if done: + break + agent.update(next_state,ep_entropy) # update agent + rewards.append(ep_reward) + steps.append(ep_step) + if (i_ep+1)%10==0: + print(f'Episode: {i_ep+1}/{cfg["train_eps"]}, Reward: {ep_reward:.2f}, Steps:{ep_step}') + print("Finish training!") + return {'episodes':range(len(rewards)),'rewards':rewards,'steps':steps} + def test(self,cfg,env,agent): + print("Start testing!") + print(f"Env: {cfg['env_name']}, Algorithm: {cfg['algo_name']}, Device: {cfg['device']}") + rewards = [] # record rewards for all episodes + steps = [] # record steps for all episodes + for i_ep in range(cfg['test_eps']): + ep_reward = 0 # reward per episode + ep_step = 0 + state = env.reset() # reset and obtain initial state + for _ in range(cfg['ep_max_steps']): + action,_,_ = agent.predict_action(state) # predict action + next_state, reward, done, _ = env.step(action) + state = next_state + ep_reward += reward + ep_step += 1 + if done: + break + rewards.append(ep_reward) + steps.append(ep_step) + print(f"Episode: {i_ep+1}/{cfg['test_eps']}, Steps:{ep_step}, Reward: {ep_reward:.2f}") + print("Finish testing!") + return {'episodes':range(len(rewards)),'rewards':rewards,'steps':steps} + +if __name__ == "__main__": + main = Main() + main.run() + + + + diff --git a/projects/codes/A2C/main2.py b/projects/codes/A2C/main2.py new file mode 100644 index 0000000..c81754f --- /dev/null +++ b/projects/codes/A2C/main2.py 
@@ -0,0 +1,120 @@ +import sys,os +os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE" # avoid "OMP: Error #15: Initializing libiomp5md.dll, but found libiomp5md.dll already initialized." +curr_path = os.path.dirname(os.path.abspath(__file__)) # current path +parent_path = os.path.dirname(curr_path) # parent path +sys.path.append(parent_path) # add path to system path + +import datetime +import argparse +import gym +import torch +import numpy as np +from common.utils import all_seed +from common.launcher import Launcher +from common.memories import PGReplay +from common.models import ActorCriticSoftmax +from envs.register import register_env +from a2c_2 import A2C_2 + +class Main(Launcher): + def get_args(self): + curr_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") # obtain current time + parser = argparse.ArgumentParser(description="hyperparameters") + parser.add_argument('--algo_name',default='A2C',type=str,help="name of algorithm") + parser.add_argument('--env_name',default='CartPole-v0',type=str,help="name of environment") + parser.add_argument('--train_eps',default=2000,type=int,help="episodes of training") + parser.add_argument('--test_eps',default=20,type=int,help="episodes of testing") + parser.add_argument('--ep_max_steps',default = 100000,type=int,help="steps per episode, much larger value can simulate infinite steps") + parser.add_argument('--gamma',default=0.99,type=float,help="discounted factor") + parser.add_argument('--lr',default=3e-4,type=float,help="learning rate") + parser.add_argument('--actor_hidden_dim',default=256,type=int) + parser.add_argument('--critic_hidden_dim',default=256,type=int) + parser.add_argument('--device',default='cpu',type=str,help="cpu or cuda") + parser.add_argument('--seed',default=10,type=int,help="seed") + parser.add_argument('--show_fig',default=False,type=bool,help="if show figure or not") + parser.add_argument('--save_fig',default=True,type=bool,help="if save figure or not") + args = parser.parse_args() + 
default_args = {'result_path':f"{curr_path}/outputs/{args.env_name}/{curr_time}/results/", + 'model_path':f"{curr_path}/outputs/{args.env_name}/{curr_time}/models/", + } + args = {**vars(args),**default_args} # type(dict) + return args + def env_agent_config(self,cfg): + ''' create env and agent + ''' + register_env(cfg['env_name']) + env = gym.make(cfg['env_name']) + if cfg['seed'] !=0: # set random seed + all_seed(env,seed=cfg["seed"]) + try: # state dimension + n_states = env.observation_space.n # print(hasattr(env.observation_space, 'n')) + except AttributeError: + n_states = env.observation_space.shape[0] # print(hasattr(env.observation_space, 'shape')) + n_actions = env.action_space.n # action dimension + print(f"n_states: {n_states}, n_actions: {n_actions}") + cfg.update({"n_states":n_states,"n_actions":n_actions}) # update to cfg paramters + models = {'ActorCritic':ActorCriticSoftmax(cfg['n_states'],cfg['n_actions'], actor_hidden_dim = cfg['actor_hidden_dim'],critic_hidden_dim=cfg['critic_hidden_dim'])} + memories = {'ACMemory':PGReplay()} + agent = A2C_2(models,memories,cfg) + return env,agent + def train(self,cfg,env,agent): + print("Start training!") + print(f"Env: {cfg['env_name']}, Algorithm: {cfg['algo_name']}, Device: {cfg['device']}") + rewards = [] # record rewards for all episodes + steps = [] # record steps for all episodes + + for i_ep in range(cfg['train_eps']): + ep_reward = 0 # reward per episode + ep_step = 0 # step per episode + ep_entropy = 0 + state = env.reset() # reset and obtain initial state + + for _ in range(cfg['ep_max_steps']): + action, value, dist = agent.sample_action(state) # sample action + next_state, reward, done, _ = env.step(action) # update env and return transitions + log_prob = torch.log(dist.squeeze(0)[action]) + entropy = -np.sum(np.mean(dist.detach().numpy()) * np.log(dist.detach().numpy())) + agent.memory.push((value,log_prob,reward)) # save transitions + state = next_state # update state + ep_reward += reward + 
ep_entropy += entropy + ep_step += 1 + if done: + break + agent.update(next_state,ep_entropy) # update agent + rewards.append(ep_reward) + steps.append(ep_step) + if (i_ep+1)%10==0: + print(f'Episode: {i_ep+1}/{cfg["train_eps"]}, Reward: {ep_reward:.2f}, Steps:{ep_step}') + print("Finish training!") + return {'episodes':range(len(rewards)),'rewards':rewards,'steps':steps} + def test(self,cfg,env,agent): + print("Start testing!") + print(f"Env: {cfg['env_name']}, Algorithm: {cfg['algo_name']}, Device: {cfg['device']}") + rewards = [] # record rewards for all episodes + steps = [] # record steps for all episodes + for i_ep in range(cfg['test_eps']): + ep_reward = 0 # reward per episode + ep_step = 0 + state = env.reset() # reset and obtain initial state + for _ in range(cfg['ep_max_steps']): + action,_,_ = agent.predict_action(state) # predict action + next_state, reward, done, _ = env.step(action) + state = next_state + ep_reward += reward + ep_step += 1 + if done: + break + rewards.append(ep_reward) + steps.append(ep_step) + print(f"Episode: {i_ep+1}/{cfg['test_eps']}, Steps:{ep_step}, Reward: {ep_reward:.2f}") + print("Finish testing!") + return {'episodes':range(len(rewards)),'rewards':rewards,'steps':steps} + +if __name__ == "__main__": + main = Main() + main.run() + + + + diff --git a/projects/codes/A2C/outputs/CartPole-v0/20220829-135818/models/a2c_checkpoint.pt b/projects/codes/A2C/outputs/CartPole-v0/20220829-135818/models/a2c_checkpoint.pt new file mode 100644 index 0000000..c346b1b Binary files /dev/null and b/projects/codes/A2C/outputs/CartPole-v0/20220829-135818/models/a2c_checkpoint.pt differ diff --git a/projects/codes/A2C/outputs/CartPole-v0/20220829-135818/results/params.json b/projects/codes/A2C/outputs/CartPole-v0/20220829-135818/results/params.json new file mode 100644 index 0000000..2ce53a7 --- /dev/null +++ b/projects/codes/A2C/outputs/CartPole-v0/20220829-135818/results/params.json @@ -0,0 +1,19 @@ +{ + "algo_name": "A2C", + "env_name": 
"CartPole-v0", + "train_eps": 2000, + "test_eps": 20, + "ep_max_steps": 100000, + "gamma": 0.99, + "lr": 0.0003, + "actor_hidden_dim": 256, + "critic_hidden_dim": 256, + "device": "cpu", + "seed": 10, + "show_fig": false, + "save_fig": true, + "result_path": "/Users/jj/Desktop/rl-tutorials/codes/A2C/outputs/CartPole-v0/20220829-135818/results/", + "model_path": "/Users/jj/Desktop/rl-tutorials/codes/A2C/outputs/CartPole-v0/20220829-135818/models/", + "n_states": 4, + "n_actions": 2 +} \ No newline at end of file diff --git a/projects/codes/A2C/outputs/CartPole-v0/20220829-135818/results/testing_curve.png b/projects/codes/A2C/outputs/CartPole-v0/20220829-135818/results/testing_curve.png new file mode 100644 index 0000000..b1bbebb Binary files /dev/null and b/projects/codes/A2C/outputs/CartPole-v0/20220829-135818/results/testing_curve.png differ diff --git a/projects/codes/A2C/outputs/CartPole-v0/20220829-135818/results/testing_results.csv b/projects/codes/A2C/outputs/CartPole-v0/20220829-135818/results/testing_results.csv new file mode 100644 index 0000000..221744d --- /dev/null +++ b/projects/codes/A2C/outputs/CartPole-v0/20220829-135818/results/testing_results.csv @@ -0,0 +1,21 @@ +episodes,rewards,steps +0,200.0,200 +1,200.0,200 +2,93.0,93 +3,155.0,155 +4,116.0,116 +5,200.0,200 +6,190.0,190 +7,176.0,176 +8,200.0,200 +9,200.0,200 +10,200.0,200 +11,179.0,179 +12,200.0,200 +13,185.0,185 +14,191.0,191 +15,200.0,200 +16,200.0,200 +17,124.0,124 +18,200.0,200 +19,172.0,172 diff --git a/projects/codes/A2C/outputs/CartPole-v0/20220829-135818/results/training_curve.png b/projects/codes/A2C/outputs/CartPole-v0/20220829-135818/results/training_curve.png new file mode 100644 index 0000000..4410e5e Binary files /dev/null and b/projects/codes/A2C/outputs/CartPole-v0/20220829-135818/results/training_curve.png differ diff --git a/projects/codes/A2C/outputs/CartPole-v0/20220829-135818/results/training_results.csv 
b/projects/codes/A2C/outputs/CartPole-v0/20220829-135818/results/training_results.csv new file mode 100644 index 0000000..7d5debb --- /dev/null +++ b/projects/codes/A2C/outputs/CartPole-v0/20220829-135818/results/training_results.csv @@ -0,0 +1,2001 @@ +episodes,rewards,steps +0,16.0,16 +1,17.0,17 +2,19.0,19 +3,95.0,95 +4,13.0,13 +5,22.0,22 +6,15.0,15 +7,20.0,20 +8,13.0,13 +9,20.0,20 +10,21.0,21 +11,59.0,59 +12,16.0,16 +13,35.0,35 +14,17.0,17 +15,22.0,22 +16,28.0,28 +17,19.0,19 +18,12.0,12 +19,17.0,17 +20,16.0,16 +21,28.0,28 +22,11.0,11 +23,11.0,11 +24,11.0,11 +25,18.0,18 +26,15.0,15 +27,33.0,33 +28,21.0,21 +29,34.0,34 +30,11.0,11 +31,16.0,16 +32,27.0,27 +33,19.0,19 +34,16.0,16 +35,33.0,33 +36,21.0,21 +37,12.0,12 +38,26.0,26 +39,34.0,34 +40,11.0,11 +41,38.0,38 +42,13.0,13 +43,12.0,12 +44,20.0,20 +45,17.0,17 +46,10.0,10 +47,20.0,20 +48,22.0,22 +49,21.0,21 +50,20.0,20 +51,32.0,32 +52,10.0,10 +53,33.0,33 +54,25.0,25 +55,30.0,30 +56,22.0,22 +57,25.0,25 +58,19.0,19 +59,12.0,12 +60,9.0,9 +61,11.0,11 +62,12.0,12 +63,28.0,28 +64,12.0,12 +65,12.0,12 +66,12.0,12 +67,34.0,34 +68,12.0,12 +69,25.0,25 +70,13.0,13 +71,26.0,26 +72,13.0,13 +73,22.0,22 +74,24.0,24 +75,9.0,9 +76,14.0,14 +77,17.0,17 +78,14.0,14 +79,25.0,25 +80,23.0,23 +81,38.0,38 +82,30.0,30 +83,28.0,28 +84,25.0,25 +85,16.0,16 +86,13.0,13 +87,34.0,34 +88,16.0,16 +89,48.0,48 +90,12.0,12 +91,25.0,25 +92,25.0,25 +93,17.0,17 +94,13.0,13 +95,12.0,12 +96,23.0,23 +97,22.0,22 +98,12.0,12 +99,16.0,16 +100,16.0,16 +101,10.0,10 +102,14.0,14 +103,20.0,20 +104,13.0,13 +105,16.0,16 +106,14.0,14 +107,22.0,22 +108,17.0,17 +109,19.0,19 +110,26.0,26 +111,16.0,16 +112,22.0,22 +113,20.0,20 +114,27.0,27 +115,16.0,16 +116,40.0,40 +117,14.0,14 +118,15.0,15 +119,40.0,40 +120,23.0,23 +121,32.0,32 +122,13.0,13 +123,33.0,33 +124,18.0,18 +125,26.0,26 +126,30.0,30 +127,28.0,28 +128,12.0,12 +129,45.0,45 +130,14.0,14 +131,40.0,40 +132,13.0,13 +133,16.0,16 +134,78.0,78 +135,19.0,19 +136,19.0,19 +137,20.0,20 +138,26.0,26 +139,21.0,21 +140,28.0,28 
+141,17.0,17 +142,19.0,19 +143,13.0,13 +144,54.0,54 +145,41.0,41 +146,10.0,10 +147,15.0,15 +148,14.0,14 +149,19.0,19 +150,19.0,19 +151,32.0,32 +152,39.0,39 +153,36.0,36 +154,21.0,21 +155,58.0,58 +156,15.0,15 +157,55.0,55 +158,16.0,16 +159,46.0,46 +160,25.0,25 +161,15.0,15 +162,13.0,13 +163,18.0,18 +164,19.0,19 +165,22.0,22 +166,17.0,17 +167,48.0,48 +168,28.0,28 +169,29.0,29 +170,60.0,60 +171,12.0,12 +172,34.0,34 +173,18.0,18 +174,20.0,20 +175,18.0,18 +176,29.0,29 +177,14.0,14 +178,23.0,23 +179,26.0,26 +180,23.0,23 +181,77.0,77 +182,46.0,46 +183,25.0,25 +184,37.0,37 +185,12.0,12 +186,12.0,12 +187,36.0,36 +188,30.0,30 +189,135.0,135 +190,11.0,11 +191,18.0,18 +192,34.0,34 +193,12.0,12 +194,22.0,22 +195,19.0,19 +196,21.0,21 +197,26.0,26 +198,13.0,13 +199,128.0,128 +200,11.0,11 +201,21.0,21 +202,27.0,27 +203,11.0,11 +204,14.0,14 +205,70.0,70 +206,10.0,10 +207,18.0,18 +208,35.0,35 +209,80.0,80 +210,19.0,19 +211,95.0,95 +212,14.0,14 +213,20.0,20 +214,30.0,30 +215,19.0,19 +216,20.0,20 +217,54.0,54 +218,27.0,27 +219,19.0,19 +220,30.0,30 +221,19.0,19 +222,55.0,55 +223,29.0,29 +224,65.0,65 +225,19.0,19 +226,38.0,38 +227,14.0,14 +228,62.0,62 +229,37.0,37 +230,50.0,50 +231,40.0,40 +232,39.0,39 +233,16.0,16 +234,18.0,18 +235,86.0,86 +236,45.0,45 +237,37.0,37 +238,35.0,35 +239,20.0,20 +240,22.0,22 +241,40.0,40 +242,29.0,29 +243,17.0,17 +244,34.0,34 +245,91.0,91 +246,31.0,31 +247,69.0,69 +248,31.0,31 +249,30.0,30 +250,30.0,30 +251,23.0,23 +252,22.0,22 +253,52.0,52 +254,25.0,25 +255,32.0,32 +256,37.0,37 +257,31.0,31 +258,18.0,18 +259,60.0,60 +260,15.0,15 +261,23.0,23 +262,34.0,34 +263,43.0,43 +264,66.0,66 +265,15.0,15 +266,19.0,19 +267,55.0,55 +268,65.0,65 +269,50.0,50 +270,19.0,19 +271,37.0,37 +272,33.0,33 +273,33.0,33 +274,16.0,16 +275,19.0,19 +276,18.0,18 +277,52.0,52 +278,27.0,27 +279,48.0,48 +280,39.0,39 +281,29.0,29 +282,86.0,86 +283,37.0,37 +284,21.0,21 +285,21.0,21 +286,52.0,52 +287,88.0,88 +288,30.0,30 +289,34.0,34 +290,22.0,22 +291,26.0,26 +292,18.0,18 +293,29.0,29 
+294,57.0,57 +295,61.0,61 +296,73.0,73 +297,19.0,19 +298,32.0,32 +299,49.0,49 +300,58.0,58 +301,31.0,31 +302,29.0,29 +303,73.0,73 +304,21.0,21 +305,33.0,33 +306,23.0,23 +307,22.0,22 +308,36.0,36 +309,41.0,41 +310,19.0,19 +311,41.0,41 +312,67.0,67 +313,95.0,95 +314,43.0,43 +315,27.0,27 +316,29.0,29 +317,30.0,30 +318,23.0,23 +319,32.0,32 +320,48.0,48 +321,33.0,33 +322,32.0,32 +323,63.0,63 +324,20.0,20 +325,25.0,25 +326,23.0,23 +327,42.0,42 +328,15.0,15 +329,66.0,66 +330,40.0,40 +331,31.0,31 +332,73.0,73 +333,52.0,52 +334,31.0,31 +335,69.0,69 +336,33.0,33 +337,39.0,39 +338,21.0,21 +339,34.0,34 +340,32.0,32 +341,38.0,38 +342,29.0,29 +343,32.0,32 +344,14.0,14 +345,51.0,51 +346,38.0,38 +347,51.0,51 +348,28.0,28 +349,85.0,85 +350,34.0,34 +351,57.0,57 +352,21.0,21 +353,22.0,22 +354,27.0,27 +355,19.0,19 +356,77.0,77 +357,30.0,30 +358,28.0,28 +359,53.0,53 +360,48.0,48 +361,41.0,41 +362,26.0,26 +363,35.0,35 +364,52.0,52 +365,42.0,42 +366,21.0,21 +367,34.0,34 +368,43.0,43 +369,82.0,82 +370,43.0,43 +371,67.0,67 +372,56.0,56 +373,54.0,54 +374,27.0,27 +375,37.0,37 +376,32.0,32 +377,23.0,23 +378,32.0,32 +379,40.0,40 +380,26.0,26 +381,22.0,22 +382,23.0,23 +383,100.0,100 +384,45.0,45 +385,57.0,57 +386,51.0,51 +387,15.0,15 +388,17.0,17 +389,63.0,63 +390,67.0,67 +391,80.0,80 +392,64.0,64 +393,29.0,29 +394,74.0,74 +395,51.0,51 +396,88.0,88 +397,11.0,11 +398,27.0,27 +399,30.0,30 +400,22.0,22 +401,36.0,36 +402,13.0,13 +403,37.0,37 +404,86.0,86 +405,18.0,18 +406,25.0,25 +407,21.0,21 +408,30.0,30 +409,28.0,28 +410,43.0,43 +411,23.0,23 +412,17.0,17 +413,32.0,32 +414,25.0,25 +415,22.0,22 +416,36.0,36 +417,52.0,52 +418,33.0,33 +419,16.0,16 +420,30.0,30 +421,52.0,52 +422,59.0,59 +423,35.0,35 +424,39.0,39 +425,47.0,47 +426,30.0,30 +427,32.0,32 +428,42.0,42 +429,37.0,37 +430,35.0,35 +431,25.0,25 +432,39.0,39 +433,26.0,26 +434,58.0,58 +435,64.0,64 +436,30.0,30 +437,33.0,33 +438,42.0,42 +439,30.0,30 +440,47.0,47 +441,69.0,69 +442,47.0,47 +443,40.0,40 +444,53.0,53 +445,38.0,38 +446,176.0,176 
+447,116.0,116 +448,40.0,40 +449,86.0,86 +450,38.0,38 +451,39.0,39 +452,48.0,48 +453,22.0,22 +454,64.0,64 +455,30.0,30 +456,36.0,36 +457,46.0,46 +458,16.0,16 +459,103.0,103 +460,58.0,58 +461,16.0,16 +462,36.0,36 +463,21.0,21 +464,79.0,79 +465,29.0,29 +466,67.0,67 +467,59.0,59 +468,50.0,50 +469,72.0,72 +470,75.0,75 +471,26.0,26 +472,36.0,36 +473,35.0,35 +474,40.0,40 +475,49.0,49 +476,47.0,47 +477,42.0,42 +478,37.0,37 +479,33.0,33 +480,60.0,60 +481,34.0,34 +482,20.0,20 +483,69.0,69 +484,63.0,63 +485,49.0,49 +486,18.0,18 +487,68.0,68 +488,24.0,24 +489,79.0,79 +490,22.0,22 +491,39.0,39 +492,64.0,64 +493,20.0,20 +494,21.0,21 +495,22.0,22 +496,56.0,56 +497,56.0,56 +498,39.0,39 +499,64.0,64 +500,42.0,42 +501,40.0,40 +502,44.0,44 +503,30.0,30 +504,56.0,56 +505,137.0,137 +506,37.0,37 +507,19.0,19 +508,59.0,59 +509,29.0,29 +510,108.0,108 +511,53.0,53 +512,26.0,26 +513,43.0,43 +514,27.0,27 +515,34.0,34 +516,51.0,51 +517,35.0,35 +518,90.0,90 +519,64.0,64 +520,63.0,63 +521,33.0,33 +522,29.0,29 +523,48.0,48 +524,40.0,40 +525,55.0,55 +526,26.0,26 +527,69.0,69 +528,48.0,48 +529,50.0,50 +530,34.0,34 +531,31.0,31 +532,26.0,26 +533,60.0,60 +534,60.0,60 +535,80.0,80 +536,44.0,44 +537,62.0,62 +538,47.0,47 +539,79.0,79 +540,91.0,91 +541,84.0,84 +542,134.0,134 +543,49.0,49 +544,37.0,37 +545,23.0,23 +546,52.0,52 +547,52.0,52 +548,57.0,57 +549,69.0,69 +550,76.0,76 +551,33.0,33 +552,117.0,117 +553,83.0,83 +554,38.0,38 +555,45.0,45 +556,119.0,119 +557,105.0,105 +558,151.0,151 +559,59.0,59 +560,41.0,41 +561,49.0,49 +562,29.0,29 +563,61.0,61 +564,106.0,106 +565,43.0,43 +566,51.0,51 +567,54.0,54 +568,60.0,60 +569,29.0,29 +570,54.0,54 +571,47.0,47 +572,101.0,101 +573,26.0,26 +574,71.0,71 +575,115.0,115 +576,21.0,21 +577,47.0,47 +578,71.0,71 +579,61.0,61 +580,30.0,30 +581,72.0,72 +582,37.0,37 +583,62.0,62 +584,76.0,76 +585,49.0,49 +586,78.0,78 +587,62.0,62 +588,86.0,86 +589,56.0,56 +590,103.0,103 +591,97.0,97 +592,53.0,53 +593,110.0,110 +594,91.0,91 +595,52.0,52 +596,67.0,67 +597,40.0,40 
+598,71.0,71 +599,52.0,52 +600,49.0,49 +601,101.0,101 +602,93.0,93 +603,33.0,33 +604,56.0,56 +605,53.0,53 +606,103.0,103 +607,134.0,134 +608,83.0,83 +609,78.0,78 +610,25.0,25 +611,52.0,52 +612,61.0,61 +613,49.0,49 +614,91.0,91 +615,46.0,46 +616,34.0,34 +617,72.0,72 +618,62.0,62 +619,42.0,42 +620,63.0,63 +621,60.0,60 +622,178.0,178 +623,43.0,43 +624,66.0,66 +625,23.0,23 +626,42.0,42 +627,26.0,26 +628,73.0,73 +629,30.0,30 +630,39.0,39 +631,36.0,36 +632,47.0,47 +633,58.0,58 +634,45.0,45 +635,82.0,82 +636,55.0,55 +637,31.0,31 +638,71.0,71 +639,68.0,68 +640,119.0,119 +641,42.0,42 +642,72.0,72 +643,36.0,36 +644,47.0,47 +645,126.0,126 +646,64.0,64 +647,60.0,60 +648,147.0,147 +649,34.0,34 +650,17.0,17 +651,59.0,59 +652,46.0,46 +653,126.0,126 +654,76.0,76 +655,85.0,85 +656,68.0,68 +657,36.0,36 +658,53.0,53 +659,116.0,116 +660,99.0,99 +661,29.0,29 +662,22.0,22 +663,89.0,89 +664,166.0,166 +665,73.0,73 +666,28.0,28 +667,110.0,110 +668,92.0,92 +669,76.0,76 +670,65.0,65 +671,48.0,48 +672,27.0,27 +673,38.0,38 +674,44.0,44 +675,70.0,70 +676,103.0,103 +677,48.0,48 +678,56.0,56 +679,51.0,51 +680,30.0,30 +681,118.0,118 +682,35.0,35 +683,12.0,12 +684,64.0,64 +685,105.0,105 +686,23.0,23 +687,52.0,52 +688,153.0,153 +689,65.0,65 +690,44.0,44 +691,38.0,38 +692,55.0,55 +693,37.0,37 +694,18.0,18 +695,106.0,106 +696,175.0,175 +697,88.0,88 +698,22.0,22 +699,60.0,60 +700,22.0,22 +701,78.0,78 +702,54.0,54 +703,61.0,61 +704,50.0,50 +705,55.0,55 +706,34.0,34 +707,23.0,23 +708,39.0,39 +709,45.0,45 +710,43.0,43 +711,113.0,113 +712,59.0,59 +713,36.0,36 +714,71.0,71 +715,35.0,35 +716,45.0,45 +717,42.0,42 +718,40.0,40 +719,26.0,26 +720,52.0,52 +721,29.0,29 +722,47.0,47 +723,45.0,45 +724,73.0,73 +725,42.0,42 +726,42.0,42 +727,30.0,30 +728,119.0,119 +729,60.0,60 +730,18.0,18 +731,18.0,18 +732,21.0,21 +733,33.0,33 +734,45.0,45 +735,35.0,35 +736,28.0,28 +737,37.0,37 +738,26.0,26 +739,28.0,28 +740,31.0,31 +741,130.0,130 +742,48.0,48 +743,65.0,65 +744,38.0,38 +745,54.0,54 +746,92.0,92 +747,53.0,53 
+748,42.0,42 +749,87.0,87 +750,65.0,65 +751,45.0,45 +752,58.0,58 +753,27.0,27 +754,20.0,20 +755,59.0,59 +756,105.0,105 +757,54.0,54 +758,27.0,27 +759,46.0,46 +760,29.0,29 +761,15.0,15 +762,58.0,58 +763,22.0,22 +764,45.0,45 +765,44.0,44 +766,81.0,81 +767,61.0,61 +768,23.0,23 +769,134.0,134 +770,38.0,38 +771,200.0,200 +772,63.0,63 +773,62.0,62 +774,36.0,36 +775,68.0,68 +776,94.0,94 +777,26.0,26 +778,61.0,61 +779,77.0,77 +780,71.0,71 +781,63.0,63 +782,40.0,40 +783,26.0,26 +784,126.0,126 +785,59.0,59 +786,64.0,64 +787,57.0,57 +788,99.0,99 +789,47.0,47 +790,68.0,68 +791,38.0,38 +792,57.0,57 +793,42.0,42 +794,79.0,79 +795,108.0,108 +796,63.0,63 +797,62.0,62 +798,189.0,189 +799,157.0,157 +800,93.0,93 +801,53.0,53 +802,56.0,56 +803,144.0,144 +804,63.0,63 +805,41.0,41 +806,134.0,134 +807,23.0,23 +808,90.0,90 +809,124.0,124 +810,42.0,42 +811,40.0,40 +812,29.0,29 +813,46.0,46 +814,160.0,160 +815,34.0,34 +816,91.0,91 +817,60.0,60 +818,50.0,50 +819,113.0,113 +820,108.0,108 +821,56.0,56 +822,200.0,200 +823,154.0,154 +824,78.0,78 +825,55.0,55 +826,136.0,136 +827,66.0,66 +828,81.0,81 +829,23.0,23 +830,63.0,63 +831,85.0,85 +832,91.0,91 +833,85.0,85 +834,17.0,17 +835,85.0,85 +836,152.0,152 +837,59.0,59 +838,40.0,40 +839,103.0,103 +840,135.0,135 +841,50.0,50 +842,22.0,22 +843,75.0,75 +844,97.0,97 +845,59.0,59 +846,57.0,57 +847,122.0,122 +848,100.0,100 +849,132.0,132 +850,53.0,53 +851,106.0,106 +852,87.0,87 +853,82.0,82 +854,154.0,154 +855,139.0,139 +856,27.0,27 +857,35.0,35 +858,60.0,60 +859,188.0,188 +860,116.0,116 +861,160.0,160 +862,190.0,190 +863,61.0,61 +864,122.0,122 +865,97.0,97 +866,54.0,54 +867,24.0,24 +868,122.0,122 +869,161.0,161 +870,40.0,40 +871,165.0,165 +872,145.0,145 +873,155.0,155 +874,90.0,90 +875,58.0,58 +876,53.0,53 +877,47.0,47 +878,53.0,53 +879,86.0,86 +880,56.0,56 +881,152.0,152 +882,77.0,77 +883,50.0,50 +884,85.0,85 +885,200.0,200 +886,96.0,96 +887,85.0,85 +888,44.0,44 +889,39.0,39 +890,200.0,200 +891,164.0,164 +892,36.0,36 +893,139.0,139 +894,44.0,44 
+895,46.0,46 +896,103.0,103 +897,168.0,168 +898,189.0,189 +899,200.0,200 +900,69.0,69 +901,71.0,71 +902,147.0,147 +903,140.0,140 +904,200.0,200 +905,82.0,82 +906,129.0,129 +907,164.0,164 +908,28.0,28 +909,73.0,73 +910,174.0,174 +911,176.0,176 +912,132.0,132 +913,149.0,149 +914,93.0,93 +915,52.0,52 +916,93.0,93 +917,33.0,33 +918,154.0,154 +919,200.0,200 +920,200.0,200 +921,200.0,200 +922,67.0,67 +923,83.0,83 +924,162.0,162 +925,41.0,41 +926,103.0,103 +927,200.0,200 +928,131.0,131 +929,117.0,117 +930,77.0,77 +931,45.0,45 +932,144.0,144 +933,123.0,123 +934,122.0,122 +935,29.0,29 +936,89.0,89 +937,71.0,71 +938,200.0,200 +939,80.0,80 +940,98.0,98 +941,143.0,143 +942,200.0,200 +943,95.0,95 +944,83.0,83 +945,62.0,62 +946,62.0,62 +947,118.0,118 +948,144.0,144 +949,113.0,113 +950,110.0,110 +951,139.0,139 +952,70.0,70 +953,79.0,79 +954,176.0,176 +955,151.0,151 +956,24.0,24 +957,50.0,50 +958,90.0,90 +959,20.0,20 +960,65.0,65 +961,176.0,176 +962,37.0,37 +963,48.0,48 +964,89.0,89 +965,190.0,190 +966,155.0,155 +967,26.0,26 +968,200.0,200 +969,186.0,186 +970,60.0,60 +971,115.0,115 +972,115.0,115 +973,121.0,121 +974,177.0,177 +975,200.0,200 +976,51.0,51 +977,105.0,105 +978,200.0,200 +979,68.0,68 +980,170.0,170 +981,70.0,70 +982,55.0,55 +983,70.0,70 +984,66.0,66 +985,161.0,161 +986,40.0,40 +987,200.0,200 +988,107.0,107 +989,80.0,80 +990,128.0,128 +991,154.0,154 +992,101.0,101 +993,178.0,178 +994,129.0,129 +995,128.0,128 +996,146.0,146 +997,142.0,142 +998,200.0,200 +999,62.0,62 +1000,19.0,19 +1001,82.0,82 +1002,63.0,63 +1003,129.0,129 +1004,54.0,54 +1005,125.0,125 +1006,113.0,113 +1007,93.0,93 +1008,200.0,200 +1009,48.0,48 +1010,58.0,58 +1011,66.0,66 +1012,41.0,41 +1013,145.0,145 +1014,42.0,42 +1015,185.0,185 +1016,199.0,199 +1017,200.0,200 +1018,125.0,125 +1019,145.0,145 +1020,32.0,32 +1021,141.0,141 +1022,195.0,195 +1023,175.0,175 +1024,162.0,162 +1025,127.0,127 +1026,154.0,154 +1027,166.0,166 +1028,200.0,200 +1029,188.0,188 +1030,200.0,200 +1031,200.0,200 +1032,162.0,162 
+1033,141.0,141 +1034,200.0,200 +1035,155.0,155 +1036,134.0,134 +1037,146.0,146 +1038,192.0,192 +1039,136.0,136 +1040,200.0,200 +1041,149.0,149 +1042,113.0,113 +1043,40.0,40 +1044,178.0,178 +1045,126.0,126 +1046,200.0,200 +1047,37.0,37 +1048,200.0,200 +1049,141.0,141 +1050,118.0,118 +1051,34.0,34 +1052,142.0,142 +1053,65.0,65 +1054,200.0,200 +1055,133.0,133 +1056,137.0,137 +1057,148.0,148 +1058,148.0,148 +1059,136.0,136 +1060,159.0,159 +1061,48.0,48 +1062,109.0,109 +1063,65.0,65 +1064,130.0,130 +1065,191.0,191 +1066,200.0,200 +1067,200.0,200 +1068,48.0,48 +1069,200.0,200 +1070,65.0,65 +1071,164.0,164 +1072,200.0,200 +1073,156.0,156 +1074,200.0,200 +1075,131.0,131 +1076,126.0,126 +1077,200.0,200 +1078,200.0,200 +1079,32.0,32 +1080,175.0,175 +1081,200.0,200 +1082,84.0,84 +1083,81.0,81 +1084,183.0,183 +1085,51.0,51 +1086,155.0,155 +1087,146.0,146 +1088,108.0,108 +1089,176.0,176 +1090,189.0,189 +1091,200.0,200 +1092,164.0,164 +1093,70.0,70 +1094,200.0,200 +1095,172.0,172 +1096,163.0,163 +1097,168.0,168 +1098,181.0,181 +1099,200.0,200 +1100,33.0,33 +1101,200.0,200 +1102,58.0,58 +1103,200.0,200 +1104,156.0,156 +1105,200.0,200 +1106,138.0,138 +1107,200.0,200 +1108,81.0,81 +1109,105.0,105 +1110,87.0,87 +1111,170.0,170 +1112,200.0,200 +1113,200.0,200 +1114,200.0,200 +1115,200.0,200 +1116,200.0,200 +1117,200.0,200 +1118,200.0,200 +1119,158.0,158 +1120,64.0,64 +1121,138.0,138 +1122,200.0,200 +1123,158.0,158 +1124,86.0,86 +1125,125.0,125 +1126,105.0,105 +1127,200.0,200 +1128,120.0,120 +1129,53.0,53 +1130,127.0,127 +1131,200.0,200 +1132,48.0,48 +1133,200.0,200 +1134,144.0,144 +1135,42.0,42 +1136,100.0,100 +1137,160.0,160 +1138,200.0,200 +1139,200.0,200 +1140,200.0,200 +1141,200.0,200 +1142,135.0,135 +1143,184.0,184 +1144,184.0,184 +1145,168.0,168 +1146,162.0,162 +1147,52.0,52 +1148,120.0,120 +1149,133.0,133 +1150,200.0,200 +1151,178.0,178 +1152,200.0,200 +1153,22.0,22 +1154,200.0,200 +1155,108.0,108 +1156,200.0,200 +1157,200.0,200 +1158,102.0,102 +1159,200.0,200 +1160,200.0,200 
+1161,200.0,200 +1162,65.0,65 +1163,131.0,131 +1164,129.0,129 +1165,136.0,136 +1166,137.0,137 +1167,40.0,40 +1168,130.0,130 +1169,99.0,99 +1170,131.0,131 +1171,200.0,200 +1172,172.0,172 +1173,200.0,200 +1174,200.0,200 +1175,83.0,83 +1176,151.0,151 +1177,200.0,200 +1178,70.0,70 +1179,84.0,84 +1180,172.0,172 +1181,200.0,200 +1182,22.0,22 +1183,118.0,118 +1184,200.0,200 +1185,200.0,200 +1186,200.0,200 +1187,200.0,200 +1188,200.0,200 +1189,145.0,145 +1190,121.0,121 +1191,159.0,159 +1192,126.0,126 +1193,49.0,49 +1194,200.0,200 +1195,119.0,119 +1196,155.0,155 +1197,71.0,71 +1198,200.0,200 +1199,79.0,79 +1200,68.0,68 +1201,200.0,200 +1202,200.0,200 +1203,143.0,143 +1204,200.0,200 +1205,54.0,54 +1206,180.0,180 +1207,158.0,158 +1208,149.0,149 +1209,170.0,170 +1210,118.0,118 +1211,155.0,155 +1212,200.0,200 +1213,200.0,200 +1214,143.0,143 +1215,200.0,200 +1216,200.0,200 +1217,55.0,55 +1218,200.0,200 +1219,200.0,200 +1220,168.0,168 +1221,170.0,170 +1222,112.0,112 +1223,108.0,108 +1224,104.0,104 +1225,135.0,135 +1226,131.0,131 +1227,98.0,98 +1228,63.0,63 +1229,200.0,200 +1230,119.0,119 +1231,130.0,130 +1232,113.0,113 +1233,170.0,170 +1234,200.0,200 +1235,164.0,164 +1236,149.0,149 +1237,200.0,200 +1238,116.0,116 +1239,200.0,200 +1240,133.0,133 +1241,200.0,200 +1242,164.0,164 +1243,154.0,154 +1244,191.0,191 +1245,128.0,128 +1246,119.0,119 +1247,114.0,114 +1248,200.0,200 +1249,111.0,111 +1250,78.0,78 +1251,200.0,200 +1252,200.0,200 +1253,172.0,172 +1254,200.0,200 +1255,105.0,105 +1256,181.0,181 +1257,200.0,200 +1258,83.0,83 +1259,200.0,200 +1260,176.0,176 +1261,200.0,200 +1262,147.0,147 +1263,165.0,165 +1264,167.0,167 +1265,183.0,183 +1266,117.0,117 +1267,200.0,200 +1268,200.0,200 +1269,171.0,171 +1270,20.0,20 +1271,200.0,200 +1272,200.0,200 +1273,200.0,200 +1274,109.0,109 +1275,142.0,142 +1276,117.0,117 +1277,200.0,200 +1278,176.0,176 +1279,200.0,200 +1280,101.0,101 +1281,200.0,200 +1282,130.0,130 +1283,200.0,200 +1284,111.0,111 +1285,124.0,124 +1286,178.0,178 +1287,200.0,200 
+1288,184.0,184 +1289,200.0,200 +1290,200.0,200 +1291,200.0,200 +1292,200.0,200 +1293,130.0,130 +1294,200.0,200 +1295,134.0,134 +1296,195.0,195 +1297,200.0,200 +1298,62.0,62 +1299,200.0,200 +1300,200.0,200 +1301,165.0,165 +1302,190.0,190 +1303,200.0,200 +1304,200.0,200 +1305,168.0,168 +1306,200.0,200 +1307,64.0,64 +1308,122.0,122 +1309,200.0,200 +1310,134.0,134 +1311,200.0,200 +1312,200.0,200 +1313,200.0,200 +1314,150.0,150 +1315,187.0,187 +1316,130.0,130 +1317,140.0,140 +1318,157.0,157 +1319,200.0,200 +1320,149.0,149 +1321,200.0,200 +1322,58.0,58 +1323,84.0,84 +1324,140.0,140 +1325,139.0,139 +1326,117.0,117 +1327,175.0,175 +1328,135.0,135 +1329,169.0,169 +1330,200.0,200 +1331,143.0,143 +1332,127.0,127 +1333,127.0,127 +1334,148.0,148 +1335,200.0,200 +1336,136.0,136 +1337,200.0,200 +1338,200.0,200 +1339,175.0,175 +1340,102.0,102 +1341,200.0,200 +1342,97.0,97 +1343,120.0,120 +1344,59.0,59 +1345,200.0,200 +1346,200.0,200 +1347,104.0,104 +1348,128.0,128 +1349,200.0,200 +1350,195.0,195 +1351,96.0,96 +1352,196.0,196 +1353,200.0,200 +1354,58.0,58 +1355,200.0,200 +1356,200.0,200 +1357,114.0,114 +1358,104.0,104 +1359,200.0,200 +1360,179.0,179 +1361,200.0,200 +1362,200.0,200 +1363,140.0,140 +1364,138.0,138 +1365,57.0,57 +1366,165.0,165 +1367,174.0,174 +1368,199.0,199 +1369,110.0,110 +1370,200.0,200 +1371,154.0,154 +1372,200.0,200 +1373,78.0,78 +1374,200.0,200 +1375,185.0,185 +1376,167.0,167 +1377,161.0,161 +1378,155.0,155 +1379,117.0,117 +1380,128.0,128 +1381,94.0,94 +1382,200.0,200 +1383,121.0,121 +1384,61.0,61 +1385,21.0,21 +1386,105.0,105 +1387,185.0,185 +1388,200.0,200 +1389,124.0,124 +1390,200.0,200 +1391,133.0,133 +1392,200.0,200 +1393,153.0,153 +1394,200.0,200 +1395,200.0,200 +1396,152.0,152 +1397,146.0,146 +1398,200.0,200 +1399,183.0,183 +1400,195.0,195 +1401,172.0,172 +1402,151.0,151 +1403,122.0,122 +1404,200.0,200 +1405,200.0,200 +1406,200.0,200 +1407,200.0,200 +1408,130.0,130 +1409,148.0,148 +1410,200.0,200 +1411,200.0,200 +1412,200.0,200 +1413,157.0,157 
+1414,136.0,136 +1415,115.0,115 +1416,200.0,200 +1417,105.0,105 +1418,124.0,124 +1419,144.0,144 +1420,34.0,34 +1421,151.0,151 +1422,101.0,101 +1423,64.0,64 +1424,200.0,200 +1425,100.0,100 +1426,54.0,54 +1427,132.0,132 +1428,200.0,200 +1429,131.0,131 +1430,51.0,51 +1431,123.0,123 +1432,99.0,99 +1433,200.0,200 +1434,200.0,200 +1435,144.0,144 +1436,166.0,166 +1437,122.0,122 +1438,147.0,147 +1439,200.0,200 +1440,103.0,103 +1441,164.0,164 +1442,76.0,76 +1443,159.0,159 +1444,152.0,152 +1445,200.0,200 +1446,129.0,129 +1447,124.0,124 +1448,40.0,40 +1449,200.0,200 +1450,117.0,117 +1451,175.0,175 +1452,51.0,51 +1453,101.0,101 +1454,117.0,117 +1455,179.0,179 +1456,44.0,44 +1457,190.0,190 +1458,135.0,135 +1459,183.0,183 +1460,118.0,118 +1461,200.0,200 +1462,109.0,109 +1463,86.0,86 +1464,147.0,147 +1465,200.0,200 +1466,124.0,124 +1467,128.0,128 +1468,156.0,156 +1469,200.0,200 +1470,167.0,167 +1471,197.0,197 +1472,75.0,75 +1473,168.0,168 +1474,114.0,114 +1475,153.0,153 +1476,146.0,146 +1477,188.0,188 +1478,144.0,144 +1479,200.0,200 +1480,51.0,51 +1481,35.0,35 +1482,152.0,152 +1483,161.0,161 +1484,114.0,114 +1485,200.0,200 +1486,161.0,161 +1487,200.0,200 +1488,93.0,93 +1489,116.0,116 +1490,152.0,152 +1491,200.0,200 +1492,200.0,200 +1493,200.0,200 +1494,86.0,86 +1495,200.0,200 +1496,178.0,178 +1497,200.0,200 +1498,200.0,200 +1499,154.0,154 +1500,135.0,135 +1501,200.0,200 +1502,146.0,146 +1503,78.0,78 +1504,115.0,115 +1505,189.0,189 +1506,133.0,133 +1507,123.0,123 +1508,158.0,158 +1509,200.0,200 +1510,200.0,200 +1511,200.0,200 +1512,200.0,200 +1513,200.0,200 +1514,200.0,200 +1515,200.0,200 +1516,119.0,119 +1517,162.0,162 +1518,200.0,200 +1519,114.0,114 +1520,200.0,200 +1521,128.0,128 +1522,200.0,200 +1523,200.0,200 +1524,130.0,130 +1525,65.0,65 +1526,200.0,200 +1527,200.0,200 +1528,200.0,200 +1529,188.0,188 +1530,159.0,159 +1531,200.0,200 +1532,200.0,200 +1533,200.0,200 +1534,147.0,147 +1535,180.0,180 +1536,152.0,152 +1537,178.0,178 +1538,131.0,131 +1539,118.0,118 +1540,153.0,153 
+1541,197.0,197 +1542,200.0,200 +1543,200.0,200 +1544,178.0,178 +1545,67.0,67 +1546,137.0,137 +1547,51.0,51 +1548,160.0,160 +1549,200.0,200 +1550,124.0,124 +1551,109.0,109 +1552,181.0,181 +1553,182.0,182 +1554,136.0,136 +1555,91.0,91 +1556,159.0,159 +1557,192.0,192 +1558,106.0,106 +1559,200.0,200 +1560,169.0,169 +1561,167.0,167 +1562,141.0,141 +1563,127.0,127 +1564,71.0,71 +1565,134.0,134 +1566,200.0,200 +1567,115.0,115 +1568,99.0,99 +1569,184.0,184 +1570,200.0,200 +1571,133.0,133 +1572,153.0,153 +1573,200.0,200 +1574,194.0,194 +1575,169.0,169 +1576,113.0,113 +1577,147.0,147 +1578,140.0,140 +1579,200.0,200 +1580,113.0,113 +1581,181.0,181 +1582,200.0,200 +1583,182.0,182 +1584,185.0,185 +1585,197.0,197 +1586,200.0,200 +1587,151.0,151 +1588,49.0,49 +1589,137.0,137 +1590,166.0,166 +1591,149.0,149 +1592,126.0,126 +1593,73.0,73 +1594,127.0,127 +1595,104.0,104 +1596,65.0,65 +1597,63.0,63 +1598,126.0,126 +1599,181.0,181 +1600,132.0,132 +1601,89.0,89 +1602,130.0,130 +1603,150.0,150 +1604,100.0,100 +1605,139.0,139 +1606,119.0,119 +1607,48.0,48 +1608,80.0,80 +1609,105.0,105 +1610,85.0,85 +1611,200.0,200 +1612,142.0,142 +1613,95.0,95 +1614,50.0,50 +1615,51.0,51 +1616,124.0,124 +1617,47.0,47 +1618,159.0,159 +1619,154.0,154 +1620,200.0,200 +1621,88.0,88 +1622,65.0,65 +1623,111.0,111 +1624,99.0,99 +1625,120.0,120 +1626,127.0,127 +1627,43.0,43 +1628,80.0,80 +1629,163.0,163 +1630,90.0,90 +1631,154.0,154 +1632,127.0,127 +1633,39.0,39 +1634,200.0,200 +1635,161.0,161 +1636,119.0,119 +1637,156.0,156 +1638,200.0,200 +1639,200.0,200 +1640,41.0,41 +1641,200.0,200 +1642,136.0,136 +1643,157.0,157 +1644,142.0,142 +1645,125.0,125 +1646,155.0,155 +1647,139.0,139 +1648,122.0,122 +1649,116.0,116 +1650,200.0,200 +1651,144.0,144 +1652,170.0,170 +1653,200.0,200 +1654,103.0,103 +1655,105.0,105 +1656,193.0,193 +1657,122.0,122 +1658,200.0,200 +1659,191.0,191 +1660,200.0,200 +1661,200.0,200 +1662,200.0,200 +1663,200.0,200 +1664,200.0,200 +1665,200.0,200 +1666,200.0,200 +1667,64.0,64 +1668,200.0,200 
+1669,121.0,121 +1670,200.0,200 +1671,171.0,171 +1672,200.0,200 +1673,130.0,130 +1674,200.0,200 +1675,200.0,200 +1676,188.0,188 +1677,200.0,200 +1678,200.0,200 +1679,200.0,200 +1680,181.0,181 +1681,200.0,200 +1682,200.0,200 +1683,135.0,135 +1684,200.0,200 +1685,114.0,114 +1686,189.0,189 +1687,200.0,200 +1688,200.0,200 +1689,200.0,200 +1690,184.0,184 +1691,200.0,200 +1692,200.0,200 +1693,55.0,55 +1694,153.0,153 +1695,200.0,200 +1696,200.0,200 +1697,125.0,125 +1698,177.0,177 +1699,154.0,154 +1700,53.0,53 +1701,112.0,112 +1702,184.0,184 +1703,200.0,200 +1704,200.0,200 +1705,137.0,137 +1706,72.0,72 +1707,200.0,200 +1708,200.0,200 +1709,200.0,200 +1710,121.0,121 +1711,200.0,200 +1712,200.0,200 +1713,142.0,142 +1714,200.0,200 +1715,200.0,200 +1716,169.0,169 +1717,200.0,200 +1718,200.0,200 +1719,196.0,196 +1720,135.0,135 +1721,200.0,200 +1722,200.0,200 +1723,200.0,200 +1724,96.0,96 +1725,200.0,200 +1726,200.0,200 +1727,200.0,200 +1728,200.0,200 +1729,138.0,138 +1730,200.0,200 +1731,139.0,139 +1732,200.0,200 +1733,190.0,190 +1734,200.0,200 +1735,200.0,200 +1736,138.0,138 +1737,114.0,114 +1738,159.0,159 +1739,120.0,120 +1740,186.0,186 +1741,200.0,200 +1742,183.0,183 +1743,200.0,200 +1744,200.0,200 +1745,200.0,200 +1746,99.0,99 +1747,200.0,200 +1748,100.0,100 +1749,187.0,187 +1750,106.0,106 +1751,200.0,200 +1752,200.0,200 +1753,200.0,200 +1754,52.0,52 +1755,197.0,197 +1756,165.0,165 +1757,200.0,200 +1758,200.0,200 +1759,92.0,92 +1760,200.0,200 +1761,200.0,200 +1762,70.0,70 +1763,165.0,165 +1764,192.0,192 +1765,200.0,200 +1766,200.0,200 +1767,87.0,87 +1768,150.0,150 +1769,149.0,149 +1770,79.0,79 +1771,200.0,200 +1772,200.0,200 +1773,117.0,117 +1774,200.0,200 +1775,135.0,135 +1776,200.0,200 +1777,130.0,130 +1778,200.0,200 +1779,200.0,200 +1780,200.0,200 +1781,200.0,200 +1782,200.0,200 +1783,200.0,200 +1784,200.0,200 +1785,200.0,200 +1786,200.0,200 +1787,200.0,200 +1788,140.0,140 +1789,200.0,200 +1790,200.0,200 +1791,42.0,42 +1792,198.0,198 +1793,200.0,200 +1794,200.0,200 
+1795,85.0,85 +1796,164.0,164 +1797,99.0,99 +1798,151.0,151 +1799,200.0,200 +1800,200.0,200 +1801,199.0,199 +1802,200.0,200 +1803,190.0,190 +1804,114.0,114 +1805,200.0,200 +1806,200.0,200 +1807,161.0,161 +1808,200.0,200 +1809,187.0,187 +1810,145.0,145 +1811,200.0,200 +1812,200.0,200 +1813,200.0,200 +1814,96.0,96 +1815,163.0,163 +1816,160.0,160 +1817,200.0,200 +1818,200.0,200 +1819,50.0,50 +1820,200.0,200 +1821,102.0,102 +1822,200.0,200 +1823,200.0,200 +1824,200.0,200 +1825,200.0,200 +1826,109.0,109 +1827,200.0,200 +1828,129.0,129 +1829,200.0,200 +1830,171.0,171 +1831,183.0,183 +1832,106.0,106 +1833,200.0,200 +1834,136.0,136 +1835,100.0,100 +1836,200.0,200 +1837,188.0,188 +1838,200.0,200 +1839,200.0,200 +1840,162.0,162 +1841,200.0,200 +1842,200.0,200 +1843,177.0,177 +1844,200.0,200 +1845,200.0,200 +1846,200.0,200 +1847,200.0,200 +1848,166.0,166 +1849,200.0,200 +1850,53.0,53 +1851,200.0,200 +1852,200.0,200 +1853,153.0,153 +1854,190.0,190 +1855,200.0,200 +1856,200.0,200 +1857,200.0,200 +1858,200.0,200 +1859,136.0,136 +1860,200.0,200 +1861,143.0,143 +1862,45.0,45 +1863,129.0,129 +1864,200.0,200 +1865,200.0,200 +1866,200.0,200 +1867,200.0,200 +1868,60.0,60 +1869,150.0,150 +1870,174.0,174 +1871,157.0,157 +1872,198.0,198 +1873,200.0,200 +1874,91.0,91 +1875,200.0,200 +1876,112.0,112 +1877,159.0,159 +1878,186.0,186 +1879,200.0,200 +1880,82.0,82 +1881,192.0,192 +1882,147.0,147 +1883,200.0,200 +1884,200.0,200 +1885,174.0,174 +1886,181.0,181 +1887,200.0,200 +1888,74.0,74 +1889,200.0,200 +1890,200.0,200 +1891,200.0,200 +1892,157.0,157 +1893,200.0,200 +1894,200.0,200 +1895,180.0,180 +1896,170.0,170 +1897,200.0,200 +1898,135.0,135 +1899,200.0,200 +1900,175.0,175 +1901,200.0,200 +1902,200.0,200 +1903,118.0,118 +1904,147.0,147 +1905,44.0,44 +1906,200.0,200 +1907,58.0,58 +1908,185.0,185 +1909,200.0,200 +1910,200.0,200 +1911,200.0,200 +1912,78.0,78 +1913,190.0,190 +1914,177.0,177 +1915,112.0,112 +1916,200.0,200 +1917,142.0,142 +1918,200.0,200 +1919,92.0,92 +1920,172.0,172 
+1921,200.0,200 +1922,178.0,178 +1923,200.0,200 +1924,200.0,200 +1925,138.0,138 +1926,100.0,100 +1927,200.0,200 +1928,95.0,95 +1929,200.0,200 +1930,200.0,200 +1931,129.0,129 +1932,154.0,154 +1933,200.0,200 +1934,200.0,200 +1935,133.0,133 +1936,152.0,152 +1937,133.0,133 +1938,200.0,200 +1939,200.0,200 +1940,200.0,200 +1941,200.0,200 +1942,200.0,200 +1943,167.0,167 +1944,179.0,179 +1945,164.0,164 +1946,187.0,187 +1947,156.0,156 +1948,200.0,200 +1949,200.0,200 +1950,130.0,130 +1951,200.0,200 +1952,200.0,200 +1953,200.0,200 +1954,200.0,200 +1955,65.0,65 +1956,200.0,200 +1957,148.0,148 +1958,200.0,200 +1959,200.0,200 +1960,200.0,200 +1961,168.0,168 +1962,164.0,164 +1963,200.0,200 +1964,200.0,200 +1965,103.0,103 +1966,200.0,200 +1967,173.0,173 +1968,200.0,200 +1969,146.0,146 +1970,197.0,197 +1971,123.0,123 +1972,162.0,162 +1973,200.0,200 +1974,162.0,162 +1975,82.0,82 +1976,157.0,157 +1977,138.0,138 +1978,37.0,37 +1979,200.0,200 +1980,194.0,194 +1981,200.0,200 +1982,104.0,104 +1983,198.0,198 +1984,200.0,200 +1985,200.0,200 +1986,154.0,154 +1987,200.0,200 +1988,200.0,200 +1989,158.0,158 +1990,200.0,200 +1991,142.0,142 +1992,185.0,185 +1993,69.0,69 +1994,200.0,200 +1995,144.0,144 +1996,164.0,164 +1997,189.0,189 +1998,200.0,200 +1999,141.0,141 diff --git a/projects/codes/A2C/outputs/CartPole-v0/20220829-143327/models/actor_checkpoint.pt b/projects/codes/A2C/outputs/CartPole-v0/20220829-143327/models/actor_checkpoint.pt new file mode 100644 index 0000000..20d78c0 Binary files /dev/null and b/projects/codes/A2C/outputs/CartPole-v0/20220829-143327/models/actor_checkpoint.pt differ diff --git a/projects/codes/A2C/outputs/CartPole-v0/20220829-143327/models/critic_checkpoint.pt b/projects/codes/A2C/outputs/CartPole-v0/20220829-143327/models/critic_checkpoint.pt new file mode 100644 index 0000000..c35547d Binary files /dev/null and b/projects/codes/A2C/outputs/CartPole-v0/20220829-143327/models/critic_checkpoint.pt differ diff --git 
a/projects/codes/A2C/outputs/CartPole-v0/20220829-143327/results/params.json b/projects/codes/A2C/outputs/CartPole-v0/20220829-143327/results/params.json new file mode 100644 index 0000000..010058e --- /dev/null +++ b/projects/codes/A2C/outputs/CartPole-v0/20220829-143327/results/params.json @@ -0,0 +1 @@ +{"algo_name": "A2C", "env_name": "CartPole-v0", "train_eps": 1600, "test_eps": 20, "ep_max_steps": 100000, "gamma": 0.99, "actor_lr": 0.0003, "critic_lr": 0.001, "actor_hidden_dim": 256, "critic_hidden_dim": 256, "device": "cpu", "seed": 10, "show_fig": false, "save_fig": true, "result_path": "/Users/jj/Desktop/rl-tutorials/codes/A2C/outputs/CartPole-v0/20220829-143327/results/", "model_path": "/Users/jj/Desktop/rl-tutorials/codes/A2C/outputs/CartPole-v0/20220829-143327/models/", "n_states": 4, "n_actions": 2} \ No newline at end of file diff --git a/projects/codes/A2C/outputs/CartPole-v0/20220829-143327/results/testing_curve.png b/projects/codes/A2C/outputs/CartPole-v0/20220829-143327/results/testing_curve.png new file mode 100644 index 0000000..96a9a22 Binary files /dev/null and b/projects/codes/A2C/outputs/CartPole-v0/20220829-143327/results/testing_curve.png differ diff --git a/projects/codes/A2C/outputs/CartPole-v0/20220829-143327/results/testing_results.csv b/projects/codes/A2C/outputs/CartPole-v0/20220829-143327/results/testing_results.csv new file mode 100644 index 0000000..ebf3893 --- /dev/null +++ b/projects/codes/A2C/outputs/CartPole-v0/20220829-143327/results/testing_results.csv @@ -0,0 +1,21 @@ +episodes,rewards,steps +0,177.0,177 +1,180.0,180 +2,200.0,200 +3,200.0,200 +4,167.0,167 +5,124.0,124 +6,128.0,128 +7,200.0,200 +8,200.0,200 +9,200.0,200 +10,186.0,186 +11,187.0,187 +12,200.0,200 +13,176.0,176 +14,200.0,200 +15,200.0,200 +16,200.0,200 +17,200.0,200 +18,185.0,185 +19,180.0,180 diff --git a/projects/codes/A2C/outputs/CartPole-v0/20220829-143327/results/training_curve.png 
b/projects/codes/A2C/outputs/CartPole-v0/20220829-143327/results/training_curve.png new file mode 100644 index 0000000..860a49c Binary files /dev/null and b/projects/codes/A2C/outputs/CartPole-v0/20220829-143327/results/training_curve.png differ diff --git a/projects/codes/A2C/outputs/CartPole-v0/20220829-143327/results/training_results.csv b/projects/codes/A2C/outputs/CartPole-v0/20220829-143327/results/training_results.csv new file mode 100644 index 0000000..f05699c --- /dev/null +++ b/projects/codes/A2C/outputs/CartPole-v0/20220829-143327/results/training_results.csv @@ -0,0 +1,1601 @@ +episodes,rewards,steps +0,16.0,16 +1,14.0,14 +2,18.0,18 +3,31.0,31 +4,23.0,23 +5,43.0,43 +6,14.0,14 +7,20.0,20 +8,24.0,24 +9,14.0,14 +10,12.0,12 +11,16.0,16 +12,17.0,17 +13,13.0,13 +14,78.0,78 +15,36.0,36 +16,9.0,9 +17,19.0,19 +18,18.0,18 +19,20.0,20 +20,33.0,33 +21,37.0,37 +22,15.0,15 +23,24.0,24 +24,32.0,32 +25,19.0,19 +26,65.0,65 +27,13.0,13 +28,26.0,26 +29,27.0,27 +30,15.0,15 +31,10.0,10 +32,16.0,16 +33,30.0,30 +34,42.0,42 +35,30.0,30 +36,35.0,35 +37,21.0,21 +38,36.0,36 +39,18.0,18 +40,81.0,81 +41,12.0,12 +42,21.0,21 +43,32.0,32 +44,39.0,39 +45,10.0,10 +46,13.0,13 +47,35.0,35 +48,22.0,22 +49,20.0,20 +50,67.0,67 +51,28.0,28 +52,13.0,13 +53,14.0,14 +54,13.0,13 +55,26.0,26 +56,12.0,12 +57,17.0,17 +58,41.0,41 +59,14.0,14 +60,68.0,68 +61,12.0,12 +62,20.0,20 +63,19.0,19 +64,31.0,31 +65,31.0,31 +66,34.0,34 +67,24.0,24 +68,31.0,31 +69,37.0,37 +70,49.0,49 +71,15.0,15 +72,97.0,97 +73,18.0,18 +74,20.0,20 +75,31.0,31 +76,68.0,68 +77,21.0,21 +78,19.0,19 +79,19.0,19 +80,19.0,19 +81,14.0,14 +82,17.0,17 +83,35.0,35 +84,25.0,25 +85,59.0,59 +86,29.0,29 +87,35.0,35 +88,73.0,73 +89,26.0,26 +90,81.0,81 +91,20.0,20 +92,78.0,78 +93,26.0,26 +94,43.0,43 +95,25.0,25 +96,24.0,24 +97,14.0,14 +98,27.0,27 +99,16.0,16 +100,68.0,68 +101,57.0,57 +102,31.0,31 +103,46.0,46 +104,17.0,17 +105,16.0,16 +106,21.0,21 +107,25.0,25 +108,13.0,13 +109,50.0,50 +110,12.0,12 +111,34.0,34 +112,16.0,16 +113,34.0,34 
+114,19.0,19 +115,38.0,38 +116,20.0,20 +117,42.0,42 +118,28.0,28 +119,53.0,53 +120,43.0,43 +121,37.0,37 +122,13.0,13 +123,29.0,29 +124,17.0,17 +125,23.0,23 +126,22.0,22 +127,21.0,21 +128,62.0,62 +129,52.0,52 +130,32.0,32 +131,37.0,37 +132,38.0,38 +133,28.0,28 +134,33.0,33 +135,33.0,33 +136,35.0,35 +137,43.0,43 +138,41.0,41 +139,18.0,18 +140,30.0,30 +141,48.0,48 +142,50.0,50 +143,34.0,34 +144,86.0,86 +145,21.0,21 +146,29.0,29 +147,29.0,29 +148,24.0,24 +149,88.0,88 +150,28.0,28 +151,19.0,19 +152,35.0,35 +153,24.0,24 +154,11.0,11 +155,49.0,49 +156,30.0,30 +157,44.0,44 +158,26.0,26 +159,19.0,19 +160,12.0,12 +161,65.0,65 +162,18.0,18 +163,11.0,11 +164,38.0,38 +165,28.0,28 +166,17.0,17 +167,51.0,51 +168,18.0,18 +169,39.0,39 +170,22.0,22 +171,26.0,26 +172,33.0,33 +173,35.0,35 +174,60.0,60 +175,12.0,12 +176,19.0,19 +177,62.0,62 +178,79.0,79 +179,36.0,36 +180,31.0,31 +181,23.0,23 +182,23.0,23 +183,40.0,40 +184,19.0,19 +185,18.0,18 +186,18.0,18 +187,23.0,23 +188,43.0,43 +189,45.0,45 +190,28.0,28 +191,61.0,61 +192,50.0,50 +193,40.0,40 +194,9.0,9 +195,56.0,56 +196,43.0,43 +197,43.0,43 +198,18.0,18 +199,43.0,43 +200,26.0,26 +201,44.0,44 +202,35.0,35 +203,25.0,25 +204,31.0,31 +205,26.0,26 +206,40.0,40 +207,36.0,36 +208,50.0,50 +209,25.0,25 +210,29.0,29 +211,11.0,11 +212,23.0,23 +213,23.0,23 +214,36.0,36 +215,39.0,39 +216,45.0,45 +217,14.0,14 +218,46.0,46 +219,62.0,62 +220,15.0,15 +221,19.0,19 +222,28.0,28 +223,39.0,39 +224,14.0,14 +225,25.0,25 +226,35.0,35 +227,16.0,16 +228,22.0,22 +229,41.0,41 +230,21.0,21 +231,22.0,22 +232,37.0,37 +233,32.0,32 +234,18.0,18 +235,23.0,23 +236,23.0,23 +237,16.0,16 +238,38.0,38 +239,25.0,25 +240,37.0,37 +241,13.0,13 +242,30.0,30 +243,27.0,27 +244,27.0,27 +245,23.0,23 +246,62.0,62 +247,31.0,31 +248,37.0,37 +249,26.0,26 +250,77.0,77 +251,15.0,15 +252,25.0,25 +253,20.0,20 +254,27.0,27 +255,42.0,42 +256,41.0,41 +257,34.0,34 +258,23.0,23 +259,37.0,37 +260,47.0,47 +261,22.0,22 +262,34.0,34 +263,10.0,10 +264,37.0,37 +265,29.0,29 +266,49.0,49 +267,67.0,67 
+268,15.0,15 +269,81.0,81 +270,95.0,95 +271,54.0,54 +272,19.0,19 +273,31.0,31 +274,54.0,54 +275,46.0,46 +276,21.0,21 +277,22.0,22 +278,58.0,58 +279,58.0,58 +280,37.0,37 +281,37.0,37 +282,25.0,25 +283,20.0,20 +284,46.0,46 +285,80.0,80 +286,25.0,25 +287,18.0,18 +288,18.0,18 +289,18.0,18 +290,37.0,37 +291,20.0,20 +292,62.0,62 +293,22.0,22 +294,23.0,23 +295,28.0,28 +296,38.0,38 +297,15.0,15 +298,17.0,17 +299,20.0,20 +300,20.0,20 +301,18.0,18 +302,77.0,77 +303,54.0,54 +304,95.0,95 +305,25.0,25 +306,79.0,79 +307,116.0,116 +308,52.0,52 +309,58.0,58 +310,15.0,15 +311,60.0,60 +312,97.0,97 +313,61.0,61 +314,18.0,18 +315,69.0,69 +316,18.0,18 +317,20.0,20 +318,19.0,19 +319,16.0,16 +320,21.0,21 +321,12.0,12 +322,30.0,30 +323,51.0,51 +324,37.0,37 +325,28.0,28 +326,29.0,29 +327,66.0,66 +328,56.0,56 +329,79.0,79 +330,56.0,56 +331,70.0,70 +332,33.0,33 +333,54.0,54 +334,35.0,35 +335,18.0,18 +336,141.0,141 +337,38.0,38 +338,18.0,18 +339,34.0,34 +340,20.0,20 +341,95.0,95 +342,29.0,29 +343,40.0,40 +344,20.0,20 +345,14.0,14 +346,83.0,83 +347,72.0,72 +348,88.0,88 +349,41.0,41 +350,103.0,103 +351,85.0,85 +352,14.0,14 +353,51.0,51 +354,68.0,68 +355,102.0,102 +356,61.0,61 +357,22.0,22 +358,22.0,22 +359,15.0,15 +360,23.0,23 +361,35.0,35 +362,72.0,72 +363,32.0,32 +364,107.0,107 +365,34.0,34 +366,28.0,28 +367,99.0,99 +368,140.0,140 +369,40.0,40 +370,24.0,24 +371,157.0,157 +372,33.0,33 +373,35.0,35 +374,18.0,18 +375,21.0,21 +376,23.0,23 +377,67.0,67 +378,112.0,112 +379,21.0,21 +380,38.0,38 +381,12.0,12 +382,23.0,23 +383,31.0,31 +384,36.0,36 +385,20.0,20 +386,82.0,82 +387,20.0,20 +388,32.0,32 +389,28.0,28 +390,26.0,26 +391,26.0,26 +392,46.0,46 +393,35.0,35 +394,32.0,32 +395,67.0,67 +396,30.0,30 +397,36.0,36 +398,67.0,67 +399,51.0,51 +400,21.0,21 +401,28.0,28 +402,46.0,46 +403,29.0,29 +404,30.0,30 +405,22.0,22 +406,24.0,24 +407,78.0,78 +408,28.0,28 +409,41.0,41 +410,38.0,38 +411,56.0,56 +412,19.0,19 +413,38.0,38 +414,51.0,51 +415,33.0,33 +416,40.0,40 +417,35.0,35 +418,59.0,59 +419,138.0,138 
+420,18.0,18 +421,10.0,10 +422,25.0,25 +423,104.0,104 +424,127.0,127 +425,44.0,44 +426,35.0,35 +427,62.0,62 +428,108.0,108 +429,39.0,39 +430,15.0,15 +431,73.0,73 +432,24.0,24 +433,23.0,23 +434,35.0,35 +435,14.0,14 +436,47.0,47 +437,112.0,112 +438,79.0,79 +439,99.0,99 +440,66.0,66 +441,64.0,64 +442,63.0,63 +443,28.0,28 +444,116.0,116 +445,94.0,94 +446,91.0,91 +447,60.0,60 +448,22.0,22 +449,68.0,68 +450,22.0,22 +451,34.0,34 +452,23.0,23 +453,117.0,117 +454,26.0,26 +455,30.0,30 +456,31.0,31 +457,50.0,50 +458,56.0,56 +459,81.0,81 +460,43.0,43 +461,49.0,49 +462,62.0,62 +463,64.0,64 +464,16.0,16 +465,37.0,37 +466,103.0,103 +467,66.0,66 +468,25.0,25 +469,50.0,50 +470,118.0,118 +471,42.0,42 +472,69.0,69 +473,55.0,55 +474,41.0,41 +475,25.0,25 +476,114.0,114 +477,27.0,27 +478,27.0,27 +479,61.0,61 +480,61.0,61 +481,66.0,66 +482,73.0,73 +483,35.0,35 +484,47.0,47 +485,77.0,77 +486,34.0,34 +487,16.0,16 +488,22.0,22 +489,47.0,47 +490,72.0,72 +491,76.0,76 +492,74.0,74 +493,36.0,36 +494,47.0,47 +495,25.0,25 +496,22.0,22 +497,20.0,20 +498,40.0,40 +499,44.0,44 +500,41.0,41 +501,42.0,42 +502,112.0,112 +503,64.0,64 +504,95.0,95 +505,112.0,112 +506,117.0,117 +507,84.0,84 +508,79.0,79 +509,129.0,129 +510,139.0,139 +511,41.0,41 +512,82.0,82 +513,54.0,54 +514,69.0,69 +515,44.0,44 +516,31.0,31 +517,64.0,64 +518,41.0,41 +519,100.0,100 +520,86.0,86 +521,44.0,44 +522,38.0,38 +523,36.0,36 +524,41.0,41 +525,22.0,22 +526,51.0,51 +527,24.0,24 +528,47.0,47 +529,79.0,79 +530,125.0,125 +531,50.0,50 +532,35.0,35 +533,48.0,48 +534,85.0,85 +535,58.0,58 +536,101.0,101 +537,200.0,200 +538,79.0,79 +539,159.0,159 +540,71.0,71 +541,71.0,71 +542,77.0,77 +543,78.0,78 +544,46.0,46 +545,49.0,49 +546,74.0,74 +547,71.0,71 +548,106.0,106 +549,36.0,36 +550,33.0,33 +551,160.0,160 +552,53.0,53 +553,54.0,54 +554,27.0,27 +555,55.0,55 +556,174.0,174 +557,33.0,33 +558,61.0,61 +559,118.0,118 +560,128.0,128 +561,148.0,148 +562,97.0,97 +563,63.0,63 +564,44.0,44 +565,110.0,110 +566,156.0,156 +567,50.0,50 +568,163.0,163 
+569,126.0,126 +570,114.0,114 +571,78.0,78 +572,48.0,48 +573,59.0,59 +574,116.0,116 +575,46.0,46 +576,135.0,135 +577,71.0,71 +578,19.0,19 +579,43.0,43 +580,89.0,89 +581,97.0,97 +582,21.0,21 +583,120.0,120 +584,54.0,54 +585,24.0,24 +586,62.0,62 +587,78.0,78 +588,36.0,36 +589,71.0,71 +590,25.0,25 +591,71.0,71 +592,56.0,56 +593,78.0,78 +594,65.0,65 +595,200.0,200 +596,200.0,200 +597,167.0,167 +598,59.0,59 +599,73.0,73 +600,66.0,66 +601,35.0,35 +602,186.0,186 +603,140.0,140 +604,49.0,49 +605,134.0,134 +606,46.0,46 +607,149.0,149 +608,82.0,82 +609,119.0,119 +610,126.0,126 +611,52.0,52 +612,89.0,89 +613,200.0,200 +614,89.0,89 +615,93.0,93 +616,200.0,200 +617,39.0,39 +618,113.0,113 +619,67.0,67 +620,164.0,164 +621,120.0,120 +622,74.0,74 +623,153.0,153 +624,124.0,124 +625,41.0,41 +626,173.0,173 +627,48.0,48 +628,200.0,200 +629,58.0,58 +630,35.0,35 +631,45.0,45 +632,43.0,43 +633,157.0,157 +634,111.0,111 +635,98.0,98 +636,102.0,102 +637,124.0,124 +638,111.0,111 +639,42.0,42 +640,128.0,128 +641,183.0,183 +642,49.0,49 +643,64.0,64 +644,117.0,117 +645,150.0,150 +646,103.0,103 +647,115.0,115 +648,41.0,41 +649,200.0,200 +650,162.0,162 +651,105.0,105 +652,94.0,94 +653,55.0,55 +654,73.0,73 +655,65.0,65 +656,29.0,29 +657,148.0,148 +658,42.0,42 +659,17.0,17 +660,60.0,60 +661,59.0,59 +662,80.0,80 +663,41.0,41 +664,143.0,143 +665,200.0,200 +666,45.0,45 +667,95.0,95 +668,61.0,61 +669,63.0,63 +670,170.0,170 +671,150.0,150 +672,139.0,139 +673,44.0,44 +674,44.0,44 +675,47.0,47 +676,72.0,72 +677,129.0,129 +678,79.0,79 +679,128.0,128 +680,126.0,126 +681,65.0,65 +682,57.0,57 +683,58.0,58 +684,66.0,66 +685,89.0,89 +686,150.0,150 +687,97.0,97 +688,20.0,20 +689,58.0,58 +690,81.0,81 +691,54.0,54 +692,55.0,55 +693,197.0,197 +694,61.0,61 +695,157.0,157 +696,166.0,166 +697,79.0,79 +698,128.0,128 +699,200.0,200 +700,46.0,46 +701,140.0,140 +702,19.0,19 +703,144.0,144 +704,138.0,138 +705,46.0,46 +706,200.0,200 +707,61.0,61 +708,114.0,114 +709,100.0,100 +710,85.0,85 +711,200.0,200 +712,36.0,36 
+713,142.0,142 +714,22.0,22 +715,82.0,82 +716,49.0,49 +717,139.0,139 +718,173.0,173 +719,47.0,47 +720,67.0,67 +721,197.0,197 +722,157.0,157 +723,149.0,149 +724,29.0,29 +725,85.0,85 +726,135.0,135 +727,157.0,157 +728,141.0,141 +729,165.0,165 +730,102.0,102 +731,192.0,192 +732,196.0,196 +733,183.0,183 +734,75.0,75 +735,41.0,41 +736,122.0,122 +737,200.0,200 +738,166.0,166 +739,109.0,109 +740,200.0,200 +741,200.0,200 +742,24.0,24 +743,20.0,20 +744,138.0,138 +745,122.0,122 +746,200.0,200 +747,156.0,156 +748,191.0,191 +749,91.0,91 +750,105.0,105 +751,145.0,145 +752,130.0,130 +753,150.0,150 +754,77.0,77 +755,137.0,137 +756,181.0,181 +757,200.0,200 +758,132.0,132 +759,200.0,200 +760,76.0,76 +761,63.0,63 +762,160.0,160 +763,28.0,28 +764,135.0,135 +765,43.0,43 +766,146.0,146 +767,179.0,179 +768,82.0,82 +769,126.0,126 +770,148.0,148 +771,110.0,110 +772,116.0,116 +773,55.0,55 +774,158.0,158 +775,155.0,155 +776,200.0,200 +777,153.0,153 +778,147.0,147 +779,54.0,54 +780,173.0,173 +781,44.0,44 +782,47.0,47 +783,200.0,200 +784,179.0,179 +785,194.0,194 +786,200.0,200 +787,141.0,141 +788,130.0,130 +789,133.0,133 +790,53.0,53 +791,124.0,124 +792,143.0,143 +793,58.0,58 +794,190.0,190 +795,130.0,130 +796,130.0,130 +797,200.0,200 +798,163.0,163 +799,200.0,200 +800,103.0,103 +801,200.0,200 +802,200.0,200 +803,18.0,18 +804,156.0,156 +805,165.0,165 +806,129.0,129 +807,33.0,33 +808,132.0,132 +809,200.0,200 +810,110.0,110 +811,93.0,93 +812,198.0,198 +813,200.0,200 +814,200.0,200 +815,182.0,182 +816,75.0,75 +817,200.0,200 +818,200.0,200 +819,200.0,200 +820,144.0,144 +821,118.0,118 +822,200.0,200 +823,60.0,60 +824,200.0,200 +825,134.0,134 +826,154.0,154 +827,116.0,116 +828,21.0,21 +829,200.0,200 +830,105.0,105 +831,158.0,158 +832,122.0,122 +833,88.0,88 +834,108.0,108 +835,112.0,112 +836,134.0,134 +837,165.0,165 +838,200.0,200 +839,138.0,138 +840,164.0,164 +841,200.0,200 +842,30.0,30 +843,181.0,181 +844,149.0,149 +845,102.0,102 +846,128.0,128 +847,74.0,74 +848,112.0,112 +849,80.0,80 
+850,190.0,190 +851,35.0,35 +852,40.0,40 +853,121.0,121 +854,125.0,125 +855,99.0,99 +856,115.0,115 +857,171.0,171 +858,200.0,200 +859,50.0,50 +860,200.0,200 +861,143.0,143 +862,146.0,146 +863,47.0,47 +864,154.0,154 +865,48.0,48 +866,103.0,103 +867,200.0,200 +868,151.0,151 +869,46.0,46 +870,155.0,155 +871,40.0,40 +872,124.0,124 +873,41.0,41 +874,45.0,45 +875,158.0,158 +876,29.0,29 +877,200.0,200 +878,200.0,200 +879,151.0,151 +880,158.0,158 +881,200.0,200 +882,15.0,15 +883,180.0,180 +884,75.0,75 +885,196.0,196 +886,176.0,176 +887,67.0,67 +888,90.0,90 +889,161.0,161 +890,88.0,88 +891,200.0,200 +892,64.0,64 +893,111.0,111 +894,184.0,184 +895,189.0,189 +896,109.0,109 +897,167.0,167 +898,99.0,99 +899,180.0,180 +900,121.0,121 +901,126.0,126 +902,200.0,200 +903,200.0,200 +904,177.0,177 +905,107.0,107 +906,200.0,200 +907,133.0,133 +908,164.0,164 +909,200.0,200 +910,160.0,160 +911,120.0,120 +912,200.0,200 +913,65.0,65 +914,27.0,27 +915,200.0,200 +916,162.0,162 +917,25.0,25 +918,118.0,118 +919,56.0,56 +920,107.0,107 +921,200.0,200 +922,166.0,166 +923,69.0,69 +924,187.0,187 +925,126.0,126 +926,200.0,200 +927,49.0,49 +928,99.0,99 +929,200.0,200 +930,200.0,200 +931,153.0,153 +932,158.0,158 +933,200.0,200 +934,145.0,145 +935,126.0,126 +936,133.0,133 +937,81.0,81 +938,200.0,200 +939,57.0,57 +940,200.0,200 +941,131.0,131 +942,200.0,200 +943,200.0,200 +944,200.0,200 +945,171.0,171 +946,200.0,200 +947,200.0,200 +948,200.0,200 +949,176.0,176 +950,110.0,110 +951,158.0,158 +952,137.0,137 +953,103.0,103 +954,200.0,200 +955,200.0,200 +956,200.0,200 +957,190.0,190 +958,130.0,130 +959,139.0,139 +960,200.0,200 +961,172.0,172 +962,152.0,152 +963,154.0,154 +964,52.0,52 +965,194.0,194 +966,52.0,52 +967,169.0,169 +968,200.0,200 +969,176.0,176 +970,127.0,127 +971,157.0,157 +972,200.0,200 +973,134.0,134 +974,138.0,138 +975,133.0,133 +976,170.0,170 +977,159.0,159 +978,88.0,88 +979,141.0,141 +980,117.0,117 +981,157.0,157 +982,145.0,145 +983,200.0,200 +984,129.0,129 +985,155.0,155 +986,83.0,83 
+987,152.0,152 +988,156.0,156 +989,200.0,200 +990,135.0,135 +991,75.0,75 +992,138.0,138 +993,83.0,83 +994,200.0,200 +995,128.0,128 +996,122.0,122 +997,200.0,200 +998,34.0,34 +999,161.0,161 +1000,143.0,143 +1001,200.0,200 +1002,103.0,103 +1003,168.0,168 +1004,200.0,200 +1005,200.0,200 +1006,167.0,167 +1007,200.0,200 +1008,76.0,76 +1009,101.0,101 +1010,153.0,153 +1011,113.0,113 +1012,109.0,109 +1013,188.0,188 +1014,122.0,122 +1015,181.0,181 +1016,166.0,166 +1017,189.0,189 +1018,200.0,200 +1019,187.0,187 +1020,116.0,116 +1021,200.0,200 +1022,108.0,108 +1023,18.0,18 +1024,158.0,158 +1025,200.0,200 +1026,43.0,43 +1027,200.0,200 +1028,199.0,199 +1029,200.0,200 +1030,133.0,133 +1031,171.0,171 +1032,200.0,200 +1033,200.0,200 +1034,200.0,200 +1035,156.0,156 +1036,52.0,52 +1037,200.0,200 +1038,121.0,121 +1039,188.0,188 +1040,167.0,167 +1041,200.0,200 +1042,124.0,124 +1043,102.0,102 +1044,161.0,161 +1045,200.0,200 +1046,200.0,200 +1047,135.0,135 +1048,200.0,200 +1049,80.0,80 +1050,200.0,200 +1051,66.0,66 +1052,200.0,200 +1053,200.0,200 +1054,112.0,112 +1055,195.0,195 +1056,200.0,200 +1057,170.0,170 +1058,194.0,194 +1059,200.0,200 +1060,200.0,200 +1061,59.0,59 +1062,75.0,75 +1063,200.0,200 +1064,200.0,200 +1065,97.0,97 +1066,171.0,171 +1067,30.0,30 +1068,200.0,200 +1069,101.0,101 +1070,124.0,124 +1071,136.0,136 +1072,184.0,184 +1073,149.0,149 +1074,137.0,137 +1075,167.0,167 +1076,136.0,136 +1077,200.0,200 +1078,139.0,139 +1079,85.0,85 +1080,137.0,137 +1081,161.0,161 +1082,81.0,81 +1083,200.0,200 +1084,200.0,200 +1085,200.0,200 +1086,200.0,200 +1087,87.0,87 +1088,174.0,174 +1089,200.0,200 +1090,128.0,128 +1091,200.0,200 +1092,200.0,200 +1093,200.0,200 +1094,120.0,120 +1095,200.0,200 +1096,131.0,131 +1097,200.0,200 +1098,200.0,200 +1099,200.0,200 +1100,146.0,146 +1101,200.0,200 +1102,200.0,200 +1103,200.0,200 +1104,80.0,80 +1105,200.0,200 +1106,172.0,172 +1107,143.0,143 +1108,200.0,200 +1109,200.0,200 +1110,181.0,181 +1111,189.0,189 +1112,133.0,133 +1113,200.0,200 
+1114,111.0,111 +1115,200.0,200 +1116,200.0,200 +1117,200.0,200 +1118,192.0,192 +1119,200.0,200 +1120,200.0,200 +1121,200.0,200 +1122,144.0,144 +1123,27.0,27 +1124,200.0,200 +1125,198.0,198 +1126,186.0,186 +1127,80.0,80 +1128,200.0,200 +1129,169.0,169 +1130,48.0,48 +1131,198.0,198 +1132,162.0,162 +1133,58.0,58 +1134,200.0,200 +1135,200.0,200 +1136,189.0,189 +1137,200.0,200 +1138,117.0,117 +1139,200.0,200 +1140,200.0,200 +1141,150.0,150 +1142,163.0,163 +1143,161.0,161 +1144,200.0,200 +1145,113.0,113 +1146,181.0,181 +1147,193.0,193 +1148,98.0,98 +1149,200.0,200 +1150,22.0,22 +1151,125.0,125 +1152,200.0,200 +1153,200.0,200 +1154,200.0,200 +1155,67.0,67 +1156,186.0,186 +1157,189.0,189 +1158,186.0,186 +1159,156.0,156 +1160,200.0,200 +1161,200.0,200 +1162,116.0,116 +1163,77.0,77 +1164,148.0,148 +1165,111.0,111 +1166,68.0,68 +1167,140.0,140 +1168,114.0,114 +1169,200.0,200 +1170,173.0,173 +1171,97.0,97 +1172,166.0,166 +1173,154.0,154 +1174,200.0,200 +1175,200.0,200 +1176,129.0,129 +1177,111.0,111 +1178,200.0,200 +1179,85.0,85 +1180,71.0,71 +1181,200.0,200 +1182,158.0,158 +1183,130.0,130 +1184,161.0,161 +1185,188.0,188 +1186,124.0,124 +1187,190.0,190 +1188,157.0,157 +1189,188.0,188 +1190,194.0,194 +1191,173.0,173 +1192,123.0,123 +1193,200.0,200 +1194,123.0,123 +1195,200.0,200 +1196,200.0,200 +1197,114.0,114 +1198,45.0,45 +1199,144.0,144 +1200,107.0,107 +1201,184.0,184 +1202,121.0,121 +1203,200.0,200 +1204,50.0,50 +1205,123.0,123 +1206,73.0,73 +1207,142.0,142 +1208,38.0,38 +1209,129.0,129 +1210,123.0,123 +1211,149.0,149 +1212,97.0,97 +1213,40.0,40 +1214,177.0,177 +1215,200.0,200 +1216,166.0,166 +1217,106.0,106 +1218,114.0,114 +1219,53.0,53 +1220,162.0,162 +1221,181.0,181 +1222,128.0,128 +1223,155.0,155 +1224,180.0,180 +1225,139.0,139 +1226,99.0,99 +1227,95.0,95 +1228,159.0,159 +1229,30.0,30 +1230,67.0,67 +1231,127.0,127 +1232,33.0,33 +1233,80.0,80 +1234,200.0,200 +1235,200.0,200 +1236,200.0,200 +1237,117.0,117 +1238,128.0,128 +1239,47.0,47 +1240,134.0,134 +1241,129.0,129 
+1242,135.0,135 +1243,200.0,200 +1244,200.0,200 +1245,101.0,101 +1246,35.0,35 +1247,73.0,73 +1248,95.0,95 +1249,125.0,125 +1250,200.0,200 +1251,155.0,155 +1252,48.0,48 +1253,200.0,200 +1254,153.0,153 +1255,173.0,173 +1256,128.0,128 +1257,200.0,200 +1258,200.0,200 +1259,196.0,196 +1260,50.0,50 +1261,193.0,193 +1262,200.0,200 +1263,200.0,200 +1264,200.0,200 +1265,200.0,200 +1266,179.0,179 +1267,180.0,180 +1268,200.0,200 +1269,200.0,200 +1270,200.0,200 +1271,120.0,120 +1272,200.0,200 +1273,60.0,60 +1274,99.0,99 +1275,178.0,178 +1276,157.0,157 +1277,200.0,200 +1278,177.0,177 +1279,200.0,200 +1280,200.0,200 +1281,200.0,200 +1282,200.0,200 +1283,200.0,200 +1284,200.0,200 +1285,200.0,200 +1286,97.0,97 +1287,167.0,167 +1288,183.0,183 +1289,200.0,200 +1290,61.0,61 +1291,192.0,192 +1292,200.0,200 +1293,137.0,137 +1294,200.0,200 +1295,200.0,200 +1296,200.0,200 +1297,200.0,200 +1298,200.0,200 +1299,200.0,200 +1300,103.0,103 +1301,142.0,142 +1302,200.0,200 +1303,47.0,47 +1304,189.0,189 +1305,41.0,41 +1306,200.0,200 +1307,200.0,200 +1308,132.0,132 +1309,154.0,154 +1310,95.0,95 +1311,200.0,200 +1312,200.0,200 +1313,200.0,200 +1314,71.0,71 +1315,200.0,200 +1316,170.0,170 +1317,121.0,121 +1318,200.0,200 +1319,127.0,127 +1320,200.0,200 +1321,120.0,120 +1322,200.0,200 +1323,200.0,200 +1324,161.0,161 +1325,37.0,37 +1326,200.0,200 +1327,200.0,200 +1328,200.0,200 +1329,49.0,49 +1330,118.0,118 +1331,200.0,200 +1332,167.0,167 +1333,200.0,200 +1334,99.0,99 +1335,137.0,137 +1336,200.0,200 +1337,41.0,41 +1338,200.0,200 +1339,200.0,200 +1340,97.0,97 +1341,34.0,34 +1342,40.0,40 +1343,197.0,197 +1344,51.0,51 +1345,200.0,200 +1346,156.0,156 +1347,200.0,200 +1348,75.0,75 +1349,118.0,118 +1350,200.0,200 +1351,73.0,73 +1352,200.0,200 +1353,133.0,133 +1354,200.0,200 +1355,200.0,200 +1356,162.0,162 +1357,37.0,37 +1358,130.0,130 +1359,123.0,123 +1360,200.0,200 +1361,99.0,99 +1362,200.0,200 +1363,46.0,46 +1364,200.0,200 +1365,190.0,190 +1366,34.0,34 +1367,37.0,37 +1368,200.0,200 +1369,200.0,200 
+1370,131.0,131 +1371,200.0,200 +1372,200.0,200 +1373,158.0,158 +1374,175.0,175 +1375,134.0,134 +1376,100.0,100 +1377,200.0,200 +1378,200.0,200 +1379,123.0,123 +1380,200.0,200 +1381,200.0,200 +1382,200.0,200 +1383,116.0,116 +1384,200.0,200 +1385,88.0,88 +1386,200.0,200 +1387,200.0,200 +1388,147.0,147 +1389,200.0,200 +1390,200.0,200 +1391,84.0,84 +1392,200.0,200 +1393,184.0,184 +1394,200.0,200 +1395,179.0,179 +1396,200.0,200 +1397,200.0,200 +1398,130.0,130 +1399,29.0,29 +1400,200.0,200 +1401,200.0,200 +1402,200.0,200 +1403,200.0,200 +1404,195.0,195 +1405,118.0,118 +1406,200.0,200 +1407,200.0,200 +1408,200.0,200 +1409,154.0,154 +1410,185.0,185 +1411,200.0,200 +1412,152.0,152 +1413,200.0,200 +1414,200.0,200 +1415,200.0,200 +1416,200.0,200 +1417,31.0,31 +1418,200.0,200 +1419,134.0,134 +1420,172.0,172 +1421,112.0,112 +1422,153.0,153 +1423,199.0,199 +1424,200.0,200 +1425,200.0,200 +1426,200.0,200 +1427,200.0,200 +1428,166.0,166 +1429,200.0,200 +1430,200.0,200 +1431,199.0,199 +1432,195.0,195 +1433,174.0,174 +1434,46.0,46 +1435,174.0,174 +1436,23.0,23 +1437,157.0,157 +1438,200.0,200 +1439,170.0,170 +1440,92.0,92 +1441,200.0,200 +1442,200.0,200 +1443,72.0,72 +1444,200.0,200 +1445,200.0,200 +1446,200.0,200 +1447,118.0,118 +1448,119.0,119 +1449,109.0,109 +1450,101.0,101 +1451,32.0,32 +1452,197.0,197 +1453,154.0,154 +1454,138.0,138 +1455,141.0,141 +1456,141.0,141 +1457,200.0,200 +1458,90.0,90 +1459,200.0,200 +1460,122.0,122 +1461,144.0,144 +1462,155.0,155 +1463,200.0,200 +1464,160.0,160 +1465,129.0,129 +1466,200.0,200 +1467,112.0,112 +1468,132.0,132 +1469,144.0,144 +1470,184.0,184 +1471,200.0,200 +1472,26.0,26 +1473,200.0,200 +1474,26.0,26 +1475,128.0,128 +1476,200.0,200 +1477,173.0,173 +1478,145.0,145 +1479,128.0,128 +1480,118.0,118 +1481,50.0,50 +1482,184.0,184 +1483,166.0,166 +1484,142.0,142 +1485,104.0,104 +1486,180.0,180 +1487,200.0,200 +1488,200.0,200 +1489,200.0,200 +1490,123.0,123 +1491,200.0,200 +1492,140.0,140 +1493,200.0,200 +1494,200.0,200 +1495,200.0,200 
+1496,200.0,200 +1497,117.0,117 +1498,13.0,13 +1499,200.0,200 +1500,127.0,127 +1501,200.0,200 +1502,200.0,200 +1503,200.0,200 +1504,200.0,200 +1505,200.0,200 +1506,200.0,200 +1507,77.0,77 +1508,152.0,152 +1509,38.0,38 +1510,125.0,125 +1511,154.0,154 +1512,142.0,142 +1513,120.0,120 +1514,200.0,200 +1515,191.0,191 +1516,21.0,21 +1517,101.0,101 +1518,191.0,191 +1519,170.0,170 +1520,200.0,200 +1521,30.0,30 +1522,191.0,191 +1523,200.0,200 +1524,200.0,200 +1525,200.0,200 +1526,135.0,135 +1527,200.0,200 +1528,185.0,185 +1529,123.0,123 +1530,156.0,156 +1531,200.0,200 +1532,140.0,140 +1533,200.0,200 +1534,136.0,136 +1535,139.0,139 +1536,200.0,200 +1537,169.0,169 +1538,200.0,200 +1539,200.0,200 +1540,103.0,103 +1541,91.0,91 +1542,200.0,200 +1543,200.0,200 +1544,65.0,65 +1545,200.0,200 +1546,169.0,169 +1547,59.0,59 +1548,175.0,175 +1549,200.0,200 +1550,200.0,200 +1551,189.0,189 +1552,200.0,200 +1553,200.0,200 +1554,151.0,151 +1555,108.0,108 +1556,146.0,146 +1557,200.0,200 +1558,198.0,198 +1559,119.0,119 +1560,105.0,105 +1561,175.0,175 +1562,200.0,200 +1563,136.0,136 +1564,200.0,200 +1565,86.0,86 +1566,200.0,200 +1567,200.0,200 +1568,200.0,200 +1569,124.0,124 +1570,200.0,200 +1571,122.0,122 +1572,200.0,200 +1573,200.0,200 +1574,47.0,47 +1575,200.0,200 +1576,194.0,194 +1577,200.0,200 +1578,121.0,121 +1579,200.0,200 +1580,200.0,200 +1581,190.0,190 +1582,200.0,200 +1583,200.0,200 +1584,200.0,200 +1585,145.0,145 +1586,121.0,121 +1587,198.0,198 +1588,200.0,200 +1589,200.0,200 +1590,130.0,130 +1591,185.0,185 +1592,193.0,193 +1593,200.0,200 +1594,200.0,200 +1595,200.0,200 +1596,200.0,200 +1597,168.0,168 +1598,200.0,200 +1599,200.0,200 diff --git a/projects/codes/A2C/README.md b/projects/codes/A3C/README.md similarity index 100% rename from projects/codes/A2C/README.md rename to projects/codes/A3C/README.md diff --git a/projects/codes/A3C/a3c.py b/projects/codes/A3C/a3c.py new file mode 100644 index 0000000..ba0ed7c --- /dev/null +++ b/projects/codes/A3C/a3c.py @@ -0,0 +1,56 @@ 
+#!/usr/bin/env python +# coding=utf-8 +''' +Author: JiangJi +Email: johnjim0816@gmail.com +Date: 2021-05-03 22:16:08 +LastEditor: JiangJi +LastEditTime: 2022-07-20 23:54:40 +Discription: +Environment: +''' +import torch +import torch.optim as optim +import torch.nn as nn +import torch.nn.functional as F +from torch.distributions import Categorical + +class ActorCritic(nn.Module): + ''' A2C网络模型,包含一个Actor和Critic + ''' + def __init__(self, input_dim, output_dim, hidden_dim): + super(ActorCritic, self).__init__() + self.critic = nn.Sequential( + nn.Linear(input_dim, hidden_dim), + nn.ReLU(), + nn.Linear(hidden_dim, 1) + ) + + self.actor = nn.Sequential( + nn.Linear(input_dim, hidden_dim), + nn.ReLU(), + nn.Linear(hidden_dim, output_dim), + nn.Softmax(dim=1), + ) + + def forward(self, x): + value = self.critic(x) + probs = self.actor(x) + dist = Categorical(probs) + return dist, value +class A2C: + ''' A2C算法 + ''' + def __init__(self,n_states,n_actions,cfg) -> None: + self.gamma = cfg.gamma + self.device = torch.device(cfg.device) + self.model = ActorCritic(n_states, n_actions, cfg.hidden_size).to(self.device) + self.optimizer = optim.Adam(self.model.parameters()) + + def compute_returns(self,next_value, rewards, masks): + R = next_value + returns = [] + for step in reversed(range(len(rewards))): + R = rewards[step] + self.gamma * R * masks[step] + returns.insert(0, R) + return returns \ No newline at end of file diff --git a/projects/codes/A2C/outputs/CartPole-v0/20220713-221850/results/params.json b/projects/codes/A3C/outputs/CartPole-v0/20220713-221850/results/params.json similarity index 100% rename from projects/codes/A2C/outputs/CartPole-v0/20220713-221850/results/params.json rename to projects/codes/A3C/outputs/CartPole-v0/20220713-221850/results/params.json diff --git a/projects/codes/A2C/outputs/CartPole-v0/20220713-221850/results/train_ma_rewards.npy b/projects/codes/A3C/outputs/CartPole-v0/20220713-221850/results/train_ma_rewards.npy similarity index 100% 
rename from projects/codes/A2C/outputs/CartPole-v0/20220713-221850/results/train_ma_rewards.npy rename to projects/codes/A3C/outputs/CartPole-v0/20220713-221850/results/train_ma_rewards.npy diff --git a/projects/codes/A2C/outputs/CartPole-v0/20220713-221850/results/train_rewards.npy b/projects/codes/A3C/outputs/CartPole-v0/20220713-221850/results/train_rewards.npy similarity index 100% rename from projects/codes/A2C/outputs/CartPole-v0/20220713-221850/results/train_rewards.npy rename to projects/codes/A3C/outputs/CartPole-v0/20220713-221850/results/train_rewards.npy diff --git a/projects/codes/A2C/outputs/CartPole-v0/20220713-221850/results/train_rewards_curve.png b/projects/codes/A3C/outputs/CartPole-v0/20220713-221850/results/train_rewards_curve.png similarity index 100% rename from projects/codes/A2C/outputs/CartPole-v0/20220713-221850/results/train_rewards_curve.png rename to projects/codes/A3C/outputs/CartPole-v0/20220713-221850/results/train_rewards_curve.png diff --git a/projects/codes/A2C/task0.py b/projects/codes/A3C/task0.py similarity index 99% rename from projects/codes/A2C/task0.py rename to projects/codes/A3C/task0.py index e29266b..09dcceb 100644 --- a/projects/codes/A2C/task0.py +++ b/projects/codes/A3C/task0.py @@ -10,7 +10,7 @@ import torch.optim as optim import datetime import argparse from common.multiprocessing_env import SubprocVecEnv -from a2c import ActorCritic +from a3c import ActorCritic from common.utils import save_results, make_dir from common.utils import plot_rewards, save_args diff --git a/projects/codes/DQN/main.py b/projects/codes/DQN/main.py index ecf281d..d3c022c 100644 --- a/projects/codes/DQN/main.py +++ b/projects/codes/DQN/main.py @@ -24,6 +24,7 @@ def get_args(): parser.add_argument('--env_name',default='CartPole-v0',type=str,help="name of environment") parser.add_argument('--train_eps',default=200,type=int,help="episodes of training") parser.add_argument('--test_eps',default=20,type=int,help="episodes of testing") + 
parser.add_argument('--ep_max_steps',default = 100000,type=int,help="steps per episode, much larger value can simulate infinite steps") parser.add_argument('--gamma',default=0.95,type=float,help="discounted factor") parser.add_argument('--epsilon_start',default=0.95,type=float,help="initial value of epsilon") parser.add_argument('--epsilon_end',default=0.01,type=float,help="final value of epsilon") @@ -72,7 +73,7 @@ def train(cfg, env, agent): ep_reward = 0 # reward per episode ep_step = 0 state = env.reset() # reset and obtain initial state - while True: + for _ in range(cfg['ep_max_steps']): ep_step += 1 action = agent.sample_action(state) # sample action next_state, reward, done, _ = env.step(action) # update env and return transitions @@ -91,7 +92,7 @@ def train(cfg, env, agent): print(f'Episode: {i_ep+1}/{cfg["train_eps"]}, Reward: {ep_reward:.2f}: Epislon: {agent.epsilon:.3f}') print("Finish training!") env.close() - res_dic = {'episodes':range(len(rewards)),'rewards':rewards} + res_dic = {'episodes':range(len(rewards)),'rewards':rewards,'steps':steps} return res_dic def test(cfg, env, agent): @@ -103,7 +104,7 @@ def test(cfg, env, agent): ep_reward = 0 # reward per episode ep_step = 0 state = env.reset() # reset and obtain initial state - while True: + for _ in range(cfg['ep_max_steps']): ep_step+=1 action = agent.predict_action(state) # predict action next_state, reward, done, _ = env.step(action) @@ -116,7 +117,7 @@ def test(cfg, env, agent): print(f"Episode: {i_ep+1}/{cfg['test_eps']},Reward: {ep_reward:.2f}") print("Finish testing!") env.close() - return {'episodes':range(len(rewards)),'rewards':rewards} + return {'episodes':range(len(rewards)),'rewards':rewards,'steps':steps} if __name__ == "__main__": diff --git a/projects/codes/DQN/outputs/CartPole-v1/20220828-214702/models/checkpoint.pt b/projects/codes/DQN/outputs/CartPole-v1/20220828-214702/models/checkpoint.pt new file mode 100644 index 0000000..e357d49 Binary files /dev/null and 
b/projects/codes/DQN/outputs/CartPole-v1/20220828-214702/models/checkpoint.pt differ diff --git a/projects/codes/DQN/outputs/CartPole-v1/20220828-214702/results/params.json b/projects/codes/DQN/outputs/CartPole-v1/20220828-214702/results/params.json new file mode 100644 index 0000000..83d8c57 --- /dev/null +++ b/projects/codes/DQN/outputs/CartPole-v1/20220828-214702/results/params.json @@ -0,0 +1 @@ +{"algo_name": "DQN", "env_name": "CartPole-v1", "train_eps": 2000, "test_eps": 20, "ep_max_steps": 100000, "gamma": 0.99, "epsilon_start": 0.95, "epsilon_end": 0.01, "epsilon_decay": 6000, "lr": 1e-05, "memory_capacity": 200000, "batch_size": 64, "target_update": 4, "hidden_dim": 256, "device": "cuda", "seed": 10, "show_fig": false, "save_fig": true, "result_path": "C:\\Users\\24438\\Desktop\\rl-tutorials\\codes\\DQN/outputs/CartPole-v1/20220828-214702/results", "model_path": "C:\\Users\\24438\\Desktop\\rl-tutorials\\codes\\DQN/outputs/CartPole-v1/20220828-214702/models", "n_states": 4, "n_actions": 2} \ No newline at end of file diff --git a/projects/codes/DQN/outputs/CartPole-v1/20220828-214702/results/testing_curve.png b/projects/codes/DQN/outputs/CartPole-v1/20220828-214702/results/testing_curve.png new file mode 100644 index 0000000..f97050f Binary files /dev/null and b/projects/codes/DQN/outputs/CartPole-v1/20220828-214702/results/testing_curve.png differ diff --git a/projects/codes/DQN/outputs/CartPole-v1/20220828-214702/results/testing_results.csv b/projects/codes/DQN/outputs/CartPole-v1/20220828-214702/results/testing_results.csv new file mode 100644 index 0000000..bb0b8f6 --- /dev/null +++ b/projects/codes/DQN/outputs/CartPole-v1/20220828-214702/results/testing_results.csv @@ -0,0 +1,21 @@ +episodes,rewards,steps +0,371.0,371 +1,446.0,446 +2,300.0,300 +3,500.0,500 +4,313.0,313 +5,500.0,500 +6,341.0,341 +7,489.0,489 +8,304.0,304 +9,358.0,358 +10,278.0,278 +11,500.0,500 +12,500.0,500 +13,500.0,500 +14,500.0,500 +15,476.0,476 +16,308.0,308 +17,394.0,394 
+18,500.0,500 +19,500.0,500 diff --git a/projects/codes/DQN/outputs/CartPole-v1/20220828-214702/results/training_curve.png b/projects/codes/DQN/outputs/CartPole-v1/20220828-214702/results/training_curve.png new file mode 100644 index 0000000..a14bb8c Binary files /dev/null and b/projects/codes/DQN/outputs/CartPole-v1/20220828-214702/results/training_curve.png differ diff --git a/projects/codes/DQN/outputs/CartPole-v1/20220828-214702/results/training_results.csv b/projects/codes/DQN/outputs/CartPole-v1/20220828-214702/results/training_results.csv new file mode 100644 index 0000000..6bfc2ad --- /dev/null +++ b/projects/codes/DQN/outputs/CartPole-v1/20220828-214702/results/training_results.csv @@ -0,0 +1,2001 @@ +episodes,rewards,steps +0,38.0,38 +1,16.0,16 +2,28.0,28 +3,10.0,10 +4,18.0,18 +5,10.0,10 +6,8.0,8 +7,19.0,19 +8,18.0,18 +9,32.0,32 +10,12.0,12 +11,14.0,14 +12,16.0,16 +13,20.0,20 +14,33.0,33 +15,24.0,24 +16,28.0,28 +17,33.0,33 +18,31.0,31 +19,14.0,14 +20,10.0,10 +21,19.0,19 +22,16.0,16 +23,19.0,19 +24,11.0,11 +25,23.0,23 +26,20.0,20 +27,26.0,26 +28,16.0,16 +29,16.0,16 +30,11.0,11 +31,13.0,13 +32,16.0,16 +33,42.0,42 +34,15.0,15 +35,38.0,38 +36,16.0,16 +37,46.0,46 +38,28.0,28 +39,60.0,60 +40,32.0,32 +41,9.0,9 +42,23.0,23 +43,19.0,19 +44,12.0,12 +45,17.0,17 +46,27.0,27 +47,22.0,22 +48,26.0,26 +49,11.0,11 +50,15.0,15 +51,9.0,9 +52,24.0,24 +53,29.0,29 +54,11.0,11 +55,16.0,16 +56,23.0,23 +57,14.0,14 +58,11.0,11 +59,16.0,16 +60,24.0,24 +61,27.0,27 +62,11.0,11 +63,20.0,20 +64,14.0,14 +65,11.0,11 +66,15.0,15 +67,16.0,16 +68,13.0,13 +69,28.0,28 +70,14.0,14 +71,10.0,10 +72,28.0,28 +73,20.0,20 +74,13.0,13 +75,11.0,11 +76,27.0,27 +77,15.0,15 +78,14.0,14 +79,14.0,14 +80,10.0,10 +81,11.0,11 +82,14.0,14 +83,13.0,13 +84,15.0,15 +85,13.0,13 +86,10.0,10 +87,11.0,11 +88,11.0,11 +89,13.0,13 +90,14.0,14 +91,25.0,25 +92,14.0,14 +93,14.0,14 +94,19.0,19 +95,18.0,18 +96,9.0,9 +97,9.0,9 +98,11.0,11 +99,19.0,19 +100,10.0,10 +101,40.0,40 +102,10.0,10 +103,13.0,13 +104,13.0,13 
+105,18.0,18 +106,13.0,13 +107,11.0,11 +108,11.0,11 +109,18.0,18 +110,20.0,20 +111,10.0,10 +112,24.0,24 +113,9.0,9 +114,10.0,10 +115,13.0,13 +116,21.0,21 +117,12.0,12 +118,14.0,14 +119,10.0,10 +120,10.0,10 +121,16.0,16 +122,10.0,10 +123,18.0,18 +124,13.0,13 +125,17.0,17 +126,14.0,14 +127,12.0,12 +128,16.0,16 +129,11.0,11 +130,15.0,15 +131,10.0,10 +132,13.0,13 +133,17.0,17 +134,9.0,9 +135,34.0,34 +136,23.0,23 +137,14.0,14 +138,17.0,17 +139,13.0,13 +140,27.0,27 +141,15.0,15 +142,16.0,16 +143,15.0,15 +144,11.0,11 +145,12.0,12 +146,9.0,9 +147,30.0,30 +148,12.0,12 +149,12.0,12 +150,13.0,13 +151,14.0,14 +152,17.0,17 +153,10.0,10 +154,15.0,15 +155,10.0,10 +156,14.0,14 +157,22.0,22 +158,12.0,12 +159,11.0,11 +160,34.0,34 +161,11.0,11 +162,12.0,12 +163,18.0,18 +164,9.0,9 +165,17.0,17 +166,16.0,16 +167,10.0,10 +168,17.0,17 +169,11.0,11 +170,21.0,21 +171,15.0,15 +172,17.0,17 +173,11.0,11 +174,23.0,23 +175,10.0,10 +176,25.0,25 +177,12.0,12 +178,10.0,10 +179,16.0,16 +180,14.0,14 +181,21.0,21 +182,11.0,11 +183,12.0,12 +184,11.0,11 +185,10.0,10 +186,11.0,11 +187,17.0,17 +188,10.0,10 +189,14.0,14 +190,11.0,11 +191,12.0,12 +192,9.0,9 +193,11.0,11 +194,11.0,11 +195,16.0,16 +196,15.0,15 +197,10.0,10 +198,9.0,9 +199,17.0,17 +200,12.0,12 +201,9.0,9 +202,11.0,11 +203,9.0,9 +204,9.0,9 +205,16.0,16 +206,15.0,15 +207,13.0,13 +208,11.0,11 +209,13.0,13 +210,17.0,17 +211,8.0,8 +212,8.0,8 +213,12.0,12 +214,15.0,15 +215,13.0,13 +216,14.0,14 +217,11.0,11 +218,14.0,14 +219,13.0,13 +220,12.0,12 +221,9.0,9 +222,10.0,10 +223,10.0,10 +224,11.0,11 +225,9.0,9 +226,16.0,16 +227,23.0,23 +228,13.0,13 +229,16.0,16 +230,9.0,9 +231,12.0,12 +232,11.0,11 +233,10.0,10 +234,13.0,13 +235,15.0,15 +236,12.0,12 +237,11.0,11 +238,9.0,9 +239,11.0,11 +240,11.0,11 +241,11.0,11 +242,12.0,12 +243,8.0,8 +244,8.0,8 +245,10.0,10 +246,12.0,12 +247,12.0,12 +248,9.0,9 +249,12.0,12 +250,13.0,13 +251,11.0,11 +252,12.0,12 +253,10.0,10 +254,10.0,10 +255,11.0,11 +256,17.0,17 +257,11.0,11 +258,14.0,14 +259,12.0,12 +260,10.0,10 
+261,11.0,11 +262,16.0,16 +263,13.0,13 +264,13.0,13 +265,15.0,15 +266,11.0,11 +267,8.0,8 +268,13.0,13 +269,15.0,15 +270,11.0,11 +271,9.0,9 +272,10.0,10 +273,11.0,11 +274,11.0,11 +275,9.0,9 +276,18.0,18 +277,13.0,13 +278,11.0,11 +279,14.0,14 +280,12.0,12 +281,16.0,16 +282,10.0,10 +283,12.0,12 +284,13.0,13 +285,9.0,9 +286,14.0,14 +287,26.0,26 +288,9.0,9 +289,10.0,10 +290,12.0,12 +291,13.0,13 +292,8.0,8 +293,13.0,13 +294,12.0,12 +295,11.0,11 +296,14.0,14 +297,10.0,10 +298,11.0,11 +299,12.0,12 +300,9.0,9 +301,11.0,11 +302,11.0,11 +303,15.0,15 +304,11.0,11 +305,11.0,11 +306,13.0,13 +307,8.0,8 +308,9.0,9 +309,10.0,10 +310,12.0,12 +311,13.0,13 +312,9.0,9 +313,15.0,15 +314,11.0,11 +315,12.0,12 +316,12.0,12 +317,15.0,15 +318,13.0,13 +319,8.0,8 +320,16.0,16 +321,9.0,9 +322,11.0,11 +323,12.0,12 +324,16.0,16 +325,9.0,9 +326,13.0,13 +327,13.0,13 +328,15.0,15 +329,12.0,12 +330,12.0,12 +331,8.0,8 +332,11.0,11 +333,15.0,15 +334,12.0,12 +335,11.0,11 +336,13.0,13 +337,13.0,13 +338,16.0,16 +339,11.0,11 +340,8.0,8 +341,10.0,10 +342,14.0,14 +343,18.0,18 +344,11.0,11 +345,10.0,10 +346,11.0,11 +347,11.0,11 +348,20.0,20 +349,14.0,14 +350,10.0,10 +351,14.0,14 +352,9.0,9 +353,9.0,9 +354,12.0,12 +355,9.0,9 +356,10.0,10 +357,9.0,9 +358,10.0,10 +359,10.0,10 +360,22.0,22 +361,11.0,11 +362,12.0,12 +363,11.0,11 +364,8.0,8 +365,24.0,24 +366,11.0,11 +367,10.0,10 +368,10.0,10 +369,10.0,10 +370,10.0,10 +371,9.0,9 +372,9.0,9 +373,21.0,21 +374,10.0,10 +375,12.0,12 +376,14.0,14 +377,15.0,15 +378,10.0,10 +379,17.0,17 +380,8.0,8 +381,14.0,14 +382,11.0,11 +383,9.0,9 +384,10.0,10 +385,9.0,9 +386,15.0,15 +387,11.0,11 +388,17.0,17 +389,12.0,12 +390,11.0,11 +391,15.0,15 +392,10.0,10 +393,13.0,13 +394,12.0,12 +395,10.0,10 +396,12.0,12 +397,9.0,9 +398,14.0,14 +399,9.0,9 +400,13.0,13 +401,10.0,10 +402,13.0,13 +403,16.0,16 +404,9.0,9 +405,8.0,8 +406,11.0,11 +407,9.0,9 +408,15.0,15 +409,12.0,12 +410,15.0,15 +411,15.0,15 +412,15.0,15 +413,14.0,14 +414,12.0,12 +415,11.0,11 +416,14.0,14 +417,12.0,12 +418,14.0,14 
+419,11.0,11 +420,8.0,8 +421,9.0,9 +422,13.0,13 +423,13.0,13 +424,8.0,8 +425,10.0,10 +426,10.0,10 +427,15.0,15 +428,14.0,14 +429,9.0,9 +430,12.0,12 +431,13.0,13 +432,12.0,12 +433,10.0,10 +434,14.0,14 +435,11.0,11 +436,12.0,12 +437,14.0,14 +438,10.0,10 +439,12.0,12 +440,9.0,9 +441,15.0,15 +442,12.0,12 +443,10.0,10 +444,8.0,8 +445,12.0,12 +446,14.0,14 +447,12.0,12 +448,9.0,9 +449,10.0,10 +450,13.0,13 +451,9.0,9 +452,10.0,10 +453,9.0,9 +454,11.0,11 +455,10.0,10 +456,18.0,18 +457,16.0,16 +458,10.0,10 +459,11.0,11 +460,15.0,15 +461,12.0,12 +462,11.0,11 +463,12.0,12 +464,11.0,11 +465,10.0,10 +466,12.0,12 +467,10.0,10 +468,12.0,12 +469,15.0,15 +470,11.0,11 +471,10.0,10 +472,12.0,12 +473,9.0,9 +474,13.0,13 +475,11.0,11 +476,15.0,15 +477,10.0,10 +478,11.0,11 +479,13.0,13 +480,10.0,10 +481,10.0,10 +482,10.0,10 +483,10.0,10 +484,15.0,15 +485,11.0,11 +486,12.0,12 +487,16.0,16 +488,10.0,10 +489,16.0,16 +490,11.0,11 +491,9.0,9 +492,9.0,9 +493,18.0,18 +494,10.0,10 +495,9.0,9 +496,40.0,40 +497,21.0,21 +498,10.0,10 +499,36.0,36 +500,37.0,37 +501,22.0,22 +502,30.0,30 +503,23.0,23 +504,35.0,35 +505,48.0,48 +506,32.0,32 +507,21.0,21 +508,28.0,28 +509,29.0,29 +510,10.0,10 +511,27.0,27 +512,20.0,20 +513,23.0,23 +514,24.0,24 +515,21.0,21 +516,25.0,25 +517,20.0,20 +518,15.0,15 +519,23.0,23 +520,14.0,14 +521,18.0,18 +522,18.0,18 +523,18.0,18 +524,18.0,18 +525,20.0,20 +526,13.0,13 +527,21.0,21 +528,20.0,20 +529,17.0,17 +530,17.0,17 +531,17.0,17 +532,20.0,20 +533,15.0,15 +534,17.0,17 +535,17.0,17 +536,16.0,16 +537,16.0,16 +538,14.0,14 +539,21.0,21 +540,22.0,22 +541,14.0,14 +542,20.0,20 +543,25.0,25 +544,18.0,18 +545,22.0,22 +546,21.0,21 +547,20.0,20 +548,23.0,23 +549,20.0,20 +550,20.0,20 +551,25.0,25 +552,18.0,18 +553,14.0,14 +554,16.0,16 +555,16.0,16 +556,15.0,15 +557,26.0,26 +558,18.0,18 +559,20.0,20 +560,27.0,27 +561,18.0,18 +562,20.0,20 +563,20.0,20 +564,19.0,19 +565,26.0,26 +566,21.0,21 +567,25.0,25 +568,24.0,24 +569,24.0,24 +570,24.0,24 +571,17.0,17 +572,28.0,28 +573,20.0,20 
+574,22.0,22 +575,16.0,16 +576,22.0,22 +577,14.0,14 +578,27.0,27 +579,29.0,29 +580,19.0,19 +581,22.0,22 +582,29.0,29 +583,29.0,29 +584,23.0,23 +585,22.0,22 +586,21.0,21 +587,18.0,18 +588,28.0,28 +589,29.0,29 +590,23.0,23 +591,23.0,23 +592,20.0,20 +593,32.0,32 +594,38.0,38 +595,29.0,29 +596,25.0,25 +597,22.0,22 +598,37.0,37 +599,20.0,20 +600,17.0,17 +601,22.0,22 +602,23.0,23 +603,28.0,28 +604,31.0,31 +605,23.0,23 +606,26.0,26 +607,22.0,22 +608,27.0,27 +609,32.0,32 +610,33.0,33 +611,27.0,27 +612,23.0,23 +613,38.0,38 +614,26.0,26 +615,27.0,27 +616,30.0,30 +617,22.0,22 +618,27.0,27 +619,38.0,38 +620,34.0,34 +621,39.0,39 +622,24.0,24 +623,29.0,29 +624,26.0,26 +625,30.0,30 +626,33.0,33 +627,34.0,34 +628,27.0,27 +629,26.0,26 +630,27.0,27 +631,37.0,37 +632,38.0,38 +633,46.0,46 +634,48.0,48 +635,56.0,56 +636,39.0,39 +637,18.0,18 +638,63.0,63 +639,51.0,51 +640,45.0,45 +641,27.0,27 +642,56.0,56 +643,32.0,32 +644,49.0,49 +645,40.0,40 +646,59.0,59 +647,52.0,52 +648,36.0,36 +649,43.0,43 +650,54.0,54 +651,59.0,59 +652,58.0,58 +653,61.0,61 +654,66.0,66 +655,38.0,38 +656,33.0,33 +657,96.0,96 +658,82.0,82 +659,56.0,56 +660,42.0,42 +661,38.0,38 +662,48.0,48 +663,83.0,83 +664,33.0,33 +665,72.0,72 +666,41.0,41 +667,57.0,57 +668,54.0,54 +669,69.0,69 +670,63.0,63 +671,81.0,81 +672,69.0,69 +673,65.0,65 +674,55.0,55 +675,64.0,64 +676,54.0,54 +677,93.0,93 +678,47.0,47 +679,84.0,84 +680,46.0,46 +681,63.0,63 +682,51.0,51 +683,64.0,64 +684,58.0,58 +685,72.0,72 +686,35.0,35 +687,59.0,59 +688,124.0,124 +689,64.0,64 +690,59.0,59 +691,77.0,77 +692,55.0,55 +693,63.0,63 +694,100.0,100 +695,100.0,100 +696,58.0,58 +697,85.0,85 +698,50.0,50 +699,57.0,57 +700,59.0,59 +701,72.0,72 +702,200.0,200 +703,118.0,118 +704,63.0,63 +705,125.0,125 +706,80.0,80 +707,49.0,49 +708,52.0,52 +709,71.0,71 +710,68.0,68 +711,71.0,71 +712,60.0,60 +713,58.0,58 +714,192.0,192 +715,57.0,57 +716,93.0,93 +717,107.0,107 +718,59.0,59 +719,71.0,71 +720,81.0,81 +721,76.0,76 +722,98.0,98 +723,95.0,95 +724,99.0,99 +725,132.0,132 
+726,99.0,99 +727,58.0,58 +728,95.0,95 +729,79.0,79 +730,70.0,70 +731,76.0,76 +732,58.0,58 +733,174.0,174 +734,58.0,58 +735,178.0,178 +736,92.0,92 +737,114.0,114 +738,101.0,101 +739,59.0,59 +740,171.0,171 +741,179.0,179 +742,85.0,85 +743,115.0,115 +744,74.0,74 +745,99.0,99 +746,174.0,174 +747,124.0,124 +748,101.0,101 +749,106.0,106 +750,75.0,75 +751,70.0,70 +752,139.0,139 +753,76.0,76 +754,86.0,86 +755,82.0,82 +756,51.0,51 +757,78.0,78 +758,67.0,67 +759,72.0,72 +760,83.0,83 +761,58.0,58 +762,109.0,109 +763,62.0,62 +764,74.0,74 +765,83.0,83 +766,111.0,111 +767,67.0,67 +768,57.0,57 +769,59.0,59 +770,112.0,112 +771,197.0,197 +772,111.0,111 +773,90.0,90 +774,111.0,111 +775,81.0,81 +776,72.0,72 +777,80.0,80 +778,130.0,130 +779,156.0,156 +780,100.0,100 +781,85.0,85 +782,66.0,66 +783,76.0,76 +784,105.0,105 +785,57.0,57 +786,201.0,201 +787,176.0,176 +788,175.0,175 +789,198.0,198 +790,73.0,73 +791,79.0,79 +792,59.0,59 +793,55.0,55 +794,84.0,84 +795,131.0,131 +796,67.0,67 +797,199.0,199 +798,94.0,94 +799,97.0,97 +800,94.0,94 +801,185.0,185 +802,98.0,98 +803,136.0,136 +804,141.0,141 +805,81.0,81 +806,77.0,77 +807,100.0,100 +808,99.0,99 +809,133.0,133 +810,154.0,154 +811,74.0,74 +812,79.0,79 +813,94.0,94 +814,168.0,168 +815,338.0,338 +816,64.0,64 +817,112.0,112 +818,69.0,69 +819,143.0,143 +820,170.0,170 +821,170.0,170 +822,77.0,77 +823,83.0,83 +824,104.0,104 +825,152.0,152 +826,198.0,198 +827,159.0,159 +828,235.0,235 +829,76.0,76 +830,201.0,201 +831,289.0,289 +832,113.0,113 +833,294.0,294 +834,74.0,74 +835,416.0,416 +836,194.0,194 +837,85.0,85 +838,170.0,170 +839,208.0,208 +840,177.0,177 +841,83.0,83 +842,82.0,82 +843,183.0,183 +844,90.0,90 +845,398.0,398 +846,244.0,244 +847,99.0,99 +848,310.0,310 +849,195.0,195 +850,183.0,183 +851,162.0,162 +852,115.0,115 +853,82.0,82 +854,233.0,233 +855,102.0,102 +856,262.0,262 +857,300.0,300 +858,245.0,245 +859,299.0,299 +860,150.0,150 +861,199.0,199 +862,79.0,79 +863,74.0,74 +864,113.0,113 +865,152.0,152 +866,126.0,126 +867,68.0,68 
+868,185.0,185 +869,156.0,156 +870,63.0,63 +871,121.0,121 +872,83.0,83 +873,72.0,72 +874,337.0,337 +875,179.0,179 +876,325.0,325 +877,115.0,115 +878,217.0,217 +879,74.0,74 +880,90.0,90 +881,218.0,218 +882,82.0,82 +883,173.0,173 +884,106.0,106 +885,83.0,83 +886,68.0,68 +887,173.0,173 +888,159.0,159 +889,57.0,57 +890,80.0,80 +891,302.0,302 +892,71.0,71 +893,59.0,59 +894,153.0,153 +895,129.0,129 +896,63.0,63 +897,147.0,147 +898,82.0,82 +899,186.0,186 +900,74.0,74 +901,81.0,81 +902,61.0,61 +903,88.0,88 +904,101.0,101 +905,60.0,60 +906,154.0,154 +907,202.0,202 +908,96.0,96 +909,81.0,81 +910,91.0,91 +911,77.0,77 +912,63.0,63 +913,154.0,154 +914,57.0,57 +915,86.0,86 +916,84.0,84 +917,66.0,66 +918,141.0,141 +919,88.0,88 +920,89.0,89 +921,49.0,49 +922,97.0,97 +923,50.0,50 +924,211.0,211 +925,69.0,69 +926,278.0,278 +927,75.0,75 +928,60.0,60 +929,57.0,57 +930,200.0,200 +931,101.0,101 +932,70.0,70 +933,93.0,93 +934,61.0,61 +935,80.0,80 +936,94.0,94 +937,53.0,53 +938,223.0,223 +939,142.0,142 +940,74.0,74 +941,60.0,60 +942,75.0,75 +943,78.0,78 +944,81.0,81 +945,51.0,51 +946,215.0,215 +947,64.0,64 +948,70.0,70 +949,85.0,85 +950,102.0,102 +951,48.0,48 +952,69.0,69 +953,65.0,65 +954,70.0,70 +955,174.0,174 +956,46.0,46 +957,75.0,75 +958,75.0,75 +959,62.0,62 +960,71.0,71 +961,67.0,67 +962,48.0,48 +963,64.0,64 +964,58.0,58 +965,64.0,64 +966,82.0,82 +967,70.0,70 +968,68.0,68 +969,48.0,48 +970,48.0,48 +971,53.0,53 +972,80.0,80 +973,46.0,46 +974,101.0,101 +975,303.0,303 +976,59.0,59 +977,212.0,212 +978,64.0,64 +979,76.0,76 +980,69.0,69 +981,241.0,241 +982,46.0,46 +983,45.0,45 +984,124.0,124 +985,99.0,99 +986,210.0,210 +987,67.0,67 +988,78.0,78 +989,58.0,58 +990,54.0,54 +991,63.0,63 +992,37.0,37 +993,46.0,46 +994,63.0,63 +995,48.0,48 +996,70.0,70 +997,58.0,58 +998,88.0,88 +999,62.0,62 +1000,173.0,173 +1001,99.0,99 +1002,47.0,47 +1003,47.0,47 +1004,74.0,74 +1005,101.0,101 +1006,42.0,42 +1007,46.0,46 +1008,61.0,61 +1009,42.0,42 +1010,48.0,48 +1011,60.0,60 +1012,42.0,42 +1013,53.0,53 
+1014,54.0,54 +1015,62.0,62 +1016,98.0,98 +1017,50.0,50 +1018,39.0,39 +1019,60.0,60 +1020,52.0,52 +1021,46.0,46 +1022,68.0,68 +1023,40.0,40 +1024,41.0,41 +1025,54.0,54 +1026,66.0,66 +1027,112.0,112 +1028,55.0,55 +1029,46.0,46 +1030,72.0,72 +1031,54.0,54 +1032,51.0,51 +1033,43.0,43 +1034,66.0,66 +1035,59.0,59 +1036,57.0,57 +1037,68.0,68 +1038,63.0,63 +1039,38.0,38 +1040,48.0,48 +1041,58.0,58 +1042,58.0,58 +1043,116.0,116 +1044,52.0,52 +1045,180.0,180 +1046,91.0,91 +1047,292.0,292 +1048,65.0,65 +1049,46.0,46 +1050,40.0,40 +1051,192.0,192 +1052,46.0,46 +1053,52.0,52 +1054,50.0,50 +1055,37.0,37 +1056,136.0,136 +1057,46.0,46 +1058,35.0,35 +1059,89.0,89 +1060,34.0,34 +1061,101.0,101 +1062,102.0,102 +1063,166.0,166 +1064,62.0,62 +1065,40.0,40 +1066,37.0,37 +1067,45.0,45 +1068,45.0,45 +1069,48.0,48 +1070,67.0,67 +1071,87.0,87 +1072,51.0,51 +1073,103.0,103 +1074,46.0,46 +1075,52.0,52 +1076,40.0,40 +1077,47.0,47 +1078,49.0,49 +1079,35.0,35 +1080,46.0,46 +1081,38.0,38 +1082,36.0,36 +1083,219.0,219 +1084,57.0,57 +1085,39.0,39 +1086,48.0,48 +1087,37.0,37 +1088,46.0,46 +1089,37.0,37 +1090,65.0,65 +1091,39.0,39 +1092,44.0,44 +1093,85.0,85 +1094,50.0,50 +1095,39.0,39 +1096,57.0,57 +1097,221.0,221 +1098,35.0,35 +1099,59.0,59 +1100,46.0,46 +1101,38.0,38 +1102,37.0,37 +1103,62.0,62 +1104,59.0,59 +1105,46.0,46 +1106,40.0,40 +1107,74.0,74 +1108,58.0,58 +1109,37.0,37 +1110,56.0,56 +1111,52.0,52 +1112,45.0,45 +1113,76.0,76 +1114,54.0,54 +1115,37.0,37 +1116,41.0,41 +1117,47.0,47 +1118,56.0,56 +1119,39.0,39 +1120,37.0,37 +1121,42.0,42 +1122,59.0,59 +1123,38.0,38 +1124,49.0,49 +1125,49.0,49 +1126,130.0,130 +1127,52.0,52 +1128,45.0,45 +1129,43.0,43 +1130,57.0,57 +1131,37.0,37 +1132,43.0,43 +1133,60.0,60 +1134,58.0,58 +1135,57.0,57 +1136,35.0,35 +1137,57.0,57 +1138,154.0,154 +1139,39.0,39 +1140,48.0,48 +1141,78.0,78 +1142,58.0,58 +1143,70.0,70 +1144,52.0,52 +1145,53.0,53 +1146,58.0,58 +1147,40.0,40 +1148,74.0,74 +1149,39.0,39 +1150,69.0,69 +1151,78.0,78 +1152,34.0,34 +1153,44.0,44 
+1154,45.0,45 +1155,173.0,173 +1156,190.0,190 +1157,47.0,47 +1158,36.0,36 +1159,52.0,52 +1160,44.0,44 +1161,50.0,50 +1162,96.0,96 +1163,88.0,88 +1164,38.0,38 +1165,44.0,44 +1166,102.0,102 +1167,49.0,49 +1168,46.0,46 +1169,68.0,68 +1170,46.0,46 +1171,50.0,50 +1172,58.0,58 +1173,46.0,46 +1174,50.0,50 +1175,40.0,40 +1176,44.0,44 +1177,75.0,75 +1178,109.0,109 +1179,51.0,51 +1180,44.0,44 +1181,42.0,42 +1182,41.0,41 +1183,62.0,62 +1184,48.0,48 +1185,60.0,60 +1186,52.0,52 +1187,73.0,73 +1188,39.0,39 +1189,42.0,42 +1190,89.0,89 +1191,64.0,64 +1192,40.0,40 +1193,42.0,42 +1194,59.0,59 +1195,48.0,48 +1196,45.0,45 +1197,48.0,48 +1198,171.0,171 +1199,77.0,77 +1200,97.0,97 +1201,43.0,43 +1202,86.0,86 +1203,62.0,62 +1204,67.0,67 +1205,86.0,86 +1206,77.0,77 +1207,88.0,88 +1208,73.0,73 +1209,80.0,80 +1210,94.0,94 +1211,71.0,71 +1212,96.0,96 +1213,65.0,65 +1214,61.0,61 +1215,63.0,63 +1216,72.0,72 +1217,78.0,78 +1218,92.0,92 +1219,64.0,64 +1220,76.0,76 +1221,69.0,69 +1222,86.0,86 +1223,93.0,93 +1224,70.0,70 +1225,67.0,67 +1226,89.0,89 +1227,72.0,72 +1228,106.0,106 +1229,76.0,76 +1230,218.0,218 +1231,64.0,64 +1232,224.0,224 +1233,68.0,68 +1234,217.0,217 +1235,216.0,216 +1236,205.0,205 +1237,234.0,234 +1238,196.0,196 +1239,217.0,217 +1240,213.0,213 +1241,247.0,247 +1242,295.0,295 +1243,197.0,197 +1244,212.0,212 +1245,196.0,196 +1246,226.0,226 +1247,228.0,228 +1248,240.0,240 +1249,189.0,189 +1250,190.0,190 +1251,242.0,242 +1252,204.0,204 +1253,201.0,201 +1254,189.0,189 +1255,225.0,225 +1256,212.0,212 +1257,198.0,198 +1258,284.0,284 +1259,197.0,197 +1260,194.0,194 +1261,230.0,230 +1262,200.0,200 +1263,207.0,207 +1264,207.0,207 +1265,233.0,233 +1266,213.0,213 +1267,275.0,275 +1268,201.0,201 +1269,201.0,201 +1270,202.0,202 +1271,264.0,264 +1272,212.0,212 +1273,490.0,490 +1274,222.0,222 +1275,244.0,244 +1276,500.0,500 +1277,244.0,244 +1278,246.0,246 +1279,237.0,237 +1280,210.0,210 +1281,274.0,274 +1282,258.0,258 +1283,405.0,405 +1284,216.0,216 +1285,500.0,500 +1286,218.0,218 +1287,361.0,361 
+1288,262.0,262 +1289,500.0,500 +1290,194.0,194 +1291,361.0,361 +1292,194.0,194 +1293,229.0,229 +1294,361.0,361 +1295,270.0,270 +1296,297.0,297 +1297,225.0,225 +1298,214.0,214 +1299,256.0,256 +1300,330.0,330 +1301,347.0,347 +1302,372.0,372 +1303,183.0,183 +1304,300.0,300 +1305,313.0,313 +1306,227.0,227 +1307,307.0,307 +1308,296.0,296 +1309,212.0,212 +1310,244.0,244 +1311,206.0,206 +1312,262.0,262 +1313,274.0,274 +1314,225.0,225 +1315,209.0,209 +1316,272.0,272 +1317,213.0,213 +1318,262.0,262 +1319,214.0,214 +1320,224.0,224 +1321,368.0,368 +1322,264.0,264 +1323,243.0,243 +1324,299.0,299 +1325,210.0,210 +1326,310.0,310 +1327,252.0,252 +1328,201.0,201 +1329,193.0,193 +1330,220.0,220 +1331,224.0,224 +1332,203.0,203 +1333,268.0,268 +1334,288.0,288 +1335,330.0,330 +1336,331.0,331 +1337,249.0,249 +1338,222.0,222 +1339,237.0,237 +1340,204.0,204 +1341,329.0,329 +1342,232.0,232 +1343,175.0,175 +1344,251.0,251 +1345,259.0,259 +1346,220.0,220 +1347,246.0,246 +1348,215.0,215 +1349,257.0,257 +1350,350.0,350 +1351,269.0,269 +1352,266.0,266 +1353,220.0,220 +1354,276.0,276 +1355,281.0,281 +1356,200.0,200 +1357,274.0,274 +1358,260.0,260 +1359,393.0,393 +1360,240.0,240 +1361,197.0,197 +1362,273.0,273 +1363,220.0,220 +1364,228.0,228 +1365,337.0,337 +1366,203.0,203 +1367,500.0,500 +1368,214.0,214 +1369,271.0,271 +1370,211.0,211 +1371,264.0,264 +1372,338.0,338 +1373,298.0,298 +1374,358.0,358 +1375,454.0,454 +1376,317.0,317 +1377,283.0,283 +1378,441.0,441 +1379,343.0,343 +1380,270.0,270 +1381,263.0,263 +1382,405.0,405 +1383,255.0,255 +1384,500.0,500 +1385,389.0,389 +1386,212.0,212 +1387,339.0,339 +1388,225.0,225 +1389,500.0,500 +1390,467.0,467 +1391,237.0,237 +1392,257.0,257 +1393,352.0,352 +1394,264.0,264 +1395,452.0,452 +1396,388.0,388 +1397,447.0,447 +1398,258.0,258 +1399,269.0,269 +1400,264.0,264 +1401,238.0,238 +1402,258.0,258 +1403,433.0,433 +1404,500.0,500 +1405,298.0,298 +1406,500.0,500 +1407,287.0,287 +1408,329.0,329 +1409,500.0,500 +1410,424.0,424 +1411,239.0,239 +1412,350.0,350 
+1413,287.0,287 +1414,388.0,388 +1415,498.0,498 +1416,454.0,454 +1417,351.0,351 +1418,277.0,277 +1419,256.0,256 +1420,339.0,339 +1421,338.0,338 +1422,339.0,339 +1423,292.0,292 +1424,500.0,500 +1425,264.0,264 +1426,381.0,381 +1427,320.0,320 +1428,500.0,500 +1429,388.0,388 +1430,500.0,500 +1431,500.0,500 +1432,500.0,500 +1433,309.0,309 +1434,470.0,470 +1435,496.0,496 +1436,326.0,326 +1437,500.0,500 +1438,500.0,500 +1439,284.0,284 +1440,309.0,309 +1441,349.0,349 +1442,245.0,245 +1443,407.0,407 +1444,305.0,305 +1445,233.0,233 +1446,469.0,469 +1447,304.0,304 +1448,303.0,303 +1449,500.0,500 +1450,257.0,257 +1451,336.0,336 +1452,500.0,500 +1453,440.0,440 +1454,500.0,500 +1455,500.0,500 +1456,317.0,317 +1457,500.0,500 +1458,475.0,475 +1459,395.0,395 +1460,331.0,331 +1461,374.0,374 +1462,500.0,500 +1463,246.0,246 +1464,355.0,355 +1465,500.0,500 +1466,500.0,500 +1467,260.0,260 +1468,500.0,500 +1469,437.0,437 +1470,500.0,500 +1471,367.0,367 +1472,388.0,388 +1473,239.0,239 +1474,493.0,493 +1475,322.0,322 +1476,500.0,500 +1477,416.0,416 +1478,403.0,403 +1479,500.0,500 +1480,355.0,355 +1481,500.0,500 +1482,437.0,437 +1483,433.0,433 +1484,500.0,500 +1485,246.0,246 +1486,364.0,364 +1487,255.0,255 +1488,500.0,500 +1489,443.0,443 +1490,500.0,500 +1491,287.0,287 +1492,402.0,402 +1493,500.0,500 +1494,499.0,499 +1495,500.0,500 +1496,248.0,248 +1497,266.0,266 +1498,500.0,500 +1499,338.0,338 +1500,395.0,395 +1501,304.0,304 +1502,433.0,433 +1503,351.0,351 +1504,230.0,230 +1505,352.0,352 +1506,500.0,500 +1507,265.0,265 +1508,500.0,500 +1509,244.0,244 +1510,392.0,392 +1511,467.0,467 +1512,353.0,353 +1513,500.0,500 +1514,473.0,473 +1515,246.0,246 +1516,336.0,336 +1517,317.0,317 +1518,325.0,325 +1519,481.0,481 +1520,374.0,374 +1521,231.0,231 +1522,500.0,500 +1523,234.0,234 +1524,290.0,290 +1525,297.0,297 +1526,299.0,299 +1527,364.0,364 +1528,326.0,326 +1529,482.0,482 +1530,233.0,233 +1531,500.0,500 +1532,264.0,264 +1533,314.0,314 +1534,500.0,500 +1535,433.0,433 +1536,415.0,415 +1537,288.0,288 
+1538,458.0,458 +1539,308.0,308 +1540,500.0,500 +1541,459.0,459 +1542,273.0,273 +1543,500.0,500 +1544,500.0,500 +1545,470.0,470 +1546,364.0,364 +1547,425.0,425 +1548,374.0,374 +1549,399.0,399 +1550,500.0,500 +1551,500.0,500 +1552,500.0,500 +1553,497.0,497 +1554,272.0,272 +1555,268.0,268 +1556,292.0,292 +1557,500.0,500 +1558,281.0,281 +1559,272.0,272 +1560,411.0,411 +1561,500.0,500 +1562,430.0,430 +1563,415.0,415 +1564,500.0,500 +1565,464.0,464 +1566,436.0,436 +1567,500.0,500 +1568,344.0,344 +1569,395.0,395 +1570,385.0,385 +1571,232.0,232 +1572,260.0,260 +1573,499.0,499 +1574,411.0,411 +1575,500.0,500 +1576,290.0,290 +1577,321.0,321 +1578,481.0,481 +1579,473.0,473 +1580,301.0,301 +1581,404.0,404 +1582,410.0,410 +1583,437.0,437 +1584,311.0,311 +1585,500.0,500 +1586,231.0,231 +1587,376.0,376 +1588,359.0,359 +1589,276.0,276 +1590,457.0,457 +1591,500.0,500 +1592,318.0,318 +1593,500.0,500 +1594,309.0,309 +1595,481.0,481 +1596,274.0,274 +1597,331.0,331 +1598,500.0,500 +1599,259.0,259 +1600,500.0,500 +1601,291.0,291 +1602,499.0,499 +1603,256.0,256 +1604,266.0,266 +1605,500.0,500 +1606,325.0,325 +1607,359.0,359 +1608,274.0,274 +1609,357.0,357 +1610,465.0,465 +1611,500.0,500 +1612,435.0,435 +1613,268.0,268 +1614,251.0,251 +1615,252.0,252 +1616,275.0,275 +1617,284.0,284 +1618,416.0,416 +1619,229.0,229 +1620,500.0,500 +1621,265.0,265 +1622,354.0,354 +1623,251.0,251 +1624,381.0,381 +1625,279.0,279 +1626,267.0,267 +1627,232.0,232 +1628,365.0,365 +1629,500.0,500 +1630,489.0,489 +1631,500.0,500 +1632,243.0,243 +1633,253.0,253 +1634,334.0,334 +1635,500.0,500 +1636,280.0,280 +1637,268.0,268 +1638,356.0,356 +1639,500.0,500 +1640,253.0,253 +1641,244.0,244 +1642,237.0,237 +1643,421.0,421 +1644,247.0,247 +1645,378.0,378 +1646,252.0,252 +1647,282.0,282 +1648,247.0,247 +1649,289.0,289 +1650,226.0,226 +1651,289.0,289 +1652,480.0,480 +1653,500.0,500 +1654,270.0,270 +1655,309.0,309 +1656,292.0,292 +1657,272.0,272 +1658,233.0,233 +1659,261.0,261 +1660,500.0,500 +1661,316.0,316 +1662,310.0,310 
+1663,276.0,276 +1664,315.0,315 +1665,267.0,267 +1666,420.0,420 +1667,320.0,320 +1668,500.0,500 +1669,370.0,370 +1670,500.0,500 +1671,246.0,246 +1672,296.0,296 +1673,256.0,256 +1674,281.0,281 +1675,327.0,327 +1676,242.0,242 +1677,393.0,393 +1678,332.0,332 +1679,288.0,288 +1680,250.0,250 +1681,391.0,391 +1682,296.0,296 +1683,490.0,490 +1684,224.0,224 +1685,369.0,369 +1686,311.0,311 +1687,335.0,335 +1688,227.0,227 +1689,500.0,500 +1690,242.0,242 +1691,363.0,363 +1692,284.0,284 +1693,254.0,254 +1694,386.0,386 +1695,353.0,353 +1696,443.0,443 +1697,500.0,500 +1698,253.0,253 +1699,293.0,293 +1700,500.0,500 +1701,259.0,259 +1702,254.0,254 +1703,343.0,343 +1704,313.0,313 +1705,253.0,253 +1706,409.0,409 +1707,474.0,474 +1708,226.0,226 +1709,325.0,325 +1710,441.0,441 +1711,252.0,252 +1712,430.0,430 +1713,287.0,287 +1714,318.0,318 +1715,323.0,323 +1716,268.0,268 +1717,288.0,288 +1718,292.0,292 +1719,323.0,323 +1720,291.0,291 +1721,399.0,399 +1722,263.0,263 +1723,385.0,385 +1724,229.0,229 +1725,282.0,282 +1726,347.0,347 +1727,257.0,257 +1728,264.0,264 +1729,282.0,282 +1730,402.0,402 +1731,328.0,328 +1732,227.0,227 +1733,272.0,272 +1734,462.0,462 +1735,236.0,236 +1736,302.0,302 +1737,275.0,275 +1738,280.0,280 +1739,331.0,331 +1740,352.0,352 +1741,500.0,500 +1742,389.0,389 +1743,303.0,303 +1744,398.0,398 +1745,359.0,359 +1746,436.0,436 +1747,233.0,233 +1748,295.0,295 +1749,234.0,234 +1750,290.0,290 +1751,261.0,261 +1752,248.0,248 +1753,263.0,263 +1754,368.0,368 +1755,500.0,500 +1756,276.0,276 +1757,243.0,243 +1758,500.0,500 +1759,289.0,289 +1760,500.0,500 +1761,275.0,275 +1762,297.0,297 +1763,250.0,250 +1764,405.0,405 +1765,261.0,261 +1766,239.0,239 +1767,351.0,351 +1768,301.0,301 +1769,384.0,384 +1770,240.0,240 +1771,258.0,258 +1772,258.0,258 +1773,249.0,249 +1774,254.0,254 +1775,374.0,374 +1776,347.0,347 +1777,377.0,377 +1778,263.0,263 +1779,229.0,229 +1780,292.0,292 +1781,259.0,259 +1782,259.0,259 +1783,260.0,260 +1784,267.0,267 +1785,256.0,256 +1786,306.0,306 +1787,238.0,238 
+1788,257.0,257 +1789,252.0,252 +1790,293.0,293 +1791,273.0,273 +1792,308.0,308 +1793,291.0,291 +1794,342.0,342 +1795,273.0,273 +1796,257.0,257 +1797,221.0,221 +1798,276.0,276 +1799,279.0,279 +1800,269.0,269 +1801,291.0,291 +1802,359.0,359 +1803,431.0,431 +1804,375.0,375 +1805,298.0,298 +1806,253.0,253 +1807,276.0,276 +1808,258.0,258 +1809,242.0,242 +1810,397.0,397 +1811,394.0,394 +1812,323.0,323 +1813,257.0,257 +1814,343.0,343 +1815,287.0,287 +1816,372.0,372 +1817,294.0,294 +1818,261.0,261 +1819,270.0,270 +1820,284.0,284 +1821,247.0,247 +1822,372.0,372 +1823,292.0,292 +1824,357.0,357 +1825,247.0,247 +1826,355.0,355 +1827,447.0,447 +1828,251.0,251 +1829,375.0,375 +1830,262.0,262 +1831,340.0,340 +1832,243.0,243 +1833,261.0,261 +1834,247.0,247 +1835,499.0,499 +1836,242.0,242 +1837,237.0,237 +1838,255.0,255 +1839,320.0,320 +1840,216.0,216 +1841,356.0,356 +1842,261.0,261 +1843,247.0,247 +1844,229.0,229 +1845,238.0,238 +1846,233.0,233 +1847,232.0,232 +1848,234.0,234 +1849,391.0,391 +1850,273.0,273 +1851,438.0,438 +1852,402.0,402 +1853,394.0,394 +1854,287.0,287 +1855,230.0,230 +1856,251.0,251 +1857,278.0,278 +1858,378.0,378 +1859,249.0,249 +1860,271.0,271 +1861,296.0,296 +1862,256.0,256 +1863,270.0,270 +1864,500.0,500 +1865,385.0,385 +1866,284.0,284 +1867,248.0,248 +1868,283.0,283 +1869,246.0,246 +1870,339.0,339 +1871,415.0,415 +1872,276.0,276 +1873,275.0,275 +1874,457.0,457 +1875,500.0,500 +1876,281.0,281 +1877,324.0,324 +1878,414.0,414 +1879,314.0,314 +1880,449.0,449 +1881,281.0,281 +1882,368.0,368 +1883,322.0,322 +1884,235.0,235 +1885,337.0,337 +1886,500.0,500 +1887,311.0,311 +1888,347.0,347 +1889,365.0,365 +1890,272.0,272 +1891,342.0,342 +1892,379.0,379 +1893,247.0,247 +1894,321.0,321 +1895,403.0,403 +1896,464.0,464 +1897,330.0,330 +1898,361.0,361 +1899,500.0,500 +1900,433.0,433 +1901,500.0,500 +1902,293.0,293 +1903,386.0,386 +1904,283.0,283 +1905,366.0,366 +1906,278.0,278 +1907,279.0,279 +1908,415.0,415 +1909,480.0,480 +1910,500.0,500 +1911,353.0,353 +1912,500.0,500 
+1913,269.0,269 +1914,500.0,500 +1915,385.0,385 +1916,246.0,246 +1917,481.0,481 +1918,500.0,500 +1919,462.0,462 +1920,373.0,373 +1921,500.0,500 +1922,272.0,272 +1923,500.0,500 +1924,495.0,495 +1925,500.0,500 +1926,295.0,295 +1927,249.0,249 +1928,256.0,256 +1929,500.0,500 +1930,317.0,317 +1931,500.0,500 +1932,317.0,317 +1933,258.0,258 +1934,380.0,380 +1935,402.0,402 +1936,500.0,500 +1937,319.0,319 +1938,319.0,319 +1939,500.0,500 +1940,447.0,447 +1941,500.0,500 +1942,459.0,459 +1943,500.0,500 +1944,299.0,299 +1945,290.0,290 +1946,318.0,318 +1947,500.0,500 +1948,500.0,500 +1949,500.0,500 +1950,500.0,500 +1951,478.0,478 +1952,500.0,500 +1953,500.0,500 +1954,330.0,330 +1955,366.0,366 +1956,500.0,500 +1957,283.0,283 +1958,300.0,300 +1959,292.0,292 +1960,270.0,270 +1961,500.0,500 +1962,474.0,474 +1963,328.0,328 +1964,389.0,389 +1965,500.0,500 +1966,493.0,493 +1967,357.0,357 +1968,500.0,500 +1969,500.0,500 +1970,500.0,500 +1971,320.0,320 +1972,385.0,385 +1973,500.0,500 +1974,422.0,422 +1975,405.0,405 +1976,500.0,500 +1977,363.0,363 +1978,329.0,329 +1979,309.0,309 +1980,500.0,500 +1981,500.0,500 +1982,277.0,277 +1983,461.0,461 +1984,262.0,262 +1985,500.0,500 +1986,500.0,500 +1987,370.0,370 +1988,500.0,500 +1989,255.0,255 +1990,449.0,449 +1991,361.0,361 +1992,319.0,319 +1993,382.0,382 +1994,363.0,363 +1995,500.0,500 +1996,336.0,336 +1997,500.0,500 +1998,500.0,500 +1999,500.0,500 diff --git a/projects/codes/PolicyGradient/main.py b/projects/codes/PolicyGradient/main.py index 2e4c5e4..3473c38 100644 --- a/projects/codes/PolicyGradient/main.py +++ b/projects/codes/PolicyGradient/main.py @@ -5,7 +5,7 @@ Author: John Email: johnjim0816@gmail.com Date: 2020-11-22 23:21:53 LastEditor: John -LastEditTime: 2022-08-25 20:59:23 +LastEditTime: 2022-08-27 00:04:08 Discription: Environment: ''' @@ -34,7 +34,7 @@ class PGNet(MLP): def forward(self, x): x = F.relu(self.fc1(x)) x = F.relu(self.fc2(x)) - x = F.sigmoid(self.fc3(x)) + x = torch.sigmoid(self.fc3(x)) return x class 
Main(Launcher): @@ -47,8 +47,9 @@ class Main(Launcher): parser.add_argument('--env_name',default='CartPole-v0',type=str,help="name of environment") parser.add_argument('--train_eps',default=200,type=int,help="episodes of training") parser.add_argument('--test_eps',default=20,type=int,help="episodes of testing") + parser.add_argument('--ep_max_steps',default = 100000,type=int,help="steps per episode, much larger value can simulate infinite steps") parser.add_argument('--gamma',default=0.99,type=float,help="discounted factor") - parser.add_argument('--lr',default=0.005,type=float,help="learning rate") + parser.add_argument('--lr',default=0.01,type=float,help="learning rate") parser.add_argument('--update_fre',default=8,type=int) parser.add_argument('--hidden_dim',default=36,type=int) parser.add_argument('--device',default='cpu',type=str,help="cpu or cuda") @@ -81,7 +82,7 @@ class Main(Launcher): for i_ep in range(cfg['train_eps']): state = env.reset() ep_reward = 0 - for _ in count(): + for _ in range(cfg['ep_max_steps']): action = agent.sample_action(state) # sample action next_state, reward, done, _ = env.step(action) ep_reward += reward @@ -90,8 +91,9 @@ class Main(Launcher): agent.memory.push((state,float(action),reward)) state = next_state if done: - print(f"Episode:{i_ep+1}/{cfg['train_eps']}, Reward:{ep_reward:.2f}") break + if (i_ep+1) % 10 == 0: + print(f"Episode:{i_ep+1}/{cfg['train_eps']}, Reward:{ep_reward:.2f}") if (i_ep+1) % cfg['update_fre'] == 0: agent.update() rewards.append(ep_reward) @@ -107,7 +109,7 @@ class Main(Launcher): for i_ep in range(cfg['test_eps']): state = env.reset() ep_reward = 0 - for _ in count(): + for _ in range(cfg['ep_max_steps']): action = agent.predict_action(state) next_state, reward, done, _ = env.step(action) ep_reward += reward @@ -115,9 +117,9 @@ class Main(Launcher): reward = 0 state = next_state if done: - print(f"Episode: {i_ep+1}/{cfg['test_eps']},Reward: {ep_reward:.2f}") break - rewards.append(ep_reward) + 
print(f"Episode: {i_ep+1}/{cfg['test_eps']},Reward: {ep_reward:.2f}") + rewards.append(ep_reward) print("Finish testing!") env.close() return {'episodes':range(len(rewards)),'rewards':rewards} diff --git a/projects/codes/PolicyGradient/outputs/CartPole-v0/20220825-205930/models/checkpoint.pt b/projects/codes/PolicyGradient/outputs/CartPole-v0/20220825-205930/models/checkpoint.pt deleted file mode 100644 index 2676e7a..0000000 Binary files a/projects/codes/PolicyGradient/outputs/CartPole-v0/20220825-205930/models/checkpoint.pt and /dev/null differ diff --git a/projects/codes/PolicyGradient/outputs/CartPole-v0/20220825-205930/results/params.json b/projects/codes/PolicyGradient/outputs/CartPole-v0/20220825-205930/results/params.json deleted file mode 100644 index 2a3810d..0000000 --- a/projects/codes/PolicyGradient/outputs/CartPole-v0/20220825-205930/results/params.json +++ /dev/null @@ -1 +0,0 @@ -{"algo_name": "PolicyGradient", "env_name": "CartPole-v0", "train_eps": 200, "test_eps": 20, "gamma": 0.99, "lr": 0.005, "update_fre": 8, "hidden_dim": 36, "device": "cpu", "seed": 1, "save_fig": true, "show_fig": false, "result_path": "/Users/jj/Desktop/rl-tutorials/codes/PolicyGradient/outputs/CartPole-v0/20220825-205930/results/", "model_path": "/Users/jj/Desktop/rl-tutorials/codes/PolicyGradient/outputs/CartPole-v0/20220825-205930/models/", "n_states": 4, "n_actions": 2} \ No newline at end of file diff --git a/projects/codes/PolicyGradient/outputs/CartPole-v0/20220825-205930/results/testing_curve.png b/projects/codes/PolicyGradient/outputs/CartPole-v0/20220825-205930/results/testing_curve.png deleted file mode 100644 index a38dd4b..0000000 Binary files a/projects/codes/PolicyGradient/outputs/CartPole-v0/20220825-205930/results/testing_curve.png and /dev/null differ diff --git a/projects/codes/PolicyGradient/outputs/CartPole-v0/20220825-205930/results/training_curve.png b/projects/codes/PolicyGradient/outputs/CartPole-v0/20220825-205930/results/training_curve.png 
deleted file mode 100644 index 3e0db7c..0000000 Binary files a/projects/codes/PolicyGradient/outputs/CartPole-v0/20220825-205930/results/training_curve.png and /dev/null differ diff --git a/projects/codes/PolicyGradient/outputs/CartPole-v0/20220825-205930/results/training_results.csv b/projects/codes/PolicyGradient/outputs/CartPole-v0/20220825-205930/results/training_results.csv deleted file mode 100644 index daeb8f2..0000000 --- a/projects/codes/PolicyGradient/outputs/CartPole-v0/20220825-205930/results/training_results.csv +++ /dev/null @@ -1,201 +0,0 @@ -episodes,rewards -0,26.0 -1,53.0 -2,10.0 -3,37.0 -4,22.0 -5,21.0 -6,12.0 -7,34.0 -8,38.0 -9,40.0 -10,23.0 -11,14.0 -12,16.0 -13,25.0 -14,15.0 -15,23.0 -16,11.0 -17,28.0 -18,21.0 -19,62.0 -20,33.0 -21,27.0 -22,15.0 -23,17.0 -24,26.0 -25,35.0 -26,26.0 -27,14.0 -28,42.0 -29,45.0 -30,34.0 -31,39.0 -32,31.0 -33,17.0 -34,42.0 -35,41.0 -36,31.0 -37,39.0 -38,28.0 -39,12.0 -40,36.0 -41,33.0 -42,47.0 -43,40.0 -44,63.0 -45,36.0 -46,64.0 -47,79.0 -48,49.0 -49,40.0 -50,65.0 -51,47.0 -52,51.0 -53,30.0 -54,26.0 -55,41.0 -56,86.0 -57,61.0 -58,38.0 -59,200.0 -60,49.0 -61,70.0 -62,61.0 -63,101.0 -64,200.0 -65,152.0 -66,108.0 -67,46.0 -68,72.0 -69,87.0 -70,27.0 -71,126.0 -72,46.0 -73,25.0 -74,14.0 -75,42.0 -76,38.0 -77,55.0 -78,42.0 -79,51.0 -80,67.0 -81,83.0 -82,178.0 -83,115.0 -84,140.0 -85,97.0 -86,85.0 -87,61.0 -88,153.0 -89,200.0 -90,200.0 -91,200.0 -92,200.0 -93,64.0 -94,200.0 -95,200.0 -96,157.0 -97,128.0 -98,160.0 -99,35.0 -100,140.0 -101,113.0 -102,200.0 -103,154.0 -104,200.0 -105,200.0 -106,200.0 -107,198.0 -108,137.0 -109,200.0 -110,200.0 -111,102.0 -112,200.0 -113,200.0 -114,200.0 -115,200.0 -116,148.0 -117,200.0 -118,200.0 -119,200.0 -120,200.0 -121,200.0 -122,194.0 -123,200.0 -124,200.0 -125,200.0 -126,183.0 -127,200.0 -128,200.0 -129,200.0 -130,200.0 -131,200.0 -132,200.0 -133,200.0 -134,200.0 -135,200.0 -136,93.0 -137,96.0 -138,84.0 -139,103.0 -140,79.0 -141,104.0 -142,82.0 -143,105.0 -144,200.0 -145,200.0 
-146,171.0 -147,200.0 -148,200.0 -149,200.0 -150,200.0 -151,197.0 -152,133.0 -153,142.0 -154,147.0 -155,156.0 -156,131.0 -157,181.0 -158,163.0 -159,146.0 -160,200.0 -161,176.0 -162,200.0 -163,173.0 -164,177.0 -165,200.0 -166,200.0 -167,200.0 -168,200.0 -169,200.0 -170,200.0 -171,200.0 -172,200.0 -173,200.0 -174,200.0 -175,200.0 -176,200.0 -177,200.0 -178,200.0 -179,200.0 -180,200.0 -181,200.0 -182,200.0 -183,200.0 -184,200.0 -185,200.0 -186,200.0 -187,200.0 -188,200.0 -189,200.0 -190,200.0 -191,200.0 -192,200.0 -193,200.0 -194,200.0 -195,200.0 -196,190.0 -197,200.0 -198,189.0 -199,200.0 diff --git a/projects/codes/PolicyGradient/outputs/CartPole-v0/20220827-000433/models/checkpoint.pt b/projects/codes/PolicyGradient/outputs/CartPole-v0/20220827-000433/models/checkpoint.pt new file mode 100644 index 0000000..7b98cda Binary files /dev/null and b/projects/codes/PolicyGradient/outputs/CartPole-v0/20220827-000433/models/checkpoint.pt differ diff --git a/projects/codes/PolicyGradient/outputs/CartPole-v0/20220827-000433/results/params.json b/projects/codes/PolicyGradient/outputs/CartPole-v0/20220827-000433/results/params.json new file mode 100644 index 0000000..4dfae79 --- /dev/null +++ b/projects/codes/PolicyGradient/outputs/CartPole-v0/20220827-000433/results/params.json @@ -0,0 +1 @@ +{"algo_name": "PolicyGradient", "env_name": "CartPole-v0", "train_eps": 200, "test_eps": 20, "ep_max_steps": 100000, "gamma": 0.99, "lr": 0.01, "update_fre": 8, "hidden_dim": 36, "device": "cpu", "seed": 1, "save_fig": true, "show_fig": false, "result_path": "c:\\Users\\24438\\Desktop\\rl-tutorials\\codes\\PolicyGradient/outputs/CartPole-v0/20220827-000433/results/", "model_path": "c:\\Users\\24438\\Desktop\\rl-tutorials\\codes\\PolicyGradient/outputs/CartPole-v0/20220827-000433/models/", "n_states": 4, "n_actions": 2} \ No newline at end of file diff --git a/projects/codes/PolicyGradient/outputs/CartPole-v0/20220827-000433/results/testing_curve.png 
b/projects/codes/PolicyGradient/outputs/CartPole-v0/20220827-000433/results/testing_curve.png new file mode 100644 index 0000000..e3c3489 Binary files /dev/null and b/projects/codes/PolicyGradient/outputs/CartPole-v0/20220827-000433/results/testing_curve.png differ diff --git a/projects/codes/PolicyGradient/outputs/CartPole-v0/20220825-205930/results/testing_results.csv b/projects/codes/PolicyGradient/outputs/CartPole-v0/20220827-000433/results/testing_results.csv similarity index 81% rename from projects/codes/PolicyGradient/outputs/CartPole-v0/20220825-205930/results/testing_results.csv rename to projects/codes/PolicyGradient/outputs/CartPole-v0/20220827-000433/results/testing_results.csv index 958b0ef..fb73fd6 100644 --- a/projects/codes/PolicyGradient/outputs/CartPole-v0/20220825-205930/results/testing_results.csv +++ b/projects/codes/PolicyGradient/outputs/CartPole-v0/20220827-000433/results/testing_results.csv @@ -1,7 +1,7 @@ episodes,rewards 0,200.0 1,200.0 -2,165.0 +2,200.0 3,200.0 4,200.0 5,200.0 @@ -10,12 +10,12 @@ episodes,rewards 8,200.0 9,200.0 10,200.0 -11,168.0 +11,200.0 12,200.0 13,200.0 14,200.0 -15,115.0 -16,198.0 +15,200.0 +16,200.0 17,200.0 18,200.0 19,200.0 diff --git a/projects/codes/PolicyGradient/outputs/CartPole-v0/20220827-000433/results/training_curve.png b/projects/codes/PolicyGradient/outputs/CartPole-v0/20220827-000433/results/training_curve.png new file mode 100644 index 0000000..1f954a1 Binary files /dev/null and b/projects/codes/PolicyGradient/outputs/CartPole-v0/20220827-000433/results/training_curve.png differ diff --git a/projects/codes/PolicyGradient/outputs/CartPole-v0/20220827-000433/results/training_results.csv b/projects/codes/PolicyGradient/outputs/CartPole-v0/20220827-000433/results/training_results.csv new file mode 100644 index 0000000..715be6d --- /dev/null +++ b/projects/codes/PolicyGradient/outputs/CartPole-v0/20220827-000433/results/training_results.csv @@ -0,0 +1,201 @@ +episodes,rewards +0,26.0 +1,53.0 +2,10.0 
+3,37.0 +4,22.0 +5,21.0 +6,12.0 +7,34.0 +8,93.0 +9,36.0 +10,29.0 +11,18.0 +12,14.0 +13,62.0 +14,20.0 +15,40.0 +16,10.0 +17,10.0 +18,10.0 +19,11.0 +20,10.0 +21,14.0 +22,12.0 +23,8.0 +24,19.0 +25,33.0 +26,22.0 +27,32.0 +28,16.0 +29,24.0 +30,24.0 +31,24.0 +32,75.0 +33,33.0 +34,33.0 +35,72.0 +36,110.0 +37,48.0 +38,60.0 +39,43.0 +40,61.0 +41,34.0 +42,50.0 +43,61.0 +44,53.0 +45,58.0 +46,36.0 +47,44.0 +48,42.0 +49,64.0 +50,67.0 +51,52.0 +52,39.0 +53,42.0 +54,40.0 +55,33.0 +56,200.0 +57,199.0 +58,149.0 +59,185.0 +60,134.0 +61,174.0 +62,162.0 +63,200.0 +64,93.0 +65,72.0 +66,69.0 +67,51.0 +68,62.0 +69,98.0 +70,73.0 +71,73.0 +72,200.0 +73,200.0 +74,200.0 +75,200.0 +76,200.0 +77,200.0 +78,200.0 +79,133.0 +80,200.0 +81,200.0 +82,200.0 +83,200.0 +84,200.0 +85,200.0 +86,200.0 +87,200.0 +88,114.0 +89,151.0 +90,129.0 +91,156.0 +92,112.0 +93,172.0 +94,171.0 +95,141.0 +96,200.0 +97,200.0 +98,200.0 +99,200.0 +100,200.0 +101,200.0 +102,200.0 +103,200.0 +104,188.0 +105,199.0 +106,138.0 +107,200.0 +108,200.0 +109,181.0 +110,145.0 +111,200.0 +112,135.0 +113,119.0 +114,112.0 +115,122.0 +116,118.0 +117,119.0 +118,131.0 +119,119.0 +120,109.0 +121,96.0 +122,105.0 +123,29.0 +124,110.0 +125,113.0 +126,18.0 +127,90.0 +128,145.0 +129,152.0 +130,151.0 +131,109.0 +132,141.0 +133,109.0 +134,136.0 +135,143.0 +136,200.0 +137,200.0 +138,200.0 +139,200.0 +140,200.0 +141,200.0 +142,200.0 +143,200.0 +144,192.0 +145,173.0 +146,180.0 +147,182.0 +148,186.0 +149,175.0 +150,176.0 +151,191.0 +152,200.0 +153,200.0 +154,200.0 +155,200.0 +156,200.0 +157,200.0 +158,200.0 +159,200.0 +160,200.0 +161,200.0 +162,200.0 +163,200.0 +164,200.0 +165,200.0 +166,200.0 +167,200.0 +168,200.0 +169,200.0 +170,200.0 +171,200.0 +172,200.0 +173,200.0 +174,200.0 +175,200.0 +176,200.0 +177,200.0 +178,200.0 +179,200.0 +180,200.0 +181,200.0 +182,200.0 +183,200.0 +184,200.0 +185,200.0 +186,200.0 +187,200.0 +188,200.0 +189,200.0 +190,200.0 +191,200.0 +192,200.0 +193,200.0 +194,200.0 +195,200.0 +196,200.0 +197,200.0 +198,200.0 +199,200.0 
diff --git a/projects/codes/PolicyGradient/pg.py b/projects/codes/PolicyGradient/pg.py index aeef3f8..d0b4956 100644 --- a/projects/codes/PolicyGradient/pg.py +++ b/projects/codes/PolicyGradient/pg.py @@ -5,7 +5,7 @@ Author: John Email: johnjim0816@gmail.com Date: 2020-11-22 23:27:44 LastEditor: John -LastEditTime: 2022-08-25 20:58:59 +LastEditTime: 2022-08-27 13:45:26 Discription: Environment: ''' @@ -31,8 +31,11 @@ class PolicyGradient: state = torch.from_numpy(state).float() state = Variable(state) probs = self.policy_net(state) + print("probs") + print(probs) m = Bernoulli(probs) # 伯努利分布 action = m.sample() + action = action.data.numpy().astype(int)[0] # 转为标量 return action def predict_action(self,state): diff --git a/projects/codes/QLearning/main.py b/projects/codes/QLearning/main.py index 8f423ef..7adbfbe 100644 --- a/projects/codes/QLearning/main.py +++ b/projects/codes/QLearning/main.py @@ -5,7 +5,7 @@ Author: John Email: johnjim0816@gmail.com Date: 2020-09-11 23:03:00 LastEditor: John -LastEditTime: 2022-08-25 14:59:15 +LastEditTime: 2022-08-26 22:46:21 Discription: Environment: ''' @@ -57,7 +57,10 @@ class Main(Launcher): env = CliffWalkingWapper(env) if cfg['seed'] !=0: # set random seed all_seed(env,seed=cfg["seed"]) - n_states = env.observation_space.n # state dimension + try: # state dimension + n_states = env.observation_space.n # print(hasattr(env.observation_space, 'n')) + except AttributeError: + n_states = env.observation_space.shape[0] # print(hasattr(env.observation_space, 'shape')) n_actions = env.action_space.n # action dimension print(f"n_states: {n_states}, n_actions: {n_actions}") cfg.update({"n_states":n_states,"n_actions":n_actions}) # update to cfg paramters diff --git a/projects/codes/QLearning/outputs/CliffWalking-v0/20220824-103255/results/testing_results.csv b/projects/codes/QLearning/outputs/CliffWalking-v0/20220824-103255/results/testing_results.csv deleted file mode 100644 index 86359a6..0000000 --- 
a/projects/codes/QLearning/outputs/CliffWalking-v0/20220824-103255/results/testing_results.csv +++ /dev/null @@ -1,21 +0,0 @@ -episodes,rewards -0,-13 -1,-13 -2,-13 -3,-13 -4,-13 -5,-13 -6,-13 -7,-13 -8,-13 -9,-13 -10,-13 -11,-13 -12,-13 -13,-13 -14,-13 -15,-13 -16,-13 -17,-13 -18,-13 -19,-13 diff --git a/projects/codes/QLearning/outputs/CliffWalking-v0/20220824-103255/results/training_results.csv b/projects/codes/QLearning/outputs/CliffWalking-v0/20220824-103255/results/training_results.csv deleted file mode 100644 index 99b7e5f..0000000 --- a/projects/codes/QLearning/outputs/CliffWalking-v0/20220824-103255/results/training_results.csv +++ /dev/null @@ -1,401 +0,0 @@ -episodes,rewards -0,-2131 -1,-1086 -2,-586 -3,-220 -4,-154 -5,-122 -6,-150 -7,-159 -8,-164 -9,-88 -10,-195 -11,-114 -12,-60 -13,-179 -14,-101 -15,-304 -16,-96 -17,-119 -18,-113 -19,-98 -20,-106 -21,-105 -22,-77 -23,-51 -24,-105 -25,-136 -26,-100 -27,-29 -28,-79 -29,-114 -30,-82 -31,-70 -32,-75 -33,-51 -34,-94 -35,-52 -36,-93 -37,-71 -38,-73 -39,-48 -40,-52 -41,-96 -42,-46 -43,-65 -44,-57 -45,-41 -46,-104 -47,-51 -48,-181 -49,-229 -50,-39 -51,-69 -52,-53 -53,-59 -54,-26 -55,-75 -56,-31 -57,-60 -58,-63 -59,-40 -60,-35 -61,-79 -62,-42 -63,-22 -64,-73 -65,-71 -66,-18 -67,-55 -68,-29 -69,-43 -70,-70 -71,-49 -72,-42 -73,-29 -74,-81 -75,-36 -76,-38 -77,-36 -78,-52 -79,-28 -80,-42 -81,-52 -82,-66 -83,-31 -84,-27 -85,-49 -86,-28 -87,-54 -88,-34 -89,-35 -90,-50 -91,-36 -92,-36 -93,-46 -94,-34 -95,-135 -96,-39 -97,-36 -98,-26 -99,-56 -100,-40 -101,-40 -102,-26 -103,-28 -104,-31 -105,-35 -106,-26 -107,-57 -108,-44 -109,-41 -110,-31 -111,-26 -112,-25 -113,-41 -114,-32 -115,-44 -116,-30 -117,-32 -118,-30 -119,-25 -120,-23 -121,-47 -122,-24 -123,-45 -124,-39 -125,-21 -126,-43 -127,-143 -128,-26 -129,-20 -130,-32 -131,-16 -132,-24 -133,-42 -134,-25 -135,-36 -136,-19 -137,-29 -138,-43 -139,-17 -140,-150 -141,-32 -142,-34 -143,-19 -144,-26 -145,-30 -146,-31 -147,-49 -148,-33 -149,-21 -150,-17 -151,-48 -152,-34 
-153,-20 -154,-20 -155,-26 -156,-21 -157,-13 -158,-40 -159,-22 -160,-26 -161,-30 -162,-29 -163,-25 -164,-26 -165,-27 -166,-21 -167,-29 -168,-24 -169,-17 -170,-22 -171,-35 -172,-35 -173,-18 -174,-135 -175,-15 -176,-23 -177,-28 -178,-25 -179,-24 -180,-29 -181,-31 -182,-24 -183,-129 -184,-45 -185,-24 -186,-17 -187,-20 -188,-21 -189,-23 -190,-15 -191,-32 -192,-22 -193,-19 -194,-17 -195,-45 -196,-15 -197,-14 -198,-14 -199,-37 -200,-23 -201,-17 -202,-19 -203,-21 -204,-23 -205,-27 -206,-14 -207,-18 -208,-23 -209,-34 -210,-23 -211,-13 -212,-25 -213,-17 -214,-13 -215,-21 -216,-29 -217,-18 -218,-24 -219,-15 -220,-27 -221,-25 -222,-21 -223,-19 -224,-17 -225,-18 -226,-13 -227,-22 -228,-14 -229,-13 -230,-29 -231,-23 -232,-15 -233,-15 -234,-14 -235,-28 -236,-25 -237,-17 -238,-23 -239,-29 -240,-15 -241,-14 -242,-15 -243,-23 -244,-15 -245,-16 -246,-19 -247,-13 -248,-16 -249,-17 -250,-25 -251,-30 -252,-13 -253,-14 -254,-15 -255,-22 -256,-14 -257,-17 -258,-126 -259,-15 -260,-21 -261,-16 -262,-23 -263,-14 -264,-13 -265,-13 -266,-19 -267,-13 -268,-19 -269,-17 -270,-17 -271,-13 -272,-19 -273,-13 -274,-13 -275,-16 -276,-22 -277,-14 -278,-15 -279,-19 -280,-34 -281,-13 -282,-15 -283,-32 -284,-13 -285,-13 -286,-13 -287,-14 -288,-16 -289,-13 -290,-13 -291,-17 -292,-13 -293,-13 -294,-22 -295,-14 -296,-15 -297,-13 -298,-13 -299,-13 -300,-16 -301,-13 -302,-14 -303,-13 -304,-13 -305,-13 -306,-24 -307,-13 -308,-13 -309,-15 -310,-13 -311,-13 -312,-13 -313,-15 -314,-13 -315,-19 -316,-15 -317,-17 -318,-13 -319,-13 -320,-13 -321,-13 -322,-13 -323,-15 -324,-13 -325,-13 -326,-13 -327,-123 -328,-13 -329,-13 -330,-13 -331,-13 -332,-13 -333,-13 -334,-13 -335,-13 -336,-16 -337,-13 -338,-23 -339,-13 -340,-13 -341,-13 -342,-13 -343,-13 -344,-13 -345,-13 -346,-13 -347,-13 -348,-13 -349,-13 -350,-134 -351,-13 -352,-13 -353,-13 -354,-13 -355,-13 -356,-13 -357,-13 -358,-13 -359,-13 -360,-15 -361,-13 -362,-13 -363,-13 -364,-13 -365,-13 -366,-13 -367,-13 -368,-13 -369,-14 -370,-13 -371,-13 -372,-13 -373,-13 
-374,-13 -375,-13 -376,-13 -377,-124 -378,-13 -379,-13 -380,-13 -381,-13 -382,-13 -383,-13 -384,-13 -385,-13 -386,-13 -387,-13 -388,-13 -389,-121 -390,-13 -391,-13 -392,-13 -393,-13 -394,-13 -395,-13 -396,-13 -397,-13 -398,-17 -399,-13 diff --git a/projects/codes/QLearning/outputs/CliffWalking-v0/20220824-103255/models/Qleaning_model.pkl b/projects/codes/QLearning/outputs/CliffWalking-v0/20220826-224730/models/Qleaning_model.pkl similarity index 94% rename from projects/codes/QLearning/outputs/CliffWalking-v0/20220824-103255/models/Qleaning_model.pkl rename to projects/codes/QLearning/outputs/CliffWalking-v0/20220826-224730/models/Qleaning_model.pkl index a328ce9..2369fe1 100644 Binary files a/projects/codes/QLearning/outputs/CliffWalking-v0/20220824-103255/models/Qleaning_model.pkl and b/projects/codes/QLearning/outputs/CliffWalking-v0/20220826-224730/models/Qleaning_model.pkl differ diff --git a/projects/codes/QLearning/outputs/CliffWalking-v0/20220826-224730/results/params.json b/projects/codes/QLearning/outputs/CliffWalking-v0/20220826-224730/results/params.json new file mode 100644 index 0000000..09764c9 --- /dev/null +++ b/projects/codes/QLearning/outputs/CliffWalking-v0/20220826-224730/results/params.json @@ -0,0 +1 @@ +{"algo_name": "Q-learning", "env_name": "CliffWalking-v0", "train_eps": 400, "test_eps": 20, "gamma": 0.9, "epsilon_start": 0.95, "epsilon_end": 0.01, "epsilon_decay": 300, "lr": 0.1, "device": "cpu", "seed": 10, "show_fig": false, "save_fig": true, "result_path": "C:\\Users\\24438\\Desktop\\rl-tutorials\\codes\\QLearning/outputs/CliffWalking-v0/20220826-224730/results/", "model_path": "C:\\Users\\24438\\Desktop\\rl-tutorials\\codes\\QLearning/outputs/CliffWalking-v0/20220826-224730/models/", "n_states": 48, "n_actions": 4} \ No newline at end of file diff --git a/projects/codes/QLearning/outputs/CliffWalking-v0/20220824-103255/results/testing_curve.png 
b/projects/codes/QLearning/outputs/CliffWalking-v0/20220826-224730/results/testing_curve.png similarity index 100% rename from projects/codes/QLearning/outputs/CliffWalking-v0/20220824-103255/results/testing_curve.png rename to projects/codes/QLearning/outputs/CliffWalking-v0/20220826-224730/results/testing_curve.png diff --git a/projects/codes/QLearning/outputs/CliffWalking-v0/20220826-224730/results/testing_results.csv b/projects/codes/QLearning/outputs/CliffWalking-v0/20220826-224730/results/testing_results.csv new file mode 100644 index 0000000..c48c7ef --- /dev/null +++ b/projects/codes/QLearning/outputs/CliffWalking-v0/20220826-224730/results/testing_results.csv @@ -0,0 +1,21 @@ +episodes,rewards,steps +0,-13,13 +1,-13,13 +2,-13,13 +3,-13,13 +4,-13,13 +5,-13,13 +6,-13,13 +7,-13,13 +8,-13,13 +9,-13,13 +10,-13,13 +11,-13,13 +12,-13,13 +13,-13,13 +14,-13,13 +15,-13,13 +16,-13,13 +17,-13,13 +18,-13,13 +19,-13,13 diff --git a/projects/codes/QLearning/outputs/CliffWalking-v0/20220824-103255/results/training_curve.png b/projects/codes/QLearning/outputs/CliffWalking-v0/20220826-224730/results/training_curve.png similarity index 100% rename from projects/codes/QLearning/outputs/CliffWalking-v0/20220824-103255/results/training_curve.png rename to projects/codes/QLearning/outputs/CliffWalking-v0/20220826-224730/results/training_curve.png diff --git a/projects/codes/QLearning/outputs/CliffWalking-v0/20220826-224730/results/training_results.csv b/projects/codes/QLearning/outputs/CliffWalking-v0/20220826-224730/results/training_results.csv new file mode 100644 index 0000000..523dc54 --- /dev/null +++ b/projects/codes/QLearning/outputs/CliffWalking-v0/20220826-224730/results/training_results.csv @@ -0,0 +1,401 @@ +episodes,rewards,steps +0,-2131,448 +1,-1086,492 +2,-586,388 +3,-220,220 +4,-154,154 +5,-122,122 +6,-150,150 +7,-159,159 +8,-164,164 +9,-88,88 +10,-195,195 +11,-114,114 +12,-60,60 +13,-179,179 +14,-101,101 +15,-304,205 +16,-96,96 +17,-119,119 +18,-113,113 
+19,-98,98 +20,-106,106 +21,-105,105 +22,-77,77 +23,-51,51 +24,-105,105 +25,-136,136 +26,-100,100 +27,-29,29 +28,-79,79 +29,-114,114 +30,-82,82 +31,-70,70 +32,-75,75 +33,-51,51 +34,-94,94 +35,-52,52 +36,-93,93 +37,-71,71 +38,-73,73 +39,-48,48 +40,-52,52 +41,-96,96 +42,-46,46 +43,-65,65 +44,-57,57 +45,-41,41 +46,-104,104 +47,-51,51 +48,-181,82 +49,-229,130 +50,-39,39 +51,-69,69 +52,-53,53 +53,-59,59 +54,-26,26 +55,-75,75 +56,-31,31 +57,-60,60 +58,-63,63 +59,-40,40 +60,-35,35 +61,-79,79 +62,-42,42 +63,-22,22 +64,-73,73 +65,-71,71 +66,-18,18 +67,-55,55 +68,-29,29 +69,-43,43 +70,-70,70 +71,-49,49 +72,-42,42 +73,-29,29 +74,-81,81 +75,-36,36 +76,-38,38 +77,-36,36 +78,-52,52 +79,-28,28 +80,-42,42 +81,-52,52 +82,-66,66 +83,-31,31 +84,-27,27 +85,-49,49 +86,-28,28 +87,-54,54 +88,-34,34 +89,-35,35 +90,-50,50 +91,-36,36 +92,-36,36 +93,-46,46 +94,-34,34 +95,-135,36 +96,-39,39 +97,-36,36 +98,-26,26 +99,-56,56 +100,-40,40 +101,-40,40 +102,-26,26 +103,-28,28 +104,-31,31 +105,-35,35 +106,-26,26 +107,-57,57 +108,-44,44 +109,-41,41 +110,-31,31 +111,-26,26 +112,-25,25 +113,-41,41 +114,-32,32 +115,-44,44 +116,-30,30 +117,-32,32 +118,-30,30 +119,-25,25 +120,-23,23 +121,-47,47 +122,-24,24 +123,-45,45 +124,-39,39 +125,-21,21 +126,-43,43 +127,-143,44 +128,-26,26 +129,-20,20 +130,-32,32 +131,-16,16 +132,-24,24 +133,-42,42 +134,-25,25 +135,-36,36 +136,-19,19 +137,-29,29 +138,-43,43 +139,-17,17 +140,-150,51 +141,-32,32 +142,-34,34 +143,-19,19 +144,-26,26 +145,-30,30 +146,-31,31 +147,-49,49 +148,-33,33 +149,-21,21 +150,-17,17 +151,-48,48 +152,-34,34 +153,-20,20 +154,-20,20 +155,-26,26 +156,-21,21 +157,-13,13 +158,-40,40 +159,-22,22 +160,-26,26 +161,-30,30 +162,-29,29 +163,-25,25 +164,-26,26 +165,-27,27 +166,-21,21 +167,-29,29 +168,-24,24 +169,-17,17 +170,-22,22 +171,-35,35 +172,-35,35 +173,-18,18 +174,-135,36 +175,-15,15 +176,-23,23 +177,-28,28 +178,-25,25 +179,-24,24 +180,-29,29 +181,-31,31 +182,-24,24 +183,-129,30 +184,-45,45 +185,-24,24 +186,-17,17 +187,-20,20 +188,-21,21 +189,-23,23 
+190,-15,15 +191,-32,32 +192,-22,22 +193,-19,19 +194,-17,17 +195,-45,45 +196,-15,15 +197,-14,14 +198,-14,14 +199,-37,37 +200,-23,23 +201,-17,17 +202,-19,19 +203,-21,21 +204,-23,23 +205,-27,27 +206,-14,14 +207,-18,18 +208,-23,23 +209,-34,34 +210,-23,23 +211,-13,13 +212,-25,25 +213,-17,17 +214,-13,13 +215,-21,21 +216,-29,29 +217,-18,18 +218,-24,24 +219,-15,15 +220,-27,27 +221,-25,25 +222,-21,21 +223,-19,19 +224,-17,17 +225,-18,18 +226,-13,13 +227,-22,22 +228,-14,14 +229,-13,13 +230,-29,29 +231,-23,23 +232,-15,15 +233,-15,15 +234,-14,14 +235,-28,28 +236,-25,25 +237,-17,17 +238,-23,23 +239,-29,29 +240,-15,15 +241,-14,14 +242,-15,15 +243,-23,23 +244,-15,15 +245,-16,16 +246,-19,19 +247,-13,13 +248,-16,16 +249,-17,17 +250,-25,25 +251,-30,30 +252,-13,13 +253,-14,14 +254,-15,15 +255,-22,22 +256,-14,14 +257,-17,17 +258,-126,27 +259,-15,15 +260,-21,21 +261,-16,16 +262,-23,23 +263,-14,14 +264,-13,13 +265,-13,13 +266,-19,19 +267,-13,13 +268,-19,19 +269,-17,17 +270,-17,17 +271,-13,13 +272,-19,19 +273,-13,13 +274,-13,13 +275,-16,16 +276,-22,22 +277,-14,14 +278,-15,15 +279,-19,19 +280,-34,34 +281,-13,13 +282,-15,15 +283,-32,32 +284,-13,13 +285,-13,13 +286,-13,13 +287,-14,14 +288,-16,16 +289,-13,13 +290,-13,13 +291,-17,17 +292,-13,13 +293,-13,13 +294,-22,22 +295,-14,14 +296,-15,15 +297,-13,13 +298,-13,13 +299,-13,13 +300,-16,16 +301,-13,13 +302,-14,14 +303,-13,13 +304,-13,13 +305,-13,13 +306,-24,24 +307,-13,13 +308,-13,13 +309,-15,15 +310,-13,13 +311,-13,13 +312,-13,13 +313,-15,15 +314,-13,13 +315,-19,19 +316,-15,15 +317,-17,17 +318,-13,13 +319,-13,13 +320,-13,13 +321,-13,13 +322,-13,13 +323,-15,15 +324,-13,13 +325,-13,13 +326,-13,13 +327,-123,24 +328,-13,13 +329,-13,13 +330,-13,13 +331,-13,13 +332,-13,13 +333,-13,13 +334,-13,13 +335,-13,13 +336,-16,16 +337,-13,13 +338,-23,23 +339,-13,13 +340,-13,13 +341,-13,13 +342,-13,13 +343,-13,13 +344,-13,13 +345,-13,13 +346,-13,13 +347,-13,13 +348,-13,13 +349,-13,13 +350,-134,35 +351,-13,13 +352,-13,13 +353,-13,13 +354,-13,13 +355,-13,13 
+356,-13,13 +357,-13,13 +358,-13,13 +359,-13,13 +360,-15,15 +361,-13,13 +362,-13,13 +363,-13,13 +364,-13,13 +365,-13,13 +366,-13,13 +367,-13,13 +368,-13,13 +369,-14,14 +370,-13,13 +371,-13,13 +372,-13,13 +373,-13,13 +374,-13,13 +375,-13,13 +376,-13,13 +377,-124,25 +378,-13,13 +379,-13,13 +380,-13,13 +381,-13,13 +382,-13,13 +383,-13,13 +384,-13,13 +385,-13,13 +386,-13,13 +387,-13,13 +388,-13,13 +389,-121,22 +390,-13,13 +391,-13,13 +392,-13,13 +393,-13,13 +394,-13,13 +395,-13,13 +396,-13,13 +397,-13,13 +398,-17,17 +399,-13,13 diff --git a/projects/codes/QLearning/outputs/Racetrack-v0/20220826-224626/models/Qleaning_model.pkl b/projects/codes/QLearning/outputs/Racetrack-v0/20220826-224626/models/Qleaning_model.pkl new file mode 100644 index 0000000..6d6b01f Binary files /dev/null and b/projects/codes/QLearning/outputs/Racetrack-v0/20220826-224626/models/Qleaning_model.pkl differ diff --git a/projects/codes/QLearning/outputs/Racetrack-v0/20220826-224626/results/params.json b/projects/codes/QLearning/outputs/Racetrack-v0/20220826-224626/results/params.json new file mode 100644 index 0000000..ead445f --- /dev/null +++ b/projects/codes/QLearning/outputs/Racetrack-v0/20220826-224626/results/params.json @@ -0,0 +1 @@ +{"algo_name": "Q-learning", "env_name": "Racetrack-v0", "train_eps": 400, "test_eps": 20, "gamma": 0.9, "epsilon_start": 0.95, "epsilon_end": 0.01, "epsilon_decay": 300, "lr": 0.1, "device": "cpu", "seed": 10, "show_fig": false, "save_fig": true, "result_path": "C:\\Users\\24438\\Desktop\\rl-tutorials\\codes\\QLearning/outputs/Racetrack-v0/20220826-224626/results/", "model_path": "C:\\Users\\24438\\Desktop\\rl-tutorials\\codes\\QLearning/outputs/Racetrack-v0/20220826-224626/models/", "n_states": 4, "n_actions": 9} \ No newline at end of file diff --git a/projects/codes/QLearning/outputs/Racetrack-v0/20220826-224626/results/testing_curve.png b/projects/codes/QLearning/outputs/Racetrack-v0/20220826-224626/results/testing_curve.png new file mode 100644 index 
0000000..fa1588a Binary files /dev/null and b/projects/codes/QLearning/outputs/Racetrack-v0/20220826-224626/results/testing_curve.png differ diff --git a/projects/codes/QLearning/outputs/Racetrack-v0/20220826-224626/results/testing_results.csv b/projects/codes/QLearning/outputs/Racetrack-v0/20220826-224626/results/testing_results.csv new file mode 100644 index 0000000..3d60bb2 --- /dev/null +++ b/projects/codes/QLearning/outputs/Racetrack-v0/20220826-224626/results/testing_results.csv @@ -0,0 +1,21 @@ +episodes,rewards,steps +0,-1000,1000 +1,2,8 +2,4,6 +3,3,7 +4,2,8 +5,3,7 +6,4,6 +7,-1000,1000 +8,3,7 +9,-11,11 +10,-19,19 +11,-18,18 +12,1,9 +13,1,9 +14,4,6 +15,-16,16 +16,-17,17 +17,4,6 +18,-16,16 +19,4,6 diff --git a/projects/codes/QLearning/outputs/Racetrack-v0/20220826-224626/results/training_curve.png b/projects/codes/QLearning/outputs/Racetrack-v0/20220826-224626/results/training_curve.png new file mode 100644 index 0000000..c0c7b24 Binary files /dev/null and b/projects/codes/QLearning/outputs/Racetrack-v0/20220826-224626/results/training_curve.png differ diff --git a/projects/codes/QLearning/outputs/Racetrack-v0/20220826-224626/results/training_results.csv b/projects/codes/QLearning/outputs/Racetrack-v0/20220826-224626/results/training_results.csv new file mode 100644 index 0000000..a7df26d --- /dev/null +++ b/projects/codes/QLearning/outputs/Racetrack-v0/20220826-224626/results/training_results.csv @@ -0,0 +1,401 @@ +episodes,rewards,steps +0,-3580,1000 +1,-2960,1000 +2,-2670,1000 +3,-2720,1000 +4,-2670,1000 +5,-2570,1000 +6,-2407,977 +7,-2012,852 +8,-2500,1000 +9,-2530,1000 +10,-2550,1000 +11,-437,187 +12,-80,40 +13,-2450,1000 +14,-338,148 +15,-1175,525 +16,-755,325 +17,-411,181 +18,-1068,448 +19,-785,325 +20,-149,79 +21,-628,268 +22,-423,183 +23,-282,122 +24,-2198,938 +25,-13,13 +26,-253,113 +27,-48,28 +28,-72,42 +29,-123,63 +30,-305,145 +31,-72,32 +32,-142,72 +33,-13,13 +34,4,6 +35,-1285,545 +36,-174,94 +37,-436,196 +38,-759,339 +39,-11,11 +40,-17,17 
+41,-283,123 +42,-181,81 +43,-44,24 +44,-55,35 +45,-135,65 +46,-577,277 +47,-234,114 +48,-54,34 +49,4,6 +50,-29,19 +51,-100,50 +52,-32,22 +53,-23,23 +54,4,6 +55,-17,17 +56,-18,18 +57,-48,28 +58,-34,24 +59,-45,25 +60,-29,19 +61,1,9 +62,-77,37 +63,3,7 +64,-25,15 +65,-3,13 +66,-78,48 +67,-69,39 +68,-105,45 +69,-48,28 +70,3,7 +71,4,6 +72,-100,50 +73,-130,60 +74,-20,20 +75,4,6 +76,4,6 +77,4,6 +78,4,6 +79,-47,27 +80,4,6 +81,4,6 +82,-174,94 +83,-12,12 +84,-26,16 +85,3,7 +86,3,7 +87,-42,32 +88,-48,28 +89,-97,57 +90,-11,11 +91,-16,16 +92,-15,15 +93,4,6 +94,-147,67 +95,-52,32 +96,-97,47 +97,3,7 +98,-17,17 +99,3,7 +100,4,6 +101,3,7 +102,3,7 +103,3,7 +104,1,9 +105,4,6 +106,4,6 +107,3,7 +108,4,6 +109,-68,38 +110,3,7 +111,4,6 +112,-14,14 +113,4,6 +114,-57,37 +115,3,7 +116,4,6 +117,-12,12 +118,3,7 +119,3,7 +120,-64,34 +121,-13,13 +122,3,7 +123,-13,13 +124,4,6 +125,3,7 +126,-32,22 +127,-41,31 +128,3,7 +129,3,7 +130,3,7 +131,4,6 +132,4,6 +133,3,7 +134,-12,12 +135,-31,21 +136,4,6 +137,3,7 +138,-51,31 +139,-48,28 +140,4,6 +141,-85,45 +142,-14,14 +143,4,6 +144,3,7 +145,-6,16 +146,4,6 +147,4,6 +148,-15,15 +149,4,6 +150,-24,24 +151,3,7 +152,-14,14 +153,-18,18 +154,3,7 +155,4,6 +156,-85,45 +157,-51,31 +158,3,7 +159,2,8 +160,3,7 +161,-79,39 +162,-14,14 +163,-13,13 +164,4,6 +165,3,7 +166,4,6 +167,3,7 +168,-74,34 +169,-15,15 +170,4,6 +171,-14,14 +172,4,6 +173,-31,21 +174,-8,18 +175,4,6 +176,4,6 +177,4,6 +178,4,6 +179,-29,19 +180,4,6 +181,3,7 +182,4,6 +183,-82,42 +184,3,7 +185,4,6 +186,4,6 +187,-11,11 +188,-23,23 +189,-33,23 +190,3,7 +191,-12,12 +192,-44,24 +193,-62,42 +194,-16,16 +195,4,6 +196,-12,12 +197,3,7 +198,-13,13 +199,3,7 +200,3,7 +201,4,6 +202,4,6 +203,4,6 +204,-28,18 +205,-16,16 +206,3,7 +207,4,6 +208,-12,12 +209,-13,13 +210,-66,36 +211,-14,14 +212,4,6 +213,4,6 +214,-15,15 +215,-60,30 +216,4,6 +217,3,7 +218,4,6 +219,-33,23 +220,-12,12 +221,-14,14 +222,4,6 +223,3,7 +224,-97,47 +225,4,6 +226,2,8 +227,4,6 +228,4,6 +229,3,7 +230,-11,11 +231,4,6 +232,3,7 +233,3,7 +234,4,6 +235,3,7 
+236,3,7 +237,-32,22 +238,-13,13 +239,3,7 +240,-22,22 +241,4,6 +242,2,8 +243,-31,21 +244,4,6 +245,-4,14 +246,-30,20 +247,4,6 +248,3,7 +249,-26,16 +250,4,6 +251,-12,12 +252,2,8 +253,1,9 +254,4,6 +255,2,8 +256,2,8 +257,-12,12 +258,3,7 +259,-48,28 +260,4,6 +261,4,6 +262,-51,31 +263,-12,12 +264,4,6 +265,2,8 +266,2,8 +267,2,8 +268,3,7 +269,4,6 +270,4,6 +271,-17,17 +272,4,6 +273,-13,13 +274,-16,16 +275,-97,57 +276,3,7 +277,-1,11 +278,-32,22 +279,3,7 +280,4,6 +281,3,7 +282,3,7 +283,3,7 +284,3,7 +285,2,8 +286,3,7 +287,-15,15 +288,2,8 +289,-18,18 +290,4,6 +291,-36,26 +292,4,6 +293,4,6 +294,4,6 +295,4,6 +296,-77,47 +297,-14,14 +298,3,7 +299,3,7 +300,3,7 +301,4,6 +302,3,7 +303,4,6 +304,-12,12 +305,-45,35 +306,-63,43 +307,2,8 +308,4,6 +309,4,6 +310,-13,13 +311,4,6 +312,-13,13 +313,4,6 +314,3,7 +315,-30,20 +316,-13,13 +317,3,7 +318,4,6 +319,4,6 +320,-12,12 +321,-13,13 +322,3,7 +323,3,7 +324,3,7 +325,3,7 +326,-36,26 +327,4,6 +328,3,7 +329,3,7 +330,3,7 +331,3,7 +332,-14,14 +333,-16,16 +334,3,7 +335,3,7 +336,-14,14 +337,1,9 +338,2,8 +339,3,7 +340,4,6 +341,-36,26 +342,-14,14 +343,-78,48 +344,2,8 +345,-37,27 +346,3,7 +347,3,7 +348,-37,27 +349,-16,16 +350,4,6 +351,-15,15 +352,4,6 +353,2,8 +354,-44,24 +355,-13,13 +356,-14,14 +357,-17,17 +358,-13,13 +359,3,7 +360,2,8 +361,4,6 +362,3,7 +363,-5,15 +364,-14,14 +365,2,8 +366,-12,12 +367,3,7 +368,4,6 +369,2,8 +370,2,8 +371,1,9 +372,-16,16 +373,1,9 +374,4,6 +375,-16,16 +376,3,7 +377,2,8 +378,-13,13 +379,-44,34 +380,-16,16 +381,-30,20 +382,4,6 +383,4,6 +384,2,8 +385,-15,15 +386,4,6 +387,3,7 +388,2,8 +389,4,6 +390,2,8 +391,3,7 +392,3,7 +393,-14,14 +394,-15,15 +395,3,7 +396,-13,13 +397,3,7 +398,4,6 +399,3,7 diff --git a/projects/codes/Sarsa/main.py b/projects/codes/Sarsa/main.py index 1ea4527..cb1b22c 100644 --- a/projects/codes/Sarsa/main.py +++ b/projects/codes/Sarsa/main.py @@ -5,7 +5,7 @@ Author: John Email: johnjim0816@gmail.com Date: 2021-03-11 17:59:16 LastEditor: John -LastEditTime: 2022-08-25 14:26:36 +LastEditTime: 2022-08-26 23:03:39 
Discription: Environment: ''' @@ -20,117 +20,105 @@ import argparse from envs.register import register_env from envs.wrappers import CliffWalkingWapper from Sarsa.sarsa import Sarsa -from common.utils import save_results,make_dir,plot_rewards,save_args,all_seed +from common.utils import all_seed +from common.launcher import Launcher -def get_args(): - curr_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") # obtain current time - parser = argparse.ArgumentParser(description="hyperparameters") - parser.add_argument('--algo_name',default='Sarsa',type=str,help="name of algorithm") - parser.add_argument('--env_name',default='Racetrack-v0',type=str,help="name of environment") - parser.add_argument('--train_eps',default=300,type=int,help="episodes of training") - parser.add_argument('--test_eps',default=20,type=int,help="episodes of testing") - parser.add_argument('--gamma',default=0.99,type=float,help="discounted factor") - parser.add_argument('--epsilon_start',default=0.90,type=float,help="initial value of epsilon") - parser.add_argument('--epsilon_end',default=0.01,type=float,help="final value of epsilon") - parser.add_argument('--epsilon_decay',default=200,type=int,help="decay rate of epsilon") - parser.add_argument('--lr',default=0.2,type=float,help="learning rate") - parser.add_argument('--device',default='cpu',type=str,help="cpu or cuda") - parser.add_argument('--seed',default=10,type=int,help="seed") - parser.add_argument('--show_fig',default=False,type=bool,help="if show figure or not") - parser.add_argument('--save_fig',default=True,type=bool,help="if save figure or not") - args = parser.parse_args() - default_args = {'result_path':f"{curr_path}/outputs/{args.env_name}/{curr_time}/results/", - 'model_path':f"{curr_path}/outputs/{args.env_name}/{curr_time}/models/", - } - args = {**vars(args),**default_args} # type(dict) - return args +class Main(Launcher): + def get_args(self): + curr_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") # obtain 
current time + parser = argparse.ArgumentParser(description="hyperparameters") + parser.add_argument('--algo_name',default = 'Sarsa',type=str,help="name of algorithm") + parser.add_argument('--env_name',default = 'Racetrack-v0',type=str,help="name of environment") + parser.add_argument('--train_eps',default = 300,type=int,help="episodes of training") + parser.add_argument('--test_eps',default = 20,type=int,help="episodes of testing") + parser.add_argument('--ep_max_steps',default = 100000,type=int,help="steps per episode, much larger value can simulate infinite steps") + parser.add_argument('--gamma',default=0.99,type=float,help="discounted factor") + parser.add_argument('--epsilon_start',default=0.90,type=float,help="initial value of epsilon") + parser.add_argument('--epsilon_end',default=0.01,type=float,help="final value of epsilon") + parser.add_argument('--epsilon_decay',default=200,type=int,help="decay rate of epsilon") + parser.add_argument('--lr',default=0.2,type=float,help="learning rate") + parser.add_argument('--device',default='cpu',type=str,help="cpu or cuda") + parser.add_argument('--seed',default=10,type=int,help="seed") + parser.add_argument('--show_fig',default=False,type=bool,help="if show figure or not") + parser.add_argument('--save_fig',default=True,type=bool,help="if save figure or not") + args = parser.parse_args() + default_args = {'result_path':f"{curr_path}/outputs/{args.env_name}/{curr_time}/results/", + 'model_path':f"{curr_path}/outputs/{args.env_name}/{curr_time}/models/", + } + args = {**vars(args),**default_args} # type(dict) + return args -def env_agent_config(cfg): - register_env(cfg['env_name']) - env = gym.make(cfg['env_name']) - if cfg['seed'] !=0: # set random seed - all_seed(env,seed= cfg['seed']) - if cfg['env_name'] == 'CliffWalking-v0': - env = CliffWalkingWapper(env) - try: # state dimension - n_states = env.observation_space.n # print(hasattr(env.observation_space, 'n')) - except AttributeError: - n_states = 
env.observation_space.shape[0] # print(hasattr(env.observation_space, 'shape')) - n_actions = env.action_space.n # action dimension - print(f"n_states: {n_states}, n_actions: {n_actions}") - cfg.update({"n_states":n_states,"n_actions":n_actions}) # update to cfg paramters - agent = Sarsa(cfg) - return env,agent + def env_agent_config(self,cfg): + register_env(cfg['env_name']) + env = gym.make(cfg['env_name']) + if cfg['seed'] !=0: # set random seed + all_seed(env,seed= cfg['seed']) + if cfg['env_name'] == 'CliffWalking-v0': + env = CliffWalkingWapper(env) + try: # state dimension + n_states = env.observation_space.n # print(hasattr(env.observation_space, 'n')) + except AttributeError: + n_states = env.observation_space.shape[0] # print(hasattr(env.observation_space, 'shape')) + n_actions = env.action_space.n # action dimension + print(f"n_states: {n_states}, n_actions: {n_actions}") + cfg.update({"n_states":n_states,"n_actions":n_actions}) # update to cfg paramters + agent = Sarsa(cfg) + return env,agent -def train(cfg,env,agent): - print("Start training!") - print(f"Env: {cfg['env_name']}, Algorithm: {cfg['algo_name']}, Device: {cfg['device']}") - rewards = [] # record rewards for all episodes - steps = [] # record steps for all episodes - for i_ep in range(cfg['train_eps']): - ep_reward = 0 # reward per episode - ep_step = 0 # step per episode - state = env.reset() # reset and obtain initial state - action = agent.sample_action(state) - while True: - # for _ in range(cfg.ep_max_steps): - next_state, reward, done, _ = env.step(action) # update env and return transitions - next_action = agent.sample_action(next_state) - agent.update(state, action, reward, next_state, next_action,done) # update agent - state = next_state # update state - action = next_action - ep_reward += reward - ep_step += 1 - if done: - break - rewards.append(ep_reward) - steps.append(ep_step) - if (i_ep+1)%10==0: - print(f'Episode: {i_ep+1}/{cfg["train_eps"]}, Reward: {ep_reward:.2f}, 
Steps:{ep_step}, Epislon: {agent.epsilon:.3f}') - print("Finish training!") - return {'episodes':range(len(rewards)),'rewards':rewards,'steps':steps} + def train(self,cfg,env,agent): + print("Start training!") + print(f"Env: {cfg['env_name']}, Algorithm: {cfg['algo_name']}, Device: {cfg['device']}") + rewards = [] # record rewards for all episodes + steps = [] # record steps for all episodes + for i_ep in range(cfg['train_eps']): + ep_reward = 0 # reward per episode + ep_step = 0 # step per episode + state = env.reset() # reset and obtain initial state + action = agent.sample_action(state) + # while True: + for _ in range(cfg['ep_max_steps']): + next_state, reward, done, _ = env.step(action) # update env and return transitions + next_action = agent.sample_action(next_state) + agent.update(state, action, reward, next_state, next_action,done) # update agent + state = next_state # update state + action = next_action + ep_reward += reward + ep_step += 1 + if done: + break + rewards.append(ep_reward) + steps.append(ep_step) + if (i_ep+1)%10==0: + print(f'Episode: {i_ep+1}/{cfg["train_eps"]}, Reward: {ep_reward:.2f}, Steps: {ep_step}, Epislon: {agent.epsilon:.3f}') + print("Finish training!") + return {'episodes':range(len(rewards)),'rewards':rewards,'steps':steps} -def test(cfg,env,agent): - print("Start testing!") - print(f"Env: {cfg['env_name']}, Algorithm: {cfg['algo_name']}, Device: {cfg['device']}") - rewards = [] # record rewards for all episodes - steps = [] # record steps for all episodes - for i_ep in range(cfg['test_eps']): - ep_reward = 0 # reward per episode - ep_step = 0 - while True: - # for _ in range(cfg.ep_max_steps): - action = agent.predict_action(state) - next_state, reward, done = env.step(action) - state = next_state - ep_reward+=reward - ep_step+=1 - if done: - break - rewards.append(ep_reward) - steps.append(ep_step) - print(f"Episode: {i_ep+1}/{cfg['test_eps']}, Steps:{ep_step}, Reward: {ep_reward:.2f}") - print("Finish testing!") - return 
{'episodes':range(len(rewards)),'rewards':rewards,'steps':steps} + def test(self,cfg,env,agent): + print("Start testing!") + print(f"Env: {cfg['env_name']}, Algorithm: {cfg['algo_name']}, Device: {cfg['device']}") + rewards = [] # record rewards for all episodes + steps = [] # record steps for all episodes + for i_ep in range(cfg['test_eps']): + ep_reward = 0 # reward per episode + ep_step = 0 + state = env.reset() # reset and obtain initial state + for _ in range(cfg['ep_max_steps']): + action = agent.predict_action(state) + next_state, reward, done, _ = env.step(action) + state = next_state + ep_reward+=reward + ep_step+=1 + if done: + break + rewards.append(ep_reward) + steps.append(ep_step) + print(f"Episode: {i_ep+1}/{cfg['test_eps']}, Steps: {ep_step}, Reward: {ep_reward:.2f}") + print("Finish testing!") + return {'episodes':range(len(rewards)),'rewards':rewards,'steps':steps} if __name__ == "__main__": - cfg = get_args() - # 训练 - env, agent = env_agent_config(cfg) - res_dic = train(cfg, env, agent) - make_dir(cfg.result_path, cfg.model_path) - save_args(cfg) # save parameters - agent.save(path=cfg.model_path) # save model - save_results(res_dic, tag='train', - path=cfg.result_path) - plot_rewards(res_dic['rewards'], cfg, tag="train") - # 测试 - env, agent = env_agent_config(cfg) - agent.load(path=cfg.model_path) # 导入模型 - res_dic = test(cfg, env, agent) - save_results(res_dic, tag='test', - path=cfg.result_path) # 保存结果 - plot_rewards(res_dic['rewards'], cfg, tag="test") # 画出结果 + main = Main() + main.run() diff --git a/projects/codes/Sarsa/outputs/CliffWalking-v0/20220803-142740/models/sarsa_model.pkl b/projects/codes/Sarsa/outputs/CliffWalking-v0/20220803-142740/models/sarsa_model.pkl deleted file mode 100644 index 1c8f133..0000000 Binary files a/projects/codes/Sarsa/outputs/CliffWalking-v0/20220803-142740/models/sarsa_model.pkl and /dev/null differ diff --git a/projects/codes/Sarsa/outputs/CliffWalking-v0/20220803-142740/results/params.json 
b/projects/codes/Sarsa/outputs/CliffWalking-v0/20220803-142740/results/params.json deleted file mode 100644 index 8492e8e..0000000 --- a/projects/codes/Sarsa/outputs/CliffWalking-v0/20220803-142740/results/params.json +++ /dev/null @@ -1 +0,0 @@ -{"algo_name": "Sarsa", "env_name": "CliffWalking-v0", "train_eps": 300, "test_eps": 20, "ep_max_steps": 200, "gamma": 0.99, "epsilon_start": 0.9, "epsilon_end": 0.01, "epsilon_decay": 200, "lr": 0.2, "device": "cpu", "result_path": "/Users/jj/Desktop/rl-tutorials/codes/Sarsa/outputs/CliffWalking-v0/20220803-142740/results/", "model_path": "/Users/jj/Desktop/rl-tutorials/codes/Sarsa/outputs/CliffWalking-v0/20220803-142740/models/", "save_fig": true} \ No newline at end of file diff --git a/projects/codes/Sarsa/outputs/CliffWalking-v0/20220803-142740/results/test_rewards.npy b/projects/codes/Sarsa/outputs/CliffWalking-v0/20220803-142740/results/test_rewards.npy deleted file mode 100644 index ef51f5e..0000000 Binary files a/projects/codes/Sarsa/outputs/CliffWalking-v0/20220803-142740/results/test_rewards.npy and /dev/null differ diff --git a/projects/codes/Sarsa/outputs/CliffWalking-v0/20220803-142740/results/test_rewards_curve.png b/projects/codes/Sarsa/outputs/CliffWalking-v0/20220803-142740/results/test_rewards_curve.png deleted file mode 100644 index 5b97ea1..0000000 Binary files a/projects/codes/Sarsa/outputs/CliffWalking-v0/20220803-142740/results/test_rewards_curve.png and /dev/null differ diff --git a/projects/codes/Sarsa/outputs/CliffWalking-v0/20220803-142740/results/train_rewards.npy b/projects/codes/Sarsa/outputs/CliffWalking-v0/20220803-142740/results/train_rewards.npy deleted file mode 100644 index c7ad308..0000000 Binary files a/projects/codes/Sarsa/outputs/CliffWalking-v0/20220803-142740/results/train_rewards.npy and /dev/null differ diff --git a/projects/codes/Sarsa/outputs/CliffWalking-v0/20220803-142740/results/train_rewards_curve.png 
b/projects/codes/Sarsa/outputs/CliffWalking-v0/20220803-142740/results/train_rewards_curve.png deleted file mode 100644 index 111f028..0000000 Binary files a/projects/codes/Sarsa/outputs/CliffWalking-v0/20220803-142740/results/train_rewards_curve.png and /dev/null differ diff --git a/projects/codes/Sarsa/outputs/CliffWalking-v0/20220804-223029/models/sarsa_model.pkl b/projects/codes/Sarsa/outputs/CliffWalking-v0/20220804-223029/models/sarsa_model.pkl deleted file mode 100644 index f1d9dcf..0000000 Binary files a/projects/codes/Sarsa/outputs/CliffWalking-v0/20220804-223029/models/sarsa_model.pkl and /dev/null differ diff --git a/projects/codes/Sarsa/outputs/CliffWalking-v0/20220804-223029/results/params.json b/projects/codes/Sarsa/outputs/CliffWalking-v0/20220804-223029/results/params.json deleted file mode 100644 index 517bb98..0000000 --- a/projects/codes/Sarsa/outputs/CliffWalking-v0/20220804-223029/results/params.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "algo_name": "Sarsa", - "env_name": "CliffWalking-v0", - "train_eps": 400, - "test_eps": 20, - "gamma": 0.9, - "epsilon_start": 0.95, - "epsilon_end": 0.01, - "epsilon_decay": 300, - "lr": 0.1, - "device": "cpu", - "result_path": "c:\\Users\\24438\\Desktop\\rl-tutorials\\codes\\Sarsa/outputs/CliffWalking-v0/20220804-223029/results/", - "model_path": "c:\\Users\\24438\\Desktop\\rl-tutorials\\codes\\Sarsa/outputs/CliffWalking-v0/20220804-223029/models/", - "save_fig": true -} \ No newline at end of file diff --git a/projects/codes/Sarsa/outputs/CliffWalking-v0/20220804-223029/results/test_rewards.npy b/projects/codes/Sarsa/outputs/CliffWalking-v0/20220804-223029/results/test_rewards.npy deleted file mode 100644 index 1b35004..0000000 Binary files a/projects/codes/Sarsa/outputs/CliffWalking-v0/20220804-223029/results/test_rewards.npy and /dev/null differ diff --git a/projects/codes/Sarsa/outputs/CliffWalking-v0/20220804-223029/results/train_rewards.npy 
b/projects/codes/Sarsa/outputs/CliffWalking-v0/20220804-223029/results/train_rewards.npy deleted file mode 100644 index f9979cc..0000000 Binary files a/projects/codes/Sarsa/outputs/CliffWalking-v0/20220804-223029/results/train_rewards.npy and /dev/null differ diff --git a/projects/codes/Sarsa/outputs/CliffWalking-v0/20220804-223029/results/train_rewards_curve.png b/projects/codes/Sarsa/outputs/CliffWalking-v0/20220804-223029/results/train_rewards_curve.png deleted file mode 100644 index 9ffa9bf..0000000 Binary files a/projects/codes/Sarsa/outputs/CliffWalking-v0/20220804-223029/results/train_rewards_curve.png and /dev/null differ diff --git a/projects/codes/Sarsa/outputs/CliffWalking-v0/20220825-213316/models/checkpoint.pkl b/projects/codes/Sarsa/outputs/CliffWalking-v0/20220825-213316/models/checkpoint.pkl new file mode 100644 index 0000000..fb8efd6 Binary files /dev/null and b/projects/codes/Sarsa/outputs/CliffWalking-v0/20220825-213316/models/checkpoint.pkl differ diff --git a/projects/codes/QLearning/outputs/CliffWalking-v0/20220824-103255/results/params.json b/projects/codes/Sarsa/outputs/CliffWalking-v0/20220825-213316/results/params.json similarity index 54% rename from projects/codes/QLearning/outputs/CliffWalking-v0/20220824-103255/results/params.json rename to projects/codes/Sarsa/outputs/CliffWalking-v0/20220825-213316/results/params.json index 0bdad3a..e16e735 100644 --- a/projects/codes/QLearning/outputs/CliffWalking-v0/20220824-103255/results/params.json +++ b/projects/codes/Sarsa/outputs/CliffWalking-v0/20220825-213316/results/params.json @@ -1,5 +1,5 @@ { - "algo_name": "Q-learning", + "algo_name": "Sarsa", "env_name": "CliffWalking-v0", "train_eps": 400, "test_eps": 20, @@ -12,8 +12,8 @@ "seed": 10, "show_fig": false, "save_fig": true, - "result_path": "/Users/jj/Desktop/rl-tutorials/codes/QLearning/outputs/CliffWalking-v0/20220824-103255/results/", - "model_path": 
"/Users/jj/Desktop/rl-tutorials/codes/QLearning/outputs/CliffWalking-v0/20220824-103255/models/", + "result_path": "/Users/jj/Desktop/rl-tutorials/codes/Sarsa/outputs/CliffWalking-v0/20220825-213316/results/", + "model_path": "/Users/jj/Desktop/rl-tutorials/codes/Sarsa/outputs/CliffWalking-v0/20220825-213316/models/", "n_states": 48, "n_actions": 4 } \ No newline at end of file diff --git a/projects/codes/Sarsa/outputs/CliffWalking-v0/20220804-223029/results/test_rewards_curve.png b/projects/codes/Sarsa/outputs/CliffWalking-v0/20220825-213316/results/testing_curve.png similarity index 99% rename from projects/codes/Sarsa/outputs/CliffWalking-v0/20220804-223029/results/test_rewards_curve.png rename to projects/codes/Sarsa/outputs/CliffWalking-v0/20220825-213316/results/testing_curve.png index d600435..cf20c71 100644 Binary files a/projects/codes/Sarsa/outputs/CliffWalking-v0/20220804-223029/results/test_rewards_curve.png and b/projects/codes/Sarsa/outputs/CliffWalking-v0/20220825-213316/results/testing_curve.png differ diff --git a/projects/codes/Sarsa/outputs/CliffWalking-v0/20220825-213316/results/testing_results.csv b/projects/codes/Sarsa/outputs/CliffWalking-v0/20220825-213316/results/testing_results.csv new file mode 100644 index 0000000..7f09e4b --- /dev/null +++ b/projects/codes/Sarsa/outputs/CliffWalking-v0/20220825-213316/results/testing_results.csv @@ -0,0 +1,21 @@ +episodes,rewards,steps +0,-15,15 +1,-15,15 +2,-15,15 +3,-15,15 +4,-15,15 +5,-15,15 +6,-15,15 +7,-15,15 +8,-15,15 +9,-15,15 +10,-15,15 +11,-15,15 +12,-15,15 +13,-15,15 +14,-15,15 +15,-15,15 +16,-15,15 +17,-15,15 +18,-15,15 +19,-15,15 diff --git a/projects/codes/Sarsa/outputs/CliffWalking-v0/20220825-213316/results/training_curve.png b/projects/codes/Sarsa/outputs/CliffWalking-v0/20220825-213316/results/training_curve.png new file mode 100644 index 0000000..14dbf39 Binary files /dev/null and b/projects/codes/Sarsa/outputs/CliffWalking-v0/20220825-213316/results/training_curve.png differ diff 
--git a/projects/codes/Sarsa/outputs/CliffWalking-v0/20220825-213316/results/training_results.csv b/projects/codes/Sarsa/outputs/CliffWalking-v0/20220825-213316/results/training_results.csv new file mode 100644 index 0000000..c51b354 --- /dev/null +++ b/projects/codes/Sarsa/outputs/CliffWalking-v0/20220825-213316/results/training_results.csv @@ -0,0 +1,401 @@ +episodes,rewards,steps +0,-649,154 +1,-2822,842 +2,-176,176 +3,-139,139 +4,-221,221 +5,-51,51 +6,-219,219 +7,-247,148 +8,-90,90 +9,-145,145 +10,-104,104 +11,-162,162 +12,-49,49 +13,-129,129 +14,-140,140 +15,-19,19 +16,-131,131 +17,-115,115 +18,-43,43 +19,-133,133 +20,-73,73 +21,-89,89 +22,-131,131 +23,-61,61 +24,-113,113 +25,-119,119 +26,-119,119 +27,-71,71 +28,-132,132 +29,-47,47 +30,-79,79 +31,-57,57 +32,-125,125 +33,-77,77 +34,-87,87 +35,-49,49 +36,-57,57 +37,-81,81 +38,-81,81 +39,-97,97 +40,-61,61 +41,-85,85 +42,-217,118 +43,-39,39 +44,-117,117 +45,-41,41 +46,-71,71 +47,-105,105 +48,-73,73 +49,-68,68 +50,-95,95 +51,-41,41 +52,-41,41 +53,-67,67 +54,-71,71 +55,-65,65 +56,-41,41 +57,-61,61 +58,-81,81 +59,-21,21 +60,-76,76 +61,-80,80 +62,-23,23 +63,-53,53 +64,-67,67 +65,-33,33 +66,-41,41 +67,-59,59 +68,-33,33 +69,-64,64 +70,-188,89 +71,-47,47 +72,-57,57 +73,-45,45 +74,-33,33 +75,-79,79 +76,-45,45 +77,-23,23 +78,-47,47 +79,-57,57 +80,-47,47 +81,-45,45 +82,-53,53 +83,-29,29 +84,-33,33 +85,-69,69 +86,-61,61 +87,-35,35 +88,-59,59 +89,-43,43 +90,-17,17 +91,-39,39 +92,-59,59 +93,-29,29 +94,-31,31 +95,-55,55 +96,-35,35 +97,-45,45 +98,-29,29 +99,-59,59 +100,-25,25 +101,-29,29 +102,-33,33 +103,-39,39 +104,-19,19 +105,-47,47 +106,-57,57 +107,-19,19 +108,-47,47 +109,-25,25 +110,-23,23 +111,-53,53 +112,-39,39 +113,-34,34 +114,-27,27 +115,-27,27 +116,-63,63 +117,-33,33 +118,-17,17 +119,-21,21 +120,-19,19 +121,-49,49 +122,-25,25 +123,-39,39 +124,-25,25 +125,-167,68 +126,-35,35 +127,-29,29 +128,-31,31 +129,-44,44 +130,-33,33 +131,-23,23 +132,-37,37 +133,-134,35 +134,-31,31 +135,-19,19 +136,-29,29 +137,-37,37 +138,-25,25 
+139,-39,39 +140,-47,47 +141,-29,29 +142,-27,27 +143,-21,21 +144,-41,41 +145,-29,29 +146,-25,25 +147,-25,25 +148,-21,21 +149,-29,29 +150,-39,39 +151,-35,35 +152,-35,35 +153,-32,32 +154,-31,31 +155,-19,19 +156,-21,21 +157,-35,35 +158,-33,33 +159,-37,37 +160,-25,25 +161,-41,41 +162,-25,25 +163,-23,23 +164,-27,27 +165,-25,25 +166,-39,39 +167,-28,28 +168,-24,24 +169,-23,23 +170,-41,41 +171,-17,17 +172,-35,35 +173,-23,23 +174,-29,29 +175,-17,17 +176,-39,39 +177,-33,33 +178,-29,29 +179,-24,24 +180,-23,23 +181,-19,19 +182,-15,15 +183,-23,23 +184,-39,39 +185,-25,25 +186,-35,35 +187,-33,33 +188,-19,19 +189,-35,35 +190,-21,21 +191,-131,32 +192,-15,15 +193,-23,23 +194,-21,21 +195,-17,17 +196,-23,23 +197,-31,31 +198,-21,21 +199,-31,31 +200,-35,35 +201,-27,27 +202,-19,19 +203,-21,21 +204,-23,23 +205,-23,23 +206,-21,21 +207,-31,31 +208,-25,25 +209,-23,23 +210,-17,17 +211,-19,19 +212,-25,25 +213,-23,23 +214,-19,19 +215,-19,19 +216,-25,25 +217,-25,25 +218,-25,25 +219,-25,25 +220,-23,23 +221,-19,19 +222,-19,19 +223,-149,50 +224,-41,41 +225,-19,19 +226,-29,29 +227,-37,37 +228,-17,17 +229,-17,17 +230,-19,19 +231,-27,27 +232,-19,19 +233,-33,33 +234,-23,23 +235,-23,23 +236,-34,34 +237,-15,15 +238,-33,33 +239,-29,29 +240,-17,17 +241,-23,23 +242,-17,17 +243,-19,19 +244,-21,21 +245,-23,23 +246,-17,17 +247,-15,15 +248,-39,39 +249,-21,21 +250,-23,23 +251,-29,29 +252,-15,15 +253,-17,17 +254,-29,29 +255,-15,15 +256,-21,21 +257,-19,19 +258,-19,19 +259,-21,21 +260,-17,17 +261,-21,21 +262,-27,27 +263,-27,27 +264,-21,21 +265,-19,19 +266,-17,17 +267,-23,23 +268,-19,19 +269,-17,17 +270,-19,19 +271,-19,19 +272,-17,17 +273,-23,23 +274,-17,17 +275,-22,22 +276,-31,31 +277,-19,19 +278,-17,17 +279,-33,33 +280,-19,19 +281,-17,17 +282,-31,31 +283,-15,15 +284,-15,15 +285,-15,15 +286,-29,29 +287,-19,19 +288,-17,17 +289,-26,26 +290,-17,17 +291,-19,19 +292,-15,15 +293,-21,21 +294,-21,21 +295,-15,15 +296,-19,19 +297,-15,15 +298,-17,17 +299,-19,19 +300,-17,17 +301,-21,21 +302,-17,17 +303,-27,27 +304,-17,17 
+305,-19,19 +306,-15,15 +307,-19,19 +308,-33,33 +309,-17,17 +310,-20,20 +311,-19,19 +312,-17,17 +313,-15,15 +314,-23,23 +315,-15,15 +316,-15,15 +317,-17,17 +318,-25,25 +319,-15,15 +320,-17,17 +321,-19,19 +322,-17,17 +323,-15,15 +324,-23,23 +325,-19,19 +326,-17,17 +327,-23,23 +328,-15,15 +329,-19,19 +330,-15,15 +331,-17,17 +332,-19,19 +333,-15,15 +334,-17,17 +335,-17,17 +336,-19,19 +337,-15,15 +338,-19,19 +339,-19,19 +340,-17,17 +341,-15,15 +342,-21,21 +343,-19,19 +344,-17,17 +345,-17,17 +346,-15,15 +347,-21,21 +348,-20,20 +349,-15,15 +350,-15,15 +351,-15,15 +352,-19,19 +353,-17,17 +354,-15,15 +355,-27,27 +356,-15,15 +357,-15,15 +358,-23,23 +359,-125,26 +360,-132,33 +361,-17,17 +362,-15,15 +363,-17,17 +364,-23,23 +365,-17,17 +366,-15,15 +367,-15,15 +368,-17,17 +369,-15,15 +370,-17,17 +371,-15,15 +372,-15,15 +373,-15,15 +374,-15,15 +375,-15,15 +376,-15,15 +377,-15,15 +378,-15,15 +379,-15,15 +380,-17,17 +381,-15,15 +382,-15,15 +383,-19,19 +384,-15,15 +385,-17,17 +386,-27,27 +387,-15,15 +388,-21,21 +389,-125,26 +390,-15,15 +391,-15,15 +392,-15,15 +393,-27,27 +394,-15,15 +395,-15,15 +396,-17,17 +397,-15,15 +398,-15,15 +399,-15,15 diff --git a/projects/codes/Sarsa/outputs/Racetrack-v0/20220825-212738/models/checkpoint.pkl b/projects/codes/Sarsa/outputs/Racetrack-v0/20220825-212738/models/checkpoint.pkl new file mode 100644 index 0000000..81268a4 Binary files /dev/null and b/projects/codes/Sarsa/outputs/Racetrack-v0/20220825-212738/models/checkpoint.pkl differ diff --git a/projects/codes/Sarsa/outputs/Racetrack-v0/20220825-212738/results/params.json b/projects/codes/Sarsa/outputs/Racetrack-v0/20220825-212738/results/params.json new file mode 100644 index 0000000..accb050 --- /dev/null +++ b/projects/codes/Sarsa/outputs/Racetrack-v0/20220825-212738/results/params.json @@ -0,0 +1 @@ +{"algo_name": "Sarsa", "env_name": "Racetrack-v0", "train_eps": 300, "test_eps": 20, "gamma": 0.99, "epsilon_start": 0.9, "epsilon_end": 0.01, "epsilon_decay": 200, "lr": 0.2, "device": "cpu", 
"seed": 10, "show_fig": false, "save_fig": true, "result_path": "/Users/jj/Desktop/rl-tutorials/codes/Sarsa/outputs/Racetrack-v0/20220825-212738/results/", "model_path": "/Users/jj/Desktop/rl-tutorials/codes/Sarsa/outputs/Racetrack-v0/20220825-212738/models/", "n_states": 4, "n_actions": 9} \ No newline at end of file diff --git a/projects/codes/Sarsa/outputs/Racetrack-v0/20220825-212738/results/testing_curve.png b/projects/codes/Sarsa/outputs/Racetrack-v0/20220825-212738/results/testing_curve.png new file mode 100644 index 0000000..c78b938 Binary files /dev/null and b/projects/codes/Sarsa/outputs/Racetrack-v0/20220825-212738/results/testing_curve.png differ diff --git a/projects/codes/Sarsa/outputs/Racetrack-v0/20220825-212738/results/testing_results.csv b/projects/codes/Sarsa/outputs/Racetrack-v0/20220825-212738/results/testing_results.csv new file mode 100644 index 0000000..2cb817f --- /dev/null +++ b/projects/codes/Sarsa/outputs/Racetrack-v0/20220825-212738/results/testing_results.csv @@ -0,0 +1,21 @@ +episodes,rewards,steps +0,4,6 +1,4,6 +2,-1010,1000 +3,-14,14 +4,4,6 +5,4,6 +6,4,6 +7,-1060,1000 +8,2,8 +9,-12,12 +10,3,7 +11,-15,15 +12,3,7 +13,4,6 +14,-14,14 +15,3,7 +16,-18,18 +17,4,6 +18,4,6 +19,-1020,1000 diff --git a/projects/codes/Sarsa/outputs/Racetrack-v0/20220825-212738/results/training_curve.png b/projects/codes/Sarsa/outputs/Racetrack-v0/20220825-212738/results/training_curve.png new file mode 100644 index 0000000..5c612d6 Binary files /dev/null and b/projects/codes/Sarsa/outputs/Racetrack-v0/20220825-212738/results/training_curve.png differ diff --git a/projects/codes/Sarsa/outputs/Racetrack-v0/20220825-212738/results/training_results.csv b/projects/codes/Sarsa/outputs/Racetrack-v0/20220825-212738/results/training_results.csv new file mode 100644 index 0000000..0912f70 --- /dev/null +++ b/projects/codes/Sarsa/outputs/Racetrack-v0/20220825-212738/results/training_results.csv @@ -0,0 +1,301 @@ +episodes,rewards,steps +0,-3460,1000 +1,-2800,1000 
+2,-2910,1000 +3,-2620,1000 +4,-2620,1000 +5,-2590,1000 +6,-2390,1000 +7,-2510,1000 +8,-2470,1000 +9,-611,251 +10,-891,371 +11,-265,125 +12,-2281,911 +13,-1203,523 +14,-616,266 +15,-213,113 +16,-633,273 +17,-1112,482 +18,-350,160 +19,-852,342 +20,-87,47 +21,-11,11 +22,-27,17 +23,-117,57 +24,-15,15 +25,4,6 +26,-27,17 +27,-94,44 +28,-184,84 +29,-44,24 +30,-150,80 +31,-14,14 +32,-219,89 +33,-50,30 +34,-111,61 +35,-10,10 +36,-28,18 +37,-34,24 +38,-12,12 +39,-19,19 +40,-136,66 +41,-171,71 +42,-51,31 +43,4,6 +44,-117,57 +45,4,6 +46,4,6 +47,-127,67 +48,-78,48 +49,-311,131 +50,-25,15 +51,4,6 +52,-49,29 +53,-25,15 +54,-78,48 +55,-238,108 +56,4,6 +57,-17,17 +58,-29,19 +59,-218,98 +60,4,6 +61,-129,59 +62,-344,144 +63,-25,15 +64,-15,15 +65,-77,37 +66,2,8 +67,0,10 +68,4,6 +69,4,6 +70,-242,102 +71,3,7 +72,4,6 +73,-53,33 +74,-14,14 +75,4,6 +76,4,6 +77,-30,20 +78,-12,12 +79,2,8 +80,-12,12 +81,-150,70 +82,-48,28 +83,-102,52 +84,4,6 +85,-97,47 +86,-10,10 +87,-125,55 +88,-28,18 +89,-26,16 +90,-107,57 +91,4,6 +92,-16,16 +93,-84,44 +94,-13,13 +95,-43,23 +96,-14,14 +97,-12,12 +98,-13,13 +99,-2,12 +100,-14,14 +101,-47,27 +102,4,6 +103,4,6 +104,-91,51 +105,-65,35 +106,4,6 +107,-12,12 +108,-14,14 +109,-13,13 +110,4,6 +111,-41,31 +112,-13,13 +113,4,6 +114,-4,14 +115,-74,34 +116,4,6 +117,-60,30 +118,4,6 +119,-15,15 +120,3,7 +121,4,6 +122,4,6 +123,-19,19 +124,4,6 +125,-49,29 +126,-13,13 +127,-30,20 +128,2,8 +129,-21,21 +130,-45,25 +131,-32,22 +132,-67,37 +133,-46,26 +134,0,10 +135,-12,12 +136,-9,9 +137,-10,10 +138,-14,14 +139,4,6 +140,-11,11 +141,-12,12 +142,2,8 +143,-35,25 +144,4,6 +145,-73,43 +146,4,6 +147,-20,20 +148,4,6 +149,2,8 +150,-29,19 +151,-20,20 +152,4,6 +153,-28,18 +154,4,6 +155,4,6 +156,4,6 +157,4,6 +158,-34,24 +159,4,6 +160,4,6 +161,4,6 +162,-25,15 +163,4,6 +164,3,7 +165,-48,28 +166,4,6 +167,-58,38 +168,-20,20 +169,-9,9 +170,3,7 +171,4,6 +172,3,7 +173,-33,23 +174,-50,30 +175,-16,16 +176,-32,22 +177,-65,35 +178,4,6 +179,-13,13 +180,-11,11 +181,3,7 +182,4,6 +183,-16,16 +184,-12,12 
+185,4,6 +186,-48,28 +187,-13,13 +188,2,8 +189,3,7 +190,-27,17 +191,3,7 +192,4,6 +193,4,6 +194,4,6 +195,4,6 +196,4,6 +197,-13,13 +198,-14,14 +199,4,6 +200,4,6 +201,-13,13 +202,-33,23 +203,4,6 +204,-32,22 +205,4,6 +206,-48,28 +207,4,6 +208,4,6 +209,3,7 +210,4,6 +211,-34,24 +212,3,7 +213,4,6 +214,4,6 +215,4,6 +216,3,7 +217,-12,12 +218,3,7 +219,-8,8 +220,3,7 +221,4,6 +222,-46,26 +223,-33,23 +224,4,6 +225,1,9 +226,3,7 +227,2,8 +228,-34,24 +229,4,6 +230,4,6 +231,4,6 +232,4,6 +233,-55,35 +234,-37,27 +235,4,6 +236,-14,14 +237,-65,35 +238,4,6 +239,-13,13 +240,4,6 +241,4,6 +242,-13,13 +243,-30,20 +244,3,7 +245,-13,13 +246,4,6 +247,4,6 +248,-13,13 +249,-32,22 +250,4,6 +251,-55,35 +252,-12,12 +253,3,7 +254,3,7 +255,3,7 +256,4,6 +257,2,8 +258,-12,12 +259,3,7 +260,-10,10 +261,-12,12 +262,4,6 +263,3,7 +264,3,7 +265,-16,16 +266,3,7 +267,-47,27 +268,-13,13 +269,4,6 +270,3,7 +271,-13,13 +272,4,6 +273,4,6 +274,-17,17 +275,4,6 +276,3,7 +277,3,7 +278,4,6 +279,-41,31 +280,3,7 +281,-47,27 +282,-32,22 +283,4,6 +284,3,7 +285,-17,17 +286,3,7 +287,3,7 +288,3,7 +289,-12,12 +290,4,6 +291,3,7 +292,3,7 +293,-24,14 +294,3,7 +295,4,6 +296,3,7 +297,3,7 +298,3,7 +299,-13,13 diff --git a/projects/codes/Sarsa/sarsa.py b/projects/codes/Sarsa/sarsa.py index c10d226..37ed818 100644 --- a/projects/codes/Sarsa/sarsa.py +++ b/projects/codes/Sarsa/sarsa.py @@ -5,7 +5,7 @@ Author: John Email: johnjim0816@gmail.com Date: 2021-03-12 16:58:16 LastEditor: John -LastEditTime: 2022-08-25 00:23:22 +LastEditTime: 2022-08-25 21:26:08 Discription: Environment: ''' @@ -30,7 +30,7 @@ class Sarsa(object): self.sample_count += 1 self.epsilon = self.epsilon_end + (self.epsilon_start - self.epsilon_end) * \ math.exp(-1. 
* self.sample_count / self.epsilon_decay) # The probability to select a random action, is is log decayed - best_action = np.argmax(self.Q_table[state]) + best_action = np.argmax(self.Q_table[str(state)]) # array cannot be hashtable, thus convert to str action_probs = np.ones(self.n_actions, dtype=float) * self.epsilon / self.n_actions action_probs[best_action] += (1.0 - self.epsilon) action = np.random.choice(np.arange(len(action_probs)), p=action_probs) @@ -38,27 +38,27 @@ class Sarsa(object): def predict_action(self,state): ''' predict action while testing ''' - action = np.argmax(self.Q_table[state]) + action = np.argmax(self.Q_table[str(state)]) return action def update(self, state, action, reward, next_state, next_action,done): - Q_predict = self.Q_table[state][action] + Q_predict = self.Q_table[str(state)][action] if done: Q_target = reward # terminal state else: - Q_target = reward + self.gamma * self.Q_table[next_state][next_action] # the only difference from Q learning - self.Q_table[state][action] += self.lr * (Q_target - Q_predict) + Q_target = reward + self.gamma * self.Q_table[str(next_state)][next_action] # the only difference from Q learning + self.Q_table[str(state)][action] += self.lr * (Q_target - Q_predict) def save_model(self,path): import dill from pathlib import Path # create path Path(path).mkdir(parents=True, exist_ok=True) torch.save( - obj=self.Q_table_table, + obj=self.Q_table, f=path+"checkpoint.pkl", pickle_module=dill ) print("Model saved!") def load_model(self, path): import dill - self.Q_table_table =torch.load(f=path+'checkpoint.pkl',pickle_module=dill) + self.Q_table=torch.load(f=path+'checkpoint.pkl',pickle_module=dill) print("Mode loaded!") \ No newline at end of file diff --git a/projects/codes/Sarsa/task1.py b/projects/codes/Sarsa/task1.py deleted file mode 100644 index 3fe8fb9..0000000 --- a/projects/codes/Sarsa/task1.py +++ /dev/null @@ -1,131 +0,0 @@ -#!/usr/bin/env python -# coding=utf-8 -''' -Author: John -Email: 
johnjim0816@gmail.com -Date: 2020-09-11 23:03:00 -LastEditor: John -LastEditTime: 2022-08-04 22:44:00 -Discription: -Environment: -''' -import sys -import os -curr_path = os.path.dirname(os.path.abspath(__file__)) # 当前文件所在绝对路径 -parent_path = os.path.dirname(curr_path) # 父路径 -sys.path.append(parent_path) # 添加路径到系统路径 - -import gym -import torch -import datetime -import argparse -from envs.gridworld_env import CliffWalkingWapper -from Sarsa.sarsa import Sarsa -from common.utils import plot_rewards,save_args -from common.utils import save_results,make_dir - - -def get_args(): - """ - """ - curr_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") # 获取当前时间 - parser = argparse.ArgumentParser(description="hyperparameters") - parser.add_argument('--algo_name',default='Sarsa',type=str,help="name of algorithm") - parser.add_argument('--env_name',default='CliffWalking-v0',type=str,help="name of environment") - parser.add_argument('--train_eps',default=400,type=int,help="episodes of training") # 训练的回合数 - parser.add_argument('--test_eps',default=20,type=int,help="episodes of testing") # 测试的回合数 - parser.add_argument('--gamma',default=0.90,type=float,help="discounted factor") # 折扣因子 - parser.add_argument('--epsilon_start',default=0.95,type=float,help="initial value of epsilon") # e-greedy策略中初始epsilon - parser.add_argument('--epsilon_end',default=0.01,type=float,help="final value of epsilon") # e-greedy策略中的终止epsilon - parser.add_argument('--epsilon_decay',default=300,type=int,help="decay rate of epsilon") # e-greedy策略中epsilon的衰减率 - parser.add_argument('--lr',default=0.1,type=float,help="learning rate") - parser.add_argument('--device',default='cpu',type=str,help="cpu or cuda") - parser.add_argument('--result_path',default=curr_path + "/outputs/" + parser.parse_args().env_name + \ - '/' + curr_time + '/results/' ) - parser.add_argument('--model_path',default=curr_path + "/outputs/" + parser.parse_args().env_name + \ - '/' + curr_time + '/models/' ) # path to save models - 
parser.add_argument('--save_fig',default=True,type=bool,help="if save figure or not") - args = parser.parse_args([]) - return args -curr_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") # 获取当前时间 - -def train(cfg,env,agent): - print('开始训练!') - print(f'环境:{cfg.env_name}, 算法:{cfg.algo_name}, 设备:{cfg.device}') - rewards = [] # 记录奖励 - for i_ep in range(cfg.train_eps): - ep_reward = 0 # 记录每个回合的奖励 - state = env.reset() # 重置环境,即开始新的回合 - action = agent.sample(state) - while True: - action = agent.sample(state) # 根据算法采样一个动作 - next_state, reward, done, _ = env.step(action) # 与环境进行一次动作交互 - next_action = agent.sample(next_state) - agent.update(state, action, reward, next_state, next_action,done) # 算法更新 - state = next_state # 更新状态 - action = next_action - ep_reward += reward - if done: - break - rewards.append(ep_reward) - print(f"回合:{i_ep+1}/{cfg.train_eps},奖励:{ep_reward:.1f},Epsilon:{agent.epsilon}") - print('完成训练!') - return {"rewards":rewards} - -def test(cfg,env,agent): - print('开始测试!') - print(f'环境:{cfg.env_name}, 算法:{cfg.algo_name}, 设备:{cfg.device}') - rewards = [] # 记录所有回合的奖励 - for i_ep in range(cfg.test_eps): - ep_reward = 0 # 记录每个episode的reward - state = env.reset() # 重置环境, 重新开一局(即开始新的一个回合) - while True: - action = agent.predict(state) # 根据算法选择一个动作 - next_state, reward, done, _ = env.step(action) # 与环境进行一个交互 - state = next_state # 更新状态 - ep_reward += reward - if done: - break - rewards.append(ep_reward) - print(f"回合数:{i_ep+1}/{cfg.test_eps}, 奖励:{ep_reward:.1f}") - print('完成测试!') - return {"rewards":rewards} - -def env_agent_config(cfg,seed=1): - '''创建环境和智能体 - Args: - cfg ([type]): [description] - seed (int, optional): 随机种子. Defaults to 1. 
- Returns: - env [type]: 环境 - agent : 智能体 - ''' - env = gym.make(cfg.env_name) - env = CliffWalkingWapper(env) - env.seed(seed) # 设置随机种子 - n_states = env.observation_space.n # 状态维度 - n_actions = env.action_space.n # 动作维度 - print(f"状态数:{n_states},动作数:{n_actions}") - agent = Sarsa(n_actions,cfg) - return env,agent -if __name__ == "__main__": - cfg = get_args() - # 训练 - env, agent = env_agent_config(cfg) - res_dic = train(cfg, env, agent) - make_dir(cfg.result_path, cfg.model_path) - save_args(cfg) # save parameters - agent.save(path=cfg.model_path) # save model - save_results(res_dic, tag='train', - path=cfg.result_path) - plot_rewards(res_dic['rewards'], cfg, tag="train") - # 测试 - env, agent = env_agent_config(cfg) - agent.load(path=cfg.model_path) # 导入模型 - res_dic = test(cfg, env, agent) - save_results(res_dic, tag='test', - path=cfg.result_path) # 保存结果 - plot_rewards(res_dic['rewards'], cfg, tag="test") # 画出结果 - - - diff --git a/projects/codes/common/launcher.py b/projects/codes/common/launcher.py index d26bce1..43f6f45 100644 --- a/projects/codes/common/launcher.py +++ b/projects/codes/common/launcher.py @@ -24,7 +24,7 @@ class Launcher: save_results(res_dic, tag = 'train', path = cfg['result_path']) # save results plot_rewards(res_dic['rewards'], cfg, path = cfg['result_path'],tag = "train") # plot results # testing - env, agent = self.env_agent_config(cfg) # create new env for testing, sometimes can ignore this step + # env, agent = self.env_agent_config(cfg) # create new env for testing, sometimes can ignore this step agent.load_model(path = cfg['model_path']) # load model res_dic = self.test(cfg, env, agent) save_results(res_dic, tag='test', diff --git a/projects/codes/common/memories.py b/projects/codes/common/memories.py index 255333a..1317dd1 100644 --- a/projects/codes/common/memories.py +++ b/projects/codes/common/memories.py @@ -5,7 +5,7 @@ @Email: johnjim0816@gmail.com @Date: 2020-06-10 15:27:16 @LastEditor: John -LastEditTime: 2022-08-22 17:23:21 
+LastEditTime: 2022-08-28 23:44:06 @Discription: @Environment: python 3.7.7 ''' @@ -39,12 +39,12 @@ class ReplayBufferQue: def __init__(self, capacity: int) -> None: self.capacity = capacity self.buffer = deque(maxlen=self.capacity) - def push(self,trainsitions): + def push(self,transitions): '''_summary_ Args: trainsitions (tuple): _description_ ''' - self.buffer.append(trainsitions) + self.buffer.append(transitions) def sample(self, batch_size: int, sequential: bool = False): if batch_size > len(self.buffer): batch_size = len(self.buffer) diff --git a/projects/codes/common/models.py b/projects/codes/common/models.py index 1e7bbaa..3e3e562 100644 --- a/projects/codes/common/models.py +++ b/projects/codes/common/models.py @@ -5,7 +5,7 @@ Author: John Email: johnjim0816@gmail.com Date: 2021-03-12 21:14:12 LastEditor: John -LastEditTime: 2021-09-15 13:21:03 +LastEditTime: 2022-08-29 14:24:44 Discription: Environment: ''' @@ -31,40 +31,45 @@ class MLP(nn.Module): x = F.relu(self.fc2(x)) return self.fc3(x) +class ActorSoftmax(nn.Module): + def __init__(self, input_dim, output_dim, hidden_dim=256): + super(ActorSoftmax, self).__init__() + self.fc1 = nn.Linear(input_dim, hidden_dim) + self.fc2 = nn.Linear(hidden_dim, output_dim) + def forward(self,state): + dist = F.relu(self.fc1(state)) + dist = F.softmax(self.fc2(dist),dim=1) + return dist class Critic(nn.Module): - def __init__(self, n_obs, n_actions, hidden_size, init_w=3e-3): - super(Critic, self).__init__() - - self.linear1 = nn.Linear(n_obs + n_actions, hidden_size) - self.linear2 = nn.Linear(hidden_size, hidden_size) - self.linear3 = nn.Linear(hidden_size, 1) - # 随机初始化为较小的值 - self.linear3.weight.data.uniform_(-init_w, init_w) - self.linear3.bias.data.uniform_(-init_w, init_w) - - def forward(self, state, action): - # 按维数1拼接 - x = torch.cat([state, action], 1) - x = F.relu(self.linear1(x)) - x = F.relu(self.linear2(x)) - x = self.linear3(x) - return x + def __init__(self,input_dim,output_dim,hidden_dim=256): + 
super(Critic,self).__init__() + assert output_dim == 1 # critic must output a single value + self.fc1 = nn.Linear(input_dim, hidden_dim) + self.fc2 = nn.Linear(hidden_dim, output_dim) + def forward(self,state): + value = F.relu(self.fc1(state)) + value = self.fc2(value) + return value -class Actor(nn.Module): - def __init__(self, n_obs, n_actions, hidden_size, init_w=3e-3): - super(Actor, self).__init__() - self.linear1 = nn.Linear(n_obs, hidden_size) - self.linear2 = nn.Linear(hidden_size, hidden_size) - self.linear3 = nn.Linear(hidden_size, n_actions) +class ActorCriticSoftmax(nn.Module): + def __init__(self, input_dim, output_dim, actor_hidden_dim=256,critic_hidden_dim=256): + super(ActorCriticSoftmax, self).__init__() + + self.critic_fc1 = nn.Linear(input_dim, critic_hidden_dim) + self.critic_fc2 = nn.Linear(critic_hidden_dim, 1) + + self.actor_fc1 = nn.Linear(input_dim, actor_hidden_dim) + self.actor_fc2 = nn.Linear(actor_hidden_dim, output_dim) + + def forward(self, state): + # state = Variable(torch.from_numpy(state).float().unsqueeze(0)) + value = F.relu(self.critic_fc1(state)) + value = self.critic_fc2(value) - self.linear3.weight.data.uniform_(-init_w, init_w) - self.linear3.bias.data.uniform_(-init_w, init_w) - - def forward(self, x): - x = F.relu(self.linear1(x)) - x = F.relu(self.linear2(x)) - x = torch.tanh(self.linear3(x)) - return x + policy_dist = F.relu(self.actor_fc1(state)) + policy_dist = F.softmax(self.actor_fc2(policy_dist), dim=1) + + return value, policy_dist class ActorCritic(nn.Module): def __init__(self, n_states, n_actions, hidden_dim=256): diff --git a/projects/codes/envs/register.py b/projects/codes/envs/register.py index d92a93d..38074cf 100644 --- a/projects/codes/envs/register.py +++ b/projects/codes/envs/register.py @@ -5,7 +5,7 @@ def register_env(env_name): if env_name == 'Racetrack-v0': register( id='Racetrack-v0', - entry_point='racetrack:RacetrackEnv', + entry_point='envs.racetrack:RacetrackEnv', max_episode_steps=1000, 
kwargs={} ) diff --git a/projects/codes/scripts/A2C_CartPole-v0.sh b/projects/codes/scripts/A2C_CartPole-v0.sh new file mode 100644 index 0000000..4fcc9a1 --- /dev/null +++ b/projects/codes/scripts/A2C_CartPole-v0.sh @@ -0,0 +1,15 @@ +# run A2C on CartPole-v0 +# source conda, if you are already in proper conda environment, then comment the codes util "conda activate easyrl" + +if [ -f "$HOME/anaconda3/etc/profile.d/conda.sh" ]; then + echo "source file at ~/anaconda3/etc/profile.d/conda.sh" + source ~/anaconda3/etc/profile.d/conda.sh +elif [ -f "$HOME/opt/anaconda3/etc/profile.d/conda.sh" ]; then + echo "source file at ~/opt/anaconda3/etc/profile.d/conda.sh" + source ~/opt/anaconda3/etc/profile.d/conda.sh +else + echo 'please manually config the conda source path' +fi +conda activate easyrl # easyrl here can be changed to another name of conda env that you have created +codes_dir=$(dirname $(dirname $(readlink -f "$0"))) # "codes" path +python $codes_dir/A2C/main.py \ No newline at end of file diff --git a/projects/codes/scripts/DQN_task2.sh b/projects/codes/scripts/DQN_Acrobot-v1.sh similarity index 100% rename from projects/codes/scripts/DQN_task2.sh rename to projects/codes/scripts/DQN_Acrobot-v1.sh diff --git a/projects/codes/scripts/DQN_task0.sh b/projects/codes/scripts/DQN_CartPole-v0.sh similarity index 100% rename from projects/codes/scripts/DQN_task0.sh rename to projects/codes/scripts/DQN_CartPole-v0.sh diff --git a/projects/codes/scripts/DQN_task1.sh b/projects/codes/scripts/DQN_CartPole-v1.sh similarity index 83% rename from projects/codes/scripts/DQN_task1.sh rename to projects/codes/scripts/DQN_CartPole-v1.sh index d9ca67d..8cac524 100644 --- a/projects/codes/scripts/DQN_task1.sh +++ b/projects/codes/scripts/DQN_CartPole-v1.sh @@ -1,6 +1,4 @@ -''' -run DQN on CartPole-v1, not finished yet -''' +# run DQN on CartPole-v1, not finished yet # source conda, if you are already in proper conda environment, then comment the codes util "conda activate easyrl" 
if [ -f "$HOME/anaconda3/etc/profile.d/conda.sh" ]; then echo "source file at ~/anaconda3/etc/profile.d/conda.sh" @@ -13,4 +11,4 @@ else fi conda activate easyrl # easyrl here can be changed to another name of conda env that you have created codes_dir=$(dirname $(dirname $(readlink -f "$0"))) # "codes" path -python $codes_dir/DQN/main.py --env_name CartPole-v1 --train_eps 500 --epsilon_decay 1000 --memory_capacity 200000 --batch_size 128 --device cuda \ No newline at end of file +python $codes_dir/DQN/main.py --env_name CartPole-v1 --train_eps 2000 --gamma 0.99 --epsilon_decay 6000 --lr 0.00001 --memory_capacity 200000 --batch_size 64 --device cuda \ No newline at end of file diff --git a/projects/codes/scripts/Qlearning_task0.sh b/projects/codes/scripts/PolicyGradient_CartPole-v0.sh similarity index 93% rename from projects/codes/scripts/Qlearning_task0.sh rename to projects/codes/scripts/PolicyGradient_CartPole-v0.sh index 7ed9089..d7e0a69 100644 --- a/projects/codes/scripts/Qlearning_task0.sh +++ b/projects/codes/scripts/PolicyGradient_CartPole-v0.sh @@ -1,4 +1,3 @@ - # source conda, if you are already in proper conda environment, then comment the codes util "conda activate easyrl" if [ -f "$HOME/anaconda3/etc/profile.d/conda.sh" ]; then echo "source file at ~/anaconda3/etc/profile.d/conda.sh" @@ -11,4 +10,4 @@ else fi conda activate easyrl # easyrl here can be changed to another name of conda env that you have created codes_dir=$(dirname $(dirname $(readlink -f "$0"))) # "codes" path -python $codes_dir/QLearning/main.py --device cpu \ No newline at end of file +python $codes_dir/PolicyGradient/main.py \ No newline at end of file diff --git a/projects/codes/scripts/Qlearning_CliffWalking-v0.sh b/projects/codes/scripts/Qlearning_CliffWalking-v0.sh new file mode 100644 index 0000000..233cec7 --- /dev/null +++ b/projects/codes/scripts/Qlearning_CliffWalking-v0.sh @@ -0,0 +1,12 @@ +if [ -f "$HOME/anaconda3/etc/profile.d/conda.sh" ]; then + echo "source file at 
~/anaconda3/etc/profile.d/conda.sh" + source ~/anaconda3/etc/profile.d/conda.sh +elif [ -f "$HOME/opt/anaconda3/etc/profile.d/conda.sh" ]; then + echo "source file at ~/opt/anaconda3/etc/profile.d/conda.sh" + source ~/opt/anaconda3/etc/profile.d/conda.sh +else + echo 'please manually config the conda source path' +fi +conda activate easyrl # easyrl here can be changed to another name of conda env that you have created +codes_dir=$(dirname $(dirname $(readlink -f "$0"))) # "codes" path +python $codes_dir/QLearning/main.py --env_name CliffWalking-v0 --train_eps 400 --gamma 0.90 --epsilon_start 0.95 --epsilon_end 0.01 --epsilon_decay 300 --lr 0.1 --device cpu \ No newline at end of file diff --git a/projects/codes/scripts/Qlearning_task1.sh b/projects/codes/scripts/Qlearning_FrozenLakeNoSlippery-v1.sh similarity index 93% rename from projects/codes/scripts/Qlearning_task1.sh rename to projects/codes/scripts/Qlearning_FrozenLakeNoSlippery-v1.sh index fadb1a6..0df0547 100644 --- a/projects/codes/scripts/Qlearning_task1.sh +++ b/projects/codes/scripts/Qlearning_FrozenLakeNoSlippery-v1.sh @@ -11,5 +11,4 @@ else fi conda activate easyrl # easyrl here can be changed to another name of conda env that you have created codes_dir=$(dirname $(dirname $(readlink -f "$0"))) # "codes" path -python $codes_dir/envs/register.py # register environment python $codes_dir/QLearning/main.py --env_name FrozenLakeNoSlippery-v1 --train_eps 800 --epsilon_start 0.70 --epsilon_end 0.1 --epsilon_decay 2000 --gamma 0.9 --lr 0.9 --device cpu \ No newline at end of file diff --git a/projects/codes/scripts/Qlearning_Racetrack-v0.sh b/projects/codes/scripts/Qlearning_Racetrack-v0.sh new file mode 100644 index 0000000..00599fa --- /dev/null +++ b/projects/codes/scripts/Qlearning_Racetrack-v0.sh @@ -0,0 +1,14 @@ + +# source conda, if you are already in proper conda environment, then comment the codes util "conda activate easyrl" +if [ -f "$HOME/anaconda3/etc/profile.d/conda.sh" ]; then + echo "source 
file at ~/anaconda3/etc/profile.d/conda.sh" + source ~/anaconda3/etc/profile.d/conda.sh +elif [ -f "$HOME/opt/anaconda3/etc/profile.d/conda.sh" ]; then + echo "source file at ~/opt/anaconda3/etc/profile.d/conda.sh" + source ~/opt/anaconda3/etc/profile.d/conda.sh +else + echo 'please manually config the conda source path' +fi +conda activate easyrl # easyrl here can be changed to another name of conda env that you have created +codes_dir=$(dirname $(dirname $(readlink -f "$0"))) # "codes" path +python $codes_dir/QLearning/main.py --env_name Racetrack-v0 --device cpu \ No newline at end of file diff --git a/projects/codes/scripts/Sarsa_CliffWalking-v0.sh b/projects/codes/scripts/Sarsa_CliffWalking-v0.sh new file mode 100644 index 0000000..c4f5e6a --- /dev/null +++ b/projects/codes/scripts/Sarsa_CliffWalking-v0.sh @@ -0,0 +1,12 @@ +if [ -f "$HOME/anaconda3/etc/profile.d/conda.sh" ]; then + echo "source file at ~/anaconda3/etc/profile.d/conda.sh" + source ~/anaconda3/etc/profile.d/conda.sh +elif [ -f "$HOME/opt/anaconda3/etc/profile.d/conda.sh" ]; then + echo "source file at ~/opt/anaconda3/etc/profile.d/conda.sh" + source ~/opt/anaconda3/etc/profile.d/conda.sh +else + echo 'please manually config the conda source path' +fi +conda activate easyrl # easyrl here can be changed to another name of conda env that you have created +codes_dir=$(dirname $(dirname $(readlink -f "$0"))) # "codes" path +python $codes_dir/Sarsa/main.py --env_name CliffWalking-v0 --train_eps 400 --gamma 0.90 --epsilon_start 0.95 --epsilon_end 0.01 --epsilon_decay 300 --lr 0.1 --device cpu \ No newline at end of file diff --git a/projects/codes/scripts/Sarsa_FrozenLakeNoSlippery-v1.sh b/projects/codes/scripts/Sarsa_FrozenLakeNoSlippery-v1.sh new file mode 100644 index 0000000..f215c94 --- /dev/null +++ b/projects/codes/scripts/Sarsa_FrozenLakeNoSlippery-v1.sh @@ -0,0 +1,13 @@ +# Sarsa for FrozenLakeNoSlippery-v1, cannot converge like Qlearning! 
+if [ -f "$HOME/anaconda3/etc/profile.d/conda.sh" ]; then + echo "source file at ~/anaconda3/etc/profile.d/conda.sh" + source ~/anaconda3/etc/profile.d/conda.sh +elif [ -f "$HOME/opt/anaconda3/etc/profile.d/conda.sh" ]; then + echo "source file at ~/opt/anaconda3/etc/profile.d/conda.sh" + source ~/opt/anaconda3/etc/profile.d/conda.sh +else + echo 'please manually config the conda source path' +fi +conda activate easyrl # easyrl here can be changed to another name of conda env that you have created +codes_dir=$(dirname $(dirname $(readlink -f "$0"))) # "codes" path +python $codes_dir/Sarsa/main.py --env_name FrozenLakeNoSlippery-v1 --train_eps 800 --ep_max_steps 10 --epsilon_start 0.50 --epsilon_end 0.01 --epsilon_decay 2000 --gamma 0.9 --lr 0.1 --device cpu \ No newline at end of file diff --git a/projects/codes/scripts/Sarsa_task0.sh b/projects/codes/scripts/Sarsa_Racetrack-v0.sh similarity index 86% rename from projects/codes/scripts/Sarsa_task0.sh rename to projects/codes/scripts/Sarsa_Racetrack-v0.sh index 49358de..dcd6cac 100644 --- a/projects/codes/scripts/Sarsa_task0.sh +++ b/projects/codes/scripts/Sarsa_Racetrack-v0.sh @@ -9,5 +9,4 @@ else fi conda activate easyrl # easyrl here can be changed to another name of conda env that you have created codes_dir=$(dirname $(dirname $(readlink -f "$0"))) # "codes" path -python $codes_dir/envs/register.py # register environment -python $codes_dir/Sarsa/main.py \ No newline at end of file +python $codes_dir/Sarsa/main.py --env_name Racetrack-v0 \ No newline at end of file