hot update A2C
This commit is contained in:
@@ -1,35 +0,0 @@
|
||||
\relax
|
||||
\providecommand\hyper@newdestlabel[2]{}
|
||||
\providecommand\HyperFirstAtBeginDocument{\AtBeginDocument}
|
||||
\HyperFirstAtBeginDocument{\ifx\hyper@anchor\@undefined
|
||||
\global\let\oldcontentsline\contentsline
|
||||
\gdef\contentsline#1#2#3#4{\oldcontentsline{#1}{#2}{#3}}
|
||||
\global\let\oldnewlabel\newlabel
|
||||
\gdef\newlabel#1#2{\newlabelxx{#1}#2}
|
||||
\gdef\newlabelxx#1#2#3#4#5#6{\oldnewlabel{#1}{{#2}{#3}}}
|
||||
\AtEndDocument{\ifx\hyper@anchor\@undefined
|
||||
\let\contentsline\oldcontentsline
|
||||
\let\newlabel\oldnewlabel
|
||||
\fi}
|
||||
\fi}
|
||||
\global\let\hyper@last\relax
|
||||
\gdef\HyperFirstAtBeginDocument#1{#1}
|
||||
\providecommand*\HyPL@Entry[1]{}
|
||||
\HyPL@Entry{0<</S/D>>}
|
||||
\@writefile{toc}{\contentsline {section}{\numberline {1}模版备用}{2}{section.1}\protected@file@percent }
|
||||
\@writefile{loa}{\contentsline {algorithm}{\numberline {}{\ignorespaces }}{2}{algorithm.}\protected@file@percent }
|
||||
\@writefile{toc}{\contentsline {section}{\numberline {2}Q learning算法}{3}{section.2}\protected@file@percent }
|
||||
\@writefile{loa}{\contentsline {algorithm}{\numberline {}{\ignorespaces }}{3}{algorithm.}\protected@file@percent }
|
||||
\@writefile{toc}{\contentsline {section}{\numberline {3}Sarsa算法}{4}{section.3}\protected@file@percent }
|
||||
\@writefile{loa}{\contentsline {algorithm}{\numberline {}{\ignorespaces }}{4}{algorithm.}\protected@file@percent }
|
||||
\@writefile{toc}{\contentsline {section}{\numberline {4}Policy Gradient算法}{5}{section.4}\protected@file@percent }
|
||||
\@writefile{loa}{\contentsline {algorithm}{\numberline {}{\ignorespaces }}{5}{algorithm.}\protected@file@percent }
|
||||
\@writefile{toc}{\contentsline {section}{\numberline {5}DQN算法}{6}{section.5}\protected@file@percent }
|
||||
\@writefile{loa}{\contentsline {algorithm}{\numberline {}{\ignorespaces }}{6}{algorithm.}\protected@file@percent }
|
||||
\@writefile{toc}{\contentsline {section}{\numberline {6}SoftQ算法}{7}{section.6}\protected@file@percent }
|
||||
\@writefile{loa}{\contentsline {algorithm}{\numberline {}{\ignorespaces }}{7}{algorithm.}\protected@file@percent }
|
||||
\@writefile{toc}{\contentsline {section}{\numberline {7}SAC-S算法}{8}{section.7}\protected@file@percent }
|
||||
\@writefile{loa}{\contentsline {algorithm}{\numberline {}{\ignorespaces }}{8}{algorithm.}\protected@file@percent }
|
||||
\@writefile{toc}{\contentsline {section}{\numberline {8}SAC算法}{9}{section.8}\protected@file@percent }
|
||||
\@writefile{loa}{\contentsline {algorithm}{\numberline {}{\ignorespaces }}{9}{algorithm.}\protected@file@percent }
|
||||
\gdef \@abspage@last{9}
|
||||
@@ -1,570 +0,0 @@
|
||||
This is XeTeX, Version 3.141592653-2.6-0.999993 (TeX Live 2021) (preloaded format=xelatex 2021.8.22) 23 AUG 2022 19:26
|
||||
entering extended mode
|
||||
restricted \write18 enabled.
|
||||
file:line:error style messages enabled.
|
||||
%&-line parsing enabled.
|
||||
**/Users/jj/Desktop/rl-tutorials/assets/pseudocodes/pseudocodes
|
||||
(/Users/jj/Desktop/rl-tutorials/assets/pseudocodes/pseudocodes.tex
|
||||
LaTeX2e <2020-10-01> patch level 4
|
||||
L3 programming layer <2021-02-18> (/usr/local/texlive/2021/texmf-dist/tex/latex/ctex/ctexart.cls (/usr/local/texlive/2021/texmf-dist/tex/latex/ctex/config/ctexbackend.cfg
|
||||
File: ctexbackend.cfg 2021/03/14 v2.5.6 Backend configuration file (CTEX)
|
||||
) (/usr/local/texlive/2021/texmf-dist/tex/latex/l3kernel/expl3.sty
|
||||
Package: expl3 2021-02-18 L3 programming layer (loader)
|
||||
(/usr/local/texlive/2021/texmf-dist/tex/latex/l3backend/l3backend-xetex.def
|
||||
File: l3backend-xetex.def 2021-03-18 L3 backend support: XeTeX
|
||||
(|extractbb --version)
|
||||
\c__kernel_sys_dvipdfmx_version_int=\count175
|
||||
\l__color_backend_stack_int=\count176
|
||||
\g__color_backend_stack_int=\count177
|
||||
\g__graphics_track_int=\count178
|
||||
\l__pdf_internal_box=\box47
|
||||
\g__pdf_backend_object_int=\count179
|
||||
\g__pdf_backend_annotation_int=\count180
|
||||
\g__pdf_backend_link_int=\count181
|
||||
))
|
||||
Document Class: ctexart 2021/03/14 v2.5.6 Chinese adapter for class article (CTEX)
|
||||
(/usr/local/texlive/2021/texmf-dist/tex/latex/l3packages/xparse/xparse.sty (/usr/local/texlive/2021/texmf-dist/tex/latex/l3packages/xparse/xparse-2020-10-01.sty (/usr/local/texlive/2021/texmf-dist/tex/latex/l3packages/xparse/xparse-generic.tex))) (/usr/local/texlive/2021/texmf-dist/tex/latex/l3packages/l3keys2e/l3keys2e.sty
|
||||
Package: l3keys2e 2021-03-12 LaTeX2e option processing using LaTeX3 keys
|
||||
) (/usr/local/texlive/2021/texmf-dist/tex/latex/ctex/ctexhook.sty
|
||||
Package: ctexhook 2021/03/14 v2.5.6 Document and package hooks (CTEX)
|
||||
) (/usr/local/texlive/2021/texmf-dist/tex/latex/ctex/ctexpatch.sty
|
||||
Package: ctexpatch 2021/03/14 v2.5.6 Patching commands (CTEX)
|
||||
) (/usr/local/texlive/2021/texmf-dist/tex/latex/base/fix-cm.sty
|
||||
Package: fix-cm 2015/01/14 v1.1t fixes to LaTeX
|
||||
(/usr/local/texlive/2021/texmf-dist/tex/latex/base/ts1enc.def
|
||||
File: ts1enc.def 2001/06/05 v3.0e (jk/car/fm) Standard LaTeX file
|
||||
LaTeX Font Info: Redeclaring font encoding TS1 on input line 47.
|
||||
)) (/usr/local/texlive/2021/texmf-dist/tex/latex/everysel/everysel.sty
|
||||
Package: everysel 2021/01/20 v2.1 EverySelectfont Package (MS)
|
||||
(/usr/local/texlive/2021/texmf-dist/tex/latex/everysel/everysel-2011-10-28.sty))
|
||||
\l__ctex_tmp_int=\count182
|
||||
\l__ctex_tmp_box=\box48
|
||||
\l__ctex_tmp_dim=\dimen138
|
||||
\g__ctex_section_depth_int=\count183
|
||||
\g__ctex_font_size_int=\count184
|
||||
(/usr/local/texlive/2021/texmf-dist/tex/latex/ctex/config/ctexopts.cfg
|
||||
File: ctexopts.cfg 2021/03/14 v2.5.6 Option configuration file (CTEX)
|
||||
) (/usr/local/texlive/2021/texmf-dist/tex/latex/base/article.cls
|
||||
Document Class: article 2020/04/10 v1.4m Standard LaTeX document class
|
||||
(/usr/local/texlive/2021/texmf-dist/tex/latex/base/size11.clo
|
||||
File: size11.clo 2020/04/10 v1.4m Standard LaTeX file (size option)
|
||||
)
|
||||
\c@part=\count185
|
||||
\c@section=\count186
|
||||
\c@subsection=\count187
|
||||
\c@subsubsection=\count188
|
||||
\c@paragraph=\count189
|
||||
\c@subparagraph=\count190
|
||||
\c@figure=\count191
|
||||
\c@table=\count192
|
||||
\abovecaptionskip=\skip47
|
||||
\belowcaptionskip=\skip48
|
||||
\bibindent=\dimen139
|
||||
) (/usr/local/texlive/2021/texmf-dist/tex/latex/ctex/engine/ctex-engine-xetex.def
|
||||
File: ctex-engine-xetex.def 2021/03/14 v2.5.6 XeLaTeX adapter (CTEX)
|
||||
(/usr/local/texlive/2021/texmf-dist/tex/xelatex/xecjk/xeCJK.sty
|
||||
Package: xeCJK 2020/10/19 v3.8.6 Typesetting CJK scripts with XeLaTeX
|
||||
(/usr/local/texlive/2021/texmf-dist/tex/latex/l3packages/xtemplate/xtemplate.sty
|
||||
Package: xtemplate 2021-03-12 L3 Experimental prototype document functions
|
||||
\l__xtemplate_tmp_dim=\dimen140
|
||||
\l__xtemplate_tmp_int=\count193
|
||||
\l__xtemplate_tmp_muskip=\muskip16
|
||||
\l__xtemplate_tmp_skip=\skip49
|
||||
)
|
||||
\l__xeCJK_tmp_int=\count194
|
||||
\l__xeCJK_tmp_box=\box49
|
||||
\l__xeCJK_tmp_dim=\dimen141
|
||||
\l__xeCJK_tmp_skip=\skip50
|
||||
\g__xeCJK_space_factor_int=\count195
|
||||
\l__xeCJK_begin_int=\count196
|
||||
\l__xeCJK_end_int=\count197
|
||||
\c__xeCJK_CJK_class_int=\XeTeXcharclass1
|
||||
\c__xeCJK_FullLeft_class_int=\XeTeXcharclass2
|
||||
\c__xeCJK_FullRight_class_int=\XeTeXcharclass3
|
||||
\c__xeCJK_HalfLeft_class_int=\XeTeXcharclass4
|
||||
\c__xeCJK_HalfRight_class_int=\XeTeXcharclass5
|
||||
\c__xeCJK_NormalSpace_class_int=\XeTeXcharclass6
|
||||
\c__xeCJK_CM_class_int=\XeTeXcharclass7
|
||||
\c__xeCJK_HangulJamo_class_int=\XeTeXcharclass8
|
||||
\l__xeCJK_last_skip=\skip51
|
||||
\g__xeCJK_node_int=\count198
|
||||
\c__xeCJK_CJK_node_dim=\dimen142
|
||||
\c__xeCJK_CJK-space_node_dim=\dimen143
|
||||
\c__xeCJK_default_node_dim=\dimen144
|
||||
\c__xeCJK_default-space_node_dim=\dimen145
|
||||
\c__xeCJK_CJK-widow_node_dim=\dimen146
|
||||
\c__xeCJK_normalspace_node_dim=\dimen147
|
||||
\l__xeCJK_ccglue_skip=\skip52
|
||||
\l__xeCJK_ecglue_skip=\skip53
|
||||
\l__xeCJK_punct_kern_skip=\skip54
|
||||
\l__xeCJK_last_penalty_int=\count199
|
||||
\l__xeCJK_last_bound_dim=\dimen148
|
||||
\l__xeCJK_last_kern_dim=\dimen149
|
||||
\l__xeCJK_widow_penalty_int=\count266
|
||||
|
||||
Package xtemplate Info: Declaring object type 'xeCJK/punctuation' taking 0
|
||||
(xtemplate) argument(s) on line 2341.
|
||||
|
||||
\l__xeCJK_fixed_punct_width_dim=\dimen150
|
||||
\l__xeCJK_mixed_punct_width_dim=\dimen151
|
||||
\l__xeCJK_middle_punct_width_dim=\dimen152
|
||||
\l__xeCJK_fixed_margin_width_dim=\dimen153
|
||||
\l__xeCJK_mixed_margin_width_dim=\dimen154
|
||||
\l__xeCJK_middle_margin_width_dim=\dimen155
|
||||
\l__xeCJK_bound_punct_width_dim=\dimen156
|
||||
\l__xeCJK_bound_margin_width_dim=\dimen157
|
||||
\l__xeCJK_margin_minimum_dim=\dimen158
|
||||
\l__xeCJK_kerning_total_width_dim=\dimen159
|
||||
\l__xeCJK_same_align_margin_dim=\dimen160
|
||||
\l__xeCJK_different_align_margin_dim=\dimen161
|
||||
\l__xeCJK_kerning_margin_width_dim=\dimen162
|
||||
\l__xeCJK_kerning_margin_minimum_dim=\dimen163
|
||||
\l__xeCJK_bound_dim=\dimen164
|
||||
\l__xeCJK_reverse_bound_dim=\dimen165
|
||||
\l__xeCJK_margin_dim=\dimen166
|
||||
\l__xeCJK_minimum_bound_dim=\dimen167
|
||||
\l__xeCJK_kerning_margin_dim=\dimen168
|
||||
\g__xeCJK_family_int=\count267
|
||||
\l__xeCJK_fam_int=\count268
|
||||
\g__xeCJK_fam_allocation_int=\count269
|
||||
\l__xeCJK_verb_case_int=\count270
|
||||
\l__xeCJK_verb_exspace_skip=\skip55
|
||||
(/usr/local/texlive/2021/texmf-dist/tex/latex/fontspec/fontspec.sty
|
||||
Package: fontspec 2020/02/21 v2.7i Font selection for XeLaTeX and LuaLaTeX
|
||||
(/usr/local/texlive/2021/texmf-dist/tex/latex/fontspec/fontspec-xetex.sty
|
||||
Package: fontspec-xetex 2020/02/21 v2.7i Font selection for XeLaTeX and LuaLaTeX
|
||||
\l__fontspec_script_int=\count271
|
||||
\l__fontspec_language_int=\count272
|
||||
\l__fontspec_strnum_int=\count273
|
||||
\l__fontspec_tmp_int=\count274
|
||||
\l__fontspec_tmpa_int=\count275
|
||||
\l__fontspec_tmpb_int=\count276
|
||||
\l__fontspec_tmpc_int=\count277
|
||||
\l__fontspec_em_int=\count278
|
||||
\l__fontspec_emdef_int=\count279
|
||||
\l__fontspec_strong_int=\count280
|
||||
\l__fontspec_strongdef_int=\count281
|
||||
\l__fontspec_tmpa_dim=\dimen169
|
||||
\l__fontspec_tmpb_dim=\dimen170
|
||||
\l__fontspec_tmpc_dim=\dimen171
|
||||
(/usr/local/texlive/2021/texmf-dist/tex/latex/base/fontenc.sty
|
||||
Package: fontenc 2020/08/10 v2.0s Standard LaTeX package
|
||||
) (/usr/local/texlive/2021/texmf-dist/tex/latex/fontspec/fontspec.cfg))) (/usr/local/texlive/2021/texmf-dist/tex/xelatex/xecjk/xeCJK.cfg
|
||||
File: xeCJK.cfg 2020/10/19 v3.8.6 Configuration file for xeCJK package
|
||||
))
|
||||
\ccwd=\dimen172
|
||||
\l__ctex_ccglue_skip=\skip56
|
||||
)
|
||||
\l__ctex_ziju_dim=\dimen173
|
||||
(/usr/local/texlive/2021/texmf-dist/tex/latex/zhnumber/zhnumber.sty
|
||||
Package: zhnumber 2020/05/01 v2.8 Typesetting numbers with Chinese glyphs
|
||||
\l__zhnum_scale_int=\count282
|
||||
(/usr/local/texlive/2021/texmf-dist/tex/latex/zhnumber/zhnumber-utf8.cfg
|
||||
File: zhnumber-utf8.cfg 2020/05/01 v2.8 Chinese numerals with UTF8 encoding
|
||||
))
|
||||
\l__ctex_heading_skip=\skip57
|
||||
(/usr/local/texlive/2021/texmf-dist/tex/latex/ctex/scheme/ctex-scheme-chinese-article.def
|
||||
File: ctex-scheme-chinese-article.def 2021/03/14 v2.5.6 Chinese scheme for article (CTEX)
|
||||
(/usr/local/texlive/2021/texmf-dist/tex/latex/ctex/config/ctex-name-utf8.cfg
|
||||
File: ctex-name-utf8.cfg 2021/03/14 v2.5.6 Caption with encoding UTF-8 (CTEX)
|
||||
)) (/usr/local/texlive/2021/texmf-dist/tex/latex/ctex/fontset/ctex-fontset-mac.def
|
||||
File: ctex-fontset-mac.def 2021/03/14 v2.5.6 macOS fonts definition (CTEX)
|
||||
(/usr/local/texlive/2021/texmf-dist/tex/latex/ctex/fontset/ctex-fontset-macnew.def
|
||||
File: ctex-fontset-macnew.def 2021/03/14 v2.5.6 macOS fonts definition for El Capitan or later version (CTEX)
|
||||
|
||||
|
||||
Package fontspec Warning: Font "Songti SC Light" does not contain requested
|
||||
(fontspec) Script "CJK".
|
||||
|
||||
|
||||
Package fontspec Info: Font family 'SongtiSCLight(0)' created for font 'Songti
|
||||
(fontspec) SC Light' with options
|
||||
(fontspec) [Script={CJK},BoldItalicFont={Kaiti SC
|
||||
(fontspec) Bold},BoldFont={Songti SC Bold},ItalicFont={Kaiti SC}].
|
||||
(fontspec)
|
||||
(fontspec) This font family consists of the following NFSS
|
||||
(fontspec) series/shapes:
|
||||
(fontspec)
|
||||
(fontspec) - 'normal' (m/n) with NFSS spec.: <->"Songti SC
|
||||
(fontspec) Light/OT:language=dflt;"
|
||||
(fontspec) - 'small caps' (m/sc) with NFSS spec.:
|
||||
(fontspec) - 'bold' (b/n) with NFSS spec.: <->"Songti SC
|
||||
(fontspec) Bold/OT:language=dflt;"
|
||||
(fontspec) - 'bold small caps' (b/sc) with NFSS spec.:
|
||||
(fontspec) - 'italic' (m/it) with NFSS spec.: <->"Kaiti
|
||||
(fontspec) SC/OT:language=dflt;"
|
||||
(fontspec) - 'italic small caps' (m/scit) with NFSS spec.:
|
||||
(fontspec) - 'bold italic' (b/it) with NFSS spec.: <->"Kaiti SC
|
||||
(fontspec) Bold/OT:language=dflt;"
|
||||
(fontspec) - 'bold italic small caps' (b/scit) with NFSS spec.:
|
||||
|
||||
))) (/usr/local/texlive/2021/texmf-dist/tex/latex/ctex/config/ctex.cfg
|
||||
File: ctex.cfg 2021/03/14 v2.5.6 Configuration file (CTEX)
|
||||
) (/usr/local/texlive/2021/texmf-dist/tex/latex/algorithms/algorithm.sty
|
||||
Invalid UTF-8 byte or sequence at line 11 replaced by U+FFFD.
|
||||
Package: algorithm 2009/08/24 v0.1 Document Style `algorithm' - floating environment
|
||||
(/usr/local/texlive/2021/texmf-dist/tex/latex/float/float.sty
|
||||
Package: float 2001/11/08 v1.3d Float enhancements (AL)
|
||||
\c@float@type=\count283
|
||||
\float@exts=\toks15
|
||||
\float@box=\box50
|
||||
\@float@everytoks=\toks16
|
||||
\@floatcapt=\box51
|
||||
) (/usr/local/texlive/2021/texmf-dist/tex/latex/base/ifthen.sty
|
||||
Package: ifthen 2014/09/29 v1.1c Standard LaTeX ifthen package (DPC)
|
||||
)
|
||||
\@float@every@algorithm=\toks17
|
||||
\c@algorithm=\count284
|
||||
) (/usr/local/texlive/2021/texmf-dist/tex/latex/algorithms/algorithmic.sty
|
||||
Invalid UTF-8 byte or sequence at line 11 replaced by U+FFFD.
|
||||
Package: algorithmic 2009/08/24 v0.1 Document Style `algorithmic'
|
||||
(/usr/local/texlive/2021/texmf-dist/tex/latex/graphics/keyval.sty
|
||||
Package: keyval 2014/10/28 v1.15 key=value parser (DPC)
|
||||
\KV@toks@=\toks18
|
||||
)
|
||||
\c@ALC@unique=\count285
|
||||
\c@ALC@line=\count286
|
||||
\c@ALC@rem=\count287
|
||||
\c@ALC@depth=\count288
|
||||
\ALC@tlm=\skip58
|
||||
\algorithmicindent=\skip59
|
||||
) (/usr/local/texlive/2021/texmf-dist/tex/latex/amsfonts/amssymb.sty
|
||||
Package: amssymb 2013/01/14 v3.01 AMS font symbols
|
||||
(/usr/local/texlive/2021/texmf-dist/tex/latex/amsfonts/amsfonts.sty
|
||||
Package: amsfonts 2013/01/14 v3.01 Basic AMSFonts support
|
||||
\@emptytoks=\toks19
|
||||
\symAMSa=\mathgroup4
|
||||
\symAMSb=\mathgroup5
|
||||
LaTeX Font Info: Redeclaring math symbol \hbar on input line 98.
|
||||
LaTeX Font Info: Overwriting math alphabet `\mathfrak' in version `bold'
|
||||
(Font) U/euf/m/n --> U/euf/b/n on input line 106.
|
||||
)) (/usr/local/texlive/2021/texmf-dist/tex/latex/amsmath/amsmath.sty
|
||||
Package: amsmath 2020/09/23 v2.17i AMS math features
|
||||
\@mathmargin=\skip60
|
||||
|
||||
For additional information on amsmath, use the `?' option.
|
||||
(/usr/local/texlive/2021/texmf-dist/tex/latex/amsmath/amstext.sty
|
||||
Package: amstext 2000/06/29 v2.01 AMS text
|
||||
(/usr/local/texlive/2021/texmf-dist/tex/latex/amsmath/amsgen.sty
|
||||
File: amsgen.sty 1999/11/30 v2.0 generic functions
|
||||
\@emptytoks=\toks20
|
||||
\ex@=\dimen174
|
||||
)) (/usr/local/texlive/2021/texmf-dist/tex/latex/amsmath/amsbsy.sty
|
||||
Package: amsbsy 1999/11/29 v1.2d Bold Symbols
|
||||
\pmbraise@=\dimen175
|
||||
) (/usr/local/texlive/2021/texmf-dist/tex/latex/amsmath/amsopn.sty
|
||||
Package: amsopn 2016/03/08 v2.02 operator names
|
||||
)
|
||||
\inf@bad=\count289
|
||||
LaTeX Info: Redefining \frac on input line 234.
|
||||
\uproot@=\count290
|
||||
\leftroot@=\count291
|
||||
LaTeX Info: Redefining \overline on input line 399.
|
||||
\classnum@=\count292
|
||||
\DOTSCASE@=\count293
|
||||
LaTeX Info: Redefining \ldots on input line 496.
|
||||
LaTeX Info: Redefining \dots on input line 499.
|
||||
LaTeX Info: Redefining \cdots on input line 620.
|
||||
\Mathstrutbox@=\box52
|
||||
\strutbox@=\box53
|
||||
\big@size=\dimen176
|
||||
LaTeX Font Info: Redeclaring font encoding OML on input line 743.
|
||||
LaTeX Font Info: Redeclaring font encoding OMS on input line 744.
|
||||
\macc@depth=\count294
|
||||
\c@MaxMatrixCols=\count295
|
||||
\dotsspace@=\muskip17
|
||||
\c@parentequation=\count296
|
||||
\dspbrk@lvl=\count297
|
||||
\tag@help=\toks21
|
||||
\row@=\count298
|
||||
\column@=\count299
|
||||
\maxfields@=\count300
|
||||
\andhelp@=\toks22
|
||||
\eqnshift@=\dimen177
|
||||
\alignsep@=\dimen178
|
||||
\tagshift@=\dimen179
|
||||
\tagwidth@=\dimen180
|
||||
\totwidth@=\dimen181
|
||||
\lineht@=\dimen182
|
||||
\@envbody=\toks23
|
||||
\multlinegap=\skip61
|
||||
\multlinetaggap=\skip62
|
||||
\mathdisplay@stack=\toks24
|
||||
LaTeX Info: Redefining \[ on input line 2923.
|
||||
LaTeX Info: Redefining \] on input line 2924.
|
||||
) (/usr/local/texlive/2021/texmf-dist/tex/latex/hyperref/hyperref.sty
|
||||
Package: hyperref 2021-02-27 v7.00k Hypertext links for LaTeX
|
||||
(/usr/local/texlive/2021/texmf-dist/tex/generic/ltxcmds/ltxcmds.sty
|
||||
Package: ltxcmds 2020-05-10 v1.25 LaTeX kernel commands for general use (HO)
|
||||
) (/usr/local/texlive/2021/texmf-dist/tex/generic/iftex/iftex.sty
|
||||
Package: iftex 2020/03/06 v1.0d TeX engine tests
|
||||
) (/usr/local/texlive/2021/texmf-dist/tex/generic/pdftexcmds/pdftexcmds.sty
|
||||
Package: pdftexcmds 2020-06-27 v0.33 Utility functions of pdfTeX for LuaTeX (HO)
|
||||
(/usr/local/texlive/2021/texmf-dist/tex/generic/infwarerr/infwarerr.sty
|
||||
Package: infwarerr 2019/12/03 v1.5 Providing info/warning/error messages (HO)
|
||||
)
|
||||
Package pdftexcmds Info: \pdf@primitive is available.
|
||||
Package pdftexcmds Info: \pdf@ifprimitive is available.
|
||||
Package pdftexcmds Info: \pdfdraftmode not found.
|
||||
) (/usr/local/texlive/2021/texmf-dist/tex/generic/kvsetkeys/kvsetkeys.sty
|
||||
Package: kvsetkeys 2019/12/15 v1.18 Key value parser (HO)
|
||||
) (/usr/local/texlive/2021/texmf-dist/tex/generic/kvdefinekeys/kvdefinekeys.sty
|
||||
Package: kvdefinekeys 2019-12-19 v1.6 Define keys (HO)
|
||||
) (/usr/local/texlive/2021/texmf-dist/tex/generic/pdfescape/pdfescape.sty
|
||||
Package: pdfescape 2019/12/09 v1.15 Implements pdfTeX's escape features (HO)
|
||||
) (/usr/local/texlive/2021/texmf-dist/tex/latex/hycolor/hycolor.sty
|
||||
Package: hycolor 2020-01-27 v1.10 Color options for hyperref/bookmark (HO)
|
||||
) (/usr/local/texlive/2021/texmf-dist/tex/latex/letltxmacro/letltxmacro.sty
|
||||
Package: letltxmacro 2019/12/03 v1.6 Let assignment for LaTeX macros (HO)
|
||||
) (/usr/local/texlive/2021/texmf-dist/tex/latex/auxhook/auxhook.sty
|
||||
Package: auxhook 2019-12-17 v1.6 Hooks for auxiliary files (HO)
|
||||
) (/usr/local/texlive/2021/texmf-dist/tex/latex/kvoptions/kvoptions.sty
|
||||
Package: kvoptions 2020-10-07 v3.14 Key value format for package options (HO)
|
||||
)
|
||||
\@linkdim=\dimen183
|
||||
\Hy@linkcounter=\count301
|
||||
\Hy@pagecounter=\count302
|
||||
(/usr/local/texlive/2021/texmf-dist/tex/latex/hyperref/pd1enc.def
|
||||
File: pd1enc.def 2021-02-27 v7.00k Hyperref: PDFDocEncoding definition (HO)
|
||||
) (/usr/local/texlive/2021/texmf-dist/tex/latex/hyperref/hyperref-langpatches.def
|
||||
File: hyperref-langpatches.def 2021-02-27 v7.00k Hyperref: patches for babel languages
|
||||
) (/usr/local/texlive/2021/texmf-dist/tex/generic/intcalc/intcalc.sty
|
||||
Package: intcalc 2019/12/15 v1.3 Expandable calculations with integers (HO)
|
||||
) (/usr/local/texlive/2021/texmf-dist/tex/generic/etexcmds/etexcmds.sty
|
||||
Package: etexcmds 2019/12/15 v1.7 Avoid name clashes with e-TeX commands (HO)
|
||||
)
|
||||
\Hy@SavedSpaceFactor=\count303
|
||||
(/usr/local/texlive/2021/texmf-dist/tex/latex/hyperref/puenc.def
|
||||
File: puenc.def 2021-02-27 v7.00k Hyperref: PDF Unicode definition (HO)
|
||||
)
|
||||
Package hyperref Info: Option `unicode' set `true' on input line 4073.
|
||||
Package hyperref Info: Hyper figures OFF on input line 4192.
|
||||
Package hyperref Info: Link nesting OFF on input line 4197.
|
||||
Package hyperref Info: Hyper index ON on input line 4200.
|
||||
Package hyperref Info: Plain pages OFF on input line 4207.
|
||||
Package hyperref Info: Backreferencing OFF on input line 4212.
|
||||
Package hyperref Info: Implicit mode ON; LaTeX internals redefined.
|
||||
Package hyperref Info: Bookmarks ON on input line 4445.
|
||||
\c@Hy@tempcnt=\count304
|
||||
(/usr/local/texlive/2021/texmf-dist/tex/latex/url/url.sty
|
||||
\Urlmuskip=\muskip18
|
||||
Package: url 2013/09/16 ver 3.4 Verb mode for urls, etc.
|
||||
)
|
||||
LaTeX Info: Redefining \url on input line 4804.
|
||||
\XeTeXLinkMargin=\dimen184
|
||||
(/usr/local/texlive/2021/texmf-dist/tex/generic/bitset/bitset.sty
|
||||
Package: bitset 2019/12/09 v1.3 Handle bit-vector datatype (HO)
|
||||
(/usr/local/texlive/2021/texmf-dist/tex/generic/bigintcalc/bigintcalc.sty
|
||||
Package: bigintcalc 2019/12/15 v1.5 Expandable calculations on big integers (HO)
|
||||
))
|
||||
\Fld@menulength=\count305
|
||||
\Field@Width=\dimen185
|
||||
\Fld@charsize=\dimen186
|
||||
Package hyperref Info: Hyper figures OFF on input line 6075.
|
||||
Package hyperref Info: Link nesting OFF on input line 6080.
|
||||
Package hyperref Info: Hyper index ON on input line 6083.
|
||||
Package hyperref Info: backreferencing OFF on input line 6090.
|
||||
Package hyperref Info: Link coloring OFF on input line 6095.
|
||||
Package hyperref Info: Link coloring with OCG OFF on input line 6100.
|
||||
Package hyperref Info: PDF/A mode OFF on input line 6105.
|
||||
LaTeX Info: Redefining \ref on input line 6145.
|
||||
LaTeX Info: Redefining \pageref on input line 6149.
|
||||
(/usr/local/texlive/2021/texmf-dist/tex/latex/base/atbegshi-ltx.sty
|
||||
Package: atbegshi-ltx 2020/08/17 v1.0a Emulation of the original atbegshi package
|
||||
with kernel methods
|
||||
)
|
||||
\Hy@abspage=\count306
|
||||
\c@Item=\count307
|
||||
\c@Hfootnote=\count308
|
||||
)
|
||||
Package hyperref Info: Driver (autodetected): hxetex.
|
||||
(/usr/local/texlive/2021/texmf-dist/tex/latex/hyperref/hxetex.def
|
||||
File: hxetex.def 2021-02-27 v7.00k Hyperref driver for XeTeX
|
||||
(/usr/local/texlive/2021/texmf-dist/tex/generic/stringenc/stringenc.sty
|
||||
Package: stringenc 2019/11/29 v1.12 Convert strings between diff. encodings (HO)
|
||||
)
|
||||
\pdfm@box=\box54
|
||||
\c@Hy@AnnotLevel=\count309
|
||||
\HyField@AnnotCount=\count310
|
||||
\Fld@listcount=\count311
|
||||
\c@bookmark@seq@number=\count312
|
||||
(/usr/local/texlive/2021/texmf-dist/tex/latex/rerunfilecheck/rerunfilecheck.sty
|
||||
Package: rerunfilecheck 2019/12/05 v1.9 Rerun checks for auxiliary files (HO)
|
||||
(/usr/local/texlive/2021/texmf-dist/tex/latex/base/atveryend-ltx.sty
|
||||
Package: atveryend-ltx 2020/08/19 v1.0a Emulation of the original atvery package
|
||||
with kernel methods
|
||||
) (/usr/local/texlive/2021/texmf-dist/tex/generic/uniquecounter/uniquecounter.sty
|
||||
Package: uniquecounter 2019/12/15 v1.4 Provide unlimited unique counter (HO)
|
||||
)
|
||||
Package uniquecounter Info: New unique counter `rerunfilecheck' on input line 286.
|
||||
)
|
||||
\Hy@SectionHShift=\skip63
|
||||
) (/usr/local/texlive/2021/texmf-dist/tex/latex/setspace/setspace.sty
|
||||
Package: setspace 2011/12/19 v6.7a set line spacing
|
||||
) (/usr/local/texlive/2021/texmf-dist/tex/latex/titlesec/titlesec.sty
|
||||
Package: titlesec 2019/10/16 v2.13 Sectioning titles
|
||||
\ttl@box=\box55
|
||||
\beforetitleunit=\skip64
|
||||
\aftertitleunit=\skip65
|
||||
\ttl@plus=\dimen187
|
||||
\ttl@minus=\dimen188
|
||||
\ttl@toksa=\toks25
|
||||
\titlewidth=\dimen189
|
||||
\titlewidthlast=\dimen190
|
||||
\titlewidthfirst=\dimen191
|
||||
) (./pseudocodes.aux)
|
||||
\openout1 = `pseudocodes.aux'.
|
||||
|
||||
LaTeX Font Info: Checking defaults for OML/cmm/m/it on input line 14.
|
||||
LaTeX Font Info: ... okay on input line 14.
|
||||
LaTeX Font Info: Checking defaults for OMS/cmsy/m/n on input line 14.
|
||||
LaTeX Font Info: ... okay on input line 14.
|
||||
LaTeX Font Info: Checking defaults for OT1/cmr/m/n on input line 14.
|
||||
LaTeX Font Info: ... okay on input line 14.
|
||||
LaTeX Font Info: Checking defaults for T1/cmr/m/n on input line 14.
|
||||
LaTeX Font Info: ... okay on input line 14.
|
||||
LaTeX Font Info: Checking defaults for TS1/cmr/m/n on input line 14.
|
||||
LaTeX Font Info: ... okay on input line 14.
|
||||
LaTeX Font Info: Checking defaults for TU/lmr/m/n on input line 14.
|
||||
LaTeX Font Info: ... okay on input line 14.
|
||||
LaTeX Font Info: Checking defaults for OMX/cmex/m/n on input line 14.
|
||||
LaTeX Font Info: ... okay on input line 14.
|
||||
LaTeX Font Info: Checking defaults for U/cmr/m/n on input line 14.
|
||||
LaTeX Font Info: ... okay on input line 14.
|
||||
LaTeX Font Info: Checking defaults for PD1/pdf/m/n on input line 14.
|
||||
LaTeX Font Info: ... okay on input line 14.
|
||||
LaTeX Font Info: Checking defaults for PU/pdf/m/n on input line 14.
|
||||
LaTeX Font Info: ... okay on input line 14.
|
||||
ABD: EverySelectfont initializing macros
|
||||
LaTeX Info: Redefining \selectfont on input line 14.
|
||||
|
||||
Package fontspec Info: Adjusting the maths setup (use [no-math] to avoid
|
||||
(fontspec) this).
|
||||
|
||||
\symlegacymaths=\mathgroup6
|
||||
LaTeX Font Info: Overwriting symbol font `legacymaths' in version `bold'
|
||||
(Font) OT1/cmr/m/n --> OT1/cmr/bx/n on input line 14.
|
||||
LaTeX Font Info: Redeclaring math accent \acute on input line 14.
|
||||
LaTeX Font Info: Redeclaring math accent \grave on input line 14.
|
||||
LaTeX Font Info: Redeclaring math accent \ddot on input line 14.
|
||||
LaTeX Font Info: Redeclaring math accent \tilde on input line 14.
|
||||
LaTeX Font Info: Redeclaring math accent \bar on input line 14.
|
||||
LaTeX Font Info: Redeclaring math accent \breve on input line 14.
|
||||
LaTeX Font Info: Redeclaring math accent \check on input line 14.
|
||||
LaTeX Font Info: Redeclaring math accent \hat on input line 14.
|
||||
LaTeX Font Info: Redeclaring math accent \dot on input line 14.
|
||||
LaTeX Font Info: Redeclaring math accent \mathring on input line 14.
|
||||
LaTeX Font Info: Redeclaring math symbol \Gamma on input line 14.
|
||||
LaTeX Font Info: Redeclaring math symbol \Delta on input line 14.
|
||||
LaTeX Font Info: Redeclaring math symbol \Theta on input line 14.
|
||||
LaTeX Font Info: Redeclaring math symbol \Lambda on input line 14.
|
||||
LaTeX Font Info: Redeclaring math symbol \Xi on input line 14.
|
||||
LaTeX Font Info: Redeclaring math symbol \Pi on input line 14.
|
||||
LaTeX Font Info: Redeclaring math symbol \Sigma on input line 14.
|
||||
LaTeX Font Info: Redeclaring math symbol \Upsilon on input line 14.
|
||||
LaTeX Font Info: Redeclaring math symbol \Phi on input line 14.
|
||||
LaTeX Font Info: Redeclaring math symbol \Psi on input line 14.
|
||||
LaTeX Font Info: Redeclaring math symbol \Omega on input line 14.
|
||||
LaTeX Font Info: Redeclaring math symbol \mathdollar on input line 14.
|
||||
LaTeX Font Info: Redeclaring symbol font `operators' on input line 14.
|
||||
LaTeX Font Info: Encoding `OT1' has changed to `TU' for symbol font
|
||||
(Font) `operators' in the math version `normal' on input line 14.
|
||||
LaTeX Font Info: Overwriting symbol font `operators' in version `normal'
|
||||
(Font) OT1/cmr/m/n --> TU/lmr/m/n on input line 14.
|
||||
LaTeX Font Info: Encoding `OT1' has changed to `TU' for symbol font
|
||||
(Font) `operators' in the math version `bold' on input line 14.
|
||||
LaTeX Font Info: Overwriting symbol font `operators' in version `bold'
|
||||
(Font) OT1/cmr/bx/n --> TU/lmr/m/n on input line 14.
|
||||
LaTeX Font Info: Overwriting symbol font `operators' in version `normal'
|
||||
(Font) TU/lmr/m/n --> TU/lmr/m/n on input line 14.
|
||||
LaTeX Font Info: Overwriting math alphabet `\mathit' in version `normal'
|
||||
(Font) OT1/cmr/m/it --> TU/lmr/m/it on input line 14.
|
||||
LaTeX Font Info: Overwriting math alphabet `\mathbf' in version `normal'
|
||||
(Font) OT1/cmr/bx/n --> TU/lmr/b/n on input line 14.
|
||||
LaTeX Font Info: Overwriting math alphabet `\mathsf' in version `normal'
|
||||
(Font) OT1/cmss/m/n --> TU/lmss/m/n on input line 14.
|
||||
LaTeX Font Info: Overwriting math alphabet `\mathtt' in version `normal'
|
||||
(Font) OT1/cmtt/m/n --> TU/lmtt/m/n on input line 14.
|
||||
LaTeX Font Info: Overwriting symbol font `operators' in version `bold'
|
||||
(Font) TU/lmr/m/n --> TU/lmr/b/n on input line 14.
|
||||
LaTeX Font Info: Overwriting math alphabet `\mathit' in version `bold'
|
||||
(Font) OT1/cmr/bx/it --> TU/lmr/b/it on input line 14.
|
||||
LaTeX Font Info: Overwriting math alphabet `\mathsf' in version `bold'
|
||||
(Font) OT1/cmss/bx/n --> TU/lmss/b/n on input line 14.
|
||||
LaTeX Font Info: Overwriting math alphabet `\mathtt' in version `bold'
|
||||
(Font) OT1/cmtt/m/n --> TU/lmtt/b/n on input line 14.
|
||||
Package hyperref Info: Link coloring OFF on input line 14.
|
||||
(/usr/local/texlive/2021/texmf-dist/tex/latex/hyperref/nameref.sty
|
||||
Package: nameref 2021-04-02 v2.47 Cross-referencing by name of section
|
||||
(/usr/local/texlive/2021/texmf-dist/tex/latex/refcount/refcount.sty
|
||||
Package: refcount 2019/12/15 v3.6 Data extraction from label references (HO)
|
||||
) (/usr/local/texlive/2021/texmf-dist/tex/generic/gettitlestring/gettitlestring.sty
|
||||
Package: gettitlestring 2019/12/15 v1.6 Cleanup title references (HO)
|
||||
)
|
||||
\c@section@level=\count313
|
||||
)
|
||||
LaTeX Info: Redefining \ref on input line 14.
|
||||
LaTeX Info: Redefining \pageref on input line 14.
|
||||
LaTeX Info: Redefining \nameref on input line 14.
|
||||
(./pseudocodes.out) (./pseudocodes.out)
|
||||
\@outlinefile=\write3
|
||||
\openout3 = `pseudocodes.out'.
|
||||
|
||||
(./pseudocodes.toc)
|
||||
\tf@toc=\write4
|
||||
\openout4 = `pseudocodes.toc'.
|
||||
|
||||
LaTeX Font Info: Font shape `TU/SongtiSCLight(0)/m/sl' in size <10.95> not available
|
||||
(Font) Font shape `TU/SongtiSCLight(0)/m/it' tried instead on input line 17.
|
||||
[1
|
||||
|
||||
]
|
||||
Package hyperref Info: bookmark level for unknown algorithm defaults to 0 on input line 22.
|
||||
[2
|
||||
|
||||
]
|
||||
LaTeX Font Info: Trying to load font information for U+msa on input line 32.
|
||||
(/usr/local/texlive/2021/texmf-dist/tex/latex/amsfonts/umsa.fd
|
||||
File: umsa.fd 2013/01/14 v3.01 AMS symbols A
|
||||
)
|
||||
LaTeX Font Info: Trying to load font information for U+msb on input line 32.
|
||||
(/usr/local/texlive/2021/texmf-dist/tex/latex/amsfonts/umsb.fd
|
||||
File: umsb.fd 2013/01/14 v3.01 AMS symbols B
|
||||
) [3
|
||||
|
||||
] [4
|
||||
|
||||
] [5
|
||||
|
||||
] [6
|
||||
|
||||
] [7
|
||||
|
||||
] [8
|
||||
|
||||
]
|
||||
Overfull \hbox (32.54117pt too wide) in paragraph at lines 212--212
|
||||
[][]$[]\OML/cmm/m/it/9 J[]\OT1/cmr/m/n/9 (\OML/cmm/m/it/9 ^^R\OT1/cmr/m/n/9 ) = \OMS/cmsy/m/n/9 r[]\OML/cmm/m/it/9 Q[] [] []$|
|
||||
[]
|
||||
|
||||
|
||||
Overfull \hbox (15.41673pt too wide) in paragraph at lines 213--213
|
||||
[][]$[]\OML/cmm/m/it/9 J[]\OT1/cmr/m/n/9 (\OML/cmm/m/it/9 ^^^\OT1/cmr/m/n/9 ) = \OMS/cmsy/m/n/9 r[]\OML/cmm/m/it/9 ^^K [] [] \OT1/cmr/m/n/9 + [] \OMS/cmsy/m/n/9 r[]\OML/cmm/m/it/9 f[] []$\TU/lmr/m/n/9 ,$[][] \OT1/cmr/m/n/9 =
|
||||
[]
|
||||
|
||||
[9
|
||||
|
||||
] (./pseudocodes.aux)
|
||||
Package rerunfilecheck Info: File `pseudocodes.out' has not changed.
|
||||
(rerunfilecheck) Checksum: 35B5A79A86EF3BC70F1A0B3BCBEBAA13;724.
|
||||
)
|
||||
Here is how much of TeX's memory you used:
|
||||
14827 strings out of 476919
|
||||
313456 string characters out of 5821840
|
||||
653576 words of memory out of 5000000
|
||||
34576 multiletter control sequences out of 15000+600000
|
||||
413609 words of font info for 91 fonts, out of 8000000 for 9000
|
||||
1348 hyphenation exceptions out of 8191
|
||||
101i,13n,104p,676b,697s stack positions out of 5000i,500n,10000p,200000b,80000s
|
||||
|
||||
Output written on pseudocodes.pdf (9 pages).
|
||||
@@ -1,8 +0,0 @@
|
||||
\BOOKMARK [1][-]{section.1}{\376\377\152\041\162\110\131\007\165\050}{}% 1
|
||||
\BOOKMARK [1][-]{section.2}{\376\377\000Q\000\040\000l\000e\000a\000r\000n\000i\000n\000g\173\227\154\325}{}% 2
|
||||
\BOOKMARK [1][-]{section.3}{\376\377\000S\000a\000r\000s\000a\173\227\154\325}{}% 3
|
||||
\BOOKMARK [1][-]{section.4}{\376\377\000P\000o\000l\000i\000c\000y\000\040\000G\000r\000a\000d\000i\000e\000n\000t\173\227\154\325}{}% 4
|
||||
\BOOKMARK [1][-]{section.5}{\376\377\000D\000Q\000N\173\227\154\325}{}% 5
|
||||
\BOOKMARK [1][-]{section.6}{\376\377\000S\000o\000f\000t\000Q\173\227\154\325}{}% 6
|
||||
\BOOKMARK [1][-]{section.7}{\376\377\000S\000A\000C\000-\000S\173\227\154\325}{}% 7
|
||||
\BOOKMARK [1][-]{section.8}{\376\377\000S\000A\000C\173\227\154\325}{}% 8
|
||||
Binary file not shown.
Binary file not shown.
@@ -11,6 +11,27 @@
|
||||
\usepackage{float} % 调用该包能够使用[H]
|
||||
% \pagestyle{plain} % 去除页眉,但是保留页脚编号,都去掉plain换empty
|
||||
|
||||
% 更改脚注为圆圈
|
||||
\usepackage{pifont}
|
||||
\makeatletter
|
||||
\newcommand*{\circnum}[1]{%
|
||||
\expandafter\@circnum\csname c@#1\endcsname
|
||||
}
|
||||
\newcommand*{\@circnum}[1]{%
|
||||
\ifnum#1<1 %
|
||||
\@ctrerr
|
||||
\else
|
||||
\ifnum#1>20 %
|
||||
\@ctrerr
|
||||
\else
|
||||
\ding{\the\numexpr 171+(#1)\relax}%
|
||||
\fi
|
||||
\fi
|
||||
}
|
||||
\makeatother
|
||||
|
||||
\renewcommand*{\thefootnote}{\circnum{footnote}}
|
||||
|
||||
\begin{document}
|
||||
\tableofcontents % 目录,注意要运行两下或者vscode保存两下才能显示
|
||||
% \singlespacing
|
||||
@@ -69,27 +90,10 @@
|
||||
\end{algorithm}
|
||||
\footnotetext[1]{Reinforcement Learning: An Introduction}
|
||||
\clearpage
|
||||
\section{Policy Gradient算法}
|
||||
\begin{algorithm}[H] % [H]固定位置
|
||||
\floatname{algorithm}{{REINFORCE算法:Monte-Carlo Policy Gradient}\footnotemark[1]}
|
||||
\renewcommand{\thealgorithm}{} % 去掉算法标号
|
||||
\caption{}
|
||||
\begin{algorithmic}[1] % [1]显示步数
|
||||
\STATE 初始化策略参数$\boldsymbol{\theta} \in \mathbb{R}^{d^{\prime}}($ e.g., to $\mathbf{0})$
|
||||
\FOR {回合数 = $1,M$}
|
||||
\STATE 根据策略$\pi(\cdot \mid \cdot, \boldsymbol{\theta})$采样一个(或几个)回合的transition
|
||||
\FOR {时步 = $1,t$}
|
||||
\STATE 计算回报$G \leftarrow \sum_{k=t+1}^{T} \gamma^{k-t-1} R_{k}$
|
||||
\STATE 更新策略$\boldsymbol{\theta} \leftarrow {\boldsymbol{\theta}+\alpha \gamma^{t}} G \nabla \ln \pi\left(A_{t} \mid S_{t}, \boldsymbol{\theta}\right)$
|
||||
\ENDFOR
|
||||
\ENDFOR
|
||||
\end{algorithmic}
|
||||
\end{algorithm}
|
||||
\footnotetext[1]{Reinforcement Learning: An Introduction}
|
||||
\clearpage
|
||||
|
||||
\section{DQN算法}
|
||||
\begin{algorithm}[H] % [H]固定位置
|
||||
\floatname{algorithm}{{DQN算法}{\hypersetup{linkcolor=white}\footnotemark}}
|
||||
\floatname{algorithm}{{DQN算法}\footnotemark[1]}
|
||||
\renewcommand{\thealgorithm}{} % 去掉算法标号
|
||||
\caption{}
|
||||
\renewcommand{\algorithmicrequire}{\textbf{输入:}}
|
||||
@@ -109,10 +113,10 @@
|
||||
\STATE 更新环境状态$s_t \leftarrow s_{t+1}$
|
||||
\STATE {\bfseries 更新策略:}
|
||||
\STATE 从$D$中采样一个batch的transition
|
||||
\STATE 计算实际的$Q$值,即$y_{j}${\hypersetup{linkcolor=white}\footnotemark}
|
||||
\STATE 对损失 $L(\theta)=\left(y_{i}-Q\left(s_{i}, a_{i} ; \theta\right)\right)^{2}$关于参数$\theta$做随机梯度下降{\hypersetup{linkcolor=white}\footnotemark}
|
||||
\STATE 计算实际的$Q$值,即$y_{j}$\footnotemark[2]
|
||||
\STATE 对损失 $L(\theta)=\left(y_{i}-Q\left(s_{i}, a_{i} ; \theta\right)\right)^{2}$关于参数$\theta$做随机梯度下降\footnotemark[3]
|
||||
\ENDFOR
|
||||
\STATE 每$C$个回合复制参数$\hat{Q}\leftarrow Q${\hypersetup{linkcolor=white}\footnotemark}
|
||||
\STATE 每$C$个回合复制参数$\hat{Q}\leftarrow Q$\footnotemark[4]
|
||||
\ENDFOR
|
||||
\end{algorithmic}
|
||||
\end{algorithm}
|
||||
@@ -121,7 +125,46 @@
|
||||
\footnotetext[3]{$\theta_i \leftarrow \theta_i - \lambda \nabla_{\theta_{i}} L_{i}\left(\theta_{i}\right)$}
|
||||
\footnotetext[4]{此处也可像原论文中放到小循环中改成每$C$步,但没有每$C$个回合稳定}
|
||||
\clearpage
|
||||
\section{Policy Gradient算法}
|
||||
\begin{algorithm}[H] % [H]固定位置
|
||||
\floatname{algorithm}{{REINFORCE算法:Monte-Carlo Policy Gradient}\footnotemark[1]}
|
||||
\renewcommand{\thealgorithm}{} % 去掉算法标号
|
||||
\caption{}
|
||||
\begin{algorithmic}[1] % [1]显示步数
|
||||
\STATE 初始化策略参数$\boldsymbol{\theta} \in \mathbb{R}^{d^{\prime}}($ e.g., to $\mathbf{0})$
|
||||
\FOR {回合数 = $1,M$}
|
||||
\STATE 根据策略$\pi(\cdot \mid \cdot, \boldsymbol{\theta})$采样一个(或几个)回合的transition
|
||||
\FOR {时步 $t = 0,T-1$}
|
||||
\STATE 计算回报$G \leftarrow \sum_{k=t+1}^{T} \gamma^{k-t-1} R_{k}$
|
||||
\STATE 更新策略$\boldsymbol{\theta} \leftarrow {\boldsymbol{\theta}+\alpha \gamma^{t}} G \nabla \ln \pi\left(A_{t} \mid S_{t}, \boldsymbol{\theta}\right)$
|
||||
\ENDFOR
|
||||
\ENDFOR
|
||||
\end{algorithmic}
|
||||
\end{algorithm}
|
||||
\footnotetext[1]{Reinforcement Learning: An Introduction}
|
||||
\clearpage
|
||||
\section{Advantage Actor Critic算法}
|
||||
\begin{algorithm}[H] % [H]固定位置
|
||||
\floatname{algorithm}{{Q Actor Critic算法}}
|
||||
\renewcommand{\thealgorithm}{} % 去掉算法标号
|
||||
\caption{}
|
||||
\begin{algorithmic}[1] % [1]显示步数
|
||||
\STATE 初始化Actor参数$\theta$和Critic参数$w$
|
||||
\FOR {回合数 = $1,M$}
|
||||
\STATE 根据策略$\pi_{\theta}(a|s)$采样一个(或几个)回合的transition
|
||||
\STATE {\bfseries 更新Critic参数\footnotemark[1]}
|
||||
\FOR {时步 = $t+1,1$}
|
||||
\STATE 计算Advantage,即$ \delta_t = r_t + \gamma Q_w(s_{t+1},a_{t+1})-Q_w(s_t,a_t)$
|
||||
\STATE $w \leftarrow w+\alpha_{w} \delta_{t} \nabla_{w} Q_w(s_t,a_t)$
|
||||
\STATE $a_t \leftarrow a_{t+1}$,$s_t \leftarrow s_{t+1}$
|
||||
\ENDFOR
|
||||
\STATE 更新Actor参数$\theta \leftarrow \theta+\alpha_{\theta} Q_{w}(s, a) \nabla_{\theta} \log \pi_{\theta}(a \mid s)$
|
||||
\ENDFOR
|
||||
\end{algorithmic}
|
||||
\end{algorithm}
|
||||
\footnotetext[1]{这里结合TD error的特性按照从$t+1$到$1$计算Advantage更方便}
|
||||
|
||||
\clearpage
|
||||
\section{SoftQ算法}
|
||||
\begin{algorithm}[H]
|
||||
\floatname{algorithm}{{SoftQ算法}}
|
||||
|
||||
@@ -1,8 +0,0 @@
|
||||
\contentsline {section}{\numberline {1}模版备用}{2}{section.1}%
|
||||
\contentsline {section}{\numberline {2}Q learning算法}{3}{section.2}%
|
||||
\contentsline {section}{\numberline {3}Sarsa算法}{4}{section.3}%
|
||||
\contentsline {section}{\numberline {4}Policy Gradient算法}{5}{section.4}%
|
||||
\contentsline {section}{\numberline {5}DQN算法}{6}{section.5}%
|
||||
\contentsline {section}{\numberline {6}SoftQ算法}{7}{section.6}%
|
||||
\contentsline {section}{\numberline {7}SAC-S算法}{8}{section.7}%
|
||||
\contentsline {section}{\numberline {8}SAC算法}{9}{section.8}%
|
||||
Reference in New Issue
Block a user