hot update
This commit is contained in:
@@ -1,4 +1,33 @@
|
||||
\relax
|
||||
\@writefile{loa}{\contentsline {algorithm}{\numberline {}{\ignorespaces }}{1}{}\protected@file@percent }
|
||||
\@writefile{loa}{\contentsline {algorithm}{\numberline {}{\ignorespaces }}{2}{}\protected@file@percent }
|
||||
\gdef \@abspage@last{2}
|
||||
\providecommand\hyper@newdestlabel[2]{}
|
||||
\providecommand\HyperFirstAtBeginDocument{\AtBeginDocument}
|
||||
\HyperFirstAtBeginDocument{\ifx\hyper@anchor\@undefined
|
||||
\global\let\oldcontentsline\contentsline
|
||||
\gdef\contentsline#1#2#3#4{\oldcontentsline{#1}{#2}{#3}}
|
||||
\global\let\oldnewlabel\newlabel
|
||||
\gdef\newlabel#1#2{\newlabelxx{#1}#2}
|
||||
\gdef\newlabelxx#1#2#3#4#5#6{\oldnewlabel{#1}{{#2}{#3}}}
|
||||
\AtEndDocument{\ifx\hyper@anchor\@undefined
|
||||
\let\contentsline\oldcontentsline
|
||||
\let\newlabel\oldnewlabel
|
||||
\fi}
|
||||
\fi}
|
||||
\global\let\hyper@last\relax
|
||||
\gdef\HyperFirstAtBeginDocument#1{#1}
|
||||
\providecommand*\HyPL@Entry[1]{}
|
||||
\HyPL@Entry{0<</S/D>>}
|
||||
\@writefile{toc}{\contentsline {section}{\numberline {1}模版备用}{2}{section.1}\protected@file@percent }
|
||||
\@writefile{loa}{\contentsline {algorithm}{\numberline {}{\ignorespaces }}{2}{algorithm.}\protected@file@percent }
|
||||
\@writefile{toc}{\contentsline {section}{\numberline {2}Q learning算法}{3}{section.2}\protected@file@percent }
|
||||
\@writefile{loa}{\contentsline {algorithm}{\numberline {}{\ignorespaces }}{3}{algorithm.}\protected@file@percent }
|
||||
\@writefile{toc}{\contentsline {section}{\numberline {3}Sarsa算法}{4}{section.3}\protected@file@percent }
|
||||
\@writefile{loa}{\contentsline {algorithm}{\numberline {}{\ignorespaces }}{4}{algorithm.}\protected@file@percent }
|
||||
\@writefile{toc}{\contentsline {section}{\numberline {4}Policy Gradient算法}{5}{section.4}\protected@file@percent }
|
||||
\@writefile{loa}{\contentsline {algorithm}{\numberline {}{\ignorespaces }}{5}{algorithm.}\protected@file@percent }
|
||||
\@writefile{toc}{\contentsline {section}{\numberline {5}DQN算法}{6}{section.5}\protected@file@percent }
|
||||
\@writefile{loa}{\contentsline {algorithm}{\numberline {}{\ignorespaces }}{6}{algorithm.}\protected@file@percent }
|
||||
\@writefile{toc}{\contentsline {section}{\numberline {6}SoftQ算法}{7}{section.6}\protected@file@percent }
|
||||
\@writefile{loa}{\contentsline {algorithm}{\numberline {}{\ignorespaces }}{7}{algorithm.}\protected@file@percent }
|
||||
\@writefile{toc}{\contentsline {section}{\numberline {7}SAC算法}{8}{section.7}\protected@file@percent }
|
||||
\@writefile{loa}{\contentsline {algorithm}{\numberline {}{\ignorespaces }}{8}{algorithm.}\protected@file@percent }
|
||||
\gdef \@abspage@last{8}
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
This is XeTeX, Version 3.141592653-2.6-0.999993 (TeX Live 2021) (preloaded format=xelatex 2021.8.22) 15 AUG 2022 15:05
|
||||
This is XeTeX, Version 3.141592653-2.6-0.999993 (TeX Live 2021) (preloaded format=xelatex 2021.8.22) 22 AUG 2022 16:54
|
||||
entering extended mode
|
||||
restricted \write18 enabled.
|
||||
file:line:error style messages enabled.
|
||||
@@ -292,107 +292,282 @@ LaTeX Font Info: Redeclaring font encoding OMS on input line 744.
|
||||
\mathdisplay@stack=\toks24
|
||||
LaTeX Info: Redefining \[ on input line 2923.
|
||||
LaTeX Info: Redefining \] on input line 2924.
|
||||
) (/usr/local/texlive/2021/texmf-dist/tex/latex/hyperref/hyperref.sty
|
||||
Package: hyperref 2021-02-27 v7.00k Hypertext links for LaTeX
|
||||
(/usr/local/texlive/2021/texmf-dist/tex/generic/ltxcmds/ltxcmds.sty
|
||||
Package: ltxcmds 2020-05-10 v1.25 LaTeX kernel commands for general use (HO)
|
||||
) (/usr/local/texlive/2021/texmf-dist/tex/generic/iftex/iftex.sty
|
||||
Package: iftex 2020/03/06 v1.0d TeX engine tests
|
||||
) (/usr/local/texlive/2021/texmf-dist/tex/generic/pdftexcmds/pdftexcmds.sty
|
||||
Package: pdftexcmds 2020-06-27 v0.33 Utility functions of pdfTeX for LuaTeX (HO)
|
||||
(/usr/local/texlive/2021/texmf-dist/tex/generic/infwarerr/infwarerr.sty
|
||||
Package: infwarerr 2019/12/03 v1.5 Providing info/warning/error messages (HO)
|
||||
)
|
||||
Package pdftexcmds Info: \pdf@primitive is available.
|
||||
Package pdftexcmds Info: \pdf@ifprimitive is available.
|
||||
Package pdftexcmds Info: \pdfdraftmode not found.
|
||||
) (/usr/local/texlive/2021/texmf-dist/tex/generic/kvsetkeys/kvsetkeys.sty
|
||||
Package: kvsetkeys 2019/12/15 v1.18 Key value parser (HO)
|
||||
) (/usr/local/texlive/2021/texmf-dist/tex/generic/kvdefinekeys/kvdefinekeys.sty
|
||||
Package: kvdefinekeys 2019-12-19 v1.6 Define keys (HO)
|
||||
) (/usr/local/texlive/2021/texmf-dist/tex/generic/pdfescape/pdfescape.sty
|
||||
Package: pdfescape 2019/12/09 v1.15 Implements pdfTeX's escape features (HO)
|
||||
) (/usr/local/texlive/2021/texmf-dist/tex/latex/hycolor/hycolor.sty
|
||||
Package: hycolor 2020-01-27 v1.10 Color options for hyperref/bookmark (HO)
|
||||
) (/usr/local/texlive/2021/texmf-dist/tex/latex/letltxmacro/letltxmacro.sty
|
||||
Package: letltxmacro 2019/12/03 v1.6 Let assignment for LaTeX macros (HO)
|
||||
) (/usr/local/texlive/2021/texmf-dist/tex/latex/auxhook/auxhook.sty
|
||||
Package: auxhook 2019-12-17 v1.6 Hooks for auxiliary files (HO)
|
||||
) (/usr/local/texlive/2021/texmf-dist/tex/latex/kvoptions/kvoptions.sty
|
||||
Package: kvoptions 2020-10-07 v3.14 Key value format for package options (HO)
|
||||
)
|
||||
\@linkdim=\dimen183
|
||||
\Hy@linkcounter=\count301
|
||||
\Hy@pagecounter=\count302
|
||||
(/usr/local/texlive/2021/texmf-dist/tex/latex/hyperref/pd1enc.def
|
||||
File: pd1enc.def 2021-02-27 v7.00k Hyperref: PDFDocEncoding definition (HO)
|
||||
) (/usr/local/texlive/2021/texmf-dist/tex/latex/hyperref/hyperref-langpatches.def
|
||||
File: hyperref-langpatches.def 2021-02-27 v7.00k Hyperref: patches for babel languages
|
||||
) (/usr/local/texlive/2021/texmf-dist/tex/generic/intcalc/intcalc.sty
|
||||
Package: intcalc 2019/12/15 v1.3 Expandable calculations with integers (HO)
|
||||
) (/usr/local/texlive/2021/texmf-dist/tex/generic/etexcmds/etexcmds.sty
|
||||
Package: etexcmds 2019/12/15 v1.7 Avoid name clashes with e-TeX commands (HO)
|
||||
)
|
||||
\Hy@SavedSpaceFactor=\count303
|
||||
(/usr/local/texlive/2021/texmf-dist/tex/latex/hyperref/puenc.def
|
||||
File: puenc.def 2021-02-27 v7.00k Hyperref: PDF Unicode definition (HO)
|
||||
)
|
||||
Package hyperref Info: Option `unicode' set `true' on input line 4073.
|
||||
Package hyperref Info: Hyper figures OFF on input line 4192.
|
||||
Package hyperref Info: Link nesting OFF on input line 4197.
|
||||
Package hyperref Info: Hyper index ON on input line 4200.
|
||||
Package hyperref Info: Plain pages OFF on input line 4207.
|
||||
Package hyperref Info: Backreferencing OFF on input line 4212.
|
||||
Package hyperref Info: Implicit mode ON; LaTeX internals redefined.
|
||||
Package hyperref Info: Bookmarks ON on input line 4445.
|
||||
\c@Hy@tempcnt=\count304
|
||||
(/usr/local/texlive/2021/texmf-dist/tex/latex/url/url.sty
|
||||
\Urlmuskip=\muskip18
|
||||
Package: url 2013/09/16 ver 3.4 Verb mode for urls, etc.
|
||||
)
|
||||
LaTeX Info: Redefining \url on input line 4804.
|
||||
\XeTeXLinkMargin=\dimen184
|
||||
(/usr/local/texlive/2021/texmf-dist/tex/generic/bitset/bitset.sty
|
||||
Package: bitset 2019/12/09 v1.3 Handle bit-vector datatype (HO)
|
||||
(/usr/local/texlive/2021/texmf-dist/tex/generic/bigintcalc/bigintcalc.sty
|
||||
Package: bigintcalc 2019/12/15 v1.5 Expandable calculations on big integers (HO)
|
||||
))
|
||||
\Fld@menulength=\count305
|
||||
\Field@Width=\dimen185
|
||||
\Fld@charsize=\dimen186
|
||||
Package hyperref Info: Hyper figures OFF on input line 6075.
|
||||
Package hyperref Info: Link nesting OFF on input line 6080.
|
||||
Package hyperref Info: Hyper index ON on input line 6083.
|
||||
Package hyperref Info: backreferencing OFF on input line 6090.
|
||||
Package hyperref Info: Link coloring OFF on input line 6095.
|
||||
Package hyperref Info: Link coloring with OCG OFF on input line 6100.
|
||||
Package hyperref Info: PDF/A mode OFF on input line 6105.
|
||||
LaTeX Info: Redefining \ref on input line 6145.
|
||||
LaTeX Info: Redefining \pageref on input line 6149.
|
||||
(/usr/local/texlive/2021/texmf-dist/tex/latex/base/atbegshi-ltx.sty
|
||||
Package: atbegshi-ltx 2020/08/17 v1.0a Emulation of the original atbegshi package
|
||||
with kernel methods
|
||||
)
|
||||
\Hy@abspage=\count306
|
||||
\c@Item=\count307
|
||||
\c@Hfootnote=\count308
|
||||
)
|
||||
Package hyperref Info: Driver (autodetected): hxetex.
|
||||
(/usr/local/texlive/2021/texmf-dist/tex/latex/hyperref/hxetex.def
|
||||
File: hxetex.def 2021-02-27 v7.00k Hyperref driver for XeTeX
|
||||
(/usr/local/texlive/2021/texmf-dist/tex/generic/stringenc/stringenc.sty
|
||||
Package: stringenc 2019/11/29 v1.12 Convert strings between diff. encodings (HO)
|
||||
)
|
||||
\pdfm@box=\box54
|
||||
\c@Hy@AnnotLevel=\count309
|
||||
\HyField@AnnotCount=\count310
|
||||
\Fld@listcount=\count311
|
||||
\c@bookmark@seq@number=\count312
|
||||
(/usr/local/texlive/2021/texmf-dist/tex/latex/rerunfilecheck/rerunfilecheck.sty
|
||||
Package: rerunfilecheck 2019/12/05 v1.9 Rerun checks for auxiliary files (HO)
|
||||
(/usr/local/texlive/2021/texmf-dist/tex/latex/base/atveryend-ltx.sty
|
||||
Package: atveryend-ltx 2020/08/19 v1.0a Emulation of the original atvery package
|
||||
with kernel methods
|
||||
) (/usr/local/texlive/2021/texmf-dist/tex/generic/uniquecounter/uniquecounter.sty
|
||||
Package: uniquecounter 2019/12/15 v1.4 Provide unlimited unique counter (HO)
|
||||
)
|
||||
Package uniquecounter Info: New unique counter `rerunfilecheck' on input line 286.
|
||||
)
|
||||
\Hy@SectionHShift=\skip63
|
||||
) (/usr/local/texlive/2021/texmf-dist/tex/latex/setspace/setspace.sty
|
||||
Package: setspace 2011/12/19 v6.7a set line spacing
|
||||
) (/usr/local/texlive/2021/texmf-dist/tex/latex/titlesec/titlesec.sty
|
||||
Package: titlesec 2019/10/16 v2.13 Sectioning titles
|
||||
\ttl@box=\box55
|
||||
\beforetitleunit=\skip64
|
||||
\aftertitleunit=\skip65
|
||||
\ttl@plus=\dimen187
|
||||
\ttl@minus=\dimen188
|
||||
\ttl@toksa=\toks25
|
||||
\titlewidth=\dimen189
|
||||
\titlewidthlast=\dimen190
|
||||
\titlewidthfirst=\dimen191
|
||||
) (./pseudocodes.aux)
|
||||
\openout1 = `pseudocodes.aux'.
|
||||
|
||||
LaTeX Font Info: Checking defaults for OML/cmm/m/it on input line 9.
|
||||
LaTeX Font Info: ... okay on input line 9.
|
||||
LaTeX Font Info: Checking defaults for OMS/cmsy/m/n on input line 9.
|
||||
LaTeX Font Info: ... okay on input line 9.
|
||||
LaTeX Font Info: Checking defaults for OT1/cmr/m/n on input line 9.
|
||||
LaTeX Font Info: ... okay on input line 9.
|
||||
LaTeX Font Info: Checking defaults for T1/cmr/m/n on input line 9.
|
||||
LaTeX Font Info: ... okay on input line 9.
|
||||
LaTeX Font Info: Checking defaults for TS1/cmr/m/n on input line 9.
|
||||
LaTeX Font Info: ... okay on input line 9.
|
||||
LaTeX Font Info: Checking defaults for TU/lmr/m/n on input line 9.
|
||||
LaTeX Font Info: ... okay on input line 9.
|
||||
LaTeX Font Info: Checking defaults for OMX/cmex/m/n on input line 9.
|
||||
LaTeX Font Info: ... okay on input line 9.
|
||||
LaTeX Font Info: Checking defaults for U/cmr/m/n on input line 9.
|
||||
LaTeX Font Info: ... okay on input line 9.
|
||||
LaTeX Font Info: Checking defaults for OML/cmm/m/it on input line 13.
|
||||
LaTeX Font Info: ... okay on input line 13.
|
||||
LaTeX Font Info: Checking defaults for OMS/cmsy/m/n on input line 13.
|
||||
LaTeX Font Info: ... okay on input line 13.
|
||||
LaTeX Font Info: Checking defaults for OT1/cmr/m/n on input line 13.
|
||||
LaTeX Font Info: ... okay on input line 13.
|
||||
LaTeX Font Info: Checking defaults for T1/cmr/m/n on input line 13.
|
||||
LaTeX Font Info: ... okay on input line 13.
|
||||
LaTeX Font Info: Checking defaults for TS1/cmr/m/n on input line 13.
|
||||
LaTeX Font Info: ... okay on input line 13.
|
||||
LaTeX Font Info: Checking defaults for TU/lmr/m/n on input line 13.
|
||||
LaTeX Font Info: ... okay on input line 13.
|
||||
LaTeX Font Info: Checking defaults for OMX/cmex/m/n on input line 13.
|
||||
LaTeX Font Info: ... okay on input line 13.
|
||||
LaTeX Font Info: Checking defaults for U/cmr/m/n on input line 13.
|
||||
LaTeX Font Info: ... okay on input line 13.
|
||||
LaTeX Font Info: Checking defaults for PD1/pdf/m/n on input line 13.
|
||||
LaTeX Font Info: ... okay on input line 13.
|
||||
LaTeX Font Info: Checking defaults for PU/pdf/m/n on input line 13.
|
||||
LaTeX Font Info: ... okay on input line 13.
|
||||
ABD: EverySelectfont initializing macros
|
||||
LaTeX Info: Redefining \selectfont on input line 9.
|
||||
LaTeX Info: Redefining \selectfont on input line 13.
|
||||
|
||||
Package fontspec Info: Adjusting the maths setup (use [no-math] to avoid
|
||||
(fontspec) this).
|
||||
|
||||
\symlegacymaths=\mathgroup6
|
||||
LaTeX Font Info: Overwriting symbol font `legacymaths' in version `bold'
|
||||
(Font) OT1/cmr/m/n --> OT1/cmr/bx/n on input line 9.
|
||||
LaTeX Font Info: Redeclaring math accent \acute on input line 9.
|
||||
LaTeX Font Info: Redeclaring math accent \grave on input line 9.
|
||||
LaTeX Font Info: Redeclaring math accent \ddot on input line 9.
|
||||
LaTeX Font Info: Redeclaring math accent \tilde on input line 9.
|
||||
LaTeX Font Info: Redeclaring math accent \bar on input line 9.
|
||||
LaTeX Font Info: Redeclaring math accent \breve on input line 9.
|
||||
LaTeX Font Info: Redeclaring math accent \check on input line 9.
|
||||
LaTeX Font Info: Redeclaring math accent \hat on input line 9.
|
||||
LaTeX Font Info: Redeclaring math accent \dot on input line 9.
|
||||
LaTeX Font Info: Redeclaring math accent \mathring on input line 9.
|
||||
LaTeX Font Info: Redeclaring math symbol \Gamma on input line 9.
|
||||
LaTeX Font Info: Redeclaring math symbol \Delta on input line 9.
|
||||
LaTeX Font Info: Redeclaring math symbol \Theta on input line 9.
|
||||
LaTeX Font Info: Redeclaring math symbol \Lambda on input line 9.
|
||||
LaTeX Font Info: Redeclaring math symbol \Xi on input line 9.
|
||||
LaTeX Font Info: Redeclaring math symbol \Pi on input line 9.
|
||||
LaTeX Font Info: Redeclaring math symbol \Sigma on input line 9.
|
||||
LaTeX Font Info: Redeclaring math symbol \Upsilon on input line 9.
|
||||
LaTeX Font Info: Redeclaring math symbol \Phi on input line 9.
|
||||
LaTeX Font Info: Redeclaring math symbol \Psi on input line 9.
|
||||
LaTeX Font Info: Redeclaring math symbol \Omega on input line 9.
|
||||
LaTeX Font Info: Redeclaring math symbol \mathdollar on input line 9.
|
||||
LaTeX Font Info: Redeclaring symbol font `operators' on input line 9.
|
||||
(Font) OT1/cmr/m/n --> OT1/cmr/bx/n on input line 13.
|
||||
LaTeX Font Info: Redeclaring math accent \acute on input line 13.
|
||||
LaTeX Font Info: Redeclaring math accent \grave on input line 13.
|
||||
LaTeX Font Info: Redeclaring math accent \ddot on input line 13.
|
||||
LaTeX Font Info: Redeclaring math accent \tilde on input line 13.
|
||||
LaTeX Font Info: Redeclaring math accent \bar on input line 13.
|
||||
LaTeX Font Info: Redeclaring math accent \breve on input line 13.
|
||||
LaTeX Font Info: Redeclaring math accent \check on input line 13.
|
||||
LaTeX Font Info: Redeclaring math accent \hat on input line 13.
|
||||
LaTeX Font Info: Redeclaring math accent \dot on input line 13.
|
||||
LaTeX Font Info: Redeclaring math accent \mathring on input line 13.
|
||||
LaTeX Font Info: Redeclaring math symbol \Gamma on input line 13.
|
||||
LaTeX Font Info: Redeclaring math symbol \Delta on input line 13.
|
||||
LaTeX Font Info: Redeclaring math symbol \Theta on input line 13.
|
||||
LaTeX Font Info: Redeclaring math symbol \Lambda on input line 13.
|
||||
LaTeX Font Info: Redeclaring math symbol \Xi on input line 13.
|
||||
LaTeX Font Info: Redeclaring math symbol \Pi on input line 13.
|
||||
LaTeX Font Info: Redeclaring math symbol \Sigma on input line 13.
|
||||
LaTeX Font Info: Redeclaring math symbol \Upsilon on input line 13.
|
||||
LaTeX Font Info: Redeclaring math symbol \Phi on input line 13.
|
||||
LaTeX Font Info: Redeclaring math symbol \Psi on input line 13.
|
||||
LaTeX Font Info: Redeclaring math symbol \Omega on input line 13.
|
||||
LaTeX Font Info: Redeclaring math symbol \mathdollar on input line 13.
|
||||
LaTeX Font Info: Redeclaring symbol font `operators' on input line 13.
|
||||
LaTeX Font Info: Encoding `OT1' has changed to `TU' for symbol font
|
||||
(Font) `operators' in the math version `normal' on input line 9.
|
||||
(Font) `operators' in the math version `normal' on input line 13.
|
||||
LaTeX Font Info: Overwriting symbol font `operators' in version `normal'
|
||||
(Font) OT1/cmr/m/n --> TU/lmr/m/n on input line 9.
|
||||
(Font) OT1/cmr/m/n --> TU/lmr/m/n on input line 13.
|
||||
LaTeX Font Info: Encoding `OT1' has changed to `TU' for symbol font
|
||||
(Font) `operators' in the math version `bold' on input line 9.
|
||||
(Font) `operators' in the math version `bold' on input line 13.
|
||||
LaTeX Font Info: Overwriting symbol font `operators' in version `bold'
|
||||
(Font) OT1/cmr/bx/n --> TU/lmr/m/n on input line 9.
|
||||
(Font) OT1/cmr/bx/n --> TU/lmr/m/n on input line 13.
|
||||
LaTeX Font Info: Overwriting symbol font `operators' in version `normal'
|
||||
(Font) TU/lmr/m/n --> TU/lmr/m/n on input line 9.
|
||||
(Font) TU/lmr/m/n --> TU/lmr/m/n on input line 13.
|
||||
LaTeX Font Info: Overwriting math alphabet `\mathit' in version `normal'
|
||||
(Font) OT1/cmr/m/it --> TU/lmr/m/it on input line 9.
|
||||
(Font) OT1/cmr/m/it --> TU/lmr/m/it on input line 13.
|
||||
LaTeX Font Info: Overwriting math alphabet `\mathbf' in version `normal'
|
||||
(Font) OT1/cmr/bx/n --> TU/lmr/b/n on input line 9.
|
||||
(Font) OT1/cmr/bx/n --> TU/lmr/b/n on input line 13.
|
||||
LaTeX Font Info: Overwriting math alphabet `\mathsf' in version `normal'
|
||||
(Font) OT1/cmss/m/n --> TU/lmss/m/n on input line 9.
|
||||
(Font) OT1/cmss/m/n --> TU/lmss/m/n on input line 13.
|
||||
LaTeX Font Info: Overwriting math alphabet `\mathtt' in version `normal'
|
||||
(Font) OT1/cmtt/m/n --> TU/lmtt/m/n on input line 9.
|
||||
(Font) OT1/cmtt/m/n --> TU/lmtt/m/n on input line 13.
|
||||
LaTeX Font Info: Overwriting symbol font `operators' in version `bold'
|
||||
(Font) TU/lmr/m/n --> TU/lmr/b/n on input line 9.
|
||||
(Font) TU/lmr/m/n --> TU/lmr/b/n on input line 13.
|
||||
LaTeX Font Info: Overwriting math alphabet `\mathit' in version `bold'
|
||||
(Font) OT1/cmr/bx/it --> TU/lmr/b/it on input line 9.
|
||||
(Font) OT1/cmr/bx/it --> TU/lmr/b/it on input line 13.
|
||||
LaTeX Font Info: Overwriting math alphabet `\mathsf' in version `bold'
|
||||
(Font) OT1/cmss/bx/n --> TU/lmss/b/n on input line 9.
|
||||
(Font) OT1/cmss/bx/n --> TU/lmss/b/n on input line 13.
|
||||
LaTeX Font Info: Overwriting math alphabet `\mathtt' in version `bold'
|
||||
(Font) OT1/cmtt/m/n --> TU/lmtt/b/n on input line 9.
|
||||
LaTeX Font Info: Trying to load font information for U+msa on input line 20.
|
||||
(Font) OT1/cmtt/m/n --> TU/lmtt/b/n on input line 13.
|
||||
Package hyperref Info: Link coloring OFF on input line 13.
|
||||
(/usr/local/texlive/2021/texmf-dist/tex/latex/hyperref/nameref.sty
|
||||
Package: nameref 2021-04-02 v2.47 Cross-referencing by name of section
|
||||
(/usr/local/texlive/2021/texmf-dist/tex/latex/refcount/refcount.sty
|
||||
Package: refcount 2019/12/15 v3.6 Data extraction from label references (HO)
|
||||
) (/usr/local/texlive/2021/texmf-dist/tex/generic/gettitlestring/gettitlestring.sty
|
||||
Package: gettitlestring 2019/12/15 v1.6 Cleanup title references (HO)
|
||||
)
|
||||
\c@section@level=\count313
|
||||
)
|
||||
LaTeX Info: Redefining \ref on input line 13.
|
||||
LaTeX Info: Redefining \pageref on input line 13.
|
||||
LaTeX Info: Redefining \nameref on input line 13.
|
||||
(./pseudocodes.out) (./pseudocodes.out)
|
||||
\@outlinefile=\write3
|
||||
\openout3 = `pseudocodes.out'.
|
||||
|
||||
(./pseudocodes.toc)
|
||||
\tf@toc=\write4
|
||||
\openout4 = `pseudocodes.toc'.
|
||||
|
||||
LaTeX Font Info: Font shape `TU/SongtiSCLight(0)/m/sl' in size <10.95> not available
|
||||
(Font) Font shape `TU/SongtiSCLight(0)/m/it' tried instead on input line 16.
|
||||
[1
|
||||
|
||||
]
|
||||
Package hyperref Info: bookmark level for unknown algorithm defaults to 0 on input line 21.
|
||||
[2
|
||||
|
||||
]
|
||||
LaTeX Font Info: Trying to load font information for U+msa on input line 31.
|
||||
(/usr/local/texlive/2021/texmf-dist/tex/latex/amsfonts/umsa.fd
|
||||
File: umsa.fd 2013/01/14 v3.01 AMS symbols A
|
||||
)
|
||||
LaTeX Font Info: Trying to load font information for U+msb on input line 20.
|
||||
LaTeX Font Info: Trying to load font information for U+msb on input line 31.
|
||||
(/usr/local/texlive/2021/texmf-dist/tex/latex/amsfonts/umsb.fd
|
||||
File: umsb.fd 2013/01/14 v3.01 AMS symbols B
|
||||
)
|
||||
Overfull \hbox (38.0069pt too wide) in paragraph at lines 32--33
|
||||
[] []\TU/SongtiSCLight(0)/m/n/10.95 计 算 实 际 的 $\OML/cmm/m/it/10.95 Q$ \TU/SongtiSCLight(0)/m/n/10.95 值,| 即 $\OML/cmm/m/it/10.95 y[] \OT1/cmr/m/n/10.95 = []$
|
||||
) [3
|
||||
|
||||
] [4
|
||||
|
||||
] [5
|
||||
|
||||
]
|
||||
Underfull \hbox (badness 10000) in paragraph at lines 111--112
|
||||
[] []\TU/SongtiSCLight(0)/m/n/10.95 计 算 实 际 的 $\OML/cmm/m/it/10.95 Q$ \TU/SongtiSCLight(0)/m/n/10.95 值,| 即 $\OML/cmm/m/it/10.95 y[] \OT1/cmr/m/n/10.95 =
|
||||
[]
|
||||
|
||||
[1
|
||||
[6
|
||||
|
||||
] [2
|
||||
] [7
|
||||
|
||||
] (./pseudocodes.aux) )
|
||||
]
|
||||
Overfull \hbox (32.54117pt too wide) in paragraph at lines 183--183
|
||||
[][]$[]\OML/cmm/m/it/9 J[]\OT1/cmr/m/n/9 (\OML/cmm/m/it/9 ^^R\OT1/cmr/m/n/9 ) = \OMS/cmsy/m/n/9 r[]\OML/cmm/m/it/9 Q[] [] []$|
|
||||
[]
|
||||
|
||||
|
||||
Overfull \hbox (15.41673pt too wide) in paragraph at lines 184--184
|
||||
[][]$[]\OML/cmm/m/it/9 J[]\OT1/cmr/m/n/9 (\OML/cmm/m/it/9 ^^^\OT1/cmr/m/n/9 ) = \OMS/cmsy/m/n/9 r[]\OML/cmm/m/it/9 ^^K [] [] \OT1/cmr/m/n/9 + [] \OMS/cmsy/m/n/9 r[]\OML/cmm/m/it/9 f[] []$\TU/lmr/m/n/9 ,$[][] \OT1/cmr/m/n/9 =
|
||||
[]
|
||||
|
||||
[8
|
||||
|
||||
] (./pseudocodes.aux)
|
||||
Package rerunfilecheck Info: File `pseudocodes.out' has not changed.
|
||||
(rerunfilecheck) Checksum: 4575BA7458AA23D6E696EFFE39D05727;640.
|
||||
)
|
||||
Here is how much of TeX's memory you used:
|
||||
7847 strings out of 476919
|
||||
208964 string characters out of 5821840
|
||||
529246 words of memory out of 5000000
|
||||
27739 multiletter control sequences out of 15000+600000
|
||||
410995 words of font info for 73 fonts, out of 8000000 for 9000
|
||||
14813 strings out of 476919
|
||||
312635 string characters out of 5821840
|
||||
653471 words of memory out of 5000000
|
||||
34563 multiletter control sequences out of 15000+600000
|
||||
413601 words of font info for 90 fonts, out of 8000000 for 9000
|
||||
1348 hyphenation exceptions out of 8191
|
||||
101i,11n,104p,414b,663s stack positions out of 5000i,500n,10000p,200000b,80000s
|
||||
101i,13n,104p,676b,736s stack positions out of 5000i,500n,10000p,200000b,80000s
|
||||
|
||||
Output written on pseudocodes.pdf (2 pages).
|
||||
Output written on pseudocodes.pdf (8 pages).
|
||||
|
||||
7
projects/assets/pseudocodes/pseudocodes.out
Normal file
7
projects/assets/pseudocodes/pseudocodes.out
Normal file
@@ -0,0 +1,7 @@
|
||||
\BOOKMARK [1][-]{section.1}{\376\377\152\041\162\110\131\007\165\050}{}% 1
|
||||
\BOOKMARK [1][-]{section.2}{\376\377\000Q\000\040\000l\000e\000a\000r\000n\000i\000n\000g\173\227\154\325}{}% 2
|
||||
\BOOKMARK [1][-]{section.3}{\376\377\000S\000a\000r\000s\000a\173\227\154\325}{}% 3
|
||||
\BOOKMARK [1][-]{section.4}{\376\377\000P\000o\000l\000i\000c\000y\000\040\000G\000r\000a\000d\000i\000e\000n\000t\173\227\154\325}{}% 4
|
||||
\BOOKMARK [1][-]{section.5}{\376\377\000D\000Q\000N\173\227\154\325}{}% 5
|
||||
\BOOKMARK [1][-]{section.6}{\376\377\000S\000o\000f\000t\000Q\173\227\154\325}{}% 6
|
||||
\BOOKMARK [1][-]{section.7}{\376\377\000S\000A\000C\173\227\154\325}{}% 7
|
||||
Binary file not shown.
Binary file not shown.
@@ -4,17 +4,96 @@
|
||||
\usepackage{algorithmic}
|
||||
\usepackage{amssymb}
|
||||
\usepackage{amsmath}
|
||||
|
||||
|
||||
\usepackage{hyperref}
|
||||
% \usepackage[hidelinks]{hyperref} 去除超链接的红色框
|
||||
\usepackage{setspace}
|
||||
\usepackage{titlesec}
|
||||
\usepackage{float} % 调用该包能够使用[H]
|
||||
% \pagestyle{plain} % 去除页眉,但是保留页脚编号,都去掉plain换empty
|
||||
\begin{document}
|
||||
|
||||
\begin{algorithm}
|
||||
\tableofcontents % 目录,注意要运行两下或者vscode保存两下才能显示
|
||||
% \singlespacing
|
||||
\clearpage
|
||||
\section{模版备用}
|
||||
\begin{algorithm}[H] % [H]固定位置
|
||||
\floatname{algorithm}{{算法}}
|
||||
\renewcommand{\thealgorithm}{} % 去掉算法标号
|
||||
\caption{}
|
||||
\begin{algorithmic}[1] % [1]显示步数
|
||||
\STATE 测试
|
||||
\end{algorithmic}
|
||||
\end{algorithm}
|
||||
\clearpage
|
||||
\section{Q learning算法}
|
||||
\begin{algorithm}[H] % [H]固定位置
|
||||
\floatname{algorithm}{{Q-learning算法}\footnotemark[1]}
|
||||
\renewcommand{\thealgorithm}{} % 去掉算法标号
|
||||
\caption{}
|
||||
\begin{algorithmic}[1] % [1]显示步数
|
||||
\STATE 初始化Q表$Q(s,a)$为任意值,但其中$Q(s_{\text{terminal}},\cdot)=0$,即终止状态对应的Q值为0
|
||||
\FOR {回合数 = $1,M$}
|
||||
\STATE 重置环境,获得初始状态$s_1$
|
||||
\FOR {时步 = $1,t$}
|
||||
\STATE 根据$\varepsilon$-greedy策略采样动作$a_t$
|
||||
\STATE 环境根据$a_t$反馈奖励$r_t$和下一个状态$s_{t+1}$
|
||||
\STATE {\bfseries 更新策略:}
|
||||
\STATE $Q(s_t,a_t) \leftarrow Q(s_t,a_t)+\alpha[r_t+\gamma\max _{a}Q(s_{t+1},a)-Q(s_t,a_t)]$
|
||||
\STATE 更新状态$s_t \leftarrow s_{t+1}$
|
||||
\ENDFOR
|
||||
\ENDFOR
|
||||
\end{algorithmic}
|
||||
\end{algorithm}
|
||||
\footnotetext[1]{Reinforcement Learning: An Introduction}
|
||||
\clearpage
|
||||
\section{Sarsa算法}
|
||||
\begin{algorithm}[H] % [H]固定位置
|
||||
\floatname{algorithm}{{Sarsa算法}\footnotemark[1]}
|
||||
\renewcommand{\thealgorithm}{} % 去掉算法标号
|
||||
\caption{}
|
||||
\begin{algorithmic}[1] % [1]显示步数
|
||||
\STATE 初始化Q表$Q(s,a)$为任意值,但其中$Q(s_{\text{terminal}},\cdot)=0$,即终止状态对应的Q值为0
|
||||
\FOR {回合数 = $1,M$}
|
||||
\STATE 重置环境,获得初始状态$s_1$
|
||||
\STATE 根据$\varepsilon$-greedy策略采样初始动作$a_1$
|
||||
\FOR {时步 = $1,t$}
|
||||
\STATE 环境根据$a_t$反馈奖励$r_t$和下一个状态$s_{t+1}$
|
||||
\STATE 根据$\varepsilon$-greedy策略和$s_{t+1}$采样动作$a_{t+1}$
|
||||
\STATE {\bfseries 更新策略:}
|
||||
\STATE $Q(s_t,a_t) \leftarrow Q(s_t,a_t)+\alpha[r_t+\gamma Q(s_{t+1},a_{t+1})-Q(s_t,a_t)]$
|
||||
\STATE 更新状态$s_t \leftarrow s_{t+1}$
|
||||
\STATE 更新动作$a_t \leftarrow a_{t+1}$
|
||||
\ENDFOR
|
||||
\ENDFOR
|
||||
\end{algorithmic}
|
||||
\end{algorithm}
|
||||
\footnotetext[1]{Reinforcement Learning: An Introduction}
|
||||
\clearpage
|
||||
\section{Policy Gradient算法}
|
||||
\begin{algorithm}[H] % [H]固定位置
|
||||
\floatname{algorithm}{{REINFORCE算法:Monte-Carlo Policy Gradient}\footnotemark[1]}
|
||||
\renewcommand{\thealgorithm}{} % 去掉算法标号
|
||||
\caption{}
|
||||
\begin{algorithmic}[1] % [1]显示步数
|
||||
\STATE 初始化策略参数$\boldsymbol{\theta} \in \mathbb{R}^{d^{\prime}}$(例如初始化为$\mathbf{0}$)
|
||||
\FOR {回合数 = $1,M$}
|
||||
\STATE 根据策略$\pi(\cdot \mid \cdot, \boldsymbol{\theta})$采样一个(或几个)回合的transition
|
||||
\FOR {时步 $t = 0,1,\dots,T-1$}
|
||||
\STATE 计算回报$G \leftarrow \sum_{k=t+1}^{T} \gamma^{k-t-1} R_{k}$
|
||||
\STATE 更新策略$\boldsymbol{\theta} \leftarrow {\boldsymbol{\theta}+\alpha \gamma^{t}} G \nabla \ln \pi\left(A_{t} \mid S_{t}, \boldsymbol{\theta}\right)$
|
||||
\ENDFOR
|
||||
\ENDFOR
|
||||
\end{algorithmic}
|
||||
\end{algorithm}
|
||||
\footnotetext[1]{Reinforcement Learning: An Introduction}
|
||||
\clearpage
|
||||
\section{DQN算法}
|
||||
\begin{algorithm}[H] % [H]固定位置
|
||||
\floatname{algorithm}{{DQN算法}}
|
||||
\renewcommand{\thealgorithm}{} % 去掉算法标号
|
||||
\caption{}
|
||||
\renewcommand{\algorithmicrequire}{\textbf{输入:}}
|
||||
\renewcommand{\algorithmicensure}{\textbf{输出:}}
|
||||
\begin{algorithmic}
|
||||
\begin{algorithmic}[1]
|
||||
% \REQUIRE $n \geq 0 \vee x \neq 0$ % 输入
|
||||
% \ENSURE $y = x^n$ % 输出
|
||||
\STATE 初始化策略网络参数$\theta$ % 初始化
|
||||
@@ -24,40 +103,85 @@
|
||||
\STATE 重置环境,获得初始状态$s_t$
|
||||
\FOR {时步 = $1,t$}
|
||||
\STATE 根据$\varepsilon$-greedy策略采样动作$a_t$
|
||||
\STATE 环境根据$a_t$反馈奖励$s_t$和下一个状态$s_{t+1}$
|
||||
\STATE 环境根据$a_t$反馈奖励$r_t$和下一个状态$s_{t+1}$
|
||||
\STATE 存储transition即$(s_t,a_t,r_t,s_{t+1})$到经验回放$D$中
|
||||
\STATE 更新环境状态$s_t \leftarrow s_{t+1}$
|
||||
\STATE {\bfseries 更新策略:}
|
||||
\STATE 从$D$中采样一个batch的transition
|
||||
\STATE 计算实际的$Q$值,即$y_{j}= \begin{cases}r_{j} & \text {对于终止状态} s_{j+1} \\ r_{j}+\gamma \max _{a^{\prime}} Q\left(s_{j+1}, a^{\prime} ; \theta\right) & \text {对于非终止状态} s_{j+1}\end{cases}$
|
||||
\STATE 对损失 $\left(y_{j}-Q\left(s_{j}, a_{j} ; \theta\right)\right)^{2}$关于参数$\theta$做随机梯度下降
|
||||
\STATE 每$C$步复制参数$\hat{Q} \leftarrow Q$
|
||||
\ENDFOR
|
||||
\STATE 每$C$个回合复制参数$\hat{Q}\leftarrow Q$(此处也可像原论文中放到小循环中改成每$C$步,但没有每$C$个回合稳定)
|
||||
\ENDFOR
|
||||
\end{algorithmic}
|
||||
\end{algorithm}
|
||||
|
||||
\clearpage
|
||||
|
||||
\begin{algorithm}
|
||||
\section{SoftQ算法}
|
||||
\begin{algorithm}[H]
|
||||
\floatname{algorithm}{{SoftQ算法}}
|
||||
\renewcommand{\thealgorithm}{} % 去掉算法标号
|
||||
\caption{}
|
||||
\begin{algorithmic}
|
||||
\begin{algorithmic}[1]
|
||||
\STATE 初始化参数$\theta$和$\phi$% 初始化
|
||||
\STATE 复制参数$\bar{\theta} \leftarrow \theta, \bar{\phi} \leftarrow \phi$
|
||||
\STATE 初始化经验回放$D$
|
||||
\FOR {回合数 = $1,M$}
|
||||
\FOR {时步 = $1,t$}
|
||||
\STATE 根据$a_{t} \leftarrow f^{\phi}\left(\xi ; \mathbf{s}_{t}\right)$采样动作,其中$\xi \sim \mathcal{N}(\mathbf{0}, \boldsymbol{I})$
|
||||
\STATE 根据$\mathbf{a}_{t} \leftarrow f^{\phi}\left(\xi ; \mathbf{s}_{t}\right)$采样动作,其中$\xi \sim \mathcal{N}(\mathbf{0}, \boldsymbol{I})$
|
||||
\STATE 环境根据$a_t$反馈奖励$r_t$和下一个状态$s_{t+1}$
|
||||
\STATE 存储transition即$(s_t,a_t,r_t,s_{t+1})$到经验回放$D$中
|
||||
\STATE 更新环境状态$s_t \leftarrow s_{t+1}$
|
||||
\STATE 待完善
|
||||
\STATE {\bfseries 更新soft Q函数参数:}
|
||||
\STATE 对于每个$s^{(i)}_{t+1}$采样$\left\{\mathbf{a}^{(i, j)}\right\}_{j=0}^{M} \sim q_{\mathbf{a}^{\prime}}$
|
||||
\STATE 计算empirical soft values $V_{\mathrm{soft}}^{\theta}\left(\mathbf{s}_{t}\right)$\footnotemark[1]
|
||||
\STATE 计算empirical gradient $J_{Q}(\theta)$\footnotemark[2]
|
||||
\STATE 根据$J_{Q}(\theta)$使用ADAM更新参数$\theta$
|
||||
\STATE {\bfseries 更新策略:}
|
||||
\STATE 对于每个$s^{(i)}_{t}$采样$\left\{\xi^{(i, j)}\right\}_{j=0}^{M} \sim \mathcal{N}(\mathbf{0}, \boldsymbol{I})$
|
||||
\STATE 计算$\mathbf{a}_{t}^{(i, j)}=f^{\phi}\left(\xi^{(i, j)}, \mathbf{s}_{t}^{(i)}\right)$
|
||||
\STATE 使用经验估计计算$\Delta f^{\phi}\left(\cdot ; \mathbf{s}_{t}\right)$\footnotemark[3]
|
||||
\STATE 计算经验估计$\frac{\partial J_{\pi}\left(\phi ; \mathbf{s}_{t}\right)}{\partial \phi} \propto \mathbb{E}_{\xi}\left[\Delta f^{\phi}\left(\xi ; \mathbf{s}_{t}\right) \frac{\partial f^{\phi}\left(\xi ; \mathbf{s}_{t}\right)}{\partial \phi}\right]$,即$\hat{\nabla}_{\phi} J_{\pi}$
|
||||
\STATE 根据$\hat{\nabla}_{\phi} J_{\pi}$使用ADAM更新参数$\phi$
|
||||
\STATE
|
||||
\ENDFOR
|
||||
\ENDFOR
|
||||
|
||||
\STATE 每$C$个回合复制参数$\bar{\theta} \leftarrow \theta, \bar{\phi} \leftarrow \phi$
|
||||
\ENDFOR
|
||||
\end{algorithmic}
|
||||
\end{algorithm}
|
||||
|
||||
\footnotetext[1]{$V_{\mathrm{soft}}^{\theta}\left(\mathbf{s}_{t}\right)=\alpha \log \mathbb{E}_{q_{\mathbf{a}^{\prime}}}\left[\frac{\exp \left(\frac{1}{\alpha} Q_{\mathrm{soft}}^{\theta}\left(\mathbf{s}_{t}, \mathbf{a}^{\prime}\right)\right)}{q_{\mathbf{a}^{\prime}}\left(\mathbf{a}^{\prime}\right)}\right]$}
|
||||
\footnotetext[2]{$J_{Q}(\theta)=\mathbb{E}_{\mathbf{s}_{t} \sim q_{\mathbf{s}_{t}}, \mathbf{a}_{t} \sim q_{\mathbf{a}_{t}}}\left[\frac{1}{2}\left(\hat{Q}_{\mathrm{soft}}^{\bar{\theta}}\left(\mathbf{s}_{t}, \mathbf{a}_{t}\right)-Q_{\mathrm{soft}}^{\theta}\left(\mathbf{s}_{t}, \mathbf{a}_{t}\right)\right)^{2}\right]$}
|
||||
\footnotetext[3]{$\begin{aligned} \Delta f^{\phi}\left(\cdot ; \mathbf{s}_{t}\right)=& \mathbb{E}_{\mathbf{a}_{t} \sim \pi^{\phi}}\left[\left.\kappa\left(\mathbf{a}_{t}, f^{\phi}\left(\cdot ; \mathbf{s}_{t}\right)\right) \nabla_{\mathbf{a}^{\prime}} Q_{\mathrm{soft}}^{\theta}\left(\mathbf{s}_{t}, \mathbf{a}^{\prime}\right)\right|_{\mathbf{a}^{\prime}=\mathbf{a}_{t}}\right.\\ &\left.+\left.\alpha \nabla_{\mathbf{a}^{\prime}} \kappa\left(\mathbf{a}^{\prime}, f^{\phi}\left(\cdot ; \mathbf{s}_{t}\right)\right)\right|_{\mathbf{a}^{\prime}=\mathbf{a}_{t}}\right] \end{aligned}$}
|
||||
\clearpage
|
||||
\section{SAC算法}
|
||||
\begin{algorithm}[H] % [H]固定位置
|
||||
\floatname{algorithm}{{Soft Actor Critic算法}}
|
||||
\renewcommand{\thealgorithm}{} % 去掉算法标号
|
||||
\caption{}
|
||||
\begin{algorithmic}[1]
|
||||
\STATE 初始化两个Critic($Q$函数)的网络参数$\theta_1,\theta_2$以及一个Actor(策略)网络参数$\phi$ % 初始化
|
||||
\STATE 复制参数到目标网络$\bar{\theta}_1 \leftarrow \theta_1,\bar{\theta}_2 \leftarrow \theta_2$
|
||||
\STATE 初始化经验回放$D$
|
||||
\FOR {回合数 = $1,M$}
|
||||
\STATE 重置环境,获得初始状态$s_t$
|
||||
\FOR {时步 = $1,t$}
|
||||
\STATE 根据$\boldsymbol{a}_{t} \sim \pi_{\phi}\left(\boldsymbol{a}_{t} \mid \mathbf{s}_{t}\right)$采样动作$a_t$
|
||||
\STATE 环境反馈奖励和下一个状态,$\mathbf{s}_{t+1} \sim p\left(\mathbf{s}_{t+1} \mid \mathbf{s}_{t}, \mathbf{a}_{t}\right)$
|
||||
\STATE 存储transition到经验回放中,$\mathcal{D} \leftarrow \mathcal{D} \cup\left\{\left(\mathbf{s}_{t}, \mathbf{a}_{t}, r\left(\mathbf{s}_{t}, \mathbf{a}_{t}\right), \mathbf{s}_{t+1}\right)\right\}$
|
||||
\STATE 更新环境状态$s_t \leftarrow s_{t+1}$
|
||||
\STATE {\bfseries 更新策略:}
|
||||
\STATE 更新$Q$函数,$\theta_{i} \leftarrow \theta_{i}-\lambda_{Q} \hat{\nabla}_{\theta_{i}} J_{Q}\left(\theta_{i}\right)$ for $i \in\{1,2\}$\footnotemark[1]\footnotemark[2]
|
||||
\STATE 更新策略权重,$\phi \leftarrow \phi-\lambda_{\pi} \hat{\nabla}_{\phi} J_{\pi}(\phi)$ \footnotemark[3]
|
||||
\STATE 调整temperature,$\alpha \leftarrow \alpha-\lambda \hat{\nabla}_{\alpha} J(\alpha)$ \footnotemark[4]
|
||||
\STATE 更新目标网络权重,$\bar{\theta}_{i} \leftarrow \tau \theta_{i}+(1-\tau) \bar{\theta}_{i}$ for $i \in\{1,2\}$
|
||||
\ENDFOR
|
||||
\ENDFOR
|
||||
\end{algorithmic}
|
||||
|
||||
\end{algorithm}
|
||||
\footnotetext[1]{$J_{Q}(\theta)=\mathbb{E}_{\left(\mathbf{s}_{t}, \mathbf{a}_{t}\right) \sim \mathcal{D}}\left[\frac{1}{2}\left(Q_{\theta}\left(\mathbf{s}_{t}, \mathbf{a}_{t}\right)-\left(r\left(\mathbf{s}_{t}, \mathbf{a}_{t}\right)+\gamma \mathbb{E}_{\mathbf{s}_{t+1} \sim p}\left[V_{\bar{\theta}}\left(\mathbf{s}_{t+1}\right)\right]\right)\right)^{2}\right]$}
|
||||
\footnotetext[2]{$\hat{\nabla}_{\theta} J_{Q}(\theta)=\nabla_{\theta} Q_{\theta}\left(\mathbf{s}_{t}, \mathbf{a}_{t}\right)\left(Q_{\theta}\left(\mathbf{s}_{t}, \mathbf{a}_{t}\right)-\left(r\left(\mathbf{s}_{t}, \mathbf{a}_{t}\right)+\gamma\left(Q_{\bar{\theta}}\left(\mathbf{s}_{t+1}, \mathbf{a}_{t+1}\right)-\alpha \log \left(\pi_{\phi}\left(\mathbf{a}_{t+1} \mid \mathbf{s}_{t+1}\right)\right)\right)\right)\right)$}
|
||||
\footnotetext[3]{$\hat{\nabla}_{\phi} J_{\pi}(\phi)=\nabla_{\phi} \alpha \log \left(\pi_{\phi}\left(\mathbf{a}_{t} \mid \mathbf{s}_{t}\right)\right)+\left(\nabla_{\mathbf{a}_{t}} \alpha \log \left(\pi_{\phi}\left(\mathbf{a}_{t} \mid \mathbf{s}_{t}\right)\right)-\nabla_{\mathbf{a}_{t}} Q\left(\mathbf{s}_{t}, \mathbf{a}_{t}\right)\right) \nabla_{\phi} f_{\phi}\left(\epsilon_{t} ; \mathbf{s}_{t}\right)$,$\mathbf{a}_{t}=f_{\phi}\left(\epsilon_{t} ; \mathbf{s}_{t}\right)$}
|
||||
\footnotetext[4]{$J(\alpha)=\mathbb{E}_{\mathbf{a}_{t} \sim \pi_{t}}\left[-\alpha \log \pi_{t}\left(\mathbf{a}_{t} \mid \mathbf{s}_{t}\right)-\alpha \overline{\mathcal{H}}\right]$}
|
||||
\clearpage
|
||||
\end{document}
|
||||
7
projects/assets/pseudocodes/pseudocodes.toc
Normal file
7
projects/assets/pseudocodes/pseudocodes.toc
Normal file
@@ -0,0 +1,7 @@
|
||||
\contentsline {section}{\numberline {1}模版备用}{2}{section.1}%
|
||||
\contentsline {section}{\numberline {2}Q learning算法}{3}{section.2}%
|
||||
\contentsline {section}{\numberline {3}Sarsa算法}{4}{section.3}%
|
||||
\contentsline {section}{\numberline {4}Policy Gradient算法}{5}{section.4}%
|
||||
\contentsline {section}{\numberline {5}DQN算法}{6}{section.5}%
|
||||
\contentsline {section}{\numberline {6}SoftQ算法}{7}{section.6}%
|
||||
\contentsline {section}{\numberline {7}SAC算法}{8}{section.7}%
|
||||
Reference in New Issue
Block a user