更新算法模版

2022-11-06 12:15:36 +08:00
parent 466a17707f
commit dc78698262
256 changed files with 17282 additions and 10229 deletions
--- a/projects/codes/A2C/config/CartPole-v1_A2C_Test.yaml
+++ b/projects/codes/A2C/config/CartPole-v1_A2C_Test.yaml
@@ -0,0 +1,21 @@
+general_cfg:
+  algo_name: A2C
+  device: cuda
+  env_name: CartPole-v1
+  mode: test
+  load_checkpoint: true
+  load_path: Train_CartPole-v1_A2C_20221031-232138
+  max_steps: 200
+  save_fig: true
+  seed: 1
+  show_fig: false
+  test_eps: 20
+  train_eps: 1000
+algo_cfg:
+  continuous: false
+  batch_size: 64
+  buffer_size: 100000
+  gamma: 0.99
+  actor_lr: 0.0003
+  critic_lr: 0.001
+  target_update: 4
--- a/projects/codes/A2C/config/CartPole-v1_A2C_Train.yaml
+++ b/projects/codes/A2C/config/CartPole-v1_A2C_Train.yaml
@@ -0,0 +1,19 @@
+general_cfg:
+  algo_name: A2C
+  device: cuda
+  env_name: CartPole-v1
+  mode: train
+  load_checkpoint: false
+  load_path: Train_CartPole-v1_DQN_20221026-054757
+  max_steps: 200
+  save_fig: true
+  seed: 1
+  show_fig: false
+  test_eps: 20
+  train_eps: 600
+algo_cfg:
+  continuous: false # discrete action space
+  batch_size: 64
+  buffer_size: 100000
+  gamma: 0.99 # discount factor; was 0.0003 (a learning-rate-sized value), now matches the A2C default and the Test config
+  lr: 0.001 # NOTE(review): the A2C defaults define actor_lr/critic_lr, not lr - confirm this key is actually read
--- a/projects/codes/A2C/config/Pendulum-v1_A2C_Train.yaml
+++ b/projects/codes/A2C/config/Pendulum-v1_A2C_Train.yaml
@@ -0,0 +1,21 @@
+general_cfg:
+  algo_name: A2C
+  device: cuda
+  env_name: Pendulum-v1
+  mode: train
+  eval_per_episode: 200
+  load_checkpoint: false
+  load_path: Train_CartPole-v1_DQN_20221026-054757
+  max_steps: 200
+  save_fig: true
+  seed: 1
+  show_fig: false
+  test_eps: 20
+  train_eps: 1000
+algo_cfg:
+  continuous: true # continuous action space
+  batch_size: 64
+  buffer_size: 100000
+  gamma: 0.99 # discount factor; was 0.0003 (a copy of the actor_lr value), now matches the A2C default
+  actor_lr: 0.0003 # learning rate of actor
+  critic_lr: 0.001 # learning rate of critic
--- a/projects/codes/A2C/config/config.py
+++ b/projects/codes/A2C/config/config.py
@@ -0,0 +1,38 @@
+#!/usr/bin/env python
+# coding=utf-8
+'''
+Author: JiangJi
+Email: johnjim0816@gmail.com
+Date: 2022-10-30 00:53:03
+LastEditor: JiangJi
+LastEditTime: 2022-11-01 00:17:55
+Description: default parameters of A2C
+'''
+from common.config import GeneralConfig,AlgoConfig
+
+class GeneralConfigA2C(GeneralConfig):
+    def __init__(self) -> None:
+        self.env_name = "CartPole-v1" # name of environment
+        self.algo_name = "A2C" # name of algorithm
+        self.mode = "train" # train or test
+        self.seed = 1 # random seed
+        self.device = "cuda" # device to use
+        self.train_eps = 1000 # number of episodes for training
+        self.test_eps = 20 # number of episodes for testing
+        self.max_steps = 200 # max steps for each episode
+        self.load_checkpoint = False
+        self.load_path = "tasks" # path to load model
+        self.show_fig = False # show figure or not
+        self.save_fig = True # save figure or not
+        
+class AlgoConfigA2C(AlgoConfig):
+    def __init__(self) -> None:
+        self.continuous = False # continuous or discrete action space
+        self.hidden_dim = 256 # hidden_dim for MLP
+        self.gamma = 0.99 # discount factor
+        self.actor_lr = 3e-4 # learning rate of actor
+        self.critic_lr = 1e-3 # learning rate of critic
+        self.actor_hidden_dim = 256 # hidden_dim for actor MLP
+        self.critic_hidden_dim = 256 # hidden_dim for critic MLP
+        self.buffer_size = 100000 # size of replay buffer
+        self.batch_size = 64 # batch size