更新算法模版
This commit is contained in:
21
projects/codes/A2C/config/CartPole-v1_A2C_Test.yaml
Normal file
21
projects/codes/A2C/config/CartPole-v1_A2C_Test.yaml
Normal file
@@ -0,0 +1,21 @@
|
||||
# A2C on CartPole-v1, test mode: loads a previously trained run.
general_cfg:
  algo_name: A2C          # name of algorithm
  device: cuda            # device to use
  env_name: CartPole-v1   # name of environment
  mode: test              # train or test
  load_checkpoint: true
  load_path: Train_CartPole-v1_A2C_20221031-232138   # path to load model
  max_steps: 200          # max steps for each episode
  save_fig: true          # save figure or not
  seed: 1                 # random seed
  show_fig: false         # show figure or not
  test_eps: 20            # number of episodes for testing
  train_eps: 1000         # number of episodes for training
algo_cfg:
  continuous: false       # continuous or discrete action space
  batch_size: 64          # batch size
  buffer_size: 100000     # size of replay buffer
  gamma: 0.99             # discount factor
  actor_lr: 0.0003        # learning rate of actor
  critic_lr: 0.001        # learning rate of critic
  # NOTE(review): target_update has no default in AlgoConfigA2C - confirm it is used.
  target_update: 4
|
||||
19
projects/codes/A2C/config/CartPole-v1_A2C_Train.yaml
Normal file
19
projects/codes/A2C/config/CartPole-v1_A2C_Train.yaml
Normal file
@@ -0,0 +1,19 @@
|
||||
# A2C on CartPole-v1, train mode.
general_cfg:
  algo_name: A2C          # name of algorithm
  device: cuda            # device to use
  env_name: CartPole-v1   # name of environment
  mode: train             # train or test
  load_checkpoint: false
  # NOTE(review): this path names a DQN run; presumably ignored while
  # load_checkpoint is false - confirm before enabling checkpoint loading.
  load_path: Train_CartPole-v1_DQN_20221026-054757
  max_steps: 200          # max steps for each episode
  save_fig: true          # save figure or not
  seed: 1                 # random seed
  show_fig: false         # show figure or not
  test_eps: 20            # number of episodes for testing
  train_eps: 600          # number of episodes for training
algo_cfg:
  continuous: false       # continuous or discrete action space
  batch_size: 64          # batch size
  buffer_size: 100000     # size of replay buffer
  # Discount factor. Was 0.0003 (the actor learning-rate value), which would
  # make the agent ignore virtually all future reward; 0.99 matches the
  # AlgoConfigA2C default and the CartPole test config.
  gamma: 0.99
  # Explicit per-network learning rates, equal to the AlgoConfigA2C defaults,
  # so this file is consistent with the other A2C configs.
  actor_lr: 0.0003        # learning rate of actor
  critic_lr: 0.001        # learning rate of critic
  # NOTE(review): AlgoConfigA2C defines actor_lr/critic_lr but no plain `lr`;
  # kept for backward compatibility - confirm whether anything still reads it.
  lr: 0.001
|
||||
21
projects/codes/A2C/config/Pendulum-v1_A2C_Train.yaml
Normal file
21
projects/codes/A2C/config/Pendulum-v1_A2C_Train.yaml
Normal file
@@ -0,0 +1,21 @@
|
||||
# A2C on Pendulum-v1 (continuous action space), train mode.
general_cfg:
  algo_name: A2C          # name of algorithm
  device: cuda            # device to use
  env_name: Pendulum-v1   # name of environment
  mode: train             # train or test
  # NOTE(review): eval_per_episode has no default in GeneralConfigA2C -
  # presumably supplied by the base config; confirm.
  eval_per_episode: 200
  load_checkpoint: false
  # NOTE(review): this path names a CartPole DQN run; presumably ignored while
  # load_checkpoint is false - confirm before enabling checkpoint loading.
  load_path: Train_CartPole-v1_DQN_20221026-054757
  max_steps: 200          # max steps for each episode
  save_fig: true          # save figure or not
  seed: 1                 # random seed
  show_fig: false         # show figure or not
  test_eps: 20            # number of episodes for testing
  train_eps: 1000         # number of episodes for training
algo_cfg:
  continuous: true        # continuous or discrete action space
  batch_size: 64          # batch size
  buffer_size: 100000     # size of replay buffer
  # Discount factor. Was 0.0003 (identical to actor_lr below), which would
  # make the agent ignore virtually all future reward; 0.99 matches the
  # AlgoConfigA2C default.
  gamma: 0.99
  actor_lr: 0.0003        # learning rate of actor
  critic_lr: 0.001        # learning rate of critic
|
||||
38
projects/codes/A2C/config/config.py
Normal file
38
projects/codes/A2C/config/config.py
Normal file
@@ -0,0 +1,38 @@
|
||||
#!/usr/bin/env python
|
||||
# coding=utf-8
|
||||
'''
|
||||
Author: JiangJi
|
||||
Email: johnjim0816@gmail.com
|
||||
Date: 2022-10-30 00:53:03
|
||||
LastEditor: JiangJi
|
||||
LastEditTime: 2022-11-01 00:17:55
|
||||
Description: default parameters of A2C
|
||||
'''
|
||||
from common.config import GeneralConfig,AlgoConfig
|
||||
|
||||
class GeneralConfigA2C(GeneralConfig):
    """Default general (non-algorithm) settings for an A2C run.

    Every value is a plain instance attribute assigned in ``__init__``.
    The parent initializer is not invoked here.
    """

    def __init__(self) -> None:
        # --- what to run ---
        # name of environment
        self.env_name = "CartPole-v1"
        # name of algorithm
        self.algo_name = "A2C"
        # train or test
        self.mode = "train"
        # random seed
        self.seed = 1
        # device to use
        self.device = "cuda"

        # --- run length ---
        # number of episodes for training
        self.train_eps = 1000
        # number of episodes for testing
        self.test_eps = 20
        # max steps for each episode
        self.max_steps = 200

        # --- model loading ---
        # NOTE(review): presumably toggles loading a saved model - confirm.
        self.load_checkpoint = False
        # path to load model
        self.load_path = "tasks"

        # --- figures ---
        # show figure or not
        self.show_fig = False
        # save figure or not
        self.save_fig = True
|
||||
|
||||
class AlgoConfigA2C(AlgoConfig):
    """Default algorithm hyperparameters for A2C.

    Every value is a plain instance attribute assigned in ``__init__``.
    The parent initializer is not invoked here.
    """

    def __init__(self) -> None:
        # continuous or discrete action space
        self.continuous = False
        # hidden_dim for MLP
        self.hidden_dim = 256

        # --- optimisation ---
        # discount factor
        self.gamma = 0.99
        # learning rate of actor
        self.actor_lr = 3e-4
        # learning rate of critic
        self.critic_lr = 1e-3

        # --- network sizes ---
        # hidden_dim for actor MLP
        self.actor_hidden_dim = 256
        # hidden_dim for critic MLP
        self.critic_hidden_dim = 256

        # --- sampling ---
        # size of replay buffer
        self.buffer_size = 100000
        # batch size
        self.batch_size = 64
|
||||
Reference in New Issue
Block a user