Update algorithm template
projects/codes/common/config.py (new file, 38 lines added)
@@ -0,0 +1,38 @@
class DefaultConfig:
    def __init__(self) -> None:
        pass
    def print_cfg(self):
        print(self.__dict__)

class GeneralConfig(DefaultConfig):
    def __init__(self) -> None:
        self.env_name = "CartPole-v1" # name of environment
        self.algo_name = "DQN" # name of algorithm
        self.mode = "train" # train or test
        self.seed = 0 # random seed
        self.device = "cuda" # device to use
        self.train_eps = 200 # number of episodes for training
        self.test_eps = 20 # number of episodes for testing
        self.eval_eps = 10 # number of episodes for evaluation
        self.eval_per_episode = 5 # evaluate every N training episodes
        self.max_steps = 200 # max steps for each episode
        self.load_checkpoint = False # load a saved checkpoint or not
        self.load_path = None # path to load model
        self.show_fig = False # show figure or not
        self.save_fig = True # save figure or not

class AlgoConfig(DefaultConfig):
    def __init__(self) -> None:
        # setting epsilon_start = epsilon_end gives a fixed epsilon = epsilon_end
        # self.epsilon_start = 0.95 # epsilon start value
        # self.epsilon_end = 0.01 # epsilon end value
        # self.epsilon_decay = 500 # epsilon decay rate
        self.gamma = 0.95 # discount factor
        # self.lr = 0.0001 # learning rate
        # self.buffer_size = 100000 # size of replay buffer
        # self.batch_size = 64 # batch size
        # self.target_update = 4 # target network update frequency

class MergedConfig:
    def __init__(self) -> None:
        pass
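A minimal usage sketch (illustrative, not part of the commit) of how these config classes are meant to be combined, mirroring what Launcher.run does below; merge_class_attrs is the helper this commit adds to common/utils.py, repeated here so the snippet stands alone:

# combine the general and algorithm configs onto one flat object
def merge_class_attrs(ob1, ob2):  # same helper as in common/utils.py below
    ob1.__dict__.update(ob2.__dict__)
    return ob1

cfg = MergedConfig()
cfg = merge_class_attrs(cfg, GeneralConfig())
cfg = merge_class_attrs(cfg, AlgoConfig())
print(cfg.env_name, cfg.algo_name, cfg.gamma)  # attributes of both configs now live on cfg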
@@ -1,32 +1,124 @@
from common.utils import save_args,save_results,plot_rewards
from common.utils import get_logger,save_results,save_cfgs,plot_rewards,merge_class_attrs,load_cfgs
from common.config import GeneralConfig,AlgoConfig,MergedConfig
import time
from pathlib import Path
import datetime
import argparse

class Launcher:
    def __init__(self) -> None:
        pass
    def get_args(self):
        cfg = {}
        return cfg
    def env_agent_config(self,cfg):
        self.get_cfg()
    def get_cfg(self):
        self.cfgs = {'general_cfg':GeneralConfig(),'algo_cfg':AlgoConfig()} # create config
    def process_yaml_cfg(self):
        ''' load yaml config
        '''
        parser = argparse.ArgumentParser(description="hyperparameters")
        parser.add_argument('--yaml', default = None, type=str,help='the path of config file')
        args = parser.parse_args()
        if args.yaml is not None:
            load_cfgs(self.cfgs, args.yaml)
    def print_cfg(self,cfg):
        ''' print parameters
        '''
        cfg_dict = vars(cfg)
        print("Hyperparameters:")
        print(''.join(['=']*80))
        tplt = "{:^20}\t{:^20}\t{:^20}"
        print(tplt.format("Name", "Value", "Type"))
        for k,v in cfg_dict.items():
            print(tplt.format(k,v,str(type(v))))
        print(''.join(['=']*80))
    def env_agent_config(self,cfg,logger):
        env,agent = None,None
        return env,agent
    def train(self,cfg, env, agent):
        res_dic = {}
        return res_dic
    def test(self,cfg, env, agent):
        res_dic = {}
        return res_dic

    def train_one_episode(self,env, agent, cfg):
        ep_reward = 0
        ep_step = 0
        return agent,ep_reward,ep_step
    def test_one_episode(self,env, agent, cfg):
        ep_reward = 0
        ep_step = 0
        return agent,ep_reward,ep_step
    def evaluate(self,env, agent, cfg):
        sum_eval_reward = 0
        for _ in range(cfg.eval_eps):
            _,eval_ep_reward,_ = self.test_one_episode(env, agent, cfg)
            sum_eval_reward += eval_ep_reward
        mean_eval_reward = sum_eval_reward/cfg.eval_eps
        return mean_eval_reward
    # def train(self,cfg, env, agent,logger):
    #     res_dic = {}
    #     return res_dic
    # def test(self,cfg, env, agent,logger):
    #     res_dic = {}
    #     return res_dic
    def create_path(self,cfg):
        curr_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") # obtain current time
        self.task_dir = f"{cfg.mode.capitalize()}_{cfg.env_name}_{cfg.algo_name}_{curr_time}"
        Path(self.task_dir).mkdir(parents=True, exist_ok=True)
        self.model_dir = f"{self.task_dir}/models/"
        self.res_dir = f"{self.task_dir}/results/"
        self.log_dir = f"{self.task_dir}/logs/"
    def run(self):
        cfg = self.get_args()
        env, agent = self.env_agent_config(cfg)
        res_dic = self.train(cfg, env, agent)
        save_args(cfg,path = cfg['result_path']) # save parameters
        agent.save_model(path = cfg['model_path']) # save models
        save_results(res_dic, tag = 'train', path = cfg['result_path']) # save results
        plot_rewards(res_dic['rewards'], cfg, path = cfg['result_path'],tag = "train") # plot results
        # testing
        # env, agent = self.env_agent_config(cfg) # create new env for testing, sometimes can ignore this step
        agent.load_model(path = cfg['model_path']) # load model
        res_dic = self.test(cfg, env, agent)
        save_results(res_dic, tag='test', path = cfg['result_path'])
        plot_rewards(res_dic['rewards'], cfg, path = cfg['result_path'],tag = "test")
        self.process_yaml_cfg() # load yaml config
        cfg = MergedConfig() # merge config
        cfg = merge_class_attrs(cfg,self.cfgs['general_cfg'])
        cfg = merge_class_attrs(cfg,self.cfgs['algo_cfg'])
        self.print_cfg(cfg) # print the configuration
        self.create_path(cfg) # create the path to save the results
        logger = get_logger(self.log_dir) # create the logger
        env, agent = self.env_agent_config(cfg,logger)
        if cfg.load_checkpoint:
            agent.load_model(f"{cfg.load_path}/models/")
        logger.info(f"Start {cfg.mode}ing!")
        logger.info(f"Env: {cfg.env_name}, Algorithm: {cfg.algo_name}, Device: {cfg.device}")
        rewards = [] # record rewards for all episodes
        steps = [] # record steps for all episodes
        if cfg.mode.lower() == 'train':
            best_ep_reward = -float('inf')
            for i_ep in range(cfg.train_eps):
                agent,ep_reward,ep_step = self.train_one_episode(env, agent, cfg)
                logger.info(f"Episode: {i_ep+1}/{cfg.train_eps}, Reward: {ep_reward:.3f}, Step: {ep_step}")
                rewards.append(ep_reward)
                steps.append(ep_step)
                # for _ in range
                if (i_ep+1)%cfg.eval_per_episode == 0:
                    mean_eval_reward = self.evaluate(env, agent, cfg)
                    if mean_eval_reward >= best_ep_reward: # update best reward
                        logger.info(f"Current episode {i_ep+1} has the best eval reward: {mean_eval_reward:.3f}")
                        best_ep_reward = mean_eval_reward
                        agent.save_model(self.model_dir) # save models with best reward
            # env.close()
        elif cfg.mode.lower() == 'test':
            for i_ep in range(cfg.test_eps):
                agent,ep_reward,ep_step = self.test_one_episode(env, agent, cfg)
                logger.info(f"Episode: {i_ep+1}/{cfg.test_eps}, Reward: {ep_reward:.3f}, Step: {ep_step}")
                rewards.append(ep_reward)
                steps.append(ep_step)
            agent.save_model(self.model_dir) # save models
            # env.close()
        logger.info(f"Finish {cfg.mode}ing!")
        res_dic = {'episodes':range(len(rewards)),'rewards':rewards,'steps':steps}
        save_results(res_dic, self.res_dir) # save results
        save_cfgs(self.cfgs, self.task_dir) # save config
        plot_rewards(rewards, title=f"{cfg.mode.lower()}ing curve on {cfg.device} of {cfg.algo_name} for {cfg.env_name}" ,fpath= self.res_dir)
    # def run(self):
    #     self.process_yaml_cfg() # load yaml config
    #     cfg = MergedConfig() # merge config
    #     cfg = merge_class_attrs(cfg,self.cfgs['general_cfg'])
    #     cfg = merge_class_attrs(cfg,self.cfgs['algo_cfg'])
    #     self.print_cfg(cfg) # print the configuration
    #     self.create_path(cfg) # create the path to save the results
    #     logger = get_logger(self.log_dir) # create the logger
    #     env, agent = self.env_agent_config(cfg,logger)
    #     if cfg.load_checkpoint:
    #         agent.load_model(f"{cfg.load_path}/models/")
    #     if cfg.mode.lower() == 'train':
    #         res_dic = self.train(cfg, env, agent,logger)
    #     elif cfg.mode.lower() == 'test':
    #         res_dic = self.test(cfg, env, agent,logger)
    #     save_results(res_dic, self.res_dir) # save results
    #     save_cfgs(self.cfgs, self.task_dir) # save config
    #     agent.save_model(self.model_dir) # save models
    #     plot_rewards(res_dic['rewards'], title=f"{cfg.mode.lower()}ing curve on {cfg.device} of {cfg.algo_name} for {cfg.env_name}" ,fpath= self.res_dir)
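A sketch (illustrative only, not taken from the repository) of how an algorithm would plug into this template: subclass Launcher, override the env/agent hooks and the per-episode rollouts, then call run(). RandomAgent, Main, and the classic pre-0.26 Gym reset/step API used here are assumptions made for the example, which is assumed to live next to the Launcher class above:

import gym

class RandomAgent:
    ''' placeholder agent that samples random actions (illustrative only) '''
    def __init__(self, action_space):
        self.action_space = action_space
    def sample_action(self, state):
        return self.action_space.sample()
    def save_model(self, path):
        pass
    def load_model(self, path):
        pass

class Main(Launcher):
    def env_agent_config(self, cfg, logger):
        env = gym.make(cfg.env_name)
        agent = RandomAgent(env.action_space)
        return env, agent
    def train_one_episode(self, env, agent, cfg):
        ep_reward, ep_step = 0, 0
        state = env.reset()
        for _ in range(cfg.max_steps):
            action = agent.sample_action(state)
            state, reward, done, _ = env.step(action)  # classic Gym API assumed
            ep_reward += reward
            ep_step += 1
            if done:
                break
        return agent, ep_reward, ep_step
    test_one_episode = train_one_episode  # reuse the same rollout for testing

if __name__ == "__main__":
    main = Main()
    main.get_cfg()  # build the default GeneralConfig/AlgoConfig pair
    main.run()      # then run the train/eval/test loop defined above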
@@ -5,7 +5,7 @@ Author: John
Email: johnjim0816@gmail.com
Date: 2021-03-12 21:14:12
LastEditor: John
LastEditTime: 2022-08-29 14:24:44
LastEditTime: 2022-10-31 23:53:06
Description:
Environment:
'''
@@ -35,20 +35,65 @@ class ActorSoftmax(nn.Module):
    def __init__(self, input_dim, output_dim, hidden_dim=256):
        super(ActorSoftmax, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, output_dim)
    def forward(self,state):
        dist = F.relu(self.fc1(state))
        dist = F.softmax(self.fc2(dist),dim=1)
        return dist
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        self.fc3 = nn.Linear(hidden_dim, output_dim)
    def forward(self,x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        probs = F.softmax(self.fc3(x),dim=1)
        return probs

class ActorSoftmaxTanh(nn.Module):
    def __init__(self, input_dim, output_dim, hidden_dim=256):
        super(ActorSoftmaxTanh, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        self.fc3 = nn.Linear(hidden_dim, output_dim)
    def forward(self,x):
        x = F.tanh(self.fc1(x))
        x = F.tanh(self.fc2(x))
        probs = F.softmax(self.fc3(x),dim=1)
        return probs
class ActorNormal(nn.Module):
    def __init__(self, n_states,n_actions, hidden_dim=256):
        super(ActorNormal, self).__init__()
        self.fc1 = nn.Linear(n_states, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        self.fc3 = nn.Linear(hidden_dim, n_actions)
        self.fc4 = nn.Linear(hidden_dim, n_actions)
    def forward(self,x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        mu = torch.tanh(self.fc3(x))
        sigma = F.softplus(self.fc4(x)) + 0.001 # avoid 0
        return mu,sigma
# class ActorSoftmax(nn.Module):
#     def __init__(self,input_dim, output_dim,
#         hidden_dim=256):
#         super(ActorSoftmax, self).__init__()
#         self.actor = nn.Sequential(
#             nn.Linear(input_dim, hidden_dim),
#             nn.ReLU(),
#             nn.Linear(hidden_dim, hidden_dim),
#             nn.ReLU(),
#             nn.Linear(hidden_dim, output_dim),
#             nn.Softmax(dim=-1)
#         )
#     def forward(self, state):
#         probs = self.actor(state)
#         dist = Categorical(probs)
#         return dist
class Critic(nn.Module):
    def __init__(self,input_dim,output_dim,hidden_dim=256):
        super(Critic,self).__init__()
        assert output_dim == 1 # critic must output a single value
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, output_dim)
    def forward(self,state):
        value = F.relu(self.fc1(state))
        value = self.fc2(value)
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        self.fc3 = nn.Linear(hidden_dim, output_dim)
    def forward(self,x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        value = self.fc3(x)
        return value

class ActorCriticSoftmax(nn.Module):
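A quick sampling sketch (not part of the diff) for the two kinds of actor heads above; the batch size and dimensions are made up, and torch.distributions is the standard way such outputs are usually consumed:

import torch
from torch.distributions import Categorical, Normal

state = torch.randn(1, 4)                        # e.g. a 4-dimensional observation, batch of 1
actor = ActorSoftmax(input_dim=4, output_dim=2)
probs = actor(state)                             # shape (1, 2), each row sums to 1
action = Categorical(probs).sample()             # discrete action index

cont_actor = ActorNormal(n_states=3, n_actions=1)
mu, sigma = cont_actor(torch.randn(1, 3))
cont_action = Normal(mu, sigma).sample()         # continuous action around a mean in [-1, 1]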
@@ -72,18 +117,18 @@ class ActorCriticSoftmax(nn.Module):
        return value, policy_dist

class ActorCritic(nn.Module):
    def __init__(self, n_states, n_actions, hidden_dim=256):
    def __init__(self, input_dim, output_dim, hidden_dim=256):
        super(ActorCritic, self).__init__()
        self.critic = nn.Sequential(
            nn.Linear(n_states, hidden_dim),
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, 1)
        )

        self.actor = nn.Sequential(
            nn.Linear(n_states, hidden_dim),
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, n_actions),
            nn.Linear(hidden_dim, output_dim),
            nn.Softmax(dim=1),
        )

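Similarly, a short sketch (not from the repository) of a forward pass through the renamed ActorCritic; batch size and dimensions are illustrative:

import torch

ac = ActorCritic(input_dim=4, output_dim=2)
x = torch.randn(8, 4)        # batch of 8 states
probs = ac.actor(x)          # (8, 2) action probabilities, rows sum to 1
values = ac.critic(x)        # (8, 1) state-value estimates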
@@ -5,7 +5,7 @@ Author: John
Email: johnjim0816@gmail.com
Date: 2021-03-12 16:02:24
LastEditor: John
LastEditTime: 2022-08-24 10:31:30
LastEditTime: 2022-10-26 07:38:17
Description:
Environment:
'''
@@ -14,8 +14,13 @@ import numpy as np
from pathlib import Path
import matplotlib.pyplot as plt
import seaborn as sns
import json
import yaml
import pandas as pd
from functools import wraps
from time import time
import logging
from pathlib import Path

from matplotlib.font_manager import FontProperties # import font module
@@ -61,17 +66,17 @@ def smooth(data, weight=0.9):
        last = smoothed_val
    return smoothed

def plot_rewards(rewards,cfg,path=None,tag='train'):
def plot_rewards(rewards,title="learning curve",fpath=None,save_fig=True,show_fig=False):
    sns.set()
    plt.figure() # create a new figure instance so several plots can be drawn at once
    plt.title(f"{tag}ing curve on {cfg['device']} of {cfg['algo_name']} for {cfg['env_name']}")
    plt.title(f"{title}")
    plt.xlabel('episodes')
    plt.plot(rewards, label='rewards')
    plt.plot(smooth(rewards), label='smoothed')
    plt.legend()
    if cfg['save_fig']:
        plt.savefig(f"{path}/{tag}ing_curve.png")
    if cfg['show_fig']:
    if save_fig:
        plt.savefig(f"{fpath}/learning_curve.png")
    if show_fig:
        plt.show()

def plot_losses(losses, algo="DQN", save=True, path='./'):
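A hedged usage sketch (not part of the commit) for the new plot_rewards signature; the reward values and the ./results directory are invented for illustration:

import numpy as np
from pathlib import Path

Path("./results").mkdir(parents=True, exist_ok=True)      # plt.savefig does not create directories
fake_rewards = list(np.random.randint(0, 200, size=50))   # made-up data
plot_rewards(fake_rewards,
             title="training curve on cuda of DQN for CartPole-v1",
             fpath="./results", save_fig=True, show_fig=False)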
@@ -85,48 +90,86 @@ def plot_losses(losses, algo="DQN", save=True, path='./'):
    plt.savefig(path+"losses_curve")
    plt.show()

def save_results(res_dic, tag='train', path = None):
    ''' save rewards
def save_results(res_dic,fpath = None):
    ''' save results
    '''
    Path(path).mkdir(parents=True, exist_ok=True)
    Path(fpath).mkdir(parents=True, exist_ok=True)
    df = pd.DataFrame(res_dic)
    df.to_csv(f"{path}/{tag}ing_results.csv",index=None)
    print('Results saved!')


def make_dir(*paths):
    ''' create directories
    df.to_csv(f"{fpath}/res.csv",index=None)
def merge_class_attrs(ob1, ob2):
    ob1.__dict__.update(ob2.__dict__)
    return ob1
def get_logger(fpath):
    Path(fpath).mkdir(parents=True, exist_ok=True)
    logger = logging.getLogger(name='r') # becomes the root logger if no name is set
    logger.setLevel(logging.DEBUG)
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s: - %(message)s',
        datefmt='%Y-%m-%d %H:%M:%S')
    # output to file by using FileHandler
    fh = logging.FileHandler(fpath+"log.txt")
    fh.setLevel(logging.DEBUG)
    fh.setFormatter(formatter)
    # output to screen by using StreamHandler
    ch = logging.StreamHandler()
    ch.setLevel(logging.DEBUG)
    ch.setFormatter(formatter)
    # add handlers
    logger.addHandler(ch)
    logger.addHandler(fh)
    return logger
def save_cfgs(cfgs, fpath):
    ''' save config
    '''
    for path in paths:
        Path(path).mkdir(parents=True, exist_ok=True)
    Path(fpath).mkdir(parents=True, exist_ok=True)

    with open(f"{fpath}/config.yaml", 'w') as f:
        for cfg_type in cfgs:
            yaml.dump({cfg_type: cfgs[cfg_type].__dict__}, f, default_flow_style=False)
def load_cfgs(cfgs, fpath):
    with open(fpath) as f:
        load_cfg = yaml.load(f,Loader=yaml.FullLoader)
        for cfg_type in cfgs:
            for k, v in load_cfg[cfg_type].items():
                setattr(cfgs[cfg_type], k, v)
# def del_empty_dir(*paths):
#     ''' delete all empty subfolders under the given paths
#     '''
#     for path in paths:
#         dirs = os.listdir(path)
#         for dir in dirs:
#             if not os.listdir(os.path.join(path, dir)):
#                 os.removedirs(os.path.join(path, dir))


def del_empty_dir(*paths):
    ''' delete all empty subfolders under the given paths
    '''
    for path in paths:
        dirs = os.listdir(path)
        for dir in dirs:
            if not os.listdir(os.path.join(path, dir)):
                os.removedirs(os.path.join(path, dir))

class NpEncoder(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return json.JSONEncoder.default(self, obj)
# class NpEncoder(json.JSONEncoder):
#     def default(self, obj):
#         if isinstance(obj, np.integer):
#             return int(obj)
#         if isinstance(obj, np.floating):
#             return float(obj)
#         if isinstance(obj, np.ndarray):
#             return obj.tolist()
#         return json.JSONEncoder.default(self, obj)

def save_args(args,path=None):
    # save parameters
    Path(path).mkdir(parents=True, exist_ok=True)
    with open(f"{path}/params.json", 'w') as fp:
        json.dump(args, fp,cls=NpEncoder)
    print("Parameters saved!")
# def save_args(args,path=None):
#     # save parameters
#     Path(path).mkdir(parents=True, exist_ok=True)
#     with open(f"{path}/params.json", 'w') as fp:
#         json.dump(args, fp,cls=NpEncoder)
#     print("Parameters saved!")


def timing(func):
    ''' a decorator to print the running time of a function
    '''
    @wraps(func)
    def wrap(*args, **kw):
        ts = time()
        result = func(*args, **kw)
        te = time()
        print(f"func: {func.__name__}, took: {te-ts:2.4f} seconds")
        return result
    return wrap
def all_seed(env,seed = 1):
    ''' all-in-one seeding for RL; mind where it is called: it is best placed right after the env is created
    Args:
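To make the YAML round trip concrete, a sketch (not from the repository; the tmp_task path is invented) of what save_cfgs writes and how load_cfgs reads it back, using the config classes from common/config.py above:

cfgs = {'general_cfg': GeneralConfig(), 'algo_cfg': AlgoConfig()}
save_cfgs(cfgs, "tmp_task")               # writes tmp_task/config.yaml
# the file contains roughly:
# general_cfg:
#   algo_name: DQN
#   device: cuda
#   env_name: CartPole-v1
#   ...
# algo_cfg:
#   gamma: 0.95
load_cfgs(cfgs, "tmp_task/config.yaml")   # pushes the values back onto the config objects

This is also the file format expected by the --yaml flag that process_yaml_cfg parses.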
@@ -136,7 +179,7 @@ def all_seed(env,seed = 1):
    import torch
    import numpy as np
    import random
    print(f"seed = {seed}")
    # print(f"seed = {seed}")
    env.seed(seed) # env config
    np.random.seed(seed)
    random.seed(seed)

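Finally, a one-line sketch of using all_seed as its docstring suggests, right after the environment is created; CartPole-v1 and the classic Gym seeding API are assumptions of this example:

import gym

env = gym.make("CartPole-v1")
all_seed(env, seed=1)  # seeds the env, numpy, and random in one call (and presumably torch, given the import)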