Update the algorithm template

This commit is contained in:
johnjim0816
2022-11-06 12:15:36 +08:00
parent 466a17707f
commit dc78698262
256 changed files with 17282 additions and 10229 deletions

View File

@@ -0,0 +1,38 @@
class DefaultConfig:
def __init__(self) -> None:
pass
def print_cfg(self):
print(self.__dict__)
class GeneralConfig(DefaultConfig):
def __init__(self) -> None:
self.env_name = "CartPole-v1" # name of environment
self.algo_name = "DQN" # name of algorithm
self.mode = "train" # train or test
self.seed = 0 # random seed
self.device = "cuda" # device to use
self.train_eps = 200 # number of episodes for training
self.test_eps = 20 # number of episodes for testing
self.eval_eps = 10 # number of episodes for evaluation
self.eval_per_episode = 5 # evaluate the policy every eval_per_episode training episodes
self.max_steps = 200 # max steps for each episode
self.load_checkpoint = False # whether to load a saved checkpoint
self.load_path = None # path to load model
self.show_fig = False # show figure or not
self.save_fig = True # save figure or not
class AlgoConfig(DefaultConfig):
def __init__(self) -> None:
# set epsilon_start=epsilon_end can obtain fixed epsilon=epsilon_end
# self.epsilon_start = 0.95 # epsilon start value
# self.epsilon_end = 0.01 # epsilon end value
# self.epsilon_decay = 500 # epsilon decay rate
self.gamma = 0.95 # discount factor
# self.lr = 0.0001 # learning rate
# self.buffer_size = 100000 # size of replay buffer
# self.batch_size = 64 # batch size
# self.target_update = 4 # target network update frequency
class MergedConfig:
def __init__(self) -> None:
pass
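
A minimal usage sketch (not part of the commit, and assuming merge_class_attrs from common.utils as used in the launcher below) of how these config classes are meant to combine into a single object:

from common.config import GeneralConfig, AlgoConfig, MergedConfig
from common.utils import merge_class_attrs

cfg = MergedConfig()
cfg = merge_class_attrs(cfg, GeneralConfig())  # env_name, seed, train_eps, ...
cfg = merge_class_attrs(cfg, AlgoConfig())     # gamma and other algorithm params
print(cfg.__dict__)                            # the merged hyperparameters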

View File

@@ -1,32 +1,124 @@
from common.utils import save_args,save_results,plot_rewards
from common.utils import get_logger,save_results,save_cfgs,plot_rewards,merge_class_attrs,load_cfgs
from common.config import GeneralConfig,AlgoConfig,MergedConfig
import time
from pathlib import Path
import datetime
import argparse
class Launcher:
def __init__(self) -> None:
pass
def get_args(self):
cfg = {}
return cfg
def env_agent_config(self,cfg):
self.get_cfg()
def get_cfg(self):
self.cfgs = {'general_cfg':GeneralConfig(),'algo_cfg':AlgoConfig()} # create config
def process_yaml_cfg(self):
''' load yaml config
'''
parser = argparse.ArgumentParser(description="hyperparameters")
parser.add_argument('--yaml', default = None, type=str,help='the path of config file')
args = parser.parse_args()
if args.yaml is not None:
load_cfgs(self.cfgs, args.yaml)
def print_cfg(self,cfg):
''' print parameters
'''
cfg_dict = vars(cfg)
print("Hyperparameters:")
print(''.join(['=']*80))
tplt = "{:^20}\t{:^20}\t{:^20}"
print(tplt.format("Name", "Value", "Type"))
for k,v in cfg_dict.items():
print(tplt.format(k,v,str(type(v))))
print(''.join(['=']*80))
def env_agent_config(self,cfg,logger):
env,agent = None,None
return env,agent
def train(self,cfg, env, agent):
res_dic = {}
return res_dic
def test(self,cfg, env, agent):
res_dic = {}
return res_dic
def train_one_episode(self,env, agent, cfg):
ep_reward = 0
ep_step = 0
return agent,ep_reward,ep_step
def test_one_episode(self,env, agent, cfg):
ep_reward = 0
ep_step = 0
return agent,ep_reward,ep_step
def evaluate(self,env, agent, cfg):
sum_eval_reward = 0
for _ in range(cfg.eval_eps):
_,eval_ep_reward,_ = self.test_one_episode(env, agent, cfg)
sum_eval_reward += eval_ep_reward
mean_eval_reward = sum_eval_reward/cfg.eval_eps
return mean_eval_reward
# def train(self,cfg, env, agent,logger):
# res_dic = {}
# return res_dic
# def test(self,cfg, env, agent,logger):
# res_dic = {}
# return res_dic
def create_path(self,cfg):
curr_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") # obtain current time
self.task_dir = f"{cfg.mode.capitalize()}_{cfg.env_name}_{cfg.algo_name}_{curr_time}"
Path(self.task_dir).mkdir(parents=True, exist_ok=True)
self.model_dir = f"{self.task_dir}/models/"
self.res_dir = f"{self.task_dir}/results/"
self.log_dir = f"{self.task_dir}/logs/"
def run(self):
cfg = self.get_args()
env, agent = self.env_agent_config(cfg)
res_dic = self.train(cfg, env, agent)
save_args(cfg,path = cfg['result_path']) # save parameters
agent.save_model(path = cfg['model_path']) # save models
save_results(res_dic, tag = 'train', path = cfg['result_path']) # save results
plot_rewards(res_dic['rewards'], cfg, path = cfg['result_path'],tag = "train") # plot results
# testing
# env, agent = self.env_agent_config(cfg) # create new env for testing, sometimes can ignore this step
agent.load_model(path = cfg['model_path']) # load model
res_dic = self.test(cfg, env, agent)
save_results(res_dic, tag='test',
path = cfg['result_path'])
plot_rewards(res_dic['rewards'], cfg, path = cfg['result_path'],tag = "test")
self.process_yaml_cfg() # load yaml config
cfg = MergedConfig() # merge config
cfg = merge_class_attrs(cfg,self.cfgs['general_cfg'])
cfg = merge_class_attrs(cfg,self.cfgs['algo_cfg'])
self.print_cfg(cfg) # print the configuration
self.create_path(cfg) # create the path to save the results
logger = get_logger(self.log_dir) # create the logger
env, agent = self.env_agent_config(cfg,logger)
if cfg.load_checkpoint:
agent.load_model(f"{cfg.load_path}/models/")
logger.info(f"Start {cfg.mode}ing!")
logger.info(f"Env: {cfg.env_name}, Algorithm: {cfg.algo_name}, Device: {cfg.device}")
rewards = [] # record rewards for all episodes
steps = [] # record steps for all episodes
if cfg.mode.lower() == 'train':
best_ep_reward = -float('inf')
for i_ep in range(cfg.train_eps):
agent,ep_reward,ep_step = self.train_one_episode(env, agent, cfg)
logger.info(f"Episode: {i_ep+1}/{cfg.train_eps}, Reward: {ep_reward:.3f}, Step: {ep_step}")
rewards.append(ep_reward)
steps.append(ep_step)
# for _ in range
if (i_ep+1)%cfg.eval_per_episode == 0:
mean_eval_reward = self.evaluate(env, agent, cfg)
if mean_eval_reward >= best_ep_reward: # update best reward
logger.info(f"Current episode {i_ep+1} has the best eval reward: {mean_eval_reward:.3f}")
best_ep_reward = mean_eval_reward
agent.save_model(self.model_dir) # save models with best reward
# env.close()
elif cfg.mode.lower() == 'test':
for i_ep in range(cfg.test_eps):
agent,ep_reward,ep_step = self.test_one_episode(env, agent, cfg)
logger.info(f"Episode: {i_ep+1}/{cfg.test_eps}, Reward: {ep_reward:.3f}, Step: {ep_step}")
rewards.append(ep_reward)
steps.append(ep_step)
agent.save_model(self.model_dir) # save models
# env.close()
logger.info(f"Finish {cfg.mode}ing!")
res_dic = {'episodes':range(len(rewards)),'rewards':rewards,'steps':steps}
save_results(res_dic, self.res_dir) # save results
save_cfgs(self.cfgs, self.task_dir) # save config
plot_rewards(rewards, title=f"{cfg.mode.lower()}ing curve on {cfg.device} of {cfg.algo_name} for {cfg.env_name}" ,fpath= self.res_dir)
# def run(self):
# self.process_yaml_cfg() # load yaml config
# cfg = MergedConfig() # merge config
# cfg = merge_class_attrs(cfg,self.cfgs['general_cfg'])
# cfg = merge_class_attrs(cfg,self.cfgs['algo_cfg'])
# self.print_cfg(cfg) # print the configuration
# self.create_path(cfg) # create the path to save the results
# logger = get_logger(self.log_dir) # create the logger
# env, agent = self.env_agent_config(cfg,logger)
# if cfg.load_checkpoint:
# agent.load_model(f"{cfg.load_path}/models/")
# if cfg.mode.lower() == 'train':
# res_dic = self.train(cfg, env, agent,logger)
# elif cfg.mode.lower() == 'test':
# res_dic = self.test(cfg, env, agent,logger)
# save_results(res_dic, self.res_dir) # save results
# save_cfgs(self.cfgs, self.task_dir) # save config
# agent.save_model(self.model_dir) # save models
# plot_rewards(res_dic['rewards'], title=f"{cfg.mode.lower()}ing curve on {cfg.device} of {cfg.algo_name} for {cfg.env_name}" ,fpath= self.res_dir)
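
The Launcher above leaves env_agent_config, train_one_episode and test_one_episode as stubs. A hypothetical subclass (a sketch, not part of the commit, assuming a classic gym-style env whose step returns (next_state, reward, done, info) and an agent exposing sample_action and update) could fill in the training loop like this:

class MyLauncher(Launcher):
    def train_one_episode(self, env, agent, cfg):
        ep_reward, ep_step = 0, 0
        state = env.reset()
        for _ in range(cfg.max_steps):
            action = agent.sample_action(state)                    # e.g. epsilon-greedy
            next_state, reward, done, _ = env.step(action)
            agent.update(state, action, reward, next_state, done)  # one learning step
            state = next_state
            ep_reward += reward
            ep_step += 1
            if done:
                break
        return agent, ep_reward, ep_step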

View File

@@ -5,7 +5,7 @@ Author: John
Email: johnjim0816@gmail.com
Date: 2021-03-12 21:14:12
LastEditor: John
LastEditTime: 2022-08-29 14:24:44
LastEditTime: 2022-10-31 23:53:06
Description:
Environment:
'''
@@ -35,20 +35,65 @@ class ActorSoftmax(nn.Module):
def __init__(self, input_dim, output_dim, hidden_dim=256):
super(ActorSoftmax, self).__init__()
self.fc1 = nn.Linear(input_dim, hidden_dim)
self.fc2 = nn.Linear(hidden_dim, output_dim)
def forward(self,state):
dist = F.relu(self.fc1(state))
dist = F.softmax(self.fc2(dist),dim=1)
return dist
self.fc2 = nn.Linear(hidden_dim, hidden_dim)
self.fc3 = nn.Linear(hidden_dim, output_dim)
def forward(self,x):
x = F.relu(self.fc1(x))
x = F.relu(self.fc2(x))
probs = F.softmax(self.fc3(x),dim=1)
return probs
class ActorSoftmaxTanh(nn.Module):
def __init__(self, input_dim, output_dim, hidden_dim=256):
super(ActorSoftmaxTanh, self).__init__()
self.fc1 = nn.Linear(input_dim, hidden_dim)
self.fc2 = nn.Linear(hidden_dim, hidden_dim)
self.fc3 = nn.Linear(hidden_dim, output_dim)
def forward(self,x):
x = F.tanh(self.fc1(x))
x = F.tanh(self.fc2(x))
probs = F.softmax(self.fc3(x),dim=1)
return probs
class ActorNormal(nn.Module):
def __init__(self, n_states,n_actions, hidden_dim=256):
super(ActorNormal, self).__init__()
self.fc1 = nn.Linear(n_states, hidden_dim)
self.fc2 = nn.Linear(hidden_dim, hidden_dim)
self.fc3 = nn.Linear(hidden_dim, n_actions)
self.fc4 = nn.Linear(hidden_dim, n_actions)
def forward(self,x):
x = F.relu(self.fc1(x))
x = F.relu(self.fc2(x))
mu = torch.tanh(self.fc3(x))
sigma = F.softplus(self.fc4(x)) + 0.001 # avoid 0
return mu,sigma
# class ActorSoftmax(nn.Module):
# def __init__(self,input_dim, output_dim,
# hidden_dim=256):
# super(ActorSoftmax, self).__init__()
# self.actor = nn.Sequential(
# nn.Linear(input_dim, hidden_dim),
# nn.ReLU(),
# nn.Linear(hidden_dim, hidden_dim),
# nn.ReLU(),
# nn.Linear(hidden_dim, output_dim),
# nn.Softmax(dim=-1)
# )
# def forward(self, state):
# probs = self.actor(state)
# dist = Categorical(probs)
# return dist
class Critic(nn.Module):
def __init__(self,input_dim,output_dim,hidden_dim=256):
super(Critic,self).__init__()
assert output_dim == 1 # critic must output a single value
self.fc1 = nn.Linear(input_dim, hidden_dim)
self.fc2 = nn.Linear(hidden_dim, output_dim)
def forward(self,state):
value = F.relu(self.fc1(state))
value = self.fc2(value)
self.fc2 = nn.Linear(hidden_dim, hidden_dim)
self.fc3 = nn.Linear(hidden_dim, output_dim)
def forward(self,x):
x = F.relu(self.fc1(x))
x = F.relu(self.fc2(x))
value = self.fc3(x)
return value
class ActorCriticSoftmax(nn.Module):
@@ -72,18 +117,18 @@ class ActorCriticSoftmax(nn.Module):
return value, policy_dist
class ActorCritic(nn.Module):
def __init__(self, n_states, n_actions, hidden_dim=256):
def __init__(self, input_dim, output_dim, hidden_dim=256):
super(ActorCritic, self).__init__()
self.critic = nn.Sequential(
nn.Linear(n_states, hidden_dim),
nn.Linear(input_dim, hidden_dim),
nn.ReLU(),
nn.Linear(hidden_dim, 1)
)
self.actor = nn.Sequential(
nn.Linear(n_states, hidden_dim),
nn.Linear(input_dim, hidden_dim),
nn.ReLU(),
nn.Linear(hidden_dim, n_actions),
nn.Linear(hidden_dim, output_dim),
nn.Softmax(dim=1),
)
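
A hypothetical usage sketch (not in the diff; the dimensions are illustrative) of the new actor heads: ActorSoftmax returns action probabilities for a discrete policy, ActorNormal returns (mu, sigma) for a continuous one:

import torch
from torch.distributions import Categorical, Normal

state = torch.randn(1, 4)                          # e.g. a CartPole-v1 observation
probs = ActorSoftmax(input_dim=4, output_dim=2)(state)
discrete_action = Categorical(probs).sample().item()

mu, sigma = ActorNormal(n_states=3, n_actions=1)(torch.randn(1, 3))
continuous_action = Normal(mu, sigma).sample()     # e.g. a Pendulum-v1 torque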

View File

@@ -5,7 +5,7 @@ Author: John
Email: johnjim0816@gmail.com
Date: 2021-03-12 16:02:24
LastEditor: John
LastEditTime: 2022-08-24 10:31:30
LastEditTime: 2022-10-26 07:38:17
Description:
Environment:
'''
@@ -14,8 +14,13 @@ import numpy as np
from pathlib import Path
import matplotlib.pyplot as plt
import seaborn as sns
import json
import yaml
import pandas as pd
from functools import wraps
from time import time
import logging
from pathlib import Path
from matplotlib.font_manager import FontProperties # import font module
@@ -61,17 +66,17 @@ def smooth(data, weight=0.9):
last = smoothed_val
return smoothed
def plot_rewards(rewards,cfg,path=None,tag='train'):
def plot_rewards(rewards,title="learning curve",fpath=None,save_fig=True,show_fig=False):
sns.set()
plt.figure() # create a new figure instance so several plots can be drawn at once
plt.title(f"{tag}ing curve on {cfg['device']} of {cfg['algo_name']} for {cfg['env_name']}")
plt.title(f"{title}")
plt.xlabel('episodes')
plt.plot(rewards, label='rewards')
plt.plot(smooth(rewards), label='smoothed')
plt.legend()
if cfg['save_fig']:
plt.savefig(f"{path}/{tag}ing_curve.png")
if cfg['show_fig']:
if save_fig:
plt.savefig(f"{fpath}/learning_curve.png")
if show_fig:
plt.show()
def plot_losses(losses, algo="DQN", save=True, path='./'):
@@ -85,48 +90,86 @@ def plot_losses(losses, algo="DQN", save=True, path='./'):
plt.savefig(path+"losses_curve")
plt.show()
def save_results(res_dic, tag='train', path = None):
''' save rewards
def save_results(res_dic,fpath = None):
''' save results
'''
Path(path).mkdir(parents=True, exist_ok=True)
Path(fpath).mkdir(parents=True, exist_ok=True)
df = pd.DataFrame(res_dic)
df.to_csv(f"{path}/{tag}ing_results.csv",index=None)
print('Results saved')
def make_dir(*paths):
''' create directories
df.to_csv(f"{fpath}/res.csv",index=None)
def merge_class_attrs(ob1, ob2):
ob1.__dict__.update(ob2.__dict__)
return ob1
def get_logger(fpath):
Path(fpath).mkdir(parents=True, exist_ok=True)
logger = logging.getLogger(name='r') # a named logger; getLogger() with no name would return the root logger
logger.setLevel(logging.DEBUG)
formatter = logging.Formatter(
'%(asctime)s - %(name)s - %(levelname)s: - %(message)s',
datefmt='%Y-%m-%d %H:%M:%S')
# output to file by using FileHandler
fh = logging.FileHandler(fpath+"log.txt")
fh.setLevel(logging.DEBUG)
fh.setFormatter(formatter)
# output to screen by using StreamHandler
ch = logging.StreamHandler()
ch.setLevel(logging.DEBUG)
ch.setFormatter(formatter)
# add Handler
logger.addHandler(ch)
logger.addHandler(fh)
return logger
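
A quick hypothetical use of get_logger (not in the diff): it logs both to the console and to a file under fpath; since fpath is concatenated directly with "log.txt", it should end with a slash:

logger = get_logger("tmp_logs/")        # creates tmp_logs/ and tmp_logs/log.txt
logger.info("hello from the template")  # printed to screen and appended to the file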
def save_cfgs(cfgs, fpath):
''' save config
'''
for path in paths:
Path(path).mkdir(parents=True, exist_ok=True)
Path(fpath).mkdir(parents=True, exist_ok=True)
with open(f"{fpath}/config.yaml", 'w') as f:
for cfg_type in cfgs:
yaml.dump({cfg_type: cfgs[cfg_type].__dict__}, f, default_flow_style=False)
def load_cfgs(cfgs, fpath):
with open(fpath) as f:
load_cfg = yaml.load(f,Loader=yaml.FullLoader)
for cfg_type in cfgs:
for k, v in load_cfg[cfg_type].items():
setattr(cfgs[cfg_type], k, v)
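
A round-trip sketch (not part of the commit; the directory name is hypothetical) of save_cfgs and load_cfgs: the YAML file has one top-level key per config type (general_cfg, algo_cfg), and load_cfgs writes each value back onto the matching config object:

from common.config import GeneralConfig, AlgoConfig

cfgs = {'general_cfg': GeneralConfig(), 'algo_cfg': AlgoConfig()}
save_cfgs(cfgs, "tmp_cfgs")               # writes tmp_cfgs/config.yaml
load_cfgs(cfgs, "tmp_cfgs/config.yaml")   # sets attributes back from the file
print(cfgs['general_cfg'].env_name)       # CartPole-v1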
# def del_empty_dir(*paths):
# ''' delete all empty subdirectories under the given paths
# '''
# for path in paths:
# dirs = os.listdir(path)
# for dir in dirs:
# if not os.listdir(os.path.join(path, dir)):
# os.removedirs(os.path.join(path, dir))
def del_empty_dir(*paths):
''' delete all empty subdirectories under the given paths
'''
for path in paths:
dirs = os.listdir(path)
for dir in dirs:
if not os.listdir(os.path.join(path, dir)):
os.removedirs(os.path.join(path, dir))
class NpEncoder(json.JSONEncoder):
def default(self, obj):
if isinstance(obj, np.integer):
return int(obj)
if isinstance(obj, np.floating):
return float(obj)
if isinstance(obj, np.ndarray):
return obj.tolist()
return json.JSONEncoder.default(self, obj)
# class NpEncoder(json.JSONEncoder):
# def default(self, obj):
# if isinstance(obj, np.integer):
# return int(obj)
# if isinstance(obj, np.floating):
# return float(obj)
# if isinstance(obj, np.ndarray):
# return obj.tolist()
# return json.JSONEncoder.default(self, obj)
def save_args(args,path=None):
# save parameters
Path(path).mkdir(parents=True, exist_ok=True)
with open(f"{path}/params.json", 'w') as fp:
json.dump(args, fp,cls=NpEncoder)
print("Parameters saved!")
# def save_args(args,path=None):
# # save parameters
# Path(path).mkdir(parents=True, exist_ok=True)
# with open(f"{path}/params.json", 'w') as fp:
# json.dump(args, fp,cls=NpEncoder)
# print("Parameters saved!")
def timing(func):
''' a decorator to print the running time of a function
'''
@wraps(func)
def wrap(*args, **kw):
ts = time()
result = func(*args, **kw)
te = time()
print(f"func: {func.__name__}, took: {te-ts:2.4f} seconds")
return result
return wrap
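
A small illustration (hypothetical, not part of the commit) of the timing decorator:

@timing
def rollout(n):
    return sum(i * i for i in range(n))

rollout(10**6)  # prints: func: rollout, took: 0.xxxx seconds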
def all_seed(env,seed = 1):
''' one-stop seeding for RL; mind where this is called: it should come right after the env is created
Args:
@@ -136,7 +179,7 @@ def all_seed(env,seed = 1):
import torch
import numpy as np
import random
print(f"seed = {seed}")
# print(f"seed = {seed}")
env.seed(seed) # env config
np.random.seed(seed)
random.seed(seed)