Update the algorithm template

This commit is contained in:
johnjim0816
2022-11-06 12:15:36 +08:00
parent 466a17707f
commit dc78698262
256 changed files with 17282 additions and 10229 deletions

View File

@@ -0,0 +1,38 @@
class DefaultConfig:
def __init__(self) -> None:
pass
def print_cfg(self):
print(self.__dict__)
class GeneralConfig(DefaultConfig):
def __init__(self) -> None:
self.env_name = "CartPole-v1" # name of environment
self.algo_name = "DQN" # name of algorithm
self.mode = "train" # train or test
self.seed = 0 # random seed
self.device = "cuda" # device to use
self.train_eps = 200 # number of episodes for training
self.test_eps = 20 # number of episodes for testing
self.eval_eps = 10 # number of episodes for evaluation
self.eval_per_episode = 5 # evaluate the policy every eval_per_episode training episodes
self.max_steps = 200 # max steps for each episode
self.load_checkpoint = False # whether to load a saved checkpoint
self.load_path = None # path to load model
self.show_fig = False # show figure or not
self.save_fig = True # save figure or not
class AlgoConfig(DefaultConfig):
def __init__(self) -> None:
# set epsilon_start=epsilon_end can obtain fixed epsilon=epsilon_end
# self.epsilon_start = 0.95 # epsilon start value
# self.epsilon_end = 0.01 # epsilon end value
# self.epsilon_decay = 500 # epsilon decay rate
self.gamma = 0.95 # discount factor
# self.lr = 0.0001 # learning rate
# self.buffer_size = 100000 # size of replay buffer
# self.batch_size = 64 # batch size
# self.target_update = 4 # target network update frequency
class MergedConfig:
def __init__(self) -> None:
pass
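
A minimal usage sketch (not part of the commit, and assuming merge_class_attrs from common.utils as used in the launcher below) of how these config classes are meant to combine into a single object:

from common.config import GeneralConfig, AlgoConfig, MergedConfig
from common.utils import merge_class_attrs

cfg = MergedConfig()
cfg = merge_class_attrs(cfg, GeneralConfig())  # env_name, seed, train_eps, ...
cfg = merge_class_attrs(cfg, AlgoConfig())     # gamma and other algorithm params
print(cfg.__dict__)                            # the merged hyperparameters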

View File

@@ -1,32 +1,124 @@
from common.utils import save_args,save_results,plot_rewards
from common.utils import get_logger,save_results,save_cfgs,plot_rewards,merge_class_attrs,load_cfgs
from common.config import GeneralConfig,AlgoConfig,MergedConfig
import time
from pathlib import Path
import datetime
import argparse
class Launcher:
def __init__(self) -> None:
pass
def get_args(self):
cfg = {}
return cfg
def env_agent_config(self,cfg):
self.get_cfg()
def get_cfg(self):
self.cfgs = {'general_cfg':GeneralConfig(),'algo_cfg':AlgoConfig()} # create config
def process_yaml_cfg(self):
''' load yaml config
'''
parser = argparse.ArgumentParser(description="hyperparameters")
parser.add_argument('--yaml', default = None, type=str,help='the path of config file')
args = parser.parse_args()
if args.yaml is not None:
load_cfgs(self.cfgs, args.yaml)
def print_cfg(self,cfg):
''' print parameters
'''
cfg_dict = vars(cfg)
print("Hyperparameters:")
print(''.join(['=']*80))
tplt = "{:^20}\t{:^20}\t{:^20}"
print(tplt.format("Name", "Value", "Type"))
for k,v in cfg_dict.items():
print(tplt.format(k,v,str(type(v))))
print(''.join(['=']*80))
def env_agent_config(self,cfg,logger):
env,agent = None,None
return env,agent
def train(self,cfg, env, agent):
res_dic = {}
return res_dic
def test(self,cfg, env, agent):
res_dic = {}
return res_dic
def train_one_episode(self,env, agent, cfg):
ep_reward = 0
ep_step = 0
return agent,ep_reward,ep_step
def test_one_episode(self,env, agent, cfg):
ep_reward = 0
ep_step = 0
return agent,ep_reward,ep_step
def evaluate(self,env, agent, cfg):
sum_eval_reward = 0
for _ in range(cfg.eval_eps):
_,eval_ep_reward,_ = self.test_one_episode(env, agent, cfg)
sum_eval_reward += eval_ep_reward
mean_eval_reward = sum_eval_reward/cfg.eval_eps
return mean_eval_reward
# def train(self,cfg, env, agent,logger):
# res_dic = {}
# return res_dic
# def test(self,cfg, env, agent,logger):
# res_dic = {}
# return res_dic
def create_path(self,cfg):
curr_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") # obtain current time
self.task_dir = f"{cfg.mode.capitalize()}_{cfg.env_name}_{cfg.algo_name}_{curr_time}"
Path(self.task_dir).mkdir(parents=True, exist_ok=True)
self.model_dir = f"{self.task_dir}/models/"
self.res_dir = f"{self.task_dir}/results/"
self.log_dir = f"{self.task_dir}/logs/"
def run(self):
cfg = self.get_args()
env, agent = self.env_agent_config(cfg)
res_dic = self.train(cfg, env, agent)
save_args(cfg,path = cfg['result_path']) # save parameters
agent.save_model(path = cfg['model_path']) # save models
save_results(res_dic, tag = 'train', path = cfg['result_path']) # save results
plot_rewards(res_dic['rewards'], cfg, path = cfg['result_path'],tag = "train") # plot results
# testing
# env, agent = self.env_agent_config(cfg) # create new env for testing, sometimes can ignore this step
agent.load_model(path = cfg['model_path']) # load model
res_dic = self.test(cfg, env, agent)
save_results(res_dic, tag='test',
path = cfg['result_path'])
plot_rewards(res_dic['rewards'], cfg, path = cfg['result_path'],tag = "test")
self.process_yaml_cfg() # load yaml config
cfg = MergedConfig() # merge config
cfg = merge_class_attrs(cfg,self.cfgs['general_cfg'])
cfg = merge_class_attrs(cfg,self.cfgs['algo_cfg'])
self.print_cfg(cfg) # print the configuration
self.create_path(cfg) # create the path to save the results
logger = get_logger(self.log_dir) # create the logger
env, agent = self.env_agent_config(cfg,logger)
if cfg.load_checkpoint:
agent.load_model(f"{cfg.load_path}/models/")
logger.info(f"Start {cfg.mode}ing!")
logger.info(f"Env: {cfg.env_name}, Algorithm: {cfg.algo_name}, Device: {cfg.device}")
rewards = [] # record rewards for all episodes
steps = [] # record steps for all episodes
if cfg.mode.lower() == 'train':
best_ep_reward = -float('inf')
for i_ep in range(cfg.train_eps):
agent,ep_reward,ep_step = self.train_one_episode(env, agent, cfg)
logger.info(f"Episode: {i_ep+1}/{cfg.train_eps}, Reward: {ep_reward:.3f}, Step: {ep_step}")
rewards.append(ep_reward)
steps.append(ep_step)
# for _ in range
if (i_ep+1)%cfg.eval_per_episode == 0:
mean_eval_reward = self.evaluate(env, agent, cfg)
if mean_eval_reward >= best_ep_reward: # update best reward
logger.info(f"Current episode {i_ep+1} has the best eval reward: {mean_eval_reward:.3f}")
best_ep_reward = mean_eval_reward
agent.save_model(self.model_dir) # save models with best reward
# env.close()
elif cfg.mode.lower() == 'test':
for i_ep in range(cfg.test_eps):
agent,ep_reward,ep_step = self.test_one_episode(env, agent, cfg)
logger.info(f"Episode: {i_ep+1}/{cfg.test_eps}, Reward: {ep_reward:.3f}, Step: {ep_step}")
rewards.append(ep_reward)
steps.append(ep_step)
agent.save_model(self.model_dir) # save models
# env.close()
logger.info(f"Finish {cfg.mode}ing!")
res_dic = {'episodes':range(len(rewards)),'rewards':rewards,'steps':steps}
save_results(res_dic, self.res_dir) # save results
save_cfgs(self.cfgs, self.task_dir) # save config
plot_rewards(rewards, title=f"{cfg.mode.lower()}ing curve on {cfg.device} of {cfg.algo_name} for {cfg.env_name}" ,fpath= self.res_dir)
# def run(self):
# self.process_yaml_cfg() # load yaml config
# cfg = MergedConfig() # merge config
# cfg = merge_class_attrs(cfg,self.cfgs['general_cfg'])
# cfg = merge_class_attrs(cfg,self.cfgs['algo_cfg'])
# self.print_cfg(cfg) # print the configuration
# self.create_path(cfg) # create the path to save the results
# logger = get_logger(self.log_dir) # create the logger
# env, agent = self.env_agent_config(cfg,logger)
# if cfg.load_checkpoint:
# agent.load_model(f"{cfg.load_path}/models/")
# if cfg.mode.lower() == 'train':
# res_dic = self.train(cfg, env, agent,logger)
# elif cfg.mode.lower() == 'test':
# res_dic = self.test(cfg, env, agent,logger)
# save_results(res_dic, self.res_dir) # save results
# save_cfgs(self.cfgs, self.task_dir) # save config
# agent.save_model(self.model_dir) # save models
# plot_rewards(res_dic['rewards'], title=f"{cfg.mode.lower()}ing curve on {cfg.device} of {cfg.algo_name} for {cfg.env_name}" ,fpath= self.res_dir)
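
The Launcher above leaves env_agent_config, train_one_episode and test_one_episode as stubs. A hypothetical subclass (a sketch, not part of the commit, assuming a classic gym-style env whose step returns (next_state, reward, done, info) and an agent exposing sample_action and update) could fill in the training loop like this:

class MyLauncher(Launcher):
    def train_one_episode(self, env, agent, cfg):
        ep_reward, ep_step = 0, 0
        state = env.reset()
        for _ in range(cfg.max_steps):
            action = agent.sample_action(state)                    # e.g. epsilon-greedy
            next_state, reward, done, _ = env.step(action)
            agent.update(state, action, reward, next_state, done)  # one learning step
            state = next_state
            ep_reward += reward
            ep_step += 1
            if done:
                break
        return agent, ep_reward, ep_step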

View File

@@ -5,7 +5,7 @@ Author: John
Email: johnjim0816@gmail.com
Date: 2021-03-12 21:14:12
LastEditor: John
LastEditTime: 2022-08-29 14:24:44
LastEditTime: 2022-10-31 23:53:06
Description:
Environment:
'''
@@ -35,20 +35,65 @@ class ActorSoftmax(nn.Module):
def __init__(self, input_dim, output_dim, hidden_dim=256):
super(ActorSoftmax, self).__init__()
self.fc1 = nn.Linear(input_dim, hidden_dim)
self.fc2 = nn.Linear(hidden_dim, output_dim)
def forward(self,state):
dist = F.relu(self.fc1(state))
dist = F.softmax(self.fc2(dist),dim=1)
return dist
self.fc2 = nn.Linear(hidden_dim, hidden_dim)
self.fc3 = nn.Linear(hidden_dim, output_dim)
def forward(self,x):
x = F.relu(self.fc1(x))
x = F.relu(self.fc2(x))
probs = F.softmax(self.fc3(x),dim=1)
return probs
class ActorSoftmaxTanh(nn.Module):
def __init__(self, input_dim, output_dim, hidden_dim=256):
super(ActorSoftmaxTanh, self).__init__()
self.fc1 = nn.Linear(input_dim, hidden_dim)
self.fc2 = nn.Linear(hidden_dim, hidden_dim)
self.fc3 = nn.Linear(hidden_dim, output_dim)
def forward(self,x):
x = F.tanh(self.fc1(x))
x = F.tanh(self.fc2(x))
probs = F.softmax(self.fc3(x),dim=1)
return probs
class ActorNormal(nn.Module):
def __init__(self, n_states,n_actions, hidden_dim=256):
super(ActorNormal, self).__init__()
self.fc1 = nn.Linear(n_states, hidden_dim)
self.fc2 = nn.Linear(hidden_dim, hidden_dim)
self.fc3 = nn.Linear(hidden_dim, n_actions)
self.fc4 = nn.Linear(hidden_dim, n_actions)
def forward(self,x):
x = F.relu(self.fc1(x))
x = F.relu(self.fc2(x))
mu = torch.tanh(self.fc3(x))
sigma = F.softplus(self.fc4(x)) + 0.001 # avoid 0
return mu,sigma
# class ActorSoftmax(nn.Module):
# def __init__(self,input_dim, output_dim,
# hidden_dim=256):
# super(ActorSoftmax, self).__init__()
# self.actor = nn.Sequential(
# nn.Linear(input_dim, hidden_dim),
# nn.ReLU(),
# nn.Linear(hidden_dim, hidden_dim),
# nn.ReLU(),
# nn.Linear(hidden_dim, output_dim),
# nn.Softmax(dim=-1)
# )
# def forward(self, state):
# probs = self.actor(state)
# dist = Categorical(probs)
# return dist
class Critic(nn.Module):
def __init__(self,input_dim,output_dim,hidden_dim=256):
super(Critic,self).__init__()
assert output_dim == 1 # critic must output a single value
self.fc1 = nn.Linear(input_dim, hidden_dim)
self.fc2 = nn.Linear(hidden_dim, output_dim)
def forward(self,state):
value = F.relu(self.fc1(state))
value = self.fc2(value)
self.fc2 = nn.Linear(hidden_dim, hidden_dim)
self.fc3 = nn.Linear(hidden_dim, output_dim)
def forward(self,x):
x = F.relu(self.fc1(x))
x = F.relu(self.fc2(x))
value = self.fc3(x)
return value
class ActorCriticSoftmax(nn.Module):
@@ -72,18 +117,18 @@ class ActorCriticSoftmax(nn.Module):
return value, policy_dist
class ActorCritic(nn.Module):
def __init__(self, n_states, n_actions, hidden_dim=256):
def __init__(self, input_dim, output_dim, hidden_dim=256):
super(ActorCritic, self).__init__()
self.critic = nn.Sequential(
nn.Linear(n_states, hidden_dim),
nn.Linear(input_dim, hidden_dim),
nn.ReLU(),
nn.Linear(hidden_dim, 1)
)
self.actor = nn.Sequential(
nn.Linear(n_states, hidden_dim),
nn.Linear(input_dim, hidden_dim),
nn.ReLU(),
nn.Linear(hidden_dim, n_actions),
nn.Linear(hidden_dim, output_dim),
nn.Softmax(dim=1),
)
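
A hypothetical usage sketch (not in the diff; the dimensions are illustrative) of the new actor heads: ActorSoftmax returns action probabilities for a discrete policy, ActorNormal returns (mu, sigma) for a continuous one:

import torch
from torch.distributions import Categorical, Normal

state = torch.randn(1, 4)                          # e.g. a CartPole-v1 observation
probs = ActorSoftmax(input_dim=4, output_dim=2)(state)
discrete_action = Categorical(probs).sample().item()

mu, sigma = ActorNormal(n_states=3, n_actions=1)(torch.randn(1, 3))
continuous_action = Normal(mu, sigma).sample()     # e.g. a Pendulum-v1 torque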

View File

@@ -5,7 +5,7 @@ Author: John
Email: johnjim0816@gmail.com
Date: 2021-03-12 16:02:24
LastEditor: John
LastEditTime: 2022-08-24 10:31:30
LastEditTime: 2022-10-26 07:38:17
Description:
Environment:
'''
@@ -14,8 +14,13 @@ import numpy as np
from pathlib import Path
import matplotlib.pyplot as plt
import seaborn as sns
import json
import yaml
import pandas as pd
from functools import wraps
from time import time
import logging
from pathlib import Path
from matplotlib.font_manager import FontProperties # import font module
@@ -61,17 +66,17 @@ def smooth(data, weight=0.9):
last = smoothed_val
return smoothed
def plot_rewards(rewards,cfg,path=None,tag='train'):
def plot_rewards(rewards,title="learning curve",fpath=None,save_fig=True,show_fig=False):
sns.set()
plt.figure() # create a new figure instance so several plots can be drawn at once
plt.title(f"{tag}ing curve on {cfg['device']} of {cfg['algo_name']} for {cfg['env_name']}")
plt.title(f"{title}")
plt.xlabel('episodes')
plt.plot(rewards, label='rewards')
plt.plot(smooth(rewards), label='smoothed')
plt.legend()
if cfg['save_fig']:
plt.savefig(f"{path}/{tag}ing_curve.png")
if cfg['show_fig']:
if save_fig:
plt.savefig(f"{fpath}/learning_curve.png")
if show_fig:
plt.show()
def plot_losses(losses, algo="DQN", save=True, path='./'):
@@ -85,48 +90,86 @@ def plot_losses(losses, algo="DQN", save=True, path='./'):
plt.savefig(path+"losses_curve")
plt.show()
def save_results(res_dic, tag='train', path = None):
''' save rewards
def save_results(res_dic,fpath = None):
''' save results
'''
Path(path).mkdir(parents=True, exist_ok=True)
Path(fpath).mkdir(parents=True, exist_ok=True)
df = pd.DataFrame(res_dic)
df.to_csv(f"{path}/{tag}ing_results.csv",index=None)
print('Results saved')
def make_dir(*paths):
''' create directories
df.to_csv(f"{fpath}/res.csv",index=None)
def merge_class_attrs(ob1, ob2):
ob1.__dict__.update(ob2.__dict__)
return ob1
def get_logger(fpath):
Path(fpath).mkdir(parents=True, exist_ok=True)
logger = logging.getLogger(name='r') # a named logger; getLogger() with no name would return the root logger
logger.setLevel(logging.DEBUG)
formatter = logging.Formatter(
'%(asctime)s - %(name)s - %(levelname)s: - %(message)s',
datefmt='%Y-%m-%d %H:%M:%S')
# output to file by using FileHandler
fh = logging.FileHandler(fpath+"log.txt")
fh.setLevel(logging.DEBUG)
fh.setFormatter(formatter)
# output to screen by using StreamHandler
ch = logging.StreamHandler()
ch.setLevel(logging.DEBUG)
ch.setFormatter(formatter)
# add Handler
logger.addHandler(ch)
logger.addHandler(fh)
return logger
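
A quick hypothetical use of get_logger (not in the diff): it logs both to the console and to a file under fpath; since fpath is concatenated directly with "log.txt", it should end with a slash:

logger = get_logger("tmp_logs/")        # creates tmp_logs/ and tmp_logs/log.txt
logger.info("hello from the template")  # printed to screen and appended to the file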
def save_cfgs(cfgs, fpath):
''' save config
'''
for path in paths:
Path(path).mkdir(parents=True, exist_ok=True)
Path(fpath).mkdir(parents=True, exist_ok=True)
with open(f"{fpath}/config.yaml", 'w') as f:
for cfg_type in cfgs:
yaml.dump({cfg_type: cfgs[cfg_type].__dict__}, f, default_flow_style=False)
def load_cfgs(cfgs, fpath):
with open(fpath) as f:
load_cfg = yaml.load(f,Loader=yaml.FullLoader)
for cfg_type in cfgs:
for k, v in load_cfg[cfg_type].items():
setattr(cfgs[cfg_type], k, v)
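
A round-trip sketch (not part of the commit; the directory name is hypothetical) of save_cfgs and load_cfgs: the YAML file has one top-level key per config type (general_cfg, algo_cfg), and load_cfgs writes each value back onto the matching config object:

from common.config import GeneralConfig, AlgoConfig

cfgs = {'general_cfg': GeneralConfig(), 'algo_cfg': AlgoConfig()}
save_cfgs(cfgs, "tmp_cfgs")               # writes tmp_cfgs/config.yaml
load_cfgs(cfgs, "tmp_cfgs/config.yaml")   # sets attributes back from the file
print(cfgs['general_cfg'].env_name)       # CartPole-v1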
# def del_empty_dir(*paths):
# ''' delete all empty subdirectories under the given paths
# '''
# for path in paths:
# dirs = os.listdir(path)
# for dir in dirs:
# if not os.listdir(os.path.join(path, dir)):
# os.removedirs(os.path.join(path, dir))
def del_empty_dir(*paths):
''' delete all empty subdirectories under the given paths
'''
for path in paths:
dirs = os.listdir(path)
for dir in dirs:
if not os.listdir(os.path.join(path, dir)):
os.removedirs(os.path.join(path, dir))
class NpEncoder(json.JSONEncoder):
def default(self, obj):
if isinstance(obj, np.integer):
return int(obj)
if isinstance(obj, np.floating):
return float(obj)
if isinstance(obj, np.ndarray):
return obj.tolist()
return json.JSONEncoder.default(self, obj)
# class NpEncoder(json.JSONEncoder):
# def default(self, obj):
# if isinstance(obj, np.integer):
# return int(obj)
# if isinstance(obj, np.floating):
# return float(obj)
# if isinstance(obj, np.ndarray):
# return obj.tolist()
# return json.JSONEncoder.default(self, obj)
def save_args(args,path=None):
# save parameters
Path(path).mkdir(parents=True, exist_ok=True)
with open(f"{path}/params.json", 'w') as fp:
json.dump(args, fp,cls=NpEncoder)
print("Parameters saved!")
# def save_args(args,path=None):
# # save parameters
# Path(path).mkdir(parents=True, exist_ok=True)
# with open(f"{path}/params.json", 'w') as fp:
# json.dump(args, fp,cls=NpEncoder)
# print("Parameters saved!")
def timing(func):
''' a decorator to print the running time of a function
'''
@wraps(func)
def wrap(*args, **kw):
ts = time()
result = func(*args, **kw)
te = time()
print(f"func: {func.__name__}, took: {te-ts:2.4f} seconds")
return result
return wrap
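
A small illustration (hypothetical, not part of the commit) of the timing decorator:

@timing
def rollout(n):
    return sum(i * i for i in range(n))

rollout(10**6)  # prints: func: rollout, took: 0.xxxx seconds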
def all_seed(env,seed = 1):
''' one-stop seeding for RL; mind where this is called: it should come right after the env is created
Args:
@@ -136,7 +179,7 @@ def all_seed(env,seed = 1):
import torch
import numpy as np
import random
print(f"seed = {seed}")
# print(f"seed = {seed}")
env.seed(seed) # env config
np.random.seed(seed)
random.seed(seed)